예제 #1
0
# Working directories. All three start out empty; they are set by a set
# method, via its parameter list.
GENE_CALL_DIR = HMM_OUT_DIR = PVOGS_OUT_DIR = ""
PVOGS_FASTA_DB_NAME = "pVOGs.fasta"

# Hit limits.
HIT_COUNT_MAX = 50  # ceiling on the number of hits that should be processed
MAX_SEQ_HITS = 5  # default for how many top hits to report; an HMM search can return many

# Output format codes.
TBL, XML, LIST = 1, 2, 3

# Templates: a single module-level annotationRecord instance, created once at import time.
# NOTE(review): presumably used as a prototype that is copied when a new
# annotation record is needed -- confirm against the code that uses it.
annotation = phate_annotation.annotationRecord()


class multiHMM(object):
    """Settings and scratch storage for an HMM search.

    The constructor only assigns defaults. The three directory attributes
    start out empty and need to be set before they are used.
    """

    def __init__(self):
        # Program to run; select from 'jackhmmer', 'hmmsearch', 'hmmbuild',
        # 'hmmalign', 'hmmscan', 'phmmer'. Default is jackhmmer.
        self.hmmProgram = 'jackhmmer'
        self.outputFormat = TBL  # default format for jackhmmer output
        self.topHitCount = MAX_SEQ_HITS  # starts at the module-wide default

        # Directories: each one needs to be set.
        self.geneCallDir = self.hmmOutDir = self.pVOGsOutDir = ""

        # List of phate_annotation objects; blast output gets temporarily
        # stored here.
        # (A topHitList attribute is slated to move to the hit class.)
        self.blastAnnotations = list()

        # Both switches start off.
        # NOTE(review): presumably these select which HMM databases are
        # searched -- confirm against the code that reads them.
        self.NCBI_VIRUS_PROTEIN_HMM = self.NR_HMM = False
예제 #2
0
# Verbosity
# Each setting below is read directly from the process environment when this
# module is imported. The values are therefore strings, and the import raises
# KeyError if any of the four variables is not set.

CLEAN_RAW_DATA = os.environ["CLEAN_RAW_DATA"]
PHATE_WARNINGS = os.environ["PHATE_WARNINGS"]
PHATE_MESSAGES = os.environ["PHATE_MESSAGES"]
PHATE_PROGRESS = os.environ["PHATE_PROGRESS"]

# Module-level debug switch; off by default. Swap in the commented line below
# to turn it on.
DEBUG = False
#DEBUG           = True

# For GFF output: the GFF version 3 directive line.
# NOTE(review): presumably written as the first line of every generated GFF
# file -- confirm against the code that writes it.
GFF_COMMENT = "##gff-version 3"

# Templates: one annotationRecord and one fasta instance, each created once
# when this module is imported.
# NOTE(review): presumably used as prototypes that are copied when new records
# or sequences are needed -- confirm against the code that uses them.
annotationObj = phate_annotation.annotationRecord()
fastaObj = phate_fastaSequence.fasta()

# Reverse complement
# Translation table for str.translate(). It maps every nucleotide code listed,
# including the ambiguity codes r/y, m/k, b/v and d/h, to its complement in
# both lower and upper case; characters that are not listed pass through
# unchanged. The table only complements -- reversing the sequence is left to
# the code that uses it.
# (Python 2 spelled this string.maketrans with the same two arguments.)
complements = str.maketrans(
    'acgtrymkbdhv' 'ACGTRYMKBDHV',
    'tgcayrkmvhdb' 'TGCAYRKMVHDB',
)


class genome(object):

    # Class genome uses fasta objects to represent a genome/chromosome or a plasmid and its genes and proteins.
    # The chromosome's contig set, gene set, and protein set are each represented by multi-fasta objects.

    def __init__(self):
        self.filename = ""  # name of file that contains the genome sequence