# Directory and database settings.
# The three directory values start out empty; they are set later by a set
# method, via its parameter list.
GENE_CALL_DIR = ""
HMM_OUT_DIR = ""
PVOGS_OUT_DIR = ""
PVOGS_FASTA_DB_NAME = "pVOGs.fasta"

# Limit on the number of hits that should be processed; do not go over this.
HIT_COUNT_MAX = 50
# An HMM search can return many hits; only this many top hits are reported
# by default.
MAX_SEQ_HITS = 5

# Output format codes.
TBL, XML, LIST = 1, 2, 3

# Template annotation record.
annotation = phate_annotation.annotationRecord()


class multiHMM(object):
    # Configuration and temporary working data for running an HMM program
    # (jackhmmer unless changed).

    def __init__(self):
        # Program to run: one of 'jackhmmer', 'hmmsearch', 'hmmbuild',
        # 'hmmalign', 'hmmscan', 'phmmer'.
        self.hmmProgram = 'jackhmmer'

        # List of phate_annotation objects; blast output gets temporarily
        # stored here.
        self.blastAnnotations = []

        # NOTE: topHitList is intended to move to the hit class, so it is
        # not created here.

        # Directories; each of these needs to be set before use.
        self.geneCallDir = ""
        self.hmmOutDir = ""

        # Default format for jackhmmer output.
        self.outputFormat = TBL

        self.pVOGsOutDir = ""  # needs to be set

        # Number of top hits to report; starts at the module default.
        self.topHitCount = MAX_SEQ_HITS

        # Boolean switches, off by default (presumably select which HMM
        # databases are searched -- confirm against the callers).
        self.NCBI_VIRUS_PROTEIN_HMM = False
        self.NR_HMM = False
# Verbosity / housekeeping settings, read from the environment at import time.
# Each lookup is a strict subscript, so a missing variable raises KeyError.
CLEAN_RAW_DATA = os.environ["CLEAN_RAW_DATA"]
PHATE_WARNINGS = os.environ["PHATE_WARNINGS"]
PHATE_MESSAGES = os.environ["PHATE_MESSAGES"]
PHATE_PROGRESS = os.environ["PHATE_PROGRESS"]

# Debug switch; change to True while debugging.
DEBUG = False

# Header line for GFF output.
GFF_COMMENT = "##gff-version 3"

# Template objects.
annotationObj = phate_annotation.annotationRecord()
fastaObj = phate_fastaSequence.fasta()

# Translation table used for reverse-complementing: maps each nucleotide code
# (lower and upper case, ambiguity codes included) to its complement.
# str.maketrans is the Python 3 form; Python 2 used string.maketrans.
complements = str.maketrans(
    'acgtrymkbdhvACGTRYMKBDHV',
    'tgcayrkmvhdbTGCAYRKMVHDB',
)


class genome(object):
    # A genome/chromosome or a plasmid, together with its genes and proteins,
    # represented with fasta objects. The contig set, the gene set, and the
    # protein set are each held as a multi-fasta object.

    def __init__(self):
        # Name of the file that contains the genome sequence.
        self.filename = ""