# Instantiate the fasta rec lists with BioPython Seq using geneID field of discriptor as key to seq objects genomeOneFastasDict = SeqIO.to_dict(SeqIO.parse(open(genomeFileOne, "rU"), 'fasta'), key_function = lambda rec : rec.description.split()[0]) genomeTwoFastasDict = SeqIO.to_dict(SeqIO.parse(open(genomeFileTwo, "rU"), 'fasta'), key_function = lambda rec : rec.description.split()[0]) # Initiate resultList resultList = [] # Explode orthologList into list of lists JamesDefs.explodeDelimitedList(orthologList, '\t') # Populate a list of GeneIDs in each genome's dict of boundary seqs genomeOneGeneIDs = genomeOneFastasDict.keys() genomeTwoGeneIDs = genomeTwoFastasDict.keys() # Loop through orthologList and call each fasta in orthoPair, format # the new comboFasta and append it to resultList for orthoPair in orthologList: # Test for orthoPair[0] in genomeOneFastasDict and same for orthoPair[1] in genomeTwoFastasDict orthoPair_0_warn = None orthoPair_1_warn = None if orthoPair[0] not in genomeOneGeneIDs: orthoPair_0_warn = 'Yes' if orthoPair[1] not in genomeTwoGeneIDs:
else: print "WARNING: boundaryRegion variable should only be 'up' or 'down'.\nScript exiting." sys.exit() #-------------------------------------------------- # Strip trailing newlines codingBoundsList = map(string.strip, codingBoundsList) resolvedConflictsList = map(string.strip, resolvedConflictsList) # Convert these into lists of lists so that field vals can be interrogated and copied # Explode tab delimited strings of each record into list of values JamesDefs.explodeDelimitedList(codingBoundsList, '\t') JamesDefs.explodeDelimitedList(resolvedConflictsList, '\t') len_codingBoundsList = len(codingBoundsList) len_resolvedConflictsList = len(resolvedConflictsList) # Populate unUsableList unUsableGeneNames = [] i = 0 while i < len_resolvedConflictsList: if int(resolvedConflictsList[i][5]) < shortestUsableBdryReg: unUseableGene = resolvedConflictsList.pop(i)