def __workerThread(self, ubiquityThreshold, singleCopyThreshold, minGenomes, colocatedDistThreshold, colocatedGenomeThreshold, metadata, queueIn, queueOut):
    """Process each data item in parallel.

    Repeatedly takes a lineage name from queueIn, looks up its genome ids
    and, when enough genomes are available, builds a marker set for it.
    One result tuple is put on queueOut per lineage. The loop ends when
    None is read from queueIn.

    Parameters:
        ubiquityThreshold: forwarded to MarkerSetBuilder.buildMarkerSet.
        singleCopyThreshold: forwarded to MarkerSetBuilder.buildMarkerSet.
        minGenomes: minimum number of genome ids a lineage must have for
            a marker set to be built.
        colocatedDistThreshold: forwarded to
            MarkerSetBuilder.buildMarkerSet.
        colocatedGenomeThreshold: accepted but not used by this method.
        metadata: forwarded to IMG.genomeIdsByTaxonomy.
        queueIn: queue of lineage names; None marks the end of the work.
        queueOut: queue receiving (lineage, colocatedSets, genomeCount)
            tuples, where colocatedSets is None when the lineage has
            fewer than minGenomes genomes.
    """
    img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
              '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
    markerSetBuilder = MarkerSetBuilder()

    while True:
        lineage = queueIn.get(block=True, timeout=None)
        if lineage is None:
            # sentinel: no more lineages to process
            break

        # the 'Universal' pseudo-lineage is queried as 'prokaryotes'
        taxonomy = 'prokaryotes' if lineage == 'Universal' else lineage
        genomeIds = img.genomeIdsByTaxonomy(taxonomy, metadata)

        if len(genomeIds) >= minGenomes:
            markerSet = markerSetBuilder.buildMarkerSet(
                genomeIds,
                ubiquityThreshold,
                singleCopyThreshold,
                colocatedDistThreshold)
            colocatedSets = markerSet.markerSet
        else:
            colocatedSets = None

        # allow results to be processed or written to file
        queueOut.put((lineage, colocatedSets, len(genomeIds)))
def __workerThread(self, ubiquityThreshold, singleCopyThreshold, minGenomes, colocatedDistThreshold, colocatedGenomeThreshold, metadata, queueIn, queueOut):
    """Process each data item in parallel.

    Reads lineage names from queueIn until None is received. For each
    lineage the genome ids are fetched, a marker set is built if at least
    minGenomes genome ids were found, and a result tuple is put on
    queueOut.

    Parameters:
        ubiquityThreshold: passed to MarkerSetBuilder.buildMarkerSet.
        singleCopyThreshold: passed to MarkerSetBuilder.buildMarkerSet.
        minGenomes: lineages with fewer genome ids than this get no
            marker set.
        colocatedDistThreshold: passed to MarkerSetBuilder.buildMarkerSet.
        colocatedGenomeThreshold: accepted but not used by this method.
        metadata: passed to IMG.genomeIdsByTaxonomy.
        queueIn: source queue of lineage names, terminated by None.
        queueOut: destination queue for (lineage, colocatedSets,
            genomeCount) tuples; colocatedSets is None when no marker
            set was built.
    """
    img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
              '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
    markerSetBuilder = MarkerSetBuilder()

    while True:
        lineage = queueIn.get(block=True, timeout=None)
        if lineage is None:
            # None is the end-of-work marker
            break

        # 'Universal' is looked up under the 'prokaryotes' taxonomy name
        if lineage == 'Universal':
            taxonomy = 'prokaryotes'
        else:
            taxonomy = lineage
        genomeIds = img.genomeIdsByTaxonomy(taxonomy, metadata)

        colocatedSets = None
        if len(genomeIds) >= minGenomes:
            markerSet = markerSetBuilder.buildMarkerSet(genomeIds, ubiquityThreshold, singleCopyThreshold, colocatedDistThreshold)
            colocatedSets = markerSet.markerSet

        # allow results to be processed or written to file
        queueOut.put((lineage, colocatedSets, len(genomeIds)))
def __getUniversalMarkerGenes(self, phyloUbiquityThreshold, phyloSingleCopyThreshold, outputGeneDir):
    """Determine the marker genes common to the Archaea and Bacteria lineages.

    Marker genes are built separately for each lineage that has at least
    one genome id, and the per-lineage results are intersected. The
    intersection is written, as its str() form, to 'phylo_marker_set.txt'
    inside outputGeneDir, and progress is printed to standard output.

    Parameters:
        phyloUbiquityThreshold: forwarded to
            MarkerSetBuilder.buildMarkerGenes.
        phyloSingleCopyThreshold: forwarded to
            MarkerSetBuilder.buildMarkerGenes.
        outputGeneDir: directory in which 'phylo_marker_set.txt' is
            written.

    Returns:
        Tuple (allTrustedGenomeIds, universalMarkerGenes): the set of
        genome ids from every lineage that was processed, and the set of
        marker genes shared by those lineages. The second item is an
        empty set when every lineage was skipped.
    """
    img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
              '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
    markerSetBuilder = MarkerSetBuilder()

    metadata = img.genomeMetadata()

    allTrustedGenomeIds = set()
    phyloMarkerGenes = {}
    for lineage in ['Archaea', 'Bacteria']:
        # get all genomes in lineage
        print('\nIdentifying all ' + lineage + ' genomes.')
        trustedGenomeIds = img.genomeIdsByTaxonomy(lineage, metadata)
        print(' Trusted genomes in lineage: ' + str(len(trustedGenomeIds)))
        if len(trustedGenomeIds) < 1:
            print(' Skipping lineage due to insufficient number of genomes.')
            continue

        allTrustedGenomeIds.update(trustedGenomeIds)

        print(' Building marker set.')
        markerGenes = markerSetBuilder.buildMarkerGenes(
            trustedGenomeIds,
            phyloUbiquityThreshold,
            phyloSingleCopyThreshold)
        phyloMarkerGenes[lineage] = markerGenes

    # universal marker genes
    universalMarkerGenes = None
    for markerGenes in phyloMarkerGenes.values():
        if universalMarkerGenes is None:
            # copy so the in-place intersection below does not modify the
            # set stored for this lineage
            universalMarkerGenes = set(markerGenes)
        else:
            universalMarkerGenes.intersection_update(markerGenes)

    if universalMarkerGenes is None:
        # every lineage was skipped; report an empty result rather than
        # failing on len(None) below
        universalMarkerGenes = set()

    # the context manager closes the file even if the write fails
    with open(os.path.join(outputGeneDir, 'phylo_marker_set.txt'), 'w') as fout:
        fout.write(str(universalMarkerGenes))

    print('')
    print(' Universal marker genes: ' + str(len(universalMarkerGenes)))

    return allTrustedGenomeIds, universalMarkerGenes
def __getUniversalMarkerGenes(self, phyloUbiquityThreshold, phyloSingleCopyThreshold, outputGeneDir):
    """Determine the marker genes common to the Archaea and Bacteria lineages.

    For each of the two lineages that has at least one genome id, marker
    genes are built and the per-lineage results are intersected. The
    intersection is written, as its str() form, to 'phylo_marker_set.txt'
    inside outputGeneDir, and progress is printed to standard output.

    Parameters:
        phyloUbiquityThreshold: forwarded to
            MarkerSetBuilder.buildMarkerGenes.
        phyloSingleCopyThreshold: forwarded to
            MarkerSetBuilder.buildMarkerGenes.
        outputGeneDir: directory in which 'phylo_marker_set.txt' is
            written.

    Returns:
        Tuple (allTrustedGenomeIds, universalMarkerGenes): the set of
        genome ids from every lineage that was processed, and the set of
        marker genes shared by those lineages. The second item is an
        empty set when every lineage was skipped.
    """
    img = IMG('/srv/whitlam/bio/db/checkm/img/img_metadata.tsv',
              '/srv/whitlam/bio/db/checkm/pfam/tigrfam2pfam.tsv')
    markerSetBuilder = MarkerSetBuilder()

    metadata = img.genomeMetadata()

    allTrustedGenomeIds = set()
    phyloMarkerGenes = {}
    for lineage in ['Archaea', 'Bacteria']:
        # get all genomes in lineage
        # single-argument print(...) calls behave the same under the
        # Python 2 print statement and the Python 3 print function
        print('\nIdentifying all ' + lineage + ' genomes.')
        trustedGenomeIds = img.genomeIdsByTaxonomy(lineage, metadata)
        print(' Trusted genomes in lineage: ' + str(len(trustedGenomeIds)))
        if len(trustedGenomeIds) < 1:
            print(' Skipping lineage due to insufficient number of genomes.')
            continue

        allTrustedGenomeIds.update(trustedGenomeIds)

        print(' Building marker set.')
        markerGenes = markerSetBuilder.buildMarkerGenes(trustedGenomeIds, phyloUbiquityThreshold, phyloSingleCopyThreshold)
        phyloMarkerGenes[lineage] = markerGenes

    # universal marker genes
    universalMarkerGenes = None
    for markerGenes in phyloMarkerGenes.values():
        if universalMarkerGenes is None:
            # copy so the in-place intersection below does not modify the
            # set stored for this lineage
            universalMarkerGenes = set(markerGenes)
        else:
            universalMarkerGenes.intersection_update(markerGenes)

    if universalMarkerGenes is None:
        # every lineage was skipped; report an empty result rather than
        # failing on len(None) below
        universalMarkerGenes = set()

    # the context manager closes the file even if the write fails
    with open(os.path.join(outputGeneDir, 'phylo_marker_set.txt'), 'w') as fout:
        fout.write(str(universalMarkerGenes))

    print('')
    print(' Universal marker genes: ' + str(len(universalMarkerGenes)))

    return allTrustedGenomeIds, universalMarkerGenes