def main():
    '''
    Script to perform LCA binning with a given read alignment file.

    Parses the command line, loads the tax tree and the alignment file
    (args.alignment_file), assigns tax ids to the alignments, bins the reads
    with LCABinner and prints the resulting assignment.
    '''
    # Command line arguments and database access
    args = TestRunArgParser().parse_args()
    db_access = DataAccess(args)

    print('1. Loading tax tree...')
    taxonomy = TaxTree()
    print('done.')

    print('2. Loading alignment file...')
    reads = ReadContainer()
    reads.load_alignment_data(args.alignment_file)
    # Set tax ids for all alignments
    reads.set_taxids(db_access)
    print('done')

    print('4. Creating LCA solution...')
    solution = LCABinner(taxonomy).bin_reads(reads)
    print('done')

    print('5. Nice output of read assignment here')
    solution.print_nicely(taxonomy)
def main():
    '''
    Script to extract reads without any alignments.

    Loads the alignment file (args.alignment_file), writes the id of every
    read that has no alignments to args.read_ids_out (one id per line) and
    prints the total read count, the no-alignment count and the percentage
    of reads without alignments.
    '''
    # Input arguments
    argparser = ArgParser()
    args = argparser.parse_args()

    # Access database
    dataAccess = DataAccess(args)

    # ------------------ #

    print('1. Loading tax tree...')
    start = time.time()
    # The tree is not used further in this script; it is still loaded so the
    # step and its timing output stay as they were.
    tax_tree = TaxTree()
    end = time.time()
    print("done: {0:.2f} sec".format(end - start))

    # ------------------ #

    print('2. Loading alignment file...')
    start = time.time()
    read_container = ReadContainer()
    read_container.load_alignment_data(args.alignment_file)
    #---SET TAXIDS FOR ALL ALIGNMENTS--#
    read_container.set_taxids(dataAccess)
    end = time.time()
    print("done: {0:.2f} sec".format(end - start))

    # ------------------ #

    # Loop through reads and take those with no alignments.
    # The context manager closes the output file even if a read raises.
    no_aln_count = 0
    with open(args.read_ids_out, 'w') as out_file:
        for read in read_container.fetch_all_reads(format=list):
            if not read.has_alignments():
                out_file.write("{0}\n".format(read.id))
                no_aln_count += 1

    total_read_count = read_container.get_read_count()

    # An empty container would otherwise raise ZeroDivisionError below.
    if total_read_count:
        no_aln_percentage = no_aln_count * 100 / float(total_read_count)
    else:
        no_aln_percentage = 0.0

    print("total number of reads : {0}".format(total_read_count))
    print("reads without alignments: {0}".format(no_aln_count))
    print('')
    print("no aln percentage: {0:.2f}%".format(no_aln_percentage))
def main():
    '''
    Script to experiment with binner.

    Bins the reads of args.input with the LCA binner, loads the correct
    solution from the metasim FASTA file (args.metasim_fasta) and prints the
    rank accuracy of the LCA solution against it.
    '''
    print("Hello world!")

    # Command line arguments and database access
    args = TestRunArgParser().parse_args()
    db_access = DataAccess(args)

    print('1. Loading tax tree...')
    taxonomy = TaxTree()
    print('done.')

    print('2. Loading alignment file...')
    reads = ReadContainer()
    reads.load_alignment_data(args.input)
    # Set tax ids for all alignments
    reads.set_taxids(db_access)
    print('done')

    print('3. Loading correct solution from FASTA with reads produced by metasim...')
    expected_solution = Solution.from_metasim_fasta(args.metasim_fasta, db_access)
    print('done')

    print('4. Creating LCA solution...')
    lca_solution = LCABinner(taxonomy).bin_reads(reads)
    print('done')

    print('5. Evaluating LCA solution...')
    accuracy = RankAccuracy(taxonomy, expected_solution, lca_solution)
    print('done')

    # Print test results
    accuracy.print_data()
def main():
    '''
    Script to run binner in one of the most common
    usage scenarios.
    * load alignment data
    * load taxonomy data
    * do basic alignment data filtering (remove host reads etc.)
    * load referenced CDS records and map alignments to them
    * annotate and bin the reads
    * write the organisms (plus a "Host" entry) as XML to args.output
    '''

    #----------------------------------#
    #------ INPUT ARGUMENTS -----------#
    argparser = TestRunArgParser()
    args = argparser.parse_args()

    #----------------------------------#
    #------- STATIC DATA SOURCE -------#
    # CDS - GI2TAXID -- NAMES -- NODES #
    dataAccess = DataAccess(args)

    #----------------------------------#
    #-------- TAXONOMY TREE -----------#
    print('1. Loading tax tree...')
    tax_tree = TaxTree()
    print('done.')

    #----------------------------------#
    #------- ALIGNMENT DATA SOURCE ----#
    print('2. Loading alignment file...')
    read_container = ReadContainer()
    read_container.load_alignment_data(args.input)
    #---SET TAXIDS FOR ALL ALIGNMENTS--#
    read_container.set_taxids(dataAccess)

    # Remember total number of reads (taken before host filtering)
    total_read_num = read_container.get_read_count()
    print('done')

    #------- FILTER HOST READS -------#
    print('3. Filtering host reads & alignments...')
    new_reads = host_filter.filter_potential_host_reads(
        read_container.fetch_all_reads(format=list),
        tax_tree.tax2relevantTax,
        tax_tree.potential_hosts,
        host_filter.perc_of_host_alignments_larger_than)

    dataAccess.clear_cache()    # deletes gi2taxid cache

    reads_with_no_host_alignments = host_filter.filter_potential_hosts_alignments(
        new_reads,
        tax_tree.tax2relevantTax,
        tax_tree.potential_hosts,
        True,   # delete host alignments
        True,   # filter unassigned
        -1)     # unassigned taxid

    # Host reads = reads present before filtering minus reads that survived it
    host_read_count = (len(read_container.fetch_all_reads(format=list))
                       - len(reads_with_no_host_alignments))
    read_container.set_new_reads(reads_with_no_host_alignments)
    print('done')

    #----------------------------------#
    #------- LOAD ALL RECORDS   -------#
    print('4. Loading referenced records...')
    record_container = RecordContainer()
    record_container.set_db_access(dataAccess)
    record_container.populate(read_container.fetch_all_reads_versions(), table='cds')
    print('done')

    #----------------------------------#
    #-- MAP ALIGNMENTS TO GENES   -----#
    print('5. Mapping alignments to genes...')
    read_container.populate_cdss(record_container)

    #----------------------------------#
    #- RECORD ALL ALIGNMENTS TO GENE  -#
    cds_aln_container = CdsAlnContainer()
    cds_aln_container.populate(read_container.fetch_all_reads(format=list))
    print('done')

    print('6. Estimating organisms present in sample...')
    # NOTE(review): no estimation happens here; the target tax ids are
    # hard-coded. The original author also questioned this ("What is this
    # part?") - confirm where these ids come from.
    target_organisms = [633, 632, 263, 543, 86661, 1392, 55080, 1386]
    print('done.')

    print('7. Annotating reads...')
    annotated_reads = rstate.annotate_reads(
        read_container.fetch_all_reads(format=list),
        cds_aln_container.read2cds,
        tax_tree,
        target_organisms)
    read_container.set_new_reads(annotated_reads)
    print('done')

    print('8. Binning reads...')
    orgs = bin_reads(
        read_container.fetch_all_reads(format=list),
        cds_aln_container.cds_repository,
        cds_aln_container.read2cds,
        tax_tree,
        target_organisms,
        None,
        None,
        False)

    print("total_read_num: " + str(total_read_num))

    print('9. Generating XML...')
    dataset = Dataset(args.xml_description_file)

    # With an empty alignment file there are no reads at all; report a host
    # fraction of 0.0 instead of raising ZeroDivisionError.
    if total_read_num:
        host_fraction = host_read_count / float(total_read_num)
    else:
        host_fraction = 0.0

    host = Organism(host_read_count, host_fraction, None, None, "Host",
                    None, None, [], [], [], is_host=True)

    # Host entry first, then every binned organism; sorted by read amount,
    # largest first.
    xml_organisms = [host]
    xml_organisms.extend(org.to_xml_organism(tax_tree, total_read_num)
                         for org in orgs.values())
    xml_organisms.sort(key=operator.attrgetter("amount_count"), reverse=True)

    xml = XMLOutput(dataset, xml_organisms, args.output)
    xml.xml_output()
def main():
    '''
    Script to analyse the CDSs covered by an alignment file, with focus on
    ribosomal CDSs.

    Steps, in order:
    * load the tax tree and the alignment file (args.alignment_file)
    * optionally (args.remove_host) filter out host reads
    * load referenced CDS records and map alignments to them
    * export statistics for all valid CDSs, for ribosomal CDSs and for
      ribosomal CDSs whose mean coverage is above the ribosomal average
    * optionally (args.export_charts) export chart coverage data
    * run the read assignment analysis for the species of the kept CDSs

    A text summary is written to "CDSs_summary.txt" in args.export_folder,
    which is created if it does not exist.
    '''
    # Input arguments
    argparser = ArgParser()
    args = argparser.parse_args()

    # Access database
    dataAccess = DataAccess(args)

    # ------------------ #

    print('1. Loading tax tree...')
    start = time.time()
    tax_tree = TaxTree()
    end = time.time()
    print("done: {0:.2f} sec".format(end - start))

    # ------------------ #

    print('2. Loading alignment file...')
    start = time.time()
    read_container = ReadContainer()
    read_container.load_alignment_data(args.alignment_file)
    #---SET TAXIDS FOR ALL ALIGNMENTS--#
    read_container.set_taxids(dataAccess)
    end = time.time()
    print("done: {0:.2f} sec".format(end - start))

    # ------------------ #

    # Create folder if does not exist
    if not os.path.exists(args.export_folder):
        os.makedirs(args.export_folder)

    # File for data analysis summary. The context manager closes it even if
    # one of the analysis steps below raises.
    summary_path = os.path.join(args.export_folder, "CDSs_summary.txt")
    with open(summary_path, 'w') as cds_summary:

        if args.remove_host:
            print("Removing host...")
            start = time.time()

            #------- FILTER HOST READS -------#
            new_reads = host_filter.filter_potential_host_reads(
                read_container.fetch_all_reads(format=list),
                tax_tree.tax2relevantTax,
                tax_tree.potential_hosts,
                host_filter.perc_of_host_alignments_larger_than)

            dataAccess.clear_cache()    # deletes gi2taxid cache

            reads_with_no_host_alignments = host_filter.filter_potential_hosts_alignments(
                new_reads,
                tax_tree.tax2relevantTax,
                tax_tree.potential_hosts,
                True,   # delete host alignments
                True,   # filter unassigned
                -1)     # unassigned taxid

            read_count = len(read_container.fetch_all_reads(format=list))
            host_read_count = read_count - len(reads_with_no_host_alignments)
            non_host_read_count = read_count - host_read_count

            # With no reads at all both counts are 0, so dividing by 1.0
            # reports 0.00 instead of raising ZeroDivisionError.
            denominator = float(read_count) if read_count else 1.0

            cds_summary.write("total : {0:8d}\n".format(read_count))
            cds_summary.write("host : {0:8d} {1:.2f}\n".format(
                host_read_count, host_read_count / denominator))
            cds_summary.write("non-host: {0:8d} {1:.2f}\n".format(
                non_host_read_count, non_host_read_count / denominator))

            # Set host-free reads
            read_container.set_new_reads(reads_with_no_host_alignments)

            end = time.time()
            print("done: {0:.2f} sec".format(end - start))

        #------- LOAD ALL RECORDS   -------#
        print('4. Loading referenced records...')
        start = time.time()
        record_container = RecordContainer()
        record_container.set_db_access(dataAccess)
        record_container.populate(read_container.fetch_all_reads_versions(), table='cds')
        end = time.time()
        print("done: {0:.2f} sec".format(end - start))

        #-- MAP ALIGNMENTS TO GENES   -----#
        print('5. Mapping alignments to genes...')
        start = time.time()
        read_container.populate_cdss(record_container)
        end = time.time()
        print("done: {0:.2f} sec".format(end - start))

        #- RECORD ALL ALIGNMENTS TO GENE  -#
        print('6. Populating CDS container...')
        start = time.time()
        cds_aln_container = CdsAlnContainer()
        cds_aln_container.populate(read_container.fetch_all_reads(format=list))
        end = time.time()
        print("done: {0:.2f} sec".format(end - start))

        # ------------------------------- #

        # Sorting by std/mean is disabled; the CDS alignments are only fetched.
        print('Sorting CDSs ...DISABLED')
        start = time.time()
        cds_alns = cds_aln_container.fetch_all_cds_alns(format=list)
        end = time.time()
        print("done: {0:.2f} sec".format(end - start))

        # ------------------------------- #

        # Count Nones in cds_alns
        nones = count_nones(cds_alns)
        cds_summary.write("\n")
        cds_summary.write("gene None : {0}\n".format(nones['gene']))
        cds_summary.write("protein_id None: {0}\n".format(nones['protein_id']))
        cds_summary.write("product None : {0}\n".format(nones['product']))
        cds_summary.write("\n")

        cds_summary.write("CDSs all: {0}\n".format(len(cds_alns)))

        print('Filtering valid CDSs...')
        start = time.time()

        # Remove CDSs with too low mean coverage value or length
        min_mean_coverage = 0
        min_length = 0
        cds_alns_targeted = [cds_aln for cds_aln in cds_alns
                             if cds_aln.get_cds_length() > min_length
                             and cds_aln.get_mean_coverage() > min_mean_coverage]

        # Remove CDSs with no product
        cds_alns_targeted = [cds_aln for cds_aln in cds_alns_targeted
                             if cds_aln.cds.product is not None]

        end = time.time()
        print("done: {0:.2f} sec".format(end - start))

        # All valid CDSs - Output coverage/length histogram data
        print("Exporting phase 1 - all CDSs...")
        start = time.time()
        export_CDS_stats_data(cds_alns_targeted, args.export_folder, "1_all_valid_CDSs.txt")
        end = time.time()
        print("done: {0:.2f} sec".format(end - start))

        # ------------------- CDSs filtered and ready to be analyzed ------------------- #

        print('Extracting ribosomal CDSs...')

        # Number of targeted CDSs
        cds_summary.write("CDSs valid: {0}\n".format(len(cds_alns_targeted)))

        # Keep the CDSs whose product is_ribosomal() accepts
        cds_alns_ribosomal = [cds_aln for cds_aln in cds_alns_targeted
                              if is_ribosomal(cds_aln.cds.product)]

        print('done')

        # ------------------- Ribosomal CDSs acquired! --------------------- #

        print('Analysing ribosomals...')

        # Mean of the per-CDS mean coverages, and the largest of them
        mm_cov = 0
        max_cov = 0
        for cds_aln in cds_alns_ribosomal:
            mean_cov = cds_aln.get_mean_coverage()
            mm_cov += mean_cov
            max_cov = max(max_cov, mean_cov)
        # A positive sum implies at least one ribosomal CDS, so the
        # division below cannot be by zero.
        if mm_cov > 0:
            mm_cov /= len(cds_alns_ribosomal)

        cds_summary.write("ribosomals all {0}\n".format(len(cds_alns_ribosomal)))
        cds_summary.write("mean coverage: {0}\n".format(mm_cov))
        cds_summary.write("max coverage : {0}\n".format(max_cov))

        print('done')

        # Ribosomal CDSs only - Output coverage/length histogram
        print("Exporting phase 2 - ribosomal CDSs only...")
        export_CDS_stats_data(cds_alns_ribosomal, args.export_folder, "2_ribosomal_CDSs.txt")
        print("done")

        # ------------------- Making biological sense - choosing CDSs -------------------- #

        print('Filtering under-average ribosomals...')
        # NOTE: take length into consideration?
        cds_alns_ribosomal = [cds_aln for cds_aln in cds_alns_ribosomal
                              if cds_aln.get_mean_coverage() > mm_cov]
        print('done')

        cds_summary.write("ribosomals over-mean: {0}\n".format(len(cds_alns_ribosomal)))

    print('Phase 3 - filtered ribosomal CDSs...')
    export_CDS_stats_data(cds_alns_ribosomal, args.export_folder, "3_ribosomal_CDSs_filtered.txt")
    print('done')

    # Store charts cov data - if selected so
    if args.export_charts:
        print("Exporting chart coverage data...")
        export_CDS_graph_data(cds_alns_ribosomal, args.export_charts)
        print("done.")

    # --------------------- I have chosen CDSs - determine species and analyse ------------------------ #

    # Species level resolution
    # See which species are present - dump ones with not enough CDSs
    # NOTE: So far done in determine_species_by_ribosomals.py

    CDS_count = {}          # Count CDSs of each species
    species_set = set()
    # Get estimated tax_ids
    for cds_aln in cds_alns_ribosomal:
        tax_id = cds_aln.cds.taxon
        # Put each tax_id up to the "species" level
        tax_id_species = tax_tree.get_parent_with_rank(tax_id, 'species')
        species_set.add(tax_id_species)
        CDS_count[tax_id_species] = CDS_count.get(tax_id_species, 0) + 1

    # ------------ Read assignment analysis -------------- #
    print("Read assignment analysis...")
    reads = read_container.fetch_all_reads(format=list)
    assignment_analysis(species_set, reads, tax_tree, args.export_folder, CDS_count)
def main():
    '''
    Script to run binner in one of the most common
    usage scenarios.
    * load alignment data
    * load taxonomy data
    * do basic alignment data filtering (remove host reads etc.)
    * load referenced 'cds' and 'rrna' records and map alignments to genes
    '''

    # ---- Input arguments and static data source ---- #
    args = PickleParser().parse_args()
    db_access = DataAccess(args)

    # ---- Taxonomy tree ---- #
    print('1. Loading tax tree...')
    taxonomy = TaxTree()
    print('done.')

    # ---- Alignment data source ---- #
    print('2. Loading alignment file...')
    reads = ReadContainer()
    reads.load_alignment_data(args.input)
    # Set tax ids for all alignments
    reads.set_taxids(db_access)
    print('done')

    # ---- Filter host reads ---- #
    print('3. Filtering host reads & alignments...')
    candidate_reads = host_filter.filter_potential_host_reads(
        reads.fetch_all_reads(format=list),
        taxonomy.tax2relevantTax,
        taxonomy.potential_hosts,
        host_filter.perc_of_host_alignments_larger_than)

    # Deletes the gi2taxid cache
    db_access.clear_cache()

    host_free_reads = host_filter.filter_potential_hosts_alignments(
        candidate_reads,
        taxonomy.tax2relevantTax,
        taxonomy.potential_hosts,
        True,   # delete host alignments
        True,   # filter unassigned
        -1)     # unassigned taxid

    # Reads before filtering minus reads that survived it
    reads_before_filter = len(reads.fetch_all_reads(format=list))
    host_read_count = reads_before_filter - len(host_free_reads)
    reads.set_new_reads(host_free_reads)
    print('done')

    # ---- Load all records ---- #
    print('4. Loading referenced records...')
    records = RecordContainer()
    records.set_db_access(db_access)
    for table_name in ('cds', 'rrna'):
        records.populate(reads.fetch_all_reads_versions(), table=table_name)
    print('done')

    # ---- Map alignments to genes ---- #
    print('5. Mapping alignments to genes...')
    reads.populate_cdss(records)

    # ---- Record all alignments to gene ---- #
    cds_alignments = CdsAlnContainer()
    cds_alignments.populate(reads.fetch_all_reads(format=list))
    print('done')
def main():
    '''
    Script to analyse genes expressed for given tax id.

    Loads the alignment file (args.alignment_file), maps alignments to CDS
    records and prints how many CDSs exist in total and how many belong to
    args.tax_id while passing the length, mean coverage and gene/product
    filters.
    '''
    # Input arguments
    argparser = ArgParser()
    args = argparser.parse_args()

    # Access database
    dataAccess = DataAccess(args)

    print('1. Loading tax tree...')
    # The tree is not used further in this script; it is still loaded so the
    # step output stays as it was.
    tax_tree = TaxTree()
    print('done.')

    print('2. Loading alignment file...')
    read_container = ReadContainer()
    read_container.load_alignment_data(args.alignment_file)
    #---SET TAXIDS FOR ALL ALIGNMENTS--#
    read_container.set_taxids(dataAccess)
    print('done')

    # TODO: Here i should recognize host reads!

    #----------------------------------#
    #------- LOAD ALL RECORDS   -------#
    print('4. Loading referenced records...')
    record_container = RecordContainer()
    record_container.set_db_access(dataAccess)
    record_container.populate(read_container.fetch_all_reads_versions(), table='cds')
    print('done')

    #----------------------------------#
    #-- MAP ALIGNMENTS TO GENES   -----#
    print('5. Mapping alignments to genes...')
    read_container.populate_cdss(record_container)

    #----------------------------------#
    #- RECORD ALL ALIGNMENTS TO GENE  -#
    print('6. Populating CDS container...')
    cds_aln_container = CdsAlnContainer()
    cds_aln_container.populate(read_container.fetch_all_reads(format=list))
    print('done')

    print("Loaded CDS container")

    # Take only CDSs of given tax_id
    # Remove CDSs with too low mean coverage value
    min_mean_coverage = 10
    min_length = 20
    cds_alns = cds_aln_container.fetch_all_cds_alns(format=list)
    print("All CDSs (all organisms): " + str(len(cds_alns)))

    cds_alns_targeted = [cds_aln for cds_aln in cds_alns
                         if cds_aln.get_tax_id() == args.tax_id
                         and cds_aln.get_cds_length() > min_length
                         and cds_aln.get_mean_coverage() > min_mean_coverage]

    # Remove CDSs with no gene/product
    cds_alns_targeted = [cds_aln for cds_aln in cds_alns_targeted
                         if cds_aln.cds.gene is not None
                         and cds_aln.cds.product is not None]

    # ------------------- CDSs filtered and ready to be analyzed ------------------- #

    # Number of targeted CDSs
    print("Targeted CDSs: " + str(len(cds_alns_targeted)))

    # TODO: Here I should somehow determine which CDSs are "expressed", and
    # which are not.
    # NOTE: earlier disabled experiments (sorting by stddev/mean, writing
    # gene/protein_id pairs to args.export_path, mean CDS length, exporting
    # coverage of the best 50 CDSs) were kept here as string literals and
    # never executed; they have been removed.
def main():
    '''
    Script to identify and analyse ribosomal genes expressed.

    Loads the alignment file (args.alignment_file), maps alignments to CDS
    records, keeps the valid CDSs whose product contains "ribosomal", prints
    their coverage statistics and a per-CDS table, and exports their graph
    data to args.export_path.
    '''
    # Input arguments
    argparser = ArgParser()
    args = argparser.parse_args()

    # Access database
    dataAccess = DataAccess(args)

    tax_tree = TaxTree()

    read_container = ReadContainer()
    read_container.load_alignment_data(args.alignment_file)
    #---SET TAXIDS FOR ALL ALIGNMENTS--#
    read_container.set_taxids(dataAccess)

    # TODO: Here i should recognize host reads!
    # NOTE: a host-read filtering step was kept here as a never-executed
    # string literal; it has been removed.

    #----------------------------------#
    #------- LOAD ALL RECORDS   -------#
    record_container = RecordContainer()
    record_container.set_db_access(dataAccess)
    record_container.populate(read_container.fetch_all_reads_versions(), table='cds')

    #----------------------------------#
    #-- MAP ALIGNMENTS TO GENES   -----#
    read_container.populate_cdss(record_container)

    #----------------------------------#
    #- RECORD ALL ALIGNMENTS TO GENE  -#
    cds_aln_container = CdsAlnContainer()
    cds_aln_container.populate(read_container.fetch_all_reads(format=list))

    # Remove CDSs with too low mean coverage value or length
    min_mean_coverage = 0
    min_length = 0
    cds_alns = cds_aln_container.fetch_all_cds_alns(format=list)
    print("CDSs all : " + str(len(cds_alns)))

    cds_alns_targeted = [cds_aln for cds_aln in cds_alns
                         if cds_aln.get_cds_length() > min_length
                         and cds_aln.get_mean_coverage() > min_mean_coverage]

    # Remove CDSs with no gene/product
    cds_alns_targeted = [cds_aln for cds_aln in cds_alns_targeted
                         if cds_aln.cds.gene is not None
                         and cds_aln.cds.product is not None]

    # ------------------- CDSs filtered and ready to be analyzed ------------------- #

    # Number of targeted CDSs
    print("CDSs valid: " + str(len(cds_alns_targeted)))
    print('')

    # Keep CDSs that have the word "ribosomal" in the product name.
    # product is never None here because of the filter above.
    cds_alns_ribosomal = [cds_aln for cds_aln in cds_alns_targeted
                          if "ribosomal" in cds_aln.cds.product]

    # ------------------- Ribosomal CDSs acquired! --------------------- #

    # Sort it, ascending by std/mean
    cds_alns_ribosomal = sorted(cds_alns_ribosomal,
                                key=lambda cds_aln: cds_aln.get_std_over_mean(),
                                reverse=False)

    # Mean of the per-CDS mean coverages, and the largest of them
    mm_cov = 0
    max_cov = 0
    for cds_aln in cds_alns_ribosomal:
        mean_cov = cds_aln.get_mean_coverage()
        mm_cov += mean_cov
        max_cov = max(max_cov, mean_cov)
    # A positive sum implies at least one ribosomal CDS, so the division
    # below cannot be by zero.
    if mm_cov > 0:
        mm_cov /= len(cds_alns_ribosomal)

    # Print
    print("ribosomals: " + str(len(cds_alns_ribosomal)))
    print("mean coverage: " + str(mm_cov))
    print("max coverage : {0}".format(max_cov))
    print('')

    for cds_aln in cds_alns_ribosomal:
        gene = cds_aln.cds.gene
        product = cds_aln.cds.product
        protein_id = cds_aln.cds.protein_id
        taxon = cds_aln.cds.taxon
        name = tax_tree.nodes[taxon].organism_name
        print("{0:4} {1:10} {2:50} {3:10d} {4:60}".format(gene, protein_id, product, taxon, name))

    # Store graph data
    export_CDS_graph_data(cds_alns_ribosomal, args.export_path)