def main():
    """Command-line entry point: parse coverage libraries for an assembly.

    Reads the docopt arguments, wraps every BAM and CAS file in a
    ``BtCore.CovLibObj``, reports error '31' through ``BtLog.error`` when no
    coverage library was given, and then parses the FASTA file and the
    coverage libraries into a ``BtCore.BlobDb('cov')``.
    """
    opts = docopt(__doc__)
    assembly_path = opts['--infile']
    bam_paths = opts['--bam']
    cas_paths = opts['--cas']
    out_prefix = opts['--output']
    # Coverage is estimated unless --calculate_cov was passed.
    estimate_cov = not opts['--calculate_cov']

    # Make covLibs (BAM libraries first, then CAS libraries)
    libraries = []
    for lib_type, paths in (('bam', bam_paths), ('cas', cas_paths)):
        for number, path in enumerate(paths):
            libraries.append(BtCore.CovLibObj(lib_type + str(number), lib_type, path))
    if not libraries:
        BtLog.error('31')

    db = BtCore.BlobDb('cov')
    db.version = interface.__version__
    db.parseFasta(assembly_path, None)
    db.parseCoverage(covLibObjs=libraries,
                     estimate_cov=estimate_cov,
                     prefix=out_prefix)
def main():
    """Command-line entry point: parse BAM/SAM/CAS coverage for an assembly.

    Reads the docopt arguments, wraps every BAM, SAM and CAS file in a
    ``BtCore.CovLibObj``, reports error '31' through ``BtLog.error`` when no
    coverage library was given, and then parses the FASTA file and the
    coverage libraries into a ``BtCore.BlobDb('cov')``.
    """
    opts = docopt(__doc__)
    assembly_path = opts['--infile']
    bam_paths = opts['--bam']
    cas_paths = opts['--cas']
    sam_paths = opts['--sam']
    out_prefix = opts['--output']
    skip_base_cov = opts['--no_base_cov']

    # Make covLibs (order: BAM, SAM, CAS)
    libraries = []
    for lib_type, paths in (('bam', bam_paths),
                            ('sam', sam_paths),
                            ('cas', cas_paths)):
        for number, path in enumerate(paths):
            libraries.append(BtCore.CovLibObj(lib_type + str(number), lib_type, path))
    if not libraries:
        BtLog.error('31')

    db = BtCore.BlobDb('cov')
    db.version = blobtools.__version__
    db.parseFasta(assembly_path, None)
    db.parseCoverage(covLibObjs=libraries,
                     no_base_cov=skip_base_cov,
                     prefix=out_prefix)
def main():
    """Command-line entry point: generate views of an existing BlobDb.

    Validates the input file, the requested ranks and the optional sequence
    list, loads the BlobDb, and then builds the requested views (table,
    experimental, concoct, per-library coverage) before handing them to
    ``blobDb.view``.
    """
    opts = docopt(__doc__)
    blobdb_f = opts['--input']
    prefix = opts['--out']
    ranks = opts['--rank']
    taxrule = opts['--taxrule']
    hits_flag = opts['--hits']
    seq_list_f = opts['--list']
    want_concoct = opts['--concoct']
    want_cov = opts['--cov']
    skip_table = opts['--notable']
    experimental = opts['--experimental']

    # Does blobdb_f exist ?
    if not isfile(blobdb_f):
        BtLog.error('0', blobdb_f)
    out_f = BtIO.getOutFile(blobdb_f, prefix, None)

    # Are ranks sane ? 'all' expands to every rank but the last, reversed.
    if 'all' in ranks:
        ranks = RANKS[0:-1][::-1]
    else:
        for candidate in ranks:
            if candidate not in RANKS:
                BtLog.error('9', candidate)

    # Does seq_list file exist?
    seqs = []
    if seq_list_f:
        if not isfile(seq_list_f):
            BtLog.error('0', seq_list_f)
        else:
            seqs = BtIO.parseList(seq_list_f)

    # Load BlobDb
    blobDb = BtCore.BlobDb('new')
    print(BtLog.status_d['9'] % (blobdb_f))
    blobDb.load(blobdb_f)
    blobDb.version = interface.__version__

    # Is taxrule sane and was it computed?
    if blobDb.hitLibs and taxrule not in blobDb.taxrules:
        BtLog.error('11', taxrule, blobDb.taxrules)

    # view(s)
    view_objs = []
    print(BtLog.status_d['14'])

    if not skip_table:
        # The taxrule is only part of the suffix when several hit libraries exist.
        if len(blobDb.hitLibs) > 1:
            table_suffix = "%s.table.txt" % (taxrule)
        else:
            table_suffix = "table.txt"
        view_objs.append(
            BtCore.ViewObj(name="table", out_f=out_f, suffix=table_suffix, body=[]))

    if experimental != 'False':
        meta = BtIO.readYaml(experimental) if isfile(experimental) else {}
        view_objs.append(
            BtCore.ExperimentalViewObj(name="experimental", view_dir=out_f,
                                       blobDb=blobDb, meta=meta))

    if want_concoct:
        if len(blobDb.hitLibs) > 1:
            tax_suffix = "%s.concoct_taxonomy_info.csv" % (taxrule)
            cov_suffix = "%s.concoct_coverage_info.tsv" % (taxrule)
        else:
            tax_suffix = "concoct_taxonomy_info.csv"
            cov_suffix = "concoct_coverage_info.tsv"
        view_objs.append(
            BtCore.ViewObj(name="concoct_tax", out_f=out_f, suffix=tax_suffix, body=dict()))
        view_objs.append(
            BtCore.ViewObj(name="concoct_cov", out_f=out_f, suffix=cov_suffix, body=[]))

    if want_cov:
        # Every coverage library is written immediately as its own view.
        for lib_name, lib_info in blobDb.covLibs.items():
            lib_out_f = BtIO.getOutFile(lib_info['f'], prefix, None)
            lib_view = BtCore.ViewObj(name="covlib", out_f=lib_out_f, suffix="cov", body=[])
            blobDb.view(viewObjs=[lib_view], ranks=None, taxrule=None,
                        hits_flag=None, seqs=None, cov_libs=[lib_name],
                        progressbar=True)

    if view_objs:
        blobDb.view(viewObjs=view_objs, ranks=ranks, taxrule=taxrule,
                    hits_flag=hits_flag, seqs=seqs, cov_libs=[],
                    progressbar=True)
    print(BtLog.status_d['19'])
# NOTE(review): this is a fragment -- the enclosing definition and the
# assignments of refcov_f, catcolour_f, c_index, blobdb_f, plot_title, rank,
# min_length, hide_nohits, taxrule, exclude_groups and format are outside
# this view; confirm the indentation against the surrounding code.

# Optional reference-coverage and category-colour inputs.
refcov_dict = {}
if refcov_f:
    refcov_dict = BtPlot.parseRefCov(refcov_f)
catcolour_dict = {}
if catcolour_f and c_index:
    # A category-colour file and c_index are reported as error '24'.
    BtLog.error('24')
elif catcolour_f:
    catcolour_dict = BtPlot.parseCatColour(catcolour_f)

# Load BlobDb
# print() with a single argument behaves the same on Python 2 and Python 3.
print(BtLog.status_d['9'] % blobdb_f)
blobDB = bt.BlobDb('new')
blobDB.load(blobdb_f)
title = blobDB.title
if plot_title:
    # A truthy plot_title is replaced by the title stored in the BlobDb.
    plot_title = title

# Is taxrule sane and was it computed?
if taxrule not in blobDB.taxrules:
    BtLog.error('11', taxrule, blobDB.taxrules)

data_dict, max_cov, cov_libs, cov_libs_total_reads = blobDB.getPlotData(
    rank, min_length, hide_nohits, taxrule, c_index, catcolour_dict)
plotObj = BtPlot.PlotObj(data_dict, cov_libs, cov_libs_total_reads)
plotObj.exclude_groups = exclude_groups
plotObj.format = format
def main():
    """Command-line entry point: draw blobplots for a BlobDb.

    Reads and checks the docopt arguments, loads the BlobDb, derives the plot
    data, configures a ``BtPlot.PlotObj`` and then, for every coverage library
    of the plot object, writes a scatter plot and/or a read bar plot.  The
    statistics are written once after the loop, using the output name of the
    last library (or '' when there were no libraries).
    """
    args = docopt(__doc__)
    args = BtPlot.check_input(args)
    blobdb_f = args['--infile']
    rank = args['--rank']
    min_length = int(args['--length'])
    max_group_plot = int(args['--plotgroups'])
    hide_nohits = args['--nohit']
    taxrule = args['--taxrule']
    c_index = args['--cindex']
    exclude_groups = args['--exclude']
    labels = args['--label']
    colour_f = args['--colours']
    refcov_f = args['--refcov']
    catcolour_f = args['--catcolour']
    multiplot = args['--multiplot']
    out_prefix = args['--out']
    sort_order = args['--sort']
    sort_first = args['--sort_first']
    hist_type = args['--hist']
    no_title = args['--notitle']
    ignore_contig_length = args['--noscale']
    format_plot = args['--format']
    no_plot_blobs = args['--noblobs']
    no_plot_reads = args['--noreads']
    legend_flag = args['--legend']
    cumulative_flag = args['--cumulative']
    cov_lib_selection = args['--lib']
    filelabel = args['--filelabel']

    exclude_groups = BtIO.parseCmdlist(exclude_groups)
    refcov_dict = BtIO.parseReferenceCov(refcov_f)
    user_labels = BtIO.parseCmdLabels(labels)
    catcolour_dict = BtIO.parseCatColour(catcolour_f)
    colour_dict = BtIO.parseColours(colour_f)

    # Load BlobDb
    # print() with a single argument behaves the same on Python 2 and Python 3.
    print(BtLog.status_d['9'] % blobdb_f)
    blobDb = BtCore.BlobDb('blobplot')
    blobDb.version = blobtools.__version__
    blobDb.load(blobdb_f)

    # Generate plot data
    print(BtLog.status_d['18'])
    data_dict, min_cov, max_cov, cov_lib_dict = blobDb.getPlotData(
        rank, min_length, hide_nohits, taxrule, c_index, catcolour_dict)
    plotObj = BtPlot.PlotObj(data_dict, cov_lib_dict, cov_lib_selection,
                             'blobplot', sort_first)
    plotObj.exclude_groups = exclude_groups
    plotObj.version = blobDb.version
    plotObj.format = format_plot
    plotObj.max_cov = max_cov
    plotObj.min_cov = min_cov
    plotObj.no_title = no_title
    plotObj.multiplot = multiplot
    plotObj.hist_type = hist_type
    plotObj.ignore_contig_length = ignore_contig_length
    plotObj.max_group_plot = max_group_plot
    plotObj.legend_flag = legend_flag
    plotObj.cumulative_flag = cumulative_flag

    # order by which to plot (should know about user label)
    plotObj.group_order = BtPlot.getSortedGroups(data_dict, sort_order, sort_first)
    # labels for each level of stats
    plotObj.labels.update(plotObj.group_order)
    # plotObj.group_labels is dict that contains labels for each group : all/other/user_label
    if user_labels:
        for label in user_labels.values():
            plotObj.labels.add(label)
    plotObj.group_labels = {group: set() for group in plotObj.group_order}
    plotObj.relabel_and_colour(colour_dict, user_labels)
    plotObj.compute_stats()
    plotObj.refcov_dict = refcov_dict

    # Plotting
    info_flag = 1
    out_f = ''
    for cov_lib in plotObj.cov_libs:
        plotObj.ylabel = "Coverage"
        plotObj.xlabel = "GC proportion"
        if filelabel:
            plotObj.ylabel = basename(cov_lib_dict[cov_lib]['f'])

        # Assemble the output name from the options that shaped the plot.
        out_f = "%s.%s.%s.p%s.%s.%s" % (blobDb.title, taxrule, rank,
                                        max_group_plot, hist_type, min_length)
        if catcolour_dict:
            out_f = "%s.%s" % (out_f, "catcolour")
        if ignore_contig_length:
            out_f = "%s.%s" % (out_f, "noscale")
        if c_index:
            out_f = "%s.%s" % (out_f, "c_index")
        if exclude_groups:
            out_f = "%s.%s" % (out_f, "exclude_" + "_".join(exclude_groups))
        if labels:
            # Sort the de-duplicated labels: raw set order is not reproducible
            # between runs, which made the file name unpredictable.
            unique_labels = sorted(set(user_labels.values()))
            out_f = "%s.%s" % (out_f, "userlabel_" + "_".join(unique_labels))
        out_f = "%s.%s" % (out_f, "blobplot")
        if plotObj.cumulative_flag:
            out_f = "%s.%s" % (out_f, "cumulative")
        if plotObj.multiplot:
            out_f = "%s.%s" % (out_f, "multiplot")
        out_f = BtIO.getOutFile(out_f, out_prefix, None)

        if not no_plot_blobs:
            plotObj.plotScatter(cov_lib, info_flag, out_f)
            # info_flag is only set for the first scatter plot.
            info_flag = 0
        # prevent plotting if --noreads or total_reads == 0
        if not no_plot_reads and plotObj.cov_libs_total_reads_dict[cov_lib]:
            plotObj.plotBar(cov_lib, out_f)
    plotObj.write_stats(out_f)
# NOTE(review): this is a fragment -- the enclosing definition, any condition
# guarding the first statement, and the assignments of main_dir, names_f,
# nodes_f, hit_fs, fasta_f, fasta_type, ASSEMBLY_TYPES, title and the *_fs
# lists are outside this view; confirm the indentation against the
# surrounding code.
nodesDB_f = os.path.join(main_dir, nodesDB_f)
# Taxonomy must come from an existing nodesDB file or from names + nodes.
if not (os.path.isfile(nodesDB_f) or (names_f and nodes_f)):
    BtLog.error('3')
if not hit_fs:
    BtLog.error('18')

# can FASTA parser deal with assemblies
if fasta_type not in ASSEMBLY_TYPES:
    BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))

# Is coverage provided?
if not (fasta_type or bam_fs or sam_fs or cov_fs or cas_fs):
    BtLog.error('1')

# Coverage libraries are numbered per type, in the order BAM, SAM, CAS, COV.
cov_libs = []
for lib_type, lib_paths in (('bam', bam_fs), ('sam', sam_fs),
                            ('cas', cas_fs), ('cov', cov_fs)):
    for number, lib_path in enumerate(lib_paths):
        cov_libs.append(bt.CovLibObj(lib_type + str(number), lib_type, lib_path))

# Create BlobDB object
blobDb = bt.BlobDb(title)
# Parse FASTA
blobDb.parseFasta(fasta_f, fasta_type)
# Parse coverage
blobDb.parseCovs(cov_libs)
# Parse Tax
hitLibs = []
for number, hit_path in enumerate(hit_fs):
    hitLibs.append(bt.hitLibObj('tax' + str(number), 'tax', hit_path))
blobDb.parseHits(hitLibs)
def validate_input_create(main_dir, args):
    '''
    Validate the docopt arguments of the create step and build its inputs.

    Accepts:
        - main_dir : directory the default "data/nodesDB.txt" is joined onto
        - args     : docopt args

    Returns (in this order):
        - title
        - fasta_f
        - fasta_type
        - cov_libs
        - hit_libs
        - taxrules
        - nodesDB_f
        - nodes_f
        - names_f
        - out_f

    Problems are reported through BtLog.error:
        - '0'  : an input file does not exist
        - '3'  : neither a nodesDB file nor both names and nodes are available
        - '18' : no hit file given
        - '2'  : unknown assembly type
        - '1'  : neither an assembly type nor any coverage file given
    '''
    ASSEMBLY_TYPES = [None, 'spades', 'soap', 'abyss', 'velvet']
    fasta_f = args['--infile']
    fasta_type = args['--type']
    sam_fs = args['--sam']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--taxfile']
    out_f = args['--out']
    if out_f:
        # Only the base name of --out is kept as prefix of the output file.
        out_f = "%s.%s" % (os.path.basename(out_f), "BlobDB.json")
    else:
        out_f = "BlobDB.json"
    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    title = args['--title'] if args['--title'] else out_f

    # Do files exist ? (options that were not given are None and are skipped)
    files = [fasta_f] + sam_fs + bam_fs + cov_fs + cas_fs \
        + [names_f, nodes_f] + hit_fs
    for f in files:
        if f is not None and not os.path.isfile(f):
            BtLog.error('0', f)

    # Is taxonomy provided?
    if nodesDB_f == "data/nodesDB.txt":
        # The default location is joined onto main_dir.
        nodesDB_f = os.path.join(main_dir, nodesDB_f)
    if not os.path.isfile(nodesDB_f) and not (names_f and nodes_f):
        BtLog.error('3')
    if not hit_fs:
        BtLog.error('18')

    # can FASTA parser deal with assemblies
    if fasta_type not in ASSEMBLY_TYPES:
        BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))

    # Is coverage provided?
    if not fasta_type and not bam_fs and not sam_fs and not cov_fs and not cas_fs:
        BtLog.error('1')

    cov_libs = [bt.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
               [bt.CovLibObj('sam' + str(idx), 'sam', lib_f) for idx, lib_f in enumerate(sam_fs)] + \
               [bt.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \
               [bt.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)]

    hit_libs = [bt.hitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)]

    return title, fasta_f, fasta_type, cov_libs, hit_libs, taxrules, nodesDB_f, nodes_f, names_f, out_f
import lib.BtIO as BtIO
import lib.BtInput as BtInput
import os.path

# Script entry point: validate the command line, build a BlobDb from the
# FASTA file and the hit libraries, then resolve the nodesDB and write it to
# nodesDB_f when that file does not exist yet.
if __name__ == '__main__':
    main_dir = os.path.dirname(__file__)
    args = docopt(__doc__)
    title, fasta_f, fasta_type, cov_libs, hit_libs, taxrules, nodesDB_f, nodes_f, names_f, out_f = \
        BtInput.validate_input_create(main_dir, args)

    # Create BlobDB object
    blobDb = bt.BlobDb(title)

    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)

    # Parse Tax
    blobDb.parseHits(hit_libs)

    # Parse nodesDB
    nodesDB, nodesDB_f = BtIO.getNodesDB(nodes=nodes_f, names=names_f, nodesDB=nodesDB_f)
    blobDb.nodesDB_f = nodesDB_f

    if not os.path.isfile(nodesDB_f):
        # print() with a single argument behaves the same on Python 2 and 3.
        print(BtLog.status_d['5'] % nodesDB_f)
        BtIO.writeNodesDB(nodesDB, nodesDB_f)
def main():
    """Command-line entry point: create a BlobDb and write it as JSON.

    Reads the docopt arguments, builds the coverage and hit library objects,
    parses the FASTA file, resolves the nodesDB, computes the taxonomy when
    hit libraries were given (otherwise prints warning '0'), parses the
    coverage libraries and finally dumps the BlobDb to the output file.

    Non-numeric --min_diff / --min_score values are reported as error '45'
    through ``BtLog.error``; missing coverage input as error '1'.
    """
    args = docopt(__doc__)
    fasta_f = args['--infile']
    fasta_type = args['--type']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--hitsfile']
    prefix = args['--out']
    nodesDB_f = args['--db']
    names_f = args['--names']
    # Coverage is estimated unless --calculate_cov was passed.
    estimate_cov_flag = not args['--calculate_cov']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    try:
        min_bitscore_diff = float(args['--min_diff'])
        min_score = float(args['--min_score'])
    except ValueError:
        # The clause must name the exception class: the former
        # `except ValueError():` compared against an instance and therefore
        # never caught the conversion error.
        BtLog.error('45')
    tax_collision_random = args['--tax_collision_random']
    title = args['--title']

    # outfile
    out_f = BtIO.getOutFile("blobDB", prefix, "json")
    if not title:
        title = out_f

    # coverage
    if not fasta_type and not bam_fs and not cov_fs and not cas_fs:
        BtLog.error('1')
    cov_libs = [BtCore.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
               [BtCore.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \
               [BtCore.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)]

    # taxonomy
    hit_libs = [BtCore.HitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)]

    # Create BlobDB object
    blobDb = BtCore.BlobDb(title)
    blobDb.version = interface.__version__

    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)

    # Parse nodesDB OR names.dmp, nodes.dmp
    nodesDB_default = join(dirname(abspath(__file__)), "../data/nodesDB.txt")
    nodesDB, nodesDB_f = BtIO.parseNodesDB(nodes=nodes_f, names=names_f,
                                           nodesDB=nodesDB_f,
                                           nodesDBdefault=nodesDB_default)
    blobDb.nodesDB_f = nodesDB_f

    # Parse similarity hits
    if hit_libs:
        blobDb.parseHits(hit_libs)
        if not taxrules:
            # Default taxrules depend on the number of hit libraries.
            if len(hit_libs) > 1:
                taxrules = ['bestsum', 'bestsumorder']
            else:
                taxrules = ['bestsum']
        blobDb.computeTaxonomy(taxrules, nodesDB, min_score,
                               min_bitscore_diff, tax_collision_random)
    else:
        print(BtLog.warn_d['0'])

    # Parse coverage
    blobDb.parseCoverage(covLibObjs=cov_libs,
                         estimate_cov=estimate_cov_flag,
                         prefix=prefix)

    # Generating BlobDB and writing to file
    print(BtLog.status_d['7'] % out_f)
    BtIO.writeJson(blobDb.dump(), out_f)
def validate_input_create(main_dir, args):
    '''
    Check the docopt arguments of the create step and assemble its inputs.

    Accepts:
        - main_dir : directory the default "data/nodesDB.txt" is joined onto
        - args     : docopt args

    Returns (in this order):
        - title
        - fasta_f
        - fasta_type
        - cov_libs
        - hit_libs
        - taxrules
        - nodesDB_f
        - nodes_f
        - names_f
        - out_f

    Problems are reported through BtLog.error:
        - '0'  : an input file does not exist
        - '3'  : neither a nodesDB file nor both names and nodes are available
        - '18' : no hit file given
        - '2'  : unknown assembly type
        - '1'  : neither an assembly type nor any coverage file given
    '''
    ASSEMBLY_TYPES = [None, 'spades', 'soap', 'abyss', 'velvet']
    fasta_f = args['--infile']
    fasta_type = args['--type']
    sam_fs = args['--sam']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--taxfile']
    out_f = args['--out']
    if out_f:
        # Only the base name of --out is kept as prefix of the output file.
        out_f = "%s.%s" % (os.path.basename(out_f), "BlobDB.json")
    else:
        out_f = "BlobDB.json"
    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    title = args['--title'] if args['--title'] else out_f

    # Do files exist ? (options that were not given are None and are skipped)
    files = [fasta_f] + sam_fs + bam_fs + cov_fs + cas_fs \
        + [names_f, nodes_f] + hit_fs
    for f in files:
        if f is not None and not os.path.isfile(f):
            BtLog.error('0', f)

    # Is taxonomy provided?
    if nodesDB_f == "data/nodesDB.txt":
        # The default location is joined onto main_dir.
        nodesDB_f = os.path.join(main_dir, nodesDB_f)
    if not os.path.isfile(nodesDB_f) and not (names_f and nodes_f):
        BtLog.error('3')
    if not hit_fs:
        BtLog.error('18')

    # can FASTA parser deal with assemblies
    if fasta_type not in ASSEMBLY_TYPES:
        BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))

    # Is coverage provided?
    if not fasta_type and not bam_fs and not sam_fs and not cov_fs and not cas_fs:
        BtLog.error('1')

    cov_libs = [bt.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
               [bt.CovLibObj('sam' + str(idx), 'sam', lib_f) for idx, lib_f in enumerate(sam_fs)] + \
               [bt.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \
               [bt.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)]

    hit_libs = [bt.hitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)]

    return title, fasta_f, fasta_type, cov_libs, hit_libs, taxrules, nodesDB_f, nodes_f, names_f, out_f