[bt.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \ [bt.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)] # Create BlobDB object blobDb = bt.BlobDb(title) # Parse FASTA blobDb.parseFasta(fasta_f, fasta_type) # Parse coverage blobDb.parseCovs(cov_libs) # Parse Tax hitLibs = [bt.hitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)] blobDb.parseHits(hitLibs) # Parse nodesDB nodesDB, nodesDB_f = BtIO.getNodesDB(nodes=nodes_f, names=names_f, nodesDB=nodesDB_f) blobDb.nodesDB_f = nodesDB_f if not os.path.isfile(nodesDB_f): print BtLog.status_d['5'] % nodesDB_f BtIO.writeNodesDB(nodesDB, nodesDB_f) # Computing taxonomy based on taxrules print BtLog.status_d['6'] % ",".join(taxrules) blobDb.computeTaxonomy(taxrules, nodesDB) # Generating BlobDB and writing to file print BtLog.status_d['7'] % out_f BtIO.writeJson(blobDb.dump(), out_f)
def main():
    """Build a BlobDB from the command-line inputs and write it out as JSON.

    Options are read with ``docopt(__doc__)``. The FASTA file is parsed,
    the taxonomy nodes database is loaded, similarity hits (if any) are
    parsed and turned into a taxonomy, coverage libraries are parsed, and
    the resulting ``BlobDb`` dump is written to the output file obtained
    from ``BtIO.getOutFile("blobDB", prefix, "json")``.
    """
    args = docopt(__doc__)
    fasta_f = args['--infile']
    fasta_type = args['--type']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--hitsfile']
    prefix = args['--out']
    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    tax_collision_random = args['--tax_collision_random']
    title = args['--title']
    # Coverage is estimated unless --calculate_cov was given.
    estimate_cov_flag = not args['--calculate_cov']

    try:
        min_bitscore_diff = float(args['--min_diff'])
        min_score = float(args['--min_score'])
    except ValueError:
        # Name the exception class here: writing `ValueError()` would create
        # an instance, which never matches a raised ValueError.
        # NOTE(review): presumably BtLog.error() terminates the program;
        # otherwise min_bitscore_diff/min_score stay unbound below - confirm.
        BtLog.error('45')

    # outfile; the title falls back to the output file name
    out_f = BtIO.getOutFile("blobDB", prefix, "json")
    if not title:
        title = out_f

    # coverage
    if not fasta_type and not bam_fs and not cov_fs and not cas_fs:
        BtLog.error('1')
    # Library names are '<type><index>'; order is bam, then cas, then cov.
    cov_libs = (
        [BtCore.CovLibObj('bam' + str(idx), 'bam', lib_f)
         for idx, lib_f in enumerate(bam_fs)]
        + [BtCore.CovLibObj('cas' + str(idx), 'cas', lib_f)
           for idx, lib_f in enumerate(cas_fs)]
        + [BtCore.CovLibObj('cov' + str(idx), 'cov', lib_f)
           for idx, lib_f in enumerate(cov_fs)]
    )

    # taxonomy
    hit_libs = [BtCore.HitLibObj('tax' + str(idx), 'tax', lib_f)
                for idx, lib_f in enumerate(hit_fs)]

    # Create BlobDB object
    blobDb = BtCore.BlobDb(title)
    blobDb.version = interface.__version__

    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)

    # Parse nodesDB OR names.dmp, nodes.dmp
    nodesDB_default = join(dirname(abspath(__file__)), "../data/nodesDB.txt")
    nodesDB, nodesDB_f = BtIO.parseNodesDB(
        nodes=nodes_f,
        names=names_f,
        nodesDB=nodesDB_f,
        nodesDBdefault=nodesDB_default)
    blobDb.nodesDB_f = nodesDB_f

    # Parse similarity hits
    if hit_libs:
        blobDb.parseHits(hit_libs)
        if not taxrules:
            # Default taxrules depend on how many hit libraries were given.
            if len(hit_libs) > 1:
                taxrules = ['bestsum', 'bestsumorder']
            else:
                taxrules = ['bestsum']
        blobDb.computeTaxonomy(taxrules, nodesDB, min_score,
                               min_bitscore_diff, tax_collision_random)
    else:
        print(BtLog.warn_d['0'])

    # Parse coverage
    blobDb.parseCoverage(covLibObjs=cov_libs,
                         estimate_cov=estimate_cov_flag,
                         prefix=prefix)

    # Generating BlobDB and writing to file
    print(BtLog.status_d['7'] % out_f)
    BtIO.writeJson(blobDb.dump(), out_f)
def output(self):
    """Write this view as a tree of JSON files below ``self.view_dir``.

    Files written, in order:
      * ``meta.json`` - result of ``self.get_meta()`` (indent 2)
      * ``gc.json``, ``length.json``, ``names.json`` - the matching
        attributes of ``self``
      * ``covs/<cov_lib>.json`` - one file per entry of ``self.covs``
      * ``taxrule/<taxrule>/<rank>.json`` - one file per taxrule and rank
        in ``self.tax``

    A status line is printed for every file except ``meta.json`` and the
    taxonomy files.
    """
    # meta
    meta = self.get_meta()
    meta_f = join(self.view_dir, "meta.json")
    BtIO.writeJson(meta, meta_f, indent=2)

    # gc, length, names: flat files named after the attribute they hold
    for attr in ("gc", "length", "names"):
        attr_f = join(self.view_dir, "%s.json" % attr)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(BtLog.status_d['13'] % attr_f)
        BtIO.writeJson(getattr(self, attr), attr_f, indent=1)

    # cov
    cov_d = join(self.view_dir, "covs")
    BtIO.create_dir(directory=cov_d)
    for cov_lib, cov in self.covs.items():
        cov_f = join(cov_d, "%s.json" % cov_lib)
        print(BtLog.status_d['13'] % cov_f)
        BtIO.writeJson(cov, cov_f, indent=1)

    # tax
    taxrule_d = join(self.view_dir, "taxrule")
    BtIO.create_dir(directory=taxrule_d)
    for taxrule in self.tax:
        tax_d = join(taxrule_d, taxrule)
        BtIO.create_dir(directory=tax_d)
        for rank in self.tax[taxrule]:
            tax = self.tax[taxrule][rank]
            rank_f = join(tax_d, "%s.json" % rank)
            BtIO.writeJson(tax, rank_f, indent=1)
def output(self):
    """Write this view as flat JSON files directly inside ``self.view_dir``.

    Files written, in order:
      * ``meta.json`` - result of ``self.get_meta()`` (indent 2)
      * ``gc.json`` - ``self.gc`` passed through ``self._format_float``
      * ``length.json`` - ``self.length``
      * ``ncount.json`` - ``self.n_count`` with each value raised to at
        least 0.2; only written when ``max(self.n_count) > 0``
      * ``identifiers.json`` - ``self.names`` (written as-is, not wrapped)
      * ``<name>_cov.json`` / ``<name>_read_cov.json`` - one per entry of
        ``self.covs`` / ``self.read_covs``
      * ``<taxrule>_<rank>.json``, ``..._score.json``, ``..._cindex.json``
        - per taxrule and rank in ``self.tax`` / ``self.tax_scores``

    Numeric series are wrapped as ``{"values": [...]}``. A status line is
    printed for every file except ``meta.json`` and the taxonomy files.
    """
    def at_least(values, floor=0.2):
        # Build a real list: on Python 3 a bare map() is a lazy iterator,
        # which the standard json encoder cannot serialise.
        # NOTE(review): the 0.2 floor presumably keeps zeros plottable on a
        # log axis - confirm with the viewer.
        return [max(value, floor) for value in values]

    # meta
    meta = self.get_meta()
    meta_f = join(self.view_dir, "meta.json")
    BtIO.writeJson(meta, meta_f, indent=2)

    # gc
    gc_f = join(self.view_dir, "gc.json")
    print(BtLog.status_d['13'] % gc_f)
    BtIO.writeJson({"values": self._format_float(self.gc)}, gc_f, indent=1)

    # length
    length_f = join(self.view_dir, "length.json")
    print(BtLog.status_d['13'] % length_f)
    BtIO.writeJson({"values": self.length}, length_f, indent=1)

    # Ns (skipped entirely when no sequence contains any)
    if max(self.n_count) > 0:
        n_f = join(self.view_dir, "ncount.json")
        print(BtLog.status_d['13'] % n_f)
        BtIO.writeJson({"values": at_least(self.n_count)}, n_f, indent=1)

    # identifiers
    ids_f = join(self.view_dir, "identifiers.json")
    print(BtLog.status_d['13'] % ids_f)
    BtIO.writeJson(self.names, ids_f, indent=1)

    # cov
    for cov_name, cov in self.covs.items():
        name = self._remove_cov_suffix(cov_name, self.blobDb.covLibs)
        cov_f = join(self.view_dir, "%s_cov.json" % name)
        print(BtLog.status_d['13'] % cov_f)
        BtIO.writeJson({"values": self._format_float(cov, 0.02)},
                       cov_f, indent=1)

    # read_cov
    for cov_name, cov in self.read_covs.items():
        name = self._remove_cov_suffix(cov_name, self.blobDb.covLibs)
        cov_f = join(self.view_dir, "%s_read_cov.json" % name)
        print(BtLog.status_d['13'] % cov_f)
        BtIO.writeJson({"values": at_least(cov)}, cov_f, indent=1)

    # tax: three files per (taxrule, rank) - assignment, score, c-index
    for taxrule in self.tax:
        for rank in self.tax[taxrule]:
            tax = self._keyed_list(self.tax[taxrule][rank])
            rank_f = join(self.view_dir, "%s_%s.json" % (taxrule, rank))
            BtIO.writeJson(tax, rank_f, indent=1)

            score = self.tax_scores[taxrule][rank]['score']
            score_f = join(self.view_dir,
                           "%s_%s_score.json" % (taxrule, rank))
            BtIO.writeJson({"values": at_least(score)}, score_f, indent=1)

            cindex = self.tax_scores[taxrule][rank]['c_index']
            cindex_f = join(self.view_dir,
                            "%s_%s_cindex.json" % (taxrule, rank))
            BtIO.writeJson({"values": cindex}, cindex_f, indent=1)