def main():
    """Parse coverage for an assembly from BAM and/or CAS alignment files.

    Command-line options are read through docopt. Every alignment file is
    wrapped in a ``BtCore.CovLibObj`` (BAM libraries first, then CAS), the
    FASTA file is loaded into a new ``BlobDb`` titled 'cov', and the
    libraries are passed to ``BlobDb.parseCoverage``.

    ``BtLog.error('31')`` is called when no alignment file was supplied.
    """
    cli = docopt(__doc__)
    assembly_path = cli['--infile']
    library_sources = (('bam', cli['--bam']), ('cas', cli['--cas']))
    out_prefix = cli['--output']
    # Coverage is estimated unless --calculate_cov was given.
    estimate_cov = not cli['--calculate_cov']

    # Libraries are named '<type><index>', indexed per library type.
    libraries = []
    for kind, paths in library_sources:
        for number, path in enumerate(paths):
            libraries.append(BtCore.CovLibObj(kind + str(number), kind, path))
    if not libraries:
        BtLog.error('31')

    database = BtCore.BlobDb('cov')
    database.version = interface.__version__
    database.parseFasta(assembly_path, None)
    database.parseCoverage(
        covLibObjs=libraries,
        estimate_cov=estimate_cov,
        prefix=out_prefix,
    )
def main():
    """Parse coverage for an assembly from BAM, SAM and/or CAS alignment files.

    Command-line options are read through docopt. Every alignment file is
    wrapped in a ``BtCore.CovLibObj`` (BAM first, then SAM, then CAS), the
    FASTA file is loaded into a new ``BlobDb`` titled 'cov', and the
    libraries are passed to ``BlobDb.parseCoverage`` together with the
    ``--no_base_cov`` flag and the output prefix.

    ``BtLog.error('31')`` is called when no alignment file was supplied.
    """
    cli = docopt(__doc__)
    assembly_path = cli['--infile']
    bam_paths = cli['--bam']
    cas_paths = cli['--cas']
    sam_paths = cli['--sam']
    out_prefix = cli['--output']
    skip_base_cov = cli['--no_base_cov']

    # Libraries are named '<type><index>', indexed per library type.
    libraries = []
    for kind, paths in (('bam', bam_paths), ('sam', sam_paths), ('cas', cas_paths)):
        for number, path in enumerate(paths):
            libraries.append(BtCore.CovLibObj(kind + str(number), kind, path))
    if not libraries:
        BtLog.error('31')

    database = BtCore.BlobDb('cov')
    database.version = blobtools.__version__
    database.parseFasta(assembly_path, None)
    database.parseCoverage(
        covLibObjs=libraries,
        no_base_cov=skip_base_cov,
        prefix=out_prefix,
    )
nodesDB_f = os.path.join(main_dir, nodesDB_f) if not os.path.isfile(nodesDB_f) and not ((names_f) and (nodes_f)): BtLog.error('3') if not (hit_fs): BtLog.error('18') # can FASTA parser deal with assemblies if not fasta_type in ASSEMBLY_TYPES: BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:])) # Is coverage provided? if not (fasta_type) and not bam_fs and not sam_fs and not cov_fs and not cas_fs: BtLog.error('1') cov_libs = [bt.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \ [bt.CovLibObj('sam' + str(idx), 'sam', lib_f) for idx, lib_f in enumerate(sam_fs)] + \ [bt.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \ [bt.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)] # Create BlobDB object blobDb = bt.BlobDb(title) # Parse FASTA blobDb.parseFasta(fasta_f, fasta_type) # Parse coverage blobDb.parseCovs(cov_libs) # Parse Tax hitLibs = [bt.hitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)] blobDb.parseHits(hitLibs)
def validate_input_create(main_dir, args):
    '''
    Validate the docopt arguments of the create step and build library objects.

    Accepts:
        - main_dir : directory against which the bundled "data/nodesDB.txt"
                     path is resolved
        - args     : docopt argument dictionary

    Checks, in this order, each reported through BtLog.error:
        - '0'  : an input file that was given does not exist
        - '3'  : neither an existing nodesDB file nor both names/nodes files
        - '18' : no taxonomy hit files
        - '2'  : --type is not one of the supported assembly types
        - '1'  : no --type and no coverage files of any kind

    Returns (as a tuple, in this order):
        - title
        - fasta_f
        - fasta_type
        - cov_libs
        - hit_libs
        - taxrules
        - nodesDB_f
        - nodes_f
        - names_f
        - out_f
    '''
    ASSEMBLY_TYPES = [None, 'spades', 'soap', 'abyss', 'velvet']

    fasta_f = args['--infile']
    fasta_type = args['--type']
    sam_fs = args['--sam']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--taxfile']

    # Output name: only the basename of --out is kept as a prefix.
    out_f = args['--out']
    if out_f:
        out_f = "%s.%s" % (os.path.basename(out_f), "BlobDB.json")
    else:
        out_f = "BlobDB.json"

    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    title = args['--title'] or out_f

    # Do files exist ? (options that were not given are None and are skipped)
    candidates = [fasta_f] + sam_fs + bam_fs + cov_fs + cas_fs + [names_f, nodes_f] + hit_fs
    for path in candidates:
        if path is not None and not os.path.isfile(path):
            BtLog.error('0', path)

    # Is taxonomy provided?
    if nodesDB_f == "data/nodesDB.txt":
        nodesDB_f = os.path.join(main_dir, nodesDB_f)
    has_dump_files = bool(names_f) and bool(nodes_f)
    if not (os.path.isfile(nodesDB_f) or has_dump_files):
        BtLog.error('3')
    if not hit_fs:
        BtLog.error('18')

    # can FASTA parser deal with assemblies
    if fasta_type not in ASSEMBLY_TYPES:
        BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))

    # Is coverage provided?
    if not (fasta_type or bam_fs or sam_fs or cov_fs or cas_fs):
        BtLog.error('1')

    # Coverage libraries are named '<type><index>', indexed per library type.
    cov_libs = []
    for kind, paths in (('bam', bam_fs), ('sam', sam_fs), ('cas', cas_fs), ('cov', cov_fs)):
        for number, path in enumerate(paths):
            cov_libs.append(bt.CovLibObj(kind + str(number), kind, path))

    hit_libs = []
    for number, path in enumerate(hit_fs):
        hit_libs.append(bt.hitLibObj('tax' + str(number), 'tax', path))

    return (title, fasta_f, fasta_type, cov_libs, hit_libs,
            taxrules, nodesDB_f, nodes_f, names_f, out_f)
def main():
    """Create a BlobDB JSON file from an assembly, coverage and hit files.

    Steps, driven by the docopt command-line arguments:
      1. Read options; ``--min_diff`` and ``--min_score`` must parse as
         floats, otherwise ``BtLog.error('45')`` is called.
      2. Derive the output file name via ``BtIO.getOutFile`` and use it as
         the title when ``--title`` is not given.
      3. Call ``BtLog.error('1')`` when neither ``--type`` nor any
         coverage file (bam/cov/cas) was supplied.
      4. Build coverage and hit library objects, parse the FASTA file and
         the taxonomy database (nodesDB, or names/nodes files).
      5. If hit files were given, parse them and compute taxonomy; the
         tax rules default to ``['bestsum', 'bestsumorder']`` for more than
         one hit library and ``['bestsum']`` otherwise. Without hit files,
         warning '0' is printed instead.
      6. Parse coverage and write the dumped BlobDB as JSON.
    """
    args = docopt(__doc__)
    fasta_f = args['--infile']
    fasta_type = args['--type']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--hitsfile']
    prefix = args['--out']
    nodesDB_f = args['--db']
    names_f = args['--names']
    # Coverage is estimated unless --calculate_cov was given.
    estimate_cov_flag = not args['--calculate_cov']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    try:
        min_bitscore_diff = float(args['--min_diff'])
        min_score = float(args['--min_score'])
    except ValueError:
        # The except clause must name the exception class; the previous
        # `ValueError()` instance could never match, so this handler was
        # unreachable for malformed numbers.
        BtLog.error('45')
    tax_collision_random = args['--tax_collision_random']
    title = args['--title']

    # outfile
    out_f = BtIO.getOutFile("blobDB", prefix, "json")
    if not title:
        title = out_f

    # coverage
    if not fasta_type and not bam_fs and not cov_fs and not cas_fs:
        BtLog.error('1')
    cov_libs = (
        [BtCore.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)]
        + [BtCore.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)]
        + [BtCore.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)]
    )

    # taxonomy
    hit_libs = [
        BtCore.HitLibObj('tax' + str(idx), 'tax', lib_f)
        for idx, lib_f in enumerate(hit_fs)
    ]

    # Create BlobDB object
    blobDb = BtCore.BlobDb(title)
    blobDb.version = interface.__version__

    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)

    # Parse nodesDB OR names.dmp, nodes.dmp
    nodesDB_default = join(dirname(abspath(__file__)), "../data/nodesDB.txt")
    nodesDB, nodesDB_f = BtIO.parseNodesDB(
        nodes=nodes_f,
        names=names_f,
        nodesDB=nodesDB_f,
        nodesDBdefault=nodesDB_default,
    )
    blobDb.nodesDB_f = nodesDB_f

    # Parse similarity hits
    if hit_libs:
        blobDb.parseHits(hit_libs)
        if not taxrules:
            if len(hit_libs) > 1:
                taxrules = ['bestsum', 'bestsumorder']
            else:
                taxrules = ['bestsum']
        blobDb.computeTaxonomy(taxrules, nodesDB, min_score, min_bitscore_diff, tax_collision_random)
    else:
        print(BtLog.warn_d['0'])

    # Parse coverage
    blobDb.parseCoverage(covLibObjs=cov_libs, estimate_cov=estimate_cov_flag, prefix=prefix)

    # Generating BlobDB and writing to file
    print(BtLog.status_d['7'] % out_f)
    BtIO.writeJson(blobDb.dump(), out_f)