Ejemplo n.º 1
0
def main():
    args = docopt(__doc__)
    fasta_f = args['--infile']
    bam_fs = args['--bam']
    cas_fs = args['--cas']
    prefix = args['--output']
    estimate_cov_flag = True if not args['--calculate_cov'] else False

    # Make covLibs
    cov_libs = [BtCore.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
           [BtCore.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)]
    if not (cov_libs):
        BtLog.error('31')
    blobDb = BtCore.BlobDb('cov')
    blobDb.version = interface.__version__
    blobDb.parseFasta(fasta_f, None)
    blobDb.parseCoverage(covLibObjs=cov_libs, estimate_cov=estimate_cov_flag, prefix=prefix)
Ejemplo n.º 2
0
def main():
    args = docopt(__doc__)
    fasta_f = args['--infile']
    bam_fs = args['--bam']
    cas_fs = args['--cas']
    sam_fs = args['--sam']
    prefix = args['--output']
    no_base_cov_flag = args['--no_base_cov']

    # Make covLibs
    cov_libs = [BtCore.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
           [BtCore.CovLibObj('sam' + str(idx), 'sam', lib_f) for idx, lib_f in enumerate(sam_fs)] + \
           [BtCore.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)]
    if not (cov_libs):
        BtLog.error('31')
    blobDb = BtCore.BlobDb('cov')
    blobDb.version = blobtools.__version__
    blobDb.parseFasta(fasta_f, None)
    blobDb.parseCoverage(covLibObjs=cov_libs, no_base_cov=no_base_cov_flag, prefix=prefix)
Ejemplo n.º 3
0
def main():
    #print(data_dir)
    args = docopt(__doc__)
    blobdb_f = args['--input']
    prefix = args['--out']
    ranks = args['--rank']
    taxrule = args['--taxrule']
    hits_flag = args['--hits']
    seq_list_f = args['--list']
    concoct = args['--concoct']
    cov = args['--cov']
    notable = args['--notable']
    experimental = args['--experimental']
    # Does blobdb_f exist ?
    if not isfile(blobdb_f):
        BtLog.error('0', blobdb_f)

    out_f = BtIO.getOutFile(blobdb_f, prefix, None)

    # Are ranks sane ?
    if 'all' in ranks:
        temp_ranks = RANKS[0:-1]
        ranks = temp_ranks[::-1]
    else:
        for rank in ranks:
            if rank not in RANKS:
                BtLog.error('9', rank)

    # Does seq_list file exist?
    seqs = []
    if (seq_list_f):
        if isfile(seq_list_f):
            seqs = BtIO.parseList(seq_list_f)
        else:
            BtLog.error('0', seq_list_f)

    # Load BlobDb
    blobDb = BtCore.BlobDb('new')
    print(BtLog.status_d['9'] % (blobdb_f))
    blobDb.load(blobdb_f)
    blobDb.version = interface.__version__

    # Is taxrule sane and was it computed?
    if (blobDb.hitLibs) and taxrule not in blobDb.taxrules:
        BtLog.error('11', taxrule, blobDb.taxrules)

    # view(s)
    viewObjs = []
    print(BtLog.status_d['14'])
    if not (notable):
        tableView = None
        if len(blobDb.hitLibs) > 1:
            tableView = BtCore.ViewObj(name="table",
                                       out_f=out_f,
                                       suffix="%s.table.txt" % (taxrule),
                                       body=[])
        else:
            tableView = BtCore.ViewObj(name="table",
                                       out_f=out_f,
                                       suffix="table.txt",
                                       body=[])
        viewObjs.append(tableView)
    if not experimental == 'False':
        meta = {}
        if isfile(experimental):
            meta = BtIO.readYaml(experimental)
        experimentalView = BtCore.ExperimentalViewObj(name="experimental",
                                                      view_dir=out_f,
                                                      blobDb=blobDb,
                                                      meta=meta)
        viewObjs.append(experimentalView)
    if (concoct):
        concoctTaxView = None
        concoctCovView = None
        if len(blobDb.hitLibs) > 1:
            concoctTaxView = BtCore.ViewObj(
                name="concoct_tax",
                out_f=out_f,
                suffix="%s.concoct_taxonomy_info.csv" % (taxrule),
                body=dict())
            concoctCovView = BtCore.ViewObj(
                name="concoct_cov",
                out_f=out_f,
                suffix="%s.concoct_coverage_info.tsv" % (taxrule),
                body=[])
        else:
            concoctTaxView = BtCore.ViewObj(name="concoct_tax",
                                            out_f=out_f,
                                            suffix="concoct_taxonomy_info.csv",
                                            body=dict())
            concoctCovView = BtCore.ViewObj(name="concoct_cov",
                                            out_f=out_f,
                                            suffix="concoct_coverage_info.tsv",
                                            body=[])
        viewObjs.append(concoctTaxView)
        viewObjs.append(concoctCovView)
    if (cov):
        for cov_lib_name, covLibDict in blobDb.covLibs.items():
            out_f = BtIO.getOutFile(covLibDict['f'], prefix, None)
            covView = BtCore.ViewObj(name="covlib",
                                     out_f=out_f,
                                     suffix="cov",
                                     body=[])
            blobDb.view(viewObjs=[covView],
                        ranks=None,
                        taxrule=None,
                        hits_flag=None,
                        seqs=None,
                        cov_libs=[cov_lib_name],
                        progressbar=True)
    if (viewObjs):
        #for viewObj in viewObjs:
        #    print(viewObj.name)
        blobDb.view(viewObjs=viewObjs,
                    ranks=ranks,
                    taxrule=taxrule,
                    hits_flag=hits_flag,
                    seqs=seqs,
                    cov_libs=[],
                    progressbar=True)
    print(BtLog.status_d['19'])
Ejemplo n.º 4
0
    refcov_dict = {}
    if (refcov_f):
        refcov_dict = BtPlot.parseRefCov(refcov_f)

    catcolour_dict = {}
    if (catcolour_f) and (c_index):
        BtLog.error('24')
    elif (catcolour_f):
        catcolour_dict = BtPlot.parseCatColour(catcolour_f)
    else:
        pass

    # Load BlobDb
    print BtLog.status_d['9'] % blobdb_f
    blobDB = bt.BlobDb('new')
    blobDB.load(blobdb_f)

    title = blobDB.title
    if plot_title:
        plot_title = title

    # Is taxrule sane and was it computed?
    if taxrule not in blobDB.taxrules:
        BtLog.error('11', taxrule, blobDB.taxrules)

    data_dict, max_cov, cov_libs, cov_libs_total_reads = blobDB.getPlotData(
        rank, min_length, hide_nohits, taxrule, c_index, catcolour_dict)
    plotObj = BtPlot.PlotObj(data_dict, cov_libs, cov_libs_total_reads)
    plotObj.exclude_groups = exclude_groups
    plotObj.format = format
Ejemplo n.º 5
0
def main():
    args = docopt(__doc__)
    args = BtPlot.check_input(args)

    blobdb_f = args['--infile']
    rank = args['--rank']
    min_length = int(args['--length'])
    max_group_plot = int(args['--plotgroups'])
    hide_nohits = args['--nohit']
    taxrule = args['--taxrule']
    c_index = args['--cindex']
    exclude_groups = args['--exclude']
    labels = args['--label']
    colour_f = args['--colours']
    refcov_f = args['--refcov']
    catcolour_f = args['--catcolour']

    multiplot = args['--multiplot']
    out_prefix = args['--out']
    sort_order = args['--sort']
    sort_first = args['--sort_first']
    hist_type = args['--hist']
    no_title = args['--notitle']
    ignore_contig_length = args['--noscale']
    format_plot = args['--format']
    no_plot_blobs = args['--noblobs']
    no_plot_reads = args['--noreads']
    legend_flag = args['--legend']
    cumulative_flag = args['--cumulative']
    cov_lib_selection = args['--lib']

    filelabel = args['--filelabel']

    exclude_groups = BtIO.parseCmdlist(exclude_groups)
    refcov_dict = BtIO.parseReferenceCov(refcov_f)
    user_labels = BtIO.parseCmdLabels(labels)
    catcolour_dict = BtIO.parseCatColour(catcolour_f)
    colour_dict = BtIO.parseColours(colour_f)

    # Load BlobDb
    print BtLog.status_d['9'] % blobdb_f
    blobDb = BtCore.BlobDb('blobplot')
    blobDb.version = blobtools.__version__
    blobDb.load(blobdb_f)

    # Generate plot data
    print BtLog.status_d['18']
    data_dict, min_cov, max_cov, cov_lib_dict = blobDb.getPlotData(
        rank, min_length, hide_nohits, taxrule, c_index, catcolour_dict)
    plotObj = BtPlot.PlotObj(data_dict, cov_lib_dict, cov_lib_selection,
                             'blobplot', sort_first)
    plotObj.exclude_groups = exclude_groups
    plotObj.version = blobDb.version
    plotObj.format = format_plot
    plotObj.max_cov = max_cov
    plotObj.min_cov = min_cov
    plotObj.no_title = no_title
    plotObj.multiplot = multiplot
    plotObj.hist_type = hist_type
    plotObj.ignore_contig_length = ignore_contig_length
    plotObj.max_group_plot = max_group_plot
    plotObj.legend_flag = legend_flag
    plotObj.cumulative_flag = cumulative_flag
    # order by which to plot (should know about user label)
    plotObj.group_order = BtPlot.getSortedGroups(data_dict, sort_order,
                                                 sort_first)
    # labels for each level of stats
    plotObj.labels.update(plotObj.group_order)
    # plotObj.group_labels is dict that contains labels for each group : all/other/user_label
    if (user_labels):
        for group, label in user_labels.items():
            plotObj.labels.add(label)
    plotObj.group_labels = {group: set() for group in plotObj.group_order}
    plotObj.relabel_and_colour(colour_dict, user_labels)
    plotObj.compute_stats()
    plotObj.refcov_dict = refcov_dict
    # Plotting
    info_flag = 1
    out_f = ''
    for cov_lib in plotObj.cov_libs:
        plotObj.ylabel = "Coverage"
        plotObj.xlabel = "GC proportion"
        if (filelabel):
            plotObj.ylabel = basename(cov_lib_dict[cov_lib]['f'])
        out_f = "%s.%s.%s.p%s.%s.%s" % (blobDb.title, taxrule, rank,
                                        max_group_plot, hist_type, min_length)
        if catcolour_dict:
            out_f = "%s.%s" % (out_f, "catcolour")
        if ignore_contig_length:
            out_f = "%s.%s" % (out_f, "noscale")
        if c_index:
            out_f = "%s.%s" % (out_f, "c_index")
        if exclude_groups:
            out_f = "%s.%s" % (out_f, "exclude_" + "_".join(exclude_groups))
        if labels:
            out_f = "%s.%s" % (out_f, "userlabel_" + "_".join(
                set([name for name in user_labels.values()])))
        out_f = "%s.%s" % (out_f, "blobplot")
        if (plotObj.cumulative_flag):
            out_f = "%s.%s" % (out_f, "cumulative")
        if (plotObj.multiplot):
            out_f = "%s.%s" % (out_f, "multiplot")
        out_f = BtIO.getOutFile(out_f, out_prefix, None)
        if not (no_plot_blobs):
            plotObj.plotScatter(cov_lib, info_flag, out_f)
            info_flag = 0
        if not (no_plot_reads) and (
                plotObj.cov_libs_total_reads_dict[cov_lib]):
            # prevent plotting if --noreads or total_reads == 0
            plotObj.plotBar(cov_lib, out_f)
    plotObj.write_stats(out_f)
Ejemplo n.º 6
0
        nodesDB_f = os.path.join(main_dir, nodesDB_f)
    if not os.path.isfile(nodesDB_f) and not ((names_f) and (nodes_f)):
        BtLog.error('3')

    if not (hit_fs):
        BtLog.error('18')

    # can FASTA parser deal with assemblies
    if not fasta_type in ASSEMBLY_TYPES:
        BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))

    # Is coverage provided?
    if not (fasta_type) and not bam_fs and not sam_fs and not cov_fs and not cas_fs:
        BtLog.error('1')

    cov_libs = [bt.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
               [bt.CovLibObj('sam' + str(idx), 'sam', lib_f) for idx, lib_f in enumerate(sam_fs)] + \
               [bt.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \
               [bt.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)] 
               
    # Create BlobDB object              
    blobDb = bt.BlobDb(title)

    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)
    # Parse coverage
    blobDb.parseCovs(cov_libs)

    # Parse Tax
    hitLibs = [bt.hitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)]
    blobDb.parseHits(hitLibs)
Ejemplo n.º 7
0
def validate_input_create(main_dir, args):
    '''
    Accepts: 
        - main_dir
        - docopt args
    Returns:
        - title
        - fasta_f
        - fasta_type
        - cov_libs
        - hit_libs
        - nodesDB_f
        - taxrules
        - out_f
    '''
    ASSEMBLY_TYPES = [None, 'spades', 'soap', 'abyss', 'velvet']

    fasta_f = args['--infile']
    fasta_type = args['--type']
    sam_fs = args['--sam']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--taxfile']
    out_f = args['--out']
    if (out_f):
        out_f = "%s.%s" % (os.path.basename(out_f), "BlobDB.json")
    else:
        out_f = "%s" % ("BlobDB.json")
    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    title = args['--title'] if (args['--title']) else out_f

    # Do files exist ?
    files = [
        x for x in list([fasta_f] + sam_fs + bam_fs + cov_fs + cas_fs +
                        [names_f] + [nodes_f] + hit_fs) if x is not None
    ]
    for f in files:
        if not os.path.isfile(f):
            BtLog.error('0', f)

    # Is taxonomy provided?
    if nodesDB_f == "data/nodesDB.txt":
        nodesDB_f = os.path.join(main_dir, nodesDB_f)
    if not os.path.isfile(nodesDB_f) and not ((names_f) and (nodes_f)):
        BtLog.error('3')
    if not (hit_fs):
        BtLog.error('18')
    # can FASTA parser deal with assemblies
    if not fasta_type in ASSEMBLY_TYPES:
        BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))
    # Is coverage provided?
    if not (fasta_type
            ) and not bam_fs and not sam_fs and not cov_fs and not cas_fs:
        BtLog.error('1')
    cov_libs = [bt.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
               [bt.CovLibObj('sam' + str(idx), 'sam', lib_f) for idx, lib_f in enumerate(sam_fs)] + \
               [bt.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \
               [bt.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)]

    hit_libs = [
        bt.hitLibObj('tax' + str(idx), 'tax', lib_f)
        for idx, lib_f in enumerate(hit_fs)
    ]

    return title, fasta_f, fasta_type, cov_libs, hit_libs, taxrules, nodesDB_f, nodes_f, names_f, out_f
Ejemplo n.º 8
0
import lib.BtIO as BtIO
import lib.BtInput as BtInput
import os.path


if __name__ == '__main__':

    main_dir = os.path.dirname(__file__)
    #print data_dir
    args = docopt(__doc__)
    #print args

    title, fasta_f, fasta_type, cov_libs, hit_libs, taxrules, nodesDB_f, nodes_f, names_f, out_f = BtInput.validate_input_create(main_dir, args)

    # Create BlobDB object
    blobDb = bt.BlobDb(title)

    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)

    # Parse Tax
    blobDb.parseHits(hit_libs)

    # Parse nodesDB
    nodesDB, nodesDB_f = BtIO.getNodesDB(nodes=nodes_f, names=names_f, nodesDB=nodesDB_f)
    blobDb.nodesDB_f = nodesDB_f

    if not os.path.isfile(nodesDB_f):
        print BtLog.status_d['5'] % nodesDB_f
        BtIO.writeNodesDB(nodesDB, nodesDB_f)
Ejemplo n.º 9
0
def main():

    #main_dir = dirname(__file__)
    args = docopt(__doc__)
    fasta_f = args['--infile']
    fasta_type = args['--type']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--hitsfile']
    prefix = args['--out']
    nodesDB_f = args['--db']
    names_f = args['--names']
    estimate_cov_flag = True if not args['--calculate_cov'] else False
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    try:
        min_bitscore_diff = float(args['--min_diff'])
        min_score = float(args['--min_score'])
    except ValueError():
        BtLog.error('45')
    tax_collision_random = args['--tax_collision_random']
    title = args['--title']

    # outfile
    out_f = BtIO.getOutFile("blobDB", prefix, "json")
    if not (title):
        title = out_f

    # coverage
    if not (fasta_type) and not bam_fs and not cov_fs and not cas_fs:
        BtLog.error('1')
    cov_libs = [BtCore.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
           [BtCore.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \
           [BtCore.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)]

    # taxonomy
    hit_libs = [
        BtCore.HitLibObj('tax' + str(idx), 'tax', lib_f)
        for idx, lib_f in enumerate(hit_fs)
    ]

    # Create BlobDB object
    blobDb = BtCore.BlobDb(title)
    blobDb.version = interface.__version__
    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)

    # Parse nodesDB OR names.dmp, nodes.dmp
    nodesDB_default = join(dirname(abspath(__file__)), "../data/nodesDB.txt")
    nodesDB, nodesDB_f = BtIO.parseNodesDB(nodes=nodes_f,
                                           names=names_f,
                                           nodesDB=nodesDB_f,
                                           nodesDBdefault=nodesDB_default)
    blobDb.nodesDB_f = nodesDB_f

    # Parse similarity hits
    if (hit_libs):
        blobDb.parseHits(hit_libs)
        if not taxrules:
            if len(hit_libs) > 1:
                taxrules = ['bestsum', 'bestsumorder']
            else:
                taxrules = ['bestsum']
        blobDb.computeTaxonomy(taxrules, nodesDB, min_score, min_bitscore_diff,
                               tax_collision_random)
    else:
        print(BtLog.warn_d['0'])

    # Parse coverage
    blobDb.parseCoverage(covLibObjs=cov_libs,
                         estimate_cov=estimate_cov_flag,
                         prefix=prefix)

    # Generating BlobDB and writing to file
    print(BtLog.status_d['7'] % out_f)
    BtIO.writeJson(blobDb.dump(), out_f)
Ejemplo n.º 10
0
def validate_input_create(main_dir, args):
    '''
    Accepts: 
        - main_dir
        - docopt args
    Returns:
        - title
        - fasta_f
        - fasta_type
        - cov_libs
        - hit_libs
        - nodesDB_f
        - taxrules
        - out_f
    '''
    ASSEMBLY_TYPES = [None, 'spades', 'soap', 'abyss', 'velvet']

    fasta_f = args['--infile']
    fasta_type = args['--type']
    sam_fs = args['--sam']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--taxfile']
    out_f = args['--out']
    if (out_f):
        out_f = "%s.%s" % (os.path.basename(out_f), "BlobDB.json")
    else:
        out_f = "%s" % ("BlobDB.json")
    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    title = args['--title'] if (args['--title']) else out_f
    
    # Do files exist ?
    files = [x for x in list([fasta_f] + sam_fs + bam_fs + cov_fs + cas_fs + [names_f] + [nodes_f] + hit_fs) if x is not None]
    for f in files:
        if not os.path.isfile(f):
            BtLog.error('0', f)

    # Is taxonomy provided?
    if nodesDB_f == "data/nodesDB.txt":
        nodesDB_f = os.path.join(main_dir, nodesDB_f)
    if not os.path.isfile(nodesDB_f) and not ((names_f) and (nodes_f)):
        BtLog.error('3')
    if not (hit_fs):
        BtLog.error('18')
    # can FASTA parser deal with assemblies
    if not fasta_type in ASSEMBLY_TYPES:
        BtLog.error('2', ",".join(ASSEMBLY_TYPES[1:]))
    # Is coverage provided?
    if not (fasta_type) and not bam_fs and not sam_fs and not cov_fs and not cas_fs:
        BtLog.error('1')
    cov_libs = [bt.CovLibObj('bam' + str(idx), 'bam', lib_f) for idx, lib_f in enumerate(bam_fs)] + \
               [bt.CovLibObj('sam' + str(idx), 'sam', lib_f) for idx, lib_f in enumerate(sam_fs)] + \
               [bt.CovLibObj('cas' + str(idx), 'cas', lib_f) for idx, lib_f in enumerate(cas_fs)] + \
               [bt.CovLibObj('cov' + str(idx), 'cov', lib_f) for idx, lib_f in enumerate(cov_fs)] 

    hit_libs = [bt.hitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)]

    return title, fasta_f, fasta_type, cov_libs, hit_libs, taxrules, nodesDB_f, nodes_f, names_f, out_f