Esempi in Python per BtIO.getOutFile, esempi in Python per bloblib.BtIO.getOutFile

Esempio n. 1

0

Mostra file

File: BtCore.py Progetto: YourePrettyGood/blobtools

    def parseCoverage(self, **kwargs):
        """Read every coverage library and attach its coverage to the blobs.

        Keyword arguments read from ``kwargs``:
            covLibObjs  -- iterable of coverage library objects; their
                           ``name``, ``f`` and ``fmt`` attributes are read here.
            no_base_cov -- passed through unchanged to BtIO.parseBam/parseSam.
            prefix      -- optional; passed to BtIO.getOutFile for the COV file.

        For 'bam', 'sam' and 'cas' libraries a COV view is written through
        self.view() once the library has been parsed; 'cov' libraries are only
        read. Any other format skips parsing, but mean_cov is still computed
        from the library's current cov_sum and the library is still stored.
        """
        libraries = kwargs['covLibObjs']
        no_base_cov = kwargs['no_base_cov']

        for covLib in libraries:
            self.addCovLib(covLib)
            print(BtLog.status_d['1'] % (covLib.name, covLib.f))
            # set by the formats that are re-exported as a COV file
            needs_cov_file = False

            if covLib.fmt in ('bam', 'sam'):
                parse = BtIO.parseBam if covLib.fmt == 'bam' else BtIO.parseSam
                base_cov_dict, covLib.reads_total, covLib.reads_mapped, read_cov_dict = parse(
                    covLib.f, set(self.dict_of_blobs), no_base_cov)

                if covLib.reads_total == 0:
                    print(BtLog.warn_d['4'] % covLib.f)

                for name, base_cov in base_cov_dict.items():
                    blob = self.dict_of_blobs[name]
                    # normalise the library's base coverage by the blob's agct_count
                    cov = base_cov / blob.agct_count
                    covLib.cov_sum += cov
                    blob.addCov(covLib.name, cov)
                    blob.addReadCov(covLib.name, read_cov_dict[name])
                needs_cov_file = True

            elif covLib.fmt == 'cas':
                cov_dict, covLib.reads_total, covLib.reads_mapped, read_cov_dict = BtIO.parseCas(
                    covLib.f, self.order_of_blobs)
                if covLib.reads_total == 0:
                    print(BtLog.warn_d['4'] % covLib.f)
                for name, cov in cov_dict.items():
                    blob = self.dict_of_blobs[name]
                    covLib.cov_sum += cov
                    blob.addCov(covLib.name, cov)
                    blob.addReadCov(covLib.name, read_cov_dict[name])
                needs_cov_file = True

            elif covLib.fmt == 'cov':
                (base_cov_dict, covLib.reads_total, covLib.reads_mapped,
                 covLib.reads_unmapped, read_cov_dict) = BtIO.parseCov(
                    covLib.f, set(self.dict_of_blobs))
                # warn when the COV file does not hold one entry per sequence
                if len(base_cov_dict) != self.seqs:
                    print(BtLog.warn_d['4'] % covLib.f)
                for name, cov in base_cov_dict.items():
                    blob = self.dict_of_blobs[name]
                    covLib.cov_sum += cov
                    blob.addCov(covLib.name, cov)
                    # read coverage is optional in COV files
                    if name in read_cov_dict:
                        blob.addReadCov(covLib.name, read_cov_dict[name])

            if needs_cov_file:
                # Create COV file for future use
                out_f = BtIO.getOutFile(covLib.f, kwargs.get('prefix'), None)
                covView = ViewObj(name="covlib", out_f=out_f, suffix="cov", header="", body=[])
                self.view(viewObjs=[covView], ranks=None, taxrule=None, hits_flag=None,
                          seqs=None, cov_libs=[covLib.name], progressbar=False)

            covLib.mean_cov = covLib.cov_sum / self.seqs
            if covLib.cov_sum == 0.0:
                print(BtLog.warn_d['6'] % (covLib.name))
            self.covLibs[covLib.name] = covLib

Esempio n. 2

0

Mostra file

File: create.py Progetto: DRL/blobtools

def main():
    """Build a BlobDb from the command-line inputs and write it to a JSON file.

    Reads the docopt arguments, parses the FASTA file, the nodes database,
    the optional hit files and the coverage libraries, then dumps the
    resulting BlobDb through BtIO.writeJson.
    """
    args = docopt(__doc__)

    # inputs
    fasta_f = args['--infile']
    fasta_type = args['--type']
    sam_fs = args['--sam']
    bam_fs = args['--bam']
    cov_fs = args['--cov']
    cas_fs = args['--cas']
    hit_fs = args['--hitsfile']
    prefix = args['--out']
    # taxonomy settings
    nodesDB_f = args['--db']
    names_f = args['--names']
    nodes_f = args['--nodes']
    taxrules = args['--taxrule']
    min_bitscore_diff = float(args['--min_diff'])
    tax_collision_random = args['--tax_collision_random']

    # the output file name doubles as the title when none was given
    out_f = BtIO.getOutFile("blobDB", prefix, "json")
    title = args['--title'] or out_f

    # coverage: a FASTA type or at least one coverage file is required
    if not (fasta_type or bam_fs or sam_fs or cov_fs or cas_fs):
        BtLog.error('1')
    cov_libs = []
    for fmt, lib_fs in (('bam', bam_fs), ('sam', sam_fs), ('cas', cas_fs), ('cov', cov_fs)):
        for idx, lib_f in enumerate(lib_fs):
            cov_libs.append(BtCore.CovLibObj(fmt + str(idx), fmt, lib_f))

    # taxonomy
    hit_libs = []
    for idx, lib_f in enumerate(hit_fs):
        hit_libs.append(BtCore.HitLibObj('tax' + str(idx), 'tax', lib_f))

    # Create BlobDB object and parse FASTA
    blobDb = BtCore.BlobDb(title)
    blobDb.version = blobtools.__version__
    blobDb.parseFasta(fasta_f, fasta_type)

    # Parse nodesDB OR names.dmp, nodes.dmp
    nodesDB, nodesDB_f = BtIO.parseNodesDB(
        nodes=nodes_f,
        names=names_f,
        nodesDB=nodesDB_f,
        nodesDBdefault=join(blobtools.DATADIR, "nodesDB.txt"),
    )
    blobDb.nodesDB_f = nodesDB_f

    # Parse similarity hits
    if not hit_libs:
        print(BtLog.warn_d['0'])
    else:
        blobDb.parseHits(hit_libs)
        blobDb.computeTaxonomy(taxrules, nodesDB, min_bitscore_diff, tax_collision_random)

    # Parse coverage
    blobDb.parseCoverage(covLibObjs=cov_libs, no_base_cov=None)

    # Generating BlobDB and writing to file
    print(BtLog.status_d['7'] % out_f)
    BtIO.writeJson(blobDb.dump(), out_f)

Esempio n. 3

0

Mostra file

def main():
    """Filter a BAM file by an optional include or exclude sequence list.

    Reads the docopt arguments and hands the BAM file to
    BtIO.parseBamForFilter. Supplying both --include and --exclude is
    reported through BtLog.error('43') and nothing is filtered.
    """
    args = docopt(__doc__)
    bam_f = args['--bam']
    include_f = args['--include']
    exclude_f = args['--exclude']
    out_prefix = args['--out']
    gzip_flag = args['--gzip']
    do_sort = args['--sort']
    keep_sorted = args['--keep']
    sort_threads = int(args['--threads'])

    print(BtLog.status_d['22'] % bam_f)
    out_f = BtIO.getOutFile(bam_f, out_prefix, None)

    # the two lists are mutually exclusive
    if include_f and exclude_f:
        print(BtLog.error('43'))
        return

    # at most one of the two files is set from here on
    include_seqs = BtIO.parseList(include_f) if include_f else None
    exclude_seqs = BtIO.parseList(exclude_f) if exclude_f else None
    BtIO.parseBamForFilter(bam_f, out_f, include_seqs, exclude_seqs, gzip_flag,
                           do_sort, keep_sorted, sort_threads)

Esempio n. 4

0

Mostra file

File: seqfilter.py Progetto: sujaikumar/blobtools

def main():
    """Filter a FASTA file by a list of sequence headers and write the result.

    Sequences whose header is in the list are kept; with --invert the
    sequences whose header is NOT in the list are kept instead. The kept
    records are written to the file returned by BtIO.getOutFile with the
    suffix "filtered.fna". Headers that were kept more than once are
    reported through BtLog.warn_d['8'].
    """
    # local import: Counter is only needed for the duplicate report below
    from collections import Counter

    args = docopt(__doc__)
    fasta_f = args['--infile']
    list_f = args['--list']
    invert = args['--invert']
    prefix = args['--out']

    out_f = BtIO.getOutFile(fasta_f, prefix, "filtered.fna")

    print(BtLog.status_d['1'] % ("list", list_f))
    items = BtIO.parseSet(list_f)
    items_count = len(items)
    print(BtLog.status_d['22'] % fasta_f)

    output = []
    items_parsed = []
    sequences = 0
    for header, sequence in BtIO.readFasta(fasta_f):
        sequences += 1
        # keep listed headers normally, unlisted headers when --invert is set
        if (header in items) != bool(invert):
            items_parsed.append(header)
            output.append(">%s\n%s\n" % (header, sequence))
        BtLog.progress(len(output), 10, items_count, no_limit=True)
    BtLog.progress(items_count, 10, items_count)

    items_parsed_count = len(items_parsed)
    # float() keeps this a true division regardless of the module's division
    # mode; an empty FASTA file reports 0% instead of raising ZeroDivisionError
    fraction_kept = float(items_parsed_count) / sequences if sequences else 0.0
    print(BtLog.status_d['23'] % ('{:.2%}'.format(fraction_kept),
                                  "{:,}".format(items_count),
                                  "{:,}".format(items_parsed_count),
                                  "{:,}".format(sequences)))

    # single counting pass instead of calling list.count() once per header;
    # sorted so the warning lists the headers in a stable order
    duplicated = sorted(header for header, seen in Counter(items_parsed).items() if seen > 1)
    if duplicated:
        print(BtLog.warn_d['8'] % "\n\t\t\t".join(duplicated))

    with open(out_f, "w") as fh:
        print(BtLog.status_d['24'] % out_f)
        fh.write("".join(output))

Esempio n. 5

0

Mostra file

File: bamfilter.py Progetto: DRL/blobtools

def main():
    """Run the BAM filter with the include/exclude list given on the command line.

    Exactly one BtIO.parseBamForFilter call is made unless both --include
    and --exclude are supplied, which is reported via BtLog.error('43').
    """
    args = docopt(__doc__)
    bam_f = args['--bam']
    include_f = args['--include']
    exclude_f = args['--exclude']
    out_prefix = args['--out']
    compress = args['--gzip']
    do_sort = args['--sort']
    keep_sorted = args['--keep']
    sort_threads = int(args['--threads'])

    print(BtLog.status_d['22'] % bam_f)
    out_f = BtIO.getOutFile(bam_f, out_prefix, None)

    if include_f and exclude_f:
        # both lists given: report the conflict and do not filter
        print(BtLog.error('43'))
        return

    included, excluded = None, None
    if include_f:
        included = BtIO.parseList(include_f)
    elif exclude_f:
        excluded = BtIO.parseList(exclude_f)
    BtIO.parseBamForFilter(bam_f, out_f, included, excluded, compress, do_sort, keep_sorted, sort_threads)

Esempio n. 6

0

Mostra file

File: view.py Progetto: DRL/blobtools

def main():
    """Load a BlobDb file and write the views selected on the command line.

    The table view is produced unless --notable is set; --experimental,
    --concoct and --cov add further views. COV views are written one per
    coverage library, each to an output file derived from that library's
    file name.
    """
    args = docopt(__doc__)
    blobdb_f = args['--input']
    prefix = args['--out']
    ranks = args['--rank']
    taxrule = args['--taxrule']
    hits_flag = args['--hits']
    seq_list_f = args['--list']
    concoct = args['--concoct']
    cov = args['--cov']
    notable = args['--notable']
    experimental = args['--experimental']

    # Does blobdb_f exist ?
    if not isfile(blobdb_f):
        BtLog.error('0', blobdb_f)

    out_f = BtIO.getOutFile(blobdb_f, prefix, None)

    # Are ranks sane ? 'all' expands to every rank but the last, in reverse order
    if 'all' in ranks:
        ranks = RANKS[0:-1][::-1]
    else:
        for unknown_rank in (rank for rank in ranks if rank not in RANKS):
            BtLog.error('9', unknown_rank)

    # Does seq_list file exist?
    seqs = []
    if seq_list_f:
        if not isfile(seq_list_f):
            BtLog.error('0', seq_list_f)
        else:
            seqs = BtIO.parseList(seq_list_f)

    # Load BlobDb
    blobDb = BtCore.BlobDb('new')
    print(BtLog.status_d['9'] % (blobdb_f))
    blobDb.load(blobdb_f)
    blobDb.version = blobtools.__version__

    # Is taxrule sane and was it computed?
    if blobDb.hitLibs and taxrule not in blobDb.taxrules:
        BtLog.error('11', taxrule, blobDb.taxrules)

    # view(s)
    viewObjs = []
    print(BtLog.status_d['14'])
    if not notable:
        viewObjs.append(
            BtCore.ViewObj(name="table", out_f=out_f, suffix="table.txt", body=[]))
    if experimental:
        viewObjs.append(
            BtCore.ExperimentalViewObj(name="experimental", view_dir=out_f))
    if concoct:
        viewObjs.append(
            BtCore.ViewObj(name="concoct_tax", out_f=out_f,
                           suffix="concoct_taxonomy_info.csv", body=dict()))
        viewObjs.append(
            BtCore.ViewObj(name="concoct_cov", out_f=out_f,
                           suffix="concoct_coverage_info.tsv", body=[]))
    if cov:
        # COV views are rendered immediately, one call per coverage library
        for cov_lib_name, covLibDict in blobDb.covLibs.items():
            cov_out_f = BtIO.getOutFile(covLibDict['f'], prefix, None)
            covView = BtCore.ViewObj(name="covlib", out_f=cov_out_f, suffix="cov", body=[])
            blobDb.view(viewObjs=[covView], ranks=None, taxrule=None, hits_flag=None,
                        seqs=None, cov_libs=[cov_lib_name], progressbar=True)
    if viewObjs:
        blobDb.view(viewObjs=viewObjs, ranks=ranks, taxrule=taxrule, hits_flag=hits_flag,
                    seqs=seqs, cov_libs=[], progressbar=True)
    print(BtLog.status_d['19'])

Esempio n. 7

0

Mostra file

File: covplot.py Progetto: sujaikumar/blobtools

def main():
    """Draw coverage-vs-coverage scatter plots for a BlobDb and a COV file.

    Loads the BlobDb, builds a BtPlot.PlotObj of type 'covplot' whose
    y-values come from the COV file given with --cov, plots one scatter per
    coverage library (unless --noblobs is set) and finally writes the
    statistics using the last output file name that was built.
    """
    args = BtPlot.check_input(docopt(__doc__))

    blobdb_f = args['--infile']
    cov_f = args['--cov']
    rank = args['--rank']
    min_length = int(args['--length'])
    max_group_plot = int(args['--plotgroups'])
    hide_nohits = args['--nohit']
    taxrule = args['--taxrule']
    c_index = args['--cindex']
    exclude_groups = args['--exclude']
    labels = args['--label']
    colour_f = args['--colours']
    refcov_f = args['--refcov']
    catcolour_f = args['--catcolour']

    multiplot = args['--multiplot']
    out_prefix = args['--out']
    sort_order = args['--sort']
    hist_type = args['--hist']
    no_title = args['--notitle']
    ignore_contig_length = args['--noscale']
    plot_format = args['--format']
    no_plot_blobs = args['--noblobs']
    no_plot_reads = args['--noreads']  # read for parity with the CLI; not used below
    legend_flag = args['--legend']
    cumulative_flag = args['--cumulative']
    cov_lib_selection = args['--lib']

    xlabel = args['--xlabel']
    ylabel = args['--ylabel']
    axis_max = float(args['--max'])

    exclude_groups = BtIO.parseCmdlist(exclude_groups)
    refcov_dict = BtIO.parseReferenceCov(refcov_f)
    user_labels = BtIO.parseCmdLabels(labels)
    catcolour_dict = BtIO.parseCatColour(catcolour_f)
    colour_dict = BtIO.parseColours(colour_f)

    # Load BlobDb
    print(BtLog.status_d['9'] % blobdb_f)
    blobDb = Bt.BlobDb('blobplot')
    blobDb.version = blobtools.__version__
    blobDb.load(blobdb_f)

    # Generate plot data
    print(BtLog.status_d['18'])
    data_dict, min_cov, max_cov, cov_lib_dict = blobDb.getPlotData(
        rank, min_length, hide_nohits, taxrule, c_index, catcolour_dict)
    plotObj = BtPlot.PlotObj(data_dict, cov_lib_dict, cov_lib_selection, 'covplot')
    (plotObj.cov_y_dict, reads_total, reads_mapped,
     reads_unmapped, read_cov_dict) = BtIO.parseCov(cov_f, set(blobDb.dict_of_blobs))
    plotObj.exclude_groups = exclude_groups
    plotObj.version = blobDb.version
    plotObj.format = plot_format
    plotObj.max_cov = axis_max
    plotObj.no_title = no_title
    plotObj.multiplot = multiplot
    plotObj.hist_type = hist_type
    plotObj.ignore_contig_length = ignore_contig_length
    plotObj.max_group_plot = max_group_plot
    plotObj.legend_flag = legend_flag
    plotObj.cumulative_flag = cumulative_flag
    # order by which to plot (should know about user label)
    plotObj.group_order = BtPlot.getSortedGroups(data_dict, sort_order)
    # labels for each level of stats
    plotObj.labels.update(plotObj.group_order)
    # plotObj.group_labels is dict that contains labels for each group : all/other/user_label
    if user_labels:
        for label in user_labels.values():
            plotObj.labels.add(label)
    plotObj.group_labels = dict((group, set()) for group in plotObj.group_order)
    plotObj.relabel_and_colour(colour_dict, user_labels)
    plotObj.compute_stats()
    plotObj.refcov_dict = refcov_dict

    # Plotting
    info_flag = 1
    out_f = ''
    for cov_lib in plotObj.cov_libs:
        # axis labels default to the library file name (x) and the COV file (y)
        default_xlabel = basename(cov_lib_dict[cov_lib]['f'])
        plotObj.xlabel = xlabel if xlabel else default_xlabel
        plotObj.ylabel = ylabel if ylabel else cov_f

        # output name: dot-separated plot settings, built up tag by tag
        name_parts = ["%s.%s.%s.p%s.%s.%s" % (blobDb.title, taxrule, rank,
                                              max_group_plot, hist_type, min_length)]
        if catcolour_dict:
            name_parts.append("catcolour")
        if ignore_contig_length:
            name_parts.append("noscale")
        if c_index:
            name_parts.append("c_index")
        if exclude_groups:
            name_parts.append("exclude_" + "_".join(exclude_groups))
        if labels:
            name_parts.append("userlabel_" + "_".join(set(user_labels.values())))
        name_parts.append("covplot")
        if plotObj.cumulative_flag:
            name_parts.append("cumulative")
        if plotObj.multiplot:
            name_parts.append("multiplot")
        out_f = BtIO.getOutFile(".".join(name_parts), out_prefix, None)

        if not no_plot_blobs:
            plotObj.plotScatter(cov_lib, info_flag, out_f)
            info_flag = 0
    plotObj.write_stats(out_f)

Esempio n. 8

0

Mostra file

def main():
    """Create a BlobDb from FASTA, hit and coverage files and save it as JSON.

    All inputs come from the docopt arguments. The output file name is
    obtained from BtIO.getOutFile("blobDB", prefix, "json") and is also used
    as the BlobDb title when --title is empty.
    """
    args = docopt(__doc__)
    fasta_f, fasta_type = args['--infile'], args['--type']
    sam_fs, bam_fs = args['--sam'], args['--bam']
    cov_fs, cas_fs = args['--cov'], args['--cas']
    hit_fs = args['--hitsfile']
    prefix = args['--out']
    nodesDB_f = args['--db']
    names_f, nodes_f = args['--names'], args['--nodes']
    taxrules = args['--taxrule']
    min_bitscore_diff = float(args['--min_diff'])
    tax_collision_random = args['--tax_collision_random']
    title = args['--title']

    # outfile
    out_f = BtIO.getOutFile("blobDB", prefix, "json")
    if not title:
        title = out_f

    # coverage: without a FASTA type, at least one coverage file is required
    has_coverage = bool(bam_fs or sam_fs or cov_fs or cas_fs)
    if not fasta_type and not has_coverage:
        BtLog.error('1')

    def make_cov_libs(fmt, lib_fs):
        # one CovLibObj per file, named "<fmt><index>"
        return [BtCore.CovLibObj(fmt + str(idx), fmt, lib_f) for idx, lib_f in enumerate(lib_fs)]

    cov_libs = (make_cov_libs('bam', bam_fs) + make_cov_libs('sam', sam_fs) +
                make_cov_libs('cas', cas_fs) + make_cov_libs('cov', cov_fs))

    # taxonomy
    hit_libs = [BtCore.HitLibObj('tax' + str(idx), 'tax', lib_f) for idx, lib_f in enumerate(hit_fs)]

    # Create BlobDB object
    blobDb = BtCore.BlobDb(title)
    blobDb.version = blobtools.__version__
    # Parse FASTA
    blobDb.parseFasta(fasta_f, fasta_type)

    # Parse nodesDB OR names.dmp, nodes.dmp
    nodesDB_default = join(blobtools.DATADIR, "nodesDB.txt")
    nodesDB, nodesDB_f = BtIO.parseNodesDB(nodes=nodes_f, names=names_f,
                                           nodesDB=nodesDB_f, nodesDBdefault=nodesDB_default)
    blobDb.nodesDB_f = nodesDB_f

    # Parse similarity hits
    if hit_libs:
        blobDb.parseHits(hit_libs)
        blobDb.computeTaxonomy(taxrules, nodesDB, min_bitscore_diff, tax_collision_random)
    else:
        print(BtLog.warn_d['0'])

    # Parse coverage
    blobDb.parseCoverage(covLibObjs=cov_libs, no_base_cov=None)

    # Generating BlobDB and writing to file
    print(BtLog.status_d['7'] % out_f)
    BtIO.writeJson(blobDb.dump(), out_f)

Esempio n. 9

0

Mostra file

File: blobplot.py Progetto: DRL/blobtools

def main():
    """Draw blob plots (scatter and read-coverage bar plots) for a BlobDb.

    Loads the BlobDb, builds a BtPlot.PlotObj of type 'blobplot' and, for
    each coverage library, plots the scatter (unless --noblobs) and the bar
    plot (unless --noreads or the library has no reads). The statistics are
    written last, using the last output file name that was built.
    """
    args = BtPlot.check_input(docopt(__doc__))

    blobdb_f = args['--infile']
    rank = args['--rank']
    min_length = int(args['--length'])
    max_group_plot = int(args['--plotgroups'])
    hide_nohits = args['--nohit']
    taxrule = args['--taxrule']
    c_index = args['--cindex']
    exclude_groups = args['--exclude']
    labels = args['--label']
    colour_f = args['--colours']
    refcov_f = args['--refcov']
    catcolour_f = args['--catcolour']

    multiplot = args['--multiplot']
    out_prefix = args['--out']
    sort_order = args['--sort']
    hist_type = args['--hist']
    no_title = args['--notitle']
    ignore_contig_length = args['--noscale']
    plot_format = args['--format']
    no_plot_blobs = args['--noblobs']
    no_plot_reads = args['--noreads']
    legend_flag = args['--legend']
    cumulative_flag = args['--cumulative']
    cov_lib_selection = args['--lib']

    filelabel = args['--filelabel']

    exclude_groups = BtIO.parseCmdlist(exclude_groups)
    refcov_dict = BtIO.parseReferenceCov(refcov_f)
    user_labels = BtIO.parseCmdLabels(labels)
    catcolour_dict = BtIO.parseCatColour(catcolour_f)
    colour_dict = BtIO.parseColours(colour_f)

    # Load BlobDb
    print(BtLog.status_d['9'] % blobdb_f)
    blobDb = BtCore.BlobDb('blobplot')
    blobDb.version = blobtools.__version__
    blobDb.load(blobdb_f)

    # Generate plot data
    print(BtLog.status_d['18'])
    data_dict, min_cov, max_cov, cov_lib_dict = blobDb.getPlotData(
        rank, min_length, hide_nohits, taxrule, c_index, catcolour_dict)
    plotObj = BtPlot.PlotObj(data_dict, cov_lib_dict, cov_lib_selection, 'blobplot')
    plotObj.exclude_groups = exclude_groups
    plotObj.version = blobDb.version
    plotObj.format = plot_format
    plotObj.max_cov = max_cov
    plotObj.min_cov = min_cov
    plotObj.no_title = no_title
    plotObj.multiplot = multiplot
    plotObj.hist_type = hist_type
    plotObj.ignore_contig_length = ignore_contig_length
    plotObj.max_group_plot = max_group_plot
    plotObj.legend_flag = legend_flag
    plotObj.cumulative_flag = cumulative_flag
    # order by which to plot (should know about user label)
    plotObj.group_order = BtPlot.getSortedGroups(data_dict, sort_order)
    # labels for each level of stats
    plotObj.labels.update(plotObj.group_order)
    # plotObj.group_labels is dict that contains labels for each group : all/other/user_label
    if user_labels:
        for label in user_labels.values():
            plotObj.labels.add(label)
    plotObj.group_labels = dict((group, set()) for group in plotObj.group_order)
    plotObj.relabel_and_colour(colour_dict, user_labels)
    plotObj.compute_stats()
    plotObj.refcov_dict = refcov_dict

    # Plotting
    info_flag = 1
    out_f = ''
    for cov_lib in plotObj.cov_libs:
        # with --filelabel the y axis is labelled with the library's file name
        if filelabel:
            plotObj.ylabel = basename(cov_lib_dict[cov_lib]['f'])
        else:
            plotObj.ylabel = "Coverage"
        plotObj.xlabel = "GC proportion"

        out_f = "%s.%s.%s.p%s.%s.%s" % (blobDb.title, taxrule, rank,
                                        max_group_plot, hist_type, min_length)
        # simple on/off settings each contribute a fixed tag to the file name
        for enabled, tag in ((catcolour_dict, "catcolour"),
                             (ignore_contig_length, "noscale"),
                             (c_index, "c_index")):
            if enabled:
                out_f = "%s.%s" % (out_f, tag)
        if exclude_groups:
            out_f = "%s.%s" % (out_f, "exclude_" + "_".join(exclude_groups))
        if labels:
            out_f = "%s.%s" % (out_f, "userlabel_" + "_".join(set(user_labels.values())))
        out_f = "%s.%s" % (out_f, "blobplot")
        if plotObj.cumulative_flag:
            out_f = "%s.%s" % (out_f, "cumulative")
        if plotObj.multiplot:
            out_f = "%s.%s" % (out_f, "multiplot")
        out_f = BtIO.getOutFile(out_f, out_prefix, None)

        if not no_plot_blobs:
            plotObj.plotScatter(cov_lib, info_flag, out_f)
            info_flag = 0
        # prevent plotting if --noreads or total_reads == 0
        if not no_plot_reads and plotObj.cov_libs_total_reads_dict[cov_lib]:
            plotObj.plotBar(cov_lib, out_f)
    plotObj.write_stats(out_f)

Esempio n. 10

0

Mostra file

File: BtCore.py Progetto: DRL/blobtools

    def parseCoverage(self, **kwargs):
        """Parse each coverage library and store its coverage on the blobs.

        Keyword arguments read from ``kwargs``:
            covLibObjs  -- iterable of coverage library objects; ``name``,
                           ``f`` and ``fmt`` are read from each of them.
            no_base_cov -- handed on unchanged to BtIO.parseBam / parseSam.

        "bam", "sam" and "cas" libraries are additionally written out as a
        COV view through self.view(); "cov" libraries are only read. Every
        library gets mean_cov set and is stored in self.covLibs under its
        name, whatever its format.
        """
        covLibObjs = kwargs["covLibObjs"]
        no_base_cov = kwargs["no_base_cov"]
        alignment_parsers = {"bam": BtIO.parseBam, "sam": BtIO.parseSam}

        for covLib in covLibObjs:
            self.addCovLib(covLib)
            print(BtLog.status_d["1"] % (covLib.name, covLib.f))
            fmt = covLib.fmt
            # only alignment-derived libraries are re-exported as COV
            write_cov_view = fmt in alignment_parsers or fmt == "cas"

            if fmt in alignment_parsers:
                base_cov_dict, covLib.reads_total, covLib.reads_mapped, read_cov_dict = alignment_parsers[fmt](
                    covLib.f, set(self.dict_of_blobs), no_base_cov
                )
                if covLib.reads_total == 0:
                    print(BtLog.warn_d["4"] % covLib.f)
                for name, base_cov in base_cov_dict.items():
                    blob = self.dict_of_blobs[name]
                    # base coverage is divided by the blob's agct_count
                    cov = base_cov / blob.agct_count
                    covLib.cov_sum += cov
                    blob.addCov(covLib.name, cov)
                    blob.addReadCov(covLib.name, read_cov_dict[name])

            elif fmt == "cas":
                cov_dict, covLib.reads_total, covLib.reads_mapped, read_cov_dict = BtIO.parseCas(
                    covLib.f, self.order_of_blobs
                )
                if covLib.reads_total == 0:
                    print(BtLog.warn_d["4"] % covLib.f)
                for name, cov in cov_dict.items():
                    blob = self.dict_of_blobs[name]
                    covLib.cov_sum += cov
                    blob.addCov(covLib.name, cov)
                    blob.addReadCov(covLib.name, read_cov_dict[name])

            elif fmt == "cov":
                (
                    base_cov_dict,
                    covLib.reads_total,
                    covLib.reads_mapped,
                    covLib.reads_unmapped,
                    read_cov_dict,
                ) = BtIO.parseCov(covLib.f, set(self.dict_of_blobs))
                # warn when the COV file does not hold one entry per sequence
                if len(base_cov_dict) != self.seqs:
                    print(BtLog.warn_d["4"] % covLib.f)
                for name, cov in base_cov_dict.items():
                    blob = self.dict_of_blobs[name]
                    covLib.cov_sum += cov
                    blob.addCov(covLib.name, cov)
                    if name in read_cov_dict:
                        blob.addReadCov(covLib.name, read_cov_dict[name])

            if write_cov_view:
                # Create COV file for future use
                out_f = BtIO.getOutFile(covLib.f, None, None)
                covView = ViewObj(name="covlib", out_f=out_f, suffix="cov", header="", body=[])
                self.view(
                    viewObjs=[covView],
                    ranks=None,
                    taxrule=None,
                    hits_flag=None,
                    seqs=None,
                    cov_libs=[covLib.name],
                    progressbar=False,
                )

            covLib.mean_cov = covLib.cov_sum / self.seqs
            if covLib.cov_sum == 0.0:
                print(BtLog.warn_d["6"] % (covLib.name))
            self.covLibs[covLib.name] = covLib

Esempio n. 11

0

Mostra file

def main():
    """Add taxIDs to a BLAST or Diamond hits file and write the result.

    Exactly one of --blast / --diamond selects the hits file. The taxID of
    each hit comes either from a fixed --taxid or from one ID-to-taxID
    mapping file (--uniref, --rnacentral or --swissprot). For BLAST input,
    a taxID already present in the file is kept unless --force is set.
    Output lines are "query_id<TAB>tax_id<TAB>bitscore<TAB>rest".
    """
    args = docopt(__doc__)
    blast_f = args['--blast']
    diamond_f = args['--diamond']

    uniref_f = args['--uniref']
    rnacentral_f = args['--rnacentral']
    swissprot_f = args['--swissprot']
    taxid = args['--taxid']

    force = args['--force']
    prefix = args['--out']

    out_f, hit_f, map_f, taxid_d = None, None, None, {}

    # Check if blast_f OR diamond_f is specified (exactly one of them)
    if not (bool(blast_f) + bool(diamond_f) == 1):
        BtLog.error('26')
    elif blast_f:
        hit_f = blast_f
    else:
        hit_f = diamond_f

    # Check if taxID or Mapping file is supplied
    if taxid:
        try:
            taxid = int(taxid)
        except (TypeError, ValueError):
            # int() raises ValueError (not TypeError) for a non-numeric string
            BtLog.error('26')
        out_f = BtIO.getOutFile(hit_f, prefix, "tax_%s.out" % taxid)
        # every subject ID maps to the one user-supplied taxID
        taxid_d = defaultdict(lambda: taxid)
    elif bool(uniref_f) + bool(rnacentral_f) + bool(swissprot_f) == 1:
        if uniref_f:
            print(BtLog.status_d['1'] % ("ID-to-taxID Mapping file", uniref_f))
            taxid_d = BtIO.parseDict(uniref_f, 0, 1)
            out_f = BtIO.getOutFile(hit_f, prefix, "uniref.out")
            map_f = uniref_f
        elif rnacentral_f:
            print(BtLog.status_d['1'] % ("ID-to-taxID Mapping file", rnacentral_f))
            taxid_d = BtIO.parseDict(rnacentral_f, 0, 3)
            out_f = BtIO.getOutFile(hit_f, prefix, "rnacentral.out")
            map_f = rnacentral_f
        else:
            print(BtLog.status_d['1'] % ("ID-to-taxID Mapping file", swissprot_f))
            taxid_d = BtIO.parseDict(swissprot_f, 0, 1)
            out_f = BtIO.getOutFile(hit_f, prefix, "swissprot.out")
            map_f = swissprot_f
    else:
        BtLog.error('41')

    output = []
    print(BtLog.status_d['1'] % ("hits file", hit_f))

    with open(hit_f) as fh:
        for idx, raw_line in enumerate(fh):
            query_id, bitscore, tax_id, subject_id, rest = None, None, None, None, None
            line = raw_line.rstrip("\n").split()
            query_id = line[0]
            if blast_f:
                bitscore = line[2]
                tax_id = line[1]
                # NOTE(review): assumes the subject ID is the fifth column of
                # the BLAST table -- confirm against the expected outfmt
                subject_id = line[4]
                rest = "\t".join(line[2:])
            elif diamond_f:
                bitscore = line[11]
                subject_id = line[1]
                rest = "\t".join(line[1:])
            if swissprot_f:
                # the mapping is keyed on the second "|"-separated field
                subject_id = subject_id.split("|")[1]
            if blast_f and not tax_id == "N/A" and not force:
                # so that it does not overwrite existing taxIDs
                print(BtLog.warn_d['10'] % (idx + 1, line[0], line[1]))
            else:
                try:
                    tax_id = taxid_d[subject_id]
                except KeyError:
                    # report the unmapped subject ID instead of silently
                    # discarding the formatted warning
                    print(BtLog.warn_d['12'] % (subject_id, map_f))
                    tax_id = "N/A"
            output.append("%s\t%s\t%s\t%s" % (query_id, tax_id, bitscore, rest))

    if output:
        with open(out_f, "w") as fh:
            print(BtLog.status_d['24'] % out_f)
            fh.write("\n".join(output))

Esempio n. 12

0

Mostra file

File: taxify.py Progetto: DRL/blobtools

def main():
    """Add taxIDs to a BLAST or Diamond hits file and write the result.

    Exactly one of --blast / --diamond selects the hits file. The taxID of
    each hit comes either from a fixed --taxid or from one ID-to-taxID
    mapping file (--uniref, --rnacentral or --swissprot). For BLAST input,
    a taxID already present in the file is kept unless --force is set.
    Output lines are "query_id<TAB>tax_id<TAB>bitscore<TAB>rest".
    """
    args = docopt(__doc__)
    blast_f = args['--blast']
    diamond_f = args['--diamond']

    uniref_f = args['--uniref']
    rnacentral_f = args['--rnacentral']
    swissprot_f = args['--swissprot']
    taxid = args['--taxid']

    force = args['--force']
    prefix = args['--out']

    out_f, hit_f, map_f, taxid_d = None, None, None, {}

    # Check if blast_f OR diamond_f is specified (exactly one of them)
    if not (bool(blast_f) + bool(diamond_f) == 1):
        BtLog.error('26')
    elif blast_f:
        hit_f = blast_f
    else:
        hit_f = diamond_f

    # Check if taxID or Mapping file is supplied
    if taxid:
        try:
            taxid = int(taxid)
        except (TypeError, ValueError):
            # int() raises ValueError (not TypeError) for a non-numeric string
            BtLog.error('26')
        out_f = BtIO.getOutFile(hit_f, prefix, "tax_%s.out" % taxid)
        # every subject ID maps to the one user-supplied taxID
        taxid_d = defaultdict(lambda: taxid)
    elif bool(uniref_f) + bool(rnacentral_f) + bool(swissprot_f) == 1:
        if uniref_f:
            print(BtLog.status_d['1'] % ("ID-to-taxID Mapping file", uniref_f))
            taxid_d = BtIO.parseDict(uniref_f, 0, 1)
            out_f = BtIO.getOutFile(hit_f, prefix, "uniref.out")
            map_f = uniref_f
        elif rnacentral_f:
            print(BtLog.status_d['1'] % ("ID-to-taxID Mapping file", rnacentral_f))
            taxid_d = BtIO.parseDict(rnacentral_f, 0, 3)
            out_f = BtIO.getOutFile(hit_f, prefix, "rnacentral.out")
            map_f = rnacentral_f
        else:
            print(BtLog.status_d['1'] % ("ID-to-taxID Mapping file", swissprot_f))
            taxid_d = BtIO.parseDict(swissprot_f, 0, 1)
            out_f = BtIO.getOutFile(hit_f, prefix, "swissprot.out")
            map_f = swissprot_f
    else:
        BtLog.error('41')

    output = []
    print(BtLog.status_d['1'] % ("hits file", hit_f))

    with open(hit_f) as fh:
        for idx, raw_line in enumerate(fh):
            query_id, bitscore, tax_id, subject_id, rest = None, None, None, None, None
            line = raw_line.rstrip("\n").split()
            query_id = line[0]
            if blast_f:
                bitscore = line[2]
                tax_id = line[1]
                # NOTE(review): assumes the subject ID is the fourth column of
                # the BLAST table -- confirm against the expected outfmt
                subject_id = line[3]
                rest = "\t".join(line[2:])
            elif diamond_f:
                bitscore = line[11]
                subject_id = line[1]
                rest = "\t".join(line[1:])
            if swissprot_f:
                # the mapping is keyed on the second "|"-separated field
                subject_id = subject_id.split("|")[1]
            if blast_f and not tax_id == "N/A" and not force:
                # so that it does not overwrite existing taxIDs
                print(BtLog.warn_d['10'] % (idx + 1, line[0], line[1]))
            else:
                try:
                    tax_id = taxid_d[subject_id]
                except KeyError:
                    BtLog.error('42', subject_id, map_f)
                    # only reached if BtLog.error returns -- TODO confirm
                    tax_id = "N/A"
            output.append("%s\t%s\t%s\t%s" % (query_id, tax_id, bitscore, rest))

    if output:
        with open(out_f, "w") as fh:
            print(BtLog.status_d['24'] % out_f)
            fh.write("\n".join(output))

Esempio n. 13

0

Mostra file

File: view.py Progetto: sujaikumar/blobtools

def main():
    """Entry point of the view command.

    Reads the command-line options via docopt, loads the BlobDb stored in
    the file given by --input and hands the requested view objects to
    BlobDb.view():

    * a "table" view unless --notable is set,
    * an "experimental" view when --experimental is set,
    * "concoct_tax" and "concoct_cov" views when --concoct is set,
    * one "covlib" view per coverage library when --cov is set (each of
      these is written immediately, with its own output file).

    Problems (missing input file, unknown rank, missing sequence list,
    taxrule not present in the BlobDb) are reported through BtLog.error().
    NOTE(review): BtLog.error() presumably aborts the program - confirm.
    """
    args = docopt(__doc__)
    blobdb_f = args['--input']
    prefix = args['--out']
    ranks = args['--rank']
    taxrule = args['--taxrule']
    hits_flag = args['--hits']
    seq_list_f = args['--list']
    concoct = args['--concoct']
    cov = args['--cov']
    notable = args['--notable']
    experimental = args['--experimental']

    # The BlobDb file has to exist before anything else is attempted.
    if not isfile(blobdb_f):
        BtLog.error('0', blobdb_f)

    # Output file shared by the table/experimental/concoct views.
    out_f = BtIO.getOutFile(blobdb_f, prefix, None)

    # 'all' expands to every entry of RANKS except the last, in reverse
    # order; otherwise each requested rank must be one of RANKS.
    if 'all' in ranks:
        ranks = RANKS[0:-1][::-1]
    else:
        for rank in ranks:
            if rank in RANKS:
                continue
            BtLog.error('9', rank)

    # Optional list of sequences, read from the --list file.
    seqs = []
    if seq_list_f:
        if not isfile(seq_list_f):
            BtLog.error('0', seq_list_f)
        else:
            seqs = BtIO.parseList(seq_list_f)

    # Load the BlobDb and stamp it with the running blobtools version.
    blobDb = BtCore.BlobDb('new')
    print(BtLog.status_d['9'] % (blobdb_f))
    blobDb.load(blobdb_f)
    blobDb.version = blobtools.__version__

    # When hit libraries are present the taxrule must be one the BlobDb
    # already holds.
    if blobDb.hitLibs and taxrule not in blobDb.taxrules:
        BtLog.error('11', taxrule, blobDb.taxrules)

    # Collect the views that are rendered together in one final call.
    viewObjs = []
    print(BtLog.status_d['14'])
    if not notable:
        viewObjs.append(
            BtCore.ViewObj(name="table",
                           out_f=out_f,
                           suffix="table.txt",
                           body=[]))
    if experimental:
        viewObjs.append(
            BtCore.ExperimentalViewObj(name="experimental",
                                       view_dir=out_f))
    if concoct:
        viewObjs.append(
            BtCore.ViewObj(name="concoct_tax",
                           out_f=out_f,
                           suffix="concoct_taxonomy_info.csv",
                           body={}))
        viewObjs.append(
            BtCore.ViewObj(name="concoct_cov",
                           out_f=out_f,
                           suffix="concoct_coverage_info.tsv",
                           body=[]))

    # Coverage views are rendered one by one, each with an output file
    # derived from the coverage library's own file.
    if cov:
        for cov_lib_name, covLibDict in blobDb.covLibs.items():
            cov_out_f = BtIO.getOutFile(covLibDict['f'], prefix, None)
            covView = BtCore.ViewObj(name="covlib",
                                     out_f=cov_out_f,
                                     suffix="cov",
                                     body=[])
            blobDb.view(viewObjs=[covView],
                        ranks=None,
                        taxrule=None,
                        hits_flag=None,
                        seqs=None,
                        cov_libs=[cov_lib_name],
                        progressbar=True)

    # Render the remaining views, if any were requested.
    if viewObjs:
        blobDb.view(viewObjs=viewObjs,
                    ranks=ranks,
                    taxrule=taxrule,
                    hits_flag=hits_flag,
                    seqs=seqs,
                    cov_libs=[],
                    progressbar=True)
    print(BtLog.status_d['19'])