コード例 #1
0
ファイル: test.py プロジェクト: tanglingfung/metaseq
 def setup(self):
     """Open the ``gdc.bam`` example file and log a timestamped marker.

     Stores the 'bam' genomic signal on ``self.m``, then prints a line of the
     form ``[<now>] <ClassName>`` to stdout and flushes it.
     """
     self.m = metaseq.genomic_signal(
         metaseq.example_filename('gdc.bam'), kind='bam')
     line = '[%s] %s\n' % (datetime.datetime.now(), self.__class__.__name__)
     # Single-argument print(...) gives the same output on Python 2 and is
     # also valid Python 3 (the bare print statement is not).
     print(line)
     sys.stdout.flush()
コード例 #2
0
 def setup(self):
     """Create the 'bam' genomic signal for ``gdc.bam`` and announce the class.

     The signal is stored on ``self.m``. A ``[<timestamp>] <ClassName>`` line
     is then printed and stdout is flushed.
     """
     bam_fn = metaseq.example_filename('gdc.bam')
     self.m = metaseq.genomic_signal(bam_fn, kind='bam')
     line = '[%s] %s\n' % (datetime.datetime.now(), self.__class__.__name__)
     # print(...) with one argument behaves identically on Python 2 and 3;
     # the redundant trailing `pass` has been dropped.
     print(line)
     sys.stdout.flush()
コード例 #3
0
ファイル: test.py プロジェクト: NHLBI-BCB/metaseq
def test_errors():
    "generate one check per call that is expected to raise"
    adapters = metaseq.filetype_adapters

    # Minimal BaseAdapter subclass whose make_fileobj() returns None.
    class X(adapters.BaseAdapter):
        def make_fileobj(self):
            return None

    # Left without a docstring; only a comment documents it.
    def check(error, callable_obj, args, kwargs):
        assert_raises(error, callable_obj, *args, **kwargs)

    # Built in the same order as the cases are listed below.
    bed_fn = metaseq.example_filename('gdc.bed')
    bigwig_adapter = adapters.BigWigAdapter(
        metaseq.example_filename('gdc.bigwig'))
    stub_adapter = X("")

    # Each case is (expected error, callable, positional args, keyword args).
    cases = (
        (ValueError, adapters.BaseAdapter, (bed_fn,), {}),
        (NotImplementedError, bigwig_adapter.__getitem__, (0,), {}),
        (ValueError, stub_adapter.__getitem__, (0,), {}),
        # Disabled case, kept for reference:
        # (ValueError, gs['bam'].local_coverage, ['chr2L:1-5', 'chr2L:1-5'], dict(processes=PROCESSES)),
    )
    for case in cases:
        yield (check,) + case
コード例 #4
0
def run_metaseq(processes=6, chunksize=100):
    """Time metaseq's ``array()`` over the module-level ``windows``.

    processes -- number of processes passed to ``array()`` (default 6)
    chunksize -- chunk size passed to ``array()`` (default 100)

    Tweak ``processes`` and ``chunksize`` as needed to balance CPUs and I/O;
    the defaults are the values that used to be hard-coded here.

    Prints the elapsed wall-clock time and returns the result of calling
    ``.ravel()`` on the array.
    """
    # set up a BamSignal object
    m = metaseq.genomic_signal(
        metaseq.example_filename("wgEncodeUwTfbsK562CtcfStdAlnRep1.bam"),
        kind="bam")

    # No newline here, so the timing printed below ends up on the same line.
    # (Replaces the Python-2-only ``print "...",`` form; the trailing space is
    # what that form would have emitted before the next print.)
    sys.stdout.write("metaseq starting... ")
    sys.stdout.flush()
    t0 = time.time()

    # the trick is to use a single bin...
    ms_array = m.array(windows, processes=processes, chunksize=chunksize, bins=1)

    t1 = time.time()
    print("completed in %.2fs" % (t1 - t0))
    sys.stdout.flush()
    return ms_array.ravel()
コード例 #5
0
def run_metaseq(processes=6, chunksize=100):
    """Build a single-bin array for the module-level ``windows`` and time it.

    processes -- worker process count handed to ``array()`` (default 6)
    chunksize -- chunk size handed to ``array()`` (default 100)

    Both used to be constants inside the function; adjust them to balance
    CPUs and I/O. The elapsed time is printed, and the value returned is
    ``ms_array.ravel()``.
    """
    # set up a BamSignal object
    m = metaseq.genomic_signal(
        metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'),
        kind='bam')

    # Written without a newline so the "completed in ..." message follows on
    # the same line; this form works on both Python 2 and Python 3, unlike
    # the trailing-comma print statement it replaces.
    sys.stdout.write('metaseq starting... ')
    sys.stdout.flush()
    t0 = time.time()

    # the trick is to use a single bin...
    ms_array = m.array(windows,
                       processes=processes,
                       chunksize=chunksize,
                       bins=1)

    t1 = time.time()
    print('completed in %.2fs' % (t1 - t0))
    sys.stdout.flush()
    return ms_array.ravel()
コード例 #6
0
import multiprocessing
from matplotlib import pyplot as plt
import matplotlib
import numpy as np
import metaseq
import pybedtools

# Use example data and generate some random features
gs = metaseq.genomic_signal(metaseq.example_filename('x.bam'), 'bam')

features = pybedtools.BedTool()\
    .window_maker(
        b=pybedtools.BedTool('chr2L 0 500000',
                             from_string=True).fn,
        w=1000)\
    .shuffle(seed=1, genome={'chr2L': (0, 5000000)})
genes = []
for i, f in enumerate(features):
    genes.append('gene_%s' % i)
genes = np.array(genes)
arr = gs.array(features, processes=multiprocessing.cpu_count(), bins=100)

# At this point, each item in `genes` corresponds to the same row in `arr`

ind, breaks = metaseq.plotutils.clustered_sortind(arr, k=5)

# Boundaries of clusters are provided in `breaks`.
# So the first cluster's original indices into `arr` are:
cluster_1_inds = ind[0:breaks[0]]

# Which means the genes in the first cluster are:
コード例 #7
0
ファイル: test.py プロジェクト: NHLBI-BCB/metaseq
"""
Many of these tests use the minimal test/data/gdc.bed file which has just
enough complexity to be useful in testing corner cases.  When reading through
the tests, it's useful to have that file open to understand what's happening.
"""
import os
import metaseq
import multiprocessing
from metaseq.array_helpers import ArgumentError
import numpy as np
from nose.tools import assert_raises
from nose.plugins.skip import SkipTest
# One genomic_signal per file kind, each opened on the matching gdc.<kind>
# example file and keyed by kind.
gs = {}
for kind in ['bed', 'bam', 'bigbed', 'bigwig']:
    example_path = metaseq.example_filename('gdc.%s' % kind)
    gs[kind] = metaseq.genomic_signal(example_path, kind)

# Process count for the tests: METASEQ_PROCESSES from the environment when
# set, otherwise the number of CPUs.
default_processes = multiprocessing.cpu_count()
PROCESSES = int(os.environ.get("METASEQ_PROCESSES", default_processes))

def test_tointerval():
    # A "[-]" or "[+]" suffix sets the strand; with no suffix it is '.'.
    expected_strands = [
        ("chr2L:1-10[-]", '-'),
        ("chr2L:1-10[+]", '+'),
        ("chr2L:1-10", '.'),
    ]
    for coord, strand in expected_strands:
        assert metaseq.helpers.tointerval(coord).strand == strand


def test_local_count():

    def check(kind, coord, expected, stranded):
        try:
            result = gs[kind].local_count(coord, stranded=stranded)
        except NotImplementedError:
            raise SkipTest("Incompatible bx-python version for bigBed")
コード例 #8
0
def test_example_data_exists():
    # Both example BAM files must be present on disk.
    for basename in ('x.bam', 'gdc.bam'):
        assert os.path.exists(metaseq.example_filename(basename))
コード例 #9
0
 def setup(self):
     """Open the ``gdc.bigbed`` example file as a 'bigbed' signal on ``self.m``."""
     bigbed_fn = metaseq.example_filename('gdc.bigbed')
     self.m = metaseq.genomic_signal(bigbed_fn, kind='bigbed')
コード例 #10
0
ファイル: atf3_peaks.py プロジェクト: Al3n70rn/metaseq
    Convenience function to close all mini-browser figures
    """
    for fig in FIGS:
        plt.close(fig)

# Choices for RUN_TYPE are:
# * 'intron': all introns of all genes on the selected chromosomes
# * 'TSS'   : gene-level TSSs, +/- upstream and downstream bp
# * 'peaks' : peaks from ENCODE; acts as a positive control on the numbers

RUN_TYPE = 'TSS'

# Build the Chipseq object from the example IP BAM, control BAM and GTF
# database. A ValueError raised while doing so is replaced by one that points
# at the download script.
try:
    chip_kwargs = dict(
        ip_bam=metaseq.example_filename(
            'wgEncodeHaibTfbsK562Atf3V0416101AlnRep1.bam'),
        control_bam=metaseq.example_filename(
            'wgEncodeHaibTfbsK562RxlchV0416101AlnRep1.bam'),
        dbfn=metaseq.example_filename(
            'Homo_sapiens.GRCh37.66.cleaned.gtf.db'),
    )
    chip = chipseq.Chipseq(**chip_kwargs)
except ValueError:
    raise ValueError("please use the download_data.py script in the "
                     "data directory")


if RUN_TYPE == "TSS":
    # Gets all genes on selected chroms, then applies the TSS modifier and
    # saves the results
コード例 #11
0
def test_db():
    # attaching None should work
    d.attach_db(None)

    # ...and so should attaching the example database file
    db_fn = metaseq.example_filename('dmel-all-r5.33-cleaned.gff.db')
    d.attach_db(db_fn)
コード例 #12
0
"""
Many of these tests use the minimal test/data/gdc.bed file which has just
enough complexity to be useful in testing corner cases.  When reading through
the tests, it's useful to have that file open to understand what's happening.
"""
import os
import metaseq
import multiprocessing
from metaseq.array_helpers import ArgumentError
import numpy as np
from nose.tools import assert_raises
from nose.plugins.skip import SkipTest
# Map each file kind to a genomic_signal opened on the gdc.<kind> example file.
gs = {}
for kind in ['bed', 'bam', 'bigbed', 'bigwig']:
    gdc_fn = metaseq.example_filename('gdc.%s' % kind)
    gs[kind] = metaseq.genomic_signal(gdc_fn, kind)

# METASEQ_PROCESSES overrides the process count; the fallback is the CPU count.
cpu_total = multiprocessing.cpu_count()
PROCESSES = int(os.environ.get("METASEQ_PROCESSES", cpu_total))


def test_tointerval():
    # Strand comes from the bracketed suffix, defaulting to '.' without one.
    coords = ("chr2L:1-10[-]", "chr2L:1-10[+]", "chr2L:1-10")
    strands = ('-', '+', '.')
    for coord, wanted in zip(coords, strands):
        interval = metaseq.helpers.tointerval(coord)
        assert interval.strand == wanted


def test_local_count():
    def check(kind, coord, expected, stranded):
        try:
            result = gs[kind].local_count(coord, stranded=stranded)
コード例 #13
0
ファイル: test_large.py プロジェクト: Al3n70rn/metaseq
module for testing the larger files (x.bam, x.bed.gz, etc)
"""
import multiprocessing
import metaseq
import pybedtools

CPUS = multiprocessing.cpu_count()

gs = {}
for kind in ['bam', 'bigwig', 'bed', 'bigbed']:
    if kind == 'bed':
        ext = 'bed.gz'
    else:
        ext = kind
    gs[kind] = metaseq.genomic_signal(
        metaseq.example_filename('x.%s' % ext), kind)

# generate the test features
features = pybedtools.BedTool()\
        .window_maker(
            b=pybedtools.BedTool('chr2L 0 500000',
                                 from_string=True).fn,
            w=1000)\
        .shuffle(seed=1,
                 genome={'chr2L': (0, 5000000)})

args = (features,)
kwargs = dict(processes=CPUS, bins=100)
bam_array = gs['bam'].array(*args, **kwargs)
bed_array = gs['bed'].array(*args, **kwargs)
bw_array = gs['bigwig'].array(*args, method='get_as_array', **kwargs)
コード例 #14
0
ファイル: minibrowser.py プロジェクト: hjanime/metaseq
    def peak_panel(self, ax, feature):
        """Draw the entries of ``self.bed`` that intersect `feature` on `ax`.

        ``self.bed`` is wrapped in a BedTool and intersected with `feature`
        (``u=True``). The result is wrapped in a ``Track``, added to `ax` as
        a collection, and ``ax.axis('tight')`` is applied.

        Returns `feature` unchanged.
        """
        overlapping = pybedtools.BedTool(self.bed).intersect([feature], u=True)
        ax.add_collection(Track(overlapping))
        ax.axis('tight')
        return feature


if __name__ == "__main__":
    import metaseq
    import gffutils
    import pybedtools

    G = gffutils.FeatureDB(
            metaseq.example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db'))

    ip = metaseq.genomic_signal(
            metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'),
            'bam')
    inp = metaseq.genomic_signal(
            metaseq.example_filename('wgEncodeUwTfbsK562InputStdAlnRep1.bam'),
            'bam')
    peaks = pybedtools.BedTool(metaseq.example_filename(
            'wgEncodeUwTfbsK562CtcfStdPkRep1.narrowPeak.gz'))

    plotting_kwargs = [
                dict(color='r', label='IP'),
                dict(color='k', linestyle=':', label='input')]

    local_coverage_kwargs = dict(fragment_size=200)
コード例 #15
0
ファイル: github_issue_9.py プロジェクト: Al3n70rn/metaseq
import multiprocessing
from matplotlib import pyplot as plt
import matplotlib
import numpy as np
import metaseq
import pybedtools

# Use example data and generate some random features
gs = metaseq.genomic_signal(metaseq.example_filename('x.bam'), 'bam')

features = pybedtools.BedTool()\
    .window_maker(
        b=pybedtools.BedTool('chr2L 0 500000',
                             from_string=True).fn,
        w=1000)\
    .shuffle(seed=1, genome={'chr2L': (0, 5000000)})
genes = []
for i, f in enumerate(features):
    genes.append('gene_%s' % i)
genes = np.array(genes)
arr = gs.array(features, processes=multiprocessing.cpu_count(), bins=100)

# At this point, each item in `genes` corresponds to the same row in `arr`

ind, breaks = metaseq.plotutils.clustered_sortind(arr, k=5)

# Boundaries of clusters are provided in `breaks`.
# So the first cluster's original indices into `arr` are:
cluster_1_inds = ind[0:breaks[0]]

# Which means the genes in the first cluster are:
コード例 #16
0
    def peak_panel(self, ax, feature):
        """Add a ``Track`` of ``self.bed`` entries intersecting `feature` to `ax`.

        The intersection is computed with ``BedTool.intersect(..., u=True)``.
        After the track is added, ``ax.axis('tight')`` is called. `feature`
        is returned as given.
        """
        hits = pybedtools.BedTool(self.bed).intersect([feature], u=True)
        peak_track = Track(hits)
        ax.add_collection(peak_track)
        ax.axis('tight')
        return feature


if __name__ == "__main__":
    import metaseq
    import gffutils
    import pybedtools

    G = gffutils.FeatureDB(
        metaseq.example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db'))

    ip = metaseq.genomic_signal(
        metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'),
        'bam')
    inp = metaseq.genomic_signal(
        metaseq.example_filename('wgEncodeUwTfbsK562InputStdAlnRep1.bam'),
        'bam')
    peaks = pybedtools.BedTool(
        metaseq.example_filename(
            'wgEncodeUwTfbsK562CtcfStdPkRep1.narrowPeak.gz'))

    plotting_kwargs = [
        dict(color='r', label='IP'),
        dict(color='k', linestyle=':', label='input')
    ]
コード例 #17
0
ファイル: test.py プロジェクト: tanglingfung/metaseq
 def setup(self):
     """Store a 'bigbed' genomic signal for ``gdc.bigbed`` on ``self.m``."""
     example_fn = metaseq.example_filename('gdc.bigbed')
     self.m = metaseq.genomic_signal(example_fn, kind='bigbed')
コード例 #18
0
ファイル: test.py プロジェクト: tanglingfung/metaseq
 def setup(self):
     """Build ``self.d``, a ResultsTable over the example files.

     It is constructed from the ``ex.deseq`` example file and the
     ``dmel-all-r5.33-cleaned.gff.db`` example file, in that order.
     """
     self.d = metaseq.ResultsTable(
         metaseq.example_filename('ex.deseq'),
         metaseq.example_filename('dmel-all-r5.33-cleaned.gff.db'))
コード例 #19
0
ファイル: test.py プロジェクト: tanglingfung/metaseq
def test_example_data_exists():
    # x.bam is checked first, then gdc.bam; each must exist on disk.
    x_bam = metaseq.example_filename('x.bam')
    assert os.path.exists(x_bam)
    gdc_bam = metaseq.example_filename('gdc.bam')
    assert os.path.exists(gdc_bam)
コード例 #20
0
Diagnostic plots are generated at the end of the script.

TODO: figure out what's causing the discrepancies (open vs closed intervals?
Binning artifact? CIGAR operations?)
"""
import os
import sys
import time
import numpy as np
import metaseq
import pybedtools

from matplotlib import pyplot as plt

bam_fn = metaseq.example_filename("wgEncodeUwTfbsK562CtcfStdAlnRep1.bam")

# Stop early, pointing at the download script, when the example BAM is absent.
if not os.path.exists(bam_fn):
    raise ValueError(
        "Please run download_data.py in test/data dir to retrieve ENCODE "
        "data used for examples")


# Construct 10kb windows, but subset to only use chr19 (to speed up the test)
# Single-argument print(...) prints the same text on Python 2 and is also
# valid Python 3, unlike the print statement it replaces.
print("creating windows...")
sys.stdout.flush()
windows = (
    pybedtools.BedTool()
    .window_maker(genome="hg19", w=10000)
    .filter(lambda x: x.chrom == "chr19")
    .saveas()
)


def run_bedtools():

    # set up a BAM-based BedTool
    bt = pybedtools.BedTool(bam_fn)
コード例 #21
0
ファイル: chipseq.py プロジェクト: Al3n70rn/metaseq
if __name__ == "__main__":
    import sys
    choices = ['xcorr', 'chipseq']
    try:
        examples = sys.argv[1:]
    except IndexError:
        print 'Choices are: ', choices
        examples = []

    for ex in examples:
        if ex not in choices:
            raise ValueError('%s not in %s' % (ex, choices))

    if 'xcorr' in examples:
        ip = metaseq.genomic_signal(
            metaseq.example_filename(
                'wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'), 'bam')

        NWINDOWS = 5000
        FRAGMENT_SIZE = 1
        WINDOWSIZE = 5000
        THRESH = FRAGMENT_SIZE / float(WINDOWSIZE) * 10
        lags, shift = estimate_shift(
            ip, nwindows=NWINDOWS, maxlag=500, thresh=THRESH,
            array_kwargs=dict(
                processes=8, chunksize=100,
                fragment_size=FRAGMENT_SIZE),
            verbose=True)
        plt.plot(lags, shift.mean(axis=0))
        plt.axvline(
            lags[np.argmax(shift.mean(axis=0))],
            linestyle='--', color='k')
コード例 #22
0
from metaseq import results_table
import metaseq
import numpy as np

# Module-level fixture: a ResultsTable built from the `ex.deseq` example file.
# The test functions in this module read it through the global name `d`.
fn = metaseq.example_filename('ex.deseq')
d = results_table.ResultsTable(fn)


def test_dataframe_access():

    # attribute access and item access must both hand back the very same
    # object as d.data.id (identity, not just equality)
    by_attribute = d.id
    assert by_attribute is d.data.id
    by_key = d['id']
    assert by_key is d.data.id

def test_dataframe_subsetting():
    # Slicing the table and update()-ing it with sliced data are each compared
    # against the first ten rows of the underlying data.
    # NOTE(review): if `.data` is a pandas DataFrame, the builtin all() walks
    # its column labels rather than the comparison values -- confirm this is
    # the intended strength of the check.
    sliced = d[:10]
    assert all(sliced.data == d.data[:10])
    updated = d.update(d.data[:10])
    assert all(updated.data == d.data[:10])

def test_copy():
    # Changing `id` on a copy must not show up on the original table.
    marker = 'a'
    duplicate = d.copy()
    duplicate.id = marker
    assert duplicate.id[0] == marker
    assert d.id[0] != marker

def smoke_tests():
    # smoke test for repr; single-argument print(...) prints the same on
    # Python 2 and is also valid Python 3, unlike the print statement
    print(repr(d))

def test_db():

    # should work
コード例 #23
0
ファイル: test_large.py プロジェクト: woodhaha/metaseq
"""
module for testing the larger files (x.bam, x.bed.gz, etc)
"""
import multiprocessing
import metaseq
import pybedtools

CPUS = multiprocessing.cpu_count()

gs = {}
for kind in ['bam', 'bigwig', 'bed', 'bigbed']:
    if kind == 'bed':
        ext = 'bed.gz'
    else:
        ext = kind
    gs[kind] = metaseq.genomic_signal(metaseq.example_filename('x.%s' % ext),
                                      kind)

# generate the test features
features = pybedtools.BedTool()\
        .window_maker(
            b=pybedtools.BedTool('chr2L 0 500000',
                                 from_string=True).fn,
            w=1000)\
        .shuffle(seed=1,
                 genome={'chr2L': (0, 5000000)})

args = (features, )
kwargs = dict(processes=CPUS, bins=100)
bam_array = gs['bam'].array(*args, **kwargs)
bed_array = gs['bed'].array(*args, **kwargs)
コード例 #24
0
if __name__ == "__main__":
    import sys
    choices = ['xcorr', 'chipseq']
    try:
        examples = sys.argv[1:]
    except IndexError:
        print 'Choices are: ', choices
        examples = []

    for ex in examples:
        if ex not in choices:
            raise ValueError('%s not in %s' % (ex, choices))

    if 'xcorr' in examples:
        ip = metaseq.genomic_signal(
            metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'),
            'bam')

        NWINDOWS = 5000
        FRAGMENT_SIZE = 1
        WINDOWSIZE = 5000
        THRESH = FRAGMENT_SIZE / float(WINDOWSIZE) * 10
        lags, shift = estimate_shift(ip,
                                     nwindows=NWINDOWS,
                                     maxlag=500,
                                     thresh=THRESH,
                                     array_kwargs=dict(
                                         processes=8,
                                         chunksize=100,
                                         fragment_size=FRAGMENT_SIZE),
                                     verbose=True)
コード例 #25
0
from metaseq import results_table
import metaseq
import numpy as np

# Shared by the tests in this module: `fn` is the path of the `ex.deseq`
# example file and `d` is the ResultsTable constructed from it.
fn = metaseq.example_filename('ex.deseq')
d = results_table.ResultsTable(fn)


def test_dataframe_access():

    # d.id and d['id'] should each be the same in-memory object as d.data.id
    attr_view = d.id
    assert attr_view is d.data.id
    item_view = d['id']
    assert item_view is d.data.id


def test_dataframe_subsetting():
    # Compare a sliced table, then an update()-d table, with d.data[:10].
    # NOTE(review): builtin all() over a pandas DataFrame iterates column
    # labels, not cell values -- verify that `.data` comparison results are
    # being checked as intended.
    head_table = d[:10]
    assert all(head_table.data == d.data[:10])
    replaced = d.update(d.data[:10])
    assert all(replaced.data == d.data[:10])


def test_copy():
    # Assigning to `id` on the copy must leave the original's ids untouched.
    clone = d.copy()
    clone.id = 'a'
    first_of_clone = clone.id[0]
    assert first_of_clone == 'a'
    first_of_original = d.id[0]
    assert first_of_original != 'a'


def smoke_tests():
    # smoke test for repr (print(...) form works on both Python 2 and 3)
    print(repr(d))
コード例 #26
0
    def peak_panel(self, ax, feature):
        """Add a ``Track`` of ``self.bed`` entries intersecting `feature` to `ax`.

        ``self.bed`` is wrapped in a BedTool and intersected with `feature`
        using ``u=True``; the result becomes a ``Track`` that is added to
        `ax` as a collection. `feature` is returned as given.
        """
        intersecting = pybedtools.BedTool(self.bed).intersect([feature], u=True)
        ax.add_collection(Track(intersecting))
        # NOTE: the ax.axis('tight') call is commented out in this version.
        # ax.axis('tight')
        return feature


if __name__ == "__main__":
    import metaseq
    import gffutils
    import pybedtools

    G = gffutils.FeatureDB(metaseq.example_filename("Homo_sapiens.GRCh37.66.cleaned.gtf.db"))

    ip = metaseq.genomic_signal(metaseq.example_filename("wgEncodeUwTfbsK562CtcfStdAlnRep1.bam"), "bam")
    inp = metaseq.genomic_signal(metaseq.example_filename("wgEncodeUwTfbsK562InputStdAlnRep1.bam"), "bam")
    peaks = pybedtools.BedTool(metaseq.example_filename("wgEncodeUwTfbsK562CtcfStdPkRep1.narrowPeak.gz"))

    plotting_kwargs = [dict(color="r", label="IP"), dict(color="k", linestyle=":", label="input")]

    local_coverage_kwargs = dict(fragment_size=200)

    b = SignalMiniBrowser([ip, inp], plotting_kwargs=plotting_kwargs, local_coverage_kwargs=local_coverage_kwargs)

    g = GeneModelMiniBrowser([ip, inp], G, plotting_kwargs=plotting_kwargs, local_coverage_kwargs=local_coverage_kwargs)

    p = PeakMiniBrowser([ip, inp], peaks, plotting_kwargs=plotting_kwargs, local_coverage_kwargs=local_coverage_kwargs)
コード例 #27
0
"""
Settings for the ctcf_peaks example script
"""
import gffutils
import metaseq

# Numeric and naming settings read by the example script.
# NOTE(review): meanings are inferred from the names only (UPSTREAM/DOWNSTREAM
# presumably in bp, BINS presumably the number of bins per feature,
# FRAGMENT_SIZE presumably in bp) -- confirm against the ctcf_peaks script.
UPSTREAM = 1000
DOWNSTREAM = 1000
BINS = 100
FRAGMENT_SIZE = 200
GENOME = 'hg19'
CHROMS = ['chr1', 'chr2']

# Path of the example GTF database file, and a gffutils FeatureDB opened on it.
gtfdb = metaseq.example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db')
G = gffutils.FeatureDB(gtfdb)
コード例 #28
0
Diagnostic plots are generated at the end of the script.

TODO: figure out what's causing the discrepancies (open vs closed intervals?
Binning artifact? CIGAR operations?)
"""
import os
import sys
import time
import numpy as np
import metaseq
import pybedtools

from matplotlib import pyplot as plt

bam_fn = metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam')

# Without the example BAM nothing below can run, so fail with a hint about
# the download script.
if not os.path.exists(bam_fn):
    raise ValueError(
        'Please run download_data.py in test/data dir to retrieve ENCODE '
        'data used for examples')

# Construct 10kb windows, but subset to only use chr19 (to speed up the test)
# print(...) with one argument gives the same output on Python 2 and is also
# valid Python 3, unlike the print statement it replaces.
print('creating windows...')
sys.stdout.flush()
windows = pybedtools.BedTool()\
        .window_maker(genome='hg19', w=10000)\
        .filter(lambda x: x.chrom == 'chr19')\
        .saveas()

コード例 #29
0
 def setup(self):
     """Create ``self.d`` from the example DESeq file and example database file.

     The two paths are resolved with ``metaseq.example_filename`` and passed
     positionally to ``metaseq.ResultsTable``: ``ex.deseq`` first, then
     ``dmel-all-r5.33-cleaned.gff.db``.
     """
     example_paths = [
         metaseq.example_filename(basename)
         for basename in ('ex.deseq', 'dmel-all-r5.33-cleaned.gff.db')
     ]
     self.d = metaseq.ResultsTable(*example_paths)
コード例 #30
0
ファイル: results_table.py プロジェクト: cauyrd/metaseq
        return s

    def keys(self):
        """Return the keys of ``self.fn_dict``."""
        fn_dict = self.fn_dict
        return fn_dict.keys()

    def values(self):
        """Return a list of ``self._dict[key]`` for each key of ``self.keys()``, in that order."""
        found = []
        for name in self.keys():
            found.append(self._dict[name])
        return found

    def items(self):
        """Return a list of ``(key, self._dict[key])`` pairs in ``self.keys()`` order."""
        return [(name, self._dict[name]) for name in self.keys()]

if __name__ == "__main__":
    import metaseq
    from matplotlib import pyplot as plt

    db = metaseq.example_filename('dmel-all-r5.33-cleaned.gff.db')
    import_kwargs = dict(comment='#')
    d = DESeqResults(
        metaseq.example_filename('rrp6-s2-polyA.final.summary'),
        db=db,
        import_kwargs=import_kwargs,
    )

    e = DESeqResults(
        metaseq.example_filename('rrp40-s2-polyA.final.summary'),
        db=db,
        import_kwargs=import_kwargs,
    )

    d = d.align_with(e)
コード例 #31
0
ファイル: tables.py プロジェクト: olgabot/metaseq
            if (i == j) and hist_kwargs:
                ax.hist(xfunc(p.ix[i][val]), **hist_kwargs)
            else:
                scatter(p.ix[i][val],
                        p.ix[j][val],
                        ax=ax,
                        xlab_prefix=i + " ",
                        ylab_prefix=j + " ",
                        **kwargs)
            axind += 1


if __name__ == "__main__":
    from metaseq import example_filename

    dbfn = example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db')
    db = gffutils.FeatureDB(dbfn)

    p = pandas.Panel({
        'uninduced_1':
        deseq_dataframe(example_filename('GSM847565_SL2585.table'),
                        index_col='id',
                        db=db),
        'induced_1':
        deseq_dataframe(example_filename('GSM847566_SL2592.table'),
                        index_col='id',
                        db=db),
        'uninduced_2':
        deseq_dataframe(example_filename('GSM847567_SL4337.table'),
                        index_col='id',
                        db=db),
コード例 #32
0
ファイル: signal_comparison.py プロジェクト: Al3n70rn/metaseq
                if score != 0:
                    fout.write('\t'.join([
                        feature.chrom,
                        str(start),
                        str(stop),
                        str(score)]) + '\n')
                start = start + binsize
        this_batch = []
        i = 0
    fout.close()


if __name__ == "__main__":
    import metaseq
    ip_bam = metaseq.genomic_signal(
            metaseq.example_filename(
                'wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'), 'bam')
    control_bam = metaseq.genomic_signal(
            metaseq.example_filename(
                'wgEncodeUwTfbsK562InputStdAlnRep1.bam'), 'bam')

    BINSIZE = 10
    WINDOWSIZE = 10000
    BINS = WINDOWSIZE / BINSIZE
    features = pybedtools.BedTool()\
            .window_maker(genome='hg19', w=WINDOWSIZE)\
            .filter(lambda x: x.chrom == 'chr19')

    result = compare(
            signal1=ip_bam,
            signal2=control_bam,
            features=features,
コード例 #33
0
ファイル: example.py プロジェクト: lingdudefeiteng/metaseq
import numpy as np
import os
import metaseq

ip_filename = metaseq.helpers.example_filename("wgEncodeHaibTfbsK562Atf3V0416101AlnRep1_chr17.bam")
input_filename = metaseq.helpers.example_filename("wgEncodeHaibTfbsK562RxlchV0416101AlnRep1_chr17.bam")

ip_signal = metaseq.genomic_signal(ip_filename, "bam")
input_signal = metaseq.genomic_signal(input_filename, "bam")

# If you already have TSSs, skip this part.
import gffutils

db = gffutils.FeatureDB(metaseq.example_filename("Homo_sapiens.GRCh37.66_chr17.gtf.db"))

import pybedtools
from pybedtools.featurefuncs import TSS
from gffutils.helpers import asinterval


def tss_generator():
    """Yield ``TSS(...)`` of each "transcript" feature in the module-level ``db``.

    Every feature is converted with ``asinterval`` and passed to ``TSS``
    with ``upstream=1000`` and ``downstream=1000``.
    """
    transcripts = db.features_of_type("transcript")
    for feature in transcripts:
        interval = asinterval(feature)
        yield TSS(interval, upstream=1000, downstream=1000)


# Generate tsses.gtf only when it is not already on disk, then always open
# the on-disk file.
tss_path = "tsses.gtf"
if not os.path.exists(tss_path):
    pybedtools.BedTool(tss_generator()).saveas(tss_path)
tsses = pybedtools.BedTool(tss_path)

from metaseq import persistence
コード例 #34
0
ファイル: tables.py プロジェクト: tanglingfung/metaseq
    ncols = len(p.items)
    axind = 1
    for i in p.items:
        for j in p.items:
            ax = fig.add_subplot(nrows, ncols, axind)
            if (i == j) and hist_kwargs:
                ax.hist(xfunc(p.ix[i][val]), **hist_kwargs)
            else:
                scatter(p.ix[i][val], p.ix[j][val], ax=ax, xlab_prefix=i + " ", ylab_prefix=j + " ", **kwargs)
            axind += 1


if __name__ == "__main__":
    from metaseq import example_filename

    dbfn = example_filename('Homo_sapiens.GRCh37.66.cleaned.gtf.db')
    db = gffutils.FeatureDB(dbfn)

    p = pandas.Panel(
            {
                'uninduced_1': deseq_dataframe(
                    example_filename('GSM847565_SL2585.table'),
                    index_col='id', db=db),

                'induced_1': deseq_dataframe(
                    example_filename('GSM847566_SL2592.table'),
                    index_col='id', db=db),
                'uninduced_2': deseq_dataframe(
                    example_filename('GSM847567_SL4337.table'),
                    index_col='id', db=db),
コード例 #35
0
import numpy as np
import os
import metaseq

ip_filename = metaseq.helpers.example_filename(
    'wgEncodeHaibTfbsK562Atf3V0416101AlnRep1_chr17.bam')
input_filename = metaseq.helpers.example_filename(
    'wgEncodeHaibTfbsK562RxlchV0416101AlnRep1_chr17.bam')

ip_signal = metaseq.genomic_signal(ip_filename, 'bam')
input_signal = metaseq.genomic_signal(input_filename, 'bam')

# If you already have TSSs, skip this part.
import gffutils
db = gffutils.FeatureDB(
    metaseq.example_filename('Homo_sapiens.GRCh37.66_chr17.gtf.db'))

import pybedtools
from pybedtools.featurefuncs import TSS
from gffutils.helpers import asinterval


def tss_generator():
    """Generate one ``TSS(...)`` result per 'transcript' feature of the module-level ``db``.

    Each transcript goes through ``asinterval`` first; ``TSS`` is called with
    ``upstream=1000`` and ``downstream=1000``.
    """
    flank = dict(upstream=1000, downstream=1000)
    for tx in db.features_of_type('transcript'):
        yield TSS(asinterval(tx), **flank)

# Write tsses.gtf on the first run only; afterwards the existing file is
# opened as-is.
tss_filename = 'tsses.gtf'
tss_file_missing = not os.path.exists(tss_filename)
if tss_file_missing:
    pybedtools.BedTool(tss_generator()).saveas(tss_filename)
tsses = pybedtools.BedTool(tss_filename)

from metaseq import persistence
コード例 #36
0
def test_db():
    attach = d.attach_db

    # should work
    attach(None)

    # attaching the example database file should work too
    attach(metaseq.example_filename('dmel-all-r5.33-cleaned.gff.db'))
コード例 #37
0
    Convenience function to close all mini-browser figures
    """
    for fig in FIGS:
        plt.close(fig)


# Choices for RUN_TYPE are:
# * 'intron': all introns of all genes on the selected chromosomes
# * 'TSS'   : gene-level TSSs, +/- upstream and downstream bp
# * 'peaks' : peaks from ENCODE; acts as a positive control on the numbers

RUN_TYPE = 'TSS'

# Create the Chipseq object from the example IP/control BAMs and the GTF
# database. A ValueError raised anywhere in the process is re-raised with a
# message pointing at the download script.
try:
    ip_bam_fn = metaseq.example_filename(
        'wgEncodeHaibTfbsK562Atf3V0416101AlnRep1.bam')
    control_bam_fn = metaseq.example_filename(
        'wgEncodeHaibTfbsK562RxlchV0416101AlnRep1.bam')
    gtf_db_fn = metaseq.example_filename(
        'Homo_sapiens.GRCh37.66.cleaned.gtf.db')
    chip = chipseq.Chipseq(
        ip_bam=ip_bam_fn, control_bam=control_bam_fn, dbfn=gtf_db_fn)
except ValueError:
    raise ValueError("please use the download_data.py script in the "
                     "data directory")

if RUN_TYPE == "TSS":
    # Gets all genes on selected chroms, then applies the TSS modifier and
    # saves the results
    tss_fn = 'example_tsses.gtf'
    if not os.path.exists(tss_fn):
        features = pybedtools.BedTool(helpers.gene_generator())\
                .filter(helpers.chromfilter)\
                .each(helpers.TSS, upstream=settings.UPSTREAM,
コード例 #38
0
                if score != 0:
                    fout.write('\t'.join(
                        [feature.chrom,
                         str(start),
                         str(stop),
                         str(score)]) + '\n')
                start = start + binsize
        this_batch = []
        i = 0
    fout.close()


if __name__ == "__main__":
    import metaseq
    ip_bam = metaseq.genomic_signal(
        metaseq.example_filename('wgEncodeUwTfbsK562CtcfStdAlnRep1.bam'),
        'bam')
    control_bam = metaseq.genomic_signal(
        metaseq.example_filename('wgEncodeUwTfbsK562InputStdAlnRep1.bam'),
        'bam')

    BINSIZE = 10
    WINDOWSIZE = 10000
    BINS = WINDOWSIZE / BINSIZE
    features = pybedtools.BedTool()\
            .window_maker(genome='hg19', w=WINDOWSIZE)\
            .filter(lambda x: x.chrom == 'chr19')

    result = compare(signal1=ip_bam,
                     signal2=control_bam,
                     features=features,