Ejemplo n.º 1
0
def test_get_features_by_feature(frag3, frag6, region3, region6):
    """Check the per-region feature counts from ``_get_features_by_feature``.

    Only the ``frag6`` input is exercised. For each of the two region files,
    the length of the second element of every yielded item is collected, and
    the sorted lengths must equal ``[0, 1, 3]``.
    """
    from pybedtools import BedTool

    expected = [0, 1, 3]
    for fragment_file in (frag6,):
        track = Track.load(fragment_file)
        for region_file in (region3, region6):
            per_region = track._get_features_by_feature(BedTool(region_file))
            sizes = sorted(len(entry[1]) for entry in per_region)
            assert expected == sizes
Ejemplo n.º 2
0
def tracks():
    """Return one loaded ``Track`` per profile file under ``tests/data``.

    Files are named ``tests/data/profile.<extension>`` and are loaded in the
    order the extensions are listed below.
    """
    # "bed.gz" is intentionally not part of the list at the moment.
    extensions = (
        "bam",
        "bed",
        "wig",
        "bg",
        "bw",
        "wig.gz",
        "bg.gz",
        "bigWig",
        "bedGraph",
    )
    return [Track.load("tests/data/profile." + ext) for ext in extensions]
Ejemplo n.º 3
0
    def __init__(self, bamfile, height=1, color=None, bgmode=None, alpha=None, fragmentsize=200, rmdup=True,
                 rmrepeats=True, **kwargs):
        """Store display settings and load the underlying track.

        Args:
            bamfile: Path handed to ``Track.load``.
            height: Stored as ``self.height``.
            color: Stored as ``self.color``; a falsy value selects "#a7004b".
            bgmode: Stored as ``self.bgmode``.
            alpha: Stored as ``self.alpha``; a falsy value selects 1.
            fragmentsize: Forwarded to ``Track.load`` and kept on the instance.
            rmdup: Forwarded to ``Track.load`` and kept on the instance.
            rmrepeats: Forwarded to ``Track.load`` and kept on the instance.
            **kwargs: ``adjscale`` (default False, stored as ``scalepm``),
                ``show_scale`` (default True) and ``name`` (default None)
                are read; other keys are ignored.
        """
        load_options = dict(fragmentsize=fragmentsize, rmdup=rmdup, rmrepeats=rmrepeats)

        self.height = height
        self.track = Track.load(bamfile, **load_options)

        self.ymax = None
        self.bgmode = bgmode

        # Optional display switches supplied through keyword arguments.
        self.scalepm = kwargs.get("adjscale", False)
        self.show_scale = kwargs.get("show_scale", True)

        # Falsy colour/alpha values fall back to the defaults.
        self.color = color or "#a7004b"
        self.alpha = alpha or 1

        self.fragmentsize = fragmentsize
        self.rmdup = rmdup
        self.rmrepeats = rmrepeats

        self.name = kwargs.get('name')
Ejemplo n.º 4
0
def test_fetch_to_counts(frag3, frag6, region3, region6):
    """Check ``fetch_to_counts`` for every fragment/region file combination.

    Each call must yield exactly three items, and the combined length of the
    second and third element of each item, once sorted, must be ``[0, 1, 3]``.
    """
    from pybedtools import BedTool

    for fragment_file in (frag3, frag6):
        track = Track.load(fragment_file)
        for region_file in (region3, region6):
            rows = list(track.fetch_to_counts(BedTool(region_file)))
            assert 3 == len(rows)
            totals = sorted(len(row[1]) + len(row[2]) for row in rows)
            assert [0, 1, 3] == totals
Ejemplo n.º 5
0
def test_get_features_by_feature(frag3, frag6, region3, region6):
    """Check the per-region feature counts from ``_get_features_by_feature``.

    Only the ``frag6`` input is exercised. For each of the two region files,
    the length of the second element of every yielded item is collected, and
    the sorted lengths must equal ``[0, 1, 3]``.
    """
    from pybedtools import BedTool

    expected = [0, 1, 3]
    for fragment_file in (frag6,):
        track = Track.load(fragment_file)
        for region_file in (region3, region6):
            per_region = track._get_features_by_feature(BedTool(region_file))
            sizes = sorted(len(entry[1]) for entry in per_region)
            assert expected == sizes
Ejemplo n.º 6
0
def test_fetch_to_counts(frag3, frag6, region3, region6):
    """Check ``fetch_to_counts`` for every fragment/region file combination.

    Each call must yield exactly three items, and the combined length of the
    second and third element of each item, once sorted, must be ``[0, 1, 3]``.
    """
    from pybedtools import BedTool

    for fragment_file in (frag3, frag6):
        track = Track.load(fragment_file)
        for region_file in (region3, region6):
            rows = list(track.fetch_to_counts(BedTool(region_file)))
            assert 3 == len(rows)
            totals = sorted(len(row[1]) + len(row[2]) for row in rows)
            assert [0, 1, 3] == totals
Ejemplo n.º 7
0
def tracks():
    """Return one loaded ``Track`` per profile file under ``tests/data``.

    Files are named ``tests/data/profile.<extension>`` and are loaded in the
    order the extensions are listed below.
    """
    # "bed.gz" is intentionally not part of the list at the moment.
    extensions = (
        "bam",
        "bed",
        "wig",
        "bg",
        "bw",
        "wig.gz",
        "bg.gz",
        "bigWig",
        "bedGraph",
    )
    return [Track.load("tests/data/profile." + ext) for ext in extensions]
Ejemplo n.º 8
0
def load_heatmap_data(featurefile, datafile, bins=100, up=5000, down=5000,
                      rmdup=True, rpkm=False, rmrepeats=True,
                      fragmentsize=None, dynam=False, guard=None):
    """Collect binned signal from ``datafile`` for windows around features.

    Each non-header line of the tab-separated ``featurefile`` is turned into
    a window around the midpoint of columns 2 and 3. On the "+" strand the
    window extends ``up`` before and ``down`` after the midpoint; on any
    other strand the two are swapped. Windows whose start would be negative
    are dropped. The kept windows are written to a temporary BED file that
    is passed to ``Track.binned_stats``.

    Args:
        featurefile: Path of the tab-separated feature file. Lines starting
            with "#" or "track" are skipped.
        datafile: Path handed to ``Track.load``.
        bins: Number of bins passed to ``binned_stats``.
        up: Extension on the upstream side of the midpoint.
        down: Extension on the downstream side of the midpoint.
        rmdup: Forwarded to ``Track.load``.
        rpkm: Forwarded to ``binned_stats``.
        rmrepeats: Forwarded to ``Track.load``.
        fragmentsize: Forwarded to ``Track.load``.
        dynam: When truthy, ``guard`` decides which feature lines are used.
        guard: List with one truthy/falsy entry per data line. If it is
            empty (or None) while ``dynam`` is truthy, this call only fills
            it (True when the window start is >= 0) and writes no windows.

    Returns:
        A tuple ``(basename of datafile, regions, r_data, guard)``, where
        ``regions`` is a list of ``[chrom, start, end, gene, strand]`` and
        ``r_data`` is a numpy array built from columns 4+ of every row
        returned by ``binned_stats``.
    """
    if guard is None:
        guard = []
    # First dynamic pass: no guard yet, so only record which lines are usable.
    filt = bool(dynam) and not guard
    regions = []
    # Header/comment lines seen so far; keeps guard indices aligned with
    # data lines rather than raw line numbers.
    hashcounter = 0

    # mode='w+' with an explicit encoding so text can be written on Python 3.
    tmp = tempfile.NamedTemporaryFile(mode='w+', encoding='utf-8', delete=False, prefix="fluff")
    try:
        # Both handles are closed on leaving the block; the temp file itself
        # stays on disk (delete=False) so binned_stats can read it by name.
        with tmp, open(featurefile) as features:
            for i, line in enumerate(features):
                if line.startswith("#") or line[:5] == "track":
                    hashcounter += 1
                    continue
                vals = line.strip().split("\t")
                strand = vals[5] if len(vals) >= 6 else "+"
                gene = vals[3] if len(vals) >= 4 else ""

                middle = int((int(vals[2]) + int(vals[1])) / 2)
                if strand == "+":
                    start, end = middle - up, middle + down
                else:
                    start, end = middle - down, middle + up

                if filt:
                    guard.append(start >= 0)
                    continue
                if start < 0:
                    continue
                if dynam and not guard[i - hashcounter]:
                    continue

                regions.append([vals[0], start, end, gene, strand])
                tmp.write("{0}\t{1}\t{2}\t{3}\t0\t{4}\n".format(vals[0], start, end, gene, strand))

        track = Track.load(datafile,
                           rmdup=rmdup,
                           rmrepeats=rmrepeats,
                           fragmentsize=fragmentsize)
        result = track.binned_stats(tmp.name, bins, split=True, rpkm=rpkm)
        # Materialise the rows before the temporary file is removed.
        r_data = np.array([[float(x) for x in row[3:]] for row in result])
    finally:
        # The file was created with delete=False, so remove it explicitly.
        os.remove(tmp.name)
    return os.path.basename(datafile), regions, r_data, guard
Ejemplo n.º 9
0
def load_heatmap_data(featurefile, datafile, bins=100, up=5000, down=5000,
                      rmdup=True, rpkm=False, rmrepeats=True,
                      fragmentsize=None, dynam=False, guard=None):
    """Collect binned signal from ``datafile`` for windows around features.

    Each non-header line of the tab-separated ``featurefile`` is turned into
    a window around the midpoint of columns 2 and 3. On the "+" strand the
    window extends ``up`` before and ``down`` after the midpoint; on any
    other strand the two are swapped. Windows whose start would be negative
    are dropped. The kept windows are written to a temporary BED file that
    is passed to ``Track.binned_stats``.

    Args:
        featurefile: Path of the tab-separated feature file. Lines starting
            with "#" or "track" are skipped.
        datafile: Path handed to ``Track.load``.
        bins: Number of bins passed to ``binned_stats``.
        up: Extension on the upstream side of the midpoint.
        down: Extension on the downstream side of the midpoint.
        rmdup: Forwarded to ``Track.load``.
        rpkm: Forwarded to ``binned_stats``.
        rmrepeats: Forwarded to ``Track.load``.
        fragmentsize: Forwarded to ``Track.load``.
        dynam: When truthy, ``guard`` decides which feature lines are used.
        guard: List with one truthy/falsy entry per data line. If it is
            empty (or None) while ``dynam`` is truthy, this call only fills
            it (True when the window start is >= 0) and writes no windows.

    Returns:
        A tuple ``(basename of datafile, regions, r_data, guard)``, where
        ``regions`` is a list of ``[chrom, start, end, gene, strand]`` and
        ``r_data`` is a numpy array built from columns 4+ of every row
        returned by ``binned_stats``.
    """
    if guard is None:
        guard = []
    # First dynamic pass: no guard yet, so only record which lines are usable.
    filt = bool(dynam) and not guard
    regions = []
    # Header/comment lines seen so far; keeps guard indices aligned with
    # data lines rather than raw line numbers.
    hashcounter = 0

    # Text mode: the default binary mode rejects str writes on Python 3.
    # No encoding argument is given, to stay compatible with Python 2.
    tmp = tempfile.NamedTemporaryFile(mode="w+", delete=False, prefix="fluff")
    try:
        # Both handles are closed on leaving the block; the temp file itself
        # stays on disk (delete=False) so binned_stats can read it by name.
        with tmp, open(featurefile) as features:
            for i, line in enumerate(features):
                if line.startswith("#") or line[:5] == "track":
                    hashcounter += 1
                    continue
                vals = line.strip().split("\t")
                strand = vals[5] if len(vals) >= 6 else "+"
                gene = vals[3] if len(vals) >= 4 else ""

                # Floor division keeps the coordinates integral on Python 3
                # and matches what "/" does for ints on Python 2.
                middle = (int(vals[2]) + int(vals[1])) // 2
                if strand == "+":
                    start, end = middle - up, middle + down
                else:
                    start, end = middle - down, middle + up

                if filt:
                    guard.append(start >= 0)
                    continue
                if start < 0:
                    continue
                if dynam and not guard[i - hashcounter]:
                    continue

                regions.append([vals[0], start, end, gene, strand])
                tmp.write("{0}\t{1}\t{2}\t{3}\t0\t{4}\n".format(vals[0], start, end, gene, strand))

        track = Track.load(datafile,
                           rmdup=rmdup,
                           rmrepeats=rmrepeats,
                           fragmentsize=fragmentsize)
        result = track.binned_stats(tmp.name, bins, split=True, rpkm=rpkm)
        # Materialise the rows before the temporary file is removed.
        r_data = np.array([[float(x) for x in row[3:]] for row in result])
    finally:
        # The file was created with delete=False, so remove it explicitly.
        os.remove(tmp.name)
    return os.path.basename(datafile), regions, r_data, guard
Ejemplo n.º 10
0
def load_cluster_data(clust_file, datafiles, bins, rpkm, rmdup, rmrepeats, fragmentsize=None):
    """Gather binned statistics of ``clust_file`` for several data files.

    Every data file is loaded with ``Track.load`` and queried through
    ``binned_stats(clust_file, bins, split=True, rpkm=rpkm)``.

    Returns:
        A dict keyed by the basename of each data file. Each value is a dict
        mapping ``"{0}:{1}-{2}"`` (built from the first three fields of a
        result row) to the remaining fields converted to floats.
    """
    data = {}
    for datafile in datafiles:
        track = Track.load(
            datafile,
            rmdup=rmdup,
            rmrepeats=rmrepeats,
            fragmentsize=fragmentsize,
        )
        rows = track.binned_stats(clust_file, bins, split=True, rpkm=rpkm)
        per_region = {
            "{0}:{1}-{2}".format(row[0], row[1], row[2]): [float(value) for value in row[3:]]
            for row in rows
        }
        data[os.path.basename(datafile)] = per_region
    return data
Ejemplo n.º 11
0
def load_cluster_data(clust_file, datafiles, bins, rpkm, rmdup, rmrepeats, fragmentsize=None):
    """Gather binned statistics of ``clust_file`` for several data files.

    Every data file is loaded with ``Track.load`` and queried through
    ``binned_stats(clust_file, bins, split=True, rpkm=rpkm)``.

    Returns:
        A dict keyed by the basename of each data file. Each value is a dict
        mapping ``"{0}:{1}-{2}"`` (built from the first three fields of a
        result row) to the remaining fields converted to floats.
    """
    data = {}
    for datafile in datafiles:
        track = Track.load(
            datafile,
            rmdup=rmdup,
            rmrepeats=rmrepeats,
            fragmentsize=fragmentsize,
        )
        rows = track.binned_stats(clust_file, bins, split=True, rpkm=rpkm)
        per_region = {
            "{0}:{1}-{2}".format(row[0], row[1], row[2]): [float(value) for value in row[3:]]
            for row in rows
        }
        data[os.path.basename(datafile)] = per_region
    return data
Ejemplo n.º 12
0
def test_fragmentsize(region_fs, fragments):
    """Check how ``track.fragmentsize`` changes the 4-bin counts.

    The track is first queried as loaded, then again after setting
    ``fragmentsize`` to 50, 100 and 200. Each time the last four
    tab-separated fields of the first result row are compared with the
    expected counts.
    """

    def trailing_counts(rows):
        # Last four tab-separated fields of the first row, as integers.
        return [int(field) for field in rows[0].split("\t")[-4:]]

    track = Track.load(fragments)

    # Fragment size as set by Track.load.
    rows = track.binned_stats(region_fs, 4)
    assert 1 == len(rows)
    assert [1, 0, 0, 1] == trailing_counts(rows)

    expectations = (
        (50, [1, 0, 0, 1]),
        (100, [1, 1, 1, 1]),
        (200, [2, 2, 2, 2]),
    )
    for size, expected in expectations:
        track.fragmentsize = size
        rows = track.binned_stats(region_fs, 4)
        assert expected == trailing_counts(rows)
Ejemplo n.º 13
0
def test_fragmentsize(region_fs, fragments):
    """Check how ``track.fragmentsize`` changes the 4-bin counts.

    The track is first queried as loaded, then again after setting
    ``fragmentsize`` to 50, 100 and 200. Each time the last four
    tab-separated fields of the first result row are compared with the
    expected counts.
    """

    def trailing_counts(rows):
        # Last four tab-separated fields of the first row, as integers.
        return [int(field) for field in rows[0].split("\t")[-4:]]

    track = Track.load(fragments)

    # Fragment size as set by Track.load.
    rows = track.binned_stats(region_fs, 4)
    assert 1 == len(rows)
    assert [1, 0, 0, 1] == trailing_counts(rows)

    expectations = (
        (50, [1, 0, 0, 1]),
        (100, [1, 1, 1, 1]),
        (200, [2, 2, 2, 2]),
    )
    for size, expected in expectations:
        track.fragmentsize = size
        rows = track.binned_stats(region_fs, 4)
        assert expected == trailing_counts(rows)
Ejemplo n.º 14
0
    def __init__(self,
                 bamfile,
                 height=1,
                 color=None,
                 bgmode=None,
                 alpha=None,
                 fragmentsize=200,
                 rmdup=True,
                 rmrepeats=True,
                 **kwargs):
        """Store display settings and load the underlying track.

        Args:
            bamfile: Path handed to ``Track.load``.
            height: Stored as ``self.height``.
            color: Stored as ``self.color``; a falsy value selects "#a7004b".
            bgmode: Stored as ``self.bgmode``.
            alpha: Stored as ``self.alpha``; a falsy value selects 1.
            fragmentsize: Forwarded to ``Track.load`` and kept on the instance.
            rmdup: Forwarded to ``Track.load`` and kept on the instance.
            rmrepeats: Forwarded to ``Track.load`` and kept on the instance.
            **kwargs: ``adjscale`` (default False, stored as ``scalepm``),
                ``show_scale`` (default True) and ``name`` (default None)
                are read; other keys are ignored.
        """
        load_options = dict(fragmentsize=fragmentsize, rmdup=rmdup, rmrepeats=rmrepeats)

        self.height = height
        self.track = Track.load(bamfile, **load_options)

        self.ymax = None
        self.bgmode = bgmode

        # Optional display switches supplied through keyword arguments.
        self.scalepm = kwargs.get("adjscale", False)
        self.show_scale = kwargs.get("show_scale", True)

        # Falsy colour/alpha values fall back to the defaults.
        self.color = color or "#a7004b"
        self.alpha = alpha or 1

        self.fragmentsize = fragmentsize
        self.rmdup = rmdup
        self.rmrepeats = rmrepeats

        self.name = kwargs.get('name')