예제 #1
0
def test_split_by_strands():
    """Check that ``split_by_strands`` groups loaded regions by strand.

    Loads the hg38 gene BED file from ``$HOME/gencoor_data`` and expects the
    "+" and "-" groups to each contain only regions of that strand.
    """
    bed_path = os.path.join(os.getenv("HOME"),
                            "gencoor_data/hg38/genes_hg38.bed")
    genset = GenCoorSet(name="Test_set")
    genset.load(filename=bed_path, filetype="BED")
    by_strand = genset.split_by_strands()
    for strand in ("+", "-"):
        observed = {region.strand for region in by_strand[strand]}
        assert observed == {strand}
예제 #2
0
def test_standard_chromosome1():
    """Check ``standard_chromosome`` on a set with one ``chr3_random`` region.

    Three regions are added (chr1, chr2, chr3_random); after the call only
    two are expected to remain (presumably the ``chr3_random`` one is the
    region removed).
    """
    coordinates = (
        ("chr1", 1, 50),
        ("chr2", 20, 40),
        ("chr3_random", 70, 80),
    )
    genset1 = GenCoorSet(name="Test_set")
    for chrom, start, end in coordinates:
        genset1.add(GenCoor(chrom=chrom, start=start, end=end, name="test",
                            strand="."))
    genset1.standard_chromosome()
    assert len(genset1) == 2
예제 #3
0
def test_rm_duplicates2():
    """Check that ``rm_duplicates`` keeps regions differing in name/strand.

    Two of the four regions share the coordinates chr2:2-4 but differ in
    name and strand; all four are expected in the non-inplace result.
    """
    entries = (
        ("chr1", 1, 5, "test", "."),
        ("chr2", 2, 4, "test", "."),
        ("chr3_random", 1, 80, "test", "."),
        ("chr2", 2, 4, "test2", "-"),
    )
    genset1 = GenCoorSet(name="Test_set")
    for chrom, start, end, name, strand in entries:
        genset1.add(GenCoor(chrom=chrom, start=start, end=end, name=name,
                            strand=strand))
    deduplicated = genset1.rm_duplicates(inplace=False)
    assert len(deduplicated) == 4
예제 #4
0
def test_GenCoorSet_len():
    """Check that ``len`` of a set equals the number of regions added."""
    coordinates = (
        ("chr1", 10, 20),
        ("chr1", 15, 50),
        ("chr2", 100, 200),
    )
    genset = GenCoorSet(name="Test_set")
    for chrom, start, end in coordinates:
        genset.add(GenCoor(chrom=chrom, start=start, end=end, name="test",
                           strand="."))
    assert len(genset) == 3
예제 #5
0
def test_merge():
    """Check ``merge`` with and without strand specificity.

    The two chr1 regions (10-20 on "+", 15-50 on "-") overlap. Without
    strand specificity they are expected to merge into 10-50 with strand
    "."; with ``strand_specific=True`` all three regions are expected to
    stay separate.
    """
    entries = (
        ("chr1", 10, 20, "+"),
        ("chr1", 15, 50, "-"),
        ("chr2", 100, 200, "."),
    )
    genset = GenCoorSet(name="Test_set")
    for chrom, start, end, strand in entries:
        genset.add(GenCoor(chrom=chrom, start=start, end=end, name="test",
                           strand=strand))

    # Strand-agnostic merge
    merged = genset.merge(w_return=True)
    assert len(merged) == 2
    assert merged[0].start == 10
    assert merged[0].end == 50
    assert merged[0].strand == "."

    # Strand-specific merge
    merged = genset.merge(w_return=True, strand_specific=True)
    assert len(merged) == 3
    assert merged[0].start == 10
    assert merged[0].end == 20
    assert merged[0].strand == "+"
예제 #6
0
def test_total_coverage1():
    """Check ``total_coverage`` over three regions on different chromosomes.

    Expected total is 85, i.e. (5 - 1) + (4 - 2) + (80 - 1).
    """
    coordinates = (
        ("chr1", 1, 5),
        ("chr2", 2, 4),
        ("chr3_random", 1, 80),
    )
    genset1 = GenCoorSet(name="Test_set")
    for chrom, start, end in coordinates:
        genset1.add(GenCoor(chrom=chrom, start=start, end=end, name="test",
                            strand="."))
    covered = genset1.total_coverage()
    assert covered == 85
예제 #7
0
def test_distance2():
    """Check the unsigned ``distance`` from a query region to a set.

    The query chr1:7-19 lies between chr1:1-5 and chr1:20-24; the expected
    distance is 1.
    """
    genset1 = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (20, 24)):
        genset1.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    query = GenCoor(chrom="chr1", start=7, end=19, name="test", strand=".")
    gap = genset1.distance(query, sign=False)
    assert gap == 1
예제 #8
0
def test_intersect_3():
    """
    Empty set intersected with a non-empty set
    A : none
    B :   -----
    R : none
    """
    genset1 = GenCoorSet(name="Test_set")
    genset2 = GenCoorSet(name="Test_set")
    genset2.add(
        GenCoor(chrom="chr1", start=10, end=20, name="test", strand="+"))
    # Every intersection mode is expected to give an empty result
    for mode in ("overlap", "original", "complete_included"):
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == 0
예제 #9
0
def test_GenCoorSet_load():
    """Round-trip a BED file: load it, save a copy, then delete the copy.

    Both paths live under ``$HOME/gencoor_data/hg38``. The test has no
    assertions; it only expects load/save/remove to complete without error.
    """
    home = os.getenv("HOME")
    source_bed = os.path.join(home, "gencoor_data/hg38/genes_hg38.bed")
    copy_bed = os.path.join(home, "gencoor_data/hg38/genes_hg38_test.bed")
    genset = GenCoorSet(name="Test_set")
    genset.load(filename=source_bed, filetype="BED")
    genset.save(filename=copy_bed, filetype="BED")
    # Clean up the temporary copy written above
    os.remove(copy_bed)
예제 #10
0
def test_intersect_1():
    """
    Two empty sets
    A : none
    B : none
    R : none
    """
    genset1 = GenCoorSet(name="Test_set")
    genset2 = GenCoorSet(name="Test_set")
    # Every intersection mode is expected to give an empty result
    for mode in ("overlap", "original", "complete_included"):
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == 0
예제 #11
0
def bed_sig2arr(bedname, sig_names, exp, arg):
    """Load a BED region set and build signal coverage arrays over it.

    Args:
        bedname: Label of the BED entry; used as the region set name and
            resolved to a file through ``exp.get_file``.
        sig_names: Iterable of signal labels, each resolved to a file through
            ``exp.get_file``.
        exp: Object exposing ``get_file(label)``; the returned value is used
            as a filename.
        arg: Mapping of command-line options. Reads ``--test``, ``--ext``,
            ``--genome``, ``--bin``, ``--step`` and ``--cores``.

    Returns:
        The result of ``SignalProfile.cov2array()``.
    """
    regions = GenCoorSet(name=bedname)
    regions.load(filename=exp.get_file(bedname))
    if arg["--test"]:
        # Test mode: restrict the run to the first 50 regions.
        regions.list = regions.list[0:50]
    # The window width is twice the requested extension.
    regions.relocate(mode='center as center', width=2 * int(arg["--ext"]))
    sig = SignalProfile(regions, genome=arg["--genome"],
                        bin=int(arg["--bin"]), step=int(arg["--step"]),
                        cores=int(arg["--cores"]))
    for signal in sig_names:
        # Resolve the path once instead of once per extension check/loader.
        signal_path = exp.get_file(signal)
        if signal_path.endswith((".bw", ".bigwig")):
            sig.load_bigwig(filename=signal_path, label=signal,
                            disable_progressbar=False, verbal=False)
        elif signal_path.endswith(".bam"):
            sig.load_bam(filename=signal_path, label=signal,
                         disable_progressbar=False, verbal=False)
        # Files with any other extension are skipped, as before.
    return sig.cov2array()
예제 #12
0
def test_intersect_7():
    """
    Perfect overlapping
    A : ------
    B : ------
    R : ------

    Both sets hold the same five regions. Two of them (700-750 and 725-800)
    overlap each other, and "overlap" mode is expected to return six
    regions while the other modes return five.
    """
    coordinates = ((1, 10), (500, 550), (600, 650), (700, 750), (725, 800))
    genset1 = GenCoorSet(name="Test_set")
    for start, end in coordinates:
        genset1.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    genset2 = GenCoorSet(name="Test_set")
    for start, end in coordinates:
        genset2.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    expected_counts = (
        ("overlap", 6),
        ("original", 5),
        ("complete_included", 5),
    )
    for mode, count in expected_counts:
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == count
예제 #13
0
def test_total_coverage2():
    """Check ``total_coverage`` of a single region chr1:1-5 (expected 4)."""
    only_region = GenCoor(chrom="chr1", start=1, end=5, name="test",
                          strand=".")
    genset1 = GenCoorSet(name="Test_set")
    genset1.add(only_region)
    covered = genset1.total_coverage()
    assert covered == 4
예제 #14
0
def test_extend():
    """Check ``extend`` by 5 in each mode on a "+" and a "-" region.

    Expected bounds of the first two regions (10-20 on "+", 15-50 on "-"):
    "left" lowers the start, "right" raises the end, "5end"/"3end" pick the
    side according to the strand, and "both" moves both ends.
    """
    entries = (
        ("chr1", 10, 20, "+"),
        ("chr1", 15, 50, "-"),
        ("chr2", 100, 200, "."),
    )
    genset = GenCoorSet(name="Test_set")
    for chrom, start, end, strand in entries:
        genset.add(GenCoor(chrom=chrom, start=start, end=end, name="test",
                           strand=strand))

    # (mode, expected (start, end) of list[0] and list[1])
    expectations = (
        ("left", ((5, 20), (10, 50))),
        ("right", ((10, 25), (15, 55))),
        ("5end", ((5, 20), (15, 55))),
        ("3end", ((10, 25), (10, 50))),
        ("both", ((5, 25), (10, 55))),
    )
    for mode, bounds in expectations:
        ngcs = genset.extend(mode=mode, length=5)
        for index, (start, end) in enumerate(bounds):
            assert ngcs.list[index].start == start
            assert ngcs.list[index].end == end
예제 #15
0
def test_intersect_5():
    """
    End-to-end attach
    A : ------      ------
    B :       ------
    R : none
    """
    genset1 = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (11, 20)):
        genset1.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    genset2 = GenCoorSet(name="Test_set")
    genset2.add(
        GenCoor(chrom="chr1", start=5, end=11, name="test", strand="."))
    # Regions that only touch at their ends are expected not to intersect
    for mode in ("overlap", "original", "complete_included"):
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == 0
예제 #16
0
def test_intersect_4():
    """
    No overlapping
    A : ------      ---------               -------
    B :        ----          ------  ------
    R : none
    """
    genset1 = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (11, 20), (33, 38)):
        genset1.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    genset2 = GenCoorSet(name="Test_set")
    for start, end in ((7, 9), (20, 25), (26, 31)):
        genset2.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    # Every intersection mode is expected to give an empty result
    for mode in ("overlap", "original", "complete_included"):
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == 0
예제 #17
0
def test_intersect_13():
    """
    Completely included overlapping
    A : ---------------------------
    B : ----    ------       -----------
    R1: ----    ------       ------      (overlap)
    R2: ---------------------------      (original)
    R3:                                  (comp_incl)
    """
    genset1 = GenCoorSet(name="Test_set")
    genset1.add(
        GenCoor(chrom="chr1", start=1, end=50, name="test", strand="."))
    genset2 = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (10, 19), (45, 60)):
        genset2.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    expected_counts = (
        ("overlap", 3),
        ("original", 1),
        ("complete_included", 0),
    )
    for mode, count in expected_counts:
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == count
예제 #18
0
def test_intersect_15():
    """
    Set A contains a region nested inside another one
    A : --------------         -------
            ------
    B :       -----          ----------------
    R1:       -----            -------      (overlap)
              ----
    R2: --------------         -------      (original)
            ------
    R3:                        -------      (comp_incl)
    """
    genset1 = GenCoorSet(name="Test_set")
    for start, end in ((1, 50), (20, 40), (70, 80)):
        genset1.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    genset2 = GenCoorSet(name="Test_set")
    for start, end in ((25, 45), (65, 95)):
        genset2.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    expected_counts = (
        ("overlap", 3),
        ("original", 3),
        ("complete_included", 1),
    )
    for mode, count in expected_counts:
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == count
예제 #19
0
def test_intersect_12():
    """
    Different chromosomes
    A : chr1  -------
    B : chr2  -------
    R : none
    """
    genset1 = GenCoorSet(name="Test_set")
    genset1.add(
        GenCoor(chrom="chr1", start=1, end=10, name="test", strand="."))
    genset2 = GenCoorSet(name="Test_set")
    genset2.add(
        GenCoor(chrom="chr2", start=1, end=10, name="test", strand="."))
    # Same coordinates on different chromosomes are expected not to intersect
    for mode in ("overlap", "original", "complete_included"):
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == 0
예제 #20
0
def test_intersect_11():
    """
    Many various overlapping (mixed)
    A :   ------------------            --------   ---------
    B : ----   -------    ------            ----------
    R1:   --   -------    --                ----   ---       (overlap)
    R2:   ------------------            --------   --------- (original)
    R3:                                                      (comp_incl)
    """
    genset1 = GenCoorSet(name="Test_set")
    for start, end in ((3, 30), (50, 60), (70, 85)):
        genset1.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    genset2 = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (10, 19), (27, 35), (55, 75)):
        genset2.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    expected_counts = (
        ("overlap", 5),
        ("original", 3),
        ("complete_included", 0),
    )
    for mode, count in expected_counts:
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == count
예제 #21
0
def test_intersect_6():
    """
    No length attach (zero-length regions)
    A : .      .
    B :    .   .
    R : one region in every mode (presumably the point at 20 that both
        sets share)
    """
    genset1 = GenCoorSet(name="Test_set")
    for position in (2, 20):
        genset1.add(GenCoor(chrom="chr1", start=position, end=position,
                            name="test", strand="."))
    genset2 = GenCoorSet(name="Test_set")
    for position in (5, 20):
        genset2.add(GenCoor(chrom="chr1", start=position, end=position,
                            name="test", strand="."))
    for mode in ("overlap", "original", "complete_included"):
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == 1
예제 #22
0
                if len(signals) > 1:
                    print("There are more than one BED files sharing the same tags, only the first one will be used.")

                arrs = bed_sig2arr(beds[0], [signals[0]], exp, arg)
                a = arrs[signals[0]]
                hm = axes[i, j].imshow(a, cmap='hot', interpolation='None')
                # Y ticks
                axes[i, j].get_yaxis().set_ticks([])
                # X ticks
                x_label_list = ['-'+arg["--ext"], '0', arg["--ext"]]
                xmin, xmax = axes[i, j].get_xlim()
                axes[i, j].set_xticks([xmin, int(0.5 * (xmax - xmin)), xmax])
                axes[i, j].set_xticklabels(x_label_list)

                # labels
                if j == 0:
                    axes[i, j].set_ylabel(row)
                if i == 0:
                    regions = GenCoorSet(name=beds[0])
                    regions.load(filename=exp.get_file(beds[0]))
                    axes[i, j].set_title(col+" ("+str(len(regions))+")")


        cbar_ax = fig.add_axes([0.9, 0.15, 0.02, 0.7])
        fig.colorbar(hm, cax=cbar_ax)

        set_yaxis(n_row, n_col, axes, arg)
        fig.savefig(arg["<output_file>"], bbox_inches='tight')

    # elif arg["boxplot"]:
예제 #23
0
def test_distances1():
    """Check unsigned ``distances`` from each region of one set to another.

    chr1:1-5 is 5 away from chr1:10-15 and chr1:80-84 is 16 away from
    chr1:60-64, so the expected result is ``[5, 16]``.
    """
    genset1 = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (80, 84)):
        genset1.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    genset2 = GenCoorSet(name="Test_set")
    for start, end in ((10, 15), (60, 64), (160, 164)):
        genset2.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    gaps = genset1.distances(genset2, sign=False)
    print(gaps)
    assert len(gaps) == 2
    assert gaps == [5, 16]
예제 #24
0
    coordinates.py split_strand <input_BED_file_path> <output_BED_directory_path>

    Blablabla

Options:

"""

from docopt import docopt
from gencoor.coordinates import GenCoorSet
import os

if __name__ == '__main__':
    arg = docopt(__doc__)
    if arg["resize"]:
        gc = GenCoorSet(name="input")
        gc.load(arg["<input_BED_file_path>"], filetype="BED")
        gc.relocate(mode='center as center', width=int(arg["<length>"]))
        gc.save(arg["<output_BED_file_path>"], filetype="BED")

    elif arg["split_strand"]:
        name = os.path.basename(arg["<input_BED_file_path>"]).split(".")[0]
        print(
            os.path.join(arg["<output_BED_directory_path>"],
                         name + "_" + "+" + ".bed"))
        gc = GenCoorSet(name="input")
        gc.load(arg["<input_BED_file_path>"], filetype="BED")
        res = gc.split_by_strands()
        for k, g in res.items():
            g.save(os.path.join(arg["<output_BED_directory_path>"],
                                name + "_" + k + ".bed"),
예제 #25
0
def test_intersect_8():
    """
    One overlapping region
    A : ------
    B :     --------
    R1:     --       (overlap)
    R2: ------       (original)
    R3:              (comp_incl)
    """
    region_a = GenCoor(chrom="chr1", start=1, end=10, name="test",
                       strand=".")
    region_b = GenCoor(chrom="chr1", start=7, end=20, name="test",
                       strand=".")
    genset1 = GenCoorSet(name="Test_set")
    genset1.add(region_a)
    genset2 = GenCoorSet(name="Test_set")
    genset2.add(region_b)

    # "overlap": only the shared part 7-10 is expected
    shared = genset1.intersect(genset2, mode="overlap")
    assert len(shared) == 1
    assert shared[0].start == 7
    assert shared[0].end == 10

    # "original": the whole region of A (1-10) is expected
    kept = genset1.intersect(genset2, mode="original")
    assert len(kept) == 1
    assert kept[0].start == 1
    assert kept[0].end == 10

    # "complete_included": A is not fully inside B, so nothing is expected
    included = genset1.intersect(genset2, mode="complete_included")
    assert len(included) == 0
예제 #26
0
def test_intersect_10():
    """
    Two separately overlapping regions
    A : -------      --------
    B :     -----        --------
    R1:     ---          ----     (overlap)
    R2: -------      --------     (original)
    R3:                           (comp_incl)
    """
    genset1 = GenCoorSet(name="Test_set")
    for start, end in ((1, 10), (26, 35)):
        genset1.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    genset2 = GenCoorSet(name="Test_set")
    for start, end in ((7, 15), (30, 40)):
        genset2.add(GenCoor(chrom="chr1", start=start, end=end, name="test",
                            strand="."))
    expected_counts = (
        ("overlap", 2),
        ("original", 2),
        ("complete_included", 0),
    )
    for mode, count in expected_counts:
        res = genset1.intersect(genset2, mode=mode)
        assert len(res) == count
예제 #27
0
    def step_size(self):
        """Return the ``step_size`` entry of the ``Parameters`` section as an int."""
        raw_value = self.config["Parameters"]["step_size"]
        return int(raw_value)

    def get_inputs(self):
        """Return the entries of the ``Input`` config section as a dict.

        Returns:
            A mapping of each label in the ``Input`` section to its value, or
            an empty dict when the configuration has no ``Input`` section.
        """
        if "Input" not in self.config.sections():
            return {}
        input_section = self.config["Input"]
        return {label: input_section[label] for label in input_section}


if __name__ == '__main__':
    arg = docopt(__doc__)
    if arg["diffpeak"]:
        config = DiffPeakConfig(filepath=arg["<config_file>"])
        ref_back = GenCoorSet(name="background")
        ref_back.get_chromosomes(genome=config.genome())
        sig = SignalProfile(regions=ref_back,
                            genome=config.genome(),
                            bin=config.bin_size(),
                            step=config.step_size())
        sig.load_files(file_dict=config.files_dict)

        # Normalization
        # sig.norm_bakcground(genome="hg38")
        sig.norm_library_size()

        # Input
        if "Input" in config.config.sections():
            sig2 = SignalProfile(regions=ref_back,
                                 genome=config.genome(),