def test_split_by_strands():
    """Check that split_by_strands() separates regions by strand.

    Loads ``gencoor_data/hg38/genes_hg38.bed`` from the directory named by
    the HOME environment variable and verifies that the "+" and "-" entries
    of the result only hold regions of the matching strand.
    """
    bed_path = os.path.join(os.getenv("HOME"),
                            "gencoor_data/hg38/genes_hg38.bed")
    genes = GenCoorSet(name="Test_set")
    genes.load(filename=bed_path, filetype="BED")
    by_strand = genes.split_by_strands()
    # Same order as before: forward strand first, then reverse.
    for strand in ("+", "-"):
        assert {g.strand for g in by_strand[strand]} == {strand}
def test_standard_chromosome1():
    """standard_chromosome() on chr1, chr2 and chr3_random keeps two regions."""
    regions = GenCoorSet(name="Test_set")
    entries = (
        ("chr1", 1, 50),
        ("chr2", 20, 40),
        ("chr3_random", 70, 80),
    )
    for chrom, start, end in entries:
        regions.add(GenCoor(chrom=chrom, start=start, end=end,
                            name="test", strand="."))
    regions.standard_chromosome()
    assert len(regions) == 2
def test_rm_duplicates2():
    """rm_duplicates(inplace=False) keeps all four regions of this set.

    Two of the regions share chr2:2-4 but differ in name and strand; the
    test expects both of them to survive.
    """
    regions = GenCoorSet(name="Test_set")
    entries = (
        ("chr1", 1, 5, "test", "."),
        ("chr2", 2, 4, "test", "."),
        ("chr3_random", 1, 80, "test", "."),
        ("chr2", 2, 4, "test2", "-"),
    )
    for chrom, start, end, name, strand in entries:
        regions.add(GenCoor(chrom=chrom, start=start, end=end,
                            name=name, strand=strand))
    deduplicated = regions.rm_duplicates(inplace=False)
    assert len(deduplicated) == 4
def test_GenCoorSet_len():
    """len() of a GenCoorSet equals the number of regions added (three)."""
    regions = GenCoorSet(name="Test_set")
    for chrom, start, end in (("chr1", 10, 20),
                              ("chr1", 15, 50),
                              ("chr2", 100, 200)):
        regions.add(GenCoor(chrom=chrom, start=start, end=end,
                            name="test", strand="."))
    assert len(regions) == 3
def test_merge():
    """Check merge() with and without strand awareness.

    chr1:10-20(+) and chr1:15-50(-) overlap. Without strand specificity the
    test expects them fused into chr1:10-50 with strand "."; with
    ``strand_specific=True`` all three regions are expected to stay apart.
    """
    regions = GenCoorSet(name="Test_set")
    entries = (
        ("chr1", 10, 20, "+"),
        ("chr1", 15, 50, "-"),
        ("chr2", 100, 200, "."),
    )
    for chrom, start, end, strand in entries:
        regions.add(GenCoor(chrom=chrom, start=start, end=end,
                            name="test", strand=strand))

    merged = regions.merge(w_return=True)
    assert len(merged) == 2
    assert merged[0].start == 10
    assert merged[0].end == 50
    assert merged[0].strand == "."

    stranded = regions.merge(w_return=True, strand_specific=True)
    assert len(stranded) == 3
    assert merged is not None and stranded[0].start == 10
    assert stranded[0].end == 20
    assert stranded[0].strand == "+"
def test_total_coverage1():
    """total_coverage() over three regions sums to 85 (4 + 2 + 79)."""
    regions = GenCoorSet(name="Test_set")
    for chrom, start, end in (("chr1", 1, 5),
                              ("chr2", 2, 4),
                              ("chr3_random", 1, 80)):
        regions.add(GenCoor(chrom=chrom, start=start, end=end,
                            name="test", strand="."))
    covered = regions.total_coverage()
    assert covered == 85
def test_distance2():
    """distance() to a region lying between two set members.

    The query chr1:7-19 sits between chr1:1-5 and chr1:20-24; the unsigned
    distance is expected to be 1.
    """
    regions = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (20, 24)):
        regions.add(GenCoor(chrom="chr1", start=start, end=end,
                            name="test", strand="."))
    query = GenCoor(chrom="chr1", start=7, end=19, name="test", strand=".")
    assert regions.distance(query, sign=False) == 1
def test_intersect_3():
    """Intersecting an empty set A with a one-region set B.

    A has no regions, B holds chr1:10-20(+); every mode is expected to
    return an empty result.
    """
    empty_set = GenCoorSet(name="Test_set")
    other = GenCoorSet(name="Test_set")
    other.add(GenCoor(chrom="chr1", start=10, end=20,
                      name="test", strand="+"))
    for mode in ("overlap", "original", "complete_included"):
        result = empty_set.intersect(other, mode=mode)
        assert len(result) == 0
def test_GenCoorSet_load():
    """Round-trip the hg38 gene BED file through load() and save().

    Reads ``gencoor_data/hg38/genes_hg38.bed`` from the directory named by
    the HOME environment variable, writes the set back out as
    ``genes_hg38_test.bed`` in the same directory and removes that scratch
    file again, also when save() fails part-way.
    """
    genset = GenCoorSet(name="Test_set")
    bedfile = os.path.join(os.getenv("HOME"),
                           "gencoor_data/hg38/genes_hg38.bed")
    bedfile2 = os.path.join(os.getenv("HOME"),
                            "gencoor_data/hg38/genes_hg38_test.bed")
    genset.load(filename=bedfile, filetype="BED")
    try:
        genset.save(filename=bedfile2, filetype="BED")
        # The original relied on os.remove() raising to detect a missing
        # output file; keep that check explicit.
        assert os.path.isfile(bedfile2)
    finally:
        # Never leave a partially written scratch file in the data folder.
        if os.path.exists(bedfile2):
            os.remove(bedfile2)
def test_intersect_1():
    """Intersecting two empty sets yields an empty result in every mode."""
    first = GenCoorSet(name="Test_set")
    second = GenCoorSet(name="Test_set")
    for mode in ("overlap", "original", "complete_included"):
        result = first.intersect(second, mode=mode)
        assert len(result) == 0
def bed_sig2arr(bedname, sig_names, exp, arg):
    """Compute signal coverage arrays over the regions of one BED file.

    Args:
        bedname: Label of the BED file; used as the region-set name and
            resolved to a path with ``exp.get_file``.
        sig_names: Iterable of signal labels, each resolved to a file path
            with ``exp.get_file``.
        exp: Object exposing ``get_file(label)``.
        arg: Mapping of command-line options. The keys "--test", "--ext",
            "--genome", "--bin", "--step" and "--cores" are read.

    Returns:
        The result of ``SignalProfile.cov2array()`` after all supported
        signal files have been loaded.
    """
    regions = GenCoorSet(name=bedname)
    regions.load(filename=exp.get_file(bedname))
    if arg["--test"]:
        # Test mode: only keep the first 50 regions.
        regions.list = regions.list[0:50]
    regions.relocate(mode='center as center', width=2 * int(arg["--ext"]))
    sig = SignalProfile(regions, genome=arg["--genome"],
                        bin=int(arg["--bin"]), step=int(arg["--step"]),
                        cores=int(arg["--cores"]))
    for signal in sig_names:
        # Resolve the path once instead of on every extension check.
        path = exp.get_file(signal)
        if path.endswith((".bw", ".bigwig")):
            sig.load_bigwig(filename=path, label=signal,
                            disable_progressbar=False, verbal=False)
        elif path.endswith(".bam"):
            sig.load_bam(filename=path, label=signal,
                         disable_progressbar=False, verbal=False)
        # Files with any other extension are skipped, as before.
    return sig.cov2array()
def test_intersect_7():
    """Intersecting two sets that hold the same five chr1 regions.

    The last two regions (700-750 and 725-800) overlap each other. The test
    expects 6 regions in "overlap" mode and 5 in both "original" and
    "complete_included" mode.
    """
    spans = ((1, 10), (500, 550), (600, 650), (700, 750), (725, 800))

    set_a = GenCoorSet(name="Test_set")
    for start, end in spans:
        set_a.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    for start, end in spans:
        set_b.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))

    expectations = (("overlap", 6), ("original", 5), ("complete_included", 5))
    for mode, expected in expectations:
        assert len(set_a.intersect(set_b, mode=mode)) == expected
def test_total_coverage2():
    """total_coverage() of the single region chr1:1-5 is 4."""
    regions = GenCoorSet(name="Test_set")
    regions.add(GenCoor(chrom="chr1", start=1, end=5,
                        name="test", strand="."))
    assert regions.total_coverage() == 4
def test_extend():
    """Check extend() by 5 in each supported mode.

    Only the first two regions are inspected: chr1:10-20 on "+" and
    chr1:15-50 on "-". The table below lists, per mode, the expected
    (start, end) of each of them.
    """
    regions = GenCoorSet(name="Test_set")
    for chrom, start, end, strand in (("chr1", 10, 20, "+"),
                                      ("chr1", 15, 50, "-"),
                                      ("chr2", 100, 200, ".")):
        regions.add(GenCoor(chrom=chrom, start=start, end=end,
                            name="test", strand=strand))

    # (mode, expected first region, expected second region)
    cases = (
        ("left", (5, 20), (10, 50)),
        ("right", (10, 25), (15, 55)),
        ("5end", (5, 20), (15, 55)),
        ("3end", (10, 25), (10, 50)),
        ("both", (5, 25), (10, 55)),
    )
    for mode, first, second in cases:
        extended = regions.extend(mode=mode, length=5)
        for index, (start, end) in enumerate((first, second)):
            assert extended.list[index].start == start
            assert extended.list[index].end == end
def test_intersect_5():
    """Intersecting regions that only touch end-to-end.

    A holds chr1:1-5 and chr1:11-20, B holds chr1:5-11, which abuts both
    without overlapping. Every mode is expected to return nothing.
    """
    set_a = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (11, 20)):
        set_a.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    set_b.add(GenCoor(chrom="chr1", start=5, end=11,
                      name="test", strand="."))
    for mode in ("overlap", "original", "complete_included"):
        assert len(set_a.intersect(set_b, mode=mode)) == 0
def test_intersect_4():
    """Intersecting two sets of interleaved, non-overlapping chr1 regions.

    A: 1-5, 11-20, 33-38; B: 7-9, 20-25, 26-31. Every mode is expected to
    return an empty result.
    """
    set_a = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (11, 20), (33, 38)):
        set_a.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    for start, end in ((7, 9), (20, 25), (26, 31)):
        set_b.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    for mode in ("overlap", "original", "complete_included"):
        assert len(set_a.intersect(set_b, mode=mode)) == 0
def test_intersect_13():
    """Intersecting one long region with three shorter ones.

    A: chr1:1-50; B: chr1:1-5, 10-19 and 45-60. Expected result sizes are
    3 for "overlap", 1 for "original" and 0 for "complete_included".
    """
    set_a = GenCoorSet(name="Test_set")
    set_a.add(GenCoor(chrom="chr1", start=1, end=50,
                      name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (10, 19), (45, 60)):
        set_b.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    expectations = (("overlap", 3), ("original", 1), ("complete_included", 0))
    for mode, expected in expectations:
        assert len(set_a.intersect(set_b, mode=mode)) == expected
def test_intersect_15():
    """Intersecting nested regions of A with two regions of B.

    A: chr1:1-50, 20-40 and 70-80; B: chr1:25-45 and 65-95. Expected result
    sizes are 3 for "overlap", 3 for "original" and 1 for
    "complete_included".
    """
    set_a = GenCoorSet(name="Test_set")
    for start, end in ((1, 50), (20, 40), (70, 80)):
        set_a.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    for start, end in ((25, 45), (65, 95)):
        set_b.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    expectations = (("overlap", 3), ("original", 3), ("complete_included", 1))
    for mode, expected in expectations:
        assert len(set_a.intersect(set_b, mode=mode)) == expected
def test_intersect_12():
    """Regions with equal coordinates on different chromosomes.

    A holds chr1:1-10 and B holds chr2:1-10; every mode is expected to
    return an empty result.
    """
    set_a = GenCoorSet(name="Test_set")
    set_a.add(GenCoor(chrom="chr1", start=1, end=10,
                      name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    set_b.add(GenCoor(chrom="chr2", start=1, end=10,
                      name="test", strand="."))
    for mode in ("overlap", "original", "complete_included"):
        assert len(set_a.intersect(set_b, mode=mode)) == 0
def test_intersect_11():
    """Intersecting sets with a mix of partial overlaps.

    A: chr1:3-30, 50-60 and 70-85; B: chr1:1-5, 10-19, 27-35 and 55-75.
    Expected result sizes are 5 for "overlap", 3 for "original" and 0 for
    "complete_included".
    """
    set_a = GenCoorSet(name="Test_set")
    for start, end in ((3, 30), (50, 60), (70, 85)):
        set_a.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (10, 19), (27, 35), (55, 75)):
        set_b.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    expectations = (("overlap", 5), ("original", 3), ("complete_included", 0))
    for mode, expected in expectations:
        assert len(set_a.intersect(set_b, mode=mode)) == expected
def test_intersect_6():
    """Intersecting sets of zero-length regions.

    A holds chr1:2-2 and chr1:20-20, B holds chr1:5-5 and chr1:20-20. The
    sets share the point chr1:20-20, and every mode is expected to return
    exactly one region.
    """
    set_a = GenCoorSet(name="Test_set")
    for position in (2, 20):
        set_a.add(GenCoor(chrom="chr1", start=position, end=position,
                          name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    for position in (5, 20):
        set_b.add(GenCoor(chrom="chr1", start=position, end=position,
                          name="test", strand="."))
    for mode in ("overlap", "original", "complete_included"):
        assert len(set_a.intersect(set_b, mode=mode)) == 1
if len(signals) > 1: print("There are more than one BED files sharing the same tags, only the first one will be used.") arrs = bed_sig2arr(beds[0], [signals[0]], exp, arg) a = arrs[signals[0]] hm = axes[i, j].imshow(a, cmap='hot', interpolation='None') # Y ticks axes[i, j].get_yaxis().set_ticks([]) # X ticks x_label_list = ['-'+arg["--ext"], '0', arg["--ext"]] xmin, xmax = axes[i, j].get_xlim() axes[i, j].set_xticks([xmin, int(0.5 * (xmax - xmin)), xmax]) axes[i, j].set_xticklabels(x_label_list) # labels if j == 0: axes[i, j].set_ylabel(row) if i == 0: regions = GenCoorSet(name=beds[0]) regions.load(filename=exp.get_file(beds[0])) axes[i, j].set_title(col+" ("+str(len(regions))+")") cbar_ax = fig.add_axes([0.9, 0.15, 0.02, 0.7]) fig.colorbar(hm, cax=cbar_ax) set_yaxis(n_row, n_col, axes, arg) fig.savefig(arg["<output_file>"], bbox_inches='tight') # elif arg["boxplot"]:
def test_distances1():
    """distances() between a two-region set and a three-region set.

    A: chr1:1-5 and 80-84; B: chr1:10-15, 60-64 and 160-164. With
    ``sign=False`` the result is expected to be ``[5, 16]``.
    """
    set_a = GenCoorSet(name="Test_set")
    for start, end in ((1, 5), (80, 84)):
        set_a.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    for start, end in ((10, 15), (60, 64), (160, 164)):
        set_b.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    gaps = set_a.distances(set_b, sign=False)
    print(gaps)
    assert len(gaps) == 2
    assert gaps == [5, 16]
coordinates.py split_strand <input_BED_file_path> <output_BED_directory_path> Blablabla Options: """ from docopt import docopt from gencoor.coordinates import GenCoorSet import os if __name__ == '__main__': arg = docopt(__doc__) if arg["resize"]: gc = GenCoorSet(name="input") gc.load(arg["<input_BED_file_path>"], filetype="BED") gc.relocate(mode='center as center', width=int(arg["<length>"])) gc.save(arg["<output_BED_file_path>"], filetype="BED") elif arg["split_strand"]: name = os.path.basename(arg["<input_BED_file_path>"]).split(".")[0] print( os.path.join(arg["<output_BED_directory_path>"], name + "_" + "+" + ".bed")) gc = GenCoorSet(name="input") gc.load(arg["<input_BED_file_path>"], filetype="BED") res = gc.split_by_strands() for k, g in res.items(): g.save(os.path.join(arg["<output_BED_directory_path>"], name + "_" + k + ".bed"),
def test_intersect_8():
    """Intersecting two single regions that partially overlap.

    A: chr1:1-10, B: chr1:7-20. "overlap" is expected to give 7-10,
    "original" to give A's region 1-10, and "complete_included" nothing.
    """
    set_a = GenCoorSet(name="Test_set")
    set_a.add(GenCoor(chrom="chr1", start=1, end=10,
                      name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    set_b.add(GenCoor(chrom="chr1", start=7, end=20,
                      name="test", strand="."))

    shared = set_a.intersect(set_b, mode="overlap")
    assert len(shared) == 1
    assert shared[0].start == 7
    assert shared[0].end == 10

    kept = set_a.intersect(set_b, mode="original")
    assert len(kept) == 1
    assert kept[0].start == 1
    assert kept[0].end == 10

    contained = set_a.intersect(set_b, mode="complete_included")
    assert len(contained) == 0
def test_intersect_10():
    """Intersecting two pairs of separately overlapping regions.

    A: chr1:1-10 and 26-35; B: chr1:7-15 and 30-40. Expected result sizes
    are 2 for "overlap", 2 for "original" and 0 for "complete_included".
    """
    set_a = GenCoorSet(name="Test_set")
    for start, end in ((1, 10), (26, 35)):
        set_a.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    set_b = GenCoorSet(name="Test_set")
    for start, end in ((7, 15), (30, 40)):
        set_b.add(GenCoor(chrom="chr1", start=start, end=end,
                          name="test", strand="."))
    expectations = (("overlap", 2), ("original", 2), ("complete_included", 0))
    for mode, expected in expectations:
        assert len(set_a.intersect(set_b, mode=mode)) == expected
def step_size(self): return int(self.config["Parameters"]["step_size"]) def get_inputs(self): res = {} if "Input" in self.config.sections(): for lab in self.config["Input"]: res[lab] = self.config["Input"][lab] return res if __name__ == '__main__': arg = docopt(__doc__) if arg["diffpeak"]: config = DiffPeakConfig(filepath=arg["<config_file>"]) ref_back = GenCoorSet(name="background") ref_back.get_chromosomes(genome=config.genome()) sig = SignalProfile(regions=ref_back, genome=config.genome(), bin=config.bin_size(), step=config.step_size()) sig.load_files(file_dict=config.files_dict) # Normalization # sig.norm_bakcground(genome="hg38") sig.norm_library_size() # Input if "Input" in config.config.sections(): sig2 = SignalProfile(regions=ref_back, genome=config.genome(),