def test_correct_limits(self):
    """Aggregate adjacent regions over a single range and check the count.

    Five back-to-back regions on chr1 (starting at 125, 150, 175, 200 and
    225) are aggregated on the ``START`` column over the range
    ``chr1:150-225``. The test expects exactly one output row for that
    range, with an ``@AGG.COUNT`` of 3.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    regions_range = "chr1\t150\t225"
    regions_data = (
        "chr1\t125\t150\n"
        "chr1\t150\t175\n"
        "chr1\t175\t200\n"
        "chr1\t200\t225\n"
        "chr1\t225\t250"
    )

    # Check every status so a failure is reported at the call that caused
    # it rather than as an opaque mismatch in the final comparison.
    status, q_range = epidb.input_regions("hg19", regions_range, self.admin_key)
    self.assertSuccess(status, q_range)

    status, q_data = epidb.input_regions("hg19", regions_data, self.admin_key)
    self.assertSuccess(status, q_data)

    status, q_agg = epidb.aggregate(q_data, q_range, "START", self.admin_key)
    self.assertSuccess(status, q_agg)

    status, req = epidb.get_regions(
        q_agg, "CHROMOSOME,START,END,@AGG.COUNT", self.admin_key)
    self.assertSuccess(status, req)

    rs = self.get_regions_request(req)
    self.assertEqual("chr1\t150\t225\t3", rs)
def test_load_bedgraph(self):
    """Load a bedgraph experiment and compare a select with an intersection.

    The experiment is stored with an ``md5sum`` extra-metadata entry and is
    then addressed as ``"#<md5sum>"`` in ``select_regions`` and ``info``.
    The regions selected for ``chr19:49388217-49417994`` must be non-empty
    and equal to the regions obtained by intersecting that selection with
    an input region covering the same coordinates.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    # Close the gzip handle deterministically instead of leaking it.
    with gzip.open("data/bedgraph/chr19.txt.gz") as bedgraph_file:
        regions_data = bedgraph_file.read()

    res = epidb.add_experiment(
        "S0022IH2.ERX300681.H3K36me3.bwa.GRCh38.20150528.bedgraph",
        "hg19", "Methylation", sample_id, "tech1", "ENCODE", "desc1",
        regions_data, "bedgraph",
        {"md5sum": "afd4af5afd5afd4af5afd5afd4af5afd5"},
        self.admin_key)
    self.assertSuccess(res)

    (status, query_id) = epidb.select_regions(
        "#afd4af5afd5afd4af5afd5afd4af5afd5",
        None, None, None, None, None,
        "chr19", 49388217, 49417994, self.admin_key)
    self.assertSuccess(status, query_id)

    (status, input_query) = epidb.input_regions(
        "hg19", "chr19\t49388217\t49417994", self.admin_key)
    self.assertSuccess(status, input_query)

    (status, query_overlap) = epidb.intersection(
        query_id, input_query, self.admin_key)
    self.assertSuccess(status, query_overlap)

    (status, request_id) = epidb.get_regions(
        query_id, "CHROMOSOME,START,END,VALUE", self.admin_key)
    self.assertSuccess(status, request_id)

    # Request the regions of the intersection query. The previous code
    # passed query_id here as well, so the comparison below could never
    # fail.
    (status, overlap_request_id) = epidb.get_regions(
        query_overlap, "CHROMOSOME,START,END,VALUE", self.admin_key)
    self.assertSuccess(status, overlap_request_id)

    by_select = self.get_regions_request(request_id)
    by_overlap = self.get_regions_request(overlap_request_id)

    self.assertEqual(by_overlap, by_select)
    self.assertTrue(len(by_select) > 0)

    (status, info) = epidb.info(
        "#afd4af5afd5afd4af5afd5afd4af5afd5", self.admin_key)
    self.assertSuccess(status, info)
    self.assertEqual(info[0]["_id"], "e1")
def test_misc_formats(self):
    """Round-trip several BED layouts through input_regions/get_regions.

    For each sample file the content is uploaded with ``input_regions``,
    the format reported by ``info`` for the resulting query is read back,
    and that same format is used to request the regions again. Both calls
    must succeed; the returned lines and their column counts are printed
    for manual inspection and are not asserted.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    bed_files = ["chr_s_e_name.bed", "chr_s_e_score.bed", "bed10.bed"]

    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(epidb.list_genomes(self.admin_key))

    for bed_file in bed_files:
        print(bed_file)
        # Use a context manager so the file handle is always closed.
        with open("data/bed/" + bed_file) as handle:
            content = handle.read()

        res, q1 = epidb.input_regions("hg19", content, self.admin_key)
        # Validate the query id before it is used in any further call.
        self.assertSuccess(res, q1)

        status, info = epidb.info(q1, self.admin_key)
        self.assertSuccess(status, info)
        print(info)
        fmt = info[0]['args']['format']
        print(fmt)

        r, r1 = epidb.get_regions(q1, fmt, self.admin_key)
        self.assertSuccess(r, r1)

        for line in self.get_regions_request(r1).split("\n"):
            print(line)
            print(len(line.split("\t")))
def test_complex_input_regions(self):
    """Check counting and column rendering for an ad-hoc input-regions query.

    Three regions are uploaded with ``input_regions``. The query must count
    3 regions, and ``get_regions`` must render plain columns, meta columns
    (``@NAME``, ``@EPIGENETIC_MARK``) and a ``@CALCULATED`` expression
    (END - START) exactly as listed in ``expected_rows``.
    """
    client = DeepBlueClient(address="localhost", port=31415)
    self.init_full(client)

    input_rows = [
        "chr1\t1\t10000",
        "chr2\t2\t20000",
        "chr3\t3\t30000",
    ]
    (_, query_id) = client.input_regions(
        "hg19", "\n".join(input_rows), self.admin_key)

    status, count_req = client.count_regions(query_id, self.admin_key)
    self.assertSuccess(status, count_req)
    self.assertEqual(self.count_request(count_req), 3)

    columns = (
        "CHROMOSOME,START,END,NAME,@NAME,@EPIGENETIC_MARK,"
        "@CALCULATED(return value_of('END') - value_of('START') )"
    )
    status, regions_req = client.get_regions(query_id, columns, self.admin_key)
    self.assertSuccess(status, regions_req)
    fetched = self.get_regions_request(regions_req)

    # One row per input region; rows are newline-separated in the result.
    expected_rows = [
        "chr1\t1\t10000\t\tQuery q1 regions set\t\t9999.000000",
        "chr2\t2\t20000\t\tQuery q1 regions set\t\t19998.000000",
        "chr3\t3\t30000\t\tQuery q1 regions set\t\t29997.000000",
    ]
    self.assertEqual(fetched, "\n".join(expected_rows))
def test_overlap_simple(self):
    """Check that three ways of restricting regions to a range agree.

    The same four chr1 regions are filtered against ``chr1:3050022-3050100``
    by (1) ``intersection`` with an input region, (2) ``select_annotations``
    with coordinates and (3) ``select_experiments`` with coordinates. All
    three must return the same three regions. Finally the whole experiment
    is aggregated on ``START`` over the input region and the min/max/count
    values are checked.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    def fetch(query_id, columns="CHROMOSOME,START,END"):
        # Request the regions of a query, assert success, return the text.
        status, request_id = epidb.get_regions(query_id, columns, self.admin_key)
        self.assertSuccess(status, request_id)
        return self.get_regions_request(request_id)

    data_one = (
        "chr1\t3049996\t3050022\n"
        "chr1\t3050022\t3050040\n"
        "chr1\t3050040\t3050051\n"
        "chr1\t3050051\t3050126"
    )
    region = "chr1\t3050022\t3050100"
    # Result every restriction below is expected to produce.
    expected = 'chr1\t3050022\t3050040\nchr1\t3050040\t3050051\nchr1\t3050051\t3050126'

    # Test the intersect command
    (s, q1) = epidb.input_regions("hg19", data_one, self.admin_key)
    self.assertSuccess(s, q1)
    (s, q_input) = epidb.input_regions("hg19", region, self.admin_key)
    self.assertSuccess(s, q_input)
    (s, q3) = epidb.intersection(q1, q_input, self.admin_key)
    self.assertSuccess(s, q3)
    self.assertEqual(fetch(q3), expected)

    # Test the select annotation
    res = epidb.add_annotation("exp1", "hg19", "desc1", data_one,
                               "CHROMOSOME,START,END", None, self.admin_key)
    self.assertSuccess(res)
    (s, q1) = epidb.select_annotations("exp1", "hg19", "chr1",
                                       3050022, 3050100, self.admin_key)
    self.assertSuccess(s, q1)
    self.assertEqual(fetch(q1), expected)

    # Test the select experiment
    sample_id = self.sample_ids[0]
    res = epidb.add_experiment("exp1", "hg19", "Methylation", sample_id,
                               "tech1", "ENCODE", "desc1", data_one,
                               "CHROMOSOME,START,END", None, self.admin_key)
    self.assertSuccess(res)
    (s, q1) = epidb.select_experiments("exp1", "chr1", 3050022, 3050100,
                                       self.admin_key)
    self.assertSuccess(s, q1)
    self.assertEqual(fetch(q1), expected)

    # Aggregate the full experiment (no coordinate filter) over the range.
    (s, q1) = epidb.select_experiments("exp1", "chr1", None, None,
                                       self.admin_key)
    self.assertSuccess(s, q1)
    (s, q2) = epidb.aggregate(q1, q_input, "START", self.admin_key)
    self.assertSuccess(s, q2)
    self.assertEqual(fetch(q2, "@AGG.MIN,@AGG.MAX,@AGG.COUNT"),
                     '3050022.0000\t3050051.0000\t3')
def test_overlap(self):
    """Exercise ``overlap`` with both flag values, both units and amounts.

    SET 1 runs against the pre-loaded ``hg19_chr1_1`` / ``hg19_chr1_2``
    experiments. SET 2 uses small hand-written, mutually disjoint region
    sets. There the ``False`` flag returns the regions of the first query
    that do not overlap the second (all of them at amount 0), and the
    result depends on argument order.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_full(epidb)

    def select_all(experiment_name):
        # Select every region of an experiment, without any filter.
        res, qid = epidb.select_regions(experiment_name, "hg19", None, None,
                                        None, None, None, None, None,
                                        self.admin_key)
        self.assertSuccess(res, qid)
        return qid

    def input_query(regions):
        # Upload ad-hoc regions and return the resulting query id.
        res, qid = epidb.input_regions("hg19", regions, self.admin_key)
        self.assertSuccess(res, qid)
        return qid

    def overlap_regions(data_qid, filter_qid, overlapping, amount, amount_type):
        # Run overlap() and return the resulting regions as text.
        res, qid = epidb.overlap(data_qid, filter_qid, overlapping, amount,
                                 amount_type, self.admin_key)
        self.assertSuccess(res, qid)
        res, req = epidb.get_regions(qid, "CHROMOSOME,START,END",
                                     self.admin_key)
        self.assertSuccess(res, req)
        return self.get_regions_request(req)

    def chr1_bed(intervals):
        # Render (start, end) pairs as tab-separated chr1 lines.
        return "\n".join("chr1\t%d\t%d" % interval for interval in intervals)

    ##### SET 1 ######
    qid_1 = select_all("hg19_chr1_1")
    qid_2 = select_all("hg19_chr1_2")

    self.assertEqual(overlap_regions(qid_1, qid_2, True, 0, "bp"),
                     helpers.get_result("intersection"))

    self.assertEqual(
        overlap_regions(qid_1, qid_2, True, 165, "bp"),
        chr1_bed([
            (713240, 713390),
            (713900, 714050),
            (714160, 714310),
            (715060, 715210),
            (762420, 762570),
            (840080, 840230),
            (840600, 840750),
            (858880, 859030),
            (859600, 859750),
            (860240, 860390),
            (875900, 876050),
        ]))

    not_overlapping = chr1_bed([
        (761180, 761330),
        (763020, 763170),
        (839540, 839690),
        (875400, 875550),
        (876180, 876330),
    ])
    self.assertEqual(overlap_regions(qid_1, qid_2, False, 0, "bp"),
                     not_overlapping)
    # The same query is issued a second time. This run previously had no
    # assertion at all, so its result was silently discarded.
    self.assertEqual(overlap_regions(qid_1, qid_2, False, 0, "bp"),
                     not_overlapping)
    self.assertEqual(overlap_regions(qid_1, qid_2, False, 0, "%"),
                     not_overlapping)

    self.assertEqual(
        overlap_regions(qid_1, qid_2, False, 350, "bp"),
        chr1_bed([
            (761180, 761330),
            (763020, 763170),
            (875400, 875550),
        ]))

    self.assertEqual(overlap_regions(qid_1, qid_2, False, 500, "%"),
                     "chr1\t763020\t763170")

    ##### SET 2 ######
    set_a = 'chr1\t1000\t1010\nchr1\t1100\t1110\nchr1\t1200\t1210\nchr1\t1300\t1310\nchr1\t1400\t1410\nchr1\t1500\t1510'
    set_b = 'chr1\t1011\t1020\nchr1\t1111\t1120\nchr1\t1211\t1220\nchr1\t1311\t1320\nchr1\t1411\t1420\nchr1\t1511\t1520'

    q1 = input_query(set_a)
    q2 = input_query(set_b)

    self.assertEqual(overlap_regions(q1, q2, False, 0, "bp"), set_a)
    self.assertEqual(overlap_regions(q1, q2, False, 20, "%"), '')
    self.assertEqual(overlap_regions(q1, q2, False, 1, "%"), set_a)
    self.assertEqual(overlap_regions(q2, q1, False, 0, "bp"), set_b)

    # A subset of set_a against all of set_b.
    subset_a = 'chr1\t1000\t1010\nchr1\t1200\t1210\nchr1\t1300\t1310'
    q4 = input_query(subset_a)
    q5 = input_query(set_b)
    self.assertEqual(overlap_regions(q4, q5, False, 0, "bp"), subset_a)

    # A single region against all of set_b, in both argument orders.
    q4 = input_query("chr1\t1000\t1010")
    q5 = input_query(set_b)
    self.assertEqual(overlap_regions(q4, q5, False, 0, "bp"),
                     "chr1\t1000\t1010")
    self.assertEqual(overlap_regions(q5, q4, False, 0, "bp"), set_b)
def test_enriochment_overlap(self):
    """Check ``enrich_regions_overlap`` for a matching and a disjoint query.

    A broad-peak experiment is loaded and used as the only dataset of the
    ``H3k4me3`` database, with 10 kb tiling regions as the universe. The
    experiment's own chr1 regions are enriched first (expected infinite
    ``p_value_log`` and ``odds_ratio``), then a single one-base region
    (expected ``support`` of 0 and a ``p_value_log`` of 0.0).
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    # NOTE(review): the technique is registered as "ChIP-seq" while the
    # experiment below passes "ChIPseq" - presumably the server normalizes
    # technique names; confirm.
    res = epidb.add_technique("ChIP-seq", "ChIP-sequencing", {}, self.admin_key)
    self.assertSuccess(res)

    experiment_name = "wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed"
    databases = {"H3k4me3": [experiment_name]}

    broad_peak_format = ",".join([
        "CHROMOSOME",
        "START",
        "END",
        "NAME",
        "SCORE",
        "STRAND",
        "SIGNAL_VALUE",
        "P_VALUE",
        "Q_VALUE",
    ])

    with open("data/wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed", 'r') as f:
        file_data = f.read()

    (res, exp) = epidb.add_experiment(
        experiment_name, "hg19", "H3k4me3", "s1", "ChIPseq", "ENCODE",
        "wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed from ENCODE",
        file_data, broad_peak_format, None, self.admin_key)
    self.assertSuccess(res, exp)

    res, q_exp = epidb.select_experiments(
        experiment_name, "chr1", None, None, self.admin_key)
    self.assertSuccess(res, q_exp)

    # Universe shared by both enrichment requests below.
    res, q_tiling = epidb.tiling_regions(10000, "hg19", None, self.admin_key)
    self.assertSuccess(res, q_tiling)

    ## Testing with all overlaps
    res, r_id = epidb.enrich_regions_overlap(
        q_exp, q_tiling, databases, "hg19", self.admin_key)
    self.assertSuccess(res, r_id)
    result = self.get_regions_request(r_id)

    self.assertEqual(
        result['enrichment']['results'][0]['p_value_log'], float('Inf'))
    self.assertEqual(
        result,
        {
            'enrichment': {
                'count_query_regions': 3074,
                'count_universe_regions': 313669,
                'results': [{
                    'c': 0,
                    'b': 30861,
                    'description': '',
                    'p_value_log': float('Inf'),
                    'experiment_size': 33270,
                    'database_name': 'H3k4me3',
                    'max_rank': 1,
                    'support_rank': 1,
                    'dataset': 'wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed',
                    'biosource': 'K562',
                    'odd_rank': 1,
                    'odds_ratio': float('Inf'),
                    'epigenetic_mark': 'H3k4me3',
                    'mean_rank': 1.0,
                    'log_rank': 1,
                    'support': 3066,
                    'd': 279742,
                    'msg': '',
                    'error': False,
                }],
            },
        })

    ## Testing without any overlap
    res, q_input = epidb.input_regions("hg19", "chr1\t1\t2", self.admin_key)
    self.assertSuccess(res, q_input)

    res, r_id = epidb.enrich_regions_overlap(
        q_input, q_tiling, databases, "hg19", self.admin_key)
    self.assertSuccess(res, r_id)
    result = self.get_regions_request(r_id)

    self.assertEqual(
        result['enrichment']['results'][0]['p_value_log'], 0.0)
    self.assertEqual(
        {
            'enrichment': {
                'count_query_regions': 1,
                'count_universe_regions': 313669,
                'results': [{
                    'c': 1,
                    'b': 33927,
                    'description': '',
                    'p_value_log': 0.0,
                    'experiment_size': 33270,
                    'database_name': 'H3k4me3',
                    'max_rank': 1,
                    'support_rank': 1,
                    'dataset': 'wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed',
                    'biosource': 'K562',
                    'odd_rank': 1,
                    'odds_ratio': 0.0,
                    'epigenetic_mark': 'H3k4me3',
                    'mean_rank': 1.0,
                    'log_rank': 1,
                    'support': 0,
                    'd': 279741,
                    'error': False,
                    'msg': '',
                }],
            },
        },
        result)