def test_select_only_peaks_cached(self):
    """Count a cached experiment query twice and compare the requests.

    Two bedgraph files are added as experiments and selected together. The
    query is passed through ``query_cache(..., True, ...)`` and counted
    twice: both counts must be 1009 and both calls must return the same
    request.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    files = ["reference_example", "test1"]
    for filename in files:
        bedgraph_data = helpers.load_bedgraph(filename)
        res = epidb.add_experiment(filename, "hg19", "Methylation",
                                   sample_id, "tech1", "ENCODE", "desc1",
                                   bedgraph_data, "bedgraph", None,
                                   self.admin_key)
        # A rejected upload should fail here instead of surfacing later as
        # a puzzling region count.
        self.assertSuccess(res)

    (s, q) = epidb.select_experiments(files, "", None, None, self.admin_key)
    self.assertSuccess(s, q)

    (s, q_cache) = epidb.query_cache(q, True, self.admin_key)
    self.assertSuccess(s, q_cache)

    (s, req) = epidb.count_regions(q_cache, self.admin_key)
    self.assertSuccess(s, req)
    self.assertEqual(1009, self.count_request(req))

    (s, req2) = epidb.count_regions(q_cache, self.admin_key)
    self.assertSuccess(s, req2)
    self.assertEqual(1009, self.count_request(req2))

    # Counting the same cached query again must hand back the same request.
    self.assertEqual(req, req2)
def test_overlap_experiment_annotation(self):
    """Merge an experiment selection with an annotation selection.

    Selects the "hg19_chr1_1" experiment on chr1:760000-860000 and the
    "Cpg Islands" annotation on chr1, counts each, merges both queries and
    checks that the merged count is the sum of the two partial counts.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    self.insert_experiment(epidb, "hg19_chr1_1", sample_id)
    self.insert_annotation(epidb, "Cpg Islands")

    res, qid_1 = epidb.select_regions("hg19_chr1_1", "hg19", "Methylation",
                                      sample_id, "tech1", "ENCODE", "chr1",
                                      760000, 860000, self.admin_key)
    self.assertSuccess(res, qid_1)
    res, req = epidb.count_regions(qid_1, self.admin_key)
    self.assertSuccess(res, req)
    experiment_count = self.count_request(req)

    res, qid_2 = epidb.select_annotations("Cpg Islands", "hg19", "chr1",
                                          None, None, self.admin_key)
    self.assertSuccess(res, qid_2)
    res, req = epidb.count_regions(qid_2, self.admin_key)
    self.assertSuccess(res, req)
    annotation_count = self.count_request(req)

    res, qid_3 = epidb.merge_queries(qid_1, qid_2, self.admin_key)
    self.assertSuccess(res, qid_3)
    res, req = epidb.count_regions(qid_3, self.admin_key)
    self.assertSuccess(res, req)
    merged_count = self.count_request(req)

    # Previously the three counts were computed and thrown away, so the
    # test could not fail on wrong data.
    # NOTE(review): assumes merge_queries keeps every region of both inputs
    # (no de-duplication) - confirm against the server documentation.
    self.assertEqual(experiment_count + annotation_count, merged_count)
def test_wig_files(self):
    """Load nine wig files as experiments and count all selected regions.

    Every file is added as an hg19 "Methylation" experiment; selecting all
    of them together must give 5667 regions.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    wig_names = ["scores%d" % index for index in range(1, 8)]
    wig_names += ["yeast_pol2", "yeast_rap1"]

    for wig_name in wig_names:
        content = helpers.load_wig(wig_name)
        upload = epidb.add_experiment(wig_name, "hg19", "Methylation",
                                      sample_id, "tech1", "ENCODE", "desc1",
                                      content, "wig", None, self.admin_key)
        self.assertSuccess(upload)

    (_, selection) = epidb.select_regions(wig_names, "hg19", None, None,
                                          None, None, None, None, None,
                                          self.admin_key)

    # The regions request is issued but its result is not inspected.
    (_, _regions_request) = epidb.get_regions(
        selection, "CHROMOSOME, START, END, VALUE", self.admin_key)

    (_, count_request_id) = epidb.count_regions(selection, self.admin_key)
    total = self.count_request(count_request_id)

    self.assertEqual(5667, total)
def test_annotation_full_cpg_islands(self):
    """Upload the full CpG island file and count its regions.

    The number of regions reported for the annotation must equal the
    number of newline-separated chunks in the uploaded file.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    columns = [
        "CHROMOSOME", "START", "END", "NAME", "LENGTH", "NUM_CPG",
        "NUM_GC", "PER_CPG", "PER_CG", "OBS_EXP",
    ]
    annotation_format = ",".join(columns)

    with open("data/cpgIslandExtFull.txt", 'r') as source:
        contents = source.read()
    # Same value as len(contents.split("\n")).
    expected_total = contents.count("\n") + 1

    (status, annotation_id) = epidb.add_annotation(
        "Cpg Islands", "hg19", "Complete CpG islands", contents,
        annotation_format, None, self.admin_key)
    self.assertSuccess(status, annotation_id)

    status, query_id = epidb.select_annotations(
        "Cpg Islands", "hg19", None, None, None, self.admin_key)
    self.assertSuccess(status, query_id)

    (_, request_id) = epidb.count_regions(query_id, self.admin_key)
    self.assertEqual(expected_total, self.count_request(request_id))
def test_bed_graph_files(self):
    """Load three bedgraph files as experiments and count all regions."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    bedgraph_names = ["reference_example", "test1", "bigwig"]
    for bedgraph_name in bedgraph_names:
        payload = helpers.load_bedgraph(bedgraph_name)
        outcome = epidb.add_experiment(bedgraph_name, "hg19", "Methylation",
                                       sample_id, "tech1", "ENCODE",
                                       "desc1", payload, "bedgraph", None,
                                       self.admin_key)
        self.assertSuccess(outcome)

    (_, selection) = epidb.select_regions(bedgraph_names, "hg19", None,
                                          None, None, None, None, None,
                                          None, self.admin_key)
    (status, request_id) = epidb.count_regions(selection, self.admin_key)
    self.assertSuccess(status, request_id)
    total = self.count_request(request_id)

    # Expected total, obtained with:
    #   grep -v # *.bg | grep -v browser | grep -v track | wc -l
    self.assertEqual(3997106, total)
def test_gene_retrieve(self):
    """Retrieve gene regions with several output column sets.

    Adds the "Test One" gene model from a GTF excerpt, selects one gene by
    ID and by name, and compares the output produced for different column
    lists. Finally selects every gene of the model and checks, per line,
    that the ``@GENE_ATTRIBUTE`` columns match the ``@GENE_ID`` and
    ``@GENE_NAME`` columns.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    # Close the GTF file deterministically instead of leaking the handle.
    with open("data/gtf/gencode.v23.basic.annotation_head.gtf") as gtf_file:
        data = gtf_file.read()

    (s, ss) = epidb.add_gene_model("Test One", "hg19",
                                   "Test One Description", data, "GTF", {},
                                   self.admin_key)
    self.assertSuccess(s, ss)

    # The gene ID is listed twice on purpose in the original test; the
    # expectations below contain a single region.
    (s, query_id) = epidb.select_genes(
        ["ENSG00000223972.5", "ENSG00000223972.5", "DDX11L1"], "",
        "Test One", None, None, None, self.admin_key)

    (s, r_id) = epidb.get_regions(query_id, "CHROMOSOME,START,END",
                                  self.admin_key)
    regions = self.get_regions_request(r_id)
    self.assertEqual("chr1\t11869\t14409", regions)

    (s, r_id) = epidb.get_regions(
        query_id,
        "CHROMOSOME,START,END,@GENE_ATTRIBUTE(gene_id),@GENE_ATTRIBUTE(gene_name),@NAME",
        self.admin_key)
    regions = self.get_regions_request(r_id)
    self.assertEqual(
        "chr1\t11869\t14409\tENSG00000223972.5\tDDX11L1\tTest One", regions)

    # An attribute that does not exist yields an empty trailing column.
    (s, r_id) = epidb.get_regions(
        query_id,
        "CHROMOSOME,START,END,@GENE_ATTRIBUTE(gene_id),@GENE_ATTRIBUTE(gene_name),@NAME,@GENE_ATTRIBUTE(noooo)",
        self.admin_key)
    regions = self.get_regions_request(r_id)
    self.assertEqual(
        "chr1\t11869\t14409\tENSG00000223972.5\tDDX11L1\tTest One\t",
        regions)

    (s, r_id) = epidb.get_regions(
        query_id,
        "CHROMOSOME,SOURCE,FEATURE,START,END,GTF_SCORE,STRAND,FRAME,GTF_ATTRIBUTES,@GENE_ATTRIBUTE(gene_name)",
        self.admin_key)
    regions = self.get_regions_request(r_id)
    self.assertEqual(
        'chr1\tHAVANA\tgene\t11869\t14409\t.\t+\t.\tgene_id "ENSG00000223972.5"; gene_name "DDX11L1"; gene_status "KNOWN"; gene_type "transcribed_unprocessed_pseudogene"; havana_gene "OTTHUMG00000000961.2"; level "2"\tDDX11L1',
        regions)

    (s, query_id) = epidb.select_genes(None, "", "Test One", None, None,
                                       None, self.admin_key)
    # The count request is issued but its result is not inspected.
    (s, request_id) = epidb.count_regions(query_id, self.admin_key)
    (s, r_id) = epidb.get_regions(
        query_id,
        "CHROMOSOME,START,END,@GENE_ATTRIBUTE(gene_id),@GENE_ATTRIBUTE(gene_name),@NAME,@GENE_ID(Test One),@GENE_NAME(Test One)",
        self.admin_key)
    regions = self.get_regions_request(r_id)

    for line in regions.split("\n"):
        ls = line.split("\t")
        # Columns 3/4 come from @GENE_ATTRIBUTE, columns 6/7 from
        # @GENE_ID / @GENE_NAME.
        self.assertEqual(ls[3], ls[6])
        self.assertEqual(ls[4], ls[7])
def test_select_full_experiment(self, format=None):
    """Select a whole experiment with progressively fewer filters.

    The "hg19_chr1_1" experiment is selected eight times, each time with
    one more trailing ``select_regions`` argument set to ``None``. Every
    selection must report the same region count and the same region text
    as the stored reference output.

    ``format`` is accepted for signature compatibility only; as before,
    the column list used by the test is fixed.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    # Pass the client explicitly, like every other test in this file.
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    self.insert_experiment(epidb, "hg19_chr1_1", sample_id)

    full_experiment_regions = helpers.get_result("hg19_chr1_1_output")
    region_count = len(full_experiment_regions.split("\n"))

    # Use a separate local instead of rebinding the ``format`` parameter
    # (which also shadows the builtin of the same name).
    output_format = "CHROMOSOME,START,END,NAME,SCORE,STRAND,SIGNAL_VALUE,P_VALUE,Q_VALUE,PEAK"

    # Test retrieving the whole data with each parameter of the experiment
    # either set or not set: start from the fully specified selection and
    # blank one more trailing argument per step (0 to 7 trailing Nones).
    full_selection = ("hg19_chr1_1", "hg19", "Methylation", sample_id,
                      "tech1", "ENCODE", "chr1", 713240, 876330)
    argument_combinations = [
        full_selection[:len(full_selection) - blanks] + (None,) * blanks
        for blanks in range(8)
    ]

    for args in argument_combinations:
        args = args + (self.admin_key,)

        res, qid = epidb.select_regions(*args)
        self.assertSuccess(res, qid)

        res, req = epidb.count_regions(qid, self.admin_key)
        self.assertSuccess(res, req)
        count = self.count_request(req)
        self.assertEqual(count, region_count)

        res, req = epidb.get_regions(qid, output_format, self.admin_key)
        self.assertSuccess(res, req)
        regions = self.get_regions_request(req)
        self.assertEqual(regions, full_experiment_regions)
def test_multiple_overlap(self):
    """Intersect an annotation and an experiment in several combinations.

    Checks the region counts of each selection on its own (14 and 21), of
    their intersection (3), of each query intersected with itself, and of
    an intersection result intersected with itself.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    def region_count(query_id):
        # Issue a count request, check it was accepted and fetch the number.
        status, request_id = epidb.count_regions(query_id, self.admin_key)
        self.assertSuccess(status, request_id)
        return self.count_request(request_id)

    sample_id = self.sample_ids[0]
    self.insert_experiment(epidb, "hg19_chr1_1", sample_id)
    self.insert_annotation(epidb, "Cpg Islands")

    status, annotation_q = epidb.select_annotations(
        "Cpg Islands", "hg19", None, None, None, self.admin_key)
    self.assertSuccess(status, annotation_q)
    self.assertEqual(region_count(annotation_q), 14)

    status, experiment_q = epidb.select_regions(
        "hg19_chr1_1", "hg19", None, None, None, None, None, None, None,
        self.admin_key)
    self.assertEqual(status, "okay")
    self.assertEqual(region_count(experiment_q), 21)

    # Annotation against experiment.
    status, crossed = epidb.intersection(annotation_q, experiment_q,
                                         self.admin_key)
    self.assertEqual(status, "okay")
    self.assertEqual(region_count(crossed), 3)

    # Experiment against itself.
    status, crossed = epidb.intersection(experiment_q, experiment_q,
                                         self.admin_key)
    self.assertSuccess(status, crossed)
    self.assertEqual(region_count(crossed), 21)

    # Annotation against itself.
    status, crossed = epidb.intersection(annotation_q, annotation_q,
                                         self.admin_key)
    self.assertSuccess(status, crossed)
    self.assertEqual(region_count(crossed), 14)

    # The previous intersection result against itself.
    status, crossed = epidb.intersection(crossed, crossed, self.admin_key)
    self.assertSuccess(status, crossed)
    self.assertEqual(region_count(crossed), 14)
def test_remove_full_chromosome_data(self):
    """Filter a coordinate-bounded selection down to chr21.

    Selects "hg19_big_2" between positions 0 and 9841558, keeps only the
    regions whose CHROMOSOME equals "chr21" and compares the resulting
    nine regions with the expected text.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_full(epidb)

    self.insert_experiment(epidb, "hg19_big_2")

    res, qid_1_1 = epidb.select_regions("hg19_big_2", "hg19", None, None,
                                        None, None, None, 0, 9841558,
                                        self.admin_key)
    self.assertSuccess(res, qid_1_1)

    res, req = epidb.count_regions(qid_1_1, self.admin_key)
    self.assertSuccess(res, req)
    # The count itself is not asserted; the call is kept so the request is
    # still consumed exactly as before.
    self.count_request(req)

    (status, filtered_chr) = epidb.filter_regions(
        qid_1_1, "CHROMOSOME", "==", "chr21", "string", self.admin_key)
    self.assertSuccess(status, filtered_chr)

    res, req = epidb.get_regions(filtered_chr,
                                 "CHROMOSOME,START,END,STRAND",
                                 self.admin_key)
    self.assertSuccess(res, req)
    regions = self.get_regions_request(req)

    expected = (
        "chr21\t9656828\t9656920\t.\n"
        "chr21\t9700370\t9700415\t.\n"
        "chr21\t9825445\t9826573\t.\n"
        "chr21\t9826759\t9827609\t.\n"
        "chr21\t9829381\t9829420\t.\n"
        "chr21\t9831594\t9831981\t.\n"
        "chr21\t9833197\t9833459\t.\n"
        "chr21\t9833733\t9833902\t.\n"
        "chr21\t9841288\t9841558\t."
    )
    self.assertEqual(regions, expected)
def test_output_format(self):
    """Check region output for an invalid and a valid column list.

    Uploads a ten-region CpG island annotation, verifies that a
    colon-separated column string is rejected with error 125000 and that
    the comma-separated column list returns the expected table.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    # Close the input file deterministically instead of leaking the handle.
    with open("data/cpgIslandAllFields.txt") as annotation_file:
        file_data = annotation_file.read()

    cpg_island = ",".join([
        "CHROMOSOME", "START", "END", "NAME", "LENGTH", "NUM_CPG",
        "NUM_GC", "PER_CPG", "PER_CG", "OBS_EXP"
    ])

    res = epidb.add_annotation("Cpg Islands", "hg19", "CpG islands",
                               file_data, cpg_island, None, self.admin_key)
    self.assertSuccess(res)

    res, qid_1 = epidb.select_annotations("Cpg Islands", "hg19", None, None,
                                          None, self.admin_key)
    self.assertSuccess(res, qid_1)
    self.assertEqual(qid_1, 'q1')

    res, req = epidb.count_regions(qid_1, self.admin_key)
    self.assertSuccess(res, req)
    count = self.count_request(req)
    self.assertEqual(10, count)

    # The whole colon-separated string is treated as one unknown column.
    res, req = epidb.get_regions(
        qid_1, "chr:start:end:name:length:cpgNum:gcNum:perCpg:perGc:obsExp",
        self.admin_key)
    self.assertEqual(
        req,
        "125000:Column name 'chr:start:end:name:length:cpgNum:gcNum:perCpg:perGc:obsExp' does not exist."
    )

    res, req = epidb.get_regions(
        qid_1,
        "CHROMOSOME,START,END,NAME,LENGTH,NUM_CPG,NUM_GC,PER_CPG,PER_CG,OBS_EXP",
        self.admin_key)
    # Verify the request was accepted before waiting for its data.
    self.assertSuccess(res, req)
    regions = self.get_regions_request(req)

    expected = (
        'chr1\t28735\t29810\tCpG: 116\t1075\t116\t787\t21.6000\t73.2000\t0.8300\n'
        'chr1\t135124\t135563\tCpG: 30\t439\t30\t295\t13.7000\t67.2000\t0.6400\n'
        'chr1\t327790\t328229\tCpG: 29\t439\t29\t295\t13.2000\t67.2000\t0.6200\n'
        'chr1\t437151\t438164\tCpG: 84\t1013\t84\t734\t16.6000\t72.5000\t0.6400\n'
        'chr1\t449273\t450544\tCpG: 99\t1271\t99\t777\t15.6000\t61.1000\t0.8400\n'
        'chr1\t533219\t534114\tCpG: 94\t895\t94\t570\t21.0000\t63.7000\t1.0400\n'
        'chr1\t544738\t546649\tCpG: 171\t1911\t171\t1405\t17.9000\t73.5000\t0.6700\n'
        'chr1\t713984\t714547\tCpG: 60\t563\t60\t385\t21.3000\t68.4000\t0.9200\n'
        'chr1\t762416\t763445\tCpG: 115\t1029\t115\t673\t22.4000\t65.4000\t1.0700\n'
        'chr1\t788863\t789211\tCpG: 28\t348\t28\t192\t16.1000\t55.2000\t1.0600'
    )
    self.assertEqual(expected, regions)
def test_gene_case_insensitive(self):
    """Select one gene by name and list the available gene models.

    NOTE(review): the test name suggests case-insensitive matching, but
    the gene name is only looked up in a single spelling here - confirm
    whether a differently-cased lookup was intended.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    # Close the GTF file deterministically instead of leaking the handle.
    with open("data/gtf/gencode.v23.basic.annotation_head.gtf") as gtf_file:
        data = gtf_file.read()

    (s, ss) = epidb.add_gene_model("Test One", "hg19",
                                   "Test One Description", data, "GTF", {},
                                   self.admin_key)
    self.assertSuccess(s, ss)

    (s, query_id) = epidb.select_genes(["RP11-34P13.7"], "", "Test One",
                                       None, None, None, self.admin_key)
    (s, req) = epidb.count_regions(query_id, self.admin_key)
    count = self.count_request(req)
    self.assertEqual(count, 1)

    status, gene_models = epidb.list_gene_models(self.admin_key)
    self.assertEqual(gene_models, [['gs1', 'Test One']])
def test_multiple_merge(self):
    """Merge seven chr21 tilings of different sizes into one query.

    The first tiling is merged with the list of the remaining six and the
    merged query must contain 14287 regions.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    tile_sizes = [10000, 20000, 25000, 30000, 35000, 40000, 50000]

    tilings = []
    for tile_size in tile_sizes:
        status, tiling_query = epidb.tiling_regions(tile_size, "hg19",
                                                    "chr21", self.admin_key)
        tilings.append(tiling_query)

    first_tiling = tilings[0]
    remaining_tilings = tilings[1:]
    status, merged_query = epidb.merge_queries(first_tiling,
                                               remaining_tilings,
                                               self.admin_key)
    self.assertSuccess(status, merged_query)

    status, request_id = epidb.count_regions(merged_query, self.admin_key)
    self.assertSuccess(status, request_id)

    merged_total = self.count_request(request_id)
    self.assertEqual(merged_total, 14287)
def test_annotation_signal_bedgraph(self):
    """Upload a bedgraph file as an annotation and filter it step by step.

    The annotation must hold 1000 regions. A chain of four filters (two
    on VALUE, two on CHROMOSOME) is then applied and the remaining
    regions are compared with the expected text.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]  # read as before; not used further

    annotation_names = ["test1"]
    for annotation_name in annotation_names:
        payload = helpers.load_bedgraph(annotation_name)
        outcome = epidb.add_annotation(annotation_name, "hg19", "Test data",
                                       payload, "bedgraph", None,
                                       self.admin_key)
        self.assertSuccess(outcome)

    (_, selection) = epidb.select_annotations(annotation_names, "hg19",
                                              None, None, None,
                                              self.admin_key)
    (status, request_id) = epidb.count_regions(selection, self.admin_key)
    self.assertSuccess(status, request_id)
    self.assertEqual(1000, self.count_request(request_id))

    # Each filter is applied to the result of the previous one.
    filter_chain = [
        ("VALUE", ">", "0.75", "number"),
        ("VALUE", "<", "0.8", "number"),
        ("CHROMOSOME", "!=", "chrX", "string"),
        ("CHROMOSOME", "!=", "chr7", "string"),
    ]
    narrowed = selection
    for column, operator, operand, operand_type in filter_chain:
        (_, narrowed) = epidb.filter_regions(narrowed, column, operator,
                                             operand, operand_type,
                                             self.admin_key)

    (_, request_id) = epidb.get_regions(
        narrowed, "CHROMOSOME,START,END,VALUE,@NAME,@EPIGENETIC_MARK",
        self.admin_key)
    found = self.get_regions_request(request_id)

    self.assertEqual(
        found,
        'chr1\t104372258\t104372293\t0.7767\ttest1\t\n'
        'chr10\t126498141\t126498176\t0.7695\ttest1\t\n'
        'chr11\t66110277\t66110312\t0.7613\ttest1\t\n'
        'chr15\t38653026\t38653061\t0.7720\ttest1\t\n'
        'chr15\t87725326\t87725361\t0.7727\ttest1\t\n'
        'chr16\t2119419\t2119454\t0.7696\ttest1\t\n'
        'chr16\t63360719\t63360754\t0.7740\ttest1\t\n'
        'chr19\t46369215\t46369250\t0.7727\ttest1\t\n'
        'chr8\t21923667\t21923702\t0.7930\ttest1\t'
    )
def test_annotation(self):
    """Upload a CpG island annotation and read it back unchanged.

    After the upload the annotation list for hg19 must hold two entries,
    the region count must match the number of newline-separated chunks of
    the file, and the CHROMOSOME/START/END output must equal the file
    contents.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    with open("data/cpgIslandExt.txt", 'r') as source:
        contents = source.read()

    extra_metadata = {
        "url": "genome.ucsc.edu/cgi-bin/hgTables?db=hg19&hgta_group=regulation&hgta_track=cpgIslandExt&hgta_table=cpgIslandExt&hgta_doSchema=describe+table+schema"
    }
    upload = epidb.add_annotation("Cpg Islands", "hg19",
                                  "CpG islands are associated ...",
                                  contents, "", extra_metadata,
                                  self.admin_key)
    self.assertSuccess(upload)

    status, listed = epidb.list_annotations("hg19", self.admin_key)
    self.assertSuccess(status, listed)
    self.assertEqual(len(listed), 2)
    self.assertEqual(listed[0][1], "Chromosomes size for hg19")
    self.assertEqual(listed[1][1], "Cpg Islands")

    # Same value as len(contents.split("\n")).
    expected_total = contents.count("\n") + 1

    status, query_id = epidb.select_annotations(
        "Cpg Islands", "hg19", None, None, None, self.admin_key)
    self.assertSuccess(status, query_id)

    status, request_id = epidb.count_regions(query_id, self.admin_key)
    self.assertSuccess(status, request_id)
    self.assertEqual(expected_total, self.count_request(request_id))

    status, request_id = epidb.get_regions(query_id, "CHROMOSOME,START,END",
                                           self.admin_key)
    self.assertSuccess(status, request_id)
    self.assertEqual(self.get_regions_request(request_id), contents)
def test_annotation_signal_wig(self):
    """Upload nine wig files as annotations and count all their regions.

    Selecting all nine annotations together must give 5667 regions.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]  # read as before; not used further

    wig_names = ["scores%d" % index for index in range(1, 8)]
    wig_names += ["yeast_pol2", "yeast_rap1"]

    for wig_name in wig_names:
        content = helpers.load_wig(wig_name)
        upload = epidb.add_annotation(wig_name, "hg19", "Test data",
                                      content, "wig", None, self.admin_key)
        self.assertSuccess(upload)

    (_, selection) = epidb.select_annotations(wig_names, "hg19", None, None,
                                              None, self.admin_key)
    (_, request_id) = epidb.count_regions(selection, self.admin_key)
    total = self.count_request(request_id)

    self.assertEqual(5667, total)
def test_complex_input_regions(self):
    """Submit three literal regions and read them back with meta columns.

    The regions are sent through ``input_regions``, counted (3) and then
    retrieved with name, epigenetic-mark and ``@CALCULATED`` columns; the
    calculated column holds END minus START for each region.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_full(epidb)

    regions = "chr1\t1\t10000\nchr2\t2\t20000\nchr3\t3\t30000"

    (s, q) = epidb.input_regions("hg19", regions, self.admin_key)
    # Stop here with a clear message if the regions were rejected.
    self.assertSuccess(s, q)

    res, req = epidb.count_regions(q, self.admin_key)
    self.assertSuccess(res, req)
    count = self.count_request(req)
    self.assertEqual(count, 3)

    res, req = epidb.get_regions(
        q,
        "CHROMOSOME,START,END,NAME,@NAME,@EPIGENETIC_MARK,@CALCULATED(return value_of('END') - value_of('START') )",
        self.admin_key)
    self.assertSuccess(res, req)
    regions = self.get_regions_request(req)

    # One output row per input region, joined with explicit newlines so
    # the row separator does not depend on source-file formatting.
    output = "\n".join([
        "chr1\t1\t10000\t\tQuery q1 regions set\t\t9999.000000",
        "chr2\t2\t20000\t\tQuery q1 regions set\t\t19998.000000",
        "chr3\t3\t30000\t\tQuery q1 regions set\t\t29997.000000",
    ])
    self.assertEqual(regions, output)
def test_select_genes(self):
    """List and select genes of a gene model by ID and by name.

    Covers listing by full gene ID, by an ID without version suffix
    (no match), listing the whole model (20 genes), selecting by name
    given as a list or as a plain string (same query ID), and selecting
    names that do not exist (0 regions).
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    # Close the GTF file deterministically instead of leaking the handle.
    with open("data/gtf/gencode.v23.basic.annotation_head.gtf") as gtf_file:
        data = gtf_file.read()

    (s, ss) = epidb.add_gene_model("Test One", "hg19",
                                   "Test One Description", data, "GTF", {},
                                   self.admin_key)
    self.assertSuccess(s, ss)

    (s, ss) = epidb.list_genes(["ENSG00000279457.3"], "", None, None, None,
                               "Test One", self.admin_key)
    self.assertEqual(ss, [{
        'gene_name': 'FO538757.2',
        'gene_type': 'protein_coding',
        'end': 200322,
        'source': 'ENSEMBL',
        'frame': '.',
        'level': '3',
        'tag': 'ncRNA_host',
        'gene_id': 'ENSG00000279457.3',
        'start': 184923,
        'score': 0.0,
        'strand': '-',
        '_id': 'gn20',
        'gene_status': 'KNOWN',
        'chromosome': 'chr1'
    }])

    # The ID without its version suffix does not match any gene.
    (s, ss) = epidb.list_genes("ENSG00000279457", "", "chr1", None, None,
                               "Test One", self.admin_key)
    self.assertEqual(ss, [])

    (s, ss) = epidb.list_genes(None, None, None, None, None, "Test One",
                               self.admin_key)
    self.assertEqual(20, len(ss))

    (s, query_id) = epidb.select_genes(["RP11-34P13.7"], "", "Test One",
                                       None, None, None, self.admin_key)
    (s, req) = epidb.count_regions(query_id, self.admin_key)
    count = self.count_request(req)
    self.assertEqual(count, 1)

    # Repeating the selection, as a list or as a plain string, must give
    # back the same query ID.
    (s, new_query_id) = epidb.select_genes(["RP11-34P13.7"], "", "Test One",
                                           None, None, None, self.admin_key)
    self.assertEqual(query_id, new_query_id)

    (s, new_query_id) = epidb.select_genes("RP11-34P13.7", "", "Test One",
                                           None, None, None, self.admin_key)
    self.assertEqual(query_id, new_query_id)

    # Names that only resemble an existing gene select nothing.
    (s, query_id) = epidb.select_genes(["RP11-34P13.234"], "", "Test One",
                                       None, None, None, self.admin_key)
    (s, req) = epidb.count_regions(query_id, self.admin_key)
    count = self.count_request(req)
    self.assertEqual(count, 0)

    (s, query_id) = epidb.select_genes(["RP11-34P13"], "", "Test One", None,
                                       None, None, self.admin_key)
    (s, req) = epidb.count_regions(query_id, self.admin_key)
    count = self.count_request(req)
    self.assertEqual(count, 0)

    status, gene_models = epidb.list_gene_models(self.admin_key)
    self.assertEqual(gene_models, [['gs1', 'Test One']])
def test_cancel_aggregation(self):
    """Cancel requests as different users and inspect the final states.

    The admin issues a count request and an aggregation regions request.
    A second user must not be able to cancel either; the admin cancels
    both. The second user then issues its own regions request, which a
    third user cannot cancel but the admin can. Finally the request
    states and the ``get_request_data`` error messages are checked.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    self.insert_experiment(epidb, "hg19_big_2", sample_id)

    cpg_island = ",".join([
        "CHROMOSOME", "START", "END", "NAME", "LENGTH", "NUM_CPG",
        "NUM_GC", "PER_CPG", "PER_CG", "OBS_EXP"
    ])
    with open("data/cpgIslandExtFull.txt", 'r') as f:
        file_data = f.read()

    (res, a_1) = epidb.add_annotation("Cpg Islands", "hg19",
                                      "Complete CpG islands", file_data,
                                      cpg_island, None, self.admin_key)
    self.assertSuccess(res, a_1)

    res, q_cgi = epidb.select_annotations("Cpg Islands", "hg19", None, None,
                                          None, self.admin_key)
    self.assertSuccess(res, q_cgi)

    res, qid_2 = epidb.tiling_regions(1000000, "hg19", None, self.admin_key)
    self.assertSuccess(res, qid_2)

    res, req_count = epidb.count_regions(qid_2, self.admin_key)
    self.assertSuccess(res, req_count)
    count = self.count_request(req_count)
    self.assertEqual(count, 3118)

    res, qid_3 = epidb.aggregate(q_cgi, qid_2, "@LENGTH", self.admin_key)
    self.assertSuccess(res, qid_3)

    res, qid_4 = epidb.filter_regions(qid_3, "@AGG.COUNT", ">=", "100",
                                      "number", self.admin_key)

    (res, req_regions) = epidb.get_regions(
        qid_4,
        "CHROMOSOME,START,END,@AGG.MIN,@AGG.MAX,@AGG.MEDIAN,@AGG.MEAN,@AGG.VAR,@AGG.SD,@AGG.COUNT",
        self.admin_key)
    self.assertSuccess(res, req_regions)

    # A second user must not be able to cancel the admin's requests.
    (s, user_two) = epidb.add_user("ANOTHER NAME", "ANOTHER EMAIL",
                                   "INSTITUTE", self.admin_key)
    s, tmp_user = epidb.modify_user_admin(user_two[0], "permission_level",
                                          "GET_DATA", self.admin_key)

    s, msg = epidb.cancel_request(req_regions, user_two[1])
    self.assertEqual(msg, "130003:The request ID 'r2' is invalid.")
    s, msg = epidb.cancel_request(req_count, user_two[1])
    self.assertEqual(msg, "130003:The request ID 'r1' is invalid.")

    # The admin can cancel both of them.
    (s, m) = epidb.cancel_request(req_regions, self.admin_key)
    self.assertSuccess(s, m)
    (s, m) = epidb.cancel_request(req_count, self.admin_key)
    self.assertSuccess(s, m)

    # A third user must not be able to cancel the second user's request.
    (s, user_three) = epidb.add_user("ASS NAME", "ASS EMAIL", "INSTITUTE",
                                     self.admin_key)
    s, tmp_user = epidb.modify_user_admin(user_three[0], "permission_level",
                                          "GET_DATA", self.admin_key)

    res, q_cgi_other = epidb.select_annotations("Cpg Islands", "hg19", None,
                                                None, None, user_two[1])
    # Check the query that was just created (previously this passed the
    # admin's q_cgi by mistake).
    self.assertSuccess(res, q_cgi_other)

    (res, req_other) = epidb.get_regions(q_cgi_other,
                                         "CHROMOSOME,START,END",
                                         user_two[1])
    # Likewise, check the new request rather than the admin's req_regions.
    self.assertSuccess(res, req_other)

    (s, msg) = epidb.cancel_request(req_other, user_three[1])
    self.assertEqual(msg, "130003:The request ID 'r3' is invalid.")

    (s, m) = epidb.cancel_request(req_other, self.admin_key)
    self.assertSuccess(s, m)

    (s, ss) = epidb.info(req_other, self.admin_key)
    self.assertEqual(ss[0]['state'], 'canceled')

    (s, ss_count) = epidb.info(req_count, self.admin_key)
    self.assertEqual(ss_count[0]["state"], "removed")

    (s, ss_regions) = epidb.info(req_regions, self.admin_key)
    self.assertEqual(ss_regions[0]["state"], "canceled")

    s, e1 = epidb.get_request_data(req_count, self.admin_key)
    self.assertEqual(
        e1, "Request ID r1 was not finished. Please, check its status.")

    s, e2 = epidb.get_request_data(req_regions, self.admin_key)
    self.assertEqual(
        e2, "Request ID r2 was not finished. Please, check its status.")
def test_aggregation(self):
    """Aggregate CpG island lengths over 1 Mb tiles and filter the result.

    Aggregates ``@LENGTH`` of the "Cpg Islands" annotation over a
    1,000,000 bp tiling of hg19, passes the aggregation through
    ``query_cache`` and checks region counts for several ``@AGG.COUNT``
    filters as well as the full aggregated output of the tiles whose
    count is at least 100.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    cpg_island = ",".join([
        "CHROMOSOME", "START", "END", "NAME", "LENGTH", "NUM_CPG",
        "NUM_GC", "PER_CPG", "PER_CG", "OBS_EXP"
    ])
    with open("data/cpgIslandExtFull.txt", 'r') as f:
        file_data = f.read()

    (res, a_1) = epidb.add_annotation("Cpg Islands", "hg19",
                                      "Complete CpG islands", file_data,
                                      cpg_island, None, self.admin_key)
    self.assertSuccess(res, a_1)

    res, q_cgi = epidb.select_annotations("Cpg Islands", "hg19", None, None,
                                          None, self.admin_key)
    self.assertSuccess(res, q_cgi)

    res, qid_2 = epidb.tiling_regions(1000000, "hg19", None, self.admin_key)
    self.assertSuccess(res, qid_2)

    res, req = epidb.count_regions(qid_2, self.admin_key)
    self.assertSuccess(res, req)
    count = self.count_request(req)
    self.assertEqual(count, 3118)

    res, _qid_3 = epidb.aggregate(q_cgi, qid_2, "@LENGTH", self.admin_key)
    self.assertSuccess(res, _qid_3)

    res, qid_3 = epidb.query_cache(_qid_3, True, self.admin_key)
    self.assertSuccess(res, qid_3)

    res, qid_4 = epidb.filter_regions(qid_3, "@AGG.COUNT", ">", "0",
                                      "number", self.admin_key)
    res, req = epidb.count_regions(qid_4, self.admin_key)
    count = self.count_request(req)
    self.assertEqual(count, 2574)

    # NOTE(review): this filter runs on the plain tiling query (qid_2),
    # not on the aggregation (qid_3) - confirm that is intended.
    res, qid_4 = epidb.filter_regions(qid_2, "@AGG.COUNT", "<", "0",
                                      "number", self.admin_key)
    res, req = epidb.count_regions(qid_4, self.admin_key)
    count = self.count_request(req)
    self.assertEqual(count, 0)

    res, qid_4 = epidb.filter_regions(qid_3, "@AGG.COUNT", ">=", "100",
                                      "number", self.admin_key)

    (res, req) = epidb.get_regions(
        qid_4,
        "CHROMOSOME,START,END,@AGG.MIN,@AGG.MAX,@AGG.MEDIAN,@AGG.MEAN,@AGG.VAR,@AGG.SD,@AGG.COUNT,@AGG.SUM",
        self.admin_key)
    self.assertSuccess(res, req)
    regions = self.get_regions_request(req)

    expected = (
        'chr1\t1000000\t2000000\t201.0000\t5585.0000\t469.0000\t766.0082\t589695.4375\t767.9163\t122\t93453.0000\n'
        'chr16\t0\t1000000\t201.0000\t6377.0000\t484.0000\t746.6083\t674998.0625\t821.5826\t120\t89593.0000\n'
        'chr16\t1000000\t2000000\t201.0000\t5449.0000\t398.0000\t666.6393\t630197.3125\t793.8497\t122\t81330.0000\n'
        'chr16\t2000000\t3000000\t201.0000\t4843.0000\t533.0000\t780.4951\t559994.2500\t748.3276\t101\t78830.0000\n'
        'chr16\t88000000\t89000000\t202.0000\t3785.0000\t347.0000\t553.3461\t295400.6875\t543.5078\t104\t57548.0000\n'
        'chr19\t0\t1000000\t201.0000\t7814.0000\t424.0000\t776.2705\t944608.4375\t971.9097\t122\t94705.0000\n'
        'chr19\t1000000\t2000000\t201.0000\t6035.0000\t430.0000\t738.8853\t625527.1250\t790.9027\t183\t135216.0000\n'
        'chr19\t2000000\t3000000\t201.0000\t3978.0000\t395.0000\t673.9907\t444749.5000\t666.8954\t107\t72117.0000\n'
        'chr19\t3000000\t4000000\t201.0000\t2753.0000\t387.0000\t531.0648\t172512.1094\t415.3458\t108\t57355.0000\n'
        'chr20\t62000000\t63000000\t202.0000\t5019.0000\t501.0000\t716.2427\t427763.9375\t654.0366\t103\t73773.0000\n'
        'chr7\t0\t1000000\t201.0000\t6234.0000\t348.0000\t556.3500\t475220.5625\t689.3624\t100\t55635.0000\n'
        'chr9\t139000000\t140000000\t202.0000\t6342.0000\t406.0000\t777.3303\t817548.5625\t904.1839\t109\t84729.0000'
    )
    self.assertEqual(regions, expected)

    (s, req) = epidb.count_regions(qid_4, self.admin_key)
    self.assertSuccess(s, req)
    count = self.count_request(req)
    self.assertEqual(count, 12)
def test_big_files_intersect(self):
    """Check that overlap results partition two large experiments.

    For each experiment, the counts returned by ``overlap`` with the
    boolean flag set to False and to True must add up to the experiment's
    total count. Further ``overlap`` calls are then chained on those
    results and their counts compared with fixed expected values.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_full(epidb)

    def region_count(query_id):
        # Issue a count request, check it was accepted and fetch the number.
        res, req = epidb.count_regions(query_id, self.admin_key)
        self.assertSuccess(res, req)
        return self.count_request(req)

    self.insert_experiment(epidb, "hg19_big_1")
    self.insert_experiment(epidb, "hg19_big_2")

    # Testing if the total of regions is the same as overlap and
    # not-overlap for hg19_big_1.
    res, qid_1_1 = epidb.select_experiments("hg19_big_1", None, None, None,
                                            self.admin_key)
    self.assertSuccess(res, qid_1_1)
    c = region_count(qid_1_1)
    self.assertEqual(c, 54307)

    res, qid_1_2 = epidb.select_experiments("hg19_big_2", None, None, None,
                                            self.admin_key)
    self.assertSuccess(res, qid_1_2)

    res, q6 = epidb.overlap(qid_1_1, qid_1_2, False, 0, "bp",
                            self.admin_key)
    self.assertSuccess(res, q6)
    c1 = region_count(q6)
    self.assertEqual(c1, 17180)

    res, q6 = epidb.overlap(qid_1_1, qid_1_2, True, 0, "bp", self.admin_key)
    self.assertSuccess(res, q6)
    c2 = region_count(q6)
    self.assertEqual(c2, 37127)

    self.assertEqual(c, c1 + c2)

    # Testing if the total of regions is the same as overlap and
    # not-overlap for hg19_big_2.
    cc1 = region_count(qid_1_2)
    self.assertEqual(cc1, 77543)

    res, q7 = epidb.overlap(qid_1_2, qid_1_1, False, 0, "bp",
                            self.admin_key)
    self.assertSuccess(res, q7)
    c3 = region_count(q7)
    self.assertEqual(c3, 39771)

    res, q8 = epidb.overlap(qid_1_2, qid_1_1, True, 0, "bp", self.admin_key)
    self.assertSuccess(res, q8)
    c4 = region_count(q8)
    self.assertEqual(c4, 37772)

    self.assertEqual(cc1, c3 + c4)

    # Chained overlaps on the results above.
    res, q9 = epidb.overlap(qid_1_2, q7, False, 0, "bp", self.admin_key)
    self.assertSuccess(res, q9)
    self.assertEqual(region_count(q9), 37772)

    res, q10 = epidb.overlap(q9, q8, False, 0, "bp", self.admin_key)
    self.assertSuccess(res, q10)
    self.assertEqual(region_count(q10), 0)

    res, q11 = epidb.overlap(qid_1_2, q8, False, 0, "bp", self.admin_key)
    self.assertSuccess(res, q11)
    self.assertEqual(region_count(q11), 39771)

    res, q12 = epidb.overlap(q9, q7, False, 0, "bp", self.admin_key)
    self.assertSuccess(res, q12)
    # NOTE(review): q10 is counted again here and q12 is never counted -
    # this looks like a copy-paste slip. Left unchanged because the
    # expected count for q12 cannot be confirmed from this file.
    self.assertEqual(region_count(q10), 0)
def test_select_only_signal(self):
    """Restrict a selection by experiment type and count each subset.

    One bedgraph experiment and one further experiment are loaded. The
    full selection holds 30 regions; restricted to "peaks" it holds 21
    and to "signal" 9. Asking for "peaks" on top of the signal query
    gives 21 again, and counting that query twice returns the same
    request.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]

    signal_files = ["reference_example"]
    for signal_file in signal_files:
        payload = helpers.load_bedgraph(signal_file)
        outcome = epidb.add_experiment(signal_file, "hg19", "Methylation",
                                       sample_id, "tech1", "ENCODE",
                                       "desc1", payload, "bedgraph", None,
                                       self.admin_key)
        self.assertSuccess(outcome)

    self.insert_experiment(epidb, "hg19_chr1_1", sample_id)

    everything_args = ("", "hg19", None, None, None, None, None, None, None,
                       self.admin_key)

    (_, everything) = epidb.select_regions(*everything_args)
    (status, request_id) = epidb.count_regions(everything, self.admin_key)
    self.assertSuccess(status, request_id)
    # Total amount of regions
    self.assertEqual(30, self.count_request(request_id))

    (_, everything) = epidb.select_regions(*everything_args)

    (status, peaks_only) = epidb.query_experiment_type(everything, "peaks",
                                                       self.admin_key)
    self.assertSuccess(status, peaks_only)
    (_, request_id) = epidb.count_regions(peaks_only, self.admin_key)
    # Only peaks
    self.assertEqual(21, self.count_request(request_id))

    (status, signal_only) = epidb.query_experiment_type(everything,
                                                        "signal",
                                                        self.admin_key)
    self.assertSuccess(status, signal_only)
    (_, request_id) = epidb.count_regions(signal_only, self.admin_key)
    # Only signal
    self.assertEqual(9, self.count_request(request_id))

    (status, peaks_from_signal) = epidb.query_experiment_type(
        signal_only, "peaks", self.admin_key)
    self.assertSuccess(status, peaks_from_signal)

    (_, first_request) = epidb.count_regions(peaks_from_signal,
                                             self.admin_key)
    # Only peaks again, but deriving from a signal query
    self.assertEqual(21, self.count_request(first_request))

    (_, second_request) = epidb.count_regions(peaks_from_signal,
                                              self.admin_key)
    # Only peaks again, but deriving from a signal query
    self.assertEqual(21, self.count_request(second_request))

    self.assertEqual(first_request, second_request)
def test_genes_location(self):
    """Exercise gene-model insertion and location-based gene queries.

    Loads the gzipped GTF fixture as the gene model "Test One" and checks:

    * the regions returned by ``select_genes`` for chr1 within 1000-15000;
    * the region count for chr1/chr11/chr21 within 10000-2000000;
    * the list of registered gene models;
    * ``list_genes`` by location (issued twice) and by gene name, which
      must all return the same single gene record;
    * the number of genes listed for chr10.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    # Read through a context manager so the gzip handle is always closed.
    with gzip.open(
            "data/gtf/gencode.v19.annotation.ONLY_GENES.gtf.gz") as gtf_file:
        data = gtf_file.read()

    (s, ss) = epidb.add_gene_model("Test One", "hg19",
                                   "Test One Description", data, "GTF", {},
                                   self.admin_key)
    self.assertSuccess(s, ss)

    (s, query_id) = epidb.select_genes(None, "", "Test One", ["chr1"],
                                       1000, 15000, self.admin_key)
    (s, r_id) = epidb.get_regions(query_id, "CHROMOSOME,START,END",
                                  self.admin_key)
    regions = self.get_regions_request(r_id)
    self.assertEqual(regions, "chr1\t11869\t14412\nchr1\t14363\t29806")

    (s, query_id) = epidb.select_genes(None, "", "Test One",
                                       ["chr1", "chr11", "chr21"],
                                       10000, 2000000, self.admin_key)
    (s, r_id) = epidb.count_regions(query_id, self.admin_key)
    # The count request is fetched with get_regions_request here, so the
    # result is compared as the raw {'count': N} payload.
    count = self.get_regions_request(r_id)
    self.assertEqual(count, {'count': 269})

    status, gene_models = epidb.list_gene_models(self.admin_key)
    self.assertEqual(gene_models, [['gs1', 'Test One']])
    gene_model_name = gene_models[0][1]

    # The single gene record every chr21 lookup below must return.
    expected_genes = [{
        'transcript_status': 'NOVEL',
        'gene_name': 'CR381670.1',
        'gene_type': 'miRNA',
        'end': 9683272,
        'source': 'ENSEMBL',
        'frame': '.',
        'level': '3',
        'gene_id': 'ENSG00000238411.1',
        'start': 9683191,
        'transcript_id': 'ENSG00000238411.1',
        'score': 0.0,
        'strand': '+',
        'transcript_name': 'CR381670.1',
        '_id': 'gn52851',
        'gene_status': 'NOVEL',
        'transcript_type': 'miRNA',
        'chromosome': 'chr21',
    }]

    # Lookup by location; the same request is deliberately issued twice,
    # as in the original test.
    (s, genes) = epidb.list_genes("", "", "chr21", 9683191, 9683272,
                                  gene_model_name, self.admin_key)
    self.assertEqual(genes, expected_genes)

    (s, genes) = epidb.list_genes("", "", "chr21", 9683191, 9683272,
                                  gene_model_name, self.admin_key)
    self.assertEqual(genes, expected_genes)

    # Lookup by gene name, without start/end limits.
    (s, genes) = epidb.list_genes("CR381670.1", "", "chr21", None, None,
                                  gene_model_name, self.admin_key)
    self.assertEqual(genes, expected_genes)

    # Every gene listed for chr10.
    (s, genes) = epidb.list_genes(None, "", "chr10", None, None,
                                  gene_model_name, self.admin_key)
    self.assertEqual(2260, len(genes))
def test_wrong_chromosomes_usage(self):
    """Check annotation counts when selecting by one or many chromosomes.

    Loads the CpG-islands annotation from ``data/cpgIslandExtFull.txt``
    and verifies that:

    * chr1 alone holds 2462 regions;
    * the counts of chr1, chr7, chr18 and chrX queried one at a time add
      up to the count of a single query over those four chromosomes;
    * a query over the full chromosome list below returns as many regions
      as the file has newline-separated entries.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    cpg_island = ",".join([
        "CHROMOSOME", "START", "END", "NAME", "LENGTH", "NUM_CPG",
        "NUM_GC", "PER_CPG", "PER_CG", "OBS_EXP"
    ])

    with open("data/cpgIslandExtFull.txt", 'r') as f:
        file_data = f.read()

    res = epidb.add_annotation("Cpg Islands", "hg19",
                               "Complete CpG islands", file_data,
                               cpg_island, None, self.admin_key)
    self.assertSuccess(res)

    size_total = len(file_data.split("\n"))

    (status, qid_cpg) = epidb.select_annotations(
        "Cpg Islands", "hg19", "chr1", None, None, self.admin_key)
    (s, req) = epidb.count_regions(qid_cpg, self.admin_key)
    self.assertSuccess(s, req)
    c = self.count_request(req)
    self.assertEqual(2462, c)

    # Count each chromosome on its own (same order as before), then
    # compare the sum with one query over all four chromosomes.
    chromosomes = ["chr1", "chr7", "chr18", "chrX"]
    total = 0
    for chromosome in chromosomes:
        (status, qid) = epidb.select_annotations(
            "Cpg Islands", "hg19", chromosome, None, None, self.admin_key)
        (s, req) = epidb.count_regions(qid, self.admin_key)
        total += int(self.count_request(req))

    (status, qid_count) = epidb.select_annotations(
        "Cpg Islands", "hg19", chromosomes, None, None, self.admin_key)
    (s, req) = epidb.count_regions(qid_count, self.admin_key)
    c = self.count_request(req)
    self.assertEqual(c, total)

    # Chromosome names are listed explicitly so they do not depend on the
    # whitespace layout of a multi-line string literal.
    cpg_island_chrs = [
        "chr1", "chr10", "chr11", "chr11_gl000202_random", "chr12",
        "chr13", "chr14", "chr15", "chr16", "chr17", "chr17_ctg5_hap1",
        "chr17_gl000204_random", "chr17_gl000205_random", "chr18",
        "chr19", "chr1_gl000191_random", "chr1_gl000192_random", "chr2",
        "chr20", "chr21", "chr22", "chr3", "chr4", "chr4_ctg9_hap1",
        "chr4_gl000193_random", "chr4_gl000194_random", "chr5", "chr6",
        "chr6_apd_hap1", "chr6_cox_hap2", "chr6_dbb_hap3",
        "chr6_mann_hap4", "chr6_mcf_hap5", "chr6_qbl_hap6",
        "chr6_ssto_hap7", "chr7", "chr8", "chr8_gl000197_random", "chr9",
        "chr9_gl000199_random", "chr9_gl000200_random",
        "chr9_gl000201_random", "chrUn_gl000211", "chrUn_gl000212",
        "chrUn_gl000213", "chrUn_gl000214", "chrUn_gl000215",
        "chrUn_gl000216", "chrUn_gl000217", "chrUn_gl000218",
        "chrUn_gl000219", "chrUn_gl000220", "chrUn_gl000221",
        "chrUn_gl000222", "chrUn_gl000223", "chrUn_gl000224",
        "chrUn_gl000225", "chrUn_gl000228", "chrUn_gl000229",
        "chrUn_gl000231", "chrUn_gl000235", "chrUn_gl000236",
        "chrUn_gl000237", "chrUn_gl000240", "chrUn_gl000241",
        "chrUn_gl000242", "chrUn_gl000243", "chrX", "chrY",
    ]

    (status, qid_count) = epidb.select_annotations(
        "Cpg Islands", "hg19", cpg_island_chrs, None, None,
        self.admin_key)
    (s, req) = epidb.count_regions(qid_count, self.admin_key)
    c = self.count_request(req)
    self.assertEqual(size_total, c)
def test_complex2(self):
    """Chain select, intersect, tile, merge, filter and cache operations.

    Builds a multi-step query over the "hg19_big_1" and "hg19_big_2"
    experiments, counting regions after every step (asserting the count
    where an expected value is known), and finally compares the retrieved
    regions with the stored "complex2" result.
    """
    client = DeepBlueClient(address="localhost", port=31415)
    self.init_full(client)

    for experiment_name in ("hg19_big_1", "hg19_big_2"):
        self.insert_experiment(client, experiment_name)

    def count_of(query_id):
        # Request a region count, require success, and return the result.
        status, request = client.count_regions(query_id, self.admin_key)
        self.assertSuccess(status, request)
        return self.count_request(request)

    # Step 1a: hg19_big_1 limited to the 1,000,000 - 3,000,000 range.
    status, by_range = client.select_regions(
        "hg19_big_1", "hg19", None, None, None, None, None,
        1000000, 3000000, self.admin_key)
    self.assertSuccess(status, by_range)
    count_of(by_range)

    # Step 1b: hg19_big_1 limited to a handful of chromosomes.
    status, by_chromosome = client.select_regions(
        "hg19_big_1", "hg19", None, None, None, None,
        ["chr1", "chr3", "chr11", "chrX", "chr9"], None, None,
        self.admin_key)
    self.assertSuccess(status, by_chromosome)
    count_of(by_chromosome)

    # Step 2a: regions from 1,000,000 to 3,000,000 on chromosomes
    # chr1, chr3, chr9, chr11 and chrX.
    status, ranged_on_chromosomes = client.intersection(
        by_range, by_chromosome, self.admin_key)
    self.assertSuccess(status, ranged_on_chromosomes)
    self.assertEqual(count_of(ranged_on_chromosomes), 247)

    # Step 2b: tiling regions of size 1000 over four chromosomes.
    status, tiles = client.tiling_regions(
        1000, "hg19", ["chr1", "chr2", "chr15", "chrX"], self.admin_key)
    self.assertSuccess(status, tiles)
    count_of(tiles)

    # Step 3: merge the intersection with the tiles.
    status, merged = client.merge_queries(
        ranged_on_chromosomes, tiles, self.admin_key)
    self.assertSuccess(status, merged)
    count_of(merged)

    # Step 4a: keep merged regions with START >= 2000000.
    status, late_starts = client.filter_regions(
        merged, "START", ">=", "2000000", "number", self.admin_key)
    self.assertSuccess(status, late_starts)
    count_of(late_starts)

    # Step 4b: hg19_big_2 on chr1 and chrX, passed through query_cache.
    status, second_experiment = client.select_regions(
        "hg19_big_2", "hg19", None, None, None, None,
        ["chr1", "chrX"], None, None, self.admin_key)
    self.assertSuccess(status, second_experiment)
    status, second_experiment_cached = client.query_cache(
        second_experiment, True, self.admin_key)
    self.assertEqual(count_of(second_experiment_cached), 8961)

    # Step 5: intersect the filtered merge with the cached selection.
    status, intersected = client.intersection(
        late_starts, second_experiment_cached, self.admin_key)
    self.assertSuccess(status, intersected)
    self.assertEqual(count_of(intersected), 14356)

    # Step 6: keep regions with END < 2200000.
    status, early_ends = client.filter_regions(
        intersected, "END", "<", "2200000", "number", self.admin_key)
    self.assertSuccess(status, early_ends)
    # NOTE(review): the cached query id is requested but never used; the
    # count and get_regions below run against the uncached query.
    status, early_ends_cached = client.query_cache(
        early_ends, True, self.admin_key)
    self.assertEqual(count_of(early_ends), 52)

    # Final check: the region listing must match the stored result.
    output_format = (
        "CHROMOSOME,START,END,NAME,SCORE,STRAND,"
        "SIGNAL_VALUE,P_VALUE,Q_VALUE,PEAK,@NAME")
    status, request = client.get_regions(
        early_ends, output_format, self.admin_key)
    self.assertSuccess(status, request)

    actual = self.get_regions_request(request)
    expected = helpers.get_result("complex2")
    self.assertEqual(actual, expected)