def test_merge_tiling(self):
    """Merge range-limited chr1 tiling regions with the hg19_chr1_1 experiment."""
    client = DeepBlueClient(address="localhost", port=31415)
    self.init_full(client)
    key = self.admin_key

    status, tiles = client.tiling_regions(10000, "hg19", "chr1", key)
    self.assertSuccess(status, tiles)

    status, experiment = client.select_regions(
        "hg19_chr1_1", "hg19",
        None, None, None, None, None, None, None,
        key)
    self.assertSuccess(status, experiment)

    # Keep only the tiles that lie within the range of the experiment.
    status, tiles_lower_bounded = client.filter_regions(
        tiles, "START", ">=", "713240", "number", key)
    self.assertSuccess(status, tiles_lower_bounded)

    status, tiles_bounded = client.filter_regions(
        tiles_lower_bounded, "END", "<=", "876330", "number", key)
    self.assertSuccess(status, tiles_bounded)

    status, merged = client.merge_queries(tiles_bounded, experiment, key)
    self.assertSuccess(status, merged)

    status, request_id = client.get_regions(merged, "CHROMOSOME,START,END", key)
    self.assertSuccess(status, request_id)

    actual = self.get_regions_request(request_id)
    wanted = helpers.get_result("merge_tiling")
    self.assertEqual(actual, wanted)
def test_filter_regions(self):
    """Filter one experiment by a numeric START bound and by STRAND (==, !=)."""
    client = DeepBlueClient(address="localhost", port=31415)
    self.init_full(client)
    key = self.admin_key
    strand_columns = "CHROMOSOME,START,END,STRAND"

    status, experiment = client.select_regions(
        "hg19_chr1_1", "hg19",
        None, None, None, None, None, None, None,
        key)
    self.assertSuccess(status, experiment)

    # Numeric filter (the value literal carries a trailing space).
    status, from_875400 = client.filter_regions(
        experiment, "START", ">=", "875400 ", "number", key)
    self.assertSuccess(status, from_875400)

    status, request_id = client.get_regions(from_875400, "CHROMOSOME,START,END", key)
    self.assertSuccess(status, request_id)
    actual = self.get_regions_request(request_id)
    wanted = helpers.get_result("filter_ge_875400")
    self.assertEqual(actual, wanted)

    # String filter: strand equal to "+".
    status, plus_strand = client.filter_regions(
        experiment, "STRAND", "==", "+", "string", key)
    self.assertSuccess(status, plus_strand)

    status, request_id = client.get_regions(plus_strand, strand_columns, key)
    self.assertSuccess(status, request_id)
    actual = self.get_regions_request(request_id)
    self.assertEqual(
        actual,
        'chr1\t713240\t713390\t+\n'
        'chr1\t713900\t714050\t+\n'
        'chr1\t714160\t714310\t+\n'
        'chr1\t714540\t714690\t+\n'
        'chr1\t715060\t715210\t+\n'
        'chr1\t762060\t762210\t+\n'
        'chr1\t839540\t839690\t+\n'
        'chr1\t840080\t840230\t+\n'
        'chr1\t860240\t860390\t+\n'
        'chr1\t875400\t875550\t+\n'
        'chr1\t876180\t876330\t+')

    # String filter: strand different from "+".
    status, other_strand = client.filter_regions(
        experiment, "STRAND", "!=", "+", "string", key)
    self.assertSuccess(status, other_strand)

    status, request_id = client.get_regions(other_strand, strand_columns, key)
    self.assertSuccess(status, request_id)
    actual = self.get_regions_request(request_id)
    self.assertEqual(
        actual,
        'chr1\t713520\t713670\t-\n'
        'chr1\t761180\t761330\t-\n'
        'chr1\t762420\t762570\t.\n'
        'chr1\t762820\t762970\t-\n'
        'chr1\t763020\t763170\t-\n'
        'chr1\t840600\t840750\t-\n'
        'chr1\t858880\t859030\t.\n'
        'chr1\t859600\t859750\t.\n'
        'chr1\t861040\t861190\t-\n'
        'chr1\t875900\t876050\t-')
def test_big_file(self):
    """Upload a large bedgraph annotation and check VALUE binning, unfiltered and filtered.

    NOTE(review): the lower-bound (">") filter result is never fed into the
    upper-bound ("<") filter; both are applied to the unfiltered query and
    only the second one is binned. The stored expectations (minimum range
    still -1126.72) match that behaviour, so it is kept as is. Confirm
    whether chaining the two filters was the real intent.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    # Use a context manager so the gzip handle is closed deterministically.
    with gzip.open("data/bedgraph/bigwig.bg.gz") as f:
        data = f.read()

    status, a1 = epidb.add_annotation("exp_wig", "hg19", "bla", data,
                                      "bedgraph", None, self.admin_key)
    self.assertSuccess(status, a1)

    status, q1 = epidb.select_annotations("exp_wig", "hg19", None, None,
                                          None, self.admin_key)
    self.assertSuccess(status, q1)

    status, r1 = epidb.binning(q1, "VALUE", 5, self.admin_key)
    self.assertSuccess(status, r1)
    binning = self.get_regions_request(r1)
    self.assertEqual(
        binning, {
            'binning': {
                'ranges': [
                    -1126.72, -726.6238, -326.5276, 73.5686, 473.6648,
                    873.761
                ],
                'counts': [8, 5, 3992582, 3489, 13]
            }
        })

    # Lower-bound filter: created on the server but not used afterwards
    # (see the NOTE in the docstring).
    to_filter_low = binning["binning"]["ranges"][2]
    status, filtered_low = epidb.filter_regions(
        q1, "VALUE", ">", str(to_filter_low), "number", self.admin_key)
    self.assertSuccess(status, filtered_low)

    # Upper-bound filter: this is the query that is actually binned below.
    to_filter_high = binning["binning"]["ranges"][4]
    status, filtered = epidb.filter_regions(
        q1, "VALUE", "<", str(to_filter_high), "number", self.admin_key)
    self.assertSuccess(status, filtered)

    status, r_filtered = epidb.binning(filtered, "VALUE", 10, self.admin_key)
    self.assertSuccess(status, r_filtered)
    binning = self.get_regions_request(r_filtered)
    self.assertEqual(
        binning, {
            'binning': {
                'counts': [4, 4, 1, 2, 2, 17, 1, 3932813, 772, 119],
                'ranges': [
                    -1126.72, -967.0013, -807.2826, -647.5638, -487.8452,
                    -328.1265, -168.4077, -8.689, 151.0297, 310.7484,
                    470.4671
                ]
            }
        })
def test_annotation_signal_bedgraph(self):
    """Load a bedgraph annotation, count its regions and check a chain of filters.

    The filters keep regions with 0.75 < VALUE < 0.8 that are on neither
    chrX nor chr7; the remaining rows are compared with a fixed listing.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    files = ["test1"]
    for filename in files:
        wig_data = helpers.load_bedgraph(filename)
        res = epidb.add_annotation(filename, "hg19", "Test data", wig_data,
                                   "bedgraph", None, self.admin_key)
        self.assertSuccess(res)

    (s, q) = epidb.select_annotations(files, "hg19", None, None, None,
                                      self.admin_key)
    self.assertSuccess(s, q)

    (s, req) = epidb.count_regions(q, self.admin_key)
    self.assertSuccess(s, req)
    count = self.count_request(req)
    self.assertEqual(1000, count)

    # Each filter is applied on top of the previous one; check every step
    # so a failure points at the call that broke, not at the final compare.
    (s, q_filtered_down) = epidb.filter_regions(
        q, "VALUE", ">", "0.75", "number", self.admin_key)
    self.assertSuccess(s, q_filtered_down)
    (s, q_filtered_up) = epidb.filter_regions(
        q_filtered_down, "VALUE", "<", "0.8", "number", self.admin_key)
    self.assertSuccess(s, q_filtered_up)
    (s, q_chr_x) = epidb.filter_regions(
        q_filtered_up, "CHROMOSOME", "!=", "chrX", "string", self.admin_key)
    self.assertSuccess(s, q_chr_x)
    (s, q_chr_7) = epidb.filter_regions(
        q_chr_x, "CHROMOSOME", "!=", "chr7", "string", self.admin_key)
    self.assertSuccess(s, q_chr_7)

    (s, req) = epidb.get_regions(
        q_chr_7, "CHROMOSOME,START,END,VALUE,@NAME,@EPIGENETIC_MARK",
        self.admin_key)
    self.assertSuccess(s, req)
    regions = self.get_regions_request(req)

    self.assertEqual(
        regions,
        'chr1\t104372258\t104372293\t0.7767\ttest1\t\n'
        'chr10\t126498141\t126498176\t0.7695\ttest1\t\n'
        'chr11\t66110277\t66110312\t0.7613\ttest1\t\n'
        'chr15\t38653026\t38653061\t0.7720\ttest1\t\n'
        'chr15\t87725326\t87725361\t0.7727\ttest1\t\n'
        'chr16\t2119419\t2119454\t0.7696\ttest1\t\n'
        'chr16\t63360719\t63360754\t0.7740\ttest1\t\n'
        'chr19\t46369215\t46369250\t0.7727\ttest1\t\n'
        'chr8\t21923667\t21923702\t0.7930\ttest1\t'
    )
def test_filter_tiling(self):
    """Filter chr1 tiling regions by END and compare with the stored result."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    # Initialise through the client created above, as every other test in
    # this file does (the original called init_base() with no argument).
    self.init_base(epidb)

    res, qid = epidb.tiling_regions(10000, "hg19", "chr1", self.admin_key)
    self.assertSuccess(res, qid)

    res, qid2 = epidb.filter_regions(qid, "END", "<=", "100000", "number",
                                     self.admin_key)
    self.assertSuccess(res, qid2)

    res, req = epidb.get_regions(qid2, "CHROMOSOME,START,END", self.admin_key)
    self.assertSuccess(res, req)
    regions = self.get_regions_request(req)

    expected_regions = helpers.get_result("filter_tiling")
    self.assertEqual(regions, expected_regions)
def test_filter_two_genomes(self):
    """Apply a numeric START filter to experiments selected from two genomes."""
    client = DeepBlueClient(address="localhost", port=31415)
    self.init_full(client)
    key = self.admin_key

    experiments = ["hg19_chr1_1", "hg18_chr1_1"]
    genomes = ["hg19", "hg18"]
    status, selected = client.select_regions(
        experiments, genomes,
        None, None, None, None, None, None, None,
        key)
    self.assertSuccess(status, selected)

    # The value literal carries a trailing space.
    status, from_875400 = client.filter_regions(
        selected, "START", ">=", "875400 ", "number", key)
    self.assertSuccess(status, from_875400)

    status, request_id = client.get_regions(from_875400, "CHROMOSOME,START,END", key)
    self.assertSuccess(status, request_id)

    actual = self.get_regions_request(request_id)
    wanted = helpers.get_result("filter_multiple_genomes_ge_875400")
    self.assertEqual(actual, wanted)
def test_remove_full_chromosome_data(self):
    """Select hg19_big_2 in the range 0-9841558 and check the regions on chr21."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_full(epidb)
    self.insert_experiment(epidb, "hg19_big_2")

    res, qid_1_1 = epidb.select_regions("hg19_big_2", "hg19", None, None,
                                        None, None, None, 0, 9841558,
                                        self.admin_key)
    self.assertSuccess(res, qid_1_1)

    res, req = epidb.count_regions(qid_1_1, self.admin_key)
    self.assertSuccess(res, req)
    # The count value itself is not asserted; the request is still
    # retrieved so the server-side sequence stays unchanged.
    self.count_request(req)

    (status, filtered_chr) = epidb.filter_regions(
        qid_1_1, "CHROMOSOME", "==", "chr21", "string", self.admin_key)
    self.assertSuccess(status, filtered_chr)

    res, req = epidb.get_regions(filtered_chr, "CHROMOSOME,START,END,STRAND",
                                 self.admin_key)
    self.assertSuccess(res, req)
    regions = self.get_regions_request(req)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        regions,
        "chr21\t9656828\t9656920\t.\n"
        "chr21\t9700370\t9700415\t.\n"
        "chr21\t9825445\t9826573\t.\n"
        "chr21\t9826759\t9827609\t.\n"
        "chr21\t9829381\t9829420\t.\n"
        "chr21\t9831594\t9831981\t.\n"
        "chr21\t9833197\t9833459\t.\n"
        "chr21\t9833733\t9833902\t.\n"
        "chr21\t9841288\t9841558\t.")
def test_correction_score(self):
    """Aggregate SIGNAL_VALUE of hg19_chr1_1 over 1000-wide chr1 tiles.

    Only tiles with @AGG.COUNT > 0 are kept, and their mean and count
    columns are compared with a fixed listing.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    self.insert_experiment(epidb, "hg19_chr1_1", sample_id)

    (s, data) = epidb.select_experiments("hg19_chr1_1", "chr1", None, None,
                                         self.admin_key)
    self.assertSuccess(s, data)

    res, qid_2 = epidb.tiling_regions(1000, "hg19", "chr1", self.admin_key)
    self.assertSuccess(res, qid_2)

    (s, q_agg) = epidb.aggregate(data, qid_2, "SIGNAL_VALUE", self.admin_key)
    self.assertSuccess(s, q_agg)

    res, qid_4 = epidb.filter_regions(q_agg, "@AGG.COUNT", ">", "0", "number",
                                      self.admin_key)
    self.assertSuccess(res, qid_4)

    status, req = epidb.get_regions(
        qid_4, "CHROMOSOME,START,END,@AGG.MEAN,@AGG.COUNT", self.admin_key)
    self.assertSuccess(status, req)
    rs = self.get_regions_request(req)

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        rs,
        "chr1\t713000\t714000\t27.1111\t3\n"
        "chr1\t714000\t715000\t39.5556\t3\n"
        "chr1\t715000\t716000\t24.0000\t1\n"
        "chr1\t761000\t762000\t6.0000\t1\n"
        "chr1\t762000\t763000\t54.6667\t3\n"
        "chr1\t763000\t764000\t12.0000\t1\n"
        "chr1\t839000\t840000\t15.0000\t1\n"
        "chr1\t840000\t841000\t14.0000\t2\n"
        "chr1\t858000\t859000\t16.8000\t1\n"
        "chr1\t859000\t860000\t10.6000\t2\n"
        "chr1\t860000\t861000\t41.0000\t1\n"
        "chr1\t861000\t862000\t22.0000\t1\n"
        "chr1\t875000\t876000\t13.0000\t2\n"
        "chr1\t876000\t877000\t13.5000\t2"
    )
def test_gene_expression(self):
    """End-to-end check of gene-expression upload, listing, selection and filtering.

    Covers: adding cufflinks and grape2 expression data, listing by sample,
    replica and project, visibility for a newly added user with GET_DATA
    permission, duplicate rejection, gene-model upload, selecting
    expressions by gene, and filtering the selected regions.
    """
    def read_gz(path):
        # Read a whole gzip file and close the handle deterministically
        # (the original left every gzip.open() handle unclosed).
        with gzip.open(path) as handle:
            return handle.read()

    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)
    key = self.admin_key

    (s, project) = epidb.add_project("DEEP", "Deutsche Epigenom", key)
    self.assertSuccess(s, project)

    # Three cufflinks replicas for sample s2 in project DEEP.
    data = read_gz("data/fpkm/small_1.fpkm_tracking.gz")
    (s, gene_expression) = epidb.add_expression(
        "gene", "s2", 1, data, "cufflinks", "DEEP", None, key)
    self.assertSuccess(s, gene_expression)

    data = read_gz("data/fpkm/small_2.fpkm_tracking.gz")
    (s, gene_expression) = epidb.add_expression(
        "gene", "s2", 2, data, "cufflinks", "DEEP", None, key)
    self.assertSuccess(s, gene_expression)

    data = read_gz("data/fpkm/small_3.fpkm_tracking.gz")
    (s, gene_expression) = epidb.add_expression(
        "gene", "s2", 44, data, "cufflinks", "DEEP", None, key)
    self.assertSuccess(s, gene_expression)

    (s, gex) = epidb.list_expressions("gene", "s1", None, None, key)
    self.assertEqual(gex, [])

    (s, gex) = epidb.list_expressions("gene", "s2", None, None, key)
    self.assertEqual(gex, [['gx1', ''], ['gx2', ''], ['gx3', '']])

    (s, gex) = epidb.list_expressions("gene", "s2", [1, 2], None, key)
    self.assertEqual(gex, [['gx1', ''], ['gx2', '']])

    (s, gex) = epidb.list_expressions("gene", "s2", 44, None, key)
    self.assertEqual(gex, [['gx3', '']])

    (s, gex) = epidb.list_expressions("gene", None, 1, "DEEP", key)
    self.assertEqual(gex, [['gx1', '']])

    # One cufflinks replica for sample s1 in project ENCODE.
    data = read_gz(
        "data/fpkm/51_Hf03_BlTN_Ct_mRNA_M_1.LXPv1.20150708_genes.fpkm_tracking.gz"
    )
    (s, gene_expression) = epidb.add_expression(
        "gene", "s1", 1, data, "cufflinks", "ENCODE", None, key)
    self.assertSuccess(s, gene_expression)

    (s, gex) = epidb.list_expressions("gene", None, None, None, key)
    self.assertEqual(gex,
                     [['gx1', ''], ['gx2', ''], ['gx3', ''], ['gx4', '']])

    (s, gex) = epidb.list_expressions("gene", "s1", 1, None, key)
    self.assertEqual(gex, [['gx4', '']])

    (s, gex) = epidb.list_expressions("gene", None, None, "ENCODE", key)
    self.assertEqual(gex, [['gx4', '']])

    (s, gex) = epidb.list_expressions("gene", ["s1", "s2"], 1, None, key)
    self.assertEqual(gex, [['gx1', ''], ['gx4', '']])

    (s, gex) = epidb.list_expressions("gene", ["s1", "s2"], 2, None, key)
    self.assertEqual(gex, [['gx2', '']])

    (s, gex) = epidb.list_expressions("gene", None, 1, "ENCODE", key)
    self.assertEqual(gex, [['gx4', '']])

    # A second user with GET_DATA permission.
    s, user = epidb.add_user("user", "email", "institution", key)
    # Check the status before unpacking so a failure reports the server
    # message instead of an unpacking error.
    self.assertSuccess(s, user)
    (user_id, user_key) = user

    (s, ss) = epidb.modify_user_admin(user_id, "permission_level",
                                      "GET_DATA", key)
    self.assertSuccess(s, ss)

    (s, gex) = epidb.list_expressions("gene", None, None, None, user_key)
    self.assertEqual(gex, [])

    (s, gex) = epidb.list_expressions("gene", "s1", 1, None, user_key)
    self.assertEqual(gex, [])

    (s, gex) = epidb.list_expressions("gene", None, None, "ENCODE", user_key)
    self.assertEqual(gex, "107000:Project 'ENCODE' does not exist.")

    (s, info) = epidb.info(gene_expression, key)
    self.assertEqual(
        info[0], {
            'format':
            'TRACKING_ID,GENE_ID,GENE_SHORT_NAME,FPKM,FPKM_CONF_LO,FPKM_CONF_HI,FPKM_STATUS',
            'sample_info': {
                'biosource_name': 'K562',
                'karyotype': 'cancer',
                'sex': 'F'
            },
            'content_format': 'cufflinks',
            'total_genes': 57910,
            'replica': 1,
            'sample_id': 's1',
            '_id': 'gx4',
            'extra_metadata': {},
            'columns': [{
                'name': 'TRACKING_ID',
                'column_type': 'string'
            }, {
                'name': 'GENE_ID',
                'column_type': 'string'
            }, {
                'name': 'GENE_SHORT_NAME',
                'column_type': 'string'
            }, {
                'name': 'FPKM',
                'column_type': 'double'
            }, {
                'name': 'FPKM_CONF_LO',
                'column_type': 'double'
            }, {
                'name': 'FPKM_CONF_HI',
                'column_type': 'double'
            }, {
                'name': 'FPKM_STATUS',
                'column_type': 'string'
            }]
        })

    # grape2 data: replica 1 of s1 already exists, replica 2 is accepted.
    data = read_gz(
        "data/grape2/SP8-TH91.gene_quantification.rsem_grape2_crg.GRCh38.20150622.results.txt.gz"
    )
    (s, gene_expression) = epidb.add_expression(
        "gene", "s1", 1, data, "grape2", "ENCODE", None, key)
    self.assertEqual(
        gene_expression,
        "131001:A Expression of the type 'gene' with sample_id 's1' and replica '1' already exists."
    )

    (s, gene_expression) = epidb.add_expression(
        "gene", "s1", 2, data, "grape2", "ENCODE", None, key)
    self.assertSuccess(s, gene_expression)

    data = read_gz("data/gtf/gencode.v19.annotation.ONLY_GENES.gtf.gz")
    (s, gene_model_id) = epidb.add_gene_model(
        "gencode v19", "hg19", "Test One Description", data, "GTF", {}, key)
    self.assertSuccess(s, gene_model_id)

    # Select one gene from the grape2 replica, with region columns.
    (status, gx_query) = epidb.select_expressions(
        "gene", "s1", 2, "ENSG00000000003.13", "ENCODE", "gencode v19", key)
    self.assertSuccess(status, gx_query)
    # The info result is not used; the call is kept so the sequence of
    # requests sent to the server stays the same.
    epidb.info(gx_query, user_key)
    (status, r_id) = epidb.get_regions(
        gx_query,
        "CHROMOSOME,START,END,@STRAND,"
        "GENE_ID,TRANSCRIPT_IDS,LENGTH,EFFECTIVE_LENGTH,EXPECTED_COUNT,"
        "TPM,FPKM,POSTERIOR_MEAN_COUNT,POSTERIOR_STANDARD_DEVIATION_OF_COUNT,"
        "PME_TPM,PME_FPKM,TPM_CI_LOWER_BOUND,TPM_CI_UPPER_BOUND,"
        "FPKM_CI_LOWER_BOUND,FPKM_CI_UPPER_BOUND",
        key)
    self.assertSuccess(status, r_id)
    regions = self.get_regions_request(r_id)
    self.assertEqual(
        regions,
        "chrX\t99883667\t99894988\t-\t"
        "ENSG00000000003.13\tENSG00000000003.13\t2025\t1855.4301\t161.0000\t"
        "1.0000\t2.1300\t161.0000\t0.0000\t1.0500\t2.2700\t0.8742\t1.2451\t"
        "1.8882\t2.6879"
    )

    # Same selection, expression columns only.
    (status, gx_query) = epidb.select_expressions(
        "gene", "s1", 2, "ENSG00000000003.13", "ENCODE", "gencode v19", key)
    self.assertSuccess(status, gx_query)
    epidb.info(gx_query, user_key)  # result unused, call kept (see above)
    (status, r_id) = epidb.get_regions(
        gx_query,
        "GENE_ID,TRANSCRIPT_IDS,LENGTH,EFFECTIVE_LENGTH,EXPECTED_COUNT,"
        "TPM,FPKM,POSTERIOR_MEAN_COUNT,POSTERIOR_STANDARD_DEVIATION_OF_COUNT,"
        "PME_TPM,PME_FPKM,TPM_CI_LOWER_BOUND,TPM_CI_UPPER_BOUND,"
        "FPKM_CI_LOWER_BOUND,FPKM_CI_UPPER_BOUND",
        key)
    self.assertSuccess(status, r_id)
    regions = self.get_regions_request(r_id)
    self.assertEqual(
        regions,
        "ENSG00000000003.13\tENSG00000000003.13\t2025\t1855.4301\t161.0000\t"
        "1.0000\t2.1300\t161.0000\t0.0000\t1.0500\t2.2700\t0.8742\t1.2451\t"
        "1.8882\t2.6879"
    )

    # Selections on the cufflinks replica; the output columns are taken
    # from the "format" field of gx1's info.
    (status, gx_query) = epidb.select_expressions(
        "gene", "s1", 1, "OR4G11P", "ENCODE", "gencode v19", key)
    self.assertSuccess(status, gx_query)
    status, info = epidb.info("gx1", user_key)
    (status, r_id) = epidb.get_regions(gx_query, info[0]["format"], key)
    self.assertSuccess(status, r_id)
    regions = self.get_regions_request(r_id)
    self.assertEqual(
        regions,
        "ENSG00000240361.1\tENSG00000240361.1\tOR4G11P\t0.0000\t0.0000\t0.0000\tOK"
    )

    (status, gx_query) = epidb.select_expressions(
        "gene", "s1", 1,
        ['CCR1', 'CD164', 'CD1D', 'CD2', 'CD34', 'CD3G', 'CD44'], "ENCODE",
        "gencode v19", key)
    self.assertSuccess(status, gx_query)
    status, info = epidb.info("gx1", user_key)
    (status, r_id) = epidb.get_regions(gx_query, info[0]["format"], key)
    self.assertSuccess(status, r_id)
    regions_a = self.get_regions_request(r_id)

    expected = (
        "ENSG00000135535.10\tENSG00000135535.10\tCD164\t101.3820\t98.8947\t103.8680\tOK\n"
        "ENSG00000026508.12\tENSG00000026508.12\tCD44\t193.4920\t189.4020\t197.5830\tOK\n"
        "ENSG00000160654.5\tENSG00000160654.5\tCD3G\t53.0051\t51.4405\t54.5696\tOK\n"
        "ENSG00000163823.3\tENSG00000163823.3\tCCR1\t0.0201\t0.0000\t0.0433\tOK\n"
        "ENSG00000116824.4\tENSG00000116824.4\tCD2\t90.0146\t87.9630\t92.0661\tOK\n"
        "ENSG00000158473.6\tENSG00000158473.6\tCD1D\t0.0241\t0.0000\t0.0519\tOK\n"
        "ENSG00000174059.12\tENSG00000174059.12\tCD34\t0.0000\t0.0000\t0.0000\tOK"
    )
    # Row order is not compared: same number of rows, and every returned
    # row must be one of the expected rows.
    expected_rows = expected.split("\n")
    result_rows = regions_a.split("\n")
    self.assertEqual(len(result_rows), len(expected_rows))
    for row in result_rows:
        self.assertIn(row, expected_rows)

    (status, gx_query) = epidb.select_expressions(
        "gene", "s1", 1, 'CCR1', "ENCODE", "gencode v19", key)
    self.assertSuccess(status, gx_query)
    status, info = epidb.info("gx1", user_key)
    (status, r_id) = epidb.get_regions(gx_query, info[0]["format"], key)
    self.assertSuccess(status, r_id)
    regions = self.get_regions_request(r_id)
    self.assertEqual(
        regions,
        "ENSG00000163823.3\tENSG00000163823.3\tCCR1\t0.0201\t0.0000\t0.0433\tOK"
    )

    q1 = gx_query

    (status, gx_query) = epidb.select_expressions(
        "gene", "s1", 1, 'CD164', "ENCODE", "gencode v19", key)
    self.assertSuccess(status, gx_query)
    status, info = epidb.info("gx1", user_key)
    (status, r_id) = epidb.get_regions(gx_query, info[0]["format"], key)
    self.assertSuccess(status, r_id)
    regions = self.get_regions_request(r_id)
    self.assertEqual(
        regions,
        "ENSG00000135535.10\tENSG00000135535.10\tCD164\t101.3820\t98.8947\t103.8680\tOK"
    )

    # Selecting a different gene must produce a different query id.
    self.assertNotEqual(q1, gx_query)

    (s, info) = epidb.info(gene_model_id, key)
    self.assertEqual(
        info[0], {
            'total_genes': 57820,
            '_id': 'gs1',
            'genome': 'hg19',
            'description': 'Test One Description',
            'format': 'GTF',
            'name': 'gencode v19'
        })

    (status, gene_info) = epidb.info("gn1", key)
    self.assertEqual(
        gene_info[0], {
            'transcript_status': 'KNOWN',
            'gene_name': 'DDX11L1',
            'gene_type': 'pseudogene',
            'end': 14412,
            'source': 'HAVANA',
            'frame': '.',
            'level': '2',
            'gene_id': 'ENSG00000223972.4',
            'start': 11869,
            'transcript_id': 'ENSG00000223972.4',
            'score': 0.0,
            'strand': '+',
            'havana_gene': 'OTTHUMG00000000961.2',
            'transcript_name': 'DDX11L1',
            '_id': 'gn1',
            'gene_status': 'KNOWN',
            'transcript_type': 'pseudogene',
            'chromosome': 'chr1'
        })

    # Both filtered selections below are expected to return these rows.
    lowdata_chr21 = (
        "ENSG00000240755.1\tLOWDATA\ts1\tK562\n"
        "ENSG00000256386.1\tLOWDATA\ts1\tK562\n"
        "ENSG00000198743.5\tLOWDATA\ts1\tK562\n"
        "ENSG00000267937.1\tLOWDATA\ts1\tK562\n"
        "ENSG00000238556.1\tLOWDATA\ts1\tK562\n"
        "ENSG00000255902.1\tLOWDATA\ts1\tK562\n"
        "ENSG00000266692.1\tLOWDATA\ts1\tK562"
    )

    # Admin key, project given explicitly.
    (status, query) = epidb.select_expressions(
        "gene", "s1", [1, 5, 10, 122], None, "ENCODE", "gencode v19", key)
    self.assertSuccess(status, query)

    (status, filtered) = epidb.filter_regions(
        query, "FPKM_STATUS", "!=", "OK", "string", key)
    self.assertSuccess(status, filtered)

    (status, filtered_chr) = epidb.filter_regions(
        filtered, "CHROMOSOME", "==", "chr21", "string", key)
    self.assertSuccess(status, filtered_chr)

    (status, r_id) = epidb.get_regions(
        filtered_chr, "GENE_ID,FPKM_STATUS,@SAMPLE_ID,@BIOSOURCE", key)
    self.assertSuccess(status, r_id)
    regions = self.get_regions_request(r_id)
    self.assertEqual(regions, lowdata_chr21)

    # Second user's key, empty project string.
    (status, query) = epidb.select_expressions(
        "gene", "s1", [1, 5, 10, 122], None, "", "gencode v19", user_key)
    self.assertSuccess(status, query)

    (status, filtered) = epidb.filter_regions(
        query, "FPKM_STATUS", "!=", "OK", "string", user_key)
    self.assertSuccess(status, filtered)

    (status, filtered_chr) = epidb.filter_regions(
        filtered, "CHROMOSOME", "==", "chr21", "string", user_key)
    self.assertSuccess(status, filtered_chr)

    (status, req) = epidb.get_regions(
        filtered_chr, "GENE_ID,FPKM_STATUS,@SAMPLE_ID,@BIOSOURCE", user_key)
    # Report this call's own payload (the original passed the stale r_id).
    self.assertSuccess(status, req)

    # Poll with the user's key until the request is done.
    # NOTE(review): there is no timeout; a request that never reaches
    # "done" makes this loop spin forever.
    (s, request_info) = epidb.info(req, user_key)
    while request_info[0]["state"] != "done":
        time.sleep(1)
        (s, request_info) = epidb.info(req, user_key)

    s, regions = epidb.get_request_data(req, user_key)
    self.assertEqual(regions, lowdata_chr21)
def test_complex2(self):
    """Chain select, intersect, tile, merge, filter and cache operations on two experiments.

    Intermediate region counts are checked at several steps and the final
    region listing is compared with the stored "complex2" result.
    """
    client = DeepBlueClient(address="localhost", port=31415)
    self.init_full(client)
    self.insert_experiment(client, "hg19_big_1")
    self.insert_experiment(client, "hg19_big_2")
    key = self.admin_key

    def count_of(query_id):
        # Request a region count for the query, require success, and
        # return the count once the request is retrieved.
        status, request_id = client.count_regions(query_id, key)
        self.assertSuccess(status, request_id)
        return self.count_request(request_id)

    # hg19_big_1 restricted by position (1,000,000 to 3,000,000).
    status, by_range = client.select_regions(
        "hg19_big_1", "hg19", None, None, None, None, None,
        1000000, 3000000, key)
    self.assertSuccess(status, by_range)
    count_of(by_range)

    # hg19_big_1 restricted by chromosome.
    status, by_chromosome = client.select_regions(
        "hg19_big_1", "hg19", None, None, None, None,
        ["chr1", "chr3", "chr11", "chrX", "chr9"],
        None, None, key)
    self.assertSuccess(status, by_chromosome)
    count_of(by_chromosome)

    # Regions from 1,000,000 to 3,000,000 on chr1, chr3, chr9, chr11 and chrX.
    status, ranged_on_chromosomes = client.intersection(by_range, by_chromosome, key)
    self.assertSuccess(status, ranged_on_chromosomes)
    self.assertEqual(count_of(ranged_on_chromosomes), 247)

    status, tiles = client.tiling_regions(
        1000, "hg19", ["chr1", "chr2", "chr15", "chrX"], key)
    self.assertSuccess(status, tiles)
    count_of(tiles)

    status, merged = client.merge_queries(ranged_on_chromosomes, tiles, key)
    self.assertSuccess(status, merged)
    count_of(merged)

    status, merged_from_2m = client.filter_regions(
        merged, "START", ">=", "2000000", "number", key)
    self.assertSuccess(status, merged_from_2m)
    count_of(merged_from_2m)

    status, big_2 = client.select_regions(
        "hg19_big_2", "hg19", None, None, None, None,
        ["chr1", "chrX"], None, None, key)
    self.assertSuccess(status, big_2)

    # The cache call's status is not checked; only its query id is used.
    status, big_2_cached = client.query_cache(big_2, True, key)
    self.assertEqual(count_of(big_2_cached), 8961)

    status, overlapping = client.intersection(merged_from_2m, big_2_cached, key)
    self.assertSuccess(status, overlapping)
    self.assertEqual(count_of(overlapping), 14356)

    status, overlapping_to_2_2m = client.filter_regions(
        overlapping, "END", "<", "2200000", "number", key)
    self.assertSuccess(status, overlapping_to_2_2m)

    # The cached id returned here is not used; counting and retrieval
    # below run on the uncached query.
    client.query_cache(overlapping_to_2_2m, True, key)
    self.assertEqual(count_of(overlapping_to_2_2m), 52)

    status, request_id = client.get_regions(
        overlapping_to_2_2m,
        "CHROMOSOME,START,END,NAME,SCORE,STRAND,SIGNAL_VALUE,P_VALUE,Q_VALUE,PEAK,@NAME",
        key)
    self.assertSuccess(status, request_id)

    actual = self.get_regions_request(request_id)
    wanted = helpers.get_result("complex2")
    self.assertEqual(actual, wanted)
def test_aggregation(self):
    """Aggregate CpG-island @LENGTH over 1,000,000-wide hg19 tiles and filter on @AGG.COUNT."""
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    cpg_island = ",".join([
        "CHROMOSOME", "START", "END", "NAME", "LENGTH", "NUM_CPG", "NUM_GC",
        "PER_CPG", "PER_CG", "OBS_EXP"
    ])

    with open("data/cpgIslandExtFull.txt", 'r') as f:
        file_data = f.read()

    (res, a_1) = epidb.add_annotation("Cpg Islands", "hg19",
                                      "Complete CpG islands", file_data,
                                      cpg_island, None, self.admin_key)
    self.assertSuccess(res, a_1)

    res, q_cgi = epidb.select_annotations("Cpg Islands", "hg19", None, None,
                                          None, self.admin_key)
    self.assertSuccess(res, q_cgi)

    res, qid_2 = epidb.tiling_regions(1000000, "hg19", None, self.admin_key)
    self.assertSuccess(res, qid_2)

    res, req = epidb.count_regions(qid_2, self.admin_key)
    self.assertSuccess(res, req)
    count = self.count_request(req)
    # assertEqual replaces the deprecated assertEquals alias throughout.
    self.assertEqual(count, 3118)

    res, _qid_3 = epidb.aggregate(q_cgi, qid_2, "@LENGTH", self.admin_key)
    self.assertSuccess(res, _qid_3)

    res, qid_3 = epidb.query_cache(_qid_3, True, self.admin_key)
    self.assertSuccess(res, qid_3)

    # Tiles that contain at least one CpG island.
    res, qid_4 = epidb.filter_regions(qid_3, "@AGG.COUNT", ">", "0", "number",
                                      self.admin_key)
    self.assertSuccess(res, qid_4)
    res, req = epidb.count_regions(qid_4, self.admin_key)
    self.assertSuccess(res, req)
    count = self.count_request(req)
    self.assertEqual(count, 2574)

    # NOTE(review): this filter runs on the plain tiling query (qid_2),
    # not on the aggregated one (qid_3). Kept unchanged; confirm whether
    # qid_3 was intended. Its status is deliberately left unchecked
    # because the expected outcome of filtering a non-aggregated query on
    # @AGG.COUNT cannot be determined from this test alone.
    res, qid_4 = epidb.filter_regions(qid_2, "@AGG.COUNT", "<", "0", "number",
                                      self.admin_key)
    res, req = epidb.count_regions(qid_4, self.admin_key)
    count = self.count_request(req)
    self.assertEqual(count, 0)

    # Tiles with at least 100 CpG islands, with all aggregate columns.
    res, qid_4 = epidb.filter_regions(qid_3, "@AGG.COUNT", ">=", "100",
                                      "number", self.admin_key)
    self.assertSuccess(res, qid_4)

    (res, req) = epidb.get_regions(
        qid_4,
        "CHROMOSOME,START,END,@AGG.MIN,@AGG.MAX,@AGG.MEDIAN,@AGG.MEAN,@AGG.VAR,@AGG.SD,@AGG.COUNT,@AGG.SUM",
        self.admin_key)
    self.assertSuccess(res, req)
    regions = self.get_regions_request(req)

    expected = (
        'chr1\t1000000\t2000000\t201.0000\t5585.0000\t469.0000\t766.0082\t589695.4375\t767.9163\t122\t93453.0000\n'
        'chr16\t0\t1000000\t201.0000\t6377.0000\t484.0000\t746.6083\t674998.0625\t821.5826\t120\t89593.0000\n'
        'chr16\t1000000\t2000000\t201.0000\t5449.0000\t398.0000\t666.6393\t630197.3125\t793.8497\t122\t81330.0000\n'
        'chr16\t2000000\t3000000\t201.0000\t4843.0000\t533.0000\t780.4951\t559994.2500\t748.3276\t101\t78830.0000\n'
        'chr16\t88000000\t89000000\t202.0000\t3785.0000\t347.0000\t553.3461\t295400.6875\t543.5078\t104\t57548.0000\n'
        'chr19\t0\t1000000\t201.0000\t7814.0000\t424.0000\t776.2705\t944608.4375\t971.9097\t122\t94705.0000\n'
        'chr19\t1000000\t2000000\t201.0000\t6035.0000\t430.0000\t738.8853\t625527.1250\t790.9027\t183\t135216.0000\n'
        'chr19\t2000000\t3000000\t201.0000\t3978.0000\t395.0000\t673.9907\t444749.5000\t666.8954\t107\t72117.0000\n'
        'chr19\t3000000\t4000000\t201.0000\t2753.0000\t387.0000\t531.0648\t172512.1094\t415.3458\t108\t57355.0000\n'
        'chr20\t62000000\t63000000\t202.0000\t5019.0000\t501.0000\t716.2427\t427763.9375\t654.0366\t103\t73773.0000\n'
        'chr7\t0\t1000000\t201.0000\t6234.0000\t348.0000\t556.3500\t475220.5625\t689.3624\t100\t55635.0000\n'
        'chr9\t139000000\t140000000\t202.0000\t6342.0000\t406.0000\t777.3303\t817548.5625\t904.1839\t109\t84729.0000'
    )
    self.assertEqual(regions, expected)

    (s, req) = epidb.count_regions(qid_4, self.admin_key)
    self.assertSuccess(s, req)
    count = self.count_request(req)
    self.assertEqual(count, 12)
def test_cancel_aggregation(self):
    """Check who may cancel a request and the request state afterwards.

    Other non-admin users get an "invalid request ID" error when they try
    to cancel a request, while the admin key can cancel it. After
    cancellation the requests report the states "canceled" or "removed"
    and their data can no longer be fetched.
    """
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_base(epidb)

    sample_id = self.sample_ids[0]
    self.insert_experiment(epidb, "hg19_big_2", sample_id)

    cpg_island = ",".join([
        "CHROMOSOME", "START", "END", "NAME", "LENGTH", "NUM_CPG", "NUM_GC",
        "PER_CPG", "PER_CG", "OBS_EXP"
    ])

    with open("data/cpgIslandExtFull.txt", 'r') as f:
        file_data = f.read()

    (res, a_1) = epidb.add_annotation("Cpg Islands", "hg19",
                                      "Complete CpG islands", file_data,
                                      cpg_island, None, self.admin_key)
    self.assertSuccess(res, a_1)

    res, q_cgi = epidb.select_annotations("Cpg Islands", "hg19", None, None,
                                          None, self.admin_key)
    self.assertSuccess(res, q_cgi)

    res, qid_2 = epidb.tiling_regions(1000000, "hg19", None, self.admin_key)
    self.assertSuccess(res, qid_2)

    # Request r1: a finished count request.
    res, req_count = epidb.count_regions(qid_2, self.admin_key)
    self.assertSuccess(res, req_count)
    count = self.count_request(req_count)
    # assertEqual replaces the deprecated assertEquals alias throughout.
    self.assertEqual(count, 3118)

    res, qid_3 = epidb.aggregate(q_cgi, qid_2, "@LENGTH", self.admin_key)
    self.assertSuccess(res, qid_3)

    res, qid_4 = epidb.filter_regions(qid_3, "@AGG.COUNT", ">=", "100",
                                      "number", self.admin_key)
    self.assertSuccess(res, qid_4)

    # Request r2: an aggregation request whose result is not waited for.
    (res, req_regions) = epidb.get_regions(
        qid_4,
        "CHROMOSOME,START,END,@AGG.MIN,@AGG.MAX,@AGG.MEDIAN,@AGG.MEAN,@AGG.VAR,@AGG.SD,@AGG.COUNT",
        self.admin_key)
    self.assertSuccess(res, req_regions)

    (s, user_two) = epidb.add_user("ANOTHER NAME", "ANOTHER EMAIL",
                                   "INSTITUTE", self.admin_key)
    self.assertSuccess(s, user_two)
    s, tmp_user = epidb.modify_user_admin(user_two[0], "permission_level",
                                          "GET_DATA", self.admin_key)
    self.assertSuccess(s, tmp_user)

    # Another user cannot cancel the admin's requests.
    s, msg = epidb.cancel_request(req_regions, user_two[1])
    self.assertEqual(msg, "130003:The request ID 'r2' is invalid.")
    s, msg = epidb.cancel_request(req_count, user_two[1])
    self.assertEqual(msg, "130003:The request ID 'r1' is invalid.")

    (s, m) = epidb.cancel_request(req_regions, self.admin_key)
    self.assertSuccess(s, m)
    (s, m) = epidb.cancel_request(req_count, self.admin_key)
    self.assertSuccess(s, m)

    (s, user_three) = epidb.add_user("ASS NAME", "ASS EMAIL", "INSTITUTE",
                                     self.admin_key)
    self.assertSuccess(s, user_three)
    s, tmp_user = epidb.modify_user_admin(user_three[0], "permission_level",
                                          "GET_DATA", self.admin_key)
    self.assertSuccess(s, tmp_user)

    # Request r3 belongs to the second user. Each assertion reports the
    # payload of its own call (the original passed q_cgi and req_regions).
    res, q_cgi_other = epidb.select_annotations("Cpg Islands", "hg19", None,
                                                None, None, user_two[1])
    self.assertSuccess(res, q_cgi_other)

    (res, req_other) = epidb.get_regions(q_cgi_other, "CHROMOSOME,START,END",
                                         user_two[1])
    self.assertSuccess(res, req_other)

    # A third user cannot cancel it, but the admin can.
    (s, msg) = epidb.cancel_request(req_other, user_three[1])
    self.assertEqual(msg, "130003:The request ID 'r3' is invalid.")

    (s, m) = epidb.cancel_request(req_other, self.admin_key)
    self.assertSuccess(s, m)

    (s, ss) = epidb.info(req_other, self.admin_key)
    self.assertEqual(ss[0]['state'], 'canceled')

    (s, ss_count) = epidb.info(req_count, self.admin_key)
    self.assertEqual(ss_count[0]["state"], "removed")

    (s, ss_regions) = epidb.info(req_regions, self.admin_key)
    self.assertEqual(ss_regions[0]["state"], "canceled")

    s, e1 = epidb.get_request_data(req_count, self.admin_key)
    self.assertEqual(
        e1, "Request ID r1 was not finished. Please, check its status.")

    s, e2 = epidb.get_request_data(req_regions, self.admin_key)
    self.assertEqual(
        e2, "Request ID r2 was not finished. Please, check its status.")