예제 #1
0
    def test_correct_limits(self):
        # Uploads one range (chr1:150-225) and five adjacent 25 bp regions,
        # aggregates the regions over the range using "START", and checks the
        # count reported for the range through @AGG.COUNT.
        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_base(epidb)

        regions_range = "chr1\t150\t225"
        regions_data = "chr1\t125\t150\nchr1\t150\t175\nchr1\t175\t200\nchr1\t200\t225\nchr1\t225\t250"

        # Every server call is checked so that a failing step is reported
        # where it happens instead of as a confusing error further down.
        (s, q_range) = epidb.input_regions("hg19", regions_range,
                                           self.admin_key)
        self.assertSuccess(s, q_range)
        (s, q_data) = epidb.input_regions("hg19", regions_data, self.admin_key)
        self.assertSuccess(s, q_data)

        (s, q_agg) = epidb.aggregate(q_data, q_range, "START", self.admin_key)
        self.assertSuccess(s, q_agg)

        status, req = epidb.get_regions(q_agg,
                                        "CHROMOSOME,START,END,@AGG.COUNT",
                                        self.admin_key)
        self.assertSuccess(status, req)
        rs = self.get_regions_request(req)

        # The test expects three of the five uploaded regions to be counted.
        self.assertEqual("chr1\t150\t225\t3", rs)
예제 #2
0
    def test_load_bedgraph(self):
        # Uploads a gzipped bedgraph file as an experiment, then checks that
        # selecting a chr19 window directly and intersecting that selection
        # with the same window (given as an input region) yield the same,
        # non-empty set of regions.
        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_base(epidb)

        sample_id = self.sample_ids[0]
        # Context manager so the gzip handle is closed once the data is read.
        with gzip.open("data/bedgraph/chr19.txt.gz") as bedgraph_file:
            regions_data = bedgraph_file.read()

        # The md5sum given in the extra metadata is the value that the
        # "#..." identifiers used by select_regions() and info() below carry.
        res = epidb.add_experiment(
            "S0022IH2.ERX300681.H3K36me3.bwa.GRCh38.20150528.bedgraph", "hg19",
            "Methylation", sample_id, "tech1", "ENCODE", "desc1", regions_data,
            "bedgraph", {"md5sum": "afd4af5afd5afd4af5afd5afd4af5afd5"},
            self.admin_key)
        self.assertSuccess(res)

        (status, query_id) = epidb.select_regions(
            "#afd4af5afd5afd4af5afd5afd4af5afd5", None, None, None, None, None,
            "chr19", 49388217, 49417994, self.admin_key)
        self.assertSuccess(status, query_id)

        # Named "window" rather than "input" to avoid shadowing the builtin.
        (status, window) = epidb.input_regions("hg19",
                                               "chr19\t49388217\t49417994",
                                               self.admin_key)
        self.assertSuccess(status, window)

        (status, query_overlap) = epidb.intersection(query_id, window,
                                                     self.admin_key)
        self.assertSuccess(status, query_overlap)

        (status, request_id) = epidb.get_regions(query_id,
                                                 "CHROMOSOME,START,END,VALUE",
                                                 self.admin_key)
        self.assertSuccess(status, request_id)

        # Fetch the intersection query here. Requesting query_id a second
        # time would compare the selection with itself and never exercise
        # the intersection result.
        (status,
         overlap_request_id) = epidb.get_regions(query_overlap,
                                                 "CHROMOSOME,START,END,VALUE",
                                                 self.admin_key)
        self.assertSuccess(status, overlap_request_id)

        by_select = self.get_regions_request(request_id)
        by_overlap = self.get_regions_request(overlap_request_id)

        self.assertEqual(by_overlap, by_select)
        self.assertTrue(len(by_select) > 0)

        (status, info) = epidb.info("#afd4af5afd5afd4af5afd5afd4af5afd5",
                                    self.admin_key)
        self.assertSuccess(status, info)
        self.assertEqual(info[0]["_id"], "e1")
예제 #3
0
    def test_misc_formats(self):
        # Uploads several BED files with different column layouts, reads the
        # format recorded for each resulting query from info(), and requests
        # the regions back in that format. The output is only printed for
        # inspection; the assertions cover the status of each server call.
        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_base(epidb)

        files = ["chr_s_e_name.bed", "chr_s_e_score.bed", "bed10.bed"]

        # Single-argument print(...) behaves the same as the print statement
        # on Python 2 and is also valid Python 3 syntax.
        print(epidb.list_genomes(self.admin_key))

        for f in files:
            print(f)
            # Context manager so the file handle is closed after reading.
            with open("data/bed/" + f) as bed_file:
                content = bed_file.read()
            res, q1 = epidb.input_regions("hg19", content, self.admin_key)
            # Check the upload before q1 is used; on failure q1 would not be
            # a usable query id and info() would fail with a less clear error.
            self.assertSuccess(res, q1)

            # One info() call serves both the debug print and the format
            # lookup.
            query_info = epidb.info(q1, self.admin_key)
            print(query_info)
            fmt = query_info[1][0]['args']['format']

            print(fmt)
            r, r1 = epidb.get_regions(q1, fmt, self.admin_key)
            self.assertSuccess(r, r1)

            for l in self.get_regions_request(r1).split("\n"):
                print(l)
                print(len(l.split("\t")))
예제 #4
0
    def test_complex_input_regions(self):
        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_full(epidb)

        regions = "chr1\t1\t10000\nchr2\t2\t20000\nchr3\t3\t30000"

        (s, q) = epidb.input_regions("hg19", regions, self.admin_key)
        res, req = epidb.count_regions(q, self.admin_key)
        self.assertSuccess(res, req)
        count = self.count_request(req)
        self.assertEqual(count, 3)

        res, req = epidb.get_regions(
            q,
            "CHROMOSOME,START,END,NAME,@NAME,@EPIGENETIC_MARK,@CALCULATED(return value_of('END') - value_of('START') )",
            self.admin_key)
        self.assertSuccess(res, req)
        regions = self.get_regions_request(req)

        output = """chr1\t1\t10000\t\tQuery q1 regions set\t\t9999.000000
chr2\t2\t20000\t\tQuery q1 regions set\t\t19998.000000
chr3\t3\t30000\t\tQuery q1 regions set\t\t29997.000000"""

        self.assertEqual(regions, output)
예제 #5
0
    def test_overlap_simple(self):
        # Checks that the same three regions are returned for the window
        # chr1:3050022-3050100 whether the data is (1) intersected with an
        # input region, (2) stored and selected as an annotation, or
        # (3) stored and selected as an experiment. Finally aggregates the
        # experiment over the window and checks min/max/count of START.
        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_base(epidb)

        data_one = "chr1\t3049996\t3050022\nchr1\t3050022\t3050040\nchr1\t3050040\t3050051\nchr1\t3050051\t3050126"
        region = "chr1\t3050022\t3050100"
        # The three access paths below are all expected to return this.
        expected_in_window = 'chr1\t3050022\t3050040\nchr1\t3050040\t3050051\nchr1\t3050051\t3050126'

        # Test the intersect command
        (s, q1) = epidb.input_regions("hg19", data_one, self.admin_key)
        self.assertSuccess(s, q1)

        (s, q_input) = epidb.input_regions("hg19", region, self.admin_key)
        self.assertSuccess(s, q_input)

        (s, q3) = epidb.intersection(q1, q_input, self.admin_key)
        self.assertSuccess(s, q3)

        (s, req) = epidb.get_regions(q3, "CHROMOSOME,START,END",
                                     self.admin_key)
        self.assertSuccess(s, req)
        regions = self.get_regions_request(req)
        self.assertEqual(regions, expected_in_window)

        # Test the select annotation
        res = epidb.add_annotation("exp1", "hg19", "desc1", data_one,
                                   "CHROMOSOME,START,END", None,
                                   self.admin_key)
        self.assertSuccess(res)
        (s, q1) = epidb.select_annotations("exp1", "hg19", "chr1", 3050022,
                                           3050100, self.admin_key)
        self.assertSuccess(s, q1)
        (s, req) = epidb.get_regions(q1, "CHROMOSOME,START,END",
                                     self.admin_key)
        self.assertSuccess(s, req)
        regions = self.get_regions_request(req)
        self.assertEqual(regions, expected_in_window)

        # Test the select experiment
        # Only the experiment upload needs a sample id.
        sample_id = self.sample_ids[0]
        res = epidb.add_experiment("exp1", "hg19", "Methylation", sample_id,
                                   "tech1", "ENCODE", "desc1", data_one,
                                   "CHROMOSOME,START,END", None,
                                   self.admin_key)
        self.assertSuccess(res)

        (s, q1) = epidb.select_experiments("exp1", "chr1", 3050022, 3050100,
                                           self.admin_key)
        self.assertSuccess(s, q1)
        (s, req) = epidb.get_regions(q1, "CHROMOSOME,START,END",
                                     self.admin_key)
        self.assertSuccess(s, req)
        regions = self.get_regions_request(req)
        self.assertEqual(regions, expected_in_window)

        # Aggregate the whole chr1 experiment over the input window and check
        # the min, max and count of START reported for it.
        (s, q1) = epidb.select_experiments("exp1", "chr1", None, None,
                                           self.admin_key)
        self.assertSuccess(s, q1)
        (s, q2) = epidb.aggregate(q1, q_input, "START", self.admin_key)
        self.assertSuccess(s, q2)
        (s, req) = epidb.get_regions(q2, "@AGG.MIN,@AGG.MAX,@AGG.COUNT",
                                     self.admin_key)
        self.assertSuccess(s, req)
        regions = self.get_regions_request(req)
        self.assertEqual(regions, '3050022.0000\t3050051.0000\t3')
예제 #6
0
  def test_overlap(self):
    # Exercises epidb.overlap() with both values of its boolean argument and
    # with amounts given in "bp" and "%", first on two stored region sets
    # (SET 1) and then on small hand-written input regions (SET 2).
    epidb = DeepBlueClient(address="localhost", port=31415)
    self.init_full(epidb)

    def input_query(regions):
      # Upload `regions` as an hg19 input query and return its query id.
      res, query_id = epidb.input_regions("hg19", regions, self.admin_key)
      self.assertSuccess(res, query_id)
      return query_id

    def overlap_regions(query_a, query_b, overlap, amount, amount_type):
      # Run overlap() with the given arguments, request the result as
      # CHROMOSOME,START,END and return the region text. Both server calls
      # are asserted to succeed.
      res, query_id = epidb.overlap(query_a, query_b, overlap, amount,
                                    amount_type, self.admin_key)
      self.assertSuccess(res, query_id)
      res, req = epidb.get_regions(query_id, "CHROMOSOME,START,END",
                                   self.admin_key)
      self.assertSuccess(res, req)
      return self.get_regions_request(req)

    ##### SET 1 ######
    res, qid_1 = epidb.select_regions("hg19_chr1_1", "hg19", None, None, None,
                                      None, None, None, None, self.admin_key)
    self.assertSuccess(res, qid_1)
    res, qid_2 = epidb.select_regions("hg19_chr1_2", "hg19", None, None, None,
                                      None, None, None, None, self.admin_key)
    self.assertSuccess(res, qid_2)

    regions = overlap_regions(qid_1, qid_2, True, 0, "bp")
    expected_regions = helpers.get_result("intersection")
    self.assertEqual(regions, expected_regions)

    regions = overlap_regions(qid_1, qid_2, True, 165, "bp")
    expected_regions = "\n".join([
        "chr1\t713240\t713390",
        "chr1\t713900\t714050",
        "chr1\t714160\t714310",
        "chr1\t715060\t715210",
        "chr1\t762420\t762570",
        "chr1\t840080\t840230",
        "chr1\t840600\t840750",
        "chr1\t858880\t859030",
        "chr1\t859600\t859750",
        "chr1\t860240\t860390",
        "chr1\t875900\t876050",
    ])
    self.assertEqual(regions, expected_regions)

    # Expected result of the overlap=False queries with amount 0. Written
    # with explicit \t escapes so the separators stay visible and cannot be
    # turned into spaces by an editor.
    not_overlapping = "\n".join([
        "chr1\t761180\t761330",
        "chr1\t763020\t763170",
        "chr1\t839540\t839690",
        "chr1\t875400\t875550",
        "chr1\t876180\t876330",
    ])

    regions2 = overlap_regions(qid_1, qid_2, False, 0, "bp")
    self.assertEqual(regions2, not_overlapping)

    # Repeating the identical query must give the identical result.
    regions2 = overlap_regions(qid_1, qid_2, False, 0, "bp")
    self.assertEqual(regions2, not_overlapping)

    regions2 = overlap_regions(qid_1, qid_2, False, 0, "%")
    self.assertEqual(regions2, not_overlapping)

    regions = overlap_regions(qid_1, qid_2, False, 350, "bp")
    expected = "\n".join([
        "chr1\t761180\t761330",
        "chr1\t763020\t763170",
        "chr1\t875400\t875550",
    ])
    self.assertEqual(regions, expected)

    regions = overlap_regions(qid_1, qid_2, False, 500, "%")
    self.assertEqual(regions, "chr1\t763020\t763170")

    ##### SET 2 ######
    # Two sets of 10 bp / 9 bp regions; each region of the second set starts
    # one base after the end of the matching region of the first set.
    first_set = "chr1\t1000\t1010\nchr1\t1100\t1110\nchr1\t1200\t1210\nchr1\t1300\t1310\nchr1\t1400\t1410\nchr1\t1500\t1510"
    second_set = "chr1\t1011\t1020\nchr1\t1111\t1120\nchr1\t1211\t1220\nchr1\t1311\t1320\nchr1\t1411\t1420\nchr1\t1511\t1520"

    q1 = input_query(first_set)
    q2 = input_query(second_set)

    regions = overlap_regions(q1, q2, False, 0, "bp")
    self.assertEqual(regions, first_set)

    regions = overlap_regions(q1, q2, False, 20, "%")
    self.assertEqual(regions, '')

    regions = overlap_regions(q1, q2, False, 1, "%")
    self.assertEqual(regions, first_set)

    regions = overlap_regions(q2, q1, False, 0, "bp")
    self.assertEqual(regions, second_set)

    # A three-region subset of the first set against the full second set.
    subset = "chr1\t1000\t1010\nchr1\t1200\t1210\nchr1\t1300\t1310"
    q4 = input_query(subset)
    q5 = input_query(second_set)

    regions = overlap_regions(q4, q5, False, 0, "bp")
    self.assertEqual(regions, subset)

    # A single region against the full second set, in both argument orders.
    q4 = input_query("chr1\t1000\t1010")
    q5 = input_query(second_set)

    regions = overlap_regions(q4, q5, False, 0, "bp")
    self.assertEqual(regions, "chr1\t1000\t1010")

    regions = overlap_regions(q5, q4, False, 0, "bp")
    self.assertEqual(regions, second_set)
    def test_enriochment_overlap(self):
        # Uploads an ENCODE broad-peak file as an experiment and runs
        # enrich_regions_overlap() against a 10000 bp tiling universe twice:
        # once with the experiment's own chr1 regions as the query and once
        # with a single 1 bp input region. Each result is compared with the
        # full expected enrichment structure.
        epidb = DeepBlueClient(address="localhost", port=31415)
        self.init_base(epidb)

        res = epidb.add_technique("ChIP-seq", "ChIP-sequencing", {},
                                  self.admin_key)
        self.assertSuccess(res)
        # NOTE(review): the technique registered above is "ChIP-seq" while
        # the experiment below names "ChIPseq" - presumably the server
        # normalizes technique names; confirm.

        broad_peak_format = ",".join([
            "CHROMOSOME",
            "START",
            "END",
            "NAME",
            "SCORE",
            "STRAND",
            "SIGNAL_VALUE",
            "P_VALUE",
            "Q_VALUE",
        ])

        # The file only needs to stay open while it is being read.
        with open("data/wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed", 'r') as f:
            file_data = f.read()

        # The sample is referred to by the literal id "s1".
        (res, exp) = epidb.add_experiment(
            "wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed", "hg19",
            "H3k4me3", "s1", "ChIPseq", "ENCODE",
            "wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed from ENCODE",
            file_data, broad_peak_format, None, self.admin_key)
        self.assertSuccess(res, exp)
        res, q_exp = epidb.select_experiments(
            "wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed", "chr1", None,
            None, self.admin_key)
        self.assertSuccess(res, q_exp)

        ## Testing with all overlaps

        res, q_tiling = epidb.tiling_regions(10000, "hg19", None,
                                             self.admin_key)
        self.assertSuccess(res, q_tiling)
        res, r_id = epidb.enrich_regions_overlap(
            q_exp, q_tiling,
            {"H3k4me3": ["wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed"]},
            "hg19", self.admin_key)
        self.assertSuccess(res, r_id)

        result = self.get_regions_request(r_id)
        # Checked on its own first so that a mismatch in this one value gives
        # a short failure message instead of a full dictionary diff.
        self.assertEqual(result['enrichment']['results'][0]['p_value_log'],
                         float('Inf'))
        self.assertEqual(
            result, {
                'enrichment': {
                    'count_query_regions':
                    3074,
                    'count_universe_regions':
                    313669,
                    'results': [{
                        'c': 0,
                        'b': 30861,
                        'description': '',
                        'p_value_log': float('Inf'),
                        'experiment_size': 33270,
                        'database_name': 'H3k4me3',
                        'max_rank': 1,
                        'support_rank': 1,
                        'dataset':
                        'wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed',
                        'biosource': 'K562',
                        'odd_rank': 1,
                        'odds_ratio': float('Inf'),
                        'epigenetic_mark': 'H3k4me3',
                        'mean_rank': 1.0,
                        'log_rank': 1,
                        'support': 3066,
                        'd': 279742,
                        'msg': '',
                        'error': False
                    }]
                }
            })

        ## Testing without any overlap
        res, q_input = epidb.input_regions("hg19", "chr1\t1\t2",
                                           self.admin_key)
        self.assertSuccess(res, q_input)
        res, r_id = epidb.enrich_regions_overlap(
            q_input, q_tiling,
            {"H3k4me3": ["wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed"]},
            "hg19", self.admin_key)
        self.assertSuccess(res, r_id)

        result = self.get_regions_request(r_id)
        self.assertEqual(result['enrichment']['results'][0]['p_value_log'],
                         0.0)
        self.assertEqual(
            result, {
                'enrichment': {
                    'count_query_regions':
                    1,
                    'count_universe_regions':
                    313669,
                    'results': [{
                        'c': 1,
                        'b': 33927,
                        'description': '',
                        'p_value_log': 0.0,
                        'experiment_size': 33270,
                        'database_name': 'H3k4me3',
                        'max_rank': 1,
                        'support_rank': 1,
                        'dataset':
                        'wgEncodeBroadHistoneH1hescH3k4me3StdPk.bed',
                        'biosource': 'K562',
                        'odd_rank': 1,
                        'odds_ratio': 0.0,
                        'epigenetic_mark': 'H3k4me3',
                        'mean_rank': 1.0,
                        'log_rank': 1,
                        'support': 0,
                        'd': 279741,
                        'error': False,
                        'msg': ''
                    }]
                }
            })