コード例 #1
0
    def test_load_example_datasets(self):
        """Check the type returned by ``gl.get_example_dataset``.

        Without ``load`` both example datasets must come back as
        ``GMQLDataset``; with ``load=True`` they must come back as
        ``GDataframe``.
        """
        example_names = ("Example_Dataset_1", "Example_Dataset_2")

        # Default call form: no ``load`` argument is passed.
        for example_name in example_names:
            self.assertIsInstance(
                gl.get_example_dataset(name=example_name), GMQLDataset)

        # Explicitly loaded form.
        for example_name in example_names:
            self.assertIsInstance(
                gl.get_example_dataset(name=example_name, load=True),
                GDataframe)
コード例 #2
0
ファイル: test_remote.py プロジェクト: diveu/PyGMQL
    def test_remote_select_1(self):
        """Run the same SELECT three ways and check the results agree.

        The query is executed as GMQL text through ``self.rm.query``, as a
        Python pipeline before the mode switch (logged as the LOCAL run) and
        as the same Python pipeline after ``gl.set_mode("remote")``. The
        results are compared pairwise with ``self.gdataframe_equality``.
        """
        querytext = """
        #SELECT 1
            RES = SELECT(region: (chr == chr2 OR chr == chr3) AND NOT(strand == + OR strand == -)
                AND start >= 130 AND stop <= 250) Example_Dataset_2;
            MATERIALIZE RES INTO select_1;
        """

        logging.info("Query: %s", querytext)
        logging.info("Executing REMOTE TEXTUAL query")
        respaths = self.rm.query(querytext, self.remote_output_path)
        dataset_name = respaths.iloc[0].dataset
        # NOTE(review): presumably the query result is downloaded under
        # remote_output_path/<dataset_name> -- confirm against rm.query.
        result_path = os.path.join(self.remote_output_path, dataset_name)
        logging.info("Deleting remote dataset %s", dataset_name)
        self.rm.delete_dataset(dataset_name)
        res_query = gl.load_from_path(result_path).materialize()

        logging.info("Executing LOCAL PYTHON query")
        d = gl.get_example_dataset("Example_Dataset_2")
        d = d.select(region_predicate=(d.chr.isin(['chr2', 'chr3']))
                     & (~d.strand.isin(['+', '-'])) & (d.start >= 130)
                     & (d.stop <= 250))
        res_local = d.materialize()
        self.gdataframe_equality(res_query, res_local)

        logging.info("Executing REMOTE PYTHON query")
        gl.set_mode("remote")
        try:
            res_remote = d.materialize()
        finally:
            # The mode is process-wide state: restore it even on failure so
            # later tests do not silently run remotely.
            # NOTE(review): assumes the mode before this test was "local".
            gl.set_mode("local")
        self.gdataframe_equality(res_local, res_remote)
コード例 #3
0
    def test_difference_1(self):
        """Compare a Python DIFFERENCE pipeline with ``difference_1``.

        Equivalent GMQL query::

            D1 = SELECT(region: chr == chr2) Example_Dataset_1;
            D2 = SELECT(cell_karyotype == "cancer"; region: chr == chr2) Example_Dataset_2;
            RES = DIFFERENCE() D1 D2;
            MATERIALIZE RES INTO difference_1;
        """
        left = gl.get_example_dataset("Example_Dataset_1")
        left = left.reg_select(left.chr == 'chr2')

        right = gl.get_example_dataset("Example_Dataset_2")
        cancer_samples = right['cell_karyotype'] == 'cancer'
        chr2_regions = right.chr == 'chr2'
        right = right.select(meta_predicate=cancer_samples,
                             region_predicate=chr2_regions)

        actual = left.difference(right).materialize()
        expected = self.get_query_results("difference_1")
        self.gdataframe_equality(actual, expected)
コード例 #4
0
    def test_project_1(self):
        """Compare a Python PROJECT pipeline with ``project_1``.

        Equivalent GMQL query::

            D = SELECT() Example_Dataset_1;
            RES = PROJECT(region_update: length AS right - left) D;
            MATERIALIZE RES INTO project_1;
        """
        dataset = gl.get_example_dataset("Example_Dataset_1")
        # New region field 'length' defined as right - left.
        region_length = dataset.right - dataset.left
        projected = dataset.project(new_field_dict={'length': region_length})

        actual = projected.materialize()
        expected = self.get_query_results("project_1")
        self.gdataframe_equality(actual, expected)
コード例 #5
0
    def test_merge_1(self):
        """Compare a Python MERGE pipeline with ``merge_1``.

        Equivalent GMQL query::

            D = SELECT(region: chr == chr1) Example_Dataset_1;
            RES = MERGE() D;
            MATERIALIZE RES INTO merge_1;
        """
        dataset = gl.get_example_dataset("Example_Dataset_1")
        chr1_only = dataset.reg_select(dataset.chr == 'chr1')

        actual = chr1_only.merge().materialize()
        expected = self.get_query_results("merge_1")
        self.gdataframe_equality(actual, expected)
コード例 #6
0
    def test_map_1(self):
        """Compare a Python MAP pipeline with ``map_1``.

        Equivalent GMQL query::

            D1 = SELECT(region: chr == chr2) Example_Dataset_1;
            D2 = SELECT(region: chr == chr2) Example_Dataset_2;
            RES = MAP(avg_score AS AVG(score)) D1 D2;
            MATERIALIZE RES INTO map_1;

        The result is materialized into ``./tmp``, which is removed again
        before the comparison.
        """
        d1 = gl.get_example_dataset("Example_Dataset_1")
        d1 = d1.reg_select(d1.chr == 'chr2')

        d2 = gl.get_example_dataset("Example_Dataset_2")
        d2 = d2.reg_select(d2.chr == 'chr2')

        res = d1.map(d2,
                     new_reg_fields={'avg_score': gl.AVG("score")},
                     refName="D1",
                     expName="D2")

        output_dir = "./tmp"
        try:
            res = res.materialize(output_dir)
        except Exception:
            # A failed materialization must not leave partial output behind
            # for later runs; the directory may not exist at all here.
            shutil.rmtree(output_dir, ignore_errors=True)
            raise
        # Success path unchanged: still fails loudly if the output directory
        # is unexpectedly missing.
        shutil.rmtree(output_dir)
        self.gdataframe_equality(res, self.get_query_results("map_1"))
コード例 #7
0
    def test_select_1(self):
        """Compare a Python SELECT pipeline with ``select_1``.

        Equivalent GMQL query::

            RES = SELECT(region: (chr == chr2 OR chr == chr3) AND NOT(strand == + OR strand == -)
                AND start >= 130 AND stop <= 250) Example_Dataset_2;
            MATERIALIZE RES INTO select_1;
        """
        dataset = gl.get_example_dataset("Example_Dataset_2")

        # Build the region predicate one conjunct at a time, left to right.
        predicate = dataset.chr.isin(['chr2', 'chr3'])
        predicate = predicate & (~dataset.strand.isin(['+', '-']))
        predicate = predicate & (dataset.start >= 130)
        predicate = predicate & (dataset.stop <= 250)

        selected = dataset.select(region_predicate=predicate)
        actual = selected.materialize()
        expected = self.get_query_results("select_1")
        self.gdataframe_equality(actual, expected)
コード例 #8
0
    def test_join_1(self):
        """Compare a Python JOIN pipeline with ``join_1``.

        Equivalent GMQL query::

            D1 = SELECT(region: chr == chr2) Example_Dataset_1;
            D2 = SELECT(region: chr == chr2) Example_Dataset_2;
            RES = JOIN(MD(1), DGE(20); output: RIGHT; joinby: cell_karyotype) D1 D2;
            MATERIALIZE RES INTO join_1;

        The result is materialized into ``./tmp``, which is removed again
        before the comparison.
        """
        d1 = gl.get_example_dataset("Example_Dataset_1")
        d1 = d1.reg_select(d1.chr == 'chr2')

        d2 = gl.get_example_dataset("Example_Dataset_2")
        d2 = d2.reg_select(d2.chr == 'chr2')

        res = d1.join(d2, [gl.MD(1), gl.DGE(20)],
                      output="RIGHT",
                      joinBy=['cell_karyotype'],
                      refName="D1",
                      expName="D2")

        output_dir = "./tmp"
        try:
            res = res.materialize(output_dir)
        except Exception:
            # A failed materialization must not leave partial output behind
            # for later runs; the directory may not exist at all here.
            shutil.rmtree(output_dir, ignore_errors=True)
            raise
        # Success path unchanged: still fails loudly if the output directory
        # is unexpectedly missing.
        shutil.rmtree(output_dir)
        self.gdataframe_equality(res, self.get_query_results("join_1"))
コード例 #9
0
    def test_group_1(self):
        """Compare a Python GROUP pipeline with ``group_1``.

        Equivalent GMQL query::

            D = SELECT(region: chr == chr2) Example_Dataset_2;
            RES = GROUP(controlId; meta_aggregates: max_cell_tier AS MAX(cell_tier)) D;
            MATERIALIZE RES INTO group_1;
        """
        dataset = gl.get_example_dataset("Example_Dataset_2")
        chr2_only = dataset.reg_select(dataset.chr == 'chr2')

        grouped = chr2_only.group(
            meta=['controlId'],
            meta_aggregates={'max_cell_tier': gl.MAX("cell_tier")})

        actual = grouped.materialize()
        expected = self.get_query_results("group_1")
        self.gdataframe_equality(actual, expected)
コード例 #10
0
    def test_extend_2(self):
        """Compare a Python EXTEND pipeline with ``extend_2``.

        Equivalent GMQL query::

            D = SELECT() Example_Dataset_1;
            RES = EXTEND(region_count AS COUNT(), min_pvalue AS MIN(pvalue)) D;
            MATERIALIZE RES INTO extend_2;
        """
        dataset = gl.get_example_dataset("Example_Dataset_1")

        # New attribute name -> aggregate passed to extend().
        aggregates = {
            'region_count': gl.COUNT(),
            'min_pvalue': gl.MIN("pvalue"),
        }

        actual = dataset.extend(aggregates).materialize()
        expected = self.get_query_results("extend_2")
        self.gdataframe_equality(actual, expected)
コード例 #11
0
    def test_cover_2(self):
        """Compare a Python COVER pipeline with ``cover_2``.

        Equivalent GMQL query::

            D = SELECT(region: chr == chr2) Example_Dataset_2;
            RES = COVER(2, 3; groupby: cell; aggregate: min_pvalue AS MIN(pvalue)) D;
            MATERIALIZE RES INTO cover_2;
        """
        dataset = gl.get_example_dataset("Example_Dataset_2")
        chr2_only = dataset.reg_select(dataset.chr == 'chr2')

        covered = chr2_only.normal_cover(
            2, 3,
            groupBy=['cell'],
            new_reg_fields={'min_pvalue': gl.MIN("pvalue")})

        actual = covered.materialize()
        expected = self.get_query_results("cover_2")
        self.gdataframe_equality(actual, expected)
コード例 #12
0
    def test_order_1(self):
        """Compare a Python ORDER pipeline with ``order_1``.

        Equivalent GMQL query::

            D = SELECT(region: chr == chr1) Example_Dataset_1;
            D1 = EXTEND(Region_count AS COUNT()) D;
            RES = ORDER(Region_count DESC; meta_top: 2) D1;
            MATERIALIZE RES INTO order_1;
        """
        dataset = gl.get_example_dataset("Example_Dataset_1")
        chr1_only = dataset.reg_select(dataset.chr == 'chr1')
        counted = chr1_only.extend({'Region_count': gl.COUNT()})

        # Descending on Region_count, top 2 (per the query above).
        ordered = counted.order(meta=['Region_count'],
                                meta_ascending=[False],
                                meta_top="top",
                                meta_k=2)

        actual = ordered.materialize()
        expected = self.get_query_results("order_1")
        self.gdataframe_equality(actual, expected)