Exemple #1
0
    def test_client(self, PyDruid):
        """Smoke-test the Druid explore page against a stubbed PyDruid client.

        ``PyDruid`` is presumably the mock class injected by a ``patch``
        decorator that sits outside this view; only its ``return_value`` is
        configured here.  The test recreates the ``test_cluster`` cluster,
        refreshes its datasources and requests the explore page of the
        datasource that was just created.
        """
        self.login()
        instance = PyDruid.return_value
        instance.time_boundary.return_value = [
            {'result': {'maxTime': '2016-01-01'}}]
        instance.segment_metadata.return_value = SEGMENT_METADATA
        # NOTE(review): this replaces the ``groupby`` attribute with the
        # result set itself instead of configuring a ``return_value`` --
        # confirm that this is what the code under test expects.
        instance.groupby = GB_RESULT_SET

        # Remove any cluster with the same name so it can be recreated below.
        cluster = (
            db.session
            .query(DruidCluster)
            .filter_by(cluster_name='test_cluster')
            .first()
        )
        if cluster:
            db.session.delete(cluster)
        db.session.commit()

        cluster = DruidCluster(
            cluster_name='test_cluster',
            coordinator_host='localhost',
            coordinator_port=7979,
            broker_host='localhost',
            broker_port=7980,
            metadata_last_refreshed=datetime.now())

        db.session.add(cluster)
        # Stub the datasource listing on this cluster instance.
        cluster.get_datasources = Mock(return_value=['test_datasource'])
        cluster.refresh_datasources()
        # Use the real primary key instead of assuming the new datasource
        # was assigned id 1.
        datasource_id = cluster.datasources[0].id
        db.session.commit()

        # The response is not inspected: the test only checks that the
        # request completes without raising.
        self.client.get('/caravel/explore/druid/{}/'.format(datasource_id))
Exemple #2
0
    def test_client(self, PyDruid):
        """Smoke-test the Druid explore page against a stubbed PyDruid client.

        ``PyDruid`` is presumably the mock class injected by a ``patch``
        decorator outside this view.  The test recreates ``test_cluster``,
        refreshes its datasources and requests the explore page of the
        datasource that was just created.
        """
        self.login()
        instance = PyDruid.return_value
        instance.time_boundary.return_value = [{
            'result': {
                'maxTime': '2016-01-01'
            }
        }]
        instance.segment_metadata.return_value = SEGMENT_METADATA
        # NOTE(review): assigns the result set directly to ``groupby``
        # rather than to ``groupby.return_value`` -- confirm intent.
        instance.groupby = GB_RESULT_SET

        # Remove any cluster with the same name so it can be recreated below.
        cluster = (db.session.query(DruidCluster).filter_by(
            cluster_name='test_cluster').first())
        if cluster:
            db.session.delete(cluster)
        db.session.commit()

        cluster = DruidCluster(cluster_name='test_cluster',
                               coordinator_host='localhost',
                               coordinator_port=7979,
                               broker_host='localhost',
                               broker_port=7980,
                               metadata_last_refreshed=datetime.now())

        db.session.add(cluster)
        # Stub the datasource listing on this cluster instance.
        cluster.get_datasources = Mock(return_value=['test_datasource'])
        cluster.refresh_datasources()
        # Use the real primary key instead of assuming the new datasource
        # was assigned id 1.
        datasource_id = cluster.datasources[0].id
        db.session.commit()

        # The response is not inspected: the test only checks that the
        # request completes without raising.
        self.client.get('/caravel/explore/druid/{}/'.format(datasource_id))
Exemple #3
0
    def test_client(self, PyDruid):
        """Check the Druid explore page and its JSON endpoint with stubbed data.

        ``PyDruid`` is presumably the mock class supplied by a ``patch``
        decorator outside this view.  The test recreates ``test_cluster``,
        refreshes its datasources, feeds ``GB_RESULT_SET`` back through the
        stubbed ``export_pandas`` and asserts on both responses.
        """
        self.login(username='******')

        druid_client = PyDruid.return_value
        druid_client.time_boundary.return_value = [
            {'result': {'maxTime': '2016-01-01'}}]
        druid_client.segment_metadata.return_value = SEGMENT_METADATA

        # Start from a clean slate: drop a same-named cluster if one exists.
        existing = db.session.query(DruidCluster).filter_by(
            cluster_name='test_cluster').first()
        if existing:
            db.session.delete(existing)
        db.session.commit()

        cluster = DruidCluster(
            cluster_name='test_cluster',
            coordinator_host='localhost',
            coordinator_port=7979,
            broker_host='localhost',
            broker_port=7980,
            metadata_last_refreshed=datetime.now(),
        )
        db.session.add(cluster)

        # Stub the cluster-level lookups before refreshing the datasources.
        cluster.get_datasources = Mock(return_value=['test_datasource'])
        cluster.get_druid_version = Mock(return_value='0.9.1')
        cluster.refresh_datasources()
        datasource_id = cluster.datasources[0].id
        db.session.commit()

        # Flatten each result row: the ``event`` fields plus its timestamp.
        rows = [
            dict(list(row['event'].items()) + [('timestamp', row['timestamp'])])
            for row in GB_RESULT_SET
        ]
        import pandas as pd
        druid_client.export_pandas.return_value = pd.DataFrame(rows)
        druid_client.query_dict = {}
        druid_client.query_builder.last_query.query_dict = {}

        explore_url = '/caravel/explore/druid/{}/'.format(datasource_id)
        page = self.client.get(explore_url)
        assert "[test_cluster].[test_datasource]" in page.data.decode('utf-8')

        json_url = (
            '/caravel/explore_json/druid/{}/?viz_type=table&granularity=one+day&'
            'druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&'
            'include_search=false&metrics=count&groupby=name&flt_col_0=dim1&'
            'flt_op_0=in&flt_eq_0=&slice_id=&slice_name=&collapsed_fieldsets=&'
            'action=&datasource_name=test_datasource&datasource_id={}&'
            'datasource_type=druid&previous_viz_type=table&'
            'force=true'.format(datasource_id, datasource_id))
        body = self.get_resp(json_url)
        assert "Canada" in body
Exemple #4
0
    def test_client(self, PyDruid):
        """Check the Druid explore page, plain and with ``json=true``.

        ``PyDruid`` is presumably the mock class supplied by a ``patch``
        decorator outside this view.  The test recreates ``test_cluster``,
        refreshes its datasources and requests the explore page twice: once
        plain, and once with query parameters after ``GB_RESULT_SET`` has
        been wired into the stubbed ``export_pandas``.
        """
        self.login_admin()
        instance = PyDruid.return_value
        instance.time_boundary.return_value = [{
            'result': {
                'maxTime': '2016-01-01'
            }
        }]
        instance.segment_metadata.return_value = SEGMENT_METADATA

        # Remove any cluster with the same name so it can be recreated below.
        cluster = (db.session.query(DruidCluster).filter_by(
            cluster_name='test_cluster').first())
        if cluster:
            db.session.delete(cluster)
        db.session.commit()

        cluster = DruidCluster(cluster_name='test_cluster',
                               coordinator_host='localhost',
                               coordinator_port=7979,
                               broker_host='localhost',
                               broker_port=7980,
                               metadata_last_refreshed=datetime.now())

        db.session.add(cluster)
        # Stub the datasource listing on this cluster instance.
        cluster.get_datasources = Mock(return_value=['test_datasource'])
        cluster.refresh_datasources()
        datasource_id = cluster.datasources[0].id
        db.session.commit()

        resp = self.client.get(
            '/caravel/explore/druid/{}/'.format(datasource_id))
        assert "[test_cluster].[test_datasource]" in resp.data.decode('utf-8')

        # Flatten each result row: the ``event`` fields plus its timestamp.
        nres = [
            list(v['event'].items()) + [('timestamp', v['timestamp'])]
            for v in GB_RESULT_SET
        ]
        nres = [dict(v) for v in nres]
        import pandas as pd
        df = pd.DataFrame(nres)
        instance.export_pandas.return_value = df
        instance.query_dict = {}
        instance.query_builder.last_query.query_dict = {}

        # Address the datasource created above instead of a hard-coded id 1,
        # both in the path and in the ``datasource_id`` query parameter.
        url = (
            '/caravel/explore/druid/{0}/?viz_type=table&granularity=one+day&'
            'druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&'
            'include_search=false&metrics=count&groupby=name&flt_col_0=dim1&'
            'flt_op_0=in&flt_eq_0=&slice_id=&slice_name=&collapsed_fieldsets=&'
            'action=&datasource_name=test_datasource&datasource_id={0}&'
            'datasource_type=druid&previous_viz_type=table&'
            'json=true&force=true'.format(datasource_id))
        resp = self.client.get(url)
        assert "Canada" in resp.data.decode('utf-8')
Exemple #5
0
    def test_client(self, PyDruid):
        """Check the Druid explore page, plain and with ``json=true``.

        ``PyDruid`` is presumably the mock class supplied by a ``patch``
        decorator outside this view.  The test recreates ``test_cluster``,
        refreshes its datasources and requests the explore page twice: once
        plain, and once with query parameters after ``GB_RESULT_SET`` has
        been wired into the stubbed ``export_pandas``.
        """
        self.login(username="******")
        instance = PyDruid.return_value
        instance.time_boundary.return_value = [{"result": {"maxTime": "2016-01-01"}}]
        instance.segment_metadata.return_value = SEGMENT_METADATA

        # Remove any cluster with the same name so it can be recreated below.
        cluster = db.session.query(DruidCluster).filter_by(cluster_name="test_cluster").first()
        if cluster:
            db.session.delete(cluster)
        db.session.commit()

        cluster = DruidCluster(
            cluster_name="test_cluster",
            coordinator_host="localhost",
            coordinator_port=7979,
            broker_host="localhost",
            broker_port=7980,
            metadata_last_refreshed=datetime.now(),
        )

        db.session.add(cluster)
        # Stub the cluster-level lookups before refreshing the datasources.
        cluster.get_datasources = Mock(return_value=["test_datasource"])
        cluster.get_druid_version = Mock(return_value="0.9.1")
        cluster.refresh_datasources()
        datasource_id = cluster.datasources[0].id
        db.session.commit()

        resp = self.client.get("/caravel/explore/druid/{}/".format(datasource_id))
        assert "[test_cluster].[test_datasource]" in resp.data.decode("utf-8")

        # Flatten each result row: the ``event`` fields plus its timestamp.
        nres = [list(v["event"].items()) + [("timestamp", v["timestamp"])] for v in GB_RESULT_SET]
        nres = [dict(v) for v in nres]
        import pandas as pd

        df = pd.DataFrame(nres)
        instance.export_pandas.return_value = df
        instance.query_dict = {}
        instance.query_builder.last_query.query_dict = {}

        # Address the datasource created above instead of a hard-coded id 1,
        # both in the path and in the ``datasource_id`` query parameter.
        url = (
            "/caravel/explore/druid/{0}/?viz_type=table&granularity=one+day&"
            "druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&"
            "include_search=false&metrics=count&groupby=name&flt_col_0=dim1&"
            "flt_op_0=in&flt_eq_0=&slice_id=&slice_name=&collapsed_fieldsets=&"
            "action=&datasource_name=test_datasource&datasource_id={0}&"
            "datasource_type=druid&previous_viz_type=table&"
            "json=true&force=true"
        ).format(datasource_id)
        resp = self.client.get(url)
        assert "Canada" in resp.data.decode("utf-8")
Exemple #6
0
    def test_druid_sync_from_config(self):
        """Post datasource configs to ``/caravel/sync_druid/`` and check results.

        Three posts are made for the ``test_click`` datasource of the
        ``new_druid`` cluster: an initial config, the same config again, and
        a modified config.  After each post the stored column names and
        metric names (plus metric types for the last post) and the response
        status code are asserted.
        """
        self.login()
        db.session.add(DruidCluster(cluster_name="new_druid"))
        db.session.commit()

        def sync(payload):
            # POST the payload as JSON, then re-read the stored datasource.
            response = self.client.post(
                '/caravel/sync_druid/', data=json.dumps(payload))
            datasource = (
                db.session
                .query(DruidDatasource)
                .filter_by(datasource_name="test_click")
                .first()
            )
            return response, datasource

        def column_names(datasource):
            # Names of all columns currently attached to the datasource.
            return {col.column_name for col in datasource.columns}

        def metric_names(datasource):
            # Names of all metrics currently attached to the datasource.
            return {metric.metric_name for metric in datasource.metrics}

        initial_cfg = {
            "user": "******",
            "cluster": "new_druid",
            "config": {
                "name": "test_click",
                "dimensions": ["affiliate_id", "campaign", "first_seen"],
                "metrics_spec": [
                    {"type": "count", "name": "count"},
                    {"type": "sum", "name": "sum"},
                ],
                "batch_ingestion": {
                    "sql": "SELECT * FROM clicks WHERE d='{{ ds }}'",
                    "ts_column": "d",
                    "sources": [{
                        "table": "clicks",
                        "partition": "d='{{ ds }}'",
                    }],
                },
            },
        }
        initial_columns = {"affiliate_id", "campaign", "first_seen"}
        initial_metrics = {"count", "sum"}

        # The second pass posts the identical config while the datasource
        # already exists; the same columns, metrics and status are expected.
        for _ in range(2):
            resp, druid_ds = sync(initial_cfg)
            assert column_names(druid_ds) == initial_columns
            assert metric_names(druid_ds) == initial_metrics
            assert resp.status_code == 201

        # Datasource exists; the new config adds metrics and dimensions.
        updated_cfg = {
            "user": "******",
            "cluster": "new_druid",
            "config": {
                "name": "test_click",
                "dimensions": ["affiliate_id", "second_seen"],
                "metrics_spec": [
                    {"type": "bla", "name": "sum"},
                    {"type": "unique", "name": "unique"},
                ],
            },
        }
        resp, druid_ds = sync(updated_cfg)
        # Columns and metrics missing from the new config are not deleted:
        # users may have defined their own dimensions / metrics and want to
        # keep them.
        assert column_names(druid_ds) == {
            "affiliate_id", "campaign", "first_seen", "second_seen"}
        assert metric_names(druid_ds) == {"count", "sum", "unique"}
        # The metric type is not overridden: "sum" stays instead of "bla".
        stored_types = {metric.metric_type for metric in druid_ds.metrics}
        assert stored_types == {"longSum", "sum", "unique"}
        assert resp.status_code == 201
Exemple #7
0
    def test_client(self, PyDruid):
        """Check the Druid explore page and two ``explore_json`` table queries.

        ``PyDruid`` is presumably the mock class supplied by a ``patch``
        decorator outside this view.  The test recreates ``test_cluster``,
        refreshes its datasources, feeds ``GB_RESULT_SET`` back through the
        stubbed ``export_pandas`` and asserts on the explore page and on the
        first record of each JSON response.
        """
        self.login(username='******')

        druid_client = PyDruid.return_value
        druid_client.time_boundary.return_value = [
            {'result': {'maxTime': '2016-01-01'}}]
        druid_client.segment_metadata.return_value = SEGMENT_METADATA

        # Start from a clean slate: drop a same-named cluster if one exists.
        existing = (
            db.session
            .query(DruidCluster)
            .filter_by(cluster_name='test_cluster')
            .first()
        )
        if existing:
            db.session.delete(existing)
        db.session.commit()

        cluster = DruidCluster(
            cluster_name='test_cluster',
            coordinator_host='localhost',
            coordinator_port=7979,
            broker_host='localhost',
            broker_port=7980,
            metadata_last_refreshed=datetime.now(),
        )
        db.session.add(cluster)

        # Stub the cluster-level lookups before refreshing the datasources.
        cluster.get_datasources = Mock(return_value=['test_datasource'])
        cluster.get_druid_version = Mock(return_value='0.9.1')
        cluster.refresh_datasources()
        datasource_id = cluster.datasources[0].id
        db.session.commit()

        # Flatten each result row: the ``event`` fields plus its timestamp.
        rows = [
            dict(list(row['event'].items()) + [('timestamp', row['timestamp'])])
            for row in GB_RESULT_SET
        ]
        import pandas as pd
        druid_client.export_pandas.return_value = pd.DataFrame(rows)
        druid_client.query_dict = {}
        druid_client.query_builder.last_query.query_dict = {}

        page = self.get_resp(
            '/caravel/explore/druid/{}/'.format(datasource_id))
        self.assertIn("[test_cluster].[test_datasource]", page)

        # Table query grouped by a single dimension.
        one_groupby = (
            '/caravel/explore_json/druid/{}/?viz_type=table&granularity=one+day&'
            'druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&'
            'include_search=false&metrics=count&groupby=dim1&flt_col_0=dim1&'
            'flt_op_0=in&flt_eq_0=&slice_id=&slice_name=&collapsed_fieldsets=&'
            'action=&datasource_name=test_datasource&datasource_id={}&'
            'datasource_type=druid&previous_viz_type=table&'
            'force=true')
        # Table query carrying two ``groupby`` parameters.
        # NOTE(review): the second groupby value is ``dim2d``; confirm that
        # this is the intended dimension name.
        two_groupby = (
            '/caravel/explore_json/druid/{}/?viz_type=table&granularity=one+day&'
            'druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&'
            'include_search=false&metrics=count&groupby=dim1&'
            'flt_col_0=dim1&groupby=dim2d&'
            'flt_op_0=in&flt_eq_0=&slice_id=&slice_name=&collapsed_fieldsets=&'
            'action=&datasource_name=test_datasource&datasource_id={}&'
            'datasource_type=druid&previous_viz_type=table&'
            'force=true')

        for template in (one_groupby, two_groupby):
            url = template.format(datasource_id, datasource_id)
            payload = self.get_json_resp(url)
            self.assertEqual("Canada", payload['data']['records'][0]['dim1'])