Example #1
0
    def sync_to_db(cls, name, cluster):
        """Fetch metadata for the named datasource and merge it into the db.

        Creates the datasource row if it does not exist yet, then creates or
        refreshes one ``DruidColumn`` row per column reported by the cluster.

        :param name: Druid datasource name
        :param cluster: the DruidCluster this datasource belongs to
        """
        print("Syncing Druid datasource [{}]".format(name))
        session = get_session()
        datasource = session.query(cls).filter_by(datasource_name=name).first()
        if not datasource:
            datasource = cls(datasource_name=name)
            session.add(datasource)
            flash("Adding new datasource [{}]".format(name), "success")
        else:
            flash("Refreshing datasource [{}]".format(name), "info")
        datasource.cluster = cluster

        cols = datasource.latest_metadata()
        if not cols:
            # Nothing reported by the cluster; leave existing columns untouched
            return
        for col, col_meta in cols.items():
            col_obj = (
                session
                .query(DruidColumn)
                .filter_by(datasource_name=name, column_name=col)
                .first()
            )
            if not col_obj:
                col_obj = DruidColumn(datasource_name=name, column_name=col)
                session.add(col_obj)
            datatype = col_meta['type']
            # STRING columns are dimensions: usable for GROUP BY and filtering
            if datatype == "STRING":
                col_obj.groupby = True
                col_obj.filterable = True
            # col_obj is guaranteed to exist here (created above if missing),
            # so the original `if col_obj:` re-check was redundant; also reuse
            # `datatype` instead of looking up cols[col]['type'] a second time
            col_obj.type = datatype
            col_obj.datasource = datasource
            col_obj.generate_metrics()
Example #2
0
    def sync_to_db(cls, name, cluster):
        """Fetch metadata for the named datasource and merge it into the db.

        Creates the datasource row if it does not exist yet, then creates or
        refreshes one ``DruidColumn`` row per column reported by the cluster.

        :param name: Druid datasource name
        :param cluster: the DruidCluster this datasource belongs to
        """
        print("Syncing Druid datasource [{}]".format(name))
        session = get_session()
        datasource = session.query(cls).filter_by(datasource_name=name).first()
        if not datasource:
            datasource = cls(datasource_name=name)
            session.add(datasource)
            flash("Adding new datasource [{}]".format(name), "success")
        else:
            flash("Refreshing datasource [{}]".format(name), "info")
        datasource.cluster = cluster

        cols = datasource.latest_metadata()
        if not cols:
            # Nothing reported by the cluster; leave existing columns untouched
            return
        for col, col_meta in cols.items():
            col_obj = (session.query(DruidColumn).filter_by(
                datasource_name=name, column_name=col).first())
            if not col_obj:
                col_obj = DruidColumn(datasource_name=name, column_name=col)
                session.add(col_obj)
            datatype = col_meta['type']
            # STRING columns are dimensions: usable for GROUP BY and filtering
            if datatype == "STRING":
                col_obj.groupby = True
                col_obj.filterable = True
            # col_obj always exists at this point (created above if missing),
            # so the original `if col_obj:` re-check was redundant; also reuse
            # `datatype` instead of looking up cols[col]['type'] a second time
            col_obj.type = datatype
            col_obj.datasource = datasource
            col_obj.generate_metrics()
Example #3
0
    def generate_metrics(self):
        """Create ``DruidMetric`` rows derived from this column's metadata.

        Always builds a COUNT(*) metric, plus SUM/MIN/MAX metrics for numeric
        columns flagged accordingly, and an approximate COUNT(DISTINCT ...)
        (Druid 'cardinality' aggregator) when requested. Metrics not already
        present in the database are added and committed.
        """
        metrics = [DruidMetric(
            metric_name='count',
            verbose_name='COUNT(*)',
            metric_type='count',
            json=json.dumps({'type': 'count', 'name': 'count'})
        )]
        # Somehow we need to reassign this for UDAFs
        if self.type in ('DOUBLE', 'FLOAT'):
            corrected_type = 'DOUBLE'
        else:
            corrected_type = self.type

        def simple_agg(agg):
            """Build a sum/min/max metric for this column (agg in lowercase)."""
            name = '{}__{}'.format(agg, self.column_name)
            return DruidMetric(
                metric_name=name,
                metric_type=agg,
                verbose_name='{}({})'.format(agg.upper(), self.column_name),
                json=json.dumps({
                    # e.g. 'doubleSum', 'longMin' — Druid aggregator type names
                    'type': corrected_type.lower() + agg.capitalize(),
                    'name': name,
                    'fieldName': self.column_name})
            )

        if self.sum and self.isnum:
            metrics.append(simple_agg('sum'))
        if self.min and self.isnum:
            metrics.append(simple_agg('min'))
        if self.max and self.isnum:
            metrics.append(simple_agg('max'))
        if self.count_distinct:
            # Removed unused local `mt = 'count_distinct'` from the original
            name = 'count_distinct__' + self.column_name
            metrics.append(DruidMetric(
                metric_name=name,
                verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
                metric_type='count_distinct',
                json=json.dumps({
                    'type': 'cardinality',
                    'name': name,
                    'fieldNames': [self.column_name]})
            ))
        session = get_session()
        M = DruidMetric  # noqa
        for metric in metrics:
            m = (
                session.query(M)
                .filter(M.metric_name == metric.metric_name)
                .filter(M.datasource_name == self.datasource_name)
                # NOTE(review): DruidCluster is filtered without an explicit
                # join/relationship here — verify this filter is effective
                .filter(DruidCluster.cluster_name == self.datasource.cluster_name)
                .first()
            )
            metric.datasource_name = self.datasource_name
            if not m:
                session.add(metric)
                session.commit()
Example #4
0
    def generate_metrics(self):
        """Create ``DruidMetric`` rows derived from this column's metadata.

        Always builds a COUNT(*) metric, plus SUM/MIN/MAX metrics for numeric
        columns flagged accordingly, and an approximate COUNT(DISTINCT ...)
        (Druid 'cardinality' aggregator) when requested. Metrics not already
        present in the database are added and committed.
        """
        metrics = [
            DruidMetric(metric_name='count',
                        verbose_name='COUNT(*)',
                        metric_type='count',
                        json=json.dumps({
                            'type': 'count',
                            'name': 'count'
                        }))]
        # Somehow we need to reassign this for UDAFs
        if self.type in ('DOUBLE', 'FLOAT'):
            corrected_type = 'DOUBLE'
        else:
            corrected_type = self.type

        def simple_agg(agg):
            """Build a sum/min/max metric for this column (agg in lowercase)."""
            name = '{}__{}'.format(agg, self.column_name)
            return DruidMetric(
                metric_name=name,
                metric_type=agg,
                verbose_name='{}({})'.format(agg.upper(), self.column_name),
                json=json.dumps({
                    # e.g. 'doubleSum', 'longMin' — Druid aggregator type names
                    'type': corrected_type.lower() + agg.capitalize(),
                    'name': name,
                    'fieldName': self.column_name
                }))

        if self.sum and self.isnum:
            metrics.append(simple_agg('sum'))
        if self.min and self.isnum:
            metrics.append(simple_agg('min'))
        if self.max and self.isnum:
            metrics.append(simple_agg('max'))
        if self.count_distinct:
            # Removed unused local `mt = 'count_distinct'` from the original
            name = 'count_distinct__' + self.column_name
            metrics.append(
                DruidMetric(metric_name=name,
                            verbose_name='COUNT(DISTINCT {})'.format(
                                self.column_name),
                            metric_type='count_distinct',
                            json=json.dumps({
                                'type': 'cardinality',
                                'name': name,
                                'fieldNames': [self.column_name]
                            })))
        session = get_session()
        M = DruidMetric
        for metric in metrics:
            # NOTE(review): DruidCluster is filtered without an explicit
            # join/relationship here — verify this filter is effective
            m = (session.query(M).filter(
                M.metric_name == metric.metric_name).filter(
                    M.datasource_name == self.datasource_name).filter(
                        DruidCluster.cluster_name ==
                        self.datasource.cluster_name).first())
            metric.datasource_name = self.datasource_name
            if not m:
                session.add(metric)
                session.commit()