@classmethod
def sync_to_db(cls, name, cluster):
    """Fetches metadata for the datasource and merges it into the Dashed db"""
    print("Syncing Druid datasource [{}]".format(name))
    session = get_session()
    datasource = session.query(cls).filter_by(datasource_name=name).first()
    if not datasource:
        datasource = cls(datasource_name=name)
        session.add(datasource)
        flash("Adding new datasource [{}]".format(name), "success")
    else:
        flash("Refreshing datasource [{}]".format(name), "info")
    datasource.cluster = cluster

    cols = datasource.latest_metadata()
    if not cols:
        return
    for col in cols:
        col_obj = (
            session
            .query(DruidColumn)
            .filter_by(datasource_name=name, column_name=col)
            .first()
        )
        datatype = cols[col]['type']
        if not col_obj:
            col_obj = DruidColumn(datasource_name=name, column_name=col)
            session.add(col_obj)
        # Druid STRING dimensions can be grouped on and filtered on
        if datatype == "STRING":
            col_obj.groupby = True
            col_obj.filterable = True
        # col_obj is guaranteed to exist at this point, so the redundant
        # `if col_obj:` guard is dropped
        col_obj.type = cols[col]['type']
        col_obj.datasource = datasource
        col_obj.generate_metrics()
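# A minimal usage sketch, not part of the original module: assuming an
# active Flask app context and an existing DruidCluster row, a sync could
# be driven like this. The class name DruidDatasource, the cluster name
# "druid_prod", and the datasource name "wikipedia" are assumptions for
# illustration only.
#
#     session = get_session()
#     cluster = (
#         session.query(DruidCluster)
#         .filter_by(cluster_name="druid_prod")  # hypothetical cluster
#         .first()
#     )
#     DruidDatasource.sync_to_db("wikipedia", cluster)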
def generate_metrics(self):
    """Generate metrics based on the column metadata"""
    M = DruidMetric  # noqa
    metrics = []
    # A plain row count is always available, regardless of column flags
    metrics.append(DruidMetric(
        metric_name='count',
        verbose_name='COUNT(*)',
        metric_type='count',
        json=json.dumps({'type': 'count', 'name': 'count'})
    ))
    # Somehow we need to reassign this for UDAFs
    if self.type in ('DOUBLE', 'FLOAT'):
        corrected_type = 'DOUBLE'
    else:
        corrected_type = self.type

    if self.sum and self.isnum:
        mt = corrected_type.lower() + 'Sum'
        name = 'sum__' + self.column_name
        metrics.append(DruidMetric(
            metric_name=name,
            metric_type='sum',
            verbose_name='SUM({})'.format(self.column_name),
            json=json.dumps({
                'type': mt, 'name': name, 'fieldName': self.column_name})
        ))
    if self.min and self.isnum:
        mt = corrected_type.lower() + 'Min'
        name = 'min__' + self.column_name
        metrics.append(DruidMetric(
            metric_name=name,
            metric_type='min',
            verbose_name='MIN({})'.format(self.column_name),
            json=json.dumps({
                'type': mt, 'name': name, 'fieldName': self.column_name})
        ))
    if self.max and self.isnum:
        mt = corrected_type.lower() + 'Max'
        name = 'max__' + self.column_name
        metrics.append(DruidMetric(
            metric_name=name,
            metric_type='max',
            verbose_name='MAX({})'.format(self.column_name),
            json=json.dumps({
                'type': mt, 'name': name, 'fieldName': self.column_name})
        ))
    if self.count_distinct:
        # Distinct counts map to Druid's approximate cardinality aggregator;
        # the unused `mt = 'count_distinct'` assignment is dropped
        name = 'count_distinct__' + self.column_name
        metrics.append(DruidMetric(
            metric_name=name,
            verbose_name='COUNT(DISTINCT {})'.format(self.column_name),
            metric_type='count_distinct',
            json=json.dumps({
                'type': 'cardinality',
                'name': name,
                'fieldNames': [self.column_name]})
        ))
    session = get_session()
    for metric in metrics:
        m = (
            session.query(M)
            .filter(M.metric_name == metric.metric_name)
            .filter(M.datasource_name == self.datasource_name)
            .filter(
                DruidCluster.cluster_name == self.datasource.cluster_name)
            .first()
        )
        metric.datasource_name = self.datasource_name
        # Only persist metrics that don't already exist for this datasource
        if not m:
            session.add(metric)
            session.commit()
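# A hedged sketch of what generate_metrics() produces, derived from the
# branches above; the column name "added" and type LONG are hypothetical.
# For such a column with its sum flag enabled, the persisted metric's
# json column would hold a Druid aggregator spec:
#
#     mt = 'LONG'.lower() + 'Sum'    # -> 'longSum'
#     name = 'sum__' + 'added'       # -> 'sum__added'
#     json.dumps({'type': mt, 'name': name, 'fieldName': 'added'})
#
# The metric is then added and committed only if no metric with that
# name already exists for the datasource's cluster.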