def sync_to_db(cls, name, cluster):
    """Fetches metadata for that datasource and merges the Caravel db"""
    print("Syncing Druid datasource [{}]".format(name))
    session = get_session()
    datasource = session.query(cls).filter_by(datasource_name=name).first()
    if not datasource:
        datasource = cls(datasource_name=name)
        session.add(datasource)
        flasher("Adding new datasource [{}]".format(name), "success")
    else:
        flasher("Refreshing datasource [{}]".format(name), "info")
    session.flush()
    datasource.cluster = cluster

    cols = datasource.latest_metadata()
    if not cols:
        return
    for col in cols:
        col_obj = (
            session
            .query(DruidColumn)
            .filter_by(datasource_name=name, column_name=col)
            .first()
        )
        datatype = cols[col]['type']
        if not col_obj:
            col_obj = DruidColumn(datasource_name=name, column_name=col)
            session.add(col_obj)
        if datatype == "STRING":
            # String columns are dimensions: allow grouping and filtering
            col_obj.groupby = True
            col_obj.filterable = True
        if col_obj:
            col_obj.type = cols[col]['type']
        session.flush()
        col_obj.datasource = datasource
        col_obj.generate_metrics()
        session.flush()
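# A minimal, self-contained sketch of the get-or-create idiom sync_to_db uses
# for both the datasource and its columns, with a plain dict standing in for
# the SQLAlchemy session/table. All names below are illustrative only, not
# part of Caravel.
def get_or_create(registry, name, factory):
    obj = registry.get(name)
    if not obj:
        obj = factory(name)
        registry[name] = obj  # analogous to session.add(...)
    return obj

_registry = {}
_ds = get_or_create(_registry, "my_datasource", lambda n: {"datasource_name": n})
assert _registry["my_datasource"] is _ds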
def query_obj(self):
    """Building a query object"""
    form_data = self.form_data
    groupby = form_data.get("groupby") or []
    metrics = form_data.get("metrics") or ['count']
    granularity = (
        form_data.get("granularity") or form_data.get("granularity_sqla"))
    limit = int(form_data.get("limit", 0))
    row_limit = int(
        form_data.get("row_limit", config.get("ROW_LIMIT")))
    since = form_data.get("since", "1 year ago")
    from_dttm = utils.parse_human_datetime(since)
    if from_dttm > datetime.now():
        # A "since" that parsed into the future is mirrored back into the past
        from_dttm = datetime.now() - (from_dttm - datetime.now())
    until = form_data.get("until", "now")
    to_dttm = utils.parse_human_datetime(until)
    if from_dttm > to_dttm:
        flasher("The date range doesn't seem right.", "danger")
        from_dttm = to_dttm  # Making them identical to not raise

    # extras are used to query elements specific to a datasource type
    # for instance the extra where clause that applies only to Tables
    extras = {
        'where': form_data.get("where", ''),
        'having': form_data.get("having", ''),
        'time_grain_sqla': form_data.get("time_grain_sqla", ''),
        'druid_time_origin': form_data.get("druid_time_origin", ''),
    }
    d = {
        'granularity': granularity,
        'from_dttm': from_dttm,
        'to_dttm': to_dttm,
        'is_timeseries': self.is_timeseries,
        'groupby': groupby,
        'metrics': metrics,
        'row_limit': row_limit,
        'filter': self.query_filters(),
        'timeseries_limit': limit,
        'extras': extras,
    }
    return d
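# Self-contained illustration of the date-window handling in query_obj above:
# a "since" value that parses into the future is reflected around "now" by the
# same offset. The values here are made up for the example.
from datetime import datetime, timedelta

_now = datetime.now()
_from_dttm = _now + timedelta(days=30)  # e.g. an ambiguous "1 month" parsed forward
if _from_dttm > _now:
    _from_dttm = _now - (_from_dttm - _now)
assert _now - _from_dttm == timedelta(days=30)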
def __init__(self, datasource, form_data, slice_=None):
    self.orig_form_data = form_data
    if not datasource:
        raise Exception("Viz is missing a datasource")
    self.datasource = datasource
    self.request = request
    self.viz_type = form_data.get("viz_type")
    self.slice = slice_

    # TODO refactor all form related logic out of here and into forms.py
    ff = FormFactory(self)
    form_class = ff.get_form()
    defaults = form_class().data.copy()
    previous_viz_type = form_data.get('previous_viz_type')
    if isinstance(form_data, ImmutableMultiDict):
        form = form_class(form_data)
    else:
        form = form_class(**form_data)
    data = form.data.copy()
    if not form.validate():
        for k, v in form.errors.items():
            if not data.get('json') and not data.get('async'):
                flasher("{}: {}".format(k, " ".join(v)), 'danger')
    if previous_viz_type != self.viz_type:
        data = {
            k: form.data[k]
            for k in form_data.keys()
            if k in form.data}
    defaults.update(data)
    self.form_data = defaults

    self.query = ""
    self.form_data['previous_viz_type'] = self.viz_type
    self.token = self.form_data.get(
        'token', 'token_' + uuid.uuid4().hex[:8])
    self.metrics = self.form_data.get('metrics') or []
    self.groupby = self.form_data.get('groupby') or []

    self.reassignments()
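# Minimal sketch of the defaults merge performed in __init__ above: the form
# class's defaults are overlaid with only those submitted keys the form
# actually defines, so unknown keys are dropped. The dicts below are
# hypothetical, not Caravel form data.
_defaults = {"viz_type": "table", "row_limit": 5000, "metrics": []}
_form_fields = {"viz_type", "row_limit", "metrics"}
_submitted = {"viz_type": "line", "metrics": ["count"], "junk_key": 1}
_defaults.update(
    {k: v for k, v in _submitted.items() if k in _form_fields})
assert _defaults == {"viz_type": "line", "row_limit": 5000, "metrics": ["count"]}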
def fetch_metadata(self):
    """Fetches the metadata for the table and merges it in"""
    try:
        table = self.database.get_table(self.table_name, schema=self.schema)
    except Exception as e:
        flasher(str(e))
        flasher(
            "Table doesn't seem to exist in the specified database, "
            "couldn't fetch column information", "danger")
        return

    TC = TableColumn  # noqa shortcut to class
    M = SqlMetric  # noqa
    metrics = []
    any_date_col = None
    for col in table.columns:
        try:
            datatype = str(col.type)
        except Exception:
            datatype = "UNKNOWN"
        dbcol = (
            db.session
            .query(TC)
            .filter(TC.table == self)
            .filter(TC.column_name == col.name)
            .first()
        )
        db.session.flush()
        if not dbcol:
            dbcol = TableColumn(column_name=col.name)

        # Flag the column from its datatype: strings are dimensions,
        # numerics are summable, dates/times are temporal
        num_types = ('DOUBLE', 'FLOAT', 'INT', 'BIGINT', 'LONG')
        date_types = ('DATE', 'TIME')
        str_types = ('VARCHAR', 'STRING')
        datatype = str(datatype).upper()
        if any(t in datatype for t in str_types):
            dbcol.groupby = True
            dbcol.filterable = True
        elif any(t in datatype for t in num_types):
            dbcol.sum = True
        elif any(t in datatype for t in date_types):
            dbcol.is_dttm = True

        db.session.merge(self)
        self.columns.append(dbcol)
        if not any_date_col and 'date' in datatype.lower():
            any_date_col = col.name

        # Generate one metric per aggregation flag set on the column
        quoted = "{}".format(
            column(dbcol.column_name).compile(dialect=db.engine.dialect))
        if dbcol.sum:
            metrics.append(M(
                metric_name='sum__' + dbcol.column_name,
                verbose_name='sum__' + dbcol.column_name,
                metric_type='sum',
                expression="SUM({})".format(quoted)
            ))
        if dbcol.max:
            metrics.append(M(
                metric_name='max__' + dbcol.column_name,
                verbose_name='max__' + dbcol.column_name,
                metric_type='max',
                expression="MAX({})".format(quoted)
            ))
        if dbcol.min:
            metrics.append(M(
                metric_name='min__' + dbcol.column_name,
                verbose_name='min__' + dbcol.column_name,
                metric_type='min',
                expression="MIN({})".format(quoted)
            ))
        if dbcol.count_distinct:
            metrics.append(M(
                metric_name='count_distinct__' + dbcol.column_name,
                verbose_name='count_distinct__' + dbcol.column_name,
                metric_type='count_distinct',
                expression="COUNT(DISTINCT {})".format(quoted)
            ))
        dbcol.type = datatype
        db.session.merge(self)
        db.session.commit()

    metrics.append(M(
        metric_name='count',
        verbose_name='COUNT(*)',
        metric_type='count',
        expression="COUNT(*)"
    ))
    # Persist only the metrics that don't already exist for this table
    for metric in metrics:
        m = (
            db.session.query(M)
            .filter(M.metric_name == metric.metric_name)
            .filter(M.table_id == self.id)
            .first()
        )
        metric.table_id = self.id
        if not m:
            db.session.add(metric)
            db.session.commit()
    if not self.main_dttm_col:
        self.main_dttm_col = any_date_col
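# Standalone sketch of the column-type heuristic in fetch_metadata above:
# substring matching on the uppercased SQL type name decides which flags are
# set on the column. The function name and returned labels are illustrative
# only, not part of Caravel.
NUM_TYPES = ('DOUBLE', 'FLOAT', 'INT', 'BIGINT', 'LONG')
DATE_TYPES = ('DATE', 'TIME')
STR_TYPES = ('VARCHAR', 'STRING')

def column_flags(datatype):
    datatype = datatype.upper()
    if any(t in datatype for t in STR_TYPES):
        return {'groupby': True, 'filterable': True}
    if any(t in datatype for t in NUM_TYPES):
        return {'sum': True}
    if any(t in datatype for t in DATE_TYPES):
        return {'is_dttm': True}
    return {}

assert column_flags('VARCHAR(255)') == {'groupby': True, 'filterable': True}
assert column_flags('BIGINT') == {'sum': True}
assert column_flags('DATETIME') == {'is_dttm': True}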