def get_table_weight(table_id: int, session=None) -> int:
    """Compute the ranking weight of a table.

    The weight is used for ranking in auto completion and sidebar
    table search.

    Arguments:
        table_id {int} -- Id of DataTable

    Keyword Arguments:
        session -- Sqlalchemy DB session (default: {None})

    Returns:
        int -- The integer weight
    """
    sample_count = get_table_query_samples_count(table_id, session=session)
    impression_count = get_viewers_count_by_item_after_date(
        ImpressionItemType.DATA_TABLE,
        table_id,
        get_last_impressions_date(),
        session=session,
    )
    boost = get_table_by_id(table_id, session=session).boost_score

    # A query sample counts 10x as much as an impression; the +1 keeps the
    # log argument non-zero when there are no samples or impressions.
    raw_score = (impression_count + sample_count * 10) + 1

    # Take the log to flatten the score distribution (it follows a power
    # law). NOTE: math.log2 needs a positive argument, so this relies on
    # boost_score being > 0.
    return int(math.log2(raw_score * boost))
def get_table(table_id, with_schema=True, with_column=True, with_warnings=True):
    """Look up a table by id and return it serialized via ``to_dict``.

    Arguments:
        table_id -- Id of the table to fetch

    Keyword Arguments:
        with_schema -- forwarded to ``table.to_dict`` (default: {True})
        with_column -- forwarded to ``table.to_dict`` (default: {True})
        with_warnings -- forwarded to ``table.to_dict`` (default: {True})

    Returns:
        The value produced by ``table.to_dict(...)``.
    """
    with DBSession() as session:
        data_table = logic.get_table_by_id(table_id, session=session)
        # The lookup result is checked before any attribute is read from it.
        api_assert(data_table, "Invalid table")
        verify_data_schema_permission(data_table.schema_id, session=session)
        # Serialize while the session is still open.
        serialized = data_table.to_dict(with_schema, with_column, with_warnings)
    return serialized
def update_table(table_id, description=None, golden=None):
    """Update a table's description and/or golden flag.

    Arguments:
        table_id -- Id of the table to update

    Keyword Arguments:
        description -- New description; only applied when truthy, so an
            empty value leaves the stored description untouched
            (default: {None})
        golden -- New golden flag; only applied when not None, and only
            when the current user is an admin (default: {None})

    Returns:
        The table as returned by ``logic.get_table_by_id`` after the update.
    """
    with DBSession() as session:
        verify_data_table_permission(table_id, session=session)

        # Validate the whole request before mutating anything: checking the
        # admin requirement up front prevents a request that is rejected for
        # `golden` from having already written the description.
        if golden is not None:
            api_assert(
                current_user.is_admin, "Golden table can only be updated by Admin"
            )

        if description:
            logic.update_table_information(
                table_id, description=description, session=session
            )
        if golden is not None:
            logic.update_table(table_id, golden=golden, session=session)

        return logic.get_table_by_id(table_id, session=session)
def refresh_table_from_metastore(table_id):
    """Refetch a table's info from its metastore and return the table.

    Arguments:
        table_id -- Id of the table to refresh

    Returns:
        The table object, refreshed from the session after the sync.
    """
    with DBSession() as session:
        verify_data_table_permission(table_id, session=session)

        data_table = logic.get_table_by_id(table_id, session=session)
        data_schema = data_table.data_schema

        # The loader is resolved from the metastore the table's schema
        # belongs to.
        loader = get_metastore_loader(data_schema.metastore_id, session=session)
        loader.sync_create_or_update_table(
            data_schema.name, data_table.name, session=session
        )

        # Reload the object so the caller sees what the sync wrote.
        session.refresh(data_table)
        return data_table
def make_samples_query(
    table_id,
    limit,
    partition=None,
    where: List[Tuple[str, str, str]] = None,
    order_by=None,
    order_by_asc=True,
    session=None,
):
    """Build the ``SELECT *`` query text used to sample rows from a table.

    Arguments:
        table_id -- Id of the table to sample
        limit -- Value placed in the LIMIT clause

    Keyword Arguments:
        partition -- Partition to filter on; passed through
            ``_verify_or_get_partition`` first (default: {None})
        where -- List of filter tuples, each formatted by
            ``_format_where_clause_filter`` (default: {None})
        order_by -- Column name to order by; must be a column of the
            table (default: {None})
        order_by_asc -- Ascending when True, descending otherwise
            (default: {True})
        session -- Sqlalchemy DB session (default: {None})

    Raises:
        SamplesError -- when ``order_by`` is not a column of the table

    Returns:
        str -- The query text
    """
    table = get_table_by_id(table_id, session=session)

    column_type_by_name = {}
    for column in table.columns:
        column_type_by_name[column.name] = get_column_type_from_string(column.type)

    # Collect every WHERE condition: partition conditions first, then the
    # caller-supplied filters in their given order.
    query_filters = []
    partition = _verify_or_get_partition(table, partition)
    if partition:
        query_filters.extend(_format_partition_filter(partition, column_type_by_name))
    if where is not None:
        query_filters.extend(
            _format_where_clause_filter(where_filter, column_type_by_name)
            for where_filter in where
        )

    if query_filters:
        query_filter_str = "WHERE\n{}".format(" AND ".join(query_filters))
    else:
        query_filter_str = ""

    order_by_str = ""
    if order_by is not None:
        # Only known column names may reach the query text.
        if order_by not in column_type_by_name:
            raise SamplesError("Invalid order by " + order_by)
        direction = "ASC" if order_by_asc else "DESC"
        order_by_str = "ORDER BY {} {}".format(order_by, direction)

    full_name = f"{table.data_schema.name}.{table.name}"

    query = """ SELECT * FROM {} {} {} LIMIT {}""".format(
        full_name, query_filter_str, order_by_str, limit
    )
    return query
def update_table_by_id(table_id, session=None):
    """Sync one table into the ES tables index.

    When the table no longer exists, its ES entry is deleted. Otherwise
    the table is converted with ``table_to_es`` and upserted. A failed
    upsert is logged and swallowed (best effort); it is not retried and
    nothing is inserted by another path.

    Arguments:
        table_id -- Id of the table to sync

    Keyword Arguments:
        session -- Sqlalchemy DB session (default: {None})
    """
    index_name = ES_CONFIG["tables"]["index_name"]

    table = get_table_by_id(table_id, session=session)
    if table is None:
        delete_es_table_by_id(table_id)
        return

    formatted_object = table_to_es(table, session=session)

    # ES requires this format for updates; doc_as_upsert makes the update
    # create the document when it is not present yet.
    updated_body = {
        "doc": formatted_object,
        "doc_as_upsert": True,
    }

    # Only the ES call is guarded, so errors elsewhere are not hidden.
    try:
        _update(index_name, table_id, updated_body)
    except Exception as e:
        # Deliberately best effort, but keep the cause in the log so the
        # failure can be diagnosed.
        LOG.error(
            "failed to upsert {}. Will pass. Error: {!r}".format(table_id, e)
        )
def make_samples_query(
    table_id,
    limit,
    partition=None,
    where: Tuple[str, str, str] = None,
    order_by=None,
    order_by_asc=True,
    session=None,
):
    """Build the ``SELECT *`` query text used to sample rows from a table.

    Arguments:
        table_id -- Id of the table to sample
        limit -- Value placed in the LIMIT clause

    Keyword Arguments:
        partition -- Partition such as ``dt=2015-01-01/column1=val1``. When
            None, the last entry of the table's ``latest_partitions`` is
            used, if there is one (default: {None})
        where -- A single ``(column_name, filter_op, filter_val)`` filter
            (default: {None})
        order_by -- Column name to order by; must be a column of the
            table (default: {None})
        order_by_asc -- Ascending when True, descending otherwise
            (default: {True})
        session -- Sqlalchemy DB session (default: {None})

    Raises:
        SamplesError -- on an unknown partition, filter column, filter op
            or order-by column, or on a filter value that does not fit the
            column type

    Returns:
        str -- The query text
    """
    table = get_table_by_id(table_id, session=session)
    information = table.information
    column_type_by_name = {
        column.name: get_column_type_from_string(column.type)
        for column in table.columns
    }
    query_filters = []

    partitions = []
    if information:
        partitions = json.loads(
            information.to_dict().get("latest_partitions") or "[]"
        )

    if partition is None:
        # Default to the last listed partition, if any.
        partition = next(iter(reversed(partitions)), None)
    elif not (len(partitions) and partition in partitions):
        # A caller-provided partition must be one of the known ones.
        raise SamplesError(f"Invalid partition {partition}")

    if partition:
        # latest_partitions is like dt=2015-01-01/column1=val1
        for column_filter in partition.split("/"):
            # Split on the first "=" only so a value containing "=" does
            # not break the unpacking.
            column_name, column_val = column_filter.split("=", 1)
            is_string = (
                column_type_by_name.get(column_name) == QuerybookColumnType.String
            )
            column_quote = "'" if is_string else ""
            query_filters.append(
                f"{column_name}={column_quote}{column_val}{column_quote}"
            )

    if where is not None:
        column_name, filter_op, filter_val = where
        if column_name not in column_type_by_name:
            raise SamplesError(f"Invalid filter column {column_name}")
        column_type = column_type_by_name[column_name]

        if filter_op not in COMPARSION_OP:
            raise SamplesError(f"Invalid filter op {filter_op}")

        if filter_op in ["=", "!=", "LIKE"]:
            # Every check below works on text. Reject anything else here
            # instead of failing with TypeError/AttributeError or emitting
            # a bogus literal.
            if not isinstance(filter_val, str):
                raise SamplesError(f"Invalid filter value {filter_val}")

            if column_type == QuerybookColumnType.Number:
                if not filter_val or not filter_val.isnumeric():
                    raise SamplesError(
                        f"Invalid numeric filter value {filter_val}"
                    )
            elif column_type == QuerybookColumnType.Boolean:
                if filter_val != "true" and filter_val != "false":
                    raise SamplesError(
                        f"Invalid boolean filter value {filter_val}"
                    )
            else:  # column_type == QuerybookColumnType.String
                # json.dumps escapes backslashes, double quotes and control
                # characters but leaves single quotes alone, so a value
                # containing "'" would terminate the SQL literal early.
                # Double them (standard SQL escaping) so the value cannot
                # break out of the literal.
                # NOTE(review): confirm quote-doubling matches the string
                # syntax of every target query engine.
                escaped_val = json.dumps(filter_val)[1:-1].replace("'", "''")
                filter_val = "'{}'".format(escaped_val)
        else:
            # The remaining operators take no right-hand value.
            filter_val = ""

        query_filters.append(f"{column_name} {filter_op} {filter_val}")

    query_filter_str = (
        "WHERE\n{}".format(" AND ".join(query_filters)) if query_filters else ""
    )

    order_by_str = ""
    if order_by is not None:
        # Only known column names may reach the query text.
        if order_by not in column_type_by_name:
            raise SamplesError(f"Invalid order by {order_by}")
        order_by_str = "ORDER BY {} {}".format(
            order_by, "ASC" if order_by_asc else "DESC"
        )

    full_name = "{}.{}".format(table.data_schema.name, table.name)

    query = """ SELECT * FROM {} {} {} LIMIT {}""".format(
        full_name, query_filter_str, order_by_str, limit
    )
    return query