def top_tables(request):
  """Return the most queried tables of a database, as reported by the Optimizer API.

  POST params:
    database: name of the database to inspect (default 'default').
    len: maximum number of tables to return (default 1000).

  Returns a JsonResponse with 'status' (0 on success) and 'top_tables', a list of
  dicts with popularity/column/pattern metadata per table.
  """
  response = {'status': -1}

  database = request.POST.get('database', 'default')
  # POST values always arrive as strings; coerce so page_size is an int in both
  # the default and the explicit-parameter case.
  limit = int(request.POST.get('len', 1000))

  api = OptimizerApi(user=request.user)

  data = api.top_tables(database_name=database, page_size=limit)

  tables = []
  for table in data['results']:
    path = _get_table_name(table['name'])  # parse "db.table" once instead of twice per row
    tables.append({
      'eid': table['eid'],
      'database': path['database'],
      'name': path['table'],
      'popularity': table['workloadPercent'],
      'column_count': table['columnCount'],
      'patternCount': table['patternCount'],
      'total': table['total'],
      'is_fact': table['type'] != 'Dimension'  # anything not explicitly a dimension counts as a fact table
    })

  response['top_tables'] = tables
  response['status'] = 0

  return JsonResponse(response)
def upload_table_stats(request):
  """Gather table stats, optional column stats and optional DDL for a list of tables.

  POST params:
    db_tables: JSON list of "database.table" names (default '[]').
    sourcePlatform: engine to query (default 'hive'); currently unused here.
    with_columns: JSON boolean, also collect per-column stats (default false).
    with_ddl: JSON boolean, also collect SHOW CREATE TABLE output (default false).

  Failures on an individual table are logged and that table is skipped.
  """
  response = {'status': -1}

  # Bug fix: the '[]' default belongs to POST.get(); previously it was passed as
  # json.loads()' second positional argument, so a missing 'db_tables' parameter
  # made json.loads(None) blow up.
  db_tables = json.loads(request.POST.get('db_tables', '[]'))
  source_platform = request.POST.get('sourcePlatform', 'hive')  # NOTE(review): accepted but not used in this version
  with_columns = json.loads(request.POST.get('with_columns', 'false'))
  with_ddl = json.loads(request.POST.get('with_ddl', 'false'))

  table_stats = []
  column_stats = []
  table_ddls = []

  for db_table in db_tables:
    path = _get_table_name(db_table)

    try:
      if with_ddl:
        db = dbms.get(request.user)
        query = hql_query('SHOW CREATE TABLE `%(database)s`.`%(table)s`' % path)
        handle = db.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          result = db.fetch(handle, rows=5000)
          db.close(handle)
          table_ddls.append((0, 0, ' '.join([row[0] for row in result.rows()]), path['database']))

      full_table_stats = json.loads(get_table_stats(request, database=path['database'], table=path['table']).content)
      stats = dict((stat['data_type'], stat['comment']) for stat in full_table_stats['stats'])

      table_stats.append({
        'table_name': path['table'],
        'num_rows': stats.get('numRows', -1),
        'last_modified_time': stats.get('transient_lastDdlTime', -1),
        'total_size': stats.get('totalSize', -1),
        'raw_data_size': stats.get('rawDataSize', -1),
        'num_files': stats.get('numFiles', -1),
        # bytes_cached
        # cache_replication
        # format
      })

      if with_columns:
        for col in full_table_stats['columns']:
          col_stats = json.loads(get_table_stats(request, database=path['database'], table=path['table'], column=col).content)['stats']
          # Flatten the list of one-key dicts into a single dict.
          col_stats = dict([(key, val) for col_stat in col_stats for key, val in col_stat.items()])

          column_stats.append({
            'table_name': path['table'],
            'column_name': col,
            'data_type': col_stats['data_type'],
            # All lookups use .get(..., '') so a missing key degrades to -1 instead
            # of raising and skipping the whole table (int(None) / KeyError before).
            'num_distinct': int(col_stats.get('distinct_count')) if col_stats.get('distinct_count', '') != '' else -1,
            'num_nulls': int(col_stats['num_nulls']) if col_stats.get('num_nulls', '') != '' else -1,
            'avg_col_len': int(float(col_stats['avg_col_len'])) if col_stats.get('avg_col_len', '') != '' else -1,
            'max_size': int(float(col_stats['max_col_len'])) if col_stats.get('max_col_len', '') != '' else -1,
            'min': col_stats['min'] if col_stats.get('min', '') != '' else -1,
            'max': col_stats['max'] if col_stats.get('max', '') != '' else -1,
            'num_trues': col_stats['num_trues'] if col_stats.get('num_trues', '') != '' else -1,
            'num_falses': col_stats['num_falses'] if col_stats.get('num_falses', '') != '' else -1,
          })
    except Exception as e:  # best-effort per table: log and continue with the next one
      LOG.exception('Skipping upload of %s: %s' % (db_table, e))
def upload_table_stats(request):
  """Gather DDL, table stats and column stats for a list of tables.

  POST params:
    db_tables: JSON list of "database.table" names (default '[]').
    sourcePlatform: JSON string naming the engine, e.g. "hive" or "impala".
    with_ddl: JSON boolean, collect SHOW CREATE TABLE output (default false).
    with_table: JSON boolean, collect table-level stats (default false).
    with_columns: JSON boolean, collect per-column stats (default false).

  The OPTIMIZER.AUTO_UPLOAD_* flags can force-disable DDL or stats collection.
  Failures on an individual table are logged and that table is skipped.
  """
  response = {'status': -1}

  # Bug fix: the '[]' default belongs to POST.get(); previously it was passed as
  # json.loads()' second positional argument, so a missing 'db_tables' parameter
  # made json.loads(None) blow up.
  db_tables = json.loads(request.POST.get('db_tables', '[]'))
  source_platform = json.loads(request.POST.get('sourcePlatform', '"hive"'))
  with_ddl = json.loads(request.POST.get('with_ddl', 'false'))
  with_table_stats = json.loads(request.POST.get('with_table', 'false'))
  with_columns_stats = json.loads(request.POST.get('with_columns', 'false'))

  table_ddls = []
  table_stats = []
  column_stats = []

  if not OPTIMIZER.AUTO_UPLOAD_DDL.get():
    with_ddl = False

  if not OPTIMIZER.AUTO_UPLOAD_STATS.get():
    with_table_stats = with_columns_stats = False

  for db_table in db_tables:
    path = _get_table_name(db_table)

    try:
      if with_ddl:
        db = _get_db(request.user, source_type=source_platform)
        query = hql_query('SHOW CREATE TABLE `%(database)s`.`%(table)s`' % path)
        handle = db.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          result = db.fetch(handle, rows=5000)
          db.close(handle)
          table_ddls.append((0, 0, ' '.join([row[0] for row in result.rows()]), path['database']))

      # Bug fix: create the mock request outside the table-stats branch — the
      # column-stats branch needs it too, and previously hit a NameError when
      # only with_columns was requested.
      mock_request = MockRequest(user=request.user, source_platform=source_platform)
      full_table_stats = None

      if with_table_stats:
        full_table_stats = json.loads(get_table_stats(mock_request, database=path['database'], table=path['table']).content)
        stats = dict((stat['data_type'], stat['comment']) for stat in full_table_stats['stats'])

        table_stats.append({
          'table_name': '%(database)s.%(table)s' % path,  # DB Prefix
          'num_rows': stats.get('numRows', -1),
          'last_modified_time': stats.get('transient_lastDdlTime', -1),
          'total_size': stats.get('totalSize', -1),
          'raw_data_size': stats.get('rawDataSize', -1),
          'num_files': stats.get('numFiles', -1),
          'num_partitions': stats.get('numPartitions', -1),
          # bytes_cached
          # cache_replication
          # format
        })

      if with_columns_stats:
        if source_platform == 'impala':
          # Impala returns all column stats in one call when column=-1.
          column_stats_raw = json.loads(get_table_stats(mock_request, database=path['database'], table=path['table'], column=-1).content)['stats']
        else:
          # Hive needs one call per column; we need the column list, which was
          # previously only fetched in the table-stats branch (NameError fix).
          if full_table_stats is None:
            full_table_stats = json.loads(get_table_stats(mock_request, database=path['database'], table=path['table']).content)
          column_stats_raw = [
            json.loads(get_table_stats(mock_request, database=path['database'], table=path['table'], column=col).content)['stats']
            for col in full_table_stats['columns'][:25]  # cap per-column calls at 25
          ]

        # Flatten each column's list of one-key dicts into a single dict,
        # normalizing None values to '' so the threshold checks below work.
        raw_column_stats = [
          dict([(key, val if val is not None else '') for col_stat in col for key, val in col_stat.items()])
          for col in column_stats_raw
        ]

        for col_stats in raw_column_stats:
          column_stats.append({
            'table_name': '%(database)s.%(table)s' % path,  # DB Prefix
            'column_name': col_stats['col_name'],
            'data_type': col_stats['data_type'],
            # .get(..., '') everywhere so a missing key degrades to -1 instead
            # of raising and skipping the whole table (int(None) / KeyError before).
            'num_distinct': int(col_stats.get('distinct_count')) if col_stats.get('distinct_count', '') != '' else -1,
            'num_nulls': int(col_stats['num_nulls']) if col_stats.get('num_nulls', '') != '' else -1,
            'avg_col_len': int(float(col_stats['avg_col_len'])) if col_stats.get('avg_col_len', '') != '' else -1,
            'max_size': int(float(col_stats['max_col_len'])) if col_stats.get('max_col_len', '') != '' else -1,
            'min': col_stats['min'] if col_stats.get('min', '') != '' else -1,
            'max': col_stats['max'] if col_stats.get('max', '') != '' else -1,
            'num_trues': col_stats['num_trues'] if col_stats.get('num_trues', '') != '' else -1,
            'num_falses': col_stats['num_falses'] if col_stats.get('num_falses', '') != '' else -1,
          })
    except Exception as e:  # best-effort per table: log and continue with the next one
      LOG.exception('Skipping upload of %s: %s' % (db_table, e))