def query_compatibility(request):
  """Ask the Optimizer whether a query written for one platform runs on another.

  POST params: sourcePlatform, targetPlatform, query.
  Returns JsonResponse {'status': 0, 'query_compatibility': ...} on success,
  otherwise {'status': -1, 'message': ...}.
  """
  params = {
    'source_platform': request.POST.get('sourcePlatform'),
    'target_platform': request.POST.get('targetPlatform'),
    'query': request.POST.get('query'),
  }

  result = OptimizerApi().query_compatibility(**params)

  if result['status'] == 'success':
    response = {'status': 0, 'query_compatibility': json.loads(result['details'])}
  else:
    response = {'status': -1, 'message': 'Optimizer: %s' % result['details']}

  return JsonResponse(response)
def query_compatibility(request):
  """Check cross-platform query compatibility through the user-scoped Optimizer API.

  POST params: sourcePlatform, targetPlatform, query.
  Returns JsonResponse {'status': 0, 'query_compatibility': ...} when the API
  returns data, otherwise {'status': -1, 'message': ...}.
  """
  api = OptimizerApi(request.user)

  data = api.query_compatibility(
    source_platform=request.POST.get('sourcePlatform'),
    target_platform=request.POST.get('targetPlatform'),
    query=request.POST.get('query'),
  )

  if data:
    response = {'status': 0, 'query_compatibility': data}
  else:
    response = {'status': -1, 'message': 'Optimizer: %s' % data}

  return JsonResponse(response)
def table_details(request):
  """Fetch Optimizer metadata for a single table.

  POST params: databaseName, tableName.
  Returns JsonResponse {'status': 0, 'details': ...} on success, otherwise
  {'status': -1, 'message': ...}.
  """
  response = {'status': -1}

  database_name = request.POST.get('databaseName')
  table_name = request.POST.get('tableName')

  api = OptimizerApi()
  data = api.table_details(database_name=database_name, table_name=table_name)

  if data:
    response['status'] = 0
    response['details'] = data
  else:
    # BUG FIX: this branch used to format data['details'], but `data` is falsy
    # here (None or empty), so indexing it raised instead of reporting the
    # failure. Report the raw payload, matching the sibling endpoints.
    response['message'] = 'Optimizer: %s' % (data,)

  return JsonResponse(response)
def query_risk(request):
  """Run Optimizer risk analysis on a submitted query.

  POST params: query (JSON-encoded), sourcePlatform, dbName.
  Returns JsonResponse {'status': 0, 'query_risk': ...} when the API returns
  data, otherwise {'status': -1, 'message': ...}.
  """
  query = json.loads(request.POST.get('query'))
  source_platform = request.POST.get('sourcePlatform')
  db_name = request.POST.get('dbName')

  data = OptimizerApi(request.user).query_risk(query=query, source_platform=source_platform, db_name=db_name)

  if data:
    response = {'status': 0, 'query_risk': data}
  else:
    response = {'status': -1, 'message': 'Optimizer: %s' % data}

  return JsonResponse(response)
def similar_queries(request):
  """Look up queries similar to the submitted one on the given platform.

  POST params: sourcePlatform, query.
  Returns JsonResponse {'status': 0, 'similar_queries': [...]} on success,
  otherwise {'status': -1, 'message': ...}.
  """
  result = OptimizerApi().similar_queries(
    source_platform=request.POST.get('sourcePlatform'),
    query=request.POST.get('query'),
  )

  if result['status'] == 'success':
    response = {'status': 0, 'similar_queries': json.loads(result['details']['similarQueries'])}
  else:
    response = {'status': -1, 'message': 'Optimizer: %s' % result['details']}

  return JsonResponse(response)
def upload_history(request):
  """Bulk-upload saved query history to the Optimizer service (admin only).

  POST params:
    sourcePlatform (optional): restrict the upload to one engine's history.
    n (optional): cap on how many queries to upload for that engine.
  Returns JsonResponse {'status': 0, 'upload_history': {...}} on success,
  otherwise {'status': -1, 'message': ...}.
  """
  response = {'status': -1}

  if request.user.is_superuser:
    api = OptimizerApi(request.user)
    histories = []
    upload_stats = {}

    if request.POST.get('sourcePlatform'):
      # BUG FIX: the old code called min() directly on the POST value — a
      # TypeError for a non-iterable int default, and the smallest *character*
      # for a string. Coerce to int and clamp to the configured limit.
      limit = OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get()
      n = min(int(request.POST.get('n', limit)), limit)
      source_platform = request.POST.get('sourcePlatform', 'hive')
      histories = [(
        source_platform,
        Document2.objects.get_history(doc_type='query-%s' % source_platform, user=request.user)[:n]
      )]
    elif OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get() > 0:
      # No platform given: take the newest non-managed, non-trashed history
      # documents for both engines, up to the configured limit each.
      histories = [
        (source_platform,
         Document2.objects.filter(
           type='query-%s' % source_platform,
           is_history=True, is_managed=False, is_trashed=False
         ).order_by('-last_modified')[:OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get()])
        for source_platform in ['hive', 'impala']
      ]

    for source_platform, history in histories:
      queries = _convert_queries([Notebook(document=doc).get_data() for doc in history])
      upload_stats[source_platform] = api.upload(data=queries, data_type='queries', source_platform=source_platform)

    response['upload_history'] = upload_stats
    response['status'] = 0
  else:
    response['message'] = _('Query history upload requires Admin privileges or feature is disabled.')

  return JsonResponse(response)
def statement_risk(self, notebook, snippet):
  """Return Optimizer risk findings for the snippet's current statement.

  Each finding is normalized to a dict with 'risk', 'riskAnalysis' and
  'riskRecommendation' keys.
  """
  db = self._get_db(snippet)
  statement = self._get_current_statement(db, snippet)['statement']

  payload = OptimizerApi().query_risk(query=statement, source_platform=snippet['type'])
  findings = payload.get(snippet['type'] + 'Risk', {})

  # The service reports one all-empty "low risk" record instead of an empty
  # list when it has nothing to flag; normalize that to "no findings".
  if findings == {"riskAnalysis": "", "risk": "low", "riskRecommendation": ""}:
    findings = []

  return [{
    'risk': finding.get('risk'),
    'riskAnalysis': finding.get('riskAnalysis'),
    'riskRecommendation': finding.get('riskRecommendation')
  } for finding in findings]
def popular_values(request):
  """Return popular filter values for a column via the Optimizer API.

  POST params: databaseName, tableName, columnName (the original code marked
  columnName as unused downstream — presumably ignored by the service; it is
  still forwarded unchanged).
  Returns JsonResponse {'status': 0, 'values': [...]} on success, otherwise
  {'status': -1, 'message': ...}.
  """
  database_name = request.POST.get('databaseName')
  table_name = request.POST.get('tableName')
  column_name = request.POST.get('columnName')

  data = OptimizerApi().popular_filter_values(
    database_name=database_name,
    table_name=table_name,
    column_name=column_name
  )

  if data['status'] == 'success':
    response = {'status': 0, 'values': data['results']}
  else:
    response = {'status': -1, 'message': 'Optimizer: %s' % data}

  return JsonResponse(response)
def popular_values(request):
  """Return popular filter values for a table column.

  POST params: tableName, columnName.
  Returns JsonResponse {'status': 0, 'values': [...]} on success, otherwise
  {'status': -1, 'message': ...}.
  """
  response = {'status': -1}

  result = OptimizerApi().popular_filter_values(
    table_name=request.POST.get('tableName'),
    column_name=request.POST.get('columnName'),
  )

  if result['status'] == 'success':
    # A 'status' key inside details marks a malformed payload (known bug in
    # the Optimizer API); treat it as "no values".
    response['values'] = [] if 'status' in result['details'] else result['details']
    response['status'] = 0
  else:
    response['message'] = 'Optimizer: %s' % result['details']

  return JsonResponse(response)
def upload_query(request):
  """Auto-upload a single executed query to the Optimizer service.

  POST params: query_id.
  Skips (status 0, 'Skipped') when auto-upload is disabled or no query_id is
  given, and skips gracefully when the document no longer exists — matching
  the other upload_query variant — instead of raising a 500.
  """
  response = {'status': -1}

  query_id = request.POST.get('query_id')

  if OPTIMIZER.AUTO_UPLOAD_QUERIES.get() and query_id:
    try:
      doc = Document2.objects.document(request.user, doc_id=query_id)
      query_data = Notebook(document=doc).get_data()

      queries = _convert_queries([query_data])
      source_platform = query_data['snippets'][0]['type']

      api = OptimizerApi(request.user)
      response['query_upload'] = api.upload(data=queries, data_type='queries', source_platform=source_platform)
    except Document2.DoesNotExist:
      # Task queries are not persisted as Document2 objects; skip them.
      response['query_upload'] = _('Skipped as task query')
  else:
    response['query_upload'] = _('Skipped')

  response['status'] = 0

  return JsonResponse(response)
def top_tables(request):
  """List the most popular tables of a database per the Optimizer, optionally
  filtered by Sentry SELECT privileges.

  POST params: database (default 'default'), len (accepted but unused here).
  Returns JsonResponse {'status': 0, 'top_tables': [...]}.
  """
  response = {'status': -1}

  database = request.POST.get('database', 'default')
  # FIX: this parameter was previously bound to the name `len`, shadowing the
  # builtin, and was never used. Keep reading it for request compatibility.
  _limit = request.POST.get('len', 1000)  # currently unused

  api = OptimizerApi()
  data = api.top_tables(database_name=database)

  tables = []
  for entry in data['results']:
    paths = _get_table_name(entry['name'])  # split once instead of twice per row
    tables.append({
      'eid': entry['eid'],
      'database': paths['database'],
      'name': paths['table'],
      'popularity': entry['workloadPercent'],
      'column_count': entry['columnCount'],
      'patternCount': entry['patternCount'],
      'total': entry['total'],
      'is_fact': entry['type'] != 'Dimension'
    })

  if NAVIGATOR.APPLY_SENTRY_PERMISSIONS.get():
    checker = PrivilegeChecker(user=request.user)
    action = 'SELECT'
    for table in tables:
      # NOTE(review): 'name' here is the already-split short table name, so
      # this second _get_table_name call sees no db prefix — preserved as-is,
      # confirm intended.
      paths = _get_table_name(table['name'])
      table.update({u'db': paths['database'], u'table': paths['table'], u'column': None, u'server': u'server1'})
    tables = list(checker.filter_objects(tables, action))

  response['top_tables'] = tables
  response['status'] = 0

  return JsonResponse(response)
def top_tables(request):
  """List popular tables; with OPTIMIZER.MOCKING enabled, return random mock data.

  POST params: database (default 'default'), len (max rows, default 1000).
  Returns JsonResponse {'status': 0, 'top_tables': [...]}.
  """
  response = {'status': -1}

  database = request.POST.get('database', 'default')
  # BUG FIX: POST values arrive as strings; the old code bound the raw value
  # to the builtin name `len` and then sliced with it, raising TypeError
  # whenever a client actually sent 'len'. Coerce to int under a safe name.
  limit = int(request.POST.get('len', 1000))

  if OPTIMIZER.MOCKING.get():
    from beeswax.server import dbms
    from beeswax.server.dbms import get_query_server_config

    db = dbms.get(request.user)
    tables = [{
      'name': table,
      'popularity': random.randint(1, 100),
      'column_count': random.randint(1, 100),
      'is_fact': bool(random.getrandbits(1))
    } for table in db.get_tables(database=database)][:limit]
  else:
    # Example API payload entry:
    #   {u'columnCount': 28, u'name': u'date_dim', u'patternCount': 136,
    #    u'workloadPercent': 89, u'total': 92, u'type': u'Dimension', u'eid': u'19'}
    api = OptimizerApi()
    data = api.top_tables()

    tables = [{
      'eid': table['eid'],
      'name': table['name'],
      'popularity': table['workloadPercent'],
      'column_count': table['columnCount'],
      'patternCount': table['patternCount'],
      'total': table['total'],
      'is_fact': table['type'] != 'Dimension'
    } for table in data['details']]

  response['top_tables'] = tables
  response['status'] = 0

  return JsonResponse(response)
def upload_history(request):
  """Upload a user's query history for one engine to the Optimizer service.

  POST params: sourcePlatform (default 'hive'), n (optional max queries).
  Returns JsonResponse {'status': 0, 'upload_history': ...}.
  """
  response = {'status': -1}

  n = request.POST.get('n')
  source_platform = request.POST.get('sourcePlatform', 'hive')

  history = Document2.objects.get_history(doc_type='query-%s' % source_platform, user=request.user)
  if n:
    # BUG FIX: POST parameters are strings; slicing a queryset with a str
    # raises TypeError, so coerce before applying the limit.
    history = history[:int(n)]

  queries = _convert_queries([Notebook(document=doc).get_data() for doc in history])

  api = OptimizerApi()
  response['upload_history'] = api.upload(data=queries, data_type='queries', source_platform=source_platform)
  response['status'] = 0

  return JsonResponse(response)
def upload_query(request):
  """Auto-upload a just-executed Hive/Impala query to the Optimizer service.

  POST params: sourcePlatform (default 'default'), query_id.
  Upload happens only when auto-upload is on, the platform is hive/impala and
  a query_id is supplied; otherwise (or for task queries with no Document2)
  the call is skipped with status 0.
  """
  source_platform = request.POST.get('sourcePlatform', 'default')
  query_id = request.POST.get('query_id')

  response = {'status': -1}

  eligible = OPTIMIZER.AUTO_UPLOAD_QUERIES.get() and source_platform in ('hive', 'impala') and query_id

  if eligible:
    try:
      doc = Document2.objects.document(request.user, doc_id=query_id)
      query_data = Notebook(document=doc).get_data()

      converted = _convert_queries([query_data])
      source_platform = query_data['snippets'][0]['type']

      response['query_upload'] = OptimizerApi(request.user).upload(
        data=converted,
        data_type='queries',
        source_platform=source_platform,
      )
    except Document2.DoesNotExist:
      # Task queries have no persisted document; skip them quietly.
      response['query_upload'] = _('Skipped as task query')
  else:
    response['query_upload'] = _('Skipped')

  response['status'] = 0
  return JsonResponse(response)
def top_tables(request):
  """List the most popular tables of a database according to the Optimizer.

  POST params: database (default 'default'), len (accepted but unused here).
  Returns JsonResponse {'status': 0, 'top_tables': [...]}.
  """
  response = {'status': -1}

  database = request.POST.get('database', 'default')
  # FIX: this parameter was previously bound to the name `len`, shadowing the
  # builtin, and was never used. Keep reading it for request compatibility.
  _limit = request.POST.get('len', 1000)  # currently unused

  api = OptimizerApi()
  data = api.top_tables(database_name=database)

  tables = []
  for entry in data['results']:
    paths = _get_table_name(entry['name'])  # split once instead of twice per row
    tables.append({
      'eid': entry['eid'],
      'database': paths['database'],
      'name': paths['table'],
      'popularity': entry['workloadPercent'],
      'column_count': entry['columnCount'],
      'patternCount': entry['patternCount'],
      'total': entry['total'],
      'is_fact': entry['type'] != 'Dimension'
    })

  response['top_tables'] = tables
  response['status'] = 0

  return JsonResponse(response)
def upload_table_stats(request):
  """Upload DDL, table stats and column stats for a set of tables to the Optimizer.

  POST params (all JSON-encoded):
    db_tables: list of 'db.table' names.
    sourcePlatform: '"hive"' (default) or '"impala"'.
    with_ddl / with_table / with_columns: booleans selecting what to collect.
  Per-table failures are logged and skipped; each upload's state is mirrored
  into the response and folded into the overall 'status'.
  """
  response = {'status': -1}

  # BUG FIX: the default '[]' was passed as json.loads' second positional
  # argument instead of as POST.get's default, so a missing 'db_tables'
  # crashed json.loads(None).
  db_tables = json.loads(request.POST.get('db_tables', '[]'))
  source_platform = json.loads(request.POST.get('sourcePlatform', '"hive"'))
  with_ddl = json.loads(request.POST.get('with_ddl', 'false'))
  with_table_stats = json.loads(request.POST.get('with_table', 'false'))
  with_columns_stats = json.loads(request.POST.get('with_columns', 'false'))

  table_ddls = []
  table_stats = []
  column_stats = []

  # Config gates override what the client asked for.
  if not OPTIMIZER.AUTO_UPLOAD_DDL.get():
    with_ddl = False
  if not OPTIMIZER.AUTO_UPLOAD_STATS.get():
    with_table_stats = with_columns_stats = False

  for db_table in db_tables:
    path = _get_table_name(db_table)

    try:
      if with_ddl:
        db = _get_db(request.user, source_type=source_platform)
        query = hql_query('SHOW CREATE TABLE `%(database)s`.`%(table)s`' % path)
        handle = db.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          result = db.fetch(handle, rows=5000)
          db.close(handle)
          table_ddls.append((0, 0, ' '.join([row[0] for row in result.rows()]), path['database']))

      # BUG FIX: mock_request and full_table_stats used to be created only
      # inside the with_table_stats branch, so requesting column stats alone
      # hit a NameError (silently swallowed by the except below). Build them
      # whenever a branch needs them.
      if with_table_stats or with_columns_stats:
        mock_request = MockRequest(user=request.user, source_platform=source_platform)
      if with_table_stats or (with_columns_stats and source_platform != 'impala'):
        full_table_stats = json.loads(get_table_stats(mock_request, database=path['database'], table=path['table']).content)

      if with_table_stats:
        stats = dict((stat['data_type'], stat['comment']) for stat in full_table_stats['stats'])

        table_stats.append({
          'table_name': '%(database)s.%(table)s' % path, # DB Prefix
          'num_rows': stats.get('numRows', -1),
          'last_modified_time': stats.get('transient_lastDdlTime', -1),
          'total_size': stats.get('totalSize', -1),
          'raw_data_size': stats.get('rawDataSize', -1),
          'num_files': stats.get('numFiles', -1),
          'num_partitions': stats.get('numPartitions', -1),
          # bytes_cached / cache_replication / format are not available here
        })

      if with_columns_stats:
        if source_platform == 'impala':
          # Impala returns all column stats in a single call (column=-1).
          colum_stats = json.loads(get_table_stats(mock_request, database=path['database'], table=path['table'], column=-1).content)['stats']
        else:
          # Hive needs one call per column; cap at 25 columns per table.
          colum_stats = [
            json.loads(get_table_stats(mock_request, database=path['database'], table=path['table'], column=col).content)['stats']
            for col in full_table_stats['columns'][:25]
          ]

        # Flatten each column's list of single-key dicts, mapping None to ''.
        raw_column_stats = [
          dict([(key, val if val is not None else '') for col_stat in col for key, val in col_stat.items()])
          for col in colum_stats
        ]

        for col_stats in raw_column_stats:
          column_stats.append({
            'table_name': '%(database)s.%(table)s' % path, # DB Prefix
            'column_name': col_stats['col_name'],
            'data_type': col_stats['data_type'],
            "num_distinct": int(col_stats.get('distinct_count')) if col_stats.get('distinct_count') != '' else -1,
            "num_nulls": int(col_stats['num_nulls']) if col_stats['num_nulls'] != '' else -1,
            "avg_col_len": int(float(col_stats['avg_col_len'])) if col_stats['avg_col_len'] != '' else -1,
            "max_size": int(float(col_stats['max_col_len'])) if col_stats['max_col_len'] != '' else -1,
            "min": col_stats['min'] if col_stats.get('min', '') != '' else -1,
            "max": col_stats['max'] if col_stats.get('max', '') != '' else -1,
            "num_trues": col_stats['num_trues'] if col_stats.get('num_trues', '') != '' else -1,
            "num_falses": col_stats['num_falses'] if col_stats.get('num_falses', '') != '' else -1,
          })
    except Exception as e:
      # Best-effort per table: log and move on to the next one.
      LOG.exception('Skipping upload of %s: %s' % (db_table, e))

  api = OptimizerApi(request.user)
  response['status'] = 0

  if table_stats:
    response['upload_table_stats'] = api.upload(data=table_stats, data_type='table_stats', source_platform=source_platform)
    response['upload_table_stats_status'] = 0 if response['upload_table_stats']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
    response['status'] = response['upload_table_stats_status']
  if column_stats:
    response['upload_cols_stats'] = api.upload(data=column_stats, data_type='cols_stats', source_platform=source_platform)
    response['upload_cols_stats_status'] = response['status'] if response['upload_cols_stats']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
    if response['upload_cols_stats_status'] != 0:
      response['status'] = response['upload_cols_stats_status']
  if table_ddls:
    response['upload_table_ddl'] = api.upload(data=table_ddls, data_type='queries', source_platform=source_platform)
    response['upload_table_ddl_status'] = response['status'] if response['upload_table_ddl']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
    if response['upload_table_ddl_status'] != 0:
      response['status'] = response['upload_table_ddl_status']

  return JsonResponse(response)
if n: history = history[:n] queries = [] for doc in history: query_data = Notebook(document=doc).get_data() try: original_query_id = '%s:%s' % struct.unpack(b"QQ", base64.decodestring(query_data['snippets'][0]['result']['handle']['guid'])) execution_time = query_data['snippets'][0]['result']['executionTime'] * 100 queries.append((original_query_id, execution_time, query_data['snippets'][0]['statement'], query_data['snippets'][0].get('database', 'default'))) except Exception, e: LOG.warning('Skipping upload of %s: %s' % (doc, e)) api = OptimizerApi() response['upload_history'] = api.upload(data=queries, data_type='queries', source_platform=source_platform) response['status'] = 0 return JsonResponse(response) @require_POST @error_handler def upload_table_stats(request): response = {'status': -1} db_tables = json.loads(request.POST.get('db_tables'), '[]') source_platform = request.POST.get('sourcePlatform', 'hive') with_columns = json.loads(request.POST.get('with_columns', 'false'))
def popular_values(request):
  """Return popular filter values for a table column.

  POST params: tableName, columnName (optional).
  With OPTIMIZER.MOCKING enabled, canned TPC-DS-style sample data for
  date_dim columns is built instead of calling the Optimizer service.
  Returns JsonResponse: {'status': 0, 'values': [...]} on success,
  otherwise {'status': -1, 'message': ...}.
  """
  response = {'status': -1}

  table_name = request.POST.get('tableName')
  column_name = request.POST.get('columnName')

  if OPTIMIZER.MOCKING.get():
    # NOTE(review): in this mocked branch `values` is built but never copied
    # into `response`, so the endpoint returns {'status': -1} with no values —
    # confirm whether a `response['values'] = values` assignment was lost.
    if column_name:
      # Single-column mock payload.
      values = [
        {"values": ["1", "(6,0)"], "columnName": "d_dow", "tableName": "date_dim"}
      ]
    else:
      # Whole-table mock payload: one entry per popular column of date_dim.
      values = [
        {"values": ["('2001q1','2001q2','2001q3')", "'2001q1'"], "columnName": "d_quarter_name", "tableName": "date_dim"},
        {"values": ["1", "2", "4"], "columnName": "d_qoy", "tableName": "date_dim"},
        {"values": ["Subquery"], "columnName": "d_week_seq", "tableName": "date_dim"},
        {"values": ["(cast('1998-08-14' as date) + interval '30' day)", "(cast ('1998-03-08' as date) + interval '30' day)", "d1.d_date + 5", "cast('1998-08-14' as date)", "cast('1999-04-26' as date)", "'2002-4-01'", "(cast('2000-02-02' as date) + interval '90' day)", "(cast('2002-4-01' as date) + interval '60' day)", "(cast('2002-01-18' as date) + 60 + interval '60' day)", "('1999-04-17','1999-10-04','1999-11-10')", "(cast('1999-04-26' as date) + 30 + interval '30' day)", "(cast('1999-06-03' as date) + interval '30' day)", "cast('1998-01-06' as date)", "(cast('2000-2-01' as date) + interval '60' day)", "(cast('2002-04-01' as date) + interval '30' day)", "( cast('2000-03-22' as date ) + interval '90' day )", "cast('2001-08-21' as date)", "(cast ('1998-03-08' as date) - interval '30' day)", "'2000-03-22'", "(cast('2001-08-21' as date) + interval '14' day)", "( cast('1999-08-25' as date) + interval '30' day )", "Subquery", "'2000-3-01'", "cast('2002-01-18' as date)", "(cast ('2001-03-14' as date) - interval '30' day)", "'2000-02-02'", "cast('2002-04-01' as date)", "'2002-03-09'", "(cast('2000-3-01' as date) + interval '60' day)", "cast('1999-06-03' as date)", "cast('1999-08-25' as date)", "(cast ('2001-03-14' as date) + interval '30' day)", "'2000-2-01'", "(cast('1998-01-06' as date) + interval '60' day)"], "columnName": "d_date", "tableName": "date_dim"},
        {"values": ["1223", "1200", "1202", "1214+11", "(select distinct date_dim.d_month_seq+1 from date_dim where date_dim.d_year = 2001 and date_dim.d_moy = 5)", "1181+11", "1199", "1191", "(1206,1206+1,1206+2,1206+3,1206+4,1206+5,1206+6,1206+7,1206+8,1206+9,1206+10,1206+11)", "1211 + 11", "1199 + 11", "1212", "(select distinct date_dim.d_month_seq+3 from date_dim where date_dim.d_year = 2001 and date_dim.d_moy = 5)", "1211", "1214", "Subquery", "(1195,1195+1,1195+2,1195+3,1195+4,1195+5,1195+6,1195+7,1195+8,1195+9,1195+10,1195+11)", "1200+11", "1212 + 11", "1223+11", "1183 + 11", "1183", "1181", "1191 + 11", "1202 + 11"], "columnName": "d_month_seq", "tableName": "date_dim"},
        {"values": ["11", "4 + 3", "12", "3+2", "2+3", "1", "3", "2", "5", "4", "6", "8", "10"], "columnName": "d_moy", "tableName": "date_dim"},
        {"values": ["25", "16", "28", "1", "3", "2"], "columnName": "d_dom", "tableName": "date_dim"},
        {"values": ["(1998,1998+1)", "2000 + 1", "2000 + 2", "(2000,2000+1,2000+2)", "(1999,1999+1,1999+2)", "2000-1", "2001+1", "1999 + 2", "2000+1", "2000+2", "1999+1", "(2002)", "( 1999, 1999 + 1, 1999 + 2, 1999 + 3 )", "1999-1", "( 1998, 1998 + 1, 1998 + 2 )", "1999", "1998", "(1998,1998+1,1998+2)", "2002", "2000", "2001", "2004"], "columnName": "d_year", "tableName": "date_dim"},
        {"values": ["1", "(6,0)"], "columnName": "d_dow", "tableName": "date_dim"}
      ]
  else:
    api = OptimizerApi()

    data = api.popular_filter_values(table_name=table_name, column_name=column_name)

    if data['status'] == 'success':
      # A 'status' key inside details marks a malformed payload.
      if 'status' in data['details']:
        response['values'] = [] # Bug in Opt API
      else:
        response['values'] = data['details']
      response['status'] = 0
    else:
      response['message'] = 'Optimizer: %s' % data['details']

  return JsonResponse(response)
'table_name': path['table'], 'column_name': col, 'data_type': col_stats['data_type'], "num_distinct": int(col_stats.get('distinct_count')) if col_stats.get('distinct_count') != '' else -1, "num_nulls": int(col_stats['num_nulls']) if col_stats['num_nulls'] != '' else -1, "avg_col_len": int(float(col_stats['avg_col_len'])) if col_stats['avg_col_len'] != '' else -1, "max_size": int(float(col_stats['max_col_len'])) if col_stats['max_col_len'] != '' else -1, "min": col_stats['min'] if col_stats.get('min', '') != '' else -1, "max": col_stats['max'] if col_stats.get('max', '') != '' else -1, "num_trues": col_stats['num_trues'] if col_stats.get('num_trues', '') != '' else -1, "num_falses": col_stats['num_falses'] if col_stats.get('num_falses', '') != '' else -1, }) except Exception, e: LOG.exception('Skipping upload of %s: %s' % (db_table, e)) api = OptimizerApi(request.user) response['upload_table_stats'] = api.upload(data=table_stats, data_type='table_stats', source_platform=source_platform) response['status'] = 0 if response['upload_table_stats']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1 if column_stats: response['upload_cols_stats'] = api.upload(data=column_stats, data_type='cols_stats', source_platform=source_platform) response['status'] = response['status'] if response['upload_cols_stats']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1 if table_ddls: response['upload_table_ddl'] = api.upload(data=table_ddls, data_type='queries', source_platform=source_platform) return JsonResponse(response) @require_POST @error_handler def upload_status(request):