Example #1
def query_compatibility(request):
  response = {'status': -1}

  source_platform = request.POST.get('sourcePlatform')
  target_platform = request.POST.get('targetPlatform')
  query = request.POST.get('query')

  api = OptimizerApi()

  data = api.query_compatibility(source_platform=source_platform, target_platform=target_platform, query=query)

  if data['status'] == 'success':
    response['status'] = 0
    response['query_compatibility'] = json.loads(data['details'])
  else:
    response['message'] = 'Optimizer: %s' % data['details']

  return JsonResponse(response)
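For context, a minimal sketch of exercising this view with Django's test client; the URL route and the field values are hypothetical, only the field names come from the view above.

# Hypothetical usage sketch: POST the three form fields the view reads.
import json

from django.test import Client

client = Client()  # a login may be required first, e.g. client.force_login(some_user)
resp = client.post('/metadata/api/optimizer/query_compatibility', {
    'sourcePlatform': 'hive',
    'targetPlatform': 'impala',
    'query': 'SELECT * FROM web_logs LIMIT 10',
})
print(json.loads(resp.content))  # {'status': 0, 'query_compatibility': {...}} on success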
Example #2
def query_compatibility(request):
  response = {'status': -1}

  source_platform = request.POST.get('sourcePlatform')
  target_platform = request.POST.get('targetPlatform')
  query = request.POST.get('query')

  api = OptimizerApi(request.user)

  data = api.query_compatibility(source_platform=source_platform, target_platform=target_platform, query=query)

  if data:
    response['status'] = 0
    response['query_compatibility'] = data
  else:
    response['message'] = 'Optimizer: %s' % data

  return JsonResponse(response)
Example #3
def table_details(request):
    response = {'status': -1}

    database_name = request.POST.get('databaseName')
    table_name = request.POST.get('tableName')

    api = OptimizerApi()

    data = api.table_details(database_name=database_name,
                             table_name=table_name)

    if data:
        response['status'] = 0
        response['details'] = data
    else:
        response['message'] = 'Optimizer: %s' % data  # data is falsy here; indexing ['details'] would raise

    return JsonResponse(response)
Example #4
def query_risk(request):
  response = {'status': -1}

  query = json.loads(request.POST.get('query'))
  source_platform = request.POST.get('sourcePlatform')
  db_name = request.POST.get('dbName')

  api = OptimizerApi(request.user)

  data = api.query_risk(query=query, source_platform=source_platform, db_name=db_name)

  if data:
    response['status'] = 0
    response['query_risk'] = data
  else:
    response['message'] = 'Optimizer: %s' % data

  return JsonResponse(response)
Example #5
def similar_queries(request):
    response = {'status': -1}

    source_platform = request.POST.get('sourcePlatform')
    query = request.POST.get('query')

    api = OptimizerApi()

    data = api.similar_queries(source_platform=source_platform, query=query)

    if data['status'] == 'success':
        response['status'] = 0
        response['similar_queries'] = json.loads(
            data['details']['similarQueries'])
    else:
        response['message'] = 'Optimizer: %s' % data['details']

    return JsonResponse(response)
Example #6
def upload_history(request):
    response = {'status': -1}

    if request.user.is_superuser:
        api = OptimizerApi(request.user)
        histories = []
        upload_stats = {}

        if request.POST.get('sourcePlatform'):
            # Cap the requested history size at the configured upload limit;
            # POST values are strings, so convert before comparing.
            n = min(int(request.POST.get('n', OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get())),
                    OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get())
            source_platform = request.POST.get('sourcePlatform', 'hive')
            histories = [(source_platform,
                          Document2.objects.get_history(
                              doc_type='query-%s' % source_platform,
                              user=request.user)[:n])]

        elif OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get() > 0:
            histories = [(source_platform,
                          Document2.objects.filter(
                              type='query-%s' % source_platform,
                              is_history=True,
                              is_managed=False,
                              is_trashed=False).order_by('-last_modified')
                          [:OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get()])
                         for source_platform in ['hive', 'impala']]

        for source_platform, history in histories:
            queries = _convert_queries(
                [Notebook(document=doc).get_data() for doc in history])
            upload_stats[source_platform] = api.upload(
                data=queries,
                data_type='queries',
                source_platform=source_platform)

        response['upload_history'] = upload_stats
        response['status'] = 0
    else:
        response['message'] = _(
            'Query history upload requires Admin privileges, or the feature is disabled.'
        )

    return JsonResponse(response)
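Below, a sketch of calling upload_history() as an admin with Django's test client; the URL is hypothetical, and a non-superuser caller gets the 'requires Admin privileges' message instead.

# Hypothetical usage sketch for the admin-only upload_history() view.
import json

from django.contrib.auth.models import User
from django.test import Client

client = Client()
client.force_login(User.objects.get(username='admin'))  # must be a superuser

resp = client.post('/metadata/api/optimizer/upload/history', {'sourcePlatform': 'hive', 'n': 50})
print(json.loads(resp.content))  # {'status': 0, 'upload_history': {'hive': {...}}}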
Example #7
  def statement_risk(self, notebook, snippet):
    db = self._get_db(snippet)

    response = self._get_current_statement(db, snippet)
    query = response['statement']

    api = OptimizerApi()

    data = api.query_risk(query=query, source_platform=snippet['type'])
    data = data.get(snippet['type'] + 'Risk', {})

    # A single all-empty entry means the backend found no risks, so return an empty list.
    if data == {"riskAnalysis": "", "risk": "low", "riskRecommendation": ""}:
      data = []

    return [{
      'risk': risk.get('risk'),
      'riskAnalysis': risk.get('riskAnalysis'),
      'riskRecommendation': risk.get('riskRecommendation')
    } for risk in data]
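For reference, the list comprehension above yields entries of the following shape; the values here are purely illustrative.

# Illustrative only: shape of statement_risk()'s return value.
[{
  'risk': 'low',
  'riskAnalysis': 'Missing filter on a partition column',
  'riskRecommendation': 'Add a partition filter to reduce the scan',
}]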
Example #8
def popular_values(request):
    response = {'status': -1}

    database_name = request.POST.get('databaseName')
    table_name = request.POST.get('tableName')
    column_name = request.POST.get('columnName')  # passed to the API call below

    api = OptimizerApi()
    data = api.popular_filter_values(database_name=database_name,
                                     table_name=table_name,
                                     column_name=column_name)

    if data['status'] == 'success':
        response['status'] = 0
        response['values'] = data['results']
    else:
        response['message'] = 'Optimizer: %s' % data

    return JsonResponse(response)
Example #9
def popular_values(request):
  response = {'status': -1}

  table_name = request.POST.get('tableName')
  column_name = request.POST.get('columnName')

  api = OptimizerApi()
  data = api.popular_filter_values(table_name=table_name, column_name=column_name)

  if data['status'] == 'success':
    if 'status' in data['details']:
      response['values'] = [] # Bug in the Optimizer API: an error payload can arrive under a 'success' status
    else:
      response['values'] = data['details']
      response['status'] = 0
  else:
    response['message'] = 'Optimizer: %s' % data['details']

  return JsonResponse(response)
Example #10
def upload_query(request):
  response = {'status': -1}

  if OPTIMIZER.AUTO_UPLOAD_QUERIES.get():
    query_id = request.POST.get('query_id')

    doc = Document2.objects.document(request.user, doc_id=query_id)

    query_data = Notebook(document=doc).get_data()
    queries = _convert_queries([query_data])
    source_platform = query_data['snippets'][0]['type']

    api = OptimizerApi(request.user)

    response['query_upload'] = api.upload(data=queries, data_type='queries', source_platform=source_platform)
  else:
    response['query_upload'] = _('Skipped')
  response['status'] = 0

  return JsonResponse(response)
Example #11
def top_tables(request):
    response = {'status': -1}

    database = request.POST.get('database', 'default')
    limit = int(request.POST.get('len', 1000))  # renamed from 'len' to avoid shadowing the builtin; unused below

    api = OptimizerApi()
    data = api.top_tables(database_name=database)

    tables = [{
        'eid': table['eid'],
        'database': _get_table_name(table['name'])['database'],
        'name': _get_table_name(table['name'])['table'],
        'popularity': table['workloadPercent'],
        'column_count': table['columnCount'],
        'patternCount': table['patternCount'],
        'total': table['total'],
        'is_fact': table['type'] != 'Dimension'
    } for table in data['results']]

    if NAVIGATOR.APPLY_SENTRY_PERMISSIONS.get():
        checker = PrivilegeChecker(user=request.user)
        action = 'SELECT'

        for table in tables:
            paths = _get_table_name(table['name'])
            table.update({
                u'db': paths['database'],
                u'table': paths['table'],
                u'column': None,
                u'server': u'server1'
            })
        tables = list(checker.filter_objects(tables,
                                             action))  #, getkey=getkey)

    response['top_tables'] = tables
    response['status'] = 0

    return JsonResponse(response)
Example #12
def top_tables(request):
    response = {'status': -1}

    database = request.POST.get('database', 'default')
    limit = int(request.POST.get('len', 1000))  # renamed from 'len' to avoid shadowing the builtin

    if OPTIMIZER.MOCKING.get():
        from beeswax.server import dbms
        from beeswax.server.dbms import get_query_server_config
        db = dbms.get(request.user)
        tables = [{
            'name': table,
            'popularity': random.randint(1, 100),
            'column_count': random.randint(1, 100),
            'is_fact': bool(random.getrandbits(1))
        } for table in db.get_tables(database=database)][:limit]
    else:
        """
    Get back:
    # u'details': [{u'columnCount': 28, u'name': u'date_dim', u'patternCount': 136, u'workloadPercent': 89, u'total': 92, u'type': u'Dimension', u'eid': u'19'},
    """
        api = OptimizerApi()
        data = api.top_tables()

        tables = [{
            'eid': table['eid'],
            'name': table['name'],
            'popularity': table['workloadPercent'],
            'column_count': table['columnCount'],
            'patternCount': table['patternCount'],
            'total': table['total'],
            'is_fact': table['type'] != 'Dimension'
        } for table in data['details']]

    response['top_tables'] = tables
    response['status'] = 0

    return JsonResponse(response)
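When unit-testing the mocked branch, Hue config flags can typically be flipped with set_for_testing, which returns a reset callable; a sketch, assuming the standard Hue conf API.

# Sketch: force the OPTIMIZER.MOCKING branch in a test, then restore the flag.
reset = OPTIMIZER.MOCKING.set_for_testing(True)
try:
  response = top_tables(request)  # serves randomized table metadata
finally:
  reset()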
Example #13
def upload_history(request):
    response = {'status': -1}

    n = request.POST.get('n')
    source_platform = request.POST.get('sourcePlatform', 'hive')

    history = Document2.objects.get_history(doc_type='query-%s' %
                                            source_platform,
                                            user=request.user)
    if n:
        history = history[:int(n)]  # POST values are strings; convert before slicing

    queries = _convert_queries(
        [Notebook(document=doc).get_data() for doc in history])

    api = OptimizerApi()

    response['upload_history'] = api.upload(data=queries,
                                            data_type='queries',
                                            source_platform=source_platform)
    response['status'] = 0

    return JsonResponse(response)
Example #14
def upload_query(request):
  response = {'status': -1}

  source_platform = request.POST.get('sourcePlatform', 'default')
  query_id = request.POST.get('query_id')

  if OPTIMIZER.AUTO_UPLOAD_QUERIES.get() and source_platform in ('hive', 'impala') and query_id:
    try:
      doc = Document2.objects.document(request.user, doc_id=query_id)

      query_data = Notebook(document=doc).get_data()
      queries = _convert_queries([query_data])
      source_platform = query_data['snippets'][0]['type']

      api = OptimizerApi(request.user)

      response['query_upload'] = api.upload(data=queries, data_type='queries', source_platform=source_platform)
    except Document2.DoesNotExist:
      response['query_upload'] = _('Skipped as task query')
  else:
    response['query_upload'] = _('Skipped')
  response['status'] = 0

  return JsonResponse(response)
Example #15
def top_tables(request):
    response = {'status': -1}

    database = request.POST.get('database', 'default')
    limit = int(request.POST.get('len', 1000))  # renamed from 'len' to avoid shadowing the builtin; unused below

    api = OptimizerApi()
    data = api.top_tables(database_name=database)

    tables = [{
        'eid': table['eid'],
        'database': _get_table_name(table['name'])['database'],
        'name': _get_table_name(table['name'])['table'],
        'popularity': table['workloadPercent'],
        'column_count': table['columnCount'],
        'patternCount': table['patternCount'],
        'total': table['total'],
        'is_fact': table['type'] != 'Dimension'
    } for table in data['results']]

    response['top_tables'] = tables
    response['status'] = 0

    return JsonResponse(response)
Example #16
def upload_table_stats(request):
  response = {'status': -1}

  db_tables = json.loads(request.POST.get('db_tables', '[]'))
  source_platform = json.loads(request.POST.get('sourcePlatform', '"hive"'))
  with_ddl = json.loads(request.POST.get('with_ddl', 'false'))
  with_table_stats = json.loads(request.POST.get('with_table', 'false'))
  with_columns_stats = json.loads(request.POST.get('with_columns', 'false'))

  table_ddls = []
  table_stats = []
  column_stats = []

  if not OPTIMIZER.AUTO_UPLOAD_DDL.get():
    with_ddl = False

  if not OPTIMIZER.AUTO_UPLOAD_STATS.get():
    with_table_stats = with_columns_stats = False


  for db_table in db_tables:
    path = _get_table_name(db_table)

    try:
      if with_ddl:
        db = _get_db(request.user, source_type=source_platform)
        query = hql_query('SHOW CREATE TABLE `%(database)s`.`%(table)s`' % path)
        handle = db.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          result = db.fetch(handle, rows=5000)
          db.close(handle)
          table_ddls.append((0, 0, ' '.join([row[0] for row in result.rows()]), path['database']))

      # MockRequest is needed by both the table-stats and the column-stats calls below.
      mock_request = MockRequest(user=request.user, source_platform=source_platform)

      if with_table_stats:
        full_table_stats = json.loads(get_table_stats(mock_request, database=path['database'], table=path['table']).content)
        stats = dict((stat['data_type'], stat['comment']) for stat in full_table_stats['stats'])

        table_stats.append({
          'table_name': '%(database)s.%(table)s' % path, # DB Prefix
          'num_rows':  stats.get('numRows', -1),
          'last_modified_time':  stats.get('transient_lastDdlTime', -1),
          'total_size':  stats.get('totalSize', -1),
          'raw_data_size':  stats.get('rawDataSize', -1),
          'num_files':  stats.get('numFiles', -1),
          'num_partitions':  stats.get('numPartitions', -1),
          # bytes_cached
          # cache_replication
          # format
        })

      if with_columns_stats:
        if source_platform == 'impala':
          raw_stats = json.loads(get_table_stats(mock_request, database=path['database'], table=path['table'], column=-1).content)['stats']
        else:
          # Hive path reuses full_table_stats, so it assumes with_table_stats was enabled above.
          raw_stats = [
              json.loads(get_table_stats(mock_request, database=path['database'], table=path['table'], column=col).content)['stats']
              for col in full_table_stats['columns'][:25]
          ]

        raw_column_stats = [dict([(key, val if val is not None else '') for col_stat in col for key, val in col_stat.items()]) for col in raw_stats]

        for col_stats in raw_column_stats:
          column_stats.append({
            'table_name': '%(database)s.%(table)s' % path, # DB Prefix
            'column_name': col_stats['col_name'],
            'data_type': col_stats['data_type'],
            "num_distinct": int(col_stats.get('distinct_count')) if col_stats.get('distinct_count') != '' else -1,
            "num_nulls": int(col_stats['num_nulls']) if col_stats['num_nulls'] != '' else -1,
            "avg_col_len": int(float(col_stats['avg_col_len'])) if col_stats['avg_col_len'] != '' else -1,
            "max_size": int(float(col_stats['max_col_len'])) if col_stats['max_col_len'] != '' else -1,
            "min": col_stats['min'] if col_stats.get('min', '') != '' else -1,
            "max": col_stats['max'] if col_stats.get('max', '') != '' else -1,
            "num_trues": col_stats['num_trues'] if col_stats.get('num_trues', '') != '' else -1,
            "num_falses": col_stats['num_falses'] if col_stats.get('num_falses', '') != '' else -1,
          })
    except Exception as e:
      LOG.exception('Skipping upload of %s: %s' % (db_table, e))

  api = OptimizerApi(request.user)

  response['status'] = 0

  if table_stats:
    response['upload_table_stats'] = api.upload(data=table_stats, data_type='table_stats', source_platform=source_platform)
    response['upload_table_stats_status'] = 0 if response['upload_table_stats']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
    response['status'] = response['upload_table_stats_status']
  if column_stats:
    response['upload_cols_stats'] = api.upload(data=column_stats, data_type='cols_stats', source_platform=source_platform)
    response['upload_cols_stats_status'] = response['status'] if response['upload_cols_stats']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
    if response['upload_cols_stats_status'] != 0:
      response['status'] = response['upload_cols_stats_status']
  if table_ddls:
    response['upload_table_ddl'] = api.upload(data=table_ddls, data_type='queries', source_platform=source_platform)
    response['upload_table_ddl_status'] = response['status'] if response['upload_table_ddl']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
    if response['upload_table_ddl_status'] != 0:
      response['status'] = response['upload_table_ddl_status']

  return JsonResponse(response)
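Note that every field of this view goes through json.loads(), so callers must JSON-encode each form value, including the plain string sourcePlatform. A sketch with Django's test client; the URL is hypothetical.

# Hypothetical usage sketch: all form values are JSON-encoded strings.
import json
from django.test import Client

client = Client()  # a superuser login may be required first
resp = client.post('/metadata/api/optimizer/upload/table_stats', {
    'db_tables': json.dumps(['default.web_logs', 'default.customers']),
    'sourcePlatform': json.dumps('hive'),
    'with_ddl': json.dumps(True),
    'with_table': json.dumps(True),
    'with_columns': json.dumps(False),
})
print(json.loads(resp.content))  # per-payload upload statuses plus an overall 'status'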
Example #17
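  # Excerpt: the opening of this upload_history() variant is elided; `response`, `n`,
  # `source_platform` and `history` are already populated (compare Example #13).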
  if n:
    history = history[:n]

  queries = []
  for doc in history:
    query_data = Notebook(document=doc).get_data()

    try:
      # Rebuild the original query id from the two unsigned 64-bit ints packed in the handle guid.
      original_query_id = '%s:%s' % struct.unpack(b"QQ", base64.b64decode(query_data['snippets'][0]['result']['handle']['guid']))
      execution_time = query_data['snippets'][0]['result']['executionTime'] * 100

      queries.append((original_query_id, execution_time, query_data['snippets'][0]['statement'], query_data['snippets'][0].get('database', 'default')))
    except Exception as e:
      LOG.warning('Skipping upload of %s: %s' % (doc, e))

  api = OptimizerApi()

  response['upload_history'] = api.upload(data=queries, data_type='queries', source_platform=source_platform)
  response['status'] = 0

  return JsonResponse(response)


@require_POST
@error_handler
def upload_table_stats(request):
  response = {'status': -1}

  db_tables = json.loads(request.POST.get('db_tables', '[]'))
  source_platform = request.POST.get('sourcePlatform', 'hive')
  with_columns = json.loads(request.POST.get('with_columns', 'false'))
Example #18
def popular_values(request):
  response = {'status': -1}

  table_name = request.POST.get('tableName')
  column_name = request.POST.get('columnName')

  if OPTIMIZER.MOCKING.get():
    if column_name:
      values = [
          {
            "values": [
              "1",
              "(6,0)"
            ],
            "columnName": "d_dow",
            "tableName": "date_dim"
          }
      ]
    else:
      values = [
          {
            "values": [
              "('2001q1','2001q2','2001q3')",
              "'2001q1'"
            ],
            "columnName": "d_quarter_name",
            "tableName": "date_dim"
          },
          {
            "values": [
              "1",
              "2",
              "4"
            ],
            "columnName": "d_qoy",
            "tableName": "date_dim"
          },
          {
            "values": [
              "Subquery"
            ],
            "columnName": "d_week_seq",
            "tableName": "date_dim"
          },
          {
            "values": [
              "(cast('1998-08-14' as date) + interval '30' day)",
              "(cast ('1998-03-08' as date) + interval '30' day)",
              "d1.d_date + 5",
              "cast('1998-08-14' as date)",
              "cast('1999-04-26' as date)",
              "'2002-4-01'",
              "(cast('2000-02-02' as date) + interval '90' day)",
              "(cast('2002-4-01' as date) + interval '60' day)",
              "(cast('2002-01-18' as date) + 60 + interval '60' day)",
              "('1999-04-17','1999-10-04','1999-11-10')",
              "(cast('1999-04-26' as date) + 30 + interval '30' day)",
              "(cast('1999-06-03' as date) + interval '30' day)",
              "cast('1998-01-06' as date)",
              "(cast('2000-2-01' as date) + interval '60' day)",
              "(cast('2002-04-01' as date) + interval '30' day)",
              "( cast('2000-03-22' as date ) + interval '90' day )",
              "cast('2001-08-21' as date)",
              "(cast ('1998-03-08' as date) - interval '30' day)",
              "'2000-03-22'",
              "(cast('2001-08-21' as date) + interval '14' day)",
              "( cast('1999-08-25' as date) + interval '30' day )",
              "Subquery",
              "'2000-3-01'",
              "cast('2002-01-18' as date)",
              "(cast ('2001-03-14' as date) - interval '30' day)",
              "'2000-02-02'",
              "cast('2002-04-01' as date)",
              "'2002-03-09'",
              "(cast('2000-3-01' as date) + interval '60' day)",
              "cast('1999-06-03' as date)",
              "cast('1999-08-25' as date)",
              "(cast ('2001-03-14' as date) + interval '30' day)",
              "'2000-2-01'",
              "(cast('1998-01-06' as date) + interval '60' day)"
            ],
            "columnName": "d_date",
            "tableName": "date_dim"
          },
          {
            "values": [
              "1223",
              "1200",
              "1202",
              "1214+11",
              "(select distinct date_dim.d_month_seq+1 from date_dim where date_dim.d_year = 2001 and date_dim.d_moy = 5)",
              "1181+11",
              "1199",
              "1191",
              "(1206,1206+1,1206+2,1206+3,1206+4,1206+5,1206+6,1206+7,1206+8,1206+9,1206+10,1206+11)",
              "1211 + 11",
              "1199 + 11",
              "1212",
              "(select distinct date_dim.d_month_seq+3 from date_dim where date_dim.d_year = 2001 and date_dim.d_moy = 5)",
              "1211",
              "1214",
              "Subquery",
              "(1195,1195+1,1195+2,1195+3,1195+4,1195+5,1195+6,1195+7,1195+8,1195+9,1195+10,1195+11)",
              "1200+11",
              "1212 + 11",
              "1223+11",
              "1183 + 11",
              "1183",
              "1181",
              "1191 + 11",
              "1202 + 11"
            ],
            "columnName": "d_month_seq",
            "tableName": "date_dim"
          },
          {
            "values": [
              "11",
              "4 + 3",
              "12",
              "3+2",
              "2+3",
              "1",
              "3",
              "2",
              "5",
              "4",
              "6",
              "8",
              "10"
            ],
            "columnName": "d_moy",
            "tableName": "date_dim"
          },
          {
            "values": [
              "25",
              "16",
              "28",
              "1",
              "3",
              "2"
            ],
            "columnName": "d_dom",
            "tableName": "date_dim"
          },
          {
            "values": [
              "(1998,1998+1)",
              "2000 + 1",
              "2000 + 2",
              "(2000,2000+1,2000+2)",
              "(1999,1999+1,1999+2)",
              "2000-1",
              "2001+1",
              "1999 + 2",
              "2000+1",
              "2000+2",
              "1999+1",
              "(2002)",
              "( 1999, 1999 + 1, 1999 + 2, 1999 + 3 )",
              "1999-1",
              "( 1998, 1998 + 1, 1998 + 2 )",
              "1999",
              "1998",
              "(1998,1998+1,1998+2)",
              "2002",
              "2000",
              "2001",
              "2004"
            ],
            "columnName": "d_year",
            "tableName": "date_dim"
          },
          {
            "values": [
              "1",
              "(6,0)"
            ],
            "columnName": "d_dow",
            "tableName": "date_dim"
          }
        ]
  else:
    api = OptimizerApi()
    data = api.popular_filter_values(table_name=table_name, column_name=column_name)

    if data['status'] == 'success':
      if 'status' in data['details']:
        response['values'] = [] # Bug in Opt API
      else:
        response['values'] = data['details']
        response['status'] = 0
    else:
      response['message'] = 'Optimizer: %s' % data['details']

  return JsonResponse(response)
Example #19
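# Excerpt: tail of an upload_table_stats() variant; the fields below complete a
# column_stats.append({...}) call inside the per-table loop (compare Example #16).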
            'table_name': path['table'],
            'column_name': col,
            'data_type': col_stats['data_type'],
            "num_distinct": int(col_stats.get('distinct_count')) if col_stats.get('distinct_count') != '' else -1,
            "num_nulls": int(col_stats['num_nulls']) if col_stats['num_nulls'] != '' else -1,
            "avg_col_len": int(float(col_stats['avg_col_len'])) if col_stats['avg_col_len'] != '' else -1,
            "max_size": int(float(col_stats['max_col_len'])) if col_stats['max_col_len'] != '' else -1,
            "min": col_stats['min'] if col_stats.get('min', '') != '' else -1,
            "max": col_stats['max'] if col_stats.get('max', '') != '' else -1,
            "num_trues": col_stats['num_trues'] if col_stats.get('num_trues', '') != '' else -1,
            "num_falses": col_stats['num_falses'] if col_stats.get('num_falses', '') != '' else -1,
          })
    except Exception as e:
      LOG.exception('Skipping upload of %s: %s' % (db_table, e))

  api = OptimizerApi(request.user)

  response['upload_table_stats'] = api.upload(data=table_stats, data_type='table_stats', source_platform=source_platform)
  response['status'] = 0 if response['upload_table_stats']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
  if column_stats:
    response['upload_cols_stats'] = api.upload(data=column_stats, data_type='cols_stats', source_platform=source_platform)
    response['status'] = response['status'] if response['upload_cols_stats']['status']['state'] in ('WAITING', 'FINISHED', 'IN_PROGRESS') else -1
  if table_ddls:
    response['upload_table_ddl'] = api.upload(data=table_ddls, data_type='queries', source_platform=source_platform)

  return JsonResponse(response)


@require_POST
@error_handler
def upload_status(request):