Esempio n. 1
0
File: api.py Progetto: OSUser/hue
def execute(request):
  """Execute a notebook snippet and return the query handle as JSON.

  Expects POST fields 'notebook' and 'snippet' (JSON-encoded documents).
  Query-type notebooks are historified even when execution fails, so the
  failed run still shows up in the query history.
  """
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  try:
    response['handle'] = get_api(request, snippet).execute(notebook, snippet)
  finally:
    # Runs even when execute() raises; the exception propagates after this
    # block, so the escaping/return below is only reached on success.
    if notebook['type'].startswith('query-'):
      _snippet = [s for s in notebook['snippets'] if s['id'] == snippet['id']][0]
      if 'handle' in response: # No failure
        _snippet['result']['handle'] = response['handle']
        _snippet['result']['statements_count'] = response['handle']['statements_count']
      else:
        _snippet['status'] = 'failed'
      history = _historify(notebook, request.user)
      response['history_id'] = history.id
      response['history_uuid'] = history.uuid

  # Materialize and HTML escape results
  if response['handle'].get('sync') and response['handle']['result'].get('data'):
    response['handle']['result']['data'] = escape_rows(response['handle']['result']['data'])

  response['status'] = 0

  return JsonResponse(response)
Esempio n. 2
0
File: api.py Progetto: kevinhjk/hue
def execute(request):
  """Execute a notebook snippet and return its handle as JSON.

  Query-type notebooks are historified even on failure; saved notebooks also
  report the uuid of their most recently modified query-type dependency.
  """
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  try:
    response['handle'] = get_api(request, snippet).execute(notebook, snippet)
  finally:
    # Runs even when execute() raises; the exception propagates afterwards.
    if notebook['type'].startswith('query-'):
      _snippet = [s for s in notebook['snippets'] if s['id'] == snippet['id']][0]
      if 'handle' in response: # No failure
        _snippet['result']['handle'] = response['handle']
      else:
        _snippet['status'] = 'failed'
      history = _historify(notebook, request.user)
      response['history_id'] = history.id
      response['history_uuid'] = history.uuid
      if notebook['isSaved']: # Keep track of history of saved queries
        response['history_parent_uuid'] = history.dependencies.filter(type__startswith='query-').latest('last_modified').uuid

  # Materialize and HTML escape results
  if response['handle'].get('sync') and response['handle']['result'].get('data'):
    response['handle']['result']['data'] = escape_rows(response['handle']['result']['data'])

  response['status'] = 0

  return JsonResponse(response)
Esempio n. 3
0
def view_results(request, id, first_row=0):
  """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  If the result set has has_result_set=False, display an empty result.

  If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
  spits out a warning if first_row doesn't match the servers conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)
  """
  first_row = long(first_row)  # Python 2 ``long``; coerces a URL string param to a number
  start_over = (first_row == 0)
  # Placeholder "empty result" object, used when nothing is fetched below.
  results = type('Result', (object,), {
                'rows': 0,
                'columns': [],
                'has_more': False,
                'start_row': 0,
            })
  data = []
  fetch_error = False
  error_message = ''
  log = ''
  columns = []
  app_name = get_app_name(request)

  query_history = authorized_get_query_history(request, id, must_exist=True)
  query_server = query_history.get_query_server_config()
  db = dbms.get(request.user, query_server)

  handle, state = _get_query_handle_and_state(query_history)
  context_param = request.GET.get('context', '')
  query_context = parse_query_context(context_param)

  # Update the status as expired should not be accessible
  expired = state == models.QueryHistory.STATE.expired

  # Retrieve query results or use empty result if no result set
  try:
    if query_server['server_name'] == 'impala' and not handle.has_result_set:
      downloadable = False
    else:
      results = db.fetch(handle, start_over, 100)

      # Materialize and HTML escape results
      data = escape_rows(results.rows())

      # We display the "Download" button only when we know that there are results:
      downloadable = first_row > 0 or data
      log = db.get_log(handle)
      columns = results.data_table.cols()

  except Exception, ex:  # Python 2 except syntax
    LOG.exception('error fetching results')

    fetch_error = True
    error_message, log = expand_exception(ex, db, handle)
  # NOTE(review): this excerpt ends without a return statement — the function
  # appears truncated in this source.
Esempio n. 4
0
File: api.py Progetto: dgs414/hue
def _get_sample_data(db, database, table):
  """Fetch sample rows for ``database.table`` and shape them into a response dict."""
  metadata = db.get_table(database, table)
  sample = db.get_sample(database, metadata)

  if not sample:
    return {'status': -1, 'message': _('Failed to get sample data.')}

  return {
      'status': 0,
      'headers': sample.cols(),
      'rows': escape_rows(sample.rows(), nulls_only=True),
  }
Esempio n. 5
0
def get_indexes(request, database, table):
  """Return the indexes of ``database.table`` as a JSON response."""
  db = dbms.get(request.user, dbms.get_query_server_config(get_app_name(request)))

  indexes = db.get_indexes(database, table)
  if not indexes:
    return JsonResponse({'status': -1, 'message': _('Failed to get indexes.')})

  return JsonResponse({
      'status': 0,
      'headers': indexes.cols(),
      'rows': escape_rows(indexes.rows(), nulls_only=True),
  })
Esempio n. 6
0
File: api.py Progetto: dimonge/hue
def get_indexes(request, database, table):
  """Return table indexes as JSON; failures populate 'error_message'."""
  query_server = dbms.get_query_server_config(get_app_name(request))
  db = dbms.get(request.user, query_server)

  result = {'status': -1, 'error_message': ''}
  indexes = db.get_indexes(database, table)

  if indexes:
    result.update(
        status=0,
        headers=indexes.cols(),
        rows=escape_rows(indexes.rows(), nulls_only=True),
    )
  else:
    result['error_message'] = _('Index data took too long to be generated')

  return JsonResponse(result)
Esempio n. 7
0
File: api.py Progetto: shobull/hue
def execute(request):
    """Execute the posted snippet and return its handle, escaping inline results."""
    notebook = json.loads(request.POST.get("notebook", "{}"))
    snippet = json.loads(request.POST.get("snippet", "{}"))

    handle = get_api(request.user, snippet, request.fs, request.jt).execute(notebook, snippet)

    # Synchronous executions may carry data inline: HTML-escape it.
    if handle.get("sync") and handle["result"].get("data"):
        handle["result"]["data"] = escape_rows(handle["result"]["data"])

    return JsonResponse({"status": 0, "handle": handle})
Esempio n. 8
0
def execute(request):
  """Execute the posted snippet and return its handle, escaping sync results."""
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  handle = get_api(request, snippet).execute(notebook, snippet)

  # Synchronous executions may embed data directly: HTML-escape it.
  if handle.get('sync') and handle['result'].get('data'):
    handle['result']['data'] = escape_rows(handle['result']['data'])

  return JsonResponse({'status': 0, 'handle': handle})
Esempio n. 9
0
def get_sample_data(request, database, table):
  """Return sample rows of ``database.table`` as JSON."""
  query_server = dbms.get_query_server_config(get_app_name(request))
  db = dbms.get(request.user, query_server)

  table_obj = db.get_table(database, table)
  sample_data = db.get_sample(database, table_obj)

  if not sample_data:
    return JsonResponse({'status': -1, 'message': _('Failed to get sample data.')})

  return JsonResponse({
      'status': 0,
      'headers': sample_data.cols(),
      'rows': escape_rows(sample_data.rows(), nulls_only=True),
  })
Esempio n. 10
0
def get_functions(request):
  """Return UDF names (optionally filtered by the GET 'prefix') as JSON."""
  app_name = get_app_name(request)
  db = dbms.get(request.user, dbms.get_query_server_config(app_name))

  functions = db.get_functions(request.GET.get('prefix', None))

  if not functions:
    return JsonResponse({'status': -1, 'message': _('Failed to get functions.')})

  escaped = escape_rows(functions.rows(), nulls_only=True)
  return JsonResponse({'status': 0, 'functions': [row[0] for row in escaped]})
Esempio n. 11
0
File: api.py Progetto: dimonge/hue
def get_sample_data(request, database, table):
  """Return sample rows as JSON; failures populate 'error_message'."""
  query_server = dbms.get_query_server_config(get_app_name(request))
  db = dbms.get(request.user, query_server)

  result = {'status': -1, 'error_message': ''}

  table_obj = db.get_table(database, table)
  sample_data = db.get_sample(database, table_obj)

  if sample_data:
    result.update(
        status=0,
        headers=sample_data.cols(),
        rows=escape_rows(sample_data.rows(), nulls_only=True),
    )
  else:
    result['error_message'] = _('Sample data took too long to be generated')

  return JsonResponse(result)
Esempio n. 12
0
File: api.py Progetto: shobull/hue
def get_sample_data(request, database, table):
  """Return sample rows of ``database.table``; errors go into 'error_message'."""
  db = dbms.get(request.user)
  response = {'status': -1, 'error_message': ''}

  try:
    table_obj = db.get_table(database, table)
    sample_data = db.get_sample(database, table_obj)
    if sample_data:
      response['status'] = 0
      response['headers'] = sample_data.cols()
      response['rows'] = escape_rows(sample_data.rows(), nulls_only=True)
    else:
      response['error_message'] = _('Sample data took too long to be generated')
  except Exception, ex:  # Python 2 except syntax
    error_message, logs = dbms.expand_exception(ex, db)
    response['error_message'] = error_message
  # NOTE(review): excerpt ends here without a return — appears truncated.
Esempio n. 13
0
def get_indexes(request, database, table):
  """Return the indexes of ``database.table``; errors go into 'error_message'."""
  query_server = dbms.get_query_server_config(get_app_name(request))
  db = dbms.get(request.user, query_server)
  response = {'status': -1, 'error_message': ''}

  try:
    indexes = db.get_indexes(database, table)
    if indexes:
      response['status'] = 0
      response['headers'] = indexes.cols()
      response['rows'] = escape_rows(indexes.rows(), nulls_only=True)
    else:
      response['error_message'] = _('Index data took too long to be generated')
  except Exception, ex:  # Python 2 except syntax
    error_message, logs = dbms.expand_exception(ex, db)
    response['error_message'] = error_message
  # NOTE(review): excerpt ends here without a return — appears truncated.
Esempio n. 14
0
def fetch_result_data(request):
    """Fetch a page of results for the posted snippet and return it as JSON.

    POST fields: 'notebook' and 'snippet' (JSON documents), 'rows' (page
    size, default 100) and 'startOver' (restart the fetch, default false).
    """
    response = {"status": -1}

    notebook = json.loads(request.POST.get("notebook", "{}"))
    snippet = json.loads(request.POST.get("snippet", "{}"))
    # Defaults must be JSON *strings*: json.loads() rejects the int 100 and
    # the bool False, so a missing POST field used to raise TypeError here.
    rows = json.loads(request.POST.get("rows", "100"))
    start_over = json.loads(request.POST.get("startOver", "false"))

    response["result"] = get_api(request, snippet).fetch_result(notebook, snippet, rows, start_over)

    # Materialize and HTML escape results
    if response["result"].get("data") and response["result"].get("type") == "table":
        response["result"]["data"] = escape_rows(response["result"]["data"])

    response["status"] = 0

    return JsonResponse(response)
Esempio n. 15
0
File: api.py Progetto: heshunwq/hue
def fetch_result_data(request):
  """Fetch a page of results for the posted snippet and return it as JSON.

  POST fields: 'notebook' and 'snippet' (JSON documents), 'rows' (page size,
  default 100) and 'startOver' (restart the fetch from row 0, default false).
  """
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  # Defaults must be JSON *strings*: json.loads() rejects the int 100 and
  # the bool False, so a missing POST field used to raise TypeError here.
  rows = json.loads(request.POST.get('rows', '100'))
  start_over = json.loads(request.POST.get('startOver', 'false'))

  response['result'] = get_api(request, snippet).fetch_result(notebook, snippet, rows, start_over)

  # Materialize and HTML escape results
  if response['result'].get('data') and response['result'].get('type') == 'table':
    response['result']['data'] = escape_rows(response['result']['data'])

  response['status'] = 0

  return JsonResponse(response)
Esempio n. 16
0
def _get_sample_data(db, database, table, column):
  """Return sample rows (or distinct sorted values of one column) as a dict."""
  table_obj = db.get_table(database, table)
  sample_data = db.get_sample(database, table_obj, column)

  if not sample_data:
    return {'status': -1, 'message': _('Failed to get sample data.')}

  rows = escape_rows(sample_data.rows(), nulls_only=True)
  if column:
    # Single-column samples are de-duplicated and sorted.
    rows = [[value] for value in sorted(set(row[0] for row in rows))]

  return {'status': 0, 'headers': sample_data.cols(), 'rows': rows}
Esempio n. 17
0
File: solr.py Progetto: infect2/hue
  def get_sample_data(self, database, table, column=None):
    """Select up to 250 rows from the collection and return them escaped."""
    if column is None:
      column = ', '.join([col['name'] for col in self.get_columns(database, table)])

    res = self.api.execute(None, {
        # NOTE(review): the table name is used as the 'database' here —
        # mirrors the original code; confirm it is intended.
        'database': table,
        'statement': 'SELECT %s FROM %s LIMIT 250' % (column, table)
    })

    if not res:
      return {'status': -1, 'message': _('Failed to get sample data.')}

    return {
        'status': 0,
        'headers': [col['name'] for col in res['result']['meta']],
        'rows': escape_rows(res['result']['data'], nulls_only=True),
    }
Esempio n. 18
0
File: api.py Progetto: biddyweb/hue
def execute(request):
  """Execute a snippet; query-type notebooks are historified even on failure."""
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  response = {'status': -1}

  try:
    response['handle'] = get_api(request, snippet).execute(notebook, snippet)
  finally:
    # Record the run in history regardless of the outcome.
    if notebook['type'].startswith('query-'):
      response['history_id'] = _historify(notebook, request.user).id

  # Materialize and HTML escape results
  handle = response['handle']
  if handle.get('sync') and handle['result'].get('data'):
    handle['result']['data'] = escape_rows(handle['result']['data'])

  response['status'] = 0

  return JsonResponse(response)
Esempio n. 19
0
def _get_sample_data(db, database, table, column):
  """Sample a table (or one column), switching to Impala for Impala-only tables."""
  table_obj = db.get_table(database, table)

  # Impala-only tables cannot be sampled through a non-Impala client.
  if table_obj.is_impala_only and db.client.query_server['server_name'] != 'impala':
    db = dbms.get(db.client.user, get_query_server_config('impala'))

  sample_data = db.get_sample(database, table_obj, column)
  if not sample_data:
    return {'status': -1, 'message': _('Failed to get sample data.')}

  rows = escape_rows(sample_data.rows(), nulls_only=True)
  if column:
    # Single-column samples are de-duplicated and sorted.
    rows = [[value] for value in sorted(set(row[0] for row in rows))]

  return {
      'status': 0,
      'headers': sample_data.cols(),
      'full_headers': sample_data.full_cols(),
      'rows': rows,
  }
Esempio n. 20
0
File: api.py Progetto: heshunwq/hue
def execute(request):
  """Execute a snippet, historify query-type notebooks and inline sync results.

  The 'result' payload of a synchronous execution is popped off the handle
  before the history is written, then re-attached (HTML-escaped) to the
  response at the end.
  """
  response = {'status': -1}
  result = None

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  try:
    response['handle'] = get_api(request, snippet).execute(notebook, snippet)

    # Retrieve and remove the result from the handle
    if response['handle'].get('sync'):
      result = response['handle'].pop('result')
  finally:
    # History is written even when execute() raises; the exception then
    # propagates after this block.
    if notebook['type'].startswith('query-'):
      _snippet = [s for s in notebook['snippets'] if s['id'] == snippet['id']][0]
      if 'handle' in response: # No failure
        _snippet['result']['handle'] = response['handle']
        _snippet['result']['statements_count'] = response['handle'].get('statements_count', 1)
        _snippet['result']['statement_id'] = response['handle'].get('statement_id', 0)
        _snippet['result']['handle']['statement'] = response['handle'].get('statement', snippet['statement']) # For non HS2, as non multi query yet
      else:
        _snippet['status'] = 'failed'
      history = _historify(notebook, request.user)
      response['history_id'] = history.id
      response['history_uuid'] = history.uuid
      if notebook['isSaved']: # Keep track of history of saved queries
        response['history_parent_uuid'] = history.dependencies.filter(type__startswith='query-').latest('last_modified').uuid

  # Inject and HTML escape results
  if result is not None:
    response['result'] = result
    response['result']['data'] = escape_rows(result['data'])

  response['status'] = 0

  return JsonResponse(response)
Esempio n. 21
0
def view_results(request, id, first_row=0):
    """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  If the result set has has_result_set=False, display an empty result.

  If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
  spits out a warning if first_row doesn't match the servers conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)
  """
    first_row = long(first_row)  # Python 2 ``long``; coerces a URL string param to a number
    start_over = (first_row == 0)
    # Placeholder "empty result" object, used when nothing is fetched below.
    results = type('Result', (object, ), {
        'rows': 0,
        'columns': [],
        'has_more': False,
        'start_row': 0,
    })
    data = []
    fetch_error = False
    error_message = ''
    log = ''
    columns = []
    app_name = get_app_name(request)

    query_history = authorized_get_query_history(request, id, must_exist=True)
    query_server = query_history.get_query_server_config()
    db = dbms.get(request.user, query_server)

    handle, state = _get_query_handle_and_state(query_history)
    context_param = request.GET.get('context', '')
    query_context = parse_query_context(context_param)

    # Update the status as expired should not be accessible
    expired = state == models.QueryHistory.STATE.expired

    # Retrieve query results or use empty result if no result set
    try:
        if query_server[
                'server_name'] == 'impala' and not handle.has_result_set:
            downloadable = False
        else:
            results = db.fetch(handle, start_over, 100)

            # Materialize and HTML escape results
            data = escape_rows(results.rows())

            # We display the "Download" button only when we know that there are results:
            downloadable = first_row > 0 or data
            log = db.get_log(handle)
            columns = results.data_table.cols()

    except Exception, ex:  # Python 2 except syntax
        LOG.exception('error fetching results')

        fetch_error = True
        error_message, log = expand_exception(ex, db, handle)
    # NOTE(review): this excerpt ends without a return statement — the
    # function appears truncated in this source.
Esempio n. 22
0
File: views.py Progetto: ziq211/hue
def view_results(request, id, first_row=0):
  """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  If the result set has has_result_set=False, display an empty result.

  If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
  spits out a warning if first_row doesn't match the servers conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)
  """
  first_row = int(first_row)
  start_over = (first_row == 0)
  # Placeholder "empty result" object, used when nothing is fetched below.
  results = type('Result', (object,), {
                'rows': 0,
                'columns': [],
                'has_more': False,
                'start_row': 0,
            })
  data = []
  fetch_error = False
  error_message = ''
  log = ''
  columns = []
  app_name = get_app_name(request)

  query_history = authorized_get_query_history(request, id, must_exist=True)
  query_server = query_history.get_query_server_config()
  db = dbms.get(request.user, query_server)

  handle, state = _get_query_handle_and_state(query_history)
  context_param = request.GET.get('context', '')
  query_context = parse_query_context(context_param)

  # Update the status as expired should not be accessible
  expired = state == models.QueryHistory.STATE.expired

  # Retrieve query results or use empty result if no result set
  try:
    if query_server['server_name'] == 'impala' and not handle.has_result_set:
      downloadable = False
    else:
      results = db.fetch(handle, start_over, 100)

      # Materialize and HTML escape results
      data = escape_rows(results.rows())

      # We display the "Download" button only when we know that there are results:
      downloadable = first_row > 0 or data
      log = db.get_log(handle)
      columns = results.data_table.cols()

  except Exception as ex:
    LOG.exception('error fetching results')

    fetch_error = True
    error_message, log = expand_exception(ex, db, handle)

  # Handle errors
  error = fetch_error or results is None or expired

  context = {
    'error': error,
    'message': error_message,
    'query': query_history,
    'results': data,
    'columns': columns,
    'expected_first_row': first_row,
    'log': log,
    'hadoop_jobs': app_name != 'impala' and parse_out_jobs(log),
    'query_context': query_context,
    'can_save': False,
    'context_param': context_param,
    'expired': expired,
    'app_name': app_name,
    'next_json_set': None,
    'is_finished': query_history.is_finished()
  }

  if not error:
    download_urls = {}
    if downloadable:
      for format in common.DL_FORMATS:
        download_urls[format] = reverse(app_name + ':download', kwargs=dict(id=str(id), format=format))

    results.start_row = first_row

    context.update({
      'id': id,
      'results': data,
      'has_more': results.has_more,
      'next_row': results.start_row + len(data),
      'start_row': results.start_row,
      'expected_first_row': first_row,
      'columns': columns,
      'download_urls': download_urls,
      'can_save': query_history.owner == request.user,
      # NOTE(review): ``('?context=' + context_param or '')`` parses as
      # ``(('?context=' + context_param) or '')`` — the ``or ''`` is a no-op;
      # confirm whether ``context_param or ''`` was intended.
      'next_json_set':
        reverse(get_app_name(request) + ':view_results', kwargs={
            'id': str(id),
            'first_row': results.start_row + len(data)
          }
        )
        + ('?context=' + context_param or '') + '&format=json'
    })

  context['columns'] = massage_columns_for_json(columns)
  # Drop entries that are not JSON-serializable before building the response.
  if 'save_form' in context:
    del context['save_form']
  if 'query' in context:
    del context['query']
  return JsonResponse(context)
Esempio n. 23
0
                    response["history_parent_uuid"] = (
                        history.dependencies.filter(type__startswith="query-").latest("last_modified").uuid
                    )
    except QueryError, ex:  # We inject the history information from _historify() to the failed queries
        if response.get("history_id"):
            ex.extra["history_id"] = response["history_id"]
        if response.get("history_uuid"):
            ex.extra["history_uuid"] = response["history_uuid"]
        if response.get("history_parent_uuid"):
            ex.extra["history_parent_uuid"] = response["history_parent_uuid"]
        raise ex

    # Inject and HTML escape results
    if result is not None:
        response["result"] = result
        response["result"]["data"] = escape_rows(result["data"])

    response["status"] = 0

    return response


@require_POST
@check_document_access_permission()
@api_error_handler
def execute(request):
    notebook = json.loads(request.POST.get("notebook", "{}"))
    snippet = json.loads(request.POST.get("snippet", "{}"))

    response = _execute_notebook(request, notebook, snippet)
Esempio n. 24
0
File: api3.py Progetto: mapr/hue
def guess_field_types(request):
    """Guess column names/types and a data sample for the posted file format.

    POST field 'fileFormat' (JSON) selects the input source via 'inputFormat'
    ('localfile', 'file', 'table', 'query', 'rdbms', 'stream' or 'connector')
    and carries the source-specific options. Returns a JSON object with
    'columns' (list of Field dicts) and 'sample' (list of rows), plus
    whatever else the morphline indexer adds for 'file' inputs.

    Raises PopupException for unrecognized input/connector formats and for
    Salesforce queries refused by the remote service.
    """
    file_format = json.loads(request.POST.get('fileFormat', '{}'))

    if file_format['inputFormat'] == 'localfile':
        path = urllib_unquote(file_format['path'])

        with open(path, 'r') as local_file:

            reader = csv.reader(local_file)
            csv_data = list(reader)

            if file_format['format']['hasHeader']:
                sample = csv_data[1:5]
                column_row = [
                    re.sub('[^0-9a-zA-Z]+', '_', col) for col in csv_data[0]
                ]
            else:
                sample = csv_data[:4]
                column_row = [
                    'field_' + str(count + 1)
                    for count, col in enumerate(sample[0])
                ]

            field_type_guesses = []
            for count, col in enumerate(column_row):
                column_samples = [
                    sample_row[count] for sample_row in sample
                    if len(sample_row) > count
                ]
                field_type_guess = guess_field_type_from_samples(
                    column_samples)
                field_type_guesses.append(field_type_guess)

            columns = [
                Field(column_row[count], field_type_guesses[count]).to_dict()
                for count, col in enumerate(column_row)
            ]

            format_ = {'columns': columns, 'sample': sample}

    elif file_format['inputFormat'] == 'file':
        indexer = MorphlineIndexer(request.user, request.fs)
        path = urllib_unquote(file_format["path"])
        # Excel files are first converted to CSV before guessing.
        if path[-3:] == 'xls' or path[-4:] == 'xlsx':
            path = excel_to_csv_file_name_change(path)
        stream = request.fs.open(path)
        encoding = check_encoding(stream.read(10000))
        LOG.debug('File %s encoding is %s' % (path, encoding))
        stream.seek(0)
        _convert_format(file_format["format"], inverse=True)

        format_ = indexer.guess_field_types({
            "file": {
                "stream": stream,
                "name": path
            },
            "format": file_format['format']
        })

        # Note: Would also need to set charset to table (only supported in Hive)
        if 'sample' in format_ and format_['sample']:
            format_['sample'] = escape_rows(format_['sample'],
                                            nulls_only=True,
                                            encoding=encoding)
        for col in format_['columns']:
            col['name'] = smart_unicode(col['name'],
                                        errors='replace',
                                        encoding=encoding)

    elif file_format['inputFormat'] == 'table':
        sample = get_api(request, {
            'type': 'hive'
        }).get_sample_data({'type': 'hive'},
                           database=file_format['databaseName'],
                           table=file_format['tableName'])
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])

        format_ = {
            "sample":
            sample['rows'][:4],
            "columns": [
                Field(col.name,
                      HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type,
                                                          'string')).to_dict()
                for col in table_metadata.cols
            ]
        }
    elif file_format['inputFormat'] == 'query':
        query_id = file_format['query']['id'] if file_format['query'].get(
            'id') else file_format['query']

        notebook = Notebook(document=Document2.objects.document(
            user=request.user, doc_id=query_id)).get_data()
        snippet = notebook['snippets'][0]
        db = get_api(request, snippet)

        if file_format.get('sampleCols'):
            columns = file_format.get('sampleCols')
            sample = file_format.get('sample')
        else:
            snippet['query'] = snippet['statement']
            try:
                sample = db.fetch_result(notebook, snippet, 4,
                                         start_over=True)['rows'][:4]
            except Exception as e:
                LOG.warning(
                    'Skipping sample data as query handle might be expired: %s'
                    % e)
                sample = [[], [], [], [], []]
            columns = db.autocomplete(snippet=snippet, database='', table='')
            columns = [
                Field(
                    col['name'],
                    HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'],
                                                        'string')).to_dict()
                for col in columns['extended_columns']
            ]
        format_ = {
            "sample": sample,
            "columns": columns,
        }
    elif file_format['inputFormat'] == 'rdbms':
        api = _get_api(request)
        sample = api.get_sample_data(None,
                                     database=file_format['rdbmsDatabaseName'],
                                     table=file_format['tableName'])

        format_ = {
            "sample":
            list(sample['rows'])[:4],
            "columns": [
                Field(col['name'], col['type']).to_dict()
                for col in sample['full_headers']
            ]
        }
    elif file_format['inputFormat'] == 'stream':
        if file_format['streamSelection'] == 'kafka':
            data = get_topic_data(request.user,
                                  file_format.get('kafkaSelectedTopics'))

            kafkaFieldNames = [col['name'] for col in data['full_headers']]
            kafkaFieldTypes = [col['type'] for col in data['full_headers']]
            topics_data = data['rows']

            format_ = {
                "sample":
                topics_data,
                "columns": [
                    Field(col, 'string', unique=False).to_dict()
                    for col in kafkaFieldNames
                ]
            }
        elif file_format['streamSelection'] == 'flume':
            if 'hue-httpd/access_log' in file_format['channelSourcePath']:
                columns = [{
                    'name': 'id',
                    'type': 'string',
                    'unique': True
                }, {
                    'name': 'client_ip',
                    'type': 'string'
                }, {
                    'name': 'time',
                    'type': 'date'
                }, {
                    'name': 'request',
                    'type': 'string'
                }, {
                    'name': 'code',
                    'type': 'plong'
                }, {
                    'name': 'bytes',
                    'type': 'plong'
                }, {
                    'name': 'method',
                    'type': 'string'
                }, {
                    'name': 'url',
                    'type': 'string'
                }, {
                    'name': 'protocol',
                    'type': 'string'
                }, {
                    'name': 'app',
                    'type': 'string'
                }, {
                    'name': 'subapp',
                    'type': 'string'
                }]
            else:
                columns = [{'name': 'message', 'type': 'string'}]

            format_ = {
                "sample": [['...'] * len(columns)] * 4,
                "columns": [
                    Field(col['name'],
                          HiveFormat.FIELD_TYPE_TRANSLATE.get(
                              col['type'], 'string'),
                          unique=col.get('unique')).to_dict()
                    for col in columns
                ]
            }
    elif file_format['inputFormat'] == 'connector':
        if file_format['connectorSelection'] == 'sfdc':
            sf = Salesforce(username=file_format['streamUsername'],
                            password=file_format['streamPassword'],
                            security_token=file_format['streamToken'])
            table_metadata = [{
                'name': column['name'],
                'type': column['type']
            } for column in sf.restful('sobjects/%(streamObject)s/describe/' %
                                       file_format)['fields']]
            query = 'SELECT %s FROM %s LIMIT 4' % (', '.join(
                [col['name']
                 for col in table_metadata]), file_format['streamObject'])
            # Debug print(query) removed: it leaked the generated SOQL to stdout.

            try:
                records = sf.query_all(query)
            except SalesforceRefusedRequest as e:
                raise PopupException(message=str(e))

            format_ = {
                "sample":
                [list(row.values())[1:] for row in records['records']],
                "columns": [
                    Field(
                        col['name'],
                        HiveFormat.FIELD_TYPE_TRANSLATE.get(
                            col['type'], 'string')).to_dict()
                    for col in table_metadata
                ]
            }
        else:
            raise PopupException(
                _('Connector format not recognized: %(connectorSelection)s') %
                file_format)
    else:
        raise PopupException(
            _('Input format not recognized: %(inputFormat)s') % file_format)

    return JsonResponse(format_)
Esempio n. 25
0
          response['history_uuid'] = history.uuid
          if notebook['isSaved']: # Keep track of history of saved queries
            response['history_parent_uuid'] = history.dependencies.filter(type__startswith='query-').latest('last_modified').uuid
  except QueryError, ex: # We inject the history information from _historify() to the failed queries
    if response.get('history_id'):
      ex.extra['history_id'] = response['history_id']
    if response.get('history_uuid'):
      ex.extra['history_uuid'] = response['history_uuid']
    if response.get('history_parent_uuid'):
      ex.extra['history_parent_uuid'] = response['history_parent_uuid']
    raise ex

  # Inject and HTML escape results
  if result is not None:
    response['result'] = result
    response['result']['data'] = escape_rows(result['data'])

  response['status'] = 0

  return response

@require_POST
@check_document_access_permission()
@api_error_handler
def execute(request):
  """Execute the POSTed snippet within its notebook and return the result as JSON.

  Deserializes the `notebook` and `snippet` JSON payloads from the request,
  delegates the actual execution/history bookkeeping to `_execute_notebook`,
  and wraps its response dict in a JsonResponse.
  """
  notebook_json = request.POST.get('notebook', '{}')
  snippet_json = request.POST.get('snippet', '{}')

  result = _execute_notebook(request, json.loads(notebook_json), json.loads(snippet_json))

  return JsonResponse(result)
Esempio n. 26
0
def guess_field_types(request):
    """Infer sample rows and column names/types for the posted `fileFormat`.

    Dispatches on `file_format['inputFormat']`:
      - 'file': sniff encoding and let MorphlineIndexer guess types from the stream
      - 'table': sample a Hive table and read its metastore column types
      - 'query': re-fetch the first rows of a saved query and autocomplete columns
      - 'rdbms': sample through the RDBMS API
      - 'stream': Kafka (mocked sample) or Flume (hard-coded httpd log schema)
      - 'connector': Salesforce object describe + a LIMIT 4 SOQL query

    Returns a JsonResponse of ``{"sample": [...], "columns": [Field dicts]}``.
    Raises PopupException for unrecognized formats or refused SFDC requests.

    Fixes over the previous revision: the stray debug ``print(query)`` (which
    wrote to the server's stdout) now goes through ``LOG.debug``, and the
    deprecated ``LOG.warn`` alias is replaced by ``LOG.warning``.
    """
    file_format = json.loads(request.POST.get('fileFormat', '{}'))

    if file_format['inputFormat'] == 'file':
        indexer = MorphlineIndexer(request.user, request.fs)
        path = urllib_unquote(file_format["path"])
        stream = request.fs.open(path)
        # Sniff the encoding from the first 10 KB, then rewind for the indexer.
        encoding = check_encoding(stream.read(10000))
        stream.seek(0)
        _convert_format(file_format["format"], inverse=True)

        format_ = indexer.guess_field_types({
            "file": {
                "stream": stream,
                "name": path
            },
            "format": file_format['format']
        })

        # Note: Would also need to set charset to table (only supported in Hive)
        if 'sample' in format_ and format_['sample']:
            format_['sample'] = escape_rows(format_['sample'],
                                            nulls_only=True,
                                            encoding=encoding)
        for col in format_['columns']:
            # Column names may contain bytes in the sniffed encoding; coerce to
            # unicode, replacing undecodable characters rather than failing.
            col['name'] = smart_unicode(col['name'],
                                        errors='replace',
                                        encoding=encoding)

    elif file_format['inputFormat'] == 'table':
        sample = get_api(request, {
            'type': 'hive'
        }).get_sample_data({'type': 'hive'},
                           database=file_format['databaseName'],
                           table=file_format['tableName'])
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])

        format_ = {
            "sample":
            sample['rows'][:4],
            "columns": [
                Field(col.name,
                      HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type,
                                                          'string')).to_dict()
                for col in table_metadata.cols
            ]
        }
    elif file_format['inputFormat'] == 'query':
        # `query` may be a full document dict (with an 'id') or a bare id.
        query_id = file_format['query']['id'] if file_format['query'].get(
            'id') else file_format['query']

        notebook = Notebook(document=Document2.objects.document(
            user=request.user, doc_id=query_id)).get_data()
        snippet = notebook['snippets'][0]
        db = get_api(request, snippet)

        if file_format.get('sampleCols'):
            # The caller already sampled; reuse its columns/rows as-is.
            columns = file_format.get('sampleCols')
            sample = file_format.get('sample')
        else:
            snippet['query'] = snippet['statement']
            try:
                sample = db.fetch_result(notebook, snippet, 4,
                                         start_over=True)['rows'][:4]
            except Exception as e:
                # Best effort: an expired handle is expected; fall back to an
                # empty sample instead of failing the whole request.
                LOG.warning(
                    'Skipping sample data as query handle might be expired: %s'
                    % e)
                sample = [[], [], [], [], []]
            columns = db.autocomplete(snippet=snippet, database='', table='')
            columns = [
                Field(
                    col['name'],
                    HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'],
                                                        'string')).to_dict()
                for col in columns['extended_columns']
            ]
        format_ = {
            "sample": sample,
            "columns": columns,
        }
    elif file_format['inputFormat'] == 'rdbms':
        api = _get_api(request)
        sample = api.get_sample_data(None,
                                     database=file_format['rdbmsDatabaseName'],
                                     table=file_format['tableName'])

        format_ = {
            "sample":
            list(sample['rows'])[:4],
            "columns": [
                Field(col['name'], col['type']).to_dict()
                for col in sample['full_headers']
            ]
        }
    elif file_format['inputFormat'] == 'stream':
        if file_format['streamSelection'] == 'kafka':
            if file_format.get(
                    'kafkaSelectedTopics') == 'NavigatorAuditEvents':
                # Well-known Navigator audit schema: all string fields plus a
                # derived date column appended below.
                kafkaFieldNames = [
                    'id', 'additionalInfo', 'allowed', 'collectionName',
                    'databaseName', 'db', 'DELEGATION_TOKEN_ID', 'dst',
                    'entityId', 'family', 'impersonator', 'ip', 'name',
                    'objectType', 'objType', 'objUsageType', 'operationParams',
                    'operationText', 'op', 'opText', 'path', 'perms',
                    'privilege', 'qualifier', 'QUERY_ID', 'resourcePath',
                    'service', 'SESSION_ID', 'solrVersion', 'src', 'status',
                    'subOperation', 'tableName', 'table', 'time', 'type',
                    'url', 'user'
                ]
                kafkaFieldTypes = ['string'] * len(kafkaFieldNames)
                kafkaFieldNames.append('timeDate')
                kafkaFieldTypes.append('date')
            else:
                # Note: mocked here, should come from SFDC or Kafka API or sampling job
                kafkaFieldNames = file_format.get('kafkaFieldNames',
                                                  '').split(',')
                kafkaFieldTypes = file_format.get('kafkaFieldTypes',
                                                  '').split(',')

            # Build a mocked CSV sample (header + 5 placeholder rows) so the
            # Morphline indexer can run its usual guessing pass on it.
            data = """%(kafkaFieldNames)s
%(data)s""" % {
                'kafkaFieldNames': ','.join(kafkaFieldNames),
                'data': '\n'.join(
                    [','.join(['...'] * len(kafkaFieldTypes))] * 5)
            }
            stream = string_io()
            stream.write(data)

            _convert_format(file_format["format"], inverse=True)

            indexer = MorphlineIndexer(request.user, request.fs)
            format_ = indexer.guess_field_types({
                "file": {
                    "stream": stream,
                    "name": file_format['path']
                },
                "format": file_format['format']
            })
            # Override the guessed types with the declared Kafka field types.
            type_mapping = dict(list(zip(kafkaFieldNames, kafkaFieldTypes)))

            for col in format_['columns']:
                col['keyType'] = type_mapping[col['name']]
                col['type'] = type_mapping[col['name']]
        elif file_format['streamSelection'] == 'flume':
            if 'hue-httpd/access_log' in file_format['channelSourcePath']:
                # Hard-coded schema for the demo httpd access log source.
                columns = [{
                    'name': 'id',
                    'type': 'string',
                    'unique': True
                }, {
                    'name': 'client_ip',
                    'type': 'string'
                }, {
                    'name': 'time',
                    'type': 'date'
                }, {
                    'name': 'request',
                    'type': 'string'
                }, {
                    'name': 'code',
                    'type': 'plong'
                }, {
                    'name': 'bytes',
                    'type': 'plong'
                }, {
                    'name': 'method',
                    'type': 'string'
                }, {
                    'name': 'url',
                    'type': 'string'
                }, {
                    'name': 'protocol',
                    'type': 'string'
                }, {
                    'name': 'app',
                    'type': 'string'
                }, {
                    'name': 'subapp',
                    'type': 'string'
                }]
            else:
                columns = [{'name': 'message', 'type': 'string'}]

            format_ = {
                "sample": [['...'] * len(columns)] * 4,
                "columns": [
                    Field(col['name'],
                          HiveFormat.FIELD_TYPE_TRANSLATE.get(
                              col['type'], 'string'),
                          unique=col.get('unique')).to_dict()
                    for col in columns
                ]
            }
    elif file_format['inputFormat'] == 'connector':
        if file_format['connectorSelection'] == 'sfdc':
            sf = Salesforce(username=file_format['streamUsername'],
                            password=file_format['streamPassword'],
                            security_token=file_format['streamToken'])
            table_metadata = [{
                'name': column['name'],
                'type': column['type']
            } for column in sf.restful('sobjects/%(streamObject)s/describe/' %
                                       file_format)['fields']]
            query = 'SELECT %s FROM %s LIMIT 4' % (', '.join(
                [col['name']
                 for col in table_metadata]), file_format['streamObject'])
            # Was a stray print() to stdout; log at debug level instead.
            LOG.debug(query)

            try:
                records = sf.query_all(query)
            except SalesforceRefusedRequest as e:
                raise PopupException(message=str(e))

            format_ = {
                "sample":
                # Drop the first value of each record (SFDC 'attributes' entry).
                [list(row.values())[1:] for row in records['records']],
                "columns": [
                    Field(
                        col['name'],
                        HiveFormat.FIELD_TYPE_TRANSLATE.get(
                            col['type'], 'string')).to_dict()
                    for col in table_metadata
                ]
            }
        else:
            raise PopupException(
                _('Connector format not recognized: %(connectorSelection)s') %
                file_format)
    else:
        raise PopupException(
            _('Input format not recognized: %(inputFormat)s') % file_format)

    return JsonResponse(format_)
Esempio n. 27
0
File: api.py Progetto: cloudera/hue
    response['status'] = 0
    if async:
      notebook = make_notebook(
          name=_('Table sample for `%(database)s`.`%(table)s`.`%(column)s`') % {'database': database, 'table': table, 'column': column},
          editor_type=_get_servername(db),
          statement=sample_data,
          status='ready-execute',
          skip_historify=True,
          is_task=False,
          compute=cluster if cluster else None
      )
      response['result'] = notebook.execute(request=MockedDjangoRequest(user=db.client.user), batch=False)
      if table_obj.is_impala_only:
        response['result']['type'] = 'impala'
    else:
      sample = escape_rows(sample_data.rows(), nulls_only=True)
      if column:
        sample = set([row[0] for row in sample])
        sample = [[item] for item in sorted(list(sample))]

      response['headers'] = sample_data.cols()
      response['full_headers'] = sample_data.full_cols()
      response['rows'] = sample
  else:
    response['message'] = _('Failed to get sample data.')

  return response


@error_handler
def get_indexes(request, database, table):
Esempio n. 28
0
File: api.py Progetto: taklwu/hue
    response['status'] = 0
    if async:
      notebook = make_notebook(
          name=_('Table sample for `%(database)s`.`%(table)s`.`%(column)s`') % {'database': database, 'table': table, 'column': column},
          editor_type=_get_servername(db),
          statement=sample_data,
          status='ready-execute',
          skip_historify=True,
          is_task=False,
          compute=cluster if cluster else None
      )
      response['result'] = notebook.execute(request=MockedDjangoRequest(user=db.client.user), batch=False)
      if table_obj.is_impala_only:
        response['result']['type'] = 'impala'
    else:
      sample = escape_rows(sample_data.rows(), nulls_only=True)
      if column:
        sample = set([row[0] for row in sample])
        sample = [[item] for item in sorted(list(sample))]

      response['headers'] = sample_data.cols()
      response['full_headers'] = sample_data.full_cols()
      response['rows'] = sample
  else:
    response['message'] = _('Failed to get sample data.')

  return response


@error_handler
def get_indexes(request, database, table):
Esempio n. 29
0
                            'history_parent_uuid'] = history.dependencies.filter(
                                type__startswith='query-').latest(
                                    'last_modified').uuid
    except QueryError, ex:  # We inject the history information from _historify() to the failed queries
        if response.get('history_id'):
            ex.extra['history_id'] = response['history_id']
        if response.get('history_uuid'):
            ex.extra['history_uuid'] = response['history_uuid']
        if response.get('history_parent_uuid'):
            ex.extra['history_parent_uuid'] = response['history_parent_uuid']
        raise ex

    # Inject and HTML escape results
    if result is not None:
        response['result'] = result
        response['result']['data'] = escape_rows(result['data'])

    response['status'] = 0

    return response


@require_POST
@check_document_access_permission()
@api_error_handler
def execute(request, engine=None):
    """Execute the POSTed snippet within its notebook via `_execute_notebook`.

    NOTE(review): this snippet appears truncated — `response` is computed but
    never used; a trailing ``return JsonResponse(response)`` is presumably
    missing (the sibling ``execute`` view returns exactly that). Confirm
    against the upstream source before relying on this block.
    """
    # Deserialize the notebook/snippet JSON payloads posted by the editor.
    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))

    # `engine` is accepted (URL-routing compatibility) but unused in this body.
    response = _execute_notebook(request, notebook, snippet)
Esempio n. 30
0
def guess_field_types(request):
    """Infer sample rows and column names/types for the posted `fileFormat`.

    Python 2 era variant (uses ``urllib.unquote``, ``chardet`` and the
    ``except Exception, e`` syntax — this block is NOT valid Python 3).
    Dispatches on `file_format['inputFormat']`: 'file' (Morphline guessing),
    'table' (Hive sample + metastore types) or 'query' (re-fetch saved query).

    NOTE(review): the snippet appears truncated — the remaining input-format
    branches and the final return are cut off by the scrape; confirm against
    the upstream source.
    """
    file_format = json.loads(request.POST.get('fileFormat', '{}'))

    if file_format['inputFormat'] == 'file':
        indexer = MorphlineIndexer(request.user, request.fs)
        path = urllib.unquote(file_format["path"])
        stream = request.fs.open(path)
        # Sniff the encoding from the first 10 KB, then rewind for the indexer.
        encoding = chardet.detect(stream.read(10000)).get('encoding')
        stream.seek(0)
        _convert_format(file_format["format"], inverse=True)

        format_ = indexer.guess_field_types({
            "file": {
                "stream": stream,
                "name": path
            },
            "format": file_format['format']
        })

        # Note: Would also need to set charset to table (only supported in Hive)
        if 'sample' in format_:
            format_['sample'] = escape_rows(format_['sample'],
                                            nulls_only=True,
                                            encoding=encoding)
        for col in format_['columns']:
            # Column names may be bytes in the sniffed encoding; coerce to
            # unicode, replacing undecodable characters rather than failing.
            col['name'] = smart_unicode(col['name'],
                                        errors='replace',
                                        encoding=encoding)

    elif file_format['inputFormat'] == 'table':
        sample = get_api(request, {
            'type': 'hive'
        }).get_sample_data({'type': 'hive'},
                           database=file_format['databaseName'],
                           table=file_format['tableName'])
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])

        format_ = {
            "sample":
            sample['rows'][:4],
            "columns": [
                Field(col.name,
                      HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type,
                                                          'string')).to_dict()
                for col in table_metadata.cols
            ]
        }
    elif file_format['inputFormat'] == 'query':
        # `query` may be a full document dict (with an 'id') or a bare id.
        query_id = file_format['query']['id'] if file_format['query'].get(
            'id') else file_format['query']

        notebook = Notebook(document=Document2.objects.document(
            user=request.user, doc_id=query_id)).get_data()
        snippet = notebook['snippets'][0]
        db = get_api(request, snippet)

        if file_format.get('sampleCols'):
            # The caller already sampled; reuse its columns/rows as-is.
            columns = file_format.get('sampleCols')
            sample = file_format.get('sample')
        else:
            snippet['query'] = snippet['statement']
            try:
                sample = db.fetch_result(notebook, snippet, 4,
                                         start_over=True)['rows'][:4]
            # Python 2-only except syntax; best effort — an expired handle is
            # expected here, so fall back to an empty sample instead of failing.
            except Exception, e:
                LOG.warn(
                    'Skipping sample data as query handle might be expired: %s'
                    % e)
                sample = [[], [], [], [], []]
            columns = db.autocomplete(snippet=snippet, database='', table='')
            columns = [
                Field(
                    col['name'],
                    HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'],
                                                        'string')).to_dict()
                for col in columns['extended_columns']
            ]
        format_ = {
            "sample": sample,
            "columns": columns,
        }
Esempio n. 31
0
def _execute_notebook(request, notebook, snippet):
    """Execute `snippet` through its interpreter and historify the run (Editor v2 aware).

    Returns a response dict with ``status``, the execution ``handle``, the
    history document ids, and — for synchronous executions — the escaped
    ``result`` rows. On QueryError, the history ids gathered so far are
    attached to the exception's ``extra`` before re-raising, so failed queries
    still link to their history entry.

    The nested try/finally ordering is load-bearing: the history document must
    be updated with the handle (or the 'failed' status) even when
    ``interpreter.execute`` raises.
    """
    response = {'status': -1}
    result = None
    history = None

    # Historify everything except pure notebook runs that were not batch
    # executed, unless the caller explicitly opted out via skipHistorify.
    historify = (notebook['type'] != 'notebook'
                 or snippet.get('wasBatchExecuted')
                 ) and not notebook.get('skipHistorify')

    try:
        try:
            sessions = notebook.get('sessions') and notebook[
                'sessions']  # Session reference for snippet execution without persisting it

            # NOTE(review): if this json.loads raises, `active_executable` is
            # never bound and the `finally` below would hit an
            # UnboundLocalError when historify is true — confirm upstream.
            active_executable = json.loads(request.POST.get(
                'executable', '{}'))  # Editor v2

            # TODO: Use statement, database etc. from active_executable

            if historify:
                # Persist a history copy first, then execute against that copy
                # so the handle/status get saved onto it below.
                history = _historify(notebook, request.user)
                notebook = Notebook(document=history).get_data()

            interpreter = get_api(request, snippet)
            if snippet.get('interface') == 'sqlalchemy':
                interpreter.options['session'] = sessions[0]

            with opentracing.tracer.start_span('interpreter') as span:
                # interpreter.execute needs the sessions, but we don't want to persist them
                pre_execute_sessions = notebook['sessions']
                notebook['sessions'] = sessions
                response['handle'] = interpreter.execute(notebook, snippet)
                notebook['sessions'] = pre_execute_sessions

            # Retrieve and remove the result from the handle
            if response['handle'].get('sync'):
                result = response['handle'].pop('result')
        finally:
            # Runs on success AND failure: record the outcome on the history
            # copy of the snippet before any exception propagates.
            if historify:
                _snippet = [
                    s for s in notebook['snippets'] if s['id'] == snippet['id']
                ][0]

                if 'id' in active_executable:  # Editor v2
                    # notebook_executable is the 1-to-1 match of active_executable in the notebook structure
                    notebook_executable = [
                        e for e in _snippet['executor']['executables']
                        if e['id'] == active_executable['id']
                    ][0]
                    if 'handle' in response:
                        notebook_executable['handle'] = response['handle']
                    if history:
                        notebook_executable['history'] = {
                            'id': history.id,
                            'uuid': history.uuid
                        }
                        notebook_executable['operationId'] = history.uuid

                if 'handle' in response:  # No failure
                    if 'result' not in _snippet:  # Editor v2
                        _snippet['result'] = {}
                    _snippet['result']['handle'] = response['handle']
                    _snippet['result']['statements_count'] = response[
                        'handle'].get('statements_count', 1)
                    _snippet['result']['statement_id'] = response[
                        'handle'].get('statement_id', 0)
                    _snippet['result']['handle']['statement'] = response[
                        'handle'].get('statement', snippet['statement']).strip(
                        )  # For non HS2, as non multi query yet
                else:
                    _snippet['status'] = 'failed'

                if history:  # If _historify failed, history will be None. If we get Atomic block exception, something underneath interpreter.execute() crashed and is not handled.
                    history.update_data(notebook)
                    history.save()

                    response['history_id'] = history.id
                    response['history_uuid'] = history.uuid
                    if notebook[
                            'isSaved']:  # Keep track of history of saved queries
                        response[
                            'history_parent_uuid'] = history.dependencies.filter(
                                type__startswith='query-').latest(
                                    'last_modified').uuid
    except QueryError as ex:  # We inject the history information from _historify() to the failed queries
        if response.get('history_id'):
            ex.extra['history_id'] = response['history_id']
        if response.get('history_uuid'):
            ex.extra['history_uuid'] = response['history_uuid']
        if response.get('history_parent_uuid'):
            ex.extra['history_parent_uuid'] = response['history_parent_uuid']
        raise ex

    # Inject and HTML escape results
    if result is not None:
        response['result'] = result
        response['result']['data'] = escape_rows(result['data'])

    response['status'] = 0

    return response
Esempio n. 32
0
def _execute_notebook(request, notebook, snippet):
    """Execute `snippet` through its interpreter and historify the run.

    Earlier (pre-Editor v2) variant of this helper. Returns a response dict
    with ``status``, the execution ``handle``, the history document ids, and —
    for synchronous executions — the escaped ``result`` rows. On QueryError,
    the history ids gathered so far are attached to the exception's ``extra``
    before re-raising, so failed queries still link to their history entry.

    The nested try/finally ordering is load-bearing: the history document must
    be updated with the handle (or the 'failed' status) even when
    ``interpreter.execute`` raises.
    """
    response = {'status': -1}
    result = None
    history = None

    # Historify everything except pure notebook runs that were not batch
    # executed, unless the caller explicitly opted out via skipHistorify.
    historify = (notebook['type'] != 'notebook'
                 or snippet.get('wasBatchExecuted')
                 ) and not notebook.get('skipHistorify')

    try:
        try:
            session = notebook.get('sessions') and notebook['sessions'][
                0]  # Session reference for snippet execution without persisting it
            if historify:
                # Persist a history copy first, then execute against that copy
                # so the handle/status get saved onto it below.
                history = _historify(notebook, request.user)
                notebook = Notebook(document=history).get_data()

            interpreter = get_api(request, snippet)
            if snippet.get('interface') == 'sqlalchemy':
                interpreter.options['session'] = session

            response['handle'] = interpreter.execute(notebook, snippet)

            # Retrieve and remove the result from the handle
            if response['handle'].get('sync'):
                result = response['handle'].pop('result')
        finally:
            # Runs on success AND failure: record the outcome on the history
            # copy of the snippet before any exception propagates.
            if historify:
                _snippet = [
                    s for s in notebook['snippets'] if s['id'] == snippet['id']
                ][0]
                if 'handle' in response:  # No failure
                    _snippet['result']['handle'] = response['handle']
                    _snippet['result']['statements_count'] = response[
                        'handle'].get('statements_count', 1)
                    _snippet['result']['statement_id'] = response[
                        'handle'].get('statement_id', 0)
                    _snippet['result']['handle']['statement'] = response[
                        'handle'].get('statement', snippet['statement']).strip(
                        )  # For non HS2, as non multi query yet
                else:
                    _snippet['status'] = 'failed'

                if history:  # If _historify failed, history will be None. If we get Atomic block exception, something underneath interpreter.execute() crashed and is not handled.
                    history.update_data(notebook)
                    history.save()

                    response['history_id'] = history.id
                    response['history_uuid'] = history.uuid
                    if notebook[
                            'isSaved']:  # Keep track of history of saved queries
                        response[
                            'history_parent_uuid'] = history.dependencies.filter(
                                type__startswith='query-').latest(
                                    'last_modified').uuid
    except QueryError as ex:  # We inject the history information from _historify() to the failed queries
        if response.get('history_id'):
            ex.extra['history_id'] = response['history_id']
        if response.get('history_uuid'):
            ex.extra['history_uuid'] = response['history_uuid']
        if response.get('history_parent_uuid'):
            ex.extra['history_parent_uuid'] = response['history_parent_uuid']
        raise ex

    # Inject and HTML escape results
    if result is not None:
        response['result'] = result
        response['result']['data'] = escape_rows(result['data'])

    response['status'] = 0

    return response
Esempio n. 33
0
                        table=None,
                        column=None,
                        async=False,
                        operation=None):
        engine = self._create_engine()
        inspector = inspect(engine)

        assist = Assist(inspector, engine, backticks=self.backticks)
        response = {'status': -1, 'result': {}}

        metadata, sample_data = assist.get_sample_data(database, table, column)
        has_result_set = sample_data is not None

        if sample_data:
            response['status'] = 0
            response['rows'] = escape_rows(sample_data)

        if table:
            columns = assist.get_columns(database, table)
            response['full_headers'] = [{
                'name': col.get('name'),
                'type': str(col.get('type')),
                'comment': ''
            } for col in columns]
        elif metadata:
            response['full_headers'] = [{
                'name':
                col[0] if type(col) is dict or type(col) is tuple else col,
                'type':
                'STRING_TYPE',
                'comment':