Example #1
def close_session(request):
    response = {'status': -1}

    session = json.loads(request.POST.get('session', '{}'))

    response['session'] = get_api(request, {
        'type': session['type']
    }).close_session(session=session)
    response['status'] = 0

    return JsonResponse(response)
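
These views all share the same request convention: structured arguments arrive as JSON-encoded POST form fields and are decoded with json.loads(request.POST.get(...)). A minimal, hypothetical sketch of a client payload for the close_session view above; the session keys shown ('type', 'id') are assumptions for illustration, not taken from the Hue API:

import json

# Hypothetical client-side payload for close_session(): the view expects a
# 'session' form field whose value is a JSON object serialized to a string.
payload = {
    'session': json.dumps({'type': 'hive', 'id': 1234}),  # field names assumed
}

# The view then recovers the dict exactly as in the example above:
session = json.loads(payload.get('session', '{}'))
assert session['type'] == 'hive'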
Example #2
def create_session(request):
  response = {'status': -1}

  session = json.loads(request.POST.get('session', '{}'))

  properties = session.get('properties', [])

  response['session'] = get_api(request, session).create_session(lang=session['type'], properties=properties)
  response['status'] = 0

  return JsonResponse(response)
Example #3
def _check_status(request, notebook=None, snippet=None, operation_id=None):
    response = {'status': -1}

    if operation_id or not snippet:  # To unify with _get_snippet
        nb_doc = Document2.objects.get_by_uuid(
            user=request.user, uuid=operation_id or notebook['uuid'])
        notebook = Notebook(document=nb_doc).get_data()  # Used below
        snippet = notebook['snippets'][0]

    try:
        response['query_status'] = get_api(request, snippet).check_status(
            notebook, snippet)
        response['status'] = 0
    except SessionExpired:
        response['status'] = 'expired'
        raise
    except QueryExpired:
        response['status'] = 'expired'
        raise
    finally:
        if response['status'] == 0 and snippet['status'] != response['query_status']:
            status = response['query_status']['status']
        elif response['status'] == 'expired':
            status = 'expired'
        else:
            status = 'failed'

        if response.get('query_status'):
            has_result_set = response['query_status'].get('has_result_set')
        else:
            has_result_set = None

        if (notebook.get('dialect') or notebook['type'].startswith('query')
                or notebook.get('isManaged')):
            nb_doc = Document2.objects.get_by_uuid(
                user=request.user, uuid=operation_id or notebook['uuid'])
            if nb_doc.can_write(request.user):
                nb = Notebook(document=nb_doc).get_data()
                if (status != nb['snippets'][0]['status']
                        or has_result_set != nb['snippets'][0].get('has_result_set')):
                    nb['snippets'][0]['status'] = status
                    if has_result_set is not None:
                        nb['snippets'][0]['has_result_set'] = has_result_set
                        nb['snippets'][0]['result']['handle']['has_result_set'] = has_result_set
                    nb_doc.update_data(nb)
                    nb_doc.save()

    return response
Example #4
def check_status(request):
  response = {'status': -1}

  operation_id = request.POST.get('operationId')
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  if operation_id or not snippet: # To unify with _get_snippet
    nb_doc = Document2.objects.get_by_uuid(user=request.user, uuid=operation_id or notebook['uuid'])
    notebook = Notebook(document=nb_doc).get_data() # Used below
    snippet = notebook['snippets'][0]

  try:
    with opentracing.tracer.start_span('notebook-check_status') as span:
      span.set_tag('user-id', request.user.username)
      span.set_tag(
        'query-id',
        snippet['result']['handle']['guid'] if snippet['result'].get('handle') and snippet['result']['handle'].get('guid') else None
      )
      response['query_status'] = get_api(request, snippet).check_status(notebook, snippet)

    response['status'] = 0
  except SessionExpired:
    response['status'] = 'expired'
    raise
  except QueryExpired:
    response['status'] = 'expired'
    raise
  finally:
    if response['status'] == 0 and snippet['status'] != response['query_status']:
      status = response['query_status']['status']
    elif response['status'] == 'expired':
      status = 'expired'
    else:
      status = 'failed'
    if response.get('query_status'):
      has_result_set = response['query_status'].get('has_result_set')
    else:
      has_result_set = None

    if notebook['type'].startswith('query') or notebook.get('isManaged'):
      nb_doc = Document2.objects.get_by_uuid(user=request.user, uuid=operation_id or notebook['uuid'])
      if nb_doc.can_write(request.user):
        nb = Notebook(document=nb_doc).get_data()
        if status != nb['snippets'][0]['status'] or has_result_set != nb['snippets'][0].get('has_result_set'):
          nb['snippets'][0]['status'] = status
          if has_result_set is not None:
            nb['snippets'][0]['has_result_set'] = has_result_set
          nb_doc.update_data(nb)
          nb_doc.save()

  return JsonResponse(response)
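
The span.set_tag('query-id', ...) expression above, which guards against a missing result handle, recurs almost verbatim in later examples (get_logs, fetch_result_size, cancel_statement). A small stand-alone restatement of that guard, purely for readability; the helper name is ours and not part of Hue:

# Not part of Hue: isolates the repeated guard used when tagging opentracing
# spans with a query id. Returns the handle GUID if present, otherwise None.
def query_id_for_span(snippet):
    result = snippet.get('result') or {}
    handle = result.get('handle') or {}
    return handle.get('guid') or None

assert query_id_for_span({'result': {'handle': {'guid': 'abc-123'}}}) == 'abc-123'
assert query_id_for_span({'result': {}}) is None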
Example #5
def statement_similarity(request):
  response = {'status': -1, 'message': ''}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  source_platform = request.POST.get('sourcePlatform')

  api = get_api(request, snippet)

  response['statement_similarity'] = api.statement_similarity(notebook, snippet, source_platform=source_platform)
  response['status'] = 0

  return JsonResponse(response)
Example #6
def statement_risk(request):
    response = {'status': -1, 'message': ''}

    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))
    interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get())

    api = get_api(request, snippet)

    response['query_complexity'] = api.statement_risk(interface, notebook,
                                                      snippet)
    response['status'] = 0

    return JsonResponse(response)
Example #7
def describe(request, database, table=None, column=None):
    response = {'status': -1, 'message': ''}
    notebook = json.loads(request.POST.get('notebook', '{}'))
    source_type = request.POST.get('source_type', '')
    snippet = {'type': source_type}

    describe = get_api(request, snippet).describe(notebook,
                                                  snippet,
                                                  database,
                                                  table,
                                                  column=column)
    response.update(describe)

    return JsonResponse(response)
Example #8
def close_notebook(request):
    response = {'status': -1, 'result': []}

    notebook = json.loads(request.POST.get('notebook', '{}'))

    for session in [
            _s for _s in notebook['sessions']
            if _s['type'] in ('scala', 'spark', 'pyspark', 'sparkr', 'r')
    ]:
        try:
            response['result'].append(
                get_api(request, session).close_session(session))
        except QueryExpired:
            pass
        except Exception as e:
            LOG.exception('Error closing session %s' % str(e))

    for snippet in [
            _s for _s in notebook['snippets']
            if _s['type'] in ('hive', 'impala')
    ]:
        try:
            if snippet['status'] != 'running':
                response['result'].append(
                    get_api(request, snippet).close_statement(notebook, snippet))
            else:
                LOG.info('Not closing SQL snippet as still running.')
        except QueryExpired:
            pass
        except Exception as e:
            LOG.exception('Error closing statement %s' % str(e))

    response['status'] = 0
    response['message'] = _('Notebook closed successfully')

    return JsonResponse(response)
Example #9
def get_sample_data(request, server=None, database=None, table=None, column=None):
  response = {'status': -1}

  # Passed by check_document_access_permission but unused by APIs
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  is_async = json.loads(request.POST.get('async', 'false'))
  operation = json.loads(request.POST.get('operation', '"default"'))

  sample_data = get_api(request, snippet).get_sample_data(snippet, database, table, column, is_async=is_async, operation=operation)
  response.update(sample_data)

  response['status'] = 0

  return JsonResponse(response)
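
Note that the defaults passed to json.loads above are themselves JSON text: 'false' decodes to the boolean False, and '"default"' (a quoted string) decodes to the plain string 'default'. A quick check:

import json

assert json.loads('false') is False           # 'async' default
assert json.loads('"default"') == 'default'   # 'operation' default
assert json.loads('{}') == {}                 # 'notebook' / 'snippet' default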
Example #10
def autocomplete(request, server=None, database=None, table=None, column=None, nested=None):
  response = {'status': -1}

  # Passed by check_document_access_permission but unused by APIs
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  try:
    autocomplete_data = get_api(request, snippet).autocomplete(snippet, database, table, column, nested)
    response.update(autocomplete_data)
  except QueryExpired:
    pass

  response['status'] = 0

  return JsonResponse(response)
Example #11
def get_logs(request):
    response = {'status': -1}

    operation_id = request.POST.get('operationId')
    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))

    notebook = _get_notebook(request.user, notebook, operation_id)

    if operation_id and not notebook.get('uuid'):
        notebook['uuid'] = operation_id

    startFrom = request.POST.get('from')
    startFrom = int(startFrom) if startFrom else None
    size = request.POST.get('size')
    size = int(size) if size else None
    full_log = smart_str(request.POST.get('full_log', ''))

    snippet = _get_snippet(request.user, notebook, snippet, operation_id)

    db = get_api(request, snippet)

    with opentracing.tracer.start_span('notebook-get_logs') as span:
        logs = smart_str(
            db.get_log(notebook, snippet, startFrom=startFrom, size=size))

        span.set_tag('user-id', request.user.username)
        span.set_tag(
            'query-id', snippet['result']['handle']['guid']
            if snippet['result'].get('handle')
            and snippet['result']['handle'].get('guid') else None)
    full_log += logs

    jobs = db.get_jobs(notebook, snippet, full_log)

    response['logs'] = logs.strip()
    response['progress'] = (
        min(db.progress(notebook, snippet, logs=full_log), 99)
        if snippet['status'] != 'available' and snippet['status'] != 'success'
        else 100)
    response['jobs'] = jobs
    response['isFullLogs'] = db.get_log_is_full_log(notebook, snippet)
    response['status'] = 0

    return JsonResponse(response)
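
The progress expression in get_logs caps progress at 99% while the snippet is still running and only reports 100% once its status is 'available' or 'success'. A minimal restatement of that rule with the db.progress() call abstracted to a plain number; the function name is ours, not Hue's:

# Stand-alone restatement of the progress rule used in get_logs() above.
def report_progress(raw_progress, snippet_status):
    if snippet_status != 'available' and snippet_status != 'success':
        return min(raw_progress, 99)  # never report 100% for an unfinished query
    return 100

assert report_progress(42, 'running') == 42
assert report_progress(100, 'running') == 99
assert report_progress(10, 'available') == 100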
Example #12
def close_notebook(request):
  response = {'status': -1, 'result': []}

  notebook = json.loads(request.POST.get('notebook', '{}'))

  for session in [_s for _s in notebook['sessions']]:
    try:
      api = get_api(request, session)
      if hasattr(api, 'close_session_idle'):
        response['result'].append(api.close_session_idle(notebook, session))
      else:
        response['result'].append(api.close_session(session))
    except QueryExpired:
      pass
    except Exception as e:
      LOG.exception('Error closing session %s' % str(e))

  return JsonResponse(response)
Example #13
def fetch_result_size(request):
    response = {'status': -1}

    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))

    with opentracing.tracer.start_span('notebook-fetch_result_size') as span:
        response['result'] = get_api(request, snippet).fetch_result_size(
            notebook, snippet)

        span.set_tag('user-id', request.user.username)
        span.set_tag(
            'query-id', snippet['result']['handle']['guid']
            if snippet['result'].get('handle')
            and snippet['result']['handle'].get('guid') else None)

    response['status'] = 0

    return JsonResponse(response)
Example #14
def statement_compatibility(request):
    response = {'status': -1, 'message': ''}

    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))
    interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get())
    source_platform = request.POST.get('sourcePlatform')
    target_platform = request.POST.get('targetPlatform')

    api = get_api(request, snippet)

    response['query_compatibility'] = api.statement_compatibility(
        interface,
        notebook,
        snippet,
        source_platform=source_platform,
        target_platform=target_platform)
    response['status'] = 0

    return JsonResponse(response)
Example #15
def browse(request, database, table, partition_spec=None):
    snippet = {'type': request.POST.get('sourceType', 'hive')}

    statement = get_api(request, snippet).get_browse_query(
        snippet, database, table, partition_spec)
    editor_type = snippet['type']
    namespace = request.POST.get('namespace', 'default')
    compute = json.loads(request.POST.get('cluster', '{}'))

    if request.method == 'POST':
        notebook = make_notebook(name='Execute and watch',
                                 editor_type=editor_type,
                                 statement=statement,
                                 database=database,
                                 status='ready-execute',
                                 is_task=True,
                                 namespace=namespace,
                                 compute=compute)
        return JsonResponse(notebook.execute(request, batch=False))
    else:
        editor = make_notebook(name='Browse',
                               editor_type=editor_type,
                               statement=statement,
                               status='ready-execute',
                               namespace=namespace,
                               compute=compute)
        return render(
            'editor2.mako' if ENABLE_NOTEBOOK_2.get() else 'editor.mako',
            request, {
                'notebooks_json': json.dumps([editor.get_data()]),
                'options_json': json.dumps({
                    'languages': get_ordered_interpreters(request.user),
                    'mode': 'editor',
                    'editor_type': editor_type
                }),
                'editor_type': editor_type,
            })
Example #16
def send_result_file(request, channel_id, message_ts, doc, file_format):
  notebook = json.loads(doc.data)
  snippet = notebook['snippets'][0]
  snippet['statement'] = notebook['snippets'][0]['statement_raw']

  content_generator = get_api(request, snippet).download(notebook, snippet, file_format)

  file_format = 'xlsx'
  file_name = _get_snippet_name(notebook)

  try:
    slack_client.files_upload(
      channels=channel_id,
      file=next(content_generator), 
      thread_ts=message_ts,
      filetype=file_format,
      filename='{name}.{format}'.format(name=file_name, format=file_format),
      initial_comment='Here is your result file!'
    )
  except Exception as e:
    raise PopupException(_("Cannot upload result file"), detail=e)
Example #17
def cancel_statement(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = None
  operation_id = request.POST.get('operationId') or notebook['uuid']

  snippet = _get_snippet(request.user, notebook, snippet, operation_id)

  with opentracing.tracer.start_span('notebook-cancel_statement') as span:
    response['result'] = get_api(request, snippet).cancel(notebook, snippet)

    span.set_tag('user-id', request.user.username)
    span.set_tag(
      'query-id',
      snippet['result']['handle']['guid'] if snippet['result'].get('handle') and snippet['result']['handle'].get('guid') else None
    )

  response['status'] = 0

  return JsonResponse(response)
Example #18
def cancel_statement(request):
    response = {'status': -1}

    notebook = json.loads(request.POST.get('notebook', '{}'))
    nb_doc = Document2.objects.get_by_uuid(user=request.user,
                                           uuid=notebook['uuid'])
    notebook = Notebook(document=nb_doc).get_data()
    snippet = notebook['snippets'][0]

    with opentracing.tracer.start_span('notebook-cancel_statement') as span:
        response['result'] = get_api(request, snippet).cancel(notebook, snippet)

        span.set_tag('user-id', request.user.username)
        span.set_tag(
            'query-id', snippet['result']['handle']['guid']
            if snippet['result'].get('handle')
            and snippet['result']['handle'].get('guid') else None)

    response['status'] = 0

    return JsonResponse(response)
Example #19
def download(request):
    if not ENABLE_DOWNLOAD.get():
        return serve_403_error(request)

    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))
    file_format = request.POST.get('format', 'csv')
    user_agent = request.META.get('HTTP_USER_AGENT')
    file_name = _get_snippet_name(notebook)

    content_generator = get_api(request, snippet).download(
        notebook, snippet, file_format=file_format)
    response = export_csvxls.make_response(content_generator,
                                           file_format,
                                           file_name,
                                           user_agent=user_agent)

    if snippet['id']:
        response.set_cookie('download-%s' % snippet['id'],
                            json.dumps({
                                'truncated': 'false',
                                'row_counter': '0'
                            }),
                            max_age=DOWNLOAD_COOKIE_AGE)
    if response:
        request.audit = {
            'operation': 'DOWNLOAD',
            'operationText': 'User %s downloaded results from %s as %s' % (
                request.user.username, _get_snippet_name(notebook), file_format),
            'allowed': True
        }

    return response
Example #20
def _fetch_result_data(request,
                       notebook=None,
                       snippet=None,
                       operation_id=None,
                       rows=100,
                       start_over=False,
                       nulls_only=False):
    snippet = _get_snippet(request.user, notebook, snippet, operation_id)

    response = {
        'result': get_api(request, snippet).fetch_result(notebook, snippet, rows, start_over)
    }

    # Materialize and HTML escape results
    if (response['result'].get('data') and response['result'].get('type') == 'table'
            and not response['result'].get('isEscaped')):
        response['result']['data'] = escape_rows(
            response['result']['data'], nulls_only=nulls_only)
        response['result']['isEscaped'] = True

    return response
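
Rows are HTML-escaped exactly once: escape_rows (a Hue helper) is applied only when the result is a table that has not been escaped yet, and the isEscaped flag records that. A rough, simplified stand-in for what escaping a row of cells involves, assuming string cells only need HTML entity escaping; the real helper is imported from Hue's own libraries and handles more cases:

import html

# Rough stand-in for Hue's escape_rows(): HTML-escape string cells so query
# results can be rendered safely in the editor UI. Non-string cells pass through.
def escape_rows_sketch(rows):
    return [
        [html.escape(cell) if isinstance(cell, str) else cell for cell in row]
        for row in rows
    ]

print(escape_rows_sketch([['<script>x</script>', 1], [None, 'ok']]))
# [['&lt;script&gt;x&lt;/script&gt;', 1], [None, 'ok']]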
Example #21
def autocomplete(request,
                 server=None,
                 database=None,
                 table=None,
                 column=None,
                 nested=None):
    response = {'status': -1}

    # Passed by check_document_access_permission but unused by APIs
    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))
    action = request.POST.get('operation', 'schema')

    try:
        autocomplete_data = get_api(request, snippet).autocomplete(
            snippet, database, table, column, nested, action)
        response.update(autocomplete_data)
    except QueryExpired as e:
        LOG.warning('Expired query seen: %s' % e)

    response['status'] = 0

    return JsonResponse(response)
Example #22
def export_result(request):
    response = {'status': -1, 'message': _('Success')}

    # Passed by check_document_access_permission but unused by APIs
    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))
    data_format = json.loads(request.POST.get('format', '"hdfs-file"'))
    destination = urllib_unquote(
        json.loads(request.POST.get('destination', '""')))
    overwrite = json.loads(request.POST.get('overwrite', 'false'))
    is_embedded = json.loads(request.POST.get('is_embedded', 'false'))
    start_time = json.loads(request.POST.get('start_time', '-1'))

    api = get_api(request, snippet)

    if data_format == 'hdfs-file':  # Blocking operation, like downloading
        if request.fs.isdir(destination):
            if notebook.get('name'):
                destination += '/%(name)s.csv' % notebook
            else:
                destination += '/%(type)s-%(id)s.csv' % notebook
        if overwrite and request.fs.exists(destination):
            request.fs.do_as_user(request.user.username, request.fs.rmtree,
                                  destination)
        response['watch_url'] = api.export_data_as_hdfs_file(
            snippet, destination, overwrite)
        response['status'] = 0
        request.audit = {
            'operation': 'EXPORT',
            'operationText': 'User %s exported to HDFS destination: %s' % (
                request.user.username, destination),
            'allowed': True
        }
    elif data_format == 'hive-table':
        if is_embedded:
            sql, success_url = api.export_data_as_table(
                notebook, snippet, destination)

            task = make_notebook(
                name=_('Export %s query to table %s') % (snippet['type'], destination),
                description=_('Query %s to %s') % (_get_snippet_name(notebook), success_url),
                editor_type=snippet['type'],
                statement=sql,
                status='ready',
                database=snippet['database'],
                on_success_url=success_url,
                last_executed=start_time,
                is_task=True)
            response = task.execute(request)
        else:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))
            response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=save_as_table&notebook=' + str(notebook_id) + \
                '&snippet=0&destination=' + destination
            response['status'] = 0
        request.audit = {
            'operation': 'EXPORT',
            'operationText': 'User %s exported to Hive table: %s' % (
                request.user.username, destination),
            'allowed': True
        }
    elif data_format == 'hdfs-directory':
        if destination.lower().startswith("abfs"):
            destination = abfspath(destination)
        if request.fs.exists(destination) and request.fs.listdir_stats(destination):
            raise PopupException(_('The destination is not an empty directory!'))
        if is_embedded:
            sql, success_url = api.export_large_data_to_hdfs(
                notebook, snippet, destination)

            task = make_notebook(
                name=_('Export %s query to directory') % snippet['type'],
                description=_('Query %s to %s') % (_get_snippet_name(notebook), success_url),
                editor_type=snippet['type'],
                statement=sql,
                status='ready-execute',
                database=snippet['database'],
                on_success_url=success_url,
                last_executed=start_time,
                is_task=True)
            response = task.execute(request)
        else:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))
            response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=insert_as_query&notebook=' + str(notebook_id) + \
                '&snippet=0&destination=' + destination
            response['status'] = 0
        request.audit = {
            'operation': 'EXPORT',
            'operationText': 'User %s exported to HDFS directory: %s' % (
                request.user.username, destination),
            'allowed': True
        }
    elif data_format in ('search-index', 'dashboard'):
        # Open the result in the Dashboard via a SQL sub-query or the Import wizard (quick vs scalable)
        if is_embedded:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))

            if data_format == 'dashboard':
                engine = notebook['type'].replace('query-', '')
                response['watch_url'] = reverse(
                    'dashboard:browse', kwargs={'name': notebook_id}
                ) + '?source=query&engine=%(engine)s' % {'engine': engine}
                response['status'] = 0
            else:
                sample = get_api(request, snippet).fetch_result(
                    notebook, snippet, rows=4, start_over=True)
                for col in sample['meta']:
                    col['type'] = HiveFormat.FIELD_TYPE_TRANSLATE.get(
                        col['type'], 'string')

                response['status'] = 0
                response['id'] = notebook_id
                response['name'] = _get_snippet_name(notebook)
                response['source_type'] = 'query'
                response['target_type'] = 'index'
                response['target_path'] = destination
                response['sample'] = list(sample['data'])
                response['columns'] = [
                    Field(col['name'], col['type']).to_dict()
                    for col in sample['meta']
                ]
        else:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))
            response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=index_query&notebook=' + str(notebook_id) + \
                '&snippet=0&destination=' + destination
            response['status'] = 0

        if response.get('status') != 0:
            response['message'] = _('Exporting result failed.')

    return JsonResponse(response)
Example #23
def _execute_notebook(request, notebook, snippet):
    response = {'status': -1}
    result = None
    history = None

    historify = (notebook['type'] != 'notebook'
                 or snippet.get('wasBatchExecuted')
                 ) and not notebook.get('skipHistorify')

    try:
        try:
            # Session reference for snippet execution without persisting it
            sessions = notebook.get('sessions') and notebook['sessions']

            active_executable = json.loads(request.POST.get('executable', '{}'))  # Editor v2

            # TODO: Use statement, database etc. from active_executable

            if historify:
                history = _historify(notebook, request.user)
                notebook = Notebook(document=history).get_data()

            interpreter = get_api(request, snippet)
            if snippet.get('interface') == 'sqlalchemy':
                interpreter.options['session'] = sessions[0]

            with opentracing.tracer.start_span('interpreter') as span:
                # interpreter.execute needs the sessions, but we don't want to persist them
                pre_execute_sessions = notebook['sessions']
                notebook['sessions'] = sessions
                response['handle'] = interpreter.execute(notebook, snippet)
                notebook['sessions'] = pre_execute_sessions

            # Retrieve and remove the result from the handle
            if response['handle'].get('sync'):
                result = response['handle'].pop('result')
        finally:
            if historify:
                _snippet = [
                    s for s in notebook['snippets'] if s['id'] == snippet['id']
                ][0]

                if 'id' in active_executable:  # Editor v2
                    # notebook_executable is the 1-to-1 match of active_executable in the notebook structure
                    notebook_executable = [
                        e for e in _snippet['executor']['executables']
                        if e['id'] == active_executable['id']
                    ][0]
                    if 'handle' in response:
                        notebook_executable['handle'] = response['handle']
                    if history:
                        notebook_executable['history'] = {
                            'id': history.id,
                            'uuid': history.uuid
                        }
                        notebook_executable['operationId'] = history.uuid

                if 'handle' in response:  # No failure
                    if 'result' not in _snippet:  # Editor v2
                        _snippet['result'] = {}
                    _snippet['result']['handle'] = response['handle']
                    _snippet['result']['statements_count'] = response['handle'].get(
                        'statements_count', 1)
                    _snippet['result']['statement_id'] = response['handle'].get(
                        'statement_id', 0)
                    # For non HS2, as non multi query yet
                    _snippet['result']['handle']['statement'] = response['handle'].get(
                        'statement', snippet['statement']).strip()
                else:
                    _snippet['status'] = 'failed'

                if history:  # If _historify failed, history will be None. If we get Atomic block exception, something underneath interpreter.execute() crashed and is not handled.
                    history.update_data(notebook)
                    history.save()

                    response['history_id'] = history.id
                    response['history_uuid'] = history.uuid
                    if notebook['isSaved']:  # Keep track of history of saved queries
                        response['history_parent_uuid'] = history.dependencies.filter(
                            type__startswith='query-').latest('last_modified').uuid
    except QueryError as ex:  # We inject the history information from _historify() to the failed queries
        if response.get('history_id'):
            ex.extra['history_id'] = response['history_id']
        if response.get('history_uuid'):
            ex.extra['history_uuid'] = response['history_uuid']
        if response.get('history_parent_uuid'):
            ex.extra['history_parent_uuid'] = response['history_parent_uuid']
        raise ex

    # Inject and HTML escape results
    if result is not None:
        response['result'] = result
        response['result']['data'] = escape_rows(result['data'])

    response['status'] = 0

    return response
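
The historify condition at the top of _execute_notebook is easy to misread because of how it is wrapped. Spelled out, an execution is recorded in history when the document is not a plain notebook (or the snippet was batch-executed) and history is not explicitly skipped; the helper name below is ours, the condition is copied from the example above:

# Same condition as in _execute_notebook() above, isolated for readability.
def should_historify(notebook, snippet):
    return (
        (notebook['type'] != 'notebook' or snippet.get('wasBatchExecuted'))
        and not notebook.get('skipHistorify')
    )

assert should_historify({'type': 'query-hive'}, {})
assert not should_historify({'type': 'notebook'}, {})
assert not should_historify({'type': 'query-hive', 'skipHistorify': True}, {})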
Example #24
def execute_and_watch(request):
    notebook_id = request.GET.get('editor', request.GET.get('notebook'))
    snippet_id = int(request.GET['snippet'])
    action = request.GET['action']
    destination = request.GET['destination']

    notebook = Notebook(document=Document2.objects.get(
        id=notebook_id)).get_data()
    snippet = notebook['snippets'][snippet_id]
    editor_type = snippet['type']

    api = get_api(request, snippet)

    if action == 'save_as_table':
        sql, success_url = api.export_data_as_table(notebook, snippet,
                                                    destination)
        editor = make_notebook(name='Execute and watch',
                               editor_type=editor_type,
                               statement=sql,
                               status='ready-execute',
                               database=snippet['database'])
    elif action == 'insert_as_query':
        # TODO: checks/workarounds in case of non impersonation or Sentry
        # TODO: keep older simpler way in case of known not many rows?
        sql, success_url = api.export_large_data_to_hdfs(
            notebook, snippet, destination)
        editor = make_notebook(name='Execute and watch',
                               editor_type=editor_type,
                               statement=sql,
                               status='ready-execute',
                               database=snippet['database'],
                               on_success_url=success_url)
    elif action == 'index_query':
        if destination == '__hue__':
            destination = _get_snippet_name(notebook,
                                            unique=True,
                                            table_format=True)
            live_indexing = True
        else:
            live_indexing = False

        sql, success_url = api.export_data_as_table(notebook,
                                                    snippet,
                                                    destination,
                                                    is_temporary=True,
                                                    location='')
        editor = make_notebook(name='Execute and watch',
                               editor_type=editor_type,
                               statement=sql,
                               status='ready-execute')

        sample = get_api(request, snippet).fetch_result(notebook,
                                                        snippet,
                                                        0,
                                                        start_over=True)

        from indexer.api3 import _index  # Will be moved to the lib
        from indexer.file_format import HiveFormat
        from indexer.fields import Field

        file_format = {
            'name': 'col',
            'inputFormat': 'query',
            'format': {
                'quoteChar': '"',
                'recordSeparator': '\n',
                'type': 'csv',
                'hasHeader': False,
                'fieldSeparator': '\u0001'
            },
            "sample": '',
            "columns": [
                Field(col['name'].rsplit('.')[-1],
                      HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
                for col in sample['meta']
            ]
        }

        if live_indexing:
            file_format['inputFormat'] = 'hs2_handle'
            file_format['fetch_handle'] = lambda rows, start_over: get_api(
                request, snippet).fetch_result(
                    notebook, snippet, rows=rows, start_over=start_over)

        job_handle = _index(request,
                            file_format,
                            destination,
                            query=notebook['uuid'])

        if live_indexing:
            return redirect(
                reverse('search:browse', kwargs={'name': destination}))
        else:
            return redirect(
                reverse('oozie:list_oozie_workflow',
                        kwargs={'job_id': job_handle['handle']['id']}))
    else:
        raise PopupException(_('Action %s is unknown') % action)

    return render(
        'editor.mako', request, {
            'notebooks_json': json.dumps([editor.get_data()]),
            'options_json': json.dumps({
                'languages': [{
                    "name": "%s SQL" % editor_type.title(),
                    "type": editor_type
                }],
                'mode': 'editor',
                'editor_type': editor_type,
                'success_url': success_url
            }),
            'editor_type': editor_type,
        })