Example #1
File: api.py Project: kevinhjk/hue
def check_status(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  try:
    response['query_status'] = get_api(request, snippet).check_status(notebook, snippet)
    response['status'] = 0
  except SessionExpired:
    response['status'] = 'expired'
    raise
  except QueryExpired:
    response['status'] = 'expired'
    raise
  finally:
    if response['status'] == 0 and snippet['status'] != response['query_status']:
      status = response['query_status']['status']
    elif response['status'] == 'expired':
      status = 'expired'
    else:
      status = 'failed'
    if notebook['type'].startswith('query'):
      nb_doc = Document2.objects.get(id=notebook['id'])
      nb_doc.can_write_or_exception(request.user)
      nb = Notebook(document=nb_doc).get_data()
      nb['snippets'][0]['status'] = status
      nb_doc.update_data(nb)
      nb_doc.save()

  return JsonResponse(response)
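These views read JSON-encoded form fields from the POST body. As a rough illustration, a caller could exercise the endpoint with Django's test client like this; the URL name 'notebook:check_status' and the credentials are assumptions, not taken from the code above:

import json
from django.test import Client
from django.core.urlresolvers import reverse  # django.urls on newer Django

client = Client()
client.login(username='demo', password='demo')  # hypothetical test user

resp = client.post(reverse('notebook:check_status'), {
    'notebook': json.dumps({'type': 'query-hive', 'id': 1}),
    'snippet': json.dumps({'status': 'running'}),
})
print(resp.json())  # e.g. {'status': 0, 'query_status': {...}}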
Example #2
def guess_field_types(request):
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'file':
    indexer = Indexer(request.user, request.fs)
    stream = request.fs.open(file_format["path"])
    _convert_format(file_format["format"], inverse=True)

    format_ = indexer.guess_field_types({
      "file": {
        "stream": stream,
        "name": file_format['path']
        },
      "format": file_format['format']
    })
  elif file_format['inputFormat'] == 'table':
    sample = get_api(request, {'type': 'hive'}).get_sample_data({'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

    format_ = {
        "sample": sample['rows'][:4],
        "columns": [
            Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
            for col in table_metadata.cols
        ]
    }
  elif file_format['inputFormat'] == 'query':
    #TODO get schema from explain query
    pass
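    # NOTE: format_ is never assigned on this branch, so the return below
    # raises NameError; the later revision in Example #23 fills it in by
    # sampling the open query history instead.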

  return JsonResponse(format_)
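For the 'file' branch above, a minimal fileFormat payload might look as follows; the top-level keys are the ones read by the code, and the nested 'format' keys mirror the CSV dict built in Examples #46 and #50 (values are illustrative):

file_format = {
    'inputFormat': 'file',
    'path': '/user/demo/sample.csv',  # any readable HDFS path
    'format': {
        'type': 'csv',
        'fieldSeparator': ',',
        'recordSeparator': '\n',
        'quoteChar': '"',
        'hasHeader': True,
    },
}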
Example #3
File: api.py Project: OSUser/hue
def execute(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  try:
    response['handle'] = get_api(request, snippet).execute(notebook, snippet)
  finally:
    if notebook['type'].startswith('query-'):
      _snippet = [s for s in notebook['snippets'] if s['id'] == snippet['id']][0]
      if 'handle' in response: # No failure
        _snippet['result']['handle'] = response['handle']
        _snippet['result']['statements_count'] = response['handle']['statements_count']
      else:
        _snippet['status'] = 'failed'
      history = _historify(notebook, request.user)
      response['history_id'] = history.id
      response['history_uuid'] = history.uuid

  # Materialize and HTML escape results
  if response['handle'].get('sync') and response['handle']['result'].get('data'):
    response['handle']['result']['data'] = escape_rows(response['handle']['result']['data'])

  response['status'] = 0

  return JsonResponse(response)
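The [s for s in notebook['snippets'] if s['id'] == snippet['id']][0] lookup in the finally block raises IndexError if the snippet id is absent from the notebook. A defensive variant, as a sketch (the error handling is our choice, not the original behavior):

_snippet = next((s for s in notebook['snippets'] if s['id'] == snippet['id']), None)
if _snippet is None:
    raise PopupException(_('Snippet %s not found in notebook') % snippet['id'])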
Example #4
def get_logs(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  startFrom = request.POST.get('from')
  startFrom = int(startFrom) if startFrom else None

  size = request.POST.get('size')
  size = int(size) if size else None

  db = get_api(request, snippet)

  full_log = smart_str(request.POST.get('full_log', ''))
  logs = db.get_log(notebook, snippet, startFrom=startFrom, size=size)
  full_log += logs

  jobs = db.get_jobs(notebook, snippet, full_log)

  response['logs'] = logs.strip()
  response['progress'] = min(db.progress(snippet, full_log), 99) if snippet['status'] != 'available' and snippet['status'] != 'success' else 100
  response['jobs'] = jobs
  response['isFullLogs'] = snippet.get('interface') == 'oozie'
  response['status'] = 0

  return JsonResponse(response)
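Callers are expected to poll this endpoint while a statement runs, sending the accumulated log back in through 'full_log' so that progress parsing sees the whole output. A rough client-side loop, as a sketch (the URL path, the offset semantics, and the client object are assumptions):

import json, time

full_log = ''
while True:
    resp = client.post('/notebook/api/get_logs', {  # path assumed
        'notebook': json.dumps(notebook),
        'snippet': json.dumps(snippet),
        'from': len(full_log),
        'full_log': full_log,
    }).json()
    full_log += resp['logs'] + '\n'
    if resp['progress'] == 100:
        break
    time.sleep(1)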
Example #5
File: api.py Project: heshunwq/hue
def get_logs(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  startFrom = request.POST.get('from')
  startFrom = int(startFrom) if startFrom else None

  size = request.POST.get('size')
  size = int(size) if size else None

  db = get_api(request, snippet)

  logs = db.get_log(notebook, snippet, startFrom=startFrom, size=size)

  jobs = json.loads(request.POST.get('jobs', '[]'))

  # Get any new jobs from current logs snippet
  new_jobs = db.get_jobs(notebook, snippet, logs)

  # Append new jobs to known jobs and get the unique set
  if new_jobs:
    all_jobs = jobs + new_jobs
    jobs = dict((job['name'], job) for job in all_jobs).values()

  # Retrieve full log for job progress parsing
  full_log = request.POST.get('full_log', logs)

  response['logs'] = logs
  response['progress'] = db.progress(snippet, full_log) if snippet['status'] != 'available' and snippet['status'] != 'success' else 100
  response['jobs'] = jobs
  response['status'] = 0

  return JsonResponse(response)
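The dict built from (job['name'], job) pairs de-duplicates jobs by name, with later entries winning. In isolation:

jobs = [{'name': 'job_1', 'status': 'running'}]
new_jobs = [{'name': 'job_1', 'status': 'succeeded'}, {'name': 'job_2', 'status': 'running'}]
all_jobs = jobs + new_jobs
unique = dict((job['name'], job) for job in all_jobs).values()
# keeps job_1 (the later, 'succeeded' entry) and job_2; ordering is not guaranteed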
Example #6
def execute_and_watch(request):
  notebook_id = request.GET.get('editor', request.GET.get('notebook'))
  snippet_id = int(request.GET['snippet'])
  action = request.GET['action']
  destination = request.GET['destination']

  notebook = Notebook(document=Document2.objects.get(id=notebook_id)).get_data()
  snippet = notebook['snippets'][snippet_id]
  editor_type = snippet['type']

  api = get_api(request, snippet)

  if action == 'save_as_table':
    sql, success_url = api.export_data_as_table(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')
  elif action == 'insert_as_query':
    sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')
  else:
    raise PopupException(_('Action %s is unknown') % action)

  return render('editor.mako', request, {
      'notebooks_json': json.dumps([editor.get_data()]),
      'options_json': json.dumps({
          'languages': [{"name": "%s SQL" % editor_type.title(), "type": editor_type}],
          'mode': 'editor',
          'success_url': success_url
      }),
      'editor_type': editor_type,
  })
Example #7
File: api.py Project: heshunwq/hue
def export_result(request):
  response = {'status': -1, 'message': _('Exporting result failed.')}

  # Passed by check_document_access_permission but unused by APIs
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  data_format = json.loads(request.POST.get('format', 'hdfs-file'))
  destination = json.loads(request.POST.get('destination', ''))
  overwrite = json.loads(request.POST.get('overwrite', False))

  api = get_api(request, snippet)

  if data_format == 'hdfs-file':
    if overwrite and request.fs.exists(destination):
      if request.fs.isfile(destination):
        request.fs.do_as_user(request.user.username, request.fs.rmtree, destination)
      else:
        raise ValidationError(_("The target path is a directory"))
    response['watch_url'] = api.export_data_as_hdfs_file(snippet, destination, overwrite)
    response['status'] = 0
  elif data_format == 'hive-table':
    notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
    response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=save_as_table&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
    response['status'] = 0
  elif data_format == 'hdfs-directory':
    notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
    response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=insert_as_query&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
    response['status'] = 0

  return JsonResponse(response)
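Note the fragile defaults in this revision: 'hdfs-file' and '' are not valid JSON, and False is not a string at all, so json.loads raises as soon as one of these fields is missing from the POST; it only works because the frontend always sends them. Examples #46 and #47 fix the booleans by defaulting to the JSON literal 'false'. A more tolerant parsing helper, as a sketch (the helper name is ours, not Hue's):

def _json_field(request, name, default):
    """Parse a JSON-encoded POST field, falling back to a Python default."""
    raw = request.POST.get(name)
    if raw is None:
        return default
    try:
        return json.loads(raw)
    except ValueError:
        return raw  # tolerate bare strings such as 'hdfs-file'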
Example #8
def get_logs(request):
    response = {"status": -1}

    notebook = json.loads(request.POST.get("notebook", "{}"))
    snippet = json.loads(request.POST.get("snippet", "{}"))

    startFrom = request.POST.get("from")
    startFrom = int(startFrom) if startFrom else None

    size = request.POST.get("size")
    size = int(size) if size else None

    db = get_api(request, snippet)

    full_log = str(request.POST.get("full_log", ""))
    logs = db.get_log(notebook, snippet, startFrom=startFrom, size=size)
    full_log += logs

    jobs = db.get_jobs(notebook, snippet, full_log)

    response["logs"] = logs.strip()
    response["progress"] = (
        db.progress(snippet, full_log) if snippet["status"] != "available" and snippet["status"] != "success" else 100
    )
    response["jobs"] = jobs
    response["isFullLogs"] = snippet.get("interface") == "oozie"
    response["status"] = 0

    return JsonResponse(response)
Example #9
File: api.py Project: shobull/hue
def get_logs(request):
    response = {"status": -1}

    notebook = json.loads(request.POST.get("notebook", "{}"))
    snippet = json.loads(request.POST.get("snippet", "{}"))

    startFrom = request.POST.get("from")
    startFrom = int(startFrom) if startFrom else None

    size = request.POST.get("size")
    size = int(size) if size else None

    db = get_api(request.user, snippet, request.fs, request.jt)

    logs = db.get_log(notebook, snippet, startFrom=startFrom, size=size)

    jobs = json.loads(request.POST.get("jobs", "[]"))

    # Get any new jobs from current logs snippet
    new_jobs = db.get_jobs(notebook, snippet, logs)

    # Append new jobs to known jobs and get the unique set
    if new_jobs:
        all_jobs = jobs + new_jobs
        jobs = dict((job["name"], job) for job in all_jobs).values()

    response["logs"] = logs
    response["progress"] = (
        db.progress(snippet, logs) if snippet["status"] != "available" and snippet["status"] != "success" else 100
    )
    response["jobs"] = jobs
    response["status"] = 0

    return JsonResponse(response)
Example #10
File: api.py Project: kevinhjk/hue
def execute(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  try:
    response['handle'] = get_api(request, snippet).execute(notebook, snippet)
  finally:
    if notebook['type'].startswith('query-'):
      _snippet = [s for s in notebook['snippets'] if s['id'] == snippet['id']][0]
      if 'handle' in response: # No failure
        _snippet['result']['handle'] = response['handle']
      else:
        _snippet['status'] = 'failed'
      history = _historify(notebook, request.user)
      response['history_id'] = history.id
      response['history_uuid'] = history.uuid
      if notebook['isSaved']: # Keep track of history of saved queries
        response['history_parent_uuid'] = history.dependencies.filter(type__startswith='query-').latest('last_modified').uuid

  # Materialize and HTML escape results
  if response['handle'].get('sync') and response['handle']['result'].get('data'):
    response['handle']['result']['data'] = escape_rows(response['handle']['result']['data'])

  response['status'] = 0

  return JsonResponse(response)
Example #11
def dt_logout(request, next_page=None):
  """Log out the user"""
  username = request.user.get_username()
  request.audit = {
    'username': username,
    'operation': 'USER_LOGOUT',
    'operationText': 'Logged out user: %s' % username
  }

  # Close Impala session on logout
  session_app = "impala"
  if request.user.has_hue_permission(action='access', app=session_app):
    session = {"type":session_app,"sourceMethod":"dt_logout"}
    try:
      get_api(request, session).close_session(session)
    except Exception as e:
      LOG.warning("Error closing Impala session: %s" % e)
Example #12
def close_session(request):
    response = {"status": -1}

    session = json.loads(request.POST.get("session", "{}"))

    response["session"] = get_api(request, {"type": session["type"]}).close_session(session=session)
    response["status"] = 0

    return JsonResponse(response)
Example #13
def explain(request):
    response = {"status": -1}

    notebook = json.loads(request.POST.get("notebook", "{}"))
    snippet = json.loads(request.POST.get("snippet", "{}"))

    response = get_api(request, snippet).explain(notebook, snippet)

    return JsonResponse(response)
Example #14
File: api.py Project: heshunwq/hue
def close_session(request):
  response = {'status': -1}

  session = json.loads(request.POST.get('session', '{}'))

  response['session'] = get_api(request, {'type': session['type']}).close_session(session=session)
  response['status'] = 0

  return JsonResponse(response)
Example #15
File: api.py Project: heshunwq/hue
def explain(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  response = get_api(request, snippet).explain(notebook, snippet)

  return JsonResponse(response)
Example #16
File: api.py Project: RunnerDu/hue
def execute(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  response['handle'] = get_api(request.user, snippet).execute(notebook, snippet)
  response['status'] = 0

  return JsonResponse(response)
Example #17
File: api.py Project: heshunwq/hue
def fetch_result_metadata(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  response['result'] = get_api(request, snippet).fetch_result_metadata(notebook, snippet)
  response['status'] = 0

  return JsonResponse(response)
Example #18
File: api.py Project: heshunwq/hue
def cancel_statement(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  response['result'] = get_api(request, snippet).cancel(notebook, snippet)
  response['status'] = 0

  return JsonResponse(response)
Example #19
def check_status(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  response['query_status'] = get_api(request, snippet).check_status(notebook, snippet)
  response['status'] = 0

  return JsonResponse(response)
Example #20
File: api.py Project: shobull/hue
def check_status(request):
    response = {"status": -1}

    notebook = json.loads(request.POST.get("notebook", "{}"))
    snippet = json.loads(request.POST.get("snippet", "{}"))

    response["query_status"] = get_api(request.user, snippet, request.fs, request.jt).check_status(notebook, snippet)
    response["status"] = 0

    return JsonResponse(response)
Example #21
File: api.py Project: shobull/hue
def cancel_statement(request):
    response = {"status": -1}

    notebook = json.loads(request.POST.get("notebook", "{}"))
    snippet = json.loads(request.POST.get("snippet", "{}"))

    response["result"] = get_api(request.user, snippet, request.fs, request.jt).cancel(notebook, snippet)
    response["status"] = 0

    return JsonResponse(response)
Example #22
File: api.py Project: shobull/hue
def fetch_result_metadata(request):
    response = {"status": -1}

    notebook = json.loads(request.POST.get("notebook", "{}"))
    snippet = json.loads(request.POST.get("snippet", "{}"))

    response["result"] = get_api(request.user, snippet, request.fs, request.jt).fetch_result_metadata(notebook, snippet)
    response["status"] = 0

    return JsonResponse(response)
Example #23
def guess_field_types(request):
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'file':
    indexer = Indexer(request.user, request.fs)
    stream = request.fs.open(file_format["path"])
    _convert_format(file_format["format"], inverse=True)

    format_ = indexer.guess_field_types({
      "file": {
          "stream": stream,
          "name": file_format['path']
        },
      "format": file_format['format']
    })
  elif file_format['inputFormat'] == 'table':
    sample = get_api(request, {'type': 'hive'}).get_sample_data({'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

    format_ = {
        "sample": sample['rows'][:4],
        "columns": [
            Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
            for col in table_metadata.cols
        ]
    }
  elif file_format['inputFormat'] == 'query': # Only supports open query history
    # TODO get schema from explain query, which is not possible
    notebook = Notebook(document=Document2.objects.get(id=file_format['query'])).get_data()
    snippet = notebook['snippets'][0]
    sample = get_api(request, snippet).fetch_result(notebook, snippet, 4, start_over=True)

    format_ = {
        "sample": sample['rows'][:4],
        "sample_cols": sample.meta,
        "columns": [
            Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
            for col in sample.meta
        ]
    }

  return JsonResponse(format_)
Example #24
File: api.py Project: RunnerDu/hue
def fetch_result_data(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  rows = json.loads(request.POST.get('rows', 100))
  start_over = json.loads(request.POST.get('startOver', False))

  response['result'] = get_api(request.user, snippet).fetch_result(notebook, snippet, rows, start_over)
  response['status'] = 0

  return JsonResponse(response)
Example #25
File: api.py Project: fnerdwq/hue
def close_notebook(request):
  response = {'status': -1, 'result': []}

  notebook = json.loads(request.POST.get('notebook', '{}'))

  for session in notebook['sessions']:
    try:
      response['result'].append(get_api(request.user, session, request.fs, request.jt).close_session(session))
    except QueryExpired:
      pass
    except Exception as e:
      LOG.exception('Error closing session %s' % str(e))
Example #26
File: api.py Project: heshunwq/hue
def close_notebook(request):
  response = {'status': -1, 'result': []}

  notebook = json.loads(request.POST.get('notebook', '{}'))

  for session in [_s for _s in notebook['sessions'] if _s['type'] in ('scala', 'spark', 'pyspark', 'sparkr')]:
    try:
      response['result'].append(get_api(request, session).close_session(session))
    except QueryExpired:
      pass
    except Exception as e:
      LOG.exception('Error closing session %s' % str(e))
Example #27
def upgrade_session_properties(request, notebook):
    # Upgrade session data if using old format
    data = notebook.get_data()

    for session in data.get("sessions", []):
        api = get_api(request, session)
        if "type" in session and hasattr(api, "upgrade_properties"):
            properties = session.get("properties", None)
            session["properties"] = api.upgrade_properties(session["type"], properties)

    notebook.data = json.dumps(data)
    return notebook
Example #28
def statement_risk(request):
  response = {'status': -1, 'message': ''}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  api = get_api(request, snippet)

  response['query_complexity'] = api.statement_risk(notebook, snippet)
  response['status'] = 0

  return JsonResponse(response)
Example #29
def upgrade_session_properties(request, notebook):
  # Upgrade session data if using old format
  data = notebook.get_data()

  for session in data.get('sessions', []):
    api = get_api(request, session)
    if 'type' in session and hasattr(api, 'upgrade_properties'):
      properties = session.get('properties', None)
      session['properties'] = api.upgrade_properties(session['type'], properties)

  notebook.data = json.dumps(data)
  return notebook
Example #30
File: api.py Project: heshunwq/hue
def create_session(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  session = json.loads(request.POST.get('session', '{}'))

  properties = session.get('properties', [])

  response['session'] = get_api(request, session).create_session(lang=session['type'], properties=properties)
  response['status'] = 0

  return JsonResponse(response)
Example #31
def execute(request):
  response = {'status': -1}
  result = None

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  try:
    try:
      response['handle'] = get_api(request, snippet).execute(notebook, snippet)

      # Retrieve and remove the result from the handle
      if response['handle'].get('sync'):
        result = response['handle'].pop('result')
    finally:
      if notebook['type'].startswith('query-'):
        _snippet = [s for s in notebook['snippets'] if s['id'] == snippet['id']][0]
        if 'handle' in response: # No failure
          _snippet['result']['handle'] = response['handle']
          _snippet['result']['statements_count'] = response['handle'].get('statements_count', 1)
          _snippet['result']['statement_id'] = response['handle'].get('statement_id', 0)
          _snippet['result']['handle']['statement'] = response['handle'].get('statement', snippet['statement']) # For non HS2, as non multi query yet
        else:
          _snippet['status'] = 'failed'

        history = _historify(notebook, request.user)

        response['history_id'] = history.id
        response['history_uuid'] = history.uuid
        if notebook['isSaved']: # Keep track of history of saved queries
          response['history_parent_uuid'] = history.dependencies.filter(type__startswith='query-').latest('last_modified').uuid
  except QueryError as ex: # We inject the history information from _historify() into the failed queries
    if response.get('history_id'):
      ex.extra['history_id'] = response['history_id']
    if response.get('history_uuid'):
      ex.extra['history_uuid'] = response['history_uuid']
    if response.get('history_parent_uuid'):
      ex.extra['history_parent_uuid'] = response['history_parent_uuid']
    raise ex
Example #32
  def fields(self, dashboard):
    database, table = self._get_database_table_names(dashboard)
    snippet = {'type': self.engine}

    table_metadata = get_api(MockRequest(self.user), snippet).autocomplete(snippet, database, table)

    return {
      'schema': {
        'fields':
            dict([(col['name'], {
              'name': str(escape(col['name'])),
              'type': str(col['type']),
              'uniqueKey': col.get('primary_key') == 'true',
              # 'dynamicBase': False,
              'indexed': False,
              'stored': True,
              'required': col.get('primary_key') == 'true'
          })
          for col in table_metadata['extended_columns']]
        )
      }
    }
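Example #32 passes a MockRequest to get_api because dashboard code runs outside the Django request cycle. The class itself is not shown in this listing; a minimal stand-in consistent with how these examples use requests (user, fs, jt, POST) might be the following sketch, not Hue's actual definition:

class MockRequest(object):
    def __init__(self, user, fs=None, jt=None):
        self.user = user  # the user the API should act as
        self.fs = fs      # filesystem handle, for APIs that need it
        self.jt = jt      # job tracker handle, for APIs that need it
        self.POST = {}
        self.GET = {}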
Example #33
def browse(request, database, table, partition_spec=None):
  snippet = {'type': 'hive'}

  statement = get_api(request, snippet).get_browse_query(snippet, database, table, partition_spec)

  editor_type = snippet['type']

  if request.method == 'POST':
    notebook = make_notebook(name='Execute and watch', editor_type=editor_type, statement=statement, status='ready-execute', is_task=True)
    return JsonResponse(notebook.execute(request, batch=False))
  else:
    editor = make_notebook(name='Browse', editor_type=editor_type, statement=statement, status='ready-execute')

    return render('editor.mako', request, {
        'notebooks_json': json.dumps([editor.get_data()]),
        'options_json': json.dumps({
            'languages': get_ordered_interpreters(request.user),
            'mode': 'editor',
            'editor_type': editor_type
        }),
        'editor_type': editor_type,
    })
Example #34
def autocomplete(request,
                 server=None,
                 database=None,
                 table=None,
                 column=None,
                 nested=None):
    response = {'status': -1}

    # Passed by check_document_access_permission but unused by APIs
    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))

    try:
        autocomplete_data = get_api(request, snippet).autocomplete(
            snippet, database, table, column, nested)
        response.update(autocomplete_data)
    except QueryExpired:
        pass

    response['status'] = 0

    return JsonResponse(response)
Example #35
File: api.py Project: Nick-Xu/hue
def check_status(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  if not snippet:
    nb_doc = Document2.objects.get_by_uuid(user=request.user, uuid=notebook['id'])
    notebook = Notebook(document=nb_doc).get_data()
    snippet = notebook['snippets'][0]

  try:
    response['query_status'] = get_api(request, snippet).check_status(notebook, snippet)

    response['status'] = 0
  except SessionExpired:
    response['status'] = 'expired'
    raise
  except QueryExpired:
    response['status'] = 'expired'
    raise
  finally:
    if response['status'] == 0 and snippet['status'] != response['query_status']:
      status = response['query_status']['status']
    elif response['status'] == 'expired':
      status = 'expired'
    else:
      status = 'failed'

    if notebook['type'].startswith('query') or notebook.get('isManaged'):
      nb_doc = Document2.objects.get(id=notebook['id'])
      if nb_doc.can_write(request.user):
        nb = Notebook(document=nb_doc).get_data()
        if status != nb['snippets'][0]['status']:
          nb['snippets'][0]['status'] = status
          nb_doc.update_data(nb)
          nb_doc.save()

  return JsonResponse(response)
Example #36
File: api.py Project: eahagopi/hue
def create_session(request):
    response = {'status': -1}

    notebook = json.loads(request.POST.get('notebook', '{}'))
    session = json.loads(request.POST.get('session', '{}'))

    properties = session.get('properties', [])

    # If not properties look for previously used notebook session
    if not properties:
        old_session = [
            _session for _session in notebook['sessions']
            if _session['type'] == session['type']
        ]
        if any(old_session) and 'properties' in old_session[0]:
            properties = old_session[0]['properties']

    response['session'] = get_api(request, session).create_session(
        lang=session['type'], properties=properties)
    response['status'] = 0

    return JsonResponse(response)
Example #37
def download(request):
    if not ENABLE_DOWNLOAD.get():
        return serve_403_error(request)

    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))
    file_format = request.POST.get('format', 'csv')

    response = get_api(request, snippet).download(notebook, snippet,
                                                  file_format)

    if response:
        request.audit = {
            'operation':
            'DOWNLOAD',
            'operationText':
            'User %s downloaded results from %s as %s' %
            (request.user.username, _get_snippet_name(notebook), file_format),
            'allowed':
            True
        }

    return response
Example #38
def _get_api(request):
  file_format = json.loads(request.POST.get('source', request.POST.get('fileFormat', '{}')))
  options = None
  query_server = None
  if file_format['rdbmsMode'] == 'customRdbms':
    type = 'custom'
    if file_format['rdbmsType'] == 'jdbc':
      name = file_format['rdbmsHostname'] # We make sure it's unique as name is the cache key
      interface = file_format['rdbmsType']
      options = {'driver': file_format['rdbmsJdbcDriver'],
                 'url': file_format['rdbmsHostname'],
                 'user': file_format['rdbmsUsername'],
                 'password': file_format['rdbmsPassword']
                }
    else:
      interface = 'rdbms'
      query_server = {
        'server_name': file_format['rdbmsType'],
        'server_host': file_format['rdbmsHostname'],
        'server_port': int(file_format['rdbmsPort'] or '3306'),
        'username': file_format['rdbmsUsername'],
        'password': file_format['rdbmsPassword'],
        'options': {},
        'alias': file_format['rdbmsType']
      }
      name = 'rdbms:%(server_name)s://%(server_host)s:%(server_port)s' % query_server # We make sure it's unique as name is the cache key
  else:
    if file_format['rdbmsType'] == 'jdbc':
      type = file_format['rdbmsJdbcDriverName'] and file_format['rdbmsJdbcDriverName'].lower()
    else:
      type = file_format['rdbmsType']
      query_server = rdbms.get_query_server_config(server=file_format['rdbmsType'])
    name = type
    interface = file_format['inputFormat']

  return get_api(request, { 'type': type, 'interface': interface, 'options': options, 'query_server': query_server, 'name': name})
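For the customRdbms branch above, the incoming source payload carries the connection details directly. The keys below are the ones the code reads; the values are illustrative:

file_format = {
    'inputFormat': 'rdbms',
    'rdbmsMode': 'customRdbms',
    'rdbmsType': 'mysql',  # or 'jdbc', which switches to the JDBC driver fields
    'rdbmsHostname': 'db.example.com',
    'rdbmsPort': '3306',
    'rdbmsUsername': 'hue',
    'rdbmsPassword': 'secret',
}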
Example #39
def get_logs(request):
    response = {'status': -1}

    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))

    startFrom = request.POST.get('from')
    startFrom = int(startFrom) if startFrom else None

    size = request.POST.get('size')
    size = int(size) if size else None

    db = get_api(request, snippet)

    logs = db.get_log(notebook, snippet, startFrom=startFrom, size=size)

    jobs = json.loads(request.POST.get('jobs', '[]'))

    # Get any new jobs from current logs snippet
    new_jobs = db.get_jobs(notebook, snippet, logs)

    # Append new jobs to known jobs and get the unique set
    if new_jobs:
        all_jobs = jobs + new_jobs
        jobs = dict((job['name'], job) for job in all_jobs).values()

    # Retrieve full log for job progress parsing
    full_log = request.POST.get('full_log', logs)

    response['logs'] = logs.strip()
    response['progress'] = db.progress(snippet, full_log) if snippet[
        'status'] != 'available' and snippet['status'] != 'success' else 100
    response['jobs'] = jobs
    response['status'] = 0

    return JsonResponse(response)
Example #40
File: api.py Project: mastanr/hue
def export_result(request):
  response = {'status': -1, 'message': _('Exporting result failed.')}

  # Passed by check_document_access_permission but unused by APIs
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  data_format = json.loads(request.POST.get('format', 'hdfs-file'))
  destination = json.loads(request.POST.get('destination', ''))
  overwrite = json.loads(request.POST.get('overwrite', False))

  api = get_api(request, snippet)

  if data_format == 'hdfs-file':
    if request.fs.isdir(destination):
      if notebook.get('name'):
        destination += '/%(name)s.csv' % notebook
      else:
        destination += '/%(type)s-%(id)s.csv' % notebook
    if overwrite and request.fs.exists(destination):
      request.fs.do_as_user(request.user.username, request.fs.rmtree, destination)
    response['watch_url'] = api.export_data_as_hdfs_file(snippet, destination, overwrite)
    response['status'] = 0
  elif data_format == 'hive-table':
    notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
    response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=save_as_table&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
    response['status'] = 0
  elif data_format == 'hdfs-directory':
    notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
    response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=insert_as_query&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
    response['status'] = 0
  elif data_format == 'search-index':
    notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
    response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=index_query&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
    response['status'] = 0

  return JsonResponse(response)
Example #41
def browse(request, database, table):
    snippet = {'type': 'hive'}
    sql_select = get_api(request, snippet).get_select_star_query(
        snippet, database, table)

    editor_type = snippet['type']
    editor = make_notebook(name='Browse',
                           editor_type=editor_type,
                           statement=sql_select,
                           status='ready-execute')

    return render(
        'editor.mako', request, {
            'notebooks_json':
            json.dumps([editor.get_data()]),
            'options_json':
            json.dumps({
                'languages': get_ordered_interpreters(request.user),
                'mode': 'editor',
                'editor_type': editor_type
            }),
            'editor_type':
            editor_type,
        })
Example #42
File: api.py Project: eahagopi/hue
def check_status(request):
    response = {'status': -1}

    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))

    try:
        response['query_status'] = get_api(request, snippet).check_status(
            notebook, snippet)
        response['status'] = 0
    finally:
        if response['status'] == 0 and snippet['status'] != response[
                'query_status']:
            status = response['query_status']['status']
        else:
            status = 'failed'
        nb_doc = Document2.objects.get(id=notebook['id'])
        nb_doc.can_write_or_exception(request.user)
        nb = Notebook(document=nb_doc).get_data()
        nb['snippets'][0]['status'] = status
        nb_doc.update_data(nb)
        nb_doc.save()

    return JsonResponse(response)
Example #43
    def run_morphline(self,
                      request,
                      collection_name,
                      morphline,
                      input_path,
                      query=None,
                      start_time=None,
                      lib_path=None):
        workspace_path = self._upload_workspace(morphline)

        task = make_notebook(name=_('Indexing into %s') % collection_name,
                             editor_type='notebook',
                             on_success_url=reverse(
                                 'search:browse',
                                 kwargs={'name': collection_name}),
                             pub_sub_url='assist.collections.refresh',
                             is_task=True,
                             is_notebook=True,
                             last_executed=start_time)

        if query:
            q = Notebook(document=Document2.objects.get_by_uuid(user=self.user,
                                                                uuid=query))
            notebook_data = q.get_data()
            snippet = notebook_data['snippets'][0]

            api = get_api(request, snippet)

            destination = '__hue_%s' % notebook_data['uuid'][:4]
            location = '/user/%s/__hue-%s' % (request.user,
                                              notebook_data['uuid'][:4])
            sql, _success_url = api.export_data_as_table(notebook_data,
                                                         snippet,
                                                         destination,
                                                         is_temporary=True,
                                                         location=location)
            input_path = '${nameNode}%s' % location

            task.add_hive_snippet(snippet['database'], sql)

        client = SolrClient(self.user)

        extra_args = ['-Dmapreduce.job.user.classpath.first=true'
                      ] if client.is_solr_six_or_more() else []

        task.add_java_snippet(
            clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
            app_jar=lib_path
            if lib_path is not None else CONFIG_INDEXER_LIBS_PATH.get(),
            arguments=extra_args + [
                u'--morphline-file',
                u'morphline.conf',
                u'--output-dir',
                u'${nameNode}/user/%s/indexer' % self.username,
                u'--log4j',
                u'log4j.properties',
                u'--go-live',
                u'--zk-host',
                client.get_zookeeper_host(),
                u'--collection',
                collection_name,
                input_path,
            ],
            files=[{
                u'path': u'%s/log4j.properties' % workspace_path,
                u'type': u'file'
            }, {
                u'path': u'%s/morphline.conf' % workspace_path,
                u'type': u'file'
            }])

        return task.execute(request, batch=True)
Example #44
def close_statement_async(notebook, snippet, **kwargs):
    request = _get_request(**kwargs)
    get_api(request, snippet).close_statement(notebook, snippet)
Example #45
def cancel_async(notebook, snippet, **kwargs):
    request = _get_request(**kwargs)
    get_api(request, snippet).cancel(notebook, snippet)
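Examples #44 and #45 are background-task wrappers: they rebuild a request outside the web cycle before delegating to the API. _get_request is not shown in this listing, but Examples #49 and #51 suggest it takes a POST dict and a user id. A minimal sketch under that assumption, reusing the MockRequest stand-in from the note after Example #32:

from django.contrib.auth.models import User

def _get_request(postdict=None, user_id=None):
    # Sketch: rebuild just enough of a request for get_api() to work.
    request = MockRequest(User.objects.get(id=user_id))
    request.POST = postdict or {}
    return request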
Example #46
File: api.py Project: tyluan/hue
def export_result(request):
    response = {'status': -1, 'message': _('Exporting result failed.')}

    # Passed by check_document_access_permission but unused by APIs
    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))
    data_format = json.loads(request.POST.get('format', 'hdfs-file'))
    destination = json.loads(request.POST.get('destination', ''))
    overwrite = json.loads(request.POST.get('overwrite', 'false'))
    is_embedded = json.loads(request.POST.get('is_embedded', 'false'))

    api = get_api(request, snippet)

    if data_format == 'hdfs-file':  # Blocking operation, like downloading
        if request.fs.isdir(destination):
            if notebook.get('name'):
                destination += '/%(name)s.csv' % notebook
            else:
                destination += '/%(type)s-%(id)s.csv' % notebook
        if overwrite and request.fs.exists(destination):
            request.fs.do_as_user(request.user.username, request.fs.rmtree,
                                  destination)
        response['watch_url'] = api.export_data_as_hdfs_file(
            snippet, destination, overwrite)
        response['status'] = 0
        request.audit = {
            'operation':
            'EXPORT',
            'operationText':
            'User %s exported to HDFS destination: %s' %
            (request.user.username, destination),
            'allowed':
            True
        }
    elif data_format == 'hive-table':
        if is_embedded:
            sql, success_url = api.export_data_as_table(
                notebook, snippet, destination)

            task = make_notebook(name=_('Export %s query to table %s') %
                                 (snippet['type'], destination),
                                 description=_('Query %s to %s') %
                                 (_get_snippet_name(notebook), success_url),
                                 editor_type=snippet['type'],
                                 statement=sql,
                                 status='ready-execute',
                                 database=snippet['database'],
                                 on_success_url=success_url,
                                 is_task=True)
            response = task.execute(request)
        else:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))
            response['watch_url'] = reverse(
                'notebook:execute_and_watch'
            ) + '?action=save_as_table&notebook=' + str(
                notebook_id) + '&snippet=0&destination=' + destination
            response['status'] = 0
        request.audit = {
            'operation':
            'EXPORT',
            'operationText':
            'User %s exported to Hive table: %s' %
            (request.user.username, destination),
            'allowed':
            True
        }
    elif data_format == 'hdfs-directory':
        if is_embedded:
            sql, success_url = api.export_large_data_to_hdfs(
                notebook, snippet, destination)

            task = make_notebook(name=_('Export %s query to directory') %
                                 snippet['type'],
                                 description=_('Query %s to %s') %
                                 (_get_snippet_name(notebook), success_url),
                                 editor_type=snippet['type'],
                                 statement=sql,
                                 status='ready-execute',
                                 database=snippet['database'],
                                 on_success_url=success_url,
                                 is_task=True)
            response = task.execute(request)
        else:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))
            response['watch_url'] = reverse(
                'notebook:execute_and_watch'
            ) + '?action=insert_as_query&notebook=' + str(
                notebook_id) + '&snippet=0&destination=' + destination
            response['status'] = 0
        request.audit = {
            'operation':
            'EXPORT',
            'operationText':
            'User %s exported to HDFS directory: %s' %
            (request.user.username, destination),
            'allowed':
            True
        }
    elif data_format == 'search-index':
        if is_embedded:
            if destination == '__hue__':
                destination = _get_snippet_name(notebook,
                                                unique=True,
                                                table_format=True)
                live_indexing = True
            else:
                live_indexing = False

            sample = get_api(request, snippet).fetch_result(notebook,
                                                            snippet,
                                                            0,
                                                            start_over=True)

            from indexer.api3 import _index  # Will be moved to the lib
            from indexer.file_format import HiveFormat
            from indexer.fields import Field

            file_format = {
                'name':
                'col',
                'inputFormat':
                'query',
                'format': {
                    'quoteChar': '"',
                    'recordSeparator': '\n',
                    'type': 'csv',
                    'hasHeader': False,
                    'fieldSeparator': '\u0001'
                },
                "sample":
                '',
                "columns": [
                    Field(
                        col['name'].rsplit('.')[-1],
                        HiveFormat.FIELD_TYPE_TRANSLATE.get(
                            col['type'], 'string')).to_dict()
                    for col in sample['meta']
                ]
            }

            if live_indexing:
                file_format['inputFormat'] = 'hs2_handle'
                file_format['fetch_handle'] = lambda rows, start_over: get_api(
                    request, snippet).fetch_result(
                        notebook, snippet, rows=rows, start_over=start_over)
                response['rowcount'] = _index(request,
                                              file_format,
                                              destination,
                                              query=notebook['uuid'])
                response['watch_url'] = reverse('search:browse',
                                                kwargs={'name': destination})
                response['status'] = 0
            else:
                response = _index(request,
                                  file_format,
                                  destination,
                                  query=notebook['uuid'])
        else:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))
            response['watch_url'] = reverse(
                'notebook:execute_and_watch'
            ) + '?action=index_query&notebook=' + str(
                notebook_id) + '&snippet=0&destination=' + destination
            response['status'] = 0
        request.audit = {
            'operation':
            'EXPORT',
            'operationText':
            'User %s exported to Search index: %s' %
            (request.user.username, destination),
            'allowed':
            True
        }

    return JsonResponse(response)
Example #47
def export_result(request):
    response = {'status': -1, 'message': _('Success')}

    # Passed by check_document_access_permission but unused by APIs
    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))
    data_format = json.loads(request.POST.get('format', 'hdfs-file'))
    destination = urllib.unquote(
        json.loads(request.POST.get('destination', '')))
    overwrite = json.loads(request.POST.get('overwrite', 'false'))
    is_embedded = json.loads(request.POST.get('is_embedded', 'false'))
    start_time = json.loads(request.POST.get('start_time', '-1'))

    api = get_api(request, snippet)

    if data_format == 'hdfs-file':  # Blocking operation, like downloading
        if request.fs.isdir(destination):
            if notebook.get('name'):
                destination += '/%(name)s.csv' % notebook
            else:
                destination += '/%(type)s-%(id)s.csv' % notebook
        if overwrite and request.fs.exists(destination):
            request.fs.do_as_user(request.user.username, request.fs.rmtree,
                                  destination)
        response['watch_url'] = api.export_data_as_hdfs_file(
            snippet, destination, overwrite)
        response['status'] = 0
        request.audit = {
            'operation':
            'EXPORT',
            'operationText':
            'User %s exported to HDFS destination: %s' %
            (request.user.username, destination),
            'allowed':
            True
        }
    elif data_format == 'hive-table':
        if is_embedded:
            sql, success_url = api.export_data_as_table(
                notebook, snippet, destination)

            task = make_notebook(name=_('Export %s query to table %s') %
                                 (snippet['type'], destination),
                                 description=_('Query %s to %s') %
                                 (_get_snippet_name(notebook), success_url),
                                 editor_type=snippet['type'],
                                 statement=sql,
                                 status='ready',
                                 database=snippet['database'],
                                 on_success_url=success_url,
                                 last_executed=start_time,
                                 is_task=True)
            response = task.execute(request)
        else:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))
            response['watch_url'] = reverse(
                'notebook:execute_and_watch'
            ) + '?action=save_as_table&notebook=' + str(
                notebook_id) + '&snippet=0&destination=' + destination
            response['status'] = 0
        request.audit = {
            'operation':
            'EXPORT',
            'operationText':
            'User %s exported to Hive table: %s' %
            (request.user.username, destination),
            'allowed':
            True
        }
    elif data_format == 'hdfs-directory':
        if is_embedded:
            sql, success_url = api.export_large_data_to_hdfs(
                notebook, snippet, destination)

            task = make_notebook(name=_('Export %s query to directory') %
                                 snippet['type'],
                                 description=_('Query %s to %s') %
                                 (_get_snippet_name(notebook), success_url),
                                 editor_type=snippet['type'],
                                 statement=sql,
                                 status='ready-execute',
                                 database=snippet['database'],
                                 on_success_url=success_url,
                                 last_executed=start_time,
                                 is_task=True)
            response = task.execute(request)
        else:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))
            response['watch_url'] = reverse(
                'notebook:execute_and_watch'
            ) + '?action=insert_as_query&notebook=' + str(
                notebook_id) + '&snippet=0&destination=' + destination
            response['status'] = 0
        request.audit = {
            'operation':
            'EXPORT',
            'operationText':
            'User %s exported to HDFS directory: %s' %
            (request.user.username, destination),
            'allowed':
            True
        }
    elif data_format in ('search-index', 'dashboard'):
        # Open the result in the Dashboard via a SQL sub-query or the Import wizard (quick vs scalable)
        if is_embedded:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))

            if data_format == 'dashboard':
                engine = notebook['type'].replace('query-', '')
                response['watch_url'] = reverse(
                    'dashboard:browse', kwargs={
                        'name': notebook_id
                    }) + '?source=query&engine=%(engine)s' % {
                        'engine': engine
                    }
                response['status'] = 0
            else:
                sample = get_api(request,
                                 snippet).fetch_result(notebook,
                                                       snippet,
                                                       rows=4,
                                                       start_over=True)
                for col in sample['meta']:
                    col['type'] = HiveFormat.FIELD_TYPE_TRANSLATE.get(
                        col['type'], 'string')

                response['status'] = 0
                response['id'] = notebook_id
                response['name'] = _get_snippet_name(notebook)
                response['source_type'] = 'query'
                response['target_type'] = 'index'
                response['target_path'] = destination
                response['sample'] = list(sample['data'])
                response['columns'] = [
                    Field(col['name'], col['type']).to_dict()
                    for col in sample['meta']
                ]
        else:
            notebook_id = notebook['id'] or request.GET.get(
                'editor', request.GET.get('notebook'))
            response['watch_url'] = reverse(
                'notebook:execute_and_watch'
            ) + '?action=index_query&notebook=' + str(
                notebook_id) + '&snippet=0&destination=' + destination
            response['status'] = 0

        if response.get('status') != 0:
            response['message'] = _('Exporting result failed.')

    return JsonResponse(response)
Example #48
def close_notebook(request):
    response = {'status': -1, 'result': []}

    notebook = json.loads(request.POST.get('notebook', '{}'))

    for session in [
            _s for _s in notebook['sessions']
            if _s['type'] in ('scala', 'spark', 'pyspark', 'sparkr')
    ]:
        try:
            response['result'].append(
                get_api(request, session).close_session(session))
        except QueryExpired:
            pass
        except Exception as e:
            LOG.exception('Error closing session %s' % str(e))

    for snippet in [
            _s for _s in notebook['snippets']
            if _s['type'] in ('hive', 'impala')
    ]:
        try:
            if snippet['status'] != 'running':
                response['result'].append(
                    get_api(request, snippet).close_statement(snippet))
            else:
                LOG.info('Not closing SQL snippet as still running.')
        except QueryExpired:
            pass
        except Exception as e:
            LOG.exception('Error closing statement %s' % str(e))

    response['status'] = 0
    response['message'] = _('Notebook closed successfully')

    return JsonResponse(response)


Example #49
def close_statement_async(notebook, snippet, postdict=None, user_id=None):
    request = _get_request(postdict, user_id)
    get_api(request, snippet).close_statement(notebook, snippet)
Example #50
def execute_and_watch(request):
  notebook_id = request.GET.get('editor', request.GET.get('notebook'))
  snippet_id = int(request.GET['snippet'])
  action = request.GET['action']
  destination = request.GET['destination']

  notebook = Notebook(document=Document2.objects.get(id=notebook_id)).get_data()
  snippet = notebook['snippets'][snippet_id]
  editor_type = snippet['type']

  api = get_api(request, snippet)

  if action == 'save_as_table':
    sql, success_url = api.export_data_as_table(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute', database=snippet['database'])
  elif action == 'insert_as_query':
    # TODO: checks/workarounds in case of non impersonation or Sentry
    # TODO: keep older simpler way in case of known not many rows?
    sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute', database=snippet['database'], on_success_url=success_url)
  elif action == 'index_query':
    if destination == '__hue__':
      destination = _get_snippet_name(notebook, unique=True, table_format=True)
      live_indexing = True
    else:
      live_indexing = False

    sql, success_url = api.export_data_as_table(notebook, snippet, destination, is_temporary=True, location='')
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')

    sample = get_api(request, snippet).fetch_result(notebook, snippet, 0, start_over=True)

    from indexer.api3 import _index # Will be moved to the lib
    from indexer.file_format import HiveFormat
    from indexer.fields import Field

    file_format = {
        'name': 'col',
        'inputFormat': 'query',
        'format': {'quoteChar': '"', 'recordSeparator': '\n', 'type': 'csv', 'hasHeader': False, 'fieldSeparator': '\u0001'},
        "sample": '',
        "columns": [
            Field(col['name'].rsplit('.')[-1], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
            for col in sample['meta']
        ]
    }

    if live_indexing:
      file_format['inputFormat'] = 'hs2_handle'
      file_format['fetch_handle'] = lambda rows, start_over: get_api(request, snippet).fetch_result(notebook, snippet, rows=rows, start_over=start_over)

    job_handle = _index(request, file_format, destination, query=notebook['uuid'])

    if live_indexing:
      return redirect(reverse('search:browse', kwargs={'name': destination}))
    else:
      return redirect(reverse('oozie:list_oozie_workflow', kwargs={'job_id': job_handle['handle']['id']}))
  else:
    raise PopupException(_('Action %s is unknown') % action)

  return render('editor.mako', request, {
      'notebooks_json': json.dumps([editor.get_data()]),
      'options_json': json.dumps({
          'languages': [{"name": "%s SQL" % editor_type.title(), "type": editor_type}],
          'mode': 'editor',
          'editor_type': editor_type,
          'success_url': success_url
      }),
      'editor_type': editor_type,
  })
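The view above reads all of its inputs from GET parameters. An illustrative request, assuming the view is routed at /notebook/execute_and_watch:

  /notebook/execute_and_watch?editor=123&snippet=0&action=save_as_table&destination=default.my_table

where `editor` (or `notebook`) is the Document2 id, `snippet` is the index into the notebook's snippets, and `action` is one of save_as_table, insert_as_query or index_query.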
Example #51
0
def cancel_async(notebook, snippet, postdict=None, user_id=None):
    request = _get_request(postdict, user_id)
    get_api(request, snippet).cancel(notebook, snippet)
Example #52
0
def guess_field_types(request):
    file_format = json.loads(request.POST.get('fileFormat', '{}'))

    if file_format['inputFormat'] == 'file':
        indexer = MorphlineIndexer(request.user, request.fs)
        stream = request.fs.open(file_format["path"])
        _convert_format(file_format["format"], inverse=True)

        format_ = indexer.guess_field_types({
            "file": {
                "stream": stream,
                "name": file_format['path']
            },
            "format": file_format['format']
        })
    elif file_format['inputFormat'] == 'table':
        sample = get_api(request, {
            'type': 'hive'
        }).get_sample_data({'type': 'hive'},
                           database=file_format['databaseName'],
                           table=file_format['tableName'])
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])

        format_ = {
            "sample":
            sample['rows'][:4],
            "columns": [
                Field(col.name,
                      HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type,
                                                          'string')).to_dict()
                for col in table_metadata.cols
            ]
        }
    elif file_format[
            'inputFormat'] == 'query':  # Only support open query history
        # TODO get schema from explain query, which is not possible
        notebook = Notebook(document=Document2.objects.get(
            id=file_format['query'])).get_data()
        snippet = notebook['snippets'][0]
        sample = get_api(request, snippet).fetch_result(notebook,
                                                        snippet,
                                                        4,
                                                        start_over=True)

        format_ = {
            "sample":
            sample['rows'][:4],
            "sample_cols":
            sample.meta,
            "columns": [
                Field(
                    col['name'],
                    HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'],
                                                        'string')).to_dict()
                for col in sample.meta
            ]
        }
    elif file_format['inputFormat'] == 'rdbms':
        query_server = rdbms.get_query_server_config(
            server=file_format['rdbmsType'])
        db = rdbms.get(request.user, query_server=query_server)
        sample = RdbmsIndexer(request.user,
                              file_format['rdbmsType']).get_sample_data(
                                  mode=file_format['rdbmsMode'],
                                  database=file_format['rdbmsDatabaseName'],
                                  table=file_format['rdbmsTableName'])
        table_metadata = db.get_columns(file_format['rdbmsDatabaseName'],
                                        file_format['rdbmsTableName'],
                                        names_only=False)

        format_ = {
            "sample":
            list(sample['rows'])[:4],
            "columns": [
                Field(
                    col['name'],
                    HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'],
                                                        'string')).to_dict()
                for col in table_metadata
            ]
        }

    return JsonResponse(format_)
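The branching above is driven by the posted fileFormat JSON. A minimal payload for the 'table' branch, with keys taken from the code and illustrative values:

  fileFormat={"inputFormat": "table", "databaseName": "default", "tableName": "web_logs"}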
Example #53
0
File: api3.py Project: hkj123/hue
def _small_indexing(user, fs, client, source, destination, index_name):
    kwargs = {}
    errors = []

    if source['inputFormat'] not in ('manual', 'table', 'query_handle'):
        path = urllib_unquote(source["path"])
        stats = fs.stats(path)
        if stats.size > MAX_UPLOAD_SIZE:
            raise PopupException(_('File size is too large to handle!'))

    indexer = MorphlineIndexer(user, fs)

    fields = indexer.get_field_list(destination['columns'])
    _create_solr_collection(user, fs, client, destination, index_name, kwargs)

    if source['inputFormat'] == 'file':
        kwargs['separator'] = source['format']['fieldSeparator']
        path = urllib_unquote(source["path"])
        data = fs.read(path, 0, MAX_UPLOAD_SIZE)

    if client.is_solr_six_or_more():
        kwargs['processor'] = 'tolerant'
        kwargs['map'] = 'NULL:'

    try:
        if source['inputFormat'] == 'query':
            query_id = source['query']['id'] if source['query'].get(
                'id') else source['query']

            notebook = Notebook(document=Document2.objects.document(
                user=user, doc_id=query_id)).get_data()
            request = MockedDjangoRequest(user=user)
            snippet = notebook['snippets'][0]

            searcher = CollectionManagerController(user)
            columns = [
                field['name'] for field in fields if field['name'] != 'hue_id'
            ]
            # Assumes handle still live
            fetch_handle = lambda rows, start_over: get_api(
                request, snippet).fetch_result(
                    notebook, snippet, rows=rows, start_over=start_over)
            rows = searcher.update_data_from_hive(index_name,
                                                  columns,
                                                  fetch_handle=fetch_handle,
                                                  indexing_options=kwargs)
            # TODO if rows == MAX_ROWS truncation warning
        elif source['inputFormat'] == 'manual':
            pass  # No need to do anything
        else:
            response = client.index(name=index_name, data=data, **kwargs)
            errors = [
                error.get('message', '')
                for error in response['responseHeader'].get('errors', [])
            ]
    except Exception as e:
        try:
            client.delete_index(index_name, keep_config=False)
        except Exception as e2:
            LOG.warning(
                'Error while cleaning-up config of failed collection creation %s: %s'
                % (index_name, e2))
        raise e

    return {
        'status': 0,
        'on_success_url': reverse('indexer:indexes',
                                  kwargs={'index': index_name}),
        'pub_sub_url': 'assist.collections.refresh',
        'errors': errors
    }
Example #54
0
File: api3.py Project: hkj123/hue
def guess_field_types(request):
    file_format = json.loads(request.POST.get('fileFormat', '{}'))

    if file_format['inputFormat'] == 'file':
        indexer = MorphlineIndexer(request.user, request.fs)
        path = urllib_unquote(file_format["path"])
        stream = request.fs.open(path)
        encoding = check_encoding(stream.read(10000))
        stream.seek(0)
        _convert_format(file_format["format"], inverse=True)

        format_ = indexer.guess_field_types({
            "file": {
                "stream": stream,
                "name": path
            },
            "format": file_format['format']
        })

        # Note: Would also need to set charset to table (only supported in Hive)
        if 'sample' in format_ and format_['sample']:
            format_['sample'] = escape_rows(format_['sample'],
                                            nulls_only=True,
                                            encoding=encoding)
        for col in format_['columns']:
            col['name'] = smart_unicode(col['name'],
                                        errors='replace',
                                        encoding=encoding)

    elif file_format['inputFormat'] == 'table':
        sample = get_api(request, {
            'type': 'hive'
        }).get_sample_data({'type': 'hive'},
                           database=file_format['databaseName'],
                           table=file_format['tableName'])
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'],
                                      table_name=file_format['tableName'])

        format_ = {
            "sample":
            sample['rows'][:4],
            "columns": [
                Field(col.name,
                      HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type,
                                                          'string')).to_dict()
                for col in table_metadata.cols
            ]
        }
    elif file_format['inputFormat'] == 'query':
        query_id = file_format['query']['id'] if file_format['query'].get(
            'id') else file_format['query']

        notebook = Notebook(document=Document2.objects.document(
            user=request.user, doc_id=query_id)).get_data()
        snippet = notebook['snippets'][0]
        db = get_api(request, snippet)

        if file_format.get('sampleCols'):
            columns = file_format.get('sampleCols')
            sample = file_format.get('sample')
        else:
            snippet['query'] = snippet['statement']
            try:
                sample = db.fetch_result(notebook, snippet, 4,
                                         start_over=True)['rows'][:4]
            except Exception as e:
                LOG.warning(
                    'Skipping sample data as query handle might be expired: %s'
                    % e)
                sample = [[], [], [], [], []]
            columns = db.autocomplete(snippet=snippet, database='', table='')
            columns = [
                Field(
                    col['name'],
                    HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'],
                                                        'string')).to_dict()
                for col in columns['extended_columns']
            ]
        format_ = {
            "sample": sample,
            "columns": columns,
        }
    elif file_format['inputFormat'] == 'rdbms':
        api = _get_api(request)
        sample = api.get_sample_data(None,
                                     database=file_format['rdbmsDatabaseName'],
                                     table=file_format['tableName'])

        format_ = {
            "sample":
            list(sample['rows'])[:4],
            "columns": [
                Field(col['name'], col['type']).to_dict()
                for col in sample['full_headers']
            ]
        }
    elif file_format['inputFormat'] == 'stream':
        if file_format['streamSelection'] == 'kafka':
            data = get_topic_data(request.user,
                                  file_format.get('kafkaSelectedTopics'))

            kafkaFieldNames = [col['name'] for col in data['full_headers']]
            kafkaFieldTypes = [col['type'] for col in data['full_headers']]
            topics_data = data['rows']

            format_ = {
                "sample":
                topics_data,
                "columns": [
                    Field(col, 'string', unique=False).to_dict()
                    for col in kafkaFieldNames
                ]
            }


#       data = """%(kafkaFieldNames)s
# %(data)s""" % {
#         'kafkaFieldNames': ','.join(kafkaFieldNames),
#         'data': '\n'.join([','.join(cols) for cols in topics_data])
#       }
#       stream = string_io()
#       stream.write(data)

#       _convert_format(file_format["format"], inverse=True)

#       indexer = MorphlineIndexer(request.user, request.fs)

#       format_ = indexer.guess_field_types({
#         "file": {
#             "stream": stream,
#             "name": file_format['path']
#         },
#         "format": file_format['format']
#       })
#       type_mapping = dict(
#         list(
#           zip(kafkaFieldNames, kafkaFieldTypes)
#         )
#       )

#       for col in format_['columns']:
#         col['keyType'] = type_mapping[col['name']]
#         col['type'] = type_mapping[col['name']]
        elif file_format['streamSelection'] == 'flume':
            if 'hue-httpd/access_log' in file_format['channelSourcePath']:
                columns = [{
                    'name': 'id',
                    'type': 'string',
                    'unique': True
                }, {
                    'name': 'client_ip',
                    'type': 'string'
                }, {
                    'name': 'time',
                    'type': 'date'
                }, {
                    'name': 'request',
                    'type': 'string'
                }, {
                    'name': 'code',
                    'type': 'plong'
                }, {
                    'name': 'bytes',
                    'type': 'plong'
                }, {
                    'name': 'method',
                    'type': 'string'
                }, {
                    'name': 'url',
                    'type': 'string'
                }, {
                    'name': 'protocol',
                    'type': 'string'
                }, {
                    'name': 'app',
                    'type': 'string'
                }, {
                    'name': 'subapp',
                    'type': 'string'
                }]
            else:
                columns = [{'name': 'message', 'type': 'string'}]

            format_ = {
                "sample": [['...'] * len(columns)] * 4,
                "columns": [
                    Field(col['name'],
                          HiveFormat.FIELD_TYPE_TRANSLATE.get(
                              col['type'], 'string'),
                          unique=col.get('unique')).to_dict()
                    for col in columns
                ]
            }
    elif file_format['inputFormat'] == 'connector':
        if file_format['connectorSelection'] == 'sfdc':
            sf = Salesforce(username=file_format['streamUsername'],
                            password=file_format['streamPassword'],
                            security_token=file_format['streamToken'])
            table_metadata = [{
                'name': column['name'],
                'type': column['type']
            } for column in sf.restful('sobjects/%(streamObject)s/describe/' %
                                       file_format)['fields']]
            query = 'SELECT %s FROM %s LIMIT 4' % (', '.join(
                [col['name']
                 for col in table_metadata]), file_format['streamObject'])
            LOG.debug(query)  # log instead of printing to stdout in server code

            try:
                records = sf.query_all(query)
            except SalesforceRefusedRequest as e:
                raise PopupException(message=str(e))

            format_ = {
                "sample":
                [list(row.values())[1:] for row in records['records']],
                "columns": [
                    Field(
                        col['name'],
                        HiveFormat.FIELD_TYPE_TRANSLATE.get(
                            col['type'], 'string')).to_dict()
                    for col in table_metadata
                ]
            }
        else:
            raise PopupException(
                _('Connector format not recognized: %(connectorSelection)s') %
                file_format)
    else:
        raise PopupException(
            _('Input format not recognized: %(inputFormat)s') % file_format)

    return JsonResponse(format_)
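This variant adds 'stream' and 'connector' branches. An illustrative fileFormat payload for the Kafka path, with keys taken from the code above and a made-up topic name:

  fileFormat={"inputFormat": "stream", "streamSelection": "kafka", "kafkaSelectedTopics": "web_logs_topic"}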
Example #55
0
  def datasets(self, show_all=False):
    snippet = {'type': self.engine}
    # Ideally from left assist at some point instead
    databases = get_api(MockRequest(self.user), snippet).autocomplete(snippet)['databases']
    database = databases and 'default' not in databases and databases[0] or 'default'
    return [database + '.' + table['name'] for table in get_api(MockRequest(self.user), snippet).autocomplete(snippet, database=database)['tables_meta']]
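The `and ... or` chain above is the pre-ternary idiom for picking a default database. An equivalent conditional expression, assuming database names are non-empty strings:

  database = databases[0] if databases and 'default' not in databases else 'default'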
Example #56
0
def execute_and_watch(request):
    notebook_id = request.GET.get('editor', request.GET.get('notebook'))
    snippet_id = int(request.GET['snippet'])
    action = request.GET['action']
    destination = request.GET['destination']

    notebook = Notebook(document=Document2.objects.get(
        id=notebook_id)).get_data()
    snippet = notebook['snippets'][snippet_id]
    editor_type = snippet['type']

    api = get_api(request, snippet)

    if action == 'save_as_table':
        sql, success_url = api.export_data_as_table(notebook, snippet,
                                                    destination)
        editor = make_notebook(name='Execute and watch',
                               editor_type=editor_type,
                               statement=sql,
                               status='ready-execute')
    elif action == 'insert_as_query':
        sql, success_url = api.export_large_data_to_hdfs(
            notebook, snippet, destination)
        editor = make_notebook(name='Execute and watch',
                               editor_type=editor_type,
                               statement=sql,
                               status='ready-execute')
    elif action == 'index_query':
        sql, success_url = api.export_data_as_table(notebook,
                                                    snippet,
                                                    destination,
                                                    is_temporary=True,
                                                    location='')
        editor = make_notebook(name='Execute and watch',
                               editor_type=editor_type,
                               statement=sql,
                               status='ready-execute')

        sample = get_api(request, snippet).fetch_result(notebook,
                                                        snippet,
                                                        0,
                                                        start_over=True)

        from indexer.api3 import _index  # Will be moved to the lib in the next commit
        from indexer.file_format import HiveFormat
        from indexer.fields import Field

        file_format = {
            'name':
            'col',
            'inputFormat':
            'query',
            'format': {
                'quoteChar': '"',
                'recordSeparator': '\n',
                'type': 'csv',
                'hasHeader': False,
                'fieldSeparator': '\u0001'
            },
            "sample":
            '',
            "columns": [
                Field(
                    col['name'],
                    HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'],
                                                        'string')).to_dict()
                for col in sample['meta']
            ]
        }

        job_handle = _index(request,
                            file_format,
                            destination,
                            query=notebook['uuid'])
        return redirect(
            reverse('oozie:list_oozie_workflow',
                    kwargs={'job_id': job_handle['handle']['id']}))
    else:
        raise PopupException(_('Action %s is unknown') % action)

    return render(
        'editor.mako', request, {
            'notebooks_json':
            json.dumps([editor.get_data()]),
            'options_json':
            json.dumps({
                'languages': [{
                    "name": "%s SQL" % editor_type.title(),
                    "type": editor_type
                }],
                'mode':
                'editor',
                'success_url':
                success_url
            }),
            'editor_type':
            editor_type,
        })
Example #57
0
def download(request):
    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))
    file_format = request.POST.get('format', 'csv')

    return get_api(request, snippet).download(notebook, snippet, file_format)
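An illustrative POST body for this endpoint, using the same serialized notebook and snippet JSON as the other views and the default file format:

  notebook=<notebook JSON>&snippet=<snippet JSON>&format=csv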
Example #58
0
File: api.py Project: Nick-Xu/hue
def close_notebook(request):
  response = {'status': -1, 'result': []}

  notebook = json.loads(request.POST.get('notebook', '{}'))

  for session in [_s for _s in notebook['sessions'] if _s['type'] in ('scala', 'spark', 'pyspark', 'sparkr')]:
    try:
      response['result'].append(get_api(request, session).close_session(session))
    except QueryExpired:
      pass
    except Exception as e:
      LOG.exception('Error closing session %s' % str(e))

  for snippet in [_s for _s in notebook['snippets'] if _s['type'] in ('hive', 'impala')]:
    try:
      if snippet['status'] != 'running':
        response['result'].append(get_api(request, snippet).close_statement(snippet))
      else:
        LOG.info('Not closing SQL snippet as still running.')
    except QueryExpired:
      pass
    except Exception as e:
      LOG.exception('Error closing statement %s' % str(e))

  response['status'] = 0
  response['message'] = _('Notebook closed successfully')

  return JsonResponse(response)


Example #59
0
def guess_field_types(request):
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'file':
    indexer = MorphlineIndexer(request.user, request.fs)
    path = urllib.unquote(file_format["path"])
    stream = request.fs.open(path)
    encoding = chardet.detect(stream.read(10000)).get('encoding')
    stream.seek(0)
    _convert_format(file_format["format"], inverse=True)

    format_ = indexer.guess_field_types({
      "file": {
          "stream": stream,
          "name": path
        },
      "format": file_format['format']
    })

    # Note: Would also need to set charset to table (only supported in Hive)
    if 'sample' in format_:
      format_['sample'] = escape_rows(format_['sample'], nulls_only=True, encoding=encoding)
    for col in format_['columns']:
      col['name'] = smart_unicode(col['name'], errors='replace', encoding=encoding)

  elif file_format['inputFormat'] == 'table':
    sample = get_api(request, {'type': 'hive'}).get_sample_data({'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

    format_ = {
        "sample": sample['rows'][:4],
        "columns": [
            Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
            for col in table_metadata.cols
        ]
    }
  elif file_format['inputFormat'] == 'query':
    query_id = file_format['query']['id'] if file_format['query'].get('id') else file_format['query']

    notebook = Notebook(document=Document2.objects.document(user=request.user, doc_id=query_id)).get_data()
    snippet = notebook['snippets'][0]
    db = get_api(request, snippet)

    if file_format.get('sampleCols'):
      columns = file_format.get('sampleCols')
      sample = file_format.get('sample')
    else:
      snippet['query'] = snippet['statement']
      try:
        sample = db.fetch_result(notebook, snippet, 4, start_over=True)['rows'][:4]
      except Exception as e:
        LOG.warning('Skipping sample data as query handle might be expired: %s' % e)
        sample = [[], [], [], [], []]
      columns = db.autocomplete(snippet=snippet, database='', table='')
      columns = [
          Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
          for col in columns['extended_columns']
      ]
    format_ = {
        "sample": sample,
        "columns": columns,
    }
Example #60
0
    def stats(self, dashboard, fields):
        database, table = self._get_database_table_names(dashboard)

        # TODO: check column stats to go faster

        sql = "SELECT MIN(`%(field)s`), MAX(`%(field)s`) FROM `%(database)s`.`%(table)s`" % {
            'field': fields[0],
            'database': database,
            'table': table
        }

        editor = make_notebook(name='Execute and watch',
                               editor_type=self.engine,
                               statement=sql,
                               database=database,
                               status='ready-execute',
                               skip_historify=True
                               # async=False
                               )

        request = MockRequest(self.user)
        snippet = {'type': self.engine}
        response = editor.execute(request)

        if 'handle' in response:
            snippet['result'] = response

            if response['handle'].get('sync'):
                result = response['result']
            else:
                timeout_sec = 20  # To move to Notebook API
                sleep_interval = 0.5
                curr = time.time()
                end = curr + timeout_sec

                api = get_api(request, snippet)

                while curr <= end:
                    status = api.check_status(dashboard, snippet)
                    if status['status'] == 'available':
                        result = api.fetch_result(dashboard,
                                                  snippet,
                                                  rows=10,
                                                  start_over=True)
                        api.close_statement(snippet)
                        break
                    time.sleep(sleep_interval)
                    curr = time.time()

                if curr > end:
                    try:
                        api.cancel_operation(snippet)
                    except Exception as e:
                        LOG.warning("Failed to cancel query: %s" % e)
                        api.close_statement(snippet)
                    # Do not pass `e` along: it is unbound here when
                    # cancel_operation() succeeds.
                    raise OperationTimeout()

            stats = list(result['data'])
            min_value, max_value = stats[0]

            if not isinstance(min_value, numbers.Number):
                min_value = min_value.replace(' ', 'T') + 'Z'
                max_value = max_value.replace(' ', 'T') + 'Z'

            return {
                'stats': {
                    'stats_fields': {
                        fields[0]: {
                            'min': min_value,
                            'max': max_value
                        }
                    }
                }
            }
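A caller might consume the returned payload like this; the `engine` instance and field name are hypothetical:

  result = engine.stats(dashboard, ['date_field'])
  field_stats = result['stats']['stats_fields']['date_field']
  print(field_stats['min'], field_stats['max'])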