Beispiel #1
0
def get_history(request):
  """Return the user's query history as JSON: filtered, paginated and
  sorted by last execution time (most recent first)."""
  response = {'status': -1}

  doc_type = request.GET.get('doc_type')
  doc_text = request.GET.get('doc_text')
  page = min(int(request.GET.get('page', 1)), 100)
  limit = min(int(request.GET.get('limit', 50)), 100)
  is_notification_manager = request.GET.get('is_notification_manager', 'false') == 'true'

  if is_notification_manager:
    queryset = Document2.objects.get_tasks_history(user=request.user)
  else:
    queryset = Document2.objects.get_history(doc_type='query-%s' % doc_type, user=request.user)

  if doc_text:
    text_filter = Q(name__icontains=doc_text) | Q(description__icontains=doc_text) | Q(search__icontains=doc_text)
    queryset = queryset.filter(text_filter)

  # Count before slicing, then paginate
  queryset = queryset.order_by('-last_modified')
  response['count'] = queryset.count()
  page_docs = __paginate(page, limit, queryset=queryset)['documents']

  history = []
  for doc in page_docs:
    notebook = Notebook(document=doc).get_data()
    if 'snippets' not in notebook:
      LOG.error('Incomplete History Notebook: %s' % notebook)
      continue

    statement = notebook['description'] if is_notification_manager else _get_statement(notebook)
    snippet_summary = {}
    if notebook['snippets']:
      snippet_summary = {
          'statement': statement[:1001] if statement else '',
          'lastExecuted': notebook['snippets'][0].get('lastExecuted', -1),
          'status':  notebook['snippets'][0]['status'],
          'parentSavedQueryUuid': notebook.get('parentSavedQueryUuid', '')
      }

    history.append({
      'name': doc.name,
      'id': doc.id,
      'uuid': doc.uuid,
      'type': doc.type,
      'data': snippet_summary,
      'absoluteUrl': doc.get_absolute_url(),
    })

  response['history'] = sorted(history, key=lambda row: row['data']['lastExecuted'], reverse=True)
  response['message'] = _('History fetched')
  response['status'] = 0

  return JsonResponse(response)
Beispiel #2
0
def table_queries(request, database, table):
    """Return up to 50 saved (non-history) queries owned by the user that
    mention the given table, matched either by bare table name or by the
    qualified `database.table` form.
    """
    qfilter = Q(data__icontains=table) | Q(data__icontains='%s.%s' %
                                           (database, table))

    response = {'status': -1, 'queries': []}
    try:
        queries = [{
            'doc': d.to_dict(),
            'data': Notebook(document=d).get_data()
        } for d in Document2.objects.filter(
            qfilter, owner=request.user, type='query', is_history=False)[:50]]
        response['status'] = 0
        response['queries'] = queries
    except Exception as ex:  # `as` form is valid on both Python 2.6+ and 3
        response['status'] = 1
        response['data'] = _("Cannot get queries related to table %s.%s: %s"
                             ) % (database, table, ex)

    # Bug fix: the function previously fell off the end and returned None;
    # the built response was never sent to the caller.
    return JsonResponse(response)
Beispiel #3
0
    def test_check_status(self):
        """check_status() should report the API's status both at the top
        level and inside 'query_status'."""
        query = Notebook()

        request = Mock()
        operation_id = Mock()

        # get_by_uuid and the Notebook constructor only need to be patched
        # out of the way; their return values are not inspected here.
        with patch('notebook.api.Document2.objects.get_by_uuid'), \
             patch('notebook.api.get_api') as get_api, \
             patch('notebook.api.Notebook'):
            get_api.return_value = Mock(
                check_status=Mock(return_value={'status': 0}))

            resp = query.check_status(request=request,
                                      operation_id=operation_id)

            assert_equal(0, resp['status'])
            assert_equal(0, resp['query_status']['status'])
Beispiel #4
0
def _small_indexing(user, fs, client, source, destination, index_name):
  """Index a small data set (file, query result or manual input) into a new
  Solr collection.

  Creates the collection first, then pushes the data; if indexing fails the
  half-created collection is deleted and the original error is re-raised.
  """
  kwargs = {}
  errors = []

  # File-backed inputs are size-capped before any work is done.
  if source['inputFormat'] not in ('manual', 'table', 'query_handle'):
    path = urllib.unquote(source["path"])
    stats = fs.stats(path)
    if stats.size > MAX_UPLOAD_SIZE:
      raise PopupException(_('File size is too large to handle!'))

  indexer = MorphlineIndexer(user, fs)

  fields = indexer.get_field_list(destination['columns'])
  _create_solr_collection(user, fs, client, destination, index_name, kwargs)

  if source['inputFormat'] == 'file':
    path = urllib.unquote(source["path"])
    data = fs.read(path, 0, MAX_UPLOAD_SIZE)

  if client.is_solr_six_or_more():
    kwargs['processor'] = 'tolerant'
    kwargs['map'] = 'NULL:'

  try:
    if source['inputFormat'] == 'query':
      query_id = source['query']['id'] if source['query'].get('id') else source['query']

      notebook = Notebook(document=Document2.objects.document(user=user, doc_id=query_id)).get_data()
      request = MockedDjangoRequest(user=user)
      snippet = notebook['snippets'][0]

      searcher = CollectionManagerController(user)
      columns = [field['name'] for field in fields if field['name'] != 'hue_id']
      fetch_handle = lambda rows, start_over: get_api(request, snippet).fetch_result(notebook, snippet, rows=rows, start_over=start_over) # Assumes handle still live
      rows = searcher.update_data_from_hive(index_name, columns, fetch_handle=fetch_handle, indexing_options=kwargs)
      # TODO if rows == MAX_ROWS truncation warning
    elif source['inputFormat'] == 'manual':
      pass # No need to do anything
    else:
      response = client.index(name=index_name, data=data, **kwargs)
      errors = [error.get('message', '') for error in response['responseHeader'].get('errors', [])]
  except Exception as e:
    try:
      client.delete_index(index_name, keep_config=False)
    except Exception as e2:
      LOG.warn('Error while cleaning-up config of failed collection creation %s: %s' % (index_name, e2))
    # Bug fix: the original swallowed the indexing error after cleanup,
    # making failures look like successes to the caller.
    raise e
Beispiel #5
0
def get_history(request):
    """Return the user's query history (most recent first) as JSON,
    optionally filtered by a text search over name/description/search.
    """
    response = {'status': -1}

    doc_type = request.GET.get('doc_type')
    doc_text = request.GET.get('doc_text')
    # Bug fix: QueryDict.get returns a string, so min(str, 100) raises
    # TypeError on Python 3 (and compares types, not values, on Python 2).
    # Convert to int before capping at 100.
    limit = min(int(request.GET.get('len', 50)), 100)

    docs = Document2.objects.get_history(doc_type='query-%s' % doc_type,
                                         user=request.user)

    if doc_text:
        docs = docs.filter(
            Q(name__icontains=doc_text) | Q(description__icontains=doc_text)
            | Q(search__icontains=doc_text))

    history = []
    for doc in docs.order_by('-last_modified')[:limit]:
        notebook = Notebook(document=doc).get_data()
        if 'snippets' in notebook:
            statement = _get_statement(notebook)
            history.append({
                'name': doc.name,
                'id': doc.id,
                'uuid': doc.uuid,
                'type': doc.type,
                'data': {
                    'statement':
                    statement[:1001] if statement else '',
                    'lastExecuted':
                    notebook['snippets'][0]['lastExecuted'],
                    'status':
                    notebook['snippets'][0]['status'],
                    'parentSavedQueryUuid':
                    notebook.get('parentSavedQueryUuid', '')
                } if notebook['snippets'] else {},
                'absoluteUrl': doc.get_absolute_url(),
            })
        else:
            LOG.error('Incomplete History Notebook: %s' % notebook)
    response['history'] = sorted(history,
                                 key=lambda row: row['data']['lastExecuted'],
                                 reverse=True)
    response['message'] = _('History fetched')
    response['status'] = 0

    return JsonResponse(response)
Beispiel #6
0
def get_history(request):
    """Return the 25 most recently modified history documents of the
    requested query type, each with its full notebook data."""
    response = {'status': -1}

    doc_type = request.GET.get('doc_type')

    recent_docs = Document2.objects.get_history(
        doc_type='query-%s' % doc_type,
        user=request.user).order_by('-last_modified')[:25]

    response['status'] = 0
    response['history'] = [{
        'name': doc.name,
        'id': doc.id,
        'data': Notebook(document=doc).get_data(),
        'absoluteUrl': doc.get_absolute_url()
    } for doc in recent_docs]
    response['message'] = _('History fetched')

    return JsonResponse(response)
Beispiel #7
0
def execute_and_watch(request):
    """Build an export statement from a snippet (CTAS or HDFS insert) and
    open an editor page that executes and watches it.

    Raises PopupException for an unknown action.
    """
    notebook_id = request.GET.get('editor', request.GET.get('notebook'))
    snippet_id = int(request.GET['snippet'])
    action = request.GET['action']
    destination = request.GET['destination']

    notebook = Notebook(document=Document2.objects.get(id=notebook_id))
    snippet = notebook.get_data()['snippets'][snippet_id]
    editor_type = snippet['type']

    api = get_api(request, snippet)

    if action == 'save_as_table':
        sql, success_url = api.export_data_as_table(snippet, destination)
    elif action == 'insert_as_query':
        sql, success_url = api.export_large_data_to_hdfs(snippet, destination)
    else:
        raise PopupException(_('Action %s is unknown') % action)

    # Both actions wrap the generated SQL in an auto-executing editor.
    editor = make_notebook(name='Execute and watch',
                           editor_type=editor_type,
                           statement=sql,
                           status='ready-execute')

    options = {
        'languages': [{
            "name": "%s SQL" % editor_type.title(),
            "type": editor_type
        }],
        'mode': 'editor',
        'success_url': success_url
    }

    return render('editor.mako', request, {
        'notebooks_json': json.dumps([editor.get_data()]),
        'options_json': json.dumps(options),
        'editor_type': editor_type,
    })
Beispiel #8
0
def _execute_notebook(request, notebook, snippet):
  """Execute `snippet` within `notebook`, optionally recording the run as a
  history document.

  Returns a dict with the execution 'handle' plus, when historified,
  'history_id' / 'history_uuid' (and 'history_parent_uuid' for saved
  queries). On QueryError, the history info gathered so far is injected
  into the exception's `extra` before re-raising.
  """
  response = {'status': -1}
  # NOTE(review): `result` is popped from the handle below but never
  # returned from this function — confirm callers only use the handle.
  result = None
  history = None

  # Single-query editors (and batch-executed notebook snippets) get logged
  # to history unless the caller opted out via skipHistorify.
  historify = (notebook['type'] != 'notebook' or snippet.get('wasBatchExecuted')) and not notebook.get('skipHistorify')

  try:
    try:
      if historify:
        # Re-read the notebook from the freshly created history document so
        # updates below are applied to the persisted copy.
        history = _historify(notebook, request.user)
        notebook = Notebook(document=history).get_data()

      response['handle'] = get_api(request, snippet).execute(notebook, snippet)

      # Retrieve and remove the result from the handle
      if response['handle'].get('sync'):
        result = response['handle'].pop('result')
    finally:
      # Persist the outcome on the history document even when execute()
      # raised: a success stores the handle, a failure marks the snippet.
      if historify:
        _snippet = [s for s in notebook['snippets'] if s['id'] == snippet['id']][0]
        if 'handle' in response: # No failure
          _snippet['result']['handle'] = response['handle']
          _snippet['result']['statements_count'] = response['handle'].get('statements_count', 1)
          _snippet['result']['statement_id'] = response['handle'].get('statement_id', 0)
          _snippet['result']['handle']['statement'] = response['handle'].get('statement', snippet['statement']).strip() # For non HS2, as non multi query yet
        else:
          _snippet['status'] = 'failed'

        if history:  # If _historify failed, history will be None
          history.update_data(notebook)
          history.save()

          response['history_id'] = history.id
          response['history_uuid'] = history.uuid
          if notebook['isSaved']: # Keep track of history of saved queries
            response['history_parent_uuid'] = history.dependencies.filter(type__startswith='query-').latest('last_modified').uuid
  except QueryError, ex: # We inject the history information from _historify() to the failed queries
    if response.get('history_id'):
      ex.extra['history_id'] = response['history_id']
    if response.get('history_uuid'):
      ex.extra['history_uuid'] = response['history_uuid']
    if response.get('history_parent_uuid'):
      ex.extra['history_parent_uuid'] = response['history_parent_uuid']
    raise ex
Beispiel #9
0
def make_notebook(name='Browse',
                  description='',
                  editor_type='hive',
                  statement='',
                  status='ready',
                  files=None,
                  functions=None,
                  settings=None):
    """Build an in-memory Notebook whose data holds a single snippet of
    `editor_type` with the given statement and status.

    `files`, `functions` and `settings` default to empty lists when None.
    """
    snippet = {
        'status': status,
        'id': str(uuid.uuid4()),
        'statement_raw': statement,
        'statement': statement,
        'type': editor_type,
        'properties': {
            'files': [] if files is None else files,
            'functions': [] if functions is None else functions,
            'settings': [] if settings is None else settings
        },
        'name': name,
        'database': 'default',
        'result': {}
    }

    editor = Notebook()
    editor.data = json.dumps({
        'name': name,
        'description': description,
        'sessions': [{
            'type': editor_type,
            'properties': [],
            'id': None
        }],
        'selectedSnippet': editor_type,
        'type': 'query-%s' % editor_type,
        'showHistory': True,
        'snippets': [snippet]
    })

    return editor
Beispiel #10
0
def export_documents(request):
  """Export the selected documents (plus their dependencies and, for
  directories, their children) as a Django fixture, returned either as raw
  JSON or zipped with per-notebook text dumps.

  NOTE(review): when the 'format' parameter is neither 'json' nor 'zip'
  this view falls through and returns None — confirm callers always pass
  one of the two.
  """
  # Selection can arrive via GET or POST depending on the caller.
  if request.GET.get('documents'):
    selection = json.loads(request.GET.get('documents'))
  else:
    selection = json.loads(request.POST.get('documents'))

  # Only export documents the user has permissions to read
  docs = Document2.objects.documents(user=request.user, perms='both', include_history=True, include_trashed=True).\
    filter(id__in=selection).order_by('-id')

  # Add any dependencies to the set of exported documents
  export_doc_set = _get_dependencies(docs)

  # For directories, add any children docs to the set of exported documents
  export_doc_set.update(_get_dependencies(docs, deps_mode=False))

  # Get PKs of documents to export
  doc_ids = [doc.pk for doc in export_doc_set]

  f = StringIO.StringIO()

  if doc_ids:
    # dumpdata writes the serialized fixture into the in-memory buffer.
    doc_ids = ','.join(map(str, doc_ids))
    management.call_command('dumpdata', 'desktop.Document2', primary_keys=doc_ids, indent=2, use_natural_keys=True, verbosity=2, stdout=f)

  if request.GET.get('format') == 'json':
    return JsonResponse(f.getvalue(), safe=False)
  elif request.GET.get('format') == 'zip':
    zfile = zipfile.ZipFile(f, 'w')
    zfile.writestr("hue.json", f.getvalue())
    for doc in docs:
      if doc.type == 'notebook':
        # Best-effort: a notebook that fails to serialize is logged and
        # skipped rather than aborting the whole export.
        try:
          from spark.models import Notebook
          zfile.writestr("notebook-%s-%s.txt" % (doc.name, doc.id), smart_str(Notebook(document=doc).get_str()))
        except Exception, e:
          LOG.exception(e)
    zfile.close()
    response = HttpResponse(content_type="application/zip")
    response["Content-Length"] = len(f.getvalue())
    response['Content-Disposition'] = 'attachment; filename="hue-documents.zip"'
    response.write(f.getvalue())
    return response
Beispiel #11
0
def guess_field_types(request):
  """Infer column names/types for an indexing source: a file sample, a Hive
  table's metadata, or the first rows of an open query's result.

  NOTE(review): if 'inputFormat' is none of 'file'/'table'/'query',
  `format_` is never bound and the final return raises NameError — confirm
  the frontend only sends these three values.
  """
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'file':
    indexer = Indexer(request.user, request.fs)
    stream = request.fs.open(file_format["path"])
    _convert_format(file_format["format"], inverse=True)

    format_ = indexer.guess_field_types({
      "file": {
          "stream": stream,
          "name": file_format['path']
        },
      "format": file_format['format']
    })
  elif file_format['inputFormat'] == 'table':
    # Sample rows come from the query API; the column schema comes from the
    # metastore so types can be mapped through HiveFormat.
    sample = get_api(request, {'type': 'hive'}).get_sample_data({'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

    format_ = {
        "sample": sample['rows'][:4],
        "columns": [
            Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
            for col in table_metadata.cols
        ]
    }
  elif file_format['inputFormat'] == 'query': # Only support open query history
    # TODO get schema from explain query, which is not possible
    notebook = Notebook(document=Document2.objects.get(id=file_format['query'])).get_data()
    snippet = notebook['snippets'][0]
    sample = get_api(request, snippet).fetch_result(notebook, snippet, 4, start_over=True)

    format_ = {
        "sample": sample['rows'][:4],
        "sample_cols": sample.meta,
        "columns": [
            Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
            for col in sample.meta
        ]
    }

  return JsonResponse(format_)
Beispiel #12
0
def create_notebook(request):
  """Create a fresh notebook (or typed editor) payload and return it as JSON."""
  editor_type = request.POST.get('type', 'notebook')
  directory_uuid = request.POST.get('directory_uuid')

  notebook = Notebook()
  payload = notebook.get_data()

  if editor_type != 'notebook':
    # Typed editors start unnamed and carry their dialect in the type field.
    payload['name'] = ''
    payload['type'] = 'query-%s' % editor_type  # TODO: Add handling for non-SQL types

  payload['directoryUuid'] = directory_uuid
  notebook.data = json.dumps(payload)

  return JsonResponse({
      'status': 0,
      'notebook': notebook.get_data(),
  })
Beispiel #13
0
def close_statement(request):
    """Close the statement of the notebook's first snippet.

    An already-expired query is treated as a no-op success.
    """
    response = {'status': -1}

    # Passed by check_document_access_permission but unused by APIs
    posted = json.loads(request.POST.get('notebook', '{}'))
    doc = Document2.objects.get_by_uuid(user=request.user,
                                        uuid=posted['uuid'])
    notebook = Notebook(document=doc).get_data()
    snippet = notebook['snippets'][0]

    try:
        response['result'] = get_api(request, snippet).close_statement(
            notebook, snippet)
    except QueryExpired:
        # Nothing left to close.
        pass

    response['status'] = 0
    response['message'] = _('Statement closed !')

    return JsonResponse(response)
Beispiel #14
0
def extract_archive_in_hdfs(request, upload_path, file_name):
  """Submit a managed shell notebook batch job that extracts an uploaded
  archive in HDFS, landing the output next to the archive."""
  _upload_extract_archive_script_to_hdfs(request.fs)

  # Output directory is named after the archive, minus its extension.
  output_path = upload_path + '/' + file_name.split('.')[0]

  notebook = Notebook(
      description=_('HDFS Extraction of %(upload_path)s/%(file_name)s') % {'upload_path': upload_path, 'file_name': file_name},
      isManaged=True,
      onSuccessUrl=reverse('filebrowser.views.view', kwargs={'path': output_path})
  )

  script_path = '/user/' + DEFAULT_USER.get() + '/common/extract_archive_in_hdfs.sh'
  notebook.add_shell_snippet(
      shell_command='extract_archive_in_hdfs.sh',
      arguments=[
          {'value': '-u=' + upload_path},
          {'value': '-f=' + file_name},
          {'value': '-o=' + output_path},
      ],
      archives=[],
      files=[
          {'value': script_path},
          {'value': upload_path + '/' + urllib.quote(file_name)},
      ],
      env_var=[{'value': 'HADOOP_USER_NAME=${wf:user()}'}]
  )

  return notebook.execute(request, batch=True)
Beispiel #15
0
    def autocomplete(self,
                     snippet,
                     database=None,
                     table=None,
                     column=None,
                     nested=None):
        """Autocomplete metadata, optionally restricted to a statement taken
        from a saved query document (in which case `database` carries the
        document id)."""
        db = self._get_db(snippet)

        query = snippet.get('query') or None
        if query is None and snippet.get('source') == 'query':
            document = Document2.objects.get(id=database)
            document.can_read_or_exception(self.user)
            snippet = Notebook(document=document).get_data()['snippets'][0]
            query = self._get_current_statement(db, snippet)['statement']
            # Reset the scope: completion now comes from the query itself.
            database = table = ''

        return _autocomplete(db, database, table, column, nested, query=query)
Beispiel #16
0
def get_external_statement(request):
    """Resolve a snippet's externally-stored statement, read either from a
    file on HDFS or from an associated document."""
    response = {'status': -1, 'message': ''}

    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))

    statement_type = snippet.get('statementType')
    if statement_type == 'file':
        response['statement'] = _get_statement_from_file(
            request.user, request.fs, snippet)
    elif statement_type == 'document':
        doc = Document2.objects.get_by_uuid(
            user=request.user,
            uuid=snippet['associatedDocumentUuid'],
            perm_type='read')
        notebook = Notebook(doc)
        response['statement'] = notebook.get_str()

    response['status'] = 0

    return JsonResponse(response)
Beispiel #17
0
def compress_files_in_hdfs(request, file_names, upload_path, archive_name):
    """Submit a managed shell notebook batch job compressing `file_names`
    under `upload_path` into `archive_name`."""
    _upload_compress_files_script_to_hdfs(request.fs)

    files = [
        {"value": upload_path + '/' +
         urllib_quote(file_name.encode('utf-8'), SAFE_CHARACTERS_URI)}
        for file_name in file_names
    ]
    # The compression script itself ships along with the inputs.
    files.append({
        'value':
        '/user/' + DEFAULT_USER.get() + '/common/compress_files_in_hdfs.sh'
    })
    start_time = json.loads(request.POST.get('start_time', '-1'))

    shell_notebook = Notebook(
        name=_('HDFS Compression to %(upload_path)s/hue_compressed.zip') %
        {'upload_path': upload_path},
        isManaged=True,
        onSuccessUrl='/filebrowser/view=' + urllib_quote(
            upload_path.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS))

    shell_notebook.add_shell_snippet(
        shell_command='compress_files_in_hdfs.sh',
        arguments=[
            {'value': '-u=' + upload_path},
            {'value': '-f=' + ','.join(file_names)},
            {'value': '-n=' + archive_name},
        ],
        archives=[],
        files=files,
        env_var=[{'value': 'HADOOP_USER_NAME=${wf:user()}'}],
        last_executed=start_time)

    return shell_notebook.execute(request, batch=True)
Beispiel #18
0
    def test_delete_notebook(self):
        """Deleting a saved notebook should move it into the user's Trash
        folder (soft delete) rather than removing it."""
        # Minimal but complete notebook payload: one Hive snippet, a fixed
        # uuid so the saved document can be located again below.
        trash_notebook_json = """
        {
          "selectedSnippet": "hive",
          "showHistory": false,
          "description": "Test Hive Query",
          "name": "Test Hive Query",
          "sessions": [
              {
                  "type": "hive",
                  "properties": [],
                  "id": null
              }
          ],
          "type": "query-hive",
          "id": null,
          "snippets": [{"id": "e069ef32-5c95-4507-b961-e79c090b5abf","type":"hive","status":"ready","database":"default","statement":"select * from web_logs","statement_raw":"select * from web_logs","properties":{"settings":[],"files":[],"functions":[]},"result":{}}],
          "uuid": "8a20da5f-b69c-4843-b17d-dea5c74c41d1"
      }
      """

        # Assert that the notebook is first saved
        response = self.client.post(reverse('notebook:save_notebook'),
                                    {'notebook': trash_notebook_json})
        data = json.loads(response.content)
        assert_equal(0, data['status'], data)

        # Test that deleting it moves it to the user's Trash folder
        notebook_doc = Document2.objects.get(id=data['id'])
        trash_notebooks = [Notebook(notebook_doc).get_data()]
        response = self.client.post(reverse('notebook:delete'),
                                    {'notebooks': json.dumps(trash_notebooks)})
        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_equal('Trashed 1 notebook(s)', data['message'], data)

        # The document should now appear among /.Trash children.
        response = self.client.get('/desktop/api2/doc', {'path': '/.Trash'})
        data = json.loads(response.content)
        trash_uuids = [doc['uuid'] for doc in data['children']]
        assert_true(notebook_doc.uuid in trash_uuids, data)
Beispiel #19
0
def cancel_statement(request):
    """Cancel the running statement of the notebook's first snippet,
    tracing the call with OpenTracing."""
    response = {'status': -1}

    posted = json.loads(request.POST.get('notebook', '{}'))
    nb_doc = Document2.objects.get_by_uuid(user=request.user,
                                           uuid=posted['uuid'])
    notebook = Notebook(document=nb_doc).get_data()
    snippet = notebook['snippets'][0]

    with opentracing.tracer.start_span('notebook-cancel_statement') as span:
        response['result'] = get_api(request,
                                     snippet).cancel(notebook, snippet)

        span.set_tag('user-id', request.user.username)
        handle = snippet['result'].get('handle')
        query_id = handle['guid'] if handle and handle.get('guid') else None
        span.set_tag('query-id', query_id)

    response['status'] = 0

    return JsonResponse(response)
Beispiel #20
0
def extract_archive_in_hdfs(request, upload_path, file_name):
  """Submit a managed shell notebook batch job extracting an archive in
  HDFS; on success the file browser opens on the extracted directory."""
  _upload_extract_archive_script_to_hdfs(request.fs)

  # Output directory is named after the archive, minus its extension.
  output_path = upload_path + '/' + file_name.split('.')[0]
  start_time = json.loads(request.POST.get('start_time', '-1'))

  on_success_url = '/filebrowser/view=' + urllib.parse.quote(
      output_path.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)

  shell_notebook = Notebook(
      name=_('HDFS Extraction of %(upload_path)s/%(file_name)s') % {'upload_path': upload_path, 'file_name': file_name},
      isManaged=True,
      onSuccessUrl=on_success_url
  )

  shell_notebook.add_shell_snippet(
      shell_command='extract_archive_in_hdfs.sh',
      arguments=[
          {'value': '-u=' + upload_path},
          {'value': '-f=' + file_name},
          {'value': '-o=' + output_path},
      ],
      archives=[],
      files=[
          {'value': '/user/' + DEFAULT_USER.get() + '/common/extract_archive_in_hdfs.sh'},
          {'value': upload_path + '/' + urllib.parse.quote(file_name)},
      ],
      env_var=[{'value': 'HADOOP_USER_NAME=${wf:user()}'}],
      last_executed=start_time
  )

  return shell_notebook.execute(request, batch=True)
Beispiel #21
0
  def autocomplete(self, snippet, database=None, table=None, column=None, nested=None):
    """Autocomplete metadata, optionally scoped to a statement pulled from a
    saved query document (in which case `database` carries the doc id).

    Raises QueryExpired when the backend reports a read timeout.
    """
    db = self._get_db(snippet, interpreter=self.interpreter)

    query = snippet.get('query') or None
    if query is None and snippet.get('source') == 'query':
      document = Document2.objects.get(id=database)
      document.can_read_or_exception(self.user)
      notebook = Notebook(document=document).get_data()
      snippet = notebook['snippets'][0]
      query = self._get_current_statement(notebook, snippet)['statement']
      # Reset the scope: completion now comes from the query itself.
      database = table = ''

    resp = _autocomplete(db, database, table, column, nested, query=query, cluster=self.interpreter)

    if resp.get('error'):
      resp['message'] = resp.pop('error')
      if 'Read timed out' in resp['message']:
        raise QueryExpired(resp['message'])

    return resp
Beispiel #22
0
def get_history(request):
    response = {'status': -1}

    doc_type = request.GET.get('doc_type')
    limit = max(request.GET.get('len', 50), 100)

    response['status'] = 0
    history = []
    for doc in Document2.objects.get_history(
            doc_type='query-%s' % doc_type,
            user=request.user).order_by('-last_modified')[:limit]:
        notebook = Notebook(document=doc).get_data()
        if 'snippets' in notebook:
            history.append({
                'name': doc.name,
                'id': doc.id,
                'uuid': doc.uuid,
                'type': doc.type,
                'data': {
                    'statement_raw':
                    notebook['snippets'][0]['statement_raw'][:1001],
                    'lastExecuted':
                    notebook['snippets'][0]['lastExecuted'],
                    'status':
                    notebook['snippets'][0]['status'],
                    'parentUuid':
                    notebook.get('parentUuid', '')
                } if notebook['snippets'] else {},
                'absoluteUrl': doc.get_absolute_url(),
            })
        else:
            LOG.error('Incomplete History Notebook: %s' % notebook)
    response['history'] = history
    response['message'] = _('History fetched')

    return JsonResponse(response)
Beispiel #23
0
def check_status(request):
    """Check the execution status of a snippet and persist the resulting
    status back onto the notebook document (marking it 'failed' if the
    status lookup itself failed)."""
    response = {'status': -1}

    notebook = json.loads(request.POST.get('notebook', '{}'))
    snippet = json.loads(request.POST.get('snippet', '{}'))

    try:
        response['query_status'] = get_api(request, snippet).check_status(
            notebook, snippet)
        response['status'] = 0
    finally:
        # NOTE(review): this compares the snippet's status *string* with the
        # whole query_status *dict*, so the condition is effectively always
        # true when status == 0 — confirm whether
        # response['query_status']['status'] was intended here.
        if response['status'] == 0 and snippet['status'] != response[
                'query_status']:
            status = response['query_status']['status']
        else:
            status = 'failed'
        # Persist the new status on the first snippet of the stored notebook.
        nb_doc = Document2.objects.get(id=notebook['id'])
        nb_doc.can_write_or_exception(request.user)
        nb = Notebook(document=nb_doc).get_data()
        nb['snippets'][0]['status'] = status
        nb_doc.update_data(nb)
        nb_doc.save()

    return JsonResponse(response)
Beispiel #24
0
def extract_archive_in_hdfs(request, upload_path, file_name):
    """Extract an uploaded archive in place via a shell notebook batch job."""
    _upload_extract_archive_script_to_hdfs(request.fs)

    script_path = ('/user/' + DEFAULT_USER.get() +
                   '/common/extract_archive_in_hdfs.sh')

    shell_notebook = Notebook()
    shell_notebook.add_shell_snippet(
        shell_command='extract_archive_in_hdfs.sh',
        arguments=[
            {'value': '-u=' + upload_path},
            {'value': '-f=' + file_name},
        ],
        archives=[],
        files=[
            {'value': script_path},
            {"value": upload_path + '/' + file_name},
        ],
        env_var=[{'value': 'HADOOP_USER_NAME=${wf:user()}'}])

    return shell_notebook.execute(request, batch=True)
Beispiel #25
0
def compress_files_in_hdfs(request, file_names, upload_path):
  """Compress `file_names` under `upload_path` into hue_compressed.zip via a
  managed shell notebook batch job."""
  _upload_compress_files_script_to_hdfs(request.fs)

  output_path = upload_path

  files = [{"value": upload_path + '/' + file_name} for file_name in file_names]
  # The compression script itself ships along with the inputs.
  files.append({'value': '/user/' + DEFAULT_USER.get() + '/common/compress_files_in_hdfs.sh'})

  notebook = Notebook(
    description=_('HDFS Compression to %(upload_path)s/hue_compressed.zip') % {'upload_path': upload_path},
    isManaged=True,
    onSuccessUrl=reverse('filebrowser.views.view', kwargs={'path': output_path})
  )

  notebook.add_shell_snippet(
      shell_command='compress_files_in_hdfs.sh',
      arguments=[
          {'value': '-u=' + upload_path},
          {'value': '-f=' + ','.join(file_names)},
          {'value': '-o=' + output_path},
      ],
      archives=[],
      files=files,
      env_var=[{'value': 'HADOOP_USER_NAME=${wf:user()}'}]
  )

  return notebook.execute(request, batch=True)
Beispiel #26
0
def _get_document_helper(request, uuid, with_data, with_dependencies, path):
    """Load a document by numeric id, uuid or path and serialize it,
    optionally including its data, its dependency lists and — for
    directories — its (filtered, paginated) children."""
    if uuid:
        if uuid.isdigit():
            document = Document2.objects.document(user=request.user,
                                                  doc_id=uuid)
        else:
            document = Document2.objects.get_by_uuid(user=request.user,
                                                     uuid=uuid)
    else:  # Find by path
        document = Document2.objects.get_by_path(user=request.user, path=path)

    parent = document.parent_directory
    response = {
        'document': document.to_dict(),
        'parent': parent.to_dict() if parent else None,
        'children': [],
        'dependencies': [],
        'dependents': [],
        'data': '',
        'status': 0
    }

    response['user_perms'] = {
        'can_read': document.can_read(request.user),
        'can_write': document.can_write(request.user)
    }

    if with_data:
        data = json.loads(document.data)
        # Upgrade session properties for Hive and Impala
        if document.type.startswith('query'):
            notebook = Notebook(document=document)
            notebook = upgrade_session_properties(request, notebook)
            data = json.loads(notebook.data)
            if data.get('uuid') != document.uuid:  # Old format < 3.11
                data['uuid'] = document.uuid

        response['data'] = data

    if with_dependencies:
        response['dependencies'] = [
            dep.to_dict() for dep in document.dependencies.all()
        ]
        response['dependents'] = [
            dep.to_dict() for dep in document.dependents.all()
        ]

    # Get children documents if this is a directory
    if document.is_directory:
        directory = Directory.objects.get(id=document.id)

        if document.is_home_directory:
            # The home directory also lists documents shared with the user.
            children = directory.get_children_and_shared_documents(
                user=request.user)
            response.update(
                _filter_documents(request, queryset=children, flatten=True))
        else:
            children = directory.get_children_documents()
            response.update(
                _filter_documents(request, queryset=children, flatten=False))

    # Paginate and serialize results, renaming 'documents' to 'children'
    if 'documents' in response:
        response.update(_paginate(request, queryset=response['documents']))
        response['children'] = [
            doc.to_dict() for doc in response.pop('documents')
        ]

    return response
Beispiel #27
0
def make_notebook(name='Browse', description='', editor_type='hive', statement='', status='ready',
                  files=None, functions=None, settings=None, is_saved=False, database='default', snippet_properties=None, batch_submit=False,
                  on_success_url=None, skip_historify=False, is_task=False, last_executed=-1, is_notebook=False, pub_sub_url=None, result_properties=None,
                  namespace=None, compute=None):
  '''
  Build a Notebook object wrapping a single snippet (or an empty notebook) ready to execute or save.

  skip_historify: do not add the task to the query history. e.g. SQL Dashboard
  is_task / isManaged: true when being a managed by Hue operation (include_managed=True in document), e.g. exporting query result, dropping some tables
  '''
  from notebook.connectors.hiveserver2 import HS2Api

  # Avoid shared mutable default arguments: a dict default is created once at
  # function-definition time and shared across all calls.
  if result_properties is None:
    result_properties = {}
  if snippet_properties is None:
    snippet_properties = {}

  # impala can have compute name appended to the editor_type (impala/dbms.py - get_query_server_config)
  if editor_type.startswith('impala'):
    editor_type = 'impala'

  editor = Notebook()

  if editor_type == 'hive':
    sessions_properties = HS2Api.get_properties(editor_type)
    if files is not None:
      _update_property_value(sessions_properties, 'files', files)

    if functions is not None:
      _update_property_value(sessions_properties, 'functions', functions)

    if settings is not None:
      _update_property_value(sessions_properties, 'settings', settings)
  elif editor_type == 'impala':
    sessions_properties = HS2Api.get_properties(editor_type)
    if settings is not None:
      # Fix: the guard tests `settings`, so update the 'settings' property.
      # The previous code updated 'files' with `files` here — a copy/paste
      # slip from the hive branch that silently dropped impala settings.
      _update_property_value(sessions_properties, 'settings', settings)
  elif editor_type == 'java':
    sessions_properties = [] # Java options
  else:
    sessions_properties = []

  data = {
    'name': name,
    'uuid': str(uuid.uuid4()),
    'description': description,
    'sessions': [
      {
         'type': editor_type,
         'properties': sessions_properties,
         'id': None
      }
    ],
    'selectedSnippet': editor_type,
    'type': 'notebook' if is_notebook else 'query-%s' % editor_type,
    'showHistory': True,
    'isSaved': is_saved,
    # Percent-encode so the URL can be safely embedded when the task finishes.
    'onSuccessUrl': urllib.quote(on_success_url.encode('utf-8'), safe=SAFE_CHARACTERS_URI) if on_success_url else None,
    'pubSubUrl': pub_sub_url,
    'skipHistorify': skip_historify,
    'isManaged': is_task,
    'snippets': [
      {
         'status': status,
         'id': str(uuid.uuid4()),
         'statement_raw': statement,
         'statement': statement,
         'type': editor_type,
         'wasBatchExecuted': batch_submit,
         'lastExecuted': last_executed,
         'properties': {
            'files': [] if files is None else files,
            'functions': [] if functions is None else functions,
            'settings': [] if settings is None else settings
         },
         'name': name,
         'database': database,
         'namespace': namespace if namespace else {},
         'compute': compute if compute else {},
         'result': {'handle':{}},
         'variables': []
      }
    ] if not is_notebook else []  # a bare notebook starts with no snippet
  }

  if snippet_properties:
    data['snippets'][0]['properties'].update(snippet_properties)
  if result_properties:
    data['snippets'][0]['result'].update(result_properties)

  editor.data = json.dumps(data)

  return editor
Beispiel #28
0
 def _get_query(self, name):
     """Load the saved document identified by *name* and return its first
     snippet's SQL statement, with any leading/trailing semicolons removed."""
     doc = Document2.objects.document(user=self.user, doc_id=name)
     data = Notebook(document=doc).get_data()
     first_snippet = data['snippets'][0]
     return first_snippet['statement'].strip(';')
Beispiel #29
0
def make_notebook(name='Browse',
                  description='',
                  editor_type='hive',
                  statement='',
                  status='ready',
                  files=None,
                  functions=None,
                  settings=None,
                  is_saved=False,
                  database='default',
                  snippet_properties=None,
                  batch_submit=False,
                  on_success_url=None,
                  skip_historify=False,
                  is_task=False,
                  last_executed=-1):
    '''
  Build a Notebook object wrapping a single editor snippet ready to execute or save.

  skip_historify: do not add the task to the query history. e.g. SQL Dashboard
  isManaged: true when being a managed by Hue operation (include_managed=True in document), e.g. exporting query result, dropping some tables
  '''
    from notebook.connectors.hiveserver2 import HS2Api

    editor = Notebook()
    # Avoid a shared mutable default for the snippet overrides.
    if snippet_properties is None:
        snippet_properties = {}

    if editor_type == 'hive':
        sessions_properties = HS2Api.get_properties(editor_type)
        if files is not None:
            _update_property_value(sessions_properties, 'files', files)

        if functions is not None:
            _update_property_value(sessions_properties, 'functions', functions)

        if settings is not None:
            _update_property_value(sessions_properties, 'settings', settings)
    elif editor_type == 'impala':
        sessions_properties = HS2Api.get_properties(editor_type)
        if settings is not None:
            # Fix: the guard tests `settings`, so update the 'settings'
            # property; the previous code updated 'files' with `files`
            # (copy/paste slip from the hive branch above).
            _update_property_value(sessions_properties, 'settings', settings)
    elif editor_type == 'java':
        sessions_properties = []  # Java options
    else:
        sessions_properties = []

    data = {
        'name': name,
        'uuid': str(uuid.uuid4()),
        'description': description,
        'sessions': [{
            'type': editor_type,
            'properties': sessions_properties,
            'id': None
        }],
        'selectedSnippet': editor_type,
        'type': 'query-%s' % editor_type,
        'showHistory': True,
        'isSaved': is_saved,
        'onSuccessUrl': on_success_url,
        'skipHistorify': skip_historify,
        'isManaged': is_task,
        'snippets': [{
            'status': status,
            'id': str(uuid.uuid4()),
            'statement_raw': statement,
            'statement': statement,
            'type': editor_type,
            'wasBatchExecuted': batch_submit,
            'lastExecuted': last_executed,
            'properties': {
                'files': [] if files is None else files,
                'functions': [] if functions is None else functions,
                'settings': [] if settings is None else settings
            },
            'name': name,
            'database': database,
            'result': {
                'handle': {}
            },
            'variables': []
        }]
    }

    if snippet_properties:
        data['snippets'][0]['properties'].update(snippet_properties)

    editor.data = json.dumps(data)

    return editor
Beispiel #30
0
def execute_and_watch(request):
    """Re-run a snippet's statement wrapped in a follow-up action and render it in the editor.

    GET parameters:
      editor / notebook: id of the source Document2 notebook (``editor`` wins).
      snippet: index of the snippet inside that notebook.
      action: one of 'save_as_table', 'insert_as_query', 'index_query'.
      destination: target for the action (table name, HDFS path, or index).

    Returns the editor page rendering the generated query for the first two
    actions; for 'index_query' it redirects to the resulting Oozie workflow
    page instead. Raises PopupException for an unknown action.
    """
    notebook_id = request.GET.get('editor', request.GET.get('notebook'))
    snippet_id = int(request.GET['snippet'])
    action = request.GET['action']
    destination = request.GET['destination']

    notebook = Notebook(document=Document2.objects.get(
        id=notebook_id)).get_data()
    snippet = notebook['snippets'][snippet_id]
    editor_type = snippet['type']

    api = get_api(request, snippet)

    if action == 'save_as_table':
        # The connector builds both the export SQL and the URL of the result.
        sql, success_url = api.export_data_as_table(notebook, snippet,
                                                    destination)
        editor = make_notebook(name='Execute and watch',
                               editor_type=editor_type,
                               statement=sql,
                               status='ready-execute')
    elif action == 'insert_as_query':
        sql, success_url = api.export_large_data_to_hdfs(
            notebook, snippet, destination)
        editor = make_notebook(name='Execute and watch',
                               editor_type=editor_type,
                               statement=sql,
                               status='ready-execute')
    elif action == 'index_query':
        # Materialize the query result into a temporary table first, then
        # index that table.
        sql, success_url = api.export_data_as_table(notebook,
                                                    snippet,
                                                    destination,
                                                    is_temporary=True,
                                                    location='')
        editor = make_notebook(name='Execute and watch',
                               editor_type=editor_type,
                               statement=sql,
                               status='ready-execute')

        # Fetch a result sample to derive the column list for the indexer.
        sample = get_api(request, snippet).fetch_result(notebook,
                                                        snippet,
                                                        0,
                                                        start_over=True)

        from indexer.api3 import _index  # Will be moved to the lib in next commit
        from indexer.file_format import HiveFormat
        from indexer.fields import Field

        # Describe the query output as a CSV-like input for the indexer;
        # column types are mapped from the sample metadata, falling back to
        # 'string' for unknown Hive types.
        file_format = {
            'name':
            'col',
            'inputFormat':
            'query',
            'format': {
                'quoteChar': '"',
                'recordSeparator': '\n',
                'type': 'csv',
                'hasHeader': False,
                'fieldSeparator': '\u0001'
            },
            "sample":
            '',
            "columns": [
                Field(
                    col['name'],
                    HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'],
                                                        'string')).to_dict()
                for col in sample['meta']
            ]
        }

        # Kick off the indexing job and send the user to its Oozie page.
        job_handle = _index(request,
                            file_format,
                            destination,
                            query=notebook['uuid'])
        return redirect(
            reverse('oozie:list_oozie_workflow',
                    kwargs={'job_id': job_handle['handle']['id']}))
    else:
        raise PopupException(_('Action %s is unknown') % action)

    # Render the generated query in the editor, pre-loaded and ready to run.
    return render(
        'editor.mako', request, {
            'notebooks_json':
            json.dumps([editor.get_data()]),
            'options_json':
            json.dumps({
                'languages': [{
                    "name": "%s SQL" % editor_type.title(),
                    "type": editor_type
                }],
                'mode':
                'editor',
                'success_url':
                success_url
            }),
            'editor_type':
            editor_type,
        })