def get_history(request):
  response = {'status': -1}

  doc_type = request.GET.get('doc_type')
  doc_text = request.GET.get('doc_text')
  page = min(int(request.GET.get('page', 1)), 100)
  limit = min(int(request.GET.get('limit', 50)), 100)
  is_notification_manager = request.GET.get('is_notification_manager', 'false') == 'true'

  if is_notification_manager:
    docs = Document2.objects.get_tasks_history(user=request.user)
  else:
    docs = Document2.objects.get_history(doc_type='query-%s' % doc_type, user=request.user)

  if doc_text:
    docs = docs.filter(Q(name__icontains=doc_text) | Q(description__icontains=doc_text) | Q(search__icontains=doc_text))

  # Paginate
  docs = docs.order_by('-last_modified')
  response['count'] = docs.count()
  docs = __paginate(page, limit, queryset=docs)['documents']

  history = []
  for doc in docs:
    notebook = Notebook(document=doc).get_data()
    if 'snippets' in notebook:
      statement = notebook['description'] if is_notification_manager else _get_statement(notebook)
      history.append({
        'name': doc.name,
        'id': doc.id,
        'uuid': doc.uuid,
        'type': doc.type,
        'data': {
            'statement': statement[:1001] if statement else '',
            'lastExecuted': notebook['snippets'][0].get('lastExecuted', -1),
            'status': notebook['snippets'][0]['status'],
            'parentSavedQueryUuid': notebook.get('parentSavedQueryUuid', '')
        } if notebook['snippets'] else {},
        'absoluteUrl': doc.get_absolute_url(),
      })
    else:
      LOG.error('Incomplete History Notebook: %s' % notebook)

  response['history'] = sorted(history, key=lambda row: row['data']['lastExecuted'], reverse=True)
  response['message'] = _('History fetched')
  response['status'] = 0

  return JsonResponse(response)

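# Hedged usage sketch for get_history() above (illustrative only: the URL route
# is not shown in this code, so it is written as a placeholder):
#
#   client.get('/notebook/api/get_history', {   # hypothetical path
#       'doc_type': 'hive',       # looked up as doc_type='query-hive'
#       'doc_text': 'web_logs',   # filters on name/description/search
#       'page': 1, 'limit': 25,   # both values are capped at 100
#   })
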
def table_queries(request, database, table):
  qfilter = Q(data__icontains=table) | Q(data__icontains='%s.%s' % (database, table))

  response = {'status': -1, 'queries': []}
  try:
    queries = [
      {'doc': d.to_dict(), 'data': Notebook(document=d).get_data()}
      for d in Document2.objects.filter(qfilter, owner=request.user, type='query', is_history=False)[:50]
    ]
    response['status'] = 0
    response['queries'] = queries
  except Exception as ex:
    response['status'] = 1
    response['data'] = _("Cannot get queries related to table %s.%s: %s") % (database, table, ex)

  return JsonResponse(response)

def test_check_status(self):
  query = Notebook()

  request = Mock()
  operation_id = Mock()

  with patch('notebook.api.Document2.objects.get_by_uuid') as get_by_uuid:
    with patch('notebook.api.get_api') as get_api:
      with patch('notebook.api.Notebook') as NotebookMock:
        get_api.return_value = Mock(check_status=Mock(return_value={'status': 0}))

        resp = query.check_status(request=request, operation_id=operation_id)

        assert_equal(0, resp['status'])
        assert_equal(0, resp['query_status']['status'])

def _small_indexing(user, fs, client, source, destination, index_name):
  kwargs = {}
  errors = []

  if source['inputFormat'] not in ('manual', 'table', 'query_handle'):
    path = urllib.unquote(source["path"])
    stats = fs.stats(path)
    if stats.size > MAX_UPLOAD_SIZE:
      raise PopupException(_('File size is too large to handle!'))

  indexer = MorphlineIndexer(user, fs)

  fields = indexer.get_field_list(destination['columns'])
  _create_solr_collection(user, fs, client, destination, index_name, kwargs)

  if source['inputFormat'] == 'file':
    path = urllib.unquote(source["path"])
    data = fs.read(path, 0, MAX_UPLOAD_SIZE)

  if client.is_solr_six_or_more():
    kwargs['processor'] = 'tolerant'
    kwargs['map'] = 'NULL:'

  try:
    if source['inputFormat'] == 'query':
      query_id = source['query']['id'] if source['query'].get('id') else source['query']

      notebook = Notebook(document=Document2.objects.document(user=user, doc_id=query_id)).get_data()
      request = MockedDjangoRequest(user=user)
      snippet = notebook['snippets'][0]

      searcher = CollectionManagerController(user)
      columns = [field['name'] for field in fields if field['name'] != 'hue_id']
      fetch_handle = lambda rows, start_over: get_api(request, snippet).fetch_result(notebook, snippet, rows=rows, start_over=start_over)  # Assumes handle still live
      rows = searcher.update_data_from_hive(index_name, columns, fetch_handle=fetch_handle, indexing_options=kwargs)
      # TODO if rows == MAX_ROWS truncation warning
    elif source['inputFormat'] == 'manual':
      pass  # No need to do anything
    else:
      response = client.index(name=index_name, data=data, **kwargs)
      errors = [error.get('message', '') for error in response['responseHeader'].get('errors', [])]
  except Exception as e:
    try:
      client.delete_index(index_name, keep_config=False)
    except Exception as e2:
      LOG.warn('Error while cleaning-up config of failed collection creation %s: %s' % (index_name, e2))
    raise e  # Surface the original failure after cleaning up the partial collection

  return errors

def get_history(request):
  response = {'status': -1}

  doc_type = request.GET.get('doc_type')
  doc_text = request.GET.get('doc_text')
  limit = min(int(request.GET.get('len', 50)), 100)  # GET values arrive as strings; cap the page size at 100

  docs = Document2.objects.get_history(doc_type='query-%s' % doc_type, user=request.user)
  if doc_text:
    docs = docs.filter(Q(name__icontains=doc_text) | Q(description__icontains=doc_text) | Q(search__icontains=doc_text))

  history = []
  for doc in docs.order_by('-last_modified')[:limit]:
    notebook = Notebook(document=doc).get_data()
    if 'snippets' in notebook:
      statement = _get_statement(notebook)
      history.append({
        'name': doc.name,
        'id': doc.id,
        'uuid': doc.uuid,
        'type': doc.type,
        'data': {
            'statement': statement[:1001] if statement else '',
            'lastExecuted': notebook['snippets'][0]['lastExecuted'],
            'status': notebook['snippets'][0]['status'],
            'parentSavedQueryUuid': notebook.get('parentSavedQueryUuid', '')
        } if notebook['snippets'] else {},
        'absoluteUrl': doc.get_absolute_url(),
      })
    else:
      LOG.error('Incomplete History Notebook: %s' % notebook)

  response['history'] = sorted(history, key=lambda row: row['data']['lastExecuted'], reverse=True)
  response['message'] = _('History fetched')
  response['status'] = 0

  return JsonResponse(response)

def get_history(request):
  response = {'status': -1}

  doc_type = request.GET.get('doc_type')

  response['status'] = 0
  response['history'] = [{
      'name': doc.name,
      'id': doc.id,
      'data': Notebook(document=doc).get_data(),
      'absoluteUrl': doc.get_absolute_url()
    } for doc in Document2.objects.get_history(doc_type='query-%s' % doc_type, user=request.user).order_by('-last_modified')[:25]]
  response['message'] = _('History fetched')

  return JsonResponse(response)

def execute_and_watch(request):
  notebook_id = request.GET.get('editor', request.GET.get('notebook'))
  snippet_id = int(request.GET['snippet'])
  action = request.GET['action']
  destination = request.GET['destination']

  notebook = Notebook(document=Document2.objects.get(id=notebook_id))
  snippet = notebook.get_data()['snippets'][snippet_id]
  editor_type = snippet['type']

  api = get_api(request, snippet)

  if action == 'save_as_table':
    sql, success_url = api.export_data_as_table(snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')
  elif action == 'insert_as_query':
    sql, success_url = api.export_large_data_to_hdfs(snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')
  else:
    raise PopupException(_('Action %s is unknown') % action)

  return render('editor.mako', request, {
      'notebooks_json': json.dumps([editor.get_data()]),
      'options_json': json.dumps({
          'languages': [{"name": "%s SQL" % editor_type.title(), "type": editor_type}],
          'mode': 'editor',
          'success_url': success_url
      }),
      'editor_type': editor_type,
  })

def _execute_notebook(request, notebook, snippet):
  response = {'status': -1}
  result = None
  history = None

  historify = (notebook['type'] != 'notebook' or snippet.get('wasBatchExecuted')) and not notebook.get('skipHistorify')

  try:
    try:
      if historify:
        history = _historify(notebook, request.user)
        notebook = Notebook(document=history).get_data()

      response['handle'] = get_api(request, snippet).execute(notebook, snippet)

      # Retrieve and remove the result from the handle
      if response['handle'].get('sync'):
        result = response['handle'].pop('result')
    finally:
      if historify:
        _snippet = [s for s in notebook['snippets'] if s['id'] == snippet['id']][0]
        if 'handle' in response:  # No failure
          _snippet['result']['handle'] = response['handle']
          _snippet['result']['statements_count'] = response['handle'].get('statements_count', 1)
          _snippet['result']['statement_id'] = response['handle'].get('statement_id', 0)
          _snippet['result']['handle']['statement'] = response['handle'].get('statement', snippet['statement']).strip()  # For non HS2, as non multi query yet
        else:
          _snippet['status'] = 'failed'

        if history:  # If _historify failed, history will be None
          history.update_data(notebook)
          history.save()

          response['history_id'] = history.id
          response['history_uuid'] = history.uuid
          if notebook['isSaved']:  # Keep track of history of saved queries
            response['history_parent_uuid'] = history.dependencies.filter(type__startswith='query-').latest('last_modified').uuid
  except QueryError as ex:  # We inject the history information from _historify() to the failed queries
    if response.get('history_id'):
      ex.extra['history_id'] = response['history_id']
    if response.get('history_uuid'):
      ex.extra['history_uuid'] = response['history_uuid']
    if response.get('history_parent_uuid'):
      ex.extra['history_parent_uuid'] = response['history_parent_uuid']
    raise ex

def make_notebook(name='Browse', description='', editor_type='hive', statement='', status='ready',
                  files=None, functions=None, settings=None):
  editor = Notebook()

  editor.data = json.dumps({
    'name': name,
    'description': description,
    'sessions': [{
      'type': editor_type,
      'properties': [],
      'id': None
    }],
    'selectedSnippet': editor_type,
    'type': 'query-%s' % editor_type,
    'showHistory': True,
    'snippets': [{
      'status': status,
      'id': str(uuid.uuid4()),
      'statement_raw': statement,
      'statement': statement,
      'type': editor_type,
      'properties': {
        'files': [] if files is None else files,
        'functions': [] if functions is None else functions,
        'settings': [] if settings is None else settings
      },
      'name': name,
      'database': 'default',
      'result': {}
    }]
  })

  return editor

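# A minimal usage sketch of the make_notebook() helper above, assuming the
# module-level json and uuid imports its body relies on; the statement is a
# hypothetical example.
import json

editor = make_notebook(
  name='Sample browse',
  editor_type='hive',
  statement='SELECT * FROM web_logs LIMIT 10',
  status='ready-execute'
)
data = json.loads(editor.data)
assert data['type'] == 'query-hive'
assert data['snippets'][0]['statement'] == 'SELECT * FROM web_logs LIMIT 10'
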
def export_documents(request):
  if request.GET.get('documents'):
    selection = json.loads(request.GET.get('documents'))
  else:
    selection = json.loads(request.POST.get('documents'))

  # Only export documents the user has permissions to read
  docs = Document2.objects.documents(user=request.user, perms='both', include_history=True, include_trashed=True).\
    filter(id__in=selection).order_by('-id')

  # Add any dependencies to the set of exported documents
  export_doc_set = _get_dependencies(docs)

  # For directories, add any children docs to the set of exported documents
  export_doc_set.update(_get_dependencies(docs, deps_mode=False))

  # Get PKs of documents to export
  doc_ids = [doc.pk for doc in export_doc_set]

  f = StringIO.StringIO()

  if doc_ids:
    doc_ids = ','.join(map(str, doc_ids))
    management.call_command('dumpdata', 'desktop.Document2', primary_keys=doc_ids, indent=2, use_natural_keys=True, verbosity=2, stdout=f)

  if request.GET.get('format') == 'json':
    return JsonResponse(f.getvalue(), safe=False)
  elif request.GET.get('format') == 'zip':
    zfile = zipfile.ZipFile(f, 'w')
    zfile.writestr("hue.json", f.getvalue())
    for doc in docs:
      if doc.type == 'notebook':
        try:
          from spark.models import Notebook
          zfile.writestr("notebook-%s-%s.txt" % (doc.name, doc.id), smart_str(Notebook(document=doc).get_str()))
        except Exception as e:
          LOG.exception(e)
    zfile.close()

    response = HttpResponse(content_type="application/zip")
    response["Content-Length"] = len(f.getvalue())
    response['Content-Disposition'] = 'attachment; filename="hue-documents.zip"'
    response.write(f.getvalue())
    return response

def guess_field_types(request):
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'file':
    indexer = Indexer(request.user, request.fs)
    stream = request.fs.open(file_format["path"])
    _convert_format(file_format["format"], inverse=True)

    format_ = indexer.guess_field_types({
      "file": {
        "stream": stream,
        "name": file_format['path']
      },
      "format": file_format['format']
    })
  elif file_format['inputFormat'] == 'table':
    sample = get_api(request, {'type': 'hive'}).get_sample_data({'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

    format_ = {
      "sample": sample['rows'][:4],
      "columns": [
        Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
        for col in table_metadata.cols
      ]
    }
  elif file_format['inputFormat'] == 'query':
    # Only support open query history
    # TODO get schema from explain query, which is not possible
    notebook = Notebook(document=Document2.objects.get(id=file_format['query'])).get_data()
    snippet = notebook['snippets'][0]
    sample = get_api(request, snippet).fetch_result(notebook, snippet, 4, start_over=True)

    format_ = {
      "sample": sample['rows'][:4],
      "sample_cols": sample['meta'],  # fetch_result() returns a dict, like sample['rows'] above
      "columns": [
        Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
        for col in sample['meta']
      ]
    }

  return JsonResponse(format_)

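# A hedged example of the 'fileFormat' POST payload guess_field_types() accepts
# for the 'table' input format; the database and table names are hypothetical:
#
#   file_format = {
#     'inputFormat': 'table',
#     'databaseName': 'default',
#     'tableName': 'web_logs',
#   }
#   client.post(guess_field_types_url,  # route not shown in this snippet
#               {'fileFormat': json.dumps(file_format)})
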
def create_notebook(request):
  response = {'status': -1}

  editor_type = request.POST.get('type', 'notebook')
  directory_uuid = request.POST.get('directory_uuid')

  editor = Notebook()
  data = editor.get_data()

  if editor_type != 'notebook':
    data['name'] = ''
    data['type'] = 'query-%s' % editor_type  # TODO: Add handling for non-SQL types

  data['directoryUuid'] = directory_uuid
  editor.data = json.dumps(data)

  response['notebook'] = editor.get_data()
  response['status'] = 0

  return JsonResponse(response)

def close_statement(request):
  response = {'status': -1}

  # Passed by check_document_access_permission but unused by APIs
  notebook = json.loads(request.POST.get('notebook', '{}'))
  nb_doc = Document2.objects.get_by_uuid(user=request.user, uuid=notebook['uuid'])
  notebook = Notebook(document=nb_doc).get_data()
  snippet = notebook['snippets'][0]

  try:
    response['result'] = get_api(request, snippet).close_statement(notebook, snippet)
  except QueryExpired:
    pass

  response['status'] = 0
  response['message'] = _('Statement closed !')

  return JsonResponse(response)

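# Hedged call sketch for close_statement(): the POSTed notebook only needs the
# uuid of the saved document. 'notebook:close_statement' is an assumed URL name,
# mirroring the 'notebook:*' names used in the tests further down.
#
#   response = client.post(reverse('notebook:close_statement'),
#                          {'notebook': json.dumps({'uuid': notebook_doc.uuid})})
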
def extract_archive_in_hdfs(request, upload_path, file_name):
  _upload_extract_archive_script_to_hdfs(request.fs)

  output_path = upload_path + '/' + file_name.split('.')[0]

  shell_notebook = Notebook(
    description=_('HDFS Extraction of %(upload_path)s/%(file_name)s') % {'upload_path': upload_path, 'file_name': file_name},
    isManaged=True,
    onSuccessUrl=reverse('filebrowser.views.view', kwargs={'path': output_path})
  )

  shell_notebook.add_shell_snippet(
    shell_command='extract_archive_in_hdfs.sh',
    arguments=[{'value': '-u=' + upload_path}, {'value': '-f=' + file_name}, {'value': '-o=' + output_path}],
    archives=[],
    files=[{'value': '/user/' + DEFAULT_USER.get() + '/common/extract_archive_in_hdfs.sh'},
           {"value": upload_path + '/' + urllib.quote(file_name)}],
    env_var=[{'value': 'HADOOP_USER_NAME=${wf:user()}'}]
  )

  return shell_notebook.execute(request, batch=True)

def autocomplete(self, snippet, database=None, table=None, column=None, nested=None):
  db = self._get_db(snippet)
  query = None

  if snippet.get('query'):
    query = snippet.get('query')
  elif snippet.get('source') == 'query':
    document = Document2.objects.get(id=database)
    document.can_read_or_exception(self.user)
    notebook = Notebook(document=document).get_data()
    snippet = notebook['snippets'][0]
    query = self._get_current_statement(db, snippet)['statement']
    database, table = '', ''

  return _autocomplete(db, database, table, column, nested, query=query)

def get_external_statement(request):
  response = {'status': -1, 'message': ''}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  if snippet.get('statementType') == 'file':
    response['statement'] = _get_statement_from_file(request.user, request.fs, snippet)
  elif snippet.get('statementType') == 'document':
    notebook = Notebook(Document2.objects.get_by_uuid(user=request.user, uuid=snippet['associatedDocumentUuid'], perm_type='read'))
    response['statement'] = notebook.get_str()

  response['status'] = 0

  return JsonResponse(response)

def compress_files_in_hdfs(request, file_names, upload_path, archive_name):
  _upload_compress_files_script_to_hdfs(request.fs)

  files = [{"value": upload_path + '/' + urllib_quote(file_name.encode('utf-8'), SAFE_CHARACTERS_URI)} for file_name in file_names]
  files.append({'value': '/user/' + DEFAULT_USER.get() + '/common/compress_files_in_hdfs.sh'})
  start_time = json.loads(request.POST.get('start_time', '-1'))

  shell_notebook = Notebook(
    name=_('HDFS Compression to %(upload_path)s/hue_compressed.zip') % {'upload_path': upload_path},
    isManaged=True,
    onSuccessUrl='/filebrowser/view=' + urllib_quote(upload_path.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)
  )

  shell_notebook.add_shell_snippet(
    shell_command='compress_files_in_hdfs.sh',
    arguments=[{'value': '-u=' + upload_path}, {'value': '-f=' + ','.join(file_names)}, {'value': '-n=' + archive_name}],
    archives=[],
    files=files,
    env_var=[{'value': 'HADOOP_USER_NAME=${wf:user()}'}],
    last_executed=start_time
  )

  return shell_notebook.execute(request, batch=True)

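# Hypothetical invocation of compress_files_in_hdfs() above, assuming `request`
# carries an authenticated user and a configured HDFS client on request.fs:
#
#   handle = compress_files_in_hdfs(
#     request,
#     file_names=['part-00000', 'part-00001'],
#     upload_path='/user/demo/data',
#     archive_name='hue_compressed.zip'
#   )
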
def test_delete_notebook(self):
  trash_notebook_json = """
    {
      "selectedSnippet": "hive",
      "showHistory": false,
      "description": "Test Hive Query",
      "name": "Test Hive Query",
      "sessions": [
        {
          "type": "hive",
          "properties": [],
          "id": null
        }
      ],
      "type": "query-hive",
      "id": null,
      "snippets": [{"id": "e069ef32-5c95-4507-b961-e79c090b5abf","type":"hive","status":"ready","database":"default","statement":"select * from web_logs","statement_raw":"select * from web_logs","properties":{"settings":[],"files":[],"functions":[]},"result":{}}],
      "uuid": "8a20da5f-b69c-4843-b17d-dea5c74c41d1"
    }
  """

  # Assert that the notebook is first saved
  response = self.client.post(reverse('notebook:save_notebook'), {'notebook': trash_notebook_json})
  data = json.loads(response.content)
  assert_equal(0, data['status'], data)

  # Test that deleting it moves it to the user's Trash folder
  notebook_doc = Document2.objects.get(id=data['id'])
  trash_notebooks = [Notebook(notebook_doc).get_data()]
  response = self.client.post(reverse('notebook:delete'), {'notebooks': json.dumps(trash_notebooks)})
  data = json.loads(response.content)
  assert_equal(0, data['status'], data)
  assert_equal('Trashed 1 notebook(s)', data['message'], data)

  response = self.client.get('/desktop/api2/doc', {'path': '/.Trash'})
  data = json.loads(response.content)
  trash_uuids = [doc['uuid'] for doc in data['children']]
  assert_true(notebook_doc.uuid in trash_uuids, data)

def cancel_statement(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  nb_doc = Document2.objects.get_by_uuid(user=request.user, uuid=notebook['uuid'])
  notebook = Notebook(document=nb_doc).get_data()
  snippet = notebook['snippets'][0]

  with opentracing.tracer.start_span('notebook-cancel_statement') as span:
    response['result'] = get_api(request, snippet).cancel(notebook, snippet)

    span.set_tag('user-id', request.user.username)
    span.set_tag(
      'query-id',
      snippet['result']['handle']['guid'] if snippet['result'].get('handle') and snippet['result']['handle'].get('guid') else None
    )

  response['status'] = 0

  return JsonResponse(response)

def extract_archive_in_hdfs(request, upload_path, file_name):
  _upload_extract_archive_script_to_hdfs(request.fs)

  output_path = upload_path + '/' + file_name.split('.')[0]
  start_time = json.loads(request.POST.get('start_time', '-1'))

  shell_notebook = Notebook(
    name=_('HDFS Extraction of %(upload_path)s/%(file_name)s') % {'upload_path': upload_path, 'file_name': file_name},
    isManaged=True,
    onSuccessUrl='/filebrowser/view=' + urllib.parse.quote(output_path.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)
  )

  shell_notebook.add_shell_snippet(
    shell_command='extract_archive_in_hdfs.sh',
    arguments=[{'value': '-u=' + upload_path}, {'value': '-f=' + file_name}, {'value': '-o=' + output_path}],
    archives=[],
    files=[{'value': '/user/' + DEFAULT_USER.get() + '/common/extract_archive_in_hdfs.sh'},
           {"value": upload_path + '/' + urllib.parse.quote(file_name)}],
    env_var=[{'value': 'HADOOP_USER_NAME=${wf:user()}'}],
    last_executed=start_time
  )

  return shell_notebook.execute(request, batch=True)

def autocomplete(self, snippet, database=None, table=None, column=None, nested=None):
  db = self._get_db(snippet, interpreter=self.interpreter)
  query = None

  if snippet.get('query'):
    query = snippet.get('query')
  elif snippet.get('source') == 'query':
    document = Document2.objects.get(id=database)
    document.can_read_or_exception(self.user)
    notebook = Notebook(document=document).get_data()
    snippet = notebook['snippets'][0]
    query = self._get_current_statement(notebook, snippet)['statement']
    database, table = '', ''

  resp = _autocomplete(db, database, table, column, nested, query=query, cluster=self.interpreter)

  if resp.get('error'):
    resp['message'] = resp.pop('error')
    if 'Read timed out' in resp['message']:
      raise QueryExpired(resp['message'])

  return resp

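# Hedged usage note for autocomplete() above: when snippet['source'] == 'query',
# the `database` argument actually carries the id of a saved query document, and
# that document's statement is passed to _autocomplete() as extra context.
#
#   api.autocomplete({'source': 'query'}, database=saved_query_doc.id)  # hypothetical call
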
def get_history(request):
  response = {'status': -1}

  doc_type = request.GET.get('doc_type')
  limit = min(int(request.GET.get('len', 50)), 100)  # GET values arrive as strings; cap the page size at 100

  response['status'] = 0
  history = []
  for doc in Document2.objects.get_history(doc_type='query-%s' % doc_type, user=request.user).order_by('-last_modified')[:limit]:
    notebook = Notebook(document=doc).get_data()
    if 'snippets' in notebook:
      history.append({
        'name': doc.name,
        'id': doc.id,
        'uuid': doc.uuid,
        'type': doc.type,
        'data': {
            'statement_raw': notebook['snippets'][0]['statement_raw'][:1001],
            'lastExecuted': notebook['snippets'][0]['lastExecuted'],
            'status': notebook['snippets'][0]['status'],
            'parentUuid': notebook.get('parentUuid', '')
        } if notebook['snippets'] else {},
        'absoluteUrl': doc.get_absolute_url(),
      })
    else:
      LOG.error('Incomplete History Notebook: %s' % notebook)

  response['history'] = history
  response['message'] = _('History fetched')

  return JsonResponse(response)

def check_status(request):
  response = {'status': -1}

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))

  try:
    response['query_status'] = get_api(request, snippet).check_status(notebook, snippet)
    response['status'] = 0
  finally:
    if response['status'] == 0 and snippet['status'] != response['query_status']['status']:
      status = response['query_status']['status']
    else:
      status = 'failed'
    nb_doc = Document2.objects.get(id=notebook['id'])
    nb_doc.can_write_or_exception(request.user)
    nb = Notebook(document=nb_doc).get_data()
    nb['snippets'][0]['status'] = status
    nb_doc.update_data(nb)
    nb_doc.save()

  return JsonResponse(response)

def extract_archive_in_hdfs(request, upload_path, file_name):
  _upload_extract_archive_script_to_hdfs(request.fs)

  shell_notebook = Notebook()
  shell_notebook.add_shell_snippet(
    shell_command='extract_archive_in_hdfs.sh',
    arguments=[{'value': '-u=' + upload_path}, {'value': '-f=' + file_name}],
    archives=[],
    files=[{'value': '/user/' + DEFAULT_USER.get() + '/common/extract_archive_in_hdfs.sh'},
           {"value": upload_path + '/' + file_name}],
    env_var=[{'value': 'HADOOP_USER_NAME=${wf:user()}'}]
  )

  return shell_notebook.execute(request, batch=True)

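# Hypothetical invocation of this minimal extract_archive_in_hdfs() variant;
# the paths are examples only:
#
#   job = extract_archive_in_hdfs(request, upload_path='/user/demo/uploads', file_name='logs.zip')
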
def compress_files_in_hdfs(request, file_names, upload_path):
  _upload_compress_files_script_to_hdfs(request.fs)

  output_path = upload_path

  files = [{"value": upload_path + '/' + file_name} for file_name in file_names]
  files.append({'value': '/user/' + DEFAULT_USER.get() + '/common/compress_files_in_hdfs.sh'})

  shell_notebook = Notebook(
    description=_('HDFS Compression to %(upload_path)s/hue_compressed.zip') % {'upload_path': upload_path},
    isManaged=True,
    onSuccessUrl=reverse('filebrowser.views.view', kwargs={'path': output_path})
  )

  shell_notebook.add_shell_snippet(
    shell_command='compress_files_in_hdfs.sh',
    arguments=[{'value': '-u=' + upload_path}, {'value': '-f=' + ','.join(file_names)}, {'value': '-o=' + output_path}],
    archives=[],
    files=files,
    env_var=[{'value': 'HADOOP_USER_NAME=${wf:user()}'}]
  )

  return shell_notebook.execute(request, batch=True)

def _get_document_helper(request, uuid, with_data, with_dependencies, path):
  if uuid:
    if uuid.isdigit():
      document = Document2.objects.document(user=request.user, doc_id=uuid)
    else:
      document = Document2.objects.get_by_uuid(user=request.user, uuid=uuid)
  else:  # Find by path
    document = Document2.objects.get_by_path(user=request.user, path=path)

  response = {
    'document': document.to_dict(),
    'parent': document.parent_directory.to_dict() if document.parent_directory else None,
    'children': [],
    'dependencies': [],
    'dependents': [],
    'data': '',
    'status': 0
  }

  response['user_perms'] = {
    'can_read': document.can_read(request.user),
    'can_write': document.can_write(request.user)
  }

  if with_data:
    data = json.loads(document.data)
    # Upgrade session properties for Hive and Impala
    if document.type.startswith('query'):
      notebook = Notebook(document=document)
      notebook = upgrade_session_properties(request, notebook)
      data = json.loads(notebook.data)
      if data.get('uuid') != document.uuid:  # Old format < 3.11
        data['uuid'] = document.uuid
    response['data'] = data

  if with_dependencies:
    response['dependencies'] = [dependency.to_dict() for dependency in document.dependencies.all()]
    response['dependents'] = [dependent.to_dict() for dependent in document.dependents.all()]

  # Get children documents if this is a directory
  if document.is_directory:
    directory = Directory.objects.get(id=document.id)

    # If this is the user's home directory, fetch shared docs too
    if document.is_home_directory:
      children = directory.get_children_and_shared_documents(user=request.user)
      response.update(_filter_documents(request, queryset=children, flatten=True))
    else:
      children = directory.get_children_documents()
      response.update(_filter_documents(request, queryset=children, flatten=False))

    # Paginate and serialize Results
    if 'documents' in response:
      response.update(_paginate(request, queryset=response['documents']))
      # Rename documents to children
      response['children'] = response.pop('documents')
      response['children'] = [doc.to_dict() for doc in response['children']]

  return response

def make_notebook(name='Browse', description='', editor_type='hive', statement='', status='ready',
                  files=None, functions=None, settings=None, is_saved=False, database='default',
                  snippet_properties=None, batch_submit=False, on_success_url=None, skip_historify=False,
                  is_task=False, last_executed=-1, is_notebook=False, pub_sub_url=None,
                  result_properties={}, namespace=None, compute=None):
  '''
  skip_historify: do not add the task to the query history. e.g. SQL Dashboard
  is_task / isManaged: true when being a managed by Hue operation (include_managed=True in document), e.g. exporting query result, dropping some tables
  '''
  from notebook.connectors.hiveserver2 import HS2Api

  # impala can have compute name appended to the editor_type (impala/dbms.py - get_query_server_config)
  if editor_type.startswith('impala'):
    editor_type = 'impala'

  editor = Notebook()
  if snippet_properties is None:
    snippet_properties = {}

  if editor_type == 'hive':
    sessions_properties = HS2Api.get_properties(editor_type)
    if files is not None:
      _update_property_value(sessions_properties, 'files', files)

    if functions is not None:
      _update_property_value(sessions_properties, 'functions', functions)

    if settings is not None:
      _update_property_value(sessions_properties, 'settings', settings)
  elif editor_type == 'impala':
    sessions_properties = HS2Api.get_properties(editor_type)
    if settings is not None:
      _update_property_value(sessions_properties, 'files', files)
  elif editor_type == 'java':
    sessions_properties = []  # Java options
  else:
    sessions_properties = []

  data = {
    'name': name,
    'uuid': str(uuid.uuid4()),
    'description': description,
    'sessions': [
      {
        'type': editor_type,
        'properties': sessions_properties,
        'id': None
      }
    ],
    'selectedSnippet': editor_type,
    'type': 'notebook' if is_notebook else 'query-%s' % editor_type,
    'showHistory': True,
    'isSaved': is_saved,
    'onSuccessUrl': urllib.quote(on_success_url.encode('utf-8'), safe=SAFE_CHARACTERS_URI) if on_success_url else None,
    'pubSubUrl': pub_sub_url,
    'skipHistorify': skip_historify,
    'isManaged': is_task,
    'snippets': [
      {
        'status': status,
        'id': str(uuid.uuid4()),
        'statement_raw': statement,
        'statement': statement,
        'type': editor_type,
        'wasBatchExecuted': batch_submit,
        'lastExecuted': last_executed,
        'properties': {
          'files': [] if files is None else files,
          'functions': [] if functions is None else functions,
          'settings': [] if settings is None else settings
        },
        'name': name,
        'database': database,
        'namespace': namespace if namespace else {},
        'compute': compute if compute else {},
        'result': {'handle': {}},
        'variables': []
      }
    ] if not is_notebook else []
  }

  if snippet_properties:
    data['snippets'][0]['properties'].update(snippet_properties)

  if result_properties:
    data['snippets'][0]['result'].update(result_properties)

  editor.data = json.dumps(data)

  return editor

def _get_query(self, name):
  nb_doc = Document2.objects.document(user=self.user, doc_id=name)
  notebook = Notebook(document=nb_doc).get_data()
  snippet = notebook['snippets'][0]
  return snippet['statement'].strip(';')

def make_notebook(name='Browse', description='', editor_type='hive', statement='', status='ready',
                  files=None, functions=None, settings=None, is_saved=False, database='default',
                  snippet_properties=None, batch_submit=False, on_success_url=None, skip_historify=False,
                  is_task=False, last_executed=-1):
  '''
  skip_historify: do not add the task to the query history. e.g. SQL Dashboard
  isManaged: true when being a managed by Hue operation (include_managed=True in document), e.g. exporting query result, dropping some tables
  '''
  from notebook.connectors.hiveserver2 import HS2Api

  editor = Notebook()
  if snippet_properties is None:
    snippet_properties = {}

  if editor_type == 'hive':
    sessions_properties = HS2Api.get_properties(editor_type)
    if files is not None:
      _update_property_value(sessions_properties, 'files', files)

    if functions is not None:
      _update_property_value(sessions_properties, 'functions', functions)

    if settings is not None:
      _update_property_value(sessions_properties, 'settings', settings)
  elif editor_type == 'impala':
    sessions_properties = HS2Api.get_properties(editor_type)
    if settings is not None:
      _update_property_value(sessions_properties, 'files', files)
  elif editor_type == 'java':
    sessions_properties = []  # Java options
  else:
    sessions_properties = []

  data = {
    'name': name,
    'uuid': str(uuid.uuid4()),
    'description': description,
    'sessions': [{
      'type': editor_type,
      'properties': sessions_properties,
      'id': None
    }],
    'selectedSnippet': editor_type,
    'type': 'query-%s' % editor_type,
    'showHistory': True,
    'isSaved': is_saved,
    'onSuccessUrl': on_success_url,
    'skipHistorify': skip_historify,
    'isManaged': is_task,
    'snippets': [{
      'status': status,
      'id': str(uuid.uuid4()),
      'statement_raw': statement,
      'statement': statement,
      'type': editor_type,
      'wasBatchExecuted': batch_submit,
      'lastExecuted': last_executed,
      'properties': {
        'files': [] if files is None else files,
        'functions': [] if functions is None else functions,
        'settings': [] if settings is None else settings
      },
      'name': name,
      'database': database,
      'result': {'handle': {}},
      'variables': []
    }]
  }

  if snippet_properties:
    data['snippets'][0]['properties'].update(snippet_properties)

  editor.data = json.dumps(data)

  return editor

def execute_and_watch(request):
  notebook_id = request.GET.get('editor', request.GET.get('notebook'))
  snippet_id = int(request.GET['snippet'])
  action = request.GET['action']
  destination = request.GET['destination']

  notebook = Notebook(document=Document2.objects.get(id=notebook_id)).get_data()
  snippet = notebook['snippets'][snippet_id]
  editor_type = snippet['type']

  api = get_api(request, snippet)

  if action == 'save_as_table':
    sql, success_url = api.export_data_as_table(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')
  elif action == 'insert_as_query':
    sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')
  elif action == 'index_query':
    sql, success_url = api.export_data_as_table(notebook, snippet, destination, is_temporary=True, location='')
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')

    sample = get_api(request, snippet).fetch_result(notebook, snippet, 0, start_over=True)

    from indexer.api3 import _index  # Will be moved to the lib in next commit
    from indexer.file_format import HiveFormat
    from indexer.fields import Field

    file_format = {
      'name': 'col',
      'inputFormat': 'query',
      'format': {
        'quoteChar': '"',
        'recordSeparator': '\n',
        'type': 'csv',
        'hasHeader': False,
        'fieldSeparator': '\u0001'
      },
      "sample": '',
      "columns": [
        Field(col['name'], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
        for col in sample['meta']
      ]
    }

    job_handle = _index(request, file_format, destination, query=notebook['uuid'])
    return redirect(reverse('oozie:list_oozie_workflow', kwargs={'job_id': job_handle['handle']['id']}))
  else:
    raise PopupException(_('Action %s is unknown') % action)

  return render('editor.mako', request, {
      'notebooks_json': json.dumps([editor.get_data()]),
      'options_json': json.dumps({
          'languages': [{"name": "%s SQL" % editor_type.title(), "type": editor_type}],
          'mode': 'editor',
          'success_url': success_url
      }),
      'editor_type': editor_type,
  })