def export_large_data_to_hdfs(self, notebook, snippet, destination):
  db = self._get_db(snippet)
  response = self._get_current_statement(db, snippet)
  session = self._get_session(notebook, snippet['type'])
  query = self._prepare_hql_query(snippet, response.pop('statement'), session)

  if 'select' not in query.hql_query.strip().lower():
    raise PopupException(_('Only SELECT statements can be saved. Provided statement: %(query)s') % {'query': query.hql_query})

  hql = '''
DROP TABLE IF EXISTS `%(table)s`;

CREATE TABLE `%(table)s` ROW FORMAT DELIMITED
     FIELDS TERMINATED BY '\\t'
     ESCAPED BY '\\\\'
     LINES TERMINATED BY '\\n'
     STORED AS TEXTFILE LOCATION '%(location)s'
     AS
%(hql)s;

ALTER TABLE `%(table)s` SET TBLPROPERTIES('EXTERNAL'='TRUE');

DROP TABLE IF EXISTS `%(table)s`;
''' % {
    'table': _get_snippet_name(notebook),
    'location': destination,
    'hql': query.hql_query
  }

  success_url = '/filebrowser/view=%s' % destination

  return hql, success_url
def export_large_data_to_hdfs(self, notebook, snippet, destination):
  response = self._get_current_statement(notebook, snippet)
  session = self._get_session(notebook, snippet['type'])
  query = self._prepare_hql_query(snippet, response.pop('statement'), session)

  if 'select' not in query.hql_query.strip().lower():
    raise PopupException(_('Only SELECT statements can be saved. Provided statement: %(query)s') % {'query': query.hql_query})

  hql = '''
DROP TABLE IF EXISTS `%(table)s`;

CREATE TABLE `%(table)s` ROW FORMAT DELIMITED
     FIELDS TERMINATED BY '\\t'
     ESCAPED BY '\\\\'
     LINES TERMINATED BY '\\n'
     STORED AS TEXTFILE LOCATION '%(location)s'
     AS
%(hql)s;

ALTER TABLE `%(table)s` SET TBLPROPERTIES('EXTERNAL'='TRUE');

DROP TABLE IF EXISTS `%(table)s`;
''' % {
    'table': _get_snippet_name(notebook, unique=True, table_format=True),
    'location': self.request.fs.netnormpath(destination),
    'hql': query.hql_query
  }

  success_url = '/filebrowser/view=%s' % urllib_quote(destination.encode('utf-8'), safe=SAFE_CHARACTERS_URI_COMPONENTS)

  return hql, success_url
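# Illustrative sketch, not from the Hue source: the HQL that export_large_data_to_hdfs would
# generate for a hypothetical snippet named 'sample_07' exported to '/user/demo/export'.
# CREATE TABLE ... LOCATION materializes the SELECT result as tab-delimited text files under
# the destination; flipping the table to EXTERNAL before the final DROP makes Hive remove only
# the metadata and leave the exported files in place.
EXAMPLE_EXPORT_HQL = '''
DROP TABLE IF EXISTS `sample_07`;

CREATE TABLE `sample_07` ROW FORMAT DELIMITED
     FIELDS TERMINATED BY '\\t'
     ESCAPED BY '\\\\'
     LINES TERMINATED BY '\\n'
     STORED AS TEXTFILE LOCATION '/user/demo/export'
     AS
SELECT * FROM web_logs LIMIT 100;

ALTER TABLE `sample_07` SET TBLPROPERTIES('EXTERNAL'='TRUE');

DROP TABLE IF EXISTS `sample_07`;
'''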
def download(self, notebook, snippet, format):
  try:
    db = self._get_db(snippet)

    handle = self._get_handle(snippet)
    # Test handle to verify if still valid
    db.fetch(handle, start_over=True, rows=1)

    file_name = _get_snippet_name(notebook)

    return data_export.download(handle, format, db, id=snippet['id'], file_name=file_name)
  except Exception, e:
    title = 'The query result cannot be downloaded.'
    LOG.exception(title)

    if hasattr(e, 'message') and e.message:
      if 'generic failure: Unable to find a callback: 32775' in e.message:
        message = e.message + " " + _("Increase the sasl_max_buffer value in hue.ini")
      else:
        message = e.message
    else:
      message = e

    raise PopupException(_(title), detail=message)
def download(self, notebook, snippet, format, user_agent=None):
  try:
    db = self._get_db(snippet)

    handle = self._get_handle(snippet)
    # Test handle to verify if still valid
    db.fetch(handle, start_over=True, rows=1)

    file_name = _get_snippet_name(notebook)

    return data_export.download(handle, format, db, id=snippet['id'], file_name=file_name, user_agent=user_agent)
  except Exception, e:
    title = 'The query result cannot be downloaded.'
    LOG.exception(title)

    if hasattr(e, 'message') and e.message:
      if 'generic failure: Unable to find a callback: 32775' in e.message:
        message = e.message + " " + _("Increase the sasl_max_buffer value in hue.ini")
      elif 'query result cache exceeded its limit' in e.message:
        message = e.message.replace("Restarting the fetch is not possible.", _("Please execute the query again."))
      else:
        message = e.message
    else:
      message = e

    raise PopupException(_(title), detail=message)
def download(request):
  if not ENABLE_DOWNLOAD.get():
    return serve_403_error(request)

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  file_format = request.POST.get('format', 'csv')

  response = get_api(request, snippet).download(notebook, snippet, file_format, user_agent=request.META.get('HTTP_USER_AGENT'))

  if response:
    request.audit = {
      'operation': 'DOWNLOAD',
      'operationText': 'User %s downloaded results from %s as %s' % (request.user.username, _get_snippet_name(notebook), file_format),
      'allowed': True
    }

  return response
def download(request):
  if not ENABLE_DOWNLOAD.get():
    return serve_403_error(request)

  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  file_format = request.POST.get('format', 'csv')
  user_agent = request.META.get('HTTP_USER_AGENT')
  file_name = _get_snippet_name(notebook)

  content_generator = get_api(request, snippet).download(notebook, snippet, file_format=file_format)
  response = export_csvxls.make_response(content_generator, file_format, file_name, user_agent=user_agent)

  if snippet['id']:
    response.set_cookie(
      'download-%s' % snippet['id'],
      json.dumps({
        'truncated': 'false',
        'row_counter': '0'
      }),
      max_age=DOWNLOAD_COOKIE_AGE
    )

  if response:
    request.audit = {
      'operation': 'DOWNLOAD',
      'operationText': 'User %s downloaded results from %s as %s' % (request.user.username, _get_snippet_name(notebook), file_format),
      'allowed': True
    }

  return response
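# Illustrative sketch, not from the Hue source: the form fields this download view reads from
# request.POST. 'notebook' and 'snippet' are JSON-encoded documents and 'format' picks the
# export writer ('csv' by default); every concrete value below is made up. On success the view
# also sets a 'download-<snippet id>' cookie whose payload (truncated flag, row counter) lets
# the frontend track the state of the download.
example_download_post = {
  'notebook': json.dumps({'name': 'weblog report', 'snippets': []}),
  'snippet': json.dumps({'id': 'abc123', 'type': 'hive', 'statement': 'SELECT * FROM web_logs LIMIT 100'}),
  'format': 'csv',
}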
def download(self, notebook, snippet, format, user_agent=None):
  file_name = _get_snippet_name(notebook)
  results = self._execute(notebook, snippet)
  db = FixedResult(results)

  return data_export.download(None, format, db, id=snippet['id'], file_name=file_name, user_agent=user_agent)
def download(self, notebook, snippet, format, user_agent=None):
  file_name = _get_snippet_name(notebook)
  guid = uuid.uuid4().hex

  connection = self.engine.connect()
  result = connection.execute(snippet['statement'])

  CONNECTION_CACHE[guid] = {'connection': connection, 'result': result}

  db = FixedResult([col[0] if type(col) is dict or type(col) is tuple else col for col in result.cursor.description])

  def callback():
    connection = CONNECTION_CACHE.get(guid)
    if connection:
      connection['connection'].close()
      del CONNECTION_CACHE[guid]

  return data_export.download({'guid': guid}, format, db, id=snippet['id'], file_name=file_name, callback=callback)
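# Illustrative sketch, not from the Hue source: how a download generator might drain the
# connection stashed in CONNECTION_CACHE and then run the cleanup callback. 'handle' is the
# {'guid': ...} dict passed to data_export.download above; fetchmany() is the standard
# SQLAlchemy result API, the rest of this helper is assumed for illustration.
def iterate_cached_result(handle, callback, batch_size=1000):
  cached = CONNECTION_CACHE.get(handle['guid'])
  try:
    while cached:
      rows = cached['result'].fetchmany(batch_size)
      if not rows:
        break
      for row in rows:
        yield row
  finally:
    callback()  # closes the connection and evicts the cache entry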
def download(self, notebook, snippet, format, user_agent=None, max_rows=None, store_data_type_in_header=False):
  file_name = _get_snippet_name(notebook)

  data, description = query_and_fetch(self.db, snippet['statement'])
  db = FixedResult(data, description)

  return data_export.download(None, format, db, id=snippet['id'], file_name=file_name)
def download(self, notebook, snippet, format, user_agent=None, max_rows=None, store_data_type_in_header=False):
  file_name = _get_snippet_name(notebook)
  results = self._execute(notebook, snippet)
  db = FixedResult(results)

  return data_export.download(None, format, db, id=snippet['id'], file_name=file_name, user_agent=user_agent)
def download(request):
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  file_format = request.POST.get('format', 'csv')

  response = get_api(request, snippet).download(notebook, snippet, file_format)

  if response:
    request.audit = {
      'operation': 'DOWNLOAD',
      'operationText': 'User %s downloaded results from %s as %s' % (request.user.username, _get_snippet_name(notebook), file_format),
      'allowed': True
    }

  return response
def send_result_file(request, channel_id, message_ts, doc, file_format):
  notebook = json.loads(doc.data)
  snippet = notebook['snippets'][0]
  snippet['statement'] = notebook['snippets'][0]['statement_raw']

  content_generator = get_api(request, snippet).download(notebook, snippet, file_format)
  file_format = 'xlsx'
  file_name = _get_snippet_name(notebook)

  try:
    slack_client.files_upload(
      channels=channel_id,
      file=next(content_generator),
      thread_ts=message_ts,
      filetype=file_format,
      filename='{name}.{format}'.format(name=file_name, format=file_format),
      initial_comment='Here is your result file!'
    )
  except Exception as e:
    raise PopupException(_("Cannot upload result file"), detail=e)
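# Illustrative sketch, not from the Hue source: a hypothetical caller that reacts to a Slack
# message by loading the saved query document and posting its result file into the same thread.
# The event field names and the Document2 lookup used here are assumptions made for this example.
def handle_result_request(request, event):
  doc = Document2.objects.get_by_uuid(user=request.user, uuid=event['doc_uuid'])
  send_result_file(
    request,
    channel_id=event['channel'],
    message_ts=event['ts'],
    doc=doc,
    file_format='xls',
  )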
def export_result(request):
  response = {'status': -1, 'message': _('Success')}

  # Passed by check_document_access_permission but unused by APIs
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  data_format = json.loads(request.POST.get('format', '"hdfs-file"'))
  destination = urllib.unquote(json.loads(request.POST.get('destination', '""')))
  overwrite = json.loads(request.POST.get('overwrite', 'false'))
  is_embedded = json.loads(request.POST.get('is_embedded', 'false'))
  start_time = json.loads(request.POST.get('start_time', '-1'))

  api = get_api(request, snippet)

  if data_format == 'hdfs-file': # Blocking operation, like downloading
    if request.fs.isdir(destination):
      if notebook.get('name'):
        destination += '/%(name)s.csv' % notebook
      else:
        destination += '/%(type)s-%(id)s.csv' % notebook
    if overwrite and request.fs.exists(destination):
      request.fs.do_as_user(request.user.username, request.fs.rmtree, destination)
    response['watch_url'] = api.export_data_as_hdfs_file(snippet, destination, overwrite)
    response['status'] = 0

    request.audit = {
      'operation': 'EXPORT',
      'operationText': 'User %s exported to HDFS destination: %s' % (request.user.username, destination),
      'allowed': True
    }

  elif data_format == 'hive-table':
    if is_embedded:
      sql, success_url = api.export_data_as_table(notebook, snippet, destination)

      task = make_notebook(
        name=_('Export %s query to table %s') % (snippet['type'], destination),
        description=_('Query %s to %s') % (_get_snippet_name(notebook), success_url),
        editor_type=snippet['type'],
        statement=sql,
        status='ready',
        database=snippet['database'],
        on_success_url=success_url,
        last_executed=start_time,
        is_task=True
      )
      response = task.execute(request)
    else:
      notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
      response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=save_as_table&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
      response['status'] = 0

    request.audit = {
      'operation': 'EXPORT',
      'operationText': 'User %s exported to Hive table: %s' % (request.user.username, destination),
      'allowed': True
    }

  elif data_format == 'hdfs-directory':
    if is_embedded:
      sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination)

      task = make_notebook(
        name=_('Export %s query to directory') % snippet['type'],
        description=_('Query %s to %s') % (_get_snippet_name(notebook), success_url),
        editor_type=snippet['type'],
        statement=sql,
        status='ready-execute',
        database=snippet['database'],
        on_success_url=success_url,
        last_executed=start_time,
        is_task=True
      )
      response = task.execute(request)
    else:
      notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
      response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=insert_as_query&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
      response['status'] = 0

    request.audit = {
      'operation': 'EXPORT',
      'operationText': 'User %s exported to HDFS directory: %s' % (request.user.username, destination),
      'allowed': True
    }

  elif data_format in ('search-index', 'dashboard'):
    # Open the result in the Dashboard via a SQL sub-query or the Import wizard (quick vs scalable)
    if is_embedded:
      notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))

      if data_format == 'dashboard':
        engine = notebook['type'].replace('query-', '')
        response['watch_url'] = reverse('dashboard:browse', kwargs={'name': notebook_id}) + '?source=query&engine=%(engine)s' % {'engine': engine}
        response['status'] = 0
      else:
        sample = get_api(request, snippet).fetch_result(notebook, snippet, rows=4, start_over=True)
        for col in sample['meta']:
          col['type'] = HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')

        response['status'] = 0
        response['id'] = notebook_id
        response['name'] = _get_snippet_name(notebook)
        response['source_type'] = 'query'
        response['target_type'] = 'index'
        response['target_path'] = destination
        response['sample'] = list(sample['data'])
        response['columns'] = [
          Field(col['name'], col['type']).to_dict() for col in sample['meta']
        ]
    else:
      notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
      response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=index_query&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
      response['status'] = 0

  if response.get('status') != 0:
    response['message'] = _('Exporting result failed.')

  return JsonResponse(response)
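# Illustrative sketch, not from the Hue source: the JSON-encoded form fields export_result
# reads from request.POST. 'format' selects the branch taken above ('hdfs-file', 'hive-table',
# 'hdfs-directory', 'search-index' or 'dashboard'); every concrete value here is made up.
example_export_post = {
  'notebook': json.dumps({'id': 42, 'uuid': 'hypothetical-uuid', 'type': 'query-hive', 'name': 'weblog report'}),
  'snippet': json.dumps({'id': 0, 'type': 'hive', 'database': 'default'}),
  'format': json.dumps('hdfs-directory'),
  'destination': json.dumps('/user/demo/export'),
  'overwrite': json.dumps(False),
  'is_embedded': json.dumps(True),
  'start_time': json.dumps(-1),
}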
def get_default(self, user, name, engine='solr', source='data'):
  fields = self.fields_data(user, name, engine, source=source)
  id_field = [field['name'] for field in fields if field.get('isId')]

  if id_field:
    id_field = id_field[0]
  else:
    id_field = ''  # Schemaless might not have an id

  if source == 'query':
    nb_doc = Document2.objects.document(user=user, doc_id=name)
    notebook = Notebook(document=nb_doc).get_data()
    label = _get_snippet_name(notebook, unique=True)
  else:
    label = name

  TEMPLATE = {
    "extracode": escape("<style type=\"text/css\">\nem {\n font-weight: bold;\n background-color: yellow;\n}</style>\n\n<script>\n</script>"),
    "highlighting": [""],
    "properties": {"highlighting_enabled": True},
    "template": """
    <div class="row-fluid">
      <div class="row-fluid">
        <div class="span12">%s</div>
      </div>
      <br/>
    </div>""" % ' '.join(['{{%s}}' % field['name'] for field in fields]),
    "isGridLayout": True,
    "showFieldList": True,
    "showGrid": True,
    "showChart": False,
    "chartSettings": {
      'chartType': 'bars',
      'chartSorting': 'none',
      'chartScatterGroup': None,
      'chartScatterSize': None,
      'chartScope': 'world',
      'chartX': None,
      'chartYSingle': None,
      'chartYMulti': [],
      'chartData': [],
      'chartMapLabel': None,
    },
    "fieldsAttributes": [self._make_gridlayout_header_field(field) for field in fields],
    "fieldsSelected": [],
    "leafletmap": {'latitudeField': None, 'longitudeField': None, 'labelField': None},
    "rows": 25,
  }

  FACETS = []

  return {
    'id': None,
    'name': name,
    'engine': engine,
    'source': source,
    'label': label,
    'enabled': False,
    'template': TEMPLATE,
    'facets': FACETS,
    'fields': fields,
    'idField': id_field,
  }
def execute_and_watch(request):
  notebook_id = request.GET.get('editor', request.GET.get('notebook'))
  snippet_id = int(request.GET['snippet'])
  action = request.GET['action']
  destination = request.GET['destination']

  notebook = Notebook(document=Document2.objects.get(id=notebook_id)).get_data()
  snippet = notebook['snippets'][snippet_id]
  editor_type = snippet['type']

  api = get_api(request, snippet)

  if action == 'save_as_table':
    sql, success_url = api.export_data_as_table(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute', database=snippet['database'])
  elif action == 'insert_as_query':
    # TODO: checks/workarounds in case of non impersonation or Sentry
    # TODO: keep older simpler way in case of known not many rows?
    sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination)
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute', database=snippet['database'], on_success_url=success_url)
  elif action == 'index_query':
    if destination == '__hue__':
      destination = _get_snippet_name(notebook, unique=True, table_format=True)
      live_indexing = True
    else:
      live_indexing = False

    sql, success_url = api.export_data_as_table(notebook, snippet, destination, is_temporary=True, location='')
    editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute')

    sample = get_api(request, snippet).fetch_result(notebook, snippet, 0, start_over=True)

    from indexer.api3 import _index # Will be moved to the lib
    from indexer.file_format import HiveFormat
    from indexer.fields import Field

    file_format = {
      'name': 'col',
      'inputFormat': 'query',
      'format': {'quoteChar': '"', 'recordSeparator': '\n', 'type': 'csv', 'hasHeader': False, 'fieldSeparator': '\u0001'},
      "sample": '',
      "columns": [
        Field(col['name'].rsplit('.')[-1], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
        for col in sample['meta']
      ]
    }

    if live_indexing:
      file_format['inputFormat'] = 'hs2_handle'
      file_format['fetch_handle'] = lambda rows, start_over: get_api(request, snippet).fetch_result(notebook, snippet, rows=rows, start_over=start_over)

    job_handle = _index(request, file_format, destination, query=notebook['uuid'])

    if live_indexing:
      return redirect(reverse('search:browse', kwargs={'name': destination}))
    else:
      return redirect(reverse('oozie:list_oozie_workflow', kwargs={'job_id': job_handle['handle']['id']}))
  else:
    raise PopupException(_('Action %s is unknown') % action)

  return render('editor.mako', request, {
    'notebooks_json': json.dumps([editor.get_data()]),
    'options_json': json.dumps({
      'languages': [{"name": "%s SQL" % editor_type.title(), "type": editor_type}],
      'mode': 'editor',
      'editor_type': editor_type,
      'success_url': success_url
    }),
    'editor_type': editor_type,
  })
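# Illustrative sketch, not from the Hue source: the kind of URL that drives execute_and_watch,
# matching the 'watch_url' values assembled by export_result. The notebook id, snippet index
# and destination path are made-up example values.
example_watch_url = (
  reverse('notebook:execute_and_watch') +
  '?action=insert_as_query&notebook=42&snippet=0&destination=/user/demo/export'
)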
def export_result(request):
  response = {'status': -1, 'message': _('Exporting result failed.')}

  # Passed by check_document_access_permission but unused by APIs
  notebook = json.loads(request.POST.get('notebook', '{}'))
  snippet = json.loads(request.POST.get('snippet', '{}'))
  data_format = json.loads(request.POST.get('format', 'hdfs-file'))
  destination = json.loads(request.POST.get('destination', ''))
  overwrite = json.loads(request.POST.get('overwrite', 'false'))
  is_embedded = json.loads(request.POST.get('is_embedded', 'false'))

  api = get_api(request, snippet)

  if data_format == 'hdfs-file': # Blocking operation, like downloading
    if request.fs.isdir(destination):
      if notebook.get('name'):
        destination += '/%(name)s.csv' % notebook
      else:
        destination += '/%(type)s-%(id)s.csv' % notebook
    if overwrite and request.fs.exists(destination):
      request.fs.do_as_user(request.user.username, request.fs.rmtree, destination)
    response['watch_url'] = api.export_data_as_hdfs_file(snippet, destination, overwrite)
    response['status'] = 0

    request.audit = {
      'operation': 'EXPORT',
      'operationText': 'User %s exported to HDFS destination: %s' % (request.user.username, destination),
      'allowed': True
    }

  elif data_format == 'hive-table':
    if is_embedded:
      sql, success_url = api.export_data_as_table(notebook, snippet, destination)

      task = make_notebook(
        name=_('Export %s query to table %s') % (snippet['type'], destination),
        description=_('Query %s to %s') % (_get_snippet_name(notebook), success_url),
        editor_type=snippet['type'],
        statement=sql,
        status='ready-execute',
        database=snippet['database'],
        on_success_url=success_url,
        is_task=True
      )
      response = task.execute(request)
    else:
      notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
      response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=save_as_table&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
      response['status'] = 0

    request.audit = {
      'operation': 'EXPORT',
      'operationText': 'User %s exported to Hive table: %s' % (request.user.username, destination),
      'allowed': True
    }

  elif data_format == 'hdfs-directory':
    if is_embedded:
      sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination)

      task = make_notebook(
        name=_('Export %s query to directory') % snippet['type'],
        description=_('Query %s to %s') % (_get_snippet_name(notebook), success_url),
        editor_type=snippet['type'],
        statement=sql,
        status='ready-execute',
        database=snippet['database'],
        on_success_url=success_url,
        is_task=True
      )
      response = task.execute(request)
    else:
      notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
      response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=insert_as_query&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
      response['status'] = 0

    request.audit = {
      'operation': 'EXPORT',
      'operationText': 'User %s exported to HDFS directory: %s' % (request.user.username, destination),
      'allowed': True
    }

  elif data_format == 'search-index':
    if is_embedded:
      if destination == '__hue__':
        destination = _get_snippet_name(notebook, unique=True, table_format=True)
        live_indexing = True
      else:
        live_indexing = False

      sample = get_api(request, snippet).fetch_result(notebook, snippet, 0, start_over=True)

      from indexer.api3 import _index # Will be moved to the lib
      from indexer.file_format import HiveFormat
      from indexer.fields import Field

      file_format = {
        'name': 'col',
        'inputFormat': 'query',
        'format': {'quoteChar': '"', 'recordSeparator': '\n', 'type': 'csv', 'hasHeader': False, 'fieldSeparator': '\u0001'},
        "sample": '',
        "columns": [
          Field(col['name'].rsplit('.')[-1], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict()
          for col in sample['meta']
        ]
      }

      if live_indexing:
        file_format['inputFormat'] = 'hs2_handle'
        file_format['fetch_handle'] = lambda rows, start_over: get_api(request, snippet).fetch_result(notebook, snippet, rows=rows, start_over=start_over)
        response['rowcount'] = _index(request, file_format, destination, query=notebook['uuid'])
        response['watch_url'] = reverse('search:browse', kwargs={'name': destination})
        response['status'] = 0
      else:
        response = _index(request, file_format, destination, query=notebook['uuid'])
    else:
      notebook_id = notebook['id'] or request.GET.get('editor', request.GET.get('notebook'))
      response['watch_url'] = reverse('notebook:execute_and_watch') + '?action=index_query&notebook=' + str(notebook_id) + '&snippet=0&destination=' + destination
      response['status'] = 0

    request.audit = {
      'operation': 'EXPORT',
      'operationText': 'User %s exported to Search index: %s' % (request.user.username, destination),
      'allowed': True
    }

  return JsonResponse(response)