def _list_query_history(user, querydict, page_size, prefix=""): """ _list_query_history(user, querydict, page_size, prefix) -> (page, filter_param) A helper to gather the history page. It understands all the GET params in ``list_query_history``, by reading keys from the ``querydict`` with the given ``prefix``. """ DEFAULT_SORT = ('-', 'date') # Descending date SORT_ATTR_TRANSLATION = dict( date='submission_date', state='last_state', name='design__name', type='design__type', ) db_queryset = models.QueryHistory.objects.select_related() # Filtering # # Queries without designs are the ones we submitted on behalf of the user, # (e.g. view table data). Exclude those when returning query history. if querydict.get(prefix + 'auto_query', 'on') != 'on': db_queryset = db_queryset.exclude(design__isnull=False, design__is_auto=True) user_filter = querydict.get(prefix + 'user', user.username) if user_filter != ':all': db_queryset = db_queryset.filter(owner__username=user_filter) # Design id design_id = querydict.get(prefix + 'design_id') if design_id: if design_id.isdigit(): db_queryset = db_queryset.filter(design__id=int(design_id)) else: raise PopupException( _('list_query_history requires design_id parameter to be an integer: %s' ) % design_id) # Search search_filter = querydict.get(prefix + 'search') if search_filter: db_queryset = db_queryset.filter( Q(design__name__icontains=search_filter) | Q(query__icontains=search_filter) | Q(owner__username__icontains=search_filter)) # Design type d_type = querydict.get(prefix + 'type') if d_type: if d_type not in list(SavedQuery.TYPES_MAPPING.keys()): LOG.warn('Bad parameter to list_query_history: type=%s' % (d_type, )) else: db_queryset = db_queryset.filter( design__type=SavedQuery.TYPES_MAPPING[d_type]) # If recent query recent = querydict.get('recent') if recent: db_queryset = db_queryset.filter(is_cleared=False) # Ordering sort_key = querydict.get(prefix + 'sort') if sort_key: sort_dir, sort_attr = '', sort_key if sort_key[0] == '-': sort_dir, sort_attr = '-', sort_key[1:] if sort_attr not in SORT_ATTR_TRANSLATION: LOG.warn('Bad parameter to list_query_history: sort=%s' % (sort_key, )) sort_dir, sort_attr = DEFAULT_SORT else: sort_dir, sort_attr = DEFAULT_SORT db_queryset = db_queryset.order_by( sort_dir + SORT_ATTR_TRANSLATION[sort_attr], '-id') # Get the total return count before slicing total_count = db_queryset.count() # Slicing (must be the last filter applied) pagenum = int(querydict.get(prefix + 'page', 1)) if pagenum < 1: pagenum = 1 db_queryset = db_queryset[page_size * (pagenum - 1):page_size * pagenum] paginator = Paginator(db_queryset, page_size, allow_empty_first_page=True) try: page = paginator.page(pagenum) except EmptyPage: page = None # We do slicing ourselves, rather than letting the Paginator handle it, in order to # update the last_state on the running queries if page: for history in page.object_list: _update_query_state(history.get_full_object()) # We need to pass the parameters back to the template to generate links keys_to_copy = [ prefix + key for key in ('user', 'type', 'sort', 'design_id', 'auto_query', 'search') ] filter_params = copy_query_dict(querydict, keys_to_copy) return paginator, page, filter_params
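# Hedged usage sketch (not the actual Hue view): how a list view might consume the three
# values returned above. The view name, template, page size and "q-" prefix are assumptions.
def list_query_history_example(request):
  page_size = 50
  paginator, page, filter_params = _list_query_history(request.user, request.GET, page_size, prefix="q-")
  return render('list_history.mako', request, {
    'page': page,
    'paginator': paginator,
    'filter_params': filter_params,
  })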
def collection(self, name):
  try:
    collections = self.collections()
    return collections[name]
  except Exception as e:
    raise PopupException(e, title=_('Error while accessing Solr'))
def app(self, appid): try: job = NativeYarnApi(self.user).get_job(jobid=appid) except ApplicationNotRunning as e: if e.job.get('state', '').lower() == 'accepted': rm_api = resource_manager_api.get_resource_manager(self.user) job = Application(e.job, rm_api) else: raise e # Job has not yet been accepted by RM except JobExpired as e: raise PopupException(_('Job %s has expired.') % appid, detail=_('Cannot be found on the History Server.')) except Exception as e: msg = 'Could not find job %s.' LOG.exception(msg % appid) raise PopupException(_(msg) % appid, detail=e) app = massage_job_for_json(job, user=self.user) common = { 'id': app['id'], 'name': app['name'], 'type': app['applicationType'], 'status': app['status'], 'apiStatus': self._api_status(app['status']), 'user': app['user'], 'progress': app['progress'], 'duration': app['durationMs'], 'submitted': app['startTimeMs'], 'canWrite': app['canKill'], } if app['applicationType'] == 'MR2' or app['applicationType'] == 'MAPREDUCE': common['type'] = 'MAPREDUCE' if app['desiredMaps'] is None or app['finishedMaps'] is None: app['mapsPercentComplete'] = 100 if app['desiredReduces'] is None or app['finishedReduces'] is None: app['reducesPercentComplete'] = 100 common['properties'] = { 'maps_percent_complete': app['mapsPercentComplete'] or 0, 'reduces_percent_complete': app['reducesPercentComplete'] or 0, 'finishedMaps': app['finishedMaps'] or 0, 'finishedReduces': app['finishedReduces'] or 0, 'desiredMaps': app['desiredMaps'] or 0, 'desiredReduces': app['desiredReduces'] or 0, 'durationFormatted': app['durationFormatted'], 'startTimeFormatted': app['startTimeFormatted'], 'diagnostics': app['diagnostics'] if app['diagnostics'] else '', 'tasks': [], 'metadata': [], 'counters': [] } elif app['applicationType'] == 'SPARK': app['logs'] = job.logs_url if hasattr(job, 'logs_url') else '' app['trackingUrl'] = job.trackingUrl if hasattr(job, 'trackingUrl') else '' common['type'] = 'SPARK' common['properties'] = { 'metadata': [{'name': name, 'value': value} for name, value in app.items() if name != "url" and name != "killUrl"], 'executors': [] } if hasattr(job, 'metrics'): common['metrics'] = job.metrics elif app['applicationType'] == 'YarnV2': common['applicationType'] = app.get('type') common['properties'] = { 'startTime': job.startTime, 'finishTime': job.finishTime, 'elapsedTime': job.duration, 'attempts': [], 'diagnostics': job.diagnostics } return common
return wraps(view_func)(decorate) def get_job(request, job_id): try: job = get_api(request.user, request.jt).get_job(jobid=job_id) except ApplicationNotRunning, e: if e.job.get('state', '').lower() == 'accepted': rm_api = resource_manager_api.get_resource_manager(request.user) job = Application(e.job, rm_api) else: raise e # Job has not yet been accepted by RM except JobExpired, e: raise PopupException( _('Job %s has expired.') % job_id, detail=_('Cannot be found on the History Server.')) except Exception, e: msg = 'Could not find job %s.' LOG.exception(msg % job_id) raise PopupException(_(msg) % job_id, detail=e) return job def apps(request): return render( 'apps.mako', request, { 'is_embeddable': request.GET.get('is_embeddable', False), 'hiveserver2_impersonation_enabled': hiveserver2_impersonation_enabled()
try:
  users = sync_ldap_users()
  groups = sync_ldap_groups()
except ldap.LDAPError as e:
  LOG.error("LDAP Exception: %s" % e)
  raise PopupException(_('There was an error when communicating with LDAP'), detail=str(e))

# Create home dirs for every user sync'd
if is_ensuring_home_directory:
  for user in users:
    try:
      ensure_home_directory(fs, user.username)
    except (IOError, WebHdfsException) as e:
      raise PopupException(_("The import may not be complete, sync again."), detail=e)


def ensure_home_directory(fs, username):
  """
  Adds a user's home directory if it doesn't already exist.

  Throws IOError, WebHdfsException.
  """
  home_dir = '/user/%s' % username
  fs.do_as_user(username, fs.create_home_dir, home_dir)


def _check_remove_last_super(user_obj):
  """Raise an error if we're removing the last superuser"""
  if not user_obj.is_superuser:
def popup_exception_view(request, *args, **kwargs):
  raise PopupException(exc_msg, title="earráid", detail=exc_msg)
def download(self, notebook, snippet, format):
  raise PopupException('Downloading is not supported yet')
def upload(self, data, data_type='queries', source_platform='generic', workload_id=None): if data_type in ('table_stats', 'cols_stats'): data_suffix = '.json' if data_type == 'table_stats': extra_parameters = {'fileType': 'TABLE_STATS'} else: extra_parameters = {'fileType': 'COLUMN_STATS'} else: data_suffix = '.csv' extra_parameters = { 'colDelim': ',', 'rowDelim': '\n', "headerFields": [{ "count": 0, "name": "SQL_ID", "coltype": "SQL_ID", "use": True, "tag": "" }, { "count": 0, "name": "ELAPSED_TIME", "coltype": "NONE", "use": True, "tag": "" }, { "count": 0, "name": "SQL_FULLTEXT", "coltype": "SQL_QUERY", "use": True, "tag": "" }, { "count": 0, "name": "DATABASE", "coltype": "NONE", "use": True, "tag": "DATABASE" }], } f_queries_path = NamedTemporaryFile(suffix=data_suffix) f_queries_path.close( ) # Reopened as real file below to work well with the command try: f_queries = open(f_queries_path.name, 'w+') try: # Queries if data_suffix == '.csv': content_generator = OptimizerQueryDataAdapter(data) queries_csv = export_csvxls.create_generator( content_generator, 'csv') for row in queries_csv: f_queries.write(row) LOG.debug(row) else: # Table, column stats f_queries.write(json.dumps(data)) LOG.debug(json.dumps(data)) finally: f_queries.close() parameters = { 'tenant': self._product_name, 'fileLocation': f_queries.name, 'sourcePlatform': source_platform, } parameters.update(extra_parameters) response = self._api.call_api('upload', parameters) status = json.loads(response) status['count'] = len(data) return status except RestException, e: raise PopupException(e, title=_('Error while accessing Optimizer'))
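# Hedged usage sketch for upload(): one row of query workload data matching the four
# headerFields declared above (SQL_ID, ELAPSED_TIME, SQL_FULLTEXT, DATABASE). The row
# shape, its values and the 'api' instance name are assumptions, not the adapter's contract.
data = [
  ('query_0001', '1200', 'SELECT 1', 'default'),
]
status = api.upload(data, data_type='queries', source_platform='hive')
# status carries the parsed Optimizer response plus 'count' = len(data)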
def get_api(request, snippet): from notebook.connectors.oozie_batch import OozieApi if snippet.get('wasBatchExecuted') and not TASK_SERVER.ENABLED.get(): return OozieApi(user=request.user, request=request) if snippet['type'] == 'report': snippet['type'] = 'impala' interpreter = get_interpreter(connector_type=snippet['type'], user=request.user) interface = interpreter['interface'] if get_cluster_config(request.user)['has_computes']: compute = json.loads(request.POST.get( 'cluster', '""')) # Via Catalog autocomplete API or Notebook create sessions. if compute == '""' or compute == 'undefined': compute = None if not compute and snippet.get('compute'): # Via notebook.ko.js interpreter['compute'] = snippet['compute'] LOG.debug('Selected interpreter %s interface=%s compute=%s' % (interpreter['type'], interface, interpreter.get('compute') and interpreter['compute']['name'])) if interface == 'hiveserver2': from notebook.connectors.hiveserver2 import HS2Api return HS2Api(user=request.user, request=request, interpreter=interpreter) elif interface == 'oozie': return OozieApi(user=request.user, request=request) elif interface == 'livy': from notebook.connectors.spark_shell import SparkApi return SparkApi(request.user) elif interface == 'livy-batch': from notebook.connectors.spark_batch import SparkBatchApi return SparkBatchApi(request.user) elif interface == 'text' or interface == 'markdown': from notebook.connectors.text import TextApi return TextApi(request.user) elif interface == 'rdbms': from notebook.connectors.rdbms import RdbmsApi return RdbmsApi(request.user, interpreter=snippet['type'], query_server=snippet.get('query_server')) elif interface == 'jdbc': if interpreter['options'] and interpreter['options'].get( 'url', '').find('teradata') >= 0: from notebook.connectors.jdbc_teradata import JdbcApiTeradata return JdbcApiTeradata(request.user, interpreter=interpreter) if interpreter['options'] and interpreter['options'].get( 'url', '').find('awsathena') >= 0: from notebook.connectors.jdbc_athena import JdbcApiAthena return JdbcApiAthena(request.user, interpreter=interpreter) elif interpreter['options'] and interpreter['options'].get( 'url', '').find('presto') >= 0: from notebook.connectors.jdbc_presto import JdbcApiPresto return JdbcApiPresto(request.user, interpreter=interpreter) elif interpreter['options'] and interpreter['options'].get( 'url', '').find('clickhouse') >= 0: from notebook.connectors.jdbc_clickhouse import JdbcApiClickhouse return JdbcApiClickhouse(request.user, interpreter=interpreter) elif interpreter['options'] and interpreter['options'].get( 'url', '').find('vertica') >= 0: from notebook.connectors.jdbc_vertica import JdbcApiVertica return JdbcApiVertica(request.user, interpreter=interpreter) else: from notebook.connectors.jdbc import JdbcApi return JdbcApi(request.user, interpreter=interpreter) elif interface == 'teradata': from notebook.connectors.jdbc import JdbcApiTeradata return JdbcApiTeradata(request.user, interpreter=interpreter) elif interface == 'athena': from notebook.connectors.jdbc import JdbcApiAthena return JdbcApiAthena(request.user, interpreter=interpreter) elif interface == 'presto': from notebook.connectors.jdbc_presto import JdbcApiPresto return JdbcApiPresto(request.user, interpreter=interpreter) elif interface == 'sqlalchemy': from notebook.connectors.sql_alchemy import SqlAlchemyApi return SqlAlchemyApi(request.user, interpreter=interpreter) elif interface == 'solr': from notebook.connectors.solr import SolrApi return SolrApi(request.user, 
interpreter=interpreter) elif interface == 'hbase': from notebook.connectors.hbase import HBaseApi return HBaseApi(request.user) elif interface == 'kafka': from notebook.connectors.kafka import KafkaApi return KafkaApi(request.user) elif interface == 'pig': return OozieApi(user=request.user, request=request) # Backward compatibility until Hue 4 else: raise PopupException( _('Notebook connector interface not recognized: %s') % interface)
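# Minimal sketch (not from Hue itself) of how the dispatcher above is used: the snippet
# dict carries the editor type, which get_api() resolves to an interpreter interface.
# The field values and the surrounding notebook object are illustrative assumptions.
snippet = {'type': 'hive', 'wasBatchExecuted': False}
api = get_api(request, snippet)
result = api.fetch_result(notebook, snippet, rows=100, start_over=True)  # notebook: the snippet's parent document data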
def execute_and_watch(request): notebook_id = request.GET.get('editor', request.GET.get('notebook')) snippet_id = int(request.GET['snippet']) action = request.GET['action'] destination = request.GET['destination'] notebook = Notebook(document=Document2.objects.get(id=notebook_id)).get_data() snippet = notebook['snippets'][snippet_id] editor_type = snippet['type'] api = get_api(request, snippet) if action == 'save_as_table': sql, success_url = api.export_data_as_table(notebook, snippet, destination) editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute', database=snippet['database']) elif action == 'insert_as_query': # TODO: checks/workarounds in case of non impersonation or Sentry # TODO: keep older simpler way in case of known not many rows? sql, success_url = api.export_large_data_to_hdfs(notebook, snippet, destination) editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute', database=snippet['database'], on_success_url=success_url) elif action == 'index_query': if destination == '__hue__': destination = _get_snippet_name(notebook, unique=True, table_format=True) live_indexing = True else: live_indexing = False sql, success_url = api.export_data_as_table(notebook, snippet, destination, is_temporary=True, location='') editor = make_notebook(name='Execute and watch', editor_type=editor_type, statement=sql, status='ready-execute') sample = get_api(request, snippet).fetch_result(notebook, snippet, 0, start_over=True) from indexer.api3 import _index # Will ve moved to the lib from indexer.file_format import HiveFormat from indexer.fields import Field file_format = { 'name': 'col', 'inputFormat': 'query', 'format': {'quoteChar': '"', 'recordSeparator': '\n', 'type': 'csv', 'hasHeader': False, 'fieldSeparator': '\u0001'}, "sample": '', "columns": [ Field(col['name'].rsplit('.')[-1], HiveFormat.FIELD_TYPE_TRANSLATE.get(col['type'], 'string')).to_dict() for col in sample['meta'] ] } if live_indexing: file_format['inputFormat'] = 'hs2_handle' file_format['fetch_handle'] = lambda rows, start_over: get_api(request, snippet).fetch_result(notebook, snippet, rows=rows, start_over=start_over) job_handle = _index(request, file_format, destination, query=notebook['uuid']) if live_indexing: return redirect(reverse('search:browse', kwargs={'name': destination})) else: return redirect(reverse('oozie:list_oozie_workflow', kwargs={'job_id': job_handle['handle']['id']})) else: raise PopupException(_('Action %s is unknown') % action) return render('editor.mako', request, { 'notebooks_json': json.dumps([editor.get_data()]), 'options_json': json.dumps({ 'languages': [{"name": "%s SQL" % editor_type.title(), "type": editor_type}], 'mode': 'editor', 'editor_type': editor_type, 'success_url': success_url }), 'editor_type': editor_type, })
credentials = Credentials() try: credentials.fetch(api) except Exception, e: LOG.error(smart_str(e)) if USE_NEW_EDITOR.get(): workflows = [dict([('uuid', d.uuid), ('name', d.name)]) for d in Document2.objects.documents(request.user).search_documents(types=['oozie-workflow2'])] else: workflows = [dict([('uuid', d.content_object.uuid), ('name', d.content_object.name)]) for d in Document.objects.available_docs(Document2, request.user).filter(extra='workflow2')] if coordinator_id and not filter(lambda a: a['uuid'] == coordinator.data['properties']['workflow'], workflows): raise PopupException(_('You don\'t have access to the workflow of this coordinator.')) return render('editor2/coordinator_editor.mako', request, { 'coordinator_json': coordinator.to_json_for_html(), 'credentials_json': json.dumps(credentials.credentials.keys(), cls=JSONEncoderForHTML), 'workflows_json': json.dumps(workflows, cls=JSONEncoderForHTML), 'doc_uuid': doc.uuid if doc else '', 'can_edit_json': json.dumps(doc is None or doc.doc.get().is_editable(request.user)) }) @check_editor_access_permission def new_coordinator(request): return edit_coordinator(request)
class YarnApi(JobBrowserApi): """ List all the jobs with Resource Manager API. Get running single job information with MapReduce API. Get finished single job information with History Server API. The trick is that we use appid when the job is running and jobid when it is finished. We also suppose that each app id has only one MR job id. e.g. job_1355791146953_0105, application_1355791146953_0105 A better alternative might be to call the Resource Manager instead of relying on the type of job id. The perfect solution would be to have all this logic embedded """ def __init__(self, user): self.user = user self.resource_manager_api = resource_manager_api.get_resource_manager() self.mapreduce_api = mapreduce_api.get_mapreduce_api() self.history_server_api = history_server_api.get_history_server_api() def get_job_link(self, job_id): return self.get_job(job_id) @rm_ha def get_jobs(self, user, **kwargs): state_filters = { 'running': 'UNDEFINED', 'completed': 'SUCCEEDED', 'failed': 'FAILED', 'killed': 'KILLED', } filters = {} if kwargs['username']: filters['user'] = kwargs['username'] if kwargs['state'] and kwargs['state'] != 'all': filters['finalStatus'] = state_filters[kwargs['state']] json = self.resource_manager_api.apps(**filters) if type(json) == str and 'This is standby RM' in json: raise Exception(json) if json['apps']: jobs = [Application(app) for app in json['apps']['app']] else: return [] if kwargs['text']: text = kwargs['text'].lower() jobs = filter( lambda job: text in job.name.lower() or text in job.id.lower() or text in job.user.lower() or text in job.queue.lower(), jobs) return self.filter_jobs(user, jobs) def filter_jobs(self, user, jobs, **kwargs): check_permission = not SHARE_JOBS.get() and not user.is_superuser return filter( lambda job: not check_permission or user.is_superuser or job.user == user.username, jobs) @rm_ha def get_job(self, jobid): try: # App id jobid = jobid.replace('job', 'application') job = self.resource_manager_api.app(jobid)['app'] if job['state'] == 'ACCEPTED': raise ApplicationNotRunning(jobid, job) elif job['state'] == 'KILLED': return KilledYarnJob(self.resource_manager_api, job) if job.get('applicationType') == 'SPARK': job = SparkJob(job, self.resource_manager_api) elif job.get('applicationType') == 'MAPREDUCE': jobid = jobid.replace('application', 'job') if job['state'] in ('NEW', 'SUBMITTED', 'ACCEPTED', 'RUNNING'): json = self.mapreduce_api.job(self.user, jobid) job = YarnJob(self.mapreduce_api, json['job']) else: json = self.history_server_api.job(self.user, jobid) job = YarnJob(self.history_server_api, json['job']) else: job = Application(job, self.resource_manager_api) except ApplicationNotRunning, e: raise e except Exception, e: if 'NotFoundException' in str(e): raise JobExpired(jobid) else: raise PopupException('Job %s could not be found: %s' % (jobid, e), detail=e)
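# Illustration of the id convention the YarnApi docstring relies on: a MapReduce job id
# and its YARN application id differ only in prefix, so get_job() can translate between
# the Resource Manager and the MapReduce/History Server APIs with a string replace.
job_id = 'job_1355791146953_0105'
app_id = job_id.replace('job', 'application')   # 'application_1355791146953_0105'
assert app_id.replace('application', 'job') == job_id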
def check_request_permission(self, request):
  """Raise PopupException if request user doesn't have permission to modify workflow"""
  if not is_admin(request.user) and request.user.username != self.user:
    access_warn(request, _('Insufficient permission.'))
    raise PopupException(_("Permission denied. User %(username)s cannot modify user %(user)s's job.") %
                         dict(username=request.user.username, user=self.user))
if self.api.remove_collection(name): # Delete instance directory. try: root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name) with ZookeeperClient(hosts=get_solr_ensemble(), read_only=False) as zc: zc.delete_path(root_node) except Exception, e: # Re-create collection so that we don't have an orphan config self.api.add_collection(name) raise PopupException( _('Error in deleting Solr configurations.'), detail=e) else: raise PopupException( _('Could not remove collection. Check error logs for more info.' )) def get_index_schema(self, index_name): """ Returns a tuple of the unique key and schema fields for a given index """ try: field_data = self.api.fields(index_name) fields = self._format_flags(field_data['schema']['fields']) uniquekey = self.api.uniquekey(index_name) return uniquekey, fields except Exception, e: LOG.exception(e.message) raise IndexControllerException( _("Error in getting schema information for index '%s'" %
def import_documents(request): def is_reserved_directory(doc): return doc['fields']['type'] == 'directory' and doc['fields'][ 'name'] in (Document2.HOME_DIR, Document2.TRASH_DIR) try: if request.FILES.get('documents'): documents = request.FILES['documents'].read() else: documents = json.loads(request.POST.get('documents')) documents = json.loads(documents) except ValueError as e: raise PopupException( _('Failed to import documents, the file does not contain valid JSON.' )) # Validate documents if not _is_import_valid(documents): raise PopupException( _('Failed to import documents, the file does not contain the expected JSON schema for Hue documents.' )) docs = [] uuids_map = dict((doc['fields']['uuid'], None) for doc in documents if not is_reserved_directory(doc)) for doc in documents: # Filter docs to import, ignoring reserved directories (home and Trash) and history docs if not is_reserved_directory(doc): # Remove any deprecated fields if 'tags' in doc['fields']: doc['fields'].pop('tags') # If doc is not owned by current user, make a copy of the document with current user as owner if doc['fields']['owner'][0] != request.user.username: doc = _copy_document_with_owner(doc, request.user, uuids_map) else: # Update existing doc or create new doc = _create_or_update_document_with_owner( doc, request.user, uuids_map) # For oozie docs replace dependent uuids with the newly created ones if doc['fields']['type'].startswith('oozie-'): doc = _update_imported_oozie_document(doc, uuids_map) # If the doc contains any history dependencies, ignore them # NOTE: this assumes that each dependency is exported as an array using the natural PK [uuid, version, is_history] deps_minus_history = [ dep for dep in doc['fields'].get('dependencies', []) if len(dep) >= 3 and not dep[2] ] doc['fields']['dependencies'] = deps_minus_history # Replace illegal characters if '/' in doc['fields']['name']: new_name = doc['fields']['name'].replace('/', '-') LOG.warn( "Found illegal slash in document named: %s, renaming to: %s." % (doc['fields']['name'], new_name)) doc['fields']['name'] = new_name # Set last modified date to now doc['fields']['last_modified'] = datetime.now().replace( microsecond=0).isoformat() docs.append(doc) f = tempfile.NamedTemporaryFile(mode='w+', suffix='.json') f.write(json.dumps(docs)) f.flush() stdout = string_io() try: with transaction.atomic( ): # We wrap both commands to commit loaddata & sync management.call_command( 'loaddata', f.name, verbosity=3, traceback=True, stdout=stdout, commit=False ) # We need to use commit=False because commit=True will close the connection and make Document.objects.sync fail. Document.objects.sync() if request.POST.get('redirect'): return redirect(request.POST.get('redirect')) else: return JsonResponse({ 'status': 0, 'message': stdout.getvalue(), 'count': len(documents), 'created_count': len([doc for doc in documents if doc['pk'] is None]), 'updated_count': len([doc for doc in documents if doc['pk'] is not None]), 'username': request.user.username, 'documents': [ dict([('name', doc['fields']['name']), ('uuid', doc['fields']['uuid']), ('type', doc['fields']['type']), ('owner', doc['fields']['owner'][0])]) for doc in docs ] }) except Exception as e: LOG.error('Failed to run loaddata command in import_documents:\n %s' % stdout.getvalue()) return JsonResponse({'status': -1, 'message': smart_str(e)}) finally: stdout.close()
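# Minimal shape of one exported document, inferred only from the fields import_documents()
# reads above; real exports contain more fields, and every value here is illustrative.
documents = [{
  'pk': None,
  'fields': {
    'uuid': '1234abcd-0000-0000-0000-000000000000',
    'type': 'query-hive',
    'name': 'sample query',
    'owner': ['some_user'],
    'dependencies': [],
  }
}]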
def copy_document(request):
  uuid = json.loads(request.POST.get('uuid', '""'))

  if not uuid:
    raise PopupException(_('copy_document requires uuid'))

  # Document2 and Document model objects are linked and both are saved when saving
  document = Document2.objects.get_by_uuid(user=request.user, uuid=uuid)
  # Document model object
  document1 = document.doc.get()

  if document.type == 'directory':
    raise PopupException(_('Directory copy is not supported'))

  name = document.name + '-copy'

  # Make the copy of the Document2 model object
  copy_document = document.copy(name=name, owner=request.user)
  # Make the copy of Document model object too
  document1.copy(content_object=copy_document, name=name, owner=request.user)

  # Import workspace for all oozie jobs
  if document.type == 'oozie-workflow2' or document.type == 'oozie-bundle2' or document.type == 'oozie-coordinator2':
    from oozie.models2 import Workflow, Coordinator, Bundle, _import_workspace

    # Update the name field in the json 'data' field
    if document.type == 'oozie-workflow2':
      workflow = Workflow(document=document)
      workflow.update_name(name)
      workflow.update_uuid(copy_document.uuid)
      _import_workspace(request.fs, request.user, workflow)
      copy_document.update_data({'workflow': workflow.get_data()['workflow']})
      copy_document.save()

    if document.type == 'oozie-bundle2' or document.type == 'oozie-coordinator2':
      if document.type == 'oozie-bundle2':
        bundle_or_coordinator = Bundle(document=document)
      else:
        bundle_or_coordinator = Coordinator(document=document)
      json_data = bundle_or_coordinator.get_data_for_json()
      json_data['name'] = name
      json_data['uuid'] = copy_document.uuid
      copy_document.update_data(json_data)
      copy_document.save()
      _import_workspace(request.fs, request.user, bundle_or_coordinator)
  elif document.type == 'search-dashboard':
    from dashboard.models import Collection2

    collection = Collection2(request.user, document=document)
    collection.data['collection']['label'] = name
    collection.data['collection']['uuid'] = copy_document.uuid
    copy_document.update_data({'collection': collection.data['collection']})
    copy_document.save()
  # Keep the document and data in sync
  else:
    copy_data = copy_document.data_dict
    if 'name' in copy_data:
      copy_data['name'] = name
    if 'uuid' in copy_data:
      copy_data['uuid'] = copy_document.uuid
    copy_document.update_data(copy_data)
    copy_document.save()

  return JsonResponse({'status': 0, 'document': copy_document.to_dict()})
if api.remove_collection(name): # Delete instance directory. try: root_node = '%s/%s' % (ZK_SOLR_CONFIG_NAMESPACE, name) with ZookeeperClient(hosts=get_solr_ensemble(), read_only=False) as zc: zc.delete_path(root_node) except Exception, e: # Re-create collection so that we don't have an orphan config api.add_collection(name) raise PopupException( _('Error in deleting Solr configurations.'), detail=e) else: raise PopupException( _('Could not remove collection. Check error logs for more info.' )) def update_collection(self, name, fields): """ Only create new fields """ api = SolrApi(SOLR_URL.get(), self.user, SECURITY_ENABLED.get()) # Create only new fields # Fields that already exist, do not overwrite since there is no way to do that, currently. old_field_names = api.fields(name)['schema']['fields'].keys() new_fields = filter(lambda field: field['name'] not in old_field_names, fields) new_fields_filtered = [] for field in new_fields: new_field = {}
def index(request):
  try:
    overview = _get_global_overview()
  except Exception as e:
    raise PopupException(_('Could not correctly connect to Zookeeper.'), detail=e)
class SavedQuery(models.Model): """ Stores the query that people have save or submitted. Note that this used to be called QueryDesign. Any references to 'design' probably mean a SavedQuery. """ DEFAULT_NEW_DESIGN_NAME = _('My saved query') AUTO_DESIGN_SUFFIX = _(' (new)') TYPES = QUERY_TYPES TYPES_MAPPING = { 'beeswax': HQL, 'hql': HQL, 'impala': IMPALA, 'rdbms': RDBMS, 'spark': SPARK } type = models.IntegerField(null=False) owner = models.ForeignKey(User, db_index=True) # Data is a json of dictionary. See the beeswax.design module. data = models.TextField(max_length=65536) name = models.CharField(max_length=64) desc = models.TextField(max_length=1024) mtime = models.DateTimeField(auto_now=True) # An auto design is a place-holder for things users submit but not saved. # We still want to store it as a design to allow users to save them later. is_auto = models.BooleanField(default=False, db_index=True) is_trashed = models.BooleanField(default=False, db_index=True, verbose_name=_t('Is trashed'), help_text=_t('If this query is trashed.')) doc = generic.GenericRelation(Document, related_name='hql_doc') class Meta: ordering = ['-mtime'] def get_design(self): try: return HQLdesign.loads(self.data) except ValueError: # data is empty pass def clone(self): """clone() -> A new SavedQuery with a deep copy of the same data""" design = SavedQuery(type=self.type, owner=self.owner) design.data = copy.deepcopy(self.data) design.name = copy.deepcopy(self.name) design.desc = copy.deepcopy(self.desc) design.is_auto = copy.deepcopy(self.is_auto) return design @classmethod def create_empty(cls, app_name, owner, data): query_type = SavedQuery.TYPES_MAPPING[app_name] design = SavedQuery(owner=owner, type=query_type) design.name = SavedQuery.DEFAULT_NEW_DESIGN_NAME design.desc = '' design.data = data design.is_auto = True design.save() return design @staticmethod def get(id, owner=None, type=None): """ get(id, owner=None, type=None) -> SavedQuery object Checks that the owner and type match (when given). May raise PopupException (type/owner mismatch). May raise SavedQuery.DoesNotExist. """ try: design = SavedQuery.objects.get(id=id) except SavedQuery.DoesNotExist, err: msg = _('Cannot retrieve query id %(id)s.') % {'id': id} raise err if owner is not None and design.owner != owner: msg = _('Query id %(id)s does not belong to user %(user)s.') % { 'id': id, 'user': owner } LOG.error(msg) raise PopupException(msg) if type is not None and design.type != type: msg = _('Type mismatch for design id %(id)s (owner %(owner)s) - Expected %(expected_type)s, got %(real_type)s.') % \ {'id': id, 'owner': owner, 'expected_type': design.type, 'real_type': type} LOG.error(msg) raise PopupException(msg) return design
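# Hedged usage sketch: resolving a design for the requesting user. The owner/type checks
# above turn mismatches into PopupExceptions that surface in the UI; design_id is assumed
# to come from the request, and the caller handles DoesNotExist itself.
try:
  design = SavedQuery.get(design_id, owner=request.user, type=SavedQuery.TYPES_MAPPING['hql'])
except SavedQuery.DoesNotExist:
  raise PopupException(_('Cannot retrieve query id %(id)s.') % {'id': design_id})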
def process_view(self, request, view_func, view_args, view_kwargs): """ We also perform access logging in ``process_view()`` since we have the view function, which tells us the log level. The downside is that we don't have the status code, which isn't useful for status logging anyways. """ access_log_level = getattr(view_func, 'access_log_level', None) # First, skip views not requiring login # If the view has "opted out" of login required, skip if hasattr(view_func, "login_notrequired"): log_page_hit(request, view_func, level=access_log_level or logging.DEBUG) return None # There are certain django views which are also opt-out, but # it would be evil to go add attributes to them if view_func in DJANGO_VIEW_AUTH_WHITELIST: log_page_hit(request, view_func, level=access_log_level or logging.DEBUG) return None # If user is logged in, check that he has permissions to access the # app. if request.user.is_active and request.user.is_authenticated(): AppSpecificMiddleware.augment_request_with_app(request, view_func) # Until we get Django 1.3 and resolve returning the URL name, we just do a match of the name of the view try: access_view = 'access_view:%s:%s' % ( request._desktop_app, resolve(request.path)[0].__name__) except Exception, e: access_log(request, 'error checking view perm: %s', e, level=access_log_level) access_view = '' # Accessing an app can access an underlying other app. # e.g. impala or spark uses code from beeswax and so accessing impala shows up as beeswax here. # Here we trust the URL to be the real app we need to check the perms. app_accessed = request._desktop_app ui_app_accessed = get_app_name(request) if app_accessed != ui_app_accessed and ui_app_accessed not in ( 'logs', 'accounts', 'login'): app_accessed = ui_app_accessed if app_accessed and \ app_accessed not in ("desktop", "home", "about") and \ not (request.user.has_hue_permission(action="access", app=app_accessed) or request.user.has_hue_permission(action=access_view, app=app_accessed)): access_log(request, 'permission denied', level=access_log_level) return PopupException(_( "You do not have permission to access the %(app_name)s application." ) % { 'app_name': app_accessed.capitalize() }, error_code=401).response(request) else: log_page_hit(request, view_func, level=access_log_level) return None
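# Sketch of the view-level attributes this middleware inspects. In Hue these are normally
# set by decorators; setting them directly here is only to make the contract visible, and
# the view itself is an illustrative assumption.
def my_public_view(request):
  ...

my_public_view.login_notrequired = True           # skip the app-permission check above
my_public_view.access_log_level = logging.INFO    # log_page_hit() logs at this level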
def cores(self):
  try:
    return self._root.get('admin/cores', params={'wt': 'json'})['status']
  except RestException as e:
    raise PopupException('Error while accessing Solr: %s' % e)
def cores(self):
  try:
    params = self._get_params() + (('wt', 'json'),)
    return self._root.get('admin/cores', params=params)['status']
  except RestException as e:
    raise PopupException(e, title=_('Error while accessing Solr'))
if USE_NEW_EDITOR.get(): scheduled_uuid = coordinator.data['properties'][ 'workflow'] or coordinator.data['properties']['document'] if scheduled_uuid: try: document = Document2.objects.get(uuid=scheduled_uuid) except Document2.DoesNotExist as e: document = None coordinator.data['properties']['workflow'] = '' LOG.warn("Workflow with uuid %s doesn't exist: %s" % (scheduled_uuid, e)) if document and document.is_trashed: raise PopupException( _('Your workflow %s has been trashed!') % (document.name if document.name else '')) if document and not document.can_read(request.user): raise PopupException( _('You don\'t have access to the workflow or document of this coordinator.' )) else: workflows = [ dict([('uuid', d.content_object.uuid), ('name', d.content_object.name)]) for d in Document.objects.available_docs( Document2, request.user).filter(extra='workflow2') ] if coordinator_id and not filter(
def edit_user(request, username=None): """ edit_user(request, username = None) -> reply @type request: HttpRequest @param request: The request object @type username: string @param username: Default to None, when creating a new user """ if request.user.username != username and not request.user.is_superuser: raise PopupException( _("You must be a superuser to add or edit another user."), error_code=401) if username is not None: instance = User.objects.get(username=username) else: instance = None if request.user.is_superuser: form_class = SuperUserChangeForm else: form_class = UserChangeForm if request.method == 'POST': form = form_class(request.POST, instance=instance) if form.is_valid(): # All validation rules pass if instance is None: instance = form.save() get_profile(instance) else: if username != form.instance.username: raise PopupException(_("You cannot change a username."), error_code=401) if request.user.username == username and not form.instance.is_active: raise PopupException( _("You cannot make yourself inactive."), error_code=401) global __users_lock __users_lock.acquire() try: # form.instance (and instance) now carry the new data orig = User.objects.get(username=username) if orig.is_superuser: if not form.instance.is_superuser or not form.instance.is_active: _check_remove_last_super(orig) else: if form.instance.is_superuser and not request.user.is_superuser: raise PopupException( _("You cannot make yourself a superuser."), error_code=401) # All ok form.save() request.info(_('User information updated')) finally: __users_lock.release() # Ensure home directory is created, if necessary. if form.cleaned_data['ensure_home_directory']: try: ensure_home_directory(request.fs, instance.username) except (IOError, WebHdfsException), e: request.error( _('Cannot make home directory for user %s.' % instance.username)) if request.user.is_superuser: return redirect(reverse(list_users)) else: return redirect( reverse(edit_user, kwargs={'username': username}))
def _submit_coordinator(request, coordinator, mapping): try: wf = coordinator.workflow if IS_MULTICLUSTER_ONLY.get() and has_multi_cluster(): mapping['auto-cluster'] = { u'additionalClusterResourceTags': [], u'automaticTerminationCondition': u'EMPTY_JOB_QUEUE', #'u'NONE', u'cdhVersion': u'CDH514', u'clouderaManagerPassword': u'guest', u'clouderaManagerUsername': u'guest', u'clusterName': u'analytics4', # Add time variable u'computeWorkersConfiguration': { u'bidUSDPerHr': 0, u'groupSize': 0, u'useSpot': False }, u'environmentName': u'crn:altus:environments:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:environment:analytics/236ebdda-18bd-428a-9d2b-cd6973d42946', u'instanceBootstrapScript': u'', u'instanceType': u'm4.xlarge', u'jobSubmissionGroupName': u'', u'jobs': [ { u'failureAction': u'INTERRUPT_JOB_QUEUE', u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51', u'sparkJob': { u'applicationArguments': ['5'], u'jars': [ u's3a://datawarehouse-customer360/ETL/spark-examples.jar' ], u'mainClass': u'org.apache.spark.examples.SparkPi' } }, # { # u'failureAction': u'INTERRUPT_JOB_QUEUE', # u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51', # u'sparkJob': { # u'applicationArguments': ['10'], # u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'], # u'mainClass': u'org.apache.spark.examples.SparkPi' # } # }, # { # u'failureAction': u'INTERRUPT_JOB_QUEUE', # u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51', # u'sparkJob': { # u'applicationArguments': [u'filesystems3.conf'], # u'jars': [u's3a://datawarehouse-customer360/ETL/envelope-0.6.0-SNAPSHOT-c6.jar'], # u'mainClass': u'com.cloudera.labs.envelope.EnvelopeMain', # u'sparkArguments': u'--archives=s3a://datawarehouse-customer360/ETL/filesystems3.conf' # } # } ], u'namespaceName': u'crn:altus:sdx:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:namespace:analytics/7ea35fe5-dbc9-4b17-92b1-97a1ab32e410', u'publicKey': DEFAULT_PUBLIC_KEY.get(), u'serviceType': u'SPARK', u'workersConfiguration': {}, u'workersGroupSize': u'3' } wf_dir = Submission( request.user, wf, request.fs, request.jt, mapping, local_tz=coordinator.data['properties']['timezone']).deploy() properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)} properties.update(mapping) submission = Submission(request.user, coordinator, request.fs, request.jt, properties=properties) job_id = submission.run() return job_id except RestException, ex: LOG.exception('Error submitting coordinator') raise PopupException(_("Error submitting coordinator %s") % (coordinator, ), detail=ex._headers.get('oozie-error-message', ex), error_code=200)
def save_results_hdfs_file(request, query_history_id): """ Save the results of a query to an HDFS file. Do not rerun the query. """ response = {'status': 0, 'message': ''} query_history = authorized_get_query_history(request, query_history_id, must_exist=True) server_id, state = _get_query_handle_and_state(query_history) query_history.save_state(state) error_msg, log = None, None if request.method != 'POST': response['message'] = _('A POST request is required.') else: if not query_history.is_success(): response['message'] = _('This query is %(state)s. Results unavailable.') % {'state': state} response['status'] = -1 return JsonResponse(response) db = dbms.get(request.user, query_history.get_query_server_config()) form = beeswax.forms.SaveResultsFileForm({ 'target_file': request.POST.get('path'), 'overwrite': request.POST.get('overwrite', False), }) if form.is_valid(): target_file = form.cleaned_data['target_file'] overwrite = form.cleaned_data['overwrite'] try: handle, state = _get_query_handle_and_state(query_history) except Exception as ex: response['message'] = _('Cannot find query handle and state: %s') % str(query_history) response['status'] = -2 return JsonResponse(response) try: if overwrite and request.fs.exists(target_file): if request.fs.isfile(target_file): request.fs.do_as_user(request.user.username, request.fs.rmtree, target_file) else: raise PopupException(_("The target path is a directory")) upload(target_file, handle, request.user, db, request.fs) response['type'] = 'hdfs-file' response['id'] = query_history.id response['query'] = query_history.query response['path'] = target_file response['success_url'] = '/filebrowser/view=%s' % target_file response['watch_url'] = reverse(get_app_name(request) + ':api_watch_query_refresh_json', kwargs={'id': query_history.id}) except Exception as ex: error_msg, log = expand_exception(ex, db) response['message'] = _('The result could not be saved: %s.') % error_msg response['status'] = -3 else: response['status'] = 1 response['errors'] = form.errors return JsonResponse(response)
def describe_partitions(request, database, table): cluster = json.loads(request.POST.get('cluster', '{}')) db = _get_db(user=request.user, cluster=cluster) table_obj = db.get_table(database, table) if not table_obj.partition_keys: raise PopupException(_("Table '%(table)s' is not partitioned.") % {'table': table}) reverse_sort = request.GET.get("sort", "desc").lower() == "desc" if request.method == "POST": partition_filters = {} for part in table_obj.partition_keys: if request.GET.get(part.name): partition_filters[part.name] = request.GET.get(part.name) partition_spec = ','.join(["%s='%s'" % (k, v) for k, v in list(partition_filters.items())]) else: partition_spec = '' try: partitions = db.get_partitions(database, table_obj, partition_spec, reverse_sort=reverse_sort) except: LOG.exception('Table partitions could not be retrieved') partitions = [] massaged_partitions = [_massage_partition(database, table_obj, partition) for partition in partitions] if request.method == "POST" or request.GET.get('format', 'html') == 'json': return JsonResponse({ 'partition_keys_json': [partition.name for partition in table_obj.partition_keys], 'partition_values_json': massaged_partitions, }) else: apps_list = _get_apps(request.user, '') return render("metastore.mako", request, { 'apps': apps_list, 'breadcrumbs': [{ 'name': database, 'url': reverse('metastore:show_tables', kwargs={'database': database}) }, { 'name': table, 'url': reverse('metastore:describe_table', kwargs={'database': database, 'table': table}) }, { 'name': 'partitions', 'url': reverse('metastore:describe_partitions', kwargs={'database': database, 'table': table}) }, ], 'database': database, 'table': table_obj, 'partitions': partitions, 'partition_keys_json': json.dumps([partition.name for partition in table_obj.partition_keys]), 'partition_values_json': json.dumps(massaged_partitions), 'request': request, 'has_write_access': has_write_access(request.user), 'is_optimizer_enabled': has_optimizer(), 'is_navigator_enabled': has_catalog(request.user), 'optimizer_url': get_optimizer_url(), 'navigator_url': get_catalog_url(), 'is_embeddable': request.GET.get('is_embeddable', False), 'source_type': _get_servername(db), })
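# Worked example of the partition_spec string built above; the filter values are assumptions.
partition_filters = {'year': '2024', 'month': '05'}
partition_spec = ','.join(["%s='%s'" % (k, v) for k, v in list(partition_filters.items())])
# partition_spec == "year='2024',month='05'", which is what db.get_partitions() receives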
def generate_config(self, source, destination): configs = [] if source['channelSourceType'] == 'directory': agent_source = ''' tier1.sources.source1.type = exec tier1.sources.source1.command = tail -F %(directory)s tier1.sources.source1.channels = channel1 ''' % { 'directory': source['channelSourcePath'] } elif source['channelSourceType'] == 'kafka': agent_source = ''' tier1.sources.source1.type = org.apache.flume.source.kafka.KafkaSource tier1.sources.source1.channels = channel1 tier1.sources.source1.batchSize = 5000 tier1.sources.source1.batchDurationMillis = 2000 tier1.sources.source1.kafka.bootstrap.servers = localhost:9092 tier1.sources.source1.kafka.topics = test1, test2 tier1.sources.source1.kafka.consumer.group.id = custom.g.id ''' % { 'directory': source['channelSourcePath'] } else: raise PopupException(_('Input format not recognized: %(channelSourceType)s') % source) if destination['ouputFormat'] == 'file': agent_sink = ''' a1.channels = c1 a1.sinks = k1 a1.sinks.k1.type = hdfs a1.sinks.k1.channel = c1 a1.sinks.k1.hdfs.path = /flume/events/%y-%m-%d/%H%M/%S a1.sinks.k1.hdfs.filePrefix = events- a1.sinks.k1.hdfs.round = true a1.sinks.k1.hdfs.roundValue = 10 a1.sinks.k1.hdfs.roundUnit = minute''' elif destination['ouputFormat'] == 'table': agent_sink = ''' a1.channels = c1 a1.channels.c1.type = memory a1.sinks = k1 a1.sinks.k1.type = hive a1.sinks.k1.channel = c1 a1.sinks.k1.hive.metastore = thrift://127.0.0.1:9083 a1.sinks.k1.hive.database = logsdb a1.sinks.k1.hive.table = weblogs a1.sinks.k1.hive.partition = asia,%{country},%y-%m-%d-%H-%M a1.sinks.k1.useLocalTimeStamp = false a1.sinks.k1.round = true a1.sinks.k1.roundValue = 10 a1.sinks.k1.roundUnit = minute a1.sinks.k1.serializer = DELIMITED a1.sinks.k1.serializer.delimiter = "\t" a1.sinks.k1.serializer.serdeSeparator = '\t' a1.sinks.k1.serializer.fieldnames =id,,msg''' elif destination['ouputFormat'] == 'kafka': manager = ManagerApi() agent_sink = ''' tier1.sinks.sink1.type = org.apache.flume.sink.kafka.KafkaSink tier1.sinks.sink1.topic = hueAccessLogs tier1.sinks.sink1.brokerList = %(brokers)s tier1.sinks.sink1.channel = channel1 tier1.sinks.sink1.batchSize = 20''' % { 'brokers': manager.get_kafka_brokers() } elif destination['ouputFormat'] == 'index': # Morphline file configs.append(self.generate_morphline_config(destination)) # Flume config agent_sink = ''' tier1.sinks.sink1.type = org.apache.flume.sink.solr.morphline.MorphlineSolrSink tier1.sinks.sink1.morphlineFile = morphlines.conf tier1.sinks.sink1.morphlineId = hue_accesslogs_no_geo tier1.sinks.sink1.channel = channel1''' else: raise PopupException(_('Output format not recognized: %(ouputFormat)s') % destination) # TODO: use agent id: input + output and do not overide all the configs # TODO: use Kafka channel if possible flume_config = '''tier1.sources = source1 tier1.channels = channel1 tier1.sinks = sink1 %(sources)s tier1.channels.channel1.type = memory tier1.channels.channel1.capacity = 10000 tier1.channels.channel1.transactionCapacity = 1000 %(sinks)s''' % { 'sources': agent_source, 'sinks': agent_sink, } configs.append(('agent_config_file', flume_config)) return configs
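# Hedged usage sketch for generate_config(): only the keys read above are shown, and the
# values plus the 'indexer' instance name are assumptions ('ouputFormat' spelling matches the code).
configs = indexer.generate_config(
  source={'channelSourceType': 'kafka', 'channelSourcePath': '/var/log/hue/access.log'},
  destination={'ouputFormat': 'kafka'},
)
# configs is a list of (name, content) tuples, e.g. ('agent_config_file', '...flume config...')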
def _execute(self, function, *args, **kwargs):
  response = None
  try:
    response = function(*args, **kwargs)
  except Exception as e:
    raise PopupException(_('YARN RM returned a failed response: %s') % e)