def analyze_table(request, database, table, columns=None):
    """Kick off table (or column) statistics collection and return a watch URL as JSON."""
    app_name = get_app_name(request)
    cluster_config = json.loads(request.POST.get('cluster', '{}'))
    server_config = get_query_server_config(app_name, cluster=cluster_config)
    db = dbms.get(request.user, server_config)

    # Impala-only tables have to be analyzed through the Impala server.
    if db.get_table(database, table).is_impala_only and app_name != 'impala':
        db = dbms.get(request.user, get_query_server_config('impala'))

    response = {'status': -1, 'message': '', 'redirect': ''}

    if request.method != "POST":
        response['message'] = _('A POST request is required.')
        return JsonResponse(response)

    if columns is None:
        history = db.analyze_table(database, table)
    else:
        history = db.analyze_table_columns(database, table)

    response['watch_url'] = reverse('beeswax:api_watch_query_refresh_json', kwargs={'id': history.id})
    response['status'] = 0
    return JsonResponse(response)
def handle(self, *args, **options):
    """Close stale HiveServer2/Impala query operations.

    args[0]: age threshold in days (default 7).
    args[1]: 'all' to close every query regardless of state.

    Queries in a terminal state (expired/failed/available) submitted more
    than `days` days ago get their server-side operation handle closed and
    are marked expired locally.
    """
    days = int(args[0]) if len(args) >= 1 else 7
    close_all = args[1] == 'all' if len(args) >= 2 else False

    self.stdout.write('Closing (all=%s) HiveServer2/Impala queries older than %s days...\n' % (close_all, days))

    n = 0
    queries = HiveServerQueryHistory.objects.filter(last_state__in=[QueryHistory.STATE.expired.index, QueryHistory.STATE.failed.index, QueryHistory.STATE.available.index])
    if close_all:
        queries = HiveServerQueryHistory.objects.all()

    # BUG FIX: "older than N days" means submitted on or before the cutoff,
    # so the filter must be __lte — the original __gte selected the *newest*
    # queries instead (the sibling command in this file already uses __lte).
    queries = queries.filter(submission_date__lte=datetime.today() - timedelta(days=days))

    for query in queries:
        try:
            query_history = HiveServerQueryHistory.objects.get(id=query.id)
            if query_history.server_id is not None:
                handle = query_history.get_handle()
                dbms.get(user=query_history.owner).close_operation(handle)
                n += 1
            query.last_state = QueryHistory.STATE.expired.index
            query.save()
        except Exception as e:
            # An already-dropped server handle is expected; just mark expired.
            if 'Invalid OperationHandle' in str(e):
                query.last_state = QueryHistory.STATE.expired.index
                query.save()
            else:
                self.stdout.write('Error: %s\n' % e)
def handle(self, *args, **options):
    # Management command: close server-side operation handles for old
    # HiveServer2 queries and mark them expired in the local history.
    #
    # args[0]: age threshold in days (default 7).
    # args[1]: 'all' to include every query regardless of state.
    days = int(args[0]) if len(args) >= 1 else 7
    close_all = args[1] == 'all' if len(args) >= 2 else False

    self.stdout.write('Closing (all=%s) HiveServer2 queries older than %s days...\n' % (close_all, days))

    # Only terminal states by default; 'all' widens to every query.
    queries = QueryHistory.objects.filter(last_state__in=[QueryHistory.STATE.expired.value, QueryHistory.STATE.failed.value, QueryHistory.STATE.available.value])
    if close_all:
        queries = QueryHistory.objects.all()
    queries = queries.filter(submission_date__lte=datetime.today() - timedelta(days=days))

    # Local imports: hive_site parsing is only needed by this command and
    # requires HIVE_CONF_DIR at runtime.
    import os
    import beeswax
    from beeswax import conf
    from beeswax import hive_site

    try:
        beeswax.conf.HIVE_CONF_DIR.set_for_testing(os.environ['HIVE_CONF_DIR'])
    except:
        # Most likely HIVE_CONF_DIR is not exported in the environment.
        LOG.exception('failed to lookup HIVE_CONF_DIR in environment')
        self.stdout.write('Did you export HIVE_CONF_DIR=/etc/hive/conf?\n')
        raise

    hive_site.reset()
    hive_site.get_conf()

    closed_queries = 0
    already_closed_queries = 0

    for query in queries:
        try:
            query_history = QueryHistory.get(id=query.id)
            if query_history.server_id is not None:
                handle = query_history.get_handle()
                dbms.get(user=query_history.owner).close_operation(handle)
                closed_queries += 1
            else:
                already_closed_queries += 1
            query.last_state = QueryHistory.STATE.expired.value
            query.save()
        except Exception, e:
            # 'None' / 'Invalid OperationHandle' mean the server already
            # dropped the handle; treat the query as closed.
            if 'None' in str(e) or 'Invalid OperationHandle' in str(e):
                already_closed_queries += 1
                query.last_state = QueryHistory.STATE.expired.value
                query.save()
            else:
                self.stdout.write('Info: %s\n' % e)
def database_initialize(request): """ Install the tables for this application """ # Connexion to the db query_server = get_query_server_config(name='impala') db = dbms.get(request.user, query_server=query_server) # The sql queries sql = "DROP TABLE IF EXISTS map_sample_id; CREATE TABLE map_sample_id (internal_sample_id STRING, customer_sample_id STRING, date_creation TIMESTAMP, date_modification TIMESTAMP); DROP TABLE IF EXISTS sample_files; CREATE TABLE sample_files (id STRING, internal_sample_id STRING, file_path STRING, file_type STRING, date_creation TIMESTAMP, date_modification TIMESTAMP);" # The clinical db sql += "DROP TABLE IF EXISTS clinical_sample; CREATE TABLE clinical_sample (sample_id STRING, patient_id STRING, date_of_collection STRING, original_sample_id STRING, status STRING, sample_type STRING, biological_contamination STRING, storage_condition STRING, biobank_id STRING, pn_id STRING);" #DROP TABLE IF EXISTS variants; CREATE TABLE variants (id STRING, alternate_bases STRING, calls STRING, names STRING, info STRING, reference_bases STRING, quality DOUBLE, created TIMESTAMP, elem_start BIGINT, elem_end BIGINT, variantset_id STRING); DROP TABLE IF EXISTS variantsets; #CREATE TABLE variantsets (id STRING, dataset_id STRING, metadata STRING, reference_bounds STRING); #DROP TABLE IF EXISTS datasets; CREATE TABLE datasets (id STRING, is_public BOOLEAN, name STRING);''' # Executing the different queries tmp = sql.split(";") for hql in tmp: hql = hql.strip() if hql: query = hql_query(hql) handle = db.execute_and_wait(query, timeout_sec=5.0) return render('database.initialize.mako', request, locals())
def execute_directly(request, query, design, query_server, tablename=None, **kwargs):
    """Execute a query immediately and return JSON describing the new history entry."""
    if design is not None:
        # Re-fetch the design through the authorization layer.
        design = authorized_get_design(request, design.id)

    parameters = kwargs.pop('parameters', None)

    db = dbms.get(request.user, query_server)
    db.use(query.query.get('database', 'default'))

    history_obj = db.execute_query(query, design)
    watch_url = reverse(get_app_name(request) + ':api_watch_query_refresh_json', kwargs={'id': history_obj.id})

    if parameters is not None:
        history_obj.update_extra('parameters', parameters)
        history_obj.save()

    return JsonResponse({
        'status': 0,
        'id': history_obj.id,
        'watch_url': watch_url,
        'statement': history_obj.get_current_statement(),
        'is_redacted': history_obj.is_redacted,
    })
def show_tables(request, database=None):
    """Render the table list for a database, remembering the choice in a cookie."""
    if database is None:
        # Fall back to the last database the user browsed ('default' initially).
        database = request.COOKIES.get('hueBeeswaxLastDatabase', 'default')

    db = dbms.get(request.user)
    databases = db.get_databases()

    if request.method == 'POST':
        db_form = DbForm(request.POST, databases=databases)
        if db_form.is_valid():
            database = db_form.cleaned_data['database']
    else:
        db_form = DbForm(initial={'database': database}, databases=databases)

    tables = db.get_tables(database=database)

    context = {
        'breadcrumbs': [{'name': database, 'url': reverse('metastore:show_tables', kwargs={'database': database})}],
        'tables': tables,
        'db_form': db_form,
        'database': database,
        'tables_json': json.dumps(tables),
        'has_write_access': has_write_access(request.user),
    }
    response = render("tables.mako", request, context)
    response.set_cookie("hueBeeswaxLastDatabase", database, expires=90)
    return response
def sample_search(request):
    """
    Search the data related to a given sample id.

    Returns JSON: {'status': 1, 'data': [...]} on success, status 0 for a
    non-POST or empty request, status -1 when the query produced no handle.
    """
    result = {'status': -1, 'data': {}}

    # ROBUSTNESS FIX: .get() avoids a KeyError when 'sample_id' is absent.
    if request.method != 'POST' or not request.POST or not request.POST.get('sample_id'):
        result['status'] = 0
        return HttpResponse(json.dumps(result), mimetype="application/json")

    sample_id = str(request.POST['sample_id'])

    # Database connexion
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)

    customer_sample_id = str(request.user.id) + "_" + sample_id
    # SECURITY FIX: the id is interpolated into HQL; escape backslashes and
    # quotes so user input cannot break out of the string literal (no bind
    # parameters are available through hql_query here).
    escaped_id = customer_sample_id.replace("\\", "\\\\").replace("'", "\\'")

    # Selecting the files related to the sample id
    hql = "SELECT sample_files.id, sample_files.file_path FROM sample_files JOIN map_sample_id ON sample_files.internal_sample_id = map_sample_id.internal_sample_id WHERE map_sample_id.customer_sample_id = '" + escaped_id + "';"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        data = db.fetch(handle, rows=100)
        result['status'] = 1
        result['data'] = list(data.rows())
        db.close(handle)

    # Returning the data
    return HttpResponse(json.dumps(result), mimetype="application/json")
def config_validator(user):
    """Check that HiveServer2 is reachable by running a trivial query.

    Returns a list of (NICE_NAME, message) problem tuples; empty when healthy.
    """
    # dbms is dependent on beeswax.conf (this file)
    # import in method to avoid circular dependency
    from beeswax.design import hql_query
    from beeswax.server import dbms

    res = []

    try:
        try:
            if not 'test' in sys.argv:  # Avoid tests hanging
                server = dbms.get(user)
                query = hql_query("SELECT 'Hello World!';")
                handle = server.execute_and_wait(query, timeout_sec=10.0)

                if handle:
                    server.fetch(handle, rows=100)
                    server.close(handle)
        except StructuredThriftTransportException as e:
            if 'Error validating the login' in str(e):
                msg = 'Failed to authenticate to HiveServer2, check authentication configurations.'
                LOG.exception(msg)
                res.append((NICE_NAME, _(msg)))
            else:
                raise e
    except Exception as e:
        msg = "The application won't work without a running HiveServer2."
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))

    # BUG FIX: the validator must return its findings; previously the
    # function fell off the end and returned None, which breaks callers
    # that iterate over the result.
    return res
def handle(self, *args, **options):
    """Close HiveServer2/Impala sessions unused for more than N days.

    args[0]: age threshold in days (default 7).
    args[1]: application filter ('hive'/None maps to 'beeswax'; 'all' means no filter).
    """
    days = int(args[0]) if len(args) >= 1 else 7
    query_type = args[1] if len(args) >= 2 else None
    if query_type == 'hive' or query_type is None:
        query_type = 'beeswax'

    self.stdout.write('Closing (all=%s) HiveServer2/Impala sessions older than %s days...\n' % (query_type, days))

    closed_count = 0
    sessions = Session.objects.all()
    if query_type != 'all':
        sessions = sessions.filter(application=query_type)
    sessions = sessions.filter(last_used__lte=datetime.today() - timedelta(days=days))

    for session in sessions:
        try:
            resp = dbms.get(user=session.owner).close_session(session)
            # A "does not exist" reply just means the server already forgot it.
            if not 'Session does not exist!' in str(resp):
                self.stdout.write('Error: %s\n' % resp)
            closed_count += 1
        except Exception as e:
            if not 'Session does not exist!' in str(e):
                self.stdout.write('Error: %s\n' % e)
def teardown_class(cls):
    """Drop the databases/tables created by the suite (live cluster only), then reset the init flag."""
    if is_live_cluster():
        # Delete test DB and tables
        query_server = get_query_server_config()
        client = make_logged_in_client()
        user = User.objects.get(username='******')
        db = dbms.get(user, query_server)

        # Kill Spark context if running
        if is_hive_on_spark() and cluster.is_yarn():
            # TODO: We should clean up the running Hive on Spark job here
            pass

        for db_name in (cls.db_name, '%s_other' % cls.db_name):
            if db_name in db.get_databases():
                for table in db.get_tables(database=db_name):
                    make_query(client, 'DROP TABLE IF EXISTS `%(db)s`.`%(table)s`' % {'db': db_name, 'table': table}, wait=True)
                make_query(client, 'DROP VIEW IF EXISTS `%(db)s`.`myview`' % {'db': db_name}, wait=True)
                make_query(client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db_name}, wait=True)

                # Check the cleanup
                assert_false(db_name in db.get_databases())

    global _INITIALIZED
    _INITIALIZED = False
def describe_partitions(request, database, table):
    """Render the partition listing page; errors out for unpartitioned tables."""
    db = dbms.get(request.user)
    table_obj = db.get_table(database, table)

    if not table_obj.partition_keys:
        raise PopupException(_("Table '%(table)s' is not partitioned.") % {'table': table})

    partitions = db.get_partitions(database, table_obj, max_parts=None)

    breadcrumbs = [
        {'name': database, 'url': reverse('metastore:show_tables', kwargs={'database': database})},
        {'name': table, 'url': reverse('metastore:describe_table', kwargs={'database': database, 'table': table})},
        {'name': 'partitions', 'url': reverse('metastore:describe_partitions', kwargs={'database': database, 'table': table})},
    ]

    return render("describe_partitions.mako", request, {
        'breadcrumbs': breadcrumbs,
        'database': database,
        'table': table_obj,
        'partitions': partitions,
        'request': request,
    })
def put(self, request, original_variant, pk):
    # Allow to modify a variant in HBase/Impala.
    #
    # NOTE(review): debug artifact — dumps the request payload to a fixed
    # /tmp path without a context manager; looks like leftover debugging
    # and should probably be removed.
    f = open('/tmp/superhello.txt','w')
    f.write(json.dumps(request.data))
    f.close()

    # We convert the original and modified data to flatjson
    fc = formatConverters(input_file='stuff.json',output_file='stuff.json')
    original_flatjson = fc.convertVariantJsonToFlatJson(json_data=original_variant)
    modified_flatjson = fc.convertVariantJsonToFlatJson(json_data=request.data)

    # We convert the data to hbase, and we modify directly some fields (note: the keys are almost the same for hbase and impala)
    hbase_data = fc.convertVariantFlatJsonToHbase(original_data=original_flatjson,modified_data=modified_flatjson)

    # Impala - We make the query
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
    # NOTE(review): `query_data` is not defined anywhere in this method, so
    # this line raises NameError at runtime; the INSERT also lacks a VALUES
    # clause. This endpoint cannot work as written — needs a real fix.
    query = hql_query("INSERT INTO variant("+",".join(query_data)+")")
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        db.close(handle)
    else:
        raise Exception("Impossible to create the variant...")

    # HBase - We add the data in that table too
    hbaseApi = HbaseApi(user=request.user)
    currentCluster = hbaseApi.getClusters().pop()
    rowkey = pk
    hbaseApi.putRow(cluster=currentCluster['name'], tableName='variants', row=rowkey, data=hbase_data)
def show_tables(request, database=None):
    """Return table metadata for a database as JSON (json-format requests)."""
    if database is None:
        database = 'default'  # Assume always 'default'

    if request.REQUEST.get("format", "html") == "json":
        db = dbms.get(request.user)

        try:
            databases = db.get_databases()

            # Unknown database names silently fall back to 'default'.
            if database not in databases:
                database = 'default'

            if request.method == 'POST':
                db_form = DbForm(request.POST, databases=databases)
                if db_form.is_valid():
                    database = db_form.cleaned_data['database']
            else:
                db_form = DbForm(initial={'database': database}, databases=databases)

            search_filter = request.GET.get('filter', '')

            tables = db.get_tables_meta(database=database, table_names=search_filter)  # SparkSql returns []
            table_names = [table['name'] for table in tables]
        except Exception as e:
            raise PopupException(_('Failed to retrieve tables for database: %s' % database), detail=e)

        resp = JsonResponse({
            'status': 0,
            'database_meta': db.get_database(database),
            'tables': tables,
            'table_names': table_names,
            'search_filter': search_filter
        })
        # BUG FIX: the response was built but never returned, so the view
        # fell through and Django got None back.
        return resp
def guess_format(request):
    """Guess the parse format for a file, Hive table, or query result and return it as JSON."""
    file_format = json.loads(request.POST.get('fileFormat', '{}'))
    input_format = file_format['inputFormat']

    if input_format == 'file':
        indexer = Indexer(request.user, request.fs)
        stream = request.fs.open(file_format["path"])
        format_ = indexer.guess_format({"file": {"stream": stream, "name": file_format['path']}})
        _convert_format(format_)
    elif input_format == 'table':
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
        storage = dict([(delim['data_type'], delim['comment']) for delim in table_metadata.storage_details])
        table_format = table_metadata.details['properties']['format']

        if table_format == 'text':
            format_ = {"quoteChar": "\"", "recordSeparator": '\\n', "type": "csv", "hasHeader": False, "fieldSeparator": storage['serialization.format']}
        elif table_format == 'parquet':
            format_ = {"type": "parquet", "hasHeader": False,}
        else:
            raise PopupException('Hive table format %s is not supported.' % table_format)
    elif input_format == 'query':
        format_ = {"quoteChar": "\"", "recordSeparator": "\\n", "type": "csv", "hasHeader": False, "fieldSeparator": "\t"}  # \t --> CTRL+A

    return JsonResponse(format_)
def drop_table(request, database):
    """Drop the selected tables, either as a background notebook task or via the confirmation flow."""
    db = dbms.get(request.user)

    if request.method == 'POST':
        try:
            table_names = request.POST.getlist('table_selection')
            table_objects = [db.get_table(database, table) for table in table_names]
            skip_trash = request.POST.get('skip_trash') == 'on'

            if request.POST.get('is_embeddable'):
                # Generate DDL only and hand execution to a notebook task.
                sql = db.drop_tables(database, table_objects, design=None, skip_trash=skip_trash, generate_ddl_only=True)
                job = make_notebook(
                    name='Execute and watch',
                    editor_type='hive',
                    statement=sql.strip(),
                    status='ready',
                    database=database,
                    on_success_url='assist.db.refresh',
                    is_task=True
                )
                return JsonResponse(job.execute(request))
            else:
                # Can't be simpler without an important refactoring
                design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())
                query_history = db.drop_tables(database, table_objects, design, skip_trash=skip_trash)
                url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + reverse('metastore:show_tables', kwargs={'database': database})
                return redirect(url)
        except Exception as ex:
            error_message, log = dbms.expand_exception(ex, db)
            error = _("Failed to remove %(tables)s. Error: %(error)s") % {'tables': ','.join(table_names), 'error': error_message}
            raise PopupException(error, title=_("Hive Error"), detail=log)
def create_session(self, lang='hive', properties=None):
    """Open (or reuse) a session for the given language and describe it as a dict."""
    application = 'beeswax' if lang == 'hive' else lang

    session = Session.objects.get_session(self.user, application=application)
    if session is None:
        session = dbms.get(self.user, query_server=get_query_server_config(name=lang)).open_session(self.user)

    response = {'type': lang, 'id': session.id}

    if not properties:
        # Prefer the user's saved default configuration when enabled.
        config = DefaultConfiguration.objects.get_configuration_for_user(app=lang, user=self.user) if USE_DEFAULT_CONFIGURATION.get() else None
        properties = config.properties_list if config is not None else self.get_properties(lang)

    response['properties'] = properties

    if lang == 'impala':
        impala_settings = session.get_formatted_properties()
        http_addr = next((setting['value'] for setting in impala_settings if setting['key'].lower() == 'http_addr'), None)
        response['http_addr'] = http_addr

    return response
def show_tables(request, database=None):
    """Render the hcatalog table browser; POSTs get the rendered table list as JSON."""
    if database is None:
        database = _get_last_database(request, database)

    if request.method == 'POST':
        resp = {}
        try:
            tables = _get_table_list(request, database)
            table_list_rendered = django_mako.render_to_string("table_list.mako", dict(
                app_name=get_app_name(request),
                database=database,
                tables=tables,
            ))
        except Exception as ex:
            resp['error'] = escapejs(ex.message)
        else:
            resp['table_list_rendered'] = table_list_rendered
            resp['tables'] = tables
        return HttpResponse(json.dumps(resp))

    db = dbms.get(request.user)
    databases = db.get_databases()
    db_form = hcatalog.forms.DbForm(initial={'database': database}, databases=databases)

    page = render("show_tables.mako", request, {
        'database': database,
        'db_form': db_form,
    })
    page.set_cookie("hueHcatalogLastDatabase", database, expires=90)
    return page
def describe_table_json(request, database, table):
    """Return the column names/types of a table as JSON."""
    try:
        db = dbms.get(request.user)
        table = db.get_table(database, table)
        result = {"columns": [{"type": col.type, "name": col.name} for col in table.cols]}
    except NoSuchObjectException as e:
        # `table` is still the name string here since get_table raised.
        result = {"status": "failure", 'failureInfo': unicode(table + ' table not found')}
    # BUG FIX: the result dict was built but never sent back, so the view
    # returned None. Serialize it like the other JSON views in this file.
    return JsonResponse(result)
def alter_column(request, database, table):
    """Alter a column's name/type/comment and return the new definition as JSON.

    POST params: column (required), new_column_name, new_column_type,
    comment, partition_spec. Errors are reported in-band via status=1.
    """
    db = dbms.get(request.user)
    response = {'status': -1, 'data': ''}
    try:
        column = request.POST.get('column', None)

        if column is None:
            raise PopupException(_('alter_column requires a column parameter'))

        column_obj = db.get_column(database, table, column)
        if column_obj:
            # Unspecified attributes default to the current values.
            new_column_name = request.POST.get('new_column_name', column_obj.name)
            new_column_type = request.POST.get('new_column_type', column_obj.type)
            comment = request.POST.get('comment', None)
            partition_spec = request.POST.get('partition_spec', None)

            column_obj = db.alter_column(database, table, column, new_column_name, new_column_type, comment=comment, partition_spec=partition_spec)

            response['status'] = 0
            response['data'] = {
                'name': column_obj.name,
                'type': column_obj.type,
                'comment': column_obj.comment
            }
        else:
            raise PopupException(_('Column `%s`.`%s` `%s` not found') % (database, table, column))
    except Exception as ex:
        response['status'] = 1
        response['data'] = _("Failed to alter column `%s`.`%s` `%s`: %s") % (database, table, column, str(ex))
    # BUG FIX: the view must return an HTTP response; previously it fell
    # through and returned None.
    return JsonResponse(response)
def load_table(request, table):
    """Show and process the form that generates a LOAD DATA statement for a table."""
    table_obj = dbms.get(request.user).get_table('default', table)

    if request.method == "POST":
        form = beeswax.forms.LoadDataForm(table_obj, request.POST)

        if form.is_valid():
            # TODO(philip/todd): When PathField might refer to non-HDFS,
            # we need a pathfield.is_local function.
            pieces = ["LOAD DATA INPATH", " '%s'" % form.cleaned_data['path']]
            if form.cleaned_data['overwrite']:
                pieces.append(" OVERWRITE")
            pieces.append(" INTO TABLE ")
            pieces.append("`%s`" % (table,))
            hql = "".join(pieces)

            if form.partition_columns:
                assignments = ["%s='%s'" % (column_name, form.cleaned_data[key]) for key, column_name in form.partition_columns.iteritems()]
                hql += " PARTITION (" + ", ".join(assignments) + ")"

            on_success_url = urlresolvers.reverse(describe_table, kwargs={'table': table})
            return confirm_query(request, hql, on_success_url)
    else:
        form = beeswax.forms.LoadDataForm(table_obj)

    return render("load_table.mako", request, dict(form=form, table=table, action=request.get_full_path()))
def autocomplete(request, database=None, table=None, column=None, nested=None):
    """Return databases/tables/columns/nested-type metadata as JSON for autocompletion.

    Which level is returned depends on how many path components are given.
    """
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)

    do_as = request.user
    # Superusers (or users holding the impersonate permission) may browse as someone else.
    if (request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")) and 'doas' in request.GET:
        do_as = User.objects.get(username=request.GET.get('doas'))

    db = dbms.get(do_as, query_server)

    response = {}
    try:
        if database is None:
            response['databases'] = db.get_databases()
        elif table is None:
            response['tables'] = db.get_tables(database=database)
        elif column is None:
            t = db.get_table(database, table)
            response['hdfs_link'] = t.hdfs_link
            response['columns'] = [column.name for column in t.cols]
            response['extended_columns'] = massage_columns_for_json(t.cols)
        else:
            col = db.get_column(database, table, column)
            if col:
                parse_tree = parser.parse_column(col.name, col.type, col.comment)
                if nested:
                    parse_tree = _extract_nested_type(parse_tree, nested)
                response = parse_tree
            else:
                raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
    except (QueryServerTimeoutException, TTransportException) as e:
        # Surface service unavailability in-band.
        response['code'] = 503
        response['error'] = e.message
    # BUG FIX: the payload was built but never returned, so the view
    # produced None instead of a JSON response.
    return JsonResponse(response)
def guess_field_types(request):
    # Infer column names/types for a file or Hive table source and return
    # them as JSON for the indexer wizard.
    file_format = json.loads(request.POST.get('fileFormat', '{}'))

    if file_format['inputFormat'] == 'file':
        indexer = Indexer(request.user, request.fs)
        stream = request.fs.open(file_format["path"])
        # The stored format uses display values; convert back before guessing.
        _convert_format(file_format["format"], inverse=True)
        format_ = indexer.guess_field_types({
            "file": {
                "stream": stream,
                "name": file_format['path']
            },
            "format": file_format['format']
        })
    elif file_format['inputFormat'] == 'table':
        sample = get_api(request, {'type': 'hive'}).get_sample_data({'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
        format_ = {
            "sample": sample['rows'][:4],
            "columns": [
                Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
                for col in table_metadata.cols
            ]
        }
    elif file_format['inputFormat'] == 'query':
        #TODO get schema from explain query
        # NOTE(review): this branch leaves `format_` unbound, so the return
        # below raises NameError for 'query' input — needs an implementation
        # or an explicit error response.
        pass

    return JsonResponse(format_)
def drop_database(request):
    """Drop the selected databases — embedded (notebook task) or via the watch flow."""
    db = dbms.get(request.user)

    if request.method == 'POST':
        databases = request.POST.getlist('database_selection')

        try:
            design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())

            if request.POST.get('is_embeddable'):
                # Only generate the DDL; execution happens in a notebook task.
                sql = db.drop_databases(databases, design, generate_ddl_only=True)
                job = make_notebook(
                    name='Execute and watch',
                    editor_type='hive',
                    statement=sql.strip(),
                    status='ready',
                    database=None,
                    on_success_url='assist.db.refresh',
                    is_task=True
                )
                return JsonResponse(job.execute(request))
            else:
                query_history = db.drop_databases(databases, design)
                url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + reverse('metastore:databases')
                return redirect(url)
        except Exception as ex:
            error_message, log = dbms.expand_exception(ex, db)
            error = _("Failed to remove %(databases)s. Error: %(error)s") % {'databases': ','.join(databases), 'error': error_message}
            raise PopupException(error, title=_("Hive Error"), detail=log)
def create(self, django_user):
    """
    Create table in the Hive Metastore.

    Returns False if the table already exists, True once created.
    Raises InstallException on timeout or execution failure.
    """
    LOG.info('Creating table "%s"' % (self.name,))
    db = dbms.get(django_user, self.query_server)

    try:
        # Already exists?
        if self.app_name == 'impala':
            # Impala caches metadata; refresh so the existence check is accurate.
            db.invalidate_tables('default', [self.name])
        db.get_table('default', self.name)
        msg = _('Table "%(table)s" already exists.') % {'table': self.name}
        LOG.error(msg)
        return False
    except Exception:
        # get_table() raised, so the table does not exist: create it.
        query = hql_query(self.hql)

        try:
            results = db.execute_and_wait(query)

            if not results:
                msg = _('Error creating table %(table)s: Operation timeout.') % {'table': self.name}
                LOG.error(msg)
                # NOTE(review): raising inside this try means the timeout error
                # is caught by the handler below and re-wrapped with a second
                # message — confirm this double-wrapping is intended.
                raise InstallException(msg)

            return True
        except Exception, ex:
            msg = _('Error creating table %(table)s: %(error)s.') % {'table': self.name, 'error': ex}
            LOG.error(msg)
            raise InstallException(msg)
def init_example(request):
    """
    Allow to make some test for the developpers, to see if the insertion and the querying of data is correct
    """
    # NOTE: locals() is passed to the template at the end, so the variable
    # names below (result, data, ...) are part of the template contract.
    result = {'status': -1,'data': {}}

    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)

    # Deleting the db
    hql = "DROP TABLE IF EXISTS val_test_2;"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)

    # Creating the db
    hql = "CREATE TABLE val_test_2 (id int, token string);"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)

    # Adding some data
    hql = " INSERT OVERWRITE val_test_2 values (1, 'a'), (2, 'b'), (-1,'xyzzy');"
    # hql = "INSERT INTO TABLE testset_bis VALUES (2, 25.0)"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)

    # querying the data
    hql = "SELECT * FROM val_test_2"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        data = db.fetch(handle, rows=100)
        result['data'] = list(data.rows())
        db.close(handle)

    return render('database.initialize.mako', request, locals())
def describe_partitions(request, database, table):
    """Show the partitions of a partitioned table; raise for unpartitioned ones."""
    db = dbms.get(request.user)
    table_obj = db.get_table(database, table)

    if not table_obj.partition_keys:
        raise PopupException(_("Table '%(table)s' is not partitioned.") % {"table": table})

    partitions = db.get_partitions(database, table_obj, max_parts=None)

    crumbs = [
        {"name": database, "url": reverse("metastore:show_tables", kwargs={"database": database})},
        {"name": table, "url": reverse("metastore:describe_table", kwargs={"database": database, "table": table})},
        {"name": "partitions", "url": reverse("metastore:describe_partitions", kwargs={"database": database, "table": table})},
    ]

    context = {
        "breadcrumbs": crumbs,
        "database": database,
        "table": table_obj,
        "partitions": partitions,
        "request": request,
    }
    return render("describe_partitions.mako", request, context)
def show_tables(request, database=None):
    """Render the beeswax table browser, noting whether the examples are installed."""
    if database is None:
        # Assume always 'default'
        database = request.COOKIES.get('hueBeeswaxLastDatabase', 'default')

    db = dbms.get(request.user)
    databases = db.get_databases()

    if request.method == 'POST':
        db_form = DbForm(request.POST, databases=databases)
        if db_form.is_valid():
            database = db_form.cleaned_data['database']
    else:
        db_form = DbForm(initial={'database': database}, databases=databases)

    tables = db.get_tables(database=database)
    examples_installed = beeswax.models.MetaInstall.get().installed_example

    #table_selection = TableSelection(tables=tables)
    context = {
        'tables': tables,
        'examples_installed': examples_installed,
        'db_form': db_form,
        'database': database,
        'tables_json': json.dumps(tables),
    }
    return render("show_tables.mako", request, context)
def show_tables(request, database=None):
    """Render the metastore table list, validating the requested database name."""
    if database is None:
        database = request.COOKIES.get("hueBeeswaxLastDatabase", "default")  # Assume always 'default'

    db = dbms.get(request.user)
    databases = db.get_databases()

    # Unknown databases silently fall back to 'default'.
    if database not in databases:
        database = "default"

    if request.method == "POST":
        db_form = DbForm(request.POST, databases=databases)
        if db_form.is_valid():
            database = db_form.cleaned_data["database"]
    else:
        db_form = DbForm(initial={"database": database}, databases=databases)

    tables = db.get_tables(database=database)

    context = {
        "breadcrumbs": [{"name": database, "url": reverse("metastore:show_tables", kwargs={"database": database})}],
        "tables": tables,
        "db_form": db_form,
        "database": database,
        "tables_json": json.dumps(tables),
        "has_write_access": has_write_access(request.user),
    }
    page = render("tables.mako", request, context)
    page.set_cookie("hueBeeswaxLastDatabase", database, expires=90)
    return page
def index_file(request):
    """Create the Solr collection for the described source and launch the morphline indexing job."""
    file_format = json.loads(request.POST.get('fileFormat', '{}'))
    _convert_format(file_format["format"], inverse=True)
    collection_name = file_format["name"]

    indexer = Indexer(request.user, request.fs)

    unique_field = indexer.get_unique_field(file_format)
    unique_is_generated = indexer.is_unique_generated(file_format)

    schema_fields = indexer.get_kept_field_list(file_format['columns'])
    if unique_is_generated:
        # The unique key is synthesized, so it must be added to the schema explicitly.
        schema_fields += [{"name": unique_field, "type": "string"}]

    morphline = indexer.generate_morphline_config(collection_name, file_format, unique_field)

    manager = CollectionManagerController(request.user)
    if not manager.collection_exists(collection_name):
        manager.create_collection(collection_name, schema_fields, unique_key_field=unique_field)

    if file_format['inputFormat'] == 'table':
        db = dbms.get(request.user)
        table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
        input_path = table_metadata.path_location
    else:
        input_path = file_format["path"]

    job_handle = indexer.run_morphline(request, collection_name, morphline, input_path)

    #TODO if query generate insert
    return JsonResponse(job_handle)
def view_results(request, id, first_row=0):
    """
    Returns the view for the results of the QueryHistory with the given id.

    The query results MUST be ready.
    To display query results, one should always go through the execute_query view.
    If the result set has has_result_set=False, display an empty result.

    If ``first_row`` is 0, restarts (if necessary) the query read. Otherwise, just
    spits out a warning if first_row doesn't match the servers conception.
    Multiple readers will produce a confusing interaction here, and that's known.

    It understands the ``context`` GET parameter. (See execute_query().)
    """
    first_row = long(first_row)
    start_over = (first_row == 0)
    # Placeholder result object used when no rows can be fetched.
    results = type('Result', (object,), {
        'rows': 0,
        'columns': [],
        'has_more': False,
        'start_row': 0,
    })
    data = []
    fetch_error = False
    error_message = ''
    log = ''
    columns = []
    app_name = get_app_name(request)

    query_history = authorized_get_query_history(request, id, must_exist=True)
    query_server = query_history.get_query_server_config()
    db = dbms.get(request.user, query_server)

    handle, state = _get_query_handle_and_state(query_history)
    context_param = request.GET.get('context', '')
    query_context = parse_query_context(context_param)

    # Update the status as expired should not be accessible
    expired = state == models.QueryHistory.STATE.expired

    # Retrieve query results or use empty result if no result set
    try:
        if query_server['server_name'] == 'impala' and not handle.has_result_set:
            downloadable = False
        else:
            results = db.fetch(handle, start_over, 100)

            # Materialize and HTML escape results
            data = escape_rows(results.rows())

            # We display the "Download" button only when we know that there are results:
            downloadable = first_row > 0 or data

            log = db.get_log(handle)
            columns = results.data_table.cols()
    except Exception, ex:
        LOG.exception('error fetching results')
        fetch_error = True
        error_message, log = expand_exception(ex, db, handle)
    # NOTE(review): the view appears to continue beyond this chunk
    # (rendering of the fetched data is not visible here).
def config_validator(user):
    """Validate HiveServer2 connectivity and Hive warehouse access.

    Returns a list of (NICE_NAME, message) problem tuples (empty when OK).
    """
    # dbms is dependent on beeswax.conf (this file)
    # import in method to avoid circular dependency
    from beeswax.server import dbms

    res = []

    try:
        if not 'test' in sys.argv:  # Avoid tests hanging
            server = dbms.get(user)
            server.get_databases()
    except Exception:
        # Narrowed from a bare `except:` so system-exiting exceptions still propagate.
        res.append((NICE_NAME, _("The application won't work without a running HiveServer2.")))

    # BUG FIX: pre-initialize so the error message below cannot raise a
    # NameError when get_metastore_warehouse_dir() itself is what failed.
    warehouse = None
    try:
        from hadoop import cluster
        warehouse = beeswax.hive_site.get_metastore_warehouse_dir()
        fs = cluster.get_hdfs()
        fs.stats(warehouse)
    except Exception:
        return [(NICE_NAME, _('Failed to access Hive warehouse: %s') % warehouse)]

    return res
def show_tables(request, database=None):
    """JSON table listing for a database (json-format requests only)."""
    if database is None:
        database = 'default'  # Assume always 'default'

    if request.REQUEST.get("format", "html") == "json":
        db = dbms.get(request.user)

        try:
            databases = db.get_databases()

            if database not in databases:
                database = 'default'

            if request.method == 'POST':
                db_form = DbForm(request.POST, databases=databases)
                if db_form.is_valid():
                    database = db_form.cleaned_data['database']
            else:
                db_form = DbForm(initial={'database': database}, databases=databases)

            search_filter = request.GET.get('filter', '')

            tables = db.get_tables_meta(database=database, table_names=search_filter)  # SparkSql returns []
            table_names = [table['name'] for table in tables]
        except Exception as e:
            raise PopupException(_('Failed to retrieve tables for database: %s' % database), detail=e)

        resp = JsonResponse({
            'status': 0,
            'database_meta': db.get_database(database),
            'tables': tables,
            'table_names': table_names,
            'search_filter': search_filter
        })
        # BUG FIX: return the constructed response instead of dropping it
        # (the view previously returned None).
        return resp
def autocomplete(request, database=None, table=None):
  """
  Autocomplete endpoint: returns databases, or tables of `database`, or the
  columns of `database`.`table`, depending on which arguments are given.

  Superusers (or users with the 'impersonate' permission) may impersonate
  another user via the ?doas= GET parameter.
  """
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)

  do_as = request.user
  if (request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")) and 'doas' in request.GET:
    do_as = User.objects.get(username=request.GET.get('doas'))

  db = dbms.get(do_as, query_server)

  response = {}
  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    else:
      t = db.get_table(database, table)
      response['hdfs_link'] = t.hdfs_link
      response['columns'] = [column.name for column in t.cols]
      response['extended_columns'] = massage_columns_for_json(t.cols)
  except TTransportException, tx:
    # Thrift transport failure: query server unreachable.
    response['code'] = 503
    response['error'] = tx.message
def config_validator(user):
  """
  Validate that HiveServer2 is reachable and accepts our credentials.

  Returns a list of (app_name, error_message) tuples; empty list means healthy.
  """
  # dbms is dependent on beeswax.conf (this file)
  # import in method to avoid circular dependency
  from beeswax.server import dbms

  res = []

  try:
    try:
      if 'test' not in sys.argv:  # Avoid tests hanging
        server = dbms.get(user)
        server.get_databases()
    except StructuredThriftTransportException as e:
      # Distinguish authentication failures from plain connectivity problems.
      if 'Error validating the login' in str(e):
        msg = 'Failed to authenticate to HiveServer2, check authentication configurations.'
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))
      else:
        raise e
  except Exception as e:
    msg = "The application won't work without a running HiveServer2."
    LOG.exception(msg)
    res.append((NICE_NAME, _(msg)))

  # BUG FIX: previously fell off the end and implicitly returned None, while
  # callers of config validators expect a list of (name, message) tuples.
  return res
def get_exec_summary(request, query_history_id):
  """
  JSON endpoint returning the Impala execution summary for a finished query.

  Requires a valid, authorized query history id; status 0 on success, -1 with a
  message otherwise.
  """
  query_server = dbms.get_query_server_config()
  db = beeswax_dbms.get(request.user, query_server=query_server)

  response = {'status': -1}
  query_history = authorized_get_query_history(request, query_history_id, must_exist=True)

  if query_history is None:
    response['message'] = _('get_exec_summary requires a valid query_history_id')
  else:
    # The summary is fetched through the same session the query ran in.
    session = Session.objects.get_session(request.user, query_server['server_name'])
    operation_handle = query_history.get_handle().get_rpc_handle()
    session_handle = session.get_handle()
    summary = db.get_exec_summary(operation_handle, session_handle)
    response['status'] = 0
    response['summary'] = summary

  return JsonResponse(response)
def _get_db(self, snippet, is_async=False, interpreter=None):
  """
  Return a dbms client for the snippet's dialect.

  The dialect comes from the interpreter when available, otherwise from the
  snippet 'type' (backward compatibility without connectors). Synchronous Hive
  goes through the 'beeswax' server name; async Hive through 'hive'.
  """
  if interpreter and interpreter.get('dialect'):
    dialect = interpreter['dialect']
  else:
    dialect = snippet['type']  # Backward compatibility without connectors

  if dialect == 'hive':
    server_name = 'hive' if is_async else 'beeswax'
  elif dialect == 'llap':
    server_name = 'llap'
  elif dialect == 'impala':
    server_name = 'impala'
  else:
    server_name = 'sparksql'

  # Note: server_name is not used if interpreter is present
  query_server = get_query_server_config(name=server_name, connector=interpreter)
  return dbms.get(self.user, query_server=query_server)
def alter_database(request, database):
  """
  JSON endpoint applying a set of properties (key-value pairs) to a database.

  Expects a POSTed 'properties' JSON object; returns the refreshed database
  metadata (with an HDFS link) on success, status 1 with the error otherwise.
  """
  db = dbms.get(request.user)
  response = {'status': -1, 'data': ''}
  try:
    properties = request.POST.get('properties')

    if not properties:
      raise PopupException(_("Alter database requires a properties value of key-value pairs."))

    properties = json.loads(properties)
    db.alter_database(database, properties=properties)

    # Re-read metadata so the client sees the post-alter state.
    db_metadata = db.get_database(database)
    db_metadata['hdfs_link'] = location_to_url(db_metadata['location'])
    response['status'] = 0
    response['data'] = db_metadata
  except Exception, ex:
    response['status'] = 1
    response['data'] = _("Failed to alter database `%s`: %s") % (database, ex)
def show_tables(request, database=None):
  """
  HTML page listing the tables of a database.

  Remembers the last-used database in the 'hueBeeswaxLastDatabase' cookie and
  honors a POSTed DbForm database selection.
  """
  if database is None:
    database = request.COOKIES.get('hueBeeswaxLastDatabase', 'default') # Assume always 'default'

  db = dbms.get(request.user)
  databases = db.get_databases()

  if request.method == 'POST':
    db_form = DbForm(request.POST, databases=databases)
    if db_form.is_valid():
      database = db_form.cleaned_data['database']
  else:
    db_form = DbForm(initial={'database': database}, databases=databases)

  tables = db.get_tables(database=database)

  resp = render("tables.mako", request, {
      'breadcrumbs': [{
          'name': database,
          'url': reverse('metastore:show_tables', kwargs={'database': database})
      }],
      'tables': tables,
      'db_form': db_form,
      'database': database,
      'tables_json': json.dumps(tables),
      'has_write_access': has_write_access(request.user),
  })
  # Persist the selection for the next visit.
  resp.set_cookie("hueBeeswaxLastDatabase", database, expires=90)
  return resp
def drop_database(request):
  """
  Drop the databases selected in the POSTed 'database_selection' list.

  Submits the drop as a query and redirects to the watch page; raises a
  PopupException with the server log on failure.
  """
  db = dbms.get(request.user)

  if request.method == 'POST':
    databases = request.POST.getlist('database_selection')

    try:
      # Can't be simpler without an important refactoring
      design = SavedQuery.create_empty(app_name='beeswax', owner=request.user)
      query_history = db.drop_databases(databases, design)
      url = reverse('beeswax:watch_query', args=[query_history.id]) + '?on_success_url=' + reverse('metastore:databases')
      return redirect(url)
    except Exception, ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(databases)s. Error: %(error)s") % {'databases': ','.join(databases), 'error': error_message}
      raise PopupException(error, title=_("Beeswax Error"), detail=log)
def _update_query_state(query_history):
  """
  Update the last_state for a QueryHistory object. Returns success as True/False.

  This only occurs iff the current last_state is submitted or running, since the
  other states are stable, more-or-less.
  Note that there is a transition from available/failed to expired. That occurs
  lazily when the user attempts to view results that have expired.
  """
  if query_history.last_state <= models.QueryHistory.STATE.running.index:
    try:
      state_enum = dbms.get(query_history.owner, query_history.get_query_server_config()).get_state(query_history.get_handle())
      if state_enum is None:
        # Error was logged at the source
        return False
    except Exception as e:
      LOG.error(e)
      # Treat an unreachable server as a failed query rather than crashing.
      state_enum = models.QueryHistory.STATE.failed

    query_history.save_state(state_enum)

  # BUG FIX: the docstring promises True/False, but the success path implicitly
  # returned None; make the success value explicit.
  return True
def drop_partition(request, database, table):
  """
  Drop the partitions selected in the POSTed 'partition_selection' list.

  Submits the drop as a query and redirects to the watch page; raises a
  PopupException with the server log on failure.
  """
  db = dbms.get(request.user)

  if request.method == 'POST':
    # FIX: removed a no-op `[spec for spec in partition_specs]` identity copy.
    partition_specs = request.POST.getlist('partition_selection')

    try:
      design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())
      query_history = db.drop_partitions(database, table, partition_specs, design)
      url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + \
          reverse('metastore:describe_partitions', kwargs={'database': database, 'table': table})
      return redirect(url)
    except Exception as ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(partition)s. Error: %(error)s") % {'partition': '\n'.join(partition_specs), 'error': error_message}
      raise PopupException(error, title=_("Hive Error"), detail=log)
def download(request, id, format, user_agent=None):
  """
  Stream the results of query `id` to the client in the given export format.

  Returns a 403 page when result download is disabled; wraps any failure in a
  PopupException carrying the exception message.
  """
  if not ENABLE_DOWNLOAD.get():
    return serve_403_error(request)

  try:
    query_history = authorized_get_query_history(request, id, must_exist=True)
    db = dbms.get(request.user, query_history.get_query_server_config())
    LOG.debug('Download results for query %s: [ %s ]' % (query_history.server_id, query_history.query))

    return data_export.download(query_history.get_handle(), format, db, user_agent=user_agent)
  except Exception as e:
    # Prefer the exception's .message when present and non-empty.
    message = e.message if hasattr(e, 'message') and e.message else e
    raise PopupException(message, detail='')
def watch_query(request, id): """ Wait for the query to finish and (by default) displays the results of query id. It understands the optional GET params: on_success_url If given, it will be displayed when the query is successfully finished. Otherwise, it will display the view query results page by default. context A string of "name:data" that describes the context that generated this query result. It may be: - "table":"<table_name>" - "design":<design_id> All other GET params will be passed to on_success_url (if present). """ # Coerce types; manage arguments query_history = authorized_get_history(request, id, must_exist=True) db = dbms.get(request.user, query_history.get_query_server_config()) # GET param: context. context_param = request.GET.get('context', '') # GET param: on_success_url. Default to view_results results_url = reverse(get_app_name(request) + ':view_results', kwargs={'id': id, 'first_row': 0}) if request.GET.get('download', ''): results_url += '?download=true' on_success_url = request.GET.get('on_success_url') if not on_success_url: on_success_url = results_url # Go to next statement if asked to continue or when a statement with no dataset finished. if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results): try: query_history = db.execute_next_statement(query_history) except Exception, ex: pass
def top_tables(request):
  """
  JSON endpoint returning the most popular tables of a database.

  Uses randomized mock data when OPTIMIZER.MOCKING is on, otherwise queries the
  Optimizer API. POST params: 'database' (default 'default') and 'len'
  (maximum number of tables, default 1000).
  """
  response = {'status': -1}

  database = request.POST.get('database', 'default')
  # BUG FIX: the old code did `len = request.POST.get('len', 1000)`, which
  # shadowed the builtin len() and produced a *string* whenever the client
  # supplied the parameter, breaking the slice below. Coerce to int.
  limit = int(request.POST.get('len', 1000))

  if OPTIMIZER.MOCKING.get():
    from beeswax.server import dbms
    from beeswax.server.dbms import get_query_server_config
    db = dbms.get(request.user)
    tables = [{
        'name': table,
        'popularity': random.randint(1, 100),
        'column_count': random.randint(1, 100),
        'is_fact': bool(random.getrandbits(1))
      } for table in db.get_tables(database=database)
    ][:limit]
  else:
    # Optimizer payload shape, e.g.:
    # u'details': [{u'columnCount': 28, u'name': u'date_dim', u'patternCount': 136,
    #               u'workloadPercent': 89, u'total': 92, u'type': u'Dimension', u'eid': u'19'}, ...]
    api = OptimizerApi()
    data = api.top_tables()

    tables = [{
        'eid': table['eid'],
        'name': table['name'],
        'popularity': table['workloadPercent'],
        'column_count': table['columnCount'],
        'patternCount': table['patternCount'],
        'total': table['total'],
        'is_fact': table['type'] != 'Dimension'
      } for table in data['details']
    ]

  response['top_tables'] = tables
  response['status'] = 0

  return JsonResponse(response)
def drop_database(request):
  """
  Drop the databases selected in the POSTed 'database_selection' list.

  When 'is_embeddable' is set, generates the DDL and runs it as a notebook task
  (returning JSON); otherwise submits a query and redirects to the watch page.
  """
  db = dbms.get(request.user)

  if request.method == 'POST':
    databases = request.POST.getlist('database_selection')

    try:
      design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())

      if request.POST.get('is_embeddable'):
        # Generate DDL only and execute it through the notebook task machinery.
        sql = db.drop_databases(databases, design, generate_ddl_only=True)
        job = make_notebook(
            name='Execute and watch',
            editor_type='hive',
            statement=sql.strip(),
            status='ready',
            database=None,
            on_success_url='assist.db.refresh',
            is_task=True)
        return JsonResponse(job.execute(request))
      else:
        query_history = db.drop_databases(databases, design)
        url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + reverse('metastore:databases')
        return redirect(url)
    except Exception, ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(databases)s. Error: %(error)s") % {'databases': ','.join(databases), 'error': error_message}
      raise PopupException(error, title=_("Hive Error"), detail=log)
def handle(self, *args, **options):
  """
  Management command: close open HiveServer2 sessions older than N days.

  args[0]: age threshold in days (default 7); args[1] == 'all' closes sessions
  of every application instead of only 'beeswax'.
  """
  days = int(args[0]) if len(args) >= 1 else 7
  close_all = args[1] == 'all' if len(args) >= 2 else False

  self.stdout.write('Closing (all=%s) HiveServer2 sessions older than %s days...\n' % (close_all, days))

  # status_code=0 means the session is still open.
  sessions = Session.objects.filter(status_code=0)

  if not close_all:
    sessions = sessions.filter(application='beeswax')

  sessions = sessions.filter(last_used__lte=datetime.today() - timedelta(days=days))

  self.stdout.write('Found %d open HiveServer2 sessions to close' % len(sessions))

  import os
  import beeswax
  from beeswax import hive_site

  try:
    # Point beeswax at the local Hive config so it can reach the server.
    beeswax.conf.HIVE_CONF_DIR.set_for_testing(os.environ['HIVE_CONF_DIR'])
  except:
    LOG.exception('failed to lookup HIVE_CONF_DIR in environment')
    self.stdout.write('Did you export HIVE_CONF_DIR=/etc/hive/conf?\n')
    raise

  hive_site.reset()
  hive_site.get_conf()

  closed = 0
  skipped = 0
  for session in sessions:
    try:
      session = dbms.get(user=session.owner).close_session(session)
      closed += 1
    except Exception, e:
      # Best-effort: report and continue with the remaining sessions.
      skipped += 1
      self.stdout.write('Session with ID %d could not be closed: %s' % (session.id, str(e)))
def drop_table(request, database):
  """
  Drop the tables selected in the POSTed 'table_selection' list.

  Submits the drop as a query and redirects to the watch page; raises a
  PopupException with the server log on failure.
  """
  db = dbms.get(request.user)

  if request.method == 'POST':
    tables = request.POST.getlist('table_selection')
    # Resolve names to table objects up front (also validates they exist).
    tables_objects = [db.get_table(database, table) for table in tables]

    try:
      # Can't be simpler without an important refactoring
      design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())
      query_history = db.drop_tables(database, tables_objects, design)
      url = reverse('beeswax:watch_query', args=[query_history.id]) + '?on_success_url=' + reverse('metastore:show_tables')
      return redirect(url)
    except Exception, ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(tables)s. Error: %(error)s") % {'tables': ','.join(tables), 'error': error_message}
      raise PopupException(error, title=_("Hive Error"), detail=log)
def load(self, django_user):
  """
  Load data into table. Raise InstallException on failure.

  Runs a blocking LOAD DATA statement as `django_user`; raises on timeout
  (empty result) or on any Beeswax error.
  """
  LOAD_HQL = \
    """
    LOAD DATA local INPATH
    '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
    """

  LOG.info('Loading data into table "%s"' % (self.name,))
  hql = LOAD_HQL % dict(tablename=self.name, filename=self._contents_file)
  query = hql_query(hql)

  try:
    results = dbms.get(django_user).execute_and_wait(query)
    if not results:
      # execute_and_wait returns a falsy value when the wait timed out.
      msg = _('Error loading table %(table)s: Operation timeout.') % {'table': self.name}
      LOG.error(msg)
      raise InstallException(msg)
  except BeeswaxException, ex:
    msg = _('Error loading table %(table)s: %(error)s') % {'table': self.name, 'error': ex}
    LOG.error(msg)
    raise InstallException(msg)
def _get_sample_data(db, database, table, column):
  """
  Fetch sample rows for a table (or the distinct values of one column).

  Impala-only tables are transparently re-queried through an Impala connection.
  Returns a response dict: status 0 with headers/rows on success, status -1
  with a message otherwise.
  """
  table_obj = db.get_table(database, table)

  # Impala-only tables cannot be sampled through a non-Impala server.
  if table_obj.is_impala_only and db.client.query_server['server_name'] != 'impala':
    db = dbms.get(db.client.user, get_query_server_config('impala'))

  sample_data = db.get_sample(database, table_obj, column)
  response = {'status': -1}

  if not sample_data:
    response['message'] = _('Failed to get sample data.')
    return response

  sample = escape_rows(sample_data.rows(), nulls_only=True)
  if column:
    # Single column: dedupe, sort, and re-wrap each value as a one-cell row.
    sample = [[value] for value in sorted({row[0] for row in sample})]

  response['status'] = 0
  response['headers'] = sample_data.cols()
  response['full_headers'] = sample_data.full_cols()
  response['rows'] = sample

  return response
def get_query_form(request):
  """
  Fetch the database choices for the query form of the current app.

  Translates Thrift authentication failures and generic connectivity errors
  into user-facing PopupExceptions.
  """
  try:
    try:
      # Get database choices
      query_server = dbms.get_query_server_config(get_app_name(request))
      db = dbms.get(request.user, query_server)
      databases = [(database, database) for database in db.get_databases()]
    except StructuredThriftTransportException, e:
      # If Thrift exception was due to failed authentication, raise corresponding message
      if 'TSocket read 0 bytes' in str(e) or 'Error validating the login' in str(e):
        raise PopupException(_('Failed to authenticate to query server, check authentication configurations.'), detail=e)
      else:
        raise e
  except Exception, e:
    raise PopupException(_('Unable to access databases, Query Server or Metastore may be down.'), detail=e)
def close_session(self, session):
  """
  Close the HiveServer2 session described by the `session` dict ({'type', 'id'}).

  Non-superusers may only close their own sessions. Returns a response dict:
  status 0 with session details on success, status -1 with a message otherwise.
  """
  app_name = session.get('type')
  session_id = session.get('id')

  query_server = get_query_server_config(name=app_name)

  response = {'status': -1, 'message': ''}

  try:
    filters = {'id': session_id, 'application': query_server['server_name']}
    if not self.user.is_superuser:
      filters['owner'] = self.user
    session = Session.objects.get(**filters)
  except Session.DoesNotExist:
    response['message'] = _('Session does not exist or you do not have permissions to close the session.')
    # BUG FIX: without this reset, `session` still held the (truthy) request
    # dict, so the close below ran against a dict instead of a Session and blew
    # up instead of returning the permission message.
    session = None

  if session:
    session = dbms.get(self.user, query_server).close_session(session)
    response['status'] = 0
    response['message'] = _('Session successfully closed.')
    response['session'] = {'id': session_id, 'application': session.application, 'status': session.status_code}

  return response
def _index(request, file_format, collection_name, query=None):
  """
  Index a data source (Hive table, HDFS file, or live HS2 result handle) into
  the Solr collection `collection_name`, creating the collection if needed.
  """
  indexer = Indexer(request.user, request.fs)

  unique_field = indexer.get_unique_field(file_format)
  is_unique_generated = indexer.is_unique_generated(file_format)

  schema_fields = indexer.get_kept_field_list(file_format['columns'])
  if is_unique_generated:
    # The unique key is synthesized, so add it to the schema explicitly.
    schema_fields += [{"name": unique_field, "type": "string"}]

  collection_manager = CollectionManagerController(request.user)
  if not collection_manager.collection_exists(collection_name):
    collection_manager.create_collection(collection_name, schema_fields, unique_key_field=unique_field)

  if file_format['inputFormat'] == 'table':
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
    input_path = table_metadata.path_location
  elif file_format['inputFormat'] == 'file':
    input_path = '${nameNode}%s' % file_format["path"]
  elif file_format['inputFormat'] == 'hs2_handle':
    # Stream rows straight from an open HiveServer2 result handle; no morphline.
    searcher = CollectionManagerController(request.user)
    columns = ['_uuid'] + [field['name'] for field in file_format['columns']]
    return searcher.update_data_from_hive(collection_name, columns, fetch_handle=file_format['fetch_handle'])
  else:
    input_path = None

  morphline = indexer.generate_morphline_config(collection_name, file_format, unique_field)

  return indexer.run_morphline(request, collection_name, morphline, input_path, query)
def _get_sample_data(db, database, table, column, is_async=False, cluster=None, operation=None):
  """
  Fetch sample rows for a table/column, either synchronously or as an async
  notebook task.

  When `is_async` is True, get_sample() only generates the SQL, which is then
  submitted through a notebook for background execution. Impala-only tables are
  transparently re-queried through an Impala connection.
  """
  table_obj = db.get_table(database, table)
  if table_obj.is_impala_only and db.client.query_server['server_name'] != 'impala':
    query_server = get_query_server_config('impala', connector=cluster)
    db = dbms.get(db.client.user, query_server, cluster=cluster)

  sample_data = db.get_sample(database, table_obj, column, generate_sql_only=is_async, operation=operation)
  response = {'status': -1}

  if sample_data:
    response['status'] = 0
    if is_async:
      notebook = make_notebook(
          name=_('Table sample for `%(database)s`.`%(table)s`.`%(column)s`') % {'database': database, 'table': table, 'column': column},
          editor_type=_get_servername(db),
          statement=sample_data,
          status='ready-execute',
          skip_historify=True,
          is_task=False,
          compute=cluster if cluster else None
      )
      response['result'] = notebook.execute(request=MockedDjangoRequest(user=db.client.user), batch=False)
      if table_obj.is_impala_only:
        response['result']['type'] = 'impala'
    else:
      sample = escape_rows(sample_data.rows(), nulls_only=True)
      if column:
        # Single column: dedupe, sort, and re-wrap each value as a one-cell row.
        sample = set([row[0] for row in sample])
        sample = [[item] for item in sorted(list(sample))]

      response['headers'] = sample_data.cols()
      response['full_headers'] = sample_data.full_cols()
      response['rows'] = sample
  else:
    response['message'] = _('Failed to get sample data.')

  return response
def put(self, request, original_variant, pk):
  # Allow to modify a variant in HBase/Impala
  # NOTE(review): debug dump of the request payload to a fixed temp path —
  # presumably leftover debugging; confirm whether this should be removed.
  f = open('/tmp/superhello.txt', 'w')
  f.write(json.dumps(request.data))
  f.close()

  # We convert the original and modified data to flatjson
  fc = formatConverters(input_file='stuff.json', output_file='stuff.json')
  original_flatjson = fc.convertVariantJsonToFlatJson(json_data=original_variant)
  modified_flatjson = fc.convertVariantJsonToFlatJson(json_data=request.data)

  # We convert the data to hbase, and we modify directly some fields (note: the keys are almost the same for hbase and impala)
  hbase_data = fc.convertVariantFlatJsonToHbase(original_data=original_flatjson, modified_data=modified_flatjson)

  # Impala - We make the query
  # NOTE(review): `query_data` is not defined anywhere in this method — verify
  # it is a module-level name, otherwise this line raises NameError at runtime.
  query_server = get_query_server_config(name='impala')
  db = dbms.get(request.user, query_server=query_server)
  query = hql_query("INSERT INTO variant(" + ",".join(query_data) + ")")
  handle = db.execute_and_wait(query, timeout_sec=5.0)
  if handle:
    db.close(handle)
  else:
    raise Exception("Impossible to create the variant...")

  # HBase - We add the data in that table too
  hbaseApi = HbaseApi(user=request.user)
  currentCluster = hbaseApi.getClusters().pop()
  rowkey = pk
  hbaseApi.putRow(cluster=currentCluster['name'], tableName='variants', row=rowkey, data=hbase_data)
def guess_format(request):
  """
  Guess the on-disk format of the indexer wizard's input (an HDFS file or a
  Hive table) from the POSTed 'fileFormat' JSON description.
  """
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'file':
    path = urllib.unquote(file_format["path"])
    indexer = MorphlineIndexer(request.user, request.fs)
    if not request.fs.isfile(path):
      raise PopupException(_('Path %(path)s is not a file') % file_format)

    stream = request.fs.open(path)
    format_ = indexer.guess_format({
      "file": {
        "stream": stream,
        "name": path
      }
    })
    _convert_format(format_)
  elif file_format['inputFormat'] == 'table':
    db = dbms.get(request.user)
    try:
      table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
    except Exception, e:
      raise PopupException(e.message if hasattr(e, 'message') and e.message else e)

    # Collect the table's storage/serde properties; entries may be either
    # 'key=value' strings or a (data_type, comment) pair.
    storage = {}
    for delim in table_metadata.storage_details:
      if delim['data_type']:
        if '=' in delim['data_type']:
          key, val = delim['data_type'].split('=', 1)
          storage[key] = val
        else:
          storage[delim['data_type']] = delim['comment']

    if table_metadata.details['properties']['format'] == 'text':
      format_ = {"quoteChar": "\"", "recordSeparator": '\\n', "type": "csv", "hasHeader": False, "fieldSeparator": storage.get('field.delim', ',')}
    elif table_metadata.details['properties']['format'] == 'parquet':
      format_ = {"type": "parquet", "hasHeader": False,}
    else:
      raise PopupException('Hive table format %s is not supported.' % table_metadata.details['properties']['format'])
def alter_table(request, database, table):
  """
  JSON endpoint renaming a table and/or updating its comment.

  POST params: 'new_table_name' and/or 'comment'. Returns the refreshed table
  metadata on success, status 1 with the error otherwise.
  """
  db = dbms.get(request.user)
  response = {'status': -1, 'data': ''}
  try:
    new_table_name = request.POST.get('new_table_name', None)
    comment = request.POST.get('comment', None)

    # Cannot modify both name and comment at same time, name will get precedence
    if new_table_name and comment:
      LOG.warn('Cannot alter both table name and comment at the same time, will perform rename.')

    table_obj = db.alter_table(database, table, new_table_name=new_table_name, comment=comment)

    response['status'] = 0
    response['data'] = {
      'name': table_obj.name,
      'comment': table_obj.comment,
      'is_view': table_obj.is_view,
      'location': table_obj.path_location,
      'properties': table_obj.properties
    }
  except Exception, ex:
    response['status'] = 1
    response['data'] = _("Failed to alter table `%s`.`%s`: %s") % (database, table, str(ex))
def get_query_form(request):
  """
  Build a QueryForm bound to the request, with the database choices of the
  current app's query server.

  Raises PopupException on authentication or connectivity failure, and
  RuntimeError when no database is visible to the user.
  """
  try:
    try:
      # Get database choices
      query_server = dbms.get_query_server_config(get_app_name(request))
      db = dbms.get(request.user, query_server)
      databases = [(db_name, db_name) for db_name in db.get_databases()]
    except StructuredThriftTransportException as e:
      # Authentication failures surface as Thrift transport errors.
      if 'TSocket read 0 bytes' in str(e) or 'Error validating the login' in str(e):
        raise PopupException(_('Failed to authenticate to query server, check authentication configurations.'), detail=e)
      raise e
  except Exception as e:
    raise PopupException(_('Unable to access databases, Query Server or Metastore may be down.'), detail=e)

  if not databases:
    raise RuntimeError(_("No databases are available. Permissions could be missing."))

  query_form = QueryForm()
  query_form.bind(request.POST)
  query_form.query.fields['database'].choices = databases  # Could not do it in the form

  return query_form
def config_validator(user):
  """
  Validate that an Impalad is reachable by running a trivial query end-to-end.

  Returns a list of (app_name, error_message) tuples; empty list means healthy.
  """
  # dbms is dependent on beeswax.conf, import in method to avoid circular dependency
  from beeswax.design import hql_query
  from beeswax.server import dbms
  from beeswax.server.dbms import get_query_server_config

  res = []

  if has_connectors():
    # Connector-based deployments are validated elsewhere.
    return res

  try:
    try:
      if not 'test' in sys.argv:  # Avoid tests hanging
        query_server = get_query_server_config(name='impala')
        server = dbms.get(user, query_server)
        query = hql_query("SELECT 'Hello World!';")
        handle = server.execute_and_wait(query, timeout_sec=10.0)

        if handle:
          server.fetch(handle, rows=100)
          server.close(handle)
    except StructuredThriftTransportException as ex:
      if 'TSocket read 0 bytes' in str(ex):  # this message appears when authentication fails
        msg = "Failed to authenticate to Impalad, check authentication configurations."
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))
      else:
        raise ex
  except Exception as ex:
    msg = "No available Impalad to send queries to."
    LOG.exception(msg)
    res.append((NICE_NAME, _(msg)))

  return res
def get_shared_beeswax_server(db_name='default'):
  """
  Lazily start (once per process) a HiveServer2 shared by the test suite.

  Caches the cluster and its closer in module globals; on a live cluster the
  closer is a no-op since nothing was started locally.
  """
  global _SHARED_HIVE_SERVER
  global _SHARED_HIVE_SERVER_CLOSER

  if _SHARED_HIVE_SERVER is None:
    cluster = pseudo_hdfs4.shared_cluster()

    if is_live_cluster():
      # Nothing to start or tear down against a live cluster.
      def s():
        pass
    else:
      s = _start_mini_hs2(cluster)

    start = time.time()
    started = False
    sleep = 1

    make_logged_in_client()
    user = User.objects.get(username='******')
    query_server = get_query_server_config()
    db = dbms.get(user, query_server)

    # Poll until a session can be opened, for at most 30 seconds.
    while not started and time.time() - start <= 30:
      try:
        db.open_session(user)
        started = True
        break
      except Exception, e:
        LOG.info('HiveServer2 server could not be found after: %s' % e)
        time.sleep(sleep)

    if not started:
      raise Exception("Server took too long to come up.")

    _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s
def create_session(self, lang='hive', properties=None):
  """
  Open (or reuse) a HiveServer2 session for the given dialect.

  Hive sessions are tracked under the 'beeswax' application name. Returns a
  response dict with the session id, properties, configuration, and whether an
  existing session was reused.
  """
  application = 'beeswax' if lang == 'hive' else lang

  session = Session.objects.get_session(self.user, application=application)
  reuse_session = session is not None
  if not reuse_session:
    # No live session for this user/application: open a fresh one.
    session = dbms.get(self.user, query_server=get_query_server_config(name=lang, cluster=self.cluster)).open_session(self.user)

  response = {'type': lang, 'id': session.id}

  if not properties:
    config = None
    if USE_DEFAULT_CONFIGURATION.get():
      # User-saved default session properties take precedence over built-ins.
      config = DefaultConfiguration.objects.get_configuration_for_user(app=lang, user=self.user)

    if config is not None:
      properties = config.properties_list
    else:
      properties = self.get_properties(lang)

  response['properties'] = properties
  response['configuration'] = json.loads(session.properties)
  response['reuse_session'] = reuse_session
  response['session_id'] = ''

  try:
    decoded_guid = session.get_handle().sessionId.guid
    response['session_id'] = unpack_guid(decoded_guid)
  except Exception, e:
    # Best-effort: the decoded id is informational only.
    LOG.warn('Failed to decode session handle: %s' % e)