Exemple #1
0
def analyze_table(request, database, table, columns=None):
  """
  Start a statistics computation on a table and tell the client where to watch it.

  The query server is resolved from the requesting app and the optional
  ``cluster`` JSON found in the POST data. When the table is flagged
  ``is_impala_only`` and the requesting app is not Impala, the Impala query
  server is used instead.

  ``columns`` only selects the kind of analysis: table-level when it is None,
  column-level otherwise. Its value is not forwarded to the database layer.

  Returns a JsonResponse: ``status`` 0 plus a ``watch_url`` for a POST,
  ``status`` -1 plus a ``message`` for any other HTTP method.
  """
  requesting_app = get_app_name(request)
  cluster_info = json.loads(request.POST.get('cluster', '{}'))

  server_config = get_query_server_config(requesting_app, cluster=cluster_info)
  db = dbms.get(request.user, server_config)

  target = db.get_table(database, table)
  if target.is_impala_only and requesting_app != 'impala':
    # Impala-only tables cannot be analyzed through another engine
    server_config = get_query_server_config('impala')
    db = dbms.get(request.user, server_config)

  response = {'status': -1, 'message': '', 'redirect': ''}

  if request.method != "POST":
    response['message'] = _('A POST request is required.')
    return JsonResponse(response)

  run_analysis = db.analyze_table if columns is None else db.analyze_table_columns
  query_history = run_analysis(database, table)

  response['watch_url'] = reverse('beeswax:api_watch_query_refresh_json', kwargs={'id': query_history.id})
  response['status'] = 0
  return JsonResponse(response)
Exemple #2
0
  def handle(self, *args, **options):
    """
    Close the server-side operation of old queries and mark them as expired.

    Positional arguments:
      args[0]: age threshold in days (defaults to 7).
      args[1]: pass 'all' to consider every query instead of only the ones
               whose last state is expired, failed or available.

    Queries whose handle is reported as 'Invalid OperationHandle' by the
    server are marked expired as well; any other error is written to stdout
    and the loop moves on to the next query.
    """
    days = int(args[0]) if len(args) >= 1 else 7
    close_all = args[1] == 'all' if len(args) >= 2 else False

    self.stdout.write('Closing (all=%s) HiveServer2/Impala queries older than %s days...\n' % (close_all, days))

    n = 0
    if close_all:
      queries = HiveServerQueryHistory.objects.all()
    else:
      queries = HiveServerQueryHistory.objects.filter(last_state__in=[
        QueryHistory.STATE.expired.index,
        QueryHistory.STATE.failed.index,
        QueryHistory.STATE.available.index,
      ])

    # "Older than N days" means submitted on or before the cutoff, hence
    # __lte; __gte selected the most recent queries instead.
    cutoff = datetime.today() - timedelta(days=days)
    queries = queries.filter(submission_date__lte=cutoff)

    for query in queries:
      try:
        query_history = HiveServerQueryHistory.objects.get(id=query.id)
        if query_history.server_id is not None:
          handle = query_history.get_handle()
          dbms.get(user=query_history.owner).close_operation(handle)
          n += 1
        query.last_state = QueryHistory.STATE.expired.index
        query.save()
      except Exception as e:
        if 'Invalid OperationHandle' in str(e):
          # The server already forgot about this operation: nothing left to close
          query.last_state = QueryHistory.STATE.expired.index
          query.save()
        else:
          self.stdout.write('Error: %s\n' % e)
Exemple #3
0
  def handle(self, *args, **options):
    """
    Close the HiveServer2 operations of old queries and mark them as expired.

    Positional arguments:
      args[0]: age threshold in days (defaults to 7).
      args[1]: pass 'all' to consider every query instead of only the ones
               whose last state is expired, failed or available.

    The HIVE_CONF_DIR environment variable must be set: it is pushed into the
    beeswax configuration before the Hive site configuration is reloaded. When
    it cannot be read the error is logged, a hint is printed and the exception
    is re-raised.
    """
    days = 7 if len(args) < 1 else int(args[0])
    close_all = len(args) >= 2 and args[1] == 'all'

    self.stdout.write('Closing (all=%s) HiveServer2 queries older than %s days...\n' % (close_all, days))

    closable_states = [QueryHistory.STATE.expired.value, QueryHistory.STATE.failed.value, QueryHistory.STATE.available.value]
    queries = QueryHistory.objects.filter(last_state__in=closable_states)

    if close_all:
      queries = QueryHistory.objects.all()

    cutoff = datetime.today() - timedelta(days=days)
    queries = queries.filter(submission_date__lte=cutoff)

    import os
    import beeswax
    from beeswax import conf
    from beeswax import hive_site
    try:
      hive_conf_dir = os.environ['HIVE_CONF_DIR']
      beeswax.conf.HIVE_CONF_DIR.set_for_testing(hive_conf_dir)
    except:
      LOG.exception('failed to lookup HIVE_CONF_DIR in environment')
      self.stdout.write('Did you export HIVE_CONF_DIR=/etc/hive/conf?\n')
      raise

    # Drop the cached Hive site configuration and load it again
    hive_site.reset()
    hive_site.get_conf()

    closed_queries = 0
    already_closed_queries = 0

    for query in queries:
      try:
        query_history = QueryHistory.get(id=query.id)
        if query_history.server_id is None:
          already_closed_queries += 1
        else:
          handle = query_history.get_handle()
          dbms.get(user=query_history.owner).close_operation(handle)
          closed_queries += 1

        query.last_state = QueryHistory.STATE.expired.value
        query.save()
      except Exception as e:
        error_text = str(e)
        if 'None' in error_text or 'Invalid OperationHandle' in error_text:
          # These errors are counted as "already closed" rather than reported
          already_closed_queries += 1
          query.last_state = QueryHistory.STATE.expired.value
          query.save()
        else:
          self.stdout.write('Info: %s\n' % e)
Exemple #4
0
def database_initialize(request):
    """ Install the tables for this application

    Drops and recreates the map_sample_id, sample_files and clinical_sample
    tables through the 'impala' query server, running the statements one by
    one, then renders 'database.initialize.mako'.

    NOTE: every local variable of this function is handed to the template
    through locals(), so renaming or removing a local changes the template
    context.
    """

    # Connection to the db
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
  
    # The sql queries
    sql = "DROP TABLE IF EXISTS map_sample_id; CREATE TABLE map_sample_id (internal_sample_id STRING, customer_sample_id STRING, date_creation TIMESTAMP, date_modification TIMESTAMP);  DROP TABLE IF EXISTS sample_files; CREATE TABLE sample_files (id STRING, internal_sample_id STRING, file_path STRING, file_type STRING, date_creation TIMESTAMP, date_modification TIMESTAMP);"

    # The clinical db
    sql += "DROP TABLE IF EXISTS clinical_sample; CREATE TABLE clinical_sample (sample_id STRING, patient_id STRING, date_of_collection STRING, original_sample_id STRING, status STRING, sample_type STRING, biological_contamination STRING, storage_condition STRING, biobank_id STRING, pn_id STRING);"

    # Disabled statements, kept for reference:
    #DROP TABLE IF EXISTS variants; CREATE TABLE variants (id STRING, alternate_bases STRING, calls STRING, names STRING, info STRING, reference_bases STRING, quality DOUBLE, created TIMESTAMP, elem_start BIGINT, elem_end BIGINT, variantset_id STRING); DROP TABLE IF EXISTS variantsets;
    #CREATE TABLE variantsets (id STRING, dataset_id STRING, metadata STRING, reference_bounds STRING);
    #DROP TABLE IF EXISTS datasets; CREATE TABLE datasets (id STRING, is_public BOOLEAN, name STRING);'''
  
    # Executing the different queries
    # The script is split on ';' and each non-empty statement is run on its own.
    tmp = sql.split(";")
    for hql in tmp:
        hql = hql.strip()
        if hql:
            query = hql_query(hql)
            # NOTE(review): the returned handle is neither checked nor closed here.
            handle = db.execute_and_wait(query, timeout_sec=5.0)
     
    return render('database.initialize.mako', request, locals())
Exemple #5
0
def execute_directly(request, query, design, query_server, tablename=None, **kwargs):
  """
  Submit a query for execution and return the details needed to follow it.

  When a design is given it is first re-fetched through
  ``authorized_get_design``. An optional ``parameters`` keyword argument is
  stored in the extra data of the resulting query history. ``tablename`` is
  accepted but not used here.

  Returns a JsonResponse carrying the history id, the URL to poll, the current
  statement and whether it was redacted.
  """
  if design is not None:
    design = authorized_get_design(request, design.id)
  parameters = kwargs.pop('parameters', None)

  db = dbms.get(request.user, query_server)
  db.use(query.query.get('database', 'default'))

  history_obj = db.execute_query(query, design)
  watch_url_name = get_app_name(request) + ':api_watch_query_refresh_json'
  watch_url = reverse(watch_url_name, kwargs={'id': history_obj.id})

  if parameters is not None:
    history_obj.update_extra('parameters', parameters)
    history_obj.save()

  return JsonResponse({
    'status': 0,
    'id': history_obj.id,
    'watch_url': watch_url,
    'statement': history_obj.get_current_statement(),
    'is_redacted': history_obj.is_redacted
  })
Exemple #6
0
def show_tables(request, database=None):
  """
  Render the list of tables of a database.

  The database comes from the URL argument, else from the
  ``hueBeeswaxLastDatabase`` cookie, else 'default'. A name that is not among
  the existing databases falls back to 'default'. On a POST, a valid DbForm
  overrides the selection. The database finally shown is written back to the
  cookie.
  """
  if database is None:
    database = request.COOKIES.get('hueBeeswaxLastDatabase', 'default') # Assume always 'default'

  db = dbms.get(request.user)

  databases = db.get_databases()

  # The cookie (or the URL) may name a database that has since been dropped;
  # listing its tables would fail, so fall back to the default one.
  if database not in databases:
    database = 'default'

  if request.method == 'POST':
    db_form = DbForm(request.POST, databases=databases)
    if db_form.is_valid():
      database = db_form.cleaned_data['database']
  else:
    db_form = DbForm(initial={'database': database}, databases=databases)

  tables = db.get_tables(database=database)

  resp = render("tables.mako", request, {
    'breadcrumbs': [
      {
        'name': database,
        'url': reverse('metastore:show_tables', kwargs={'database': database})
      }
    ],
    'tables': tables,
    'db_form': db_form,
    'database': database,
    'tables_json': json.dumps(tables),
    'has_write_access': has_write_access(request.user),
  })
  # Remember the selection for the next visit
  resp.set_cookie("hueBeeswaxLastDatabase", database, expires=90)
  return resp
Exemple #7
0
def sample_search(request):
    """ Search the data related to a given sample id

    Expects a POST request with a non-empty 'sample_id' field. The id is
    prefixed with the id of the requesting user and looked up through the
    'impala' query server.

    Returns a JSON HttpResponse with:
      - status 0 when the request is not a POST or carries no sample id,
      - status 1 and up to 100 rows (file id, file path) in 'data' when the
        query produced a handle,
      - status -1 when the query produced no handle.
    """

    result = {'status': -1,'data': {}}

    # .get() instead of indexing: a POST without a 'sample_id' field is a bad
    # request, not a server error.
    raw_sample_id = request.POST.get('sample_id') if request.method == 'POST' else None
    if not raw_sample_id:
        result['status'] = 0
        return HttpResponse(json.dumps(result), mimetype="application/json")

    sample_id = str(raw_sample_id)

    # Database connection
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
    customer_sample_id = str(request.user.id)+"_"+sample_id

    # The id comes straight from the client and ends up inside a quoted SQL
    # literal: escape backslashes first, then single quotes, so it cannot
    # terminate the literal.
    escaped_sample_id = customer_sample_id.replace("\\", "\\\\").replace("'", "\\'")

    # Selecting the files related to the sample id
    hql = "SELECT sample_files.id, sample_files.file_path FROM sample_files JOIN map_sample_id ON sample_files.internal_sample_id = map_sample_id.internal_sample_id WHERE map_sample_id.customer_sample_id = '"+escaped_sample_id+"';"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        try:
            data = db.fetch(handle, rows=100)
            result['status'] = 1
            result['data'] = list(data.rows())
        finally:
            # Release the handle even when fetching the rows fails
            db.close(handle)

    # Returning the data
    return HttpResponse(json.dumps(result), mimetype="application/json")
Exemple #8
0
def config_validator(user):
  """
  Probe the query server with a trivial query and collect configuration errors.

  Problems are appended to ``res`` as (NICE_NAME, translated message) tuples:
  one message for a login validation failure, another for any other error.
  The probe is skipped when 'test' appears in ``sys.argv``.

  NOTE(review): the visible block ends without returning ``res``; the end of
  the function is presumably missing from this excerpt.
  """
  # dbms depends on beeswax.conf (this file), so it is imported locally to
  # avoid a circular dependency
  from beeswax.design import hql_query
  from beeswax.server import dbms

  res = []
  try:
    try:
      if 'test' not in sys.argv: # Avoid tests hanging
        server = dbms.get(user)
        query = hql_query("SELECT 'Hello World!';")
        handle = server.execute_and_wait(query, timeout_sec=10.0)

        if handle:
          server.fetch(handle, rows=100)
          server.close(handle)
    except StructuredThriftTransportException as e:
      if 'Error validating the login' not in str(e):
        # Not an authentication problem: let the generic handler report it
        raise e
      msg = 'Failed to authenticate to HiveServer2, check authentication configurations.'
      LOG.exception(msg)
      res.append((NICE_NAME, _(msg)))
  except Exception as e:
    msg = "The application won't work without a running HiveServer2."
    LOG.exception(msg)
    res.append((NICE_NAME, _(msg)))
Exemple #9
0
  def handle(self, *args, **options):
    """
    Close the sessions that have not been used for a number of days.

    Positional arguments:
      args[0]: age threshold in days (defaults to 7).
      args[1]: application whose sessions are closed. 'hive' or no value
               means 'beeswax'; 'all' disables the application filter.

    Responses and errors that do not contain 'Session does not exist!' are
    written to stdout.
    """
    days = int(args[0]) if len(args) >= 1 else 7
    query_type = args[1] if len(args) >= 2 else None
    if query_type in ('hive', None):
      query_type = 'beeswax'

    self.stdout.write('Closing (all=%s) HiveServer2/Impala sessions older than %s days...\n' % (query_type, days))

    n = 0
    sessions = Session.objects.all()

    if query_type != 'all':
      sessions = sessions.filter(application=query_type)

    cutoff = datetime.today() - timedelta(days=days)
    sessions = sessions.filter(last_used__lte=cutoff)

    missing_session_marker = 'Session does not exist!'

    for session in sessions:
      try:
        resp = dbms.get(user=session.owner).close_session(session)
        if missing_session_marker not in str(resp):
          self.stdout.write('Error: %s\n' % resp)
          n += 1
      except Exception as e:
        if missing_session_marker not in str(e):
          self.stdout.write('Error: %s\n' % e)
Exemple #10
0
  def teardown_class(cls):
    """
    On a live cluster, drop the test databases and reset the module state.

    For ``cls.db_name`` and its ``_other`` sibling: every table, the `myview`
    view and finally the database itself are dropped, then the database is
    asserted to be gone. ``_INITIALIZED`` is reset to False afterwards.
    Nothing happens when not running against a live cluster.
    """
    global _INITIALIZED

    if not is_live_cluster():
      return

    # Delete test DB and tables
    query_server = get_query_server_config()
    client = make_logged_in_client()
    user = User.objects.get(username='******')

    db = dbms.get(user, query_server)

    # Kill Spark context if running
    if is_hive_on_spark() and cluster.is_yarn():
      # TODO: We should clean up the running Hive on Spark job here
      pass

    for db_name in [cls.db_name, '%s_other' % cls.db_name]:
      if db_name not in db.get_databases():
        continue

      for table in db.get_tables(database=db_name):
        make_query(client, 'DROP TABLE IF EXISTS `%(db)s`.`%(table)s`' % {'db': db_name, 'table': table}, wait=True)
      make_query(client, 'DROP VIEW IF EXISTS `%(db)s`.`myview`' % {'db': db_name}, wait=True)
      make_query(client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db_name}, wait=True)

      # Check the cleanup
      remaining = db.get_databases()
      assert_false(db_name in remaining)

    _INITIALIZED = False
Exemple #11
0
def describe_partitions(request, database, table):
  """
  Render the partitions of a partitioned table.

  Raises PopupException when the table has no partition keys.
  """
  db = dbms.get(request.user)

  table_obj = db.get_table(database, table)
  if not table_obj.partition_keys:
    raise PopupException(_("Table '%(table)s' is not partitioned.") % {'table': table})

  partitions = db.get_partitions(database, table_obj, max_parts=None)

  # Navigation trail: database > table > partitions
  table_kwargs = {'database': database, 'table': table}
  breadcrumbs = [
    {'name': database, 'url': reverse('metastore:show_tables', kwargs={'database': database})},
    {'name': table, 'url': reverse('metastore:describe_table', kwargs=table_kwargs)},
    {'name': 'partitions', 'url': reverse('metastore:describe_partitions', kwargs=table_kwargs)},
  ]

  context = {
    'breadcrumbs': breadcrumbs,
    'database': database,
    'table': table_obj,
    'partitions': partitions,
    'request': request,
  }
  return render("describe_partitions.mako", request, context)
Exemple #12
0
    def put(self, request, original_variant, pk):
        """
        Modify a variant in HBase/Impala.

        The original and the modified variant (``request.data``) are converted
        to flat JSON and merged into HBase data. A query is then run on the
        'impala' query server and the HBase data is written to the 'variants'
        table under the row key ``pk``.

        Raises Exception when the Impala query does not return a handle.

        NOTE(review): ``query_data`` is not defined anywhere in this method, so
        building the INSERT statement fails unless it is a module-level name -
        confirm where it is supposed to come from.
        """
        # Debug dump of the incoming payload; the context manager closes the
        # file even when the serialization fails.
        with open('/tmp/superhello.txt', 'w') as f:
            f.write(json.dumps(request.data))

        # We convert the original and modified data to flatjson
        fc = formatConverters(input_file='stuff.json',output_file='stuff.json')
        original_flatjson = fc.convertVariantJsonToFlatJson(json_data=original_variant)
        modified_flatjson = fc.convertVariantJsonToFlatJson(json_data=request.data)

        # We convert the data to hbase, and we modify directly some fields (note: the keys are almost the same for hbase and impala)
        hbase_data = fc.convertVariantFlatJsonToHbase(original_data=original_flatjson,modified_data=modified_flatjson)


        # Impala - We make the query
        query_server = get_query_server_config(name='impala')
        db = dbms.get(request.user, query_server=query_server)
        query = hql_query("INSERT INTO variant("+",".join(query_data)+")")
        handle = db.execute_and_wait(query, timeout_sec=5.0)
        if handle:
            db.close(handle)
        else:
            raise Exception("Impossible to create the variant...")

        # HBase - We add the data in that table too
        hbaseApi = HbaseApi(user=request.user)
        currentCluster = hbaseApi.getClusters().pop()
        rowkey = pk
        hbaseApi.putRow(cluster=currentCluster['name'], tableName='variants', row=rowkey, data=hbase_data)
Exemple #13
0
def show_tables(request, database=None):
  """
  Build the JSON description of the tables of a database.

  Only requests asking for ``format=json`` are handled in the visible part of
  this function. The database falls back to 'default' when it is missing or
  unknown, and a valid POSTed DbForm overrides it. The ``filter`` GET
  parameter is forwarded as the table name filter.

  Raises PopupException when the databases or tables cannot be retrieved.

  NOTE(review): the visible block ends right after the JsonResponse is built;
  the rest of the function is presumably missing from this excerpt.
  """
  if database is None:
    database = 'default' # Assume always 'default'

  if request.REQUEST.get("format", "html") == "json":
    db = dbms.get(request.user)

    try:
      databases = db.get_databases()

      if database not in databases:
        database = 'default'

      if request.method == 'POST':
        db_form = DbForm(request.POST, databases=databases)
        if db_form.is_valid():
          database = db_form.cleaned_data['database']
      else:
        db_form = DbForm(initial={'database': database}, databases=databases)

      search_filter = request.GET.get('filter', '')

      tables = db.get_tables_meta(database=database, table_names=search_filter) # SparkSql returns []
      table_names = [table['name'] for table in tables]
    except Exception as e:
      # Translate the message template first and interpolate afterwards:
      # formatting inside _() looks up a string no catalog contains.
      raise PopupException(_('Failed to retrieve tables for database: %s') % database, detail=e)

    resp = JsonResponse({
        'status': 0,
        'database_meta': db.get_database(database),
        'tables': tables,
        'table_names': table_names,
        'search_filter': search_filter
    })
Exemple #14
0
def guess_format(request):
  """
  Guess the format description of an input source.

  The ``fileFormat`` POST field is a JSON object whose ``inputFormat`` selects
  the source:
    - 'file': the format is guessed by the Indexer from the file at ``path``,
    - 'table': derived from the storage format of the Hive table
      (``databaseName`` / ``tableName``); only 'text' and 'parquet' are
      supported,
    - 'query': a fixed tab-separated CSV description.

  Returns the format as a JsonResponse.
  Raises PopupException for an unsupported table format or input format.
  """
  file_format = json.loads(request.POST.get('fileFormat', '{}'))
  input_format = file_format.get('inputFormat')

  if input_format == 'file':
    indexer = Indexer(request.user, request.fs)
    stream = request.fs.open(file_format["path"])
    format_ = indexer.guess_format({
      "file":{
        "stream": stream,
        "name": file_format['path']
        }
      })
    _convert_format(format_)
  elif input_format == 'table':
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

    # Storage details are looked up by their 'data_type' entry below
    storage = dict((delim['data_type'], delim['comment']) for delim in table_metadata.storage_details)
    table_format = table_metadata.details['properties']['format']
    if table_format == 'text':
      format_ = {"quoteChar": "\"", "recordSeparator": '\\n', "type": "csv", "hasHeader": False, "fieldSeparator": storage['serialization.format']}
    elif table_format == 'parquet':
      format_ = {"type": "parquet", "hasHeader": False,}
    else:
      raise PopupException('Hive table format %s is not supported.' % table_format)
  elif input_format == 'query':
    format_ = {"quoteChar": "\"", "recordSeparator": "\\n", "type": "csv", "hasHeader": False, "fieldSeparator": "\t"} # \t --> CTRL+A
  else:
    # Without this branch a missing or unknown input format surfaced as a
    # KeyError / UnboundLocalError instead of a readable error.
    raise PopupException('Input format %s is not supported.' % input_format)

  return JsonResponse(format_)
Exemple #15
0
def drop_table(request, database):
  """
  Drop the tables selected in a POSTed form.

  With ``is_embeddable`` set, only the DDL is generated and it is executed as
  a notebook task whose result is returned as JSON. Otherwise the drop is run
  through a query history and the client is redirected to its watch page.

  Raises PopupException when anything fails along the way. Requests other
  than POST are not handled in the visible part of this function.
  """
  db = dbms.get(request.user)

  if request.method == 'POST':
    try:
      tables = request.POST.getlist('table_selection')
      tables_objects = [db.get_table(database, name) for name in tables]
      skip_trash = request.POST.get('skip_trash') == 'on'

      if not request.POST.get('is_embeddable'):
        # Can't be simpler without an important refactoring
        design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())
        query_history = db.drop_tables(database, tables_objects, design, skip_trash=skip_trash)
        watch_url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id})
        success_url = reverse('metastore:show_tables', kwargs={'database': database})
        return redirect(watch_url + '?on_success_url=' + success_url)

      sql = db.drop_tables(database, tables_objects, design=None, skip_trash=skip_trash, generate_ddl_only=True)
      job = make_notebook(
          name='Execute and watch',
          editor_type='hive',
          statement=sql.strip(),
          status='ready',
          database=database,
          on_success_url='assist.db.refresh',
          is_task=True
      )
      return JsonResponse(job.execute(request))
    except Exception as ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(tables)s.  Error: %(error)s") % {'tables': ','.join(tables), 'error': error_message}
      raise PopupException(error, title=_("Hive Error"), detail=log)
Exemple #16
0
  def create_session(self, lang='hive', properties=None):
    """
    Return the description of the user's session for ``lang``, opening one if needed.

    The result holds the session ``type`` and ``id`` plus its ``properties``.
    When no properties are passed in, they come from the user's default
    configuration (if that feature is enabled and one exists), else from
    ``self.get_properties(lang)``. For Impala the ``http_addr`` session
    setting is added, or None when the session does not report one.
    """
    application = lang if lang != 'hive' else 'beeswax'

    session = Session.objects.get_session(self.user, application=application)
    if session is None:
      query_server = get_query_server_config(name=lang)
      session = dbms.get(self.user, query_server=query_server).open_session(self.user)

    response = {'type': lang, 'id': session.id}

    if not properties:
      config = None
      if USE_DEFAULT_CONFIGURATION.get():
        config = DefaultConfiguration.objects.get_configuration_for_user(app=lang, user=self.user)

      properties = self.get_properties(lang) if config is None else config.properties_list

    response['properties'] = properties

    if lang == 'impala':
      # First setting whose key is 'http_addr', compared case-insensitively
      http_addr = None
      for setting in session.get_formatted_properties():
        if setting['key'].lower() == 'http_addr':
          http_addr = setting['value']
          break
      response['http_addr'] = http_addr

    return response
Exemple #17
0
def show_tables(request, database=None):
    """
    Show the tables of a database.

    A POST returns JSON holding either the rendered table list and the tables,
    or an ``error`` message. Any other method renders the full page and stores
    the database in the ``hueHcatalogLastDatabase`` cookie.
    """
    if database is None:
        database = _get_last_database(request, database)

    if request.method != 'POST':
        db = dbms.get(request.user)
        databases = db.get_databases()
        db_form = hcatalog.forms.DbForm(initial={'database': database}, databases=databases)
        context = {
            'database': database,
            'db_form': db_form,
        }
        response = render("show_tables.mako", request, context)
        response.set_cookie("hueHcatalogLastDatabase", database, expires=90)
        return response

    resp = {}
    try:
        tables = _get_table_list(request, database)
        template_context = dict(
            app_name=get_app_name(request),
            database=database,
            tables=tables,
        )
        table_list_rendered = django_mako.render_to_string("table_list.mako", template_context)
    except Exception as ex:
        resp['error'] = escapejs(ex.message)
    else:
        resp['table_list_rendered'] = table_list_rendered
        resp['tables'] = tables
    return HttpResponse(json.dumps(resp))
Exemple #18
0
def describe_table_json(request, database, table):
    """
    Describe the columns (type and name) of a table as a dictionary.

    When the table does not exist, the result is a failure description naming
    the table instead.

    NOTE(review): the visible block ends without returning ``result``; the end
    of the function is presumably missing from this excerpt.
    """
    try:
        db = dbms.get(request.user)
        # Keep `table` bound to the name: the failure message below
        # concatenates it with a string.
        table_obj = db.get_table(database, table)
        result = {"columns": [{"type": col.type, "name": col.name} for col in table_obj.cols]}
    except NoSuchObjectException:
        result = {"status": "failure", 'failureInfo' : unicode(table+' table not found')}
Exemple #19
0
def alter_column(request, database, table):
  """
  Rename, retype or re-comment a column of a table.

  Reads ``column`` (required), ``new_column_name``, ``new_column_type``,
  ``comment`` and ``partition_spec`` from the POST data; the new name and type
  default to the current ones.

  ``response`` ends up with status 0 and the new column description on
  success, or status 1 and an error message on any failure.

  NOTE(review): the visible block ends without returning ``response``; the end
  of the function is presumably missing from this excerpt.
  """
  db = dbms.get(request.user)
  response = {'status': -1, 'data': ''}
  try:
    column = request.POST.get('column', None)
    if column is None:
      raise PopupException(_('alter_column requires a column parameter'))

    column_obj = db.get_column(database, table, column)
    if not column_obj:
      raise PopupException(_('Column `%s`.`%s` `%s` not found') % (database, table, column))

    post = request.POST
    new_column_name = post.get('new_column_name', column_obj.name)
    new_column_type = post.get('new_column_type', column_obj.type)
    comment = post.get('comment', None)
    partition_spec = post.get('partition_spec', None)

    column_obj = db.alter_column(database, table, column, new_column_name, new_column_type, comment=comment, partition_spec=partition_spec)

    response['status'] = 0
    response['data'] = {
      'name': column_obj.name,
      'type': column_obj.type,
      'comment': column_obj.comment
    }
  except Exception as ex:
    # The validation errors raised above are reported through this path too
    response['status'] = 1
    response['data'] = _("Failed to alter column `%s`.`%s` `%s`: %s") % (database, table, column, str(ex))
Exemple #20
0
def load_table(request, table):
  """
  Load data from a path into a table of the 'default' database.

  A valid POSTed LoadDataForm is turned into a LOAD DATA statement (with
  optional OVERWRITE and PARTITION clauses) that is handed to
  ``confirm_query``. Otherwise the form is rendered: empty for a non-POST
  request, bound and carrying its errors for an invalid POST.
  """
  table_obj = dbms.get(request.user).get_table('default', table)

  if request.method == "POST":
    form = beeswax.forms.LoadDataForm(table_obj, request.POST)
    if form.is_valid():
      # TODO(philip/todd): When PathField might refer to non-HDFS,
      # we need a pathfield.is_local function.
      # NOTE(review): the path and partition values are interpolated into the
      # statement as-is; confirm the form validation rules out quotes.
      hql = "LOAD DATA INPATH"
      hql += " '%s'" % form.cleaned_data['path']
      if form.cleaned_data['overwrite']:
        hql += " OVERWRITE"
      hql += " INTO TABLE "
      hql += "`%s`" % (table,)
      if form.partition_columns:
        vals = ["%s='%s'" % (column_name, form.cleaned_data[key])
                for key, column_name in form.partition_columns.iteritems()]
        hql += " PARTITION (" + ", ".join(vals) + ")"

      on_success_url = urlresolvers.reverse(describe_table, kwargs={'table': table})
      return confirm_query(request, hql, on_success_url)
  else:
    form = beeswax.forms.LoadDataForm(table_obj)

  # Reached by non-POST requests and by POSTs that failed validation: the
  # latter used to fall off the end of the function and return None.
  return render("load_table.mako", request, dict(form=form, table=table, action=request.get_full_path()))
Exemple #21
0
def autocomplete(request, database=None, table=None, column=None, nested=None):
  """
  Collect autocomplete data at the level given by the arguments.

  Lists the databases when no database is given, the tables when no table is
  given, the columns (plus the HDFS link) when no column is given, and
  otherwise the parsed type of the column, optionally narrowed to a nested
  path.

  Superusers and users holding the 'impersonate' permission of the security
  app may act as another user through the ``doas`` GET parameter.

  Timeouts and transport errors are reported in the response with code 503.

  NOTE(review): the visible block ends without returning ``response``; the end
  of the function is presumably missing from this excerpt.
  """
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)

  may_impersonate = request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")
  if may_impersonate and 'doas' in request.GET:
    do_as = User.objects.get(username=request.GET.get('doas'))
  else:
    do_as = request.user

  db = dbms.get(do_as, query_server)
  response = {}

  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    elif column is None:
      table_obj = db.get_table(database, table)
      response['hdfs_link'] = table_obj.hdfs_link
      response['columns'] = [c.name for c in table_obj.cols]
      response['extended_columns'] = massage_columns_for_json(table_obj.cols)
    else:
      col = db.get_column(database, table, column)
      if not col:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))

      parse_tree = parser.parse_column(col.name, col.type, col.comment)
      if nested:
        parse_tree = _extract_nested_type(parse_tree, nested)
      # The parsed type replaces the whole response
      response = parse_tree
  except (QueryServerTimeoutException, TTransportException) as e:
    response['code'] = 503
    response['error'] = e.message
Exemple #22
0
def guess_field_types(request):
  """
  Guess the columns (and a data sample) of an input source.

  The ``fileFormat`` POST field is a JSON object whose ``inputFormat`` selects
  the source:
    - 'file': the field types are guessed by the Indexer from the file at
      ``path``, using the given ``format``,
    - 'table': the first 4 sample rows and the columns of the Hive table
      (``databaseName`` / ``tableName``); column types without a known
      translation are treated as 'string',
    - 'query': not implemented yet, an empty sample and column list is
      returned.

  Returns the result as a JsonResponse.
  Raises PopupException for any other input format.
  """
  file_format = json.loads(request.POST.get('fileFormat', '{}'))
  input_format = file_format.get('inputFormat')

  if input_format == 'file':
    indexer = Indexer(request.user, request.fs)
    stream = request.fs.open(file_format["path"])
    _convert_format(file_format["format"], inverse=True)

    format_ = indexer.guess_field_types({
      "file": {
        "stream": stream,
        "name": file_format['path']
        },
      "format": file_format['format']
    })
  elif input_format == 'table':
    sample = get_api(request, {'type': 'hive'}).get_sample_data({'type': 'hive'}, database=file_format['databaseName'], table=file_format['tableName'])
    db = dbms.get(request.user)
    table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])

    format_ = {
        "sample": sample['rows'][:4],
        "columns": [
            Field(col.name, HiveFormat.FIELD_TYPE_TRANSLATE.get(col.type, 'string')).to_dict()
            for col in table_metadata.cols
        ]
    }
  elif input_format == 'query':
    #TODO get schema from explain query
    # Until then answer with the same shape as the table branch, but empty:
    # leaving format_ unbound crashed on the return below.
    format_ = {"sample": [], "columns": []}
  else:
    raise PopupException('Input format %s is not supported.' % input_format)

  return JsonResponse(format_)
Exemple #23
0
def drop_database(request):
  """
  Drop the databases selected in a POSTed form.

  With ``is_embeddable`` set, only the DDL is generated and it is executed as
  a notebook task whose result is returned as JSON. Otherwise the drop is run
  through a query history and the client is redirected to its watch page.

  Raises PopupException when anything fails along the way. Requests other
  than POST are not handled in the visible part of this function.
  """
  db = dbms.get(request.user)

  if request.method == 'POST':
    databases = request.POST.getlist('database_selection')

    try:
      design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())

      if not request.POST.get('is_embeddable'):
        query_history = db.drop_databases(databases, design)
        watch_url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id})
        return redirect(watch_url + '?on_success_url=' + reverse('metastore:databases'))

      sql = db.drop_databases(databases, design, generate_ddl_only=True)
      job = make_notebook(
          name='Execute and watch',
          editor_type='hive',
          statement=sql.strip(),
          status='ready',
          database=None,
          on_success_url='assist.db.refresh',
          is_task=True
      )
      return JsonResponse(job.execute(request))
    except Exception as ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(databases)s.  Error: %(error)s") % {'databases': ','.join(databases), 'error': error_message}
      raise PopupException(error, title=_("Hive Error"), detail=log)
  def create(self, django_user):
    """
    Create table in the Hive Metastore.

    Returns False when the table already exists in the 'default' database and
    True once it has been created.
    Raises InstallException when the creation query fails or times out.
    """
    LOG.info('Creating table "%s"' % (self.name,))
    db = dbms.get(django_user, self.query_server)

    try:
      # Already exists?
      if self.app_name == 'impala':
        db.invalidate_tables('default', [self.name])
      db.get_table('default', self.name)
    except Exception:
      # Any failure to look the table up is taken as "it does not exist yet"
      pass
    else:
      msg = _('Table "%(table)s" already exists.') % {'table': self.name}
      LOG.error(msg)
      return False

    query = hql_query(self.hql)
    try:
      results = db.execute_and_wait(query)
    except Exception as ex:
      msg = _('Error creating table %(table)s: %(error)s.') % {'table': self.name, 'error': ex}
      LOG.error(msg)
      raise InstallException(msg)

    # Checked outside of the try block: the timeout error used to be caught
    # by the handler above, then wrapped and logged a second time.
    if not results:
      msg = _('Error creating table %(table)s: Operation timeout.') % {'table': self.name}
      LOG.error(msg)
      raise InstallException(msg)
    return True
Exemple #25
0
def init_example(request):
    """ Let developers check that inserting and querying data works.

    Recreates the val_test_2 table through the 'impala' query server, inserts
    three rows, reads them back (up to 100 rows) into result['data'] and
    renders 'database.initialize.mako'.

    NOTE: every local variable of this function is handed to the template
    through locals(), so renaming or removing a local changes the template
    context.
    """

    result = {'status': -1,'data': {}}

    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
  
    # Deleting the table
    # NOTE(review): the handles of the first three statements are neither
    # checked nor closed; only the last one is.
    hql = "DROP TABLE IF EXISTS val_test_2;"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
  
    # Creating the table
    hql = "CREATE TABLE val_test_2 (id int, token string);"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
  
    # Adding some data
    hql = " INSERT OVERWRITE val_test_2 values (1, 'a'), (2, 'b'), (-1,'xyzzy');"
    # hql = "INSERT INTO TABLE testset_bis VALUES (2, 25.0)"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
  
    # Querying the data
    hql = "SELECT * FROM val_test_2"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        data = db.fetch(handle, rows=100)
        result['data'] = list(data.rows())
        db.close(handle)
 
    # result['status'] is left at -1 in every case
    return render('database.initialize.mako', request, locals())
Exemple #26
0
def describe_partitions(request, database, table):
    """
    Render the partitions of a partitioned table.

    Raises PopupException when the table has no partition keys.
    """
    db = dbms.get(request.user)

    table_obj = db.get_table(database, table)
    if not table_obj.partition_keys:
        raise PopupException(_("Table '%(table)s' is not partitioned.") % {"table": table})

    partitions = db.get_partitions(database, table_obj, max_parts=None)

    # Navigation trail: database > table > partitions
    table_kwargs = {"database": database, "table": table}
    database_crumb = {"name": database, "url": reverse("metastore:show_tables", kwargs={"database": database})}
    table_crumb = {"name": table, "url": reverse("metastore:describe_table", kwargs=table_kwargs)}
    partitions_crumb = {"name": "partitions", "url": reverse("metastore:describe_partitions", kwargs=table_kwargs)}

    context = {
        "breadcrumbs": [database_crumb, table_crumb, partitions_crumb],
        "database": database,
        "table": table_obj,
        "partitions": partitions,
        "request": request,
    }
    return render("describe_partitions.mako", request, context)
Exemple #27
0
def show_tables(request, database=None):
  """
  Render the list of tables of a database.

  The database comes from the URL argument, else from the
  ``hueBeeswaxLastDatabase`` cookie, else 'default'. A name that is not among
  the existing databases falls back to 'default'. On a POST, a valid DbForm
  overrides the selection. The template is also told whether the examples are
  installed.
  """
  if database is None:
    database = request.COOKIES.get('hueBeeswaxLastDatabase', 'default') # Assume always 'default'
  db = dbms.get(request.user)

  databases = db.get_databases()

  # The cookie (or the URL) may name a database that has since been dropped;
  # listing its tables would fail, so fall back to the default one.
  if database not in databases:
    database = 'default'

  if request.method == 'POST':
    db_form = DbForm(request.POST, databases=databases)
    if db_form.is_valid():
      database = db_form.cleaned_data['database']
  else:
    db_form = DbForm(initial={'database': database}, databases=databases)

  tables = db.get_tables(database=database)
  examples_installed = beeswax.models.MetaInstall.get().installed_example

  return render("show_tables.mako", request, {
      'tables': tables,
      'examples_installed': examples_installed,
      'db_form': db_form,
      'database': database,
      'tables_json': json.dumps(tables),
  })
Exemple #28
0
def show_tables(request, database=None):
    """
    Render the list of tables of a database.

    The database comes from the URL argument, else from the
    ``hueBeeswaxLastDatabase`` cookie, else "default"; unknown databases fall
    back to "default". On a POST, a valid DbForm overrides the selection. The
    database finally shown is written back to the cookie.
    """
    if database is None:
        database = request.COOKIES.get("hueBeeswaxLastDatabase", "default")  # Assume always 'default'

    db = dbms.get(request.user)
    databases = db.get_databases()

    if database not in databases:
        database = "default"

    if request.method != "POST":
        db_form = DbForm(initial={"database": database}, databases=databases)
    else:
        db_form = DbForm(request.POST, databases=databases)
        if db_form.is_valid():
            database = db_form.cleaned_data["database"]

    tables = db.get_tables(database=database)

    database_crumb = {"name": database, "url": reverse("metastore:show_tables", kwargs={"database": database})}
    context = {
        "breadcrumbs": [database_crumb],
        "tables": tables,
        "db_form": db_form,
        "database": database,
        "tables_json": json.dumps(tables),
        "has_write_access": has_write_access(request.user),
    }

    resp = render("tables.mako", request, context)
    # Remember the selection for the next visit
    resp.set_cookie("hueBeeswaxLastDatabase", database, expires=90)
    return resp
Exemple #29
0
def index_file(request):
  """
  Index a file or a table into a collection from the POSTed ``fileFormat``.

  Creates the collection when it does not exist yet, then launches the
  morphline job and returns its handle as JSON.
  """
  file_format = json.loads(request.POST.get('fileFormat', '{}'))
  _convert_format(file_format["format"], inverse=True)
  collection_name = file_format["name"]

  indexer = Indexer(request.user, request.fs)
  unique_field = indexer.get_unique_field(file_format)
  needs_generated_id = indexer.is_unique_generated(file_format)

  schema_fields = indexer.get_kept_field_list(file_format['columns'])
  if needs_generated_id:
    # The generated unique key is not part of the kept columns, add it.
    schema_fields += [{"name": unique_field, "type": "string"}]

  morphline = indexer.generate_morphline_config(collection_name, file_format, unique_field)

  controller = CollectionManagerController(request.user)
  if not controller.collection_exists(collection_name):
    controller.create_collection(collection_name, schema_fields, unique_key_field=unique_field)

  if file_format['inputFormat'] != 'table':
    input_path = file_format["path"]
  else:
    # For a table input, index the files at the table's location.
    table_metadata = dbms.get(request.user).get_table(
        database=file_format['databaseName'],
        table_name=file_format['tableName'])
    input_path = table_metadata.path_location

  job_handle = indexer.run_morphline(request, collection_name, morphline, input_path) #TODO if query generate insert
  return JsonResponse(job_handle)
Exemple #30
0
def view_results(request, id, first_row=0):
  """
  Returns the view for the results of the QueryHistory with the given id.

  The query results MUST be ready.
  To display query results, one should always go through the execute_query view.
  If the result set has has_result_set=False, display an empty result.

  If ``first_row`` is 0, restarts (if necessary) the query read.  Otherwise, just
  spits out a warning if first_row doesn't match the server's conception.
  Multiple readers will produce a confusing interaction here, and that's known.

  It understands the ``context`` GET parameter. (See execute_query().)
  """
  first_row = long(first_row)
  start_over = (first_row == 0)
  # Placeholder result type carrying "empty result" attributes; it is replaced
  # by the real fetch result below when the handle has a result set.
  results = type('Result', (object,), {
                'rows': 0,
                'columns': [],
                'has_more': False,
                'start_row': 0,
            })
  data = []
  fetch_error = False
  error_message = ''
  log = ''
  columns = []
  app_name = get_app_name(request)

  # The lookup is made with must_exist=True; the db client targets the server
  # configuration recorded on the query history.
  query_history = authorized_get_query_history(request, id, must_exist=True)
  query_server = query_history.get_query_server_config()
  db = dbms.get(request.user, query_server)

  handle, state = _get_query_handle_and_state(query_history)
  context_param = request.GET.get('context', '')
  query_context = parse_query_context(context_param)

  # Update the status as expired should not be accessible
  expired = state == models.QueryHistory.STATE.expired

  # Retrieve query results or use empty result if no result set
  try:
    if query_server['server_name'] == 'impala' and not handle.has_result_set:
      downloadable = False
    else:
      # Fetch up to 100 rows, restarting the read when first_row is 0.
      results = db.fetch(handle, start_over, 100)

      # Materialize and HTML escape results
      data = escape_rows(results.rows())

      # We display the "Download" button only when we know that there are results:
      downloadable = first_row > 0 or data
      log = db.get_log(handle)
      columns = results.data_table.cols()

  except Exception, ex:
    # Any fetch failure is logged and turned into an error message/log pair
    # instead of propagating.
    LOG.exception('error fetching results')

    fetch_error = True
    error_message, log = expand_exception(ex, db, handle)
Exemple #31
0
def config_validator(user):
  """
  Validate the Hive configuration for ``user``.

  Two checks are run: that the databases can be listed through the query
  server (skipped when 'test' is in sys.argv), and that the Hive metastore
  warehouse directory can be stat'ed on HDFS.

  Returns a list of (NICE_NAME, message) tuples, empty when both checks pass.
  A warehouse failure returns only the warehouse error.
  """
  # dbms is dependent on beeswax.conf (this file)
  # import in method to avoid circular dependency
  from beeswax.server import dbms

  res = []
  try:
    if 'test' not in sys.argv: # Avoid tests hanging
      server = dbms.get(user)
      server.get_databases()
  except Exception:
    # Was a bare `except:`, which also trapped SystemExit/KeyboardInterrupt.
    res.append((NICE_NAME, _("The application won't work without a running HiveServer2.")))

  # Bound before the try so the error message below can always be built, even
  # when the import or the warehouse lookup itself is what failed.
  warehouse = None
  try:
    from hadoop import cluster
    warehouse = beeswax.hive_site.get_metastore_warehouse_dir()
    fs = cluster.get_hdfs()
    fs.stats(warehouse)
  except Exception:
    return [(NICE_NAME, _('Failed to access Hive warehouse: %s') % warehouse)]

  return res
Exemple #32
0
def show_tables(request, database=None):
    """
    List the tables of ``database`` (defaults to 'default').

    When the request asks for ``format=json``, the table metadata is fetched,
    optionally filtered by the ``filter`` GET parameter, and a JSON response is
    built. An unknown database falls back to 'default'; a valid POSTed DbForm
    overrides the selection.

    Raises PopupException when the databases or tables cannot be retrieved.
    """
    if database is None:
        database = 'default'  # Assume always 'default'

    if request.REQUEST.get("format", "html") == "json":
        db = dbms.get(request.user)

        try:
            databases = db.get_databases()

            if database not in databases:
                database = 'default'

            if request.method == 'POST':
                db_form = DbForm(request.POST, databases=databases)
                if db_form.is_valid():
                    database = db_form.cleaned_data['database']
            else:
                db_form = DbForm(initial={'database': database},
                                 databases=databases)

            search_filter = request.GET.get('filter', '')

            tables = db.get_tables_meta(
                database=database,
                table_names=search_filter)  # SparkSql returns []
            table_names = [table['name'] for table in tables]
        except Exception as e:
            # Interpolate AFTER the translation lookup: the message id must be
            # the constant template, otherwise no catalog entry can match.
            raise PopupException(
                _('Failed to retrieve tables for database: %s') % database,
                detail=e)

        resp = JsonResponse({
            'status': 0,
            'database_meta': db.get_database(database),
            'tables': tables,
            'table_names': table_names,
            'search_filter': search_filter
        })
Exemple #33
0
def autocomplete(request, database=None, table=None):
  """
  Collect autocomplete metadata into a response dict.

  - no ``database``: the list of databases
  - ``database`` only: the tables of that database
  - ``database`` and ``table``: the table's HDFS link and column details

  A ``doas`` GET parameter makes the lookup run as another user, but only for
  superusers or users holding the security app's "impersonate" permission.
  """
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  do_as = request.user
  # Impersonation is honoured only for privileged callers.
  if (request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")) and 'doas' in request.GET:
    do_as = User.objects.get(username=request.GET.get('doas'))
  db = dbms.get(do_as, query_server)
  response = {}

  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    else:
      t = db.get_table(database, table)
      response['hdfs_link'] = t.hdfs_link
      response['columns'] = [column.name for column in t.cols]
      response['extended_columns'] = massage_columns_for_json(t.cols)
  except TTransportException, tx:
    # Transport failures are reported in the payload with a 503 code rather
    # than raised.
    response['code'] = 503
    response['error'] = tx.message
Exemple #34
0
def config_validator(user):
  """
  Check that the databases can be listed through the query server for ``user``.

  Problems are collected in ``res`` as (NICE_NAME, message) tuples. A Thrift
  transport error mentioning a login validation failure is reported as an
  authentication problem; any other exception is reported as HiveServer2 not
  running.
  """
  # dbms is dependent on beeswax.conf (this file)
  # import in method to avoid circular dependency
  from beeswax.server import dbms

  res = []
  try:
    try:
      if not 'test' in sys.argv: # Avoid tests hanging
        server = dbms.get(user)
        server.get_databases()
    except StructuredThriftTransportException, e:
      if 'Error validating the login' in str(e):
        msg = 'Failed to authenticate to HiveServer2, check authentication configurations.'
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))
      else:
        # Not an authentication problem: let the outer handler report it.
        raise e
  except Exception, e:
    msg = "The application won't work without a running HiveServer2."
    LOG.exception(msg)
    res.append((NICE_NAME, _(msg)))
Exemple #35
0
def get_exec_summary(request, query_history_id):
    """
    Return, as JSON, the execution summary of the query behind
    ``query_history_id``.

    The payload has status 0 and a ``summary`` on success; otherwise status -1
    and a ``message`` when no query history is found.
    """
    query_server = dbms.get_query_server_config()
    db = beeswax_dbms.get(request.user, query_server=query_server)

    response = {'status': -1}
    query_history = authorized_get_query_history(
        request, query_history_id, must_exist=True)

    if query_history is None:
        response['message'] = _(
            'get_exec_summary requires a valid query_history_id')
        return JsonResponse(response)

    # The summary call needs both the session handle and the operation handle.
    session = Session.objects.get_session(
        request.user, query_server['server_name'])
    operation_handle = query_history.get_handle().get_rpc_handle()
    session_handle = session.get_handle()

    response['status'] = 0
    response['summary'] = db.get_exec_summary(operation_handle, session_handle)
    return JsonResponse(response)
Exemple #36
0
    def _get_db(self, snippet, is_async=False, interpreter=None):
        """
        Return the dbms client matching the snippet's SQL dialect.

        The dialect comes from ``interpreter['dialect']`` when present,
        otherwise from ``snippet['type']``. It is mapped to a query server
        name: 'hive' becomes 'beeswax' unless ``is_async`` is set, 'llap' and
        'impala' map to themselves, and anything else to 'sparksql'.
        """
        has_dialect = interpreter and interpreter.get('dialect')
        # Backward compatibility without connectors: fall back to snippet type.
        dialect = interpreter['dialect'] if has_dialect else snippet['type']

        if dialect == 'hive':
            name = 'hive' if is_async else 'beeswax'
        elif dialect == 'llap':
            name = 'llap'
        elif dialect == 'impala':
            name = 'impala'
        else:
            name = 'sparksql'

        # Note: name is not used if interpreter is present
        query_server = get_query_server_config(name=name, connector=interpreter)
        return dbms.get(self.user, query_server=query_server)
Exemple #37
0
def alter_database(request, database):
    """
    Alter ``database`` with the JSON ``properties`` POSTed by the client.

    The outcome is stored in a response dict: status 0 with the refreshed
    database metadata (plus an ``hdfs_link`` derived from its location) on
    success, status 1 with an error message on failure.
    """
    db = dbms.get(request.user)
    response = {'status': -1, 'data': ''}
    try:
        properties = request.POST.get('properties')

        if not properties:
            raise PopupException(
                _("Alter database requires a properties value of key-value pairs."
                  ))

        properties = json.loads(properties)
        db.alter_database(database, properties=properties)

        # Re-read the metadata so the response reflects the altered state.
        db_metadata = db.get_database(database)
        db_metadata['hdfs_link'] = location_to_url(db_metadata['location'])
        response['status'] = 0
        response['data'] = db_metadata
    except Exception, ex:
        # Includes the PopupException raised above for missing properties.
        response['status'] = 1
        response['data'] = _("Failed to alter database `%s`: %s") % (database,
                                                                     ex)
Exemple #38
0
def show_tables(request, database=None):
    """
    Render the metastore table listing for a database.

    The database comes from the URL, else from the ``hueBeeswaxLastDatabase``
    cookie, else 'default'. A valid POSTed DbForm overrides the selection, and
    the final choice is stored back into the cookie on the response.
    """
    if database is None:
        # Assume always 'default'
        database = request.COOKIES.get('hueBeeswaxLastDatabase', 'default')

    db = dbms.get(request.user)
    all_databases = db.get_databases()

    if request.method != 'POST':
        db_form = DbForm(initial={'database': database}, databases=all_databases)
    else:
        db_form = DbForm(request.POST, databases=all_databases)
        if db_form.is_valid():
            database = db_form.cleaned_data['database']

    tables = db.get_tables(database=database)

    breadcrumb = {
        'name': database,
        'url': reverse('metastore:show_tables', kwargs={'database': database}),
    }
    context = {
        'breadcrumbs': [breadcrumb],
        'tables': tables,
        'db_form': db_form,
        'database': database,
        'tables_json': json.dumps(tables),
        'has_write_access': has_write_access(request.user),
    }

    resp = render("tables.mako", request, context)
    resp.set_cookie("hueBeeswaxLastDatabase", database, expires=90)
    return resp
Exemple #39
0
def drop_database(request):
    """
    Drop the databases listed in the POSTed ``database_selection``.

    On success, redirects to the query watch page, which then returns to the
    databases list. Any failure is re-raised as a PopupException.
    """
    db = dbms.get(request.user)

    if request.method == 'POST':
        selected = request.POST.getlist('database_selection')

        try:
            # Can't be simpler without an important refactoring
            design = SavedQuery.create_empty(
                app_name='beeswax', owner=request.user)
            query_history = db.drop_databases(selected, design)

            watch_url = reverse('beeswax:watch_query', args=[query_history.id])
            success_url = reverse('metastore:databases')
            return redirect(watch_url + '?on_success_url=' + success_url)
        except Exception as ex:
            error_message, log = dbms.expand_exception(ex, db)
            details = {
                'databases': ','.join(selected),
                'error': error_message
            }
            error = _("Failed to remove %(databases)s.  Error: %(error)s") % details
            raise PopupException(error, title=_("Beeswax Error"), detail=log)
Exemple #40
0
def _update_query_state(query_history):
    """
    Update the last_state for a QueryHistory object. Returns success as True/False.

    This only occurs iff the current last_state is submitted or running, since the other
    states are stable, more-or-less.
    Note that there is a transition from available/failed to expired. That occurs lazily
    when the user attempts to view results that have expired.
    """
    # Only states up to and including "running" are refreshed from the server.
    if query_history.last_state <= models.QueryHistory.STATE.running.index:
        try:
            # Ask the owner's query server for the current state of the handle.
            state_enum = dbms.get(
                query_history.owner,
                query_history.get_query_server_config()).get_state(
                    query_history.get_handle())
            if state_enum is None:
                # Error was logged at the source
                return False
        except Exception, e:
            # A failed lookup is recorded as a failed query.
            LOG.error(e)
            state_enum = models.QueryHistory.STATE.failed
        query_history.save_state(state_enum)
Exemple #41
0
def drop_partition(request, database, table):
    """
    Drop the partitions of ``database``.``table`` listed in the POSTed
    ``partition_selection``.

    On success, redirects to the query watch page, which then returns to the
    table's partition listing. Any failure is re-raised as a PopupException.
    """
    db = dbms.get(request.user)

    if request.method == 'POST':
        partition_specs = list(request.POST.getlist('partition_selection'))
        try:
            design = SavedQuery.create_empty(
                app_name='beeswax',
                owner=request.user,
                data=hql_query('').dumps())
            query_history = db.drop_partitions(
                database, table, partition_specs, design)

            watch_url = reverse(
                'beeswax:watch_query_history',
                kwargs={'query_history_id': query_history.id})
            success_url = reverse(
                'metastore:describe_partitions',
                kwargs={'database': database, 'table': table})
            return redirect(watch_url + '?on_success_url=' + success_url)
        except Exception as ex:
            error_message, log = dbms.expand_exception(ex, db)
            details = {
                'partition': '\n'.join(partition_specs),
                'error': error_message
            }
            error = _("Failed to remove %(partition)s.  Error: %(error)s") % details
            raise PopupException(error, title=_("Hive Error"), detail=log)
Exemple #42
0
def download(request, id, format, user_agent=None):
    """
    Export the results of query history ``id`` in the requested ``format``.

    Serves a 403 error when downloads are disabled. Any failure during the
    export is re-raised as a PopupException carrying the error's message, or
    the exception itself when it has no message.
    """
    if not ENABLE_DOWNLOAD.get():
        return serve_403_error(request)

    try:
        query_history = authorized_get_query_history(
            request, id, must_exist=True)
        db = dbms.get(request.user, query_history.get_query_server_config())
        LOG.debug('Download results for query %s: [ %s ]' %
                  (query_history.server_id, query_history.query))

        handle = query_history.get_handle()
        return data_export.download(handle, format, db, user_agent=user_agent)
    except Exception as e:
        # Prefer a non-empty .message, otherwise fall back to the exception.
        message = getattr(e, 'message', None) or e
        raise PopupException(message, detail='')
Exemple #43
0
def watch_query(request, id):
  """
  Wait for the query to finish and (by default) displays the results of query id.
  It understands the optional GET params:

    on_success_url
      If given, it will be displayed when the query is successfully finished.
      Otherwise, it will display the view query results page by default.

    context
      A string of "name:data" that describes the context
      that generated this query result. It may be:
        - "table":"<table_name>"
        - "design":<design_id>

    download
      If set, "?download=true" is appended to the default results URL.

  All other GET params will be passed to on_success_url (if present).
  """
  # Coerce types; manage arguments
  query_history = authorized_get_history(request, id, must_exist=True)
  db = dbms.get(request.user, query_history.get_query_server_config())

  # GET param: context.
  context_param = request.GET.get('context', '')

  # GET param: on_success_url. Default to view_results
  results_url = reverse(get_app_name(request) + ':view_results', kwargs={'id': id, 'first_row': 0})
  if request.GET.get('download', ''):
    results_url += '?download=true'
  on_success_url = request.GET.get('on_success_url')
  if not on_success_url:
    on_success_url = results_url

  # Go to next statement if asked to continue or when a statement with no dataset finished.
  if request.method == 'POST' or (not query_history.is_finished() and query_history.is_success() and not query_history.has_results):
    try:
      query_history = db.execute_next_statement(query_history)
    except Exception, ex:
      # NOTE(review): the failure is swallowed silently here; confirm this is
      # a deliberate best-effort and consider at least logging it.
      pass
Exemple #44
0
def top_tables(request):
    """
    Return, as JSON, the most popular tables.

    POST parameters:
      database: database whose tables are listed in mocking mode
                (default 'default').
      len: maximum number of tables returned in mocking mode (default 1000).

    When OPTIMIZER.MOCKING is enabled the statistics are random values
    generated for the tables of ``database``; otherwise they come from the
    optimizer API.

    Raises ValueError when ``len`` is not an integer.
    """
    response = {'status': -1}

    database = request.POST.get('database', 'default')
    # POST values arrive as strings; convert so the value can be used as a
    # slice bound. A local name avoids shadowing the builtin len().
    limit = int(request.POST.get('len', 1000))

    if OPTIMIZER.MOCKING.get():
        from beeswax.server import dbms
        db = dbms.get(request.user)
        tables = [{
            'name': table,
            'popularity': random.randint(1, 100),
            'column_count': random.randint(1, 100),
            'is_fact': bool(random.getrandbits(1))
        } for table in db.get_tables(database=database)][:limit]
    else:
        # Get back:
        # u'details': [{u'columnCount': 28, u'name': u'date_dim', u'patternCount': 136, u'workloadPercent': 89, u'total': 92, u'type': u'Dimension', u'eid': u'19'},
        api = OptimizerApi()
        data = api.top_tables()

        tables = [{
            'eid': table['eid'],
            'name': table['name'],
            'popularity': table['workloadPercent'],
            'column_count': table['columnCount'],
            'patternCount': table['patternCount'],
            'total': table['total'],
            'is_fact': table['type'] != 'Dimension'
        } for table in data['details']]

    response['top_tables'] = tables
    response['status'] = 0

    return JsonResponse(response)
Exemple #45
0
def drop_database(request):
    """
    Drop the databases listed in the POSTed ``database_selection``.

    With ``is_embeddable`` set, only the DDL is generated and it is executed
    as a notebook task whose result is returned as JSON. Otherwise the drop is
    submitted directly and the user is redirected to the query watch page.
    Any failure is re-raised as a PopupException.
    """
    db = dbms.get(request.user)

    if request.method == 'POST':
        selected = request.POST.getlist('database_selection')

        try:
            design = SavedQuery.create_empty(
                app_name='beeswax',
                owner=request.user,
                data=hql_query('').dumps())

            if not request.POST.get('is_embeddable'):
                query_history = db.drop_databases(selected, design)
                watch_url = reverse(
                    'beeswax:watch_query_history',
                    kwargs={'query_history_id': query_history.id})
                success_url = reverse('metastore:databases')
                return redirect(watch_url + '?on_success_url=' + success_url)

            ddl = db.drop_databases(selected, design, generate_ddl_only=True)
            job = make_notebook(
                name='Execute and watch',
                editor_type='hive',
                statement=ddl.strip(),
                status='ready',
                database=None,
                on_success_url='assist.db.refresh',
                is_task=True)
            return JsonResponse(job.execute(request))
        except Exception as ex:
            error_message, log = dbms.expand_exception(ex, db)
            details = {
                'databases': ','.join(selected),
                'error': error_message
            }
            error = _("Failed to remove %(databases)s.  Error: %(error)s") % details
            raise PopupException(error, title=_("Hive Error"), detail=log)
Exemple #46
0
  def handle(self, *args, **options):
    """
    Close HiveServer2 sessions that have not been used for a number of days.

    Positional arguments:
      args[0]: age threshold in days (default 7).
      args[1]: 'all' to consider every application instead of only 'beeswax'.

    Requires HIVE_CONF_DIR to be set in the environment; the lookup failure
    is logged and re-raised otherwise.
    """
    days = int(args[0]) if len(args) >= 1 else 7
    close_all = args[1] == 'all' if len(args) >= 2 else False

    self.stdout.write('Closing (all=%s) HiveServer2 sessions older than %s days...\n' % (close_all, days))

    # Candidate sessions: status_code 0, optionally restricted to beeswax,
    # and last used on or before the cutoff date.
    sessions = Session.objects.filter(status_code=0)

    if not close_all:
      sessions = sessions.filter(application='beeswax')

    sessions = sessions.filter(last_used__lte=datetime.today() - timedelta(days=days))

    self.stdout.write('Found %d open HiveServer2 sessions to close' % len(sessions))

    import os
    import beeswax
    from beeswax import hive_site

    # Point the Hive configuration at the directory given by the environment.
    try:
      beeswax.conf.HIVE_CONF_DIR.set_for_testing(os.environ['HIVE_CONF_DIR'])
    except:
      LOG.exception('failed to lookup HIVE_CONF_DIR in environment')
      self.stdout.write('Did you export HIVE_CONF_DIR=/etc/hive/conf?\n')
      raise

    # Reset the hive_site state and load the configuration again.
    hive_site.reset()
    hive_site.get_conf()

    closed = 0
    skipped = 0
    for session in sessions:
      try:
        # Close on behalf of the session owner.
        session = dbms.get(user=session.owner).close_session(session)
        closed += 1
      except Exception, e:
        # Best effort: report the failure and continue with the next session.
        skipped += 1
        self.stdout.write('Session with ID %d could not be closed: %s' % (session.id, str(e)))
Exemple #47
0
def drop_table(request, database):
    """
    Drop the tables of ``database`` listed in the POSTed ``table_selection``.

    On success, redirects to the query watch page, which then returns to the
    tables list. A failure while dropping is re-raised as a PopupException.
    """
    db = dbms.get(request.user)

    if request.method == 'POST':
        table_names = request.POST.getlist('table_selection')
        # Resolved before the try block, so a failed lookup propagates as-is.
        table_objects = [db.get_table(database, name) for name in table_names]
        try:
            # Can't be simpler without an important refactoring
            design = SavedQuery.create_empty(
                app_name='beeswax',
                owner=request.user,
                data=hql_query('').dumps())
            query_history = db.drop_tables(database, table_objects, design)

            watch_url = reverse('beeswax:watch_query', args=[query_history.id])
            success_url = reverse('metastore:show_tables')
            return redirect(watch_url + '?on_success_url=' + success_url)
        except Exception as ex:
            error_message, log = dbms.expand_exception(ex, db)
            details = {
                'tables': ','.join(table_names),
                'error': error_message
            }
            error = _("Failed to remove %(tables)s.  Error: %(error)s") % details
            raise PopupException(error, title=_("Hive Error"), detail=log)
  def load(self, django_user):
    """
    Run a LOAD DATA statement that overwrites table ``self.name`` with the
    local file ``self._contents_file``, executed as ``django_user``.

    Raises InstallException when the statement times out or fails with a
    BeeswaxException.
    """
    LOAD_HQL = \
      """
      LOAD DATA local INPATH
      '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
      """

    LOG.info('Loading data into table "%s"' % (self.name,))
    statement = LOAD_HQL % {'tablename': self.name, 'filename': self._contents_file}
    query = hql_query(statement)
    try:
      # A falsy result from execute_and_wait is treated as a timeout.
      if not dbms.get(django_user).execute_and_wait(query):
        msg = _('Error loading table %(table)s: Operation timeout.') % {'table': self.name}
        LOG.error(msg)
        raise InstallException(msg)
    except BeeswaxException as ex:
      msg = _('Error loading table %(table)s: %(error)s') % {'table': self.name, 'error': ex}
      LOG.error(msg)
      raise InstallException(msg)
Exemple #49
0
def _get_sample_data(db, database, table, column):
    """
    Build the response dict holding sample rows for a table or one column.

    Impala-only tables are sampled through an Impala connection even when
    ``db`` targets another server. When ``column`` is set, the rows are
    reduced to the sorted distinct values of the first result column.
    """
    table_obj = db.get_table(database, table)

    on_impala = db.client.query_server['server_name'] == 'impala'
    if table_obj.is_impala_only and not on_impala:
        impala_server = get_query_server_config('impala')
        db = dbms.get(db.client.user, impala_server)

    sample_data = db.get_sample(database, table_obj, column)
    if not sample_data:
        return {'status': -1, 'message': _('Failed to get sample data.')}

    rows = escape_rows(sample_data.rows(), nulls_only=True)
    if column:
        distinct_values = set(row[0] for row in rows)
        rows = [[value] for value in sorted(distinct_values)]

    return {
        'status': 0,
        'headers': sample_data.cols(),
        'full_headers': sample_data.full_cols(),
        'rows': rows,
    }
Exemple #50
0
def get_query_form(request):
    """
    Fetch the database choices, as (value, label) pairs, for the query form.

    Raises PopupException when the databases cannot be listed. A Thrift
    transport error that looks like a failed login gets the dedicated
    authentication message; every other failure gets the generic "may be
    down" message.
    """
    try:
        # Get database choices
        query_server = dbms.get_query_server_config(get_app_name(request))
        db = dbms.get(request.user, query_server)
        databases = [(database, database)
                     for database in db.get_databases()]
    except StructuredThriftTransportException as e:
        # Handled at the same level as the generic case: when nested inside an
        # outer `except Exception`, the authentication message was caught
        # again and replaced by the generic one.
        if 'TSocket read 0 bytes' in str(
                e) or 'Error validating the login' in str(e):
            raise PopupException(_(
                'Failed to authenticate to query server, check authentication configurations.'
            ),
                                 detail=e)
        raise PopupException(_(
            'Unable to access databases, Query Server or Metastore may be down.'
        ),
                             detail=e)
    except Exception as e:
        raise PopupException(_(
            'Unable to access databases, Query Server or Metastore may be down.'
        ),
                             detail=e)
Exemple #51
0
  def close_session(self, session):
    """
    Close the stored session described by the ``session`` dict.

    ``session`` provides 'type' (the application name) and 'id'. Non-superusers
    can only close sessions they own.

    Returns a dict with 'status' (0 on success, -1 otherwise), 'message' and,
    on success, a 'session' summary.
    """
    app_name = session.get('type')
    session_id = session.get('id')

    query_server = get_query_server_config(name=app_name)

    response = {'status': -1, 'message': ''}

    # Kept separate from the `session` argument: reusing that name left the
    # (truthy) input dict in place when the lookup failed, so the close call
    # below still ran with the wrong object.
    session_record = None

    try:
      filters = {'id': session_id, 'application': query_server['server_name']}
      if not self.user.is_superuser:
        filters['owner'] = self.user
      session_record = Session.objects.get(**filters)
    except Session.DoesNotExist:
      response['message'] = _('Session does not exist or you do not have permissions to close the session.')

    if session_record:
      closed_session = dbms.get(self.user, query_server).close_session(session_record)
      response['status'] = 0
      response['message'] = _('Session successfully closed.')
      response['session'] = {'id': session_id, 'application': closed_session.application, 'status': closed_session.status_code}

    return response
Exemple #52
0
def _index(request, file_format, collection_name, query=None):
    """
    Index the input described by ``file_format`` into ``collection_name``.

    The collection is created first when missing. Depending on
    ``file_format['inputFormat']``:
      - 'table': the morphline job reads the table's location;
      - 'file': the morphline job reads the given path under ${nameNode};
      - 'hs2_handle': the data is loaded from the fetch handle and that
        result is returned directly, without a morphline job;
      - anything else: the morphline job runs without an input path.
    """
    indexer = Indexer(request.user, request.fs)

    unique_field = indexer.get_unique_field(file_format)
    needs_generated_id = indexer.is_unique_generated(file_format)

    schema_fields = indexer.get_kept_field_list(file_format['columns'])
    if needs_generated_id:
        schema_fields += [{"name": unique_field, "type": "string"}]

    controller = CollectionManagerController(request.user)
    if not controller.collection_exists(collection_name):
        controller.create_collection(
            collection_name, schema_fields, unique_key_field=unique_field)

    input_format = file_format['inputFormat']
    if input_format == 'table':
        table_metadata = dbms.get(request.user).get_table(
            database=file_format['databaseName'],
            table_name=file_format['tableName'])
        input_path = table_metadata.path_location
    elif input_format == 'file':
        input_path = '${nameNode}%s' % file_format["path"]
    elif input_format == 'hs2_handle':
        searcher = CollectionManagerController(request.user)
        column_names = [field['name'] for field in file_format['columns']]
        return searcher.update_data_from_hive(
            collection_name,
            ['_uuid'] + column_names,
            fetch_handle=file_format['fetch_handle'])
    else:
        input_path = None

    morphline = indexer.generate_morphline_config(
        collection_name, file_format, unique_field)

    return indexer.run_morphline(
        request, collection_name, morphline, input_path, query)
Exemple #53
0
def _get_sample_data(db, database, table, column, is_async=False, cluster=None, operation=None):
  """
  Build the response dict holding sample data for a table or one column.

  Impala-only tables are sampled through an Impala connection even when ``db``
  targets another server. With ``is_async`` the sample is requested as SQL
  only and executed through a notebook, whose execution result is returned
  under 'result'. Otherwise the rows and headers are returned directly; when
  ``column`` is set, the rows are reduced to the sorted distinct values of the
  first result column.
  """
  table_obj = db.get_table(database, table)

  on_impala = db.client.query_server['server_name'] == 'impala'
  if table_obj.is_impala_only and not on_impala:
    impala_server = get_query_server_config('impala', connector=cluster)
    db = dbms.get(db.client.user, impala_server, cluster=cluster)

  sample_data = db.get_sample(database, table_obj, column, generate_sql_only=is_async, operation=operation)
  response = {'status': -1}

  if not sample_data:
    response['message'] = _('Failed to get sample data.')
    return response

  response['status'] = 0

  if not is_async:
    rows = escape_rows(sample_data.rows(), nulls_only=True)
    if column:
      distinct_values = set(row[0] for row in rows)
      rows = [[value] for value in sorted(distinct_values)]

    response['headers'] = sample_data.cols()
    response['full_headers'] = sample_data.full_cols()
    response['rows'] = rows
    return response

  # Async: sample_data is passed as the statement of a notebook to execute.
  title = _('Table sample for `%(database)s`.`%(table)s`.`%(column)s`') % {'database': database, 'table': table, 'column': column}
  notebook = make_notebook(
      name=title,
      editor_type=_get_servername(db),
      statement=sample_data,
      status='ready-execute',
      skip_historify=True,
      is_task=False,
      compute=cluster if cluster else None
  )
  response['result'] = notebook.execute(request=MockedDjangoRequest(user=db.client.user), batch=False)
  if table_obj.is_impala_only:
    response['result']['type'] = 'impala'

  return response
Exemple #54
0
    def put(self, request, original_variant, pk):
        """
        Modify a variant in both Impala and HBase.

        The original and modified variants are converted to flat JSON and
        merged into HBase-style data. An INSERT is then run on Impala and the
        row ``pk`` is written to the HBase 'variants' table.

        Raises Exception when the Impala statement returns no handle.
        """
        # Debug dump of the incoming payload. The context manager guarantees
        # the handle is closed even if serialization or the write fails.
        with open('/tmp/superhello.txt', 'w') as f:
            f.write(json.dumps(request.data))

        # We convert the original and modified data to flatjson
        fc = formatConverters(input_file='stuff.json',
                              output_file='stuff.json')
        original_flatjson = fc.convertVariantJsonToFlatJson(
            json_data=original_variant)
        modified_flatjson = fc.convertVariantJsonToFlatJson(
            json_data=request.data)

        # We convert the data to hbase, and we modify directly some fields (note: the keys are almost the same for hbase and impala)
        hbase_data = fc.convertVariantFlatJsonToHbase(
            original_data=original_flatjson, modified_data=modified_flatjson)

        # Impala - We make the query
        query_server = get_query_server_config(name='impala')
        db = dbms.get(request.user, query_server=query_server)
        # NOTE(review): `query_data` is not defined anywhere in this method;
        # unless it is a module-level name this line raises NameError. The
        # statement is also assembled by string concatenation -- confirm the
        # values can never come from untrusted input.
        query = hql_query("INSERT INTO variant(" + ",".join(query_data) + ")")
        handle = db.execute_and_wait(query, timeout_sec=5.0)
        if handle:
            db.close(handle)
        else:
            raise Exception("Impossible to create the variant...")

        # HBase - We add the data in that table too
        hbaseApi = HbaseApi(user=request.user)
        currentCluster = hbaseApi.getClusters().pop()
        rowkey = pk
        hbaseApi.putRow(cluster=currentCluster['name'],
                        tableName='variants',
                        row=rowkey,
                        data=hbase_data)
Exemple #55
0
def guess_format(request):
  """
  Guess the format of the input described by the POSTed ``fileFormat`` JSON.

  - 'file' input: the file is opened and handed to the indexer's format
    guesser; the result is then passed through _convert_format.
  - 'table' input: the format is derived from the table's storage details and
    its 'format' property. Only 'text' and 'parquet' are supported.

  Raises PopupException when the path is not a file, when the table cannot be
  loaded, or when the table format is not supported.
  """
  file_format = json.loads(request.POST.get('fileFormat', '{}'))

  if file_format['inputFormat'] == 'file':
    path = urllib.unquote(file_format["path"])
    indexer = MorphlineIndexer(request.user, request.fs)
    if not request.fs.isfile(path):
      raise PopupException(_('Path %(path)s is not a file') % file_format)

    stream = request.fs.open(path)
    format_ = indexer.guess_format({
      "file": {
        "stream": stream,
        "name": path
      }
    })
    _convert_format(format_)
  elif file_format['inputFormat'] == 'table':
    db = dbms.get(request.user)
    try:
      table_metadata = db.get_table(database=file_format['databaseName'], table_name=file_format['tableName'])
    except Exception, e:
      raise PopupException(e.message if hasattr(e, 'message') and e.message else e)
    # Collect the storage settings into a dict: a 'key=value' data_type is
    # split into its two parts, otherwise data_type maps to the comment.
    storage = {}
    for delim in table_metadata.storage_details:
      if delim['data_type']:
        if '=' in delim['data_type']:
          key, val = delim['data_type'].split('=', 1)
          storage[key] = val
        else:
          storage[delim['data_type']] = delim['comment']
    if table_metadata.details['properties']['format'] == 'text':
      # Text tables are described as CSV, using the table's field delimiter
      # (',' when none is recorded).
      format_ = {"quoteChar": "\"", "recordSeparator": '\\n', "type": "csv", "hasHeader": False, "fieldSeparator": storage.get('field.delim', ',')}
    elif table_metadata.details['properties']['format'] == 'parquet':
      format_ = {"type": "parquet", "hasHeader": False,}
    else:
      raise PopupException('Hive table format %s is not supported.' % table_metadata.details['properties']['format'])
Exemple #56
0
def alter_table(request, database, table):
  """Rename a table or change its comment from the POSTed fields.

  Reads ``new_table_name`` and ``comment`` from ``request.POST`` and forwards
  both to ``db.alter_table``. The outcome is recorded in a ``response`` dict:

  * on success ``status`` is 0 and ``data`` describes the altered table
    (name, comment, is_view, location, properties);
  * if anything inside the ``try`` raises, ``status`` is 1 and ``data`` holds
    a translated error message including the exception text.
  """
  db = dbms.get(request.user)
  response = {'status': -1, 'data': ''}
  try:
    new_table_name = request.POST.get('new_table_name', None)
    comment = request.POST.get('comment', None)

    # Cannot modify both name and comment at same time, name will get precedence
    if new_table_name and comment:
      LOG.warning('Cannot alter both table name and comment at the same time, will perform rename.')

    table_obj = db.alter_table(database, table, new_table_name=new_table_name, comment=comment)

    response['status'] = 0
    response['data'] = {
      'name': table_obj.name,
      'comment': table_obj.comment,
      'is_view': table_obj.is_view,
      'location': table_obj.path_location,
      'properties': table_obj.properties
    }
  except Exception as ex:
    response['status'] = 1
    response['data'] = _("Failed to alter table `%s`.`%s`: %s") % (database, table, str(ex))
  # NOTE(review): the visible excerpt ends here without returning ``response``;
  # confirm against the full source.
Exemple #57
0
def get_query_form(request):
  """Build a ``QueryForm`` bound to ``request.POST`` with database choices filled in.

  The database list is fetched from the query server configured for the
  requesting app.

  Returns:
    The bound ``QueryForm`` whose ``database`` field choices are the
    ``(name, name)`` pairs of the available databases.

  Raises:
    PopupException: with an authentication-specific message when the Thrift
      transport error text indicates a failed login; with a generic
      "Unable to access databases" message for any other failure while
      listing databases. The original exception is passed as ``detail``.
    RuntimeError: if the server returned no databases.
  """
  try:
    # Get database choices
    query_server = dbms.get_query_server_config(get_app_name(request))
    db = dbms.get(request.user, query_server)
    databases = [(database, database) for database in db.get_databases()]
  except StructuredThriftTransportException as e:
    # If Thrift exception was due to failed authentication, raise corresponding message.
    # This is raised from a flat handler on purpose: when it was raised inside
    # a nested try, the outer `except Exception` caught it and replaced it
    # with the generic message below.
    if 'TSocket read 0 bytes' in str(e) or 'Error validating the login' in str(e):
      raise PopupException(_('Failed to authenticate to query server, check authentication configurations.'), detail=e)
    raise PopupException(_('Unable to access databases, Query Server or Metastore may be down.'), detail=e)
  except Exception as e:
    raise PopupException(_('Unable to access databases, Query Server or Metastore may be down.'), detail=e)

  if not databases:
    raise RuntimeError(_("No databases are available. Permissions could be missing."))

  query_form = QueryForm()
  query_form.bind(request.POST)
  query_form.query.fields['database'].choices = databases # Could not do it in the form

  return query_form
Exemple #58
0
def config_validator(user):
    """Check that a test query can be run against Impala.

    Unless ``has_connectors()`` is true or ``'test'`` appears in ``sys.argv``,
    a ``SELECT 'Hello World!'`` query is executed through the ``impala`` query
    server configuration and, if a handle comes back, its rows are fetched
    and the handle is closed.

    Returns:
        A list of ``(NICE_NAME, message)`` tuples, one per detected problem;
        empty when nothing went wrong or the check was skipped.
    """
    # dbms is dependent on beeswax.conf, import in method to avoid circular dependency
    from beeswax.design import hql_query
    from beeswax.server import dbms
    from beeswax.server.dbms import get_query_server_config

    res = []

    if has_connectors():
        return res

    try:
        try:
            if 'test' not in sys.argv:  # Avoid tests hanging
                query_server = get_query_server_config(name='impala')
                server = dbms.get(user, query_server)
                query = hql_query("SELECT 'Hello World!';")
                handle = server.execute_and_wait(query, timeout_sec=10.0)

                if handle:
                    try:
                        server.fetch(handle, rows=100)
                    finally:
                        # Close the handle even if the fetch fails so it is
                        # not left open on the server.
                        server.close(handle)
        except StructuredThriftTransportException as ex:
            if 'TSocket read 0 bytes' in str(ex):  # this message appears when authentication fails
                msg = "Failed to authenticate to Impalad, check authentication configurations."
                LOG.exception(msg)
                res.append((NICE_NAME, _(msg)))
            else:
                # Any other transport failure is reported by the handler below.
                raise
    except Exception:
        msg = "No available Impalad to send queries to."
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))

    return res
Exemple #59
0
def get_shared_beeswax_server(db_name='default'):
    """Bring up the shared HiveServer2 test server once and record it in module globals.

    Nothing is done when ``_SHARED_HIVE_SERVER`` is already set. Otherwise the
    shared ``pseudo_hdfs4`` cluster is obtained, ``_start_mini_hs2(cluster)``
    is called unless ``is_live_cluster()`` is true (in which case a no-op
    closer is used), and ``db.open_session`` is retried once per second for up
    to 30 seconds. On success the cluster and the closer are stored in
    ``_SHARED_HIVE_SERVER`` and ``_SHARED_HIVE_SERVER_CLOSER``.

    ``db_name`` is not used in the visible code.

    Raises:
        Exception: if no session could be opened within the time limit.
    """
    global _SHARED_HIVE_SERVER
    global _SHARED_HIVE_SERVER_CLOSER
    if _SHARED_HIVE_SERVER is None:

        cluster = pseudo_hdfs4.shared_cluster()

        if is_live_cluster():

            def s():
                pass
        else:
            s = _start_mini_hs2(cluster)

        start = time.time()
        started = False
        retry_delay = 1

        make_logged_in_client()
        user = User.objects.get(username='******')
        query_server = get_query_server_config()
        db = dbms.get(user, query_server)

        # Poll until a session can be opened or the 30 second budget runs out;
        # setting `started` ends the loop through its condition.
        while not started and time.time() - start <= 30:
            try:
                db.open_session(user)
                started = True
            except Exception as e:
                LOG.info('HiveServer2 server could not be found after: %s' % e)
                time.sleep(retry_delay)

        if not started:
            raise Exception("Server took too long to come up.")

        _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s
    # NOTE(review): the visible excerpt ends here without a return statement;
    # confirm against the full source.
Exemple #60
0
    def create_session(self, lang='hive', properties=None):
        """Get or open a query-server session for ``lang`` and describe it.

        An existing session for ``self.user`` is reused when
        ``Session.objects.get_session`` returns one (``'hive'`` is looked up
        under the ``'beeswax'`` application name); otherwise a new session is
        opened through ``dbms``.

        When ``properties`` is empty, it is taken from the user's default
        configuration if ``USE_DEFAULT_CONFIGURATION`` is enabled and one
        exists, else from ``self.get_properties(lang)``.

        The ``response`` dict built here carries ``type``, ``id``,
        ``properties``, ``configuration``, ``reuse_session`` and
        ``session_id``. ``session_id`` stays an empty string if the session
        handle cannot be decoded; that failure is only logged.
        """
        application = 'beeswax' if lang == 'hive' else lang

        session = Session.objects.get_session(self.user,
                                              application=application)

        reuse_session = session is not None
        if not reuse_session:
            session = dbms.get(
                self.user,
                query_server=get_query_server_config(
                    name=lang, cluster=self.cluster)).open_session(self.user)

        response = {'type': lang, 'id': session.id}

        if not properties:
            config = None
            if USE_DEFAULT_CONFIGURATION.get():
                config = DefaultConfiguration.objects.get_configuration_for_user(
                    app=lang, user=self.user)

            if config is not None:
                properties = config.properties_list
            else:
                properties = self.get_properties(lang)

        response['properties'] = properties
        response['configuration'] = json.loads(session.properties)
        response['reuse_session'] = reuse_session
        response['session_id'] = ''

        # Best effort: a handle that cannot be decoded leaves session_id empty.
        try:
            decoded_guid = session.get_handle().sessionId.guid
            response['session_id'] = unpack_guid(decoded_guid)
        except Exception as e:
            LOG.warning('Failed to decode session handle: %s' % e)
        # NOTE(review): the visible excerpt ends here without returning
        # ``response``; confirm against the full source.