Example 1
    def create_table_as_a_select(self, request, query_history, target_database, target_table, result_meta):
        design = query_history.design.get_design()
        database = design.query["database"]

        # Case 1: Hive Server 2 backend or results straight from an existing table
        if result_meta.in_tablename:
            self.use(database)
            query = self._get_and_validate_select_query(design, query_history)
            hql = "CREATE TABLE %s.%s AS %s" % (target_database, target_table, query)
            query_history = self.execute_statement(hql)
        else:
            # Case 2: The results are in some temporary location
            # Beeswax backward compatibility and optimization
            # 1. Create table
            cols = ""
            schema = result_meta.schema
            for i, field in enumerate(schema.fieldSchemas):
                if i != 0:
                    cols += ",\n"
                cols += "`%s` %s" % (field.name, field.type)

            # The representation of the delimiter is messy.
            # It came from Java as a string, which may have been converted from an integer.
            # So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
            delim = result_meta.delim
            if not delim.isdigit():
                delim = str(ord(delim))

            hql = """
            CREATE TABLE `%s` (
            %s
            )
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY '\%s'
            STORED AS TextFile
            """ % (
                target_table,
                cols,
                delim.zfill(3),
            )

            query = hql_query(hql)
            self.execute_and_wait(query)

            try:
                # 2. Move the results into the table's storage
                table_obj = self.get_table("default", target_table)
                table_loc = request.fs.urlsplit(table_obj.path_location)[2]
                result_dir = request.fs.urlsplit(result_meta.table_dir)[2]
                request.fs.rename_star(result_dir, table_loc)
                LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
                request.info(request, _("Saved query results as new table %(table)s.") % {"table": target_table})
                query_history.save_state(QueryHistory.STATE.expired)
            except Exception, ex:
                query = hql_query("DROP TABLE `%s`" % target_table)
                try:
                    self.execute_and_wait(query)
                except Exception, double_trouble:
                    LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
                raise ex
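The delimiter handling in Case 2 above is the subtlest part: a non-digit delimiter is replaced by its decimal ordinal and zero-padded to three digits so it can be spliced into the DDL after a backslash. A minimal standalone sketch of just that transformation (the sample values are the ones named in the comment):

def normalize_delim(delim):
    # Mirrors the snippet above: non-digit delimiters (e.g. ",") become their
    # ordinal, and the result is zero-padded to three digits for the DDL escape.
    if not delim.isdigit():
        delim = str(ord(delim))
    return delim.zfill(3)

for raw in ["1", "10", ","]:
    print("%s -> FIELDS TERMINATED BY '\\%s'" % (raw, normalize_delim(raw)))
# 1  -> FIELDS TERMINATED BY '\001'
# 10 -> FIELDS TERMINATED BY '\010'
# ,  -> FIELDS TERMINATED BY '\044'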
Example 2
def init_example(request):
    """ Allow to make some test for the developpers, to see if the insertion and the querying of data is correct """

    result = {'status': -1,'data': {}}

    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
  
    # Drop the table if it already exists
    hql = "DROP TABLE IF EXISTS val_test_2;"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
  
    # Create the table
    hql = "CREATE TABLE val_test_2 (id int, token string);"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
  
    # Adding some data
    hql = " INSERT OVERWRITE val_test_2 values (1, 'a'), (2, 'b'), (-1,'xyzzy');"
    # hql = "INSERT INTO TABLE testset_bis VALUES (2, 25.0)"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
  
    # querying the data
    hql = "SELECT * FROM val_test_2"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        data = db.fetch(handle, rows=100)
        result['data'] = list(data.rows())
        db.close(handle)
 
    return render('database.initialize.mako', request, locals())
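Most examples in this collection repeat the same four-step pattern seen here: build the query with hql_query, run it with execute_and_wait, fetch rows only if a handle came back, then close the handle. A minimal sketch of that pattern as a reusable helper (run_and_fetch is a made-up name; only hql_query, execute_and_wait, fetch and close are taken from the examples):

from beeswax.design import hql_query

def run_and_fetch(db, statement, timeout_sec=5.0, rows=100):
    # Hypothetical helper distilled from the surrounding examples: returns the
    # fetched rows, or None when execute_and_wait timed out (no handle).
    query = hql_query(statement)
    handle = db.execute_and_wait(query, timeout_sec=timeout_sec)
    if not handle:
        return None
    try:
        return list(db.fetch(handle, rows=rows).rows())
    finally:
        db.close(handle)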
Example 3
def _save_results_ctas(request, query_history, target_table, result_meta):
  """
  Handle saving results as a new table. Returns HTTP response.
  May raise BeeswaxException, IOError.
  """
  query_server = query_history.get_query_server() # Query server requires DDL support
  db = dbms.get(request.user)

  # Case 1: The results are straight from an existing table
  if result_meta.in_tablename:
    hql = 'CREATE TABLE `%s` AS SELECT * FROM %s' % (target_table, result_meta.in_tablename)
    query = hql_query(hql)
    # Display the CTAS running. Could take a long time.
    return execute_directly(request, query, query_server, on_success_url=urlresolvers.reverse(show_tables))

  # Case 2: The results are in some temporary location
  # 1. Create table
  cols = ''
  schema = result_meta.schema
  for i, field in enumerate(schema.fieldSchemas):
    if i != 0:
      cols += ',\n'
    cols += '`%s` %s' % (field.name, field.type)

  # The representation of the delimiter is messy.
  # It came from Java as a string, which may have been converted from an integer.
  # So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
  delim = result_meta.delim
  if not delim.isdigit():
    delim = str(ord(delim))

  hql = '''
        CREATE TABLE `%s` (
        %s
        )
        ROW FORMAT DELIMITED
        FIELDS TERMINATED BY '\%s'
        STORED AS TextFile
        ''' % (target_table, cols, delim.zfill(3))

  query = hql_query(hql)
  db.execute_and_wait(query)

  try:
    # 2. Move the results into the table's storage
    table_obj = db.get_table('default', target_table)
    table_loc = request.fs.urlsplit(table_obj.path_location)[2]
    request.fs.rename_star(result_meta.table_dir, table_loc)
    LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
    messages.info(request, _('Saved query results as new table %(table)s') % {'table': target_table})
    query_history.save_state(models.QueryHistory.STATE.expired)
  except Exception, ex:
    LOG.error('Error moving data into storage of table %s. Will drop table.' % (target_table,))
    query = hql_query('DROP TABLE `%s`' % (target_table,))
    try:
      db.execute_directly(query)        # Don't wait for results
    except Exception, double_trouble:
      LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
    raise ex
Example 4
File: dbms.py Project: jackesh/hue
  def create_table_as_a_select(self, request, query_history, target_table, result_meta):
    design = query_history.design.get_design()
    database = design.query['database']

    # Case 1: Hive Server 2 backend or results straight from an existing table
    if result_meta.in_tablename:
      hql = 'CREATE TABLE `%s.%s` AS %s' % (database, target_table, design.query['query'])
      #query = hql_query(hql, database=database)
      query_history = self.execute_statement(hql)
      url = redirect(reverse('beeswax:watch_query', args=[query_history.id]) + '?on_success_url=' + reverse('metastore:describe_table', args=[database, target_table]))
    else:
      # Case 2: The results are in some temporary location
      # Beeswax backward compatibility and optimization
      # 1. Create table
      cols = ''
      schema = result_meta.schema
      for i, field in enumerate(schema.fieldSchemas):
        if i != 0:
          cols += ',\n'
        cols += '`%s` %s' % (field.name, field.type)

      # The representation of the delimiter is messy.
      # It came from Java as a string, which may have been converted from an integer.
      # So it could be "1" (^A), or "10" (\n), or "," (a comma literally).
      delim = result_meta.delim
      if not delim.isdigit():
        delim = str(ord(delim))

      hql = '''
            CREATE TABLE `%s` (
            %s
            )
            ROW FORMAT DELIMITED
            FIELDS TERMINATED BY '\%s'
            STORED AS TextFile
            ''' % (target_table, cols, delim.zfill(3))

      query = hql_query(hql)
      self.execute_and_wait(query)

      try:
        # 2. Move the results into the table's storage
        table_obj = self.get_table('default', target_table)
        table_loc = request.fs.urlsplit(table_obj.path_location)[2]
        result_dir = request.fs.urlsplit(result_meta.table_dir)[2]
        request.fs.rename_star(result_dir, table_loc)
        LOG.debug("Moved results from %s to %s" % (result_meta.table_dir, table_loc))
        request.info(request, _('Saved query results as new table %(table)s.') % {'table': target_table})
        query_history.save_state(QueryHistory.STATE.expired)
      except Exception, ex:
        query = hql_query('DROP TABLE `%s`' % target_table)
        try:
          self.execute_and_wait(query)
        except Exception, double_trouble:
          LOG.exception('Failed to drop table "%s" as well: %s' % (target_table, double_trouble))
        raise ex
Example 5
File: conf.py Project: lorelib/hue
def config_validator(user):
  # dbms is dependent on beeswax.conf (this file)
  # import in method to avoid circular dependency
  from beeswax.design import hql_query
  from beeswax.server import dbms
  from beeswax.server.dbms import get_query_server_config

  res = []
  try:
    try:
      if not 'test' in sys.argv: # Avoid tests hanging
        query_server = get_query_server_config(name='impala')
        server = dbms.get(user, query_server)
        query = hql_query("SELECT 'Hello World!';")
        handle = server.execute_and_wait(query, timeout_sec=10.0)

        if handle:
          server.fetch(handle, rows=100)
          server.close(handle)
    except StructuredThriftTransportException, ex:
      if 'TSocket read 0 bytes' in str(ex):  # this message appears when authentication fails
        msg = "Failed to authenticate to Impalad, check authentication configurations."
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))
      else:
        raise ex
  except Exception, ex:
    msg = "No available Impalad to send queries to."
    LOG.exception(msg)
    res.append((NICE_NAME, _(msg)))
Example 6
  def invalidate(self, database=None, flush_all=False):
    handle = None
    try:
      if flush_all or database is None:
        hql = "INVALIDATE METADATA"
        query = hql_query(hql, query_type=QUERY_TYPES[1])
        handle = self.execute_and_wait(query, timeout_sec=10.0)
      else:
        diff_tables = self._get_different_tables(database)
        for table in diff_tables:
          hql = "INVALIDATE METADATA `%s`.`%s`" % (database, table)
          query = hql_query(hql, query_type=QUERY_TYPES[1])
          handle = self.execute_and_wait(query, timeout_sec=10.0)
    except QueryServerTimeoutException, e:
      # Allow timeout exceptions to propagate
      raise e
Example 7
File: tests.py Project: ahonko/hue
  def test_data_export(self):
    hql = 'SELECT * FROM test'
    query = hql_query(hql)

    # Get the result in xls.
    handle = self.db.execute_and_wait(query)
    xls_resp = download(handle, 'xls', self.db)
    translated_csv = xls2csv(xls_resp.content)
    # It should have 257 lines (256 + header)
    assert_equal(len(translated_csv.strip('\r\n').split('\r\n')), 257)

    # Get the result in csv.
    query = hql_query(hql)
    handle = self.db.execute_and_wait(query)
    csv_resp = download(handle, 'csv', self.db)
    assert_equal(csv_resp.content, translated_csv)
Example 8
  def get_sample(self, database, table, column=None, nested=None):
    result = None
    hql = None

    if not table.is_view:

      limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())

      if column or nested: # Could do column for any type, then nested with partitions 
        if self.server_name == 'impala':
          select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
          hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
      else:
        partition_query = ""
        if table.partition_keys:
          partitions = self.get_partitions(database, table, partition_spec=None, max_parts=1)
          partition_query = 'WHERE ' + ' AND '.join(["%s='%s'" % (table.partition_keys[idx].name, key) for idx, key in enumerate(partitions[0].values)])
        hql = "SELECT * FROM `%s`.`%s` %s LIMIT %s" % (database, table.name, partition_query, limit)

      if hql:
        query = hql_query(hql)
        handle = self.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          result = self.fetch(handle, rows=100)
          self.close(handle)

    return result
Example 9
  def create(self, django_user):
    """
    Create table in the Hive Metastore.
    """
    LOG.info('Creating table "%s"' % (self.name,))
    db = dbms.get(django_user, self.query_server)

    try:
      # Already exists?
      if self.app_name == 'impala':
        db.invalidate_tables('default', [self.name])
      db.get_table('default', self.name)
      msg = _('Table "%(table)s" already exists.') % {'table': self.name}
      LOG.error(msg)
      return False
    except Exception:
      query = hql_query(self.hql)
      try:
        results = db.execute_and_wait(query)
        if not results:
          msg = _('Error creating table %(table)s: Operation timeout.') % {'table': self.name}
          LOG.error(msg)
          raise InstallException(msg)
        return True
      except Exception, ex:
        msg = _('Error creating table %(table)s: %(error)s.') % {'table': self.name, 'error': ex}
        LOG.error(msg)
        raise InstallException(msg)
Example 10
def load_after_create(request, database):
    """
  Automatically load data into a newly created table.

  We get here from the create's on_success_url, and expect to find
  ``table`` and ``path`` from the parameters.
  """
    tablename = request.REQUEST.get("table")
    path = request.REQUEST.get("path")
    is_remove_header = request.REQUEST.get("removeHeader").lower() == "on" and not path.endswith("gz")

    if not tablename or not path:
        msg = _("Internal error: Missing needed parameter to load data into table.")
        LOG.error(msg)
        raise PopupException(msg)

    if is_remove_header:
        remove_header(request.fs, path)

    LOG.debug("Auto loading data from %s into table %s" % (path, tablename))
    hql = "LOAD DATA INPATH '%s' INTO TABLE `%s.%s`" % (path, database, tablename)
    query = hql_query(hql)

    on_success_url = reverse("metastore:describe_table", kwargs={"database": database, "table": tablename})

    return execute_directly(request, query, on_success_url=on_success_url)
Example 11
File: dbms.py Project: gorden2/hue
  def alter_column(self, database, table_name, column_name, new_column_name, column_type, comment=None,
                   partition_spec=None, cascade=False):
    hql = 'ALTER TABLE `%s`.`%s`' % (database, table_name)

    if partition_spec:
      hql += ' PARTITION (%s)' % partition_spec

    hql += ' CHANGE COLUMN `%s` `%s` %s' % (column_name, new_column_name, column_type.upper())

    if comment:
      hql += " COMMENT '%s'" % comment

    if cascade:
      hql += ' CASCADE'

    timeout = SERVER_CONN_TIMEOUT.get()
    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=timeout)

    if handle:
      self.close(handle)
    else:
      msg = _("Failed to execute alter column statement: %s") % hql
      raise QueryServerException(msg)

    return self.get_column(database, table_name, new_column_name)
Example 12
File: dbms.py Project: gorden2/hue
  def get_sample(self, database, table, column=None, nested=None, limit=100):
    result = None
    hql = None

    # Filter on max # of partitions for partitioned tables
    column = '`%s`' % column if column else '*'
    if table.partition_keys:
      hql = self._get_sample_partition_query(database, table, column, limit)
    elif self.server_name == 'impala':
      if column or nested:
        from impala.dbms import ImpalaDbms
        select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
        hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause, from_clause, limit)
      else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s;" % (database, table.name, limit)
    else:
      hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database, table.name, limit)
      # TODO: Add nested select support for HS2

    if hql:
      query = hql_query(hql)
      handle = self.execute_and_wait(query, timeout_sec=5.0)

      if handle:
        result = self.fetch(handle, rows=100)
        self.close(handle)

    return result
Example 13
  def test_document_create(self):
    sql = 'SELECT * FROM sample_07'

    design = hql_query(sql)

    # is_auto
    # is_trashed
    # is_redacted
    old_query = SavedQuery.objects.create(
        type=SavedQuery.TYPES_MAPPING['hql'],
        owner=self.user,
        data=design.dumps(),
        name='See examples',
        desc='Example of old format'
    )

    try:
      new_query = import_saved_beeswax_query(old_query)
      new_query_data = new_query.get_data()

      assert_equal('query-hive', new_query_data['type'])
      assert_equal('See examples', new_query_data['name'])
      assert_equal('Example of old format', new_query_data['description'])

      assert_equal('ready', new_query_data['snippets'][0]['status'])
      assert_equal('See examples', new_query_data['snippets'][0]['name'])
      assert_equal('SELECT * FROM sample_07', new_query_data['snippets'][0]['statement_raw'])

      assert_equal([], new_query_data['snippets'][0]['properties']['settings'])
      assert_equal([], new_query_data['snippets'][0]['properties']['files'])
      assert_equal([], new_query_data['snippets'][0]['properties']['functions'])
    finally:
      old_query.delete()
Example 14
def database_initialize(request):
    """ Install the tables for this application """

    # The variant tables (impala and hbase)
    database_create_variants(request, temporary=False)

    # Connection to the database
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
  
    # The sql queries
    sql = "DROP TABLE IF EXISTS map_sample_id; CREATE TABLE map_sample_id (internal_sample_id STRING, customer_sample_id STRING, date_creation TIMESTAMP, date_modification TIMESTAMP);  DROP TABLE IF EXISTS sample_files; CREATE TABLE sample_files (id STRING, internal_sample_id STRING, file_path STRING, file_type STRING, date_creation TIMESTAMP, date_modification TIMESTAMP) row format delimited fields terminated by ',' stored as textfile;"

    # The clinical db
    sql += "DROP TABLE IF EXISTS clinical_sample; CREATE TABLE clinical_sample (sample_id STRING, patient_id STRING, date_of_collection STRING, original_sample_id STRING, status STRING, sample_type STRING, biological_contamination STRING, storage_condition STRING, biobank_id STRING, pn_id STRING) row format delimited fields terminated by ',' stored as textfile;"

    # Executing the different queries
    tmp = sql.split(";")
    for hql in tmp:
        hql = hql.strip()
        if hql:
            query = hql_query(hql)
            handle = db.execute_and_wait(query, timeout_sec=5.0)
     
    return render('database.initialize.mako', request, locals())
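Splitting the script on ";" works here because none of these statements contain a literal semicolon, but it would break on DDL such as FIELDS TERMINATED BY ';'. A quote-aware splitter is a safer sketch if that ever becomes possible (split_statements is a made-up helper, not part of Hue):

def split_statements(script):
    # Hypothetical alternative to script.split(";"): a semicolon only ends a
    # statement when we are outside a '...' string literal.
    statements, current, in_quote = [], [], False
    for ch in script:
        if ch == "'":
            in_quote = not in_quote
        if ch == ";" and not in_quote:
            statements.append("".join(current).strip())
            current = []
        else:
            current.append(ch)
    tail = "".join(current).strip()
    if tail:
        statements.append(tail)
    return [s for s in statements if s]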
Example 15
File: views.py Project: 10sr/hue
def drop_database(request):
  db = dbms.get(request.user)

  if request.method == 'POST':
    databases = request.POST.getlist('database_selection')

    try:
      design = SavedQuery.create_empty(app_name='beeswax', owner=request.user, data=hql_query('').dumps())

      if request.POST.get('is_embeddable'):
        sql = db.drop_databases(databases, design, generate_ddl_only=True)
        job = make_notebook(
            name='Execute and watch',
            editor_type='hive',
            statement=sql.strip(),
            status='ready',
            database=None,
            on_success_url='assist.db.refresh',
            is_task=True
        )
        return JsonResponse(job.execute(request))
      else:
        query_history = db.drop_databases(databases, design)
        url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + reverse('metastore:databases')
        return redirect(url)
    except Exception, ex:
      error_message, log = dbms.expand_exception(ex, db)
      error = _("Failed to remove %(databases)s.  Error: %(error)s") % {'databases': ','.join(databases), 'error': error_message}
      raise PopupException(error, title=_("Hive Error"), detail=log)
Example 16
def sample_search(request):
    """ Search the data related to a given sample id """

    result = {'status': -1,'data': {}}

    if request.method != 'POST' or not request.POST or not request.POST['sample_id']:
        result['status'] = 0
        return HttpResponse(json.dumps(result), mimetype="application/json")

    sample_id = str(request.POST['sample_id'])

    # Database connection
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
    customer_sample_id = str(request.user.id)+"_"+sample_id

    # Selecting the files related to the sample id
    hql = "SELECT sample_files.id, sample_files.file_path FROM sample_files JOIN map_sample_id ON sample_files.internal_sample_id = map_sample_id.internal_sample_id WHERE map_sample_id.customer_sample_id = '"+customer_sample_id+"';"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        data = db.fetch(handle, rows=100)
        result['status'] = 1
        result['data'] = list(data.rows())
        db.close(handle)

    # Returning the data
    return HttpResponse(json.dumps(result), mimetype="application/json")
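Note that the WHERE clause above splices customer_sample_id into the HQL by string concatenation, so a crafted sample_id could alter the query. No bind-parameter mechanism is visible in these hql_query examples, so one hedged mitigation is to validate the value before interpolating it; a minimal sketch (quote_literal is a made-up helper, and the sample value is invented):

import re

def quote_literal(value):
    # Hypothetical guard: only allow a conservative character set before the
    # value is interpolated into an HQL string literal.
    if not re.match(r'^[A-Za-z0-9_\-]+$', value):
        raise ValueError("unexpected characters in value: %r" % value)
    return value

customer_sample_id = "42_S001"  # invented example value
hql = ("SELECT sample_files.id, sample_files.file_path FROM sample_files "
       "JOIN map_sample_id ON sample_files.internal_sample_id = map_sample_id.internal_sample_id "
       "WHERE map_sample_id.customer_sample_id = '%s';" % quote_literal(customer_sample_id))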
Example 17
  def execute(self, notebook, snippet):
    db = self._get_db(snippet)

    # Multi-query support: advance to the next statement if more remain, otherwise start from the first
    statement_id = snippet['result']['handle'].get('statement_id', 0)
    statements_count = snippet['result']['handle'].get('statements_count', 1)
    if snippet['result']['handle'].get('has_more_statements'):
      try:
        handle = self._get_handle(snippet)
        db.close_operation(handle) # Always close the previous statement's handle
      except:
        LOG.warn('Could not close previous multiquery query')
      statement_id += 1
    else:
      statement_id = 0

    statements = self._get_statements(snippet['statement'])
    if statements_count != len(statements):
      statement_id = 0
    statement = statements[statement_id]

    settings = snippet['properties'].get('settings', None)
    file_resources = snippet['properties'].get('files', None)
    functions = snippet['properties'].get('functions', None)
    database = snippet.get('database') or 'default'

    query = hql_query(statement, query_type=QUERY_TYPES[0], settings=settings, file_resources=file_resources, functions=functions, database=database)

    try:
      db.use(database)
      handle = db.client.query(query)
    except QueryServerException, ex:
      raise QueryError(ex.message)
Example 18
File: dbms.py Project: cloudera/hue
  def alter_table(self, database, table_name, new_table_name=None, comment=None, tblproperties=None):
    table_obj = self.get_table(database, table_name)
    if table_obj is None:
      raise PopupException(_("Failed to find the table: %s") % table_name)

    if table_obj.is_view:
      hql = 'ALTER VIEW `%s`.`%s`' % (database, table_name)
    else:
      hql = 'ALTER TABLE `%s`.`%s`' % (database, table_name)

    if new_table_name:
      table_name = new_table_name
      hql += ' RENAME TO `%s`' % table_name
    elif comment is not None:
      hql += " SET TBLPROPERTIES ('comment' = '%s')" % comment
    elif tblproperties:
      hql += " SET TBLPROPERTIES (%s)" % ' ,'.join("'%s' = '%s'" % (k, v) for k, v in tblproperties.items())

    timeout = SERVER_CONN_TIMEOUT.get()
    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=timeout)

    if handle:
      self.close(handle)
    else:
      msg = _("Failed to execute alter table statement: %s") % hql
      raise QueryServerException(msg)

    return self.client.get_table(database, table_name)
Example 19
File: dbms.py Project: cloudera/hue
  def get_table_columns_stats(self, database, table, column):
    if self.server_name.startswith('impala'):
      hql = 'SHOW COLUMN STATS `%(database)s`.`%(table)s`' % {'database': database, 'table': table}
    else:
      hql = 'DESCRIBE FORMATTED `%(database)s`.`%(table)s` `%(column)s`' % {'database': database, 'table': table, 'column': column}

    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=5.0)

    if handle:
      result = self.fetch(handle, rows=100)
      self.close(handle)
      data = list(result.rows())

      if self.server_name.startswith('impala'):
        if column == -1: # All the columns
          return [self._extract_impala_column(col) for col in data]
        else:
          data = [col for col in data if col[0] == column][0]
          return self._extract_impala_column(data)
      else:
        return [
            {'col_name': data[2][0]},
            {'data_type': data[2][1]},
            {'min': data[2][2]},
            {'max': data[2][3]},
            {'num_nulls': data[2][4]},
            {'distinct_count': data[2][5]},
            {'avg_col_len': data[2][6]},
            {'max_col_len': data[2][7]},
            {'num_trues': data[2][8]},
            {'num_falses': data[2][9]}
        ]
    else:
      return []
Example 20
File: conf.py Project: lorelib/hue
def config_validator(user):
  # dbms is dependent on beeswax.conf (this file)
  # import in method to avoid circular dependency
  from beeswax.design import hql_query
  from beeswax.server import dbms

  res = []
  try:
    try:
      if not 'test' in sys.argv: # Avoid tests hanging
        server = dbms.get(user)
        query = hql_query("SELECT 'Hello World!';")
        handle = server.execute_and_wait(query, timeout_sec=10.0)

        if handle:
          server.fetch(handle, rows=100)
          server.close(handle)
    except StructuredThriftTransportException, e:
      if 'Error validating the login' in str(e):
        msg = 'Failed to authenticate to HiveServer2, check authentication configurations.'
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))
      else:
        raise e
  except Exception, e:
    msg = "The application won't work without a running HiveServer2."
    LOG.exception(msg)
    res.append((NICE_NAME, _(msg)))
Example 21
  def get_sample(self, database, table, column=None, nested=None):
    result = None
    hql = None

    limit = 100

    if column or nested: # Could do column for any type, then nested with partitions
      if self.server_name == 'impala':
        from impala.dbms import ImpalaDbms
        select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
        hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
    else:
      # Filter on max # of partitions for partitioned tables
      # Impala's SHOW PARTITIONS is different from Hive, so we only support Hive for now
      if self.server_name != 'impala' and table.partition_keys:
        hql = self._get_sample_partition_query(database, table, limit)
      else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

    if hql:
      query = hql_query(hql)
      handle = self.execute_and_wait(query, timeout_sec=5.0)

      if handle:
        result = self.fetch(handle, rows=100)
        self.close(handle)

    return result
Example 22
def _guess_range(user, dashboard, field):
    
    hql = "SELECT MIN(%(field)s), MAX(%(field)s) FROM %(database)s.%(table)s" % {
      'field': field['name'],
      'database': dashboard['properties'][0]['database'],
      'table': dashboard['properties'][0]['table']      
    }
    
    query_server = get_query_server_config(name='impala')
    db = dbms.get(user, query_server=query_server)
    
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=35.0)    

    data = db.fetch(handle, rows=1)
    stats_min, stats_max = list(data.rows())[0]
    db.close(handle)
    
    _min, _m = _round_number_range(stats_min)
    _m, _max = _round_number_range(stats_max)    

    properties = {
      'min': stats_min,
      'max': stats_max,
      'start': _min,
      'end': _max,
      #'gap': gap,
      'canRange': True,
      'isDate': False,
    }
    
    return properties
Example 23
  def _prepare_hql_query(self, snippet, statement, session):
    settings = snippet['properties'].get('settings', None)
    file_resources = snippet['properties'].get('files', None)
    functions = snippet['properties'].get('functions', None)
    properties = session['properties'] if session else []

    # Get properties from session if not defined in snippet
    if not settings:
      settings = next((prop['value'] for prop in properties if prop['key'] == 'settings'), None)

    if not file_resources:
      file_resources = next((prop['value'] for prop in properties if prop['key'] == 'files'), None)

    if not functions:
      functions = next((prop['value'] for prop in properties if prop['key'] == 'functions'), None)

    database = snippet.get('database') or 'default'

    return hql_query(
      statement,
      query_type=QUERY_TYPES[0],
      settings=settings,
      file_resources=file_resources,
      functions=functions,
      database=database
    )
Example 24
    def put(self, request, original_variant, pk):
        # Modify a variant in HBase/Impala

        f = open('/tmp/superhello.txt','w')
        f.write(json.dumps(request.data))
        f.close()

        # We convert the original and modified data to flatjson
        fc = formatConverters(input_file='stuff.json',output_file='stuff.json')
        original_flatjson = fc.convertVariantJsonToFlatJson(json_data=original_variant)
        modified_flatjson = fc.convertVariantJsonToFlatJson(json_data=request.data)

        # We convert the data to hbase, and we modify directly some fields (note: the keys are almost the same for hbase and impala)
        hbase_data = fc.convertVariantFlatJsonToHbase(original_data=original_flatjson,modified_data=modified_flatjson)


        # Impala - We make the query
        query_server = get_query_server_config(name='impala')
        db = dbms.get(request.user, query_server=query_server)
        query = hql_query("INSERT INTO variant("+",".join(query_data)+")")
        handle = db.execute_and_wait(query, timeout_sec=5.0)
        if handle:
            db.close(handle)
        else:
            raise Exception("Impossible to create the variant...")

        # HBase - We add the data in that table too
        hbaseApi = HbaseApi(user=request.user)
        currentCluster = hbaseApi.getClusters().pop()
        rowkey = pk
        hbaseApi.putRow(cluster=currentCluster['name'], tableName='variants', row=rowkey, data=hbase_data)
Example 25
def database_initialize(request):
    """ Install the tables for this application """

    # Connection to the database
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
  
    # The sql queries
    sql = "DROP TABLE IF EXISTS map_sample_id; CREATE TABLE map_sample_id (internal_sample_id STRING, customer_sample_id STRING, date_creation TIMESTAMP, date_modification TIMESTAMP);  DROP TABLE IF EXISTS sample_files; CREATE TABLE sample_files (id STRING, internal_sample_id STRING, file_path STRING, file_type STRING, date_creation TIMESTAMP, date_modification TIMESTAMP);"

    # The clinical db
    sql += "DROP TABLE IF EXISTS clinical_sample; CREATE TABLE clinical_sample (sample_id STRING, patient_id STRING, date_of_collection STRING, original_sample_id STRING, status STRING, sample_type STRING, biological_contamination STRING, storage_condition STRING, biobank_id STRING, pn_id STRING);"

    #DROP TABLE IF EXISTS variants; CREATE TABLE variants (id STRING, alternate_bases STRING, calls STRING, names STRING, info STRING, reference_bases STRING, quality DOUBLE, created TIMESTAMP, elem_start BIGINT, elem_end BIGINT, variantset_id STRING); DROP TABLE IF EXISTS variantsets;
    #CREATE TABLE variantsets (id STRING, dataset_id STRING, metadata STRING, reference_bounds STRING);
    #DROP TABLE IF EXISTS datasets; CREATE TABLE datasets (id STRING, is_public BOOLEAN, name STRING);'''
  
    # Executing the different queries
    tmp = sql.split(";")
    for hql in tmp:
        hql = hql.strip()
        if hql:
            query = hql_query(hql)
            handle = db.execute_and_wait(query, timeout_sec=5.0)
     
    return render('database.initialize.mako', request, locals())
Example 26
def patient_search(request):
    """ Search the data related to a given patient id or dossier no """
    result = {'status': -1,'data': {}}

    if request.method != 'POST' or not request.POST or not ('patient_id' in request.POST or 'dossier_no' in request.POST):
        result['status'] = 0
        return HttpResponse(json.dumps(result), mimetype="application/json")
    
    
    hql = "SELECT patient.patient_id, patient.dossier_no, patient.patient_family_name, patient.patient_first_name, ethnicity.ethnicity FROM patient JOIN ethnicity ON patient.ethnic_id = ethnicity.ethnic_id WHERE "
    
    if "patient_id" in request.POST:
        patient_id = str(request.POST['patient_id'])
        hql += "patient.patient_id = '"+patient_id+"';"
    elif "dossier_no" in request.POST:
        dossier_no = str(request.POST['dossier_no'])
        hql += "patient.dossier_no = '"+dossier_no+"';"

    # Database connection
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)

    # Select the patient information
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        data = db.fetch(handle, rows=100)
        result['status'] = 1
        result['data'] = list(data.rows())
        db.close(handle)

    # Returning the data
    return HttpResponse(json.dumps(result), mimetype="application/json")
Example 27
  def test_import_project(self):
    # Test that when importing a Document that is tagged with a project, we create a directory with that tag name and
    # place the document within it
    sql = 'SELECT * FROM sample_07'
    design = hql_query(sql)

    query = SavedQuery.objects.create(
        type=SavedQuery.TYPES_MAPPING['impala'],
        owner=self.user,
        data=design.dumps(),
        name='Impala query',
        desc='Test Impala query'
    )
    doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)
    default_tag = DocumentTag.objects.get_default_tag(self.user)
    custom_tag = DocumentTag.objects.create_tag(self.user, 'sample_07')
    doc.add_tag(default_tag)
    doc.add_tag(custom_tag)

    try:
      converter = DocumentConverter(self.user)
      converter.convert()

      # Should have a directory named after custom tag
      assert_true(Directory.objects.filter(owner=self.user, name=custom_tag.tag, parent_directory=self.home_dir).exists())

      # But ignore reserved tags (default)
      assert_false(Directory.objects.filter(owner=self.user, name=default_tag.tag, parent_directory=self.home_dir).exists())

      # Document should exist under custom directory
      project_dir = Directory.objects.get(owner=self.user, name=custom_tag.tag, parent_directory=self.home_dir)
      assert_true(Document2.objects.filter(owner=self.user, name='Impala query', parent_directory=project_dir).exists())
    finally:
      query.delete()
Example 28
    def test_convert_hive_query_with_invalid_name(self):
        sql = "SELECT * FROM sample_07"
        settings = [
            {"key": "hive.exec.scratchdir", "value": "/tmp/mydir"},
            {"key": "hive.querylog.location", "value": "/tmp/doc2"},
        ]
        file_resources = [{"type": "jar", "path": "/tmp/doc2/test.jar"}]
        functions = [{"name": "myUpper", "class_name": "org.hue.udf.MyUpper"}]
        design = hql_query(sql, database="etl", settings=settings, file_resources=file_resources, functions=functions)

        query = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING["hql"],
            owner=self.user,
            data=design.dumps(),
            name="Test / Hive query",
            desc="Test Hive query",
        )
        doc = Document.objects.link(query, owner=query.owner, extra=query.type, name=query.name, description=query.desc)

        try:
            # Test that corresponding doc2 is created after convert
            assert_equal(0, Document2.objects.filter(owner=self.user, type="query-hive").count())

            converter = DocumentConverter(self.user)
            converter.convert()

            assert_equal(1, Document2.objects.filter(owner=self.user, type="query-hive").count())

            doc2 = Document2.objects.get(owner=self.user, type="query-hive", is_history=False)
            # Verify Document2 name is stripped of invalid chars
            assert_equal("Test  Hive query", doc2.data_dict["name"])
        finally:
            query.delete()
Example 29
    def test_document_create(self):
        sql = "SELECT * FROM sample_07"

        design = hql_query(sql)

        # is_auto
        # is_trashed
        # is_redacted
        old_query = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING["hql"],
            owner=self.user,
            data=design.dumps(),
            name="See examples",
            desc="Example of old format",
        )

        try:
            new_query = import_saved_beeswax_query(old_query)
            new_query_data = new_query.get_data()

            assert_equal("query-hive", new_query_data["type"])
            assert_equal("See examples", new_query_data["name"])
            assert_equal("Example of old format", new_query_data["description"])

            assert_equal("ready", new_query_data["snippets"][0]["status"])
            assert_equal("See examples", new_query_data["snippets"][0]["name"])
            assert_equal("SELECT * FROM sample_07", new_query_data["snippets"][0]["statement_raw"])

            assert_equal([], new_query_data["snippets"][0]["properties"]["settings"])
            assert_equal([], new_query_data["snippets"][0]["properties"]["files"])
            assert_equal([], new_query_data["snippets"][0]["properties"]["functions"])
        finally:
            old_query.delete()
Example 30
  def get_sample(self, database, table, column=None, nested=None):
    result = None
    hql = None

    if not table.is_view:
      limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())

      if column or nested: # Could do column for any type, then nested with partitions 
        if self.server_name == 'impala':
          select_clause, from_clause = ImpalaDbms.get_nested_select(database, table.name, column, nested)
          hql = 'SELECT %s FROM %s LIMIT %s' % (select_clause, from_clause, limit)
      else:
        if table.partition_keys:  # Filter on max # of partitions for partitioned tables
          hql = self._get_sample_partition_query(database, table, limit)
        else:
          hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

      if hql:
        query = hql_query(hql)
        handle = self.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          result = self.fetch(handle, rows=100)
          self.close(handle)

    return result
Example 31
  def get_table_stats(self, database, table):
    stats = []

    if self.server_name == 'impala':
      hql = 'SHOW TABLE STATS `%(database)s`.`%(table)s`' % {'database': database, 'table': table}

      query = hql_query(hql)
      handle = self.execute_and_wait(query, timeout_sec=5.0)

      if handle:
        result = self.fetch(handle, rows=100)
        self.close(handle)
        stats = list(result.rows())
    else:
      table = self.get_table(database, table)
      stats = table.stats

    return stats
Example 32
  def get_databases(self, database_names='*'):
    identifier = self.to_matching_wildcard(database_names)

    hql = "SHOW DATABASES LIKE '%s'" % (identifier) # self.client.get_databases() is too slow
    query = hql_query(hql)
    timeout = SERVER_CONN_TIMEOUT.get()

    handle = self.execute_and_wait(query, timeout_sec=timeout)

    if handle:
      result = self.fetch(handle, rows=5000)
      self.close(handle)
      databases = [name for database in result.rows() for name in database]
      if len(databases) <= APPLY_NATURAL_SORT_MAX.get():
        databases = apply_natural_sort(databases)
      return databases
    else:
      return []
Example 33
def _submit_create_and_load(request, create_hql, table_name, path, do_load, database):
  """
  Submit the table creation, and set up the load to happen (if ``do_load``).
  """
  on_success_params = {}
  app_name = get_app_name(request)

  if do_load:
    on_success_params['table'] = table_name
    on_success_params['path'] = path
    on_success_url = reverse(app_name + ':load_after_create', kwargs={'database': database})
  else:
    on_success_url = reverse('metastore:describe_table', kwargs={'database': database, 'table': table_name})

  query = hql_query(create_hql, database=database)
  return execute_directly(request, query,
                          on_success_url=on_success_url,
                          on_success_params=on_success_params)
Example 34
def _submit_create_and_load(request, create_hql, table_name, path, load_data, database):
  """
  Submit the table creation, and set up the load to happen (if ``load_data`` == IMPORT).
  """
  on_success_params = QueryDict('', mutable=True)
  app_name = get_app_name(request)

  if load_data == 'IMPORT':
    on_success_params['table'] = table_name
    on_success_params['path'] = path
    on_success_url = reverse(app_name + ':load_after_create', kwargs={'database': database}) + '?' + on_success_params.urlencode()
  else:
    on_success_url = reverse('metastore:describe_table', kwargs={'database': database, 'table': table_name})

  query = hql_query(create_hql, database=database)
  return execute_directly(request, query,
                                on_success_url=on_success_url,
                                on_success_params=on_success_params)
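Examples 33 and 34 differ mainly in how the success parameters travel: the first passes a plain dict via on_success_params, while this variant URL-encodes them into on_success_url itself through Django's QueryDict. A small sketch of that encoding step (the table and path values are invented):

from django.http import QueryDict

params = QueryDict('', mutable=True)
params['table'] = 'sample_07'
params['path'] = '/user/demo/data.csv'
print(params.urlencode())  # e.g. table=sample_07&path=%2Fuser%2Fdemo%2Fdata.csv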
Example 35
def drop_partition(request, database, table):
    source_type = request.POST.get('source_type', 'hive')
    cluster = json.loads(request.POST.get('cluster', '{}'))

    db = _get_db(user=request.user, source_type=source_type, cluster=cluster)

    if request.method == 'POST':
        partition_specs = request.POST.getlist('partition_selection')
        partition_specs = [spec for spec in partition_specs]
        try:
            if request.GET.get("format", "html") == "json":
                last_executed = json.loads(request.POST.get('start_time', '-1'))
                sql = db.drop_partitions(database,
                                         table,
                                         partition_specs,
                                         design=None,
                                         generate_ddl_only=True)
                job = make_notebook(name=_('Drop partition %s') %
                                    ', '.join(partition_specs)[:100],
                                    editor_type=source_type,
                                    statement=sql.strip(),
                                    status='ready',
                                    database=None,
                                    on_success_url='assist.db.refresh',
                                    is_task=True,
                                    last_executed=last_executed)
                return JsonResponse(job.execute(request))
            else:
                design = SavedQuery.create_empty(app_name='beeswax',
                                                 owner=request.user,
                                                 data=hql_query('').dumps())
                query_history = db.drop_partitions(database, table,
                                                   partition_specs, design)
                url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + \
                      reverse('metastore:describe_partitions', kwargs={'database': database, 'table': table})
                return redirect(url)
        except Exception, ex:
            error_message, log = dbms.expand_exception(ex, db)
            error = _("Failed to remove %(partition)s.  Error: %(error)s") % {
                'partition': '\n'.join(partition_specs),
                'error': error_message
            }
            raise PopupException(error, title=_("DB Error"), detail=log)
Example 36
def _submit_create_and_load(request, create_hql, table_name, path, do_load):
    """
  Submit the table creation, and setup the load to happen (if ``do_load``).
  """
    on_success_params = {}

    if do_load:
        on_success_params['table'] = table_name
        on_success_params['path'] = path
        on_success_url = urlresolvers.reverse(load_after_create)
    else:
        on_success_url = urlresolvers.reverse(describe_table,
                                              kwargs={'table': table_name})

    query = hql_query(create_hql)
    return execute_directly(request,
                            query,
                            on_success_url=on_success_url,
                            on_success_params=on_success_params)
Example 37
    def drop_tables(self, database, tables, design):
        hql = []

        for table in tables:
            if table.is_view:
                hql.append("DROP VIEW `%s.%s`" % (
                    database,
                    table.name,
                ))
            else:
                hql.append("DROP TABLE `%s.%s`" % (
                    database,
                    table.name,
                ))
        query = hql_query(';'.join(hql), database)
        design.data = query.dumps()
        design.save()

        return self.execute_query(query, design)
Example 38
  def get_top_terms(self, database, table, column, limit=30, prefix=None):
    limit = min(limit, 100)
    prefix_match = ''
    if prefix:
      prefix_match = "WHERE CAST(%(column)s AS STRING) LIKE '%(prefix)s%%'" % {'column': column, 'prefix': prefix}

    hql = 'SELECT %(column)s, COUNT(*) AS ct FROM `%(database)s`.`%(table)s` %(prefix_match)s GROUP BY %(column)s ORDER BY ct DESC LIMIT %(limit)s' % {
        'database': database, 'table': table, 'column': column, 'prefix_match': prefix_match, 'limit': limit,
    }

    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=60.0) # Hive is very slow

    if handle:
      result = self.fetch(handle, rows=limit)
      self.close(handle)
      return list(result.rows())
    else:
      return []
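The doubled percent sign in '%(prefix)s%%' is needed because the WHERE fragment passes through Python %-formatting before it reaches the database. A quick check of what it expands to (column and prefix values invented):

prefix_match = "WHERE CAST(%(column)s AS STRING) LIKE '%(prefix)s%%'" % {'column': 'code', 'prefix': '00'}
print(prefix_match)  # WHERE CAST(code AS STRING) LIKE '00%'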
Example 39
File: dbms.py Project: ztwu/hue
    def get_sample(self,
                   database,
                   table,
                   column=None,
                   nested=None,
                   limit=100,
                   generate_sql_only=False):
        result = None
        hql = None

        # Filter on max # of partitions for partitioned tables
        column = '`%s`' % column if column else '*'
        if table.partition_keys:
            hql = self._get_sample_partition_query(database, table, column,
                                                   limit)
        elif self.server_name == 'impala':
            if column or nested:
                from impala.dbms import ImpalaDbms
                select_clause, from_clause = ImpalaDbms.get_nested_select(
                    database, table.name, column, nested)
                hql = 'SELECT %s FROM %s LIMIT %s;' % (select_clause,
                                                       from_clause, limit)
            else:
                hql = "SELECT * FROM `%s`.`%s` LIMIT %s;" % (database,
                                                             table.name, limit)
        else:
            hql = "SELECT %s FROM `%s`.`%s` LIMIT %s;" % (column, database,
                                                          table.name, limit)
            # TODO: Add nested select support for HS2

        if hql:
            if generate_sql_only:
                return hql
            else:
                query = hql_query(hql)
                handle = self.execute_and_wait(query, timeout_sec=5.0)

                if handle:
                    result = self.fetch(handle, rows=100)
                    self.close(handle)

        return result
Example 40
    def test_import_permissions(self):
        make_logged_in_client(username="******",
                              groupname="default",
                              recreate=True,
                              is_superuser=False)

        other_user = User.objects.get(username="******")
        test_group = get_default_user_group()

        # Test that when importing a Document with permissions, the corresponding permissions are created for the Doc2
        sql = 'SELECT * FROM sample_07'
        design = hql_query(sql)

        query = SavedQuery.objects.create(
            type=SavedQuery.TYPES_MAPPING['impala'],
            owner=self.user,
            data=design.dumps(),
            name='Impala query',
            desc='Test Impala query')
        doc = Document.objects.link(query,
                                    owner=query.owner,
                                    extra=query.type,
                                    name=query.name,
                                    description=query.desc)
        read_perm = DocumentPermission.objects.create(doc=doc, perms='read')
        read_perm.users.add(other_user)
        read_perm.groups.add(test_group)
        write_perm = DocumentPermission.objects.create(doc=doc, perms='write')
        write_perm.users.add(other_user)

        try:
            converter = DocumentConverter(self.user)
            converter.convert()

            doc2 = Document2.objects.get(owner=self.user, name=query.name)
            # Test that doc2 has same read permissions
            assert_true(other_user in doc2.get_permission('read').users.all())
            assert_true(test_group in doc2.get_permission('read').groups.all())
            # Test that doc2 has same write permissions
            assert_true(other_user in doc2.get_permission('write').users.all())
        finally:
            query.delete()
Example 41
def drop_database(request):
    db = dbms.get(request.user)

    if request.method == 'POST':
        databases = request.POST.getlist('database_selection')

        try:
            design = SavedQuery.create_empty(app_name='beeswax',
                                             owner=request.user,
                                             data=hql_query('').dumps())

            if request.POST.get('is_embeddable'):
                last_executed = json.loads(request.POST.get('start_time', '-1'))
                sql = db.drop_databases(databases,
                                        design,
                                        generate_ddl_only=True)
                job = make_notebook(name=_('Drop database %s') %
                                    ', '.join(databases)[:100],
                                    editor_type='hive',
                                    statement=sql.strip(),
                                    status='ready',
                                    database=None,
                                    on_success_url='assist.db.refresh',
                                    is_task=True,
                                    last_executed=last_executed)
                return JsonResponse(job.execute(request))
            else:
                query_history = db.drop_databases(databases, design)
                url = reverse(
                    'beeswax:watch_query_history',
                    kwargs={
                        'query_history_id': query_history.id
                    }) + '?on_success_url=' + reverse('metastore:databases')
                return redirect(url)
        except Exception, ex:
            error_message, log = dbms.expand_exception(ex, db)
            error = _("Failed to remove %(databases)s.  Error: %(error)s") % {
                'databases': ','.join(databases),
                'error': error_message
            }
            raise PopupException(error, title=_("DB Error"), detail=log)
Example 42
def config_validator(user):
    # dbms is dependent on beeswax.conf (this file)
    # import in method to avoid circular dependency
    from beeswax.design import hql_query
    from beeswax.server import dbms

    res = []
    try:
        try:
            if not 'test' in sys.argv:  # Avoid tests hanging
                server = dbms.get(user)
                query = hql_query("SELECT 'Hello World!';")
                handle = server.execute_and_wait(query, timeout_sec=10.0)

                if handle:
                    server.fetch(handle, rows=100)
                    server.close(handle)
        except StructuredThriftTransportException as e:
            if 'Error validating the login' in str(e):
                msg = 'Failed to authenticate to HiveServer2, check authentication configurations.'
                LOG.exception(msg)
                res.append((NICE_NAME, _(msg)))
            else:
                raise e
    except Exception as e:
        msg = "The application won't work without a running HiveServer2."
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))

    try:
        from desktop.lib.fsmanager import get_filesystem
        warehouse = beeswax.hive_site.get_metastore_warehouse_dir()
        fs = get_filesystem()
        if fs:
            fs.do_as_superuser(fs.stats, warehouse)
    except Exception:
        msg = 'Failed to access Hive warehouse: %s'
        LOG.exception(msg % warehouse)

        return [(NICE_NAME, _(msg) % warehouse)]

    return res
Example 43
  def load_data(self, database, table, form, design):
    hql = "LOAD DATA INPATH"
    hql += " '%s'" % form.cleaned_data['path']
    if form.cleaned_data['overwrite']:
      hql += " OVERWRITE"
    hql += " INTO TABLE "
    hql += "`%s.%s`" % (database, table.name,)
    if form.partition_columns:
      hql += " PARTITION ("
      vals = []
      for key, column_name in form.partition_columns.iteritems():
        vals.append("%s='%s'" % (column_name, form.cleaned_data[key]))
      hql += ", ".join(vals)
      hql += ")"

    query = hql_query(hql, database)
    design.data = query.dumps()
    design.save()

    return self.execute_query(query, design)
Example 44
  def drop_tables(self, database, tables, design, skip_trash=False, generate_ddl_only=False):
    hql = []

    for table in tables:
      if table.is_view:
        hql.append("DROP VIEW `%s`.`%s`" % (database, table.name,))
      else:
        drop_query = "DROP TABLE `%s`.`%s`" % (database, table.name,)
        drop_query += skip_trash and " PURGE" or ""
        hql.append(drop_query)

    query = hql_query(';'.join(hql), database)

    if generate_ddl_only:
      return query.hql_query
    else:
      design.data = query.dumps()
      design.save()

      return self.execute_query(query, design)
Example 45
    def get_sample(self, database, table):
        """No samples if it's a view (HUE-526)"""
        if not table.is_view:
            limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())
            partition_query = ""
            if table.partition_keys:
                partitions = self.get_partitions(database, table, 1)
                partition_query = 'WHERE ' + ' AND '.join([
                    "%s='%s'" % (table.partition_keys[idx].name, key)
                    for idx, key in enumerate(partitions[0].values)
                ])
            hql = "SELECT * FROM `%s.%s` %s LIMIT %s" % (
                database, table.name, partition_query, limit)
            query = hql_query(hql)
            handle = self.execute_and_wait(query, timeout_sec=5.0)

            if handle:
                result = self.fetch(handle, rows=100)
                self.close(handle)
                return result
Example 46
  def load(self, django_user):
    """
    Upload data to HDFS home of user then load (aka move) it into the Hive table (in the Hive metastore in HDFS).
    """
    LOAD_HQL = \
      """
      LOAD DATA INPATH
      '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
      """

    fs = cluster.get_hdfs()

    # Default to the user's HDFS home; Impala without impersonation gets a public destination instead.
    # (The previous if/else left hdfs_root_destination unassigned when Impala impersonation was on.)
    hdfs_root_destination = fs.do_as_user(django_user, fs.get_home_dir)

    if self.app_name == 'impala':
      # Because Impala does not have impersonation on by default, we use a public destination for the upload.
      from impala.conf import IMPERSONATION_ENABLED
      if not IMPERSONATION_ENABLED.get():
        tmp_public = '/tmp/public_hue_examples'
        fs.do_as_user(django_user, fs.mkdir, tmp_public, '0777')
        hdfs_root_destination = tmp_public

    hdfs_destination = os.path.join(hdfs_root_destination, self.name)

    LOG.info('Uploading local data %s to HDFS table "%s"' % (self.name, hdfs_destination))
    fs.do_as_user(django_user, fs.copyFromLocal, self._contents_file, hdfs_destination)

    LOG.info('Loading data into table "%s"' % (self.name,))
    hql = LOAD_HQL % {'tablename': self.name, 'filename': hdfs_destination}
    query = hql_query(hql)

    try:
      results = dbms.get(django_user, self.query_server).execute_and_wait(query)
      if not results:
        msg = _('Error loading table %(table)s: Operation timeout.') % {'table': self.name}
        LOG.error(msg)
        raise InstallException(msg)
    except QueryServerException as ex:
      msg = _('Error loading table %(table)s: %(error)s.') % {'table': self.name, 'error': ex}
      LOG.error(msg)
      raise InstallException(msg)
Example n. 47
  def load_data(self, database, table, form_data, design, generate_ddl_only=False):
    hql = "LOAD DATA INPATH"
    hql += " '%(path)s'" % form_data
    if form_data['overwrite']:
      hql += " OVERWRITE"
    hql += " INTO TABLE "
    hql += "`%s`.`%s`" % (database, table)
    if form_data['partition_columns']:
      hql += " PARTITION ("
      vals = ["%s='%s'" % (column_name, column_value) for column_name, column_value in form_data['partition_columns']]
      hql += ", ".join(vals)
      hql += ")"

    if generate_ddl_only:
      return hql
    else:
      query = hql_query(hql, database)
      design.data = query.dumps()
      design.save()

      return self.execute_query(query, design)
Example n. 48
    def _load_data_to_table(self, django_user, hql, hdfs_destination):
        LOG.info('Loading data into table "%s"' % (self.name, ))
        query = hql_query(hql)

        try:
            results = dbms.get(django_user,
                               self.query_server).execute_and_wait(query)
            if not results:
                msg = _(
                    'Error loading table %(table)s: Operation timeout.') % {
                        'table': self.name
                    }
                LOG.error(msg)
                raise InstallException(msg)
        except QueryServerException as ex:
            msg = _('Error loading table %(table)s: %(error)s.') % {
                'table': self.name,
                'error': ex
            }
            LOG.error(msg)
            raise InstallException(msg)
Example n. 49
  def get_sample(self, database, table, column=None, nested=None):
    result = None
    hql = None

    if not table.is_view:
      limit = min(100, BROWSE_PARTITIONED_TABLE_LIMIT.get())

      if table.partition_keys:  # Filter on max # of partitions for partitioned tables
        hql = self._get_sample_partition_query(database, table, limit)
      else:
        hql = "SELECT * FROM `%s`.`%s` LIMIT %s" % (database, table.name, limit)

      if hql:
        query = hql_query(hql)
        handle = self.execute_and_wait(query, timeout_sec=5.0)

        if handle:
          result = self.fetch(handle, rows=100)
          self.close(handle)

    return result
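
For illustration, the two query shapes produced here (the partitioned variant assumes _get_sample_partition_query filters on the latest partitions; its exact output is not shown in this snippet):

    SELECT * FROM `default`.`logs` LIMIT 100
    SELECT * FROM `default`.`logs` WHERE ds='2014-01-01' LIMIT 100  -- assumed partitioned shape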
Example n. 50
def create_database(request):

  if request.method == "POST":
    data = request.POST.copy()
    data.setdefault("use_default_location", False)
    form = CreateDatabaseForm(data)

    if form.is_valid():
      proposed_query = django_mako.render_to_string("create_database_statement.mako", {
        'database': form.cleaned_data,
      })
      # Mako outputs bytestring in utf8
      proposed_query = proposed_query.decode('utf-8')
      query = hql_query(proposed_query)
      return execute_directly(request, query, on_success_url=reverse('metastore:databases'))
  else:
    form = CreateDatabaseForm()

  return render("create_database.mako", request, {
    'database_form': form,
  })
Example n. 51
File: dbms.py Project: vincexia/hue
  def get_histogram(self, database, table, column, nested=None):
    """
    Returns the results of an Impala SELECT histogram() FROM query for a given column or nested type.

    Assumes that the column/nested type is scalar.
    """
    results = []

    hql = self.get_histogram_query(database, table, column, nested)
    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=5.0)

    if handle:
      result = self.fetch(handle)
      try:
        histogram = list(result.rows())[0][0]  # the actual histogram is in the first-and-only result row
        unique_values = set(histogram.split(', '))
        results = list(unique_values)
      except IndexError as e:
        LOG.warn('Failed to get histogram results, result set has unexpected format: %s' % smart_str(e))
      finally:
        self.close(handle)

    return results
Example n. 52
  def alter_table(self, database, table_name, new_table_name=None, comment=None, tblproperties=None):
    hql = 'ALTER TABLE `%s`.`%s`' % (database, table_name)

    if new_table_name:
      table_name = new_table_name
      hql += ' RENAME TO `%s`' % table_name
    elif comment:
      hql += " SET TBLPROPERTIES ('comment' = '%s')" % comment
    elif tblproperties:
      hql += " SET TBLPROPERTIES (%s)" % ' ,'.join("'%s' = '%s'" % (k, v) for k, v in tblproperties.items())

    timeout = SERVER_CONN_TIMEOUT.get()
    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=timeout)

    if handle:
      self.close(handle)
    else:
      msg = _("Failed to execute alter table statement: %s") % hql
      raise QueryServerException(msg)

    return self.client.get_table(database, table_name)
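
Given the elif chain, each call emits exactly one of three statement shapes (hypothetical names):

    ALTER TABLE `default`.`logs` RENAME TO `logs_v2`
    ALTER TABLE `default`.`logs` SET TBLPROPERTIES ('comment' = 'web logs')
    ALTER TABLE `default`.`logs` SET TBLPROPERTIES ('EXTERNAL' = 'TRUE', 'retention' = '30')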
Example n. 53
def load_after_create(request, database):
  """
  Automatically load data into a newly created table.

  We get here from the create's on_success_url, and expect to find
  ``table`` and ``path`` from the parameters.
  """
  tablename = request.REQUEST.get('table')
  path = request.REQUEST.get('path')

  if not tablename or not path:
    msg = _('Internal error: Missing needed parameter to load data into table.')
    LOG.error(msg)
    raise PopupException(msg)

  LOG.debug("Auto loading data from %s into table %s" % (path, tablename))
  hql = "LOAD DATA INPATH '%s' INTO TABLE `%s.%s`" % (path, database, tablename)
  query = hql_query(hql)
  app_name = get_app_name(request)
  on_success_url = reverse('metastore:describe_table', kwargs={'database': database, 'table': tablename})

  return execute_directly(request, query, on_success_url=on_success_url)
Example n. 54
def drop_partition(request, database, table):
    db = dbms.get(request.user)

    if request.method == 'POST':
        partition_specs = request.POST.getlist('partition_selection')
        try:
            design = SavedQuery.create_empty(app_name='beeswax',
                                             owner=request.user,
                                             data=hql_query('').dumps())
            query_history = db.drop_partitions(database, table,
                                               partition_specs, design)
            url = reverse('beeswax:watch_query_history', kwargs={'query_history_id': query_history.id}) + '?on_success_url=' + \
                  reverse('metastore:describe_partitions', kwargs={'database': database, 'table': table})
            return redirect(url)
        except Exception as ex:
            error_message, log = dbms.expand_exception(ex, db)
            error = _("Failed to remove %(partition)s.  Error: %(error)s") % {
                'partition': '\n'.join(partition_specs),
                'error': error_message
            }
            raise PopupException(error, title=_("Hive Error"), detail=log)
Example n. 55
  def get_table_columns_stats(self, database, table, column):
    if self.server_name == 'impala':
      hql = 'SHOW COLUMN STATS `%(database)s`.`%(table)s`' % {'database': database, 'table': table}
    else:
      hql = 'DESCRIBE FORMATTED `%(database)s`.`%(table)s` `%(column)s`' % {'database': database, 'table': table, 'column': column}

    query = hql_query(hql)
    handle = self.execute_and_wait(query, timeout_sec=5.0)

    if handle:
      result = self.fetch(handle, rows=100)
      self.close(handle)
      data = list(result.rows())

      if self.server_name == 'impala':
        data = [col for col in data if col[0] == column][0]
        return [
            {'col_name': data[0]},
            {'data_type': data[1]},
            {'distinct_count': data[2]},
            {'num_nulls': data[3]},
            {'max_col_len': data[4]},
            {'avg_col_len': data[5]},
        ]
      else:
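        # Hive's DESCRIBE FORMATTED output is positional: this branch assumes
        # the stats land in row index 2, in the fixed order unpacked below.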
        return [
            {'col_name': data[2][0]},
            {'data_type': data[2][1]},
            {'min': data[2][2]},
            {'max': data[2][3]},
            {'num_nulls': data[2][4]},
            {'distinct_count': data[2][5]},
            {'avg_col_len': data[2][6]},
            {'max_col_len': data[2][7]},
            {'num_trues': data[2][8]},
            {'num_falses': data[2][9]}
        ]
    else:
      return []
Example n. 56
def test_split_statements():
    assert_equal([''], hql_query(";;;").statements)
    assert_equal(["select * where id == '10'"],
                 hql_query("select * where id == '10'").statements)
    assert_equal(["select * where id == '10'"],
                 hql_query("select * where id == '10';").statements)
    assert_equal(
        ['select', "select * where id == '10;' limit 100"],
        hql_query("select; select * where id == '10;' limit 100;").statements)
    assert_equal(
        ['select', "select * where id == \"10;\" limit 100"],
        hql_query(
            "select; select * where id == \"10;\" limit 100;").statements)
    assert_equal(
        ['select', "select * where id == '\"10;\"\"\"' limit 100"],
        hql_query("select; select * where id == '\"10;\"\"\"' limit 100;"
                  ).statements)
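
For context, a minimal quote-aware splitter that satisfies the assertions above — a sketch, not Hue's implementation (the real one also has to cope with escaped quotes and comments):

    def split_statements(hql):
        """Split on semicolons that are not inside single or double quotes."""
        statements, current, quote = [], [], None
        for ch in hql:
            if quote:
                current.append(ch)
                if ch == quote:  # closing quote of the same kind
                    quote = None
            elif ch in ("'", '"'):
                quote = ch
                current.append(ch)
            elif ch == ';':
                statements.append(''.join(current).strip())
                current = []
            else:
                current.append(ch)
        tail = ''.join(current).strip()
        if tail:
            statements.append(tail)
        statements = [s for s in statements if s]
        return statements or ['']  # so ";;;" yields ['']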
Example n. 57
  def load(self, django_user):
    """
    Load data into table. Raise InstallException on failure.
    """
    LOAD_HQL = \
      """
      LOAD DATA LOCAL INPATH
      '%(filename)s' OVERWRITE INTO TABLE %(tablename)s
      """

    LOG.info('Loading data into table "%s"' % (self.name,))
    hql = LOAD_HQL % dict(tablename=self.name, filename=self._contents_file)
    query = hql_query(hql)
    try:
      results = dbms.get(django_user).execute_and_wait(query)
      if not results:
        msg = _('Error loading table %(table)s: Operation timeout.') % {'table': self.name}
        LOG.error(msg)
        raise InstallException(msg)
    except BeeswaxException as ex:
      msg = _('Error loading table %(table)s: %(error)s') % {'table': self.name, 'error': ex}
      LOG.error(msg)
      raise InstallException(msg)
Example n. 58
def drop_database(request):
    db = dbms.get(request.user)

    if request.method == 'POST':
        databases = request.POST.getlist('database_selection')

        try:
            # Can't be made simpler without a major refactoring
            design = SavedQuery.create_empty(app_name='beeswax',
                                             owner=request.user,
                                             data=hql_query('').dumps())
            query_history = db.drop_databases(databases, design)
            url = reverse('beeswax:watch_query', args=[
                query_history.id
            ]) + '?on_success_url=' + reverse('metastore:databases')
            return redirect(url)
        except Exception as ex:
            error_message, log = dbms.expand_exception(ex, db)
            error = _("Failed to remove %(databases)s.  Error: %(error)s") % {
                'databases': ','.join(databases),
                'error': error_message
            }
            raise PopupException(error, title=_("Hive Error"), detail=log)
Example n. 59
    def get_tables(self, database='default', table_names='*'):
        identifier = self.to_matching_wildcard(table_names)
        identifier = "'%s'" % identifier if identifier != '*' else ''  # Filter not supported in SparkSql

        hql = "SHOW TABLES IN `%s` %s" % (
            database, identifier
        )  # self.client.get_tables(database, table_names) is too slow
        query = hql_query(hql)
        timeout = SERVER_CONN_TIMEOUT.get()

        handle = self.execute_and_wait(query, timeout_sec=timeout)

        if handle:
            result = self.fetch(handle, rows=5000)
            self.close(handle)
            tables = [
                table[0] for table in result.rows()
            ]  # We only keep the first column as the name, SparkSql returns multiple columns
            if len(tables) <= APPLY_NATURAL_SORT_MAX.get():
                tables = apply_natural_sort(tables)
            return tables
        else:
            return []
Example n. 60
File: dbms.py Project: rootAir/hue
    def drop_partitions(self,
                        db_name,
                        table_name,
                        partition_specs,
                        design=None,
                        generate_ddl_only=False):
        hql = []

        for partition_spec in partition_specs:
            hql.append(
                "ALTER TABLE `%s`.`%s` DROP IF EXISTS PARTITION (%s) PURGE" %
                (db_name, table_name, partition_spec))

        hql = ';'.join(hql)
        query = hql_query(hql, db_name)

        if generate_ddl_only:
            return hql
        else:
            design.data = query.dumps()
            design.save()

            return self.execute_query(query, design)
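
With generate_ddl_only=True and hypothetical inputs, the returned HQL looks like:

    ALTER TABLE `default`.`logs` DROP IF EXISTS PARTITION (ds='2014-01-01') PURGE;ALTER TABLE `default`.`logs` DROP IF EXISTS PARTITION (ds='2014-01-02') PURGE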