Esempio n. 1
0
File: api.py Progetto: cloudera/hue
def analyze_table(request, database, table, columns=None):
  app_name = get_app_name(request)
  cluster = json.loads(request.POST.get('cluster', '{}'))

  query_server = get_query_server_config(app_name, cluster=cluster)
  db = dbms.get(request.user, query_server)

  table_obj = db.get_table(database, table)
  if table_obj.is_impala_only and app_name != 'impala':
    query_server = get_query_server_config('impala')
    db = dbms.get(request.user, query_server)

  response = {'status': -1, 'message': '', 'redirect': ''}

  if request.method == "POST":
    if columns is None:
      query_history = db.analyze_table(database, table)
    else:
      query_history = db.analyze_table_columns(database, table)

    response['watch_url'] = reverse('beeswax:api_watch_query_refresh_json', kwargs={'id': query_history.id})
    response['status'] = 0
  else:
    response['message'] = _('A POST request is required.')

  return JsonResponse(response)
Esempio n. 2
0
def get_session(request, session_id=None):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)

  response = {'status': -1, 'message': ''}

  if session_id:
    session = Session.objects.get(id=session_id, owner=request.user, application=query_server['server_name'])
  else:  # get the latest session for given user and server type
    session = Session.objects.get_session(request.user, query_server['server_name'])

  if session is not None:
    properties = json.loads(session.properties)
    # Redact passwords
    for key, value in properties.items():
      if 'password' in key.lower():
        properties[key] = '*' * len(value)

    response['status'] = 0
    response['session'] = {'id': session.id, 'application': session.application, 'status': session.status_code}
    response['properties'] = properties
  else:
    response['message'] = _('Could not find session or no open sessions found.')

  return JsonResponse(response)
Esempio n. 3
0
def database_initialize(request):
    """ Install the tables for this application """

    # Connexion to the db
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
  
    # The sql queries
    sql = "DROP TABLE IF EXISTS map_sample_id; CREATE TABLE map_sample_id (internal_sample_id STRING, customer_sample_id STRING, date_creation TIMESTAMP, date_modification TIMESTAMP);  DROP TABLE IF EXISTS sample_files; CREATE TABLE sample_files (id STRING, internal_sample_id STRING, file_path STRING, file_type STRING, date_creation TIMESTAMP, date_modification TIMESTAMP);"

    # The clinical db
    sql += "DROP TABLE IF EXISTS clinical_sample; CREATE TABLE clinical_sample (sample_id STRING, patient_id STRING, date_of_collection STRING, original_sample_id STRING, status STRING, sample_type STRING, biological_contamination STRING, storage_condition STRING, biobank_id STRING, pn_id STRING);"

    #DROP TABLE IF EXISTS variants; CREATE TABLE variants (id STRING, alternate_bases STRING, calls STRING, names STRING, info STRING, reference_bases STRING, quality DOUBLE, created TIMESTAMP, elem_start BIGINT, elem_end BIGINT, variantset_id STRING); DROP TABLE IF EXISTS variantsets;
    #CREATE TABLE variantsets (id STRING, dataset_id STRING, metadata STRING, reference_bounds STRING);
    #DROP TABLE IF EXISTS datasets; CREATE TABLE datasets (id STRING, is_public BOOLEAN, name STRING);'''
  
    # Executing the different queries
    tmp = sql.split(";")
    for hql in tmp:
        hql = hql.strip()
        if hql:
            query = hql_query(hql)
            handle = db.execute_and_wait(query, timeout_sec=5.0)
     
    return render('database.initialize.mako', request, locals())
Esempio n. 4
0
def configuration(request):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  config_values = dbms.get(request.user, query_server).get_default_configuration(
                      bool(request.REQUEST.get("include_hadoop", False)))

  return render("configuration.mako", request, {'config_values': config_values})
Esempio n. 5
0
def autocomplete(request, database=None, table=None, column=None, nested=None):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  do_as = request.user
  if (request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")) and 'doas' in request.GET:
    do_as = User.objects.get(username=request.GET.get('doas'))
  db = dbms.get(do_as, query_server)
  response = {}

  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    elif column is None:
      t = db.get_table(database, table)
      response['hdfs_link'] = t.hdfs_link
      response['columns'] = [column.name for column in t.cols]
      response['extended_columns'] = massage_columns_for_json(t.cols)
    else:
      col = db.get_column(database, table, column)
      if col:
        parse_tree = parser.parse_column(col.name, col.type, col.comment)
        if nested:
          parse_tree = _extract_nested_type(parse_tree, nested)
        response = parse_tree
      else:
        raise Exception('Could not find column `%s`.`%s`.`%s`' % (database, table, column))
  except (QueryServerTimeoutException, TTransportException), e:
    response['code'] = 503
    response['error'] = e.message
Esempio n. 6
0
def patient_search(request):
    """ Search the data related to a given patient id or dossier no """
    result = {'status': -1,'data': {}}

    if request.method != 'POST' or not request.POST or not request.POST['sample_id']:
        result['status'] = 0
        return HttpResponse(json.dumps(result), mimetype="application/json")
    
    
    hql = "SELECT patient.patient_id, patient.dossier_no, patient.patient_family_name, patient.patient_first_name, ethnicity.ethnicity FROM patient JOIN ethnicity ON patient.ethnic_id = ethnicity.ethnic_id WHERE "
    
    if "patient_id" in request.POST:
        patient_id = str(request.POST['patient_id'])
        hql += "patient.patient_id = '"+patient_id+"';"
    elif "dossier_no" in request.POST:
        dossier_no = str(request.POST['dossier_no'])
        hql += "patient.dossier_no = '"+dossier_no+"';"

    # Database connexion
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
    customer_sample_id = str(request.user.id)+"_"+sample_id

    # Selecting the files related to the sample id
    
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        data = db.fetch(handle, rows=100)
        result['status'] = 1
        result['data'] = list(data.rows())
        db.close(handle)

    # Returning the data
    return HttpResponse(json.dumps(result), mimetype="application/json")
Esempio n. 7
0
File: api.py Progetto: ronglei/hue
def explain_query(request):
    response = {"status": -1, "message": ""}

    if request.method != "POST":
        response["message"] = _("A POST request is required.")

    app_name = get_app_name(request)
    query_type = beeswax_models.SavedQuery.TYPES_MAPPING[app_name]

    try:
        form = get_query_form(request)

        if form.is_valid():
            query = SQLdesign(form, query_type=query_type)
            query_server = dbms.get_query_server_config(app_name)
            db = dbms.get(request.user, query_server)

            try:
                db.use(form.cleaned_data["database"])
                datatable = db.explain(query)
                results = db.client.create_result(datatable)

                response["status"] = 0
                response["results"] = results_to_dict(results)
            except Exception, e:
                response["status"] = -1
                response["message"] = str(e)

        else:
Esempio n. 8
0
def _guess_range(user, dashboard, field):
    
    hql = "SELECT MIN(%(field)s), MAX(%(field)s) FROM %(database)s.%(table)s" % {
      'field': field['name'],
      'database': dashboard['properties'][0]['database'],
      'table': dashboard['properties'][0]['table']      
    }
    
    query_server = get_query_server_config(name='impala')
    db = dbms.get(user, query_server=query_server)
    
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=35.0)    

    data = db.fetch(handle, rows=1)
    stats_min, stats_max = list(data.rows())[0]
    db.close(handle)
    
    _min, _m = _round_number_range(stats_min)
    _m, _max = _round_number_range(stats_max)    

    properties = {
      'min': stats_min,
      'max': stats_max,
      'start': _min,
      'end': _max,
      #'gap': gap,
      'canRange': True,
      'isDate': False,
    }
    
    return properties
Esempio n. 9
0
File: conf.py Progetto: lorelib/hue
def config_validator(user):
  # dbms is dependent on beeswax.conf (this file)
  # import in method to avoid circular dependency
  from beeswax.design import hql_query
  from beeswax.server import dbms
  from beeswax.server.dbms import get_query_server_config

  res = []
  try:
    try:
      if not 'test' in sys.argv: # Avoid tests hanging
        query_server = get_query_server_config(name='impala')
        server = dbms.get(user, query_server)
        query = hql_query("SELECT 'Hello World!';")
        handle = server.execute_and_wait(query, timeout_sec=10.0)

        if handle:
          server.fetch(handle, rows=100)
          server.close(handle)
    except StructuredThriftTransportException, ex:
      if 'TSocket read 0 bytes' in str(ex):  # this message appears when authentication fails
        msg = "Failed to authenticate to Impalad, check authentication configurations."
        LOG.exception(msg)
        res.append((NICE_NAME, _(msg)))
      else:
        raise ex
  except Exception, ex:
    msg = "No available Impalad to send queries to."
    LOG.exception(msg)
    res.append((NICE_NAME, _(msg)))
Esempio n. 10
0
def sample_search(request):
    """ Search the data related to a given sample id """

    result = {'status': -1,'data': {}}

    if request.method != 'POST' or not request.POST or not request.POST['sample_id']:
        result['status'] = 0
        return HttpResponse(json.dumps(result), mimetype="application/json")

    sample_id = str(request.POST['sample_id'])

    # Database connexion
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
    customer_sample_id = str(request.user.id)+"_"+sample_id

    # Selecting the files related to the sample id
    hql = "SELECT sample_files.id, sample_files.file_path FROM sample_files JOIN map_sample_id ON sample_files.internal_sample_id = map_sample_id.internal_sample_id WHERE map_sample_id.customer_sample_id = '"+customer_sample_id+"';"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        data = db.fetch(handle, rows=100)
        result['status'] = 1
        result['data'] = list(data.rows())
        db.close(handle)

    # Returning the data
    return HttpResponse(json.dumps(result), mimetype="application/json")
Esempio n. 11
0
def init_example(request):
    """ Allow to make some test for the developpers, to see if the insertion and the querying of data is correct """

    result = {'status': -1,'data': {}}

    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
  
    # Deleting the db
    hql = "DROP TABLE IF EXISTS val_test_2;"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
  
    # Creating the db
    hql = "CREATE TABLE val_test_2 (id int, token string);"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
  
    # Adding some data
    hql = " INSERT OVERWRITE val_test_2 values (1, 'a'), (2, 'b'), (-1,'xyzzy');"
    # hql = "INSERT INTO TABLE testset_bis VALUES (2, 25.0)"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
  
    # querying the data
    hql = "SELECT * FROM val_test_2"
    query = hql_query(hql)
    handle = db.execute_and_wait(query, timeout_sec=5.0)
    if handle:
        data = db.fetch(handle, rows=100)
        result['data'] = list(data.rows())
        db.close(handle)
 
    return render('database.initialize.mako', request, locals())
Esempio n. 12
0
  def teardown_class(cls):
    if is_live_cluster():
      # Delete test DB and tables
      query_server = get_query_server_config()
      client = make_logged_in_client()
      user = User.objects.get(username='******')

      db = dbms.get(user, query_server)

      # Kill Spark context if running
      if is_hive_on_spark() and cluster.is_yarn():
        # TODO: We should clean up the running Hive on Spark job here
        pass

      for db_name in [cls.db_name, '%s_other' % cls.db_name]:
        databases = db.get_databases()

        if db_name in databases:
          tables = db.get_tables(database=db_name)
          for table in tables:
            make_query(client, 'DROP TABLE IF EXISTS `%(db)s`.`%(table)s`' % {'db': db_name, 'table': table}, wait=True)
          make_query(client, 'DROP VIEW IF EXISTS `%(db)s`.`myview`' % {'db': db_name}, wait=True)
          make_query(client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db_name}, wait=True)

          # Check the cleanup
          databases = db.get_databases()
          assert_false(db_name in databases)

      global _INITIALIZED
      _INITIALIZED = False
Esempio n. 13
0
def get_sample_data(request, database, table, column=None):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  db = dbms.get(request.user, query_server)

  response = _get_sample_data(db, database, table, column)
  return JsonResponse(response)
Esempio n. 14
0
def execute_query(request, design_id=None):
  """
  View function for executing an arbitrary query.
  It understands the optional GET/POST params:

    on_success_url
      If given, it will be displayed when the query is successfully finished.
      Otherwise, it will display the view query results page by default.
  """
  authorized_get_design(request, design_id)

  error_message = None
  form = QueryForm()
  action = request.path
  log = None
  app_name = get_app_name(request)
  query_type = SavedQuery.TYPES_MAPPING[app_name]
  design = safe_get_design(request, query_type, design_id)
  on_success_url = request.REQUEST.get('on_success_url')
  databases = []
  query_server = get_query_server_config(app_name)
  db = dbms.get(request.user, query_server)

  try:
    databases = get_db_choices(request)
  except Exception, ex:
    error_message, log = expand_exception(ex, db)
Esempio n. 15
0
def database_initialize(request):
    """ Install the tables for this application """

    # The variant tables (impala and hbase)
    database_create_variants(request, temporary=False)

    # Connexion to the db
    query_server = get_query_server_config(name='impala')
    db = dbms.get(request.user, query_server=query_server)
  
    # The sql queries
    sql = "DROP TABLE IF EXISTS map_sample_id; CREATE TABLE map_sample_id (internal_sample_id STRING, customer_sample_id STRING, date_creation TIMESTAMP, date_modification TIMESTAMP);  DROP TABLE IF EXISTS sample_files; CREATE TABLE sample_files (id STRING, internal_sample_id STRING, file_path STRING, file_type STRING, date_creation TIMESTAMP, date_modification TIMESTAMP) row format delimited fields terminated by ',' stored as textfile;"

    # The clinical db
    sql += "DROP TABLE IF EXISTS clinical_sample; CREATE TABLE clinical_sample (sample_id STRING, patient_id STRING, date_of_collection STRING, original_sample_id STRING, status STRING, sample_type STRING, biological_contamination STRING, storage_condition STRING, biobank_id STRING, pn_id STRING) row format delimited fields terminated by ',' stored as textfile;"

    # Executing the different queries
    tmp = sql.split(";")
    for hql in tmp:
        hql = hql.strip()
        if hql:
            query = hql_query(hql)
            handle = db.execute_and_wait(query, timeout_sec=5.0)
     
    return render('database.initialize.mako', request, locals())
Esempio n. 16
0
  def create_session(self, lang='hive', properties=None):
    application = 'beeswax' if lang == 'hive' else lang

    session = Session.objects.get_session(self.user, application=application)

    if session is None:
      session = dbms.get(self.user, query_server=get_query_server_config(name=lang)).open_session(self.user)

    response = {
      'type': lang,
      'id': session.id
    }

    if not properties:

      config = None
      if USE_DEFAULT_CONFIGURATION.get():
        config = DefaultConfiguration.objects.get_configuration_for_user(app=lang, user=self.user)

      if config is not None:
        properties = config.properties_list
      else:
        properties = self.get_properties(lang)

    response['properties'] = properties

    if lang == 'impala':
      impala_settings = session.get_formatted_properties()
      http_addr = next((setting['value'] for setting in impala_settings if setting['key'].lower() == 'http_addr'), None)
      response['http_addr'] = http_addr

    return response
Esempio n. 17
0
def get_shared_beeswax_server(db_name='default'):
  global _SHARED_HIVE_SERVER
  global _SHARED_HIVE_SERVER_CLOSER
  if _SHARED_HIVE_SERVER is None:

    cluster = pseudo_hdfs4.shared_cluster()

    if is_live_cluster():
      def s():
        pass
    else:
      s = _start_mini_hs2(cluster)

    start = time.time()
    started = False
    sleep = 1

    make_logged_in_client()
    user = User.objects.get(username='******')
    query_server = get_query_server_config()
    db = dbms.get(user, query_server)

    while not started and time.time() - start <= 30:
      try:
        db.open_session(user)
        started = True
        break
      except Exception, e:
        LOG.info('HiveServer2 server could not be found after: %s' % e)
        time.sleep(sleep)

    if not started:
      raise Exception("Server took too long to come up.")

    _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s
Esempio n. 18
0
    def put(self, request, original_variant, pk):
        # Allow to modify a variant in HBase/Impala

        f = open('/tmp/superhello.txt','w')
        f.write(json.dumps(request.data))
        f.close()

        # We convert the original and modified data to flatjson
        fc = formatConverters(input_file='stuff.json',output_file='stuff.json')
        original_flatjson = fc.convertVariantJsonToFlatJson(json_data=original_variant)
        modified_flatjson = fc.convertVariantJsonToFlatJson(json_data=request.data)

        # We convert the data to hbase, and we modify directly some fields (note: the keys are almost the same for hbase and impala)
        hbase_data = fc.convertVariantFlatJsonToHbase(original_data=original_flatjson,modified_data=modified_flatjson)


        # Impala - We make the query
        query_server = get_query_server_config(name='impala')
        db = dbms.get(request.user, query_server=query_server)
        query = hql_query("INSERT INTO variant("+",".join(query_data)+")")
        handle = db.execute_and_wait(query, timeout_sec=5.0)
        if handle:
            db.close(handle)
        else:
            raise Exception("Impossible to create the variant...")

        # HBase - We add the data in that table too
        hbaseApi = HbaseApi(user=request.user)
        currentCluster = hbaseApi.getClusters().pop()
        rowkey = pk
        hbaseApi.putRow(cluster=currentCluster['name'], tableName='variants', row=rowkey, data=hbase_data)
Esempio n. 19
0
def get_query_form(request):
  try:
    # Get database choices
    query_server = dbms.get_query_server_config(get_app_name(request))
    db = dbms.get(request.user, query_server)
    databases = [(database, database) for database in db.get_databases()]
  except Exception, e:
    raise PopupException(_('Unable to access databases, Query Server or Metastore may be down.'), detail=e)
Esempio n. 20
0
File: views.py Progetto: jriv96/hue
def _run_parameterized_query(request, design_id, explain):
    """
  Given a design and arguments to parameterize that design, runs the query.
  - explain is a boolean to determine whether to run as an explain or as an
  execute.

  This is an extra "step" in the flow from execute_query.
  """
    design = authorized_get_design(request, design_id, must_exist=True)

    # Reconstitute the form
    design_obj = beeswax.design.HQLdesign.loads(design.data)
    query_form = QueryForm()
    params = design_obj.get_query_dict()
    params.update(request.POST)

    databases = _get_db_choices(request)
    query_form.bind(params)
    query_form.query.fields["database"].choices = databases  # Could not do it in the form

    if not query_form.is_valid():
        raise PopupException(_("Query form is invalid: %s") % query_form.errors)

    query_str = query_form.query.cleaned_data["query"]
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)
    query_type = SavedQuery.TYPES_MAPPING[app_name]

    parameterization_form_cls = make_parameterization_form(query_str)
    if not parameterization_form_cls:
        raise PopupException(_("Query is not parameterizable."))

    parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization")

    if parameterization_form.is_valid():
        real_query = substitute_variables(query_str, parameterization_form.cleaned_data)
        query = HQLdesign(query_form, query_type=query_type)
        query._data_dict["query"]["query"] = real_query
        try:
            if explain:
                return explain_directly(request, query, design, query_server)
            else:
                return execute_directly(request, query, query_server, design)
        except Exception, ex:
            db = dbms.get(request.user, query_server)
            error_message, log = expand_exception(ex, db)
            return render(
                "execute.mako",
                request,
                {
                    "action": reverse(get_app_name(request) + ":execute_query"),
                    "design": design,
                    "error_message": error_message,
                    "form": query_form,
                    "log": log,
                    "autocomplete_base_url": reverse(get_app_name(request) + ":autocomplete", kwargs={}),
                },
            )
Esempio n. 21
0
    def _get_db(self, snippet):
        if snippet["type"] == "hive":
            name = "beeswax"
        elif snippet["type"] == "impala":
            name = "impala"
        else:
            name = "spark-sql"

        return dbms.get(self.user, query_server=get_query_server_config(name=name))
Esempio n. 22
0
  def set_execution_engine(cls):
    query_server = get_query_server_config()

    if query_server['server_name'] == 'beeswax' and is_hive_on_spark():
      user = User.objects.get(username='******')
      db = dbms.get(user, query_server)

      LOG.info("Setting Hive execution engine to Spark")
      db.execute_statement('SET hive.execution.engine=spark')
Esempio n. 23
0
    def create_session(self, lang="hive", properties=None):
        application = "beeswax" if lang == "hive" else lang

        session = Session.objects.get_session(self.user, application=application)

        if session is None:
            session = dbms.get(self.user, query_server=get_query_server_config(name=lang)).open_session(self.user)

        return {"type": lang, "id": session.id, "properties": session.get_formatted_properties()}
Esempio n. 24
0
File: api.py Progetto: cloudera/hue
def get_sample_data(request, database, table, column=None):
  app_name = get_app_name(request)
  cluster = json.loads(request.POST.get('cluster', '{}'))

  query_server = get_query_server_config(app_name, cluster=cluster)
  db = dbms.get(request.user, query_server)

  response = _get_sample_data(db, database, table, column, cluster=cluster)
  return JsonResponse(response)
Esempio n. 25
0
def configuration(request):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  config_values = dbms.get(request.user, query_server).get_default_configuration(
                      bool(request.REQUEST.get("include_hadoop", False)))
  for value in config_values:
    if 'password' in value.key.lower():
      value.value = "*" * 10
  return render("configuration.mako", request, {'config_values': config_values})
Esempio n. 26
0
  def _get_db(self, snippet):
    if snippet['type'] == 'hive':
      name = 'beeswax'
    elif snippet['type'] == 'impala':
      name = 'impala'
    else:
      name = 'spark-sql'

    return dbms.get(self.user, query_server=get_query_server_config(name=name))
Esempio n. 27
0
def autocomplete(request, database=None, table=None, column=None, nested=None):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  do_as = request.user
  if (request.user.is_superuser or request.user.has_hue_permission(action="impersonate", app="security")) and 'doas' in request.GET:
    do_as = User.objects.get(username=request.GET.get('doas'))
  db = dbms.get(do_as, query_server)

  response = _autocomplete(db, database, table, column, nested)
  return JsonResponse(response)
Esempio n. 28
0
def get(user, query_server=None):
  # Avoid circular dependency
  from ImpalaService import ImpalaHiveServer2Service, ImpalaService

  if query_server is None:
    query_server = get_query_server_config(name='impala')

  if query_server['server_interface'] == HIVE_SERVER2:
    return ImpalaServerClient(ImpalaHiveServer2Service, query_server, user)
  else:
    return ImpalaServerClient(ImpalaService, query_server, user)
Esempio n. 29
0
def execute(request, design_id=None):
  response = {'status': -1, 'message': ''}

  if request.method != 'POST':
    response['message'] = _('A POST request is required.')

  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  query_type = beeswax.models.SavedQuery.TYPES_MAPPING[app_name]
  design = safe_get_design(request, query_type, design_id)

  try:
    query_form = get_query_form(request)

    if query_form.is_valid():
      query_str = query_form.query.cleaned_data["query"]
      explain = request.GET.get('explain', 'false').lower() == 'true'
      design = save_design(request, query_form, query_type, design, False)

      if query_form.query.cleaned_data['is_parameterized']:
        # Parameterized query
        parameterization_form_cls = make_parameterization_form(query_str)
        if parameterization_form_cls:
          parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization")

          if parameterization_form.is_valid():
            parameters = parameterization_form.cleaned_data
            real_query = substitute_variables(query_str, parameters)
            query = HQLdesign(query_form, query_type=query_type)
            query._data_dict['query']['query'] = real_query

            try:
              if explain:
                return explain_directly(request, query, design, query_server)
              else:
                return execute_directly(request, query, design, query_server, parameters=parameters)

            except Exception, ex:
              db = dbms.get(request.user, query_server)
              error_message, log = expand_exception(ex, db)
              response['message'] = error_message
              return JsonResponse(response)
          else:
            response['errors'] = parameterization_form.errors
            return JsonResponse(response)

      # Non-parameterized query
      query = HQLdesign(query_form, query_type=query_type)
      if request.GET.get('explain', 'false').lower() == 'true':
        return explain_directly(request, query, design, query_server)
      else:
        return execute_directly(request, query, design, query_server)
    else:
Esempio n. 30
0
File: api.py Progetto: rosomaxa/hue
def get_session(request):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)

  session = Session.objects.get_session(request.user, query_server['server_name'])

  if session:
    properties = json.loads(session.properties)
  else:
    properties = {}

  return JsonResponse({'properties': properties})
Esempio n. 31
0
def get_session(request):
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)

    session = Session.objects.get_session(request.user,
                                          query_server['server_name'])

    if session:
        properties = json.loads(session.properties)
    else:
        properties = {}

    return JsonResponse({'properties': properties})
Esempio n. 32
0
  def create_session(self, lang='hive', properties=None):
    application = 'beeswax' if lang == 'hive' else lang

    session = Session.objects.get_session(self.user, application=application)

    if session is None:
      session = dbms.get(self.user, query_server=get_query_server_config(name=lang)).open_session(self.user)

    return {
        'type': lang,
        'id': session.id,
        'properties': session.get_formatted_properties()
    }
Esempio n. 33
0
def get_top_terms(request, database, table, column, prefix=None):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  db = dbms.get(request.user, query_server)

  response = {'status': -1, 'message': '', 'redirect': ''}

  terms = db.get_top_terms(database, table, column, prefix=prefix, limit=int(request.GET.get('limit', 30)))

  response['terms'] = terms
  response['status'] = 0

  return JsonResponse(response)
Esempio n. 34
0
def get_settings(request):
    query_server = dbms.get_query_server_config(get_app_name(request))
    db = dbms.get(request.user, query_server)
    response = {'status': -1}

    settings = db.get_configuration()
    if settings:
        response['status'] = 0
        response['settings'] = settings
    else:
        response['message'] = _('Failed to get settings.')

    return JsonResponse(response)
Esempio n. 35
0
def _get_db(user, source_type=None):
    if source_type is None:
        cluster_config = get_cluster_config(user)
        if FORCE_HS2_METADATA.get() and cluster_config['app_config'].get(
                'editor') and 'hive' in cluster_config['app_config'].get(
                    'editor')['interpreter_names']:
            source_type = 'hive'
        else:
            source_type = cluster_config['default_sql_interpreter']

    query_server = get_query_server_config(
        name=source_type if source_type != 'hive' else 'beeswax')
    return dbms.get(user, query_server)
Esempio n. 36
0
    def get_query_server_config(self):
        from beeswax.server.dbms import get_query_server_config

        query_server = get_query_server_config(
            QueryHistory.get_type_name(self.query_type))
        query_server.update({
            'server_name': self.server_name,
            #         'server_host': self.server_host, # Always use the live server configuration as the session is currently tied to the connection
            #         'server_port': int(self.server_port),
            'server_type': self.server_type,
        })

        return query_server
Esempio n. 37
0
    def get_query_server_config(self):
        from beeswax.server.dbms import get_query_server_config

        query_server = get_query_server_config(
            QueryHistory.get_type_name(self.query_type))
        query_server.update({
            'server_name': self.server_name,
            'server_host': self.server_host,
            'server_port': self.server_port,
            'server_type': self.server_type,
        })

        return query_server
Esempio n. 38
0
def describe_table(request, database, table):
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)
    db = dbms.get(request.user, query_server)

    try:
        table = db.get_table(database, table)
    except Exception, e:
        LOG.exception("Describe table error")
        if hasattr(e, 'message') and e.message:
            raise PopupException(_("Hive Error"), detail=e.message)
        else:
            raise PopupException(_("Hive Error"), detail=e)
Esempio n. 39
0
  def __init__(self, data_dict, app_name):
    self.name = data_dict['table_name']
    self.filename = data_dict['data_file']
    self.hql = data_dict['create_hql']
    self.query_server = get_query_server_config(app_name)

    # Sanity check
    self._data_dir = beeswax.conf.LOCAL_EXAMPLES_DATA_DIR.get()
    self._contents_file = os.path.join(self._data_dir, self.filename)
    if not os.path.isfile(self._contents_file):
      msg = _('Cannot find table data in "%(file)s".') % {'file': self._contents_file}
      LOG.error(msg)
      raise ValueError(msg)
Esempio n. 40
0
def get_indexes(request, database, table):
  query_server = dbms.get_query_server_config(get_app_name(request))
  db = dbms.get(request.user, query_server)
  response = {'status': -1}

  indexes = db.get_indexes(database, table)
  if indexes:
    response['status'] = 0
    response['headers'] = indexes.cols()
    response['rows'] = escape_rows(indexes.rows(), nulls_only=True)
  else:
    response['message'] = _('Failed to get indexes.')

  return JsonResponse(response)
Esempio n. 41
0
def get_query_form(request):
  # Get database choices
  query_server = dbms.get_query_server_config(get_app_name(request))
  db = dbms.get(request.user, query_server)
  databases = [(database, database) for database in db.get_databases()]

  if not databases:
    raise RuntimeError(_("No databases are available. Permissions could be missing."))

  query_form = QueryForm()
  query_form.bind(request.POST)
  query_form.query.fields['database'].choices = databases # Could not do it in the form

  return query_form
Esempio n. 42
0
    def test_get_llap(self):

        with patch('beeswax.conf.LLAP_SERVER_HOST.get') as LLAP_SERVER_HOST:
            with patch(
                    'beeswax.conf.LLAP_SERVER_PORT.get') as LLAP_SERVER_PORT:
                LLAP_SERVER_HOST.return_value = 'hive-llap.gethue.com'
                LLAP_SERVER_PORT.return_value = 10002

                query_server = get_query_server_config(name='llap')

                assert_equal(query_server['server_name'], 'beeswax')
                assert_equal(query_server['server_host'],
                             'hive-llap.gethue.com')
                assert_equal(query_server['server_port'], 10002)
Esempio n. 43
0
File: api.py Progetto: sanmahaj/hue
def get_sample_data(request, database, table):
    query_server = dbms.get_query_server_config(get_app_name(request))
    db = dbms.get(request.user, query_server)
    response = {'status': -1}

    table_obj = db.get_table(database, table)
    sample_data = db.get_sample(database, table_obj)
    if sample_data:
        response['status'] = 0
        response['headers'] = sample_data.cols()
        response['rows'] = escape_rows(sample_data.rows(), nulls_only=True)
    else:
        response['message'] = _('Failed to get sample data.')

    return JsonResponse(response)
Esempio n. 44
0
def get_functions(request):
  query_server = dbms.get_query_server_config(get_app_name(request))
  db = dbms.get(request.user, query_server)
  response = {'status': -1}

  prefix = request.GET.get('prefix', None)
  functions = db.get_functions(prefix)
  if functions:
    response['status'] = 0
    rows = escape_rows(functions.rows(), nulls_only=True)
    response['functions'] = [row[0] for row in rows]
  else:
    response['message'] = _('Failed to get functions.')

  return JsonResponse(response)
Esempio n. 45
0
def get_query_form(request):
  try:
    try:
      # Get database choices
      query_server = dbms.get_query_server_config(get_app_name(request))
      db = dbms.get(request.user, query_server)
      databases = [(database, database) for database in db.get_databases()]
    except StructuredThriftTransportException, e:
      # If Thrift exception was due to failed authentication, raise corresponding message
      if 'TSocket read 0 bytes' in str(e) or 'Error validating the login' in str(e):
        raise PopupException(_('Failed to authenticate to query server, check authentication configurations.'), detail=e)
      else:
        raise e
  except Exception, e:
    raise PopupException(_('Unable to access databases, Query Server or Metastore may be down.'), detail=e)
Esempio n. 46
0
def analyze_table(request, database, table, columns=None):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  db = dbms.get(request.user, query_server)

  table_obj = db.get_table(database, table)
  if table_obj.is_impala_only and app_name != 'impala':
    query_server = get_query_server_config('impala')
    db = dbms.get(request.user, query_server)

  response = {'status': -1, 'message': '', 'redirect': ''}

  if request.method == "POST":
    if columns is None:
      query_history = db.analyze_table(database, table)
    else:
      query_history = db.analyze_table_columns(database, table)

    response['watch_url'] = reverse('beeswax:api_watch_query_refresh_json', kwargs={'id': query_history.id})
    response['status'] = 0
  else:
    response['message'] = _('A POST request is required.')

  return JsonResponse(response)
Esempio n. 47
0
def get_indexes(request, database, table):
    query_server = dbms.get_query_server_config(get_app_name(request))
    db = dbms.get(request.user, query_server)
    response = {'status': -1, 'error_message': ''}

    indexes = db.get_indexes(database, table)
    if indexes:
        response['status'] = 0
        response['headers'] = indexes.cols()
        response['rows'] = escape_rows(indexes.rows(), nulls_only=True)
    else:
        response['error_message'] = _(
            'Index data took too long to be generated')

    return JsonResponse(response)
Esempio n. 48
0
def get_shared_beeswax_server(db_name='default'):
    global _SHARED_HIVE_SERVER
    global _SHARED_HIVE_SERVER_CLOSER

    with _SHARED_HIVE_SERVER_LOCK:
        if _SHARED_HIVE_SERVER is None:
            cluster = pseudo_hdfs4.shared_cluster()

            if is_live_cluster():

                def s():
                    pass
            else:
                s = _start_mini_hs2(cluster)

            start = time.time()
            started = False
            sleep = 1

            make_logged_in_client()
            user = User.objects.get(username='******')
            query_server = get_query_server_config()
            db = dbms.get(user, query_server)

            while not started and time.time() - start <= 60:
                try:
                    db.open_session(user)
                except StructuredThriftTransportException as e:
                    LOG.exception('Failed to open Hive Server session')

                    # Don't loop if we had an authentication error.
                    if 'Bad status: 3' in e.message:
                        raise
                except Exception as e:
                    LOG.exception('Failed to open Hive Server session')
                else:
                    started = True
                    break

                time.sleep(sleep)
                sleep *= 2

            if not started:
                raise Exception("Server took too long to come up.")

            _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER = cluster, s

        return _SHARED_HIVE_SERVER, _SHARED_HIVE_SERVER_CLOSER
Esempio n. 49
0
    def create_session(self, lang='hive', properties=None):
        application = 'beeswax' if lang == 'hive' or lang == 'llap' else lang

        if has_session_pool():
            session = Session.objects.get_tez_session(
                self.user, application, MAX_NUMBER_OF_SESSIONS.get())
        elif not has_multiple_sessions():
            session = Session.objects.get_session(self.user,
                                                  application=application)
        else:
            session = None

        reuse_session = session is not None
        if not reuse_session:
            db = dbms.get(self.user,
                          query_server=get_query_server_config(
                              name=lang, connector=self.interpreter))
            session = db.open_session(self.user)

        response = {'type': lang, 'id': session.id}

        if not properties:
            config = None
            if USE_DEFAULT_CONFIGURATION.get():
                config = DefaultConfiguration.objects.get_configuration_for_user(
                    app=lang, user=self.user)

            if config is not None:
                properties = config.properties_list
            else:
                properties = self.get_properties(lang)

        response['properties'] = properties
        response['configuration'] = json.loads(session.properties)
        response['reuse_session'] = reuse_session
        response['session_id'] = ''

        try:
            decoded_guid = session.get_handle().sessionId.guid
            response['session_id'] = unpack_guid(decoded_guid)
        except Exception as e:
            LOG.warn('Failed to decode session handle: %s' % e)

        if lang == 'impala' and session:
            http_addr = _get_impala_server_url(session)
            response['http_addr'] = http_addr

        return response
Esempio n. 50
0
def refresh_tables(request):
    app_name = get_app_name(request)
    query_server = get_query_server_config(app_name)
    db = dbms.get(request.user, query_server=query_server)

    response = {'status': 0, 'message': ''}

    if request.method == "POST":
        try:
            database = json.loads(request.POST['database'])
            added = json.loads(request.POST['added'])
            removed = json.loads(request.POST['removed'])

            db.invalidate_tables(database, added + removed)
        except Exception, e:
            response['message'] = str(e)
Esempio n. 51
0
def configuration(request):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)

  session = Session.objects.get_session(request.user, query_server['server_name'])

  if session:
    properties = json.loads(session.properties)
    # Redact passwords
    for key, value in properties.items():
      if 'password' in key.lower():
        properties[key] = '*' * len(value)
  else:
    properties = {}

  return render("configuration.mako", request, {'configuration': properties})
Esempio n. 52
0
    def _get_db(self, snippet, is_async=False, interpreter=None):
        if not is_async and snippet['type'] == 'hive':
            name = 'beeswax'
        elif snippet['type'] == 'hive':
            name = 'hive'
        elif snippet['type'] == 'llap':
            name = 'llap'
        elif snippet['type'] == 'impala':
            name = 'impala'
        else:
            name = 'sparksql'

        return dbms.get(self.user,
                        query_server=get_query_server_config(
                            name=name, connector=interpreter)
                        )  # Note: name is not used if interpreter is present
Esempio n. 53
0
def config_validator(user):
    # dbms is dependent on beeswax.conf (this file)
    # import in method to avoid circular dependency
    from beeswax.server import dbms
    from beeswax.server.dbms import get_query_server_config

    res = []
    try:
        if not 'test' in sys.argv:  # Avoid tests hanging
            query_server = get_query_server_config(name='impala')
            server = dbms.get(user, query_server)
            server.get_databases()
    except:
        res.append((NICE_NAME, _("No available Impalad to send queries to.")))

    return res
Esempio n. 54
0
def get_sample_data(request, database, table):
    query_server = dbms.get_query_server_config(get_app_name(request))
    db = dbms.get(request.user, query_server)
    response = {'status': -1, 'error_message': ''}

    table_obj = db.get_table(database, table)
    sample_data = db.get_sample(database, table_obj)
    if sample_data:
        response['status'] = 0
        response['headers'] = sample_data.cols()
        response['rows'] = escape_rows(sample_data.rows(), nulls_only=True)
    else:
        response['error_message'] = _(
            'Sample data took too long to be generated')

    return JsonResponse(response)
Esempio n. 55
0
def _run_parameterized_query(request, design_id, explain):
  """
  Given a design and arguments to parameterize that design, runs the query.
  - explain is a boolean to determine whether to run as an explain or as an
  execute.

  This is an extra "step" in the flow from execute_query.
  """
  design = authorized_get_design(request, design_id, must_exist=True)

  # Reconstitute the form
  design_obj = beeswax.design.HQLdesign.loads(design.data)
  query_form = QueryForm()
  params = design_obj.get_query_dict()
  params.update(request.POST)
  query_form.bind(params)
  assert query_form.is_valid()

  query_str = query_form.query.cleaned_data["query"]
  query_server = get_query_server_config(get_app_name(request))

  parameterization_form_cls = make_parameterization_form(query_str)
  if not parameterization_form_cls:
    raise PopupException(_("Query is not parameterizable."))

  parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization")

  if parameterization_form.is_valid():
    real_query = substitute_variables(query_str, parameterization_form.cleaned_data)
    query = HQLdesign(query_form)
    query._data_dict['query']['query'] = real_query
    try:
      if explain:
        return explain_directly(request, query, design, query_server)
      else:
        return execute_directly(request, query, query_server, design)
    except Exception, ex:
      db = dbms.get(request.user, query_server)
      error_message, log = expand_exception(ex, db)
      return render('execute.mako', request, {
        'action': reverse(get_app_name(request) + ':execute_query'),
        'design': design,
        'error_message': error_message,
        'form': query_form,
        'log': log,
        'autocomplete_base_url': reverse(get_app_name(request) + ':autocomplete', kwargs={}),
      })
Esempio n. 56
0
def autocomplete(request, database=None, table=None):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  db = dbms.get(request.user, query_server)
  response = {}

  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    else:
      t = db.get_table(database, table)
      response['columns'] = [column.name for column in t.cols]
  except Exception, e:
    LOG.warn('Autocomplete data fetching error %s.%s: %s' % (database, table, e))
    response['error'] = e.message
Esempio n. 57
0
def get_table_stats(request, database, table, column=None):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  db = dbms.get(request.user, query_server)

  response = {'status': -1, 'message': '', 'redirect': ''}

  if column is not None:
    stats = db.get_table_columns_stats(database, table, column)
  else:
    table = db.get_table(database, table)
    stats = table.stats

  response['stats'] = stats
  response['status'] = 0

  return JsonResponse(response)
Esempio n. 58
0
    def close_session(self, session):
        app_name = session.get('type')
        session_id = session.get('id')
        source_method = session.get("sourceMethod")

        if not session_id:
            session = Session.objects.get_session(self.user,
                                                  application=app_name)
            decoded_guid = session.get_handle().sessionId.guid
            session_decoded_id = unpack_guid(decoded_guid)
            if source_method == "dt_logout":
                LOG.debug(
                    "Closing Impala session id %s on logout for user %s" %
                    (session_decoded_id, self.user.username))

        query_server = get_query_server_config(name=app_name)

        response = {'status': -1, 'message': ''}
        session_record = None

        try:
            filters = {
                'id': session_id,
                'application': query_server['server_name']
            }
            if not is_admin(self.user):
                filters['owner'] = self.user
            session_record = Session.objects.get(**filters)
        except Session.DoesNotExist:
            response['message'] = _(
                'Session does not exist or you do not have permissions to close the session.'
            )

        if session_record:
            session_record = dbms.get(
                self.user, query_server).close_session(session_record)
            response['status'] = 0
            response['message'] = _('Session successfully closed.')
            response['session'] = {
                'id': session_id,
                'application': session_record.application,
                'status': session_record.status_code
            }

        return response
Esempio n. 59
0
def autocomplete(request, database=None, table=None):
  app_name = get_app_name(request)
  query_server = get_query_server_config(app_name)
  db = dbms.get(request.user, query_server)
  response = {}

  try:
    if database is None:
      response['databases'] = db.get_databases()
    elif table is None:
      response['tables'] = db.get_tables(database=database)
    else:
      t = db.get_table(database, table)
      response['columns'] = [column.name for column in t.cols]
      response['extended_columns'] = massage_columns_for_json(t.cols)
  except TTransportException, tx:
    response['code'] = 503
    response['error'] = tx.message
Esempio n. 60
0
  def _get_db(self, snippet, is_async=False, interpreter=None):
    if interpreter and interpreter.get('dialect'):
      dialect = interpreter['dialect']
    else:
      dialect = snippet['type']  # Backward compatibility without connectors

    if not is_async and dialect == 'hive':
      name = 'beeswax'
    elif dialect == 'hive':
      name = 'hive'
    elif dialect == 'llap':
      name = 'llap'
    elif dialect == 'impala':
      name = 'impala'
    else:
      name = 'sparksql'

    # Note: name is not used if interpreter is present
    return dbms.get(self.user, query_server=get_query_server_config(name=name, connector=interpreter))