Example #1
def execute_query(request, design_id=None):
  """
  View function for executing an arbitrary query.
  It understands the optional GET/POST params:

    on_success_url
      If given, the user is redirected to this URL when the query finishes
      successfully. Otherwise, the view-query-results page is shown by default.
  """
  authorized_get_design(request, design_id)  # check that the user may access this design

  error_message = None
  form = QueryForm()
  action = request.path
  log = None
  design = safe_get_design(request, models.SavedQuery.HQL, design_id)
  on_success_url = request.REQUEST.get('on_success_url')

  if request.method == 'POST':
    form.bind(request.POST)

    to_explain = 'button-explain' in request.POST
    to_submit = 'button-submit' in request.POST

    # Always validate the saveform, which will tell us whether it needs explicit saving
    if form.is_valid():
      to_save = form.saveform.cleaned_data['save']
      to_saveas = form.saveform.cleaned_data['saveas']

      if to_saveas and not design.is_auto:
        # Save As only affects a previously saved query
        design = design.clone()

      if to_submit or to_save or to_saveas or to_explain:
        explicit_save = to_save or to_saveas
        design = save_design(request, form, models.SavedQuery.HQL, design, explicit_save)
        action = urlresolvers.reverse(execute_query, kwargs=dict(design_id=design.id))

      if to_explain or to_submit:
        query_str = form.query.cleaned_data["query"]
        query_server = get_query_server(form.query_servers.cleaned_data["server"])

        # (Optional) Parameterization.
        parameterization = get_parameterization(request, query_str, form, design, to_explain)
        if parameterization:
          return parameterization

        try:
          query = HQLdesign(form)
          if to_explain:
            return explain_directly(request, query, design, query_server)
          else:
            notify = form.query.cleaned_data.get('email_notify', False)
            return execute_directly(request, query, query_server, design, on_success_url=on_success_url, notify=notify)
        except BeeswaxException, ex:
          LOG.error('Query failed with error code %s (SQL state %s)' % (ex.errorCode, ex.SQLState))
          db = dbms.get(request.user, query_server)
          error_message, log = expand_exception(ex, db)
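
The on_success_url fallback and the button handling described in the docstring reduce to a small pattern; here is a minimal standalone sketch of it (the helper name and URL strings are illustrative, not taken from the Hue code):

def pick_action_and_redirect(post_data, on_success_url, default_results_url):
  # Illustrative helper -- not part of the Hue views module.
  # Which button was pressed, and where to send the user on success.
  to_explain = 'button-explain' in post_data
  to_submit = 'button-submit' in post_data
  return to_explain, to_submit, (on_success_url or default_results_url)

print(pick_action_and_redirect({'button-submit': '1'}, None, '/beeswax/results'))
# -> (False, True, '/beeswax/results')
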
Example #2
def configuration(request):
  # Default to an empty configuration so the template still renders when the
  # form is missing or invalid.
  config_values = {}

  if request.method == 'POST':
    server_form = QueryServerForm(request.POST)
    if server_form.is_valid():
      query_server = get_query_server(server_form.cleaned_data["server"])
      config_values = dbms.get(request.user, query_server).get_default_configuration(
                          bool(request.REQUEST.get("include_hadoop", False)))
  else:
    server_form = QueryServerForm()

  return render("configuration.mako", request, {'config_values': config_values,
                                                'server_form': server_form})
Example #3
def _run_parameterized_query(request, design_id, explain):
  """
  Given a design and the arguments to parameterize it with, runs the query.
  - explain: a boolean that determines whether the query is explained or
    executed.

  This is an extra "step" in the flow from execute_query.
  """
  design = authorized_get_design(request, design_id, must_exist=True)

  # Reconstitute the form
  design_obj = beeswax.design.HQLdesign.loads(design.data)
  query_form = QueryForm()
  params = design_obj.get_query_dict()
  params.update(request.POST)
  query_form.bind(params)
  assert query_form.is_valid()

  query_str = query_form.query.cleaned_data["query"]
  query_server = get_query_server(query_form.query_servers.cleaned_data["server"])

  parameterization_form_cls = make_parameterization_form(query_str)
  if not parameterization_form_cls:
    raise PopupException(_("Query is not parameterizable."))

  parameterization_form = parameterization_form_cls(request.REQUEST, prefix="parameterization")

  if parameterization_form.is_valid():
    real_query = substitute_variables(query_str, parameterization_form.cleaned_data)
    query = HQLdesign(query_form)
    query._data_dict['query']['query'] = real_query  # replace the saved query text with the substituted version
    try:
      if explain:
        return explain_directly(request, query, design, query_server)
      else:
        return execute_directly(request, query, query_server, design)
    except BeeswaxException, ex:
      db = dbms.get(request.user, query_server)
      error_message, log = expand_exception(ex, db)
      return render('execute.mako', request, {
        'action': urlresolvers.reverse(execute_query),
        'design': design,
        'error_message': error_message,
        'form': query_form,
        'log': log,
      })
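
The parameterization step above boils down to: detect placeholders in the saved query, collect values for them, and substitute before running. A rough standalone illustration of that idea (this is not Hue's substitute_variables(), and the $name placeholder syntax is an assumption):

import re

def sketch_substitute(query_str, values):
  # Replace each $name placeholder with its user-supplied value.
  return re.sub(r'\$(\w+)', lambda m: values[m.group(1)], query_str)

print(sketch_substitute("SELECT * FROM logs WHERE dt = '$date' LIMIT $n",
                        {'date': '2013-01-01', 'n': '10'}))
# -> SELECT * FROM logs WHERE dt = '2013-01-01' LIMIT 10
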
Example #4
def get_shared_beeswax_server():
  # Make sure this happens only once
  global _SHARED_BEESWAX_SERVER
  global _SHARED_BEESWAX_SERVER_CLOSER
  if _SHARED_BEESWAX_SERVER is None:
    # Read hive-default.xml.template from BEESWAX_HIVE_CONF_DIR into memory before
    # the setting is overridden to /my/bogus/path below
    default_xml = file(beeswax.conf.BEESWAX_HIVE_CONF_DIR.get()+"/hive-default.xml.template").read()

    finish = (
      beeswax.conf.QUERY_SERVERS['default'].SERVER_HOST.set_for_testing("localhost"),
      beeswax.conf.QUERY_SERVERS['default'].SERVER_PORT.set_for_testing(BEESWAXD_TEST_PORT),
      beeswax.conf.QUERY_SERVERS['default'].SUPPORT_DDL.set_for_testing(True),
      beeswax.conf.BEESWAX_META_SERVER_HOST.set_for_testing("localhost"),
      beeswax.conf.BEESWAX_META_SERVER_PORT.set_for_testing(BEESWAXD_TEST_PORT + 1),
      # Use a bogus path to avoid loading the normal hive-site.xml
      beeswax.conf.BEESWAX_HIVE_CONF_DIR.set_for_testing('/my/bogus/path')
    )

    cluster = pseudo_hdfs4.shared_cluster()

    # Copy hive-default.xml into the mini_cluster's conf dir, which happens to be
    # in the cluster's tmpdir. This tmpdir is determined during the mini_cluster
    # startup, during which BEESWAX_HIVE_CONF_DIR needs to be set to
    # /my/bogus/path; hence the earlier step of reading the template into memory.
    # hive-default.xml will get picked up by the beeswax_server during startup.
    file(cluster._tmpdir + "/conf/hive-default.xml", 'w').write(default_xml)

    global _SHARED_BEESWAX_SERVER_PROCESS

    if SERVER_INTERFACE.get() == HIVE_SERVER2:
      # When running against HiveServer2, mark the process slot with a sentinel
      # so the beeswaxd start-up block below is skipped.
      _SHARED_BEESWAX_SERVER_PROCESS = 1

    if _SHARED_BEESWAX_SERVER_PROCESS is None:
      p = _start_server(cluster)
      _SHARED_BEESWAX_SERVER_PROCESS = p
      def kill():
        LOG.info("Killing beeswax server (pid %d)." % p.pid)
        os.kill(p.pid, 9)
        p.wait()
      atexit.register(kill)

      # Wait for the server to come up, polling with exponential backoff.
      start = time.time()
      started = False
      sleep = 0.001
      make_logged_in_client()
      user = User.objects.get(username='******')
      query_server = get_query_server(support_ddl=True)
      db = dbms.get(user, query_server)

      while not started and time.time() - start < 20.0:
        try:
          db.echo("echo")
          if db.getStatus() == fb303.ttypes.fb_status.ALIVE:
            started = True
            break
          time.sleep(sleep)
          sleep *= 2
        except Exception:
          time.sleep(sleep)
          sleep *= 2
      if not started:
        raise Exception("Beeswax server took too long to come up.")

      # Make sure /tmp is 0777
      cluster.fs.setuser(cluster.superuser)
      if not cluster.fs.isdir('/tmp'):
        cluster.fs.mkdir('/tmp', 0777)
      else:
        cluster.fs.chmod('/tmp', 0777)

      cluster.fs.chmod(cluster._tmpdir, 0777)
      cluster.fs.chmod(cluster._tmpdir + '/hadoop_tmp_dir/mapred', 0777)

    def s():
      # Teardown closure: undo the config overrides and stop the cluster.
      for f in finish:
        f()
      cluster.stop()

    _SHARED_BEESWAX_SERVER, _SHARED_BEESWAX_SERVER_CLOSER = cluster, s

  return _SHARED_BEESWAX_SERVER, _SHARED_BEESWAX_SERVER_CLOSER
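
The set_for_testing()/finish() pairs in this example follow a simple pattern: each override returns a callable that restores the previous value, and the closer runs them all. A minimal standalone re-implementation of that idea (a toy stand-in, not the real Hue conf API):

class ConfigValue(object):
  # Toy stand-in for a configuration knob; illustrative only.
  def __init__(self, value):
    self._value = value

  def get(self):
    return self._value

  def set_for_testing(self, value):
    # Swap in the test value and hand back a restorer for teardown.
    old, self._value = self._value, value
    def restore():
      self._value = old
    return restore

HOST = ConfigValue('beeswax.example.com')
finish = (HOST.set_for_testing('localhost'),)
assert HOST.get() == 'localhost'
for f in finish:   # same teardown loop as in s() above
  f()
assert HOST.get() == 'beeswax.example.com'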