Example #1
def action_parameters(request):
  response = {'status': -1}
  parameters = set()

  try:
    node_data = json.loads(request.POST.get('node', '{}'))

    parameters = parameters.union(set(Node(node_data).find_parameters()))

    script_path = node_data.get('properties', {}).get('script_path', {})
    if script_path:
      script_path = script_path.replace('hdfs://', '')

      if request.fs.do_as_user(request.user, request.fs.exists, script_path):
        data = request.fs.do_as_user(request.user, request.fs.read, script_path, 0, 16 * 1024 ** 2)

        if node_data['type'] in ('hive', 'hive2'):
          parameters = parameters.union(set(find_dollar_braced_variables(data)))
        elif node_data['type'] == 'pig':
          parameters = parameters.union(set(find_dollar_variables(data)))
    elif node_data['type'] == 'hive-document':
      notebook = Notebook(document=Document2.objects.get_by_uuid(user=request.user, uuid=node_data['properties']['uuid']))
      parameters = parameters.union(set(find_dollar_braced_variables(notebook.get_str())))

    response['status'] = 0
    response['parameters'] = list(parameters)
  except Exception as e:
    response['message'] = str(e)

  return JsonResponse(response)
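
For context, a minimal sketch of the ${...} parameter extraction that find_dollar_braced_variables and find_dollar_variables are used for above. This is an illustrative assumption, not the actual Hue helpers:

import re

def find_dollar_braced_variables_sketch(text):
  # Hypothetical: collect names written as ${name} (Hive-style parameters).
  return set(re.findall(r'\$\{(\w+)\}', text))

def find_dollar_variables_sketch(text):
  # Hypothetical: collect names written as $name (Pig-style parameters), skipping ${name}.
  return set(re.findall(r'\$(?!\{)(\w+)', text))

For example, find_dollar_braced_variables_sketch("SELECT * FROM logs WHERE dt='${date}'") returns {'date'}.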
Example #2
  def execute_and_wait(self, query_doc, snippet_idx=0, timeout=30.0, wait=1.0):
      notebook = Notebook(document=query_doc)
      snippet = self.get_snippet(notebook, snippet_idx=snippet_idx)

      curr = time.time()
      end = curr + timeout
      status = 'ready'

      response = self.client.post(reverse('notebook:execute'),
                                  {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
      notebook = Notebook(document=query_doc)
      snippet = self.get_snippet(notebook, snippet_idx=snippet_idx)
      data = json.loads(response.content)
      snippet['result']['handle'] = data['handle']

      while status != 'available' and curr <= end:
        response = self.client.post(reverse('notebook:check_status'),
                                    {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
        data = json.loads(response.content)
        status = data['query_status']['status']
        snippet['status'] = status
        time.sleep(wait)
        curr = time.time()

      if status != 'available':
        raise Exception('Query failed to complete or return results.')

      return snippet
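
The notebook and snippet objects posted to these endpoints are plain dicts. A rough sketch of their shape, inferred only from the fields accessed in these examples rather than from an authoritative schema:

example_snippet = {
    'type': 'hive',                        # engine type, e.g. 'hive' or 'impala'
    'status': 'ready',                     # updated from notebook:check_status responses
    'statement': "SELECT 'hello world';",
    'database': 'default',
    'properties': {'settings': []},        # e.g. [{'key': 'hive.execution.engine', 'value': 'mr'}]
    'result': {'handle': {}},              # filled with data['handle'] after notebook:execute
}
example_notebook_data = {
    'uuid': '...',
    'snippets': [example_snippet],
}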
Example #3
  def test_fetch_result_abbreviated(self):
    if not is_live_cluster():
      raise SkipTest

    # Create session so that session object is saved to DB for server URL lookup
    session = self.api.create_session(lang='impala')

    try:

      # Assert that an abbreviated row count (e.g. 1.00K) is still resolved to the actual number of rows
      statement = "SELECT * FROM web_logs;"
      doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
      notebook = Notebook(document=doc)
      snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

      self.client.post(reverse('notebook:fetch_result_data'),
                       {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

      response = self.client.post(reverse('notebook:fetch_result_size'),
                                  {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

      data = json.loads(response.content)
      assert_equal(0, data['status'], data)
      assert_true('result' in data)
      assert_true('rows' in data['result'])
      assert_equal(1000, data['result']['rows'])
    finally:
      self.api.close_session(session)
Example #4
  def test_get_current_statement(self):
    multi_statement = "SELECT description, salary FROM sample_07 LIMIT 20;\r\nSELECT AVG(salary) FROM sample_07;"

    doc = self.create_query_document(owner=self.user, statement=multi_statement)
    notebook = Notebook(document=doc)
    snippet = self.get_snippet(notebook, snippet_idx=0)

    response = self.client.post(reverse('notebook:execute'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
    data = json.loads(response.content)

    assert_equal(0, data['status'], data)
    assert_equal(0, data['handle']['statement_id'], data)
    assert_equal(2, data['handle']['statements_count'], data)
    assert_equal(True, data['handle']['has_more_statements'], data)
    assert_equal({'row': 0, 'column': 0}, data['handle']['start'], data)
    assert_equal({'row': 0, 'column': 51}, data['handle']['end'], data)

    snippet['result']['handle'] = data['handle']

    response = self.client.post(reverse('notebook:execute'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
    data = json.loads(response.content)

    assert_equal(0, data['status'], data)
    assert_equal(1, data['handle']['statement_id'], data)
    assert_equal(2, data['handle']['statements_count'], data)
    assert_equal(False, data['handle']['has_more_statements'], data)
    assert_equal({'row': 1, 'column': 0}, data['handle']['start'], data)
    assert_equal({'row': 1, 'column': 33}, data['handle']['end'], data)
Example #5
  def test_get_sample(self):
    doc = self.create_query_document(owner=self.user, statement=self.statement)
    notebook = Notebook(document=doc)
    snippet = self.get_snippet(notebook, snippet_idx=0)

    response = self.client.post(reverse('notebook:api_sample_data',
      kwargs={'database': 'default', 'table': 'sample_07'}),
      {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
    data = json.loads(response.content)

    assert_equal(0, data['status'], data)
    assert_true('headers' in data)
    assert_true('rows' in data)
    assert_true(len(data['rows']) > 0)

    response = self.client.post(reverse('notebook:api_sample_data_column',
      kwargs={'database': 'default', 'table': 'sample_07', 'column': 'code'}),
      {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
    data = json.loads(response.content)

    assert_equal(0, data['status'], data)
    assert_true('headers' in data)
    assert_equal(['code'], data['headers'])
    assert_true('rows' in data)
    assert_true(len(data['rows']) > 0)
Example #6
    def test_get_current_statement(self):
        multi_statement = "SELECT description, salary FROM sample_07 LIMIT 20;\r\nSELECT AVG(salary) FROM sample_07;"

        doc = self.create_query_document(owner=self.user,
                                         statement=multi_statement)
        notebook = Notebook(document=doc)
        snippet = self.get_snippet(notebook, snippet_idx=0)

        response = self.client.post(reverse('notebook:execute'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })
        data = json.loads(response.content)

        assert_equal(0, data['status'], data)
        assert_equal(0, data['handle']['statement_id'], data)
        assert_equal(2, data['handle']['statements_count'], data)
        assert_equal(True, data['handle']['has_more_statements'], data)
        assert_equal({'row': 0, 'column': 0}, data['handle']['start'], data)
        assert_equal({'row': 0, 'column': 51}, data['handle']['end'], data)

        snippet['result']['handle'] = data['handle']

        response = self.client.post(reverse('notebook:execute'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })
        data = json.loads(response.content)

        assert_equal(0, data['status'], data)
        assert_equal(1, data['handle']['statement_id'], data)
        assert_equal(2, data['handle']['statements_count'], data)
        assert_equal(False, data['handle']['has_more_statements'], data)
        assert_equal({'row': 1, 'column': 0}, data['handle']['start'], data)
        assert_equal({'row': 1, 'column': 33}, data['handle']['end'], data)
Example #7
def schedule_document(request):
  if request.method != 'POST':
    raise PopupException(_('A POST request is required.'))

  uuid = request.POST.get('uuid')

  document = Document2.objects.get_by_uuid(user=request.user, uuid=uuid)
  notebook = Notebook(document=document)
  parameters = find_dollar_braced_variables(notebook.get_str())

  name = _('Schedule of ') + document.name

  params = [{u'value': u'%s=${%s}' % (p, p)} for p in parameters]

  data = json.dumps({
      'workflow': {u'name': name, u'versions': [u'uri:oozie:workflow:0.4', u'uri:oozie:workflow:0.4.5', u'uri:oozie:workflow:0.5'], u'isDirty': False, u'movedNode': None, u'linkMapping': {u'33430f0f-ebfa-c3ec-f237-3e77efa03d0a': [], u'3f107997-04cc-8733-60a9-a4bb62cebffc': [u'0aec471d-2b7c-d93d-b22c-2110fd17ea2c'], u'0aec471d-2b7c-d93d-b22c-2110fd17ea2c': [u'33430f0f-ebfa-c3ec-f237-3e77efa03d0a'], u'17c9c895-5a16-7443-bb81-f34b30b21548': []}, u'nodeIds': [u'3f107997-04cc-8733-60a9-a4bb62cebffc', u'33430f0f-ebfa-c3ec-f237-3e77efa03d0a', u'17c9c895-5a16-7443-bb81-f34b30b21548', u'0aec471d-2b7c-d93d-b22c-2110fd17ea2c'], u'id': 47, u'nodes': [{u'name': u'Start', u'properties': {}, u'actionParametersFetched': False, u'id': u'3f107997-04cc-8733-60a9-a4bb62cebffc', u'type': u'start-widget', u'children': [{u'to': u'0aec471d-2b7c-d93d-b22c-2110fd17ea2c'}], u'actionParameters': []}, {u'name': u'End', u'properties': {}, u'actionParametersFetched': False, u'id': u'33430f0f-ebfa-c3ec-f237-3e77efa03d0a', u'type': u'end-widget', u'children': [], u'actionParameters': []}, {u'name': u'Kill', u'properties': {u'body': u'', u'cc': u'', u'to': u'', u'enableMail': False, u'message': u'Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]', u'subject': u''}, u'actionParametersFetched': False, u'id': u'17c9c895-5a16-7443-bb81-f34b30b21548', u'type': u'kill-widget', u'children': [], u'actionParameters': []}, {u'name': u'hive-0aec', u'actionParametersUI': [], u'properties': {u'files': [], u'job_xml': u'', u'uuid': uuid, u'parameters': params, u'retry_interval': [], u'retry_max': [], u'job_properties': [], u'sla': [{u'key': u'enabled', u'value': False}, {u'key': u'nominal-time', u'value': u'${nominal_time}'}, {u'key': u'should-start', u'value': u''}, {u'key': u'should-end', u'value': u'${30 * MINUTES}'}, {u'key': u'max-duration', u'value': u''}, {u'key': u'alert-events', u'value': u''}, {u'key': u'alert-contact', u'value': u''}, {u'key': u'notification-msg', u'value': u''}, {u'key': u'upstream-apps', u'value': u''}], u'archives': [], u'prepares': [], u'credentials': [], u'password': u'', u'jdbc_url': u''}, u'actionParametersFetched': False, u'id': u'0aec471d-2b7c-d93d-b22c-2110fd17ea2c', u'type': u'hive-document-widget', u'children': [{u'to': u'33430f0f-ebfa-c3ec-f237-3e77efa03d0a'}, {u'error': u'17c9c895-5a16-7443-bb81-f34b30b21548'}], u'actionParameters': []}], u'properties': {u'job_xml': u'', u'description': u'', u'wf1_id': None, u'sla_enabled': False, u'deployment_dir': u'/user/hue/oozie/workspaces/hue-oozie-1459474214.27', u'schema_version': u'uri:oozie:workflow:0.5', u'sla': [{u'key': u'enabled', u'value': False}, {u'key': u'nominal-time', u'value': u'${nominal_time}'}, {u'key': u'should-start', u'value': u''}, {u'key': u'should-end', u'value': u'${30 * MINUTES}'}, {u'key': u'max-duration', u'value': u''}, {u'key': u'alert-events', u'value': u''}, {u'key': u'alert-contact', u'value': u''}, {u'key': u'notification-msg', u'value': u''}, {u'key': u'upstream-apps', u'value': u''}], u'show_arrows': True, u'parameters': [{u'name': u'oozie.use.system.libpath', u'value': True}], u'properties': []}, u'nodeNamesMapping': {u'33430f0f-ebfa-c3ec-f237-3e77efa03d0a': u'End', u'3f107997-04cc-8733-60a9-a4bb62cebffc': u'Start', u'0aec471d-2b7c-d93d-b22c-2110fd17ea2c': u'hive-0aec', u'17c9c895-5a16-7443-bb81-f34b30b21548': u'Kill'}, u'uuid': u'433922e5-e616-dfe0-1cba-7fe744c9305c'},
      'layout': [{u'oozieRows': [{u'enableOozieDropOnBefore': True, u'enableOozieDropOnSide': True, u'enableOozieDrop': False, u'widgets': [{u'status': u'', u'logsURL': u'', u'name': u'Hive', u'widgetType': u'hive-document-widget', u'oozieMovable': True, u'ooziePropertiesExpanded': False, u'externalIdUrl': u'', u'properties': {}, u'isLoading': True, u'offset': 0, u'actionURL': u'', u'progress': 0, u'klass': u'card card-widget span12', u'oozieExpanded': False, u'id': u'0aec471d-2b7c-d93d-b22c-2110fd17ea2c', u'size': 12}], u'id': u'32e1ea1a-812b-6878-9719-ff7b8407bf46', u'columns': []}], u'rows': [{u'enableOozieDropOnBefore': True, u'enableOozieDropOnSide': True, u'enableOozieDrop': False, u'widgets': [{u'status': u'', u'logsURL': u'', u'name': u'Start', u'widgetType': u'start-widget', u'oozieMovable': False, u'ooziePropertiesExpanded': False, u'externalIdUrl': u'', u'properties': {}, u'isLoading': True, u'offset': 0, u'actionURL': u'', u'progress': 0, u'klass': u'card card-widget span12', u'oozieExpanded': False, u'id': u'3f107997-04cc-8733-60a9-a4bb62cebffc', u'size': 12}], u'id': u'798dc16a-d366-6305-d2b3-2d5a6f6c4f4b', u'columns': []}, {u'enableOozieDropOnBefore': True, u'enableOozieDropOnSide': True, u'enableOozieDrop': False, u'widgets': [{u'status': u'', u'logsURL': u'', u'name': u'Hive', u'widgetType': u'hive-document-widget', u'oozieMovable': True, u'ooziePropertiesExpanded': False, u'externalIdUrl': u'', u'properties': {}, u'isLoading': True, u'offset': 0, u'actionURL': u'', u'progress': 0, u'klass': u'card card-widget span12', u'oozieExpanded': False, u'id': u'0aec471d-2b7c-d93d-b22c-2110fd17ea2c', u'size': 12}], u'id': u'32e1ea1a-812b-6878-9719-ff7b8407bf46', u'columns': []}, {u'enableOozieDropOnBefore': True, u'enableOozieDropOnSide': True, u'enableOozieDrop': False, u'widgets': [{u'status': u'', u'logsURL': u'', u'name': u'End', u'widgetType': u'end-widget', u'oozieMovable': False, u'ooziePropertiesExpanded': False, u'externalIdUrl': u'', u'properties': {}, u'isLoading': True, u'offset': 0, u'actionURL': u'', u'progress': 0, u'klass': u'card card-widget span12', u'oozieExpanded': False, u'id': u'33430f0f-ebfa-c3ec-f237-3e77efa03d0a', u'size': 12}], u'id': u'f2cf152d-8c82-2f4f-5d67-2e18c99e59c4', u'columns': []}, {u'enableOozieDropOnBefore': True, u'enableOozieDropOnSide': True, u'enableOozieDrop': False, u'widgets': [{u'status': u'', u'logsURL': u'', u'name': u'Kill', u'widgetType': u'kill-widget', u'oozieMovable': True, u'ooziePropertiesExpanded': False, u'externalIdUrl': u'', u'properties': {}, u'isLoading': True, u'offset': 0, u'actionURL': u'', u'progress': 0, u'klass': u'card card-widget span12', u'oozieExpanded': False, u'id': u'17c9c895-5a16-7443-bb81-f34b30b21548', u'size': 12}], u'id': u'01afcf1b-fa7a-e093-b613-ce52c5531a04', u'columns': []}], u'oozieEndRow': {u'enableOozieDropOnBefore': True, u'enableOozieDropOnSide': True, u'enableOozieDrop': False, u'widgets': [{u'status': u'', u'logsURL': u'', u'name': u'End', u'widgetType': u'end-widget', u'oozieMovable': False, u'ooziePropertiesExpanded': False, u'externalIdUrl': u'', u'properties': {}, u'isLoading': True, u'offset': 0, u'actionURL': u'', u'progress': 0, u'klass': u'card card-widget span12', u'oozieExpanded': False, u'id': u'33430f0f-ebfa-c3ec-f237-3e77efa03d0a', u'size': 12}], u'id': u'f2cf152d-8c82-2f4f-5d67-2e18c99e59c4', u'columns': []}, u'oozieKillRow': {u'enableOozieDropOnBefore': True, u'enableOozieDropOnSide': True, u'enableOozieDrop': False, u'widgets': [{u'status': u'', u'logsURL': u'', u'name': 
u'Kill', u'widgetType': u'kill-widget', u'oozieMovable': True, u'ooziePropertiesExpanded': False, u'externalIdUrl': u'', u'properties': {}, u'isLoading': True, u'offset': 0, u'actionURL': u'', u'progress': 0, u'klass': u'card card-widget span12', u'oozieExpanded': False, u'id': u'17c9c895-5a16-7443-bb81-f34b30b21548', u'size': 12}], u'id': u'01afcf1b-fa7a-e093-b613-ce52c5531a04', u'columns': []}, u'enableOozieDropOnAfter': True, u'oozieStartRow': {u'enableOozieDropOnBefore': True, u'enableOozieDropOnSide': True, u'enableOozieDrop': False, u'widgets': [{u'status': u'', u'logsURL': u'', u'name': u'Start', u'widgetType': u'start-widget', u'oozieMovable': False, u'ooziePropertiesExpanded': False, u'externalIdUrl': u'', u'properties': {}, u'isLoading': True, u'offset': 0, u'actionURL': u'', u'progress': 0, u'klass': u'card card-widget span12', u'oozieExpanded': False, u'id': u'3f107997-04cc-8733-60a9-a4bb62cebffc', u'size': 12}], u'id': u'798dc16a-d366-6305-d2b3-2d5a6f6c4f4b', u'columns': []}, u'klass': u'card card-home card-column span12', u'enableOozieDropOnBefore': True, u'drops': [u'temp'], u'id': u'672ff75a-d841-72c3-c616-c9d45ec97649', u'size': 12}]
  })

  workflow_doc = Document2.objects.create(name=name, type='oozie-workflow2', owner=request.user, data=data)
  Document.objects.link(workflow_doc, owner=workflow_doc.owner, name=workflow_doc.name, description=workflow_doc.description, extra='workflow2')

  workflow_doc.dependencies.add(document)

  response = {
    'status': 0,
    'url': reverse('oozie:new_coordinator') + '?workflow=' + workflow_doc.uuid
  }

  return JsonResponse(response)
Example #8
    def run_morphline(self,
                      request,
                      collection_name,
                      morphline,
                      input_path,
                      query=None):
        workspace_path = self._upload_workspace(morphline)

        notebook = Notebook(name='Indexer job for %s' % collection_name,
                            isManaged=True)

        if query:
            q = Notebook(document=Document2.objects.get_by_uuid(user=self.user,
                                                                uuid=query))
            notebook_data = q.get_data()
            snippet = notebook_data['snippets'][0]

            api = get_api(request, snippet)

            destination = '__hue_%s' % notebook_data['uuid'][:4]
            location = '/user/%s/__hue-%s' % (request.user,
                                              notebook_data['uuid'][:4])
            sql, success_url = api.export_data_as_table(notebook_data,
                                                        snippet,
                                                        destination,
                                                        is_temporary=True,
                                                        location=location)
            input_path = '${nameNode}%s' % location

            notebook.add_hive_snippet(snippet['database'], sql)

        notebook.add_java_snippet(
            clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
            app_jar=CONFIG_INDEXER_LIBS_PATH.get(),
            arguments=[
                u'--morphline-file',
                u'morphline.conf',
                u'--output-dir',
                u'${nameNode}/user/%s/indexer' % self.username,
                u'--log4j',
                u'log4j.properties',
                u'--go-live',
                u'--zk-host',
                zkensemble(),
                u'--collection',
                collection_name,
                input_path,
            ],
            files=[{
                u'path': u'%s/log4j.properties' % workspace_path,
                u'type': u'file'
            }, {
                u'path': u'%s/morphline.conf' % workspace_path,
                u'type': u'file'
            }])

        return notebook.execute(request, batch=True)
Example #9
  def run_morphline(self, request, collection_name, morphline, input_path, query=None, start_time=None, lib_path=None):
    workspace_path = self._upload_workspace(morphline)

    task = make_notebook(
      name=_('Indexing into %s') % collection_name,
      editor_type='notebook',
      on_success_url=reverse('search:browse', kwargs={'name': collection_name}),
      pub_sub_url='assist.collections.refresh',
      is_task=True,
      is_notebook=True,
      last_executed=start_time
    )

    if query:
      q = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=query))
      notebook_data = q.get_data()
      snippet = notebook_data['snippets'][0]

      api = get_api(request, snippet)

      destination = '__hue_%s' % notebook_data['uuid'][:4]
      location = '/user/%s/__hue-%s' % (request.user,  notebook_data['uuid'][:4])
      sql, _success_url = api.export_data_as_table(notebook_data, snippet, destination, is_temporary=True, location=location)
      input_path = '${nameNode}%s' % location

      task.add_hive_snippet(snippet['database'], sql)

    client = SolrClient(self.user)

    extra_args = ['-Dmapreduce.job.user.classpath.first=true'] if client.is_solr_six_or_more() else []

    task.add_java_snippet(
      clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
      app_jar=lib_path if lib_path is not None else CONFIG_INDEXER_LIBS_PATH.get(),
      arguments=extra_args + [
          u'--morphline-file',
          u'morphline.conf',
          u'--output-dir',
          u'${nameNode}/user/%s/indexer' % self.username,
          u'--log4j',
          u'log4j.properties',
          u'--go-live',
          u'--zk-host',
          client.get_zookeeper_host(),
          u'--collection',
          collection_name,
          input_path,
      ],
      files=[
          {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
          {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'}
      ]
    )

    return task.execute(request, batch=True)
Example #10
  def test_download(self):
    statement = "SELECT 'hello world';"

    doc = self.create_query_document(owner=self.user, statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0)

    response = self.client.post(reverse('notebook:download'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'format': 'csv'})

    assert_equal(200, response.status_code)
    assert_equal(('Content-Disposition', 'attachment; filename=Test Query.csv'), response._headers['content-disposition'])
Example #11
    def apps(self, filters):
        tasks = Document2.objects.get_history(
            user=self.user).order_by('-last_modified')[:MAX_JOB_FETCH.get()]
        apps = []

        for app in tasks:
            # Copied, Document class should have a get_history method (via method or inheritance)
            notebook = Notebook(document=app).get_data()
            is_notification_manager = False  # Supposed SQL Editor query only right now
            if 'snippets' in notebook:
                statement = notebook['description'] if is_notification_manager else _get_statement(notebook)
                history = {
                    'name': app.name,
                    'id': app.id,
                    'uuid': app.uuid,
                    'type': app.type,
                    'data': {
                        'statement':
                        statement[:1001] if statement else '',
                        'lastExecuted':
                        notebook['snippets'][0].get('lastExecuted', -1),
                        'status':
                        notebook['snippets'][0]['status'],
                        'parentSavedQueryUuid':
                        notebook.get('parentSavedQueryUuid', '')
                    } if notebook['snippets'] else {},
                    'absoluteUrl': app.get_absolute_url(),
                }
            api_status = self._api_status(history)

            if filters.get('states') and api_status.lower() not in filters['states']:
                continue

            apps.append({
                'id': 'history-%010d' % history['id'],
                'name': history['data']['statement'],
                'status': history['data']['status'],
                'apiStatus': api_status,
                'type': 'history-%s' % history['type'],
                'user': self.user.username,
                'progress': 50,
                'queue': '',
                'canWrite': True,
                'duration': 1,
                'submitted': history['data']['lastExecuted']
            })

        return {'apps': apps, 'total': len(tasks)}
Example #12
def upload_query(request):
    response = {'status': -1}

    source_platform = request.POST.get('sourcePlatform', 'default')

    if OPTIMIZER.AUTO_UPLOAD_QUERIES.get() and source_platform in ('hive',
                                                                   'impala'):
        query_id = request.POST.get('query_id')

        doc = Document2.objects.document(request.user, doc_id=query_id)

        query_data = Notebook(document=doc).get_data()
        queries = _convert_queries([query_data])
        source_platform = query_data['snippets'][0]['type']

        api = OptimizerApi(request.user)

        response['query_upload'] = api.upload(data=queries,
                                              data_type='queries',
                                              source_platform=source_platform)
    else:
        response['query_upload'] = _('Skipped')
    response['status'] = 0

    return JsonResponse(response)
Example #13
def upload_query(request):
    response = {'status': -1}

    interface = request.POST.get('interface', OPTIMIZER.INTERFACE.get())
    source_platform = request.POST.get('sourcePlatform', 'default')
    query_id = request.POST.get('query_id')

    if OPTIMIZER.AUTO_UPLOAD_QUERIES.get() and source_platform in (
            'hive', 'impala') and query_id:
        try:
            doc = Document2.objects.document(request.user, doc_id=query_id)

            query_data = Notebook(document=doc).get_data()
            queries = _convert_queries([query_data])
            source_platform = query_data['snippets'][0]['type']

            api = get_api(request, interface)

            response['query_upload'] = api.upload(
                data=queries,
                data_type='queries',
                source_platform=source_platform)
        except Document2.DoesNotExist:
            response['query_upload'] = _('Skipped as task query')
    else:
        response['query_upload'] = _('Skipped')
    response['status'] = 0

    return JsonResponse(response)
Example #14
  def test_explain(self):
    # Hive 2 with Tez sets hive.explain.user to true by default, but this test expects the output
    # produced when the setting is false.
    doc = self.create_query_document(owner=self.user, statement=self.statement)
    notebook = Notebook(document=doc)
    snippet = self.get_snippet(notebook, snippet_idx=0)
    snippet['properties']['settings'].append({"key": "hive.explain.user", "value": "false"})

    response = self.client.post(reverse('notebook:explain'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)

    assert_equal(0, data['status'], data)
    assert_true('STAGE DEPENDENCIES' in data['explanation'], data)
    assert_equal(self.statement, data['statement'], data)
Example #15
def upload_history(request):
    response = {'status': -1}

    n = request.POST.get('n')
    source_platform = request.POST.get('sourcePlatform', 'hive')

    history = Document2.objects.get_history(doc_type='query-%s' %
                                            source_platform,
                                            user=request.user)
    if n:
        history = history[:n]

    queries = []
    for doc in history:
        query_data = Notebook(document=doc).get_data()

        try:
            original_query_id = '%s:%s' % struct.unpack(
                b"QQ",
                base64.decodestring(
                    query_data['snippets'][0]['result']['handle']['guid']))
            execution_time = query_data['snippets'][0]['result'][
                'executionTime'] * 100

            queries.append((original_query_id, execution_time,
                            query_data['snippets'][0]['statement']))
        except Exception as e:
            LOG.warning('Skipping upload of %s: %s' % (doc, e))
Example #16
def upload_history(request):
  response = {'status': -1}

  if request.user.is_superuser:
    api = OptimizerApi(request.user)
    histories = []
    upload_stats = {}

    if request.POST.get('sourcePlatform'):
      n = min(int(request.POST.get('n', OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get())), OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get())
      source_platform = request.POST.get('sourcePlatform', 'hive')
      histories = [(source_platform, Document2.objects.get_history(doc_type='query-%s' % source_platform, user=request.user)[:n])]

    elif OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get() > 0:
      histories = [
        (source_platform, Document2.objects.filter(type='query-%s' % source_platform, is_history=True, is_managed=False, is_trashed=False).order_by('-last_modified')[:OPTIMIZER.QUERY_HISTORY_UPLOAD_LIMIT.get()])
            for source_platform in ['hive', 'impala']
      ]

    for source_platform, history in histories:
      queries = _convert_queries([Notebook(document=doc).get_data() for doc in history])
      upload_stats[source_platform] = api.upload(data=queries, data_type='queries', source_platform=source_platform)

    response['upload_history'] = upload_stats
    response['status'] = 0
  else:
    response['message'] = _('Query history upload requires Admin privileges or feature is disabled.')

  return JsonResponse(response)
Example #17
  def test_fetch_result_size_spark(self):
    if not is_live_cluster() or not is_hive_on_spark():
      raise SkipTest

    # TODO: Add session cleanup here so we don't have orphan spark sessions

    # Assert that a query with no job will return no rows or size
    statement = "SELECT 'hello world';"

    settings = [
        {
            'key': 'hive.execution.engine',
            'value': 'spark'
        }
    ]
    doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0)

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_true('size' in data['result'])
    assert_equal(None, data['result']['rows'])
    assert_equal(None, data['result']['size'])

    # Assert that a query that runs a job will return rows and size
    statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
    doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_true('size' in data['result'])
    assert_equal(23, data['result']['rows'])
    assert_true(data['result']['size'] > 0)
Example #18
    def test_download(self):
        statement = "SELECT 'hello world';"

        doc = self.create_query_document(owner=self.user, statement=statement)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc, snippet_idx=0)

        response = self.client.post(
            reverse('notebook:download'), {
                'notebook': notebook.get_json(),
                'snippet': json.dumps(snippet),
                'format': 'csv'
            })

        assert_equal(200, response.status_code)
        assert_equal(
            ('Content-Disposition', 'attachment; filename=Test Query.csv'),
            response._headers['content-disposition'])
Example #19
  def test_query_with_unicode(self):
    statement = "SELECT * FROM sample_07 WHERE code='한';"

    doc = self.create_query_document(owner=self.user, statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.get_snippet(notebook, snippet_idx=0)

    response = self.client.post(reverse('notebook:execute'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
    data = json.loads(response.content)
    assert_equal(0, data['status'], data)

    snippet['result']['handle'] = data['handle']

    response = self.client.post(reverse('notebook:get_logs'),
                                  {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true("SELECT * FROM sample_07 WHERE code='한'" in smart_str(data['logs']))
Example #20
    def test_fetch_result_size_impala(self):
        if not is_live_cluster():
            raise SkipTest

        # Create session so that session object is saved to DB for server URL lookup
        session = self.api.create_session(lang='impala')

        try:
            # Assert that a query that runs a job will return rows
            statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
            doc = self.create_query_document(owner=self.user,
                                             query_type='impala',
                                             statement=statement)
            notebook = Notebook(document=doc)
            snippet = self.execute_and_wait(doc,
                                            snippet_idx=0,
                                            timeout=60.0,
                                            wait=2.0)

            self.client.post(
                reverse('notebook:fetch_result_data'), {
                    'notebook': notebook.get_json(),
                    'snippet': json.dumps(snippet),
                    'rows': 100,
                    'startOver': 'false'
                })

            response = self.client.post(reverse('notebook:fetch_result_size'),
                                        {
                                            'notebook': notebook.get_json(),
                                            'snippet': json.dumps(snippet)
                                        })

            data = json.loads(response.content)
            assert_equal(0, data['status'], data)
            assert_true('result' in data)
            assert_true('rows' in data['result'])
            assert_true('size' in data['result'])
            assert_equal(23, data['result']['rows'])
            assert_equal(None, data['result']['size'])
        finally:
            self.api.close_session(session)
Example #21
    def test_fetch_result_abbreviated(self):
        if not is_live_cluster():
            raise SkipTest

        # Create session so that session object is saved to DB for server URL lookup
        session = self.api.create_session(lang='impala')

        try:

            # Assert that an abbreviated row count (e.g. 1.00K) is still resolved to the actual number of rows
            statement = "SELECT * FROM web_logs;"
            doc = self.create_query_document(owner=self.user,
                                             query_type='impala',
                                             statement=statement)
            notebook = Notebook(document=doc)
            snippet = self.execute_and_wait(doc,
                                            snippet_idx=0,
                                            timeout=60.0,
                                            wait=5.0)

            self.client.post(
                reverse('notebook:fetch_result_data'), {
                    'notebook': notebook.get_json(),
                    'snippet': json.dumps(snippet),
                    'rows': 100,
                    'startOver': 'false'
                })

            response = self.client.post(reverse('notebook:fetch_result_size'),
                                        {
                                            'notebook': notebook.get_json(),
                                            'snippet': json.dumps(snippet)
                                        })

            data = json.loads(response.content)
            assert_equal(0, data['status'], data)
            assert_true('result' in data)
            assert_true('rows' in data['result'])
            assert_equal(1000, data['result']['rows'])
        finally:
            self.api.close_session(session)
Example #22
  def test_fetch_result_size_mr(self):
    if not is_live_cluster():  # Mini-cluster does not have JHS
      raise SkipTest

    # Assert that a query with no job will return no rows or size
    statement = "SELECT 'hello world';"

    settings = [
        {
            'key': 'hive.execution.engine',
            'value': 'mr'
        }
    ]
    doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0)

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_true('size' in data['result'])
    assert_equal(None, data['result']['rows'])
    assert_equal(None, data['result']['size'])

    # Assert that a query with map & reduce task returns rows
    statement = "SELECT DISTINCT code FROM sample_07;"
    doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_true('size' in data['result'])
    assert_equal(823, data['result']['rows'])
    assert_true(data['result']['size'] > 0, data['result'])

    # Assert that a query with multiple jobs returns rows
    statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
    doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    response = self.client.post(reverse('notebook:fetch_result_size'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

    data = json.loads(response.content)
    assert_equal(0, data['status'], data)
    assert_true('result' in data)
    assert_true('rows' in data['result'])
    assert_equal(23, data['result']['rows'])
    assert_true(data['result']['size'] > 0, data['result'])
Example #23
    def test_get_sample(self):
        doc = self.create_query_document(owner=self.user,
                                         statement=self.statement)
        notebook = Notebook(document=doc)
        snippet = self.get_snippet(notebook, snippet_idx=0)

        response = self.client.post(
            reverse('notebook:api_sample_data',
                    kwargs={
                        'database': 'default',
                        'table': 'sample_07'
                    }), {
                        'notebook': notebook.get_json(),
                        'snippet': json.dumps(snippet)
                    })
        data = json.loads(response.content)

        assert_equal(0, data['status'], data)
        assert_true('headers' in data)
        assert_true('rows' in data)
        assert_true(len(data['rows']) > 0)

        response = self.client.post(
            reverse('notebook:api_sample_data_column',
                    kwargs={
                        'database': 'default',
                        'table': 'sample_07',
                        'column': 'code'
                    }), {
                        'notebook': notebook.get_json(),
                        'snippet': json.dumps(snippet)
                    })
        data = json.loads(response.content)

        assert_equal(0, data['status'], data)
        assert_true('headers' in data)
        assert_equal(['code'], data['headers'])
        assert_true('rows' in data)
        assert_true(len(data['rows']) > 0)
Example #24
    def test_explain(self):
        # Hive 2 with Tez sets hive.explain.user to true by default, but this test expects the output
        # produced when the setting is false.
        doc = self.create_query_document(owner=self.user,
                                         statement=self.statement)
        notebook = Notebook(document=doc)
        snippet = self.get_snippet(notebook, snippet_idx=0)
        snippet['properties']['settings'].append({
            "key": "hive.explain.user",
            "value": "false"
        })

        response = self.client.post(reverse('notebook:explain'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)

        assert_equal(0, data['status'], data)
        assert_true('STAGE DEPENDENCIES' in data['explanation'], data)
        assert_equal(self.statement, data['statement'], data)
Example #25
def run_sync_query(doc_id, user):
    '''Independently run a query as a user.'''
    # Add INSERT INTO table if persist result
    # Add variable substitution
    # Send notifications: done/on failure
    if type(user) is str:
        lookup = {orm_user_lookup(): user}
        user = User.objects.get(**lookup)
        user = rewrite_user(user)

    query_document = Document2.objects.get_by_uuid(user=user, uuid=doc_id)
    notebook = Notebook(document=query_document).get_data()
    snippet = notebook['snippets'][0]

    editor_type = snippet['type']
    sql = _get_statement(notebook)
    request = MockedDjangoRequest(user=user)
    last_executed = time.mktime(datetime.datetime.now().timetuple()) * 1000

    notebook = make_notebook(name='Scheduled query %s at %s' %
                             (query_document.name, last_executed),
                             editor_type=editor_type,
                             statement=sql,
                             status='ready',
                             last_executed=last_executed,
                             is_task=True)

    task = notebook.execute(request, batch=True)

    task['uuid'] = task['history_uuid']
    status = check_status(task)

    while status['status'] in ('waiting', 'running'):
        status = check_status(task)
        time.sleep(3)

    return task
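
A hypothetical invocation of run_sync_query; the document uuid and the username below are placeholders:

# Run a saved query document as the given user and block until the task is no longer waiting or running.
finished_task = run_sync_query('00000000-aaaa-bbbb-cccc-dddddddddddd', 'hue_admin')
print(finished_task['uuid'])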
Example #26
  def run_morphline(self, request, collection_name, morphline, input_path, query=None):
    workspace_path = self._upload_workspace(morphline)

    notebook = Notebook(
        name='Indexer job for %s' % collection_name,
        isManaged=True
    )

    if query:
      q = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=query))
      notebook_data = q.get_data()
      snippet = notebook_data['snippets'][0]

      api = get_api(request, snippet)

      destination = '__hue_%s' % notebook_data['uuid'][:4]
      location = '/user/%s/__hue-%s' % (request.user,  notebook_data['uuid'][:4])
      sql, success_url = api.export_data_as_table(notebook_data, snippet, destination, is_temporary=True, location=location)
      input_path = '${nameNode}%s' % location

      notebook.add_hive_snippet(snippet['database'], sql)

    notebook.add_java_snippet(
      clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
      app_jar=CONFIG_INDEXER_LIBS_PATH.get(),
      arguments=[
          u'--morphline-file',
          u'morphline.conf',
          u'--output-dir',
          u'${nameNode}/user/%s/indexer' % self.username,
          u'--log4j',
          u'log4j.properties',
          u'--go-live',
          u'--zk-host',
          zkensemble(),
          u'--collection',
          collection_name,
          input_path,
      ],
      files=[
          {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
          {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'}
      ]
    )

    return notebook.execute(request, batch=True)
Example #27
    def test_query_with_unicode(self):
        statement = "SELECT * FROM sample_07 WHERE code='한';"

        doc = self.create_query_document(owner=self.user, statement=statement)
        notebook = Notebook(document=doc)
        snippet = self.get_snippet(notebook, snippet_idx=0)

        response = self.client.post(reverse('notebook:execute'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })
        data = json.loads(response.content)
        assert_equal(0, data['status'], data)

        snippet['result']['handle'] = data['handle']

        response = self.client.post(reverse('notebook:get_logs'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })
        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true("SELECT * FROM sample_07 WHERE code='한'" in smart_str(
            data['logs']))
Example #28
def upload_history(request):
  response = {'status': -1}

  query_type = 'hive'

  queries = [
      (doc.uuid, 1000, Notebook(document=doc).get_data()['snippets'][0]['statement'])
      for doc in Document2.objects.get_history(doc_type='query-%s' % query_type, user=request.user)[:25]
  ]

  api = OptimizerApi()

  response['upload_history'] = api.upload(queries=queries, source_platform=query_type)
  response['status'] = 0

  return JsonResponse(response)
Example #29
    def test_fetch_result_size_spark(self):
        if not is_live_cluster() or not is_hive_on_spark():
            raise SkipTest

        # TODO: Add session cleanup here so we don't have orphan spark sessions

        # Assert that a query with no job will return no rows or size
        statement = "SELECT 'hello world';"

        settings = [{'key': 'hive.execution.engine', 'value': 'spark'}]
        doc = self.create_query_document(owner=self.user,
                                         statement=statement,
                                         settings=settings)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc, snippet_idx=0)

        response = self.client.post(reverse('notebook:fetch_result_size'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true('result' in data)
        assert_true('rows' in data['result'])
        assert_true('size' in data['result'])
        assert_equal(None, data['result']['rows'])
        assert_equal(None, data['result']['size'])

        # Assert that a query that runs a job will return rows and size
        statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
        doc = self.create_query_document(owner=self.user,
                                         statement=statement,
                                         settings=settings)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc,
                                        snippet_idx=0,
                                        timeout=60.0,
                                        wait=2.0)

        response = self.client.post(reverse('notebook:fetch_result_size'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true('result' in data)
        assert_true('rows' in data['result'])
        assert_true('size' in data['result'])
        assert_true(data['result']['rows'] > 0)
        assert_true(data['result']['size'] > 0)
Example #30
  def test_fetch_result_size_impala(self):
    if not is_live_cluster():
      raise SkipTest

    # Create session so that session object is saved to DB for server URL lookup
    session = self.api.create_session(lang='impala')

    try:
      # Assert that a query that runs a job will return rows
      statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
      doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
      notebook = Notebook(document=doc)
      snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

      self.client.post(reverse('notebook:fetch_result_data'),
                       {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

      response = self.client.post(reverse('notebook:fetch_result_size'),
                                  {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

      data = json.loads(response.content)
      assert_equal(0, data['status'], data)
      assert_true('result' in data)
      assert_true('rows' in data['result'])
      assert_true('size' in data['result'])
      assert_equal(23, data['result']['rows'])
      assert_equal(None, data['result']['size'])

      # Assert that selecting all from partitioned table works
      statement = "SELECT * FROM web_logs;"
      doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
      notebook = Notebook(document=doc)
      snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

      self.client.post(reverse('notebook:fetch_result_data'),
                       {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'rows': 100, 'startOver': 'false'})

      response = self.client.post(reverse('notebook:fetch_result_size'),
                                 {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})

      data = json.loads(response.content)
      assert_equal(0, data['status'], data)
      assert_true('result' in data)
      assert_true('rows' in data['result'])
      assert_equal(1000, data['result']['rows'])
    finally:
      self.api.close_session(session)
Example #31
def upload_history(request):
    response = {'status': -1}

    n = request.POST.get('n')
    source_platform = request.POST.get('sourcePlatform', 'hive')

    history = Document2.objects.get_history(doc_type='query-%s' %
                                            source_platform,
                                            user=request.user)
    if n:
        history = history[:n]

    queries = _convert_queries(
        [Notebook(document=doc).get_data() for doc in history])

    api = OptimizerApi()

    response['upload_history'] = api.upload(data=queries,
                                            data_type='queries',
                                            source_platform=source_platform)
    response['status'] = 0

    return JsonResponse(response)
Example #32
    def execute_and_wait(self,
                         query_doc,
                         snippet_idx=0,
                         timeout=30.0,
                         wait=1.0):
        notebook = Notebook(document=query_doc)
        snippet = self.get_snippet(notebook, snippet_idx=snippet_idx)

        curr = time.time()
        end = curr + timeout
        status = 'ready'

        response = self.client.post(reverse('notebook:execute'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })
        notebook = Notebook(document=query_doc)
        snippet = self.get_snippet(notebook, snippet_idx=snippet_idx)
        data = json.loads(response.content)
        snippet['result']['handle'] = data['handle']

        while status != 'available' and curr <= end:
            response = self.client.post(reverse('notebook:check_status'), {
                'notebook': notebook.get_json(),
                'snippet': json.dumps(snippet)
            })
            data = json.loads(response.content)
            status = data['query_status']['status']
            snippet['status'] = status
            time.sleep(wait)
            curr = time.time()

        if status != 'available':
            raise Exception('Query failed to complete or return results.')

        return snippet
Example #33
class Submission(object):
    """
  Represents one unique Oozie submission.

  Actions are:
  - submit
  - rerun
  """
    def __init__(self,
                 user,
                 job=None,
                 fs=None,
                 jt=None,
                 properties=None,
                 oozie_id=None,
                 local_tz=None):
        self.job = job
        self.user = user
        self.fs = fs
        self.jt = jt  # Deprecated with YARN, we now use logical names only for RM
        self.oozie_id = oozie_id
        self.api = get_oozie(self.user)

        if properties is not None:
            self.properties = properties
        else:
            self.properties = {}

        if local_tz and isinstance(self.job.data, dict):
            local_tz = self.job.data.get('properties')['timezone']

        # Modify start_date & end_date only when it's a coordinator
        from oozie.models2 import Coordinator
        if type(self.job) is Coordinator:
            if 'start_date' in self.properties:
                properties['start_date'] = convert_to_server_timezone(
                    self.properties['start_date'], local_tz)
            if 'end_date' in self.properties:
                properties['end_date'] = convert_to_server_timezone(
                    self.properties['end_date'], local_tz)

        if 'nominal_time' in self.properties:
            properties['nominal_time'] = convert_to_server_timezone(
                self.properties['nominal_time'], local_tz)

        self.properties['security_enabled'] = self.api.security_enabled

    def __str__(self):
        if self.oozie_id:
            res = "Submission for job '%s'." % (self.oozie_id, )
        else:
            res = "Submission for job '%s' (id %s, owner %s)." % (
                self.job.name, self.job.id, self.user)
        if self.oozie_id:
            res += " -- " + self.oozie_id
        return res

    @submit_dryrun
    def run(self, deployment_dir=None):
        """
    Take care of all the actions of submitting a Oozie workflow.
    Returns the oozie job id if all goes well.
    """

        if self.properties and 'oozie.use.system.libpath' not in self.properties:
            self.properties['oozie.use.system.libpath'] = 'true'

        self.oozie_id = self.api.submit_job(self.properties)
        LOG.info("Submitted: %s" % (self, ))

        if self._is_workflow():
            self.api.job_control(self.oozie_id, 'start')
            LOG.info("Started: %s" % (self, ))

        return self.oozie_id

    def rerun(self, deployment_dir, fail_nodes=None, skip_nodes=None):
        jt_address = cluster.get_cluster_addr_for_job_submission()

        self._update_properties(jt_address, deployment_dir)
        self.properties.update({'oozie.wf.application.path': deployment_dir})

        if 'oozie.coord.application.path' in self.properties:
            self.properties.pop('oozie.coord.application.path')

        if 'oozie.bundle.application.path' in self.properties:
            self.properties.pop('oozie.bundle.application.path')

        if fail_nodes:
            self.properties.update({'oozie.wf.rerun.failnodes': fail_nodes})
        elif not skip_nodes:
            self.properties.update({'oozie.wf.rerun.failnodes':
                                    'false'})  # Case empty 'skip_nodes' list
        else:
            self.properties.update({'oozie.wf.rerun.skip.nodes': skip_nodes})

        self.api.rerun(self.oozie_id, properties=self.properties)

        LOG.info("Rerun: %s" % (self, ))

        return self.oozie_id

    def rerun_coord(self, deployment_dir, params):
        jt_address = cluster.get_cluster_addr_for_job_submission()

        self._update_properties(jt_address, deployment_dir)
        self.properties.update(
            {'oozie.coord.application.path': deployment_dir})

        self.api.job_control(self.oozie_id,
                             action='coord-rerun',
                             properties=self.properties,
                             parameters=params)
        LOG.info("Rerun: %s" % (self, ))

        return self.oozie_id

    def update_coord(self):
        self.api = get_oozie(self.user, api_version="v2")
        self.api.job_control(self.oozie_id,
                             action='update',
                             properties=self.properties,
                             parameters=None)
        LOG.info("Update: %s" % (self, ))

        return self.oozie_id

    def rerun_bundle(self, deployment_dir, params):
        jt_address = cluster.get_cluster_addr_for_job_submission()

        self._update_properties(jt_address, deployment_dir)
        self.properties.update(
            {'oozie.bundle.application.path': deployment_dir})
        self.api.job_control(self.oozie_id,
                             action='bundle-rerun',
                             properties=self.properties,
                             parameters=params)
        LOG.info("Rerun: %s" % (self, ))

        return self.oozie_id

    def deploy(self, deployment_dir=None):
        try:
            if not deployment_dir:
                deployment_dir = self._create_deployment_dir()
        except Exception, ex:
            msg = _("Failed to create deployment directory: %s" % ex)
            LOG.exception(msg)
            raise PopupException(message=msg, detail=str(ex))

        if self.api.security_enabled:
            jt_address = cluster.get_cluster_addr_for_job_submission()
            self._update_properties(jt_address)  # Needed for coordinator deploying workflows with credentials

        if hasattr(self.job, 'nodes'):
            for action in self.job.nodes:
                # Make sure XML is there
                # Don't support more than one level of sub-workflows
                if action.data['type'] == 'subworkflow':
                    from oozie.models2 import Workflow
                    workflow = Workflow(document=Document2.objects.get_by_uuid(
                        user=self.user,
                        uuid=action.data['properties']['workflow']))
                    sub_deploy = Submission(self.user, workflow, self.fs,
                                            self.jt, self.properties)
                    workspace = sub_deploy.deploy()

                    self.job.override_subworkflow_id(action, workflow.id)  # For displaying the correct graph
                    self.properties['workspace_%s' % workflow.uuid] = workspace  # For pointing to the correct workspace

                elif action.data['type'] == 'altus':
                    self._create_file(
                        deployment_dir, action.data['name'] + '.sh',
                        '''#!/usr/bin/env bash

export PYTHONPATH=`pwd`

echo 'Starting Altus command...'

python altus.py

          ''')

                    shell_script = self._generate_altus_action_script(
                        service=action.data['properties'].get('service'),
                        command=action.data['properties'].get('command'),
                        arguments=dict([
                            arg.split('=', 1)
                            for arg in action.data['properties'].get(
                                'arguments', [])
                        ]),
                        auth_key_id=ALTUS.AUTH_KEY_ID.get(),
                        auth_key_secret=ALTUS.AUTH_KEY_SECRET.get().replace(
                            '\\n', '\n'))
                    self._create_file(deployment_dir, 'altus.py', shell_script)

                    ext_py_lib_path = os.path.join(get_desktop_root(), 'core',
                                                   'ext-py')
                    lib_dir_path = os.path.join(self.job.deployment_dir, 'lib')
                    libs = [
                        (os.path.join(ext_py_lib_path,
                                      'navoptapi-0.1.0'), 'navoptapi'),
                        (os.path.join(ext_py_lib_path,
                                      'navoptapi-0.1.0'), 'altuscli'),
                        (os.path.join(ext_py_lib_path,
                                      'asn1crypto-0.24.0'), 'asn1crypto'),
                        (os.path.join(ext_py_lib_path, 'rsa-3.4.2'), 'rsa'),
                        (os.path.join(ext_py_lib_path,
                                      'pyasn1-0.1.8'), 'pyasn1'),
                    ]
                    for source_path, name in libs:
                        destination_path = os.path.join(lib_dir_path, name)
                        if not self.fs.do_as_user(self.user, self.fs.exists,
                                                  destination_path):
                            # Note: would be much faster to have only one zip archive
                            self.fs.do_as_user(self.user,
                                               self.fs.copyFromLocal,
                                               os.path.join(source_path, name),
                                               destination_path)

                elif action.data['type'] in ('impala', 'impala-document'):
                    from oozie.models2 import _get_impala_url
                    from impala.impala_flags import get_ssl_server_certificate

                    if action.data['type'] == 'impala-document':
                        from notebook.models import Notebook
                        if action.data['properties'].get('uuid'):
                            notebook = Notebook(
                                document=Document2.objects.get_by_uuid(
                                    user=self.user,
                                    uuid=action.data['properties']['uuid']))
                            statements = notebook.get_str()
                            statements = Template(statements).safe_substitute(
                                **self.properties)
                            script_name = action.data['name'] + '.sql'
                            self._create_file(deployment_dir, script_name,
                                              statements)
                    else:
                        script_name = os.path.basename(
                            action.data['properties'].get('script_path'))

                    if self.api.security_enabled:
                        kinit = 'kinit -k -t *.keytab %(user_principal)s' % {
                            'user_principal': self.properties.get(
                                'user_principal',
                                action.data['properties'].get('user_principal'))
                        }
                    else:
                        kinit = ''

                    shell_script = """#!/bin/bash

# Needed to launch impala shell in oozie
export PYTHON_EGG_CACHE=./myeggs

%(kinit)s

impala-shell %(kerberos_option)s %(ssl_option)s -i %(impalad_host)s -f %(query_file)s""" % {
                        'impalad_host': action.data['properties'].get('impalad_host') or _get_impala_url(),
                        'kerberos_option': '-k' if self.api.security_enabled else '',
                        'ssl_option': '--ssl' if get_ssl_server_certificate() else '',
                        'query_file': script_name,
                        'kinit': kinit
                    }

                    self._create_file(deployment_dir,
                                      action.data['name'] + '.sh',
                                      shell_script)

                elif action.data['type'] == 'hive-document':
                    from notebook.models import Notebook
                    if action.data['properties'].get('uuid'):
                        notebook = Notebook(
                            document=Document2.objects.get_by_uuid(
                                user=self.user,
                                uuid=action.data['properties']['uuid']))
                        statements = notebook.get_str()
                    else:
                        statements = action.data['properties'].get(
                            'statements')

                    if self.properties.get('send_result_path'):
                        statements = """
INSERT OVERWRITE DIRECTORY '%s'
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   "separatorChar" = "\t",
   "quoteChar"     = "'",
   "escapeChar"    = "\\"
)
STORED AS TEXTFILE %s""" % (self.properties.get('send_result_path'),
                            '\n\n\n'.join([
                                snippet['statement_raw']
                                for snippet in notebook.get_data()['snippets']
                            ]))

                    if statements is not None:
                        self._create_file(deployment_dir,
                                          action.data['name'] + '.sql',
                                          statements)

                elif action.data['type'] in ('java-document', 'java',
                                             'mapreduce-document'):
                    if action.data['type'] in ('java-document', 'mapreduce-document'):
                        from notebook.models import Notebook
                        notebook = Notebook(
                            document=Document2.objects.get_by_uuid(
                                user=self.user,
                                uuid=action.data['properties']['uuid']))
                        properties = notebook.get_data()['snippets'][0]['properties']
                    else:
                        properties = action.data['properties']

                    if properties.get('app_jar'):
                        LOG.debug("Adding to oozie.libpath %s" %
                                  properties['app_jar'])
                        paths = [properties['app_jar']]
                        if self.properties.get('oozie.libpath'):
                            paths.append(self.properties['oozie.libpath'])
                        self.properties['oozie.libpath'] = ','.join(paths)

                elif action.data['type'] == 'pig-document':
                    from notebook.models import Notebook
                    notebook = Notebook(document=Document2.objects.get_by_uuid(
                        user=self.user, uuid=action.data['properties']['uuid']))
                    statements = notebook.get_data()['snippets'][0]['statement_raw']

                    self._create_file(deployment_dir,
                                      action.data['name'] + '.pig', statements)
                elif action.data['type'] in ('spark', 'spark-document') or (
                        action.data['type'] in ('sqoop', 'sqoop-document')
                        and action.data['properties']['statement']
                        in '--hive-import'):
                    if not [f for f in action.data.get('properties').get('files', [])
                            if f.get('value').endswith('hive-site.xml')]:
                        hive_site_lib = Hdfs.join(deployment_dir + '/lib/',
                                                  'hive-site.xml')
                        hive_site_content = get_hive_site_content()
                        if not self.fs.do_as_user(
                                self.user, self.fs.exists,
                                hive_site_lib) and hive_site_content:
                            self.fs.do_as_user(
                                self.user,
                                self.fs.create,
                                hive_site_lib,
                                overwrite=True,
                                permission=0700,
                                data=smart_str(hive_site_content))
                    if action.data['type'] in ('sqoop', 'sqoop-document'):
                        if (CONFIG_JDBC_LIBS_PATH.get()
                                and CONFIG_JDBC_LIBS_PATH.get() not in self.properties.get('oozie.libpath', '')):
                            LOG.debug("Adding to oozie.libpath %s" %
                                      CONFIG_JDBC_LIBS_PATH.get())
                            paths = [CONFIG_JDBC_LIBS_PATH.get()]
                            if self.properties.get('oozie.libpath'):
                                paths.append(self.properties['oozie.libpath'])
                            self.properties['oozie.libpath'] = ','.join(paths)

        oozie_xml = self.job.to_xml(self.properties)
        self._do_as(self.user.username, self._copy_files, deployment_dir,
                    oozie_xml, self.properties)

        return deployment_dir
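
A standalone sketch of the wrapper script assembled in the impala / impala-document branch of deploy() above. The helper name and its parameters are made up for illustration; the kinit line and the -k / --ssl / -i / -f flags mirror the script generated above.

def build_impala_wrapper(impalad_host, query_file, security_enabled=False,
                         ssl_enabled=False, user_principal=None):
    # Hypothetical helper, not part of Hue: rebuilds the shell wrapper from plain arguments.
    kinit = ''
    if security_enabled and user_principal:
        # Same pattern as deploy(): authenticate against the keytab shipped with the action.
        kinit = 'kinit -k -t *.keytab %s' % user_principal
    return """#!/bin/bash

# Needed to launch impala shell in oozie
export PYTHON_EGG_CACHE=./myeggs

%(kinit)s

impala-shell %(kerberos_option)s %(ssl_option)s -i %(impalad_host)s -f %(query_file)s""" % {
        'kinit': kinit,
        'kerberos_option': '-k' if security_enabled else '',
        'ssl_option': '--ssl' if ssl_enabled else '',
        'impalad_host': impalad_host,
        'query_file': query_file,
    }

print(build_impala_wrapper('impalad.example.com', 'query1.sql',
                           security_enabled=True, user_principal='hue/host@EXAMPLE.COM'))
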
Example #34
0
class Submission(object):
  """
  Represents one unique Oozie submission.

  Actions are:
  - submit
  - rerun
  """
  def __init__(self, user, job=None, fs=None, jt=None, properties=None, oozie_id=None, local_tz=None):
    self.job = job
    self.user = user
    self.fs = fs
    self.jt = jt # Deprecated with YARN, we now use logical names only for RM
    self.oozie_id = oozie_id
    self.api = get_oozie(self.user)

    if properties is not None:
      self.properties = properties
    else:
      self.properties = {}

    if local_tz and isinstance(self.job.data, dict):
      local_tz = self.job.data.get('properties')['timezone']

    # Modify start_date & end_date only when it's a coordinator
    from oozie.models2 import Coordinator
    if type(self.job) is Coordinator:
      if 'start_date' in self.properties:
        properties['start_date'] = convert_to_server_timezone(self.properties['start_date'], local_tz)
      if 'end_date' in self.properties:
        properties['end_date'] = convert_to_server_timezone(self.properties['end_date'], local_tz)

    if 'nominal_time' in self.properties:
      properties['nominal_time'] = convert_to_server_timezone(self.properties['nominal_time'], local_tz)

    self.properties['security_enabled'] = self.api.security_enabled

  def __str__(self):
    if self.oozie_id:
      res = "Submission for job '%s'." % (self.oozie_id,)
    else:
      res = "Submission for job '%s' (id %s, owner %s)." % (self.job.name, self.job.id, self.user)
    if self.oozie_id:
      res += " -- " + self.oozie_id
    return res

  @submit_dryrun
  def run(self, deployment_dir=None):
    """
    Take care of all the actions of submitting an Oozie workflow.
    Returns the oozie job id if all goes well.
    """

    if self.properties and 'oozie.use.system.libpath' not in self.properties:
      self.properties['oozie.use.system.libpath'] = 'true'

    self.oozie_id = self.api.submit_job(self.properties)
    LOG.info("Submitted: %s" % (self,))

    if self._is_workflow():
      self.api.job_control(self.oozie_id, 'start')
      LOG.info("Started: %s" % (self,))

    return self.oozie_id

  def rerun(self, deployment_dir, fail_nodes=None, skip_nodes=None):
    jt_address = cluster.get_cluster_addr_for_job_submission()

    self._update_properties(jt_address, deployment_dir)
    self.properties.update({'oozie.wf.application.path': deployment_dir})

    if 'oozie.coord.application.path' in self.properties:
      self.properties.pop('oozie.coord.application.path')

    if 'oozie.bundle.application.path' in self.properties:
      self.properties.pop('oozie.bundle.application.path')

    if fail_nodes:
      self.properties.update({'oozie.wf.rerun.failnodes': fail_nodes})
    elif not skip_nodes:
      self.properties.update({'oozie.wf.rerun.failnodes': 'false'}) # Case empty 'skip_nodes' list
    else:
      self.properties.update({'oozie.wf.rerun.skip.nodes': skip_nodes})

    self.api.rerun(self.oozie_id, properties=self.properties)

    LOG.info("Rerun: %s" % (self,))

    return self.oozie_id


  def rerun_coord(self, deployment_dir, params):
    jt_address = cluster.get_cluster_addr_for_job_submission()

    self._update_properties(jt_address, deployment_dir)
    self.properties.update({'oozie.coord.application.path': deployment_dir})

    self.api.job_control(self.oozie_id, action='coord-rerun', properties=self.properties, parameters=params)
    LOG.info("Rerun: %s" % (self,))

    return self.oozie_id

  def update_coord(self):
    self.api = get_oozie(self.user, api_version="v2")
    self.api.job_control(self.oozie_id, action='update', properties=self.properties, parameters=None)
    LOG.info("Update: %s" % (self,))

    return self.oozie_id

  def rerun_bundle(self, deployment_dir, params):
    jt_address = cluster.get_cluster_addr_for_job_submission()

    self._update_properties(jt_address, deployment_dir)
    self.properties.update({'oozie.bundle.application.path': deployment_dir})
    self.api.job_control(self.oozie_id, action='bundle-rerun', properties=self.properties, parameters=params)
    LOG.info("Rerun: %s" % (self,))

    return self.oozie_id


  def deploy(self, deployment_dir=None):
    try:
      if not deployment_dir:
        deployment_dir = self._create_deployment_dir()
    except Exception, ex:
      msg = _("Failed to create deployment directory: %s" % ex)
      LOG.exception(msg)
      raise PopupException(message=msg, detail=str(ex))

    if self.api.security_enabled:
      jt_address = cluster.get_cluster_addr_for_job_submission()
      self._update_properties(jt_address) # Needed for coordinator deploying workflows with credentials

    if hasattr(self.job, 'nodes'):
      for action in self.job.nodes:
        # Make sure XML is there
        # Don't support more than one level of sub-workflows
        if action.data['type'] == 'subworkflow':
          from oozie.models2 import Workflow
          workflow = Workflow(document=Document2.objects.get_by_uuid(user=self.user, uuid=action.data['properties']['workflow']))
          sub_deploy = Submission(self.user, workflow, self.fs, self.jt, self.properties)
          workspace = sub_deploy.deploy()

          self.job.override_subworkflow_id(action, workflow.id) # For displaying the correct graph
          self.properties['workspace_%s' % workflow.uuid] = workspace # For pointing to the correct workspace

        elif action.data['type'] == 'impala' or action.data['type'] == 'impala-document':
          from oozie.models2 import _get_impala_url
          from impala.impala_flags import get_ssl_server_certificate

          if action.data['type'] == 'impala-document':
            from notebook.models import Notebook
            if action.data['properties'].get('uuid'):
              notebook = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=action.data['properties']['uuid']))
              statements = notebook.get_str()
              statements = Template(statements).safe_substitute(**self.properties)
              script_name = action.data['name'] + '.sql'
              self._create_file(deployment_dir, script_name, statements)
          else:
            script_name = os.path.basename(action.data['properties'].get('script_path'))

          if self.api.security_enabled:
            kinit = 'kinit -k -t *.keytab %(user_principal)s' % {
              'user_principal': self.properties.get('user_principal', action.data['properties'].get('user_principal'))
            }
          else:
            kinit = ''

          shell_script = """#!/bin/bash

# Needed to launch impala shell in oozie
export PYTHON_EGG_CACHE=./myeggs

%(kinit)s

impala-shell %(kerberos_option)s %(ssl_option)s -i %(impalad_host)s -f %(query_file)s""" % {
  'impalad_host': action.data['properties'].get('impalad_host') or _get_impala_url(),
  'kerberos_option': '-k' if self.api.security_enabled else '',
  'ssl_option': '--ssl' if get_ssl_server_certificate() else '',
  'query_file': script_name,
  'kinit': kinit
  }

          self._create_file(deployment_dir, action.data['name'] + '.sh', shell_script)

        elif action.data['type'] == 'hive-document':
          from notebook.models import Notebook
          if action.data['properties'].get('uuid'):
            notebook = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=action.data['properties']['uuid']))
            statements = notebook.get_str()
          else:
            statements = action.data['properties'].get('statements')

          if self.properties.get('send_result_path'):
            statements = """
INSERT OVERWRITE DIRECTORY '%s'
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   "separatorChar" = "\t",
   "quoteChar"     = "'",
   "escapeChar"    = "\\"
)
STORED AS TEXTFILE %s""" % (self.properties.get('send_result_path'), '\n\n\n'.join([snippet['statement_raw'] for snippet in notebook.get_data()['snippets']]))

          if statements is not None:
            self._create_file(deployment_dir, action.data['name'] + '.sql', statements)

        elif action.data['type'] in ('java-document', 'java', 'mapreduce-document'):
          if action.data['type'] == 'java-document' or action.data['type'] == 'mapreduce-document':
            from notebook.models import Notebook
            notebook = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=action.data['properties']['uuid']))
            properties = notebook.get_data()['snippets'][0]['properties']
          else:
            properties = action.data['properties']

          if properties.get('app_jar'):
            LOG.debug("Adding to oozie.libpath %s" % properties['app_jar'])
            paths = [properties['app_jar']]
            if self.properties.get('oozie.libpath'):
              paths.append(self.properties['oozie.libpath'])
            self.properties['oozie.libpath'] = ','.join(paths)

        elif action.data['type'] == 'pig-document':
          from notebook.models import Notebook
          notebook = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=action.data['properties']['uuid']))
          statements = notebook.get_data()['snippets'][0]['statement_raw']

          self._create_file(deployment_dir, action.data['name'] + '.pig', statements)

    oozie_xml = self.job.to_xml(self.properties)
    self._do_as(self.user.username, self._copy_files, deployment_dir, oozie_xml, self.properties)

    return deployment_dir
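
The java / java-document branch above prepends the application jar to oozie.libpath before submission, and the sqoop branch in Example #33 does the same with the configured JDBC libs path. A minimal standalone sketch of that bookkeeping follows; the function name is illustrative, and only the comma-joined prepend logic comes from the code above.

def prepend_to_libpath(properties, path):
    # Hypothetical helper mirroring the app_jar / JDBC-libs handling: put the new
    # path first and keep any existing oozie.libpath entries, comma-separated.
    if not path or path in properties.get('oozie.libpath', ''):
        return properties
    paths = [path]
    if properties.get('oozie.libpath'):
        paths.append(properties['oozie.libpath'])
    properties['oozie.libpath'] = ','.join(paths)
    return properties

props = {'oozie.libpath': '/user/oozie/share/lib'}
print(prepend_to_libpath(props, '/user/hue/my-app.jar'))
# -> {'oozie.libpath': '/user/hue/my-app.jar,/user/oozie/share/lib'}
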
Example #35
0
    def test_fetch_result_size_mr(self):
        if not is_live_cluster():  # Mini-cluster does not have JHS
            raise SkipTest

        # Assert that a query with no job will return no rows or size
        statement = "SELECT 'hello world';"

        settings = [{'key': 'hive.execution.engine', 'value': 'mr'}]
        doc = self.create_query_document(owner=self.user,
                                         statement=statement,
                                         settings=settings)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc, snippet_idx=0)

        response = self.client.post(reverse('notebook:fetch_result_size'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true('result' in data)
        assert_true('rows' in data['result'])
        assert_true('size' in data['result'])
        assert_equal(None, data['result']['rows'])
        assert_equal(None, data['result']['size'])

        # Assert that a query with map & reduce task returns rows
        statement = "SELECT DISTINCT code FROM sample_07;"
        doc = self.create_query_document(owner=self.user,
                                         statement=statement,
                                         settings=settings)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc,
                                        snippet_idx=0,
                                        timeout=60.0,
                                        wait=2.0)

        response = self.client.post(reverse('notebook:fetch_result_size'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true('result' in data)
        assert_true('rows' in data['result'])
        assert_true(data['result']['rows'] > 0)

        # Assert that a query with multiple jobs returns rows
        statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
        doc = self.create_query_document(owner=self.user,
                                         statement=statement,
                                         settings=settings)
        notebook = Notebook(document=doc)
        snippet = self.execute_and_wait(doc,
                                        snippet_idx=0,
                                        timeout=60.0,
                                        wait=2.0)

        response = self.client.post(reverse('notebook:fetch_result_size'), {
            'notebook': notebook.get_json(),
            'snippet': json.dumps(snippet)
        })

        data = json.loads(response.content)
        assert_equal(0, data['status'], data)
        assert_true('result' in data)
        assert_true('rows' in data['result'])
        assert_true(data['result']['rows'] > 0)
Example #36
0
class Submission(object):
    """
  Represents one unique Oozie submission.

  Actions are:
  - submit
  - rerun
  """
    def __init__(self,
                 user,
                 job=None,
                 fs=None,
                 jt=None,
                 properties=None,
                 oozie_id=None,
                 local_tz=None):
        self.job = job
        self.user = user
        self.fs = fs
        self.jt = jt  # Deprecated with YARN, we now use logical names only for RM
        self.oozie_id = oozie_id
        self.api = get_oozie(self.user)

        if properties is not None:
            self.properties = properties
        else:
            self.properties = {}

        if local_tz and isinstance(self.job.data, dict):
            local_tz = self.job.data.get('properties')['timezone']

        # Modify start_date & end_date only when it's a coordinator
        from oozie.models2 import Coordinator
        if type(self.job) is Coordinator:
            if 'start_date' in self.properties:
                properties['start_date'] = convert_to_server_timezone(
                    self.properties['start_date'], local_tz)
            if 'end_date' in self.properties:
                properties['end_date'] = convert_to_server_timezone(
                    self.properties['end_date'], local_tz)

        self.properties['security_enabled'] = self.api.security_enabled

    def __str__(self):
        if self.oozie_id:
            res = "Submission for job '%s'." % (self.oozie_id, )
        else:
            res = "Submission for job '%s' (id %s, owner %s)." % (
                self.job.name, self.job.id, self.user)
        if self.oozie_id:
            res += " -- " + self.oozie_id
        return res

    @submit_dryrun
    def run(self, deployment_dir=None):
        """
    Take care of all the actions of submitting an Oozie workflow.
    Returns the oozie job id if all goes well.
    """

        if self.properties and 'oozie.use.system.libpath' not in self.properties:
            self.properties['oozie.use.system.libpath'] = 'true'

        self.oozie_id = self.api.submit_job(self.properties)
        LOG.info("Submitted: %s" % (self, ))

        if self._is_workflow():
            self.api.job_control(self.oozie_id, 'start')
            LOG.info("Started: %s" % (self, ))

        return self.oozie_id

    def rerun(self, deployment_dir, fail_nodes=None, skip_nodes=None):
        jt_address = cluster.get_cluster_addr_for_job_submission()

        self._update_properties(jt_address, deployment_dir)
        self.properties.update({'oozie.wf.application.path': deployment_dir})

        if 'oozie.coord.application.path' in self.properties:
            self.properties.pop('oozie.coord.application.path')

        if fail_nodes:
            self.properties.update({'oozie.wf.rerun.failnodes': fail_nodes})
        elif not skip_nodes:
            self.properties.update({'oozie.wf.rerun.failnodes': 'false'})  # Case empty 'skip_nodes' list
        else:
            self.properties.update({'oozie.wf.rerun.skip.nodes': skip_nodes})

        self.api.rerun(self.oozie_id, properties=self.properties)

        LOG.info("Rerun: %s" % (self, ))

        return self.oozie_id

    def rerun_coord(self, deployment_dir, params):
        jt_address = cluster.get_cluster_addr_for_job_submission()

        self._update_properties(jt_address, deployment_dir)
        self.properties.update(
            {'oozie.coord.application.path': deployment_dir})

        self.api.job_control(self.oozie_id,
                             action='coord-rerun',
                             properties=self.properties,
                             parameters=params)
        LOG.info("Rerun: %s" % (self, ))

        return self.oozie_id

    def update_coord(self):
        self.api = get_oozie(self.user, api_version="v2")
        self.api.job_control(self.oozie_id,
                             action='update',
                             properties=self.properties,
                             parameters=None)
        LOG.info("Update: %s" % (self, ))

        return self.oozie_id

    def rerun_bundle(self, deployment_dir, params):
        jt_address = cluster.get_cluster_addr_for_job_submission()

        self._update_properties(jt_address, deployment_dir)
        self.properties.update(
            {'oozie.bundle.application.path': deployment_dir})
        self.api.job_control(self.oozie_id,
                             action='bundle-rerun',
                             properties=self.properties,
                             parameters=params)
        LOG.info("Rerun: %s" % (self, ))

        return self.oozie_id

    def deploy(self):
        try:
            deployment_dir = self._create_deployment_dir()
        except Exception, ex:
            msg = _("Failed to create deployment directory: %s" % ex)
            LOG.exception(msg)
            raise PopupException(message=msg, detail=str(ex))

        if self.api.security_enabled:
            jt_address = cluster.get_cluster_addr_for_job_submission()
            self._update_properties(jt_address)  # Needed for coordinator deploying workflows with credentials

        if hasattr(self.job, 'nodes'):
            for action in self.job.nodes:
                # Make sure XML is there
                # Don't support more than one level of sub-workflows
                if action.data['type'] == 'subworkflow':
                    from oozie.models2 import Workflow
                    workflow = Workflow(document=Document2.objects.get_by_uuid(
                        user=self.user,
                        uuid=action.data['properties']['workflow']))
                    sub_deploy = Submission(self.user, workflow, self.fs,
                                            self.jt, self.properties)
                    workspace = sub_deploy.deploy()

                    self.job.override_subworkflow_id(action, workflow.id)  # For displaying the correct graph
                    self.properties['workspace_%s' % workflow.uuid] = workspace  # For pointing to the correct workspace
                elif action.data['type'] == 'hive-document':
                    from notebook.models import Notebook
                    notebook = Notebook(document=Document2.objects.get_by_uuid(
                        user=self.user, uuid=action.data['properties']['uuid']))

                    self._create_file(deployment_dir,
                                      action.data['name'] + '.sql',
                                      notebook.get_str())
                    #self.data['properties']['script_path'] = _generate_hive_script(self.data['uuid']) #'workspace_%s' % workflow.uui

        oozie_xml = self.job.to_xml(self.properties)
        self._do_as(self.user.username, self._copy_files, deployment_dir,
                    oozie_xml, self.properties)

        return deployment_dir
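
For the coordinator case, __init__ above rewrites start_date and end_date (and, in the fuller variants of the class, nominal_time) into the server timezone. A simplified standalone mirror of that guard is shown below, with a stand-in converter injected in place of Hue's convert_to_server_timezone.

def convert_submission_dates(properties, local_tz, convert):
    # Hypothetical helper: only keys that are actually present get rewritten,
    # matching the 'in self.properties' checks in __init__ above.
    for key in ('start_date', 'end_date', 'nominal_time'):
        if key in properties:
            properties[key] = convert(properties[key], local_tz)
    return properties

identity = lambda value, tz: value  # stand-in for convert_to_server_timezone
print(convert_submission_dates({'start_date': '2018-01-01T00:00Z'}, 'UTC', identity))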