Ejemplo n.º 1
0
    def run_morphline(self, request, collection_name, morphline, input_path,
                      query=None):
        """Submit a batch notebook that runs Solr's MapReduceIndexerTool.

        The workspace path returned by ``self._upload_workspace(morphline)``
        is where ``morphline.conf`` and ``log4j.properties`` are referenced
        from. When ``query`` is truthy it is used as the UUID of a saved
        document: the first snippet of that notebook is exported as a
        temporary table under ``/user/<request.user>/__hue-<uuid[:4]>``, the
        resulting SQL is added as a Hive snippet, and that location replaces
        ``input_path``.

        Args:
            request: Request forwarded to ``get_api`` and ``notebook.execute``.
            collection_name: Collection passed to ``--collection``.
            morphline: Handed to ``self._upload_workspace``.
            input_path: Last positional argument given to the indexer tool;
                overridden when ``query`` is set.
            query: Optional UUID of a saved query document.

        Returns:
            Whatever ``notebook.execute(request, batch=True)`` returns.
        """
        workspace_path = self._upload_workspace(morphline)

        job_name = 'Indexer job for %s' % collection_name
        notebook = Notebook(name=job_name, isManaged=True)

        if query:
            saved_doc = Document2.objects.get_by_uuid(user=self.user,
                                                      uuid=query)
            notebook_data = Notebook(document=saved_doc).get_data()
            snippet = notebook_data['snippets'][0]

            api = get_api(request, snippet)

            # Both the table name and its location are keyed on the first
            # four characters of the saved notebook's uuid.
            uuid_prefix = notebook_data['uuid'][:4]
            destination = '__hue_%s' % uuid_prefix
            location = '/user/%s/__hue-%s' % (request.user, uuid_prefix)

            sql, _success_url = api.export_data_as_table(
                notebook_data, snippet, destination,
                is_temporary=True, location=location)

            # The indexer reads from the exported table's location instead of
            # the caller-supplied path.
            input_path = '${nameNode}%s' % location

            notebook.add_hive_snippet(snippet['database'], sql)

        app_jar = CONFIG_INDEXER_LIBS_PATH.get()
        output_dir = u'${nameNode}/user/%s/indexer' % self.username
        indexer_args = [
            u'--morphline-file', u'morphline.conf',
            u'--output-dir', output_dir,
            u'--log4j', u'log4j.properties',
            u'--go-live',
            u'--zk-host', zkensemble(),
            u'--collection', collection_name,
            input_path,
        ]
        workspace_files = [
            {u'path': u'%s/log4j.properties' % workspace_path,
             u'type': u'file'},
            {u'path': u'%s/morphline.conf' % workspace_path,
             u'type': u'file'},
        ]

        notebook.add_java_snippet(
            clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
            app_jar=app_jar,
            arguments=indexer_args,
            files=workspace_files)

        return notebook.execute(request, batch=True)
Ejemplo n.º 2
0
  def run_morphline(self, request, collection_name, morphline, input_path,
                    query=None):
    """Build an indexer notebook for ``collection_name`` and execute it in batch.

    The notebook always gets a Java snippet running
    ``org.apache.solr.hadoop.MapReduceIndexerTool`` with ``morphline.conf`` and
    ``log4j.properties`` taken from the workspace path returned by
    ``self._upload_workspace(morphline)``.

    If ``query`` is truthy, it is looked up as the UUID of a saved document.
    The first snippet of that notebook is exported as a temporary table
    located at ``/user/<request.user>/__hue-<uuid[:4]>``; the export SQL is
    added as a Hive snippet and the location is used in place of
    ``input_path``.

    Returns the result of ``notebook.execute(request, batch=True)``.
    """
    workspace_path = self._upload_workspace(morphline)
    notebook = Notebook(name='Indexer job for %s' % collection_name, isManaged=True)

    if query:
      source_doc = Document2.objects.get_by_uuid(user=self.user, uuid=query)
      notebook_data = Notebook(document=source_doc).get_data()
      snippet = notebook_data['snippets'][0]

      api = get_api(request, snippet)

      # The first four characters of the uuid name both the table and its directory.
      short_uuid = notebook_data['uuid'][:4]
      location = '/user/%s/__hue-%s' % (request.user, short_uuid)
      sql, _success_url = api.export_data_as_table(
          notebook_data,
          snippet,
          '__hue_%s' % short_uuid,
          is_temporary=True,
          location=location
      )

      # Index the exported data rather than the path the caller passed in.
      input_path = '${nameNode}%s' % location
      notebook.add_hive_snippet(snippet['database'], sql)

    java_snippet = {
        'clazz': 'org.apache.solr.hadoop.MapReduceIndexerTool',
        'app_jar': CONFIG_INDEXER_LIBS_PATH.get(),
        'arguments': [
            u'--morphline-file', u'morphline.conf',
            u'--output-dir', u'${nameNode}/user/%s/indexer' % self.username,
            u'--log4j', u'log4j.properties',
            u'--go-live',
            u'--zk-host', zkensemble(),
            u'--collection', collection_name,
            input_path,
        ],
        'files': [
            {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
            {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'},
        ],
    }
    notebook.add_java_snippet(**java_snippet)

    return notebook.execute(request, batch=True)
Ejemplo n.º 3
0
def run_sync_query(doc_id, user):
    """Independently run a saved query as a user and wait for it to finish.

    The document identified by ``doc_id`` is loaded, its statement is placed
    in a new task notebook named ``'Scheduled query <name> at <timestamp>'``,
    and that notebook is executed in batch mode. The call then polls
    ``check_status`` every 3 seconds until the reported status is no longer
    ``'waiting'`` or ``'running'``.

    Args:
        doc_id: UUID of the saved query document.
        user: A user object, or a ``str`` username. A string is resolved with
            ``User.objects.get`` on the field named by ``orm_user_lookup()``
            and then passed through ``rewrite_user``.

    Returns:
        The task mapping returned by ``notebook.execute``, with its ``'uuid'``
        key set to the value of ``'history_uuid'``.
    """
    # Notes carried over from the original author; they read as planned work
    # that this function does not perform itself (confirm):
    #   - Add INSERT INTO table if persist result
    #   - Add variable substitution
    #   - Send notifications: done/on failure
    if isinstance(user, str):
        lookup = {orm_user_lookup(): user}
        user = User.objects.get(**lookup)
        user = rewrite_user(user)

    query_document = Document2.objects.get_by_uuid(user=user, uuid=doc_id)
    notebook = Notebook(document=query_document).get_data()
    snippet = notebook['snippets'][0]

    editor_type = snippet['type']
    sql = _get_statement(notebook)
    request = MockedDjangoRequest(user=user)
    # Local wall-clock time in milliseconds, truncated to whole seconds.
    last_executed = time.mktime(datetime.datetime.now().timetuple()) * 1000

    notebook = make_notebook(
        name='Scheduled query %s at %s' % (query_document.name, last_executed),
        editor_type=editor_type,
        statement=sql,
        status='ready',
        last_executed=last_executed,
        is_task=True,
    )

    task = notebook.execute(request, batch=True)

    # check_status presumably looks the task up by 'uuid' -- verify.
    task['uuid'] = task['history_uuid']

    poll_interval = 3  # seconds between status checks
    status = check_status(task)
    while status['status'] in ('waiting', 'running'):
        # Sleep *before* re-polling: this avoids an immediate back-to-back
        # second poll and a pointless sleep after the final status arrives.
        time.sleep(poll_interval)
        status = check_status(task)

    return task