Beispiel #1
0
    def execute(self, notebook, snippet):
        """Run a saved notebook as a managed batch workflow.

        Args:
            notebook: Notebook data; must contain a non-empty 'uuid'.
            snippet: Not used by this method.

        Returns:
            A dict whose 'id' is the value returned by ``_submit_workflow``
            and whose 'has_result_set' is always True.

        Raises:
            PopupException: If the notebook has no 'uuid' or an empty one.
        """
        # The notebook must have been saved, otherwise there is no document
        # to look up by uuid below.
        if not notebook.get('uuid', ''):
            raise PopupException(
                _('Notebook is missing a uuid, please save the notebook before executing as a batch job.'
                  ))

        notebook_doc = Document2.objects.get_by_uuid(user=self.user,
                                                     uuid=notebook['uuid'],
                                                     perm_type='read')

        # Create a managed workflow from the notebook doc.
        # `%` binds tighter than `or`, so the fallback has to be parenthesized:
        # without the parentheses the formatted string is always truthy and
        # the document type is never used for unnamed notebooks.
        workflow_doc = WorkflowBuilder().create_workflow(
            document=notebook_doc,
            user=self.user,
            managed=True,
            name=_("Batch job for %s") %
            (notebook_doc.name or notebook_doc.type))
        workflow = Workflow(document=workflow_doc, user=self.user)

        # Submit workflow
        job_id = _submit_workflow(user=self.user,
                                  fs=self.fs,
                                  jt=self.jt,
                                  workflow=workflow,
                                  mapping=None)

        return {
            'id': job_id,
            'has_result_set': True,
        }
Beispiel #2
0
    def run_morphline(self, collection_name, morphline, input_path):
        """Submit a MapReduceIndexerTool job driven by the given morphline.

        The morphline is uploaded through ``self._upload_workspace``, a java
        notebook named 'Indexer' is built and saved, and a managed workflow
        created from that notebook is submitted.

        Args:
            collection_name: Value passed to the tool's ``--collection`` flag.
            morphline: Morphline handed to ``self._upload_workspace``.
            input_path: Path appended to ``${nameNode}`` as the job input.

        Returns:
            Whatever ``_submit_workflow`` returns for the submitted workflow.
        """
        workspace_path = self._upload_workspace(morphline)

        # Files from the uploaded workspace that the job needs.
        shipped_files = [
            {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
            {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'},
        ]
        app_jar = CONFIG_INDEXER_LIBS_PATH.get()

        # Command line of the indexer tool; flags are paired with their values.
        tool_arguments = [
            u'--morphline-file', u'morphline.conf',
            u'--output-dir', u'${nameNode}/user/%s/indexer' % self.username,
            u'--log4j', u'log4j.properties',
            u'--go-live',
            u'--zk-host', zkensemble(),
            u'--collection', collection_name,
            u'${nameNode}%s' % input_path,
        ]

        snippet_properties = {
            u'files': shipped_files,
            u'class': u'org.apache.solr.hadoop.MapReduceIndexerTool',
            u'app_jar': app_jar,
            u'arguments': tool_arguments,
            u'archives': [],
        }

        indexer_notebook = make_notebook(name='Indexer',
                                         editor_type='java',
                                         snippet_properties=snippet_properties)
        # The "created" flag is not needed here. It must not be bound to `_`,
        # which is the translation function used just below.
        notebook_doc, _created = _save_notebook(indexer_notebook.get_data(),
                                                self.user)

        builder = WorkflowBuilder()
        workflow_doc = builder.create_workflow(
            document=notebook_doc,
            user=self.user,
            managed=True,
            name=_("Batch job for %s") % notebook_doc.name)

        return _submit_workflow(
            user=self.user,
            fs=self.fs,
            jt=self.jt,
            workflow=Workflow(document=workflow_doc, user=self.user),
            mapping=None)
Beispiel #3
0
  def execute(self, notebook, snippet):
    """Run a saved notebook as a managed batch workflow.

    Args:
      notebook: Notebook data; must contain a non-empty 'uuid'.
      snippet: Not used by this method.

    Returns:
      A dict whose 'id' is the value returned by ``_submit_workflow`` and
      whose 'has_result_set' is always True.

    Raises:
      PopupException: If the notebook has no 'uuid' or an empty one.
    """
    # The notebook must have been saved, otherwise there is no document to
    # look up by uuid below.
    if not notebook.get('uuid', ''):
      raise PopupException(_('Notebook is missing a uuid, please save the notebook before executing as a batch job.'))

    notebook_doc = Document2.objects.get_by_uuid(user=self.user, uuid=notebook['uuid'], perm_type='read')

    # Create a managed workflow from the notebook doc.
    # `%` binds tighter than `or`, so the fallback has to be parenthesized:
    # without the parentheses the formatted string is always truthy and the
    # document type is never used for unnamed notebooks.
    workflow_name = _("Batch job for %s") % (notebook_doc.name or notebook_doc.type)
    workflow_doc = WorkflowBuilder().create_workflow(document=notebook_doc, user=self.user, managed=True, name=workflow_name)
    workflow = Workflow(document=workflow_doc, user=self.user)

    # Submit workflow
    job_id = _submit_workflow(user=self.user, fs=self.fs, jt=self.jt, workflow=workflow, mapping=None)

    return {
      'id': job_id,
      'has_result_set': True,
    }
Beispiel #4
0
    def execute(self, notebook, snippet):
        """Submit the notebook as a managed batch workflow.

        Notebooks of type 'notebook' or 'query-java' are converted to a
        workflow directly from the notebook data. Any other type is loaded as
        a saved document (read permission) and the workflow is built from it.

        Args:
            notebook: Notebook data; must contain a non-empty 'uuid', plus
                'type' and (for the direct conversion path) 'name'.
            snippet: Not used by this method.

        Returns:
            A dict whose 'id' is the value returned by ``_submit_workflow``
            and whose 'has_result_set' is always True.

        Raises:
            PopupException: If the notebook has no 'uuid' or an empty one.
        """
        if not notebook.get('uuid', ''):
            raise PopupException(
                _('Notebook is missing a uuid, please save the notebook before executing as a batch job.'))

        if notebook['type'] in ('notebook', 'query-java'):
            # Build the workflow straight from the in-memory notebook.
            workflow_doc = WorkflowBuilder().create_notebook_workflow(
                notebook=notebook,
                user=self.user,
                managed=True,
                name=_("%s for %s") % (
                    OozieApi.BATCH_JOB_PREFIX,
                    notebook['name'] or notebook['type'],
                ))
        else:
            # Build the workflow from the stored notebook document.
            saved_doc = Document2.objects.get_by_uuid(
                user=self.user, uuid=notebook['uuid'], perm_type='read')
            workflow_doc = WorkflowBuilder().create_workflow(
                document=saved_doc,
                user=self.user,
                managed=True,
                name=_("Batch job for %s") % (saved_doc.name or saved_doc.type))

        # Both paths wrap the resulting document the same way before submitting.
        workflow = Workflow(document=workflow_doc, user=self.user)
        submitted_id = _submit_workflow(
            user=self.user, fs=self.fs, jt=self.jt, workflow=workflow, mapping=None)

        return {'id': submitted_id, 'has_result_set': True}
Beispiel #5
0
  def run_morphline(self, collection_name, morphline, input_path):
    """Submit a MapReduceIndexerTool job driven by the given morphline.

    The morphline is uploaded through ``self._upload_workspace``, a java
    notebook named 'Indexer' is built and saved, and a managed workflow
    created from that notebook is submitted.

    Args:
      collection_name: Value passed to the tool's ``--collection`` flag.
      morphline: Morphline handed to ``self._upload_workspace``.
      input_path: Path appended to ``${nameNode}`` as the job input.

    Returns:
      Whatever ``_submit_workflow`` returns for the submitted workflow.
    """
    workspace_path = self._upload_workspace(morphline)

    # Files from the uploaded workspace that the job needs.
    log4j_file = {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'}
    morphline_file = {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'}
    app_jar = CONFIG_INDEXER_LIBS_PATH.get()

    # Command line of the indexer tool; flags are paired with their values.
    tool_arguments = [
      u'--morphline-file', u'morphline.conf',
      u'--output-dir', u'${nameNode}/user/%s/indexer' % self.username,
      u'--log4j', u'log4j.properties',
      u'--go-live',
      u'--zk-host', zkensemble(),
      u'--collection', collection_name,
      u'${nameNode}%s' % input_path,
    ]

    snippet_properties = {
      u'files': [log4j_file, morphline_file],
      u'class': u'org.apache.solr.hadoop.MapReduceIndexerTool',
      u'app_jar': app_jar,
      u'arguments': tool_arguments,
      u'archives': [],
    }

    indexer_notebook = make_notebook(name='Indexer', editor_type='java', snippet_properties=snippet_properties)
    # The "created" flag is not needed here. It must not be bound to `_`,
    # which is the translation function used just below.
    notebook_doc, _created = _save_notebook(indexer_notebook.get_data(), self.user)

    builder = WorkflowBuilder()
    workflow_doc = builder.create_workflow(
      document=notebook_doc,
      user=self.user,
      managed=True,
      name=_("Batch job for %s") % notebook_doc.name
    )

    return _submit_workflow(
      user=self.user,
      fs=self.fs,
      jt=self.jt,
      workflow=Workflow(document=workflow_doc, user=self.user),
      mapping=None
    )