def run_morphline(self, request, collection_name, morphline, input_path, query=None):
    """Submit a batch notebook job that runs Solr's MapReduceIndexerTool.

    Args:
        request: Forwarded to ``get_api`` and ``notebook.execute``;
            ``request.user`` is interpolated into the export location.
        collection_name: Value passed to the tool's ``--collection`` option;
            also used in the notebook name.
        morphline: Handed to ``self._upload_workspace``.
        input_path: Last positional argument given to the tool. It is
            replaced by the export location when ``query`` is truthy.
        query: Optional uuid of a ``Document2`` looked up for ``self.user``.
            When truthy, the first snippet of that document is exported with
            ``export_data_as_table(..., is_temporary=True)`` and a hive
            snippet running the returned SQL is added before the indexer.

    Returns:
        Whatever ``notebook.execute(request, batch=True)`` returns.
    """
    # log4j.properties and morphline.conf are attached to the job from this
    # path, so the upload is expected to have placed them there.
    workspace_path = self._upload_workspace(morphline)

    notebook = Notebook(
        name='Indexer job for %s' % collection_name,
        isManaged=True
    )

    if query:
        q = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=query))
        notebook_data = q.get_data()
        snippet = notebook_data['snippets'][0]

        api = get_api(request, snippet)

        # The same 4-character uuid prefix names both the table and its directory.
        uuid_prefix = notebook_data['uuid'][:4]
        destination = '__hue_%s' % uuid_prefix
        location = '/user/%s/__hue-%s' % (request.user, uuid_prefix)

        # Only the SQL is needed here; the second element is intentionally discarded.
        sql, _success_url = api.export_data_as_table(
            notebook_data, snippet, destination, is_temporary=True, location=location
        )

        # Index the exported data instead of the caller-supplied path.
        input_path = '${nameNode}%s' % location

        notebook.add_hive_snippet(snippet['database'], sql)

    notebook.add_java_snippet(
        clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
        app_jar=CONFIG_INDEXER_LIBS_PATH.get(),
        arguments=[
            u'--morphline-file',
            u'morphline.conf',
            u'--output-dir',
            u'${nameNode}/user/%s/indexer' % self.username,
            u'--log4j',
            u'log4j.properties',
            u'--go-live',
            u'--zk-host',
            zkensemble(),
            u'--collection',
            collection_name,
            input_path,
        ],
        files=[
            {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
            {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'}
        ]
    )

    return notebook.execute(request, batch=True)
def run_morphline(self, request, collection_name, morphline, input_path, query=None, start_time=None, lib_path=None):
    """Create and execute a batch task that indexes data with MapReduceIndexerTool.

    Args:
        request: Forwarded to ``get_api`` and to the task's ``execute``;
            ``request.user`` is interpolated into the export location.
        collection_name: Value for the tool's ``--collection`` option; also
            used in the task name and in the ``search:browse`` success URL.
        morphline: Handed to ``self._upload_workspace``.
        input_path: Last positional argument given to the tool. It is
            replaced by the export location when ``query`` is truthy.
        query: Optional uuid of a ``Document2`` looked up for ``self.user``.
            When truthy, its first snippet is exported with
            ``export_data_as_table(..., is_temporary=True)`` and a hive
            snippet running the returned SQL is added before the indexer.
        start_time: Passed to ``make_notebook`` as ``last_executed``.
        lib_path: Jar used for the java snippet; when ``None`` the value of
            ``CONFIG_INDEXER_LIBS_PATH.get()`` is used instead.

    Returns:
        Whatever ``execute(request, batch=True)`` returns for the task.
    """
    workspace_dir = self._upload_workspace(morphline)

    title = _('Indexing into %s') % collection_name
    browse_url = reverse('search:browse', kwargs={'name': collection_name})
    job = make_notebook(
        name=title,
        editor_type='notebook',
        on_success_url=browse_url,
        pub_sub_url='assist.collections.refresh',
        is_task=True,
        is_notebook=True,
        last_executed=start_time
    )

    if query:
        source_doc = Document2.objects.get_by_uuid(user=self.user, uuid=query)
        source_data = Notebook(document=source_doc).get_data()
        first_snippet = source_data['snippets'][0]
        connector = get_api(request, first_snippet)

        uuid_prefix = source_data['uuid'][:4]
        table_name = '__hue_%s' % uuid_prefix
        export_dir = '/user/%s/__hue-%s' % (request.user, uuid_prefix)

        # Do not name the discarded element ``_``: that name is the
        # translation function used above.
        export_sql, _ignored_url = connector.export_data_as_table(
            source_data,
            first_snippet,
            table_name,
            is_temporary=True,
            location=export_dir
        )

        # From here on the indexer reads the exported data.
        input_path = '${nameNode}%s' % export_dir
        job.add_hive_snippet(first_snippet['database'], export_sql)

    solr = SolrClient(self.user)
    if solr.is_solr_six_or_more():
        leading_args = ['-Dmapreduce.job.user.classpath.first=true']
    else:
        leading_args = []

    jar_path = lib_path
    if jar_path is None:
        jar_path = CONFIG_INDEXER_LIBS_PATH.get()

    tool_args = leading_args + [
        u'--morphline-file', u'morphline.conf',
        u'--output-dir', u'${nameNode}/user/%s/indexer' % self.username,
        u'--log4j', u'log4j.properties',
        u'--go-live',
        u'--zk-host', solr.get_zookeeper_host(),
        u'--collection', collection_name,
        input_path,
    ]

    attached_files = [
        {u'path': u'%s/%s' % (workspace_dir, u'log4j.properties'), u'type': u'file'},
        {u'path': u'%s/%s' % (workspace_dir, u'morphline.conf'), u'type': u'file'},
    ]

    job.add_java_snippet(
        clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
        app_jar=jar_path,
        arguments=tool_args,
        files=attached_files
    )

    return job.execute(request, batch=True)
def run_morphline(self, request, collection_name, morphline, input_path, query=None):
    """Run Solr's MapReduceIndexerTool as a managed batch notebook job.

    Args:
        request: Forwarded to ``get_api`` and ``notebook.execute``;
            ``request.user`` is interpolated into the export location.
        collection_name: Value passed to the tool's ``--collection`` option;
            also used in the notebook name.
        morphline: Handed to ``self._upload_workspace``.
        input_path: Last positional argument given to the tool. It is
            replaced by the export location when ``query`` is truthy.
        query: Optional uuid of a ``Document2`` looked up for ``self.user``.
            When truthy, the first snippet of that document is exported with
            ``export_data_as_table(..., is_temporary=True)`` and a hive
            snippet running the returned SQL is added before the indexer.

    Returns:
        Whatever ``notebook.execute(request, batch=True)`` returns.
    """
    # The job attaches log4j.properties and morphline.conf from this path,
    # so the upload is expected to have placed them there.
    workspace_path = self._upload_workspace(morphline)

    notebook = Notebook(
        name='Indexer job for %s' % collection_name,
        isManaged=True
    )

    if query:
        q = Notebook(document=Document2.objects.get_by_uuid(user=self.user, uuid=query))
        notebook_data = q.get_data()
        snippet = notebook_data['snippets'][0]

        api = get_api(request, snippet)

        # The same 4-character uuid prefix names both the table and its directory.
        uuid_prefix = notebook_data['uuid'][:4]
        destination = '__hue_%s' % uuid_prefix
        location = '/user/%s/__hue-%s' % (request.user, uuid_prefix)

        # Only the SQL is used; the leading underscore marks the second
        # element as intentionally unused.
        sql, _success_url = api.export_data_as_table(
            notebook_data, snippet, destination, is_temporary=True, location=location
        )

        # Index the exported data instead of the caller-supplied path.
        input_path = '${nameNode}%s' % location

        notebook.add_hive_snippet(snippet['database'], sql)

    notebook.add_java_snippet(
        clazz='org.apache.solr.hadoop.MapReduceIndexerTool',
        app_jar=CONFIG_INDEXER_LIBS_PATH.get(),
        arguments=[
            u'--morphline-file',
            u'morphline.conf',
            u'--output-dir',
            u'${nameNode}/user/%s/indexer' % self.username,
            u'--log4j',
            u'log4j.properties',
            u'--go-live',
            u'--zk-host',
            zkensemble(),
            u'--collection',
            collection_name,
            input_path,
        ],
        files=[
            {u'path': u'%s/log4j.properties' % workspace_path, u'type': u'file'},
            {u'path': u'%s/morphline.conf' % workspace_path, u'type': u'file'}
        ]
    )

    return notebook.execute(request, batch=True)