def run(self):
    """Index the sharded JSON input into 'druglabel', then touch the output.

    When ``self.batch`` does not exist on disk, nothing is indexed and only
    the output file is touched. Otherwise the sharded input at
    ``self.input()[1].path`` is fed through ``parallel.mapreduce`` with an
    ``index_util.LoadJSONMapper`` between
    ``index_util.start_index_transaction`` and
    ``index_util.commit_index_transaction``, after which the output file
    is touched.
    """
    done_marker = self.output().path

    # The umbrella process only iterates over dates, so a batch file may be
    # absent for a given date; in that case the load is skipped and only
    # the marker below is written.
    if os.path.exists(self.batch):
        shard_path = self.input()[1].path
        client = elasticsearch.Elasticsearch(self.es_host)

        index_util.start_index_transaction(client, 'druglabel', self.epoch)

        # Built after the transaction has started, in the same order the
        # call arguments were originally evaluated.
        shards = parallel.Collection.from_sharded(shard_path)
        loader = index_util.LoadJSONMapper(
            self.es_host,
            'druglabel',
            'spl',
            self.epoch,
            docid_key='set_id',
            version_key='version',
        )
        sink = parallel.NullReducer()

        parallel.mapreduce(
            input_collection=shards,
            mapper=loader,
            reducer=sink,
            output_prefix='/tmp/loadjson.druglabel',
            num_shards=1,
            map_workers=1,
        )

        index_util.commit_index_transaction(client, 'druglabel')

    common.shell_cmd('touch %s', done_marker)
def run(self):
    """Index the sharded JSON input into the 'drugevent' index.

    Starts an index transaction via ``index_util.start_index_transaction``,
    runs ``parallel.mapreduce`` over the sharded collection found at
    ``self.input()[1].path`` using an ``index_util.LoadJSONMapper``, and
    finally calls ``index_util.commit_index_transaction``.
    """
    client = elasticsearch.Elasticsearch(self.es_host)
    index_util.start_index_transaction(client, 'drugevent', self.epoch)

    # The mapreduce arguments are created only once the transaction has
    # been started, matching the original evaluation order.
    shards = parallel.Collection.from_sharded(self.input()[1].path)
    loader = index_util.LoadJSONMapper(
        self.es_host,
        'drugevent',
        'safetyreport',
        self.epoch,
        docid_key='@case_number',
        version_key='@version',
    )
    sink = parallel.NullReducer()

    parallel.mapreduce(
        shards,
        loader,
        sink,
        output_prefix='/tmp/loadjson.drugevent',
        num_shards=1,
        map_workers=1,
    )

    index_util.commit_index_transaction(client, 'drugevent')
def run(self):
    """Push the sharded JSON input into the 'drugevent' index.

    The work is bracketed by ``index_util.start_index_transaction`` and
    ``index_util.commit_index_transaction``; in between,
    ``parallel.mapreduce`` is run over ``self.input()[1].path`` with an
    ``index_util.LoadJSONMapper`` as mapper and a ``parallel.NullReducer``
    as reducer.
    """
    index_name = 'drugevent'

    es_client = elasticsearch.Elasticsearch(self.es_host)
    index_util.start_index_transaction(es_client, index_name, self.epoch)

    # Same order as the original argument evaluation: collection, mapper,
    # reducer -- all constructed after the transaction is opened.
    source = parallel.Collection.from_sharded(self.input()[1].path)
    json_loader = index_util.LoadJSONMapper(
        self.es_host, index_name, 'safetyreport', self.epoch,
        docid_key='@case_number', version_key='@version')
    null_reducer = parallel.NullReducer()

    parallel.mapreduce(source, json_loader, null_reducer,
                       output_prefix='/tmp/loadjson.drugevent',
                       num_shards=1,
                       map_workers=1)

    index_util.commit_index_transaction(es_client, index_name)
def run(self):
    """Index the sharded JSON input into 'recall', then touch the output.

    Reads the sharded collection at ``self.input()[1].path``, runs it
    through ``parallel.mapreduce`` with an ``index_util.LoadJSONMapper``
    between ``index_util.start_index_transaction`` and
    ``index_util.commit_index_transaction``, and afterwards touches the
    file at ``self.output().path``.
    """
    done_marker = self.output().path
    shard_path = self.input()[1].path

    client = elasticsearch.Elasticsearch(self.es_host)
    index_util.start_index_transaction(client, 'recall', self.epoch)

    # Mapper is constructed after the transaction starts, as in the
    # original inline call.
    shards = parallel.Collection.from_sharded(shard_path)
    loader = index_util.LoadJSONMapper(
        self.es_host,
        'recall',
        'enforcementreport',
        self.epoch,
        docid_key='@id',
        version_key='@version',
    )

    parallel.mapreduce(
        input_collection=shards,
        mapper=loader,
        reducer=parallel.NullReducer(),
        output_prefix='/tmp/loadjson.recall',
        num_shards=1,
        map_workers=1,
    )

    index_util.commit_index_transaction(client, 'recall')

    # Written only once the commit call has returned.
    common.shell_cmd('touch %s', done_marker)