Example #1
0
  def run(self):
    """Run the 'druglabel' load for this batch, then touch the output file.

    If the path held in ``self.batch`` does not exist, nothing is loaded and
    only the output file is touched. Otherwise ``parallel.mapreduce`` is run
    over the sharded input (second entry of ``self.input()``) with an
    ``index_util.LoadJSONMapper`` and a ``parallel.NullReducer``, bracketed by
    ``start_index_transaction`` / ``commit_index_transaction``.
    """
    marker_path = self.output().path

    # The umbrella process only iterates over dates, so some batch files will
    # simply not exist; those are skipped and just marked as done below.
    if os.path.exists(self.batch):
      index_name = 'druglabel'
      shard_path = self.input()[1].path
      es_client = elasticsearch.Elasticsearch(self.es_host)

      index_util.start_index_transaction(es_client, index_name, self.epoch)
      records = parallel.Collection.from_sharded(shard_path)
      # NOTE(review): presumably the mapper writes each JSON record to the
      # index keyed by set_id/version -- confirm in index_util.
      loader = index_util.LoadJSONMapper(
        self.es_host,
        index_name,
        'spl',
        self.epoch,
        docid_key='set_id',
        version_key='version')
      parallel.mapreduce(
        input_collection=records,
        mapper=loader,
        reducer=parallel.NullReducer(),
        output_prefix='/tmp/loadjson.druglabel',
        num_shards=1,
        map_workers=1)
      index_util.commit_index_transaction(es_client, index_name)

    # Reached both when the batch was skipped and after a successful load.
    common.shell_cmd('touch %s', marker_path)
Example #2
0
  def run(self):
    """Run the 'drugevent' / 'safetyreport' load inside an index transaction.

    Opens a transaction on the 'drugevent' index for ``self.epoch``, runs
    ``parallel.mapreduce`` over the sharded input (second entry of
    ``self.input()``) with an ``index_util.LoadJSONMapper`` and a
    ``parallel.NullReducer``, then commits the transaction.
    """
    index_name = 'drugevent'
    es_client = elasticsearch.Elasticsearch(self.es_host)
    index_util.start_index_transaction(es_client, index_name, self.epoch)

    # The input path is resolved only after the transaction has been started.
    records = parallel.Collection.from_sharded(self.input()[1].path)
    loader = index_util.LoadJSONMapper(
      self.es_host,
      index_name,
      'safetyreport',
      self.epoch,
      docid_key='@case_number',
      version_key='@version')
    sink = parallel.NullReducer()
    parallel.mapreduce(
      records,
      loader,
      sink,
      output_prefix='/tmp/loadjson.drugevent',
      num_shards=1,
      map_workers=1)

    index_util.commit_index_transaction(es_client, index_name)
Example #3
0
    def run(self):
        """Run the 'drugevent' / 'safetyreport' load inside an index transaction.

        Steps, in order: start a transaction on the 'drugevent' index for
        ``self.epoch``; run ``parallel.mapreduce`` over the sharded input
        (second entry of ``self.input()``) using ``index_util.LoadJSONMapper``
        and ``parallel.NullReducer``; commit the transaction.
        """
        target_index = 'drugevent'
        client = elasticsearch.Elasticsearch(self.es_host)
        index_util.start_index_transaction(client, target_index, self.epoch)

        # Build the three positional mapreduce arguments up front, in the
        # same order they are passed.
        source = parallel.Collection.from_sharded(self.input()[1].path)
        json_mapper = index_util.LoadJSONMapper(
            self.es_host,
            target_index,
            'safetyreport',
            self.epoch,
            docid_key='@case_number',
            version_key='@version',
        )
        null_reducer = parallel.NullReducer()

        parallel.mapreduce(
            source,
            json_mapper,
            null_reducer,
            output_prefix='/tmp/loadjson.drugevent',
            num_shards=1,
            map_workers=1,
        )

        index_util.commit_index_transaction(client, target_index)
Example #4
0
 def run(self):
     """Run the 'recall' / 'enforcementreport' load, then touch the output file.

     Starts a transaction on the 'recall' index for ``self.epoch``, runs
     ``parallel.mapreduce`` over the sharded input (second entry of
     ``self.input()``) with an ``index_util.LoadJSONMapper`` and a
     ``parallel.NullReducer``, commits the transaction, and finally touches
     the path of ``self.output()``.
     """
     index_name = 'recall'
     marker_path = self.output().path
     shard_path = self.input()[1].path
     es_client = elasticsearch.Elasticsearch(self.es_host)

     index_util.start_index_transaction(es_client, index_name, self.epoch)

     records = parallel.Collection.from_sharded(shard_path)
     loader = index_util.LoadJSONMapper(
         self.es_host,
         index_name,
         'enforcementreport',
         self.epoch,
         docid_key='@id',
         version_key='@version')
     parallel.mapreduce(
         input_collection=records,
         mapper=loader,
         reducer=parallel.NullReducer(),
         output_prefix='/tmp/loadjson.recall',
         num_shards=1,
         map_workers=1)

     index_util.commit_index_transaction(es_client, index_name)

     # Touched only after the commit call has returned.
     common.shell_cmd('touch %s', marker_path)
Example #5
0
 def run(self):
   """Run the 'recall' / 'enforcementreport' load, then touch the output file.

   Order of operations: resolve the output and input paths, start a
   transaction on the 'recall' index for ``self.epoch``, run
   ``parallel.mapreduce`` with ``index_util.LoadJSONMapper`` and
   ``parallel.NullReducer`` over the sharded input, commit the transaction,
   and touch the output path.
   """
   target_index = 'recall'
   done_path = self.output().path
   source_path = self.input()[1].path
   client = elasticsearch.Elasticsearch(self.es_host)

   index_util.start_index_transaction(client, target_index, self.epoch)

   source = parallel.Collection.from_sharded(source_path)
   json_mapper = index_util.LoadJSONMapper(
     self.es_host,
     target_index,
     'enforcementreport',
     self.epoch,
     docid_key='@id',
     version_key='@version')
   null_reducer = parallel.NullReducer()
   parallel.mapreduce(
     input_collection=source,
     mapper=json_mapper,
     reducer=null_reducer,
     output_prefix='/tmp/loadjson.recall',
     num_shards=1,
     map_workers=1)

   index_util.commit_index_transaction(client, target_index)

   # The output file is created only once the commit call has returned.
   common.shell_cmd('touch %s', done_path)