def run(self):
    """Apply an ``annotate.AnnotateMapper`` to every JSON file in the event input.

    The mapper is built from the path of the first input
    (``self.input()[0]``).  It is then called once per ``*.json`` file found
    directly under the second input's path (``self.input()[1]``), each time
    with that file name and the single output path
    ``<output dir>/annotated_res.json``.

    Raises:
        OSError: if the output directory cannot be created.
    """
    output_dir = self.output().path

    # Create the directory in-process rather than shelling out to
    # ``mkdir -p``: the path is never interpreted by a shell, and a failure
    # surfaces as an exception instead of an ignored exit status.
    try:
        os.makedirs(output_dir)
    except OSError:
        # An already-existing directory is fine (same as ``mkdir -p``);
        # anything else is a real error.
        if not os.path.isdir(output_dir):
            raise

    inputs = self.input()
    harmonized_file = inputs[0].path
    event_files = glob.glob(os.path.join(inputs[1].path, '*.json'))
    output_file = os.path.join(output_dir, 'annotated_res.json')

    recall_event = annotate.AnnotateMapper(harmonized_file)
    # NOTE(review): every call receives the same output_file; whether the
    # mapper appends or overwrites is not visible here -- confirm.
    for file_name in event_files:
        recall_event(file_name, output_file)
def run(self):
    """Run a single-shard mapreduce that annotates the sharded input.

    The first input's path is opened as a sharded collection and the second
    input's path is handed to ``annotate.AnnotateMapper``.  Mapped records
    pass through ``parallel.IdentityReducer`` and are written under this
    task's output path, using one map worker and one output shard.
    """
    sharded_db_path = self.input()[0].path
    harmonization_path = self.input()[1].path

    source = parallel.Collection.from_sharded(sharded_db_path)
    annotator = annotate.AnnotateMapper(harmonization_path)
    passthrough = parallel.IdentityReducer()

    parallel.mapreduce(
        source,
        mapper=annotator,
        reducer=passthrough,
        output_prefix=self.output().path,
        num_shards=1,
        map_workers=1,
    )