"""Print a single value taken from an evaluated configuration option.

Usage: python <script> <config> <section> <option> <key>

The first argument is handed to ``ConfigHandler``; the second and third are
passed, in that order, to ``ConfigHandler.get_eval_option``.  The object
returned by that call is indexed with the fourth argument and the value found
there is printed to stdout.
"""
import sys

from config_handler import ConfigHandler

if __name__ == '__main__':
    # All four positional arguments are indexed below, so validate the count
    # up front instead of failing later with a bare IndexError traceback.
    if len(sys.argv) < 5:
        # The usage text goes to stderr so that a caller capturing stdout
        # never mistakes it for the requested value.
        sys.stderr.write(
            'Usage: python %s <config> <section> <option> <key>\n'
            % sys.argv[0])
        sys.exit(1)

    config_handler = ConfigHandler(sys.argv[1])
    # NOTE(review): the two arguments look like a section name followed by an
    # option name -- confirm against ConfigHandler.get_eval_option.
    option_dict = config_handler.get_eval_option(sys.argv[2], sys.argv[3])
    # A key that is absent from the evaluated option still raises, exactly as
    # before; only the missing-argument case is handled explicitly.
    print(option_dict[sys.argv[4]])
(topic_list, host_conf['host'], host_conf['username'], host_conf['password'], topics)) print('%s topics created.' % len(topics_conf)) if __name__ == '__main__': if len(sys.argv) < 4: print('python extractor.py <config> <source_dir> <target_dir>\n') exit(1) source_dir = sys.argv[2] target_dir = sys.argv[3] config_handler = ConfigHandler(sys.argv[1]) prefix = config_handler.get_config_option('info', 'prefix') sources = config_handler.get_eval_option('extraction', 'sources') for source in sources: tsv_files = source['tsv_files'] for tsv_file in tsv_files: file_name = '%s/%s' % (source_dir, tsv_file['file_name']) lines = load_lines(file_name) ent_conf = tsv_file['entities'] for entity_name, entity_info in ent_conf.items(): file_name = '%s/%s.tsv' % (target_dir, entity_name) rows = create_rows(lines, entity_info) count = write_rows(rows, file_name, entity_info) print('%s [%s] entities extracted.' % (count, entity_name)) rel_conf = tsv_file['relations']
producer.flush() producer.flush() print('. %s' % count) if __name__ == "__main__": if len(sys.argv) < 3: print("Usage: python producer.py <config> <tsv_dir> <remote>") exit(1) handler = ConfigHandler(sys.argv[1]) tsv_dir = sys.argv[2] remote = len(sys.argv) == 4 config = handler.get_eval_option('yggdrasil', 'conf') prefix = handler.get_config_option('info', 'prefix') broker = config['broker'] schema_registry = config['schema_registry'] schema = handler.get_config_option('avro', 'schema') avro_schema = avro.loads(schema) avro_producer = AvroProducer( { 'bootstrap.servers': broker, 'schema.registry.url': schema_registry, }, default_value_schema=avro_schema) # if remote: