def before_all(context):
    """Populate ``context`` with endpoints and a Neo4j adapter from ``app.ini``.

    Attributes set on ``context``:

    * ``app_uri`` -- the ``/serve`` URL built from the ``bottle`` host and port.
    * ``neo4j_conf`` -- dict with the ``server``, ``user`` and ``password``
      options of the ``neo4j`` section.
    * ``neo4j_adapter`` -- a ``Neo4jAdapter`` constructed from ``neo4j_conf``.
    * ``nlp_uri`` -- URL built from the ``nlp`` host and port, carrying a
      ``properties`` query that names the ``sentiment`` annotator and ``json``
      output format.
    """
    settings = ConfigHandler('app.ini')
    read = settings.get_config_option

    # Application endpoint.
    bottle_endpoint = (read('bottle', 'host'), read('bottle', 'port'))
    context.app_uri = 'http://%s:%s/serve' % bottle_endpoint

    # Neo4j connection settings, read in the order server, user, password.
    context.neo4j_conf = {
        option: read('neo4j', option)
        for option in ('server', 'user', 'password')
    }
    context.neo4j_adapter = Neo4jAdapter(context.neo4j_conf)

    # NLP endpoint; the braces in the query string are literal text because
    # the URL is built with %-formatting.
    nlp_endpoint = (read('nlp', 'host'), read('nlp', 'port'))
    context.nlp_uri = (
        'http://%s:%s/?properties={"annotators":"sentiment","outputFormat":"json"}'
        % nlp_endpoint
    )
] + ['"%s.%s_constraints": "%s"' % (TOPIC_PREFIX, prefix, statement)]) text_file.write(SINK_CONFIG % (topic_list, host_conf['host'], host_conf['username'], host_conf['password'], topics)) print('%s topics created.' % len(topics_conf)) if __name__ == '__main__': if len(sys.argv) < 4: print('python extractor.py <config> <source_dir> <target_dir>\n') exit(1) source_dir = sys.argv[2] target_dir = sys.argv[3] config_handler = ConfigHandler(sys.argv[1]) prefix = config_handler.get_config_option('info', 'prefix') sources = config_handler.get_eval_option('extraction', 'sources') for source in sources: tsv_files = source['tsv_files'] for tsv_file in tsv_files: file_name = '%s/%s' % (source_dir, tsv_file['file_name']) lines = load_lines(file_name) ent_conf = tsv_file['entities'] for entity_name, entity_info in ent_conf.items(): file_name = '%s/%s.tsv' % (target_dir, entity_name) rows = create_rows(lines, entity_info) count = write_rows(rows, file_name, entity_info) print('%s [%s] entities extracted.' % (count, entity_name))
from queue import Queue
import signal
from threading import Thread, Event, get_ident
from time import sleep

from bottle import HTTPError, run, route, request, response

from config_handler import ConfigHandler
from neo4j_adapter import Neo4jAdapter

# Logging is configured from `logging.ini`; this module logs via `appLogger`.
fileConfig('logging.ini')
logger = logging.getLogger('appLogger')

# Application settings come from `app.ini`.
config = ConfigHandler('app.ini')

# Options of the [bottle] section; port and thread count are converted to int.
bottle_conf = dict(
    host=config.get_config_option('bottle', 'host'),
    port=int(config.get_config_option('bottle', 'port')),
    server=config.get_config_option('bottle', 'server'),
    threads=int(config.get_config_option('bottle', 'threads')),
)

# Options of the [neo4j] section, read in the order server, user, password.
neo4j_conf = {
    option: config.get_config_option('neo4j', option)
    for option in ('server', 'user', 'password')
}

# Module-level shared state; how it is used lies outside this excerpt
# (presumably by request handlers and worker threads -- verify).
in_queue = Queue()
out_queue_dict = {}
stop_event = Event()
__email__ = "*****@*****.**"
__status__ = "Development"

from itertools import groupby
from typing import List

from fastapi import FastAPI
from pydantic import BaseModel
from requests import Session

from config_handler import ConfigHandler

####################
# Load the settings from the configuration file (given as a relative path)
config = ConfigHandler('conf/extractor.ini')
# Both options live in the [tika] section and are read in the order url, dir.
tika_url, file_dir = (
    config.get_config_option('tika', option) for option in ('url', 'dir')
)


####################
# Document model the web app receives on submission, as a JSON object:
# {
#   "u": the file name of the document; the web app retains and returns it
# }
class Item(BaseModel):
    u: str


####################
# Create the web application instance provided by FastAPI
from post_nlp import PostProcessor

####################
# Load the settings from the configuration file (given as a relative path)
config = ConfigHandler('conf/nlp.ini')

####################
# Build the post processor from the loaded configuration
post_processor = PostProcessor(config)

####################
# Start `stanza`:
# - read the language string from the [stanza] section
# - (optional) uncomment the download call below if the language's model
#   was not pre-downloaded
# - create the `stanza` NLP processing pipeline, bound to `nlp`
language = config.get_config_option('stanza', 'language')
# stanza.download(language)
nlp = stanza.Pipeline(language)


####################
# Document model the web app receives on submission, as a JSON object:
# {
#   "u": the uid of the document; the web app retains and returns it
#   "c": the textual content of the document
# }
class Item(BaseModel):
    u: str
    c: str
producer.flush() print('. %s' % count) if __name__ == "__main__": if len(sys.argv) < 3: print("Usage: python producer.py <config> <tsv_dir> <remote>") exit(1) handler = ConfigHandler(sys.argv[1]) tsv_dir = sys.argv[2] remote = len(sys.argv) == 4 config = handler.get_eval_option('yggdrasil', 'conf') prefix = handler.get_config_option('info', 'prefix') broker = config['broker'] schema_registry = config['schema_registry'] schema = handler.get_config_option('avro', 'schema') avro_schema = avro.loads(schema) avro_producer = AvroProducer( { 'bootstrap.servers': broker, 'schema.registry.url': schema_registry, }, default_value_schema=avro_schema) # if remote: # schema_topic = '%s_constraints' % prefix