Code example #1
File: environment.py Project: rsaavy/CORTEX
from config_handler import ConfigHandler
from neo4j_adapter import Neo4jAdapter


def before_all(context):
    config = ConfigHandler('app.ini')

    app_host = config.get_config_option('bottle', 'host')
    app_port = config.get_config_option('bottle', 'port')
    context.app_uri = 'http://%s:%s/serve' % (app_host, app_port)

    context.neo4j_conf = {
        'server': config.get_config_option('neo4j', 'server'),
        'user': config.get_config_option('neo4j', 'user'),
        'password': config.get_config_option('neo4j', 'password'),
    }
    context.neo4j_adapter = Neo4jAdapter(context.neo4j_conf)

    nlp_host = config.get_config_option('nlp', 'host')
    nlp_port = config.get_config_option('nlp', 'port')
    context.nlp_uri = 'http://%s:%s/?properties={"annotators":"sentiment","outputFormat":"json"}' % (
        nlp_host, nlp_port)
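
Every snippet in this collection calls the project's ConfigHandler, whose source is not shown here. A minimal sketch of what it could look like, assuming it wraps the standard-library configparser and that get_eval_option evaluates a Python literal stored in the ini file (only the class and method names come from the snippets; the body is an assumption):

import ast
from configparser import ConfigParser


class ConfigHandler:
    """Minimal sketch: a thin wrapper around configparser (assumed implementation)."""

    def __init__(self, path):
        self._parser = ConfigParser()
        self._parser.read(path)

    def get_config_option(self, section, option):
        # Return the raw string value of an option.
        return self._parser.get(section, option)

    def get_eval_option(self, section, option):
        # Interpret the option value as a Python literal (list/dict/number).
        return ast.literal_eval(self._parser.get(section, option))
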
Code example #2
File: extractor.py Project: rsaavy/CORTEX
        ] + ['"%s.%s_constraints": "%s"' % (TOPIC_PREFIX, prefix, statement)])
        text_file.write(SINK_CONFIG %
                        (topic_list, host_conf['host'], host_conf['username'],
                         host_conf['password'], topics))
        print('%s topics created.' % len(topics_conf))


if __name__ == '__main__':
    if len(sys.argv) < 4:
        print('python extractor.py <config> <source_dir> <target_dir>\n')
        exit(1)

    source_dir = sys.argv[2]
    target_dir = sys.argv[3]
    config_handler = ConfigHandler(sys.argv[1])
    prefix = config_handler.get_config_option('info', 'prefix')

    sources = config_handler.get_eval_option('extraction', 'sources')
    for source in sources:
        tsv_files = source['tsv_files']

        for tsv_file in tsv_files:
            file_name = '%s/%s' % (source_dir, tsv_file['file_name'])
            lines = load_lines(file_name)

            ent_conf = tsv_file['entities']
            for entity_name, entity_info in ent_conf.items():
                file_name = '%s/%s.tsv' % (target_dir, entity_name)
                rows = create_rows(lines, entity_info)
                count = write_rows(rows, file_name, entity_info)
                print('%s [%s] entities extracted.' % (count, entity_name))
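
get_eval_option('extraction', 'sources') is expected to return a list of dictionaries that the loop above walks. The actual ini file is not shown; a hypothetical value consistent with the accesses source['tsv_files'], tsv_file['file_name'] and tsv_file['entities'] could look like this:

# Hypothetical value of the [extraction] "sources" option, written as a Python literal.
sources = [
    {
        'tsv_files': [
            {
                'file_name': 'articles.tsv',   # hypothetical file under <source_dir>
                # 'entities' maps an entity name to whatever create_rows/write_rows
                # expect; its exact structure is project-specific and not shown here.
                'entities': {
                    'person': {},
                    'organization': {},
                },
            },
        ],
    },
]
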
Code example #3
File: app.py Project: rsaavy/CORTEX
import logging
from logging.config import fileConfig

from queue import Queue
import signal
from threading import Thread, Event, get_ident
from time import sleep

from bottle import HTTPError, run, route, request, response

from config_handler import ConfigHandler
from neo4j_adapter import Neo4jAdapter

fileConfig('logging.ini')
logger = logging.getLogger('appLogger')

config = ConfigHandler('app.ini')
bottle_conf = {
    'host': config.get_config_option('bottle', 'host'),
    'port': int(config.get_config_option('bottle', 'port')),
    'server': config.get_config_option('bottle', 'server'),
    'threads': int(config.get_config_option('bottle', 'threads')),
}

neo4j_conf = {
    'server': config.get_config_option('neo4j', 'server'),
    'user': config.get_config_option('neo4j', 'user'),
    'password': config.get_config_option('neo4j', 'password'),
}

in_queue = Queue()
out_queue_dict = dict()
stop_event = Event()
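
The snippet ends with the shared queues and the stop event; the routes, worker threads and server start-up are not shown. A minimal sketch of how these globals are typically wired together, assuming bottle forwards the threads option to the configured server backend (the worker body, handler name and shutdown signal are assumptions made for illustration):

def worker():
    # Sketch of a worker loop: move jobs from in_queue into a per-thread result
    # queue until stop_event is set. The real processing is not shown here.
    while not stop_event.is_set():
        if in_queue.empty():
            sleep(0.1)
            continue
        out_queue_dict.setdefault(get_ident(), Queue()).put(in_queue.get())


def handle_shutdown(signum, frame):
    # Flip the shared stop event so worker threads can exit their loops.
    stop_event.set()


if __name__ == '__main__':
    signal.signal(signal.SIGTERM, handle_shutdown)
    for _ in range(bottle_conf['threads']):
        Thread(target=worker, daemon=True).start()
    run(host=bottle_conf['host'], port=bottle_conf['port'],
        server=bottle_conf['server'], threads=bottle_conf['threads'])
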
Code example #4
__email__ = "*****@*****.**"
__status__ = "Development"

from itertools import groupby
from typing import List

from fastapi import FastAPI
from pydantic import BaseModel
from requests import Session

from config_handler import ConfigHandler

####################
# Reading configuration from given file in relative path
config = ConfigHandler('conf/extractor.ini')
tika_url = config.get_config_option('tika', 'url')
file_dir = config.get_config_option('tika', 'dir')


####################
# Define the document model that the webapp receives from submission:
# It is in JSON format:
# {
#   "u": the file name of the document, the webapp retains and returns it
# }
class Item(BaseModel):
    u: str


####################
# Create an instance of the ASGI webapp provided by FastAPI
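
The listing is cut off right where the application object would be created. A minimal sketch of how the pieces above could be used, assuming tika_url points at a running Apache Tika server endpoint that accepts the raw file via PUT; the route name, the directory lookup and the response shape are assumptions made for illustration:

app = FastAPI()
session = Session()


@app.put('/extract')                       # hypothetical route name
def extract(item: Item):
    # Open the submitted file from the configured directory and push it to Tika.
    path = '%s/%s' % (file_dir, item.u)
    with open(path, 'rb') as handle:
        resp = session.put(tika_url, data=handle,
                           headers={'Accept': 'text/plain'})
    return {'u': item.u, 'text': resp.text}
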
Code example #5
import stanza
from pydantic import BaseModel

from config_handler import ConfigHandler
from post_nlp import PostProcessor

####################
# Reading configuration from given file in relative path
config = ConfigHandler('conf/nlp.ini')

####################
# Create an instance of the post processor
post_processor = PostProcessor(config)

####################
# Start `stanza`:
# - obtain the language string
# - (optional) uncomment if the language's model was not pre-downloaded
# - create a `stanza` NLP processing pipeline, namely `nlp`
language = config.get_config_option('stanza', 'language')
# stanza.download(language)
nlp = stanza.Pipeline(language)


####################
# Define the document model that the webapp receives from submission:
# It is in JSON format:
# {
#   "u": the uid of the document, the webapp retains and returns it
#   "c": the textual content of the document.
# }
class Item(BaseModel):
    u: str
    c: str
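
The endpoint that consumes this model is not shown. A minimal sketch of how nlp and post_processor might be applied to a submitted Item; the FastAPI setup mirrors the extractor webapp above, and the route name and the process(doc) call on PostProcessor are assumptions:

from fastapi import FastAPI

app = FastAPI()


@app.put('/nlp')                           # hypothetical route name
def annotate(item: Item):
    doc = nlp(item.c)                      # run the stanza pipeline on the text
    sentences = [sentence.text for sentence in doc.sentences]
    # How PostProcessor is invoked is project-specific; process(doc) is assumed.
    return {'u': item.u, 'sentences': sentences, 'result': post_processor.process(doc)}
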
Code example #6
    producer.flush()
    print('. %s' % count)


if __name__ == "__main__":

    if len(sys.argv) < 3:
        print("Usage: python producer.py <config> <tsv_dir> <remote>")
        exit(1)

    handler = ConfigHandler(sys.argv[1])
    tsv_dir = sys.argv[2]
    remote = len(sys.argv) == 4

    config = handler.get_eval_option('yggdrasil', 'conf')
    prefix = handler.get_config_option('info', 'prefix')

    broker = config['broker']
    schema_registry = config['schema_registry']

    schema = handler.get_config_option('avro', 'schema')
    avro_schema = avro.loads(schema)
    avro_producer = AvroProducer(
        {
            'bootstrap.servers': broker,
            'schema.registry.url': schema_registry,
        },
        default_value_schema=avro_schema)

    # if remote:
    #     schema_topic = '%s_constraints' % prefix
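
The function that actually sends messages is cut off above (only its final flush and count print survive at the top of code example #6). A minimal sketch of what producing one TSV file through the AvroProducer could look like; the topic argument and the value layout are assumptions and would have to agree with the Avro schema loaded from the config:

def produce_file(producer, topic, tsv_path):
    # Hypothetical helper: emit one Avro-encoded message per TSV row, then flush.
    count = 0
    with open(tsv_path, encoding='utf-8') as handle:
        for line in handle:
            fields = line.rstrip('\n').split('\t')
            # The value mapping below is illustrative only; it must match the
            # schema passed as default_value_schema above.
            producer.produce(topic=topic, value={'columns': fields})
            count += 1
    producer.flush()
    print('. %s' % count)
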