def main(argv):
    config = load_config(argv[1])
    ops = argv[2].split(",")
    model = None

    if ("train" in ops):
        for i in xrange(5):
            model = train_model(config, model_seq=i + 3)
    def __init__(self, config_path=DEFAULT_CONFIG_PATH):
        self.config = load_config(config_path)
        corenlp_config = self.config["data"]["stanford_corenlp"]
        self.tagger = CoreNLPPOSTagger(
            url="http://%s:%d" % (corenlp_config["host"],
                                  corenlp_config["port"]))

        self.pos_map = self.config["model"]["STANFORD_POS_MAP"]
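
A minimal sketch of the config shape this constructor expects, inferred from the keys it reads; the host, port, and map entries are illustrative assumptions:

# Hypothetical config fragment; only the key names are implied by the code above.
config = {
    "data": {
        "stanford_corenlp": {"host": "localhost", "port": 9000}
    },
    "model": {
        "STANFORD_POS_MAP": {"NN": "noun", "VB": "verb"}
    }
}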
Example 3
def main():
    """
    Setup consumer
    """
    config = loader.load_config()
    logging.info("Connecting to Kafka broker at %s:%s", config.kafka_host, config.kafka_port)
    if config.benchmark:
        create_sample_messages(config)
    start_consumer(config)
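
create_sample_messages is not shown in this excerpt; a minimal sketch of what a benchmark producer might look like, assuming the kafka-python client and a config.kafka_topic attribute (both assumptions):

# Hypothetical helper; kafka-python and config.kafka_topic are assumptions.
from kafka import KafkaProducer

def create_sample_messages(config, count=1000):
    producer = KafkaProducer(
        bootstrap_servers="%s:%s" % (config.kafka_host, config.kafka_port))
    for i in range(count):
        producer.send(config.kafka_topic, value=("sample-%d" % i).encode())
    producer.flush()  # block until every message has been handed to the broker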
def tag_claims(doc, config_path, model=None, model_path=None, cache=True):
    global model_cache
    config = load_config(config_path)
    classes = sorted(config["model"]["CLASSES"])
    annotation_types = config["model"]["ANNOTATIONS"]

    if model is None:
        if model_path:
            model = DeepClaimTagger(config)
            model.load(model_path)
            model_cache = model
        elif cache and model_cache is not None:
            model = model_cache
        else:
            model = DeepClaimTagger(config)
            model.load(config["data"]["model_path"] % (config["id"], 0))
            model_cache = model

    sent_encs = encode_document(doc, config)[0]
    input_morph = []
    input_attr = []
    for (enc_morph, enc_attrs) in sent_encs:
        input_morph.append(enc_morph)
        input_attr.append(enc_attrs)

    enc_sent_segments = model.predict([
        np.array(input_morph, dtype=np.uint8),
        np.array(input_attr, dtype=np.float32)
    ])

    for (sentence, enc_sent_seg) in zip(doc.sentences, enc_sent_segments):
        dec_segments = decode_sentence(enc_sent_seg, classes)
        token_confidence = confidence(enc_sent_seg)
        avg_confidence = [
            float(np.mean(token_conf)) for token_conf in token_confidence
        ]

        for i in range(len(sentence.tokens)):
            segcls = dec_segments[i]
            sentence.tokens[i].annotations["PATCLAIM_SEG"] = segcls

        sentence.annotations["PATCLAIM_SEG_INFO"] = {
            "token_confidence": token_confidence,
            "confidence": avg_confidence
        }

        logger.info("Segmentation: " + " ".join([
            token.surface + ":" + seg for (token, seg) in izip(
                sentence.tokens, dec_segments[0:len(sentence.tokens)])
        ]))

    return doc
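
A hedged usage sketch: tag_claims annotates the document in place and returns it. The config path and the way the document is built are illustrative assumptions:

# Hypothetical call site; "claims.json" and parse_document are assumptions.
doc = parse_document(raw_text)
doc = tag_claims(doc, "claims.json")
for sentence in doc.sentences:
    print(sentence.annotations["PATCLAIM_SEG_INFO"]["confidence"])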
Example 6
def pos_tag(doc, config_path, model_path=None, cache=True):
    global model_cache
    config = load_config(config_path)
    pos_classes = sorted(config["model"]["POS_CLASSES"])
    inv_pos_map = {}

    for pos, mapped in config["model"]["POS_MAP"].items():
        for val in mapped.split("|"):
            inv_pos_map[val] = pos

    if model_path:
        model = DeepTDVPOSTagger(config)
        model.load(model_path)
        model_cache = model
    elif cache and model_cache is not None:
        model = model_cache
    else:
        model = DeepTDVPOSTagger(config)
        model.load(config["data"]["model_path"] % (config["id"], 0))
        model_cache = model

    sent_encs = encode_document(doc, config)[0]
    input_morph = []
    input_attr = []
    for (enc_morph, enc_attrs) in sent_encs:
        input_morph.append(enc_morph)
        input_attr.append(enc_attrs)

    predictions = model.predict([
        np.array(input_morph, dtype=np.uint8),
        np.array(input_attr, dtype=np.float16)
    ])

    for (sentence, enc_sent_pos) in zip(doc.sentences, predictions):
        dec_pos = decode_sentence(enc_sent_pos, pos_classes)
        token_confidence = confidence(enc_sent_pos)
        avg_confidence = float(np.mean(token_confidence))

        for i in range(len(sentence.tokens)):
            if i < config["model"]["MAX_SENT_LEN"]:
                if token_confidence[i] > config["hyperparam"]["confidence_thresh"]:
                    pos = dec_pos[i]
                else:
                    pos = "noun"
            else:
                pos = "x"

            sentence.tokens[i].annotations["UPOS"] = inv_pos_map[pos]
            sentence.tokens[i].annotations["POS"] = pos

        sentence.annotations["UPOS_TAGGER_INFO"] = {"token_confidence": token_confidence, "confidence": avg_confidence}

    return doc
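
The inverse map built at the top of pos_tag lets each token carry both the model's fine-grained tag ("POS") and its universal counterpart ("UPOS"). A worked example of that inversion, with an illustrative POS_MAP (the entries are assumptions):

# Illustrative inversion; the POS_MAP contents are assumptions.
pos_map = {"NOUN": "noun|propn", "VERB": "verb"}
inv_pos_map = {}
for upos, mapped in pos_map.items():
    for val in mapped.split("|"):
        inv_pos_map[val] = upos
assert inv_pos_map == {"noun": "NOUN", "propn": "NOUN", "verb": "VERB"}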
def model_paths(config_paths=()):
    model_num = 0
    last_config_path = ""
    paths = []
    for config_path in config_paths:
        if config_path == last_config_path:
            model_num += 1
        else:
            model_num = 0

        config = load_config(config_path)
        paths.append(config["data"]["model_path"] % (config["id"], model_num))
        last_config_path = config_path

    return paths
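
Consecutive repeats of the same config path get increasing model numbers, and the counter resets when the path changes. A worked call with illustrative paths:

# Illustrative: model_paths(["a.json", "a.json", "b.json"]) yields
# [model_path % (id_a, 0), model_path % (id_a, 1), model_path % (id_b, 0)]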
Example 8
        config.influxdb_timeout, config.influxdb_use_udp,
        config.influxdb_retention_policy, config.influxdb_time_precision)


def show_version():
    """
    Output current version and exit
    """
    print("{} {}".format(__title__, __version__))
    sys.exit(0)


if __name__ == '__main__':
    FORMAT = '%(asctime)-15s %(name)s:%(levelname)s:%(message)s'
    logging.basicConfig(format=FORMAT)
    config = loader.load_config()
    if config.version:
        show_version()

    # Check for a pidfile to see if the program already runs
    try:
        with open(config.pidfile, 'r') as pf:
            pid = int(pf.read().strip())
    except IOError:
        pid = None
    if pid:
        message = "pidfile %s already exist. Daemon already running?\n"
        sys.stderr.write(message % config.pidfile)
        sys.exit(1)
    pid = str(os.getpid())
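
The excerpt stops after capturing the current PID; the customary next step in this daemon pattern, writing it to the pidfile, might look like this (a sketch, not the original code):

# Hypothetical continuation: persist the PID so later runs detect the daemon.
with open(config.pidfile, 'w') as pf:
    pf.write(pid + "\n")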
Example 9
                        reuse=True)

    opt = tf.train.AdamOptimizer(lr_ph)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = opt.minimize(loss)
    saver = tf.train.Saver()

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    if args.pretrained:
        loc = os.path.join(args.model_dir, args.model_name,
                           args.dataset_name + '.ckpt')
        saver.restore(sess, loc)

    config = load_config(args.config)
    train_gen = EpisodeGenerator(args.dataset_dir, 'train', config)
    test_gen = EpisodeGenerator(args.dataset_dir, 'test', config)
    if args.train:
        max_iter = (train_gen.dataset_size[args.dataset_name] * args.max_epoch
                    // (nway * qsize))
        show_step = args.show_epoch * max_iter // args.max_epoch
        save_step = args.save_epoch * max_iter // args.max_epoch
        avger = np.zeros([4])
        for i in range(1, max_iter + 1):
            stt = time.time()
            cur_epoch = i * (
                nway * qsize) // train_gen.dataset_size[args.dataset_name]
            lr = args.lr if i < 0.7 * max_iter else args.lr * .1
            sx, sy, qx, qy = train_gen.get_episode(nway, kshot, qsize)
            fd = {\
Example 10
    def __init__(self, config_path=DEFAULT_CONFIG_PATH):
        self.config = load_config(config_path)
        self.conn = MongoClient(self.config["data"]["semantic_db"]["host"],
                                self.config["data"]["semantic_db"]["port"])
        self.db = self.conn.semdb
        self.pos_map = self.config["model"]["POS_MAP"]
Example 11
from config.loader import load_config

SSL_VERIFY = True if load_config().get("mattermost").get(
    "ssl_verify") is "yes" else False
BOT_URL = load_config().get("mattermost").get("url")
BOT_LOGIN = load_config().get("mattermost").get("login")
BOT_PASSWORD = load_config().get("mattermost").get("password")
BOT_TEAM = load_config().get("mattermost").get("team")

IGNORE_NOTIFIES = ['@channel', '@all', '@here']
DEFAULT_REPLY = "I didn't understand your message"