Exemple #1
0
def main():
    """Consume ids from the Kafka topic and process each one with the model.

    A Kafka consumer is created with auto-commit disabled; the offset is
    committed manually only after ``process_msg`` has returned for a message,
    so a message whose processing raised is not committed.

    The model is loaded once inside a dedicated TensorFlow graph/session that
    is registered as the Keras backend session.

    On exit (normal or via exception) the Kafka consumer, the TensorFlow
    session and the scoped DB session are all released.
    """
    # Pre-declare so the cleanup code can tell what was actually created.
    consumer = None
    session = None
    try:
        consumer = KafkaConsumer(KAFKA_TOPIC,
                                 bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS,
                                 auto_offset_reset='earliest',
                                 enable_auto_commit=False,
                                 group_id=KAFKA_CONSUMER_GROUP)
        session = tf.Session(graph=tf.Graph())
        with session.graph.as_default():
            keras.backend.set_session(session)

            check_model_files()
            model = load_model(MODEL_JSON_PATH, MODEL_WEIGHTS_PATH)

            logger.info("Start listing")

            for msg in consumer:
                logger.info(msg)
                # Named msg_id (not ``id``) to avoid shadowing the builtin.
                msg_id = msg.value.decode()
                logger.info("Start processing id : %s", msg_id)
                process_msg(msg_id, model)
                # Commit only after successful processing.
                consumer.commit()
                logger.info("Finish processing id : %s", msg_id)

    finally:
        try:
            # Release the broker connection and the TF resources; previously
            # both were left open when the loop ended or raised.
            if consumer is not None:
                consumer.close()
            if session is not None:
                session.close()
        finally:
            # Always runs, even if closing the consumer/session failed.
            logger.info("closing db")
            db_session.remove()
Exemple #2
0
def answer(dataset=None):
    """Run an interactive console chat loop.

    Reads sentences from standard input, passes each one to ``do_answer``
    together with ``dataset`` and prints the reply. Typing ``q`` (any case)
    ends the loop.

    Args:
        dataset: data forwarded to ``do_answer``. Defaults to a fresh empty
            list for every call (a ``None`` default is used instead of a
            shared mutable ``[]`` default).
    """
    if dataset is None:
        dataset = []

    # The return value is not used here; the call is kept because it may have
    # side effects that do_answer depends on -- TODO confirm against
    # load_model / do_answer.
    load_model()

    print("type (q) to stop!")
    while True:
        sentence = input("You: ")
        if sentence.lower() == "q":
            break

        print(do_answer(sentence, dataset=dataset))
Exemple #3
0
def run_window(dataset=None):
    """Open a Tk chat window and run its event loop until it is closed.

    The window has a text entry, a "Send" button and a list box holding the
    conversation. Each non-empty message is passed to ``do_answer`` together
    with ``dataset``; the message and the reply are appended to the list box.

    Args:
        dataset: data forwarded to ``do_answer``. Defaults to a fresh empty
            list for every call (a ``None`` default is used instead of a
            shared mutable ``[]`` default).
    """
    if dataset is None:
        dataset = []

    # The return value is not used here; the call is kept because it may have
    # side effects that do_answer depends on -- TODO confirm against
    # load_model / do_answer.
    load_model()

    # No screenName: in tkinter that argument is the X11 display to connect
    # to, not a window label, so 'MainScreen' made the connection fail on X11.
    main_window = tk.Tk()
    main_window.title('Chat with Bot')
    main_window.geometry("950x600+300+300")

    menu = tk.Menu(master=main_window)
    main_window.config(menu=menu, background='#75bdbd')

    file_menu = tk.Menu(master=menu)

    menu.add_cascade(label='Chatbot', menu=file_menu)
    file_menu.add_command(label='About')
    # NOTE(review): this separator is added to the menu bar, not to
    # file_menu; it was possibly meant to sit between 'About' and 'Exit'.
    menu.add_separator()
    file_menu.add_command(label='Exit', command=main_window.quit)

    label_1 = tk.Label(master=main_window, text='Pesanmu : ')
    label_1.grid(row=0)

    entry_1_text = tk.StringVar()
    entry_1 = tk.Entry(master=main_window, width=80, textvariable=entry_1_text)
    entry_1.grid(row=0, column=1)

    scrollbar = tk.Scrollbar(master=main_window, orient='vertical')
    # Stretch vertically so the scrollbar spans the list box's row.
    scrollbar.grid(row=3, column=2, sticky='ns')

    list_box = tk.Listbox(master=main_window,
                          width=80,
                          yscrollcommand=scrollbar.set)
    list_box.grid(row=3, column=1)
    # Complete the two-way link: without a command the scrollbar only
    # mirrored the list box position and could not scroll it.
    scrollbar.config(command=list_box.yview)

    def btn_1_click():
        """Send the typed message and show it and the reply in the list box."""
        sentence = entry_1_text.get()
        if sentence == '':
            return

        answ = do_answer(sentence, dataset=dataset)

        list_box.insert(tk.END, 'Aku: {}'.format(sentence))
        list_box.insert(tk.END, 'Bot: {}'.format(answ))
        entry_1_text.set('')

    btn_1 = tk.Button(master=main_window,
                      text='Send',
                      width=10,
                      command=btn_1_click)
    btn_1.config(background='#0fa4d6')
    btn_1.grid(row=0, column=3)

    main_window.mainloop()
Exemple #4
0
def predict_prob(model_file: click.Path,
                 test_data: click.Path,
                 k: int,
                 t: float,
                 batch_size: int,
                 stat: bool,
                 show_data: bool,
                 ):
    """Print the predicted label(s) with probabilities for each test sample.

    Args:
        model_file: path of the trained model; its metadata is loaded from
            ``<model_file>.meta.yml``.
        test_data: path of the data to run predictions on.
        k: number of top-scoring labels considered per sample (only used
            for non-binary tasks).
        t: a label is printed only when its score is strictly greater than
            ``t`` (only used for non-binary tasks).
        batch_size: batch size passed to ``BatchSequence``.
        stat: when true, print statistics of the test data first.
        show_data: when true, print each sample's data after its prediction.
    """
    meta = Metadata.load(f"{model_file}.meta.yml")
    classifier = load_model(model_file)

    # Re-wrap the raw samples with the task/labels/chars stored at training
    # time.
    raw = read(test_data)
    dataset_test = Dataset(meta.task, meta.labels, meta.chars, raw.samples)

    if stat:
        cowsay.cow(f"Test data stat:\n {utils.stat(dataset_test)}")

    batches = BatchSequence(dataset_test, batch_size, meta.params['maxlen'])
    pred = classifier.predict_generator(batches,
                                        workers=1,
                                        use_multiprocessing=True)

    # Per row: column indices ordered by descending score, cut to the top k.
    top_k = numpy.argsort(-pred)[:, :k]

    for row, candidates in enumerate(top_k):
        if meta.task == Task.binary:
            score = pred[row, 0]
            label = meta.labels[0 if score < 0.5 else 1]
            # Report the probability of whichever label was chosen.
            confidence = utils.float4(max(score, 1.0 - score))
            line = f"{label} {confidence}"
        else:
            # Alternating "label prob label prob ..." for every candidate
            # whose score exceeds the threshold.
            pieces = []
            for col in candidates:
                if pred[row, col] > t:
                    pieces.append(str(meta.labels[col]))
                    pieces.append(str(utils.float4(pred[row, col])))
            line = ' '.join(pieces)

        if show_data:
            print(line, dataset_test.samples[row].data)
        else:
            print(line)
Exemple #5
0
def print_sentence_vectors(
        model_file: str,
        data_file: str,
        batch_size: int,
        ):
    """Print one sentence vector per line for the samples in ``data_file``.

    The vectors are the output of the trained model's ``dense_1`` layer.

    Args:
        model_file: path of the trained model; its metadata is loaded from
            ``<model_file>.meta.yml``.
        data_file: path of the data to compute vectors for.
        batch_size: batch size passed to ``BatchSequence``.
    """
    meta = Metadata.load(f"{model_file}.meta.yml")
    trained = load_model(model_file)

    # Sub-model that stops at the 'dense_1' layer of the trained model.
    source_input = trained.input
    hidden_output = trained.get_layer('dense_1').output
    feature_model = Model(inputs=source_input, outputs=hidden_output)

    dataset = read(data_file)
    batches = BatchSequence(dataset, batch_size, meta.params['maxlen'])
    vectors = feature_model.predict_generator(batches,
                                              workers=1,
                                              use_multiprocessing=True)

    for vector in vectors:
        formatted = [str(utils.float4(component)) for component in vector]
        print(' '.join(formatted))
Exemple #6
0
def test(
        model_file: str,
        test_data: str,
        k: int,
        t: float,
        batch_size: int,
        stat: bool,
        ):
    """Evaluate a trained classifier on labelled data and print the metrics.

    Prints the overall accuracy followed by recall, precision and F1 for
    every label, derived from a confusion matrix indexed ``[true][predicted]``.

    Args:
        model_file: path of the trained model; its metadata is loaded from
            ``<model_file>.meta.yml``.
        test_data: path of the test data; it is read with
            ``remove_no_labels=True``.
        k: number of top-scoring labels considered per sample (non-binary
            tasks).
        t: a label counts as predicted only when its score is strictly
            greater than ``t`` (non-binary tasks).
        batch_size: batch size passed to ``BatchSequence``.
        stat: when true, print statistics of the test data first.
    """
    meta = Metadata.load(f"{model_file}.meta.yml")
    classifier = load_model(model_file)

    # Re-wrap the raw samples with the task/labels/chars stored at training
    # time.
    raw = read(test_data, remove_no_labels=True)
    dataset_test = Dataset(meta.task, meta.labels, meta.chars, raw.samples)

    if stat:
        cowsay.cow(f"Test data stat:\n {utils.stat(dataset_test)}")

    batches = BatchSequence(dataset_test, batch_size, meta.params['maxlen'])
    pred = classifier.predict_generator(batches,
                                        workers=1,
                                        use_multiprocessing=True)
    # Per row: column indices ordered by descending score, cut to the top k.
    top_k = numpy.argsort(-pred)[:, :k]

    n_labels = len(dataset_test.labels)
    confusion_matrix = []
    for _ in range(n_labels):
        confusion_matrix.append([0] * n_labels)

    if meta.task == Task.binary:
        for row_no, scores in enumerate(pred):
            # Single output column; above 0.5 means the label at index 1.
            y_pred = int(scores[0] > 0.5)
            true_label = dataset_test.samples[row_no].labels[0]
            confusion_matrix[meta.labels.index(true_label)][y_pred] += 1

        total = sum(sum(matrix_row) for matrix_row in confusion_matrix)
        correct = sum(confusion_matrix[d][d] for d in range(n_labels))
        print(f"Acc: {correct / total:.4f}")

        for label_no, perf in enumerate(
                utils.labels_performance(confusion_matrix)):
            print(meta.labels[label_no])
            print(f"Recall: {perf['recall']:.4f}")
            print(f"Prec: {perf['prec']:.4f}")
            print(f"F1: {perf['f1']:.4f}")
        return

    total = len(top_k)
    correct = 0
    for row_no, candidates in enumerate(top_k):
        predicted = [col for col in candidates if pred[row_no, col] > t]
        expected = [meta.labels.index(name)
                    for name in dataset_test.samples[row_no].labels]

        # A sample is correct only when every true label was predicted.
        if not any(col not in predicted for col in expected):
            correct += 1

        # Every (true, predicted) pair of the sample is counted.
        for y_true in expected:
            for y_pred in predicted:
                confusion_matrix[y_true][y_pred] += 1

    print(f"Acc@{k}: {correct / total:.4f}")

    for label_no, perf in enumerate(
            utils.labels_performance(confusion_matrix)):
        print(meta.labels[label_no])
        print(f"- Recall: {perf['recall']:.4f}")
        print(f"- Prec: {perf['prec']:.4f}")
        print(f"- F1: {perf['f1']:.4f}")