Exemple #1
0
def train(model_path, params):
    """Train the text classifier and persist it together with its metadata.

    Texts are loaded through ``read_texts``, embedded with ``embed`` and fed
    to a two-layer dense network. The test split doubles as validation data
    during fitting and is used for the final evaluation.

    Args:
        model_path: Directory that receives ``model.h5``, ``params.json`` and
            ``labels_index.json``. It is created if it does not exist yet.
        params: Mapping providing ``"dense_dim"``, ``"epochs"`` and
            ``"batch_size"``; it is also dumped verbatim to ``params.json``.

    Returns:
        The second entry returned by ``model.evaluate`` on the test split,
        i.e. the value of the ``"accuracy"`` metric the model is compiled with.
    """
    # Create the output directory up front so a bad path fails before the
    # training run instead of after it. An empty path means "current
    # directory" for os.path.join, so there is nothing to create then.
    if model_path:
        os.makedirs(model_path, exist_ok=True)

    texts_train, labels_train = read_texts(train=True)
    texts_test, labels_test = read_texts(train=False)

    # Sort the distinct labels: plain set iteration order can change between
    # interpreter runs, which would make the saved label index irreproducible.
    label_names = sorted(set(labels_train + labels_test))
    labels_n = len(label_names)

    x_train = embed(texts_train)
    x_test = embed(texts_test)

    y_train, labels_index = labels_to_indexes(labels_train, label_names)
    y_test, _ = labels_to_indexes(labels_test, label_names)
    y_train = to_categorical(y_train, num_classes=labels_n)
    y_test = to_categorical(y_test, num_classes=labels_n)

    # NOTE(review): targets are one-hot but the output layer is sigmoid with
    # binary_crossentropy; depending on the Keras version, "accuracy" may be
    # resolved to binary accuracy here - confirm this is intended.
    model = Sequential()
    model.add(Dense(params["dense_dim"], activation="relu"))
    model.add(Dense(labels_n, activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer="sgd", metrics=["accuracy"])

    model.fit(
        x_train,
        y_train,
        validation_data=(x_test, y_test),
        epochs=params["epochs"],
        batch_size=params["batch_size"],
        verbose=2,
    )

    # scores[0] is the loss, scores[1] the single compiled metric.
    scores = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy: %.2f%%" % (scores[1] * 100))

    model.save(os.path.join(model_path, "model.h5"))
    with open(os.path.join(model_path, "params.json"), "w") as f:
        json.dump(params, f)
    with open(os.path.join(model_path, "labels_index.json"), "w") as f:
        json.dump(labels_index, f)

    return scores[1]
Exemple #2
0
def predict(texts, model_path=MODEL_PATH):
    """Predict one label per input text using the model stored at ``model_path``.

    Args:
        texts: Texts to classify; passed as-is to ``embed``.
        model_path: Location handed to ``get_model`` to load the model, its
            parameters and the index-to-label mapping.

    Returns:
        A list with, for every row of model scores, the label looked up
        under the index of that row's highest score.
    """
    logger.info("Loading model...")
    classifier, _params, index_to_label = get_model(model_path)

    vectors = embed(texts)
    class_scores = classifier.predict(vectors)

    # Pick the best-scoring index of each row and translate it to its label.
    predictions = []
    for row in class_scores:
        best = argmax(row)
        predictions.append(index_to_label[best])

    logger.info("Prediction done!")

    return predictions
Exemple #3
0
    def predict(self, text_list, top_k=5):
        """Return the ``top_k`` highest-scoring labels for each input text.

        Args:
            text_list: Texts to classify; passed as-is to ``embed``.
            top_k: Maximum number of labels kept per text.

        Returns:
            A dict keyed by the position of each model output row. Every
            value is an ``OrderedDict`` mapping label to its score (as a
            ``float``), ordered from the highest score to the lowest.
        """
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments.
        tic = time.perf_counter()

        logger.info(f"Predicting text_list=`{text_list}`")

        embeddings = embed(text_list)
        model_outputs = self.model.predict(embeddings)

        # The index order from argsort is reversed before slicing so the
        # largest scores come first (assuming an ascending argsort such as
        # numpy's - confirm against the file's imports).
        predictions = {
            i: OrderedDict(
                (self.labels_index_inv[j], float(model_output[j]))
                for j in argsort(model_output)[::-1][:top_k]
            )
            for i, model_output in enumerate(model_outputs)
        }

        # "{:.2f}" prints two decimals; the previous "{:2f}" only set a
        # minimum width of 2 and printed six decimals.
        logger.info("Prediction done in {:.2f}s".format(time.perf_counter() - tic))

        return predictions
 def test_embed(self):
     """Check that embedding a single text yields an array of shape (1, 768)."""
     vectors = embed(['hello world'])
     actual_shape = vectors.shape
     self.assertEqual(actual_shape, (1, 768))
def test_embed():
    """Check that embedding a single text yields an array of shape (1, 768)."""
    expected_shape = (1, 768)
    vectors = embed(['hello world'])
    assert vectors.shape == expected_shape