def train(model_path, params):
    """Train a two-layer dense classifier on embedded texts and save it.

    The train and test splits are loaded with ``read_texts``, embedded with
    ``embed``, and the labels are one-hot encoded over the union of labels
    seen in both splits.

    Args:
        model_path: Directory that receives ``model.h5``, ``params.json`` and
            ``labels_index.json``. It is created when it does not exist.
        params: Mapping providing ``"dense_dim"``, ``"epochs"`` and
            ``"batch_size"``; it is written unchanged to ``params.json``.

    Returns:
        The second value returned by ``model.evaluate`` on the test split,
        i.e. the ``accuracy`` metric the model is compiled with.
    """
    # Create the output directory up front so a missing path fails (or is
    # fixed) before training instead of after it. An empty path means
    # "current directory" for os.path.join, and os.makedirs("") would raise.
    if model_path:
        os.makedirs(model_path, exist_ok=True)

    texts_train, labels_train = read_texts(train=True)
    texts_test, labels_test = read_texts(train=False)

    # Sorted so that the class order (and therefore the output-layer layout
    # and labels_index.json) is reproducible across runs; plain set iteration
    # order for strings changes with the interpreter's hash seed.
    labels_set = sorted(set(labels_train + labels_test))
    labels_n = len(labels_set)

    x_train = embed(texts_train)
    x_test = embed(texts_test)

    y_train, labels_index = labels_to_indexes(labels_train, labels_set)
    y_test, _ = labels_to_indexes(labels_test, labels_set)
    y_train = to_categorical(y_train, num_classes=labels_n)
    y_test = to_categorical(y_test, num_classes=labels_n)

    # NOTE(review): sigmoid + binary_crossentropy on one-hot targets scores
    # each class independently; softmax + categorical_crossentropy is the
    # usual single-label setup. Left unchanged -- confirm the intent.
    model = Sequential()
    model.add(Dense(params["dense_dim"], activation="relu"))
    model.add(Dense(labels_n, activation="sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer="sgd", metrics=["accuracy"])

    # The test split doubles as the validation set during fitting.
    model.fit(
        x_train,
        y_train,
        validation_data=(x_test, y_test),
        epochs=params["epochs"],
        batch_size=params["batch_size"],
        verbose=2,
    )

    scores = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy: %.2f%%" % (scores[1] * 100))

    model.save(os.path.join(model_path, "model.h5"))
    with open(os.path.join(model_path, "params.json"), "w", encoding="utf-8") as f:
        json.dump(params, f)
    with open(os.path.join(model_path, "labels_index.json"), "w", encoding="utf-8") as f:
        json.dump(labels_index, f)

    return scores[1]
def predict(texts, model_path=MODEL_PATH):
    """Return the highest-scoring label for each entry of ``texts``.

    The model and its inverse label index are loaded from ``model_path`` via
    ``get_model`` on every call; the texts are embedded with ``embed`` and
    each row of model scores is mapped to the label at its argmax.
    """
    logger.info("Loading model...")
    model, _params, index_to_label = get_model(model_path)

    score_rows = model.predict(embed(texts))
    predicted_labels = [index_to_label[argmax(row)] for row in score_rows]

    logger.info("Prediction done!")
    return predicted_labels
def predict(self, text_list, top_k=5):
    """Return up to ``top_k`` scored labels for each input text.

    Args:
        text_list: Texts passed to ``embed``; the value is also written to
            the log at INFO level.
        top_k: Maximum number of labels kept per text.

    Returns:
        A dict keyed by the position of each model output. Each value is an
        ``OrderedDict`` mapping a label (looked up in
        ``self.labels_index_inv``) to its score as a ``float``, in the order
        given by the reversed ``argsort`` of that output.
    """
    tic = time.time()
    logger.info(f"Predicting text_list=`{text_list}`")

    embeddings = embed(text_list)
    model_outputs = self.model.predict(embeddings)

    predictions = {}
    for i, model_output in enumerate(model_outputs):
        # Assumes argsort is ascending (as NumPy's is), so reversing puts the
        # highest scores first -- TODO confirm against the file's import.
        top_indices = argsort(model_output)[::-1][:top_k]
        predictions[i] = OrderedDict(
            (self.labels_index_inv[j], float(model_output[j])) for j in top_indices
        )

    # "{:.2f}" prints two decimals; the previous "{:2f}" only set a minimum
    # field width of 2 and therefore printed six decimals.
    logger.info("Prediction done in {:.2f}s".format(time.time() - tic))
    return predictions
def test_embed(self):
    """Embedding a single text must produce an array of shape (1, 768)."""
    expected_shape = (1, 768)
    result = embed(['hello world'])
    self.assertEqual(result.shape, expected_shape)
def test_embed():
    """Check that one input text is embedded into a (1, 768) array."""
    sentences = ['hello world']
    vectors = embed(sentences)
    assert vectors.shape == (1, 768)