def main(path):
    """Fit a model on the data found at ``path`` and evaluate it per split.

    Steps, in order:

    1. ``read_data(path)`` is unpacked into train, test and valid splits.
    2. The ``"text"`` entry of the train split is passed through ``ev_data``
       and the result is printed.
    3. A model is built from ``ev_data`` of the valid split's ``"text"``
       entry and fitted on the prepared train data.
    4. ``evaluate`` runs on ``test.sample(frac=0.1)``, then on the full
       valid split, then on the full train split.

    Args:
        path: Forwarded unchanged to ``read_data``.
    """
    train_split, test_split, valid_split = read_data(path)

    fit_input = ev_data(train_split["text"])
    print(fit_input)

    # The model is constructed from the validation data, then fitted on the
    # training data.
    build_input = ev_data(valid_split["text"])
    unfitted = build_model(build_input)
    model = unfitted.fit(fit_input)

    # Only a fraction of the test split is scored here (frac=0.1).
    test_subset = test_split.sample(frac=0.1)
    for split, label in (
        (test_subset, "test"),
        (valid_split, "valid"),
        (train_split, "train"),
    ):
        evaluate(model, split, label)
def main(path):
    """Fit a model on the data found at ``path`` and evaluate it per split.

    Steps, in order:

    1. ``read_data(path)`` is unpacked into train, test and valid splits.
    2. The ``"text"`` entry of the train split is passed through ``ev_data``
       and the result is printed.
    3. A model is built from ``ev_data`` of the valid split's ``"text"``
       entry and fitted on the prepared train data.
    4. ``batch_size=32`` is set on the last element of the fitted model
       via ``set_params``.
    5. ``evaluate`` runs on the full test, valid and train splits, in that
       order.

    Args:
        path: Forwarded unchanged to ``read_data``.
    """
    train_split, test_split, valid_split = read_data(path)

    fit_input = ev_data(train_split["text"])
    print(fit_input)

    # The model is constructed from the validation data, then fitted on the
    # training data.
    build_input = ev_data(valid_split["text"])
    unfitted = build_model(build_input)
    model = unfitted.fit(fit_input)

    # Applied after fitting, so it only affects the evaluations below.
    # NOTE(review): model[-1] is presumably the final pipeline step - confirm.
    last_element = model[-1]
    last_element.set_params(batch_size=32)

    for split, label in (
        (test_split, "test"),
        (valid_split, "valid"),
        (train_split, "train"),
    ):
        evaluate(model, split, label)
def evaluate(model, dataset, title):
    """Score ``model`` on ``dataset`` and print mean recall and MRR.

    The ``"text"`` entry of ``dataset`` is passed through ``ev_data``; the
    model predicts on that result, and ``recall`` / ``rr`` are computed from
    its ``"gold"`` entry and the predictions. Both results are stored back
    on the prepared data under ``"recall"`` and ``"rr"``.

    Args:
        model: Object exposing ``predict``.
        dataset: Object whose ``"text"`` entry is accepted by ``ev_data``.
        title: Label printed after ``"Evaluating on"``.

    Returns:
        The object produced by ``ev_data``, with ``"recall"`` and ``"rr"``
        entries added.
    """
    frame = ev_data(dataset["text"])

    with timer("Predict"):
        predictions = model.predict(frame)

    with timer("Calculate the vectorized measures"):
        for column, measure in (("recall", recall), ("rr", rr)):
            frame[column] = measure(frame["gold"], predictions)

    # The report is printed only after both timed sections have finished.
    print("Evaluating on", title)
    for label, column in (("Recall", "recall"), ("MRR", "rr")):
        print(label, frame[column].mean())

    return frame
def flat_oov(oov):
    """Return ``ev_data`` applied to the ``"text"`` entry of ``oov``."""
    texts = oov["text"]
    return ev_data(texts)
def flat_data(data):
    """Return ``ev_data`` applied to the ``"text"`` entry of ``data``."""
    texts = data["text"]
    return ev_data(texts)
def recall_scoring(model, X, y):
    """Return the mean recall of ``model`` on a sample of ``X``.

    ``X.sample(frac=0.01)`` is taken, its ``"text"`` entry is passed through
    ``ev_data``, and the model predicts on the result. ``recall`` is then
    computed from the ``"gold"`` entry and the predictions, and averaged
    with ``np.mean``.

    Args:
        model: Object exposing ``predict``.
        X: Object supporting ``sample(frac=...)`` whose result has a
            ``"text"`` entry.
        y: Unused by this function.
            NOTE(review): presumably kept so the signature matches a
            ``(estimator, X, y)`` scorer callable - confirm against callers.

    Returns:
        The ``np.mean`` of the values returned by ``recall``.
    """
    subset = X.sample(frac=0.01)
    prepared = ev_data(subset["text"])
    predictions = model.predict(prepared)
    per_row_recall = recall(prepared["gold"], predictions)
    return np.mean(per_row_recall)