def test_it(hyp):
    """Train a network on the vectors of hypothesis `hyp` and report its scores."""
    for_graph = []
    str0 = ''
    print(hyp)
    str0 += hyp + '\n'
    print("Extraction")
    train_vecs = np.array(futils.open_json(
        "../dataset/my_datasets/{}_train_vecs.json".format(hyp)))
    train_labels = np.array(futils.open_json(
        "../dataset/my_datasets/train_label.json"))
    test_vecs = np.array(futils.open_json(
        "../dataset/my_datasets/{}_dev_vecs.json".format(hyp)))
    test_labels = np.array(futils.open_json(
        "../dataset/my_datasets/dev_label.json"))
    print("Training")
    model = nn.create_trained_nn(train_vecs, train_labels, 1)
    epochs = 1
    loss, acc = model.evaluate(train_vecs, train_labels)
    # Train one epoch at a time until train accuracy reaches 0.94 or 300
    # epochs, recording (epoch, loss, accuracy) for plotting. Retrain before
    # re-evaluating so each recorded point reflects the latest epoch.
    while acc < 0.94 and epochs < 300:
        for_graph.append((epochs, loss, acc))
        nn.retrain_model(model, train_vecs, train_labels, 1)
        loss, acc = model.evaluate(train_vecs, train_labels)
        epochs += 1
        print("Epochs: {}".format(epochs))
    pred_label = nn.predict_nn(model, test_vecs, test_labels)
    pred_label = [(vc.convert_map_to_label(p), vc.convert_map_to_label(l))
                  for p, l in pred_label]
    str0 += ms.all_mesure(pred_label)
    str0 += "Epochs: {}".format(epochs)
    futils.create_json("../tests/for_graph/{}.json".format(hyp), for_graph)
    with open("../tests/nn_{}.txt".format(hyp), 'w+') as stream:
        stream.write(str0)

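# --- Hypothetical sketch of the `nn` helpers called above, NOT the project's
# --- actual module: a minimal Keras setup matching the signatures used in
# --- test_it (create_trained_nn, retrain_model, predict_nn). Layer sizes and
# --- optimizer are illustrative assumptions.
import numpy as np
from tensorflow import keras

def create_trained_nn(train_vecs, train_labels, epochs):
    """Build a small dense classifier and fit it for `epochs` epochs."""
    model = keras.Sequential([
        keras.Input(shape=(train_vecs.shape[1],)),
        keras.layers.Dense(64, activation="relu"),
        keras.layers.Dense(int(train_labels.max()) + 1, activation="softmax"),
    ])
    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    model.fit(train_vecs, train_labels, epochs=epochs, verbose=0)
    return model

def retrain_model(model, train_vecs, train_labels, epochs):
    """Continue training an already-compiled model for more epochs."""
    model.fit(train_vecs, train_labels, epochs=epochs, verbose=0)

def predict_nn(model, test_vecs, test_labels):
    """Return (predicted_class, gold_class) pairs for the dev set."""
    preds = np.argmax(model.predict(test_vecs, verbose=0), axis=1)
    return list(zip(preds, test_labels))
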
def main():
    print("Extraction")
    lTrain = futils.open_json("../dataset/my_datasets/train.json")
    lDev = futils.open_json("../dataset/my_datasets/dev.json")
    tweets_train = get_label_col(0, lTrain)
    tweets_dev = get_label_col(0, lDev)
    sources_train = get_label_col(1, lTrain)
    sources_dev = get_label_col(1, lDev)
    objType_train = create_objType(lTrain)
    objType_Dev = create_objType(lDev)
    print("Preprocessing")
    lTrain = hyp3_pp(lTrain)
    lDev = hyp3_pp(lDev)
    print("Vectorisation")
    # Build the vocabulary on the train set only, then reuse it for dev so
    # both sides get vectors of the same dimension.
    lTrain, all_words = vutils.vectorise_train(lTrain)
    lDev = vutils.vectorise_test(lDev, all_words)
    lTrain = add_objType_to_vec(lTrain, objType_train)
    lDev = add_objType_to_vec(lDev, objType_Dev)
    lTrain = add_source_to_vecs(tweets_train, sources_train, lTrain)
    lDev = add_source_to_vecs(tweets_dev, sources_dev, lDev)
    print("Saving")
    futils.create_json("../dataset/my_datasets/hyp3_train_vecs.json", lTrain)
    futils.create_json("../dataset/my_datasets/hyp3_dev_vecs.json", lDev)

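# --- Hypothetical sketch of the vutils helpers assumed above, not the real
# --- module: a plain bag-of-words, assuming each preprocessed item is a
# --- whitespace-tokenisable string. vectorise_train builds the vocabulary;
# --- vectorise_test reuses it so train and dev vectors share dimensions.
def vectorise_train(texts):
    """Return one count vector per text plus the shared vocabulary."""
    all_words = sorted({w for text in texts for w in text.split()})
    index = {w: i for i, w in enumerate(all_words)}
    vecs = []
    for text in texts:
        vec = [0] * len(all_words)
        for w in text.split():
            vec[index[w]] += 1
        vecs.append(vec)
    return vecs, all_words

def vectorise_test(texts, all_words):
    """Vectorise unseen texts against the train vocabulary; OOV words are dropped."""
    index = {w: i for i, w in enumerate(all_words)}
    vecs = []
    for text in texts:
        vec = [0] * len(all_words)
        for w in text.split():
            if w in index:
                vec[index[w]] += 1
        vecs.append(vec)
    return vecs
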
def direct_struc():
    """Flatten every conversation structure into a single child -> parent map."""
    dic_output = {}
    structures = futils.open_json("../dataset/traindev/structures.json")
    for id_tweet in structures:
        dic_input = futils.open_json("{}{}".format(
            "../dataset/rumoureval-data/", structures[id_tweet]))
        dic_output = recurs_struc(dic_input, dic_output)
    return dic_output

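# --- recurs_struc is defined elsewhere; a plausible sketch, assuming the
# --- RumourEval structure files nest reply ids under their parent ids
# --- (leaves are empty lists) and dic_output maps child -> direct parent.
def recurs_struc(dic_input, dic_output):
    for parent, children in dic_input.items():
        if isinstance(children, dict):
            for child in children:
                dic_output[child] = parent
            dic_output = recurs_struc(children, dic_output)
    return dic_output
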
def main():
    lTrain = futils.open_json("../dataset/my_datasets/train.json")
    lDev = futils.open_json("../dataset/my_datasets/dev.json")
    futils.create_json("../dataset/my_datasets/train_label.json",
                       get_label_col(2, lTrain))
    futils.create_json("../dataset/my_datasets/dev_label.json",
                       get_label_col(2, lDev))

def main():
    print("Extraction")
    lTrain = futils.open_json("../dataset/my_datasets/train.json")
    lDev = futils.open_json("../dataset/my_datasets/dev.json")
    print("Preprocessing")
    lTrain = get_label_col(3, lTrain)
    lDev = get_label_col(3, lDev)
    print("Vectorisation")
    lTrain, all_words = vutils.vectorise_train(lTrain)
    lDev = vutils.vectorise_test(lDev, all_words)
    print("Saving")
    futils.create_json("../dataset/my_datasets/hyp4_train_vecs.json", lTrain)
    futils.create_json("../dataset/my_datasets/hyp4_dev_vecs.json", lDev)

def main():
    print("Extraction")
    lTrain = futils.open_json("../dataset/my_datasets/train.json")
    lDev = futils.open_json("../dataset/my_datasets/dev.json")
    objType_train = create_objType(lTrain)
    objType_Dev = create_objType(lDev)
    print("Preprocessing")
    lTrain = hyp2_pp(lTrain)
    lDev = hyp2_pp(lDev)
    print("Vectorisation")
    lTrain, all_words = vutils.vectorise_train(lTrain)
    lDev = vutils.vectorise_test(lDev, all_words)
    lTrain = add_objType_to_vec(lTrain, objType_train)
    lDev = add_objType_to_vec(lDev, objType_Dev)
    print("Saving")
    futils.create_json("../dataset/my_datasets/hyp2_train_vecs.json", lTrain)
    futils.create_json("../dataset/my_datasets/hyp2_dev_vecs.json", lDev)

def create_dataset():
    """Create all Tweet and Reply objects for the dataset."""
    dataset = dd(Data)
    ROOT = "../dataset/rumoureval-data/"
    paths = f.open_json("../dataset/traindev/source-reply.json")
    structure_paths = f.open_json("../dataset/traindev/structures.json")
    for data_path in paths:
        data = f.open_json("{}{}".format(ROOT, paths[data_path]))
        subject, source_tweet, type_rt, _ = paths[data_path].split("/")
        # The categorie (gold label) is unknown at this point; get_X_dataset
        # fills it in later.
        categorie = None
        if type_rt == "source-tweet":
            structure = f.open_json("{}{}".format(
                ROOT, structure_paths[data_path]))
            dataset[data_path] = Tweet(data, subject, categorie, structure)
        if type_rt == "replies":
            dataset[data_path] = Reply(data, subject, categorie, source_tweet)
    return dataset

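# --- Hypothetical minimal versions of the Data/Tweet/Reply classes used
# --- above, limited to what this file touches; the real classes likely
# --- carry more state and behaviour.
class Data:
    def __init__(self, data=None, subject=None, categorie=None):
        self.data = data
        self.subject = subject
        self.categorie = categorie

    def set_categorie(self, categorie):
        self.categorie = categorie

class Tweet(Data):
    def __init__(self, data, subject, categorie, structure):
        super().__init__(data, subject, categorie)
        self.structure = structure

class Reply(Data):
    def __init__(self, data, subject, categorie, source_tweet):
        super().__init__(data, subject, categorie)
        self.source_tweet = source_tweet
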
def get_X_dataset(dataset, X):
    """Return the train or test subset of `dataset` listed in the category file `X`."""
    categories = f.open_json(X)
    x_set = []
    for key in categories:
        # Attach the gold label to the object before collecting it.
        dataset[key].set_categorie(categories[key])
        x_set.append(dataset[key])
    return x_set

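# Example usage; the category file names below are illustrative, any JSON
# mapping tweet id -> gold label works:
#   dataset = create_dataset()
#   train_set = get_X_dataset(dataset, "../dataset/traindev/train_categories.json")
#   dev_set = get_X_dataset(dataset, "../dataset/traindev/dev_categories.json")
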
def add_source_to_vecs(tweets, sources, vecs):
    """Append to each tweet vector the vector of its direct source tweet."""
    # `sources` is unused: the parent of each tweet comes from the
    # precomputed direct structures instead.
    del sources
    tv = {t: v for t, v in zip(tweets, vecs)}
    ts = futils.open_json("../dataset/traindev/direct_structures.json")
    # A source tweet is absent from the child -> parent map, so ts.get(t, t)
    # falls back to the tweet itself.
    return [v + tv[ts.get(t, t)] for t, v in zip(tweets, vecs)]
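
# Illustration with hypothetical ids, assuming direct_structures.json
# contains {"2": "1"} (tweet '2' replies to source '1'):
#   tweets = ['1', '2'];  vecs = [[1, 0], [0, 1]]
#   -> [[1, 0, 1, 0],   # source '1' is paired with itself
#       [0, 1, 1, 0]]   # reply '2' is extended with the vector of '1'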