Ejemplo n.º 1
0
def test_it(hyp):
    """Train the network on the ``hyp`` vectors and write an evaluation report.

    Loads ``{hyp}_train_vecs.json`` / ``{hyp}_dev_vecs.json`` and the two
    label files from ``../dataset/my_datasets/``, then trains one epoch at a
    time until the second value returned by ``model.evaluate`` on the
    training data (``acc``) reaches 0.94 or 300 epochs have been run.

    Two files are written:

    * ``../tests/for_graph/{hyp}.json`` -- one ``(epochs, loss, acc)`` entry
      per epoch, where ``loss``/``acc`` are measured after that many epochs;
    * ``../tests/nn_{hyp}.txt`` -- ``hyp``, the text returned by
      ``ms.all_mesure`` for the dev predictions, and the final epoch count.

    Args:
        hyp: Hypothesis name (a string); it is used to build the file names.
    """
    print(hyp)
    str0 = hyp + '\n'
    print("Extraction")
    train_vecs = np.array(
        futils.open_json(
            "../dataset/my_datasets/{}_train_vecs.json".format(hyp)))
    train_labels = np.array(
        futils.open_json("../dataset/my_datasets/train_label.json"))
    test_vecs = np.array(
        futils.open_json(
            "../dataset/my_datasets/{}_dev_vecs.json".format(hyp)))
    test_labels = np.array(
        futils.open_json("../dataset/my_datasets/dev_label.json"))
    print("Training")
    model = nn.create_trained_nn(train_vecs, train_labels, 1)
    epochs = 1
    loss, acc = model.evaluate(train_vecs, train_labels)
    for_graph = [(epochs, loss, acc)]
    while acc < 0.94 and epochs < 300:
        # Retrain first and evaluate afterwards, so that the stop condition
        # and the recorded point always describe the model as it is now
        # (evaluating before retraining tested a stale accuracy and logged
        # every measurement one epoch late).
        nn.retrain_model(model, train_vecs, train_labels, 1)
        epochs += 1
        print("Epochs: {}".format(epochs))
        loss, acc = model.evaluate(train_vecs, train_labels)
        for_graph.append((epochs, loss, acc))
    pred_label = nn.predict_nn(model, test_vecs, test_labels)
    # Each prediction is a (predicted, expected) pair; both sides are
    # converted with vc.convert_map_to_label before measuring.
    pred_label = [(vc.convert_map_to_label(p), vc.convert_map_to_label(l))
                  for p, l in pred_label]
    str0 += ms.all_mesure(pred_label)
    str0 += "Epochs: {}".format(epochs)
    futils.create_json("../tests/for_graph/{}.json".format(hyp), for_graph)
    with open("../tests/nn_{}.txt".format(hyp), 'w+') as stream:
        stream.write(str0)
Ejemplo n.º 2
0
def main():
    """Build and save the hyp3 train/dev vectors.

    Steps, in order: load the train and dev JSON datasets; extract columns 0
    and 1 with ``get_label_col`` and build the objType data with
    ``create_objType`` (all from the rows as loaded, before preprocessing);
    preprocess with ``hyp3_pp``; vectorise (the dev set reuses the word list
    returned for the train set); extend the vectors with
    ``add_objType_to_vec`` and ``add_source_to_vecs``; finally write
    ``hyp3_train_vecs.json`` and ``hyp3_dev_vecs.json``.
    """
    print("Extraction")
    train_rows = futils.open_json("../dataset/my_datasets/train.json")
    dev_rows = futils.open_json("../dataset/my_datasets/dev.json")
    # Everything below is read from the rows before hyp3_pp replaces them.
    train_tweets, dev_tweets = (get_label_col(0, train_rows),
                                get_label_col(0, dev_rows))
    train_sources, dev_sources = (get_label_col(1, train_rows),
                                  get_label_col(1, dev_rows))
    train_obj_types, dev_obj_types = (create_objType(train_rows),
                                      create_objType(dev_rows))

    print("Preprocessing")
    train_rows, dev_rows = hyp3_pp(train_rows), hyp3_pp(dev_rows)

    print("Vectorisation")
    train_vecs, vocabulary = vutils.vectorise_train(train_rows)
    dev_vecs = vutils.vectorise_test(dev_rows, vocabulary)
    train_vecs = add_objType_to_vec(train_vecs, train_obj_types)
    dev_vecs = add_objType_to_vec(dev_vecs, dev_obj_types)
    train_vecs = add_source_to_vecs(train_tweets, train_sources, train_vecs)
    dev_vecs = add_source_to_vecs(dev_tweets, dev_sources, dev_vecs)

    print("Saving")
    futils.create_json(
        "../dataset/my_datasets/hyp3_train_vecs.json", train_vecs)
    futils.create_json(
        "../dataset/my_datasets/hyp3_dev_vecs.json", dev_vecs)
def direct_struc():
    """Fold every structure file listed in ``structures.json`` into one dict.

    For each key of ``../dataset/traindev/structures.json`` the referenced
    file (relative to ``../dataset/rumoureval-data/``) is loaded and passed,
    together with the dictionary accumulated so far, to ``recurs_struc``;
    the value it returns becomes the new accumulator.

    Returns:
        The accumulator after the last file (an empty dict when there is
        nothing to process).
    """
    data_root = "../dataset/rumoureval-data/"
    listing = futils.open_json("../dataset/traindev/structures.json")
    merged = {}
    for tweet_id in listing:
        tree_file = "{}{}".format(data_root, listing[tweet_id])
        tree = futils.open_json(tree_file)
        merged = recurs_struc(tree, merged)
    return merged
Ejemplo n.º 4
0
def main():
    """Extract column 2 of the train and dev datasets into label files.

    Reads ``train.json`` and ``dev.json`` from ``../dataset/my_datasets/``
    and writes the result of ``get_label_col(2, ...)`` for each of them to
    ``train_label.json`` and ``dev_label.json`` in the same directory.
    """
    train_rows = futils.open_json("../dataset/my_datasets/train.json")
    dev_rows = futils.open_json("../dataset/my_datasets/dev.json")

    train_labels = get_label_col(2, train_rows)
    futils.create_json(
        "../dataset/my_datasets/train_label.json", train_labels)

    dev_labels = get_label_col(2, dev_rows)
    futils.create_json(
        "../dataset/my_datasets/dev_label.json", dev_labels)
Ejemplo n.º 5
0
def main():
    """Build and save the hyp4 train/dev vectors.

    Loads the train and dev JSON datasets, keeps only what
    ``get_label_col(3, ...)`` returns for each of them, vectorises the train
    data and then the dev data with the word list returned for the train
    data, and writes the results to ``hyp4_train_vecs.json`` and
    ``hyp4_dev_vecs.json`` in ``../dataset/my_datasets/``.
    """
    print("Extraction")
    lTrain = futils.open_json("../dataset/my_datasets/train.json")
    lDev = futils.open_json("../dataset/my_datasets/dev.json")
    print("Preprocessing")
    lTrain = get_label_col(3, lTrain)
    lDev = get_label_col(3, lDev)
    print("Vectorisation")
    lTrain, all_words = vutils.vectorise_train(lTrain)
    # Vectorise the extracted column, exactly as for the train set; the
    # extracted dev column used to be thrown away and the raw dev rows were
    # vectorised instead.
    lDev = vutils.vectorise_test(lDev, all_words)
    print("Saving")
    futils.create_json("../dataset/my_datasets/hyp4_train_vecs.json", lTrain)
    futils.create_json("../dataset/my_datasets/hyp4_dev_vecs.json", lDev)
Ejemplo n.º 6
0
def main():
    """Build and save the hyp2 train/dev vectors.

    Steps, in order: load the train and dev JSON datasets; build the objType
    data with ``create_objType`` from the rows as loaded; preprocess with
    ``hyp2_pp``; vectorise (the dev set reuses the word list returned for
    the train set); extend the vectors with ``add_objType_to_vec``; write
    ``hyp2_train_vecs.json`` and ``hyp2_dev_vecs.json``.
    """
    print("Extraction")
    train_rows = futils.open_json("../dataset/my_datasets/train.json")
    dev_rows = futils.open_json("../dataset/my_datasets/dev.json")
    # Built before hyp2_pp replaces the rows.
    train_obj_types, dev_obj_types = (create_objType(train_rows),
                                      create_objType(dev_rows))

    print("Preprocessing")
    train_rows, dev_rows = hyp2_pp(train_rows), hyp2_pp(dev_rows)

    print("Vectorisation")
    train_vecs, vocabulary = vutils.vectorise_train(train_rows)
    dev_vecs = vutils.vectorise_test(dev_rows, vocabulary)
    train_vecs = add_objType_to_vec(train_vecs, train_obj_types)
    dev_vecs = add_objType_to_vec(dev_vecs, dev_obj_types)

    print("Saving")
    futils.create_json(
        "../dataset/my_datasets/hyp2_train_vecs.json", train_vecs)
    futils.create_json(
        "../dataset/my_datasets/hyp2_dev_vecs.json", dev_vecs)
Ejemplo n.º 7
0
def create_dataset():
    """Create one object per entry of ``source-reply.json``.

    Each entry's path is split on "/" into exactly four parts; the third
    part decides what is built:

    * "source-tweet": a ``Tweet``, which also receives the structure file
      listed for the same key in ``structures.json``;
    * "replies": a ``Reply``, which receives the second path part.

    Both are created with ``None`` as their categorie argument. Entries of
    any other type are loaded but not stored.

    Returns:
        The ``dd(Data)`` mapping holding the created objects, keyed like
        ``source-reply.json``.
    """
    dataset = dd(Data)
    root = "../dataset/rumoureval-data/"
    entry_paths = f.open_json("../dataset/traindev/source-reply.json")
    tree_paths = f.open_json("../dataset/traindev/structures.json")
    no_categorie = None
    for key in entry_paths:
        rel_path = entry_paths[key]
        content = f.open_json("{}{}".format(root, rel_path))
        subject, source_tweet, kind, _ = rel_path.split("/")
        if kind == "source-tweet":
            tree = f.open_json("{}{}".format(root, tree_paths[key]))
            dataset[key] = Tweet(content, subject, no_categorie, tree)
        elif kind == "replies":
            dataset[key] = Reply(content, subject, no_categorie, source_tweet)
    return dataset
Ejemplo n.º 8
0
def get_X_dataset(dataset, X):
    """Return the elements of ``dataset`` listed in the JSON file ``X``.

    ``X`` is opened with ``f.open_json``; for each of its keys the matching
    element of ``dataset`` gets ``set_categorie`` called with the value
    stored under that key, and is then collected.

    Returns:
        The list of updated elements, in the iteration order of the file's
        keys.
    """
    wanted = f.open_json(X)
    selected = []
    for name in wanted:
        item = dataset[name]
        item.set_categorie(wanted[name])
        selected.append(item)
    return selected
Ejemplo n.º 9
0
def add_source_to_vecs(tweets, sources, vecs):
    """Extend each vector with the vector of the tweet it is mapped to.

    ``tweets`` and ``vecs`` are paired positionally. Each tweet is looked up
    in ``../dataset/traindev/direct_structures.json``; when it has no entry
    there it maps to itself. The vector of the mapped tweet is concatenated
    (``+``) after the tweet's own vector.

    Args:
        tweets: Tweet keys, one per vector.
        sources: Unused; the name is deleted immediately.
        vecs: Vectors, paired positionally with ``tweets``.

    Returns:
        A new list with one extended vector per (tweet, vector) pair.
    """
    del sources
    vec_of = dict(zip(tweets, vecs))
    mapped_to = futils.open_json("../dataset/traindev/direct_structures.json")
    extended = []
    for tweet, vec in zip(tweets, vecs):
        target = mapped_to.get(tweet, tweet)
        extended.append(vec + vec_of[target])
    return extended