Example #1
def test_pool_edges_model():
    encoder = ConvWordsEncoder(*wordembeddings.shape)
    encoder.load_word_embeddings_from_numpy(wordembeddings)
    net = PooledEdgesModel(encoder)
    criterion = nn.MultiMarginLoss()

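    # TorchContainer wraps the model with batching, epoch control,
    # checkpointing and early stopping.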
    container = fackel.TorchContainer(torch_model=net,
                                      batch_size=8,
                                      max_epochs=5,
                                      model_checkpoint=False,
                                      early_stopping=5,
                                      criterion=criterion)

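    # Select questions that have at least one candidate graph with a
    # positive F1 score (scores[2]).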
    selected_questions = [
        s for s in training_dataset
        if any(scores[2] > 0.0 for g, scores in s.graphs)
    ]
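    # After shuffling each question's graphs, the target is the index of
    # its highest-F1 graph.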
    targets = np.zeros(len(selected_questions), dtype=np.int32)
    for qi, q in enumerate(selected_questions):
        random.shuffle(q.graphs)
        targets[qi] = np.argsort([g.scores[2] for g in q.graphs])[::-1][0]

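    # Encode questions and candidate graph edges as word-index tensors
    # for the model.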
    train_questions = V.encode_batch_questions(
        selected_questions, word2idx)[..., 0, :]
    train_edges = V.encode_batch_graphs(
        selected_questions, word2idx)[..., 0, :]

    container.train(train=(train_questions, train_edges),
                    train_targets=targets)
Example #2
def test_variable_margin_loss():
    encoder = ConvWordsEncoder(*wordembeddings.shape)
    encoder.load_word_embeddings_from_numpy(wordembeddings)
    net = OneEdgeModel(encoder)
    criterion = losses.VariableMarginLoss()

    container = fackel.TorchContainer(torch_model=net,
                                      batch_size=8,
                                      max_epochs=5,
                                      model_checkpoint=False,
                                      early_stopping=5,
                                      criterion=criterion)

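    # Filter out questions whose candidate graphs all score zero F1.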
    training_dataset = [
        s for s in dataset if any(scores[2] > 0.0 for g, scores in s.graphs)
    ]
    train_questions = V.encode_batch_questions(
        training_dataset, word2idx)[..., 0, :]
    train_edges = V.encode_batch_graphs(
        training_dataset, word2idx)[..., 0, 0, :]
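    # VariableMarginLoss takes graded targets: each row holds the F1
    # scores of up to 100 shuffled candidate graphs.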
    targets = np.zeros((len(training_dataset), 100))
    for qi, q in enumerate(training_dataset):
        random.shuffle(q.graphs)
        for gi, g in enumerate(q.graphs[:100]):
            targets[qi, gi] = g.scores[2]

    container.train(train=(train_questions, train_edges),
                    train_targets=targets)
Example #3
def test_metrics():
    encoder = ConvWordsEncoder(*wordembeddings.shape)
    encoder.load_word_embeddings_from_numpy(wordembeddings)
    net = PooledEdgesModel(encoder)
    criterion = nn.MultiMarginLoss()

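    # Custom metric hook: top-1 accuracy on every pass, plus the mean F1
    # of the selected graphs on validation.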
    def metrics(targets, predictions, validation=False):
        _, predicted_targets = torch.topk(predictions, 1, dim=-1)
        # _, targets = torch.topk(targets, 1, dim=-1)
        predicted_targets = predicted_targets.squeeze(1)
        cur_acc = torch.sum(predicted_targets == targets).float()
        cur_acc /= predicted_targets.size(0)
        cur_f1 = 0.0

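        # On validation, accumulate the F1 (scores[2]) of each predicted
        # graph.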
        if validation:
            for i, q in enumerate(training_dataset):
                if i < predicted_targets.size(0):
                    idx = predicted_targets.data[i]
                    if idx < len(q.graphs):
                        cur_f1 += q.graphs[idx].scores[2]
            cur_f1 /= targets.size(0)
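        # `.data[0]` is pre-0.4 PyTorch; on 0.4+ this would be
        # `cur_acc.item()`.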
        return {'acc': cur_acc.data[0], 'f1': cur_f1}

    container = fackel.TorchContainer(torch_model=net,
                                      batch_size=8,
                                      max_epochs=5,
                                      model_checkpoint=False,
                                      early_stopping=5,
                                      criterion=criterion,
                                      metrics=metrics)

    selected_questions = [
        s for s in training_dataset
        if any(scores[2] > 0.0 for g, scores in s.graphs)
    ]
    targets = np.zeros(len(selected_questions), dtype=np.int32)
    for qi, q in enumerate(selected_questions):
        random.shuffle(q.graphs)
        targets[qi] = np.argsort([g.scores[2] for g in q.graphs])[::-1][0]

    train_questions = V.encode_batch_questions(
        selected_questions, word2idx)[..., 0, :]
    train_edges = V.encode_batch_graphs(
        selected_questions, word2idx)[..., 0, :]

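    # The training split doubles as the dev split here, just to exercise
    # the validation code path.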
    container.train(train=(train_questions, train_edges),
                    train_targets=targets,
                    dev=(train_questions, train_edges),
                    dev_targets=targets)
Example #4
def test_stagg_model():
    encoder = ConvWordsEncoder(*wordembeddings.shape)
    encoder.load_word_embeddings_from_numpy(wordembeddings)
    net = STAGGModel(encoder)
    criterion = nn.CrossEntropyLoss()

    container = fackel.TorchContainer(torch_model=net,
                                      batch_size=8,
                                      max_epochs=5,
                                      model_checkpoint=False,
                                      early_stopping=5,
                                      criterion=criterion)

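    # The STAGG-style model ranks candidate graphs using the question,
    # the candidate edges and additional structural features.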
    train_questions = V.encode_batch_questions(training_dataset, word2idx)
    train_edges = V.encode_batch_graphs(
        training_dataset, word2idx)[..., 0, :, :]
    train_features = V.encode_structural_features(training_dataset)

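    # All-zero targets assume the gold graph sits at index 0 for every
    # question (graphs are not shuffled here).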
    container.train(train=(train_questions, train_edges, train_features),
                    train_targets=np.zeros(len(training_dataset),
                                           dtype=np.int32))
Example #5
def test_gnn():
    encoder = ConvWordsEncoder(*wordembeddings.shape)
    encoder.load_word_embeddings_from_numpy(wordembeddings)
    net = GNNModel(encoder, hp_gated=False)
    criterion = nn.MultiMarginLoss(margin=0.5)

    container = fackel.TorchContainer(torch_model=net,
                                      batch_size=8,
                                      max_epochs=5,
                                      model_checkpoint=False,
                                      early_stopping=5,
                                      criterion=criterion,
                                      init_model_weights=True,
                                      lr_decay=2)

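    # encode_batch_graph_structure returns a tuple of tensors, unpacked
    # below as separate model inputs.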
    train_questions = V.encode_batch_questions(training_dataset,
                                               word2idx)[..., 0, :]
    train_graphs = V.encode_batch_graph_structure(training_dataset, word2idx)
    targets = np.zeros(len(training_dataset), dtype=np.int32)

    container.train(train=(train_questions, *train_graphs),
                    train_targets=targets)