def main():  # Add relevant parameters
    train_data, valid_data = fetch_data()  # X_data is a list of pairs (document, y); y in {0,1,2,3,4}

    # Think about the type of function that an RNN describes. To apply it, you will need to convert the text data into vector representations.
    # Further, think about where the vectors will come from. There are 3 reasonable choices:
    # 1) Randomly assign the input to vectors and learn better embeddings during training; see the PyTorch documentation for guidance
    # 2) Assign the input to vectors using pretrained word embeddings. We recommend any of {Word2Vec, GloVe, FastText}. Then, you do not train/update these embeddings.
    # 3) You do the same as 2) but you train (this is called fine-tuning) the pretrained embeddings further.
    # Option 3 will be the most time consuming, so we do not recommend starting with this
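    # A minimal sketch of the three options (names such as vocab_size,
    # embedding_dim, and pretrained_matrix are assumptions, not given here):
    # 1) embedding = nn.Embedding(vocab_size, embedding_dim)
    # 2) embedding = nn.Embedding.from_pretrained(
    #        torch.from_numpy(pretrained_matrix), freeze=True)
    # 3) as in 2), but with freeze=False so the pretrained vectors are fine-tuned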

    model = RNN()  # Fill in parameters
    optimizer = optim.SGD(model.parameters(), lr=0.01)  # a learning rate must be chosen for SGD

    while not stopping_condition:  # How will you decide to stop training, and why? (see the sketch after this skeleton)
        optimizer.zero_grad()
        # You will need further code to operationalize training, ffnn.py may be helpful

        predicted_vector = model(input_vector)
        predicted_label = torch.argmax(predicted_vector)
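    # One reasonable stopping condition is patience-based early stopping
    # (a sketch; `patience` and the `evaluate` helper are hypothetical):
    # best_val, bad_epochs = float("inf"), 0
    # while bad_epochs < patience:
    #     ...train one epoch...
    #     val_loss = evaluate(model, valid_data)
    #     if val_loss < best_val:
    #         best_val, bad_epochs = val_loss, 0
    #     else:
    #         bad_epochs += 1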
Example No. 2
def getMinTrainingAndValData(percent):
    train_data, valid_data = fetch_data()
    train_data = train_data[0:len(train_data) // percent]  # keep the first 1/percent of the training data
    train_data = preprocessData(train_data)
    valid_data = preprocessData(valid_data)
    return train_data, valid_data
Example No. 3
def getTrainingAndValData():
    train_data, valid_data = fetch_data()
    train_data = preprocessData(train_data)
    valid_data = preprocessData(valid_data)
    return train_data, valid_data
Example No. 4
def main(name, hidden_dim, number_of_epochs, n_layers):
    print("Fetching data")
    train_data, valid_data = fetch_data()  # X_data is a list of pairs (document, y); y in {0,1,2,3,4}
    vocab = make_vocab(train_data)
    vocab, word2index, index2word = make_indices(vocab)
    print("Fetched and indexed data")
    train_data = convert_to_vector_representation(train_data, word2index)
    valid_data = convert_to_vector_representation(valid_data, word2index)
    print("Vectorized data")
    model = FFNN(len(vocab), hidden_dim, n_layers)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    print("Training for {} epochs".format(number_of_epochs))
    for epoch in range(number_of_epochs):
        model.train()
        optimizer.zero_grad()
        loss = None
        correct = 0
        total = 0
        start_time = time.time()
        print("Training started for epoch {}".format(epoch + 1))
        random.shuffle(
            train_data)  # Good practice to shuffle order of training data
        minibatch_size = 16
        N = len(train_data)
        for minibatch_index in tqdm(range(N // minibatch_size)):
            optimizer.zero_grad()
            loss = None
            for example_index in range(minibatch_size):
                input_vector, gold_label = train_data[minibatch_index *
                                                      minibatch_size +
                                                      example_index]
                predicted_vector = model(input_vector)
                predicted_label = torch.argmax(predicted_vector)
                correct += int(predicted_label == gold_label)
                total += 1
                example_loss = model.compute_Loss(predicted_vector.view(1, -1),
                                                  torch.tensor([gold_label]))
                if loss is None:
                    loss = example_loss
                else:
                    loss += example_loss
            loss = loss / minibatch_size
            loss.backward()
            optimizer.step()
        print("Training completed for epoch {}".format(epoch + 1))
        print("Training accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Training time for this epoch: {}".format(time.time() -
                                                        start_time))
        # loss = None
        correct = 0
        total = 0
        start_time = time.time()
        print("Validation started for epoch {}".format(epoch + 1))
        random.shuffle(
            valid_data)  # Good practice to shuffle order of validation data
        # minibatch_size = 16
        # for minibatch_index in tqdm(range(N // minibatch_size)):
        # 	optimizer.zero_grad()
        # 	# loss = None
        # 	for example_index in range(minibatch_size):
        N = len(valid_data)
        optimizer.zero_grad()
        for index in tqdm(range(N)):
            input_vector, gold_label = valid_data[index]
            predicted_vector = model(input_vector)
            predicted_label = torch.argmax(predicted_vector)
            correct += int(predicted_label == gold_label)
            total += 1
            # example_loss = model.compute_Loss(predicted_vector.view(1,-1), torch.tensor([gold_label]))
            # if loss is None:
            # loss = example_loss
            # else:
            # loss += example_loss
            # loss = loss / minibatch_size
            # loss.backward()
            # optimizer.step()
        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Validation time for this epoch: {}".format(time.time() -
                                                          start_time))

    current = os.curdir
    models = os.path.join(current, 'models')
    os.makedirs(models, exist_ok=True)  # make sure the output directory exists
    PATH = os.path.join(models, name + '.pt')
    torch.save(model.state_dict(), PATH)
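    # To reload this checkpoint later (a sketch; the FFNN arguments must
    # match the ones used above):
    # model = FFNN(len(vocab), hidden_dim, n_layers)
    # model.load_state_dict(torch.load(PATH))
    # model.eval()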
Example No. 5
def preproccesor1(document):
    return " ".join(document)

def vectorize(data):
    vecs = []
    labels = []
    for doc, y in data:
        vecs.append(doc)
        labels.append(y)
    return vecs, labels

nlp = spacy.load("en_core_web_sm")

print("Fetching data...")
train_data, valid_data, test_data = fetch_data() # X_data is a list of pairs (document, y); y in {0,1,2,3,4}

Tx, Ty = vectorize(train_data)
Vx, Vy = vectorize(valid_data)
Tsx, Id = vectorize(test_data)

text_clf = Pipeline([
    ('features', FeatureUnion([
        ('text', Pipeline([
            ('vect', CountVectorizer(ngram_range=(1, 5), lowercase=True,
                                     binary=False,  preprocessor=preproccesor)),
        ])),
        ('char', Pipeline([
            ('vect', CountVectorizer(analyzer='char',ngram_range=(4, 4), lowercase=False,
                                     binary=False, preprocessor=preproccesor1)),
        ])),
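
# The pipeline definition above is truncated; a typical completion (a
# sketch, not the original code) closes the FeatureUnion, adds a
# classifier from sklearn.linear_model, and fits on the training split:
# text_clf = Pipeline([('features', FeatureUnion([...])),
#                      ('clf', LogisticRegression(max_iter=1000))])
# text_clf.fit(Tx, Ty)
# print("validation accuracy:", text_clf.score(Vx, Vy))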
Example No. 6
def main(hidden_dim, number_of_epochs):
    print("Fetching data")
    train_data, valid_data = fetch_data()  # X_data is a list of pairs (document, y); y in {0,1,2,3,4}
    vocab = make_vocab(train_data)
    vocab, word2index, index2word = make_indices(vocab)
    print("Fetched and indexed data")
    train_data = convert_to_vector_representation(train_data, word2index)
    valid_data = convert_to_vector_representation(valid_data, word2index)
    print("Vectorized data")

    model = FFNN(input_dim=len(vocab), h=hidden_dim)
    optimizer = optim.Adagrad(model.parameters(), lr=0.01)
    print("Training for {} epochs".format(number_of_epochs))

    train_accuracy_history = []
    val_accuracy_history = []
    train_loss_history = []
    val_loss_history = []

    for epoch in range(number_of_epochs):
        if os.path.exists("model.pth"):
            state_dict = torch.load("model.pth")['state_dict']
            model.load_state_dict(state_dict)
            print("Loaded checkpoint model.pth")

        # Stop early if training loss keeps falling while validation loss rises (a sign of overfitting)
        if len(train_loss_history) > 1 and (
                train_loss_history[-1] < val_loss_history[-1]) and (
                    train_loss_history[-1] < train_loss_history[-2]) and (
                        val_loss_history[-1] > val_loss_history[-2]):
            break

        model.train()
        optimizer.zero_grad()
        loss = None
        totalloss = 0
        correct = 0
        total = 0
        start_time = time.time()
        print("Training started for epoch {}".format(epoch + 1))
        random.shuffle(
            train_data)  # Good practice to shuffle order of training data
        minibatch_size = 16
        N = len(train_data)
        for minibatch_index in tqdm(range(N // minibatch_size)):
            optimizer.zero_grad()
            loss = None
            for example_index in range(minibatch_size):
                input_vector, gold_label = train_data[minibatch_index *
                                                      minibatch_size +
                                                      example_index]
                predicted_vector = model(input_vector)
                predicted_label = torch.argmax(predicted_vector)
                correct += int(predicted_label == gold_label)
                total += 1
                example_loss = model.compute_Loss(predicted_vector.view(1, -1),
                                                  torch.tensor([gold_label]))
                if loss is None:
                    loss = example_loss
                else:
                    loss += example_loss
            loss = loss / minibatch_size  # average the loss over the minibatch (the original bug skipped this)
            totalloss += loss.item()  # .item() keeps a float, so the computation graph is not retained
            loss.backward()  # backpropagate once per minibatch (the original bug updated once per epoch)
            optimizer.step()
        train_loss_history.append(totalloss / (N // minibatch_size))
        train_accuracy_history.append(correct / total)

        print("Training completed for epoch {}".format(epoch + 1))
        print("Training accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Training time for this epoch: {}".format(time.time() -
                                                        start_time))
        loss = None
        correct = 0
        total = 0
        start_time = time.time()
        print("Validation started for epoch {}".format(epoch + 1))
        random.shuffle(
            valid_data)  # Good practice to shuffle order of validation data
        minibatch_size = 16
        N = len(valid_data)
        #for minibatch_index in tqdm(range(N // minibatch_size)):
        # optimizer.zero_grad()
        # loss = None
        # BUGGGG - Shouldnt train on validation set
        # for example_index in range(minibatch_size):
        # 	input_vector, gold_label = valid_data[minibatch_index * minibatch_size + example_index]
        # 	predicted_vector = model(input_vector)
        # 	predicted_label = torch.argmax(predicted_vector)
        # 	correct += int(predicted_label == gold_label)
        # 	total += 1
        # 	# example_loss = model.compute_Loss(predicted_vector.view(1,-1), torch.tensor([gold_label]))
        # if loss is None:
        # 	loss = example_loss
        # else:
        # 	loss += example_loss
        # loss = loss / minibatch_size
        # loss.backward()
        # optimizer.step()
        val_tot_loss = 0.0
        for i in range(N):
            input_vector, gold_label = valid_data[i]
            predicted_vector = model(input_vector)
            predicted_label = torch.argmax(predicted_vector)
            correct += int(predicted_label == gold_label)
            total += 1
            example_loss = model.compute_Loss(predicted_vector.view(1, -1),
                                              torch.tensor([gold_label]))
            val_tot_loss += example_loss.item() / N  # .item() keeps a float and releases the graph
        val_accuracy_history.append(correct / total)
        val_loss_history.append(val_tot_loss)
        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Validation time for this epoch: {}".format(time.time() -
                                                          start_time))

        # Save the model after every epoch
        path = "model.pth"
        torch.save({'state_dict': model.state_dict()}, path)

    # number of parameters
    print("Number of parameters")
    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    for p in model.parameters():
        if p.requires_grad:
            print(p.numel())
    print(pytorch_total_params)

    print(train_loss_history)
    print(train_accuracy_history)
    print(val_accuracy_history)
    print(val_loss_history)
    # training loss
    iteration_list = [i + 1 for i in range(len(train_loss_history))]
    plt.plot(iteration_list, train_loss_history)
    plt.xlabel("Number of Epochs")
    plt.ylabel("Training Loss")
    plt.title("FFNN: Loss vs Number of Epochs")
    #plt.show()
    plt.savefig('FFNN_train_loss_history.png')
    plt.clf()

    # training accuracy
    iteration_list = [i + 1 for i in range(len(train_accuracy_history))]
    plt.plot(iteration_list, train_accuracy_history)
    plt.xlabel("Number of Epochs")
    plt.ylabel("Training Accuracy")
    plt.title("FFNN: Accuracy vs Number of Epochs")
    #plt.show()
    plt.savefig('FFNN_train_accuracy_history.png')
    plt.clf()

    # val accuracy
    iteration_list = [i + 1 for i in range(len(val_accuracy_history))]
    plt.plot(iteration_list, val_accuracy_history, color="red")
    plt.xlabel("Number of Epochs")
    plt.ylabel("Validation Accuracy")
    plt.title("FFNN: Accuracy vs Number of Epochs")
    #plt.show()
    plt.savefig('FFNN_val_accuracy_history.png')
    plt.clf()

    # val loss
    iteration_list = [i + 1 for i in range(len(val_loss_history))]
    plt.plot(iteration_list, val_loss_history, color="red")
    plt.xlabel("Number of Epochs")
    plt.ylabel("Validation Loss")
    plt.title("FFNN: Loss vs Number of Epochs")
    #plt.show()
    plt.savefig('FFNN_val_loss_history.png')
    plt.clf()
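
# The four plotting blocks above differ only in the data series, axis label,
# color, and output file; a small helper (a sketch under that assumption,
# relying on matplotlib.pyplot already being imported as plt) would remove
# the duplication:
def save_history_plot(history, ylabel, title, filename, color=None):
    epochs = range(1, len(history) + 1)
    plt.plot(epochs, history, color=color)
    plt.xlabel("Number of Epochs")
    plt.ylabel(ylabel)
    plt.title(title)
    plt.savefig(filename)
    plt.clf()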
Example No. 7
def main(embedding_dim, hidden_dim, number_of_epochs,
         num_layers):  # Add relevant parameters
    print("Fetching data")
    train_data, valid_data = fetch_data()  # X_data is a list of pairs (document, y); y in {0,1,2,3,4}

    temp_list = []
    for data in train_data:
        temp_list.append(data[0])
    for data in valid_data:
        temp_list.append(data[0])
    word2vec_model = Word2Vec(temp_list,
                              size=embedding_dim,
                              window=5,
                              min_count=1)
    vectorized_train = convert_to_vector_representation(
        train_data, word2vec_model)
    vectorized_valid = convert_to_vector_representation(
        valid_data, word2vec_model)

    print("Fetched and Vectorized data")

    # Think about the type of function that an RNN describes. To apply it, you will need to convert the text data into vector representations.
    # Further, think about where the vectors will come from. There are 3 reasonable choices:
    # 1) Randomly assign the input to vectors and learn better embeddings during training; see the PyTorch documentation for guidance
    # 2) Assign the input to vectors using pretrained word embeddings. We recommend any of {Word2Vec, GloVe, FastText}. Then, you do not train/update these embeddings.
    # 3) You do the same as 2) but you train (this is called fine-tuning) the pretrained embeddings further.
    # Option 3 will be the most time consuming, so we do not recommend starting with this

    # Similar to ffnn1fix.py, with changes in the validation part and an early stopping condition
    model = RNN(embedding_dim, hidden_dim, num_layers)  # Fill in parameters
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    #optimizer = optim.Adam(model.parameters(), lr=0.001)

    # early stopping condition
    min_valid_loss = 1e10  # keep track of the minimum validation loss
    number_to_stop = 5  # when reach this number, do the early stopping
    counter = 0  # number of epochs without improvement over the minimum loss
    stop_flag = False  # early stopping flag

    print("Training for {} epochs".format(number_of_epochs))
    for epoch in range(number_of_epochs):
        model.train()
        optimizer.zero_grad()
        loss = None
        correct = 0
        total = 0
        start_time = time.time()
        print("Training started for epoch {}".format(epoch + 1))
        random.shuffle(vectorized_train
                       )  # Good practice to shuffle order of training data
        minibatch_size = 16
        N = len(vectorized_train)
        for minibatch_index in tqdm(range(N // minibatch_size)):
            optimizer.zero_grad()
            loss = None
            for example_index in range(minibatch_size):
                input_vector, gold_label = vectorized_train[minibatch_index *
                                                            minibatch_size +
                                                            example_index]
                predicted_vector = model(input_vector.unsqueeze(0))
                predicted_label = torch.argmax(predicted_vector)
                correct += int(predicted_label == gold_label)
                total += 1
                example_loss = model.compute_Loss(predicted_vector.view(1, -1),
                                                  torch.tensor([gold_label]))
                if loss is None:
                    loss = example_loss
                else:
                    loss += example_loss
            loss = loss / minibatch_size
            loss.backward()
            optimizer.step()
        print("Training completed for epoch {}".format(epoch + 1))
        print("Training accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Training time for this epoch: {}".format(time.time() -
                                                        start_time))
        loss = None
        correct = 0
        total = 0
        start_time = time.time()
        print("Validation started for epoch {}".format(epoch + 1))
        random.shuffle(vectorized_valid
                       )  # Good practice to shuffle order of validation data
        minibatch_size = len(vectorized_valid)  # evaluate the whole validation set in a single pass
        N = len(vectorized_valid)
        for minibatch_index in tqdm(range(N // minibatch_size)):
            loss = None
            for example_index in range(minibatch_size):
                input_vector, gold_label = vectorized_valid[minibatch_index *
                                                            minibatch_size +
                                                            example_index]
                predicted_vector = model(input_vector.unsqueeze(0))
                predicted_label = torch.argmax(predicted_vector)
                correct += int(predicted_label == gold_label)
                total += 1
                example_loss = model.compute_Loss(predicted_vector.view(1, -1),
                                                  torch.tensor([gold_label]))
                if loss is None:
                    loss = example_loss
                else:
                    loss += example_loss
            loss = loss / minibatch_size

            # check for early stopping condition
            if loss < min_valid_loss:
                min_valid_loss = loss
                counter = 0
            else:
                counter += 1
            #print("Counter: {}".format(counter))
            if counter == number_to_stop:
                stop_flag = True
                break

        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Validation time for this epoch: {}".format(time.time() -
                                                          start_time))

        if stop_flag:
            print("Early stopping, with minimum validation loss {}".format(
                min_valid_loss))
            break
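
# A common companion to the early-stopping logic above is to checkpoint the
# model whenever the validation loss improves, and to reload the best
# weights at the end (a sketch; the file name is an assumption):
# if loss < min_valid_loss:
#     min_valid_loss = loss
#     counter = 0
#     torch.save(model.state_dict(), "best_rnn.pt")
# ...
# model.load_state_dict(torch.load("best_rnn.pt"))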
Example No. 8
    'lstm_rmsprop_base.pt'
]
model1 = RNN(32, 1, 64, True)
model1.load_state_dict(torch.load(os.path.join(directory, model_paths[0])))
model2 = RNN(32, 1, 64, True)
model2.load_state_dict(torch.load(os.path.join(directory, model_paths[1])))
model3 = RNN(32, 1, 64, False)
model3.load_state_dict(torch.load(os.path.join(directory, model_paths[2])))
model4 = RNN(32, 1, 64, False)
model4.load_state_dict(torch.load(os.path.join(directory, model_paths[3])))
models = [model1, model2, model3, model4]

print('models successfully loaded')

# Load trained word embeddings
train_data, valid_data = fetch_data()
wv_model = Word2Vec.load("word2vec.model")

validation_samples = []
for v in valid_data:
    embedding_list = [wv_model.wv[word] for word in v[0]]
    stacked_embedding = np.stack(embedding_list, axis=0)
    expanded_embedding = np.expand_dims(stacked_embedding, axis=0)
    embedding_tensor = torch.from_numpy(expanded_embedding)
    valid_sample = (embedding_tensor, v[1])
    validation_samples.append(valid_sample)

N = len(validation_samples)

print('starting validation counts')
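
# The validation-count code is truncated above; a majority-vote ensemble
# over the four loaded models might look like this (a sketch, not the
# original code):
# correct = 0
# for embedding_tensor, gold_label in validation_samples:
#     votes = [int(torch.argmax(m(embedding_tensor))) for m in models]
#     prediction = max(set(votes), key=votes.count)
#     correct += int(prediction == gold_label)
# print("ensemble validation accuracy:", correct / N)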
Example No. 9
def main(hidden_dim, batch_size):
    global device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    if not os.path.exists('glove.6B.50d.w2v.txt'):
        print("w2v file not found, generating...")
        glove2word2vec(glove_input_file='glove.6B.50d.txt',
                       word2vec_output_file='glove.6B.50d.w2v.txt')
    global w2v
    w2v = KeyedVectors.load_word2vec_format('glove.6B.50d.w2v.txt',
                                            binary=False)

    print("Fetching data...")
    train_data, valid_data = fetch_data()  # X_data is a list of pairs (document, y); y in {0,1,2,3,4}

    model = RNN(50, hidden_dim, 5, batch_size)
    model.double()
    model.to(device)  # move to GPU only when one is available

    print("Vectorizing data...")
    train_vecs, train_labs = vectorize_data(train_data)
    valid_vecs, valid_labs = vectorize_data(valid_data)
    print("Finished vectorizing data")

    optimizer = optim.SGD(model.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          nesterov=False)
    #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
    iters = 10
    while iters > 0:  # train for a fixed budget of 10 epochs
        model.train()
        optimizer.zero_grad()
        minibatch_size = 16
        N = len(train_data)
        perm = np.random.permutation(N)
        train_vecs = [train_vecs[i] for i in perm]
        train_labs = train_labs[perm]
        total = 0
        correct = 0
        epoch = 10 - iters
        for minibatch_index in tqdm(range(N // minibatch_size)):
            optimizer.zero_grad()
            loss = None
            for example_index in range(minibatch_size):
                gold_label = train_labs[minibatch_index * minibatch_size +
                                        example_index].long()
                predicted_vector = model(
                    train_vecs[minibatch_index * minibatch_size +
                               example_index].to(device))
                predicted_label = torch.argmax(predicted_vector)
                correct += int(predicted_label == gold_label)
                total += 1
                example_loss = model.compute_Loss(
                    predicted_vector.view(1, -1),
                    torch.tensor([gold_label]).to(device))
                if loss is None:
                    loss = example_loss
                else:
                    loss += example_loss
            loss = loss / minibatch_size
            loss.backward()
            optimizer.step()

        optimizer.zero_grad()
        N = len(valid_data)
        total = 0
        correct = 0
        for minibatch_index in tqdm(range(N // minibatch_size)):
            optimizer.zero_grad()
            loss = None
            for example_index in range(minibatch_size):
                gold_label = valid_labs[minibatch_index * minibatch_size +
                                        example_index].long()
                predicted_vector = model(
                    valid_vecs[minibatch_index * minibatch_size +
                               example_index].to(device))
                predicted_label = torch.argmax(predicted_vector)
                correct += int(predicted_label == gold_label)
                total += 1
        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        #scheduler.step()
        iters -= 1
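
# Note on the validation pass above: gradients are never needed there, so
# the optimizer.zero_grad() calls are unnecessary. A common pattern (a
# sketch) is:
# model.eval()
# with torch.no_grad():
#     for ...:  # validation loop
#         predicted_vector = model(...)
# model.train()  # restore training mode before the next epoch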
Example No. 10
def main(hidden_dim, n_layers, number_of_epochs):  # Add relevant parameters
    print("Fetching data")
    train_data, valid_data = fetch_data()  # X_data is a list of pairs (document, y); y in {0,1,2,3,4}
    vocab = make_vocab(train_data)
    vocab, word2index, index2word = make_indices(vocab)
    print("Fetched and indexed data")
    train_data = convert_to_vector_representation(train_data)
    valid_data = convert_to_vector_representation(valid_data)
    print("Vectorized data")
    # Think about the type of function that an RNN describes. To apply it, you will need to convert the text data into vector representations.
    # Further, think about where the vectors will come from. There are 3 reasonable choices:
    # 1) Randomly assign the input to vectors and learn better embeddings during training; see the PyTorch documentation for guidance
    # 2) Assign the input to vectors using pretrained word embeddings. We recommend any of {Word2Vec, GloVe, FastText}. Then, you do not train/update these embeddings.
    # 3) You do the same as 2) but you train (this is called fine-tuning) the pretrained embeddings further.
    # Option 3 will be the most time consuming, so we do not recommend starting with this

    model = RNN(len(vocab), hidden_dim, n_layers)  # Fill in parameters
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.8)
    print("Training for {} epochs".format(number_of_epochs))
    for epoch in range(
            number_of_epochs):  # How will you decide to stop training and why
        # You will need further code to operationalize training, ffnn.py may be helpful
        model.train()
        optimizer.zero_grad()
        loss = None
        correct = 0
        total = 0
        start_time = time.time()
        print("Training started for epoch {}".format(epoch + 1))
        random.shuffle(
            train_data)  # Good practice to shuffle order of training data
        minibatch_size = 16
        N = len(train_data)
        for minibatch_index in tqdm(range(N // minibatch_size)):
            optimizer.zero_grad()
            loss = None
            for example_index in range(minibatch_size):
                input_vector, gold_label = train_data[minibatch_index *
                                                      minibatch_size +
                                                      example_index]
                predicted_vector = model(input_vector)
                predicted_label = torch.argmax(predicted_vector)
                correct += int(predicted_label == gold_label)
                total += 1
                example_loss = model.compute_Loss(predicted_vector.view(1, -1),
                                                  torch.tensor([gold_label]))
                if loss is None:
                    loss = example_loss
                else:
                    loss += example_loss
            loss = loss / minibatch_size
            loss.backward()
            optimizer.step()
        print("Training completed for epoch {}".format(epoch + 1))
        print("Training accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Training time for this epoch: {}".format(time.time() -
                                                        start_time))
        loss = None
        correct = 0
        total = 0
        start_time = time.time()
        print("Validation started for epoch {}".format(epoch + 1))
        random.shuffle(
            valid_data)  # Good practice to shuffle order of validation data
        minibatch_size = 16
        N = len(valid_data)
        for minibatch_index in tqdm(range(N // minibatch_size)):
            #optimizer.zero_grad()
            loss = None
            for example_index in range(minibatch_size):
                input_vector, gold_label = valid_data[minibatch_index *
                                                      minibatch_size +
                                                      example_index]
                predicted_vector = model(input_vector)
                predicted_label = torch.argmax(predicted_vector)
                correct += int(predicted_label == gold_label)
                total += 1
                example_loss = model.compute_Loss(predicted_vector.view(1, -1),
                                                  torch.tensor([gold_label]))
                if loss is None:
                    loss = example_loss
                else:
                    loss += example_loss
            loss = loss / minibatch_size
            #loss.backward()
            #optimizer.step()
        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Validation time for this epoch: {}".format(time.time() -
                                                          start_time))
Example No. 11
def main(epochs, speed_up=1):
    # X_data is a list of pairs (document, y); y in {0,1,2,3,4}
    minibatch_size = 16
    train_data, valid_data = fetch_data()
    # build the vocabulary and word-to-index mappings
    vocab = make_vocab(train_data)
    vocab, word2index, index2word = make_indices(vocab)
    print("Fetched and indexed data")
    print("Vectorized data")
    model = RNN(input_dim=len(vocab), hidden_dim=32)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        loss = 0.00
        correct = 0
        total = 0
        start_time = time.time()
        print("Training started for epoch {}".format(epoch + 1))
        random.shuffle(train_data)
        minibatch_size = 100
        N = len(train_data)
        for minibatch_index in tqdm(range(N // minibatch_size)):
            optimizer.zero_grad()
            loss_g = 0.00
            for example_index in range(minibatch_size):
                total += 1
                input_vector = train_data[minibatch_index * minibatch_size +
                                          example_index]

                gold_label = input_vector[1]
                input_vector = convert_to_vector_representation_rnn(
                    input_vector, word2index)
                # teacher forcing with 50% probability (note: `force` is set but never used below)
                force = random.random() < 0.5
                predicted_vector, hidden = model(input_vector, None)
                predicted_probabilities = torch.exp(predicted_vector)
                predicted_label = torch.argmax(predicted_probabilities)
                if predicted_label == gold_label:
                    correct += 1
                updated_loss = model.compute_Loss(predicted_vector.view(1, -1),
                                                  torch.tensor([gold_label]))
                loss_g += updated_loss
            loss = loss_g / minibatch_size
            loss.backward()
            optimizer.step()
        print("Training completed for epoch {}".format(epoch + 1))
        print("Training accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Training time for this epoch: {}".format(time.time() -
                                                        start_time))
        print("Loss for this epoch: {}".format(loss))

        # Validation loss/acc
        loss = 0.00
        correct = 0
        total = 0
        start_time = time.time()
        print("Validation started for epoch {}".format(epoch + 1))
        random.shuffle(valid_data)
        N = int(len(valid_data) / speed_up)
        print(N)
        for i in range(N):
            total += 1
            input_vector = valid_data[i]
            gold_label = input_vector[1]
            input_vector = convert_to_vector_representation_rnn(
                input_vector, word2index)
            predicted_vector, hidden = model(input_vector, None)
            predicted_label = torch.argmax(predicted_vector)
            if predicted_label == gold_label:
                correct += 1
            updated_loss = model.compute_Loss(predicted_vector.view(1, -1),
                                              torch.tensor([gold_label]))
            loss += updated_loss.item()
            # No loss.backward() or optimizer.step() here: the model must
            # not be updated on validation data
        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Validation time for this epoch: {}".format(time.time() -
                                                          start_time))

    return model
Example No. 12
def main(hidden_dim, number_of_epochs):
    print("Fetching data")
    train_data, valid_data = fetch_data()  # X_data is a list of pairs (document, y); y in {0,1,2,3,4}
    vocab = make_vocab(train_data)
    vocab, word2index, index2word = make_indices(vocab)
    print("Fetched and indexed data")
    train_data = convert_to_vector_representation(train_data, word2index)
    valid_data = convert_to_vector_representation(valid_data, word2index)
    print("Vectorized data")

    model = FFNN(input_dim=len(vocab), h=hidden_dim)
    optimizer = optim.SGD(
        model.parameters(), lr=0.01, momentum=0.9
    )  # This network is trained by traditional (batch) gradient descent; ignore that this says 'SGD'
    print("Training for {} epochs".format(number_of_epochs))
    for epoch in range(number_of_epochs):
        model.train()
        optimizer.zero_grad()
        loss = 0
        correct = 0
        total = 0
        start_time = time.time()
        print("Training started for epoch {}".format(epoch + 1))
        random.shuffle(
            train_data)  # Good practice to shuffle order of training data
        for input_vector, gold_label in tqdm(train_data):
            predicted_vector = model(input_vector)
            predicted_label = torch.argmax(predicted_vector)
            correct += int(predicted_label == gold_label)
            total += 1
            loss += model.compute_Loss(predicted_vector.view(1, -1),
                                       torch.tensor([gold_label]))
        loss = loss / total  # average the accumulated loss over the full batch
        loss.backward()
        optimizer.step()
        print("Training completed for epoch {}".format(epoch + 1))
        print("Training accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Training time for this epoch: {}".format(time.time() -
                                                        start_time))
        loss = 0
        correct = 0
        total = 0
        start_time = time.time()
        print("Validation started for epoch {}".format(epoch + 1))
        random.shuffle(
            valid_data)  # Good practice to shuffle order of valid data
        for input_vector, gold_label in valid_data:
            predicted_vector = model(input_vector)
            predicted_label = torch.argmax(predicted_vector)
            correct += int(predicted_label == gold_label)
            total += 1
            loss += model.compute_Loss(predicted_vector.view(1, -1),
                                       torch.tensor([gold_label])).item()
        # No loss.backward() or optimizer.step() here: the model must not
        # be updated on validation data
        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Validation time for this epoch: {}".format(time.time() -
                                                          start_time))
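
# For reference, the per-example accumulation above is equivalent to one
# batched forward pass when all input vectors share the same size and the
# model's forward accepts a batch dimension (a sketch, not original code):
# batch = torch.stack([vec for vec, _ in train_data])    # shape (N, D)
# labels = torch.tensor([y for _, y in train_data])      # shape (N,)
# loss = model.compute_Loss(model(batch), labels)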
Example No. 13
def main(hidden_dim, number_of_epochs):  # Add relevant parameters
    index_global = 0
    is_cuda = torch.cuda.is_available()
    if is_cuda:
        device = torch.device("cuda")
        print("GPU is available")
    else:
        device = torch.device("cpu")
        print("GPU not available, CPU used")

    print("Fetching data")
    # X_data is a list of pairs (document, y); y in {0,1,2,3,4}
    train_data, valid_data = fetch_data()
    print("Data fetched")
    #train_data = convert_to_vector_representation(train_data)
    #valid_data = convert_to_vector_representation(valid_data)
    if CACHED:
        train_data = pickle.load(open("train_data.pkl", "rb"))
        valid_data = pickle.load(open("valid_data.pkl", "rb"))
    else:
        train_data = convert_to_vector_representation(train_data)
        valid_data = convert_to_vector_representation(valid_data)
        pickle.dump(train_data, open("train_data.pkl", "wb"))
        pickle.dump(valid_data, open("valid_data.pkl", "wb"))

    print("Vectorized data")

    # Create RNN
    model = RNN(input_dim=300,
                h=hidden_dim,
                output_size=5,
                n_layers=1,
                device=device)
    model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
    for epoch in range(number_of_epochs):
        model.train()
        optimizer.zero_grad()
        loss = None
        correct = 0
        total = 0
        start_time = time.time()
        print("Training started for epoch {}".format(epoch + 1))
        random.shuffle(train_data)
        minibatch_size = 16
        N = len(train_data)
        for minibatch_index in tqdm(range(N // minibatch_size)):
            optimizer.zero_grad()
            loss = None
            for example_index in range(minibatch_size):
                input_vector, gold_label = train_data[minibatch_index *
                                                      minibatch_size +
                                                      example_index]
                input_vector = input_vector.to(device)
                predicted_vector, hidden = model(input_vector.unsqueeze(1))
                predicted_label = torch.argmax(predicted_vector)
                correct += int(predicted_label == gold_label)
                total += 1
                example_loss = model.compute_Loss(predicted_vector.view(1, -1),
                                                  torch.tensor([gold_label]))
                if loss is None:
                    loss = example_loss
                else:
                    loss += example_loss
            loss = loss / minibatch_size
            if (loss.item() > 100):
                print("Stopping: loss is diverging")
                os._exit(1)
            loss.backward()
            #nn.utils.clip_grad_norm_(model.parameters(),0.5)
            optimizer.step()
        print("Training completed for epoch {}".format(epoch + 1))
        print("Training accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Training time for this epoch: {}".format(time.time() -
                                                        start_time))
        model.eval()
        loss = None
        correct = 0
        total = 0
        start_time = time.time()
        print("Validation started for epoch {}".format(epoch + 1))
        #random.shuffle(valid_data)
        minibatch_size = 16
        N = len(valid_data)
        with open('rnn.csv', 'a') as csvFile:
            row = ["index", "epoch", "gold_label", "predicted_label"]
            writer = csv.writer(csvFile)
            writer.writerow(row)
            for minibatch_index in tqdm(range(N // minibatch_size)):
                optimizer.zero_grad()
                loss = None
                for example_index in range(minibatch_size):
                    input_vector, gold_label = valid_data[minibatch_index *
                                                          minibatch_size +
                                                          example_index]
                    input_vector = input_vector.to(device)
                    predicted_vector, hidden = model(input_vector.unsqueeze(1))
                    predicted_label = torch.argmax(predicted_vector)

                    row = [
                        index_global, epoch + 1, gold_label, predicted_label
                    ]
                    writer.writerow(row)  # reuse the writer created before the loop

                    index_global = index_global + 1

                    correct += int(predicted_label == gold_label)
                    total += 1
                    example_loss = model.compute_Loss(
                        predicted_vector.view(1, -1),
                        torch.tensor([gold_label]))
                    if loss is None:
                        loss = example_loss
                    else:
                        loss += example_loss
                loss = loss / minibatch_size
        print("Validation completed for epoch {}".format(epoch + 1))
        print("Validation accuracy for epoch {}: {}".format(
            epoch + 1, correct / total))
        print("Validation time for this epoch: {}".format(time.time() -
                                                          start_time))
Example No. 14
def main(hidden_dim, number_of_epochs):
	index_global = 0
	print("Fetching data")
	train_data, valid_data = fetch_data() # X_data is a list of pairs (document, y); y in {0,1,2,3,4}
	vocab = make_vocab(train_data)
	vocab, word2index, index2word = make_indices(vocab)
	print("Fetched and indexed data")
	train_data = convert_to_vector_representation(train_data, word2index)
	valid_data = convert_to_vector_representation(valid_data, word2index)
	print("Vectorized data")

	model = FFNN(input_dim = len(vocab), h = hidden_dim)
	optimizer = optim.SGD(model.parameters(),lr=0.01, momentum=0.9)
	print("Training for {} epochs".format(number_of_epochs))
	for epoch in range(number_of_epochs):
		model.train()
		optimizer.zero_grad()
		loss = None
		correct = 0
		total = 0
		start_time = time.time()
		print("Training started for epoch {}".format(epoch + 1))
		random.shuffle(train_data) # Good practice to shuffle order of training data
		minibatch_size = 16
		N = len(train_data)
		for minibatch_index in tqdm(range(N // minibatch_size)):
			optimizer.zero_grad() #they added this: fourth error
			loss = None
			for example_index in range(minibatch_size):
				input_vector, gold_label = train_data[minibatch_index * minibatch_size + example_index]
				predicted_vector = model(input_vector)
				predicted_label = torch.argmax(predicted_vector)
				correct += int(predicted_label == gold_label)
				total += 1
				example_loss = model.compute_Loss(predicted_vector.view(1,-1), torch.tensor([gold_label]))
				if loss is None:
					loss = example_loss
				else:
					loss += example_loss
			loss = loss / minibatch_size
			loss.backward()
			optimizer.step()
		print("Training completed for epoch {}".format(epoch + 1))
		print("Training accuracy for epoch {}: {}".format(epoch + 1, correct / total))
		print("Training time for this epoch: {}".format(time.time() - start_time))
		model.eval() #ERROR 2 FIXED: without eval(), the model would keep training-mode behavior while scoring the validation set
		loss = None
		correct = 0
		total = 0
		start_time = time.time()
		print("Validation started for epoch {}".format(epoch + 1))
		#random.shuffle(valid_data) # Good practice to shuffle order of training data #ERROR: RANDOM SHUFFLE VALID_DATA!!!!!
		minibatch_size = 16
		N = len(valid_data) ##ERROR: VALID DATA NOT TRAINDATA
		with open('ffnn.csv', 'a') as csvFile:
			row = ["index", "epoch", "gold_label", "predicted_label"]
			writer = csv.writer(csvFile)
			writer.writerow(row)
			for minibatch_index in tqdm(range(N // minibatch_size)):
				optimizer.zero_grad() #they added this
				loss = None
				for example_index in range(minibatch_size):
					input_vector, gold_label = valid_data[minibatch_index * minibatch_size + example_index] ##ERROR: VALID_DATA
					predicted_vector = model(input_vector)
					predicted_label = torch.argmax(predicted_vector)

					row = [index_global, epoch + 1, gold_label, predicted_label]
					writer = csv.writer(csvFile)
					writer.writerow(row)


					index_global = index_global + 1


					correct += int(predicted_label == gold_label)
					total += 1
					example_loss = model.compute_Loss(predicted_vector.view(1,-1), torch.tensor([gold_label]))
					if loss is None:
						loss = example_loss
					else:
						loss += example_loss
				loss = loss / minibatch_size
				# No loss.backward() or optimizer.step() here: the model must not be updated on validation data
		print("Validation completed for epoch {}".format(epoch + 1))
		print("Validation accuracy for epoch {}: {}".format(epoch + 1, correct / total))
		print("Validation time for this epoch: {}".format(time.time() - start_time))
Example No. 15
def main(h1, h2, h3, h4, number_of_epochs): # Add relevant parameters
	train_data, valid_data = fetch_data() # X_data is a list of pairs (document, y); y in {0,1,2,3,4}	
	train_data = preprocessData(train_data)
	valid_data = preprocessData(valid_data)

	# Think about the type of function that an RNN describes. To apply it, you will need to convert the text data into vector representations.
	# Further, think about where the vectors will come from. There are 3 reasonable choices:
	# 1) Randomly assign the input to vectors and learn better embeddings during training; see the PyTorch documentation for guidance
	# 2) Assign the input to vectors using pretrained word embeddings. We recommend any of {Word2Vec, GloVe, FastText}. Then, you do not train/update these embeddings.
	# 3) You do the same as 2) but you train (this is called fine-tuning) the pretrained embeddings further. 
	# Option 3 will be the most time consuming, so we do not recommend starting with this

	model = RNN(h1, h2, h3, h4, 5, train_data[0][0].size()[2], 1)
	optimizer = optim.SGD(model.parameters(), lr = 0.1, momentum = 0.9) 
	minibatch_size = 16 

	for epoch in range(number_of_epochs): # How will you decide to stop training and why
		model.train()
		# You will need further code to operationalize training, ffnn.py may be helpful
		correct = 0
		total = 0
		start_time = time.time()
		print("Training started for epoch {}".format(epoch + 1))

		random.shuffle(train_data) # Good practice to shuffle order of training data
		N = len(train_data) 
		for minibatch_index in tqdm(range(N // minibatch_size)):
			optimizer.zero_grad()
			loss = None
			for example_index in range(minibatch_size):
				input_vector, gold_label = train_data[minibatch_index * minibatch_size + example_index]
				# print(input_vector.shape)
				predicted_vector = model(input_vector.float())
				predicted_label = torch.argmax(predicted_vector)
				correct += int(predicted_label == gold_label)
				total += 1
				example_loss = model.compute_Loss(predicted_vector.view(1,-1), torch.tensor([gold_label]))
				if loss is None:
					loss = example_loss
				else:
					loss += example_loss
			loss = loss / minibatch_size	
			loss.backward()
			optimizer.step()
		print(loss)
		print("Training completed for epoch {}".format(epoch + 1))
		print("Training accuracy for epoch {}: {}".format(epoch + 1, correct / total))
		print("Training time for this epoch: {}".format(time.time() - start_time))

		start_time = time.time()
		correct = 0
		total = 0
		loss = None # reset so the last training minibatch loss does not leak into the validation average
		# You will need to validate your model. All results for Part 3 should be reported on the validation set. 
		for i in tqdm(range(len(valid_data))):
			input_vector, gold_label = valid_data[i]
			predicted_vector = model(input_vector.float())
			predicted_label = torch.argmax(predicted_vector)
			correct += int(predicted_label == gold_label)
			total += 1
			example_loss = model.compute_Loss(predicted_vector.view(1,-1), torch.tensor([gold_label]))
			if loss is None:
				loss = example_loss
			else:
				loss += example_loss
		loss = loss / len(valid_data)
		print("Validation avg loss {}".format(loss))
		print("Validation completed for epoch {}".format(epoch + 1))
		print("Validation accuracy for epoch {}: {}".format(epoch + 1, correct / total))
		print("Validation time for this epoch: {}".format(time.time() - start_time))

	pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
	for p in model.parameters():
		if p.requires_grad:
			print(p.numel())
	print(pytorch_total_params)