Example #1
def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")
    batch_size = args["batch_size"]
    lr = args["learning_rate"]
    momentum = args["momentum"]
    epochs = args["train_epochs"]
    train_split = args["split_train"]

    loader = processData(args, stageFor="train", indices=labeled)

    net = NeuralNet()
    net = net.to(device=device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=float(lr), momentum=momentum)

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        net.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    net.train()

    for epoch in tqdm(range(args["train_epochs"]), desc="Training"):

        running_loss = 0

        for i, batch in enumerate(loader, start=0):
            data, labels = batch

            data = data.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = net(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if i % 1000 == 999:  # report the running average every 1000 mini-batches
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 1000
                    ),
                    end="\r",
                )
                running_loss = 0

    print("Finished Training. Saving the model as {}".format(ckpt_file))

    ckpt = {"model": net.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))

    return
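# A minimal usage sketch (assumed values, not from the original repo) showing how this
# train step might be invoked: "args" carries the keys read above, "labeled" lists the
# indices of the currently labeled samples, and the saved ckpt_file name can be passed
# back in as resume_from on a later call to continue from that checkpoint.
if __name__ == "__main__":
    args = {
        "batch_size": 64,
        "learning_rate": 0.01,
        "momentum": 0.9,
        "train_epochs": 5,
        "split_train": 0.8,
        "EXPT_DIR": "./experiments",
    }
    train(args, labeled=list(range(1000)), resume_from=None, ckpt_file="ckpt_0")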
Example #2
def train():
    for i, (train_idx, valid_idx) in enumerate(splits):    
        # split data into train / validation according to the KFold indices
        # also, convert them to a torch tensor and store them on the GPU (done with .cuda())
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        features = np.array(features)

        x_train_fold = torch.tensor(x_train[train_idx.astype(int)], dtype=torch.long).cuda()
        y_train_fold = torch.tensor(y_train[train_idx.astype(int), np.newaxis], dtype=torch.float32).cuda()

        kfold_X_features = features[train_idx.astype(int)]
        kfold_X_valid_features = features[valid_idx.astype(int)]
        x_val_fold = torch.tensor(x_train[valid_idx.astype(int)], dtype=torch.long).cuda()
        y_val_fold = torch.tensor(y_train[valid_idx.astype(int), np.newaxis], dtype=torch.float32).cuda()

    #     model = BiLSTM(lstm_layer=2,hidden_dim=40,dropout=DROPOUT).cuda()
        model = NeuralNet()

        # make sure everything in the model is running on the GPU
        model.cuda()

        # define binary cross entropy loss
        # note that the model returns logits to take advantage of the log-sum-exp trick
        # for numerical stability in the loss
        loss_fn = torch.nn.BCEWithLogitsLoss(reduction='sum')

        step_size = 300
        base_lr, max_lr = 0.001, 0.003   
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), 
                                 lr=max_lr)

        ################################################################################################
        scheduler = CyclicLR(optimizer, base_lr=base_lr, max_lr=max_lr,
                   step_size=step_size, mode='exp_range',
                   gamma=0.99994)
        ###############################################################################################

        train = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
        valid = torch.utils.data.TensorDataset(x_val_fold, y_val_fold)

        train = MyDataset(train)
        valid = MyDataset(valid)

        # shuffle the training fold each epoch; the validation fold below keeps its
        # original order so predictions can be written back to valid_preds_fold by index
        train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)

        valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)

        print(f'Fold {i + 1}')
        for epoch in range(n_epochs):
            # set train mode of the model. This enables operations which are only applied during training like dropout
            start_time = time.time()
            model.train()

            avg_loss = 0.  
            for i, (x_batch, y_batch, index) in enumerate(train_loader):
                # Forward pass: compute predicted y by passing x to the model.
                ################################################################################################            
                f = kfold_X_features[index]
                y_pred = model([x_batch,f])
                ################################################################################################

                ################################################################################################

                if scheduler:
                    scheduler.batch_step()
                ################################################################################################


                # Compute and print loss.
                loss = loss_fn(y_pred, y_batch)

                # Before the backward pass, use the optimizer object to zero all of the
                # gradients for the Tensors it will update (which are the learnable weights
                # of the model)
                optimizer.zero_grad()

                # Backward pass: compute gradient of the loss with respect to model parameters
                loss.backward()

                # Calling the step function on an Optimizer makes an update to its parameters
                optimizer.step()
                avg_loss += loss.item() / len(train_loader)

            # set evaluation mode of the model. This disables operations which are only applied during training, like dropout
            model.eval()

            # predict all the samples in y_val_fold batch per batch
            valid_preds_fold = np.zeros((x_val_fold.size(0)))
            test_preds_fold = np.zeros((len(df_test)))

            avg_val_loss = 0.
            for i, (x_batch, y_batch, index) in enumerate(valid_loader):
                f = kfold_X_valid_features[index]
                y_pred = model([x_batch,f]).detach()

                avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
                valid_preds_fold[i * batch_size:(i+1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]

            elapsed_time = time.time() - start_time 
            print('Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t time={:.2f}s'.format(
                epoch + 1, n_epochs, avg_loss, avg_val_loss, elapsed_time))
        avg_losses_f.append(avg_loss)
        avg_val_losses_f.append(avg_val_loss) 
        # predict all samples in the test set batch per batch
        for i, (x_batch,) in enumerate(test_loader):
            f = test_features[i * batch_size:(i+1) * batch_size]
            y_pred = model([x_batch,f]).detach()

            test_preds_fold[i * batch_size:(i+1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]

        train_preds[valid_idx] = valid_preds_fold
        test_preds += test_preds_fold / len(splits)

    print('All \t loss={:.4f} \t val_loss={:.4f} \t '.format(np.average(avg_losses_f),np.average(avg_val_losses_f)))
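# A small standalone sketch (not part of the snippet above) of the numerical-stability
# point made in the loss_fn comment: BCEWithLogitsLoss applies the log-sum-exp trick to
# raw logits, while sigmoid followed by BCELoss saturates and clamps for extreme logits.
import torch

logits = torch.tensor([[-200.0], [0.5]])
targets = torch.tensor([[1.0], [0.0]])

stable = torch.nn.BCEWithLogitsLoss(reduction="sum")(logits, targets)
naive = torch.nn.BCELoss(reduction="sum")(torch.sigmoid(logits), targets)
# sigmoid(-200) underflows to 0 and BCELoss clamps its log term, so the two results
# diverge (roughly 200.97 vs 100.97); the logits version keeps the exact value.
print(stable.item(), naive.item())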
Example #3

def ShowResult(net, dataReader, title):
    # draw train data
    X, Y = dataReader.XTrain, dataReader.YTrain
    plt.plot(X[:, 0], Y[:, 0], '.', c='b')
    # evaluate the network on a dense grid of inputs and plot the fitted curve
    TX = np.linspace(0, 1, 100).reshape(100, 1)
    TY = net.inference(TX)
    plt.plot(TX, TY, 'x', c='r')
    plt.title(title)
    plt.show()


if __name__ == '__main__':
    dataReader = DataReader(train_data_name, test_data_name)
    dataReader.ReadData()
    dataReader.GenerateValidationSet()

    n_input, n_hidden, n_output = 1, 3, 1
    eta, batch_size, max_epoch = 0.5, 10, 10000
    eps = 0.001

    hp = HyperParameters(n_input, n_hidden, n_output, eta, max_epoch,
                         batch_size, eps, NetType.Fitting,
                         InitialMethod.Xavier)
    net = NeuralNet(hp, "save")

    net.train(dataReader, 50, True)
    net.ShowTrainingHistory()
    ShowResult(net, dataReader, hp.toString())
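# A minimal sketch (not the library's InitialMethod.Xavier code) of what Xavier
# initialization typically does for a fully connected layer: weights are drawn
# uniformly in [-limit, limit] with limit = sqrt(6 / (fan_in + fan_out)).
import numpy as np

def xavier_init(fan_in, fan_out):
    limit = np.sqrt(6.0 / (fan_in + fan_out))
    return np.random.uniform(-limit, limit, size=(fan_in, fan_out))

W1 = xavier_init(1, 3)  # input -> hidden, matching n_input=1, n_hidden=3
W2 = xavier_init(3, 1)  # hidden -> output, matching n_hidden=3, n_output=1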
Example #4
                 entailment, device).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=learning_rate,
                             betas=(0.9, 0.999),
                             weight_decay=weight_decay)

c_train = math.ceil(len(token_data["train"]["Stance"]) / train_batch)
# c_dev = math.ceil(len(token_data["dev"]["Stance"])/dev_batch)
c_test = math.ceil(len(token_data["test"]["Stance"]) / test_batch)
# train
print("Start Train")
for epoch in range(num_epoch):
    model.train()
    loss_sum = 0
    accuracy_sum = 0
    for premise, hypothesis, label, sim, entail in batcher(
            token_data["train"], tfidf_cossim["train"],
            entailment_rep["train"], train_batch):
        x_p = torch.tensor(premise, dtype=torch.long, device=device)
        x_h = torch.tensor(hypothesis, dtype=torch.long, device=device)
        y = torch.tensor(label, dtype=torch.long, device=device)
        x_similarity = torch.tensor(sim, dtype=torch.float, device=device)
        x_entailment = torch.stack(entail, dim=0).to(device)
        outputs = model(x_p, x_h, x_similarity, x_entailment)
        optimizer.zero_grad()
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
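        # (Sketch, not in the original excerpt.) loss_sum and accuracy_sum are
        # initialized above but never updated in the lines shown; a plausible
        # per-batch accumulation and per-epoch report would be:
        loss_sum += loss.item()
        accuracy_sum += (outputs.argmax(dim=1) == y).float().mean().item()
    print("epoch {}: train loss {:.4f}, train accuracy {:.4f}".format(
        epoch + 1, loss_sum / c_train, accuracy_sum / c_train))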
Example #5
"""
Here we are predicting the output for the fizzbuzz game
with our neural network.
"""
import numpy as np

from model import NeuralNet
from generate_data import inputs, labels

# Min is 1 and max is 1024
first = 101
last = 1024
X = inputs(first, last)
y = labels(first, last)
model = NeuralNet(input_shape=(10, ))
model.compile(lr=0.001)
model.train(X, y, batch_size=32, epochs=1000)

first_test = 1
last_test = 100
X_test = inputs(first_test, last_test)
y_pred = model.predict(X_test)
for i, n in enumerate(range(first_test, last_test + 1)):
    if y_pred[i] == 0:
        pred = n
    elif y_pred[i] == 1:
        pred = "fizz"
    elif y_pred[i] == 2:
        pred = "buzz"
    else:
        pred = "fizzbuzz"
    print(pred)
Example #6
def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")
    batch_size = args["batch_size"]
    lr = args["learning_rate"]
    momentum = args["momentum"]
    epochs = args["train_epochs"]

    train_split = args["split_train"]

    CSV_FILE = "./data/mushrooms.csv"
    dataset = MushroomDataset(CSV_FILE)

    train_dataset = torch.utils.data.Subset(
        dataset, list(range(int(train_split * len(dataset))))
    )

    train_subset = Subset(train_dataset, labeled)

    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)

    net = NeuralNet()
    net = net.to(device=device)

    criterion = torch.nn.BCELoss()
    optimizer = optim.SGD(net.parameters(), lr=float(lr), momentum=momentum)

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        net.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    net.train()

    for epoch in tqdm(range(args["train_epochs"]), desc="Training"):

        running_loss = 0

        for i, batch in enumerate(train_loader, start=0):
            data, labels = batch

            data = data.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = net(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if i % 1000 == 999:  # report the running average every 1000 mini-batches
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 1000
                    ),
                    end="\r",
                )
                running_loss = 0

    print("Finished Training. Saving the model as {}".format(ckpt_file))

    ckpt = {"model": net.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))

    return
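# A minimal sketch (toy data, not the mushroom CSV) of how the two nested Subsets above
# compose their indices: the inner Subset restricts the dataset to the first train_split
# fraction, and the outer Subset then picks the "labeled" indices within that restricted view.
from torch.utils.data import Subset

base = list(range(100))                          # stands in for MushroomDataset
train_dataset = Subset(base, list(range(80)))    # first 80% of the samples
train_subset = Subset(train_dataset, [0, 5, 7])  # "labeled" indices into the split
print(train_subset[1])                           # -> base[5], i.e. 5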
Example #7
# Each image is a 28x28 matrix of floats.
# Hence the first layer will have 28 * 28 = 784 neurons.
# The output layer will be a vector of 10 values (after a logistic sigmoid function)
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor()
)

model = NeuralNet(28*28, 10)
model.add_layer(300, relu)
model.add_layer(200, relu)
model.add_layer(10, logistic_sigmoid)

for img, label in training_data:
    input_data = img.numpy().flatten()
    result = model.process(input_data)

    # the error signal is the output minus the one-hot target:
    # subtract 1 from the entry at the true label, leave the others unchanged
    result[label] -= 1
    error = result
    model.train(error)
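# A hedged sketch of how accuracy might be checked on the held-out split, reusing only the
# model.process call shown above; numpy's argmax picks the most active of the 10 output
# neurons as the predicted class.
import numpy as np

correct = 0
for img, label in test_data:
    output = model.process(img.numpy().flatten())
    if int(np.argmax(output)) == label:
        correct += 1
print("test accuracy:", correct / len(test_data))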