def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")
    batch_size = args["batch_size"]
    lr = args["learning_rate"]
    momentum = args["momentum"]
    epochs = args["train_epochs"]
    train_split = args["split_train"]

    loader = processData(args, stageFor="train", indices=labeled)

    net = NeuralNet()
    net = net.to(device=device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=float(lr), momentum=momentum)

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        net.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    net.train()
    for epoch in tqdm(range(epochs), desc="Training"):
        running_loss = 0
        for i, batch in enumerate(loader, start=0):
            data, labels = batch
            data = data.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = net(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # report the running loss every 1000 batches
            if (i + 1) % 1000 == 0:
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 1000
                    ),
                    end="\r",
                )
                running_loss = 0

    print("Finished Training. Saving the model as {}".format(ckpt_file))
    ckpt = {"model": net.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))

    return
def train():
    for fold, (train_idx, valid_idx) in enumerate(splits):
        # split data into train / validation according to the KFold indices;
        # also convert them to torch tensors and store them on the GPU (done with .cuda())
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        features = np.array(features)

        x_train_fold = torch.tensor(x_train[train_idx.astype(int)], dtype=torch.long).cuda()
        y_train_fold = torch.tensor(y_train[train_idx.astype(int), np.newaxis], dtype=torch.float32).cuda()
        kfold_X_features = features[train_idx.astype(int)]
        kfold_X_valid_features = features[valid_idx.astype(int)]
        x_val_fold = torch.tensor(x_train[valid_idx.astype(int)], dtype=torch.long).cuda()
        y_val_fold = torch.tensor(y_train[valid_idx.astype(int), np.newaxis], dtype=torch.float32).cuda()

        # model = BiLSTM(lstm_layer=2, hidden_dim=40, dropout=DROPOUT).cuda()
        model = NeuralNet()
        # make sure everything in the model runs on the GPU
        model.cuda()

        # binary cross-entropy loss; the model returns logits so that
        # BCEWithLogitsLoss can use the log-sum-exp trick for numerical stability
        loss_fn = torch.nn.BCEWithLogitsLoss(reduction='sum')

        step_size = 300
        base_lr, max_lr = 0.001, 0.003
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=max_lr)
        scheduler = CyclicLR(optimizer, base_lr=base_lr, max_lr=max_lr,
                             step_size=step_size, mode='exp_range', gamma=0.99994)

        train = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
        valid = torch.utils.data.TensorDataset(x_val_fold, y_val_fold)
        train = MyDataset(train)
        valid = MyDataset(valid)

        # No need to shuffle the data again here: shuffling already happened when splitting for k-fold.
        train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)

        print(f'Fold {fold + 1}')

        for epoch in range(n_epochs):
            start_time = time.time()
            # set train mode: enables training-only operations such as dropout
            model.train()
            avg_loss = 0.
            for i, (x_batch, y_batch, index) in enumerate(train_loader):
                # forward pass: compute predicted y by passing x (and the extra features) to the model
                f = kfold_X_features[index]
                y_pred = model([x_batch, f])

                if scheduler:
                    scheduler.batch_step()

                # compute the loss
                loss = loss_fn(y_pred, y_batch)
                # zero the gradients of the tensors the optimizer will update
                # (the learnable weights of the model)
                optimizer.zero_grad()
                # backward pass: compute the gradient of the loss w.r.t. the model parameters
                loss.backward()
                # calling step() on the optimizer updates its parameters
                optimizer.step()

                avg_loss += loss.item() / len(train_loader)

            # set evaluation mode: disables training-only operations such as dropout
            model.eval()

            # predict all the samples in y_val_fold batch by batch
            valid_preds_fold = np.zeros((x_val_fold.size(0)))
            test_preds_fold = np.zeros((len(df_test)))
            avg_val_loss = 0.
            for i, (x_batch, y_batch, index) in enumerate(valid_loader):
                f = kfold_X_valid_features[index]
                y_pred = model([x_batch, f]).detach()
                avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
                valid_preds_fold[i * batch_size:(i + 1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]

            elapsed_time = time.time() - start_time
            print('Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t time={:.2f}s'.format(
                epoch + 1, n_epochs, avg_loss, avg_val_loss, elapsed_time))

        avg_losses_f.append(avg_loss)
        avg_val_losses_f.append(avg_val_loss)

        # predict all samples in the test set batch by batch
        for i, (x_batch,) in enumerate(test_loader):
            f = test_features[i * batch_size:(i + 1) * batch_size]
            y_pred = model([x_batch, f]).detach()
            test_preds_fold[i * batch_size:(i + 1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]

        train_preds[valid_idx] = valid_preds_fold
        test_preds += test_preds_fold / len(splits)

    print('All \t loss={:.4f} \t val_loss={:.4f} \t '.format(np.average(avg_losses_f), np.average(avg_val_losses_f)))
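# The loop above relies on two user-defined helpers that are not shown here: MyDataset,
# which wraps a dataset so each item also returns its index (used to look up the extra
# kfold_X_features per batch), and a numpy sigmoid used to turn logits into probabilities.
# A minimal sketch of both, under those assumptions:
import numpy as np
import torch.utils.data


class MyDataset(torch.utils.data.Dataset):
    """Wrap a dataset so that __getitem__ also returns the sample index."""

    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        data, target = self.dataset[index]
        return data, target, index


def sigmoid(x):
    # plain logistic function applied element-wise to a numpy array of logits
    return 1.0 / (1.0 + np.exp(-x))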
def ShowResult(net, dataReader, title):
    # draw the training data
    X, Y = dataReader.XTrain, dataReader.YTrain
    plt.plot(X[:, 0], Y[:, 0], '.', c='b')
    # create and draw visualized validation data
    TX = np.linspace(0, 1, 100).reshape(100, 1)
    TY = net.inference(TX)
    plt.plot(TX, TY, 'x', c='r')
    plt.title(title)
    plt.show()


if __name__ == '__main__':
    dataReader = DataReader(train_data_name, test_data_name)
    dataReader.ReadData()
    dataReader.GenerateValidationSet()

    n_input, n_hidden, n_output = 1, 3, 1
    eta, batch_size, max_epoch = 0.5, 10, 10000
    eps = 0.001

    hp = HyperParameters(n_input, n_hidden, n_output, eta, max_epoch, batch_size, eps,
                         NetType.Fitting, InitialMethod.Xavier)
    net = NeuralNet(hp, "save")

    net.train(dataReader, 50, True)
    net.ShowTrainingHistory()
    ShowResult(net, dataReader, hp.toString())
entailment, device).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=learning_rate, betas=(0.9, 0.999), weight_decay=weight_decay)

c_train = math.ceil(len(token_data["train"]["Stance"]) / train_batch)
# c_dev = math.ceil(len(token_data["dev"]["Stance"]) / dev_batch)
c_test = math.ceil(len(token_data["test"]["Stance"]) / test_batch)

# train
print("Start Train")
for epoch in range(num_epoch):
    model.train()
    loss_sum = 0
    accuracy_sum = 0
    for premise, hypothesis, label, sim, entail in batcher(
            token_data["train"], tfidf_cossim["train"], entailment_rep["train"], train_batch):
        x_p = torch.tensor(premise, dtype=torch.long, device=device)
        x_h = torch.tensor(hypothesis, dtype=torch.long, device=device)
        y = torch.tensor(label, dtype=torch.long, device=device)
        x_similarity = torch.tensor(sim, dtype=torch.float, device=device)
        x_entailment = torch.stack(entail, dim=0).to(device)

        outputs = model(x_p, x_h, x_similarity, x_entailment)

        optimizer.zero_grad()
        loss = criterion(outputs, y)
        loss.backward()
        optimizer.step()
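# The batcher generator used above is not shown. A minimal sketch under these assumptions:
# it slices the parallel lists into fixed-size batches; "Stance" is the label field (it
# appears above), while "Premise" and "Hypothesis" are guessed field names, and the
# entailment representation is assumed to be a list of tensors (so torch.stack works).
def batcher(split_data, cossim, entail_rep, batch_size):
    n = len(split_data["Stance"])
    for start in range(0, n, batch_size):
        end = start + batch_size
        yield (split_data["Premise"][start:end],      # hypothetical key
               split_data["Hypothesis"][start:end],   # hypothetical key
               split_data["Stance"][start:end],
               cossim[start:end],
               entail_rep[start:end])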
Here we are predicting the output of the FizzBuzz game with our neural network.
"""
import numpy as np

from model import NeuralNet
from generate_data import inputs, labels

# Numbers range from 1 to 1024: train on 101..1024 and hold out 1..100 for testing.
first = 101
last = 1024
X = inputs(first, last)
y = labels(first, last)

model = NeuralNet(input_shape=(10, ))
model.compile(lr=0.001)
model.train(X, y, batch_size=32, epochs=1000)

first_test = 1
last_test = 100
X_test = inputs(first_test, last_test)
y_pred = model.predict(X_test)

# map each predicted class back to the FizzBuzz output and print it
for i, n in enumerate(range(first_test, last_test + 1)):
    if y_pred[i] == 0:
        pred = n
    elif y_pred[i] == 1:
        pred = "fizz"
    elif y_pred[i] == 2:
        pred = "buzz"
    else:
        pred = "fizzbuzz"
    print(pred)
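# The helpers imported from generate_data are not shown. A minimal sketch of what they
# might look like, assuming inputs() encodes each integer as a 10-bit binary vector
# (matching input_shape=(10,)) and labels() maps each integer to one of four classes
# (0: the number itself, 1: fizz, 2: buzz, 3: fizzbuzz), as the prediction loop expects:
import numpy as np


def inputs(first, last):
    # 10-bit binary encoding (least significant bit first) of each integer in [first, last]
    return np.array([[(n >> d) & 1 for d in range(10)] for n in range(first, last + 1)])


def labels(first, last):
    def label(n):
        if n % 15 == 0:
            return 3  # fizzbuzz
        if n % 5 == 0:
            return 2  # buzz
        if n % 3 == 0:
            return 1  # fizz
        return 0      # the number itself
    return np.array([label(n) for n in range(first, last + 1)])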
def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")
    batch_size = args["batch_size"]
    lr = args["learning_rate"]
    momentum = args["momentum"]
    epochs = args["train_epochs"]
    train_split = args["split_train"]

    CSV_FILE = "./data/mushrooms.csv"
    dataset = MushroomDataset(CSV_FILE)
    train_dataset = torch.utils.data.Subset(
        dataset, list(range(int(train_split * len(dataset))))
    )
    train_subset = Subset(train_dataset, labeled)
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)

    net = NeuralNet()
    net = net.to(device=device)
    criterion = torch.nn.BCELoss()
    optimizer = optim.SGD(net.parameters(), lr=float(lr), momentum=momentum)

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        net.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    net.train()
    for epoch in tqdm(range(epochs), desc="Training"):
        running_loss = 0
        for i, batch in enumerate(train_loader, start=0):
            data, labels = batch
            data = data.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = net(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # report the running loss every 1000 batches
            if (i + 1) % 1000 == 0:
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 1000
                    ),
                    end="\r",
                )
                running_loss = 0

    print("Finished Training. Saving the model as {}".format(ckpt_file))
    ckpt = {"model": net.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))

    return
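# A minimal sketch of how this train step might be invoked. The args keys mirror the
# ones read above, but the concrete values, the EXPT_DIR path, the labeled indices,
# and the checkpoint name are all hypothetical:
if __name__ == "__main__":
    args = {
        "batch_size": 64,             # hypothetical value
        "learning_rate": 0.01,        # hypothetical value
        "momentum": 0.9,              # hypothetical value
        "train_epochs": 5,            # hypothetical value
        "split_train": 0.8,           # hypothetical value
        "EXPT_DIR": "./experiments",  # hypothetical directory
    }
    labeled = list(range(100))  # indices of the currently labeled samples
    train(args, labeled=labeled, resume_from=None, ckpt_file="ckpt_0")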
# Each image is a 28x28 matrix of floats,
# so the first layer has 28 * 28 = 784 neurons.
# The output layer is a vector of 10 values (after a logistic sigmoid function).
training_data = datasets.FashionMNIST(
    root="data", train=True, download=True, transform=ToTensor()
)
test_data = datasets.FashionMNIST(
    root="data", train=False, download=True, transform=ToTensor()
)

model = NeuralNet(28 * 28, 10)
model.add_layer(300, relu)
model.add_layer(200, relu)
model.add_layer(10, logistic_sigmoid)

for img, label in training_data:
    input_data = img.numpy().flatten()
    result = model.process(input_data)
    # error signal = output - one_hot(label): subtract 1 at the true class
    result[label] -= 1
    error = result
    model.train(error)
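# The test split is loaded above but never used. A minimal evaluation sketch, assuming
# model.process returns the 10-dimensional output vector (as in the training loop) and
# that the predicted class is its argmax; the accuracy bookkeeping below is hypothetical:
import numpy as np

correct = 0
for img, label in test_data:
    output = model.process(img.numpy().flatten())
    if int(np.argmax(output)) == label:
        correct += 1
print("test accuracy: {:.4f}".format(correct / len(test_data)))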