def __init__(self, generator=MLP(), discriminator=MLP()):
    # Note: the MLP() defaults are evaluated once, at function definition time.
    super().__init__()
    if generator is not None and discriminator is not None:
        self.generator = generator
        self.discriminator = discriminator
        self.layers = self.generator.layers + self.discriminator.layers
        self.generator.loss = CrossEntropy()
        self.discriminator.loss = CrossEntropy()
def train_variational_autoencoder(
    learning_rate: float,
    epochs: int,
    batch_size: int,
    latent_variables: int = 10,
    print_every: int = 50,
) -> None:
    print(
        f"Training a variational autoencoder for {epochs} epochs with batch size {batch_size}"
    )
    data_loader = DataLoader(batch_size)
    image_loss = CrossEntropy()
    divergence_loss = KLDivergenceStandardNormal()
    encoder_mean = Model([Linear(784, 50), ReLU(), Linear(50, latent_variables)])
    encoder_variance = Model(
        [Linear(784, 50), ReLU(), Linear(50, latent_variables), Exponential()]
    )
    reparameterization = Reparameterization()
    decoder = Model([Linear(latent_variables, 50), ReLU(), Linear(50, 784)])

    for i in range(epochs):
        # One training loop
        training_data = data_loader.get_training_data()
        for j, batch in enumerate(training_data):
            input, target = batch

            # Forward pass
            mean = encoder_mean(input)
            variance = encoder_variance(input)
            z = reparameterization(mean=mean, variance=variance)
            generated_samples = decoder(z)

            # Loss calculation
            divergence_loss_value = divergence_loss(mean, variance)
            generation_loss = image_loss(generated_samples, input)
            if j % print_every == 0:
                print(
                    f"Epoch {i+1}/{epochs}, "
                    f"training iteration {j+1}/{len(training_data)}"
                )
                print(
                    f"KL loss {np.round(divergence_loss_value, 2)}\t"
                    f"Generation loss {np.round(generation_loss, 2)}"
                )

            # Backward pass
            decoder_gradient = image_loss.gradient()
            decoder_gradient = decoder.backward(decoder_gradient)
            decoder_mean_gradient, decoder_variance_gradient = reparameterization.backward(
                decoder_gradient
            )
            encoder_mean_gradient, encoder_variance_gradient = (
                divergence_loss.gradient()
            )
            encoder_mean.backward(decoder_mean_gradient + encoder_mean_gradient)
            encoder_variance.backward(
                decoder_variance_gradient + encoder_variance_gradient
            )
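# A minimal sketch of the reparameterization trick that the Reparameterization layer
# above is assumed to implement (z = mean + sqrt(variance) * eps). This is an
# illustration under the assumption of NumPy arrays, not the project's actual class.
import numpy as np

class ReparameterizationSketch:
    def __call__(self, mean, variance):
        # Sample the noise once and cache it for the backward pass.
        self.eps = np.random.standard_normal(mean.shape)
        self.variance = variance
        return mean + np.sqrt(variance) * self.eps

    def backward(self, gradient):
        # dz/dmean = 1, dz/dvariance = eps / (2 * sqrt(variance))
        mean_gradient = gradient
        variance_gradient = gradient * self.eps / (2.0 * np.sqrt(self.variance))
        return mean_gradient, variance_gradient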
def train(net: NeuralNetwork,
          inputs: Tensor,
          targets: Tensor,
          num_epochs: int = 5000,
          iterator: DataIterator = BatchIterator(),
          loss: Loss = CrossEntropy(),
          optimizer: Optimizer = MBGD(),
          showGraph: bool = False) -> None:
    losses = []
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch in iterator(inputs, targets):
            for X, Y in zip(batch.inputs, batch.targets):
                predicted = net.forward(X)
                epoch_loss += loss.loss(predicted, Y)
                grad = loss.grad(predicted, Y)
                net.backwards(grad)
            optimizer.step(net)
        print(epoch, epoch_loss)
        losses.append(epoch_loss)
        if epoch_loss < 300:
            pass  # no-op placeholder (e.g. for an early-stopping check)
    if showGraph:
        plt.plot(losses)
        plt.show()
def train(epoch, model, optim, trainloader):
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    model.train()
    end = time.time()
    cross_entropy = CrossEntropy(num_classes=num_classes)
    triplet_loss_fn = TripletLoss(margin=margin)

    model.fc0.train(True)
    model.fc1.train(False)
    output_fc = "fc0"
    model.base.train(True)

    for batch, (imgs, pids, _) in enumerate(trainloader):
        imgs, pids = imgs.cuda(), pids.cuda()
        data_time.update(time.time() - end)
        clf_outputs, features = model(imgs)

        if isinstance(clf_outputs[output_fc], tuple):
            cross_entropy_loss = DeepSuperVision(cross_entropy, clf_outputs[output_fc], pids)
        else:
            cross_entropy_loss = cross_entropy(clf_outputs[output_fc], pids)

        if isinstance(features, tuple):
            triplet_loss = DeepSuperVision(triplet_loss_fn, features, pids)
        else:
            # The triplet loss operates on the embedding features
            # (the original passed clf_outputs[output_fc] here, which looks like a bug).
            triplet_loss = triplet_loss_fn(features, pids)

        loss = cross_entropy_loss + triplet_loss

        optim.zero_grad()
        loss.backward()
        optim.step()

        batch_time.update(time.time() - end)
        end = time.time()
        losses.update(loss.item(), pids.size(0))

        if (batch + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch + 1, batch + 1, len(trainloader),
                      batch_time=batch_time, data_time=data_time, loss=losses))
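# AverageMeter is referenced throughout these snippets but not defined here. The
# usual helper tracks the latest value and a running average; a minimal sketch
# (an assumption, the project's own class may differ):
class AverageMeterSketch:
    def __init__(self):
        self.val = 0.0    # most recent value
        self.sum = 0.0    # weighted sum of all values
        self.count = 0    # total weight (e.g. number of samples)
        self.avg = 0.0    # running average

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count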
def train_classifier(learning_rate: float, epochs: int, batch_size: int,
                     print_every: int = 50) -> None:
    data_loader = DataLoader(batch_size)
    loss = CrossEntropy()
    model = Model([Linear(784, 50), ReLU(), Linear(50, 10)])

    for i in range(epochs):
        # One training loop
        training_data = data_loader.get_training_data()
        validation_data = data_loader.get_validation_data()
        for j, batch in enumerate(training_data):
            input, target = batch
            y = model(input)
            loss(y, target)
            gradient = loss.gradient()
            model.backward(gradient)
            model.update(learning_rate)
            if j % print_every == 0:
                print(
                    f"Epoch {i+1}/{epochs}, training iteration {j+1}/{len(training_data)}"
                )

        accuracy_values = []
        loss_values = []
        # One validation loop
        for j, batch in enumerate(validation_data):
            input, target = batch
            y = model(input)
            loss_value = loss(y, target)
            accuracy = calculate_accuracy(y, target)
            accuracy_values.append(accuracy)
            loss_values.append(loss_value)
        print(
            f"Epoch {i+1}: loss {np.round(np.average(loss_values), 2)}, "
            f"accuracy {np.round(np.average(accuracy_values), 2)}"
        )
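# calculate_accuracy is not shown in this snippet. Assuming a (batch, num_classes)
# array of scores and one-hot targets, a minimal sketch could be:
import numpy as np

def calculate_accuracy_sketch(predictions, targets):
    # Fraction of samples whose argmax prediction matches the argmax target.
    predicted_classes = np.argmax(predictions, axis=1)
    target_classes = np.argmax(targets, axis=1)
    return float(np.mean(predicted_classes == target_classes))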
def train(epoch, model, optim, trainloader):
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    model.train()
    end = time.time()
    cross_entropy = CrossEntropy(num_classes=num_classes)
    triplet_loss_fn = TripletLoss(margin=margin)

    model.fc0.train(False)
    model.fc1.train(True)
    output_fc = "fc1"
    model.base.train(True)

    ################################################
    person_per_batch = 8
    imgs_per_person = 4
    bmask = []
    l_all_pos = []
    l_all_neg = []
    pos_targets = torch.Tensor()
    neg_targets = torch.Tensor()
    C_pos = torch.zeros([train_batch, 256, 2, 4], device=device)
    C_neg = torch.zeros([train_batch, 256, 2, 4], device=device)
    ################################################

    for batch, (imgs, pids, camids) in enumerate(trainloader):
        # imgs, pids = imgs.cuda(), pids.cuda()
        pids = torch.Tensor.numpy(pids)
        camids = torch.Tensor.numpy(camids)
        uid = list(set(pids))
        mask = np.zeros(
            [2 * person_per_batch, person_per_batch * imgs_per_person])

        for i in range(len(uid)):
            sel = uid[i]
            pos = -1
            neg = -1
            k = -1
            # anchor: first image with the selected identity
            for j in range(len(pids)):
                if pids[j] == sel:
                    k = j
                    break
            # positive: same identity, different camera ID
            for j in range(len(pids)):
                if pids[k] == pids[j] and camids[k] != camids[j]:
                    pos = j
                    break
            # negative: different identity
            for j in range(len(pids)):
                if pids[k] != pids[j]:
                    neg = j
                    break
            mask[2 * i][k] = 1
            mask[2 * i][pos] = 1
            mask[2 * i + 1][k] = 1
            mask[2 * i + 1][neg] = 1

        bmask.append(mask)

        l_batch_pos = []
        l_batch_neg = []
        kl = mask  # bmask[batch]
        for i in range(len(kl)):
            l5 = []
            for j in range(len(kl[i])):
                if kl[i][j] == 1:
                    l5.append(j)
            if i % 2 < 1:
                l_batch_pos.append(l5)
            else:
                l_batch_neg.append(l5)
        l_all_pos.append(l_batch_pos)
        l_all_neg.append(l_batch_neg)

        data_time.update(time.time() - end)
        clf_outputs = model(imgs.cuda())

        f = activation['fc1.conv2']
        f = f.permute(0, 3, 1, 2)
        m = nn.AdaptiveAvgPool2d((256, 2))
        f = m(f)
        f = f.permute(0, 2, 3, 1)
        fc1 = clf_outputs[output_fc]

        # accumulate positive-pair feature differences
        for i in range(len(l_batch_pos)):
            pos_idx0 = l_batch_pos[i][0]
            pos_idx1 = l_batch_pos[i][1]
            pos_targets = torch.sub(f[pos_idx1], f[pos_idx0])
            C_pos += pos_targets

        # accumulate negative-pair feature differences
        for i in range(len(l_batch_neg)):
            neg_idx0 = l_batch_neg[i][0]
            neg_idx1 = l_batch_neg[i][1]
            neg_targets = torch.sub(f[neg_idx1], f[neg_idx0])
            C_neg += neg_targets

        g = Flatten(C_pos)
        y = Flatten(C_neg)
        u = g - y                    # (bs, 2048)
        v = torch.unsqueeze(u, 2)    # (64, 2048, 1)
        w = v.permute(0, 2, 1)       # (64, 1, 2048)
        x_net = torch.matmul(v, w)   # (64, 2048, 2048)
        y = torch.sum(x_net)
        y = F.relu(y)

        alpha = 1e-9
        beta = 0
        covariance_loss = 1 * (alpha * y - beta)

        pids = torch.from_numpy(pids)
        pids = pids.cuda()

        if isinstance(fc1, tuple):
            cross_entropy_loss = DeepSuperVision(cross_entropy, fc1, pids)
        else:
            cross_entropy_loss = cross_entropy(fc1, pids)

        # Alternative triplet-loss term kept from the original, currently unused:
        # if isinstance(f, tuple):
        #     triplet = DeepSuperVision(triplet_loss_fn, f, pids)
        # else:
        #     triplet = triplet_loss_fn(f, pids)

        # print("xent", cross_entropy_loss)
        # print("covariance", covariance_loss)
        loss = cross_entropy_loss + covariance_loss

        optim.zero_grad()
        loss.backward()
        optim.step()

        batch_time.update(time.time() - end)
        end = time.time()
        losses.update(loss.item(), pids.size(0))

        if (batch + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch + 1, batch + 1, len(trainloader),
                      batch_time=batch_time, data_time=data_time, loss=losses))
def loss_fn(num_classes, logits, labels):
    return CrossEntropy(num_classes=num_classes)(logits, labels)
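# In the re-ID snippets, CrossEntropy is constructed with num_classes, which usually
# indicates a label-smoothing variant. A minimal PyTorch sketch of such a loss
# (an assumption for illustration, not the repository's class):
import torch
import torch.nn as nn

class CrossEntropyLabelSmoothSketch(nn.Module):
    def __init__(self, num_classes, epsilon=0.1):
        super().__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, logits, targets):
        # logits: (batch, num_classes); targets: (batch,) integer class labels
        log_probs = self.logsoftmax(logits)
        one_hot = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        smoothed = (1 - self.epsilon) * one_hot + self.epsilon / self.num_classes
        return (-smoothed * log_probs).sum(dim=1).mean()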
def train(epoch, model, optim, trainloader):
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    model.train()
    end = time.time()
    cross_entropy = CrossEntropy(num_classes=num_classes)
    triplet_loss_fn = TripletLoss(margin=margin)

    model.fc0.train(False)
    model.fc1.train(True)
    output_fc = "fc1"
    model.base.train(True)

    ################################################
    person_per_batch = 8
    imgs_per_person = 4
    bmask = []
    l_all_pos = []
    l_all_neg = []
    pos_targets = torch.Tensor()
    neg_targets = torch.Tensor()
    C_pos0 = torch.zeros([train_batch, 256, 2, 4], device=device)
    C_pos1 = torch.zeros([train_batch, 256, 2, 4], device=device)
    C_neg0 = torch.zeros([train_batch, 256, 2, 4], device=device)
    C_neg1 = torch.zeros([train_batch, 256, 2, 4], device=device)
    ################################################

    for batch, (imgs, pids, camids) in enumerate(trainloader):
        # imgs, pids = imgs.cuda(), pids.cuda()
        pids = torch.Tensor.numpy(pids)
        camids = torch.Tensor.numpy(camids)
        uid = list(set(pids))
        mask = np.zeros(
            [2 * person_per_batch, person_per_batch * imgs_per_person])

        for i in range(len(uid)):
            sel = uid[i]
            pos = -1
            neg = -1
            k = -1
            # anchor: first image with the selected identity
            for j in range(len(pids)):
                if pids[j] == sel:
                    k = j
                    break
            # positive: same identity, different camera ID
            for j in range(len(pids)):
                if pids[k] == pids[j] and camids[k] != camids[j]:
                    pos = j
                    break
            # negative: different identity
            for j in range(len(pids)):
                if pids[k] != pids[j]:
                    neg = j
                    break
            mask[2 * i][k] = 1
            mask[2 * i][pos] = 1
            mask[2 * i + 1][k] = 1
            mask[2 * i + 1][neg] = 1

        bmask.append(mask)

        l_batch_pos = []
        l_batch_neg = []
        kl = mask  # bmask[batch]
        for i in range(len(kl)):
            l5 = []
            for j in range(len(kl[i])):
                if kl[i][j] == 1:
                    l5.append(j)
            if i % 2 < 1:
                l_batch_pos.append(l5)
            else:
                l_batch_neg.append(l5)
        l_all_pos.append(l_batch_pos)
        l_all_neg.append(l_batch_neg)

        data_time.update(time.time() - end)
        clf_outputs = model(imgs.cuda())

        f0 = activation['fc0.conv2']  # (bs, 2048, 8, 4)
        f1 = activation['fc1.conv2']
        f0 = f0.permute(0, 3, 1, 2)
        f1 = f1.permute(0, 3, 1, 2)
        m = nn.AdaptiveAvgPool2d((256, 2))
        f0 = m(f0)
        f1 = m(f1)
        f0 = f0.permute(0, 2, 3, 1)
        f1 = f1.permute(0, 2, 3, 1)
        fc1 = clf_outputs[output_fc]

        # Computing positive samples
        for i in range(len(l_batch_pos)):
            pos_idx0 = l_batch_pos[i][0]
            pos_idx1 = l_batch_pos[i][1]
            pos_targets0 = torch.sub(f0[pos_idx1], f0[pos_idx0])
            pos_targets1 = torch.sub(f1[pos_idx1], f1[pos_idx0])
            C_pos0 += pos_targets0
            C_pos1 += pos_targets1

        # Computing negative samples
        for i in range(len(l_batch_neg)):
            neg_idx0 = l_batch_neg[i][0]
            neg_idx1 = l_batch_neg[i][1]
            neg_targets0 = torch.sub(f0[neg_idx1], f0[neg_idx0])
            neg_targets1 = torch.sub(f1[neg_idx1], f1[neg_idx0])
            C_neg0 += neg_targets0
            C_neg1 += neg_targets1

        g0 = Flatten(C_pos0)
        g1 = Flatten(C_pos1)
        y0 = Flatten(C_neg0)
        y1 = Flatten(C_neg1)
        u0 = g0 - y0                   # (bs, 2048)
        u1 = g1 - y1
        v0 = torch.unsqueeze(u0, 2)    # (64, 2048, 1)
        v1 = torch.unsqueeze(u1, 2)
        w0 = v0.permute(0, 2, 1)       # (64, 1, 2048)
        w1 = v1.permute(0, 2, 1)
        x_net0 = torch.matmul(v0, w0)  # (64, 2048, 2048)
        x_net1 = torch.matmul(v1, w1)
        r0 = torch.sum(x_net0)
        r1 = torch.sum(x_net1)
        r0_hinge = F.relu(r0)
        r1_hinge = F.relu(r1)

        alpha = 1e-9
        beta = 0
        covariance_loss = 1 * (alpha * r0_hinge - beta)
        domain_g = 1 * (alpha * (r1_hinge - r0_hinge) - beta)

        pids = torch.from_numpy(pids)
        pids = pids.cuda()

        if isinstance(fc1, tuple):
            cross_entropy_loss = DeepSuperVision(cross_entropy, fc1, pids)
        else:
            cross_entropy_loss = cross_entropy(fc1, pids)

        loss = cross_entropy_loss + covariance_loss + domain_g

        optim.zero_grad()
        loss.backward()
        optim.step()

        batch_time.update(time.time() - end)
        end = time.time()
        losses.update(loss.item(), pids.size(0))

        if (batch + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch + 1, batch + 1, len(trainloader),
                      batch_time=batch_time, data_time=data_time, loss=losses))
def main(args):
    total_size = 2000
    train_size = 1000
    test_size = 1000
    data, target = generate_disc_set(total_size, random_state=1)
    train_data, train_target = data[:train_size], target[:train_size]
    test_data, test_target = data[test_size:], target[test_size:]

    colours = ['blue', 'green', 'red']

    def colour_labels(labels):
        return list(map(lambda x: colours[x], labels))

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.scatter(train_data[:, 0], train_data[:, 1],
                c=colour_labels(train_target.argmax(1)), edgecolors='none')
    plt.title('Train Data')
    plt.xlabel(r'$x_{1}$')
    plt.ylabel(r'$x_{2}$')
    plt.subplot(1, 2, 2)
    plt.scatter(test_data[:, 0], test_data[:, 1],
                c=colour_labels(test_target.argmax(1)), edgecolors='none')
    plt.title('Test Data')
    plt.xlabel(r'$x_{1}$')
    plt.ylabel(r'$x_{2}$')
    plt.pause(1)
    plt.show(block=False)

    if args.loss == 'mse':
        net_loss = MSE()
        net = Sequential(DenseLayer(2, 25), ReLU(),
                         DenseLayer(25, 25), ReLU(),
                         DenseLayer(25, 25), ReLU(),
                         DenseLayer(25, 2))
    elif args.loss == 'softmax_loss':
        net_loss = CrossEntropy()
        net = Sequential(DenseLayer(2, 25), ReLU(),
                         DenseLayer(25, 25), ReLU(),
                         DenseLayer(25, 25), ReLU(),
                         DenseLayer(25, 2), SoftMax())
    else:
        raise ValueError(
            args.loss + " is an invalid loss. Please use either 'mse' or 'softmax_loss'.")

    def sgd(x, dx, config):
        for cur_layer_x, cur_layer_dx in zip(x, dx):
            for cur_x, cur_dx in zip(cur_layer_x, cur_layer_dx):
                cur_old_grad = config['learning_rate'] * cur_dx
                if cur_old_grad.shape[0] == 1:
                    cur_x = cur_x.reshape(cur_old_grad.shape)
                cur_x.add_(-cur_old_grad)

    def train_model(model, model_loss, train_data, train_target,
                    lr=0.005, batch_size=1, n_epoch=50):
        optimizer_config = {'learning_rate': lr}
        train_loss_history = []
        test_loss_history = []
        for i in range(n_epoch):
            loss = 0
            k = 0
            for x_batch, y_batch in get_batches(train_data, train_target, batch_size):
                model.zero_grad_params()

                # Forward
                pred = model.forward(x_batch)
                loss += model_loss.forward(pred, y_batch)

                # Backward
                lg = model_loss.backward(pred, y_batch)
                model.backward(lg)

                # Update weights
                sgd(net.get_params(), net.get_grad_params(), optimizer_config)
                k += 1
            train_loss_history.append(loss / k)

            test_pred = model.forward(test_data)
            test_loss = model_loss.forward(test_pred, test_target)
            test_loss_history.append(test_loss)
            print('#Epoch {}: current train loss = {:.4f}'.format(
                i + 1, loss.item() / k))
        return train_loss_history, test_loss_history

    print('Training started...')
    train_loss_history, test_loss_history = train_model(net, net_loss,
                                                        train_data, train_target,
                                                        n_epoch=50)
    print('Final train loss: {:.4f}'.format(train_loss_history[-1]))
    print('Final test loss: {:.4f}'.format(test_loss_history[-1]))

    plt.figure(2, figsize=(8, 6))
    plt.title("Train and Test Loss")
    plt.xlabel("#Epochs")
    plt.ylabel("loss")
    plt.plot(train_loss_history, 'b')
    plt.plot(test_loss_history, 'r')
    plt.legend(['train loss', 'test loss'])
    plt.pause(1)
    plt.show(block=False)

    train_res = net.forward(train_data)
    errors_train = compute_nb_errors(train_res, train_target)
    print("Number of errors on the train set: " + str(errors_train))
    train_res = train_res.argmax(1)
    train_res[train_res != train_target.argmax(1)] = 2

    test_res = net.forward(test_data)
    errors_test = compute_nb_errors(test_res, test_target)
    print("Number of errors on the test set: " + str(errors_test))
    test_res = test_res.argmax(1)
    test_res[test_res != test_target.argmax(1)] = 2

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.scatter(train_data[:, 0], train_data[:, 1],
                c=colour_labels(train_res), edgecolors='none')
    plt.xlabel(r'$x_{1}$')
    plt.ylabel(r'$x_{2}$')
    plt.title(f'Train Data, {errors_train} errors')
    plt.subplot(1, 2, 2)
    plt.scatter(test_data[:, 0], test_data[:, 1],
                c=colour_labels(test_res), edgecolors='none')
    plt.xlabel(r'$x_{1}$')
    plt.ylabel(r'$x_{2}$')
    plt.title(f'Test Data, {errors_test} errors')
    plt.show()
def main():
    # generate data and translate labels
    train_features, train_targets = generate_all_datapoints_and_labels()
    test_features, test_targets = generate_all_datapoints_and_labels()
    train_labels, test_labels = convert_labels(train_targets), convert_labels(test_targets)

    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + ReLU + Linear + ReLU + Linear + Tanh')
    print('Loss: MSE')
    print('Optimizer: SGD')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')

    # build network, loss and optimizer for Model 1
    my_model_design_1 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                         Linear(25, 25), ReLU(), Linear(25, 2), Tanh()]
    my_model_1 = Sequential(my_model_design_1)
    optimizer_1 = SGD(my_model_1, lr=1e-3)
    criterion_1 = LossMSE()

    # train Model 1
    batch_size = 1
    for epoch in range(50):
        temp_train_loss_sum = 0.
        temp_test_loss_sum = 0.
        num_train_correct = 0
        num_test_correct = 0

        # trained in batch fashion: here batch size = 1
        for temp_batch in range(0, len(train_features), batch_size):
            temp_train_features = train_features.narrow(0, temp_batch, batch_size)
            temp_train_labels = train_labels.narrow(0, temp_batch, batch_size)
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_1.zero_grad()
                temp_train_feature = temp_train_features[i]
                temp_train_label = temp_train_labels[i]

                # forward pass to compute loss
                temp_train_pred = my_model_1.forward(temp_train_feature)
                temp_train_loss = criterion_1.forward(temp_train_pred, temp_train_label)
                temp_train_loss_sum += temp_train_loss

                _, temp_train_pred_cat = torch.max(temp_train_pred, 0)
                _, temp_train_label_cat = torch.max(temp_train_label, 0)
                if temp_train_pred_cat == temp_train_label_cat:
                    num_train_correct += 1

                # calculate gradient according to loss gradient
                temp_train_loss_grad = criterion_1.backward(temp_train_pred, temp_train_label)
                # accumulate parameter gradient in each batch
                my_model_1.backward(temp_train_loss_grad)
                # update parameters by optimizer
                optimizer_1.step()

        # evaluate the current model on the test set (only the forward pass is needed)
        for i_test in range(len(test_features)):
            temp_test_feature = test_features[i_test]
            temp_test_label = test_labels[i_test]

            temp_test_pred = my_model_1.forward(temp_test_feature)
            temp_test_loss = criterion_1.forward(temp_test_pred, temp_test_label)
            temp_test_loss_sum += temp_test_loss

            _, temp_test_pred_cat = torch.max(temp_test_pred, 0)
            _, temp_test_label_cat = torch.max(temp_test_label, 0)
            if temp_test_pred_cat == temp_test_label_cat:
                num_test_correct += 1

        temp_train_loss_mean = temp_train_loss_sum / len(train_features)
        temp_test_loss_mean = temp_test_loss_sum / len(test_features)
        temp_train_accuracy = num_train_correct / len(train_features)
        temp_test_accuracy = num_test_correct / len(test_features)

        print("Epoch: {}/{}..".format(epoch + 1, 50),
              "Training Loss: {:.4f}..".format(temp_train_loss_mean),
              "Training Accuracy: {:.4f}..".format(temp_train_accuracy),
              "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
              "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),
              )

    # visualize the classification performance of Model 1 on the test set
    test_pred_labels_1 = []
    for i in range(1000):
        temp_test_feature = test_features[i]
        temp_test_label = test_labels[i]
        temp_test_pred = my_model_1.forward(temp_test_feature)
        _, temp_train_pred_cat = torch.max(temp_test_pred, 0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_1.append(int(test_targets[i]))
        else:
            test_pred_labels_1.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=test_pred_labels_1)
    axes.set_title('Classification Performance of Model 1')
    plt.show()

    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + Dropout + SeLU + Linear + Dropout + ReLU + Linear + Sigmoid')
    print('Loss: Cross Entropy')
    print('Optimizer: Adam')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')

    # build network, loss function and optimizer for Model 2
    my_model_design_2 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), SeLU(),
                         Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
    my_model_2 = Sequential(my_model_design_2)
    optimizer_2 = Adam(my_model_2, lr=1e-3)
    criterion_2 = CrossEntropy()

    # train Model 2
    batch_size = 1
    epoch = 0
    while epoch < 25:
        temp_train_loss_sum = 0.
        temp_test_loss_sum = 0.
        num_train_correct = 0
        num_test_correct = 0

        # trained in batch fashion: here batch size = 1
        for temp_batch in range(0, len(train_features), batch_size):
            temp_train_features = train_features.narrow(0, temp_batch, batch_size)
            temp_train_labels = train_labels.narrow(0, temp_batch, batch_size)
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_2.zero_grad()
                temp_train_feature = temp_train_features[i]
                temp_train_label = temp_train_labels[i]

                # forward pass to compute loss
                temp_train_pred = my_model_2.forward(temp_train_feature)
                temp_train_loss = criterion_2.forward(temp_train_pred, temp_train_label)
                temp_train_loss_sum += temp_train_loss

                _, temp_train_pred_cat = torch.max(temp_train_pred, 0)
                _, temp_train_label_cat = torch.max(temp_train_label, 0)
                if temp_train_pred_cat == temp_train_label_cat:
                    num_train_correct += 1

                # calculate gradient according to loss gradient
                temp_train_loss_grad = criterion_2.backward(temp_train_pred, temp_train_label)
                '''
                if (not temp_train_loss_grad[0] >= 0) and (not temp_train_loss_grad[0] < 0):
                    continue
                '''
                # accumulate parameter gradient in each batch
                my_model_2.backward(temp_train_loss_grad)
                # update parameters by optimizer
                optimizer_2.step()

        # evaluate the current model on the test set (only the forward pass is needed)
        for i_test in range(len(test_features)):
            temp_test_feature = test_features[i_test]
            temp_test_label = test_labels[i_test]

            temp_test_pred = my_model_2.forward(temp_test_feature)
            temp_test_loss = criterion_2.forward(temp_test_pred, temp_test_label)
            temp_test_loss_sum += temp_test_loss

            _, temp_test_pred_cat = torch.max(temp_test_pred, 0)
            _, temp_test_label_cat = torch.max(temp_test_label, 0)
            if temp_test_pred_cat == temp_test_label_cat:
                num_test_correct += 1

        temp_train_loss_mean = temp_train_loss_sum / len(train_features)
        temp_test_loss_mean = temp_test_loss_sum / len(test_features)
        temp_train_accuracy = num_train_correct / len(train_features)
        temp_test_accuracy = num_test_correct / len(test_features)

        # in case of a gradient explosion (NaN loss gradient), re-initialize the model
        # and restart training; this situation seldom happens
        if (not temp_train_loss_grad[0] >= 0) and (not temp_train_loss_grad[0] < 0):
            epoch = 0
            my_model_design_2 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                                 Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
            my_model_2 = Sequential(my_model_design_2)
            optimizer_2 = Adam(my_model_2, lr=1e-3)
            criterion_2 = CrossEntropy()
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('Restart training because of gradient explosion')
            continue

        print("Epoch: {}/{}..".format(epoch + 1, 25),
              "Training Loss: {:.4f}..".format(temp_train_loss_mean),
              "Training Accuracy: {:.4f}..".format(temp_train_accuracy),
              "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
              "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),
              )
        epoch += 1

    # visualize the classification performance of Model 2 on the test set
    test_pred_labels_2 = []
    for i in range(1000):
        temp_test_feature = test_features[i]
        temp_test_label = test_labels[i]
        temp_test_pred = my_model_2.forward(temp_test_feature)
        _, temp_train_pred_cat = torch.max(temp_test_pred, 0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_2.append(int(test_targets[i]))
        else:
            test_pred_labels_2.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=test_pred_labels_2)
    axes.set_title('Classification Performance of Model 2')
    plt.show()
    targets, test_size=0.2)

np.seterr(all='raise')

net = NeuralNetwork([
    LinearLayer(inputSize=64, outputSize=16),
    LeakyRelu(),
    LinearLayer(inputSize=16, outputSize=10),
    LeakyRelu(),
    Softmax()
])

train(net, inputs, targets, loss=CrossEntropy(), num_epochs=600,
      optimizer=MBGD(learningRate=0.0001), showGraph=True)
net.serialize("serializedMNIST.json")
# net.loadParamsFromFile("/home/ayush/scratch/Net/aknet/serializedMNIST.json")

total = len(xtest)
correct = 0
for x, y in zip(xtest, ytest):
    predicted = net.forward(x)
    if np.argmax(predicted) == np.argmax(y):
        correct += 1
    # plt.imshow(x.reshape((28,28)))
    # plt.show()
    print(np.argmax(predicted), np.argmax(y))
    return dz


def compute_acc(X_test, Y_test, net):
    '''Accuracy for labels that are not one-hot encoded.'''
    acc = 0.0
    for i in range(X_test.shape[0]):
        y_h = net.forward(X_test[i])
        y = np.argmax(y_h)
        if y == Y_test[i]:
            acc += 1.0
    return acc / Y_test.shape[0]


if __name__ == '__main__':  # fixed: was 'main', which never matches
    loss = CrossEntropy()
    net = MnistNetMiniBatch()
    learning_rate = 0.001
    L_train = []
    L_test = []
    Acc_train = []
    Acc_test = []
    len_mini_batch = 10
    for it in range(100):
        L_acc = 0.
        sh = list(range(train_x.shape[0]))
        np.random.shuffle(sh)
        for i in range(train_x.shape[0]):
            x = train_x[sh[i]]
            y = train_y_oh[sh[i]]
            y_h = net.forward(x)
def main(cfg, gpus):
    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    crit = CrossEntropy()

    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(net_encoder, net_decoder, crit,
                                                 cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)

    # Dataset and Loader
    dataset_train = TrainDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_train,
                                 cfg.DATASET,
                                 batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),  # we have modified data_parallel
        shuffle=False,  # we do not use this param
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    # create loader iterator
    iterator_train = iter(loader_train)

    # load nets into gpu
    if len(gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}
    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history, epoch + 1, cfg)
        # checkpointing
        checkpoint(nets, history, cfg, epoch + 1)

    print('Training Done!')
def train(net: NeuralNet,
          inputs,
          targets,
          num_epochs=100,
          batch_size=5,
          loss=CrossEntropy(),
          optimizer=Optimizer(),
          regularizer=False,
          validation=True,
          verbose=False):
    """
    inputs.shape = [sample_size, n_samples]
    targets.shape = [target_shape, n_samples]
    """
    reg_cost = lambda x: 0
    if regularizer:
        reg_cost = regularizer.reg
    if validation:
        validator = Validation(inputs, targets, validation_fraction=0.2)
        inputs = inputs[..., 0:validator.train_size]
        targets = targets[..., 0:validator.train_size]

    epoch_losses = []
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        starts = np.arange(0, len(inputs[-1]), batch_size)
        np.random.shuffle(starts)
        for start in starts:
            end = start + batch_size
            num_batches = len(starts)
            predicted = net.forward(inputs[..., start:end], verbose=verbose)
            epoch_loss += loss.loss(
                predicted, targets[..., start:end]) / num_batches  # + reg_cost(net)
            grad = loss.grad(predicted, targets[..., start:end])
            net.backward(grad, verbose=verbose)
            if not isinstance(regularizer, bool):
                net.backward_regularizer(regularizer.grad_func)
            optimizer.step(net)
        if validation:
            validator.validate(net, loss)
        print(epoch, epoch_loss)
        epoch_losses.append(epoch_loss)

    if validation:
        plt.plot(np.linspace(0, num_epochs, num_epochs // validator.validation_freq),
                 validator.v_errors, label="Validation")
        plt.plot(np.linspace(0, num_epochs, num_epochs), epoch_losses, label="Training")
        plt.legend()
def train_GRAM(seqFile='seqFile.txt',
               labelFile='labelFile.txt',
               treeFile='tree.txt',
               embFile='embFile.txt',
               outFile='out.txt',
               inputDimSize=100,
               numAncestors=100,
               embDimSize=100,
               hiddenDimSize=200,
               attentionDimSize=200,
               max_epochs=100,
               L2=0.,
               numClass=26679,
               batchSize=100,
               dropoutRate=0.5,
               logEps=1e-8,
               verbose=True,
               ignore_level=0):
    options = locals().copy()

    # leavesList and ancestorsList carry the ancestor (category) information of every disease code
    leavesList = []
    ancestorsList = []
    for i in range(5, 0, -1):
        leaves, ancestors = build_tree(treeFile + '.level' + str(i) + '.pk')
        leavesList.append(leaves)
        ancestorsList.append(ancestors)

    print('Building the model ... ')
    gram = GRAM(inputDimSize, numAncestors, embDimSize, hiddenDimSize,
                attentionDimSize, numClass, dropoutRate, embFile)
    # if torch.cuda.device_count() > 1:
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    #     gram = nn.DataParallel(gram)
    gram.to(device)
    # gram.train()
    print(list(gram.state_dict()))

    loss_fn = CrossEntropy()
    loss_fn.to(device)

    print('Constructing the optimizer ... ')
    optimizer = torch.optim.Adadelta(gram.parameters(), lr=1, weight_decay=L2)

    print('Loading data ... ')
    trainSet, validSet, testSet = load_data(seqFile, labelFile,
                                            test_ratio=0.15, valid_ratio=0.1)
    print('Data length:', len(trainSet[0]))
    n_batches = int(np.ceil(float(len(trainSet[0])) / float(batchSize)))
    val_batches = int(np.ceil(float(len(validSet[0])) / float(batchSize)))
    test_batches = int(np.ceil(float(len(testSet[0])) / float(batchSize)))

    print('Optimization start !!')
    # set up tensorboard writers
    loss_writer = SummaryWriter('{}/{}'.format(outFile + 'TbLog', 'Loss'))
    acc_writer = SummaryWriter('{}/{}'.format(outFile + 'TbLog', 'Acc'))
    # test_writer = SummaryWriter('{}/{}'.format(outFile + 'TbLog', 'Test'))

    logFile = outFile + '.log'
    bestTrainCost = 0.0
    bestValidCost = 100000.0
    bestTestCost = 0.0
    bestTrainAcc = 0.0
    bestValidAcc = 0.0
    bestTestAcc = 0.0
    epochDuration = 0.0
    bestEpoch = 0

    # set the random seed for test
    random.seed(seed)

    # with torchsnooper.snoop():
    for epoch in range(max_epochs):
        iteration = 0
        cost_vec = []
        acc_vec = []
        startTime = time.time()
        gram.train()
        for index in random.sample(range(n_batches), n_batches):
            optimizer.zero_grad()
            batchX = trainSet[0][index * batchSize:(index + 1) * batchSize]
            batchY = trainSet[1][index * batchSize:(index + 1) * batchSize]
            x, y, mask, lengths = padMatrix(batchX, batchY, options)
            x = torch.from_numpy(x).to(device).float()
            mask = torch.from_numpy(mask).to(device).float()
            # print('x,', x.size())
            y_hat = gram(x, mask, leavesList, ancestorsList)
            # print('y_hat', y_hat.size())
            y = torch.from_numpy(y).float().to(device)
            # print('y', y.size())
            lengths = torch.from_numpy(lengths).float().to(device)
            # print(y.size(), y_hat.size())
            loss, acc = loss_fn(y_hat, y, lengths)
            loss.backward()
            optimizer.step()
            if iteration % 100 == 0 and verbose:
                buf = 'Epoch:%d, Iteration:%d/%d, Train_Cost:%f, Train_Acc:%f' % (
                    epoch, iteration, n_batches, loss, acc)
                print(buf)
            cost_vec.append(loss.item())
            acc_vec.append(acc)
            iteration += 1
        duration_optimize = time.time() - startTime

        gram.eval()
        cost = np.mean(cost_vec)
        acc = np.mean(acc_vec)
        startTime = time.time()
        with torch.no_grad():
            # calculate the loss and acc on the validation dataset
            cost_vec = []
            acc_vec = []
            for index in range(val_batches):
                validX = validSet[0][index * batchSize:(index + 1) * batchSize]
                validY = validSet[1][index * batchSize:(index + 1) * batchSize]
                val_x, val_y, mask, lengths = padMatrix(validX, validY, options)
                val_x = torch.from_numpy(val_x).float().to(device)
                mask = torch.from_numpy(mask).float().to(device)
                val_y_hat = gram(val_x, mask, leavesList, ancestorsList)
                val_y = torch.from_numpy(val_y).float().to(device)
                lengths = torch.from_numpy(lengths).float().to(device)
                valid_cost, valid_acc = loss_fn(val_y_hat, val_y, lengths)
                cost_vec.append(valid_cost.item())
                acc_vec.append(valid_acc)
            valid_cost = np.mean(cost_vec)
            valid_acc = np.mean(acc_vec)

            # calculate the loss and acc on the test dataset
            cost_vec = []
            acc_vec = []
            for index in range(test_batches):
                testX = testSet[0][index * batchSize:(index + 1) * batchSize]
                testY = testSet[1][index * batchSize:(index + 1) * batchSize]
                test_x, test_y, mask, lengths = padMatrix(testX, testY, options)
                test_x = torch.from_numpy(test_x).float().to(device)
                mask = torch.from_numpy(mask).float().to(device)
                test_y_hat = gram(test_x, mask, leavesList, ancestorsList)
                test_y = torch.from_numpy(test_y).float().to(device)
                lengths = torch.from_numpy(lengths).float().to(device)
                test_cost, test_acc = loss_fn(test_y_hat, test_y, lengths)
                cost_vec.append(test_cost.item())
                acc_vec.append(test_acc)
            test_cost = np.mean(cost_vec)
            test_acc = np.mean(acc_vec)

        # record the loss and acc
        loss_writer.add_scalar('Train Loss', cost, epoch)
        loss_writer.add_scalar('Test Loss', test_cost, epoch)
        loss_writer.add_scalar('Valid Loss', valid_cost, epoch)
        acc_writer.add_scalar('Train Acc', acc, epoch)
        acc_writer.add_scalar('Test Acc', test_acc, epoch)
        acc_writer.add_scalar('Valid Acc', valid_acc, epoch)

        # print the loss
        duration_metric = time.time() - startTime
        buf = 'Epoch:%d, Train_Cost:%f, Valid_Cost:%f, Test_Cost:%f' % (
            epoch, cost, valid_cost, test_cost)
        print(buf)
        print2file(buf, logFile)
        buf = 'Train_Acc:%f, Valid_Acc:%f, Test_Acc:%f' % (acc, valid_acc, test_acc)
        print(buf)
        print2file(buf, logFile)
        buf = 'Optimize_Duration:%f, Metric_Duration:%f' % (duration_optimize, duration_metric)
        print(buf)
        print2file(buf, logFile)

        # save the best model
        if valid_cost < bestValidCost:
            bestValidCost = valid_cost
            bestTestCost = test_cost
            bestTrainCost = cost
            bestEpoch = epoch
            bestTrainAcc = acc
            bestValidAcc = valid_acc
            bestTestAcc = test_acc
            torch.save(gram.state_dict(), outFile + f'.{epoch}')

    buf = 'Best Epoch:%d, Avg_Duration:%f, Train_Cost:%f, Valid_Cost:%f, Test_Cost:%f' % (
        bestEpoch, epochDuration / max_epochs, bestTrainCost, bestValidCost, bestTestCost)
    print(buf)
    print2file(buf, logFile)
    buf = 'Train_Acc:%f, Valid_Acc:%f, Test_Acc:%f' % (
        bestTrainAcc, bestValidAcc, bestTestAcc)
    print(buf)
    print2file(buf, logFile)
def test_whole_data(seqFile='seqFile.txt',
                    labelFile='labelFile.txt',
                    treeFile='tree.txt',
                    embFile='embFile.txt',
                    outFile='out.txt',
                    inputDimSize=100,
                    numAncestors=100,
                    embDimSize=100,
                    hiddenDimSize=200,
                    attentionDimSize=200,
                    max_epochs=100,
                    L2=0.,
                    numClass=26679,
                    batchSize=100,
                    dropoutRate=0.5,
                    logEps=1e-8,
                    verbose=True,
                    ignore_level=0):
    options = locals().copy()

    # get the best model through the log
    # with open(outFile + '.log') as f:
    #     line = f.readlines()[-2]
    #     best_epoch = line.split(',')[0].split(':')[1]
    #     print('Best parameters occur epoch:', best_epoch)

    leavesList = []
    ancestorsList = []
    for i in range(5, 0, -1):
        leaves, ancestors = build_tree(treeFile + '.level' + str(i) + '.pk')
        leavesList.append(leaves)
        ancestorsList.append(ancestors)

    print('Loading the model ... ')
    # create the model
    gram = GRAM(inputDimSize, numAncestors, embDimSize, hiddenDimSize,
                attentionDimSize, numClass, dropoutRate, '').to(device)
    # read the best parameters
    # gram.load_state_dict(torch.load(outFile + '.' + best_epoch))
    gram.load_state_dict(torch.load(embFile))

    loss_fn = CrossEntropy()
    loss_fn.to(device)

    print('Loading the data ... ')
    dataset, _, _ = load_data(seqFile, labelFile, test_ratio=0, valid_ratio=0)
    typeFile = labelFile.split('.seqs')[0] + '.types'
    types = pickle.load(open(typeFile, 'rb'))
    rTypes = dict([(v, u) for u, v in types.items()])
    print('Data length:', len(dataset[0]))
    n_batches = int(np.ceil(float(len(dataset[0])) / float(batchSize)))

    print('Calculating the result ...')
    cost_vec = []
    acc_vec = []
    num_for_each_disease = defaultdict(float)
    TP_for_each_disease = defaultdict(float)
    rank_for_each_disease = defaultdict(float)
    for index in range(n_batches):
        batchX = dataset[0][index * batchSize:(index + 1) * batchSize]
        batchY = dataset[1][index * batchSize:(index + 1) * batchSize]
        x, y, mask, lengths = padMatrix(batchX, batchY, options)
        x = torch.from_numpy(x).to(device).float()
        mask = torch.from_numpy(mask).to(device).float()
        y_hat = gram(x, mask, leavesList, ancestorsList)
        y = torch.from_numpy(y).float().to(device)
        lengths = torch.from_numpy(lengths).float().to(device)
        loss, acc = loss_fn(y_hat, y, lengths)
        cost_vec.append(loss.item())
        acc_vec.append(acc)

        # Calculating the accuracy for each disease
        y_sorted, indices = torch.sort(y_hat, dim=2, descending=True)
        # indices = indices[:, :, :20]
        for i, j, k in torch.nonzero(y, as_tuple=False):
            k = k.item()
            num_for_each_disease[k] += 1
            # search the rank for k
            m = torch.nonzero(indices[i][j] == k, as_tuple=False).view(-1).item()
            # calculate the top-20 accuracy
            if m < 20:
                TP_for_each_disease[k] += 1
            rank_for_each_disease[k] += (m + 1)

    cost = np.mean(cost_vec)
    acc = np.mean(acc_vec)
    print('Whole data average loss:%f, average accuracy@20:%f,' % (cost, acc))

    print('Recording the accuracy for each disease ...')
    acc_out_file = outFile + '_all_acc.txt'
    # sort the diseases by count
    num_for_each_disease = OrderedDict(
        sorted(num_for_each_disease.items(), key=lambda item: item[1], reverse=True))
    for disease in num_for_each_disease.keys():
        d_acc = TP_for_each_disease[disease] / num_for_each_disease[disease]
        avg_rank = rank_for_each_disease[disease] / num_for_each_disease[disease]
        buf = 'TypeNum:%d, icd_code:%s, Count:%d, avg_rank:%f, Accuracy:%f' % \
              (disease, rTypes[disease], num_for_each_disease[disease], avg_rank, d_acc)
        print2file(buf, acc_out_file)
    print('Done!')