def compute_cross_entropy(self):
    """Compute the masked, summed cross-entropy and store it in ``self.ce``.

    Reads ``self._Y`` (target ids), ``self.p_y_i`` (per-class scores;
    presumably probabilities of shape ``[num_targets, vocab_size]`` --
    TODO confirm) and ``self.vocab_size``. Returns nothing; the scalar
    result is written to ``self.ce``.
    """
    flat_targets = tf.reshape(self._Y, [-1])
    one_hot_targets = tf.one_hot(flat_targets, self.vocab_size)

    # Multiplying by the one-hot zeroes every column except the target's;
    # the row-wise max then extracts that single remaining entry.
    target_scores = tf.reshape(
        tf.reduce_max(self.p_y_i * one_hot_targets, reduction_indices=1),
        [-1, 1],
    )

    per_target_ce = util.cross_entropy_loss(
        flat_targets, target_scores, self.vocab_size
    )

    # sign() yields 0 where the target id is 0 and 1 where it is positive,
    # so positions whose target id is 0 do not contribute to the sum.
    nonzero_mask = tf.sign(tf.to_float(flat_targets))
    self.ce = tf.reduce_sum(per_target_ce * nonzero_mask)
def test_cross_entropy_loss_continuous(self):
    """Check ``util.cross_entropy_loss`` on the continuous-prediction fixture."""
    expected_loss = 336.4232634905174
    actual_loss = util.cross_entropy_loss(
        self.continuous_predictions,
        self.vectors,
        self.all_labels,
    )
    self.assertAlmostEqual(actual_loss, expected_loss)
def test_cross_entropy_loss_binary(self):
    """Check ``util.cross_entropy_loss`` on the binary-prediction fixture."""
    expected_loss = 4000 / 3
    actual_loss = util.cross_entropy_loss(
        self.binary_predictions,
        self.vectors,
        self.all_labels,
    )
    self.assertAlmostEqual(actual_loss, expected_loss)
def train(self, X, Y, step_size=10e-5, epochs=10000, validation_frac=0.1):
    """
    Fit a linear decision boundary with batch gradient descent.

    The training data X is further split into training and validation data.
    The parameters corresponding to the lowest error on the validation data
    over the epochs are stored on the instance as ``self.W`` and
    ``self.bias``.

    :param X: data, numpy 2D array
    :param Y: labels, numpy 1D array of target labels, only two possible
        classes.
    :param step_size: size of the step for gradient descent
    :param epochs: number of max iterations
    :param validation_frac: Fraction of data to use for validation,
        default: 10%
    :return: tuple ``(costs, errors)``: the per-epoch training cross-entropy
        costs and the per-epoch validation error rates
    """
    # Validation data set extracted from the training data
    Xvalid, Yvalid, X, Y = split_data(X, Y, validation_frac)
    N, D = X.shape
    print("Training data size: ({}, {})".format(N, D))

    # Make sure Y and Yvalid are column vectors
    Y.shape = [Y.size, 1]
    Yvalid.shape = [Yvalid.size, 1]

    # Initialize the weights W and the bias b to zero
    W = np.zeros(shape=(D, 1), dtype=np.float32)
    bias = np.zeros(shape=1, dtype=np.float32)

    # Perform Gradient Descent over defined epochs to learn weights
    costs = []
    errors = []
    best_validation_error = 1
    for i in range(epochs):
        if i % 100 == 0:
            print(i)

        # Do forward propagation to calculate P(Y|X)
        pY = sigmoid(X.dot(W) + bias)

        # Perform gradient descent (both updates mutate the arrays in place)
        residual = pY - Y
        W -= step_size * (X.transpose().dot(residual) / N).reshape(D, 1)
        bias -= step_size * np.mean(residual)

        # Record the training cost for the pre-update predictions
        cost = cross_entropy_loss(Y, pY)
        costs.append(cost)

        # Using the validation data, compute P(Y|X_valid)
        pYvalid = sigmoid(Xvalid.dot(W) + bias)
        error = error_rate(Yvalid, np.round(pYvalid))
        errors.append(error)
        if error < best_validation_error:
            best_validation_error = error
            # Snapshot BOTH parameters: W and bias are updated in place
            # above, so storing a bare reference would let the saved
            # "best" value drift to whatever the final epoch produces.
            self.W = np.copy(W)
            self.bias = np.copy(bias)
    print("\n")
    return costs, errors
def train(args, io):
    """Train and evaluate a point-cloud segmentation model, checkpointing as it goes.

    Per epoch this runs one pass over the training loader and one over the
    test loader, saves the model weights whenever test accuracy improves,
    writes a small resume file (epoch, best accuracy, best loss), and
    refreshes a loss/accuracy plot on disk.

    Args:
        args: configuration object; the attributes read here are
            ``num_points``, ``batch_size``, ``use_cuda``, ``model``
            (``'pointnet'`` or ``'dgcnn'``), ``model_path``, ``use_sgd``,
            ``lr``, ``momentum``, ``epochs`` and ``exp_name``.
        io: logger-like object exposing ``cprint(str)``.

    Raises:
        Exception: if ``args.model`` is not one of the supported names.
    """
    data_dir = os.path.join(BASE_DIR, '..', 'part_seg', 'hdf5_data')
    data, label = load_h5_data_their_data(data_dir, 5, args.num_points)
    dataset = TensorDataset(data, label)
    train_loader, test_loader = get_data_loaders(dataset, args.batch_size)

    device = torch.device("cuda" if args.use_cuda else "cpu")

    # Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)
    if os.path.exists(args.model_path):
        io.cprint("Loading existing model...")
        try:
            model.load_state_dict(torch.load(args.model_path, map_location=device))
            io.cprint("Existing model loaded")
        except Exception:
            # Best-effort warm start: fall back to fresh weights on any load
            # failure, but let KeyboardInterrupt/SystemExit propagate.
            io.cprint("Can't load existing model, start from new model...")
    model.float()
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr*100, momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    # NOTE(review): eta_min equals args.lr, so with Adam (initial lr ==
    # args.lr) the schedule is effectively constant -- confirm this is intended.
    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = cal_min_pairwise_seg_loss  # cross_entropy_loss

    train_loss_list = []
    train_acc_list = []
    train_balanced_acc_list = []
    test_loss_list = []
    test_acc_list = []
    test_balanced_acc_list = []
    max_test_acc = 0
    max_acc_epoch = 0
    min_test_loss = math.inf
    min_loss_epoch = 0

    starting_epoch = 0
    training_backup_filepath = f'checkpoints_perm_loss_their_data/{args.exp_name}/models/training_backup.txt'
    if os.path.exists(training_backup_filepath):
        try:
            with open(training_backup_filepath, 'r') as f:
                resume_epoch = int(f.readline()) + 1
                # A run that already (nearly) finished restarts from scratch.
                if resume_epoch < args.epochs - 1:
                    saved_max_acc = float(f.readline())
                    saved_min_loss = float(f.readline())
                    # Assign only after every field parsed, so a truncated
                    # file cannot leave a resumed epoch with default metrics.
                    starting_epoch = resume_epoch
                    max_test_acc = saved_max_acc
                    min_test_loss = saved_min_loss
        except (OSError, ValueError):
            io.cprint("Error when reading epoch record file")
    io.cprint(f"Starting from epoch {starting_epoch}")

    for epoch in range(starting_epoch, args.epochs):
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        start_time = time.time()
        for data, label in train_loader:
            data, label = data.to(device), label.to(device)
            # data: batch_size x point_num x 3
            # label: batch_size x point_num
            batch_size = data.shape[0]
            opt.zero_grad()
            logits = model(data.permute(0, 2, 1))
            # TODO: update for cross entropy
            loss, permuted_labels = criterion(logits, label)
            # The gradient step uses the cross-entropy against the permuted
            # labels; `loss` from the criterion is what gets reported.
            min_loss = cross_entropy_loss(logits, permuted_labels)
            min_loss.backward()
            opt.step()
            preds = logits.max(dim=2)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(permuted_labels.cpu().view(-1).numpy())
            train_pred.append(preds.detach().view(-1).cpu().numpy())
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)
        train_loss = train_loss*1.0/count
        train_loss_list.append(train_loss)
        train_acc = metrics.accuracy_score(train_true, train_pred)
        train_acc_list.append(train_acc)
        outstr = 'Train %d, loss: %.6f, train acc: %.6f' % (epoch, train_loss, train_acc)
        io.cprint(outstr)

        scheduler.step()

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        # No parameters are updated here, so skip autograd bookkeeping.
        with torch.no_grad():
            for data, label in test_loader:
                data, label = data.to(device), label.to(device)
                batch_size = data.shape[0]
                logits = model(data.permute(0, 2, 1))
                loss, permuted_labels = criterion(logits, label)
                preds = logits.max(dim=2)[1]
                count += batch_size
                test_loss += loss.item() * batch_size
                test_true.append(permuted_labels.cpu().view(-1).numpy())
                test_pred.append(preds.detach().cpu().view(-1).numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)
        test_loss = test_loss*1.0/count
        test_loss_list.append(test_loss)
        test_acc_list.append(test_acc)

        if test_acc > max_test_acc:
            max_test_acc = test_acc
            max_acc_epoch = epoch
            torch.save(model.state_dict(), 'checkpoints_perm_loss_their_data/%s/models/model.h5' % args.exp_name)
        if test_loss < min_test_loss:
            min_test_loss = test_loss
            min_loss_epoch = epoch

        end_time = time.time()
        time_per_epoch = end_time - start_time
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, total time: %.6f s\n' % (epoch, test_loss, test_acc, time_per_epoch)
        io.cprint(outstr)

        # Resume record: last finished epoch, best accuracy, best loss.
        with open(training_backup_filepath, 'w') as f:
            f.write(str(epoch) + '\n')
            f.write(str(max_test_acc) + '\n')
            f.write(str(min_test_loss))

        # NOTE(review): the plot is refreshed after every epoch so an
        # interrupted run still leaves a figure on disk; the final file is
        # the same as plotting once after the last epoch.
        fig = plt.figure(figsize=(17, 10))
        loss_ax = fig.add_subplot(1, 2, 1)
        acc_ax = fig.add_subplot(1, 2, 2)

        loss_ax.plot(train_loss_list)
        loss_ax.plot(test_loss_list)
        loss_ax.set_title(f'Cross-entropy loss: \nMinimum test loss: {min_test_loss:.5f}(Epoch: {min_loss_epoch})')
        loss_ax.set_ylabel('loss')
        loss_ax.set_xlabel('epoch')
        loss_ax.legend(['train', 'test'], loc='upper right')

        acc_ax.plot(train_acc_list)
        acc_ax.plot(test_acc_list)
        acc_ax.set_title(f'Accuracy: \nMaximum test accuracy: {max_test_acc:.5f}(Epoch: {max_acc_epoch})')
        acc_ax.set_ylabel('acc')
        acc_ax.set_xlabel('epoch')
        acc_ax.legend(['train', 'test'], loc='upper right')

        fig.savefig('./log_perm_loss_their_data/model_loss_acc.png')
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
def ProtoLoss(prototypes, x_latent, q_latent, labels_onehot, num_classes,
              num_unlabeled, num_support, num_queries):
    """
    calculates the prototype network loss using precomputed class prototypes
    and the latent representation of the query set
    Args:
      prototypes: one latent vector per class; presumably shape [N, D]
        (the shape comments below assume this -- TODO confirm with caller)
      x_latent: latent representation of supports with shape [N*S, D], where
        D is the latent dimension; only its last dimension is read here
      q_latent: latent representation of queries with shape [N*Q, D], where D
        is the latent dimension
      labels_onehot: one-hot encodings of the labels of the queries with
        shape [N, Q, N]
      num_classes: number of classes (N) for classification
      num_unlabeled: unused; kept so existing callers keep working
      num_support: unused; kept so existing callers keep working
      num_queries: number of examples (Q) in the query set
    Returns:
      ce_loss: the cross entropy loss between the predicted labels and true
        labels
      acc: the accuracy of classification on the queries
    """
    latent_dim = x_latent.shape[-1]

    # (num_classes, num_queries, 1, latent_dim)
    query_split = tf.reshape(q_latent, (num_classes, num_queries, 1, latent_dim))
    # (1, 1, num_classes, latent_dim); broadcasting against query_split gives
    # every query/prototype pair without materialising tiled copies.
    expanded = tf.expand_dims(tf.expand_dims(prototypes, axis=0), axis=0)

    # calculate distances (L2 norm). The epsilon sits INSIDE the square root:
    # the gradient of sqrt at exactly 0 is undefined (NaN), and an offset
    # added after the norm cannot fix that. sqrt(1e-10) == 1e-5, so a
    # coincident query/prototype pair gets a distance floor of 1e-5.
    squared_dists = tf.reduce_sum(tf.square(query_split - expanded), axis=3)
    dists = tf.sqrt(squared_dists + 1e-10)

    # use negative distance as logits for CE loss
    ce_loss = cross_entropy_loss(-1 * dists, labels_onehot)

    # predictions use argmin if distance, argmax if logits/normalized distribution
    preds = tf.argmin(dists, axis=2)
    gt = tf.argmax(labels_onehot, axis=2)
    acc = accuracy(gt, preds)
    return ce_loss, acc