Code Example #1
  def compute_cross_entropy(self):
    # Flatten the targets into a 1-D tensor of token ids.
    Y = tf.reshape(self._Y, [-1])
    target_one_hots = tf.one_hot(Y, self.vocab_size)

    # Keep only the predicted probability of the true token at each position.
    preds = self.p_y_i * target_one_hots
    preds = tf.reduce_max(preds, reduction_indices=1)
    preds = tf.reshape(preds, [-1, 1])

    ce = util.cross_entropy_loss(Y, preds, self.vocab_size)
    mask = tf.sign(tf.to_float(Y))  # 0 wherever Y is zero (padding), 1 otherwise
    self.ce = tf.reduce_sum(ce * mask)
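
The snippet above leans on a `util.cross_entropy_loss(Y, preds, vocab_size)` helper that is not shown. Given that `preds` already holds the probability assigned to each target token, one plausible reading is a per-token negative log-likelihood; the sketch below follows that assumption in the same TF 1.x style (the name and signature come from the call above, the body is a guess):

import tensorflow as tf

def cross_entropy_loss(targets, probs, vocab_size, eps=1e-8):
    # Hypothetical helper: per-token negative log-likelihood.
    # targets: int tensor [batch*steps] of token ids (kept for signature parity)
    # probs:   float tensor [batch*steps, 1] holding P(target | input)
    probs = tf.clip_by_value(tf.reshape(probs, [-1]), eps, 1.0)  # guard against log(0)
    return -tf.log(probs)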
Code Example #2
 def test_cross_entropy_loss_continuous(self):
     cross_entropy_loss = util.cross_entropy_loss(
         self.continuous_predictions, self.vectors, self.all_labels)
     self.assertAlmostEqual(cross_entropy_loss, 336.4232634905174)
Code Example #3
 def test_cross_entropy_loss_binary(self):
     cross_entropy_loss = util.cross_entropy_loss(self.binary_predictions,
                                                  self.vectors,
                                                  self.all_labels)
     self.assertAlmostEqual(cross_entropy_loss, 4000 / 3)
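
Examples #2 and #3 test a NumPy-level `util.cross_entropy_loss(predictions, vectors, all_labels)` whose fixtures (`self.binary_predictions`, `self.continuous_predictions`, `self.vectors`, `self.all_labels`) are not shown, so the expected values above cannot be reproduced here. Purely as an illustration of the kind of helper being tested, a summed cross entropy over per-label probability rows could look like this (every detail of the body is an assumption):

import numpy as np

def cross_entropy_loss(predictions, vectors, all_labels, eps=1e-12):
    # Hypothetical sketch: sum of -log P(true label) over the batch.
    # predictions: 2D array, one row of per-label probabilities per vector
    # vectors:     the inputs, iterated only to keep rows and labels aligned
    # all_labels:  true label index for each vector
    total = 0.0
    for row, _vector, label in zip(np.asarray(predictions, dtype=float), vectors, all_labels):
        total += -np.log(np.clip(row[label], eps, 1.0))
    return total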
Code Example #4
    def train(self, X, Y, step_size=10e-5, epochs=10000, validation_frac=0.1):
        """
        :param X: data, numpy 2D array
        :param Y: labels, numpy 1D array of target labels, only two possible classes.
        :param step_size: size of the step for gradient descent
        :param epochs: number of max iterations
        :param validation_frac: Fraction of data to use for validation,
                default: 10%
        :return: (costs, errors), the per-epoch training costs and validation error rates

        This method fits a model with a linear decision boundary to classify the data
        samples X, learning the parameters with gradient descent. The training data is
        further split into training and validation sets, and the parameters with the
        lowest validation error over all epochs are stored as the trained parameters
        (self.W and self.bias).
        """
        # Validation data set extracted from the training data
        Xvalid, Yvalid, X, Y = split_data(X, Y, validation_frac)
        N, D = X.shape

        print("Training data size: ({}, {})".format(N, D))

        # Make sure Y and Yvalid are column vectors
        Y.shape = [Y.size, 1]
        Yvalid.shape = [Yvalid.size, 1]

        # Initialize the weights W and the bias b to zero
        W = np.zeros(shape=(D, 1), dtype=np.float32)
        bias = np.zeros(shape=1, dtype=np.float32)

        # Perform Gradient Descent over defined epochs to learn weights
        costs = []
        errors = []
        best_validation_error = 1

        for i in range(epochs):

            if i % 100 == 0:
                print(i)

            # Do forward propagation to calculate P(Y|X)
            pY = sigmoid(X.dot(W) + bias)

            # Perform gradient descent
            W -= step_size * (X.transpose().dot(pY - Y) / N).reshape(D, 1)
            bias -= step_size * np.mean(pY - Y)

            # Compute the sigmoid costs and append to array costs
            # Check to set best_validation_error
            cost = cross_entropy_loss(Y, pY)
            costs.append(cost)

            # Using the validation data, compute P(Y|X_valid)
            pYvalid = sigmoid(Xvalid.dot(W) + bias)
            error = error_rate(Yvalid, np.round(pYvalid))
            errors.append(error)

            if error < best_validation_error:
                best_validation_error = error
                self.W = np.copy(W)
                self.bias = bias

        print("\n")

        return costs, errors
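
Example #4 depends on a few helpers (`split_data`, `sigmoid`, `cross_entropy_loss`, `error_rate`) that are defined elsewhere. Below is a minimal sketch consistent with how they are called above; the originals may differ in details such as shuffling or mean-versus-sum reduction:

import numpy as np

def split_data(X, Y, validation_frac):
    # Shuffle, then hold out validation_frac of the samples for validation.
    idx = np.random.permutation(len(Y))
    n_valid = int(len(Y) * validation_frac)
    valid_idx, train_idx = idx[:n_valid], idx[n_valid:]
    return X[valid_idx], Y[valid_idx], X[train_idx], Y[train_idx]

def sigmoid(z):
    # Logistic function, applied element-wise.
    return 1.0 / (1.0 + np.exp(-z))

def cross_entropy_loss(Y, pY, eps=1e-12):
    # Mean binary cross entropy between 0/1 targets Y and predicted probabilities pY.
    pY = np.clip(pY, eps, 1.0 - eps)
    return -np.mean(Y * np.log(pY) + (1 - Y) * np.log(1 - pY))

def error_rate(targets, predictions):
    # Fraction of misclassified samples.
    return np.mean(targets != predictions)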
Code Example #5
def train(args, io):
    data_dir = os.path.join(BASE_DIR, '..', 'part_seg', 'hdf5_data')

    data, label = load_h5_data_their_data(data_dir, 5, args.num_points)
    dataset = TensorDataset(data, label)

    train_loader, test_loader = get_data_loaders(dataset, args.batch_size)


    device = torch.device("cuda" if args.use_cuda else "cpu")

    #Try to load models
    if args.model == 'pointnet':
        model = PointNet(args).to(device)
    elif args.model == 'dgcnn':
        model = DGCNN(args).to(device)
    else:
        raise Exception("Not implemented")
    print(str(model))

    model = nn.DataParallel(model)

    if os.path.exists(args.model_path):
        io.cprint("Loading existing model...")
        try:
            model.load_state_dict(torch.load(args.model_path, map_location=device))
            io.cprint("Existing model loaded")
        except Exception:
            io.cprint("Can't load existing model, starting from a new model...")

    model.float()
    print("Let's use", torch.cuda.device_count(), "GPUs!")

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(), lr=args.lr*100, momentum=args.momentum, weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)
    
    criterion = cal_min_pairwise_seg_loss # cross_entropy_loss

    train_loss_list = []
    train_acc_list = []
    train_balanced_acc_list = []
    test_loss_list = []
    test_acc_list = []
    test_balanced_acc_list = []
    max_test_acc = 0
    max_acc_epoch = 0
    min_test_loss = math.inf
    min_loss_epoch = 0

    starting_epoch = 0
    training_backup_filepath = F'checkpoints_perm_loss_their_data/{args.exp_name}/models/training_backup.txt'
    if os.path.exists(training_backup_filepath):
        try:
            with open(training_backup_filepath, 'r') as f:
                starting_epoch = int(f.readline()) + 1
                if starting_epoch >= args.epochs - 1:
                    starting_epoch = 0
                else:
                    max_test_acc = float(f.readline())
                    min_test_loss = float(f.readline())
        except Exception:
            io.cprint("Error when reading epoch record file")

    io.cprint(F"Starting from epoch {starting_epoch}")
    for epoch in range(starting_epoch, args.epochs):
        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        train_pred = []
        train_true = []
        start_time = time.time()
        for data, label in train_loader:
            data, label = data.to(device), label.to(device)
            # data: batch_size x point_num x 3
            # label: batch_size x point_num

            batch_size = data.shape[0]
            opt.zero_grad()
            logits = model(data.permute(0, 2, 1))

            # TODO: update for cross entropy
            loss, permuted_labels = criterion(logits, label)
            min_loss = cross_entropy_loss(logits, permuted_labels)
            min_loss.backward()

            opt.step()
            preds = logits.max(dim=2)[1]
            count += batch_size
            train_loss += loss.item() * batch_size
            train_true.append(permuted_labels.cpu().view(-1).numpy())
            train_pred.append(preds.detach().view(-1).cpu().numpy())
        train_true = np.concatenate(train_true)
        train_pred = np.concatenate(train_pred)

        train_loss = train_loss*1.0/count
        train_loss_list.append(train_loss)

        train_acc = metrics.accuracy_score(train_true, train_pred)
        train_acc_list.append(train_acc)

        outstr = 'Train %d, loss: %.6f, train acc: %.6f' % (epoch,
                                                            train_loss,
                                                            train_acc)
        io.cprint(outstr)

        scheduler.step()

        ####################
        # Test
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        test_pred = []
        test_true = []
        for data, label in test_loader:
            data, label = data.to(device), label.to(device)

            batch_size = data.shape[0]
            logits = model(data.permute(0, 2, 1))
            
            loss, permuted_labels = criterion(logits, label)
            preds = logits.max(dim=2)[1]
            count += batch_size
            test_loss += loss.item() * batch_size
            test_true.append(permuted_labels.cpu().view(-1).numpy())
            test_pred.append(preds.detach().cpu().view(-1).numpy())
        test_true = np.concatenate(test_true)
        test_pred = np.concatenate(test_pred)
        test_acc = metrics.accuracy_score(test_true, test_pred)

        test_loss = test_loss*1.0/count
        test_loss_list.append(test_loss)
        test_acc_list.append(test_acc)

        if test_acc > max_test_acc:
            max_test_acc = test_acc
            max_acc_epoch = epoch
            torch.save(model.state_dict(), 'checkpoints_perm_loss_their_data/%s/models/model.h5' % args.exp_name)
        if test_loss < min_test_loss:
            min_test_loss = test_loss
            min_loss_epoch = epoch
        
        end_time = time.time()
        time_per_epoch = end_time - start_time
        outstr = 'Test %d, loss: %.6f, test acc: %.6f, total time: %.6f s\n' % (epoch,
                                                                                test_loss,
                                                                                test_acc,
                                                                                time_per_epoch)
        io.cprint(outstr)

        with open(training_backup_filepath, 'w') as f:
            f.write(str(epoch) + '\n')
            f.write(str(max_test_acc) + '\n')
            f.write(str(min_test_loss))       

    fig = plt.figure(figsize=(17, 10))

    loss_ax = fig.add_subplot(1, 2, 1)
    acc_ax = fig.add_subplot(1, 2, 2)

    loss_ax.plot(train_loss_list)
    loss_ax.plot(test_loss_list)
    loss_ax.set_title(F'Cross-entropy loss: \nMinimum test loss: {min_test_loss:.5f}(Epoch: {min_loss_epoch})')
    loss_ax.set_ylabel('loss')
    loss_ax.set_xlabel('epoch')
    loss_ax.legend(['train', 'test'], loc='upper right')

    acc_ax.plot(train_acc_list)
    acc_ax.plot(test_acc_list)
    acc_ax.set_title(F'Accuracy: \nMaximum test accuracy: {max_test_acc:.5f}(Epoch: {max_acc_epoch})')
    acc_ax.set_ylabel('acc')
    acc_ax.set_xlabel('epoch')
    acc_ax.legend(['train', 'test'], loc='upper right')
    #plt.show()
    fig.savefig('./log_perm_loss_their_data/model_loss_acc.png')
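
Example #5 calls a PyTorch-side `cross_entropy_loss(logits, permuted_labels)` on per-point logits of shape (batch_size, num_points, num_classes) and integer labels of shape (batch_size, num_points); the helper itself, like `cal_min_pairwise_seg_loss`, is not part of the snippet. A plausible wrapper for that call, shown only as a sketch:

import torch.nn.functional as F

def cross_entropy_loss(logits, labels):
    # Hypothetical sketch: mean cross entropy over every point in the batch.
    # logits: (batch_size, num_points, num_classes) raw scores from the model
    # labels: (batch_size, num_points) integer class ids
    num_classes = logits.shape[-1]
    # F.cross_entropy expects (N, C) logits and (N,) targets, so flatten the
    # batch and point dimensions together before applying it.
    return F.cross_entropy(logits.reshape(-1, num_classes), labels.reshape(-1).long())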
Code Example #6
def ProtoLoss(prototypes, x_latent, q_latent, labels_onehot, num_classes,
              num_unlabeled, num_support, num_queries):
    """
        calculates the prototype network loss using the latent representation of x
        and the latent representation of the query set
        Args:
        desired_latent: num_classes unlabeled examples to refine 
        x_latent: latent representation of supports with shape [N*S, D], where D is the latent dimension
        q_latent: latent representation of queries with shape [N*Q, D], where D is the latent dimension
        labels_onehot: one-hot encodings of the labels of the queries with shape [N, Q, N]
        num_classes: number of classes (N) for classification
        num_support: number of examples (S) in the support set
        num_queries: number of examples (Q) in the query set
        Returns:
        ce_loss: the cross entropy loss between the predicted labels and true labels
        acc: the accuracy of classification on the queries
    """

    latent_dim = x_latent.shape[-1]

    # prototypes are just centroids of each class's examples in latent space
    #x_class_split = tf.reshape(x_latent, (num_classes, num_support, latent_dim))
    #prototypes = tf.reduce_mean(x_class_split, axis=1) # (num_classes, latent_dim)

    #closest_centroids = []
    #for u in range(num_unlabeled):
    #  dists = []
    #  for p in range(num_classes):
    #      dists.append(tf.norm(desired_latent[u] - prototypes[p]))
    #   closest_centroids.append(tf.argmax(dists))
    #for u in range(num_unlabeled):
    #new_class = x_class_split[closest_centroids[u],:,:]
    #unlabeled_sample = tf.expand_dims(desired_latent[u], axis=0)
    #new_class = tf.concat([new_class, unlabeled_sample], axis = 1)
    #prototypes = prototypes[closest_centroids[u]].assign(tf.reduce_mean(new_class, axis = 1))

    # need to repeat prototypes for easy distance calculation
    query_split = tf.reshape(q_latent,
                             (num_classes, num_queries, 1, latent_dim))
    expanded = tf.expand_dims(prototypes,
                              axis=0)  # (1, num_classes, latent_dim)
    expanded = tf.expand_dims(expanded,
                              axis=0)  # (1, 1, num_classes, latent_dim)
    expanded = tf.repeat(expanded, repeats=(num_classes),
                         axis=0)  # (num_classes, 1, num_classes, latent_dim)
    expanded = tf.repeat(
        expanded, repeats=(num_queries),
        axis=1)  # (num_classes, num_queries, num_classes, latent_dim)

    # calculate distances (L2 norm), add small value for degenerate case
    dists = tf.norm(query_split - expanded, axis=3) + np.random.normal(
        1e-5, scale=1e-6)

    # use negative distance as logits for CE loss
    ce_loss = cross_entropy_loss(-1 * dists, labels_onehot)

    # predictions use argmin if distance, argmax if logits/normalized distribution
    preds = tf.argmin(dists, axis=2)
    gt = tf.argmax(labels_onehot, axis=2)
    acc = accuracy(gt, preds)

    return ce_loss, acc
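
The `cross_entropy_loss` and `accuracy` helpers used at the end of ProtoLoss are not defined in the snippet. Since the logits passed in are negative distances of shape [N, Q, N] and the labels are one-hot over the last axis, one plausible TensorFlow implementation is the following sketch (not necessarily the original):

import tensorflow as tf

def cross_entropy_loss(logits, labels_onehot):
    # Softmax cross entropy over the last (class) axis, averaged over all queries.
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=labels_onehot, logits=logits))

def accuracy(labels, predictions):
    # Fraction of queries whose predicted class matches the true class.
    return tf.reduce_mean(tf.cast(tf.equal(labels, predictions), tf.float32))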