Example #1
def mini_batch(self, batch_size, x_set, y_set, eta=3):
    """Run one epoch of mini-batch gradient descent over (x_set, y_set)."""
    train_len = len(x_set)
    if len(y_set) != train_len:
        raise ValueError('lengths of x_set and y_set are not the same')
    if batch_size >= train_len:
        raise ValueError('batch size must be smaller than the training set')
    # shuffle inputs and labels together so the pairs stay aligned
    common.shuffle_in_unison(x_set, y_set)
    for i in range(0, train_len, batch_size):
        x = x_set[i:i + batch_size]
        y = y_set[i:i + batch_size]
        # reset the gradient accumulators for every mini-batch
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        # accumulate the gradients of each sample in the mini-batch
        # (len(x) handles a final batch smaller than batch_size)
        for m in range(len(x)):
            delta_nabla_w, delta_nabla_b = self.back_prop1(x[m], y[m])
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
        # gradient-descent step: average the gradients and scale by eta
        self.weights = [w - (eta / len(x)) * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta / len(x)) * nb
                       for b, nb in zip(self.biases, nabla_b)]
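A minimal usage sketch (not part of the listing above), assuming a hypothetical Network class that exposes weights, biases and back_prop1, plus the common.shuffle_in_unison helper used by the method; the layer sizes and data shapes below are illustrative assumptions only.

import numpy as np

# hypothetical network: 784-dimensional inputs, one hidden layer, 10 output classes
net = Network(sizes=[784, 30, 10])

# toy training data: 1000 random samples with one-hot labels (shapes are assumptions)
x_train = np.random.rand(1000, 784)
y_train = np.eye(10)[np.random.randint(0, 10, size=1000)]

# one epoch of mini-batch gradient descent with batches of 10
net.mini_batch(batch_size=10, x_set=x_train, y_set=y_train, eta=3)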
Example #2
def main(args):
    # print args recap
    print(args, end="\n\n")

    # do not remove this line
    start = time.time()

    # Create the dataset object for one of the "ni", "multi-task-nc", or "nic" tracks,
    # assuming the CORe50 data is located in ./core50/data/.
    # TODO: review CORE50 to see if there is a way to shorten the dataset and runtime for testing.
    # Note: a custom CORE50 call with train=True would use only the training set and
    # give more control over the batches, but the default call (which takes a while
    # when preloading) is kept here.
    dataset = CORE50(root='core50/data/', scenario=args.scenario,
                     preload=args.preload_data)

    # Get the validation set (reduced=True returns a reduced validation set;
    # dataset.get_full_valid_set() would return the full one)
    print("Recovering validation set...")
    full_validset = dataset.get_full_valid_set(reduced=True)

    # model: start from an ImageNet-pretrained ResNet18 and replace its final
    # fully connected layer with a new linear head (in features: 512,
    # out features: args.n_classes = 50)
    if args.classifier == 'ResNet18':
        classifier = models.resnet18(pretrained=True)
        classifier.fc = torch.nn.Linear(512, args.n_classes)

    # stochastic gradient descent optimizer and cross-entropy loss
    # (CrossEntropyLoss combines nn.LogSoftmax() and nn.NLLLoss() in one class)
    opt = torch.optim.SGD(classifier.parameters(), lr=args.lr)
    criterion = torch.nn.CrossEntropyLoss()

    # vars to update over time
    valid_acc = []
    ext_mem_sz = []
    ram_usage = []
    heads = []

    # enumerate(dataset) provides an iterator over the incremental training batches
    # loop over the training incremental batches (x, y, t)
    for i, train_batch in enumerate(dataset):
        train_x, train_y, t = train_batch

        # debug: run only batches 0 and 1, then break
        # if i == 2: break

        # shuffle new data
        train_x, train_y = shuffle_in_unison((train_x, train_y), seed=0)

        if i == 0:
            # first round
            # store data for later 
            all_x = train_x[0:train_x.shape[0] // 2]
            all_y = train_y[0:train_y.shape[0] // 2]
        else:
            # create a hybrid training set from old and new data
            # shuffle old data
            all_x, all_y = shuffle_in_unison((all_x, all_y), seed=0)

            # create temp holder
            temp_x = train_x
            temp_y = train_y

            # set current variables to be used for training
            train_x = np.append(all_x, train_x, axis=0)
            train_y = np.append(all_y, train_y)
            train_x, train_y = shuffle_in_unison((train_x, train_y), seed=0)

            # rebuild the memory: keep i/(i+1) of the old data and 1/(i+1) of the new batch
            temp_x, temp_y = shuffle_in_unison((temp_x, temp_y), seed=0)
            keep_old = (all_x.shape[0] // (i + 1)) * i
            keep_new = temp_x.shape[0] // (i + 1)
            all_x = np.append(all_x[0:keep_old], temp_x[0:keep_new], axis=0)
            all_y = np.append(all_y[0:keep_old], temp_y[0:keep_new])
            del temp_x
            del temp_y


        # Print current batch number 
        print("----------- batch {0} -------------".format(i))
        # Print current batch shape
        print("x shape: {0}, y shape: {1}"
              .format(train_x.shape, train_y.shape))
        # print task label type
        print("Task Label: ", t)

        # train_net: a custom function to train the neural network; returns stats
        _, _, stats = train_net(
            opt, classifier, criterion, args.batch_size, train_x, train_y, t,
            args.epochs, preproc=preprocess_imgs
        )

        # if multi-task-nc: store a deep copy of the current classification head in the heads list
        if args.scenario == "multi-task-nc":
            heads.append(copy.deepcopy(classifier.fc))
        ext_mem_sz += stats['disk']
        ram_usage += stats['ram']

        # test all models in the heads list for performance; return stats for each
        stats, _ = test_multitask(
            classifier, full_validset, args.batch_size,
            preproc=preprocess_imgs, multi_heads=heads
        )

        # print new stats on performance 
        valid_acc += stats['acc']
        print("------------------------------------------")
        print("Avg. acc: {}".format(stats['acc']))
        print("------------------------------------------")

    # Generate submission.zip
    # directory with the code snapshot to generate the results
    sub_dir = 'submissions/' + args.sub_dir
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)

    # copy code
    # custom function in utils folder to deal with possible file path issues 
    create_code_snapshot(".", sub_dir + "/code_snapshot")

    # generating metadata.txt: with all the data used for the CLScore
    elapsed = (time.time() - start) / 60
    print("Training Time: {}m".format(elapsed))
    with open(sub_dir + "/metadata.txt", "w") as wf:
        for obj in [
            np.average(valid_acc), elapsed, np.average(ram_usage),
            np.max(ram_usage), np.average(ext_mem_sz), np.max(ext_mem_sz)
        ]:
            wf.write(str(obj) + "\n")

    # run final full test 
    # test_preds.txt: with a list of labels separated by "\n"
    print("Final inference on test set...")
    full_testset = dataset.get_full_test_set()
    stats, preds = test_multitask(
        classifier, full_testset, args.batch_size, preproc=preprocess_imgs
    )

    with open(sub_dir + "/test_preds.txt", "w") as wf:
        for pred in preds:
            wf.write(str(pred) + "\n")

    print("Experiment completed.")
Example #3
def train_net(optimizer,
              model,
              criterion,
              mb_size,
              x,
              y,
              t,
              train_ep,
              preproc=None,
              use_cuda=True,
              mask=None):
    """
    Train a PyTorch model from pre-loaded tensors.

        Args:
            optimizer (object): the pytorch optimizer.
            model (object): the pytorch model to train.
            criterion (func): loss function.
            mb_size (int): mini-batch size.
            x (tensor): train data.
            y (tensor): train labels.
            t (int): task label.
            train_ep (int): number of training epochs.
            preproc (func): preprocessing function applied to the data.
            use_cuda (bool): whether to run on the GPU.
            mask (bool): if we want to mask out some classes from the results.
        Returns:
            ave_loss (float): average loss across the train set.
            acc (float): average accuracy over training.
            stats (dict): dictionary of several stats collected.
    """

    cur_ep = 0
    cur_train_t = t
    stats = {"ram": [], "disk": []}

    if preproc:
        x = preproc(x)

    (train_x, train_y), it_x_ep = pad_data([x, y], mb_size)

    shuffle_in_unison([train_x, train_y], 0, in_place=True)

    model = maybe_cuda(model, use_cuda=use_cuda)
    acc = None
    ave_loss = 0

    train_x = torch.from_numpy(train_x).type(torch.FloatTensor)
    train_y = torch.from_numpy(train_y).type(torch.LongTensor)

    for ep in range(train_ep):

        stats['disk'].append(check_ext_mem("cl_ext_mem"))
        stats['ram'].append(check_ram_usage())

        model.active_perc_list = []
        model.train()

        print("training ep: ", ep)
        correct_cnt, tot_loss, ave_loss = 0, 0, 0
        for it in range(it_x_ep):

            start = it * mb_size
            end = (it + 1) * mb_size

            optimizer.zero_grad()

            x_mb = maybe_cuda(train_x[start:end], use_cuda=use_cuda)
            y_mb = maybe_cuda(train_y[start:end], use_cuda=use_cuda)
            logits = model(x_mb)

            _, pred_label = torch.max(logits, 1)
            correct_cnt += (pred_label == y_mb).sum()

            loss = criterion(logits, y_mb)
            tot_loss += loss.item()

            loss.backward()
            optimizer.step()

            # running statistics over the samples seen so far in this epoch
            acc = correct_cnt.item() / ((it + 1) * y_mb.size(0))
            ave_loss = tot_loss / ((it + 1) * y_mb.size(0))

            if it % 100 == 0:
                print('==>>> it: {}, avg. loss: {:.6f}, '
                      'running train acc: {:.3f}'.format(it, ave_loss, acc))

        cur_ep += 1

    return ave_loss, acc, stats
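A hedged usage sketch for train_net, assuming the helpers it relies on (pad_data, shuffle_in_unison, maybe_cuda, check_ext_mem, check_ram_usage) are importable alongside it; the model, data shapes and hyperparameters below are placeholder assumptions, not values from the original code.

import numpy as np
import torch
import torchvision.models as models

# placeholder model: pretrained ResNet18 with a 50-class head
model = models.resnet18(pretrained=True)
model.fc = torch.nn.Linear(512, 50)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

# toy batch of 128 RGB images (channels-first) with integer labels in [0, 50)
x = np.random.rand(128, 3, 128, 128).astype(np.float32)
y = np.random.randint(0, 50, size=128)

ave_loss, acc, stats = train_net(
    optimizer, model, criterion, mb_size=32, x=x, y=y, t=0,
    train_ep=1, preproc=None, use_cuda=torch.cuda.is_available()
)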