def mini_batch(self, batch_size, x_set, y_set, eta=3):
    """Train with mini-batch stochastic gradient descent."""
    train_len = len(x_set)
    if len(y_set) != train_len:
        raise ValueError('lengths of x and y are not the same')
    if batch_size >= train_len:
        raise ValueError('batch size is greater than the training set')

    # shuffle inputs and labels with the same permutation
    common.shuffle_in_unison(x_set, y_set)

    for i in range(0, train_len, batch_size):
        x = x_set[i:i + batch_size]
        y = y_set[i:i + batch_size]

        # accumulate the gradients over the current mini-batch
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for m in range(batch_size):
            delta_nabla_w, delta_nabla_b = self.back_prop1(x[m], y[m])
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        # gradient-descent step, averaged over the mini-batch
        self.weights = [w - (eta / batch_size) * nw
                        for w, nw in zip(self.weights, nabla_w)]
        self.biases = [b - (eta / batch_size) * nb
                       for b, nb in zip(self.biases, nabla_b)]
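# Hedged sketch, not the repository's verbatim helper: `common.shuffle_in_unison`
# is assumed to apply one random permutation to both NumPy arrays in place, so
# every sample keeps its label after the shuffle. A minimal version compatible
# with the call above:
import numpy as np

def shuffle_in_unison(x_set, y_set):
    """Shuffle two same-length NumPy arrays in place with one permutation (sketch)."""
    perm = np.random.permutation(len(x_set))
    x_set[:] = x_set[perm]
    y_set[:] = y_set[perm]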
def main(args):
    # print args recap
    print(args, end="\n\n")  # do not remove this line

    start = time.time()

    # Create the dataset object for the "ni", "multi-task-nc", or "nic" track,
    # assuming the CORe50 data lives in ./core50/data/
    # TODO: review CORE50 to see if there is a way to shorten the dataset and
    # runtime for testing.
    # Original call to the dataset (takes a long time):
    # dataset = CORE50(root='core50/data/', scenario=args.scenario,
    #                  preload=args.preload_data)
    #
    # Custom call to create the CORE50 object. Passing train=True would use
    # only the training set and allow more control over batches.
    dataset = CORE50(root='core50/data/', scenario=args.scenario,
                     preload=args.preload_data)

    # Get the validation set
    print("Recovering validation set...")
    # default full validation set:
    # full_valdidset = dataset.get_full_valid_set()
    # reduced validation set:
    full_valdidset = dataset.get_full_valid_set(reduced=True)

    # model
    if args.classifier == 'ResNet18':
        # classifier is a pretrained model
        classifier = models.resnet18(pretrained=True)
        # replace the final layer: in features 512, out features args.n_classes (50).
        # nn.Linear applies a linear transformation to the incoming data.
        classifier.fc = torch.nn.Linear(512, args.n_classes)

    # stochastic gradient descent optimizer
    opt = torch.optim.SGD(classifier.parameters(), lr=args.lr)
    # CrossEntropyLoss combines nn.LogSoftmax() and nn.NLLLoss() in one class
    criterion = torch.nn.CrossEntropyLoss()

    # vars to update over time
    valid_acc = []
    ext_mem_sz = []
    ram_usage = []
    heads = []

    # enumerate(dataset) provides an iterator over all training sets and test sets.
    # Loop over the incremental training batches (x, y, t).
    for i, train_batch in enumerate(dataset):
        train_x, train_y, t = train_batch

        # run batch 0 and 1 only, then break:
        # if i == 2: break

        # shuffle the new data
        train_x, train_y = shuffle_in_unison((train_x, train_y), seed=0)

        if i == 0:
            # first round: store half of the data for later replay
            all_x = train_x[0:train_x.shape[0] // 2]
            all_y = train_y[0:train_y.shape[0] // 2]
        else:
            # create a hybrid training set of old and new data

            # shuffle the old data
            all_x, all_y = shuffle_in_unison((all_x, all_y), seed=0)

            # temporary copy of the new batch
            temp_x = train_x
            temp_y = train_y

            # current training set: replay memory + new batch
            train_x = np.append(all_x, train_x, axis=0)
            train_y = np.append(all_y, train_y)
            train_x, train_y = shuffle_in_unison((train_x, train_y), seed=0)

            # update the replay memory: keep a shrinking share of the old data
            # plus an equal share of the new data
            temp_x, temp_y = shuffle_in_unison((temp_x, temp_y), seed=0)
            keep_old = (all_x.shape[0] // (i + 1)) * i
            keep_new = temp_x.shape[0] // (i + 1)
            all_x = np.append(all_x[0:keep_old], temp_x[0:keep_new], axis=0)
            all_y = np.append(all_y[0:keep_old], temp_y[0:keep_new])
            del temp_x
            del temp_y

        # print current batch number and shape
        print("----------- batch {0} -------------".format(i))
        print("x shape: {0}, y shape: {1}"
              .format(train_x.shape, train_y.shape))
        # print the task label
        print("Task Label: ", t)

        # train_net: custom function to train the network; returns stats
        _, _, stats = train_net(
            opt, classifier, criterion, args.batch_size, train_x, train_y, t,
            args.epochs, preproc=preprocess_imgs
        )

        # if multi-task-nc: keep a deep copy of the output head for each task
        if args.scenario == "multi-task-nc":
            heads.append(copy.deepcopy(classifier.fc))
        ext_mem_sz += stats['disk']
        ram_usage += stats['ram']

        # evaluate all stored heads on the validation set; returns stats for each
        stats, _ = test_multitask(
            classifier, full_valdidset, args.batch_size,
            preproc=preprocess_imgs, multi_heads=heads
        )

        # print the new performance stats
        valid_acc += stats['acc']
        print("------------------------------------------")
        print("Avg. acc: {}".format(stats['acc']))
        print("------------------------------------------")

    # Generate submission.zip
    # directory with the code snapshot used to generate the results
    sub_dir = 'submissions/' + args.sub_dir
    if not os.path.exists(sub_dir):
        os.makedirs(sub_dir)

    # copy the code
    # custom function in the utils folder that handles possible file-path issues
    create_code_snapshot(".", sub_dir + "/code_snapshot")

    # generate metadata.txt with all the data used for the CLScore
    elapsed = (time.time() - start) / 60
    print("Training Time: {}m".format(elapsed))
    with open(sub_dir + "/metadata.txt", "w") as wf:
        for obj in [
            np.average(valid_acc), elapsed, np.average(ram_usage),
            np.max(ram_usage), np.average(ext_mem_sz), np.max(ext_mem_sz)
        ]:
            wf.write(str(obj) + "\n")

    # run the final full test
    # test_preds.txt: a list of predicted labels separated by "\n"
    print("Final inference on test set...")
    full_testset = dataset.get_full_test_set()
    stats, preds = test_multitask(
        classifier, full_testset, args.batch_size, preproc=preprocess_imgs
    )
    with open(sub_dir + "/test_preds.txt", "w") as wf:
        for pred in preds:
            wf.write(str(pred) + "\n")

    print("Experiment completed.")
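# Hypothetical entry point, shown only as a sketch: the argument names below
# are assumptions inferred from what main() actually reads (args.scenario,
# args.preload_data, args.classifier, args.n_classes, args.lr,
# args.batch_size, args.epochs, args.sub_dir); defaults are illustrative.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser('CORe50 continual-learning baseline')
    parser.add_argument('--scenario', type=str, default='multi-task-nc',
                        choices=['ni', 'multi-task-nc', 'nic'])
    parser.add_argument('--preload_data', action='store_true',
                        help='preload the dataset into RAM')
    parser.add_argument('--classifier', type=str, default='ResNet18')
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=1)
    parser.add_argument('--sub_dir', type=str, default='multi-task-nc',
                        help='subdirectory of submissions/ for the results')
    args = parser.parse_args()
    args.n_classes = 50  # CORe50 has 50 object classes

    main(args)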
def train_net(optimizer, model, criterion, mb_size, x, y, t,
              train_ep, preproc=None, use_cuda=True, mask=None):
    """
    Train a PyTorch model from pre-loaded tensors.

        Args:
            optimizer (object): the pytorch optimizer.
            model (object): the pytorch model to train.
            criterion (func): loss function.
            mb_size (int): mini-batch size.
            x (tensor): train data.
            y (tensor): train labels.
            t (int): task label.
            train_ep (int): number of training epochs.
            preproc (func): preprocessing function applied to x before training.
            use_cuda (bool): whether to run on the GPU.
            mask (bool): if we want to mask out some classes from the results.
        Returns:
            ave_loss (float): average loss across the train set.
            acc (float): average accuracy over training.
            stats (dict): dictionary of several stats collected.
    """

    cur_ep = 0
    cur_train_t = t
    stats = {"ram": [], "disk": []}

    if preproc:
        x = preproc(x)

    # pad the data so every mini-batch is full and get the iterations per epoch
    (train_x, train_y), it_x_ep = pad_data([x, y], mb_size)

    shuffle_in_unison([train_x, train_y], 0, in_place=True)

    model = maybe_cuda(model, use_cuda=use_cuda)
    acc = None
    ave_loss = 0

    train_x = torch.from_numpy(train_x).type(torch.FloatTensor)
    train_y = torch.from_numpy(train_y).type(torch.LongTensor)

    for ep in range(train_ep):

        stats['disk'].append(check_ext_mem("cl_ext_mem"))
        stats['ram'].append(check_ram_usage())

        model.active_perc_list = []
        model.train()

        print("training ep: ", ep)
        correct_cnt, ave_loss = 0, 0
        for it in range(it_x_ep):

            start = it * mb_size
            end = (it + 1) * mb_size

            optimizer.zero_grad()

            x_mb = maybe_cuda(train_x[start:end], use_cuda=use_cuda)
            y_mb = maybe_cuda(train_y[start:end], use_cuda=use_cuda)
            logits = model(x_mb)

            _, pred_label = torch.max(logits, 1)
            correct_cnt += (pred_label == y_mb).sum()

            loss = criterion(logits, y_mb)
            ave_loss += loss.item()

            loss.backward()
            optimizer.step()

            # running training accuracy over the samples seen this epoch
            acc = correct_cnt.item() / ((it + 1) * y_mb.size(0))
            # loss.item() is already the mini-batch mean, so average it over
            # the iterations seen so far
            run_loss = ave_loss / (it + 1)

            if it % 100 == 0:
                print('==>>> it: {}, avg. loss: {:.6f}, '
                      'running train acc: {:.3f}'.format(it, run_loss, acc))

        # average loss per mini-batch over the epoch
        ave_loss /= it_x_ep
        cur_ep += 1

    return ave_loss, acc, stats
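# Hedged sketch, not the repository's verbatim helper: `pad_data` is assumed
# to recycle a few leading samples so the set length becomes a multiple of
# mb_size, returning the padded arrays plus the number of iterations per
# epoch, to match the call
# `(train_x, train_y), it_x_ep = pad_data([x, y], mb_size)` above.
import numpy as np

def pad_data(dataset, mb_size):
    """Pad every array in `dataset` to a multiple of mb_size (sketch)."""
    n = dataset[0].shape[0]
    n_missing = n % mb_size
    it_per_epoch = n // mb_size + (1 if n_missing else 0)
    if n_missing:
        n_to_add = mb_size - n_missing
        # repeat the first n_to_add samples of each array to fill the last batch
        dataset = [np.concatenate((d[:n_to_add], d)) for d in dataset]
    return dataset, it_per_epoch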