Example #1
def generate_original_preds(train=True):
    """
    Generate the predictions of the original model on training
    and validation datasets. 

    The original model is also trained if train = True. 

    """
    x_train, y_train, x_val, y_val, id_to_word = load_data() 
    model = create_original_model()

    if train:
        filepath="models/original.hdf5"
        checkpoint = ModelCheckpoint(filepath, monitor='val_acc', 
            verbose=1, save_best_only=True, mode='max')
        callbacks_list = [checkpoint]
        model.fit(x_train, y_train,
                  validation_data=(x_val, y_val),
                  callbacks=callbacks_list,
                  epochs=epochs, batch_size=batch_size)

    model.load_weights('./models/original.hdf5', 
        by_name=True) 

    pred_train = model.predict(x_train, verbose=1, batch_size=1000)
    pred_val = model.predict(x_val, verbose=1, batch_size=1000)
    if not train:
        print('The val accuracy is {}'.format(calculate_acc(pred_val, y_val)))
        print('The train accuracy is {}'.format(calculate_acc(pred_train, y_train)))


    np.save('data/pred_train.npy', pred_train)
    np.save('data/pred_val.npy', pred_val) 
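
The `calculate_acc` helper itself is not shown in these examples. For the Keras-style usage above (and in Example #3), where the raw output of `model.predict` is compared against the label arrays, a minimal sketch could look like the following; the exact label encoding (integer vs. one-hot) is an assumption.

import numpy as np

def calculate_acc(pred, y):
    # Hypothetical sketch: reduce class-probability predictions to class
    # ids with argmax; if the labels are one-hot encoded, reduce them too.
    y = np.asarray(y)
    if y.ndim > 1:
        y = np.argmax(y, axis=-1)
    return np.mean(np.argmax(pred, axis=-1) == y)
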
Example #2
def simple_pred(params):
    all_train_labels = load_data(
        os.path.join(params.save_data, "all_train_labels"))
    all_train_labels = all_train_labels.reshape((-1))
    for ratio in params.ratios:
        time_start = time.time()
        subset_label = all_train_labels[:int(all_train_labels.shape[0] *
                                             ratio)]
        for layer in range(0, 2):  ## ATTENTION!!!
            concate = []
            for j in range(0, 50000, 5000):
                a_batch = load_data(
                    os.path.join(
                        params.save_data,
                        "K_transform_batch_layer{}_batch{}".format(
                            layer, j)))
                concate.append(a_batch)
            concate = np.concatenate(concate, axis=0)
            concate = concate[:int(all_train_labels.shape[0] * ratio)]
            # num_PCA_kernels = [80, 125]
            mo3 = LAFR()
            mo3.fit(concate, subset_label)
            prediction = mo3.predict(concate)
            save_data(
                prediction,
                os.path.join(
                    params.save_data,
                    "sinple_predict_ratio{}_layer{}".format(ratio, layer)))
            calculate_acc(prediction, subset_label)
            save_data(
                mo3,
                os.path.join(
                    params.save_data,
                    'simple_pred_layer{}_ratio{}'.format(layer, ratio)))
        print("Time cost - simple_pred:", time.time() - time_start)
Example #3
def generate_post_preds(train=True):
    """
    Generate the predictions of the post model on the modified training
    and validation datasets.

    The post model is also trained if train=True.

    """
    x_train = np.load('data/x_train_new_2_loss.npy')
    y_train = np.load('data/y_train.npy')
    x_val = np.load('data/x_val_new_2_loss.npy')
    y_val = np.load('data/y_val.npy')
    with open('data/id_to_word.pkl', 'rb') as f:
        id_to_word = pickle.load(f)
    model = create_original_model()

    if train:
        filepath="./models_new/post_2_loss.hdf5"
        checkpoint = ModelCheckpoint(filepath, monitor='val_acc', 
            verbose=1, save_best_only=True, mode='max')
        callbacks_list = [checkpoint]
        model.fit(x_train, y_train,
                  validation_data=(x_val, y_val),
                  callbacks=callbacks_list,
                  epochs=epochs, batch_size=batch_size)

    model.load_weights('./models_new/post_2_loss.hdf5', 
        by_name=True) 

    pred_train = model.predict(x_train, verbose=1, batch_size=1000)
    pred_val = model.predict(x_val, verbose=1, batch_size=1000)
    if not train:
        print('The val accuracy is {}'.format(calculate_acc(pred_val, y_val)))
        print('The train accuracy is {}'.format(calculate_acc(pred_train, y_train)))
Example #4
def train(net, train_loader, optimizer, loss_func):
    ''' Performs one training epoch of LSTM.

    Arguments:
        net (nn.Module): RNN (currently LSTM)
        train_loader (DataLoader): load object for train data
        optimizer: optimizer object for net parameters
        loss_func: criterion function used for backprop
    Returns:
        epoch_loss (torch.float): mean loss value for all
                                batches
        epoch_acc (torch.float): mean acc value for all batches
    '''
    net.train()

    epoch_loss = 0
    epoch_acc = 0

    for input, labels in train_loader:
        input, labels = input.to(device), labels.to(device)

        optimizer.zero_grad()
        output = net(input).squeeze(1)
        loss = loss_func(output, labels)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += calculate_acc(output, labels)

    epoch_loss /= len(train_loader)
    epoch_acc /= len(train_loader)

    return epoch_loss, epoch_acc
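
In the PyTorch `train` and `evaluate` loops (Examples #4 and #10), `calculate_acc(output, labels)` is accumulated per batch. Since the output is squeezed to a single logit per sample, a plausible sketch for a binary classifier is shown below; thresholding the raw logits at 0 (equivalent to sigmoid > 0.5) is an assumption about the loss setup.

import torch

def calculate_acc(output, labels):
    # Hypothetical sketch: threshold raw logits at 0 and return the
    # fraction of predictions that match the labels in this batch.
    preds = (output > 0).float()
    return (preds == labels).float().mean()
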
Example #5
    def step(self, batch):
        self.model.train()
        self.optim.zero_grad()
        img, target = batch
        img, target = img.cuda(), target.cuda()
        #
        # Target inputs
        try:
            inputs_exemplar = next(self.exemplar_iter)
        except StopIteration:
            self.exemplar_iter = iter(self.exemplar_dl)
            inputs_exemplar = next(self.exemplar_iter)

        img_exemplar, target_exemplar = inputs_exemplar
        img_exemplar, target_exemplar = img_exemplar.cuda(), target_exemplar.cuda()
        # source
        img = torch.cat([img,img_exemplar],dim=0)
        outputs = self.model(img)
        loss = self.loss_func(outputs, target)
        #

        if self.mix_precision:
            with amp.scale_loss(loss, self.optim) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        self.optim.step()

        # acc = (score.max(1)[1] == target).float().mean()
        acc = calculate_acc(self.cfg, outputs, target)

        self.loss_avg.update(loss.cpu().item())
        self.acc_avg.update(acc.cpu().item())

        return self.loss_avg.avg, self.acc_avg.avg
Example #6
    def step(self, batch):
        self.model.train()
        self.optim.zero_grad()
        img, target, histlabels = batch
        img, target, histlabels = img.cuda(), target.cuda(), histlabels.cuda()
        outputs = self.model(img)

        loss, tpl, ce, hlce = self.loss_func(outputs,
                                             target,
                                             histlabels,
                                             in_detail=True)

        if self.current_iteration % self.cfg.SOLVER.TENSORBOARD.LOG_PERIOD == 0:
            if self.summary_writer:
                self.summary_writer.add_scalar('Train/tpl', tpl,
                                               self.current_iteration)
                self.summary_writer.add_scalar('Train/ce', ce,
                                               self.current_iteration)
                self.summary_writer.add_scalar('Train/hlce', hlce,
                                               self.current_iteration)

        if self.mix_precision:
            with amp.scale_loss(loss, self.optim) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        self.optim.step()

        # acc = (score.max(1)[1] == target).float().mean()
        acc = calculate_acc(self.cfg, outputs, target)

        self.loss_avg.update(loss.cpu().item())
        self.acc_avg.update(acc.cpu().item())

        return self.loss_avg.avg, self.acc_avg.avg
Example #7
def train_net(model, loss, config, inputs, labels, batch_size, disp_freq):

    iter_counter = 0
    loss_list = []
    acc_list = []

    for input, label in data_iterator(inputs, labels, batch_size):
        target = onehot_encoding(label, 10)
        iter_counter += 1

        # forward net
        output = model.forward(input)
        # calculate loss
        loss_value = loss.forward(output, target)
        # generate gradient w.r.t loss
        grad = loss.backward(output, target)
        # backward gradient

        model.backward(grad)
        # update layers' weights
        model.update(config)

        acc_value = calculate_acc(output, label)
        loss_list.append(loss_value)
        acc_list.append(acc_value)

        if iter_counter % disp_freq == 0:
            msg = '  Training iter %d, batch loss %.4f, batch acc %.4f' % (iter_counter, np.mean(loss_list), np.mean(acc_list))
            loss_list = []
            acc_list = []
            LOG_INFO(msg)
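
Example #7 and the other NumPy-framework `train_net`/`test_net` functions on this page pass the raw forward output and the integer label vector to `calculate_acc`. A one-line sketch consistent with that call, assuming a (batch, 10) score matrix, is:

import numpy as np

def calculate_acc(output, label):
    # Hypothetical sketch: output is assumed to be a (batch, num_classes)
    # score matrix from model.forward, label a vector of integer class ids.
    return np.mean(np.argmax(output, axis=1) == label)
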
Example #8
def test_net(model, loss, inputs, labels, batch_size, epoch, layer_name):
    loss_list = []
    acc_list = []

    for input, label in data_iterator(inputs,
                                      labels,
                                      batch_size,
                                      shuffle=False):
        target = onehot_encoding(label, 10)
        output, output_visualize = model.forward(input,
                                                 visualize=True,
                                                 layer_name=layer_name)
        # collapse output_visualize into 1 channel
        output_visualize = np.sum(output_visualize, axis=(1))

        loss_value = loss.forward(output, target)
        acc_value = calculate_acc(output, label)
        loss_list.append(loss_value)
        acc_list.append(acc_value)

    msg = '    Testing, total mean loss %.5f, total acc %.5f' % (
        np.mean(loss_list), np.mean(acc_list))
    LOG_INFO(msg)

    # save weights and biases
    model.save_weights(loss.name, epoch)

    return np.mean(loss_list), np.mean(
        acc_list
    ), output_visualize  # output_visualize: batch_size x height x width
Example #9
def train_net(model, loss, config, inputs, labels, batch_size, disp_freq):

    iter_counter = 0
    loss_list = []
    acc_list = []

    for input, label in data_iterator(inputs, labels, batch_size):
        target = onehot_encoding(label, 10)
        iter_counter += 1

        # forward net
        output = model.forward(input)
        # calculate loss
        loss_value = loss.forward(output, target)
        # generate gradient w.r.t loss
        grad = loss.backward(output, target)
        # backward gradient
        model.backward(grad)
        # update layers' weights
        model.update(config)

        acc_value = calculate_acc(output, label)
        loss_list.append(loss_value)
        acc_list.append(acc_value)

        if iter_counter % disp_freq == 0:
            msg = '  Training iter %d, batch loss %.4f, batch acc %.4f' % (
                iter_counter, np.mean(loss_list), np.mean(acc_list))
            loss_list = []
            acc_list = []
            LOG_INFO(msg)
Example #10
def evaluate(net, set_loader, loss_func):
    ''' Evaluates the performance of the RNN
        on the given set.

    Arguments:
        net (nn.Module): RNN (currently LSTM)
        set_loader (DataLoader): load object for val/test data
        loss_func: criterion function used for backprop
    Returns:
        eval_loss (torch.float): mean loss value for all
                                batches
        eval_acc (torch.float): mean acc value for all batches
    '''
    net.eval()
    eval_loss = 0
    eval_acc = 0

    for input, labels in set_loader:
        input, labels = input.to(device), labels.to(device)

        output = net(input).squeeze(1)

        loss = loss_func(output, labels)

        eval_loss += loss.item()
        eval_acc += calculate_acc(output, labels)

    eval_loss /= len(set_loader)
    eval_acc /= len(set_loader)

    return eval_loss, eval_acc
Example #11
    def step(self, batch):
        self.model.train()
        self.optim.zero_grad()
        #
        img, target = batch
        img, target = img.cuda(), target.cuda()
        outputs = self.model(img)
        #
        if self.cfg.SOLVER.MIXUP.USE:

            mx_img, mx_target1, mx_target2, lamb = self.posneg_mixup(
                img, target, self.cfg.DATALOADER.NUM_INSTANCE,
                self.cfg.SOLVER.MIXUP.NEG_INSTANCE,
                self.cfg.SOLVER.MIXUP.ALPHA)
            mx_outputs = self.model(mx_img)
            loss = self.loss_func(outputs, target, mx_outputs, mx_target1,
                                  mx_target2, lamb)

        else:
            loss = self.loss_func(outputs, target)

        if self.mix_precision:
            with amp.scale_loss(loss, self.optim) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        self.optim.step()

        # acc = (score.max(1)[1] == target).float().mean()
        acc = calculate_acc(self.cfg, outputs, target)

        self.loss_avg.update(loss.cpu().item())
        self.acc_avg.update(acc.cpu().item())

        return self.loss_avg.avg, self.acc_avg.avg
Example #12
def test_net(model, loss, inputs, labels, batch_size):

    acc_value = 0.0
    count = 0
    for input, label in data_iterator(inputs, labels, batch_size, shuffle=False):
        output = model.forward(input)
        acc_value += calculate_acc(output, label)
        count += 1
    return acc_value / count
Example #13
def test_net(model, loss, input_feats, labels, test_mask, label_kind):
    target = onehot_encoding(labels, label_kind)
    output = model.forward(input_feats)

    # set mask
    output[~test_mask] = target[~test_mask]
    loss_value = loss.forward(output, target)

    acc_value = calculate_acc(output, labels, np.sum(test_mask))

    msg = '    Testing, total mean loss %.5f, total acc %.5f' % (loss_value,
                                                                 acc_value)
    LOG_INFO(msg)
Example #14
def train_net(model, loss, config, inputs, labels, batch_size, disp_freq, Loss,
              Acur):

    iter_counter = 0
    loss_list = []
    acc_list = []
    ll = []
    ac = []

    for input, label in data_iterator(inputs, labels, batch_size):
        target = onehot_encoding(label, 10)
        iter_counter += 1

        # forward net
        output = model.forward(input)
        # calculate loss
        loss_value = loss.forward(output, target)
        # generate gradient w.r.t loss
        grad = loss.backward(output, target)
        # backward gradient
        model.backward(grad)

        if loss_value > 1:
            config['learning_rate'] = 0.2
        elif loss_value > 0.5:
            config['learning_rate'] = 0.1
        elif loss_value > 0.2:
            config['learning_rate'] = 0.05
        else:
            config['learning_rate'] = max(loss_value / 5.0, 0.005)

        # update layers' weights
        model.update(config)

        acc_value = calculate_acc(output, label)
        loss_list.append(loss_value)
        acc_list.append(acc_value)
        ll.append(loss_value)
        ac.append(acc_value)

        if iter_counter % disp_freq == 0:
            msg = '  Training iter %d, batch loss %.4f, batch acc %.4f' % (
                iter_counter, np.mean(loss_list), np.mean(acc_list))
            Loss.append(np.mean(loss_list))
            Acur.append(np.mean(acc_list))
            loss_list = []
            acc_list = []
            LOG_INFO(msg)

    Loss.append(np.mean(ll))
    Acur.append(np.mean(ac))
Example #15
def test_net(model, loss, inputs, labels, batch_size):
    loss_list = []
    acc_list = []

    for input, label in data_iterator(inputs, labels, batch_size, shuffle=False):
        target = onehot_encoding(label, 10)
        output = model.forward(input)
        loss_value = loss.forward(output, target)
        acc_value = calculate_acc(output, label)
        loss_list.append(loss_value)
        acc_list.append(acc_value)

    msg = '    Testing, total mean loss %.5f, total acc %.5f' % (np.mean(loss_list), np.mean(acc_list))
    LOG_INFO(msg)
Example #16
def test_net(model, loss, inputs, labels, batch_size):
    loss_list = []
    acc_list = []

    for input, label in data_iterator(inputs, labels, batch_size, shuffle=False):
        target = onehot_encoding(label, 10)
        output = model.forward(input)
        loss_value = loss.forward(output, target)
        acc_value = calculate_acc(output, label)
        loss_list.append(loss_value)
        acc_list.append(acc_value)

    msg = '    Testing, total mean loss %.5f, total acc %.5f' % (np.mean(loss_list), np.mean(acc_list))
    LOG_INFO(msg)
Example #17
    def step(self, batch):
        self.model.train()
        self.optim.zero_grad()
        self.center_optim.zero_grad()

        img, target = batch
        img, target = img.cuda(), target.cuda()
        if self.cfg.MODEL.USE_COS:
            outputs = self.model(img, target)
        else:
            outputs = self.model(img)

        loss, tpl, ce, ct = self.loss_func(outputs, target, in_detail=True)

        if self.current_iteration % self.cfg.SOLVER.TENSORBOARD.LOG_PERIOD == 0:
            if self.summary_writer:
                self.summary_writer.add_scalar('Train/tpl', tpl,
                                               self.current_iteration)
                self.summary_writer.add_scalar('Train/ce', ce,
                                               self.current_iteration)
                self.summary_writer.add_scalar('Train/ct', ct,
                                               self.current_iteration)

        if self.mix_precision:
            with amp.scale_loss(loss, self.optim) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        self.optim.step()
        if self.mix_precision:
            # [todo] fix the center's step
            pass
            # for param in amp.master_params(self.center_optim):
            #     param.grad.data *= (0.5 / self.cfg.SOLVER.CENTER_LOSS.WEIGHT)
        else:
            for param in self.loss_func.center_criterion.parameters():
                # param.grad.data *= (1.0 / self.cfg.SOLVER.CENTER_LOSS.WEIGHT)
                param.grad.data *= (self.cfg.SOLVER.CENTER_LOSS.ALPHA /
                                    self.cfg.SOLVER.CENTER_LOSS.WEIGHT)

        self.center_optim.step()
        # acc = (score.max(1)[1] == target).float().mean()
        acc = calculate_acc(self.cfg, outputs, target)

        self.loss_avg.update(loss.cpu().item())
        self.acc_avg.update(acc.cpu().item())

        return self.loss_avg.avg, self.acc_avg.avg
Example #18
def train_net(model, loss, config, inputs, labels, batch_size, disp_freq,
              test_inputs, test_labels):

    iter_counter = 0
    train_loss_list, train_acc_list = [], []
    test_loss_list, test_acc_list = [], []
    # loss_list, acc_list = [], []

    for input, label in data_iterator(inputs, labels, batch_size):
        # train_loss_value, train_acc_value = test_net(model, loss, input, labels, 10000000)
        # train_loss_list.append(train_loss_value)
        # train_acc_list.append(train_acc_value)

        test_loss_value, test_acc_value = test_net(model, loss, test_inputs,
                                                   test_labels, 10000000)
        test_loss_list.append(test_loss_value)
        test_acc_list.append(test_acc_value)

        target = onehot_encoding(label, 10)
        iter_counter += 1

        # forward net
        output = model.forward(input)
        # calculate loss
        loss_value = loss.forward(output, target)
        # generate gradient w.r.t loss
        grad = loss.backward(output, target)
        # backward gradient

        model.backward(grad)
        # update layers' weights
        model.update(config)

        acc_value = calculate_acc(output, label)
        # loss_list.append(loss_value)
        # acc_list.append(acc_value)

        train_loss_list.append(loss_value)
        train_acc_list.append(acc_value)

        # if iter_counter % disp_freq == 0:
        #     msg = '  Training iter %d, batch loss %.4f, batch acc %.4f' % (iter_counter, np.mean(loss_list), np.mean(acc_list))
        #     loss_list = []
        #     acc_list = []
        #     LOG_INFO(msg)

    return train_loss_list, train_acc_list, test_loss_list, test_acc_list
Example #19
def test_net(model, loss, inputs, labels, batch_size):
    loss_list = []
    acc_list = []

    # test model with all the test data
    for input, label in data_iterator(inputs, labels, batch_size, shuffle=False):
        # get the expected value of this batch of input
        target = onehot_encoding(label, 10)
        output = model.forward(input)
        # calculate loss of this batch
        loss_value = loss.forward(output, target)
        acc_value = calculate_acc(output, label)
        loss_list.append(loss_value)
        acc_list.append(acc_value)

    # use the mean of all batch's loss and accuracy as the final result
    msg = '    Testing, total mean loss %.5f, total acc %.5f' % (np.mean(loss_list), np.mean(acc_list))
    LOG_INFO(msg)
Example #20
def train_net(model, loss, config, inputs, labels, batch_size, disp_freq, loss_file):

    iter_counter = 0
    loss_list = []
    acc_list = []

    for input, label in data_iterator(inputs, labels, batch_size):
        target = onehot_encoding(label, 10)
        iter_counter += 1

        # print("Debug: ", "input=", input.shape, " target=", target.shape)

        # forward net
        output = model.forward(input)
        # calculate loss
        loss_value = loss.forward(output, target)
        # generate gradient w.r.t loss
        grad = loss.backward(output, target)
        # backward gradient

        model.backward(grad)
        # update layers' weights
        model.update(config)

        acc_value = calculate_acc(output, label)
        loss_list.append(loss_value)
        acc_list.append(acc_value)

        # per-iteration logging (disabled):
        # outf = open(loss_file, "a")
        # outf.write(str(loss_value) + ' ' + str(acc_value) + '\n')
        # outf.close()

        if iter_counter % disp_freq == 0:
            msg = '  Training iter %d, batch loss %.4f, batch acc %.4f' % (
                iter_counter, np.mean(loss_list), np.mean(acc_list))

            outf = open(loss_file, "a")
            outf.write(str(np.mean(loss_list)) + ' ' + str(np.mean(acc_list)) + '\n')
            outf.close()

            loss_list = []
            acc_list = []
            LOG_INFO(msg)
Example #21
    def step(self, batch):
        self.model.train()
        self.optim.zero_grad()
        img, target = batch
        img, target = img.cuda(), target.cuda()
        #
        # Target inputs
        try:
            inputs_exemplar = next(self.exemplar_iter)
        except StopIteration:
            self.exemplar_iter = iter(self.exemplar_dl)
            inputs_exemplar = next(self.exemplar_iter)

        img_exemplar, target_exemplar = inputs_exemplar
        img_exemplar, target_exemplar = img_exemplar.cuda(
        ), target_exemplar.cuda()
        # source
        outputs = self.model(img)
        loss = self.loss_func(outputs, target)
        #
        exemplar_outputs = self.model(img_exemplar, 'exemplar_feat')
        loss_un = self.exemplar_memory(exemplar_outputs,
                                       target_exemplar,
                                       epoch=self.train_epoch)

        loss = (1 - self.cfg.DATASETS.EXEMPLAR.MEMORY.LAMBDA
                ) * loss + self.cfg.DATASETS.EXEMPLAR.MEMORY.LAMBDA * loss_un

        if self.mix_precision:
            with amp.scale_loss(loss, self.optim) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        self.optim.step()

        # acc = (score.max(1)[1] == target).float().mean()
        acc = calculate_acc(self.cfg, outputs, target)

        self.loss_avg.update(loss.cpu().item())
        self.acc_avg.update(acc.cpu().item())

        return self.loss_avg.avg, self.acc_avg.avg
Example #22
def train_net(model, loss, config, inputs, labels, batch_size, disp_freq, Loss, Acur):

    iter_counter = 0
    loss_list = []
    acc_list = []
    ll = []
    ac = []

    # train model with
    for input, label in data_iterator(inputs, labels, batch_size):
        target = onehot_encoding(label, 10)
        iter_counter += 1

        # forward net
        output = model.forward(input)
        # calculate loss value of the whole batch
        loss_value = loss.forward(output, target)
        # generate gradient w.r.t loss, this is actually the local gradient contribution of the output layer
        grad = loss.backward(output, target)
        # backward gradient

        model.backward(grad)

        # update layers' weights: recount after the whole backward procedure
        model.update(config)

        acc_value = calculate_acc(output, label)
        loss_list.append(loss_value)
        ll.append(loss_value)
        acc_list.append(acc_value)
        ac.append(acc_value)

        if iter_counter % disp_freq == 0:
            msg = '  Training iter %d, batch loss %.4f, batch acc %.4f' % (iter_counter, np.mean(loss_list), np.mean(acc_list))
            loss_list = []
            acc_list = []
            LOG_INFO(msg)
    Loss.append(np.mean(ll))
    Acur.append(np.mean(ac))
Example #23
def train_net(model, loss, config, input_feats, labels, train_mask,
              label_kind):
    target = onehot_encoding(labels, label_kind)

    # forward net
    output = model.forward(input_feats)
    # set mask
    output[~train_mask] = target[~train_mask]
    # calculate loss
    loss_value = loss.forward(output, target)
    # generate gradient w.r.t loss
    grad = loss.backward(output, target)
    # backward gradient
    model.backward(grad)
    # update layers' weights
    model.update(config)

    acc_value = calculate_acc(output, labels, np.sum(train_mask))

    msg = '  Training batch loss %.4f, batch acc %.4f' % (loss_value,
                                                          acc_value)
    LOG_INFO(msg)
Example #24
    def step(self, batch):
        self.model.train()
        self.optim.zero_grad()
        img, target = batch
        img, target = img.cuda(), target.cuda()
        outputs = self.model(img)

        loss = self.loss_func(outputs, target)

        if self.mix_precision:
            with amp.scale_loss(loss, self.optim) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        self.optim.step()

        # acc = (score.max(1)[1] == target).float().mean()
        acc = calculate_acc(self.cfg, outputs, target)

        self.loss_avg.update(loss.cpu().item())
        self.acc_avg.update(acc.cpu().item())

        return self.loss_avg.avg, self.acc_avg.avg
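
The trainer `step` methods on this page (e.g. Examples #6, #17, and #24) call `calculate_acc(self.cfg, outputs, target)`. The commented-out line above hints at its behaviour; a sketch along those lines, assuming the model may return either a score tensor or a `(score, feat)` tuple, is:

def calculate_acc(cfg, outputs, target):
    # Hypothetical sketch mirroring the commented-out accuracy line:
    # take the classification scores, pick the top-1 class, and compare
    # with the targets. cfg is accepted only for signature compatibility.
    score = outputs[0] if isinstance(outputs, (tuple, list)) else outputs
    return (score.max(1)[1] == target).float().mean()
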
Example #25
def main(cfg):
    # setting up output directories, and writing to stdout
    make_dirs(cfg.stdout_dir, replace=False)
    if cfg.train:
        run_type = 'train'
    else:
        if 'weight' in cfg.prune_type.lower():
            run_type = 'weight-prune'
        else:
            run_type = 'unit-prune'
    sys.stdout = open(
        '{}/stdout_{}_{}.txt'.format(cfg.stdout_dir, cfg.model_name, run_type),
        'w')
    print(cfg)
    print('\n')
    sys.stdout.flush()

    # if train mode, replace the previous plot and ckpt directories; if in prune mode, use existing directories
    if cfg.plot:
        make_dirs(os.path.join(cfg.plot_dir, cfg.model_name),
                  replace=cfg.train)
    if cfg.save_model:
        make_dirs(os.path.join(cfg.model_dir, cfg.model_name),
                  replace=cfg.train)

    # set random seed
    if cfg.random_seed != 0:
        random_seed = cfg.random_seed
    else:
        random_seed = random.randint(1, 100000)
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # set device as cuda or cpu
    if cfg.use_gpu and torch.cuda.is_available():
        # reproducibility using cuda
        torch.cuda.manual_seed(random_seed)
        cudnn.deterministic = True
        cudnn.benchmark = False
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
        if cfg.use_gpu:
            print('gpu option was set to <True>, but no cuda device was found')
            print('\n')

    # datasets and dataloaders
    # normalizing training and validation images to [0, 1] suffices for the purposes of our research objective
    # in training, <drop_last> minibatch in an epoch set to <True> for simplicity in tracking training performance
    dataset_train = MNIST(root='./data/mnist',
                          train=True,
                          download=True,
                          transform=transforms.Compose([transforms.ToTensor()
                                                        ]),
                          target_transform=None)
    dataloader_train = DataLoader(dataset=dataset_train,
                                  batch_size=cfg.batch_size,
                                  shuffle=cfg.shuffle,
                                  num_workers=cfg.num_workers,
                                  pin_memory=True,
                                  drop_last=True)

    dataset_val = MNIST(root='./data/mnist',
                        train=False,
                        download=True,
                        transform=transforms.Compose([transforms.ToTensor()]),
                        target_transform=None)
    dataloader_val = DataLoader(dataset=dataset_val,
                                batch_size=100,
                                shuffle=False,
                                num_workers=cfg.num_workers,
                                pin_memory=True,
                                drop_last=False)

    # automatically compute number of classes
    targets = np.asarray(dataset_train.targets)
    c = np.unique(targets).shape[0]

    # define model
    # weights initialized using Kaiming uniform (He initialization)
    # number of units per hidden layer is passed in as an argument
    net = Net(np.prod(cfg.img_size), c, cfg.units).to(device)

    criterion = nn.CrossEntropyLoss()

    if cfg.train:
        # training mode

        if cfg.use_sgd:
            optimizer = optim.SGD(params=net.parameters(),
                                  lr=cfg.lr,
                                  momentum=cfg.momentum,
                                  nesterov=cfg.use_nesterov)
        else:
            optimizer = optim.Adam(params=net.parameters(),
                                   lr=cfg.lr,
                                   betas=(cfg.beta1, cfg.beta2))

        # tracking training and validation stats over epochs
        epochs = []
        train_loss_epochs, val_loss_epochs = [], []
        train_acc_epochs, val_acc_epochs = [], []

        # best model is defined as model with best performing validation loss
        best_loss = float('inf')
        for epoch in range(cfg.epochs):
            # tracking training and validation stats over a given epoch
            train_loss_epoch, val_loss_epoch = [], []
            train_acc_epoch, val_acc_epoch = [], []

            # training set
            for i, (x, y) in enumerate(dataloader_train):
                x, y = x.to(device), y.to(device)

                optimizer.zero_grad()
                logits = net(x)
                loss = criterion(logits, y)
                loss.backward()
                optimizer.step()

                acc = calculate_acc(logits, y)

                append((train_loss_epoch, loss.item()),
                       (train_acc_epoch, acc.item()))

            # validation set
            with torch.no_grad():
                for i, (x, y) in enumerate(dataloader_val):
                    x, y = x.to(device), y.to(device)

                    logits = net(x)
                    loss = criterion(logits, y)

                    acc = calculate_acc(logits, y)

                    append((val_loss_epoch, loss.item()),
                           (val_acc_epoch, acc.item()))

            train_loss_epoch, val_loss_epoch = get_average(
                train_loss_epoch), get_average(val_loss_epoch)
            train_acc_epoch, val_acc_epoch = get_average(
                train_acc_epoch), get_average(val_acc_epoch)

            print('train_epoch{:0=3d}_loss{:.4f}_acc{:.4f}'.format(
                epoch + 1, train_loss_epoch, train_acc_epoch))
            print('valid_epoch{:0=3d}_loss{:.4f}_acc{:.4f}'.format(
                epoch + 1, val_loss_epoch, val_acc_epoch))
            print('\n')
            sys.stdout.flush()

            if cfg.plot:
                append((epochs, epoch + 1),
                       (train_loss_epochs, train_loss_epoch),
                       (val_loss_epochs, val_loss_epoch),
                       (train_acc_epochs, train_acc_epoch),
                       (val_acc_epochs, val_acc_epoch))

                plot_line(epochs, train_loss_epochs, val_loss_epochs,
                          'Epoch Number', 'Loss', cfg)
                plot_line(epochs, train_acc_epochs, val_acc_epochs,
                          'Epoch Number', 'Accuracy', cfg)

            if val_loss_epoch < best_loss:
                best_loss = val_loss_epoch
                print('New best model at epoch {:0=3d} with val_loss {:.4f}'.
                      format(epoch + 1, best_loss))
                print('\n')
                if cfg.save_model:
                    # save model when validation loss improves
                    save_name = '{}_net_epoch{:0=3d}_val_loss{:.4f}'.format(
                        cfg.model_name, epoch + 1, best_loss)
                    torch.save(
                        net.state_dict(),
                        os.path.join(cfg.model_dir, cfg.model_name,
                                     '{}.pth'.format(save_name)))
                    with open(
                            os.path.join(cfg.model_dir, cfg.model_name,
                                         '{}.txt'.format(cfg.model_name)),
                            'w') as file:
                        file.write('{}.pth'.format(save_name))

    else:
        # pruning mode

        # checks on arguments passed in
        for k in cfg.sparsity:
            assert 0 <= k <= 1
        if cfg.use_sparse_mul:
            assert cfg.to_sparse

        # load model
        with open(
                os.path.join(cfg.model_dir, cfg.model_name,
                             '{}.txt'.format(cfg.model_name)), 'r') as file:
            load_name = file.readline()
        net.load_state_dict(
            torch.load(
                os.path.join(cfg.model_dir, cfg.model_name,
                             '{}'.format(load_name))))
        net.eval()

        # select pruning approach to use
        if 'weight' in cfg.prune_type.lower():
            prune = weight_prune
        else:
            prune = unit_prune

        sparsities = []
        val_loss_sparse, val_acc_sparse = [], []
        time_sparsities = []
        for k in cfg.sparsity:
            val_loss_k, val_acc_k = [], []
            time_k = []

            # copy network so that the sparsity changes are not additive for each k
            net_sparse = copy.deepcopy(net)

            pruned_weights = []
            # prune model, except for the last layer
            for (i, p) in enumerate(net_sparse.parameters()):
                if i < len(cfg.units):
                    original_weights = copy.deepcopy(p.data)
                    if cfg.plot:
                        # plot magnitude of original weights (for comparison to post-pruned weights)
                        plot_hist([
                            torch.abs(
                                original_weights.flatten()).cpu().numpy()
                        ], ['b'], cfg.prune_type, i + 1, k,
                                  'Non-Pruned Weight Magnitudes', 'Counts',
                                  cfg)
                    prune(p.data, k)
                    if cfg.plot:
                        # plot original magnitudes of pruned weights, and magnitudes of remaining weights, separately
                        pruned_weights_non_zero = torch.abs(
                            original_weights.flatten()[p.data.flatten() != 0])
                        pruned_weights_zeroed = torch.abs(
                            original_weights.flatten()[p.data.flatten() == 0])
                        plot_hist([
                            pruned_weights_non_zero.cpu().numpy(),
                            pruned_weights_zeroed.cpu().numpy()
                        ], ['g', 'r'], cfg.prune_type, i + 1, k,
                                  'Weight Magnitudes', 'Counts', cfg)
                        plot_hist([pruned_weights_non_zero.cpu().numpy()],
                                  ['k'], cfg.prune_type, i + 1, k,
                                  'Surviving Weight Magnitudes', 'Counts', cfg)
                if cfg.to_sparse and i < len(cfg.units):
                    pruned_weights.append(p.data.to_sparse())
                else:
                    pruned_weights.append(p.data)

            with torch.no_grad():
                for i, (x, y) in enumerate(dataloader_val):
                    x, y = x.to(device), y.to(device)

                    start = time.time()
                    logits = forward(x, pruned_weights, cfg.use_sparse_mul)
                    end = time.time()
                    loss = criterion(logits, y)

                    acc = calculate_acc(logits, y)

                    append((val_loss_k, loss.item()), (val_acc_k, acc.item()),
                           (time_k, end - start))

            val_loss_k, val_acc_k, time_k = get_average(
                val_loss_k), get_average(val_acc_k), get_average(time_k)

            print('valid_{}_k{:.2f}_loss{:.4f}_acc{:.4f}'.format(
                run_type, k, val_loss_k, val_acc_k))
            print('valid_{}_k{:.2f}_time/minibatch{:.6f}'.format(
                run_type, k, time_k))
            print('\n')
            sys.stdout.flush()

            if cfg.plot:
                append((sparsities, k), (val_loss_sparse, val_loss_k),
                       (val_acc_sparse, val_acc_k), (time_sparsities, time_k))

                plot_line(sparsities, [], val_loss_sparse,
                          'Sparsity {} Prune'.format(cfg.prune_type), 'Loss',
                          cfg)
                plot_line(sparsities, [], val_acc_sparse,
                          'Sparsity {} Prune'.format(cfg.prune_type),
                          'Accuracy', cfg)
                plot_line(sparsities, [], time_sparsities,
                          'Sparsity {} Prune'.format(cfg.prune_type), 'Time',
                          cfg)

            if cfg.save_model:
                torch.save(
                    net_sparse.state_dict(),
                    os.path.join(
                        cfg.model_dir, cfg.model_name,
                        '{}_sparse_net_{}_val_loss{:.4f}.pth'.format(
                            cfg.model_name, run_type, val_loss_k)))
Example #26
                            test_outputs = rnn_model(test_x)
                        elif 'gru' in args.rnn_model_type:
                            test_outputs, _, _ = rnn_model(test_x)
                        elif 'lstm' in args.rnn_model_type:
                            test_outputs, _, _ = rnn_model(test_x)
                        test_outputs = test_outputs.view(
                            test_x.size(1), num_class)
                        if use_loss_weights:
                            test_loss = rnn_loss_function(
                                test_outputs, test_labels, weight=loss_weights)
                        else:
                            test_loss = rnn_loss_function(
                                test_outputs, test_labels)
                        test_average_loss += test_loss.item()

                        curr_correct, curr_total, corr_labels, incorr_labels = calculate_acc(
                            test_outputs, test_labels)
                        correct_labels.extend(corr_labels.tolist())
                        incorrect_labels.extend(incorr_labels.tolist())
                        total += curr_total
                        correct += curr_correct

                    _ = print_per_label_accu(Counter(correct_labels), Counter(
                        incorrect_labels), test_state_map)
                    test_average_loss /= len(test_loader)
                    accuracy = float(correct) / total

                    print('[INFO][Test] Testing loss: {}. Overall testing accuracy: {}'.format(
                        test_average_loss, accuracy))
                    rnn_model.train()  # Now returning to train mode
    else:
        rnn_model = torch.load(args.rnn_model).to(device)
Example #27
    for layer in range(0, 2):   ## ATTENTION!!!
        K_transform = load_data(os.path.join(params.save_data, "K_transform_layer{}".format(layer)))
        batch_size = 5000
        con = []
        for b in range(0, ph_out[0].shape[0], batch_size):
            data = ph_out[layer][b: b + batch_size]
            data = K_transform.predict(data)
            # mean1 = np.mean(data, axis=1, keepdims=True)
            # data = mean1 - data
            # data = np.where(data < 0, 0, data)
            # data = np.sum(data, axis=1)
            con.append(data)
        con = np.concatenate(con, axis=0)
        mo3 = load_data(os.path.join(params.save_data, 'simple_pred_layer{}_ratio{}'.format(layer, ratio)))
        prediction = mo3.predict(con)
        calculate_acc(prediction, test_labels)
        concate.append(prediction)


    for layer in range(2, params.num_layers):   ## ATTENTION!!!
        data = ph_out[layer]
        data = np.reshape(data, newshape=(data.shape[0], -1))
        lag = load_data(os.path.join(params.save_data, 'LAG_{}_{}'.format(layer, ratio)))
        lag_pred = lag.predict_proba(data)
        concate.append(lag_pred)
    concate = np.concatenate(concate, axis=1)
    print("Concate shape:", concate.shape)
    rf = load_data(os.path.join(params.save_data, 'RF_{}'.format(ratio)))
    prediction = rf.predict(concate)
    # calculate_acc(prediction, subset_label)
    print("ACC=", np.sum(prediction.reshape((-1)) == test_labels.reshape((-1))) / test_labels.shape[0] * 100)
Example #28
def cross_validation(k, X, y, params, regression):
    """
    Performing regression using K-Cross Validation.

    This function is used to generate a model, given data, a regression function
    and a set of parameters.

    Args:
        k (int): k for cross validation
        X (nd.array): training samples of form N x D
        y (nd.array): training samples of form N
        params (dict): dictionary of training samples
        regression (function): regression function

    Returns:
        float: mean loss on validation datasets
        float: mean accuracy on validation datasets

    Raise:
        ValueError: if the regression function raises an error
    """

    # Cross-validation
    k_indices = build_k_indices(y, k)
    accuracies = []
    losses = []

    # print(f"(max_iters: {params['max_iters']}, gamma: {params['gamma']}, lambda: {params['lambda_']})")
    # each iteration for each split of training and validation
    for k_iteration in range(k):
        # split the data accordingly into training and validation
        X_train, Y_train, X_val, Y_val = cross_validation_iter(
            y, X, k_indices, k_iteration)
        # initial weights
        W_init = np.random.rand(D, )
        # initialize dictionary for the training regression model
        args_train = {
            "tx": X_train,
            "y": Y_train,
            "initial_w": W_init,
            "max_iters": params["max_iters"],
            "gamma": params["gamma"],
            "lambda_": params["lambda_"]
        }
        # try to train the model; if this doesn't work, raise an error
        try:
            W, loss_tr = regression(**args_train)
        except ValueError:
            print("Regression diverged with these parameters.")
            return None, None

        if "Logistic" in f_name:
            prediction_val_regression = sigmoid(X_val @ W)
        else:
            prediction_val_regression = X_val @ W
        # calculate prediction for the validation dataset
        prediction_val = create_labels(prediction_val_regression)
        # calculate corresponding loss and accuracy
        loss_val = calculate_mse_loss(Y_val, prediction_val)
        acc_val = calculate_acc(Y_val, prediction_val)
        losses.append(loss_val)
        accuracies.append(acc_val)
    # finally, generate the means
    mean_loss_val = np.array(losses).mean()
    mean_acc_val = np.array(accuracies).mean()

    return mean_loss_val, mean_acc_val
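
Example #28 calls `calculate_acc(Y_val, prediction_val)` with the ground-truth labels first. Assuming `create_labels` produces discrete class values, a minimal sketch is:

import numpy as np

def calculate_acc(y_true, y_pred):
    # Hypothetical sketch: fraction of predicted labels that match the
    # ground truth; both arguments are assumed to be 1-D label arrays.
    return np.mean(np.asarray(y_true) == np.asarray(y_pred))
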
Example #29
    #     validation_loss = 0.0
    #     for j, data in enumerate(testloader): # (10,000 / args.batch) batches
    #         inputs, labels = data
    #         inputs = inputs.to(device)
    #         labels = labels.to(device)

    #         outputs = net(inputs)
    #         loss = criterion(outputs, labels)

    #         validation_loss += loss.item()
    # Calculate training accuracy, top-1
    # train_acc = calculate_acc(trainloader, net, device)

    # Calculate validation accuracy
    net.eval()
    val_acc = calculate_acc(testloader, net, device)
    if val_acc > stats['best_acc']:
        stats['best_acc'] = val_acc
        stats['best_epoch'] = epoch + 1
        if args.save:
            # Save the checkpoint
            state = {
                'epoch': epoch,
                'optimizer': optimizer.state_dict(),
                'net': net.state_dict(),
                'stats': stats
            }
            torch.save(state, checkpoint_path)

    # Switch back to training mode
    net.train()
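
Example #29 uses yet another signature, `calculate_acc(testloader, net, device)`, which evaluates an entire DataLoader rather than a single batch. A sketch under that assumption:

import torch

def calculate_acc(loader, net, device):
    # Hypothetical sketch: run the network over a DataLoader and return
    # top-1 accuracy over the whole set.
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    return correct / total
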
Example #30
def event_tagger():
    # Read event data
    en_train = read_event_data('en/train.txt')
    en_dev = read_event_data('en/dev.txt')
    en_test = read_event_data('en/test.txt')

    it_train = read_event_data('it/train.txt')
    it_dev = read_event_data('it/dev.txt')
    it_test = read_event_data('it/test.txt')

    print('English TimeML:', len(en_train), len(en_dev), len(en_test))
    print('Italian News:', len(it_train), len(it_dev), len(it_test))

    tags = list(set(word_label[1] for sent in it_train for word_label in sent))
    print(len(tags))

    # By convention, the 0'th slot is reserved for padding.
    tags = ["<pad>"] + tags

    tag2idx = {tag: idx for idx, tag in enumerate(tags)}
    idx2tag = {idx: tag for idx, tag in enumerate(tags)}

    print(tag2idx)
    print(idx2tag)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased',
                                              do_lower_case=False)

    model = Net(vocab_size=len(tag2idx), device=device)
    model.to(device)
    model = nn.DataParallel(model)

    # One fine-tuning step
    train_dataset = EventDataset(en_train, tokenizer, tag2idx)

    train_iter = data.DataLoader(dataset=train_dataset,
                                 batch_size=8,
                                 shuffle=True,
                                 num_workers=1,
                                 collate_fn=pad)

    eval_dataset = EventDataset(it_test, tokenizer, tag2idx)

    test_iter = data.DataLoader(dataset=eval_dataset,
                                batch_size=8,
                                shuffle=False,
                                num_workers=1,
                                collate_fn=pad)

    criterion = nn.CrossEntropyLoss(ignore_index=0)

    num_epoch = 1
    base_lr = 0.001
    decay_factor = 0.2
    discriminative_fine_tuning = True
    gradual_unfreezing = False

    # params order top to bottom
    group_to_discriminate = ['classifier', 'bert']
    no_decay = ['bias', 'LayerNorm.weight']

    if discriminative_fine_tuning:
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay) and not 'bert' in n
            ],
            'layers': [
                n for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay) and not 'bert' in n
            ],
            'lr':
            0.001,
            'name':
            'classifier.decay',
            'weight_decay':
            0.01
        }, {
            'params': [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay) and not 'bert' in n
            ],
            'layers': [
                n for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay) and not 'bert' in n
            ],
            'lr':
            0.001,
            'name':
            'classifier.no_decay',
            'weight_decay':
            0.0
        }, {
            'params': [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay) and 'bert' in n
            ],
            'layers': [
                n for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay) and 'bert' in n
            ],
            'lr':
            0.00002,
            'name':
            'bert.decay',
            'weight_decay':
            0.01
        }, {
            'params': [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay) and 'bert' in n
            ],
            'layers': [
                n for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay) and 'bert' in n
            ],
            'lr':
            0.00002,
            'name':
            'bert.no_decay',
            'weight_decay':
            0.0
        }]
    else:
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params': [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.0
        }]

    optimizer = AdamW(optimizer_grouped_parameters)
    scheduler = WarmupLinearSchedule(optimizer,
                                     warmup_steps=len(train_iter) *
                                     num_epoch // 10,
                                     t_total=len(train_iter) * num_epoch)

    for e in range(num_epoch):
        unfreeze = (e == 0)

        if discriminative_fine_tuning and gradual_unfreezing:
            for pg in optimizer.param_groups:
                layers = ''
                for layer in pg['layers']:
                    layers += layer + ';'
                # print('epoch: {}, Layers: {}'.format(e, layers))
                if 'bert' in pg['name']:
                    for param in pg['params']:
                        param.requires_grad = unfreeze

        loss = train(model, train_iter, optimizer, scheduler, criterion)
        acc = eval(model, test_iter, idx2tag)

        print("epoch: {}, loss: {}".format(e, loss))
        print("epoch: {}, acc: {}".format(e, acc))
    '''
    ## Second fine-tuning step (epoch=1)
    
    train_dataset = EventDataset(it_train, tokenizer, tag2idx)
    for e in range(num_epoch):
        unfreeze = (True, False)[e != 0]

        if discriminative_fine_tuning and gradual_unfreezing:
            for pg in optimizer.param_groups:
                layers = ''
                for layer in pg['layers']:
                    layers += layer + ';'
                # print('epoch: {}, Layers: {}'.format(e, layers))
                if 'bert' in pg['name']:
                    for param in pg['params']:
                        param.requires_grad = unfreeze

        loss = train(model, train_iter, optimizer, scheduler, criterion)
        acc = eval(model, test_iter, idx2tag)

        print("epoch: {}, loss: {}".format(e, loss))
        print("epoch: {}, acc: {}".format(e, acc))
    '''

    calculate_acc()
    calculate_f1()