Example #1
def test_2layer_net():
    params = init_toy_model()
    X, y = init_toy_data()
    Y_enc = ut.encode_labels(y)
    # Make the net
    layer_1 = layers.Linear(*params['W1'].T.shape,
                            reg='frob',
                            reg_param=0.05,
                            init_vals=(params['W1'].T, params['b1'].ravel()))
    act_1 = layers.Relu()
    layer_2 = layers.Linear(*params['W2'].T.shape,
                            reg='frob',
                            reg_param=0.05,
                            init_vals=(params['W2'].T, params['b2'].ravel()))
    net_2 = nn.Network([layer_1, act_1, layer_2], ls.CrossEntropy(),
                       optim.SGD(lr=1e-5))
    scores = net_2.forward(X)
    correct_scores = np.asarray([[-1.07260209, 0.05083871, -0.87253915],
                                 [-2.02778743, -0.10832494, -1.52641362],
                                 [-0.74225908, 0.15259725, -0.39578548],
                                 [-0.38172726, 0.10835902, -0.17328274],
                                 [-0.64417314, -0.18886813, -0.41106892]])
    diff = np.sum(np.abs(scores - correct_scores))
    assert (np.isclose(diff, 0.0, atol=1e-6))
    loss = net_2.loss(X, Y_enc)
    correct_loss = 1.071696123862817
    assert (np.isclose(loss, correct_loss, atol=1e-8))
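ut.encode_labels is used here as a black box; given that the encoded matrix is fed straight to ls.CrossEntropy, it is presumably a one-hot encoder. A minimal sketch under that assumption (encode_labels_sketch is a hypothetical name, not the repo's helper):

import numpy as np

def encode_labels_sketch(y, num_classes=None):
    # One-hot encode integer class labels; assumed equivalent of ut.encode_labels.
    y = np.asarray(y, dtype=int)
    if num_classes is None:
        num_classes = int(y.max()) + 1
    one_hot = np.zeros((y.shape[0], num_classes))
    one_hot[np.arange(y.shape[0]), y] = 1.0
    return one_hot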
Example #2
def loss_func_b(bb):
    layer_lin = layers.Linear(n,
                              c,
                              reg='l2',
                              reg_param=0.05,
                              init_vals=(W.T, bb.ravel()))
    loss_func = ls.CrossEntropy()
    net = nn.Network([layer_lin], loss_func, optimizer=None)
    return net.loss(X_dev, Y_dev_enc)
Example #3
def test_CrossEntropyLoss():
    np.random.seed(1)
    W = np.random.randn(c, n) * 0.0001
    b = np.random.randn(c, 1) * 0.0001
    layer_lin = layers.Linear(n, c, init_vals=(W.T, b.ravel()))
    loss_func = ls.CrossEntropy()
    net = nn.Network([layer_lin], loss_func, optimizer=None)
    my_loss = net.loss(X_dev, Y_dev_enc)
    assert (np.isclose(my_loss, -np.log(.1), atol=1e-2))
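The expected value in this test is not arbitrary: with weights scaled by 0.0001 the scores are close to zero, so the softmax output is roughly uniform over the c = 10 classes and the cross-entropy is about -log(1/10). A hedged sketch of that computation (softmax_cross_entropy_sketch is a hypothetical stand-in, not the repo's ls.CrossEntropy):

import numpy as np

def softmax_cross_entropy_sketch(scores, Y_one_hot):
    # Mean cross-entropy of softmax(scores) against one-hot targets.
    shifted = scores - scores.max(axis=1, keepdims=True)  # subtract row max for numerical stability
    probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
    return -np.mean(np.log(probs[Y_one_hot.astype(bool)]))

# With near-zero scores, probs are ~1/c per class, so the loss is ~ -log(1/c) = -log(0.1) for c = 10.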
Example #4
def test_CrossEntropy_Linear_Grad():
    np.random.seed(1)
    W = np.random.randn(c, n) * 0.0001
    b = np.random.randn(c, 1) * 0.0001
    layer_lin = layers.Linear(n,
                              c,
                              reg='l2',
                              reg_param=0.05,
                              init_vals=(W.T, b.ravel()))
    loss_func = ls.CrossEntropy()
    net = nn.Network([layer_lin], loss_func, optimizer=None)
    net_loss = net.loss(X_dev, Y_dev_enc)
    ngrad = net.backward()

    # Define functions to pass to helper
    def loss_func_W(ww):
        layer_lin = layers.Linear(n,
                                  c,
                                  reg='l2',
                                  reg_param=0.05,
                                  init_vals=(ww.T, b.ravel()))
        loss_func = ls.CrossEntropy()
        net = nn.Network([layer_lin], loss_func, optimizer=None)
        return net.loss(X_dev, Y_dev_enc)

    def loss_func_b(bb):
        layer_lin = layers.Linear(n,
                                  c,
                                  reg='l2',
                                  reg_param=0.05,
                                  init_vals=(W.T, bb.ravel()))
        loss_func = ls.CrossEntropy()
        net = nn.Network([layer_lin], loss_func, optimizer=None)
        return net.loss(X_dev, Y_dev_enc)

    # Actually run the test
    rel_err_weight = dutil.grad_check_sparse(loss_func_W,
                                             W,
                                             net.grads[0].T,
                                             10,
                                             seed=42)
    rel_err_bias = dutil.grad_check_sparse(loss_func_b,
                                           b.ravel(),
                                           net.grads[1],
                                           10,
                                           seed=42)
    assert (np.allclose(rel_err_weight,
                        np.zeros(rel_err_weight.shape),
                        atol=1e-4))
    assert (np.allclose(rel_err_bias, np.zeros(rel_err_bias.shape), atol=1e-4))
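dutil.grad_check_sparse is treated as a black box above; the usual implementation perturbs a handful of randomly chosen coordinates, takes a central difference, and compares it to the analytic gradient. A sketch under that assumption (the name and the exact return shape are guesses, not the repo's code):

import numpy as np

def grad_check_sparse_sketch(f, x, analytic_grad, num_checks=10, h=1e-5, seed=None):
    # Relative errors between numeric and analytic gradients at random coordinates.
    rng = np.random.RandomState(seed)
    rel_errors = np.empty(num_checks)
    for i in range(num_checks):
        ix = tuple(rng.randint(dim) for dim in x.shape)
        old = x[ix]
        x[ix] = old + h
        fxph = f(x)          # f(x + h)
        x[ix] = old - h
        fxmh = f(x)          # f(x - h)
        x[ix] = old          # restore the original value
        grad_numeric = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_errors[i] = (abs(grad_numeric - grad_analytic)
                         / max(1e-12, abs(grad_numeric) + abs(grad_analytic)))
    return rel_errors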
Example #5
def test_2layer_grad():
    params = init_toy_model()
    X, y = init_toy_data()
    Y_enc = ut.encode_labels(y)
    # Make the net
    layer_1 = layers.Linear(*params['W1'].T.shape,
                            reg='frob',
                            reg_param=0.05,
                            init_vals=(params['W1'].T, params['b1'].ravel()))
    act_1 = layers.Relu()
    layer_2 = layers.Linear(*params['W2'].T.shape,
                            reg='frob',
                            reg_param=0.05,
                            init_vals=(params['W2'].T, params['b2'].ravel()))
    net_2 = nn.Network([layer_1, act_1, layer_2], ls.CrossEntropy(),
                       optim.SGD(lr=1e-5))
    loss = net_2.loss(X, Y_enc)
    net_2.backward()

    def f_change_param(param_name, U):
        if param_name == 3:
            net_2.layers[0].params['b'] = U
        if param_name == 2:
            net_2.layers[0].params['W'] = U
        if param_name == 1:
            net_2.layers[2].params['b'] = U
        if param_name == 0:
            net_2.layers[2].params['W'] = U
        return net_2.loss(X, Y_enc)

    rel_errs = np.empty(4)
    for param_name in range(4):
        f = lambda U: f_change_param(param_name, U)
        if param_name == 3:
            pass_pars = net_2.layers[0].params['b']
        if param_name == 2:
            pass_pars = net_2.layers[0].params['W']
        if param_name == 1:
            pass_pars = net_2.layers[2].params['b']
        if param_name == 0:
            pass_pars = net_2.layers[2].params['W']
        param_grad_num = dutil.grad_check(f, pass_pars, epsilon=1e-5)
        rel_errs[param_name] = ut.rel_error(param_grad_num,
                                            net_2.grads[param_name])
    assert (np.allclose(rel_errs, np.zeros(4), atol=1e-7))
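ut.rel_error above is presumably the usual maximum relative error between two arrays; a one-line sketch under that assumption:

import numpy as np

def rel_error_sketch(x, y):
    # Max relative error; assumed equivalent of ut.rel_error.
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))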
Example #6
def train(model, args, datasets):
  """
  Train for one epoch.
  args holds the run parameters, e.g. batch size, n_class, etc.
  """
  
  # switch the model to training mode
  model.encoder.train()
  model.decoder.train() 
  if args.adversarial:
      model.discr.train()

  # the DataLoader takes care of batching
  # (the dataset passed in was built beforehand)
  loader = torch.utils.data.DataLoader(datasets,
                                       batch_size=args.batch_size,
                                       shuffle=True, drop_last=True)
  
  # loss on the whole dataset
  loss_data = tnt.meter.AverageValueMeter()
  loss_data_alt = tnt.meter.AverageValueMeter()
  loss_data_rad = tnt.meter.AverageValueMeter()
  loss_disc_val = tnt.meter.AverageValueMeter()
  accu_discr = 0.0
  
  # loop over the batches
  for index, (tiles, labels) in enumerate(loader):
    
    # move the batch onto the GPU if requested
    if args.cuda:
        tiles = tiles.cuda().float()
        labels = labels.cuda().long()
    else:
        tiles = tiles.float()
        labels = labels.long()
    
    # add Gaussian noise to the input tiles
    noise = np.random.normal(0, 0.01, tiles.shape)
    noise_tens = fun.torch_raster(noise)
    tiles_noise = tiles + noise_tens
    
   
    # convert one-hot labels to class indices (argmax) for the cross-entropy loss
    _, labels = labels.max(dim=1)
    
    # ============discriminator===========
    
    if args.adversarial:
        
        # ============forward===========
        
        #pred_year = discr(code.detach())
        
        code = model.encoder(tiles_noise, args)
        pred_year = model.discr(code, args)
        
        # ============loss===========
        
        # applying arg max for checking accuracy
        _, pred_max = pred_year.max(dim=1)
        
        # cross-entropy loss for the discriminator (optimized below)
        loss_disc = loss_fun.CrossEntropy(pred_year, labels)
        
        
        # discriminator accuracy on this batch
        matrix_accu = pred_max == labels
        matrix_accu_f = matrix_accu.flatten().cpu().detach().numpy()
        accu_discr += np.count_nonzero(matrix_accu_f) / len(matrix_accu_f)
        
        # ============backward===========
        
        # optimizing the discriminator. optional: training the encoder as well
        model.opti_D.zero_grad()
        #model.opti_AE.zero_grad()
        loss_disc.backward(retain_graph=True)
        
        # clamp gradient values to [-1, 1] (element-wise); this helps stabilize training
        if args.grad_clip:
            for p in model.discr.parameters():
                p.register_hook(lambda grad: torch.clamp(grad, -1, 1))
                
        model.opti_D.step()
        model.opti_AE.zero_grad()
        model.opti_AE.step()
    
        # saving the loss
        loss_disc_val.add(loss_disc.item())
        
        # optionally train the encoder adversarially against the discriminator
        if args.opti_adversarial_encoder:
            code = model.encoder(tiles, args)
            pred_year = model.discr(code, args)
            loss_disc = loss_fun.CrossEntropy(pred_year, labels)
            loss_disc_adv = loss_disc
            model.opti_AE.zero_grad()
            loss_disc_adv.backward()
            model.opti_AE.step()
        
        # epoch-average discriminator accuracy (final value after the last batch)
        accufin = accu_discr / len(loader)

    # ============auto_encoder optimization===========
    
    # ============forward auto-encoder===========
    # compute the prediction
    pred = model.predict(tiles_noise, args)
    code = model.encoder(tiles_noise, args)
    
    # boolean masks to ignore no-data pixels
    bool_matr_alt = tiles[:,None,0,:,:] != 0
    bool_matr_rad = tiles[:,None,1,:,:] != 0
    
    # filtering the data
    pred_alt = pred[:,None,0,:,:][bool_matr_alt]
    tiles_alt = tiles[:,None,0,:,:][bool_matr_alt]
    pred_rad = pred[:,None,1,:,:][bool_matr_rad]
    tiles_rad = tiles[:,None,1,:,:][bool_matr_rad]
    
    ## defiance part
    if args.defiance:
        # loading defiance matrix
        d_mat_rad = pred[:,None,2,:,:][bool_matr_rad]
        
        # standard MSE for the altitude channel
        eps = 1e-5
        loss_alt = loss_fun.MeanSquareError(pred_alt, tiles_alt)

        # heteroscedastic loss for the radiometry, weighted by the predicted variance
        mse_rad = (tiles_rad - pred_rad)**2
        loss_rad = torch.mean(mse_rad / (d_mat_rad + eps) + 0.5 * torch.log(d_mat_rad + eps))

    else:
        # plain mean squared error for both channels
        loss_alt = loss_fun.MeanSquareError(pred_alt, tiles_alt)
        loss_rad = loss_fun.MeanSquareError(pred_rad, tiles_rad)
    
    
    if args.auto_encod:
        
        # ============forward===========
        if args.adversarial:
            code = model.encoder(tiles_noise, args)
            pred_year = model.discr(code, args)
            loss_disc = loss_fun.CrossEntropy(pred_year, labels)
            
        # ============loss==========
        
        if args.adversarial and args.data_fusion:
            loss = loss_rad + loss_alt  # - args.disc_loss_weight * loss_disc
        elif args.data_fusion:
            loss = loss_rad + loss_alt
        elif args.adversarial and args.rad_input:
            loss = loss_rad - args.disc_loss_weight * loss_disc
        elif args.adversarial:
            loss = loss_alt - args.disc_loss_weight * loss_disc
        elif args.rad_input:
            loss = loss_rad
        else:
            loss = loss_alt
            
        loss_data.add(loss.item())
        
        # ============backward===========
        
            
        model.opti_AE.zero_grad()
        loss.backward()
        
        # clamp gradient values to [-1, 1] (element-wise); this helps stabilize training
        if args.grad_clip:
            for p in model.AE_params:
                p.register_hook(lambda grad: torch.clamp(grad, -1, 1))
            
        model.opti_AE.step()
        
    
    # storing the loss values
    loss_data_alt.add(loss_alt.item())
    loss_data_rad.add(loss_rad.item())
    
    if not args.adversarial:
        accufin = 0
  
  
  # output of various losses
  result = (loss_data.value()[0], len(loader), loss_data_alt.value()[0],
              loss_data_rad.value()[0], loss_disc_val.value()[0], accufin)
  
  return result
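The "defiance" branch in the loop above is a heteroscedastic regression loss: the decoder predicts a per-pixel variance alongside the radiometric reconstruction, and the loss is a Gaussian negative log-likelihood up to constants, mean((y - y_hat)^2 / d + 0.5 * log d). A standalone sketch of that term (the function name is hypothetical; the training loop computes it inline):

import torch

def heteroscedastic_mse_sketch(pred, target, variance, eps=1e-5):
    # Gaussian NLL-style loss with a predicted per-pixel variance (cf. the "defiance" branch).
    mse = (target - pred) ** 2
    return torch.mean(mse / (variance + eps) + 0.5 * torch.log(variance + eps))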
Example #7
                                   transform=torchvision.transforms.Compose([
                                       torchvision.transforms.ToTensor(),
                                   ])), shuffle=True, batch_size=batch_size)
    return train_loader, test_loader


if __name__ == '__main__':
    torch.random.manual_seed(1234)
    np.random.seed(1234)

    epochs = 10
    lr = 0.01
    batch_size = 32

    optimizer = optimizers.SGD(learning_rate=lr)
    criterion = loss.CrossEntropy()
    # use a distinct name so the `layers` module is not shadowed
    net_layers = [
        layers.LinearLayer(784, 512),
        layers.ReLU(),
        layers.Dropout(keep_rate=0.8),
        layers.LinearLayer(512, 512),
        layers.ReLU(),
        layers.Dropout(keep_rate=0.8),
        layers.LinearLayer(512, 10)
    ]
    model = Model(net_layers, optimizer, criterion)

    train_loader, test_loader = get_dataset(batch_size)
    for epoch_id in range(epochs):
        model.train()
        total = 0
Example #8
                 model.Linear(25, 25, initOption = 'He'), model.ReLU(),
                 model.Linear(25, 25, initOption = 'He'), model.ReLU(),
                 model.Linear(25, 25, initOption = 'He'), model.ReLU(),
                 model.Linear(25, 2, initOption = 'Xavier'))

Model_8 = model.MLP(model.Linear(2, 25), model.ReLU(),
                 model.Linear(25, 25), model.ReLU(),
                 model.Linear(25, 25), model.ReLU(),
                 model.Linear(25, 25), model.ReLU(),
                 model.Linear(25, 2))

learning_rate = 0.01
nb_epochs = 200
l_CE = []
text = ['Tanh', 'ReLU + BN', 'ReLU + Init', 'ReLU']
for i, M in enumerate([Model_5, Model_6, Model_7, Model_8]):
    loss_fnc = loss.CrossEntropy(M)
    print('Model', i+5)
    print('Loss function: Cross-Entropy; Activation function:', text[i])
    l_CE.append(train_model(M, train_input, train_target, loss_fnc, nb_epochs, learning_rate))

    print("---------------------- Error ---------------------")
    nb_train_errors = compute_nb_errors(M, train_input, train_target)
    nb_test_errors = compute_nb_errors(M, test_input, test_target)

    print('Test error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_test_errors) / test_input.size(0),
                                                      nb_test_errors, test_input.size(0)))
    print('Train error Net {:0.2f}% {:d}/{:d}'.format((100 * nb_train_errors) / train_input.size(0),
                                                      nb_train_errors, train_input.size(0)))
    print("--------------------------------------------------\n")