# Standard imports; the project-local modules (Neural_network, Layer,
# ActivationFunctions, Loss, OptimizerClass, Error_round, loss_fun, fun)
# and the Tensor alias / path_ini global are assumed to be defined
# elsewhere in this repo.
import os

import numpy as np
import pandas as pd


def train_prediction(net: Neural_network.NeuralNet,
                     inputs_train: Tensor,
                     targets_train: Tensor,
                     inputs_test: Tensor,
                     targets_test: Tensor,
                     loss: Loss.Loss = Loss.MeanSquareError(),
                     optimizer: OptimizerClass.Optimizer = OptimizerClass.SGD(),
                     num_epochs: int = 5000,
                     batch_size: int = 32):
    Data = pd.DataFrame(columns=('MSE_train', 'MSE_test',
                                 'error_round_train', 'error_round_test'))
    size_training = inputs_train.shape[0]
    for epoch in range(num_epochs):
        Chi2_train = 0.0
        error_round_train = 0.0
        nbr_batch = 0
        for i in range(0, size_training, batch_size):
            nbr_batch += 1
            # 1) feed forward
            y_actual = net.forward(inputs_train[i:i + batch_size])
            # 2) compute the loss and the gradients
            Chi2_train += loss.loss(targets_train[i:i + batch_size], y_actual)
            grad_ini = loss.grad(targets_train[i:i + batch_size], y_actual)
            # 3) feed backwards
            grad_fini = net.backward(grad_ini)
            # 4) update the net
            optimizer.step(net, n_epoch=epoch)
            error_round_train += Error_round.error_round(
                targets_train[i:i + batch_size], y_actual)
        Chi2_train = Chi2_train / nbr_batch
        error_round_train = error_round_train / nbr_batch

        # evaluate on the test set after each epoch
        y_actual_test = net.forward(inputs_test)
        Chi2_test = loss.loss(targets_test, y_actual_test)
        error_round_test = Error_round.error_round(targets_test, y_actual_test)

        if epoch % 100 == 0:
            print('epoch : ' + str(epoch) + "/" + str(num_epochs) + "\r", end="")

        datanew = pd.DataFrame({
            'MSE_train': [Chi2_train],
            'MSE_test': [Chi2_test],
            'error_round_train': [error_round_train],
            'error_round_test': [error_round_test]
        })
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead
        Data = pd.concat([Data, datanew], ignore_index=True)

    os.chdir(path_ini)
    Data.to_csv('Opt_num_epoch_backup.csv', index=False)
    return Data
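# Hypothetical usage sketch for train_prediction(), assuming the same
# Layer / ActivationFunctions / Neural_network modules used in
# train_simultaneousNN below; the data here is synthetic:
#
#     rng = np.random.default_rng(0)
#     X = rng.normal(size=(200, 6))
#     y = (X.sum(axis=1, keepdims=True) > 0).astype(float)
#     net = Neural_network.NeuralNet([
#         Layer.Linear(6, 4), ActivationFunctions.Tanh(),
#         Layer.Linear(4, 1), ActivationFunctions.Sigmoid()])
#     history = train_prediction(net, X[:150], y[:150], X[150:], y[150:],
#                                num_epochs=500, batch_size=32)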
def test_MeanSquareError():
    predicted = np.array([[1, 1, 1, 1, 1, 1], [2, 4, 5, 6, 8, 1]])
    actual = np.array([[1, 1, 2, 1, 3, 1], [2, 5, 1, 1, 3, 1]])
    mse = Loss.MeanSquareError()
    print('test mse.loss : ', mse.loss(predicted, actual))
    print('test mse.grad : ', mse.grad(predicted, actual))
    # OK
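# For reference, a minimal sketch of what Loss.MeanSquareError is assumed to
# implement (the actual class lives in the project's Loss module, with the
# target tensor passed first, as in the training loops below):
#
#     class MeanSquareError(Loss):
#         def loss(self, actual: Tensor, predicted: Tensor) -> float:
#             return np.mean((predicted - actual) ** 2)
#
#         def grad(self, actual: Tensor, predicted: Tensor) -> Tensor:
#             return 2 * (predicted - actual) / predicted.size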
def train(net: Neural_network.NeuralNet,
          inputs: Tensor,
          targets: Tensor,
          loss: Loss.Loss = Loss.MeanSquareError(),
          optimizer: OptimizerClass.Optimizer = OptimizerClass.SGD(),
          num_epochs: int = 5000,
          batch_size: int = 32) -> tuple:
    chi2_list = []
    round_error_list = []
    size_training = inputs.shape[0]
    for epoch in range(num_epochs):
        chi2_loss = 0.0
        round_error_loss = 0.0
        nbr_batch = 0
        for i in range(0, size_training, batch_size):
            nbr_batch += 1
            # 1) Feed forward
            y_actual = net.forward(inputs[i:i + batch_size])
            # 2) Compute the loss and the gradient
            chi2_loss += loss.loss(targets[i:i + batch_size], y_actual)
            round_error_loss += Error_round.error_round(
                targets[i:i + batch_size], y_actual)
            grad_ini = loss.grad(targets[i:i + batch_size], y_actual)
            # 3) Feed backwards
            grad_fini = net.backward(grad_ini)
            # 4) Update the net
            optimizer.step(net, n_epoch=epoch)
        chi2_loss = chi2_loss / nbr_batch
        round_error_loss = round_error_loss / nbr_batch
        chi2_list.append(chi2_loss)
        round_error_list.append(round_error_loss)
        # Print status every 50 epochs
        if epoch % 50 == 0:
            print('\r epoch : ' + str(epoch) + "/" + str(num_epochs) +
                  ", training mean squared error : " + str(chi2_loss) + "\r",
                  end="")
    print('epoch : ' + str(epoch) + "/" + str(num_epochs) +
          ", training final mean squared error : " + str(chi2_loss) + '\n')
    return chi2_list, round_error_list
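# Hypothetical usage sketch: plot the learning curves returned by train()
# (matplotlib is an assumption, any plotting tool works; X_train / y_train
# are placeholders):
#
#     chi2_list, round_error_list = train(net, X_train, y_train,
#                                         num_epochs=2000, batch_size=32)
#     plt.plot(chi2_list, label='MSE')
#     plt.plot(round_error_list, label='rounded-prediction error')
#     plt.xlabel('epoch'); plt.legend(); plt.show()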
def train_alternative_model(model1, model2, args, datasets):
    """
    Trains the two cross-year auto-encoders for one epoch: the two encoders
    are pushed towards a common code, and each decoder reconstructs the
    other year from that code.
    """
    # loading data per year
    years = list(datasets.keys())
    data_year1 = datasets[years[0]]
    data_year2 = datasets[years[1]]

    # the loader function will take care of the batching
    # train_set was defined prior
    loader1 = torch.utils.data.DataLoader(data_year1,
                                          batch_size=args.batch_size,
                                          shuffle=False, drop_last=True)
    loader2 = torch.utils.data.DataLoader(data_year2,
                                          batch_size=args.batch_size,
                                          shuffle=False, drop_last=True)

    # switch the models to training mode
    model1.encoder.train()
    model1.decoder.train()
    model2.encoder.train()
    model2.decoder.train()

    # loss on the whole dataset
    loss_data = tnt.meter.AverageValueMeter()

    # loop over the batches
    for year1, year2 in zip(loader1, loader2):
        # loading on the gpu
        if args.cuda:
            year1 = year1.cuda().float()
            year2 = year2.cuda().float()
        else:
            year1 = year1.float()
            year2 = year2.float()

        # ============forward auto-encoder===========
        # computing the codes
        code1 = model1.encoder(year1, args)
        code2 = model2.encoder(year2, args)

        # first loss: the two codes should match
        loss_codes = loss_fun.MeanSquareError(code1, code2)

        # computing the reconstructions
        pred_year2 = model1.decoder(code1, args)
        pred_year1 = model2.decoder(code2, args)

        # boolean matrices to remove the effect of no-data pixels
        bool_matr1 = year1 != 0
        bool_matr2 = year2 != 0

        # filtering the data
        pred_year1 = pred_year1[bool_matr1]
        pred_year2 = pred_year2[bool_matr2]
        year1 = year1[bool_matr1]
        year2 = year2[bool_matr2]

        # computing the reconstruction losses
        loss2 = loss_fun.MeanSquareError(year2, pred_year2)
        loss3 = loss_fun.MeanSquareError(year1, pred_year1)

        # adding into the total loss
        loss_total = loss_codes + loss2 + loss3

        # backpropagation
        model1.opti_AE.zero_grad()
        model2.opti_AE.zero_grad()
        loss_total.backward()

        # clipping the gradients to [-1, 1]; this helps learning faster.
        # The clamp must happen after backward() and before step() to have
        # any effect on the current batch.
        if args.grad_clip:
            for p in model1.AE_params:
                if p.grad is not None:
                    p.grad.data.clamp_(-1, 1)
            for p in model2.AE_params:
                if p.grad is not None:
                    p.grad.data.clamp_(-1, 1)

        # optimization
        model1.opti_AE.step()
        model2.opti_AE.step()

        loss_data.add(loss_total.item())

    return loss_data.value()[0]
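# loss_fun.MeanSquareError is used above as a plain function of two tensors;
# a minimal sketch of the assumed implementation (the real one lives in the
# project's loss_fun module):
#
#     def MeanSquareError(actual, predicted):
#         return torch.mean((actual - predicted) ** 2)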
def train(model, args, datasets):
    """
    Trains the model for one epoch.
    args holds the parameters of the model, e.g. batch size, n_class, etc.
    """
    # switch the model to training mode
    model.encoder.train()
    model.decoder.train()
    if args.adversarial:
        model.discr.train()

    # the loader function will take care of the batching
    # train_set was defined prior
    loader = torch.utils.data.DataLoader(datasets,
                                         batch_size=args.batch_size,
                                         shuffle=True, drop_last=True)

    # losses on the whole dataset
    loss_data = tnt.meter.AverageValueMeter()
    loss_data_alt = tnt.meter.AverageValueMeter()
    loss_data_rad = tnt.meter.AverageValueMeter()
    loss_disc_val = tnt.meter.AverageValueMeter()
    accu_discr = 0.0

    # loop over the batches
    for index, (tiles, labels) in enumerate(loader):
        # loading on the gpu
        if args.cuda:
            tiles = tiles.cuda().float()
            labels = labels.cuda().long()
        else:
            tiles = tiles.float()
            labels = labels.long()

        # adding gaussian noise to the sample
        noise = np.random.normal(0, 0.01, tiles.shape)
        noise_tens = fun.torch_raster(noise)
        tiles_noise = tiles + noise_tens

        # applying argmax on the labels for the cross entropy
        _, labels = labels.max(dim=1)

        # ============discriminator===========
        if args.adversarial:
            # ============forward===========
            code = model.encoder(tiles_noise, args)
            pred_year = model.discr(code, args)

            # ============loss===========
            # applying argmax for checking accuracy
            _, pred_max = pred_year.max(dim=1)

            # loss function for the discriminator
            loss_disc = loss_fun.CrossEntropy(pred_year, labels)

            # checking the accuracy
            matrix_accu = pred_max == labels
            matrix_accu_f = matrix_accu.flatten()
            matrix_accu_f = matrix_accu_f.cpu().detach().numpy()
            nb_true = np.count_nonzero(matrix_accu_f)
            accu_discr += nb_true / len(matrix_accu_f)

            # ============backward===========
            # optimizing the discriminator; optionally the encoder as well
            model.opti_D.zero_grad()
            #model.opti_AE.zero_grad()
            loss_disc.backward(retain_graph=True)

            # clipping the discriminator gradients to [-1, 1]; this helps
            # learning faster (must happen between backward() and step())
            if args.grad_clip:
                for p in model.discr.parameters():
                    if p.grad is not None:
                        p.grad.data.clamp_(-1, 1)

            model.opti_D.step()

            # discarding the gradients that the discriminator loss pushed
            # into the auto-encoder
            model.opti_AE.zero_grad()
            model.opti_AE.step()

            # saving the loss
            loss_disc_val.add(loss_disc.item())

            # putting an adversarial training on the encoder
            if args.opti_adversarial_encoder:
                code = model.encoder(tiles, args)
                pred_year = model.discr(code, args)
                loss_disc_adv = loss_fun.CrossEntropy(pred_year, labels)
                model.opti_AE.zero_grad()
                loss_disc_adv.backward()
                model.opti_AE.step()

            # averaging the accuracy
            accufin = accu_discr / len(loader)

        # ============auto_encoder optimization===========
        # ============forward auto-encoder===========
        # compute the prediction
        pred = model.predict(tiles_noise, args)
        code = model.encoder(tiles_noise, args)

        # boolean matrices to remove the effect of no-data pixels
        bool_matr_alt = tiles[:, None, 0, :, :] != 0
        bool_matr_rad = tiles[:, None, 1, :, :] != 0

        # filtering the data
        pred_alt = pred[:, None, 0, :, :][bool_matr_alt]
        tiles_alt = tiles[:, None, 0, :, :][bool_matr_alt]
        pred_rad = pred[:, None, 1, :, :][bool_matr_rad]
        tiles_rad = tiles[:, None, 1, :, :][bool_matr_rad]

        ## defiance part
        if args.defiance:
            # loading the defiance matrix (per-pixel predicted variance)
            d_mat_rad = pred[:, None, 2, :, :][bool_matr_rad]
            eps = 10 ** -5
            loss_alt = loss_fun.MeanSquareError(pred_alt, tiles_alt)
            # heteroscedastic loss for the defiance: the squared error is
            # scaled by the predicted variance, plus a log-variance penalty
            mse_rad = (tiles_rad - pred_rad) ** 2
            loss_rad = torch.mean(mse_rad / (d_mat_rad + eps)
                                  + (1 / 2) * torch.log(d_mat_rad + eps))
        else:
            ## sum of squares
            loss_alt = loss_fun.MeanSquareError(pred_alt, tiles_alt)
            loss_rad = loss_fun.MeanSquareError(pred_rad, tiles_rad)

        if args.auto_encod:
            # ============forward===========
            if args.adversarial:
                code = model.encoder(tiles_noise, args)
                pred_year = model.discr(code, args)
                loss_disc = loss_fun.CrossEntropy(pred_year, labels)

            # ============loss===========
            if args.adversarial and args.data_fusion:
                loss = loss_rad + loss_alt  #- args.disc_loss_weight * loss_disc
            elif args.data_fusion:
                loss = loss_rad + loss_alt
            elif args.adversarial and args.rad_input:
                loss = loss_rad - args.disc_loss_weight * loss_disc
            elif args.adversarial:
                loss = loss_alt - args.disc_loss_weight * loss_disc
            elif args.rad_input:
                loss = loss_rad
            else:
                loss = loss_alt

            loss_data.add(loss.item())

            # ============backward===========
            model.opti_AE.zero_grad()
            loss.backward()

            # clipping the auto-encoder gradients to [-1, 1], as above
            if args.grad_clip:
                for p in model.AE_params:
                    if p.grad is not None:
                        p.grad.data.clamp_(-1, 1)

            model.opti_AE.step()

        # storing the loss values
        loss_data_alt.add(loss_alt.item())
        loss_data_rad.add(loss_rad.item())

    if not args.adversarial:
        accufin = 0

    # output of the various losses
    result = (loss_data.value()[0], len(loader), loss_data_alt.value()[0],
              loss_data_rad.value()[0], loss_disc_val.value()[0], accufin)
    return result
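# Hypothetical usage sketch for the epoch loop around train(); `model` and
# `args` follow the conventions assumed above (opti_AE / opti_D optimizers,
# boolean flags on args):
#
#     for epoch in range(args.epochs):
#         loss, n_batches, loss_alt, loss_rad, loss_disc, accu = \
#             train(model, args, train_set)
#         print('epoch %d | AE loss %.4f | discr accuracy %.2f'
#               % (epoch, loss, accu))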
def train_simultaneousNN(inputs_train: Tensor,
                         targets_train: Tensor,
                         loss: Loss.Loss = Loss.MeanSquareError(),
                         optimizer: OptimizerClass.Optimizer = OptimizerClass.SGD(),
                         num_epochs: int = 5000,
                         batch_size: int = 32) -> tuple:
    size_training = inputs_train.shape[0]
    Result_chi2 = [[], [], [], [], [], [], [], [], []]
    # epochs (10% to 45% of num_epochs) at which one network is eliminated
    list_epoch = np.array(range(10, 50, 5)) / 100 * num_epochs

    '''initialisation of the 9 NNs'''
    # TODO: check how the random seed is handled
    list_net = []
    for i in range(9):
        layers = []
        layers.append(Layer.Linear(6, 4))
        layers.append(ActivationFunctions.Tanh())
        layers.append(Layer.Linear(4, 2))
        layers.append(ActivationFunctions.Tanh())
        layers.append(Layer.Linear(2, 1))
        layers.append(ActivationFunctions.Sigmoid())
        list_net.append(Neural_network.NeuralNet(layers))
    destroyed_NN = []
    nbr_batch = size_training // batch_size

    '''training of the 9 NNs'''
    for epoch in range(num_epochs):
        for k in range(9):
            if k not in destroyed_NN:
                Chi2_train = 0
                for i in range(0, size_training, batch_size):
                    # 1) feed forward
                    y_actual = list_net[k].forward(inputs_train[i:i + batch_size])
                    # 2) compute the loss and the gradients
                    Chi2_train += loss.loss(targets_train[i:i + batch_size],
                                            y_actual)
                    grad_ini = loss.grad(targets_train[i:i + batch_size],
                                         y_actual)
                    # 3) feed backwards
                    grad_fini = list_net[k].backward(grad_ini)
                    # 4) update the net
                    optimizer.step(list_net[k], n_epoch=epoch)
                Chi2_train = Chi2_train / nbr_batch
                Result_chi2[k].append(Chi2_train)

        '''elimination of the least efficient NN'''
        if epoch in list_epoch:
            Comparaison = [[], []]
            for k in range(9):
                if k not in destroyed_NN:
                    # slope of the error curve over the last 50 epochs
                    ErrorSlope = np.polyfit(np.array(range(epoch - 49, epoch)),
                                            Result_chi2[k][-50:-1], 1)[0]
                    MixedError = Result_chi2[k][-1] * \
                        (1 - np.arctan(ErrorSlope) / (np.pi / 2))
                    Comparaison[0].append(k)
                    Comparaison[1].append(MixedError)
            # destroy the network with the worst mixed error
            k = Comparaison[0][Comparaison[1].index(max(Comparaison[1]))]
            destroyed_NN.append(k)

        if epoch % 100 == 0:
            print('epoch : ' + str(epoch) + "/" + str(num_epochs) + "\r", end="")

    # the last surviving network is returned
    for k in range(9):
        if k not in destroyed_NN:
            my_NN = list_net[k]
    return my_NN, Result_chi2
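# Hypothetical usage sketch: train the 9 candidate networks, keep the
# survivor, and inspect the chi2 histories (X_train / y_train are
# placeholders; plotting via matplotlib is an assumption):
#
#     best_net, chi2_histories = train_simultaneousNN(X_train, y_train,
#                                                     num_epochs=2000)
#     for k, history in enumerate(chi2_histories):
#         plt.plot(history, label='net %d' % k)
#     plt.xlabel('epoch'); plt.ylabel('MSE'); plt.legend(); plt.show()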