def __init__(self, model, inputs_train, targets_train, inputs_val, targets_val,
             to_rgb=False, root_dir=os.path.dirname(__file__)):
    # inputs and targets are DataFrames
    self.model = model  # model to train/evaluate
    self.model_eval = copy.deepcopy(self.model)
    self.model_eval.to('cpu')
    self.labels = None
    self.labels_num = None
    self.sampler = self._create_sampler(targets_train.values.astype(int))
    self.m_exporter = ModelExporter('temp', root_dir=root_dir)
    self.model_name = copy.deepcopy(self.model.name)

    # Data generators
    self.training_set = Fer2013Dataset(inputs=inputs_train, targets=targets_train,
                                       to_rgb=to_rgb, device='cpu')
    self.validation_set = Fer2013Dataset(inputs=inputs_val, targets=targets_val,
                                         to_rgb=to_rgb, device='cpu')

    # https://stanford.edu/~shervine/blog/pytorch-how-to-generate-data-parallel
    self.use_cuda = torch.cuda.is_available()
    self.device = torch.device("cuda:0" if self.use_cuda else "cpu")
    torch.backends.cudnn.benchmark = True  # faster if input sizes stay constant
    if self.use_cuda:
        print('emptying CUDA cache')
        torch.cuda.empty_cache()
    self.model.to(self.device)
    print(f'use cuda: {self.use_cuda}')
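# _create_sampler is called above but not defined in this excerpt. A minimal
# sketch, assuming it builds a torch.utils.data.WeightedRandomSampler with
# inverse-class-frequency weights to counter FER2013's class imbalance (the
# weighting scheme is an assumption, not confirmed by this code):
import numpy as np
from torch.utils.data import WeightedRandomSampler

def _create_sampler(self, targets):
    class_counts = np.bincount(targets)        # samples per class
    class_weights = 1.0 / class_counts         # rarer classes get larger weights
    sample_weights = class_weights[targets]    # one weight per training sample
    weights = torch.as_tensor(sample_weights, dtype=torch.double)
    return WeightedRandomSampler(weights=weights,
                                 num_samples=len(sample_weights),
                                 replacement=True)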
dtype = torch.float

model_name = f'cnn_triple_layer_D_bs_{learning_rate}_{batch_size}_{n_epochs}_{n_classes}'
model = CnnTripleLayer(model_name, d_out=n_classes)
model.train()

train_classifier = TrainClassifier2(model, X_train_df, y_train_df, X_val_df, y_val_df,
                                    root_dir=current_working_dir)
t = time.time()
trained_model, optimizer, criterion, \
    train_loss_hist, train_acc_hist, train_f1_hist, train_b_hist, \
    val_loss_hist, val_acc_hist, val_f1_hist, val_b_hist = \
    train_classifier.run_train(n_epochs=n_epochs, lr=learning_rate, batch_size=batch_size)
print(f'trained in {time.time() - t} sec')

if args.s_model:
    m_exporter = ModelExporter('fer2013_datasetD', root_dir=current_working_dir)
    m_exporter.save_nn_model(trained_model, optimizer, trained_model.get_args())
    m_exporter.save_results(f'{model_name}',
                            train_loss_hist, train_acc_hist, train_f1_hist, train_b_hist,
                            val_loss_hist, val_acc_hist, val_f1_hist, val_b_hist)
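# The history lists returned by run_train can be compared directly, e.g. to
# spot overfitting. A minimal plotting sketch using matplotlib (the output
# file name is illustrative):
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 5))
plt.plot(train_loss_hist, label='train loss')
plt.plot(val_loss_hist, label='val loss')
plt.xlabel('evaluation step')
plt.ylabel('cross-entropy loss')
plt.legend()
plt.savefig(f'{model_name}_loss.png')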
dtype = torch.float device = torch.device("cpu") model_name = f'cnn_double_layer_D_bs_{learning_rate}_{batch_size}_{n_epochs}_{n_classes}' model = CnnDoubleLayer(model_name, d_out=n_classes) model.train() train_classifier = TrainClassifier2(model, X_df, y_df) t = time.time() trained_model, optimizer, criterion, loss_hist, loss_val_hist, f1_val_hist = train_classifier.run_train( n_epochs=n_epochs, lr=learning_rate, batch_size=batch_size) print(f'trained in {time.time() - t} sec') pre.save_results(loss_hist, loss_val_hist, f1_val_hist, f'{model_name}') if args.s_model: m_exporter = ModelExporter('fer2013_DatasetD') m_exporter.save_nn_model(trained_model, optimizer, trained_model.get_args()) if args.s_patterns: detected_patterns1 = trained_model.get_detected_patterns1() for idx in range(10): plt.figure(1, figsize=(20, 10)) for p in range(trained_model.n_patterns1): pattern = detected_patterns1[idx][p].reshape( detected_patterns1.shape[2], detected_patterns1.shape[3]) patern_np = pattern.detach().numpy().reshape(24, 24) plt.subplot(2, 5, 1 + p) plt.imshow(patern_np, cmap='gray', interpolation='none') pre.save_plt_as_image(plt, f'patterns_1_{idx}')
dtype = torch.float device = torch.device("cpu") model_name = f'cnn_double_layer_reduced_{learning_rate}_{batch_size}_{n_epochs}_{n_classes}' model = CnnDoubleLayer(model_name, d_out=n_classes) model.train() train_classifier = TrainClassifier(model, X_df, y_df) t = time.time() trained_model, optimizer, criterion, loss_hist, loss_val_hist, f1_val_hist = train_classifier.run_train( n_epochs=n_epochs, lr=learning_rate, batch_size=batch_size) print(f'trained in {time.time() - t} sec') pre.save_results(loss_hist, loss_val_hist, f1_val_hist, f'{model_name}') if args.s_model: m_exporter = ModelExporter('fer2013_reduced') m_exporter.save_nn_model(trained_model, optimizer, trained_model.get_args()) if args.s_patterns: detected_patterns1 = trained_model.get_detected_patterns1() for idx in range(10): plt.figure(1, figsize=(20, 10)) for p in range(trained_model.n_patterns1): pattern = detected_patterns1[idx][p].reshape( detected_patterns1.shape[2], detected_patterns1.shape[3]) patern_np = pattern.detach().numpy().reshape(24, 24) plt.subplot(2, 5, 1 + p) plt.imshow(patern_np, cmap='gray', interpolation='none') pre.save_plt_as_image(plt, f'patterns_1_{idx}')
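# get_detected_patterns1 and n_patterns1 are assumed to expose first-layer
# activations cached during the forward pass; 10 maps would match the 2x5
# subplot grid above. A hypothetical sketch of that mechanism (PatternProbe
# is an illustrative name, not part of this repo):
import torch

class PatternProbe(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = torch.nn.Conv2d(1, 10, kernel_size=5)  # 10 feature maps
        self.n_patterns1 = 10
        self.detected_patterns1 = None

    def forward(self, x):
        a1 = torch.relu(self.conv1(x))
        self.detected_patterns1 = a1  # cache first-layer feature maps
        return a1

    def get_detected_patterns1(self):
        return self.detected_patterns1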
def run_train(self, n_epochs, lr=0.001, batch_size=256):
    self.lr = lr
    if not self.data_is_prepared:
        self.prepare_data()

    # Loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)

    # Train
    loss_hist = []
    loss_val_hist = []
    acc_val_hist = []
    f1_val_hist = []
    model_versions = {}
    m_exporter = ModelExporter('temp')
    model_name = copy.deepcopy(self.model.name)

    for t in range(n_epochs):
        for batch in range(0, int(self.N / batch_size)):
            # Fetch the current batch
            batch_x, batch_y = self.model.get_batch(self.x, self.y, batch, batch_size)

            # Compute the prediction (forward step)
            outputs = self.model(batch_x)

            # Compute the loss
            loss = criterion(outputs, batch_y)

            # Compute the gradients and update the weights (backward step)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Evaluate and checkpoint every `idx` epochs
        idx = 10
        if t % idx == 0:
            #current_lr = self._get_lr(optimizer)
            #self._set_lr(optimizer, self._update_lr(optimizer, t))
            #print(f'learning_rate: {current_lr}')
            outputs = self.model(self.x)
            loss = criterion(outputs, self.y)
            loss_hist.append(loss.item())

            outputs_val = self.model(self.x_val)
            loss_val = criterion(outputs_val, self.y_val)
            loss_val_hist.append(loss_val.item())

            model_versions[t] = copy.deepcopy(self.model.state_dict())

            accuracy_train = (outputs.argmax(1) == self.y.long()).float().mean()
            accuracy_val = (outputs_val.argmax(1) == self.y_val.long()).float().mean()
            acc_val_hist.append(accuracy_val)

            f1_score = metrics.f1_score(self.y_val.long().numpy(),
                                        outputs_val.argmax(1).numpy(),
                                        average='macro')
            f1_val_hist.append(f1_score)

            print(t, ' train_loss: ', loss.item(), 'val_loss: ', loss_val.item(),
                  ' - train_acc: ', accuracy_train, ', val_acc: ', accuracy_val,
                  ', val_f1: ', f1_score)

            self.model.name = f'{model_name}_epoch{t}'
            m_exporter.save_nn_model(self.model, optimizer, self.model.get_args(), debug=False)

    best_iteration = idx * loss_val_hist.index(min(loss_val_hist))
    print(f'optimal iteration val_loss: {best_iteration}')
    best_iteration_f1 = idx * f1_val_hist.index(max(f1_val_hist))
    print(f'optimal iteration val_f1: {best_iteration_f1}')
    best_iteration_acc = idx * acc_val_hist.index(max(acc_val_hist))
    print(f'optimal iteration val_acc: {best_iteration_acc}')

    # Restore the checkpoint with the lowest validation loss
    self.model.load_state_dict(state_dict=model_versions[best_iteration])
    self.model.eval()
    self.model.name = f'{model_name}'

    y_pred = self.model(self.x).argmax(1)
    accuracy_soft = (y_pred == self.y.long()).float().mean()
    print(f'training accuracy: {accuracy_soft}')

    return self.model, optimizer, criterion, loss_hist, loss_val_hist, f1_val_hist
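# run_train relies on model.get_batch to slice the prepared tensors. A minimal
# sketch, assuming plain sequential slicing by batch index (hypothetical; the
# real implementation could shuffle or defer to the weighted sampler instead):
def get_batch(self, x, y, batch, batch_size):
    start = batch * batch_size
    return x[start:start + batch_size], y[start:start + batch_size]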
    X_train_df, y_train_df, X_val_df, y_val_df, root_dir=script_root_dir)
t = time.time()
trained_model, optimizer, criterion, \
    train_loss_hist, train_acc_hist, train_f1_hist, train_b_hist, \
    val_loss_hist, val_acc_hist, val_f1_hist, val_b_hist = \
    train_classifier.run_train(n_epochs=n_epochs, lr=learning_rate, batch_size=batch_size)
print(f'trained in {time.time() - t} sec')

if args.s_model:
    m_exporter = ModelExporter('fer2013_reduced', root_dir=script_root_dir)
    m_exporter.save_nn_model(trained_model, optimizer, trained_model.get_args())
    m_exporter.save_results(f'{model_name}',
                            train_loss_hist, train_acc_hist, train_f1_hist, train_b_hist,
                            val_loss_hist, val_acc_hist, val_f1_hist, val_b_hist)

if args.s_patterns:
    detected_patterns = trained_model.get_detected_patterns()
    for idx in range(10):
        plt.figure(1, figsize=(20, 10))
        for p in range(trained_model.n_patterns):
            pattern = detected_patterns[idx][p].reshape(
                detected_patterns.shape[2], detected_patterns.shape[3])
            pattern_np = pattern.detach().numpy().reshape(24, 24)