def learn_encoder_decoder(self, train_samples, plot_dir=None): """ Perform PCA by a straightforward minimization of ||X - XCC^T|| constraint to C's columns being orthonormal vectors i.e C^TC = I. This minimization problem is equivalent to maximization of the projections variation i.e the variation in featues of XC (XC)^T(XC) The analytical solution to this problem is a metrix with XX^T eiegenvalues as columns see http://alexhwilliams.info/itsneuronalblog/2016/03/27/pca/ Assumes X is zero centered """ self.train_mean = train_samples.mean(0) data = train_samples - self.train_mean X = torch.tensor(data, requires_grad=False) C = torch.tensor(np.random.normal(0, 1, (data.shape[1], self.latent_dim)), requires_grad=True) optimizer = torch.optim.Adam([C], lr=self.lr) losses = [[], []] for s in range(self.optimization_steps): projected_data = torch.matmul(X, C) reconstruct_data = torch.matmul(projected_data, C.t()) reconstruction_loss = torch.nn.functional.mse_loss( X, reconstruct_data) # ensure C columns are orthonormal CT_C = torch.matmul(C.t(), C) constraint_loss = torch.nn.functional.mse_loss( CT_C, torch.eye(self.latent_dim, dtype=C.dtype)) loss = reconstruction_loss + self.regularization_factor * constraint_loss loss.backward() optimizer.step() optimizer.zero_grad() losses[0] += [reconstruction_loss.item()] losses[1] += [constraint_loss.item()] # plot training if plot_dir: plot_training(losses, ["reconstruction_loss", "constrain_loss"], os.path.join(plot_dir, f"Learning-{self}.png")) C = C.detach().numpy() # Sort PCs in descending order by eiegenvalues eiegenvalues = [] for i in range(self.latent_dim): data_projected_to_pc = np.dot(data, C[:, i]) pc_variation = np.dot(data_projected_to_pc.transpose(), data_projected_to_pc) C_norm = np.dot(C[0].transpose(), C[0]) eiegenvalues += [pc_variation / C_norm] order = np.argsort(eiegenvalues)[::-1] self.projection_matrix = C[:, order] self.restoration_matrix = self.projection_matrix.transpose()
def learn_encoder_decoder(self, data, plot_dir=None): start = time() print("\tLearning encoder decoder... ", end="") X = torch.tensor(data, requires_grad=False, dtype=torch.float32) optimizer = torch.optim.Adam(list(self.E.parameters()) + list(self.D.parameters()), lr=self.lr) losses = [[]] for s in range(self.optimization_steps): batch_X = X[torch.randint(X.shape[0], (self.batch_size, ), dtype=torch.long)] reconstruct_data = self.D(self.E(batch_X)) loss = self.metric(batch_X, reconstruct_data) loss.backward() optimizer.step() optimizer.zero_grad() losses[0] += [loss.item()] if plot_dir: plot_training(losses, ["reconstruction_loss"], os.path.join(plot_dir, f"Learning-{self}.png")) print(f"Finished in {time() - start:.2f} sec")
def train():
    """Run the DQN training loop with a prioritized replay memory.

    Relies on module-level objects: env, stack (frame stack), agent, memory,
    policy_net, target_net, mod_action_space, get_error, experience_replay,
    and constants NUM_EPISODES, LOG_EVERY, TAU, SAVE_UPDATE,
    POLICY_NET_PATH, TARGET_NET_PATH — all assumed defined elsewhere in the
    module (not visible here).
    """
    print("training")
    average_rewards = deque(maxlen=LOG_EVERY)
    all_rewards = []
    tau_count = 0  # steps since last target-network sync
    for episode in range(NUM_EPISODES):
        env.reset()
        episode_reward = 0
        # Seed the frame stack with the initial state (True presumably marks
        # a new episode — TODO confirm against stack.push's signature).
        stack.push(env.state, True)
        curr_state = stack.get_stack()
        next_state = None
        while not env.done:
            # pick an action and execute
            action = agent.select_action(state=curr_state,
                                         policy_net=policy_net)
            next_state, reward, done, _ = env.play_action(
                mod_action_space[action])
            stack.push(next_state, False)
            next_state = stack.get_stack()
            tau_count += 1
            # store experience in memory -> (state, action, next_state, reward, done)
            action = np.array([action])
            experience = (curr_state, action, next_state, reward, done)
            # Priority for this sample comes from its TD error.
            error = get_error(experience)
            memory.add(error=error, experience=experience)
            episode_reward += reward
            curr_state = next_state
            # perform experience replay (replay buffer is already sufficient size)
            experience_replay()
        average_rewards.append(episode_reward)
        all_rewards.append(episode_reward)
        if episode % LOG_EVERY == 0:
            # NOTE(review): divides by LOG_EVERY even while the deque holds
            # fewer entries (early episodes), understating the average.
            average_reward = sum(average_rewards) / LOG_EVERY
            print("Current episode: {}\nAverge reward: {}\n".format(
                episode, average_reward))
        # Plot every 100 episodes, skipping episodes 0 and 100.
        if episode != 0 and episode != 100 and episode % 100 == 0:
            utils.plot_training(all_rewards)
        # Hard-update the target network every TAU environment steps.
        if tau_count >= TAU:
            target_net.load_state_dict(policy_net.state_dict())
            tau_count = 0
        if episode % SAVE_UPDATE == 0:
            torch.save(policy_net.state_dict(), POLICY_NET_PATH)
            torch.save(target_net.state_dict(), TARGET_NET_PATH)
    # Final plot and checkpoint after all episodes.
    utils.plot_training(all_rewards)
    torch.save(policy_net.state_dict(), POLICY_NET_PATH)
    torch.save(target_net.state_dict(), TARGET_NET_PATH)
    print("done!\n")
def train():
    """Run a step-limited DQN training loop with uniform replay sampling.

    Relies on module-level objects: env, agent, memory, policy_net,
    target_net, optimizer, experience_replay, and constants NUM_EPISODES,
    MAX_STEPS, BATCH_SIZE, TARGET_UPDATE, POLICY_NET_PATH, TARGET_NET_PATH
    — all assumed defined elsewhere in the module (not visible here).
    """
    average_rewards = deque(maxlen=50)
    max_reward = 0
    all_rewards = []
    for episode in range(NUM_EPISODES):
        env.reset()
        episode_reward = 0
        for step in range(MAX_STEPS):
            action = agent.select_action(state=env.state,
                                         policy_net=policy_net)
            curr_state = env.state
            new_state, reward, done, _ = env.play_action(action)
            memory.push(curr_state, action, new_state, reward, done)
            # reward is presumably a tensor here (.item()) — confirm env API.
            episode_reward += reward.item()
            # Sample a batch; sample() presumably returns a falsy value until
            # the buffer holds BATCH_SIZE entries — TODO confirm.
            experiences = memory.sample(BATCH_SIZE)
            if experiences:
                current_q_values, optimal_q_values = experience_replay(
                    experiences)
                loss = F.mse_loss(current_q_values, optimal_q_values)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            if env.done:
                break
        all_rewards.append(episode_reward)
        average_rewards.append(episode_reward)
        if episode_reward > max_reward:
            max_reward = episode_reward
        if memory.is_full():
            print("Memory is full.")
        # Hard-update the target network every TARGET_UPDATE episodes.
        if episode % TARGET_UPDATE == 0:
            target_net.load_state_dict(policy_net.state_dict())
        if episode % 50 == 0:
            # NOTE(review): divides by 50 even while the deque holds fewer
            # entries (early episodes), understating the average.
            print("Episode {}: {}".format(episode,
                                          sum(average_rewards) / 50))
            print("Epsilon: {}".format(agent.eps))
            torch.save(policy_net.state_dict(), POLICY_NET_PATH)
            torch.save(target_net.state_dict(), TARGET_NET_PATH)
    utils.plot_training(all_rewards)
def main():
    """CLI entry point: parse and validate arguments, then train a citation
    GNN model and plot its training histories."""
    parser = build_parser()
    args = parser.parse_args()
    check_args(args)
    check_args_to_run(args)

    # Use the GPU only when requested AND actually available.
    use_cuda = args.gpu and torch.cuda.is_available()
    device = torch.device('cuda') if use_cuda else torch.device('cpu')

    if args.subcmd == 'citation':
        dataset = load_dataset(args.dataset)
        hparams = dict(
            input_dim=dataset.num_node_features,
            hidden_dim=args.hidden_dim,
            output_dim=dataset.num_classes,
            n_layers=args.n_layers,
            dropout=args.dropout,
            edge_dropout=args.edge_dropout,
            layer_wise_dropedge=args.layer_wise_dropedge,
        )
        # Human-readable tag encoding the architecture hyper-parameters;
        # reused for both checkpoint and plot file names.
        model_name = (f'{args.model}-{args.n_layers}'
                      f'-hidden_dim={args.hidden_dim}'
                      f'-dropout={args.dropout}'
                      f'-edge_dropout={args.edge_dropout}'
                      f'-LW={args.layer_wise_dropedge}')
        # Trailing '{}' placeholder is filled in later by the trainer.
        model_path = ('pretrained_models/' + model_name + '_'
                      + args.dataset.lower() + '_{}.pth')
        histories = train_for_citation(args=args,
                                       hparams=hparams,
                                       dataset=dataset,
                                       device=device,
                                       model_path=model_path)
        plot_training(
            histories,
            title=f'{model_name} / {args.dataset.title()}',
            metric_name='accuracy',
            save_path=f'images/{model_name}_{args.dataset.lower()}.png')
def learn_encoder_decoder(self, data, plot_dir=None):
    """Adversarially train the four networks E, D, F, G (ALAE-style setup:
    F maps noise z to latent w, G generates from w, E encodes back to w,
    D discriminates on E's output).

    Three optimizers update disjoint parameter groups in sequence each batch:
      I.   E+D  — adversarial discriminator loss with R1 gradient penalty,
      II.  F+G  — adversarial generator loss,
      III. E+G  — latent-space (w) reconstruction loss.

    :param data: training samples, wrapped in SimpleDataset for the loader.
    :param plot_dir: optional directory in which to save the loss curves.
    """
    start = time()
    print("\tLearning encoder decoder... ", end="")
    dataset = SimpleDataset(data)
    kwargs = {'batch_size': self.batch_size}
    if self.device != "cpu":
        # DataLoader niceties for GPU training.
        kwargs.update(
            {
                'num_workers': 1,
                'pin_memory': True,
                'shuffle': True
            }, )
    train_loader = torch.utils.data.DataLoader(dataset, **kwargs)
    ED_optimizer = torch.optim.Adam(list(self.E.parameters()) +
                                    list(self.D.parameters()),
                                    lr=self.lr,
                                    betas=(0.0, 0.99))
    FG_optimizer = torch.optim.Adam(list(self.F.parameters()) +
                                    list(self.G.parameters()),
                                    lr=self.lr,
                                    betas=(0.0, 0.99))
    EG_optimizer = torch.optim.Adam(list(self.E.parameters()) +
                                    list(self.G.parameters()),
                                    lr=self.lr,
                                    betas=(0.0, 0.99))
    softplus = F.softplus
    mse = F.mse_loss
    # X = torch.tensor(data, requires_grad=True, )
    losses = [[], [], []]
    for epoch in range(self.epochs):
        for batch_idx, batch_real_data in enumerate(train_loader):
            # requires_grad on the inputs is needed for the R1 penalty below.
            batch_real_data = batch_real_data.requires_grad_(True).float()
            # Step I. Update E, and D: Optimize the discriminator D(E( * )) to better differentiate between real x data
            # and data generated by G(F( * ))
            ED_optimizer.zero_grad()
            batch_latent_vectors = torch.tensor(np.random.normal(
                0, 1, size=(self.batch_size, self.z_dim)),
                                                requires_grad=False,
                                                dtype=torch.float32)
            real_images_dicriminator_outputs = self.D(
                self.E(batch_real_data))
            # Non-saturating logistic loss (softplus form).
            L_adv_ED = softplus(
                self.D(self.E(self.G(self.F(batch_latent_vectors))))).mean(
                ) + softplus(-real_images_dicriminator_outputs).mean()
            # R1 gradient regularization as in paper
            real_grads = torch.autograd.grad(
                outputs=real_images_dicriminator_outputs,
                inputs=batch_real_data,
                grad_outputs=torch.ones_like(
                    real_images_dicriminator_outputs),
                create_graph=True,
                retain_graph=True)[0]
            gradient_penalty = 0.5 * (
                (real_grads.norm(2, dim=1) - 1)**2).mean()
            L_adv_ED += gradient_penalty * self.g_penalty_coeff
            L_adv_ED.backward()
            ED_optimizer.step()
            # Step II. Update F, and G: Optimize the generator G(F( * )) to fool D(E ( * ))
            FG_optimizer.zero_grad()
            batch_latent_vectors = torch.tensor(np.random.normal(
                0, 1, size=(self.batch_size, self.z_dim)),
                                                requires_grad=False,
                                                dtype=torch.float32)
            L_adv_FG = softplus(-self.D(
                self.E(self.G(self.F(batch_latent_vectors))))).mean()
            L_adv_FG.backward()
            FG_optimizer.step()
            # Step III. Update E, and G: Optimize the reconstruction loss in the Latent space W
            EG_optimizer.zero_grad()
            batch_latent_vectors = torch.tensor(np.random.normal(
                0, 1, size=(self.batch_size, self.z_dim)),
                                                requires_grad=False,
                                                dtype=torch.float32)
            # detach: F is held fixed in step III; only E and G receive grads.
            w_latent_vectors = self.F(batch_latent_vectors).detach()
            L_err_EG = mse(w_latent_vectors,
                           self.E(self.G(w_latent_vectors)))
            L_err_EG.backward()
            EG_optimizer.step()
            losses[0] += [L_adv_ED.item()]
            losses[1] += [L_adv_FG.item()]
            losses[2] += [L_err_EG.item()]
        print(f"Epoch done {epoch}")
    # plot training
    if plot_dir:
        plot_training(losses, ["ED_loss", "FG_loss", 'EG_loss'],
                      os.path.join(plot_dir, f"Learning-{self}.png"))
    print(f"Finished in {time() - start:.2f} sec")
def fit(self, X: np.ndarray) -> None:
    """Learn the cluster centroids (Lloyd-style k-means iterations).

    Relies on sibling methods self.predict, self._compute_inertia and
    self._update_centers (defined elsewhere in the class).

    :param X: 2-D array of samples, shape (n_data, n_features).
    """
    # Number of samples.
    n_data = X.shape[0]

    # Keep the running sum-of-squared-distances history for display.
    if self.display:
        shutil.rmtree('./img_training', ignore_errors=True)
        metric = []

    # Two initialization cases:
    # - at least as many clusters as samples: each sample seeds its own
    #   cluster, remaining centroids stay at the origin;
    # - fewer clusters than samples: seed with the first n_clusters samples.
    if self.n_clusters >= n_data:
        # FIX: np.zeros takes a shape TUPLE; the original passed X.shape[1]
        # as the dtype argument, raising a TypeError.
        self.cluster_centers = np.zeros((self.n_clusters, X.shape[1]))
        self.cluster_centers[:n_data] = X
    else:
        self.cluster_centers = X[:self.n_clusters, :]

    # Becomes True once the inertia change drops below the tolerance.
    stabilise = False

    for i in range(self.max_iter):
        # Assign each sample to its nearest centroid.
        y = self.predict(X)

        # Initialize the inertia on the first iteration only.
        if i == 0:
            current_distance = self._compute_inertia(X, y)

        # Move each centroid to the mean of its assigned samples.
        self._update_centers(X, y)

        old_distance = current_distance
        current_distance = self._compute_inertia(X, y)

        # Stop when the inertia change between two iterations falls below
        # the tolerance threshold.
        if self.early_stopping:
            if abs(old_distance - current_distance) < self.tol:
                stabilise = True
            if stabilise:
                # FIX: `metric` and the plot only exist in display mode; the
                # original referenced them unconditionally here (NameError
                # when early-stopping with display disabled).
                if self.display:
                    diff = abs(old_distance - current_distance)
                    metric.append(diff)
                    plot_training(i, X, y, self.cluster_centers, metric)
                break

        # Plot the clusters for this iteration.
        if self.display:
            diff = abs(old_distance - current_distance)
            metric.append(diff)
            plot_training(i, X, y, self.cluster_centers, metric)
ger_vocab_size, batch_size=batch_size) H = model.fit_generator(train_generator, steps_per_epoch=train_steps, validation_data=val_generator, validation_steps=val_steps, epochs=epochs, verbose=1, callbacks=[earlystopping, checkpoint]) stopped_epoch = earlystopping.stopped_epoch print('[INFO] Early stopping at epoch: {:d}'.format(stopped_epoch)) plot_training(H, stopped_epoch + 1, plot_path_loss='training_loss_attention_model_new_arch.png', plot_path_acc='training_acc_attention_model_new_arch.png') # plot_training(H, epochs, plot_path_loss='training_loss_attention_model_new_arch.png', plot_path_acc='training_acc_attention_model_new_arch.png') else: # Use both train and dev sets to train final model final_dataset = np.concatenate((train, dev)) print('[INFO] Building final model with dataset size: {:d}'.format( len(final_dataset))) steps = len(final_dataset) // batch_size train_generator = data_generator(final_dataset, eng_tokenizer, eng_length, ger_tokenizer, ger_length, eng_vocab_size,
verbose=1, mode='max') callbacks_list = [checkpoint] history = fine_tune_model.fit( train_generator, epochs=args.num_epochs, workers=8, steps_per_epoch=num_train_images // BATCH_SIZE, validation_data=validation_generator, validation_steps=num_val_images // BATCH_SIZE, # class_weight='auto', shuffle=True, callbacks=callbacks_list) utils.plot_training(history) elif args.mode == "predict": if args.image is None: ValueError("You must pass an image path when using prediction mode.") # Create directories if needed if not os.path.isdir("%s" % ("Predictions")): os.makedirs("%s" % ("Predictions")) # Read in your image image = cv2.imread(args.image, -1) save_image = image image = np.float32(cv2.resize(image, (HEIGHT, WIDTH))) image = preprocessing_function(image.reshape(1, HEIGHT, WIDTH, 3))
phase = parameters.phase output = parameters.output up = parameters.up if mode == 'tEncoder': ''' Train Transdormers based encoders BETo for spanish and BERTweet for English ''' if os.path.exists('./logs') == False: os.system('mkdir logs') text, hateness = load_data(data_path) history = train_Encoder(text, hateness, language, mode_weigth, splits, epoches, batch_size, max_length, interm_layer_size, learning_rate, decay, 1, 0.1) plot_training(history[-1], language, 'acc') exit(0) if mode == 'encode': ''' Get Encodings for each author's message from the Transformer based encoders ''' weight_path = os.path.join( weight_path, 'bestmodelo_split_{}_1.pt'.format(language[:2])) if os.path.isfile(weight_path) == False: print( f"{bcolors.FAIL}{bcolors.BOLD}ERROR: Weight path set unproperly{bcolors.ENDC}" ) exit(1)
def train_model(model, criterion, optimizer, dataloaders, scheduler,
                dataset_sizes, num_epochs):
    """Train `model` over train/valid phases, tracking loss, accuracy and a
    confusion meter per phase, and return the model restored to its best
    validation-accuracy weights.

    Relies on module-level `data_cat` (phase names, e.g. ['train', 'valid']),
    `meter` (torchnet.meter) and `plot_training` — defined elsewhere.

    :param criterion: loss callable invoked as criterion(outputs, labels, phase).
    :param scheduler: LR scheduler stepped on the validation loss.
    :return: the model with the best validation weights loaded.
    """
    since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    costs = {x: [] for x in data_cat}  # for storing costs per epoch
    accs = {x: [] for x in data_cat}  # for storing accuracies per epoch
    print('Train batches:', len(dataloaders['train']))
    print('Valid batches:', len(dataloaders['valid']), '\n')
    for epoch in range(num_epochs):
        confusion_matrix = {
            x: meter.ConfusionMeter(2, normalized=True)
            for x in data_cat
        }
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in data_cat:
            model.train(phase == 'train')
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data.
            for i, data in enumerate(dataloaders[phase]):
                # get the inputs
                print(i, end='\r')
                inputs = data['images'][0]
                labels = data['label'].type(torch.FloatTensor)
                # FIX: the forward pass was missing entirely (the original
                # carried a '"""need fixing"""' marker and used `loss` and
                # `outputs` before defining them).  Reconstructed to match
                # the sibling train_model implementation in this file.
                optimizer.zero_grad()
                outputs = model(inputs)
                outputs = torch.mean(outputs)
                loss = criterion(outputs, labels, phase)
                running_loss += loss.item()
                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                # statistics
                preds = (outputs.data > 0.5).type(torch.FloatTensor)
                preds = preds.view(-1)
                running_corrects += torch.sum(preds == labels.data)
                confusion_matrix[phase].add(preds, labels.data)
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]
            costs[phase].append(epoch_loss)
            accs[phase].append(epoch_acc)
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            print('Confusion Meter:\n', confusion_matrix[phase].value())
            # deep copy the model
            if phase == 'valid':
                scheduler.step(epoch_loss)
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
        time_elapsed = time.time() - since
        print('Time elapsed: {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                     time_elapsed % 60))
        print()
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best valid Acc: {:4f}'.format(best_acc))
    plot_training(costs, accs)
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
def train(args, u_model, train_samples, val_samples):
    """Compile and train a capsule-network model (xcaps or capsnet variant)
    on lung-nodule-style data with attribute + malignancy labels.

    Relies on module-level helpers: compile_model, get_callbacks,
    ImageDataGenerator, custom_train_data_augmentation, normalize_img,
    get_pseudo_label, plot_training, and the `debug` flag — defined elsewhere.

    :param u_model: uncompiled Keras model.
    :param train_samples / val_samples: tuples where index 0 holds images,
        index 1 masks, index 2 label matrices — assumed from usage; confirm
        against the data loader.
    """
    # Compile the loaded model
    model = compile_model(args=args, uncomp_model=u_model)

    # Load pre-trained weights
    if args.finetune_weights_path != '':
        try:
            model.load_weights(args.finetune_weights_path)
        except Exception as e:
            # Best-effort: fall back to training from scratch.
            print(e)
            print(
                '!!! Failed to load custom weights file. Training without pre-trained weights. !!!'
            )

    # Set the callbacks
    callbacks = get_callbacks(args)

    # Augmented vs. pass-through data generators.
    if args.aug_data:
        train_datagen = ImageDataGenerator(
            samplewise_center=False,
            samplewise_std_normalization=False,
            rotation_range=45,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.1,
            zoom_range=0.1,
            fill_mode='nearest',
            horizontal_flip=True,
            vertical_flip=True,
            rescale=None,
            preprocessing_function=custom_train_data_augmentation)
        val_datagen = ImageDataGenerator(samplewise_center=False,
                                         samplewise_std_normalization=False,
                                         rescale=None)
    else:
        train_datagen = ImageDataGenerator(samplewise_center=False,
                                           samplewise_std_normalization=False,
                                           rotation_range=0,
                                           width_shift_range=0.,
                                           height_shift_range=0.,
                                           shear_range=0.,
                                           zoom_range=0.,
                                           fill_mode='nearest',
                                           horizontal_flip=False,
                                           vertical_flip=False,
                                           rescale=None)
        val_datagen = ImageDataGenerator(samplewise_center=False,
                                         samplewise_std_normalization=False,
                                         rescale=None)

    # Optionally dump augmented images for inspection.
    if debug:
        save_dir = args.img_aug_dir
    else:
        save_dir = None

    def xcaps_data_gen(gen):
        # Re-shape flow() batches into the multi-output format the xcaps
        # model expects: malignancy head, six attribute heads, and a
        # (mask-weighted) reconstruction target.
        while True:
            x, y = gen.next()
            if args.num_classes == 1:
                mal = np.array([y[i][0][6, 0] for i in range(y.shape[0])])
            else:
                mal = np.array([y[i][0][6, 1:] for i in range(y.shape[0])])
            yield x, [
                mal,
                np.array([y[i][0][0, 0] for i in range(y.shape[0])]),
                np.array([y[i][0][1, 0] for i in range(y.shape[0])]),
                np.array([y[i][0][2, 0] for i in range(y.shape[0])]),
                np.array([y[i][0][3, 0] for i in range(y.shape[0])]),
                np.array([y[i][0][4, 0] for i in range(y.shape[0])]),
                np.array([y[i][0][5, 0] for i in range(y.shape[0])]),
                x * np.expand_dims(
                    np.array([y[i][1] for i in range(y.shape[0])]), axis=-1)
            ]

    def capsnet_data_gen(gen):
        # CapsNet variant: label is also fed as an input (for masking) and
        # the image doubles as the reconstruction target.
        while True:
            x, y = gen.next()
            if args.num_classes == 1:
                y = np.array([y[i][0][6, 0] for i in range(y.shape[0])])
            else:
                y = np.array([y[i][0][6, 1:] for i in range(y.shape[0])])
            yield [x, y], [y, x]

    # Prepare images and labels for training
    train_imgs = normalize_img(
        np.expand_dims(train_samples[0], axis=-1).astype(np.float32))
    val_imgs = normalize_img(
        np.expand_dims(val_samples[0], axis=-1).astype(np.float32))
    train_labels = []
    val_labels = []
    n_attr = 9  # 8 attr + mal score
    # Attribute indices to leave out of the label set.
    skip_attr_list = [1, 2]
    for i in range(n_attr):
        skip = False
        if skip_attr_list:
            for j in skip_attr_list:
                if i == j:
                    #indexing from negative side
                    # NOTE(review): removes from the list being iterated; at
                    # most one element matches per pass so it works here.
                    skip_attr_list.remove(j)
                    skip = True
        if args.num_classes == 1 and i == n_attr - 1:
            # Binary malignancy target: scores >= 3 count as malignant,
            # repeated across 6 columns.
            tlab = np.repeat(np.expand_dims(train_samples[2][:,
                                                             2 * i + n_attr],
                                            axis=-1),
                             6,
                             axis=1)
            tlab[tlab < 3.] = 0.
            tlab[tlab >= 3.] = 1.
            train_labels.append(tlab)
            vlab = np.repeat(np.expand_dims(val_samples[2][:, 2 * i + n_attr],
                                            axis=-1),
                             6,
                             axis=1)
            vlab[vlab < 3.] = 0.
            vlab[vlab >= 3.] = 1.
            val_labels.append(vlab)
            skip = True
        if not skip:
            # Regression target scaled to [0, 1] plus a soft pseudo-label
            # distribution over the 1..5 score bins.
            train_labels.append(
                np.hstack(
                    (np.expand_dims(
                        (train_samples[2][:, 2 * i + n_attr] - 1) / 4.,
                        axis=-1),
                     get_pseudo_label([1., 2., 3., 4., 5.],
                                      train_samples[2][:, 2 * i + n_attr],
                                      train_samples[2][:,
                                                       2 * i + 1 + n_attr]))))
            val_labels.append(
                np.hstack(
                    (np.expand_dims(
                        (val_samples[2][:, 2 * i + n_attr] - 1) / 4.,
                        axis=-1),
                     get_pseudo_label([1., 2., 3., 4., 5.],
                                      val_samples[2][:, 2 * i + n_attr],
                                      val_samples[2][:, 2 * i + 1 + n_attr]))))
    # Move the attribute axis next to the sample axis.
    train_labels = np.rollaxis(np.asarray(train_labels), 0, 2)
    val_labels = np.rollaxis(np.asarray(val_labels), 0, 2)
    # Pair each label block with its reconstruction mask (or an all-ones
    # mask when masked reconstruction is disabled).
    # NOTE(review): np.object is a deprecated alias removed in NumPy 1.24+;
    # this code requires an older NumPy.
    new_labels = np.empty((len(train_labels), 2), dtype=np.object)
    for i in range(len(train_labels)):
        new_labels[i, 0] = train_labels[i]
        if args.masked_recon:
            new_labels[i, 1] = train_samples[1][i]
        else:
            new_labels[i, 1] = np.ones_like(train_samples[1][i])
    train_labels = new_labels
    new_labels = np.empty((len(val_labels), 2), dtype=np.object)
    for i in range(len(val_labels)):
        new_labels[i, 0] = val_labels[i]
        if args.masked_recon:
            new_labels[i, 1] = val_samples[1][i]
        else:
            new_labels[i, 1] = np.ones_like(val_samples[1][i])
    val_labels = new_labels

    train_flow_gen = train_datagen.flow(x=train_imgs,
                                        y=train_labels,
                                        batch_size=args.batch_size,
                                        shuffle=True,
                                        seed=12,
                                        save_to_dir=save_dir)
    val_flow_gen = val_datagen.flow(x=val_imgs,
                                    y=val_labels,
                                    batch_size=args.batch_size,
                                    shuffle=True,
                                    seed=12,
                                    save_to_dir=save_dir)

    # Pick the generator wrapper matching the requested network.
    if args.net.find('xcaps') != -1:
        train_gen = xcaps_data_gen(train_flow_gen)
        val_gen = xcaps_data_gen(val_flow_gen)
    elif args.net.find('capsnet') != -1:
        train_gen = capsnet_data_gen(train_flow_gen)
        val_gen = capsnet_data_gen(val_flow_gen)
    else:
        raise NotImplementedError(
            'Data generator not found for specified network. Please check train.py file.'
        )

    # Settings
    train_steps = len(train_samples[0]) // args.batch_size
    val_steps = len(val_samples[0]) // args.batch_size
    workers = 4
    multiproc = True

    # Run training
    history = model.fit_generator(train_gen,
                                  max_queue_size=40,
                                  workers=workers,
                                  use_multiprocessing=multiproc,
                                  steps_per_epoch=train_steps,
                                  validation_data=val_gen,
                                  validation_steps=val_steps,
                                  epochs=args.epochs,
                                  class_weight=None,
                                  callbacks=callbacks,
                                  verbose=args.verbose,
                                  shuffle=True)

    # Plot the training data collected
    plot_training(history, args)
def fit(self, X: np.ndarray):
    """Learn the cluster centroids (Lloyd-style k-means iterations).

    Relies on sibling methods self.predict, self._compute_inertia and
    self._update_centers (defined elsewhere in the class).

    :param X: 2-D array of samples, shape (n_data, n_features).
    :return: tuple (y, cluster_centers) — final assignments and centroids.
    """
    # Number of samples.
    n_data = X.shape[0]

    # Keep the running sum-of-squared-distances history for display.
    if self.display:
        shutil.rmtree('./img_training', ignore_errors=True)
        metric = []

    # Two initialization cases:
    # - at least as many clusters as samples: each sample seeds its own
    #   cluster, remaining centroids stay at the origin;
    # - fewer clusters than samples: seed with randomly drawn samples.
    if self.n_clusters >= n_data:
        # FIX: np.zeros takes a shape TUPLE; the original passed X.shape[1]
        # as the dtype argument, raising a TypeError.
        self.cluster_centers = np.zeros((self.n_clusters, X.shape[1]))
        self.cluster_centers[:n_data] = X
    else:
        self.cluster_centers = np.ndarray(shape=(self.n_clusters,
                                                 X.shape[1]))
        # Random centroid initialization (may pick the same sample twice).
        for i in range(self.n_clusters):
            r = random.randint(0, n_data - 1)
            self.cluster_centers[i] = X[r]

    # Becomes True once the inertia change drops below the tolerance.
    stabilise = False

    for i in range(self.max_iter):
        # Assign each sample to its nearest centroid.
        y = self.predict(X)

        # Initialize the inertia on the first iteration only.
        if i == 0:
            current_distance = self._compute_inertia(X, y)

        # Move each centroid to the mean of its assigned samples.
        self._update_centers(X, y)

        old_distance = current_distance
        current_distance = self._compute_inertia(X, y)

        # Stop when the inertia change between two iterations falls below
        # the tolerance threshold.
        if self.early_stopping:
            # FIX: the original wrote abs((old - cur) <= tol) — abs() of a
            # BOOLEAN — which drops the absolute value on the difference, so
            # any inertia increase (negative difference) falsely triggered
            # convergence.  The abs must wrap the difference itself.
            if abs(old_distance - current_distance) <= self.tol:
                stabilise = True
            if stabilise:
                break

        # Plot the clusters for this iteration.
        if self.display:
            print("on affiche")
            diff = abs(old_distance - current_distance)
            metric.append(diff)
            plot_training(i, X, y, self.cluster_centers, metric)
        print('on tourne')

    return y, self.cluster_centers
def main():
    """CLI entry point: load a genomics dataset, build the requested model
    from model_zoo, train it with early stopping + LR reduction, save the
    model and plot AUPR/AUROC training curves.

    Relies on module-level `model_zoo`, `utils` and `keras` imports
    (defined elsewhere in the file).
    """
    usage = 'usage: %prog [options] <data_dir> <model_name> <output_dir> ...'
    parser = OptionParser(usage)
    parser.add_option(
        '-b',
        dest='batch_size',
        default=64,
        help='Batch size for the model training [Default: %default]')
    parser.add_option('-p',
                      dest='patience',
                      default=20,
                      help='Training patience [Default: %default]')
    parser.add_option('-l',
                      dest='learning_rate',
                      default=0.1,
                      help='Learning rate [Default: %default]')
    parser.add_option('-e',
                      dest='n_epochs',
                      default=100,
                      help='Training number of epochs [Default: %default]')
    parser.add_option('-o',
                      dest='model_filename',
                      default='model.h5',
                      help='Filename of the model [Default: %default]')
    (options, args) = parser.parse_args()
    ########TODO:ADD THE REST OF THE parameters
    if len(args) < 3:
        parser.error('Must provide data_dir, model and output directory.')
    else:
        data_path = args[0]
        model_name = args[1]
        output_dir = args[2]

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    if not options.model_filename.endswith('.h5'):
        options.model_filename = options.model_filename + '.h5'
    model_path = os.path.join(output_dir, options.model_filename)

    # ####LOAD DATA
    dataset = utils.load_data(data_path)
    x_train, y_train, x_valid, y_valid, x_test, y_test = dataset

    N, L, A = x_train.shape
    input_size = (L, A)
    n_labels = y_train.shape[1]

    if model_name == 'deepsea':
        model = model_zoo.deepsea(input_size, n_labels)
    elif model_name == 'basset':
        model = model_zoo.basset(input_size, n_labels)
    elif model_name == 'basset_mod_dr_bn':
        model = model_zoo.basset_mod_dr_bn(input_size, n_labels)
    else:
        # FIX: the original had a bare string statement here ('Model not
        # found') which did nothing, leaving `model` undefined and crashing
        # later with a NameError.  Fail fast instead.
        raise ValueError('Model not found: {}'.format(model_name))
    print(model.summary())

    # set up optimizer and metrics
    auroc = keras.metrics.AUC(curve='ROC', name='auroc')
    aupr = keras.metrics.AUC(curve='PR', name='aupr')
    # FIX: optparse delivers command-line values as strings (no type= was
    # given); cast like the int() casts applied to epochs/batch_size below.
    optimizer = keras.optimizers.Adam(
        learning_rate=float(options.learning_rate))
    loss = keras.losses.BinaryCrossentropy(from_logits=False,
                                           label_smoothing=0)
    model.compile(optimizer=optimizer,
                  loss=loss,
                  metrics=['accuracy', auroc, aupr])

    # train model
    es_callback = keras.callbacks.EarlyStopping(
        monitor='val_auroc',  #'val_aupr',#
        patience=options.patience,
        verbose=1,
        mode='max',
        restore_best_weights=False)
    reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_auroc',
                                                  factor=0.2,
                                                  patience=options.patience,
                                                  min_lr=1e-7,
                                                  mode='max',
                                                  verbose=1)
    history = model.fit(x_train,
                        y_train,
                        epochs=int(options.n_epochs),
                        batch_size=int(options.batch_size),
                        shuffle=True,
                        validation_data=(x_valid, y_valid),
                        callbacks=[es_callback, reduce_lr])
    model.save(model_path)
    utils.plot_training(history, 'aupr', output_dir)
    utils.plot_training(history, 'auroc', output_dir)
def learn_encoder_decoder(self, data, plot_dir=None):
    """Train a GAN (generator self.G, discriminator self.D) on `data`, and
    train the encoder self.E to invert G back to latent space via
    self.regress_encoder — either jointly with the GAN steps or in a
    separate pass afterwards, depending on self.regressor_training.

    :param data: 2-D array-like of real training samples.
    :param plot_dir: optional directory in which to save the loss curves.
    """
    start = time()
    print("\tLearning encoder decoder... ", end="")
    # Optimizers
    optimizer_G = torch.optim.Adam(self.G.parameters(),
                                   lr=self.lr,
                                   betas=(0.5, 0.999))
    optimizer_D = torch.optim.Adam(self.D.parameters(),
                                   lr=self.lr,
                                   betas=(0.5, 0.999))
    optimizer_E = torch.optim.Adam(self.E.parameters(),
                                   lr=self.lr,
                                   betas=(0.5, 0.999))
    X = torch.tensor(data, requires_grad=False, dtype=torch.float32)
    losses = [[], [], [], []]
    for s in range(self.optimization_steps):
        # Adversarial ground truths
        ones = torch.ones((self.batch_size, 1),
                          dtype=torch.float32,
                          requires_grad=False)
        zeros = torch.zeros((self.batch_size, 1),
                            dtype=torch.float32,
                            requires_grad=False)
        # Random minibatch of real samples (with replacement).
        batch_real_data = X[torch.randint(data.shape[0],
                                          (self.batch_size, ))]
        # Train Generator #
        optimizer_G.zero_grad()
        z = torch.tensor(np.random.normal(0, 1,
                                          size=(self.batch_size,
                                                self.latent_dim)),
                         dtype=torch.float32,
                         requires_grad=False)
        generated_data = self.G(z)
        # Generator tries to make D output "real" (ones) on fakes.
        g_loss = self.BCE_loss(self.D(generated_data), ones)
        g_loss.backward()
        optimizer_G.step()
        # Train discriminator #
        optimizer_D.zero_grad()
        fake_loss = self.BCE_loss(
            self.D(generated_data.detach()),
            zeros)  # detach so that no gradient will be computed for G
        real_loss = self.BCE_loss(self.D(batch_real_data), ones)
        d_loss = (real_loss + fake_loss) / 2
        d_loss.backward()
        optimizer_D.step()
        losses[0] += [g_loss.item()]
        losses[1] += [real_loss.item()]
        losses[2] += [fake_loss.item()]
        # Joint mode: fit the encoder on this step's z alongside the GAN.
        if self.regressor_training == 'joint':
            losses[3] += [self.regress_encoder(optimizer_E, z)]
    # Separate mode: fit the encoder in its own pass after GAN training.
    if self.regressor_training != 'joint':
        for s in range(self.optimization_steps):
            z = torch.tensor(np.random.normal(0, 1,
                                              size=(self.batch_size,
                                                    self.latent_dim)),
                             dtype=torch.float32,
                             requires_grad=False)
            losses[3] += [self.regress_encoder(optimizer_E, z)]
    # plot training
    if plot_dir:
        plot_training(losses,
                      ["g-loss", "D-real", 'd-fake', 'z-reconstruction'],
                      os.path.join(plot_dir, f"Learning-{self}.png"))
    print(f"Finished in {time() - start:.2f} sec")
def main():
    """Run the ant-colony RL simulation: per episode, generate a fresh
    environment, step the collecting agents, train the network, periodically
    persist visualization states, decay epsilon, and finally plot rewards
    and losses.

    Relies on module-level names: episodes, steps, training, use_model,
    visualize_every, save_file_name, save_model, min_epsilon, max_epsilon,
    plus the project classes (All_Rewards, RLApi, CollectAgentMemory,
    EnvironmentGenerator, ...) — defined elsewhere.
    """
    states = []
    n_ants = 50
    # Setting up RL Reward
    #reward_funct = ExplorationReward()
    reward_funct = All_Rewards(fct_explore=1,
                               fct_food=2,
                               fct_anthill=10,
                               fct_explore_holding=1,
                               fct_headinganthill=3)
    # Setting up RL Api
    api = RLApi(reward=reward_funct,
                reward_threshold=1,
                max_speed=1,
                max_rot_speed=40 / 180 * np.pi,
                carry_speed_reduction=0.05,
                backward_speed_reduction=0.5)
    api.save_perceptive_field = True
    agent = CollectAgentMemory(epsilon=0.9,
                               discount=0.99,
                               rotations=3,
                               pheromones=3,
                               learning_rate=0.00001)
    agent_is_setup = False
    avg_loss = None  # EMA of the training loss
    avg_time = None  # EMA of the per-step wall time (ms)
    all_loss = []
    all_reward = []
    print("Starting simulation...")
    for episode in range(episodes):
        # Record states for playback on the first episode, every
        # `visualize_every`-th episode, or always when not training.
        visualize_episode = (
            episode + 1) % visualize_every == 0 or episode == 0 or not training
        # A fresh randomly-generated world every episode.
        generator = EnvironmentGenerator(
            w=200,
            h=200,
            n_ants=n_ants,
            n_pheromones=2,
            n_rocks=0,
            food_generator=CirclesGenerator(20, 5, 10),
            walls_generator=PerlinGenerator(scale=22.0, density=0.3),
            max_steps=steps,
            seed=None)
        env = generator.generate(api)
        print('\n--- Episode {}/{} --- {}'.format(
            episode + 1, episodes, "VISUALIZED" if visualize_episode else ""))
        # Setups the agents only once
        if not agent_is_setup:
            agent.setup(api, use_model)
            agent_is_setup = True
        # Initializes the agents on the new environment
        agent.initialize(api)
        obs, agent_state, state = api.observation()
        episode_reward = np.zeros(n_ants)
        mean_reward = 0
        for s in range(steps):
            now = time.time()
            # Compute the next action of the agents
            action = agent.get_action(obs, agent_state, training)
            # Execute the action
            new_state, new_agent_state, reward, done = api.step(*action[:2])
            # Add the reward values to total reward of episode
            episode_reward += reward
            # Update replay memory with new action and states
            agent.update_replay_memory(obs, agent_state, action, reward,
                                       new_state, new_agent_state, done)
            # Train the neural network
            if training:
                loss = agent.train(done, s)
                if avg_loss is None:
                    avg_loss = loss
                else:
                    # Exponential moving average of the loss.
                    avg_loss = 0.99 * avg_loss + 0.01 * loss
            else:
                avg_loss = 0
            # Set obs to the new state
            obs = new_state
            agent_state = new_agent_state
            # Progress report every 50 steps.
            if (s + 1) % 50 == 0:
                mean_reward = episode_reward.mean(axis=0)
                # max_reward = episode_reward.max(axis=0)
                # min_reward = episode_reward.min(axis=0)
                # var_reward = episode_reward.std(axis=0)
                total_reward = episode_reward.sum(axis=0)
                eta_seconds = int(
                    ((steps - s) * avg_time +
                     (episodes - episode - 1) * steps * avg_time) / 1000)
                print(
                    "\rAverage loss : {:.5f} --".format(avg_loss),
                    # "Episode reward stats: mean {:.2f} - min {:.2f} - max {:.2f} - std {:.2f} - total {:.2f} --".format(
                    #     mean_reward, min_reward, max_reward, var_reward, total_reward),
                    "Episode rewards: {} --".format(total_reward),
                    "Avg-time per step: {:.3f}ms, step {}/{}".format(
                        avg_time, s + 1, steps),
                    "-- E.T.A: {} min {} sec".format(eta_seconds // 60,
                                                     eta_seconds % 60),
                    end="")
            # Pass new step
            env.update()
            elapsed = (time.time() - now) * 1000
            if avg_time is None:
                avg_time = elapsed
            else:
                avg_time = 0.99 * avg_time + 0.01 * elapsed
            if visualize_episode:
                states.append(env.save_state())
        if visualize_episode:
            # Append this episode's states to the pickle of all previous
            # ones, then free the memory.
            if episode == 0:
                previous_states = []
            else:
                previous_states = pickle.load(
                    open("saved/" + save_file_name, "rb"))
            pickle.dump(previous_states + states,
                        open("saved/" + save_file_name, "wb"))
            del states
            del previous_states
            states = []
            gc.collect()
        # Logarithmic epsilon decay, clamped to [min_epsilon, max_epsilon].
        agent.epsilon = max(
            min_epsilon,
            min(max_epsilon, 1.0 - math.log10((episode + 1) / 2)))
        print('\n Epsilon : ', agent.epsilon)
        all_loss.append(avg_loss)
        all_reward.append(mean_reward)
    if save_model and training:
        # Timestamped checkpoint name, e.g. "7_3_14_<agent>.h5".
        date = datetime.datetime.now()
        model_name = str(date.day) + '_' + str(date.month) + '_' + str(
            date.hour) + '_' + agent.name + '.h5'
        agent.save_model(model_name)
    plot_training(all_reward, all_loss)
### Training ### model, history_training = train_model(model=model, hist=history_training, criterion=criterion, optimizer=optimizer, dataloaders=dataloaders, dataset_sizes=dataset_sizes, data_augment=DATA_AUGMENT, scheduler=lr_sched, num_epochs=EPOCHS, patience_es= 15) ### Testing ### history_training = test_model(model=model, hist=history_training, criterion=criterion, dataloaders=dataloaders, dataset_sizes=dataset_sizes) ### Save the model ### save_model(model=model, hist=history_training, trained_models_path=MODEL_PATH, model_type=MODEL_TYPE, do_save=SAVING) ### Plotting the losses ### plot_training(hist=history_training, graphs_path=GRAPHS_PATH, model_type=MODEL_TYPE, do_save=SAVING) ### Plotting the CM ### plot_cm(hist=history_training, graphs_path=GRAPHS_PATH, model_type=MODEL_TYPE, do_save=SAVING) ### Give the classification report ### classif_report(hist=history_training)
def train_model(model, criterion, optimizer, dataloaders, scheduler, dataset_sizes, num_epochs):
    """Train and validate `model`, keeping the weights of the best-accuracy
    validation epoch.

    Iterates `num_epochs` times over the phases in the module-level `data_cat`
    (expected to contain 'train' and 'valid' — both are printed below);
    tracks per-phase loss/accuracy curves and a 2-class confusion meter,
    steps the LR scheduler on the validation loss, plots the curves via
    plot_training, and finally reloads the best validation weights.

    Returns the model with the best validation weights loaded.

    NOTE(review): uses legacy PyTorch (<0.4) idioms — `Variable`, `loss.data[0]`
    — and unconditionally calls `.cuda()`; confirm the targeted torch version.
    """
    begin = since = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    costs = {x:[] for x in data_cat} # for storing costs per epoch
    accs = {x:[] for x in data_cat} # for storing accuracies per epoch
    print('Train batches:', len(dataloaders['train']))
    print('Valid batches:', len(dataloaders['valid']), '\n')
    for epoch in range(num_epochs):
        # Fresh normalized 2-class confusion meter per phase, per epoch.
        confusion_matrix = {x: meter.ConfusionMeter(2, normalized=True)
                            for x in data_cat}
        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in data_cat:
            # train(True) enables dropout/batchnorm updates; train(False) = eval.
            model.train(phase=='train')
            running_loss = 0.0
            running_corrects = 0
            # Iterate over data with progress bar.
            dl_iter = tqdm(enumerate(dataloaders[phase]), desc=phase,
                           total=len(dataloaders[phase]))
            for i, data in dl_iter:
                # get the inputs
                # print(i, end='\r')
                inputs = data['images'][0]
                labels = data['label'].type(torch.FloatTensor)
                # wrap them in Variable
                inputs = Variable(inputs.cuda())
                labels = Variable(labels.cuda())
                # zero the parameter gradients
                optimizer.zero_grad()
                # forward
                outputs = model(inputs)
                # Collapse the per-view outputs to one scalar score —
                # presumably averaging multi-view predictions per study;
                # TODO confirm against the dataloader's 'images' layout.
                outputs = torch.mean(outputs)
                loss = criterion(outputs, labels, phase)
                # NOTE(review): `loss.data[0]` is the pre-0.4 scalar accessor
                # (modern torch uses loss.item()); it yields a Python float,
                # yet `running_loss.to(...)` below expects a tensor — verify
                # which torch version this actually runs on.
                running_loss += loss.data[0]
                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                # statistics
                # Threshold the sigmoid-style score at 0.5 for the hard label.
                preds = (outputs.data > 0.5).type(torch.cuda.FloatTensor)
                preds_for_conf = torch.unsqueeze(preds, 0)
                running_corrects += torch.sum(preds == labels.data)
                confusion_matrix[phase].add(preds_for_conf.data, labels.data)
            # Per-sample averages for this phase.
            epoch_loss = running_loss.to(dtype=torch.float) / dataset_sizes[phase]
            epoch_acc = running_corrects.to(dtype=torch.float) / dataset_sizes[phase]
            costs[phase].append(epoch_loss)
            accs[phase].append(epoch_acc)
            # Debug dump of the raw accumulators.
            print(running_loss, dataset_sizes, epoch_loss)
            print('{}\nLoss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))
            print('Confusion Meter:\n', confusion_matrix[phase].value(), "\n")
            # deep copy the model
            if phase == 'valid':
                # ReduceLROnPlateau-style scheduler: driven by validation loss.
                scheduler.step(epoch_loss)
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
        # Wall-clock time for this epoch (since is reset each epoch).
        time_elapsed = time.time() - since
        since = time.time()
        print('Time elapsed: {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print("\n")
    time_elapsed = time.time() - begin
    since = time.time()
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best valid Acc: {:4f}'.format(best_acc))
    plot_training(costs, accs)
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
l_a2b, l_b2a = control.train_with_buffer(env, likelihood, hparam['nb_episode']) likelihood_a2b[run, :] = l_a2b likelihood_b2a[run, :] = l_b2a print('Training finished') test(env, likelihood) env.adapt_a() likelihood.reinitialize_optimizer(lr=1e-2) l_a2b, l_b2a = control.train_with_buffer( env, likelihood, hparam['nb_episode_adapt']) likelihood_a2b_adapt[run, :] = l_a2b likelihood_b2a_adapt[run, :] = l_b2a utils.plot_training(likelihood_a2b, likelihood_b2a, hparam['output'], True) utils.plot_adaptation(likelihood_a2b_adapt, likelihood_b2a_adapt, hparam['output'], True) if rl_mode: reward_a2b = np.cumsum(reward_a2b, axis=1) reward_b2a = np.cumsum(reward_b2a, axis=1) reward_a2b_adapt = np.cumsum(reward_a2b_adapt, axis=1) reward_b2a_adapt = np.cumsum(reward_b2a_adapt, axis=1) utils.plot_reward(reward_a2b, reward_b2a, hparam['output'], True) utils.plot_reward_adapt(reward_a2b_adapt, reward_b2a_adapt, hparam['output'], True) def test(env, likelihood): env.compare_directions(likelihood)
def TransferLearning(args):
    """Run two-stage transfer learning on an ImageNet-pretrained backbone.

    Stage 1 trains only the freshly attached classification head
    (transfer_learning + fit_generator); stage 2 unfreezes the top of the
    backbone and fine-tunes it (setup_fine_tuning + fit_generator), then the
    model is saved under ./models/<args.output_model_file>.

    args must provide: train_folder, validation_folder, nb_epoch, batch_size,
    base_architecture, model_load, callbacks, output_model_file.

    Raises:
        ValueError: if base_architecture is not a known backbone, or if no
            fine-tuning freeze depth is defined for it. (BUG FIX: both cases
            previously surfaced as a NameError — immediately for an unknown
            name, or only AFTER stage-1 training for ResNet50/Xception, whose
            `layers_to_freeze` was never assigned.)

    NOTE(review): fit_generator with nb_epoch/samples_per_epoch is the legacy
    Keras 1.x API — confirm the pinned Keras version.
    """
    train_dir = args.train_folder
    val_dir = args.validation_folder
    nb_train_samples = utils.get_nb_files(args.train_folder)
    nb_val_samples = utils.get_nb_files(args.validation_folder)
    nb_classes = utils.get_labels(args.train_folder, args.validation_folder)
    nb_epochs = int(args.nb_epoch)
    batch_size = int(args.batch_size)
    base_architecture = args.base_architecture
    model_load = args.model_load

    # Backbone registry: constructor + number of bottom layers kept frozen
    # during fine-tuning (None = no freeze depth defined yet for that backbone).
    architectures = {
        'VGG16': (applications.VGG16, 10),
        'VGG19': (applications.VGG19, 11),
        'InceptionV3': (applications.InceptionV3, 172),  # TODO: understand how many levels!
        'ResNet50': (applications.ResNet50, None),
        'Xception': (applications.Xception, None),
    }
    if base_architecture not in architectures:
        raise ValueError(
            "Unsupported base_architecture: {!r}".format(base_architecture))
    constructor, layers_to_freeze = architectures[base_architecture]
    if layers_to_freeze is None:
        # Fail fast instead of training stage 1 and then dying in
        # setup_fine_tuning.
        raise ValueError(
            "No fine-tuning freeze depth defined for {!r}".format(
                base_architecture))
    base_model = constructor(weights='imagenet', include_top=False)

    model = replace_classification_layer(base_model, nb_classes, 1024)

    # Only pixel rescaling; fill_mode matters only if augmentation is added.
    train_datagen = ImageDataGenerator(rescale=1./255, fill_mode='nearest')
    test_datagen = ImageDataGenerator(rescale=1./255, fill_mode='nearest')
    train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical')
    validation_generator = test_datagen.flow_from_directory(
        val_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode='categorical')

    # Stage 1: freeze the backbone, train the new top layers only.
    transfer_learning(base_model, model, model_load)
    history_tl = model.fit_generator(
        train_generator,
        nb_epoch=nb_epochs,
        samples_per_epoch=nb_train_samples,
        validation_data=validation_generator,
        nb_val_samples=nb_val_samples,
        class_weight='auto',
        callbacks=args.callbacks)
    utils.plot_training(history_tl)

    # Stage 2: unfreeze everything above layers_to_freeze and fine-tune.
    setup_fine_tuning(model, layers_to_freeze, model_load)
    history_ft = model.fit_generator(
        train_generator,
        samples_per_epoch=nb_train_samples,
        nb_epoch=nb_epochs,
        validation_data=validation_generator,
        nb_val_samples=nb_val_samples,
        class_weight='auto',
        callbacks=args.callbacks)  # NOTE
    model.save(os.path.join(os.getcwd(), 'models', args.output_model_file))
    utils.plot_training(history_ft)
# Number of validation batches per epoch (floor division drops the ragged tail).
val_steps = len(dev) // batch_size
# Batch generators over the English/German token sequences; the same
# tokenizers/lengths/vocab size are used for both splits.
train_generator = data_generator(train, eng_tokenizer, eng_length,
                                 ger_tokenizer, ger_length, ger_vocab_size,
                                 batch_size=batch_size)
val_generator = data_generator(dev, eng_tokenizer, eng_length,
                               ger_tokenizer, ger_length, ger_vocab_size,
                               batch_size=batch_size)
# NOTE(review): fit_generator is the legacy Keras API (modern Keras folds this
# into model.fit) — confirm the pinned Keras version. The checkpoint callback
# is assumed to save the best weights during training.
H = model.fit_generator(train_generator,
                        steps_per_epoch=train_steps,
                        validation_data=val_generator,
                        validation_steps=val_steps,
                        epochs=epochs,
                        verbose=1,
                        callbacks=[checkpoint])
# Persist the loss/accuracy curves from the returned History object.
plot_training(H, epochs,
              plot_path_loss='training_loss_baseline_model.png',
              plot_path_acc='training_acc_baseline_model.png')
def fit(self, X:np.ndarray, y:np.ndarray) -> np.ndarray:
    """Train the perceptron on (X, y) with batch updates.

    X : input data of shape [nb_data, nb_param]
    y : labels for X, +1 for the positive class and -1 for the negative
        class, shape [nb_data]

    Each iteration accumulates the correction over ALL misclassified points
    (a bias component of 1 is appended to each sample) and applies it to
    self.weights in place, optionally with 1/t learning-rate decay and
    early stopping on convergence.
    """
    # Sanity check: exactly the two classes -1 / +1 must be present.
    assert np.all(np.unique(y) == np.array([-1, 1]))
    # When plotting is enabled, start from a clean image directory.
    if self.display:
        shutil.rmtree('./img_training', ignore_errors=True)
        metric = []  # collected metrics for display (unused in this chunk)
    # Becomes True once the weights stop moving (convergence).
    stabilise = False
    # Misclassification count per iteration.
    errors = np.zeros(self.max_iter)
    for iteration in range(self.max_iter):
        # Accumulated correction over all misclassified points.
        modif_w = np.zeros(len(self.weights))
        pred = self.predict(X)
        for point, label in zip(range(X.shape[0]), y):
            point_pred = pred[point]
            # Accumulate the correction for each misclassified point;
            # the appended 1 is the bias component.
            if label != point_pred:
                errors[iteration] += 1
                modif_w = modif_w + (label - point_pred) * np.insert(X[point], X.shape[1], 1)
        # Plot the error curve and the current separating line.
        if self.display:
            plot_training(iteration, X, y, self.weights,
                          list(errors[:iteration+1]))
        # Weight update, optionally with 1/t learning-rate decay.
        old_weights = np.array(self.weights)
        if self.lr_decay:
            lr = self.lr/(iteration+1)
        else:
            lr = self.lr
        self.weights += lr * modif_w
        # BUG FIX: the original test was
        #   abs(np.all(old_weights - self.weights)) < self.tol
        # which collapses the difference vector to a single 0/1 and declares
        # convergence as soon as ANY component is unchanged. Convergence
        # requires EVERY component to have moved by less than tol.
        stabilise = bool(np.all(np.abs(old_weights - self.weights) < self.tol))
        # Stop once the algorithm has converged.
        if self.early_stopping:
            if stabilise:
                # Show the final hyperplane, then stop training.
                plot_training(iteration, X, y, self.weights,
                              list(errors[:iteration+1]))
                break
def fit(self, X:np.ndarray, y:np.ndarray) -> np.ndarray:
    """Train the perceptron on (X, y) with single-point updates.

    X : input data of shape [nb_data, nb_param]
    y : labels for X, +1 for the positive class and -1 for the negative
        class, shape [nb_data]

    Unlike the batch variant, each iteration updates the weights using only
    the FIRST misclassified point of that pass. Updates self.weights in
    place; stops early when the weights no longer change.
    """
    # Sanity check: exactly the two classes -1 / +1 must be present.
    assert np.all(np.unique(y) == np.array([-1, 1]))
    # When plotting is enabled, start from a clean image directory.
    if self.display:
        shutil.rmtree('./img_training', ignore_errors=True)
        metric = []  # collected metrics for display (unused in this chunk)
    # Misclassification count per iteration.
    errors = np.zeros(self.max_iter)
    for iteration in range(self.max_iter):
        # Default correction: zero error on the first sample, so the update
        # below is a no-op when every point is classified correctly.
        erreur = y[0] - y[0]
        erreurpoint = X[0]
        print("---------------")
        print("poids", self.weights)
        for point, label in zip(X, y):
            labelpredict = self.predict(point)
            if label != labelpredict:
                errors[iteration] += 1
                # Keep only the FIRST misclassified point for the update.
                if errors[iteration] == 1:
                    erreurpoint = point
                    erreur = label - labelpredict
        # Plot the error curve and the current separating line.
        if self.display:
            plot_training(iteration, X, y, self.weights,
                          list(errors[:iteration+1]))
        # Weight update, optionally with 1/t learning-rate decay.
        old_weights = np.array(self.weights)
        if self.lr_decay:
            lr = self.lr/(iteration+1)
        else:
            lr = self.lr
        # BUG FIX (generality): append the bias component AFTER the last
        # feature instead of hard-coding index 2, which was wrong for
        # nb_param != 2.
        erreurpoint = np.insert(erreurpoint, X.shape[1], 1.)
        # BUG FIX: apply the learning rate chosen above; the original always
        # used self.lr/(iteration+1), silently ignoring self.lr_decay
        # (and inconsistent with the batch fit variant).
        self.weights = np.add(old_weights, lr * erreur * erreurpoint)
        # Stop once the weights no longer change between iterations.
        if self.early_stopping:
            stabilise = (self.weights == old_weights).all()
            if stabilise:
                # Show the final hyperplane, then stop training.
                plot_training(iteration, X, y, self.weights,
                              list(errors[:iteration+1]))
                break
print("** Model has {} parameters **".format(count_parameters(model))) ## Data generation train_input, train_target, train_classes, \ test_input, test_target, test_classes = generate_data( args.datasize, normalize=True) ## Model Training print("** Starting training... **") torch.manual_seed(0) SUCCESS = model.train_(train_input, train_classes, test_input, test_classes, test_target, epoch=args.epoch, eta=eta, criterion=loss) ## Results saving if SUCCESS: print("** Training done in : {:.0f} minutes {:.0f} seconds\n".format( (time.time() - start_rep_time) // 60, int(time.time() - start_rep_time) % 60)) else: print('** Training failed. **') ## Ploting results plot_training(model.sumloss, model.train_error, model.test_error, model.test_final_error)