def run(config):
    """Train an RNN frame classifier on bottleneck features extracted by a
    pre-trained CURL model.

    The CURL model named by ``config.curl_model`` is loaded and used as a
    frozen feature extractor (its parameters are never handed to the
    optimizer).  Training runs for ``config.epochs`` epochs with per-epoch
    validation; whenever the dev loss regresses beyond ``config.lr_tol``
    percent, the learning rate is decayed by ``config.lrr`` and the model is
    reverted to the previous best weights.  Checkpoints are written to
    ``<config.store_path>/<config.experiment_name>.dir`` every
    ``config.model_save_interval`` epochs.
    """
    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory...
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # ...and mirror everything to the console as well.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load the pre-trained CURL model onto the CPU; it acts as a frozen
    # feature extractor (only `model` below is optimized).
    curl = torch.load(config.curl_model, map_location=lambda storage, loc: storage)
    curl_model = nnetCurlSupervised(curl['feature_dim'] * curl['num_frames'],
                                    curl['encoder_num_layers'],
                                    curl['decoder_num_layers'],
                                    curl['hidden_dim'], curl['bn_dim'],
                                    curl['comp_num'], config.use_gpu)
    curl_model.load_state_dict(curl["model_state_dict"])
    # Keep the frozen extractor in eval mode so features are deterministic
    # in both training and validation.
    # NOTE(review): the original left it in train mode — confirm that was
    # not intentional (e.g. feature-level dropout as augmentation).
    curl_model.eval()

    # Classifier trained on top of the CURL bottleneck features.
    model = nnetRNN(curl['bn_dim'], config.num_layers, config.hidden_dim,
                    config.num_classes, 0)

    logging.info('Model Parameters: ')
    logging.info('Number of Layers: %d' % (config.num_layers))
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Number of Classes: %d' % (config.num_classes))
    logging.info('Data dimension: %d' % (curl['feature_dim']))
    logging.info('Number of Frames: %d' % (curl['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Dropout: %f ' % (config.dropout))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))
    sys.stdout.flush()

    if config.use_gpu:
        # Pin this process to a free GPU.  (Renamed from `id`, which
        # shadowed the builtin of the same name.)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id
        model = model.cuda()
        curl_model = curl_model.cuda()

    criterion = nn.CrossEntropyLoss()
    lr = config.learning_rate
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Load datasets
    dataset_train = nnetDatasetSeq(os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(dataset_train,
                                                    batch_size=config.batch_size,
                                                    shuffle=True)
    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    # Save the initial state.  Pass the path itself (not an open handle)
    # so no file descriptor is leaked.
    model_path = os.path.join(model_dir, config.experiment_name + '__epoch_0.model')
    torch.save({
        'epoch': 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, model_path)

    ep_loss_tr = []
    ep_fer_tr = []
    ep_loss_dev = []
    ep_fer_dev = []
    err_p = 0
    best_model_state = None

    def _snapshot_state():
        # state_dict() aliases the live parameter tensors, so the values
        # must be cloned or the "best" snapshot would track every update.
        return {k: v.clone() for k, v in model.state_dict().items()}

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_losses = []
        tr_fer = []

        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            # Sort by length (descending) as required by packed sequences.
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            # Extract CURL embeddings.  The extractor is frozen, so skip
            # autograd bookkeeping through it entirely.
            with torch.no_grad():
                _, latent = curl_model(batch_x, batch_l)
            batch_x = compute_latent_features(latent)

            optimizer.zero_grad()

            # Main forward pass; flatten padded sequences to frame-wise
            # (frames, classes) form for the loss.
            class_out = model(batch_x, batch_l)
            class_out = pad2list(class_out, batch_l)
            lab = pad2list(lab, batch_l)

            loss = criterion(class_out, lab)
            train_losses.append(loss.item())
            if config.use_gpu:
                tr_fer.append(compute_fer(class_out.cpu().data.numpy(),
                                          lab.cpu().data.numpy()))
            else:
                tr_fer.append(compute_fer(class_out.data.numpy(),
                                          lab.data.numpy()))

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip_thresh)
            optimizer.step()

        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))

        ######################
        ##### Validation #####
        ######################

        model.eval()
        val_losses = []
        val_fer = []

        # Inference only — no gradients (and no optimizer.zero_grad())
        # are needed here.
        with torch.no_grad():
            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                    lab = Variable(lab[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])
                    lab = Variable(lab[indices])

                _, latent = curl_model(batch_x, batch_l)
                batch_x = compute_latent_features(latent)

                class_out = model(batch_x, batch_l)
                class_out = pad2list(class_out, batch_l)
                lab = pad2list(lab, batch_l)

                loss = criterion(class_out, lab)
                val_losses.append(loss.item())
                if config.use_gpu:
                    val_fer.append(compute_fer(class_out.cpu().data.numpy(),
                                               lab.cpu().data.numpy()))
                else:
                    val_fer.append(compute_fer(class_out.data.numpy(),
                                               lab.data.numpy()))

        # Manage learning rate and revert model if the dev loss regressed
        # by more than lr_tol percent.
        if epoch_i == 0:
            err_p = np.mean(val_losses)
            best_model_state = _snapshot_state()
        else:
            if np.mean(val_losses) > (100 - config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}".format(
                        lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = np.mean(val_losses)
                best_model_state = _snapshot_state()

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr loss: {:.3f} :: Tr FER: {:.2f}".format(
            epoch_i + 1, lr, ep_loss_tr[-1], ep_fer_tr[-1])
        print_log += " || Val: {:.3f} :: Val FER: {:.2f}".format(
            ep_loss_dev[-1], ep_fer_dev[-1])
        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir,
                config.experiment_name + '__epoch_%d' % (epoch_i + 1) + '.model')
            # Save to the path directly — no leaked file handle.
            torch.save({
                'epoch': epoch_i + 1,
                'vaeenc': config.curl_model,
                'feature_dim': curl['feature_dim'],
                'num_frames': curl['num_frames'],
                'num_classes': config.num_classes,
                'num_layers': config.num_layers,
                'hidden_dim': config.hidden_dim,
                'ep_loss_tr': ep_loss_tr,
                'ep_loss_dev': ep_loss_dev,
                'dropout': config.dropout,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, model_path)
def run(config):
    """Train an nnetCurlMultistreamClassifier (CURL encoder/decoder with a
    per-component classifier head), optionally growing one extra mixture
    component from a previously trained model and/or resuming from a
    mid-training checkpoint.

    Checkpoints — including the Gaussian prior means ``means_p`` — are
    written to ``<config.store_path>/<config.experiment_name>.dir`` every
    ``config.model_save_interval`` epochs.
    """
    if config.use_gpu:
        # Set environment variable for GPU ID
        # NOTE(review): `id` shadows the builtin of the same name.
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id

    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory...
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load feature configuration.  'concat_feats' is "left,right" context,
    # so each example spans left + right + 1 frames.
    # NOTE(review): the file handle passed to pkl.load is never closed.
    egs_config = pkl.load(open(os.path.join(config.egs_dir, config.train_set, 'egs.config'), 'rb'))
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (config.encoder_num_layers))
    logging.info('Decoder Number of Layers: %d' % (config.decoder_num_layers))
    logging.info('Classifier Number of Layers: %d' % (config.classifier_num_layers))
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Classifier Hidden Dimension: %d' % (config.hidden_dim_classifier))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Number of classes: %d' % (config.num_classes))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Component Number: %d' % (config.comp_num))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))
    logging.info('Encoder Gradient Scale: %f ' % (config.encoder_grad_scale))
    sys.stdout.flush()

    model = nnetCurlMultistreamClassifier(config.feature_dim * num_frames,
                                          config.encoder_num_layers,
                                          config.decoder_num_layers,
                                          config.classifier_num_layers,
                                          config.hidden_dim,
                                          config.hidden_dim_classifier,
                                          config.bn_dim, config.comp_num,
                                          config.num_classes, config.use_gpu,
                                          enc_scale=config.encoder_grad_scale)
    lr = config.learning_rate
    criterion = nn.CrossEntropyLoss()
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    if config.use_gpu:
        model = model.cuda()

    # Warm-start from a previously trained model and grow one extra mixture
    # component; the new component's prior mean is drawn uniformly from
    # [-2.5, 2.5) per dimension.
    if config.load_previous_model != "None":
        ckpt = torch.load(config.load_previous_model)
        model.load_state_dict(ckpt["model_state_dict"])
        model.expand_component(config.use_gpu)
        previous_mean_p = torch.from_numpy(ckpt["prior_means"])
        previous_mean_p = torch.cat([previous_mean_p, 5 * (torch.rand(1, config.bn_dim) - 0.5)])

    # Resume from a mid-training checkpoint; this takes precedence over the
    # previous-model means when both are given.
    if config.load_checkpoint != "None":
        ckpt = torch.load(config.load_checkpoint)
        model.load_state_dict(ckpt["model_state_dict"])
        # NOTE(review): `lr` still holds config.learning_rate here, not the
        # checkpoint's saved 'lr' — confirm the reset is intentional.
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        ep_start = ckpt["epoch"]
        means_p = torch.from_numpy(ckpt["prior_means"])
    else:
        ep_start = 0
        if config.load_previous_model != "None":
            means_p = previous_mean_p
        else:
            # Fresh prior means, uniform in [-2.5, 2.5).
            means_p = 5 * (torch.rand(config.comp_num, config.bn_dim) - 0.5)

    model_path = os.path.join(model_dir, config.experiment_name + '__epoch_0.model')
    # NOTE(review): the open() handle passed to torch.save is never closed;
    # passing the path itself would avoid the fd leak.
    torch.save({
        'epoch': 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'prior_means': means_p.numpy()}, (open(model_path, 'wb')))

    if config.use_gpu:
        # NOTE(review): redundant — the model was already moved to the GPU
        # above.
        model = model.cuda()

    # Per-epoch histories.
    ep_curl_tr = []
    ep_loss_tr = []
    ep_fer_tr = []
    ep_curl_dev = []
    ep_loss_dev = []
    ep_fer_dev = []

    # Load Datasets
    dataset_train = nnetDatasetSeq(os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(dataset_train,
                                                    batch_size=config.batch_size,
                                                    shuffle=True)
    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    err_p = 10000000
    # NOTE(review): state_dict() aliases the live parameters; without a deep
    # copy the load_state_dict() rollback below cannot actually revert.
    best_model_state = model.state_dict()

    for epoch_i in range(ep_start, config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_curl_losses = []
        train_losses = []
        tr_fer = []

        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            # Sort utterances by length (descending), as packed sequences
            # require.
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            optimizer.zero_grad()

            # Main forward pass
            class_out, ae_out, latent_out = model(batch_x, batch_l)

            """
            # Keep only on-zero label rows for label and class_out
            nonzero_idx = []
            for idx, l in enumerate(lab):
                if torch.sum(l) != 0:
                    nonzero_idx.append(idx)
            nonzero_idx = torch.FloatTensor(nonzero_idx).long()
            lab = lab[nonzero_idx]
            class_out = class_out[nonzero_idx]
            """

            # Convert all the weird tensors to frame-wise form
            batch_x = pad2list(batch_x, batch_l)
            ae_out = pad2list3d(ae_out, batch_l)
            # if nonzero_idx.nelement() != 0:
            # Only the classifier head of component `comp_label` is scored.
            class_out = pad2list(class_out[config.comp_label], batch_l)
            lab = pad2list(lab, batch_l)
            latent_out = (pad2list(latent_out[0], batch_l),
                          pad2list3d(latent_out[1], batch_l),
                          pad2list3d(latent_out[2], batch_l))

            # if nonzero_idx.nelement() != 0:
            loss_class = criterion(class_out, lab)
            train_losses.append(loss_class.item())

            loss = curl_loss_supervised(batch_x, ae_out, latent_out, means_p,
                                        config.comp_label,
                                        use_gpu=config.use_gpu)
            train_curl_losses.append(loss.item())

            # if nonzero_idx.nelement() != 0:
            if config.use_gpu:
                tr_fer.append(compute_fer(class_out.cpu().data.numpy(),
                                          lab.cpu().data.numpy()))
            else:
                tr_fer.append(compute_fer(class_out.data.numpy(),
                                          lab.data.numpy()))

            # `loss` is a CURL log-likelihood (to be maximised), hence the
            # minus sign; the cross-entropy is minimised with weight 100.
            # if nonzero_idx.nelement() != 0:
            # (loss_class).backward()
            (-loss + 100 * loss_class).backward()
            # else:
            # (-loss).backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip_thresh)
            optimizer.step()

        ep_curl_tr.append(np.mean(train_curl_losses))
        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))

        ######################
        ##### Validation #####
        ######################

        model.eval()
        # with torch.set_grad_enabled(False):
        val_curl_losses = []
        val_losses = []
        val_fer = []

        for batch_x, batch_l, lab in data_loader_dev:
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])
            # NOTE(review): no-op self-assignment.
            batch_l = batch_l

            # Main forward pass
            class_out, ae_out, latent_out = model(batch_x, batch_l)

            # Convert all the weird tensors to frame-wise form
            batch_x = pad2list(batch_x, batch_l)
            class_out = pad2list(class_out[config.comp_label], batch_l)
            lab = pad2list(lab, batch_l)
            ae_out = pad2list3d(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0], batch_l),
                          pad2list3d(latent_out[1], batch_l),
                          pad2list3d(latent_out[2], batch_l))

            loss_class = criterion(class_out, lab)
            loss = curl_loss_supervised(batch_x, ae_out, latent_out, means_p,
                                        config.comp_label,
                                        use_gpu=config.use_gpu)
            val_curl_losses.append(loss.item())
            val_losses.append(loss_class.item())
            if config.use_gpu:
                val_fer.append(compute_fer(class_out.cpu().data.numpy(),
                                           lab.cpu().data.numpy()))
            else:
                val_fer.append(compute_fer(class_out.data.numpy(),
                                           lab.data.numpy()))

        ep_curl_dev.append(np.mean(val_curl_losses))
        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))

        # Manage learning rate and revert model
        if epoch_i == 0:
            err_p = np.mean(val_losses)
            best_model_state = model.state_dict()
        else:
            # Dev loss regressed by more than lr_tol percent: decay the lr
            # and roll back to the previous best weights.
            if np.mean(val_losses) > (100 - config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}".format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = np.mean(val_losses)
                best_model_state = model.state_dict()

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr CURL Log-likelihood: {:.3f} || Tr Loss: {:.3f} || Tr FER: {:.3f} :: Val CURL Log-likelihood: {:.3f} || Val Loss: {:.3f} || Val FER: {:.3f}".format(
            epoch_i + 1, lr, ep_curl_tr[-1], ep_loss_tr[-1], ep_fer_tr[-1],
            ep_curl_dev[-1], ep_loss_dev[-1], ep_fer_dev[-1])
        logging.info(print_log)
        sys.stdout.flush()

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(model_dir, config.experiment_name + '__epoch_%d' % (epoch_i + 1) + '.model')
            # NOTE(review): open() handle leaked here as well.
            torch.save({
                'epoch': epoch_i + 1,
                'feature_dim': config.feature_dim,
                'num_frames': num_frames,
                'encoder_num_layers': config.encoder_num_layers,
                'decoder_num_layers': config.decoder_num_layers,
                'classifier_num_layers': config.classifier_num_layers,
                'hidden_dim': config.hidden_dim,
                'hidden_dim_classifier': config.hidden_dim_classifier,
                'comp_num': config.comp_num,
                'num_classes': config.num_classes,
                'bn_dim': config.bn_dim,
                'ep_curl_tr': ep_curl_tr,
                'ep_curl_dev': ep_curl_dev,
                'prior_means': means_p.numpy(),
                'lr': lr,
                'encoder_grad_scale': config.encoder_grad_scale,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()}, (open(model_path, 'wb')))
def run(config):
    """Train a modulationSigmoidNet CNN classifier on windowed spectral
    features.

    Feature context (number of frames per example) is read from the egs
    config of the training set.  Trains for ``config.epochs`` epochs with
    per-epoch validation; on a dev-loss regression beyond ``config.lr_tol``
    percent the learning rate decays by ``config.lrr`` and the model reverts
    to the previous best weights.  Checkpoints land in
    ``<config.store_path>/<config.experiment_name>.dir``.
    """
    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory...
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # ...and mirror everything to the console as well.
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load feature configuration: 'concat_feats' is "left,right" context,
    # so each example spans left + right + 1 frames.  Use a context manager
    # so the file handle is closed.
    with open(os.path.join(config.egs_dir, config.train_set, 'egs.config'), 'rb') as f:
        egs_config = pkl.load(f)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Number of Decoder Layers: %d' % (config.num_layers_dec))
    # Fixed: this line previously logged config.feature_dim (which the
    # 'Data dimension' line below already reports).
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Number of Classes: %d' % (config.num_classes))
    logging.info('Input channels: %s' % (config.in_channels))
    logging.info('Output channels: %s' % (config.out_channels))
    logging.info('Kernel Size: %d' % (config.kernel))
    logging.info('Input Kernel Size: %d' % (config.input_filter_kernel))
    logging.info('Window size: %f' % (config.wind_size))
    logging.info('Frequency Number: %d' % (config.freq_num))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Dropout: %f ' % (config.dropout))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))
    sys.stdout.flush()

    # Channel specs arrive as comma-separated strings, e.g. "1,32".
    in_channels = [int(x) for x in config.in_channels.split(',')]
    out_channels = [int(x) for x in config.out_channels.split(',')]

    model = modulationSigmoidNet(config.feature_dim, num_frames, in_channels,
                                 out_channels, config.kernel,
                                 config.input_filter_kernel, config.freq_num,
                                 config.wind_size, config.num_layers_dec,
                                 config.hidden_dim, config.num_classes,
                                 config.use_gpu)

    if config.use_gpu:
        # Pin this process to a free GPU.  (Renamed from `id`, which
        # shadowed the builtin.)
        device_id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = device_id
        model = model.cuda()

    criterion = nn.CrossEntropyLoss()
    lr = config.learning_rate
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Load datasets
    dataset_train = nnetDatasetSeq(os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(dataset_train,
                                                    batch_size=config.batch_size,
                                                    shuffle=True)
    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    # Save the initial state.  Pass the path (not an open handle) so no
    # file descriptor is leaked.
    model_path = os.path.join(model_dir, config.experiment_name + '__epoch_0.model')
    torch.save({
        'epoch': 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, model_path)

    ep_loss_tr = []
    ep_fer_tr = []
    ep_loss_dev = []
    ep_fer_dev = []
    ep_mod_tr = []
    ep_mod_dev = []
    err_p = 0
    best_model_state = None

    def _snapshot_state():
        # state_dict() aliases the live parameter tensors, so the values
        # must be cloned or the "best" snapshot would track every update.
        return {k: v.clone() for k, v in model.state_dict().items()}

    def _to_cnn_format(batch_x):
        # Reshape (batch, seq, feat*frames) to (batch*seq, 1, feat, frames):
        # one single-channel "image" per frame window for the CNN.
        s = batch_x.shape
        batch_x = batch_x.view(s[0] * s[1], config.feature_dim, num_frames)
        return batch_x[:, None, :, :], s

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_losses = []
        tr_fer = []
        tr_mod = []

        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            batch_x, s = _to_cnn_format(batch_x)
            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
                lab = Variable(lab).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)
                lab = Variable(lab)

            optimizer.zero_grad()

            # Main forward pass; restore the (batch, seq, classes) layout,
            # then flatten padded sequences to frame-wise form for the loss.
            class_out, mod_f = model(batch_x)
            class_out = class_out.view(s[0], s[1], -1)
            class_out = pad2list(class_out, batch_l)
            lab = pad2list(lab, batch_l)

            loss = criterion(class_out, lab)
            train_losses.append(loss.item())
            tr_mod.append(mod_f.item())
            if config.use_gpu:
                tr_fer.append(compute_fer(class_out.cpu().data.numpy(),
                                          lab.cpu().data.numpy()))
            else:
                tr_fer.append(compute_fer(class_out.data.numpy(),
                                          lab.data.numpy()))
            sys.stdout.flush()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip_thresh)
            optimizer.step()

        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))
        ep_mod_tr.append(np.mean(tr_mod))

        ######################
        ##### Validation #####
        ######################

        model.eval()
        val_losses = []
        val_fer = []
        val_mod = []

        # Inference only — no gradients needed.
        with torch.no_grad():
            for batch_x, batch_l, lab in data_loader_dev:
                batch_x, s = _to_cnn_format(batch_x)
                if config.use_gpu:
                    batch_x = Variable(batch_x).cuda()
                    batch_l = Variable(batch_l).cuda()
                    lab = Variable(lab).cuda()
                else:
                    batch_x = Variable(batch_x)
                    batch_l = Variable(batch_l)
                    lab = Variable(lab)

                class_out, mod_f = model(batch_x)
                class_out = class_out.view(s[0], s[1], -1)
                class_out = pad2list(class_out, batch_l)
                lab = pad2list(lab, batch_l)

                loss = criterion(class_out, lab)
                val_losses.append(loss.item())
                val_mod.append(mod_f.item())
                if config.use_gpu:
                    val_fer.append(compute_fer(class_out.cpu().data.numpy(),
                                               lab.cpu().data.numpy()))
                else:
                    val_fer.append(compute_fer(class_out.data.numpy(),
                                               lab.data.numpy()))

        # Manage learning rate and revert model if the dev loss regressed
        # by more than lr_tol percent.
        if epoch_i == 0:
            err_p = np.mean(val_losses)
            best_model_state = _snapshot_state()
        else:
            if np.mean(val_losses) > (100 - config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}".format(
                        lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = np.mean(val_losses)
                best_model_state = _snapshot_state()

        ep_loss_dev.append(np.mean(val_losses))
        ep_fer_dev.append(np.mean(val_fer))
        ep_mod_dev.append(np.mean(val_mod))

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr loss: {:.3f} :: Tr FER: {:.2f} :: Tr Modulation {:.2f} Hz".format(
            epoch_i + 1, lr, ep_loss_tr[-1], ep_fer_tr[-1], ep_mod_tr[-1])
        print_log += " || Val: {:.3f} :: Val FER: {:.2f} :: Val Modulation {:.2f} Hz".format(
            ep_loss_dev[-1], ep_fer_dev[-1], ep_mod_dev[-1])
        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir,
                config.experiment_name + '__epoch_%d' % (epoch_i + 1) + '.model')
            # Save to the path directly — no leaked file handle.
            torch.save({
                'epoch': epoch_i + 1,
                'feature_dim': config.feature_dim,
                'num_frames': num_frames,
                'num_classes': config.num_classes,
                'num_layers_dec': config.num_layers_dec,
                'hidden_dim': config.hidden_dim,
                'in_channels': config.in_channels,
                'out_channels': config.out_channels,
                'kernel': config.kernel,
                'freq_num': config.freq_num,
                'input_filter_kernel': config.input_filter_kernel,
                'wind_size': config.wind_size,
                'ep_loss_tr': ep_loss_tr,
                'ep_loss_dev': ep_loss_dev,
                'dropout': config.dropout,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, model_path)
def run(config):
    """Unsupervised domain adaptation of a pre-trained VAE classifier.

    Only the encoder is updated (classifier and VAE decoder parameters are
    frozen).  Each step alternates between (a) an adaptation update on the
    unlabeled adaptation set, driven by the VAE likelihood plus an
    m-measure regularizer on the classifier outputs, and (b) an "anchor"
    update on randomly drawn labeled anchor utterances to keep the encoder
    close to the source domain.  Per-epoch performance is tracked on a
    labeled test set.
    """
    model_dir = os.path.join(config.store_path, config.experiment_name + '.dir')
    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log to a file inside the experiment directory...
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
        filename=os.path.join(model_dir, config.experiment_name),
        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load model (checkpoint dict carries the architecture hyper-parameters).
    nnet = torch.load(config.model, map_location=lambda storage, loc: storage)
    model = nnetVAEClassifier(nnet['feature_dim'] * nnet['num_frames'],
                              nnet['num_classes'], nnet['encoder_num_layers'],
                              nnet['classifier_num_layers'], nnet['ae_num_layers'],
                              nnet['hidden_dim'], nnet['bn_dim'], 0.5,
                              config.use_gpu)
    model.load_state_dict(nnet['model_state_dict'])

    # I want to only update the encoder
    for p in model.classifier.parameters():
        p.requires_grad = False
    for p in model.vae_decoder.parameters():
        p.requires_grad = False

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (nnet['encoder_num_layers']))
    logging.info('Classifier Number of Layers: %d' % (nnet['classifier_num_layers']))
    logging.info('AE Number of Layers: %d' % (nnet['ae_num_layers']))
    logging.info('Hidden Dimension: %d' % (nnet['hidden_dim']))
    logging.info('Number of Classes: %d' % (nnet['num_classes']))
    logging.info('Data dimension: %d' % (nnet['feature_dim']))
    logging.info('Bottleneck dimension: %d' % (nnet['bn_dim']))
    logging.info('Number of Frames: %d' % (nnet['num_frames']))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Encoder Dropout: %f ' % (nnet['enc_dropout']))
    sys.stdout.flush()

    if config.use_gpu:
        # Set environment variable for GPU ID
        # NOTE(review): `id` shadows the builtin of the same name.
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    criterion_classifier = nn.CrossEntropyLoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    model_path = os.path.join(model_dir, config.experiment_name + '__epoch_0.model')
    # NOTE(review): the open() handle passed to torch.save is never closed;
    # passing the path itself would avoid the fd leak.
    torch.save({
        'epoch': 1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, (open(model_path, 'wb')))

    # Per-epoch histories for the adaptation, anchor and test phases.
    ep_vae_adapt = []
    ep_mm_adapt = []
    ep_loss_anchor = []
    ep_fer_anchor = []
    ep_vae_anchor = []
    ep_loss_test = []
    ep_fer_test = []
    ep_vae_test = []

    # Load Datasets
    # Anchor set: per-utterance tensors on disk plus pickled lengths/labels.
    path = os.path.join(config.egs_dir, config.anchor_set)
    with open(os.path.join(path, 'lengths.pkl'), 'rb') as f:
        lengths_anchor = pickle.load(f)
    labels_anchor = torch.load(os.path.join(path, 'labels.pkl'))
    anchor_ids = list(labels_anchor.keys())

    # Adaptation Set
    dataset_adapt = nnetDatasetSeqAE(os.path.join(config.egs_dir, config.adapt_set))
    data_loader_adapt = torch.utils.data.DataLoader(dataset_adapt,
                                                    batch_size=config.batch_size,
                                                    shuffle=True)

    # Test Set
    dataset_test = nnetDatasetSeq(os.path.join(config.egs_dir, config.test_set))
    data_loader_test = torch.utils.data.DataLoader(dataset_test,
                                                   batch_size=config.batch_size,
                                                   shuffle=True)

    # Start off with initial performance on test set
    model.eval()
    test_losses = []
    test_vae_losses = []
    test_fer = []
    for batch_x, batch_l, lab in data_loader_test:
        # Sort utterances by length (descending) for packed sequences.
        _, indices = torch.sort(batch_l, descending=True)
        if config.use_gpu:
            batch_x = Variable(batch_x[indices]).cuda()
            batch_l = Variable(batch_l[indices]).cuda()
            lab = Variable(lab[indices]).cuda()
        else:
            batch_x = Variable(batch_x[indices])
            batch_l = Variable(batch_l[indices])
            lab = Variable(lab[indices])

        # Main forward pass
        class_out, ae_out, latent_out = model(batch_x, batch_l)

        # Convert all the weird tensors to frame-wise form
        class_out = pad2list(class_out, batch_l)
        batch_x = pad2list(batch_x, batch_l)
        lab = pad2list(lab, batch_l)
        ae_out = pad2list(ae_out, batch_l)
        latent_out = (pad2list(latent_out[0], batch_l),
                      pad2list(latent_out[1], batch_l))

        loss_classifier = criterion_classifier(class_out, lab)
        loss_vae = vae_loss(batch_x, ae_out, latent_out)
        test_losses.append(loss_classifier.item())
        # vae_loss returns (reconstruction term, KL term); report their sum.
        test_vae_losses.append(loss_vae[0].item() + loss_vae[1].item())
        if config.use_gpu:
            test_fer.append(compute_fer(class_out.cpu().data.numpy(),
                                        lab.cpu().data.numpy()))
        else:
            test_fer.append(compute_fer(class_out.data.numpy(),
                                        lab.data.numpy()))

    print_log = "Initial Testset Error : Adapt (Test) loss: {:.3f} :: Adapt (Test) FER: {:.2f} :: Adapt (Test) Vae log-likelihood loss: {:.3f}".format(
        np.mean(test_losses), np.mean(test_fer), np.mean(test_vae_losses))
    logging.info(print_log)

    for epoch_i in range(config.epochs):

        ######################
        ##### Adaptation #####
        ######################

        model.train()
        adapt_vae_losses = []
        adapt_mm_losses = []
        anchor_losses = []
        anchor_vae_losses = []
        anchor_fer = []
        test_losses = []
        test_vae_losses = []
        test_fer = []

        # Main training loop
        for batch_x, batch_l in data_loader_adapt:
            # First do the adaptation
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])

            # Main forward pass
            optimizer.zero_grad()
            class_out, ae_out, latent_out = model(batch_x, batch_l)

            # Convert all the weird tensors to frame-wise form
            class_out = pad2list(class_out, batch_l)
            batch_x = pad2list(batch_x, batch_l)
            ae_out = pad2list(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0], batch_l),
                          pad2list(latent_out[1], batch_l))

            loss_vae = vae_loss(batch_x, ae_out, latent_out)
            mm_loss = mmeasure_loss(class_out, use_gpu=config.use_gpu)
            # VAE terms enter negated (they are log-likelihoods being
            # maximised); mm_loss is likewise subtracted to be maximised.
            loss = config.adapt_weight * (-loss_vae[0] - loss_vae[1]) - config.mm_weight * mm_loss  # Just the autoencoder loss

            adapt_vae_losses.append(loss_vae[0].item() + loss_vae[1].item())
            adapt_mm_losses.append(mm_loss.item())

            loss.backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip_thresh)
            optimizer.step()

            # Now lets try to anchor the parameters as close as possible to previously seen data
            # Select anchor data randomly
            ids = [random.choice(anchor_ids) for i in range(config.batch_size)]
            batch_x = torch.cat([torch.load(os.path.join(path, index))[None, :, :] for index in ids])
            batch_l = torch.cat([torch.IntTensor([lengths_anchor[index]]) for index in ids])
            lab = torch.cat([labels_anchor[index][None, :] for index in ids])

            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            # Main forward pass
            optimizer.zero_grad()
            class_out, ae_out, latent_out = model(batch_x, batch_l)

            # Convert all the weird tensors to frame-wise form
            class_out = pad2list(class_out, batch_l)
            batch_x = pad2list(batch_x, batch_l)
            lab = pad2list(lab, batch_l)
            ae_out = pad2list(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0], batch_l),
                          pad2list(latent_out[1], batch_l))

            loss_classifier = criterion_classifier(class_out, lab)
            loss_vae = vae_loss(batch_x, ae_out, latent_out)
            loss = config.anchor_weight * (-loss_vae[0] - loss_vae[1] + loss_classifier)  # Use all the loss for anchor set

            anchor_losses.append(loss_classifier.item())
            anchor_vae_losses.append(loss_vae[0].item() + loss_vae[1].item())
            if config.use_gpu:
                anchor_fer.append(compute_fer(class_out.cpu().data.numpy(),
                                              lab.cpu().data.numpy()))
            else:
                anchor_fer.append(compute_fer(class_out.data.numpy(),
                                              lab.data.numpy()))

            loss.backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip_thresh)
            optimizer.step()

        ## Test it on the WSJ test set
        model.eval()
        for batch_x, batch_l, lab in data_loader_test:
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            # Main forward pass
            class_out, ae_out, latent_out = model(batch_x, batch_l)

            # Convert all the weird tensors to frame-wise form
            class_out = pad2list(class_out, batch_l)
            batch_x = pad2list(batch_x, batch_l)
            lab = pad2list(lab, batch_l)
            ae_out = pad2list(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0], batch_l),
                          pad2list(latent_out[1], batch_l))

            loss_classifier = criterion_classifier(class_out, lab)
            loss_vae = vae_loss(batch_x, ae_out, latent_out)
            test_losses.append(loss_classifier.item())
            test_vae_losses.append(loss_vae[0].item() + loss_vae[1].item())
            if config.use_gpu:
                test_fer.append(compute_fer(class_out.cpu().data.numpy(),
                                            lab.cpu().data.numpy()))
            else:
                test_fer.append(compute_fer(class_out.data.numpy(),
                                            lab.data.numpy()))

        ep_vae_adapt.append(np.mean(adapt_vae_losses))
        ep_mm_adapt.append(np.mean(adapt_mm_losses))
        ep_loss_anchor.append(np.mean(anchor_losses))
        ep_fer_anchor.append(np.mean(anchor_fer))
        ep_vae_anchor.append(np.mean(anchor_vae_losses))
        ep_loss_test.append(np.mean(test_losses))
        ep_fer_test.append(np.mean(test_fer))
        ep_vae_test.append(np.mean(test_vae_losses))

        print_log = "Epoch: {:d} Adapt (Test) loss: {:.3f} :: Adapt (Test) FER: {:.2f}".format(
            epoch_i + 1, ep_loss_test[-1], ep_fer_test[-1])
        print_log += " || Anchor loss : {:.3f} :: Anchor FER: {:.2f}".format(
            ep_loss_anchor[-1], ep_fer_anchor[-1])
        print_log += " || VAE llhood (Adapt) : {:.3f} :: VAE llhood (Anchor) : {:.3f} :: VAE llhood (Test) : {:.3f} ".format(
            ep_vae_adapt[-1], ep_vae_anchor[-1], ep_vae_test[-1])
        print_log += " || Adapt mm loss : {:.3f} ".format(ep_mm_adapt[-1])
        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir,
                config.experiment_name + '__epoch_%d' % (epoch_i + 1) + '.model')
            # NOTE(review): open() handle leaked here as well.
            torch.save({
                'epoch': epoch_i + 1,
                'feature_dim': nnet['feature_dim'],
                'num_frames': nnet['num_frames'],
                'num_classes': nnet['num_classes'],
                'encoder_num_layers': nnet['encoder_num_layers'],
                'classifier_num_layers': nnet['classifier_num_layers'],
                'ae_num_layers': nnet['ae_num_layers'],
                'ep_vae_adapt': ep_vae_adapt,
                'ep_mm_adapt': ep_mm_adapt,
                'ep_loss_anchor': ep_loss_anchor,
                'ep_fer_anchor': ep_fer_anchor,
                'ep_vae_anchor': ep_vae_anchor,
                'ep_loss_test': ep_loss_test,
                'ep_fer_test': ep_fer_test,
                'ep_vae_test': ep_vae_test,
                'hidden_dim': nnet['hidden_dim'],
                'bn_dim': nnet['bn_dim'],
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, (open(model_path, 'wb')))
def run(config):
    """Train a convolutional VAE (pooled or no-pool variant) on sequence egs.

    Expects ``config`` to provide: store_path, experiment_name, egs_dir,
    train_set, dev_set, feature_dim, bn_dim, in_channels, out_channels,
    kernel, nopool, use_gpu, optimizer, learning_rate, lrr, lr_tol,
    weight_decay, batch_size, epochs, clip_thresh, model_save_interval.

    Side effects: creates the experiment directory, configures file+console
    logging, and periodically writes checkpoints under that directory.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')

    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load feature configuration; the context window determines how many
    # frames are concatenated per example.
    # Fix: use a context manager so the config file handle is closed.
    with open(os.path.join(config.egs_dir, config.train_set, 'egs.config'),
              'rb') as cfg_f:
        egs_config = pkl.load(cfg_f)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Input Channels: %s' % (config.in_channels))
    logging.info('Output Channels: %s' % (config.out_channels))
    logging.info('Kernel: %s' % (config.kernel))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))
    sys.stdout.flush()

    in_channels = [int(x) for x in config.in_channels.split(',')]
    out_channels = [int(x) for x in config.out_channels.split(',')]
    kernel = tuple([int(x) for x in config.kernel.split(',')])

    if config.nopool:
        model = nnetVAECNNNopool(config.feature_dim, num_frames, in_channels,
                                 out_channels, kernel, config.bn_dim,
                                 config.use_gpu)
    else:
        model = nnetVAECNN(config.feature_dim, num_frames, in_channels,
                           out_channels, kernel, config.bn_dim,
                           config.use_gpu)

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    lr = config.learning_rate
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Save an epoch-0 checkpoint before any training happens.
    # Fix: use a context manager so the checkpoint file handle is closed.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    with open(model_path, 'wb') as ckpt_f:
        torch.save(
            {
                'epoch': 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, ckpt_f)

    # Per-epoch loss histories (reconstruction / KL, train / dev)
    ep_vae_rec_tr = []
    ep_vae_kl_tr = []
    ep_vae_rec_dev = []
    ep_vae_kl_dev = []

    # Load Datasets
    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)
    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    err_p = 0
    best_model_state = None

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_vae_rec_losses = []
        train_vae_kl_losses = []

        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            s = batch_x.shape
            # Reshape (batch, seq, feat*frames) into CNN format
            # (batch*seq, 1, feat, frames).
            # Fix: the original repeated this exact triple of reshapes a
            # second time under `if config.nopool:`; the repetition produced
            # an identical tensor, so it is done once here.
            batch_x = batch_x.view(s[0], s[1], config.feature_dim, num_frames)
            batch_x = batch_x.view(s[0] * s[1], config.feature_dim,
                                   num_frames)
            batch_x = batch_x[:, None, :, :]  # change the data format for CNNs

            if config.use_gpu:
                batch_x = Variable(batch_x).cuda()
                batch_l = Variable(batch_l).cuda()
            else:
                batch_x = Variable(batch_x)
                batch_l = Variable(batch_l)

            optimizer.zero_grad()

            # Main forward pass
            ae_out, latent_out = model(batch_x)
            if config.nopool:
                # No-pool loss additionally needs lengths and the original
                # batch shape to mask padding.
                loss = vae_loss_nopool(batch_x[:, 0, :, :], ae_out[:, 0, :, :],
                                       latent_out, batch_l, s)
            else:
                loss = vae_loss(batch_x[:, 0, :, :], ae_out[:, 0, :, :],
                                latent_out)

            train_vae_rec_losses.append(loss[0].item())
            train_vae_kl_losses.append(loss[1].item())

            # Losses are log-likelihood terms: maximize them by minimizing
            # their negation.
            (-loss[0] - loss[1]).backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       config.clip_thresh)
            optimizer.step()

        ep_vae_rec_tr.append(np.mean(train_vae_rec_losses))
        ep_vae_kl_tr.append(np.mean(train_vae_kl_losses))

        ######################
        ##### Validation #####
        ######################

        model.eval()
        with torch.set_grad_enabled(False):
            val_vae_rec_losses = []
            val_vae_kl_losses = []

            for batch_x, batch_l, lab in data_loader_dev:
                s = batch_x.shape
                batch_x = batch_x.view(s[0], s[1], config.feature_dim,
                                       num_frames)
                batch_x = batch_x.view(s[0] * s[1], config.feature_dim,
                                       num_frames)
                batch_x = batch_x[:, None, :, :]  # change the data format for CNNs

                if config.use_gpu:
                    batch_x = Variable(batch_x).cuda()
                    batch_l = Variable(batch_l).cuda()
                else:
                    batch_x = Variable(batch_x)
                    batch_l = Variable(batch_l)

                # Main forward pass
                ae_out, latent_out = model(batch_x)
                if config.nopool:
                    loss = vae_loss_nopool(batch_x[:, 0, :, :],
                                           ae_out[:, 0, :, :], latent_out,
                                           batch_l, s)
                else:
                    loss = vae_loss(batch_x[:, 0, :, :], ae_out[:, 0, :, :],
                                    latent_out)

                val_vae_rec_losses.append(loss[0].item())
                val_vae_kl_losses.append(loss[1].item())

            ep_vae_rec_dev.append(np.mean(val_vae_rec_losses))
            ep_vae_kl_dev.append(np.mean(val_vae_kl_losses))

        # Manage learning rate: if the (negated) validation log-likelihood
        # worsened beyond the tolerance, decay the LR and roll back to the
        # best model seen so far; otherwise accept this epoch as the new best.
        if epoch_i == 0:
            err_p = -np.mean(val_vae_rec_losses) - np.mean(val_vae_kl_losses)
            best_model_state = model.state_dict()
        else:
            if -np.mean(val_vae_rec_losses) - np.mean(val_vae_kl_losses) > (
                    100 - config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}"
                    .format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = -np.mean(val_vae_rec_losses) - np.mean(
                    val_vae_kl_losses)
                best_model_state = model.state_dict()

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr VAE ll={:.3f},rec={:.3f},kld={:.3f} :: Val VAE ll={:.3f},rec={:.3f},kld={:.3f}".format(
            epoch_i + 1, lr, ep_vae_kl_tr[-1] + ep_vae_rec_tr[-1],
            ep_vae_rec_tr[-1], ep_vae_kl_tr[-1],
            ep_vae_kl_dev[-1] + ep_vae_rec_dev[-1], ep_vae_rec_dev[-1],
            ep_vae_kl_dev[-1])
        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            # Fix: close the checkpoint file handle via a context manager.
            with open(model_path, 'wb') as ckpt_f:
                torch.save(
                    {
                        'epoch': epoch_i + 1,
                        'feature_dim': config.feature_dim,
                        'num_frames': num_frames,
                        'in_channels': config.in_channels,
                        'out_channels': config.out_channels,
                        'kernel': config.kernel,
                        'bn_dim': config.bn_dim,
                        'nopool': config.nopool,
                        'ep_vae_kl_tr': ep_vae_kl_tr,
                        'ep_vae_rec_tr': ep_vae_rec_tr,
                        'ep_vae_kl_dev': ep_vae_kl_dev,
                        'ep_vae_rec_dev': ep_vae_rec_dev,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, ckpt_f)
def run(config):
    """Train an unsupervised CURL model (mixture-prior VAE) on sequence egs.

    Expects ``config`` to provide: store_path, experiment_name, egs_dir,
    train_set, dev_set, feature_dim, bn_dim, hidden_dim, comp_num,
    encoder_num_layers, decoder_num_layers, use_gpu, optimizer,
    learning_rate, lrr, lr_tol, weight_decay, batch_size, epochs,
    clip_thresh, model_save_interval.

    Side effects: creates the experiment directory, configures file+console
    logging, and periodically writes checkpoints (including the randomly
    drawn mixture prior means) under that directory.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')

    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load feature configuration.
    # Fix: use a context manager so the config file handle is closed.
    with open(os.path.join(config.egs_dir, config.train_set, 'egs.config'),
              'rb') as cfg_f:
        egs_config = pkl.load(cfg_f)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (config.encoder_num_layers))
    logging.info('Decoder Number of Layers: %d' % (config.decoder_num_layers))
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Component Number: %d' % (config.comp_num))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))
    sys.stdout.flush()

    model = nnetCurlSupervised(config.feature_dim * num_frames,
                               config.encoder_num_layers,
                               config.decoder_num_layers, config.hidden_dim,
                               config.bn_dim, config.comp_num, config.use_gpu)

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    lr = config.learning_rate
    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Save an epoch-0 checkpoint before any training happens.
    # Fix: use a context manager so the checkpoint file handle is closed.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    with open(model_path, 'wb') as ckpt_f:
        torch.save(
            {
                'epoch': 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, ckpt_f)

    # Per-epoch CURL log-likelihood histories
    ep_curl_tr = []
    ep_curl_dev = []

    # Load Datasets
    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)
    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    err_p = 0
    best_model_state = None

    # Prior means for the mixture components, drawn uniformly in [-1, 1)
    means_p = 2 * (torch.rand(config.comp_num, config.bn_dim) - 0.5)

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_curl_losses = []

        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            # Sort by length (descending) as required by the packed-sequence
            # model input.
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])

            optimizer.zero_grad()

            # Main forward pass
            ae_out, latent_out = model(batch_x, batch_l)

            # Convert all the weird tensors to frame-wise form
            batch_x = pad2list(batch_x, batch_l)
            ae_out = pad2list3d(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0], batch_l),
                          pad2list3d(latent_out[1], batch_l),
                          pad2list3d(latent_out[2], batch_l))

            loss = curl_loss_unsupervised(batch_x, ae_out, latent_out,
                                          means_p, use_gpu=config.use_gpu)
            train_curl_losses.append(loss.item())

            # Loss is a log-likelihood: maximize it by minimizing its negation.
            (-loss).backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       config.clip_thresh)
            optimizer.step()

        ep_curl_tr.append(np.mean(train_curl_losses))

        ######################
        ##### Validation #####
        ######################

        model.eval()
        with torch.set_grad_enabled(False):
            val_curl_losses = []

            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])

                # Main forward pass
                ae_out, latent_out = model(batch_x, batch_l)

                # Convert all the weird tensors to frame-wise form
                batch_x = pad2list(batch_x, batch_l)
                ae_out = pad2list3d(ae_out, batch_l)
                latent_out = (pad2list(latent_out[0], batch_l),
                              pad2list3d(latent_out[1], batch_l),
                              pad2list3d(latent_out[2], batch_l))

                loss = curl_loss_unsupervised(batch_x, ae_out, latent_out,
                                              means_p,
                                              use_gpu=config.use_gpu)
                val_curl_losses.append(loss.item())

            ep_curl_dev.append(np.mean(val_curl_losses))

        # Manage learning rate: decay and roll back to the best model when
        # the negated validation log-likelihood worsens beyond the tolerance.
        if epoch_i == 0:
            err_p = -np.mean(val_curl_losses)
            best_model_state = model.state_dict()
        else:
            if -np.mean(val_curl_losses) > (100 -
                                            config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}"
                    .format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = -np.mean(val_curl_losses)
                best_model_state = model.state_dict()

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr CURL Log-likelihood: {:.3f} :: Val CURL Log-likelihood: {:.3f}".format(
            epoch_i + 1, lr, ep_curl_tr[-1], ep_curl_dev[-1])
        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            # Fix: close the checkpoint file handle via a context manager.
            with open(model_path, 'wb') as ckpt_f:
                torch.save(
                    {
                        'epoch': epoch_i + 1,
                        'feature_dim': config.feature_dim,
                        'num_frames': num_frames,
                        'encoder_num_layers': config.encoder_num_layers,
                        'decoder_num_layers': config.decoder_num_layers,
                        'hidden_dim': config.hidden_dim,
                        'comp_num': config.comp_num,
                        'bn_dim': config.bn_dim,
                        'ep_curl_tr': ep_curl_tr,
                        'ep_curl_dev': ep_curl_dev,
                        'prior_means': means_p.numpy(),
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, ckpt_f)
def run(config):
    """Train a multitask model: classifier + autoencoder + autoregressive head.

    The model predicts class posteriors, reconstructs the input (AE), and
    predicts the input shifted by ``config.time_shift`` frames (AR); the
    training loss is the unweighted sum of the three terms.

    Expects ``config`` to provide: store_path, experiment_name, egs_dir,
    train_set, dev_set, feature_dim, num_classes, encoder_num_layers,
    classifier_num_layers, ae_num_layers, hidden_dim, bn_dim, time_shift,
    use_gpu, optimizer, learning_rate, batch_size, epochs,
    model_save_interval.

    Side effects: creates the experiment directory, configures file+console
    logging, and periodically writes checkpoints under that directory.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')

    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load feature configuration.
    # Fix: use a context manager so the config file handle is closed.
    with open(os.path.join(config.egs_dir, 'egs.config'), 'rb') as cfg_f:
        egs_config = pkl.load(cfg_f)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (config.encoder_num_layers))
    logging.info('Classifier Number of Layers: %d' %
                 (config.classifier_num_layers))
    logging.info('AE Number of Layers: %d' % (config.ae_num_layers))
    logging.info('AR Time Shift: %d' % (config.time_shift))
    # Fix: this line previously logged config.feature_dim as the hidden
    # dimension; the model below is built with config.hidden_dim.
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Number of Classes: %d' % (config.num_classes))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    sys.stdout.flush()

    model = nnetAEClassifierMultitaskAEAR(
        config.feature_dim * num_frames, config.num_classes,
        config.encoder_num_layers, config.classifier_num_layers,
        config.ae_num_layers, config.hidden_dim, config.bn_dim,
        config.time_shift)

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    criterion_classifier = nn.CrossEntropyLoss()
    criterion_ae = nn.MSELoss()

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters())
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=config.learning_rate)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Save an epoch-0 checkpoint before any training happens.
    # Fix: use a context manager so the checkpoint file handle is closed.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    with open(model_path, 'wb') as ckpt_f:
        torch.save(
            {
                'epoch': 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, ckpt_f)

    # Per-epoch histories: classifier loss, frame error rate, AE loss, AR loss
    ep_loss_tr = []
    ep_fer_tr = []
    ep_ae_tr = []
    ep_ar_tr = []
    ep_loss_dev = []
    ep_fer_dev = []
    ep_ae_dev = []
    ep_ar_dev = []

    # Load Datasets
    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)
    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_losses = []
        train_ae_losses = []
        train_ar_losses = []
        tr_fer = []

        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            # Sort by length (descending) as required by the packed-sequence
            # model input.
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            # Main forward pass
            class_out, ae_out, ar_out = model(batch_x, batch_l)

            # Convert all the weird tensors to frame-wise form; the AR head
            # only has targets for frames beyond the time shift.
            class_out = pad2list(class_out, batch_l)
            ae_out = pad2list(ae_out, batch_l)
            ar_out = pad2list(ar_out, batch_l - config.time_shift)
            lab = pad2list(lab, batch_l)

            optimizer.zero_grad()

            loss_classifier = criterion_classifier(class_out, lab)
            loss_ae = criterion_ae(ae_out, pad2list(batch_x, batch_l))
            loss_ar = criterion_ae(
                ar_out,
                pad2list(batch_x[:, config.time_shift:, :],
                         batch_l - config.time_shift))
            loss = loss_classifier + loss_ae + loss_ar

            train_losses.append(loss_classifier.item())
            train_ae_losses.append(loss_ae.item())
            train_ar_losses.append(loss_ar.item())
            if config.use_gpu:
                tr_fer.append(
                    compute_fer(class_out.cpu().data.numpy(),
                                lab.cpu().data.numpy()))
            else:
                tr_fer.append(
                    compute_fer(class_out.data.numpy(), lab.data.numpy()))

            loss.backward()
            optimizer.step()

        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))
        ep_ae_tr.append(np.mean(train_ae_losses))
        ep_ar_tr.append(np.mean(train_ar_losses))

        ######################
        ##### Validation #####
        ######################

        model.eval()
        with torch.set_grad_enabled(False):
            val_losses = []
            val_ae_losses = []
            val_ar_losses = []
            val_fer = []

            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                    lab = Variable(lab[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])
                    lab = Variable(lab[indices])

                # Main forward pass
                class_out, ae_out, ar_out = model(batch_x, batch_l)

                # Convert all the weird tensors to frame-wise form
                class_out = pad2list(class_out, batch_l)
                ae_out = pad2list(ae_out, batch_l)
                ar_out = pad2list(ar_out, batch_l - config.time_shift)
                lab = pad2list(lab, batch_l)

                loss_classifier = criterion_classifier(class_out, lab)
                loss_ae = criterion_ae(ae_out, pad2list(batch_x, batch_l))
                loss_ar = criterion_ae(
                    ar_out,
                    pad2list(batch_x[:, config.time_shift:, :],
                             batch_l - config.time_shift))

                val_losses.append(loss_classifier.item())
                val_ae_losses.append(loss_ae.item())
                val_ar_losses.append(loss_ar.item())
                if config.use_gpu:
                    val_fer.append(
                        compute_fer(class_out.cpu().data.numpy(),
                                    lab.cpu().data.numpy()))
                else:
                    val_fer.append(
                        compute_fer(class_out.data.numpy(),
                                    lab.data.numpy()))

            ep_loss_dev.append(np.mean(val_losses))
            ep_fer_dev.append(np.mean(val_fer))
            ep_ae_dev.append(np.mean(val_ae_losses))
            ep_ar_dev.append(np.mean(val_ar_losses))

        print_log = "Epoch: {:d} Tr loss: {:.3f} :: Tr FER: {:.2f}".format(
            epoch_i + 1, ep_loss_tr[-1], ep_fer_tr[-1])
        print_log += " || Val : {:.3f} :: Val FER: {:.2f}".format(
            ep_loss_dev[-1], ep_fer_dev[-1])
        print_log += " || AE Loss (Train) : {:.3f} :: AE Loss (Dev) : {:.3f} ".format(
            ep_ae_tr[-1], ep_ae_dev[-1])
        print_log += " || AR Loss (Train) : {:.3f} :: AR Loss (Dev) : {:.3f} ".format(
            ep_ar_tr[-1], ep_ar_dev[-1])
        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            # Fix: close the checkpoint file handle via a context manager.
            with open(model_path, 'wb') as ckpt_f:
                torch.save(
                    {
                        'epoch': epoch_i + 1,
                        'feature_dim': config.feature_dim,
                        'num_frames': num_frames,
                        'num_classes': config.num_classes,
                        'encoder_num_layers': config.encoder_num_layers,
                        'classifier_num_layers': config.classifier_num_layers,
                        'ae_num_layers': config.ae_num_layers,
                        'hidden_dim': config.hidden_dim,
                        'bn_dim': config.bn_dim,
                        'time_shift': config.time_shift,
                        'ep_loss_tr': ep_loss_tr,
                        'ep_loss_dev': ep_loss_dev,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, ckpt_f)
def run(config):
    """Train a joint VAE + classifier with a scheduled cross-entropy weight.

    The objective is ``ce_weight * CE - (rec_ll + kl_ll)``; ``ce_weight``
    starts at ``config.ce_weight_init`` and is multiplied by ``config.cerr``
    every ``config.ce_change_interval`` epochs.

    Expects ``config`` to provide: store_path, experiment_name, egs_dir,
    train_set, dev_set, feature_dim, num_classes, encoder_num_layers,
    classifier_num_layers, ae_num_layers, hidden_dim, bn_dim, enc_dropout,
    use_gpu, optimizer, learning_rate, lrr, lr_tol, weight_decay,
    ce_weight_init, cerr, ce_change_interval, batch_size, epochs,
    clip_thresh, model_save_interval.

    Side effects: creates the experiment directory, configures file+console
    logging, and periodically writes checkpoints under that directory.
    """
    model_dir = os.path.join(config.store_path,
                             config.experiment_name + '.dir')

    os.makedirs(config.store_path, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        filename=os.path.join(model_dir,
                                              config.experiment_name),
                        filemode='w')

    # define a new Handler to log to console as well
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    # Load feature configuration.
    # Fix: use a context manager so the config file handle is closed.
    with open(os.path.join(config.egs_dir, 'egs.config'), 'rb') as cfg_f:
        egs_config = pkl.load(cfg_f)
    context = egs_config['concat_feats'].split(',')
    num_frames = int(context[0]) + int(context[1]) + 1

    logging.info('Model Parameters: ')
    logging.info('Encoder Number of Layers: %d' % (config.encoder_num_layers))
    logging.info('Classifier Number of Layers: %d' %
                 (config.classifier_num_layers))
    logging.info('AE Number of Layers: %d' % (config.ae_num_layers))
    logging.info('Hidden Dimension: %d' % (config.hidden_dim))
    logging.info('Number of Classes: %d' % (config.num_classes))
    logging.info('Data dimension: %d' % (config.feature_dim))
    logging.info('Bottleneck dimension: %d' % (config.bn_dim))
    logging.info('Number of Frames: %d' % (num_frames))
    logging.info('Optimizer: %s ' % (config.optimizer))
    logging.info('Batch Size: %d ' % (config.batch_size))
    logging.info('Initial Learning Rate: %f ' % (config.learning_rate))
    logging.info('Inital Cross-entropy weight: %f ' % (config.ce_weight_init))
    logging.info('Cross-entropy change intervla: %d ' %
                 (config.ce_change_interval))
    logging.info('Encoder Dropout: %f ' % (config.enc_dropout))
    logging.info('Learning rate reduction rate: %f ' % (config.lrr))
    logging.info('Weight decay: %f ' % (config.weight_decay))
    sys.stdout.flush()

    model = nnetVAEClassifier(config.feature_dim * num_frames,
                              config.num_classes, config.encoder_num_layers,
                              config.classifier_num_layers,
                              config.ae_num_layers, config.hidden_dim,
                              config.bn_dim, config.enc_dropout,
                              config.use_gpu)

    if config.use_gpu:
        # Set environment variable for GPU ID
        id = get_device_id()
        os.environ["CUDA_VISIBLE_DEVICES"] = id
        model = model.cuda()

    criterion_classifier = nn.CrossEntropyLoss()

    lr = config.learning_rate
    ce_weight = config.ce_weight_init

    if config.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=config.learning_rate,
                               weight_decay=config.weight_decay)
    elif config.optimizer == 'adadelta':
        optimizer = optim.Adadelta(model.parameters(),
                                   weight_decay=config.weight_decay)
    elif config.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay)
    elif config.optimizer == 'adagrad':
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    elif config.optimizer == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=config.learning_rate,
                                  weight_decay=config.weight_decay)
    else:
        raise NotImplementedError("Learning method not supported for the task")

    # Save an epoch-0 checkpoint before any training happens.
    # Fix: use a context manager so the checkpoint file handle is closed.
    model_path = os.path.join(model_dir,
                              config.experiment_name + '__epoch_0.model')
    with open(model_path, 'wb') as ckpt_f:
        torch.save(
            {
                'epoch': 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, ckpt_f)

    # Per-epoch histories: classifier loss, frame error rate, and VAE
    # reconstruction / KL terms for train and dev.
    ep_loss_tr = []
    ep_fer_tr = []
    ep_vae_rec_tr = []
    ep_vae_kl_tr = []
    ep_loss_dev = []
    ep_fer_dev = []
    ep_vae_rec_dev = []
    ep_vae_kl_dev = []

    # Load Datasets
    dataset_train = nnetDatasetSeq(
        os.path.join(config.egs_dir, config.train_set))
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train, batch_size=config.batch_size, shuffle=True)
    dataset_dev = nnetDatasetSeq(os.path.join(config.egs_dir, config.dev_set))
    data_loader_dev = torch.utils.data.DataLoader(dataset_dev,
                                                  batch_size=config.batch_size,
                                                  shuffle=True)

    err_p = 0
    best_model_state = None

    for epoch_i in range(config.epochs):

        ####################
        ##### Training #####
        ####################

        model.train()
        train_losses = []
        train_vae_rec_losses = []
        train_vae_kl_losses = []
        tr_fer = []

        # Main training loop
        for batch_x, batch_l, lab in data_loader_train:
            # Sort by length (descending) as required by the packed-sequence
            # model input.
            _, indices = torch.sort(batch_l, descending=True)
            if config.use_gpu:
                batch_x = Variable(batch_x[indices]).cuda()
                batch_l = Variable(batch_l[indices]).cuda()
                lab = Variable(lab[indices]).cuda()
            else:
                batch_x = Variable(batch_x[indices])
                batch_l = Variable(batch_l[indices])
                lab = Variable(lab[indices])

            optimizer.zero_grad()

            # Main forward pass
            class_out, ae_out, latent_out = model(batch_x, batch_l)

            # Convert all the weird tensors to frame-wise form
            class_out = pad2list(class_out, batch_l)
            batch_x = pad2list(batch_x, batch_l)
            lab = pad2list(lab, batch_l)
            ae_out = pad2list(ae_out, batch_l)
            latent_out = (pad2list(latent_out[0], batch_l),
                          pad2list(latent_out[1], batch_l))

            loss_classifier = criterion_classifier(class_out, lab)
            loss_vae = vae_loss(batch_x, ae_out, latent_out)

            if np.isnan(loss_vae[0].item()) or np.isnan(loss_vae[1].item()):
                logging.info(
                    "VAE Loss can diverged to nan, reverting to previous model"
                )
                # NOTE(review): this flag is never read afterwards, so the
                # logged "revert" never actually happens — the NaN batch is
                # only skipped. Confirm intended behavior with the author.
                revert = True
                continue

            # Weighted CE minus the VAE log-likelihood terms (maximized).
            loss = ce_weight * loss_classifier - (loss_vae[0] + loss_vae[1])

            train_losses.append(loss_classifier.item())
            train_vae_rec_losses.append(loss_vae[0].item())
            train_vae_kl_losses.append(loss_vae[1].item())
            if config.use_gpu:
                tr_fer.append(
                    compute_fer(class_out.cpu().data.numpy(),
                                lab.cpu().data.numpy()))
            else:
                tr_fer.append(
                    compute_fer(class_out.data.numpy(), lab.data.numpy()))

            loss.backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                       config.clip_thresh)
            optimizer.step()

        # Decay the cross-entropy weight on schedule.
        if not (epoch_i + 1) % config.ce_change_interval:
            logging.info("Changing CE weight from {:.6f} to {:.6f}".format(
                ce_weight, config.cerr * ce_weight))
            ce_weight = config.cerr * ce_weight

        ep_loss_tr.append(np.mean(train_losses))
        ep_fer_tr.append(np.mean(tr_fer))
        ep_vae_rec_tr.append(np.mean(train_vae_rec_losses))
        ep_vae_kl_tr.append(np.mean(train_vae_kl_losses))

        ######################
        ##### Validation #####
        ######################

        model.eval()
        with torch.set_grad_enabled(False):
            val_losses = []
            val_vae_rec_losses = []
            val_vae_kl_losses = []
            val_fer = []

            for batch_x, batch_l, lab in data_loader_dev:
                _, indices = torch.sort(batch_l, descending=True)
                if config.use_gpu:
                    batch_x = Variable(batch_x[indices]).cuda()
                    batch_l = Variable(batch_l[indices]).cuda()
                    lab = Variable(lab[indices]).cuda()
                else:
                    batch_x = Variable(batch_x[indices])
                    batch_l = Variable(batch_l[indices])
                    lab = Variable(lab[indices])

                # Main forward pass
                class_out, ae_out, latent_out = model(batch_x, batch_l)

                # Convert all the weird tensors to frame-wise form
                class_out = pad2list(class_out, batch_l)
                batch_x = pad2list(batch_x, batch_l)
                lab = pad2list(lab, batch_l)
                ae_out = pad2list(ae_out, batch_l)
                latent_out = (pad2list(latent_out[0], batch_l),
                              pad2list(latent_out[1], batch_l))

                loss_classifier = criterion_classifier(class_out, lab)
                loss_vae = vae_loss(batch_x, ae_out, latent_out)

                val_losses.append(loss_classifier.item())
                val_vae_rec_losses.append(loss_vae[0].item())
                val_vae_kl_losses.append(loss_vae[1].item())
                if config.use_gpu:
                    val_fer.append(
                        compute_fer(class_out.cpu().data.numpy(),
                                    lab.cpu().data.numpy()))
                else:
                    val_fer.append(
                        compute_fer(class_out.data.numpy(),
                                    lab.data.numpy()))

            ep_loss_dev.append(np.mean(val_losses))
            ep_fer_dev.append(np.mean(val_fer))
            ep_vae_rec_dev.append(np.mean(val_vae_rec_losses))
            ep_vae_kl_dev.append(np.mean(val_vae_kl_losses))

        # Manage learning rate: if the validation classifier loss worsened
        # beyond the tolerance, decay the LR and roll back to the best model;
        # otherwise accept this epoch as the new best.
        if epoch_i == 0:
            err_p = np.mean(val_losses)
            best_model_state = model.state_dict()
        else:
            if np.mean(val_losses) > (100 + config.lr_tol) * err_p / 100:
                logging.info(
                    "Val loss went up, Changing learning rate from {:.6f} to {:.6f}"
                    .format(lr, config.lrr * lr))
                lr = config.lrr * lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
                model.load_state_dict(best_model_state)
            else:
                err_p = np.mean(val_losses)
                best_model_state = model.state_dict()

        print_log = "Epoch: {:d} ((lr={:.6f})) Tr loss: {:.3f} :: Tr FER: {:.2f}".format(
            epoch_i + 1, lr, ep_loss_tr[-1], ep_fer_tr[-1])
        print_log += " || Val : {:.3f} :: Val FER: {:.2f}".format(
            ep_loss_dev[-1], ep_fer_dev[-1])
        print_log += " || VAE Log-Likelihood (Train) : Rec> {:.3f} KL> {:.3f} :: VAE Log-Likelihood (Dev) : Rec> {:.3f} KL> {:.3f} ".format(
            ep_vae_rec_tr[-1], ep_vae_kl_tr[-1], ep_vae_rec_dev[-1],
            ep_vae_kl_dev[-1])
        logging.info(print_log)

        if (epoch_i + 1) % config.model_save_interval == 0:
            model_path = os.path.join(
                model_dir, config.experiment_name + '__epoch_%d' %
                (epoch_i + 1) + '.model')
            # Fix: close the checkpoint file handle via a context manager.
            with open(model_path, 'wb') as ckpt_f:
                torch.save(
                    {
                        'epoch': epoch_i + 1,
                        'feature_dim': config.feature_dim,
                        'num_frames': num_frames,
                        'num_classes': config.num_classes,
                        'encoder_num_layers': config.encoder_num_layers,
                        'classifier_num_layers': config.classifier_num_layers,
                        'ae_num_layers': config.ae_num_layers,
                        'hidden_dim': config.hidden_dim,
                        'bn_dim': config.bn_dim,
                        'enc_dropout': config.enc_dropout,
                        'ep_loss_tr': ep_loss_tr,
                        'ep_loss_dev': ep_loss_dev,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, ckpt_f)