def main(argv):
    data_manager = DataManager()
    data_manager.prepare()

    dae = DAE()
    vae = VAE(dae, beta=flags.vae_beta)
    scan = SCAN(dae, vae, beta=flags.scan_beta, lambd=flags.scan_lambda)
    scan_recomb = SCANRecombinator(dae, vae, scan)

    dae_saver = CheckPointSaver(flags.checkpoint_dir, "dae", dae.get_vars())
    vae_saver = CheckPointSaver(flags.checkpoint_dir, "vae", vae.get_vars())
    scan_saver = CheckPointSaver(flags.checkpoint_dir, "scan", scan.get_vars())
    scan_recomb_saver = CheckPointSaver(flags.checkpoint_dir, "scan_recomb",
                                        scan_recomb.get_vars())

    sess = tf.Session()

    # Initialize variables
    init = tf.global_variables_initializer()
    sess.run(init)

    # For TensorBoard log
    summary_writer = tf.summary.FileWriter(flags.log_file, sess.graph)

    # Load from checkpoint
    dae_saver.load(sess)
    vae_saver.load(sess)
    scan_saver.load(sess)
    scan_recomb_saver.load(sess)

    # Train
    if flags.train_dae:
        train_dae(sess, dae, data_manager, dae_saver, summary_writer)
    if flags.train_vae:
        train_vae(sess, vae, data_manager, vae_saver, summary_writer)
        disentangle_check(sess, vae, data_manager)
    if flags.train_scan:
        train_scan(sess, scan, data_manager, scan_saver, summary_writer)
        sym2img_check(sess, scan, data_manager)
        img2sym_check(sess, scan, data_manager)
    if flags.train_scan_recomb:
        train_scan_recomb(sess, scan_recomb, data_manager, scan_recomb_saver,
                          summary_writer)
        recombination_check(sess, scan_recomb, data_manager)

    sess.close()
def test_vae(self):
    dae = DAE()
    vae = VAE(dae)
    vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "vae")
    # Check size of optimizing vars
    self.assertEqual(len(vars), 14 + 12)
def test_vae(self):
    dae = DAE()
    vae = VAE(dae)
    vars = vae.get_vars()
    # Check size of optimizing vars
    self.assertEqual(len(vars), 14 + 12)
def test_scan(self):
    dae = DAE()
    vae = VAE(dae)
    scan = SCAN(dae, vae)
    vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "scan")
    # Check size of optimizing vars
    self.assertEqual(len(vars), 6 + 4)
def test_scan(self):
    dae = DAE()
    vae = VAE(dae)
    scan = SCAN(dae, vae)
    vars = scan.get_vars()
    # Check size of optimizing vars
    self.assertEqual(len(vars), 6 + 4)
def train(data, path):
    os.system("mkdir -p " + path)

    norm = data['norm']
    pvc = data['pvc']
    feat = np.concatenate([norm, pvc], axis=0)
    #feat = norm

    x = tf.placeholder(tf.float32, [None, input_dim], name='x')
    z = tf.placeholder(tf.float32, [None, z_dim], name='z')

    model = DAE()
    loss = model.loss(x)
    _z = model.encode(x)
    x_h = model.decode(z)

    tf.add_to_collection('encode', _z)
    tf.add_to_collection('decode', x_h)
    tf.add_to_collection('prd', loss['pmse'])
    tf.add_to_collection('cc', loss['corr'])

    w_loss = 60 * loss['pmse'] - 40 * loss['corr']
    optimize = tf.train.AdamOptimizer(learning_rate=5e-5, beta1=0.9,
                                      beta2=0.99).minimize(w_loss)
    #optimize = tf.train.RMSPropOptimizer(learning_rate=1e-5).minimize(loss)

    merged = tf.summary.merge_all()

    with tf.Session() as sess:
        start = time.time()
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(path + 'logs', sess.graph)

        for i in range(iter_num):
            np.random.shuffle(feat)
            for j in range(0, len(feat), batch_num):
                _ = sess.run([optimize], feed_dict={x: feat[j:j + batch_num]})

            if i % 100 == 0:
                xb = feat[j:]
                _, err, result = sess.run([optimize, loss, merged],
                                          feed_dict={x: xb})
                writer.add_summary(result, len(feat) * i + j)
                print('Epoch [%4d] Time [%5.4f] PRD: [%.4f] CC: [%.4f]' %
                      (i + 1, time.time() - start, err['pmse'], err['corr']))

            if i % 1000 == 0:
                saver.save(sess, path + 'test_best_model')
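Because train() registers the encoder and decoder endpoints in graph collections and writes a checkpoint, a later script can reload them without rebuilding the model. Below is a minimal sketch (not part of the original) of such a restore step; it assumes the checkpoint written by saver.save() above, and the helper name encode_with_saved_model is hypothetical.

import tensorflow as tf

def encode_with_saved_model(path, batch):
    # hypothetical helper: restore the graph saved by train() and run the encoder
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(path + 'test_best_model.meta')
        saver.restore(sess, path + 'test_best_model')
        x = tf.get_default_graph().get_tensor_by_name('x:0')  # placeholder named 'x'
        _z = tf.get_collection('encode')[0]                   # collection registered in train()
        return sess.run(_z, feed_dict={x: batch})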
def test_scan_recombinator(self):
    dae = DAE()
    vae = VAE(dae)
    scan = SCAN(dae, vae)
    scan_recomb = SCANRecombinator(dae, vae, scan)
    vars = scan_recomb.get_vars()
    # Check size of optimizing vars
    self.assertEqual(len(vars), 4)
def main(argv):
    data_manager = DataManager()
    data_manager.prepare()

    dae = DAE()
    vae = VAE(dae)
    scan = SCAN(dae, vae)

    dae_saver = CheckPointSaver(CHECKPOINT_DIR, "dae", dae.get_vars())
    vae_saver = CheckPointSaver(CHECKPOINT_DIR, "vae", vae.get_vars())
    scan_saver = CheckPointSaver(CHECKPOINT_DIR, "scan", scan.get_vars())

    sess = tf.Session()

    # Initialize variables
    init = tf.global_variables_initializer()
    sess.run(init)

    # For TensorBoard log
    summary_writer = tf.summary.FileWriter(LOG_FILE, sess.graph)

    # Load from checkpoint
    dae_saver.load(sess)
    vae_saver.load(sess)
    scan_saver.load(sess)

    # Train
    train_dae(sess, dae, data_manager, dae_saver, summary_writer)
    train_vae(sess, vae, data_manager, vae_saver, summary_writer)
    disentangle_check(sess, vae, data_manager)
    train_scan(sess, scan, data_manager, scan_saver, summary_writer)
    sym2img_check(sess, scan, data_manager)
    img2sym_check(sess, scan, data_manager)

    sess.close()
            step += 1

        if epoch % display_epoch == 0:
            print("Epoch:", '%04d' % (epoch + 1),
                  "reconstr=", "{:.3f}".format(average_reconstr_loss),
                  "latent0=", "{:.3f}".format(average_latent_loss0),
                  "latent1=", "{:.3f}".format(average_latent_loss1))

        if (epoch % save_epoch == 0) or (epoch == training_epochs - 1):
            torch.save(scan.state_dict(), '{}/scan_epoch_{}.pth'.format(exp, epoch))


data_manager = DataManager()
data_manager.prepare()

dae = DAE()
vae = VAE()
scan = SCAN()

if use_cuda:
    dae.load_state_dict(torch.load('save/dae/dae_epoch_2999.pth'))
    vae.load_state_dict(torch.load('save/vae/vae_epoch_2999.pth'))
    scan.load_state_dict(torch.load('save/scan/scan_epoch_1499.pth'))
    dae, vae, scan = dae.cuda(), vae.cuda(), scan.cuda()
else:
    dae.load_state_dict(
        torch.load('save/dae/dae_epoch_2999.pth',
                   map_location=lambda storage, loc: storage))
    vae.load_state_dict(
        torch.load('save/vae/vae_epoch_2999.pth',
                   map_location=lambda storage, loc: storage))
    scan.load_state_dict(
        torch.load('save/scan/scan_epoch_1499.pth',
                   map_location=lambda storage, loc: storage))
# load into array
data_set = dataset.load_data(NORM_DATA_PATH)
# create one-hot array
one_hots = dataloader.generate_one_hots(data_set.shape[0])
# create shuffle indexes
perm = dataset.index_generate_random(data_set)
# shuffle the data
data_set = data_set[perm]
one_hots = one_hots[perm]

train_data, test_data = dataset.split_train_test(data_set, config.TRAIN_SIZE)
oh_train_data, oh_test_data = dataset.split_train_test(one_hots, config.TRAIN_SIZE)

writer = SummaryWriter()

DAE_net = DAE(config.DATA_CHANNEL, config.DAE_ENCODER_SIZES, config.DAE_DECODER_SIZES,
              config.DAE_ENCODER_ACTIVATION, config.DAE_DECODER_ACTIVATION,
              config.DAE_BOTTLENECK_SIZE, "bernouilli")
DAE_net.cuda()
optim_dae = torch.optim.Adam(DAE_net.parameters(), lr=config.DAE_lr, eps=config.DAE_eps)

if not config.DAE_PRETRAIN:
    train_set = dataloader.DAEdata(train_data)
    dae_training_generator = data.DataLoader(train_set, **config.generator_params)
    test_set = dataloader.DAEdata(test_data)
    dae_test_generator = data.DataLoader(test_set, **config.generator_params)
    train.train_dae(DAE_net, optim_dae, dae_training_generator, dae_test_generator,
                    nn.MSELoss(), config.DAE_CHECKPOINT, config.DAE_TRAIN_EPOCH,
                    writer, config.DAE_LOG)
else:
    utils.load_model(config.DAE_LOAD_PATH, DAE_net, optim_dae)
                    help="print out decoded output and error of each sample")
parser.add_argument('--output-path', default=None, type=str,
                    help="Where to save raw acoustic output")
parser = add_decoder_args(parser)
parser.add_argument('--save-output', action="store_true",
                    help="Saves output of model from test")
args = parser.parse_args()

if __name__ == '__main__':
    torch.set_grad_enabled(False)
    device = torch.device("cuda" if args.cuda else "cpu")
    model = load_model(device, args.model_path, args.cuda)

    denoiser = DAE()
    denoiser.load_state_dict(
        torch.load('./models/denoiser_deepspeech_final.pth'))
    denoiser = denoiser.to(device)
    denoiser.eval()

    if args.decoder == "beam":
        from decoder import BeamCTCDecoder
        decoder = BeamCTCDecoder(model.labels, lm_path=args.lm_path,
                                 alpha=args.alpha, beta=args.beta,
                                 cutoff_top_n=args.cutoff_top_n,
                                 cutoff_prob=args.cutoff_prob,
                                 beam_width=args.beam_width,
                         noise_dir=args.noise_dir,
                         noise_prob=args.noise_prob,
                         noise_levels=(args.noise_min, args.noise_max))

rnn_type = args.rnn_type.lower()
assert rnn_type in supported_rnns, "rnn_type should be either lstm, rnn or gru"

model = DeepSpeech(rnn_hidden_size=args.hidden_size,
                   nb_layers=args.hidden_layers,
                   labels=labels,
                   rnn_type=supported_rnns[rnn_type],
                   audio_conf=audio_conf,
                   bidirectional=args.bidirectional,
                   mixed_precision=args.mixed_precision)
model = DeepSpeech.load_model(args.model_path)

denoiser = DAE()
decoder = GreedyDecoder(labels)

train_dataset_clean = SpectrogramDataset(
    audio_conf=audio_conf,
    manifest_filepath=args.train_manifest_clean,
    labels=labels,
    normalize=True,
    augment=args.augment)
train_dataset_adv = SpectrogramDataset(
    audio_conf=audio_conf,
    manifest_filepath=args.train_manifest_adv,
    labels=labels,
    normalize=True,
    augment=args.augment)
test_dataset = SpectrogramDataset(audio_conf=audio_conf,
from torchvision.utils import save_image

from model import DAE
from visualize import *

# hyperparameters
num_epochs = 100
batch_size = 128
lr = 1e-3

# get images from MNIST database
dataset = MNIST('../data', transform=transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# create denoising autoencoder and optimizer for it
dae = DAE()
optimizer = optim.Adam(dae.parameters(), lr=lr)

# start training
for epoch in range(num_epochs):
    # minibatch optimization with Adam
    for data in dataloader:
        img, _ = data
        # change the images to be 1D
        img = img.view(img.size(0), -1)
        # get output from network
        out = dae(img)
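The excerpt above stops right after the forward pass. Below is a minimal sketch of the per-batch update that typically follows in a denoising setup, written as a standalone helper so its assumptions are explicit: the Gaussian corruption, the MSE criterion, and the dae_step name are illustrative choices, not taken from the original.

import torch
import torch.nn.functional as F

def dae_step(dae, optimizer, img, noise_std=0.3):
    # hypothetical helper: one denoising-autoencoder update on a batch of images
    img = img.view(img.size(0), -1)                  # flatten, as in the loop above
    noisy = img + noise_std * torch.randn_like(img)  # corrupt the input (assumed scheme)
    out = dae(noisy)
    loss = F.mse_loss(out, img)                      # reconstruct the clean image
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()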
def test_dae(self):
    dae = DAE()
    vars = dae.get_vars()
    # Check size of optimizing vars
    self.assertEqual(len(vars), 10 + 10)
        if max_values[col] - min_values[col] != 0:
            out_data[:, col] /= max_values[col] - min_values[col]
        np.place(out_data[:, col], max_mask, 1.0)
        np.place(out_data[:, col], min_mask, 0.0)
    return out_data


if __name__ == "__main__":
    epochs = 5000
    batch_size = 128
    input_size = 34
    latent_size = 8

    model = DAE(input_size, latent_size)
    model.to('cuda')
    torch.backends.cudnn.benchmark = True

    loss_fn = nn.MSELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    DECAY = 0.95
    scheduler = LambdaLR(optimizer, lr_lambda=lambda t: DECAY**t)

    data = pd.read_csv("model/training_test_data.csv")
    data = data.sample(frac=1, random_state=200)
    data = data.to_numpy()

    size = data.shape[0]
    training_data = data[:int(0.7 * size)]
    validation_data = data[int(0.7 * size):int(0.9 * size)]
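This excerpt sets up the model, MSE loss, SGD optimizer, and a decaying LambdaLR scheduler, then splits the data, but stops before any training loop. A minimal sketch of one pass over the rows is given below; it assumes the model's forward pass returns the reconstruction and that the rows are already normalized, and the run_epoch name and batching scheme are illustrative.

import torch

def run_epoch(model, loss_fn, optimizer, rows, batch_size, train=True):
    # hypothetical helper: one pass over the (already normalized) rows
    model.train(train)
    total, count = 0.0, 0
    for start in range(0, len(rows), batch_size):
        batch = torch.as_tensor(rows[start:start + batch_size],
                                dtype=torch.float32, device='cuda')
        recon = model(batch)            # assumes forward() returns the reconstruction
        loss = loss_fn(recon, batch)
        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        total += loss.item() * len(batch)
        count += len(batch)
    return total / count

With the pieces defined above, the scheduler would then be advanced once per epoch, e.g. scheduler.step() after each training call to run_epoch.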
        target = torch.transpose(target, 1, 3)
        if use_cuda:
            hsv_image_t = target.data.cpu().numpy()
        else:
            hsv_image_t = target.data.numpy()
        rgb_image_t = utils.convert_hsv_to_rgb(hsv_image_t[0])
        utils.save_image(rgb_image_t, "{}/target_epoch_{}.png".format(exp, epoch))

        # Save to checkpoint
        if (epoch % save_epoch == 0) or (epoch == training_epochs - 1):
            torch.save(dae.state_dict(), '{}/dae_epoch_{}.pth'.format(exp, epoch))


data_manager = DataManager()
data_manager.prepare()

dae = DAE()

if opt.load != '':
    print('loading {}'.format(opt.load))
    if use_cuda:
        dae.load_state_dict(torch.load(exp + '/' + opt.load))
    else:
        dae.load_state_dict(
            torch.load(exp + '/' + opt.load,
                       map_location=lambda storage, loc: storage))

if use_cuda:
    dae = dae.cuda()

if opt.train:
    dae_optimizer = optim.Adam(dae.parameters(), lr=1e-4, eps=1e-8)
    train_dae(dae, data_manager, dae_optimizer)
def train(data, Test_data, path):
    os.system("mkdir -p " + path)
    #print(path)

    noi = data['noi']
    #feat = _preprocessing(noi)
    feat = noi

    # Train Clean Target
    clean = data['clean']
    #print(clean.shape)
    #print(int(clean.shape[0]))
    clean = numpy.matlib.repmat(clean, 5, 1)
    #print(feat.shape, clean.shape)
    #target = _preprocessing(clean)
    target = clean
    #print(feat.shape)

    # validation data
    Test_noi = Test_data['noi']
    #Test_feat = _preprocessing(Test_noi)
    Test_feat = Test_noi

    # valid Clean Target
    Test_clean = Test_data['clean']
    clean_Test = numpy.matlib.repmat(Test_clean, 5, 1)
    #Test_target = _preprocessing(clean_Test)
    Test_target = clean_Test
    #print(Test_feat.shape, Test_target.shape)

    x = tf.placeholder(tf.float32, [None, input_dim], name='x')
    z = tf.placeholder(tf.float32, [None, z_dim], name='z')
    y = tf.placeholder(tf.float32, [None, input_dim], name='y')

    model = DAE()
    loss = model.loss(x, y)
    training_summary = tf.summary.scalar("training_loss", loss['mse'])
    validation_summary = tf.summary.scalar("validation_loss", loss['mse'])
    _z = model.encode(x)
    x_h = model.decode(z)

    tf.add_to_collection('encode', _z)
    tf.add_to_collection('decode', x_h)
    tf.add_to_collection('err_mse', loss['mse'])

    #reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    #reg_constant = 0.01
    #l2_loss = tf.losses.get_regularization_loss()
    #w_loss = loss['mse'] + l2_loss  # + reg_constant * sum(reg_losses)
    w_loss = loss['mse']
    #lossL2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'bias' not in v.name]) * 0.001
    #w_loss = loss['mse'] + lossL2

    print(np.sum([np.prod(v.get_shape().as_list())
                  for v in tf.trainable_variables()]))

    optimize = tf.train.AdamOptimizer(learning_rate=5e-5, beta1=0.9,
                                      beta2=0.99).minimize(w_loss)
    #optimize = tf.train.RMSPropOptimizer(learning_rate=1e-5).minimize(loss)
    #merged = tf.summary.merge_all()

    with tf.Session() as sess:
        start = time.time()
        init = tf.global_variables_initializer()
        sess.run(init)
        saver = tf.train.Saver()
        writer = tf.summary.FileWriter(path + 'logs', sess.graph)

        patience = 10
        best_loss = 10000.

        for i in range(epoch):
            Num = np.arange(feat.shape[0])
            np.random.shuffle(Num)
            feat = feat[Num, :]
            target = target[Num, :]

            #length = int(feat.shape[0]/2)
            feat_1 = feat[0:-int(feat.shape[0] / 2), :]
            feat_2 = feat[-int(feat.shape[0] / 2):, :]
            com = np.array([feat_1, feat_2])
            target_1 = target[0:-int(target.shape[0] / 2), :]
            target_2 = target[-int(target.shape[0] / 2):, :]
            com_target = np.array([target_1, target_2])
            #print(feat_1.shape, feat_2.shape, com.shape)
            #feat_1 = np.reshape(feat[0:-int(feat.shape[0]/2),:], (int(feat.shape[0]/2),1024,-1))
            #feat_2 = np.reshape(feat[-int(feat.shape[0]/2):,:], (int(feat.shape[0]/2),1024,-1))

            for k in range(2):
                feat_noi = np.reshape(com[k, :, :], (-1, 1024))
                #print(feat.shape, com[k,:,:].shape)
                #feat_noi = com[k,:,:]
                target_clean = com_target[k, :, :]
                #print(feat_noi.shape)
                for j in range(0, len(feat_noi), batch_num):
                    sess.run([optimize],
                             feed_dict={
                                 x: feat_noi[j:j + batch_num],
                                 y: target_clean[j:j + batch_num]
                             })

            if i % 10 == 0:
                # To log training accuracy.
                err, train_summ = sess.run([loss, training_summary],
                                           feed_dict={
                                               x: feat_noi,
                                               y: target_clean
                                           })
                writer.add_summary(train_summ, i)

                # To log validation accuracy.
                Test_err, valid_summ = sess.run([loss, validation_summary],
                                                feed_dict={
                                                    x: Test_feat,
                                                    y: Test_target
                                                })
                writer.add_summary(valid_summ, i)

                if Test_err['mse'] < best_loss:
                    best_loss = Test_err['mse']
                    patience = 10
                    saver.save(sess, path + 'test_best_model')
                else:
                    patience -= 1

                print('Epoch [%4d] Time [%5.4f] MSE [%.6f] Val_MSE [%.6f]' %
                      (i + 1, time.time() - start, err['mse'], Test_err['mse']))

                if patience == 0:
                    print('Early Stopping')
                    break