def main(args):
    """Train the dense Keras model and checkpoint weights after every epoch."""
    train_loader = pp_data.DataLoader(args.data, partition='train',
                                      batch_size=100, balance=True,
                                      samples_per_chunk=5)
    val_loader = pp_data.DataLoader(args.data, partition='validation')

    # Input is (num_expansion_frames + 1) stacked frames of 40 features each.
    model = models.build_dense(
        input_shape=((num_expansion_frames + 1) * 40, ), lr=args.lr)
    model.summary()

    # NOTE(review): a filename variant embedding {val_loss:.2f} was dropped
    # upstream because the val_loss key errors out on very small datasets.
    timestamp = time.strftime("%Y%m%d%H%M", time.localtime())
    dump_file = os.path.join(
        args.model, timestamp + '_keras_weights.{epoch:02d}.ckpt')
    checkpoint_cb = ModelCheckpoint(dump_file, monitor='val_loss', verbose=0,
                                    save_best_only=False,
                                    save_weights_only=False, mode='auto')

    model.fit_generator(generator=train_loader,
                        validation_data=val_loader,
                        nb_epoch=args.epochs,
                        verbose=1,
                        use_multiprocessing=True,
                        workers=6,
                        callbacks=[checkpoint_cb])
def calculate_scalar(args):
    """Estimate normalization statistics from training spectrograms and pickle them.

    Draws up to ``max_iter`` batches from the 'train' partition, takes the
    magnitude of the complex spectrograms, applies the project ``transform``,
    and writes the resulting (mean_, std_) pair to
    ``<workspace>/scalars/<filename>/scalar.p``.

    Fixes vs. previous revision: no longer shadows the builtin ``iter``, and
    the output pickle file handle is closed deterministically via ``with``
    (it was previously leaked).
    """
    workspace = args.workspace
    stack_num = args.stack_num
    hop_frames = args.hop_frames
    filename = args.filename
    audio_type = 'speech'

    hdf5_file = os.path.join(args.workspace, "features", "cmplx_spectrogram.h5")
    data_type = 'train'
    batch_size = 500
    data_loader = pp_data.DataLoader(hdf5_file, data_type, audio_type,
                                     stack_num, hop_frames, center_only=True,
                                     batch_size=batch_size)

    # Accumulate a bounded number of batches so the estimate stays cheap.
    x_all = []
    max_iter = 100
    for batch_idx, (batch_x, batch_y) in enumerate(data_loader.generate(), 1):
        x_all.append(batch_x)
        if batch_idx == max_iter:
            break

    x_all = np.concatenate(x_all, axis=0)
    x_all = np.abs(x_all)  # magnitude of the complex spectrogram
    x_all = transform(x_all, type='numpy')

    (mean_, std_) = pp_data.calculate_scalar(x_all)

    out_path = os.path.join(workspace, "scalars", filename, "scalar.p")
    pp_data.create_folder(os.path.dirname(out_path))
    with open(out_path, 'wb') as f:
        cPickle.dump((mean_, std_), f, protocol=cPickle.HIGHEST_PROTOCOL)
    print("Scalar saved to %s" % out_path)
def main(args):
    """Evaluate a saved model on the 'evaluation' partition and print per-tag EERs.

    Writes chunk-level scores to ``eval_result.csv`` inside ``args.data``,
    then computes one equal-error-rate per class from that file.
    """
    model = load_model(args.model)
    data_test = pp_data.DataLoader(args.data, partition='evaluation')

    n_chunks = len(data_test)
    pred_scores = np.zeros((n_chunks, n_classes))
    true_scores = np.zeros((n_chunks, n_classes))
    result_rows = []
    chunk_refs = []

    for it, [X, y] in enumerate(data_test):
        # Average frame-level predictions into one chunk-level score vector.
        chunk_pred = model.predict(X).mean(axis=0)
        chunk_true = y[0, :]
        pred_scores[it, :] = chunk_pred
        true_scores[it, :] = chunk_true

        chunk_ref = "chunk_" + str(it)
        chunk_refs.append(chunk_ref)
        for cl in range(n_classes):
            result_rows.append([chunk_ref, ind_to_tag[cl], chunk_pred[cl]])

    eval_result_filename = "eval_result.csv"
    result_path = os.path.join(args.data, eval_result_filename)
    with open(result_path, "w") as csvfile:
        csv.writer(csvfile).writerows(result_rows)

    eers = np.zeros(n_classes)
    for cl in range(n_classes):
        truth_by_chunk = dict(zip(chunk_refs, list(true_scores[:, cl])))
        eers[cl] = compute_eer(result_path, ind_to_tag[cl], truth_by_chunk)
    print("EER scores per tag:", eers)
def train(args):
    """Train the magnitude-spectrogram DNN (DFT variant) with periodic eval/checkpoints.

    Every 500 iterations the model is evaluated on held-out train/test
    loaders; every 5000 iterations a checkpoint is written to
    ``<workspace>/models/<filename>/<audio_type>/md_<iter>_iters.tar``.

    Fixes vs. previous revision:
      * no longer shadows the builtin ``iter``,
      * the scalar pickle file is closed via ``with`` (was leaked),
      * the checkpoint's 'te_loss' entry now stores the actual test loss
        (it previously stored the last *training* batch loss as a live
        tensor, which also serialized the autograd graph),
      * dead code removed (unused ``loss_ary``, trailing ``t1`` rebind,
        commented-out transform lines).

    NOTE(review): the generator appears to yield indefinitely, so training
    stops only when the process is interrupted — confirm this is intended.
    """
    workspace = args.workspace
    audio_type = args.audio_type
    stack_num = args.stack_num
    hop_frames = args.hop_frames
    filename = args.filename
    cuda = args.use_cuda and torch.cuda.is_available()
    fft_size = cfg.fft_size
    print("cuda:", cuda)

    hdf5_file = os.path.join(args.workspace, "features", "cmplx_spectrogram.h5")
    data_type = 'train'
    t1 = time.time()
    batch_size = 500
    shuffle = False
    load_raw = False
    data_loader = pp_data.DataLoader(hdf5_file, data_type, audio_type,
                                     stack_num, hop_frames, center_only=True,
                                     batch_size=batch_size, shuffle=shuffle,
                                     load_raw=load_raw)
    eval_tr_data_loader = pp_data.DataLoader(hdf5_file, 'train', audio_type,
                                             stack_num, hop_frames,
                                             center_only=True,
                                             batch_size=batch_size,
                                             shuffle=shuffle,
                                             load_raw=load_raw)
    eval_te_data_loader = pp_data.DataLoader(hdf5_file, 'test', audio_type,
                                             stack_num, hop_frames,
                                             center_only=True,
                                             batch_size=batch_size,
                                             shuffle=shuffle,
                                             load_raw=load_raw)
    print("Load time: %s" % (time.time() - t1))

    # Load normalization scalar (produced by calculate_scalar).
    scalar_path = os.path.join(workspace, "scalars", filename, "scalar.p")
    with open(scalar_path, 'rb') as f:
        (mean_, std_) = cPickle.load(f)
    mean_ = move_data_to_gpu(mean_, cuda)
    std_ = move_data_to_gpu(std_, cuda)

    # Model
    n_freq = 257
    model = DNN(stack_num, n_freq)
    if cuda:
        model.cuda()
    dft = pp_data.DFT(fft_size, cuda)

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0)

    # Train
    n_iter = 0
    te_loss = None
    model_dir = os.path.join(workspace, "models", filename, audio_type)
    pp_data.create_folder(model_dir)
    t_train = time.time()
    for (batch_x, batch_y) in data_loader.generate():
        output = forward(model, batch_x, mean_, std_, dft, cuda)
        batch_y = np.abs(batch_y)  # target is the magnitude spectrum
        batch_y = move_data_to_gpu(batch_y, cuda)
        loss = mse_loss(output, batch_y)

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        n_iter += 1

        # Evaluate.
        if n_iter % 500 == 0:
            t_eval = time.time()
            tr_loss = evaluate(model, eval_tr_data_loader, mean_, std_, dft,
                               cuda)
            te_loss = evaluate(model, eval_te_data_loader, mean_, std_, dft,
                               cuda)
            print("Iter: %d, train err: %f, test err: %f, train time: %s, eval time: %s" % \
                (n_iter, tr_loss, te_loss, time.time() - t_train,
                 time.time() - t_eval))
            t_train = time.time()

        # Save model. 5000 is a multiple of 500, so te_loss was just
        # refreshed by the evaluation branch above.
        if n_iter % 5000 == 0:
            save_out_dict = {
                'iter': n_iter,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'te_loss': te_loss,
            }
            save_out_path = os.path.join(model_dir, "md_%d_iters.tar" % n_iter)
            torch.save(save_out_dict, save_out_path)
            print("Save model to %s" % save_out_path)
def train(args):
    """Train the magnitude-spectrogram DNN with debug plots, periodic eval and checkpoints.

    Every 1000 iterations the current input/target/output magnitudes are
    shown as log-spectrogram images (blocks until the plot window closes);
    every 100 iterations the model is evaluated on held-out train/test
    loaders; every 1000 iterations a checkpoint is written to
    ``<workspace>/models/<filename>/md_<iter>_iters.tar``.

    Fixes vs. previous revision:
      * no longer shadows the builtin ``iter``,
      * the scalar pickle file is closed via ``with`` (was leaked),
      * the checkpoint's 'te_loss' entry now stores the actual test loss
        (it previously stored the last *training* batch loss as a live
        tensor, which also serialized the autograd graph),
      * dead code removed (unused ``loss_ary``, trailing ``t1`` rebind).

    NOTE(review): the generator appears to yield indefinitely, so training
    stops only when the process is interrupted — confirm this is intended.
    """
    workspace = args.workspace
    stack_num = args.stack_num
    hop_frames = args.hop_frames
    filename = args.filename
    cuda = args.use_cuda and torch.cuda.is_available()
    print("cuda:", cuda)

    hdf5_file = os.path.join(args.workspace, "features", "cmplx_spectrogram.h5")
    data_type = 'train'
    t1 = time.time()
    data_loader = pp_data.DataLoader(hdf5_file, data_type, stack_num,
                                     hop_frames, center_only=True,
                                     batch_size=100)
    eval_tr_data_loader = pp_data.DataLoader(hdf5_file, 'train', stack_num,
                                             hop_frames, center_only=True,
                                             batch_size=100)
    eval_te_data_loader = pp_data.DataLoader(hdf5_file, 'test', stack_num,
                                             hop_frames, center_only=True,
                                             batch_size=100)
    print("Load time: %s" % (time.time() - t1))

    # Load normalization scalar (produced by calculate_scalar).
    scalar_path = os.path.join(workspace, "scalars", filename, "scalar.p")
    with open(scalar_path, 'rb') as f:
        (mean_, std_) = cPickle.load(f)
    mean_ = move_data_to_gpu(mean_, cuda)
    std_ = move_data_to_gpu(std_, cuda)

    # Model
    n_freq = 257
    model = DNN(stack_num, n_freq)
    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999),
                           eps=1e-08, weight_decay=0)

    # Train
    n_iter = 0
    te_loss = None
    model_dir = os.path.join(workspace, "models", filename)
    pp_data.create_folder(model_dir)
    t_train = time.time()
    for (batch_x, batch_y) in data_loader.generate():
        output = forward(model, batch_x, mean_, std_, cuda)
        batch_y = np.abs(batch_y)  # target is the magnitude spectrum
        batch_y = move_data_to_gpu(batch_y, cuda)
        loss = mse_loss(output, batch_y)

        # Debug visualization: input (center frame), target and prediction
        # as log-magnitude spectrogram images.
        if n_iter % 1000 == 0:
            fig, axs = plt.subplots(3, 1, sharex=True)
            axs[0].matshow(np.log((np.abs(batch_x[:, 0, :]))).T,
                           origin='lower', aspect='auto', cmap='jet')
            axs[1].matshow(np.log((np.abs(batch_y.data.cpu().numpy()))).T,
                           origin='lower', aspect='auto', cmap='jet')
            axs[2].matshow(np.log((np.abs(output.data.cpu().numpy()))).T,
                           origin='lower', aspect='auto', cmap='jet')
            plt.show()

        # Backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        n_iter += 1

        # Evaluate.
        if n_iter % 100 == 0:
            t_eval = time.time()
            tr_loss = evaluate(model, eval_tr_data_loader, mean_, std_, cuda)
            te_loss = evaluate(model, eval_te_data_loader, mean_, std_, cuda)
            print("Iter: %d, train err: %f, test err: %f, train time: %s, eval time: %s" % \
                (n_iter, tr_loss, te_loss, time.time() - t_train,
                 time.time() - t_eval))
            t_train = time.time()

        # Save model. 1000 is a multiple of 100, so te_loss was just
        # refreshed by the evaluation branch above.
        if n_iter % 1000 == 0:
            save_out_dict = {
                'iter': n_iter,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'te_loss': te_loss,
            }
            save_out_path = os.path.join(model_dir, "md_%d_iters.tar" % n_iter)
            torch.save(save_out_dict, save_out_path)
            print("Save model to %s" % save_out_path)