def handle2x(config, args):
    # resize input
    h1, w1, scale1 = pad_to_height(config.img_size[0], args.img1_height, args.img1_width)
    h2, w2, scale2 = pad_to_height(config.img_size[0], args.img2_height, args.img2_width)

    # load trained model
    net = get_autoencoder(config)
    net.load_state_dict(torch.load(args.model_path))
    net.to(config.device)
    net.eval()

    # mean/std pose
    mean_pose, std_pose = get_meanpose(config)

    # get input
    input1 = openpose2motion(args.vid1_json_dir, scale=scale1, max_frame=args.max_length)
    input2 = openpose2motion(args.vid2_json_dir, scale=scale2, max_frame=args.max_length)
    input1 = preprocess_motion2d(input1, mean_pose, std_pose)
    input2 = preprocess_motion2d(input2, mean_pose, std_pose)
    input1 = input1.to(config.device)
    input2 = input2.to(config.device)

    # transfer by network
    out12 = net.transfer(input1, input2)
    out21 = net.transfer(input2, input1)

    # postprocessing the outputs
    input1 = postprocess_motion2d(input1, mean_pose, std_pose, w1 // 2, h1 // 2)
    input2 = postprocess_motion2d(input2, mean_pose, std_pose, w2 // 2, h2 // 2)
    out12 = postprocess_motion2d(out12, mean_pose, std_pose, w2 // 2, h2 // 2)
    out21 = postprocess_motion2d(out21, mean_pose, std_pose, w1 // 2, h1 // 2)

    if not args.disable_smooth:
        out12 = gaussian_filter1d(out12, sigma=2, axis=-1)
        out21 = gaussian_filter1d(out21, sigma=2, axis=-1)

    if args.out_dir is not None:
        save_dir = args.out_dir
        ensure_dir(save_dir)
        color1 = hex2rgb(args.color1)
        color2 = hex2rgb(args.color2)
        np.savez(os.path.join(save_dir, 'results.npz'),
                 input1=input1, input2=input2, out12=out12, out21=out21)
        if args.render_video:
            print("Generating videos...")
            motion2video(input1, h1, w1, os.path.join(save_dir, 'input1.mp4'), color1, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
            motion2video(input2, h2, w2, os.path.join(save_dir, 'input2.mp4'), color2, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
            motion2video(out12, h2, w2, os.path.join(save_dir, 'out12.mp4'), color2, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
            motion2video(out21, h1, w1, os.path.join(save_dir, 'out21.mp4'), color1, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
    print("Done.")
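# `pad_to_height` is used by several entry points here but never defined. A
# minimal sketch of what it likely computes, assuming it uniformly rescales a
# frame so its height matches the model's canonical height and returns the
# scaled size plus the scale factor (hypothetical reconstruction, not the
# repository's actual helper):
def pad_to_height(target_height, img_height, img_width):
    scale = target_height / img_height   # uniform scale to the canonical height
    h = int(img_height * scale)
    w = int(img_width * scale)
    return h, w, scale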
def train_autoencoder():
    print("found {} files".format(len(wav_files)))
    encoder = keras.models.load_model("ae/encoder-test2.h")
    decoder = keras.models.load_model("ae/decoder-test2.h")

    train_generator = NoteIsoSequence(train_wav_files, sample_duration=sample_duration,
                                      sample_rate=sample_rate, n_fft=n_fft, batch_size=batch_size,
                                      epsilon=epsilon, song_indices=song_indices,
                                      instr_indices=instr_indices, note_indices=note_indices)
    test_generator = NoteIsoSequence(test_wav_files, sample_duration=sample_duration,
                                     sample_rate=sample_rate, n_fft=n_fft, batch_size=batch_size,
                                     epsilon=epsilon, song_indices=song_indices,
                                     instr_indices=instr_indices, note_indices=note_indices)

    # tb = keras.callbacks.TensorBoard(histogram_freq=0, write_grads=True)
    encoder, decoder, autoencoder = get_autoencoder(encoder, decoder)
    autoencoder.summary()

    now = datetime.now()
    log_dir = "logs/ae-" + now.strftime("%Y-%m-%d-%H:%M:%S") + "/"
    callbacks = [TensorBoardWrapper(test_generator, log_dir=log_dir, nb_steps=5,
                                    histogram_freq=0, batch_size=batch_size, write_graph=False,
                                    write_grads=True, write_images=False)]

    autoencoder.fit_generator(generator=train_generator, validation_data=test_generator,
                              use_multiprocessing=use_multiprocessing, workers=workers,
                              epochs=epochs, steps_per_epoch=steps_per_epoch,
                              validation_steps=validation_steps, callbacks=callbacks)

    print("saving model...")
    encoder.save("ae/encoder-test2.h")
    decoder.save("ae/decoder-test2.h")
    autoencoder.save("ae/ae-test2.h")
    print("saved autoencoder.")
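# Note: fit_generator is deprecated in TF2-era Keras; a Sequence can be passed
# straight to fit(). An equivalent call under that API (a sketch reusing the
# same variables as the function above):
autoencoder.fit(train_generator,
                validation_data=test_generator,
                epochs=epochs,
                steps_per_epoch=steps_per_epoch,
                validation_steps=validation_steps,
                use_multiprocessing=use_multiprocessing,
                workers=workers,
                callbacks=callbacks)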
def test():
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--name', type=str, choices=['skeleton', 'view', 'full'],
                        required=True, help='which structure to use.')
    parser.add_argument('-p', '--model_path', type=str, default="model/pretrained_view.pth")
    parser.add_argument('--phase', type=str, default="test", choices=['train', 'test'])
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False,
                        help="specify gpu ids")
    args = parser.parse_args()

    # set config
    config.initialize(args)
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_ids)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load trained model
    net = get_autoencoder(config)
    net.load_state_dict(torch.load(args.model_path))
    net.to(config.device)
    net.eval()

    # get dataset
    train_ds = MixamoDatasetForFull(args.phase, config)
    cluster_data = train_ds.get_cluster_data()

    # score, img = cluster_body(net, cluster_data, device, './cluster_body.png')
    if args.name == 'view':
        cluster_view(net, cluster_data, device, './cluster_view.png')
        cluster_motion(net, cluster_data, device, './cluster_motion.png')
    elif args.name == 'skeleton':
        cluster_body(net, cluster_data, device, './cluster_body.png')
        cluster_motion(net, cluster_data, device, './cluster_motion.png', mode='body')
    else:
        cluster_motion(net, cluster_data, device, './cluster_motion.png')
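# Example invocation, assuming this function is the entry point of a script
# such as test.py (the filename is an assumption; the flags match the parser
# above):
#   python test.py --name view --model_path model/pretrained_view.pth --gpu_ids 0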
def main():
    run_config = tf.contrib.learn.RunConfig(save_checkpoints_steps=1000)
    hparams = tf.contrib.training.HParams(type="image",
                                          batch_size=64,
                                          learning_rate=0.01,
                                          lr_scheme="exp",
                                          delay=0,
                                          staircased=False,
                                          learning_rate_decay_interval=2000,
                                          learning_rate_decay_rate=0.1,
                                          clip_grad_norm=1.0,
                                          l2_loss=0.0,
                                          label_smoothing=0.1,
                                          init_scheme="random",
                                          warmup_steps=10000,
                                          encoder_depth=2,
                                          decoder_depth=2,
                                          hidden_size=100,
                                          is_ae=True,
                                          activation=tf.nn.sigmoid,
                                          enc_layers=[50, 50],
                                          dec_layers=[50],
                                          label_shape=[1],
                                          dropout=0,
                                          channels=1,
                                          input_shape=[28, 28, 1],
                                          output_shape=[28, 28, 1])

    train_input_fn = get_mnist("tmp/data", hparams, training=True)
    eval_input_fn = get_mnist("tmp/data", hparams, training=False)

    estimator = tf.estimator.Estimator(model_fn=get_autoencoder(hparams, 0.01),
                                       model_dir="tmp/run", config=run_config)
    estimator.train(train_input_fn, steps=100)
    estimator.evaluate(eval_input_fn, steps=10)
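# tf.contrib was removed in TensorFlow 2.x, so the snippet above only runs on
# TF 1.x. Even under TF 1.x, the contrib-free equivalents would look like the
# sketch below; HParams has no direct core replacement, so a plain dict is a
# common stand-in (an assumption, not the author's code):
run_config = tf.estimator.RunConfig(save_checkpoints_steps=1000)
hparams = {"batch_size": 64, "learning_rate": 0.01}  # ...remaining fields as above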
def motion_feature_extract(config, args):
    # resize input
    h1, w1, scale1 = pad_to_height(config.img_size[0], args.img1_height, args.img1_width)

    # load trained model
    net = get_autoencoder(config)
    net.load_state_dict(torch.load(args.model_path))
    net.to(config.device)
    net.eval()

    # mean/std pose
    mean_pose, std_pose = get_meanpose(config)

    # get input
    input1 = openpose2motion(args.vid1_json_dir, scale=scale1, max_frame=args.max_length)
    print("after motion")
    print(input1.shape)
    input1 = preprocess_motion2d(input1, mean_pose, std_pose)
    print("after preprocess")
    print(input1.shape)

    if args.out_dir is not None:
        save_dir = args.out_dir
        ensure_dir(save_dir)
        # color1 = hex2rgb(args.color1)
        # color2 = hex2rgb(args.color2)
        np.savez(os.path.join(save_dir, 'pose_feature.npz'), pose=input1)

    input1 = input1.to(config.device)

    # transfer by network
    # out = net.transfer_three(input1, input2, input3)
    out = net.forward(input1)
    mot = net.mot_encoder(input1)
    print(mot.shape)

    # postprocessing the outputs
    input1 = postprocess_motion2d(input1, mean_pose, std_pose, w1 // 2, h1 // 2)
    out = postprocess_motion2d(out, mean_pose, std_pose, w1 // 2, h1 // 2)
    print("after postprocess")
    print(input1.shape)

    if not args.disable_smooth:
        out = gaussian_filter1d(out, sigma=2, axis=-1)

    # (disabled) saving and rendering of the three-input transfer results;
    # note it references input2/input3 and sizes/colors not defined here:
    # if args.out_dir is not None:
    #     save_dir = args.out_dir
    #     ensure_dir(save_dir)
    #     np.savez(os.path.join(save_dir, 'results.npz'),
    #              input1=input1, input2=input2, input3=input3, out=out)
    #     if args.render_video:
    #         print("Generating videos...")
    #         motion2video(input1, h1, w1, os.path.join(save_dir, 'input1.mp4'), color1, args.transparency,
    #                      fps=args.fps, save_frame=args.save_frame)
    #         motion2video(input2, h2, w2, os.path.join(save_dir, 'input2.mp4'), color2, args.transparency,
    #                      fps=args.fps, save_frame=args.save_frame)
    #         motion2video(input3, h3, w3, os.path.join(save_dir, 'input3.mp4'), color3, args.transparency,
    #                      fps=args.fps, save_frame=args.save_frame)
    #         motion2video(out, h1, w1, os.path.join(save_dir, 'out.mp4'), color2, args.transparency,
    #                      fps=args.fps, save_frame=args.save_frame)
    print("Done.")
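# If the motion embedding itself is the feature of interest, it could be
# exported the same way as the pose tensor (a sketch reusing `mot` and
# `save_dir` from the function above; 'motion_feature.npz' is an assumed name):
mot_np = mot.detach().cpu().numpy()
np.savez(os.path.join(save_dir, 'motion_feature.npz'), motion=mot_np)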
from generator_pair import Dual_Track_Generator
from loss import (pair_loss, cumulative_point_distance_error, mean_point_distance_error,
                  pair_cumulative_point_distance_error, pair_mean_point_distance_error,
                  loss_weight_adjustments, base_loss_function)
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, TensorBoard
from keras.optimizers import Adam

# input shape of 2 poses/frame
input_shape = (2, 2 * number_of_coordinates)

# layer_sizes = [30, 20, 10]
# layer_sizes = [128, 64, 10]
# layer_sizes = [256, 128, 64, 10]
layer_sizes = [512, 256, 128, 64, 10]

autoencoder, encoder, decoder = get_autoencoder(input_shape, layer_sizes,
                                                is_variational=False, verbose=True)
model = get_sequence_model(autoencoder, input_shape,
                           number_of_frames=number_of_frames, factor=2, verbose=True)

sequences_df = pd.read_csv("pose_pair_sequences.csv", sep=",", header=0, index_col=None)
print(len(sequences_df), "records:")
print(sequences_df.head(5))

# get all counts
print()
print("Counts:")
counts_df = sequences_df.groupby(["sequence_id"], as_index=False).count().loc[:, ["sequence_id", "step"]]
print(counts_df.head(5))

# extract all sequence IDs with at least ${number_of_frames} steps
print()
print("Suitable training sequences:")
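# One way to complete the extraction announced above (assumption: a sequence
# qualifies when its step count reaches number_of_frames):
suitable_ids = counts_df.loc[counts_df["step"] >= number_of_frames, "sequence_id"]
print(suitable_ids.head(5))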
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--name', type=str, choices=['skeleton', 'view', 'full'],
                        required=True, help='which structure to use')
    # parser.add_argument('-c', '--continue', dest='continue_path', type=str, required=False)
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False,
                        help="specify gpu ids")
    parser.add_argument('--disable_triplet', action='store_true', default=False,
                        help="disable triplet loss")
    parser.add_argument('--use_footvel_loss', action='store_true', default=False,
                        help="use footvel loss")
    parser.add_argument('--vis', action='store_true', default=False,
                        help="visualize output in training")
    args = parser.parse_args()

    config.initialize(args)

    net = get_autoencoder(config)
    print(net)
    net = net.to(config.device)

    # create tensorboard writer
    train_tb = SummaryWriter(os.path.join(config.log_dir, 'train.events'))
    val_tb = SummaryWriter(os.path.join(config.log_dir, 'val.events'))

    # create dataloader
    train_loader = get_dataloader('train', config, config.batch_size, config.num_workers)
    mean_pose, std_pose = train_loader.dataset.mean_pose, train_loader.dataset.std_pose
    val_loader = get_dataloader('test', config, config.batch_size, config.num_workers)
    val_loader = cycle(val_loader)

    # create training agent
    tr_agent = get_training_agent(config, net)
    clock = tr_agent.clock

    # start training
    for e in range(config.nr_epochs):
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            outputs, losses = tr_agent.train_func(data)
            losses_values = {k: v.item() for k, v in losses.items()}

            # record loss to tensorboard
            for k, v in losses_values.items():
                train_tb.add_scalar(k, v, clock.step)

            # visualize
            if args.vis and clock.step % config.visualize_frequency == 0:
                imgs = visulize_motion_in_training(outputs, mean_pose, std_pose)
                for k, img in imgs.items():
                    train_tb.add_image(k, torch.from_numpy(img), clock.step)

            pbar.set_description("EPOCH[{}][{}/{}]".format(e, b, len(train_loader)))
            pbar.set_postfix(OrderedDict({"loss": sum(losses_values.values())}))

            # validation step
            if clock.step % config.val_frequency == 0:
                data = next(val_loader)
                outputs, losses = tr_agent.val_func(data)
                losses_values = {k: v.item() for k, v in losses.items()}

                for k, v in losses_values.items():
                    val_tb.add_scalar(k, v, clock.step)

                if args.vis and clock.step % config.visualize_frequency == 0:
                    imgs = visulize_motion_in_training(outputs, mean_pose, std_pose)
                    for k, img in imgs.items():
                        val_tb.add_image(k, torch.from_numpy(img), clock.step)

            clock.tick()

        train_tb.add_scalar('learning_rate', tr_agent.optimizer.param_groups[-1]['lr'], clock.epoch)
        tr_agent.update_learning_rate()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_network()
        tr_agent.save_network('latest.pth.tar')

        clock.tock()
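# `cycle` above is presumably a restarting generator rather than
# itertools.cycle, which would cache an entire epoch of batches in memory.
# A common implementation (an assumption, not necessarily the source's):
def cycle(iterable):
    while True:
        for x in iterable:
            yield x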
def handle2x(config, args):
    w1 = h1 = w2 = h2 = 512

    # load trained model
    net = get_autoencoder(config)
    net.load_state_dict(torch.load(args.model_path))
    net.to(config.device)
    net.eval()

    # mean/std pose
    mean_pose, std_pose = get_meanpose(config)

    # get input
    dataloader = get_dataloader('test', config)
    input1 = dataloader.dataset.preprocessing(args.path1, args.view1).unsqueeze(0)
    input2 = dataloader.dataset.preprocessing(args.path2, args.view2).unsqueeze(0)
    input1 = input1.to(config.device)
    input2 = input2.to(config.device)

    # transfer by network
    out12 = net.transfer(input1, input2)
    out21 = net.transfer(input2, input1)

    # postprocessing the outputs
    input1 = postprocess_motion2d(input1, mean_pose, std_pose, w1 // 2, h1 // 2)
    input2 = postprocess_motion2d(input2, mean_pose, std_pose, w2 // 2, h2 // 2)
    out12 = postprocess_motion2d(out12, mean_pose, std_pose, w2 // 2, h2 // 2)
    out21 = postprocess_motion2d(out21, mean_pose, std_pose, w1 // 2, h1 // 2)

    if not args.disable_smooth:
        out12 = gaussian_filter1d(out12, sigma=2, axis=-1)
        out21 = gaussian_filter1d(out21, sigma=2, axis=-1)

    if args.out_dir is not None:
        save_dir = args.out_dir
        ensure_dir(save_dir)
        color1 = hex2rgb(args.color1)
        color2 = hex2rgb(args.color2)
        np.savez(os.path.join(save_dir, 'results.npz'),
                 input1=input1, input2=input2, out12=out12, out21=out21)
        if args.render_video:
            print("Generating videos...")
            motion2video(input1, h1, w1, os.path.join(save_dir, 'input1.mp4'), color1, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
            motion2video(input2, h2, w2, os.path.join(save_dir, 'input2.mp4'), color2, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
            motion2video(out12, h2, w2, os.path.join(save_dir, 'out12.mp4'), color2, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
            motion2video(out21, h1, w1, os.path.join(save_dir, 'out21.mp4'), color1, args.transparency,
                         fps=args.fps, save_frame=args.save_frame)
    print("Done.")
parser.add_argument('--transparency', action='store_true',
                    help="make background transparent in resulting frames")
parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False)
args = parser.parse_args()

config.initialize(args)

# if keeping no attribute, interpolate over all three latent spaces
if args.keep_attr == 'none':
    assert args.form == 'line'

# clip and pad the video
h1, w1, scale1 = pad_to_height(config.img_size[0], args.img1_height, args.img1_width)
h2, w2, scale2 = pad_to_height(config.img_size[0], args.img2_height, args.img2_width)

# load trained model
net = get_autoencoder(config)
net.load_state_dict(torch.load(args.model_path))
net.to(config.device)
net.eval()

# mean/std pose
mean_pose, std_pose = get_meanpose(config)

# process input data
input1 = openpose2motion(args.vid1_json_dir, scale=scale1, max_frame=args.max_length)
input2 = openpose2motion(args.vid2_json_dir, scale=scale2, max_frame=args.max_length)
if input1.shape[-1] != input2.shape[-1]:
    # truncate both clips to the common (shorter) length
    length = min(input1.shape[-1], input2.shape[-1])
    input1 = input1[:, :, :length]
    input2 = input2[:, :, :length]
input1 = preprocess_motion2d(input1, mean_pose, std_pose)