Code Example #1
    def __init__(self, model_file, mean_pose_file, host, port, using_docker):
        """
        This service generates gestures for an input speech segment by passing 
        the audio and the text transcription (as received from the 3D agent)
        to the trained Gesticulator model.

        The generated gestures are first saved into csv files, then the paths
        to those files are sent to the standalone ActiveMQ server, which forwards them 
        to the 3D agent.

        Args:
            model_file:  The pretrained Gesticulator model
            mean_pose_file:  The path to the .npy file that contains the mean pose of the dataset
            host:  The hostname of the ActiveMQ connection.
            port:  The port of the ActiveMQ connection.
            using_docker:  See 'on_message()' for details.
        """
        self.using_docker = using_docker
        self.connection = MessagingServer(listener=self, host=host, port=port)

        print("Loading Blenderbot model")
        self.chatbot = Blenderbot()

        print("Loading text-to-speech model")
        self.tts_model = GlowTTS()

        print("Loading pretrained Gesticulator model")
        self.model = GesticulatorModel.load_from_checkpoint(
            model_file, inference_mode=True)

        print("Creating GesturePredictor interface")
        feature_type = check_feature_type(self.model)
        self.predictor = GesturePredictor(self.model,
                                          feature_type=feature_type)
Code Example #2
File: demo.py Project: teshima058/gesticulator
def main(args):
    # 0. Check feature type based on the model
    feature_type, audio_dim = check_feature_type(args.model_file)

    # 1. Load the model
    model = GesticulatorModel.load_from_checkpoint(args.model_file,
                                                   inference_mode=True)
    # This interface wraps the model for convenient prediction of new gestures
    gp = GesturePredictor(model, feature_type)

    # 2. Predict the gestures with the loaded model
    motion = gp.predict_gestures(args.audio, args.text)

    # 3. Visualize the results (the generated motion is sampled at 20 fps,
    #    hence the division by 20 to get the duration in seconds)
    motion_length_sec = int(motion.shape[1] / 20)

    visualize(motion.detach(),
              "temp.bvh",
              "temp.npy",
              "temp.mp4",
              start_t=0,
              end_t=motion_length_sec,
              data_pipe_dir='../gesticulator/utils/data_pipe.sav')

    # Add the audio to the video
    command = f"ffmpeg -y -i {args.audio} -i temp.mp4 -c:v libx264 -c:a libvorbis -loglevel quiet -shortest {args.video_out}"
    subprocess.call(command.split())

    print("\nGenerated video:", args.video_out)

    # Remove temporary files
    for ext in ["bvh", "npy", "mp4"]:
        os.remove("temp." + ext)
Code Example #3
File: evaluate.py Project: PeterZhouSZ/gesticulator
def main(test_params):
    model = GesticulatorModel.load_from_checkpoint(test_params.model_file,
                                                   inference_mode=True)

    create_save_dirs(model)

    model.generate_evaluation_videos(semantic=test_params.use_semantic_input,
                                     random=test_params.use_random_input)
Code Example #4
def main(hparams):
    model = GesticulatorModel(hparams)
    logger = create_logger(model.save_dir)
    callbacks = [ModelSavingCallback()] if hparams.save_model_every_n_epochs > 0 else []

    trainer = Trainer.from_argparse_args(hparams,
                                         logger=logger,
                                         callbacks=callbacks,
                                         checkpoint_callback=False,
                                         early_stop_callback=False)

    trainer.fit(model)
    trainer.save_checkpoint(os.path.join(model.save_dir, "trained_model.ckpt"))
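Since this example passes hparams straight to Trainer.from_argparse_args(), the namespace presumably combines the standard PyTorch Lightning Trainer flags with the model-specific ones. A sketch of such an entry point, assuming GesticulatorModel.add_model_specific_args() behaves as in the hyperparameter-search example later on this page:

from argparse import ArgumentParser
from pytorch_lightning import Trainer

# Assumed entry point: combine model-specific arguments with the standard
# PyTorch Lightning Trainer arguments before calling main() above.
if __name__ == "__main__":
    parser = ArgumentParser(add_help=False)
    parser = GesticulatorModel.add_model_specific_args(parser)
    parser = Trainer.add_argparse_args(parser)
    main(parser.parse_args())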
Code Example #5
    def __init__(self, model : GesticulatorModel, feature_type : str):
        """An interface for generating gestures using saved GesticulatorModel.

        Args:
            model:           the trained Gesticulator model
            feature_type:    the feature type of the input data (must match the one used in the training dataset!)
        """
        if feature_type not in self.supported_features:
            print(f"ERROR: unknown feature type '{feature_type}'!")
            print(f"Possible values: {self.supported_features}")
            exit(-1)
        
        self.feature_type = feature_type
        self.model = model.eval() # Put the model into 'testing' mode
        self.embedding = self._create_embedding(model.text_dim)
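Typical usage of this interface would be along the following lines; the checkpoint path, the input file names, and the feature type "MFCC" are assumptions, and the feature type must be one of GesturePredictor.supported_features.

# Sketch of typical usage; "MFCC" is an assumed feature type and must be
# listed in GesturePredictor.supported_features.
model = GesticulatorModel.load_from_checkpoint("trained_model.ckpt",
                                               inference_mode=True)
predictor = GesturePredictor(model, feature_type="MFCC")
gestures = predictor.predict_gestures("speech.wav", "speech.json")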
Code Example #6
def load_model(test_params):
    """This function enables the test datasets that were selected by the user"""
    model = GesticulatorModel.load_from_checkpoint(test_params.model_file,
                                                   inference_mode=True)

    # Make sure that at least one of the two test datasets is enabled
    if not test_params.use_semantic_input and not test_params.use_random_input:
        print("ERROR: Please provide at least one of the following two flags:")
        print(
            "       python test.py --use_semantic_input (to use the semantic test input segments)"
        )
        print(
            "       python test.py --use_random_input (to use the random test input segments)"
        )
        exit(-1)

    model.hparams.generate_semantic_test_predictions = test_params.use_semantic_input
    model.hparams.generate_random_test_predictions = test_params.use_random_input

    return model
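The error messages above name the two flags explicitly, so test_params is presumably produced by a parser along these lines (a sketch, not taken from the project):

from argparse import ArgumentParser

# Assumed parser for test.py, reconstructed from the flags named in the
# error messages of load_model() above.
parser = ArgumentParser()
parser.add_argument("--model_file", help="path to the trained model checkpoint")
parser.add_argument("--use_semantic_input", action="store_true",
                    help="use the semantic test input segments")
parser.add_argument("--use_random_input", action="store_true",
                    help="use the random test input segments")

if __name__ == "__main__":
    model = load_model(parser.parse_args())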
Code Example #7
def main(config):
    hparams = Namespace()
    for key, val in config.items():
        setattr(hparams, key, val)

    model = GesticulatorModel(hparams)

    checkpoint_callback = ModelCheckpoint(
        filepath=hparams.model_path,
        save_best_only=True,
        verbose=True,
        monitor="avg_val_loss",
        mode="min",
    )

    trainer = Trainer(
        gpus=len(ray.get_gpu_ids()),
        distributed_backend=hparams.distributed_backend,
        max_nb_epochs=20,
        checkpoint_callback=checkpoint_callback,
    )

    trainer.fit(model)
Code Example #8
def profile_with_clipping(model_file, feature_type, mean_pose_file, input,
                          duration):
    """Profile the inference phase and the conversion from exp. map to joint angles."""
    model = GesticulatorModel.load_from_checkpoint(
        model_file,
        inference_mode=True,
        mean_pose_file=mean_pose_file,
        audio_dim=4)

    predictor = GesturePredictor(model, feature_type)
    truncate_audio(input, duration)

    audio = f"{input}_{duration}s.wav"
    text = f"{input}_{duration}s.json"

    print("Profiling gesture prediction...")
    profiler = cProfile.Profile()

    profiler.enable()

    gestures = predictor.predict_gestures(audio, text)

    out_file = "/home/work/Desktop/repositories/gesticulator/gesticulator/interface/profiling/predicted_rotations_{}.csv"

    np.savetxt(out_file.format('_DATASET_INPUT_x'),
               gestures[:, :, 0],
               delimiter=',')
    np.savetxt(out_file.format('_DATASET_INPUT_y'),
               gestures[:, :, 1],
               delimiter=',')
    np.savetxt(out_file.format('_DATASET_INPUT_z'),
               gestures[:, :, 2],
               delimiter=',')

    # Note: the CSV export above is also included in the profiled region
    profiler.disable()

    profiler.print_stats(sort='cumtime')
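Besides printing to stdout, the collected profile can be dumped to disk and inspected later with the standard-library pstats module; this continuation is generic cProfile usage, not part of the project code above.

import pstats

# Persist the profile and inspect it offline (standard-library usage only)
profiler.dump_stats("prediction_profile.prof")
stats = pstats.Stats("prediction_profile.prof")
stats.sort_stats("cumtime").print_stats(10)  # top 10 entries by cumulative time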
Code Example #9
def main(hparams):
    if hparams.model_checkpoint is None:
        model = GesticulatorModel(hparams)
    else:
        model = GesticulatorModel.load_from_checkpoint(hparams.model_checkpoint, 
            model_checkpoint=hparams.model_checkpoint)

    logger = create_logger(model.save_dir)
    callbacks = [ModelSavingCallback()] if hparams.save_model_every_n_epochs > 0 else []
    
    if hparams.model_checkpoint is None:
        trainer = Trainer.from_argparse_args(hparams, logger=logger, callbacks=callbacks,
            checkpoint_callback=False)
    else:
        # Workaround: manually run the setup steps that would normally
        # happen at the start of training
        model.init_prediction_saving_params()
        model.on_train_start()
        
        trainer = Trainer.from_argparse_args(hparams, resume_from_checkpoint=hparams.model_checkpoint, 
            logger=logger, callbacks=callbacks, checkpoint_callback=False, num_sanity_val_steps=0)
    
    trainer.fit(model)
    trainer.save_checkpoint(os.path.join(model.save_dir, f"trained_model_{model.current_epoch+1}epochs.ckpt"))
Code Example #10
    # denormalize
    gestures = gestures_norm * model.max_val + model.mean_pose[np.newaxis]
    print(gestures.shape)

    np.save(gesture_file, gestures)


if __name__ == "__main__":

    args = parser.parse_args()

    mean_pose = np.zeros(45)
    max_val = np.zeros(45)

    the_model = GesticulatorModel(args, mean_pose, max_val)
    the_model.load_state_dict(torch.load(args.model_file))

    train_dataset = SpeechGestureDataset(args.data_dir,
                                         train=True,
                                         apply_PCA=args.pca)

    # Produce gestures
    print("Generation gestures ...")

    gesture_file = "temp_ges.npy"
    predict(the_model, args.test_audio, args.test_text, gesture_file)
    """print("Making a video ... ")
    epoch = args.curr_epoch

    # define files
Code Example #11

if __name__ == "__main__":
    parent_parser = ArgumentParser(add_help=False)

    parent_parser.add_argument("--gpus",
                               default=[0, 1, 2, 3, 4, 5],
                               help="how many gpus")
    parent_parser.add_argument(
        "--distributed_backend",
        type=str,
        default="dp",
        help="supports three options dp, ddp, ddp2",
    )

    parser = GesticulatorModel.add_model_specific_args(parent_parser)
    hyperparams = parser.parse_args()

    config = {}
    for hparam, val in vars(hyperparams).items():
        if isinstance(val, list):
            # tune.sample_from() expects a callable; list-valued
            # hyperparameters should be sampled with tune.choice() instead
            config[hparam] = tune.choice(val)
        else:
            config[hparam] = val

    class MyAsyncHyperBandScheduler(AsyncHyperBandScheduler):
        def on_trial_error(self, trial_runner, trial):
            if trial.resources.gpu < 4:
                trial.resources = Resources(cpu=trial.resources.cpu * 2,
                                            gpu=trial.resources.gpu * 2)
            super().on_trial_error(trial_runner, trial)
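The excerpt ends before the search is launched; with the legacy Ray Tune API used here, the follow-up would plausibly be a tune.run() call like the sketch below. The metric name matches the mean_loss returned by _train() in the next example, while the resource numbers are placeholders.

# Assumed continuation: launch the hyperparameter search with the custom
# scheduler defined above (resource numbers are placeholders).
tune.run(
    TrainableTrainer,
    config=config,
    scheduler=MyAsyncHyperBandScheduler(metric="mean_loss", mode="min"),
    resources_per_trial={"cpu": 2, "gpu": 1},
)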
Code Example #12
class TrainableTrainer(tune.Trainable):
    def _setup(self, config):
        self.hparams = config["hparams"]  # Namespace()
        for key, val in config.items():
            if key == "hparams":
                continue
            try:
                val = val.item()
            except AttributeError:
                pass

            setattr(self.hparams, key, val)

        self.model = GesticulatorModel(self.hparams)

        checkpoint_callback = ModelCheckpoint(
            filepath=os.path.join(self.logdir, "checkpoint"),
            save_best_only=True,
            verbose=True,
            monitor="avg_val_loss",
            mode="min",
        )

        try:
            gpus = len(ray.get_gpu_ids())
        except Exception:
            print("Failed to query GPU ids from Ray; defaulting to 1 GPU")
            gpus = 1

        self.trainer = Trainer(
            gpus=gpus,
            distributed_backend="dp",
            max_nb_epochs=1,
            checkpoint_callback=checkpoint_callback,
            nb_sanity_val_steps=2,
            log_gpu_memory="all",
            weights_summary=None,
            early_stop_callback=None,
            # show_progress_bar=False,
            train_percent_check=0.00001 if self.hparams.dev_test else 1,
        )
        self.val_loss = float("inf")

    def _train(self):
        self.trainer.fit(self.model)
        self.val_loss = self.trainer.callback_metrics["avg_val_loss"]
        return {"mean_loss": self.val_loss}

    def _generate_video(self):
        print("generating video!")

        seq_len = 300
        text_len = int(seq_len / 2)

        # read data
        dev_dir = "/home/tarask/Documents/storage/SpeechToMotion/Irish/WithTextV5/dev_inputs"
        speech_data = np.load(dev_dir +
                              "/X_test_NaturalTalking_01.npy")[:seq_len]
        text = np.load(dev_dir + "/T_test_NaturalTalking_01.npy")[:text_len]

        # upsample text to get the same sampling rate as the audio
        cols = np.linspace(0,
                           text.shape[0],
                           endpoint=False,
                           num=text.shape[0] * 2,
                           dtype=int)
        text_data = text[cols, :]

        # Convert to float tensors and put on GPU
        speech = torch.tensor([speech_data]).float().cuda()
        text = torch.tensor([text_data]).float().cuda()
        # Test on validation sequences without teacher forcing
        predicted_gesture = self.model.forward(speech,
                                               text,
                                               condition=True,
                                               motion=None,
                                               teacher=False)
        """if self.hparams.pca_model:
            pca_output = pca.inverse_transform(val.reshape(-1, self.hparams.pose_dim))
            output = pca_output.reshape(val.shape[0], val.shape[1], -1)
        else:
            output = val"""

        gen_dir = "/home/tarask/Documents/Code/CVPR2020/gesticulator/log/gestures/"
        ges_file = gen_dir + self.logdir[88:95] + ".npy"
        np.save(ges_file, predicted_gesture.detach().cpu().numpy())
        print("Writing into: ", gen_dir + self.logdir[88:95] + ".npy")

    def _stop(self):
        if self.val_loss < 0.3 and self.iteration >= 4:
            self._generate_video()

    def _save(self, tmp_checkpoint_dir):
        print("Saving!")
        return {}

    def _restore(self, checkpoint):
        print("Restoring!")