"max_nge", "max_ngvl", "max_ngel", "share_emb", "share_arch"
            ]
    ]):
        # any change to the input-size limits or sharing flags means the input
        # (embedding) layers must be enlarged to cover the finetune data
        model.increase_input_size(finetune_config)
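        # Illustrative sketch only: increase_input_size is assumed to grow the
        # embedding tables in place while preserving the already-trained rows,
        # along the lines of (names below are stand-ins, not this repo's API):
        #     old_weight = emb.weight.data
        #     new_emb = nn.Embedding(new_num_embeddings, old_weight.size(1))
        #     new_emb.weight.data[:old_weight.size(0)] = old_weight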
    # rebuild the prediction head whenever any of its hyperparameters differ
    # between training and finetuning
    if not all([
            train_config[key] == finetune_config[key] for key in [
                "predict_net", "predict_net_hidden_dim",
                "predict_net_num_heads", "predict_net_mem_len",
                "predict_net_mem_init", "predict_net_recurrent_steps"
            ]
    ]):
        # the replacement head starts from freshly initialized weights
        new_predict_net = model.create_predict_net(
            finetune_config["predict_net"],
            pattern_dim=model.predict_net.pattern_dim,
            graph_dim=model.predict_net.graph_dim,
            hidden_dim=finetune_config["predict_net_hidden_dim"],
            num_heads=finetune_config["predict_net_num_heads"],
            recurrent_steps=finetune_config["predict_net_recurrent_steps"],
            mem_len=finetune_config["predict_net_mem_len"],
            mem_init=finetune_config["predict_net_mem_init"])
        # detach the old head and register the new one in its place
        del model.predict_net
        model.predict_net = new_predict_net
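        # Sketch of the assumed dispatch inside create_predict_net; the class
        # names are illustrative and not confirmed against this repo:
        #     if predict_net == "SumPredictNet":
        #         return SumPredictNet(pattern_dim, graph_dim, hidden_dim)
        #     elif predict_net == "DIAMNet":
        #         return DIAMNet(pattern_dim, graph_dim, hidden_dim,
        #                        num_heads, recurrent_steps, mem_len, mem_init)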
    model = model.to(device)
    # free cached GPU memory held by any modules replaced above
    torch.cuda.empty_cache()
    logger.info("load the model based on the dev set (epoch: {:0>3d})".format(
        best_epochs["dev"]))
    logger.info(model)
    logger.info("num of parameters: %d" %
                (sum(p.numel()
                     for p in model.parameters() if p.requires_grad)))