Beispiel #1
0
def load_model(path, vocab_size, model_class='GRU'):
    """
    Build a GRU or RNN with fixed hyperparameters and load saved weights.

    Loading follows
    https://pytorch.org/tutorials/beginner/saving_loading_models.html

    Default training configurations the checkpoints are expected to come
    from:
    --model=RNN --optimizer=SGD --initial_lr=1.0 --batch_size=128 --seq_len=35
                --hidden_size=512 --num_layers=2 --dp_keep_prob=0.8
                --num_epochs=20 --save_best
    --model=GRU --optimizer=ADAM --initial_lr=0.001 --batch_size=128
                --seq_len=35 --hidden_size=512 --num_layers=2
                --dp_keep_prob=0.5  --num_epochs=20 --save_best

    :param path: location of the saved state dict; passed to ``torch.load``.
    :param vocab_size: vocabulary size forwarded to the model constructor.
    :param model_class: ``'GRU'`` builds a GRU; any other value builds an RNN.
    :return: the constructed model with the saved state dict loaded.
    """
    # Compare by value: ``is`` tests object identity, which is only
    # accidentally true for equal strings (and warns on Python 3.8+).
    constructor = GRU if model_class == 'GRU' else RNN

    # Both architectures are built with the same hyperparameters.
    # NOTE(review): the GRU run listed in the docstring uses
    # dp_keep_prob=0.5, but 0.8 has always been used here for both
    # classes; kept as-is to preserve behavior -- confirm which is intended.
    model = constructor(emb_size=200,
                        hidden_size=512,
                        seq_len=35,
                        batch_size=128,
                        vocab_size=vocab_size,
                        num_layers=2,
                        dp_keep_prob=0.8)

    model.load_state_dict(torch.load(path))

    return model
###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model in ('RNN', 'GRU'):
    # The two recurrent models take identical constructor arguments, so
    # only the class itself is chosen from args.model.
    model = (RNN if args.model == 'RNN' else GRU)(
        emb_size=args.emb_size,
        hidden_size=args.hidden_size,
        seq_len=args.seq_len,
        batch_size=args.batch_size,
        vocab_size=vocab_size,
        num_layers=args.num_layers,
        dp_keep_prob=args.dp_keep_prob,
    )
elif args.model == 'TRANSFORMER':
    if not args.debug:
        # Note that num_layers and hidden_size mean slightly different
        # things here than in the RNNs. The Transformer also has other
        # hyperparameters (such as the number of attention heads) which
        # can change its behavior.
        model = TRANSFORMER(
            vocab_size=vocab_size,
            n_units=args.hidden_size,
            n_blocks=args.num_layers,
            dropout=1.0 - args.dp_keep_prob,
        )
    else:
        # Debug mode: use a very small model.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    # These attributes don't affect the Transformer's computations;
    # they are only used in run_epoch.
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
Beispiel #3
0
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=10,
                vocab_size=10000,
                num_layers=args.num_layers,
                dp_keep_prob=0.8)
    model.load_state_dict(
        torch.load(
            "RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=20_save_best_0/best_params.pt"
        ))

elif args.model == "GRU":
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=10,
                vocab_size=10000,
                num_layers=args.num_layers,
                dp_keep_prob=0.5)
    model.load_state_dict(
        torch.load(
            "GRU_ADAM_model=GRU_optimizer=ADAM_initial_lr=0.001_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.5_num_epochs=20_save_best_0/best_params.pt"
        ))
# Debug aid: uncomment to inspect the output-layer weights before eval().
#print(model.out_layer.weight.data)

# Alternative "toy" checkpoint (path names a num_epochs=1 run); uncomment
# to load it in place of the checkpoint loaded above.
#model.load_state_dict(torch.load("RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=1_save_best_0/best_params.pt"))

# Presumably switches the model to evaluation mode (e.g. dropout disabled)
# -- assumes model is a torch.nn.Module; TODO confirm.
model.eval()

# Debug aid: uncomment to inspect the output-layer weights after eval().
#print(model.out_layer.weight.data)
Beispiel #4
0
# Hyperparameters kept for reference only; the same values are hard-coded
# in the GRU(...) call below.
# VOCAB_SIZE = 10000
# NUM_LAYERS = 2
# DP_KEEP_PROB = 0.5
# SEQ_LEN = 35
# Passed to model.generate() as generated_seq_len.
GENERATED_SEQ_LEN = 34



#--------------- LOAD MODEL


# MODEL_PATH is not defined in this excerpt; it is joined with the
# checkpoint file name to locate the saved parameters.
load_path = os.path.join(MODEL_PATH, 'best_params.pt')
model = GRU(emb_size=200,
                    hidden_size=512,
                    seq_len=35,
                    batch_size=128,
                    vocab_size=10000,
                    num_layers=2,
                    dp_keep_prob=0.5)

# map_location='cpu' asks torch.load to remap the stored tensors onto the CPU.
model.load_state_dict(torch.load(load_path, map_location='cpu'))
# Obtained before eval() and later handed to generate() as its hidden argument.
hidden = model.init_hidden()
# Presumably switches the model to evaluation mode (e.g. dropout disabled)
# -- assumes model is a torch.nn.Module; TODO confirm.
model.eval()

#--------------- GENERATE SAMPLES

# Earlier variants kept for reference: random first words, or all-zero first words.
# first_words = torch.LongTensor(BATCH_SIZE).random_(0, 10000)
# samples = model.generate(torch.zeros(BATCH_SIZE).to(torch.long), hidden, generated_seq_len=GENERATED_SEQ_LEN)
# NOTE(review): first_words is not assigned in the visible code (the only
# assignment above is commented out) -- confirm it is defined elsewhere.
samples = model.generate(first_words, hidden, generated_seq_len=GENERATED_SEQ_LEN)

#-------------- CONVERTING TO WORDS