Example #1
0
def load_model(path, vocab_size, model_class='GRU'):
    """
    Build a model with the default architecture and load its saved weights.

    From: https://pytorch.org/tutorials/beginner/saving_loading_models.html

    Default model parameters:
    --model=RNN --optimizer=SGD --initial_lr=1.0 --batch_size=128 --seq_len=35
                --hidden_size=512 --num_layers=2 --dp_keep_prob=0.8
                --num_epochs=20 --save_best
    --model=GRU --optimizer=ADAM --initial_lr=0.001 --batch_size=128
                --seq_len=35 --hidden_size=512 --num_layers=2
                --dp_keep_prob=0.5  --num_epochs=20 --save_best

    :param path: location of the saved state dict; passed to ``torch.load``.
    :param vocab_size: vocabulary size forwarded to the model constructor.
    :param model_class: ``'GRU'`` builds a ``GRU``; any other value builds
        an ``RNN``.
    :return: the constructed model with the saved state dict loaded into it.
    """

    # Compare by value, not identity: ``is`` only matches equal strings when
    # the interpreter happens to intern them, so a value built at runtime
    # (e.g. parsed from the command line) would silently select the RNN.
    model_cls = GRU if model_class == 'GRU' else RNN

    # Default architecture, identical for both model classes.
    # NOTE(review): the GRU configuration quoted in the docstring uses
    # dp_keep_prob=0.5, but 0.8 is passed for both classes here -- confirm
    # which value is intended before relying on dropout behaviour.
    model = model_cls(emb_size=200,
                      hidden_size=512,
                      seq_len=35,
                      batch_size=128,
                      vocab_size=vocab_size,
                      num_layers=2,
                      dp_keep_prob=0.8)

    model.load_state_dict(torch.load(path))

    return model
train_data, valid_data, test_data, word_to_id, id_2_word = raw_data
vocab_size = len(word_to_id)
print('  vocabulary size: {}'.format(vocab_size))


###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # The Transformer also has other hyperparameters
        # (such as the number of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size,
Example #3
0
    initial hidden states for the next mini-batch.

    Using the final hidden states in this way makes sense when the elements of
    the mini-batches are actually successive subsequences in a set of longer sequences.
    This is the case with the way we've processed the Penn Treebank dataset.
    """
    if isinstance(h, Variable):
        return h.detach_()
    else:
        return tuple(repackage_hidden(v) for v in h)


# Rebuild the RNN from the command-line hyperparameters. Note that
# vocab_size is hard-coded to 10000 here rather than taken from `args`.
model = RNN(emb_size=args.emb_size,
            hidden_size=args.hidden_size,
            seq_len=args.seq_len,
            batch_size=args.batch_size,
            vocab_size=10000,
            num_layers=args.num_layers,
            dp_keep_prob=args.dp_keep_prob)

# Restore the saved parameters. The checkpoint path is relative, so it is
# resolved against the current working directory; the directory name encodes
# the training run's settings (RNN, SGD, num_epochs=1, ...).
# NOTE(review): the hyperparameters in `args` presumably have to match the
# ones in the directory name for the state dict to load -- confirm.
model.load_state_dict(
    torch.load(
        "RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=1_save_best_0/best_params.pt"
    ))
# Switch to evaluation mode (assuming RNN is a torch.nn.Module, this
# disables dropout).
model.eval()


def per_timestep_gradients(model, data):
    """
    Find the per-timestep gradients
    """
Example #4
0
    Using the final hidden states in this way makes sense when the elements of
    the mini-batches are actually successive subsequences in a set of longer sequences.
    This is the case with the way we've processed the Penn Treebank dataset.
    """
    if isinstance(h, Variable):
        return h.detach_()
    else:
        return tuple(repackage_hidden(v) for v in h)


if args.model == "RNN":
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=10,
                vocab_size=10000,
                num_layers=args.num_layers,
                dp_keep_prob=0.8)
    model.load_state_dict(
        torch.load(
            "RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=20_save_best_0/best_params.pt"
        ))

elif args.model == "GRU":
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=10,
                vocab_size=10000,
                num_layers=args.num_layers,
Example #5
0
HIDDEN_SIZE = 512
BATCH_SIZE = 128
VOCAB_SIZE = 10000
NUM_LAYERS = 2
DP_KEEP_PROB = 0.8
SEQ_LEN = 35
GENERATED_SEQ_LEN = 34  # passed to model.generate as generated_seq_len

#--------------- LOAD MODEL

# Rebuild the RNN with the constants above and restore the parameters stored
# in `best_params.pt` under MODEL_PATH.
load_path = os.path.join(MODEL_PATH, 'best_params.pt')
model = RNN(emb_size=EMB_SIZE,
            hidden_size=HIDDEN_SIZE,
            seq_len=SEQ_LEN,
            batch_size=BATCH_SIZE,
            vocab_size=VOCAB_SIZE,
            num_layers=NUM_LAYERS,
            dp_keep_prob=DP_KEEP_PROB)

# map_location='cpu' remaps the stored tensors onto the CPU, so a checkpoint
# saved on a GPU can still be loaded on a CPU-only machine.
model.load_state_dict(torch.load(load_path, map_location='cpu'))
hidden = model.init_hidden()
model.eval()

#--------------- GENERATE SAMPLES

# Draw one random starting token id per sequence from [0, VOCAB_SIZE).
# The bound uses VOCAB_SIZE instead of a repeated literal so the sampled ids
# always stay inside the vocabulary the model was built with.
first_words = torch.LongTensor(BATCH_SIZE).random_(0, VOCAB_SIZE)
# Alternative seed (every sequence starts from token id 0):
# samples = model.generate(torch.zeros(BATCH_SIZE).to(torch.long), hidden, generated_seq_len=GENERATED_SEQ_LEN)
samples = model.generate(first_words, hidden, generated_seq_len=GENERATED_SEQ_LEN)

#-------------- CONVERTING TO WORDS
Example #6
0
vocab_size = len(word_to_id)
print('  vocabulary size: {}'.format(vocab_size))

###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
# The vocabulary size is the number of entries in the word -> id mapping.
vocab_size = len(word_to_id)
print('  vocabulary size: {}'.format(vocab_size))

###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
# The model class is selected by `args.model`; every constructor argument
# except vocab_size comes from the command-line arguments.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
    # Restore saved parameters for the RNN from a hard-coded absolute path,
    # so this only works on a machine where that path exists.
    # NOTE(review): the hyperparameters in `args` presumably have to match
    # the ones encoded in the directory name -- confirm.
    model.load_state_dict(
        torch.load(
            '/network/home/bhattdha/IFT6135H20_assignment/Assignment2/practical/Exp1/RNN_SGD_model=RNN_optimizer=SGD_initial_lr=1.0_batch_size=128_seq_len=35_hidden_size=512_num_layers=2_dp_keep_prob=0.8_num_epochs=20_save_best_save_dir=Exp1_0/best_params.pt'
        ))
elif args.model == 'GRU':
    # No checkpoint load for the GRU is visible in this excerpt.
    model = GRU(emb_size=args.emb_size,
                hidden_size=args.hidden_size,
                seq_len=args.seq_len,
                batch_size=args.batch_size,
                vocab_size=vocab_size,
                num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
Example #8
0
    print("WARNING: You are about to run on cpu, and this will likely run out \
      of memory. \n You can try setting batch_size=1 to reduce memory usage")
    device = torch.device("cpu")


###############################################################################
#
# MODEL SETUP
#
###############################################################################

# NOTE ==============================================
# This is where your model code will be called.
if args.model == 'RNN':
    model = RNN(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'GRU':
    model = GRU(emb_size=args.emb_size, hidden_size=args.hidden_size,
                seq_len=args.seq_len, batch_size=args.batch_size,
                vocab_size=vocab_size, num_layers=args.num_layers,
                dp_keep_prob=args.dp_keep_prob)
elif args.model == 'TRANSFORMER':
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # The Transformer also has other hyperparameters
        # (such as the number of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size, n_units=args.hidden_size,