Example No. 1
def get_best_model(model_type: str) -> nn.Module:
    model: nn.Module = None
    if model_type == 'RNN':
        model = RNN(emb_size=200,
                    hidden_size=1500,
                    seq_len=35,
                    batch_size=20,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
        model.load_state_dict(
            torch.load('./4_1_a/best_params.pt', map_location=device))
    elif model_type == 'GRU':
        model = GRU(emb_size=200,
                    hidden_size=1500,
                    seq_len=35,
                    batch_size=20,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
        model.load_state_dict(
            torch.load('./4_1_b/best_params.pt', map_location=device))
    elif model_type == 'TRANSFORMER':
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=512,
                            n_blocks=6,
                            dropout=1. - 0.9)
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
        model.load_state_dict(
            torch.load('./4_1_c/best_params.pt', map_location=device))
    return model
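A brief usage sketch (an assumption, not part of the original example): `vocab_size` and `device` must already be defined in the enclosing module, and since this helper does not move the model to the device itself, the caller does so:

model = get_best_model('GRU').to(device)  # valid types: 'RNN', 'GRU', 'TRANSFORMER'
model.eval()  # switch to evaluation mode before generating or measuring perplexity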
Example No. 2
def make_my_model(model_name, device, seq_len=35, batch_size=20, pt=None):
    # Reference configurations (training arguments) for each model:
    #          --model=RNN --optimizer=ADAM --initial_lr=0.0001 --batch_size=20 --seq_len=35 --hidden_size=1500 --num_layers=2 --dp_keep_prob=0.35 --save_best
    #          --model=GRU --optimizer=SGD_LR_SCHEDULE --initial_lr=10 --batch_size=20 --seq_len=35 --hidden_size=1500 --num_layers=2 --dp_keep_prob=0.35 --save_best
    #          --model=TRANSFORMER --optimizer=SGD_LR_SCHEDULE --initial_lr=20 --batch_size=128 --seq_len=35 --hidden_size=512 --num_layers=6 --dp_keep_prob=0.9 --save_best
    if model_name == 'RNN':
        model = RNN(emb_size=200,
                    hidden_size=1500,
                    seq_len=seq_len,
                    batch_size=batch_size,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
    elif model_name == 'GRU':
        model = GRU(emb_size=200,
                    hidden_size=1500,
                    seq_len=seq_len,
                    batch_size=batch_size,
                    vocab_size=vocab_size,
                    num_layers=2,
                    dp_keep_prob=0.35)
    elif model_name == 'TRANSFORMER':
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=512,
                            n_blocks=6,
                            dropout=1. - 0.9)
        # these 3 attributes don't affect the Transformer's computations;
        # they are only used in run_epoch
        model.batch_size = 128
        model.seq_len = 35
        model.vocab_size = vocab_size
    else:
        print("ERROR: Model type not recognized.")
        return
    # Model to device
    model = model.to(device)
    # Load pt
    if pt is not None:
        model.load_state_dict(torch.load(pt, map_location=device))
    return model
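A usage sketch under the same assumptions (`vocab_size` defined at module level, the `RNN`/`GRU`/`TRANSFORMER` classes in scope); the checkpoint path here is purely illustrative:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = make_my_model('RNN', device)                                # fresh, randomly initialized weights
model = make_my_model('GRU', device, pt='./saved/best_params.pt')  # restore weights from an illustrative path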
Example No. 3
def load_model(model_info,
               device,
               vocab_size,
               emb_size=200,
               load_on_device=True):
    params_path = model_info.get_params_path()

    if model_info.model == 'RNN':
        model = RNN(emb_size=emb_size,
                    hidden_size=model_info.hidden_size,
                    seq_len=model_info.seq_len,
                    batch_size=model_info.batch_size,
                    vocab_size=vocab_size,
                    num_layers=model_info.num_layers,
                    dp_keep_prob=model_info.dp_keep_prob)
    elif model_info.model == 'GRU':
        model = GRU(emb_size=emb_size,
                    hidden_size=model_info.hidden_size,
                    seq_len=model_info.seq_len,
                    batch_size=model_info.batch_size,
                    vocab_size=vocab_size,
                    num_layers=model_info.num_layers,
                    dp_keep_prob=model_info.dp_keep_prob)
    else:
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=model_info.hidden_size,
                            n_blocks=model_info.num_layers,
                            dropout=1. - model_info.dp_keep_prob)
        model.batch_size = model_info.batch_size
        model.seq_len = model_info.seq_len
        model.vocab_size = vocab_size

    if load_on_device:
        model = model.to(device)
    model.load_state_dict(torch.load(params_path, map_location=device))
    return model
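Here `model_info` is expected to expose the hyperparameters as attributes plus a `get_params_path()` method. A minimal hypothetical stand-in (field names inferred from the attributes the function reads, not taken from the original code) could look like this:

from dataclasses import dataclass

@dataclass
class ModelInfo:  # hypothetical stand-in for the real model_info object
    model: str
    hidden_size: int
    seq_len: int
    batch_size: int
    num_layers: int
    dp_keep_prob: float
    params_path: str

    def get_params_path(self) -> str:
        return self.params_path

info = ModelInfo('GRU', 1500, 35, 20, 2, 0.35, './saved/best_params.pt')
model = load_model(info, device, vocab_size)  # device and vocab_size assumed to exist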
Example No. 4
    # Excerpt from a training script: the TRANSFORMER branch of the model-selection if/elif
    # (the preceding RNN/GRU branches are not included in this example).
    if args.debug:  # use a very small model
        model = TRANSFORMER(vocab_size=vocab_size, n_units=16, n_blocks=2)
    else:
        # Note that we're using num_layers and hidden_size to mean slightly
        # different things here than in the RNNs.
        # The Transformer also has other hyperparameters (such as the number
        # of attention heads) which can change its behavior.
        model = TRANSFORMER(vocab_size=vocab_size,
                            n_units=args.hidden_size,
                            n_blocks=args.num_layers,
                            dropout=1. - args.dp_keep_prob)
    # these 3 attributes don't affect the Transformer's computations;
    # they are only used in run_epoch
    model.batch_size = args.batch_size
    model.seq_len = args.seq_len
    model.vocab_size = vocab_size
else:
    print("Model type not recognized.")

model = model.to(device)

# LOSS FUNCTION
loss_fn = nn.CrossEntropyLoss()
if args.optimizer == 'ADAM':
    optimizer = torch.optim.Adam(model.parameters(), lr=args.initial_lr)

# LEARNING RATE SCHEDULE
lr = args.initial_lr
lr_decay_base = 1 / 1.15
m_flat_lr = 14.0  # we will not touch lr for the first m_flat_lr epochs
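These constants suggest a geometric decay applied once per epoch after the flat period. A hedged sketch of how such a schedule is typically used in the epoch loop (the loop itself is not part of this excerpt, and the exact update in the original script may differ):

for epoch in range(num_epochs):  # num_epochs assumed to come from the script's arguments
    # keep lr flat for the first m_flat_lr epochs, then multiply by lr_decay_base each epoch
    lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
    lr = args.initial_lr * lr_decay
    # run_epoch(...) would then perform its parameter updates with this lr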