Ejemplo n.º 1
0
def main():
    """Parse command-line options, then either train a model or decode.

    The training data is always loaded first because its vocabularies and
    maximum lengths are copied onto ``opts`` and reused in both modes.

    * Training mode (``opts.eval`` is not 1): builds the loss criterion, the
      dev loader and both batchers, optionally replaces the fresh model with
      one loaded from ``opts.model_path``, and calls ``start_train``.
    * Evaluation mode (``opts.eval`` is 1): loads the model from
      ``opts.model_path``, builds a test batcher with batch size 1, and
      calls ``decode``.
    """
    opts = optparser.parse_args()[0]

    train_loader = Loader(opts.train)

    # Derive model/data dimensions from the training set.
    opts.vocab_len = len(train_loader._char_to_id)
    opts.pos_len = len(train_loader._pos_to_id)
    opts.max_pos_len = train_loader._pos_max_len
    opts.max_target_len = train_loader._char_max_len
    # The flags are compared against 1 and stored back as real booleans.
    opts.use_cuda = opts.use_cuda == 1
    opts.eval = opts.eval == 1
    opts.data_size = train_loader.get_data_size()

    # Fall back to CPU when CUDA was requested but is not available.
    if not torch.cuda.is_available():
        opts.use_cuda = False
    torch.manual_seed(opts.seed)
    np.random.seed(opts.seed)

    if not opts.eval:
        # Weight 0 for index 0 so that class does not contribute to the
        # loss (the original comment identifies it as padding).
        loss_weights = torch.ones(opts.vocab_len)
        loss_weights[0] = 0
        # NOTE(review): ``size_average`` is deprecated in newer PyTorch in
        # favour of ``reduction='sum'``; kept as-is for compatibility with
        # the PyTorch version this file targets.
        criterion = nn.NLLLoss(loss_weights, size_average=False)

        # The dev set reuses the training vocabularies.
        c2i, i2c, p2i, i2p = train_loader.get_mappings()
        dev_loader = Loader(opts.dev, c2i, i2c, p2i, i2p)
        # Widen the maximum position length if the dev set needs more.
        opts.max_pos_len = max(opts.max_pos_len, dev_loader._pos_max_len)

        model = Module(opts)
        # Truthiness instead of ``is not ''``: identity comparison against a
        # string literal depends on interpreter interning.  This also skips
        # loading when the option is None.
        if opts.model_path:
            # NOTE(review): torch.load unpickles the file; only point
            # model_path at checkpoints from a trusted source.
            model = torch.load(opts.model_path)

        train_batcher = Batcher(opts.batch_size, train_loader.get_data(),
                                opts.max_pos_len, opts.eval)

        # ``decode_batch`` is not defined in this function; presumably a
        # module-level batch size for decoding -- TODO confirm.
        dev_batcher = Batcher(decode_batch, dev_loader.get_data(),
                              opts.max_pos_len, True)

        # Function-call form prints identically on Python 2 and Python 3.
        print(model)
        start_train(model, criterion, opts, train_batcher, dev_batcher)
    else:
        # NOTE(review): torch.load unpickles the file; only point
        # model_path at checkpoints from a trusted source.
        model = torch.load(opts.model_path)
        model.eval()

        # The test set reuses the training vocabularies.
        c2i, i2c, p2i, i2p = train_loader.get_mappings()

        test_loader = Loader(opts.test, c2i, i2c, p2i, i2p)
        # Widen the maximum position length if the test set needs more.
        opts.max_pos_len = max(opts.max_pos_len, test_loader._pos_max_len)
        test_batcher = Batcher(1, test_loader.get_data(), opts.max_pos_len,
                               opts.eval)

        # From here on data_size refers to the test set.
        opts.data_size = test_loader.get_data_size()
        decode(model, opts, test_batcher, i2c, i2p)