Example #1
import argparse

import torch
import torch.nn as nn
import torch.optim as optim

# Project-local modules; names assume the original repository layout.
import transformer.Constants as Constants
from transformer.Models import Transformer
from transformer.Optim import ScheduledOptim
from DataLoader import DataLoader
# train() is assumed to be defined elsewhere in the same script.


def main():
    ''' Main function '''
    parser = argparse.ArgumentParser()

    parser.add_argument('-data', required=True)

    parser.add_argument('-epoch', type=int, default=100)
    parser.add_argument('-batch_size', type=int, default=64)

    #parser.add_argument('-d_word_vec', type=int, default=512)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-d_inner_hid', type=int, default=1024)
    parser.add_argument('-d_k', type=int, default=64)
    parser.add_argument('-d_v', type=int, default=64)

    parser.add_argument('-n_head', type=int, default=8)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-n_warmup_steps', type=int, default=4000)

    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-embs_share_weight', action='store_true')
    parser.add_argument('-proj_share_weight', action='store_true')

    parser.add_argument('-log', default=None)
    parser.add_argument('-save_model', default=None)
    parser.add_argument('-save_mode',
                        type=str,
                        choices=['all', 'best'],
                        default='all')

    parser.add_argument('-no_cuda', action='store_true')

    parser.add_argument('-multi_gpu', action='store_true')

    parser.add_argument('-use_ctx', action='store_true')

    parser.add_argument('-external_validation_script',
                        type=str,
                        default=None,
                        metavar='PATH',
                        nargs='*',
                        help='location of validation script (to run your '
                             'favorite metric for validation) '
                             '(default: %(default)s)')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda
    opt.d_word_vec = opt.d_model

    #========= Loading Dataset =========#
    data = torch.load(opt.data)
    opt.max_token_seq_len = data['settings'].max_token_seq_len

    #========= Preparing DataLoader =========#
    training_data = DataLoader(
        data['dict']['src'],
        data['dict']['tgt'],
        src_insts=data['train']['src'],
        tgt_insts=data['train']['tgt'],
        ctx_insts=(data['train']['ctx'] if opt.use_ctx else None),
        batch_size=opt.batch_size,
        cuda=opt.cuda,
        is_train=True,
        sort_by_length=True)

    validation_data = DataLoader(
        data['dict']['src'],
        data['dict']['tgt'],
        src_insts=data['valid']['src'],
        tgt_insts=data['valid']['tgt'],
        ctx_insts=(data['valid']['ctx'] if opt.use_ctx else None),
        batch_size=opt.batch_size,
        shuffle=False,
        cuda=opt.cuda,
        is_train=False,
        sort_by_length=True)

    opt.src_vocab_size = training_data.src_vocab_size
    opt.tgt_vocab_size = training_data.tgt_vocab_size

    #========= Preparing Model =========#
    if opt.embs_share_weight and training_data.src_word2idx != training_data.tgt_word2idx:
        print('[Warning]',
              'The src/tgt word2idx tables differ, but sharing of the word '
              'embedding weights was requested.')

    print(opt)

    transformer = Transformer(opt.src_vocab_size,
                              opt.tgt_vocab_size,
                              opt.max_token_seq_len,
                              proj_share_weight=opt.proj_share_weight,
                              embs_share_weight=opt.embs_share_weight,
                              d_k=opt.d_k,
                              d_v=opt.d_v,
                              d_model=opt.d_model,
                              d_word_vec=opt.d_word_vec,
                              d_inner_hid=opt.d_inner_hid,
                              n_layers=opt.n_layers,
                              n_head=opt.n_head,
                              dropout=opt.dropout,
                              use_ctx=opt.use_ctx)

    #print(transformer)

    optimizer = ScheduledOptim(
        optim.Adam(transformer.get_trainable_parameters(),
                   betas=(0.9, 0.98),
                   eps=1e-09), opt.d_model, opt.n_warmup_steps)

    def get_criterion(vocab_size):
        ''' Build a loss that gives the PAD token zero weight '''
        weight = torch.ones(vocab_size)
        weight[Constants.PAD] = 0
        # reduction='sum' replaces the deprecated size_average=False.
        #return nn.CrossEntropyLoss(weight, reduction='sum')
        return nn.NLLLoss(weight, reduction='sum')

    crit = get_criterion(training_data.tgt_vocab_size)
    logsoftmax = nn.LogSoftmax(dim=-1)  # explicit dim; bare nn.LogSoftmax() is deprecated

    if opt.cuda:
        transformer = transformer.cuda()
        crit = crit.cuda()
        logsoftmax = logsoftmax.cuda()

    if opt.multi_gpu:
        transformer = nn.DataParallel(transformer)
        crit = nn.DataParallel(crit)
        logsoftmax = nn.DataParallel(logsoftmax)

    train(transformer, training_data, validation_data, crit, logsoftmax,
          optimizer, opt)
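These examples drive Adam through ScheduledOptim, the repository's wrapper implementing the warmup learning-rate schedule from "Attention Is All You Need". As a reference, here is a minimal standalone sketch of that schedule, assuming ScheduledOptim follows the paper's formula lr = d_model^-0.5 * min(step^-0.5, step * n_warmup_steps^-1.5):

def noam_lr(step, d_model=512, n_warmup_steps=4000):
    ''' Warmup schedule: linear ramp for n_warmup_steps, then step**-0.5 decay '''
    return d_model ** -0.5 * min(step ** -0.5, step * n_warmup_steps ** -1.5)

print(noam_lr(1))       # ~1.7e-07, start of warmup
print(noam_lr(4000))    # ~7.0e-04, peak at step == n_warmup_steps
print(noam_lr(100000))  # ~1.4e-04, decayed

The rate rises linearly until step n_warmup_steps and then decays, which is why the examples never set a fixed learning rate on the Adam constructor.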
Example #2
# Imports as in Example #1 (argparse, torch, nn, optim, and the
# project-local Constants, Transformer, ScheduledOptim, DataLoader).
def main():
    ''' Main function '''
    parser = argparse.ArgumentParser()

    parser.add_argument('-data', required=True)

    parser.add_argument('-epoch', type=int, default=10)
    parser.add_argument('-batch_size', type=int, default=64)

    #parser.add_argument('-d_word_vec', type=int, default=512)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-d_inner_hid', type=int, default=1024)
    parser.add_argument('-d_k', type=int, default=64)
    parser.add_argument('-d_v', type=int, default=64)

    parser.add_argument('-n_head', type=int, default=8)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-n_warmup_steps', type=int, default=4000)

    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-embs_share_weight', action='store_true')
    parser.add_argument('-proj_share_weight', action='store_true')

    parser.add_argument('-log', default=None)
    parser.add_argument('-save_model', default=None)
    parser.add_argument('-save_mode',
                        type=str,
                        choices=['all', 'best'],
                        default='best')

    parser.add_argument('-no_cuda', action='store_true')

    opt = parser.parse_args()
    opt.cuda = not opt.no_cuda
    opt.d_word_vec = opt.d_model

    #========= Loading Dataset =========#
    data = torch.load(opt.data)
    opt.max_token_seq_len = data['settings'].max_token_seq_len

    #========= Preparing DataLoader =========#
    training_data = DataLoader(data['dict']['src'],
                               data['dict']['tgt'],
                               src_insts=data['train']['src'],
                               tgt_insts=data['train']['tgt'],
                               batch_size=opt.batch_size,
                               cuda=opt.cuda)

    validation_data = DataLoader(data['dict']['src'],
                                 data['dict']['tgt'],
                                 src_insts=data['valid']['src'],
                                 tgt_insts=data['valid']['tgt'],
                                 batch_size=opt.batch_size,
                                 shuffle=False,
                                 test=True,
                                 cuda=opt.cuda)

    opt.src_vocab_size = training_data.src_vocab_size
    opt.tgt_vocab_size = training_data.tgt_vocab_size

    #========= Preparing Model =========#
    if opt.embs_share_weight and training_data.src_word2idx != training_data.tgt_word2idx:
        print('[Warning]',
              'The src/tgt word2idx tables differ, but sharing of the word '
              'embedding weights was requested.')

    print(opt)

    transformer = Transformer(opt.src_vocab_size,
                              opt.tgt_vocab_size,
                              opt.max_token_seq_len,
                              proj_share_weight=opt.proj_share_weight,
                              embs_share_weight=opt.embs_share_weight,
                              d_k=opt.d_k,
                              d_v=opt.d_v,
                              d_model=opt.d_model,
                              d_word_vec=opt.d_word_vec,
                              d_inner_hid=opt.d_inner_hid,
                              n_layers=opt.n_layers,
                              n_head=opt.n_head,
                              dropout=opt.dropout)

    #print(transformer)

    optimizer = ScheduledOptim(
        optim.Adam(transformer.get_trainable_parameters(),
                   betas=(0.9, 0.98),
                   eps=1e-09), opt.d_model, opt.n_warmup_steps)

    def get_criterion(vocab_size):
        ''' Build a loss that gives the PAD token zero weight '''
        weight = torch.ones(vocab_size)
        weight[Constants.PAD] = 0
        # reduction='sum' replaces the deprecated size_average=False.
        return nn.CrossEntropyLoss(weight, reduction='sum')

    crit = get_criterion(training_data.tgt_vocab_size)

    if opt.cuda:
        transformer = transformer.cuda()
        crit = crit.cuda()

    print("===>TRAIN\n")
    train(transformer, training_data, validation_data, crit, optimizer, opt)
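Both criteria above zero out the PAD entry of the class-weight vector, so padded positions contribute nothing to the loss or the gradient. A small self-contained demonstration of that effect (PAD = 0 is an assumption here; the examples read the index from transformer.Constants):

import torch
import torch.nn as nn

PAD = 0  # assumed PAD index; the examples above use Constants.PAD
vocab_size = 5

weight = torch.ones(vocab_size)
weight[PAD] = 0
crit = nn.CrossEntropyLoss(weight, reduction='sum')

logits = torch.randn(3, vocab_size)         # three predicted positions
gold = torch.tensor([2, 4, PAD])            # the last position is padding
loss_all = crit(logits, gold)
loss_real = crit(logits[:2], gold[:2])      # same loss without the padded position
print(torch.allclose(loss_all, loss_real))  # True: PAD contributes zero loss

With reduction='sum' the loss is a plain sum of per-token terms, so the zero weight removes padding exactly rather than merely down-weighting it in an average.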
Example #3
# Imports as in Example #1, plus configparser; DEVICE is assumed to be a
# module-level torch.device defined elsewhere in the original script.
def main():
    ''' Main function '''
    parser = argparse.ArgumentParser()

    parser.add_argument('-epoch', type=int, default=1)
    parser.add_argument('-batch_size', type=int, default=4)
    parser.add_argument('-context_width', type=int, default=1)
    parser.add_argument('-frame_rate', type=int, default=30)

    #parser.add_argument('-d_word_vec', type=int, default=512)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-d_inner_hid', type=int, default=1024)
    parser.add_argument('-d_k', type=int, default=64)
    parser.add_argument('-d_v', type=int, default=64)

    parser.add_argument('-n_head', type=int, default=8)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-n_warmup_steps', type=int, default=400)

    parser.add_argument('-dropout', type=float, default=0.1)

    parser.add_argument('-log', default=None)
    parser.add_argument('-save_model', default='./exp')
    parser.add_argument('-save_mode',
                        type=str,
                        choices=['all', 'best'],
                        default='best')

    opt = parser.parse_args()

    cfg_path = './config/transformer.cfg'
    config = configparser.ConfigParser()
    config.read(cfg_path)

    #========= Preparing DataLoader =========#
    training_data = DataLoader('train',
                               config,
                               DEVICE,
                               batch_size=opt.batch_size,
                               context_width=opt.context_width,
                               frame_rate=opt.frame_rate)
    validation_data = DataLoader('dev',
                                 config,
                                 DEVICE,
                                 batch_size=opt.batch_size,
                                 context_width=opt.context_width,
                                 frame_rate=opt.frame_rate)
    test_data = DataLoader('test',
                           config,
                           DEVICE,
                           batch_size=opt.batch_size,
                           context_width=opt.context_width,
                           frame_rate=opt.frame_rate)

    #========= Preparing Model =========#

    print(opt)

    input_dim = training_data.features_dim
    output_dim = training_data.vocab_size
    n_inputs_max_seq = max(training_data.inputs_max_seq_lengths,
                           validation_data.inputs_max_seq_lengths,
                           test_data.inputs_max_seq_lengths)
    n_outputs_max_seq = max(training_data.outputs_max_seq_lengths,
                            validation_data.outputs_max_seq_lengths,
                            test_data.outputs_max_seq_lengths)
    print('*************************')
    print('The max length of inputs is %d' % n_inputs_max_seq)
    print('The max length of targets is %d' % n_outputs_max_seq)

    transformer = Transformer(input_dim,
                              output_dim,
                              n_inputs_max_seq,
                              n_outputs_max_seq,
                              d_k=opt.d_k,
                              d_v=opt.d_v,
                              d_model=opt.d_model,
                              d_inner_hid=opt.d_inner_hid,
                              n_layers=opt.n_layers,
                              n_head=opt.n_head,
                              dropout=opt.dropout,
                              device=DEVICE)

    # print(transformer)

    optimizer = ScheduledOptim(
        optim.Adam(transformer.get_trainable_parameters(),
                   betas=(0.9, 0.98),
                   eps=1e-09), opt.d_model, opt.n_warmup_steps)

    def get_criterion(output_dim):
        ''' Build a loss that gives the PAD token zero weight '''
        weight = torch.ones(output_dim)
        weight[Constants.PAD] = 0
        # reduction='sum' replaces the deprecated size_average=False.
        return nn.CrossEntropyLoss(weight, reduction='sum')

    crit = get_criterion(training_data.vocab_size)

    transformer = transformer.to(DEVICE)
    crit = crit.to(DEVICE)

    train(transformer, training_data, validation_data, crit, optimizer, opt)
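One detail shared by all three examples: the attention defaults follow the base Transformer configuration, where the model width is split evenly across heads (512 = 8 heads x 64 dimensions per head). A sanity check along these lines could be added before constructing the model; this is an illustrative addition, not part of the original scripts:

def check_attention_dims(d_model=512, n_head=8, d_k=64, d_v=64):
    ''' Check the usual head-splitting convention: d_model == n_head * d_k == n_head * d_v '''
    assert d_model == n_head * d_k, (d_model, n_head, d_k)
    assert d_model == n_head * d_v, (d_model, n_head, d_v)

check_attention_dims()  # holds for the defaults used in all three examples

The convention is not strictly required by multi-head attention (the projections can map to any n_head * d_k), but a mismatch against these defaults usually signals a misconfigured flag rather than an intentional design.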