コード例 #1
0
def train():
    """Train the translation Transformer on GPUs 0-3 and print one sample.

    Steps, in order:

    1. Load the vocabularies and dataset splits via ``generate_dataloaders``.
    2. Build a 6-layer model and a label-smoothed criterion, both moved to
       CUDA, and wrap the model in ``nn.DataParallel``.
    3. Run ten epochs of training followed by validation, saving one
       checkpoint per epoch as ``model.bin.<epoch>`` inside the folder
       returned by ``get_unique_folder("./models/", "model")`` and printing
       the value ``run_epoch`` returns for the validation pass.
    4. Greedily decode the first sentence of the first validation batch and
       print it next to its reference target.

    Each checkpoint holds ``model.state_dict()`` (the parameter mapping), so
    it can be restored with ``model.load_state_dict(torch.load(path))``.
    """
    print("Loading data...")
    # The test split is returned by the loader but not used here.
    SRC, TGT, train_data, val_data, _ = generate_dataloaders()

    devices = [0, 1, 2, 3]
    pad_idx = TGT.vocab.stoi["<blank>"]
    print("Making model...")
    model = make_model(len(SRC.vocab), len(TGT.vocab), N=6)
    model.cuda()
    criterion = LabelSmoothing(
        size=len(TGT.vocab), padding_idx=pad_idx, smoothing=0.1)
    criterion.cuda()
    BATCH_SIZE = 12000
    train_iter = BatchIterator(train_data, batch_size=BATCH_SIZE, device=torch.device(0),
                               repeat=False, sort_key=lambda x: (len(x.src), len(x.trg)),
                               batch_size_fn=batch_size_fn, train=True)
    valid_iter = BatchIterator(val_data, batch_size=BATCH_SIZE, device=torch.device(0),
                               repeat=False, sort_key=lambda x: (len(x.src), len(x.trg)),
                               batch_size_fn=batch_size_fn, train=False)
    model_par = nn.DataParallel(model, device_ids=devices)
    model_opt = NoamOpt(model.src_embed[0].d_model, 1, 2000,
                        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
    folder = get_unique_folder("./models/", "model")
    # makedirs also creates a missing parent and tolerates an existing folder.
    os.makedirs(folder, exist_ok=True)
    for epoch in tqdm(range(10)):
        model_par.train()
        run_epoch((rebatch(pad_idx, b) for b in train_iter),
                  model_par,
                  MultiGPULossCompute(model.generator, criterion,
                                      devices=devices, opt=model_opt))
        model_par.eval()
        # opt=None: the validation pass gets no optimizer.
        loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter),
                         model_par,
                         MultiGPULossCompute(model.generator, criterion,
                                             devices=devices, opt=None))
        # state_dict must be *called*; passing the bound method would pickle
        # the method (and the module behind it) instead of the weights.
        torch.save(model.state_dict(), os.path.join(folder, "model.bin." + str(epoch)))
        print(loss)

    # Show a single translation: only the first validation batch is used.
    for batch in valid_iter:
        # Keep just the first sentence of the batch after the transpose.
        src = batch.src.transpose(0, 1)[:1]
        src_mask = (src != SRC.vocab.stoi["<blank>"]).unsqueeze(-2)
        out = greedy_decode(model, src, src_mask,
                            max_len=60, start_symbol=TGT.vocab.stoi["<s>"])
        print("Translation:", end="\t")
        # Position 0 is skipped; printing stops at the first "</s>".
        for pos in range(1, out.size(1)):
            sym = TGT.vocab.itos[out[0, pos]]
            if sym == "</s>":
                break
            print(sym, end=" ")
        print()
        print("Target:", end="\t")
        for pos in range(1, batch.trg.size(0)):
            sym = TGT.vocab.itos[batch.trg.data[pos, 0]]
            if sym == "</s>":
                break
            print(sym, end=" ")
        print()
        break
コード例 #2
0
    def __init__(self, local_rank, vocab, embed_dim, ff_embed_dim, num_heads, dropout, layers, smoothing_factor, approx):
        """Create the sub-modules of the BIGLM network.

        Args:
            local_rank: device handle passed to the positional embedding, the
                self-attention mask and the label-smoothing loss; also kept
                as ``self.device``.
            vocab: vocabulary object; its ``size`` and ``padding_idx``
                attributes are read.
            embed_dim: width of the embeddings and of every layer.
            ff_embed_dim: second argument given to each ``TransformerLayer``.
            num_heads: third argument given to each ``TransformerLayer``.
            dropout: dropout value handed to each layer and stored on the
                instance.
            layers: number of ``TransformerLayer`` modules to stack.
            smoothing_factor: last argument given to ``LabelSmoothing``.
            approx: ``"none"`` for no approximation, or ``"adaptive"`` to
                build an ``nn.AdaptiveLogSoftmaxWithLoss`` head.

        Raises:
            NotImplementedError: if ``approx`` is any other value.
        """
        super(BIGLM, self).__init__()
        self.vocab = vocab
        self.embed_dim = embed_dim

        vocab_size = self.vocab.size
        padding_idx = self.vocab.padding_idx

        # Token and position embeddings.
        self.tok_embed = Embedding(vocab_size, embed_dim, padding_idx)
        self.pos_embed = LearnedPositionalEmbedding(embed_dim, device=local_rank)

        # Stack of identical layers, each built with with_external=True.
        self.layers = nn.ModuleList([
            TransformerLayer(embed_dim, ff_embed_dim, num_heads, dropout, with_external=True)
            for _ in range(layers)
        ])

        # Normalisation and projection modules.
        self.emb_layer_norm = LayerNorm(embed_dim)
        self.one_more = nn.Linear(embed_dim, embed_dim)
        self.one_more_layer_norm = LayerNorm(embed_dim)
        self.out_proj = nn.Linear(embed_dim, vocab_size)

        self.attn_mask = SelfAttentionMask(device=local_rank)
        self.smoothing = LabelSmoothing(local_rank, vocab_size, padding_idx, smoothing_factor)

        self.dropout = dropout
        self.device = local_rank

        # Optional adaptive-softmax head; unknown modes are rejected.
        if approx == "adaptive":
            self.approx = nn.AdaptiveLogSoftmaxWithLoss(self.embed_dim, vocab_size, [10000, 20000, 200000])
        elif approx == "none":
            self.approx = None
        else:
            raise NotImplementedError("%s has not been implemented"%approx)
        self.reset_parameters()
コード例 #3
0
ファイル: model.py プロジェクト: LewPeng97/NLP-Daily
    def __init__(self, modules, consts, options):
        """Copy the configuration onto the instance and build the sub-modules.

        Args:
            modules: accepted but not read by this constructor.
            consts: mapping of numeric hyper-parameters (``dim_x``, ``dim_y``,
                ``len_x``, ``len_y``, ``hidden_size``, ``dict_size``,
                ``pad_token_idx``, ``num_layers``, ``d_ff``, ``num_heads``,
                ``dropout``, ``label_smoothing``).
            options: mapping of switches (``has_learnable_w2v``,
                ``is_predicting``, ``is_bidirectional``, ``beam_decoding``,
                ``cell``, ``device``, ``copy``, ``coverage``, ``avg_nll``).

        Raises:
            KeyError: if any of the keys listed above is missing.
        """
        super(Model, self).__init__()

        # Every option is stored under its own key name.
        for key in ("has_learnable_w2v", "is_predicting", "is_bidirectional",
                    "beam_decoding", "cell", "device", "copy", "coverage",
                    "avg_nll"):
            setattr(self, key, options[key])

        for key in ("dim_x", "dim_y", "len_x", "len_y", "hidden_size",
                    "dict_size", "pad_token_idx"):
            setattr(self, key, consts[key])

        # The context size doubles when the bidirectional flag is set.
        if self.is_bidirectional:
            self.ctx_size = self.hidden_size * 2
        else:
            self.ctx_size = self.hidden_size

        for key in ("num_layers", "d_ff", "num_heads", "dropout"):
            setattr(self, key, consts[key])
        self.smoothing_factor = consts["label_smoothing"]

        # Token and position embeddings.
        self.tok_embed = nn.Embedding(self.dict_size, self.dim_x,
                                      self.pad_token_idx)
        self.pos_embed = LearnedPositionalEmbedding(self.dim_x,
                                                    device=self.device)

        # Encoder stack: layers built with the default TransformerLayer options.
        self.enc_layers = nn.ModuleList([
            TransformerLayer(self.dim_x, self.d_ff, self.num_heads,
                             self.dropout)
            for _ in range(self.num_layers)
        ])

        # Decoder stack: same shape, built with with_external=True.
        self.dec_layers = nn.ModuleList([
            TransformerLayer(self.dim_x, self.d_ff, self.num_heads,
                             self.dropout, with_external=True)
            for _ in range(self.num_layers)
        ])

        self.attn_mask = SelfAttentionMask(device=self.device)

        self.emb_layer_norm = LayerNorm(self.dim_x)

        self.word_prob = WordProbLayer(self.hidden_size, self.dict_size,
                                       self.device, self.copy, self.coverage,
                                       self.dropout)

        self.smoothing = LabelSmoothing(self.device, self.dict_size,
                                        self.pad_token_idx,
                                        self.smoothing_factor)

        self.init_weights()
コード例 #4
0
 def __init__(self, text, args, device):
     """Build the encoder/decoder Transformer used for translation.

     Args:
         text: data object; ``text.tar`` is used for the output vocabulary
             size and for looking up the ``'<pad>'`` entry.
         args: mapping with the keys ``embed_size``, ``d_model``, ``nhead``,
             ``dim_feedforward``, ``dropout``, ``num_encoder_layers``,
             ``num_decoder_layers`` and ``smoothing_eps``.
         device: stored as ``self.device``; not otherwise used here.
     """
     super(NMT, self).__init__()
     self.text = text
     self.args = args
     self.device = device
     self.Embeddings = Embeddings(args['embed_size'], self.text)

     # Settings shared by the encoder and decoder layers.
     d_model = args['d_model']
     dropout_p = args['dropout']
     layer_kwargs = dict(
         d_model=d_model,
         nhead=args['nhead'],
         dim_feedforward=args['dim_feedforward'],
         dropout=dropout_p,
     )

     # Encoder: prototype layer, final norm, then the stacked module.
     self.encoder_layer = nn.TransformerEncoderLayer(**layer_kwargs)
     self.encoder_norm = nn.LayerNorm(d_model)
     self.encoder = nn.TransformerEncoder(
         encoder_layer=self.encoder_layer,
         num_layers=args['num_encoder_layers'],
         norm=self.encoder_norm,
     )

     # Decoder: same construction pattern as the encoder.
     self.decoder_layer = nn.TransformerDecoderLayer(**layer_kwargs)
     self.decoder_norm = nn.LayerNorm(d_model)
     self.decoder = nn.TransformerDecoder(
         decoder_layer=self.decoder_layer,
         num_layers=args['num_decoder_layers'],
         norm=self.decoder_norm,
     )

     # Bias-free output projection that shares its weight with the target
     # embedding table.
     target_vocab = self.text.tar
     self.project = nn.Linear(d_model, len(target_vocab), bias=False)
     self.project.weight = self.Embeddings.tar.weight
     self.dropout = nn.Dropout(dropout_p)
     # Scalar equal to d_model ** -0.5.
     self.project_value = math.pow(d_model, -0.5)
     self.smoothing = LabelSmoothing(
         len(target_vocab), target_vocab['<pad>'], self.args['smoothing_eps'])
コード例 #5
0
ファイル: biglm.py プロジェクト: xinyu12138/SongNet
    def __init__(self,
                 local_rank,
                 vocab,
                 embed_dim,
                 ff_embed_dim,
                 num_heads,
                 dropout,
                 layers,
                 smoothing_factor,
                 approx=None):
        """Create the sub-modules of the BIGLM network.

        Args:
            local_rank: device handle passed to the positional embedding, the
                self-attention mask and the label-smoothing loss; also kept
                as ``self.device``.
            vocab: vocabulary object; its ``size`` and ``padding_idx``
                attributes are read.
            embed_dim: width of the embeddings and of every layer.
            ff_embed_dim: second argument given to each ``TransformerLayer``.
            num_heads: third argument given to each ``TransformerLayer``.
            dropout: dropout value handed to each layer and stored on the
                instance.
            layers: number of ``TransformerLayer`` modules to stack.
            smoothing_factor: last argument given to ``LabelSmoothing``.
            approx: stored unchanged as ``self.approx``.
        """
        super(BIGLM, self).__init__()
        self.vocab = vocab
        self.embed_dim = embed_dim

        vocab_size = self.vocab.size
        padding_idx = self.vocab.padding_idx

        # Token and position embeddings.
        self.tok_embed = Embedding(vocab_size, embed_dim, padding_idx)
        self.pos_embed = LearnedPositionalEmbedding(embed_dim,
                                                    device=local_rank)

        # Stack of identical layers, each built with with_external=True.
        self.layers = nn.ModuleList([
            TransformerLayer(embed_dim, ff_embed_dim, num_heads, dropout,
                             with_external=True)
            for _ in range(layers)
        ])

        # Normalisation and projection modules.
        self.emb_layer_norm = LayerNorm(embed_dim)
        self.one_more = nn.Linear(embed_dim, embed_dim)
        self.one_more_layer_norm = LayerNorm(embed_dim)
        self.out_proj = nn.Linear(embed_dim, vocab_size)

        self.attn_mask = SelfAttentionMask(device=local_rank)
        self.smoothing = LabelSmoothing(local_rank, vocab_size, padding_idx,
                                        smoothing_factor)

        self.dropout = dropout
        self.device = local_rank

        self.approx = approx
        self.reset_parameters()
コード例 #6
0
def main():
    """Train a small model on the copy task and print one greedy decode.

    A 2-layer model over an 11-symbol vocabulary is trained for ten epochs
    on batches from ``data_gen``; after each epoch the result of
    ``run_epoch`` on a shorter evaluation run is printed.  Finally the
    sequence 1..10 is decoded greedily and the output printed.
    """
    vocab_size = 11
    # smoothing=0.0: label smoothing is switched off for this task.
    loss_criterion = LabelSmoothing(size=vocab_size, padding_idx=0, smoothing=0.0)
    model = make_model(vocab_size, vocab_size, N=2)
    adam = torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98),
                            eps=1e-9)
    # NoamOpt wraps Adam; Adam's own lr is given as 0.
    optimizer = NoamOpt(model.src_embed[0].d_model, 1, 400, adam)

    for epoch in range(10):
        model.train()
        print("epoch %d" % epoch)
        train_step = SimpleLossCompute(model.generator, loss_criterion, optimizer)
        run_epoch(data_gen(vocab_size, 30, 20), model, train_step)

        model.eval()
        # No optimizer is passed for the evaluation run.
        eval_step = SimpleLossCompute(model.generator, loss_criterion, None)
        eval_result = run_epoch(data_gen(vocab_size, 30, 5), model, eval_step)
        print(eval_result)

    model.eval()
    source = Variable(torch.LongTensor([list(range(1, 11))]))
    source_mask = Variable(torch.ones(1, 1, 10))
    decoded = greedy_decode(model, source, source_mask, max_len=10, start_symbol=1)
    print(decoded)
コード例 #7
0
def get_config(training=True):
    """Build the experiment configuration as a nested ``edict``.

    Args:
        training: when true, ``conf.train`` is filled with the optimisation
            settings, loss objects and training transforms; otherwise a
            ``conf.test`` section is created with the evaluation settings
            and test transforms.

    Returns:
        The populated ``edict``.  ``conf.model``, ``conf.train`` and
        ``conf.eval`` always exist; ``conf.test`` exists only when
        ``training`` is false.
    """
    conf = edict()
    conf.model = edict()
    conf.train = edict()
    conf.eval = edict()

    # ---- General switches -------------------------------------------------
    conf.gpu = 3
    conf.use_concat = False
    conf.multi_output = True
    conf.add = True
    conf.feature_c = 128
    conf.use_triplet = False
    conf.use_officical_resnet18 = False
    conf.triplet_ratio = 0.01
    conf.triplet_margin = 0.2
    conf.print_freq = 20
    conf.rgb = True
    conf.depth = False
    conf.ir = True
    conf.crop = True
    conf.use_label_smoothing = True

    # ---- Paths (site specific; change them to match your own setup) --------
    # Data root for training and testing.
    conf.data_folder = '/mnt/cephfs/smartauto/users/guoli.wang/jiachen.xue/anti_spoofing/data/CASIA-CeFA/phase2'
    # Where checkpoints are written during training.
    conf.save_path = '/mnt/cephfs/smartauto/users/guoli.wang/tao.cai/cvpr_model'
    # Location of the training list.
    conf.train_list = '/mnt/cephfs/smartauto/users/guoli.wang/jiachen.xue/anti_spoofing/data/CASIA-CeFA/phase1/4@2_train.txt'
    # Location of the test list.
    conf.test_list = '/tmp/yuxi.feng/IR/4@2_test.txt'
    conf.batch_size = 128

    # Experiment name: models are saved under conf.save_path/conf.exp, so a
    # distinct name keeps differently configured runs apart.
    conf.exp = 'phase1_ir_4@2_lr_0.01_epoch_30_input_size_256_emd_128_dropout_0.0_wo_se_crop_offset_32_smooth'

    # ---- Model settings -----------------------------------------------------
    conf.model.input_size = 256  # input size of the model
    conf.model.random_offset = 32  # margin removed by the random crop
    conf.model.use_senet = False  # whether the resnet18 model uses SE blocks
    conf.model.se_reduction = 16  # SE-block parameter
    conf.model.drop_out = 0.0  # dropout of the resnet18 model
    conf.model.embedding_size = 128  # feature size of the resnet18 model

    conf.pin_memory = True
    conf.num_workers = 3

    # ---- Transform helpers --------------------------------------------------
    full_side = conf.model.input_size
    crop_side = conf.model.input_size - conf.model.random_offset

    def _normalize(channels):
        # Mean 0.5 and std 0.5 for each of the given number of channels.
        return trans.Normalize([0.5] * channels, [0.5] * channels)

    def _train_pipeline(channels):
        # Resize to the full size, random-crop to the reduced size, then
        # convert to a tensor and normalise.
        return trans.Compose([
            trans.Resize((full_side, full_side)),
            trans.RandomCrop((crop_side, crop_side)),
            trans.ToTensor(),
            _normalize(channels),
        ])

    def _test_pipeline(channels):
        # Resize straight to the reduced size (no crop), then convert to a
        # tensor and normalise.
        return trans.Compose([
            trans.Resize((crop_side, crop_side)),
            trans.ToTensor(),
            _normalize(channels),
        ])

    if training:
        # ---- Training config ------------------------------------------------
        conf.train.lr = 0.01  # initial learning rate
        conf.train.milestones = [10, 20, 25]  # epoch milestones for the lr schedule
        conf.train.epoches = 30  # number of training epochs
        conf.train.momentum = 0.9  # SGD parameter
        conf.train.gamma = 0.1  # lr_scheduler parameter
        conf.train.criterion_SL1 = nn.SmoothL1Loss()
        conf.train.softmax_loss = nn.CrossEntropyLoss()  # for rgb classification
        conf.train.label_smoothing_loss = LabelSmoothing(
            size=2, padding_idx=0, smoothing=0.1)
        conf.train.label_smoothing_loss1 = LabelSmoothingLoss(0.1, 2, None)

        # One three-channel pipeline and two single-channel pipelines.
        conf.train.transform = _train_pipeline(3)
        conf.train.transform1 = _train_pipeline(1)
        conf.train.transform2 = _train_pipeline(1)
    else:
        # ---- Inference config -----------------------------------------------
        conf.test = edict()
        conf.test.set = 'phase2_test'
        # Range of epochs whose checkpoints are tested.
        conf.test.epoch_start = 9
        conf.test.epoch_end = 30
        conf.test.epoch_interval = 10
        # Predictions are saved under this path; site specific.
        conf.test.pred_path = '/mnt/cephfs/smartauto/users/guoli.wang/tao.cai/cvpr_results'

        # One three-channel pipeline and two single-channel pipelines.
        conf.test.transform = _test_pipeline(3)
        conf.test.transform1 = _test_pipeline(1)
        conf.test.transform2 = _test_pipeline(1)

    return conf
コード例 #8
0
ファイル: real_nlp_data.py プロジェクト: Aran00/transformer
                self.batches.append(sorted(b, key=self.sort_key))


def rebatch(pad_idx, batch):
    """Wrap a torchtext batch in ``Batch`` with its first two axes swapped.

    Args:
        pad_idx: padding index forwarded to ``Batch``.
        batch: object exposing ``src`` and ``trg`` tensors.

    Returns:
        A ``Batch`` built from the transposed ``src`` and ``trg``.
    """
    # Swap dimensions 0 and 1 of both tensors to match the order used here.
    source = batch.src.transpose(0, 1)
    target = batch.trg.transpose(0, 1)
    return Batch(source, target, pad_idx)


# GPU ids handed to nn.DataParallel below.
devices = [0, 1, 2, 3]

# Model and criterion, both moved to CUDA.
pad_idx = TGT.vocab.stoi["<blank>"]
model = make_model(len(SRC.vocab), len(TGT.vocab), N=6)
model.cuda()
criterion = LabelSmoothing(size=len(TGT.vocab), padding_idx=pad_idx, smoothing=0.1)
criterion.cuda()

# Training and validation iterators; they differ only in dataset and the
# ``train`` flag.
BATCH_SIZE = 12000
train_iter = MyIterator(
    train,
    batch_size=BATCH_SIZE,
    device=0,
    repeat=False,
    sort_key=lambda x: (len(x.src), len(x.trg)),
    batch_size_fn=batch_size_fn,
    train=True,
)
valid_iter = MyIterator(
    val,
    batch_size=BATCH_SIZE,
    device=0,
    repeat=False,
    sort_key=lambda x: (len(x.src), len(x.trg)),
    batch_size_fn=batch_size_fn,
    train=False,
)

# Data-parallel wrapper over the GPUs listed in ``devices``.
model_par = nn.DataParallel(model, device_ids=devices)


if False:
    model_opt = NoamOpt(model.src_embed[0].d_model, 1, 2000,
            torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))