def train():
    print("Loading data...")
    SRC, TGT, train, val, test = generate_dataloaders()
    devices = [0, 1, 2, 3]
    pad_idx = TGT.vocab.stoi["<blank>"]

    print("Making model...")
    model = make_model(len(SRC.vocab), len(TGT.vocab), N=6)
    model.cuda()
    criterion = LabelSmoothing(
        size=len(TGT.vocab), padding_idx=pad_idx, smoothing=0.1)
    criterion.cuda()

    BATCH_SIZE = 12000
    train_iter = BatchIterator(train, batch_size=BATCH_SIZE, device=torch.device(0),
                               repeat=False, sort_key=lambda x: (len(x.src), len(x.trg)),
                               batch_size_fn=batch_size_fn, train=True)
    valid_iter = BatchIterator(val, batch_size=BATCH_SIZE, device=torch.device(0),
                               repeat=False, sort_key=lambda x: (len(x.src), len(x.trg)),
                               batch_size_fn=batch_size_fn, train=False)
    model_par = nn.DataParallel(model, device_ids=devices)
    model_opt = NoamOpt(model.src_embed[0].d_model, 1, 2000,
                        torch.optim.Adam(model.parameters(), lr=0,
                                         betas=(0.9, 0.98), eps=1e-9))

    folder = get_unique_folder("./models/", "model")
    if not os.path.exists(folder):
        os.mkdir(folder)

    for epoch in tqdm(range(10)):
        model_par.train()
        run_epoch((rebatch(pad_idx, b) for b in train_iter),
                  model_par,
                  MultiGPULossCompute(model.generator, criterion,
                                      devices=devices, opt=model_opt))
        model_par.eval()
        loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter),
                         model_par,
                         MultiGPULossCompute(model.generator, criterion,
                                             devices=devices, opt=None))
        # Save the state dict itself, not the bound method.
        torch.save(model.state_dict(), os.path.join(folder, "model.bin." + str(epoch)))
        print(loss)

    # Decode one validation sentence as a sanity check.
    for i, batch in enumerate(valid_iter):
        src = batch.src.transpose(0, 1)[:1]
        src_mask = (src != SRC.vocab.stoi["<blank>"]).unsqueeze(-2)
        out = greedy_decode(model, src, src_mask, max_len=60,
                            start_symbol=TGT.vocab.stoi["<s>"])
        print("Translation:", end="\t")
        for j in range(1, out.size(1)):
            sym = TGT.vocab.itos[out[0, j]]
            if sym == "</s>":
                break
            print(sym, end=" ")
        print()
        print("Target:", end="\t")
        for j in range(1, batch.trg.size(0)):
            sym = TGT.vocab.itos[batch.trg.data[j, 0]]
            if sym == "</s>":
                break
            print(sym, end=" ")
        print()
        break
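The training script above relies on a LabelSmoothing criterion constructed with (size, padding_idx, smoothing) but never shown. A minimal sketch consistent with that call signature, assuming the model's generator emits log-probabilities and the loss is a summed KL divergence against a smoothed target distribution:

import torch
import torch.nn as nn

class LabelSmoothing(nn.Module):
    "Label smoothing as a KL divergence against a smoothed target distribution."
    def __init__(self, size, padding_idx, smoothing=0.0):
        super(LabelSmoothing, self).__init__()
        self.criterion = nn.KLDivLoss(reduction="sum")
        self.padding_idx = padding_idx
        self.confidence = 1.0 - smoothing
        self.smoothing = smoothing
        self.size = size

    def forward(self, x, target):
        # x: (N, size) log-probabilities; target: (N,) gold token indices
        assert x.size(1) == self.size
        true_dist = x.data.clone()
        # spread the smoothing mass over everything except the gold and padding tokens
        true_dist.fill_(self.smoothing / (self.size - 2))
        true_dist.scatter_(1, target.data.unsqueeze(1), self.confidence)
        true_dist[:, self.padding_idx] = 0
        mask = torch.nonzero(target.data == self.padding_idx)
        if mask.numel() > 0:
            # zero out rows whose target is padding so they contribute no loss
            true_dist.index_fill_(0, mask.squeeze(-1), 0.0)
        return self.criterion(x, true_dist)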
def __init__(self, local_rank, vocab, embed_dim, ff_embed_dim, num_heads,
             dropout, layers, smoothing_factor, approx):
    super(BIGLM, self).__init__()
    self.vocab = vocab
    self.embed_dim = embed_dim
    self.tok_embed = Embedding(self.vocab.size, embed_dim, self.vocab.padding_idx)
    self.pos_embed = LearnedPositionalEmbedding(embed_dim, device=local_rank)

    self.layers = nn.ModuleList()
    for i in range(layers):
        self.layers.append(
            TransformerLayer(embed_dim, ff_embed_dim, num_heads, dropout,
                             with_external=True))
    self.emb_layer_norm = LayerNorm(embed_dim)
    self.one_more = nn.Linear(embed_dim, embed_dim)
    self.one_more_layer_norm = LayerNorm(embed_dim)
    self.out_proj = nn.Linear(embed_dim, self.vocab.size)

    self.attn_mask = SelfAttentionMask(device=local_rank)
    self.smoothing = LabelSmoothing(local_rank, self.vocab.size,
                                    self.vocab.padding_idx, smoothing_factor)
    self.dropout = dropout
    self.device = local_rank

    if approx == "none":
        self.approx = None
    elif approx == "adaptive":
        self.approx = nn.AdaptiveLogSoftmaxWithLoss(
            self.embed_dim, self.vocab.size, [10000, 20000, 200000])
    else:
        raise NotImplementedError("%s has not been implemented" % approx)
    self.reset_parameters()
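When approx == "adaptive", nn.AdaptiveLogSoftmaxWithLoss stands in for the dense out_proj plus smoothing at the output layer. A short usage sketch of that PyTorch module with the same cutoffs; the vocabulary size of 300000 and the flattened hidden shape are illustrative assumptions, not values from the original forward pass:

import torch
import torch.nn as nn

embed_dim, vocab_size = 512, 300000          # illustrative sizes
approx = nn.AdaptiveLogSoftmaxWithLoss(embed_dim, vocab_size,
                                       cutoffs=[10000, 20000, 200000])

hidden = torch.randn(8 * 16, embed_dim)              # flattened (batch * seq_len, embed_dim)
targets = torch.randint(0, vocab_size, (8 * 16,))    # gold token ids

out = approx(hidden, targets)        # namedtuple (output, loss)
nll = out.loss                       # mean negative log-likelihood of the targets
log_probs = approx.log_prob(hidden)  # full (N, vocab_size) log-probabilities, e.g. for decoding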
def __init__(self, modules, consts, options):
    super(Model, self).__init__()

    self.has_learnable_w2v = options["has_learnable_w2v"]
    self.is_predicting = options["is_predicting"]
    self.is_bidirectional = options["is_bidirectional"]
    self.beam_decoding = options["beam_decoding"]
    self.cell = options["cell"]
    self.device = options["device"]
    self.copy = options["copy"]
    self.coverage = options["coverage"]
    self.avg_nll = options["avg_nll"]

    self.dim_x = consts["dim_x"]
    self.dim_y = consts["dim_y"]
    self.len_x = consts["len_x"]
    self.len_y = consts["len_y"]
    self.hidden_size = consts["hidden_size"]
    self.dict_size = consts["dict_size"]
    self.pad_token_idx = consts["pad_token_idx"]
    self.ctx_size = self.hidden_size * 2 if self.is_bidirectional else self.hidden_size
    self.num_layers = consts["num_layers"]
    self.d_ff = consts["d_ff"]
    self.num_heads = consts["num_heads"]
    self.dropout = consts["dropout"]
    self.smoothing_factor = consts["label_smoothing"]

    self.tok_embed = nn.Embedding(self.dict_size, self.dim_x, self.pad_token_idx)
    self.pos_embed = LearnedPositionalEmbedding(self.dim_x, device=self.device)

    self.enc_layers = nn.ModuleList()
    for i in range(self.num_layers):
        self.enc_layers.append(
            TransformerLayer(self.dim_x, self.d_ff, self.num_heads, self.dropout))

    self.dec_layers = nn.ModuleList()
    for i in range(self.num_layers):
        self.dec_layers.append(
            TransformerLayer(self.dim_x, self.d_ff, self.num_heads, self.dropout,
                             with_external=True))

    self.attn_mask = SelfAttentionMask(device=self.device)
    self.emb_layer_norm = LayerNorm(self.dim_x)

    self.word_prob = WordProbLayer(self.hidden_size, self.dict_size, self.device,
                                   self.copy, self.coverage, self.dropout)
    self.smoothing = LabelSmoothing(self.device, self.dict_size, self.pad_token_idx,
                                    self.smoothing_factor)

    self.init_weights()
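SelfAttentionMask(device=...) appears in this constructor and in the BIGLM variants but is not defined in these snippets. A plausible minimal sketch, assuming it caches and returns a causal (strictly upper-triangular) boolean mask over future positions:

import torch
import torch.nn as nn

class SelfAttentionMask(nn.Module):
    "Causal mask: position i may only attend to positions <= i."
    def __init__(self, init_size=256, device="cpu"):
        super(SelfAttentionMask, self).__init__()
        self.device = device
        self.weights = self.get_mask(init_size)

    @staticmethod
    def get_mask(size):
        # boolean matrix that is True strictly above the diagonal (future positions)
        return torch.triu(torch.ones(size, size, dtype=torch.bool), diagonal=1)

    def forward(self, size):
        # grow the cached mask if a longer sequence is requested
        if size > self.weights.size(0):
            self.weights = self.get_mask(size)
        return self.weights[:size, :size].to(self.device).detach()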
def __init__(self, text, args, device):
    super(NMT, self).__init__()
    self.text = text
    self.args = args
    self.device = device

    self.Embeddings = Embeddings(args['embed_size'], self.text)

    self.encoder_layer = nn.TransformerEncoderLayer(d_model=args['d_model'],
                                                    nhead=args['nhead'],
                                                    dim_feedforward=args['dim_feedforward'],
                                                    dropout=args['dropout'])
    self.encoder_norm = nn.LayerNorm(args['d_model'])
    self.encoder = nn.TransformerEncoder(encoder_layer=self.encoder_layer,
                                         num_layers=args['num_encoder_layers'],
                                         norm=self.encoder_norm)

    self.decoder_layer = nn.TransformerDecoderLayer(d_model=args['d_model'],
                                                    nhead=args['nhead'],
                                                    dim_feedforward=args['dim_feedforward'],
                                                    dropout=args['dropout'])
    self.decoder_norm = nn.LayerNorm(args['d_model'])
    self.decoder = nn.TransformerDecoder(decoder_layer=self.decoder_layer,
                                         num_layers=args['num_decoder_layers'],
                                         norm=self.decoder_norm)

    self.project = nn.Linear(args['d_model'], len(self.text.tar), bias=False)
    self.project.weight = self.Embeddings.tar.weight  # tie output projection to target embeddings
    self.dropout = nn.Dropout(args['dropout'])
    self.project_value = math.pow(args['d_model'], -0.5)
    self.smoothing = LabelSmoothing(len(self.text.tar), self.text.tar['<pad>'],
                                    self.args['smoothing_eps'])
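The NMT constructor above only assembles the encoder, decoder, and tied projection. Below is a self-contained sketch of how such pieces are typically wired in a forward pass with PyTorch's nn.TransformerEncoder/Decoder; the dummy sizes, the sqrt(d_model) embedding scale (suggested by project_value), and the padding conventions are assumptions rather than the repository's actual forward code:

import math
import torch
import torch.nn as nn

d_model, nhead, dim_ff, vocab = 512, 8, 2048, 32000   # illustrative sizes

enc_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_ff, dropout=0.1)
encoder = nn.TransformerEncoder(enc_layer, num_layers=6, norm=nn.LayerNorm(d_model))
dec_layer = nn.TransformerDecoderLayer(d_model, nhead, dim_ff, dropout=0.1)
decoder = nn.TransformerDecoder(dec_layer, num_layers=6, norm=nn.LayerNorm(d_model))
embed = nn.Embedding(vocab, d_model, padding_idx=0)
project = nn.Linear(d_model, vocab, bias=False)
project.weight = embed.weight                         # weight tying, as in the constructor above

S, T, N = 17, 13, 4                                   # source len, target len, batch
src_ids = torch.randint(1, vocab, (S, N))
tgt_ids = torch.randint(1, vocab, (T, N))
src_pad = src_ids.eq(0).t()                           # (N, S) key-padding mask, True at <pad>
tgt_pad = tgt_ids.eq(0).t()                           # (N, T)
tgt_mask = torch.triu(torch.full((T, T), float('-inf')), diagonal=1)  # block future positions

# project_value = d_model**-0.5 in the constructor suggests a sqrt(d_model) scale on the
# embeddings; exactly where the repository applies it is a guess.
scale = math.sqrt(d_model)
memory = encoder(embed(src_ids) * scale, src_key_padding_mask=src_pad)
out = decoder(embed(tgt_ids) * scale, memory, tgt_mask=tgt_mask,
              tgt_key_padding_mask=tgt_pad, memory_key_padding_mask=src_pad)
log_probs = torch.log_softmax(project(out), dim=-1)   # (T, N, vocab)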
def __init__(self, local_rank, vocab, embed_dim, ff_embed_dim, num_heads,
             dropout, layers, smoothing_factor, approx=None):
    super(BIGLM, self).__init__()
    self.vocab = vocab
    self.embed_dim = embed_dim
    self.tok_embed = Embedding(self.vocab.size, embed_dim, self.vocab.padding_idx)
    self.pos_embed = LearnedPositionalEmbedding(embed_dim, device=local_rank)

    self.layers = nn.ModuleList()
    for i in range(layers):
        self.layers.append(
            TransformerLayer(embed_dim, ff_embed_dim, num_heads, dropout,
                             with_external=True))
    self.emb_layer_norm = LayerNorm(embed_dim)
    self.one_more = nn.Linear(embed_dim, embed_dim)
    self.one_more_layer_norm = LayerNorm(embed_dim)
    self.out_proj = nn.Linear(embed_dim, self.vocab.size)

    self.attn_mask = SelfAttentionMask(device=local_rank)
    self.smoothing = LabelSmoothing(local_rank, self.vocab.size,
                                    self.vocab.padding_idx, smoothing_factor)
    self.dropout = dropout
    self.device = local_rank
    self.approx = approx
    self.reset_parameters()
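LearnedPositionalEmbedding(embed_dim, device=local_rank) recurs in these constructors without a definition. One plausible minimal sketch, assuming positions are indexed 0..seq_len-1 and looked up in a learned nn.Embedding table (the shape conventions are guesses):

import torch
import torch.nn as nn

class LearnedPositionalEmbedding(nn.Module):
    "Learned absolute positional embeddings, looked up by position index."
    def __init__(self, embed_dim, init_size=512, device="cpu"):
        super(LearnedPositionalEmbedding, self).__init__()
        self.weights = nn.Embedding(init_size, embed_dim)
        self.device = device   # in the snippets above this is the local GPU rank
        nn.init.normal_(self.weights.weight, std=0.02)

    def forward(self, input, offset=0):
        # input: (seq_len, batch) token ids; returns (seq_len, 1, embed_dim),
        # broadcastable against token embeddings of shape (seq_len, batch, embed_dim)
        seq_len = input.size(0)
        positions = (offset + torch.arange(seq_len)).to(self.device)
        return self.weights(positions).unsqueeze(1)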
def main():
    # Train the simple copy task.
    V = 11
    criterion = LabelSmoothing(size=V, padding_idx=0, smoothing=0.0)
    model = make_model(V, V, N=2)
    model_opt = NoamOpt(model.src_embed[0].d_model, 1, 400,
                        torch.optim.Adam(model.parameters(), lr=0,
                                         betas=(0.9, 0.98), eps=1e-9))

    for epoch in range(10):
        model.train()
        print("epoch %d" % epoch)
        run_epoch(data_gen(V, 30, 20), model,
                  SimpleLossCompute(model.generator, criterion, model_opt))
        model.eval()
        print(run_epoch(data_gen(V, 30, 5), model,
                        SimpleLossCompute(model.generator, criterion, None)))

    model.eval()
    src = Variable(torch.LongTensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]))
    src_mask = Variable(torch.ones(1, 1, 10))
    print(greedy_decode(model, src, src_mask, max_len=10, start_symbol=1))
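Both training scripts call greedy_decode, which is not shown. A sketch in the same style, assuming the model exposes encode, decode, and generator methods; the subsequent_mask helper that builds the causal mask is included so the sketch is self-contained:

import torch

def subsequent_mask(size):
    "Mask of shape (1, size, size): True where a position may be attended to."
    return torch.triu(torch.ones(1, size, size), diagonal=1) == 0

def greedy_decode(model, src, src_mask, max_len, start_symbol):
    "Greedy left-to-right decoding: always take the highest-probability next token."
    memory = model.encode(src, src_mask)
    ys = torch.full((1, 1), start_symbol).type_as(src.data)
    for _ in range(max_len - 1):
        out = model.decode(memory, src_mask, ys,
                           subsequent_mask(ys.size(1)).type_as(src.data))
        prob = model.generator(out[:, -1])   # log-probs for the last generated position
        _, next_word = torch.max(prob, dim=1)
        next_word = next_word.item()
        ys = torch.cat([ys, torch.full((1, 1), next_word).type_as(src.data)], dim=1)
    return ys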
def get_config(training=True):
    conf = edict()
    conf.model = edict()
    conf.train = edict()
    conf.eval = edict()
    conf.gpu = 3
    conf.use_concat = False
    conf.multi_output = True
    conf.add = True
    conf.feature_c = 128
    conf.use_triplet = False
    conf.use_officical_resnet18 = False
    conf.triplet_ratio = 0.01
    conf.triplet_margin = 0.2
    conf.print_freq = 20
    conf.rgb = True
    conf.depth = False
    conf.ir = True
    conf.crop = True
    conf.use_label_smoothing = True

    # data root for training and testing; change it according to your setting
    conf.data_folder = '/mnt/cephfs/smartauto/users/guoli.wang/jiachen.xue/anti_spoofing/data/CASIA-CeFA/phase2'
    # path where models are saved during training; change it according to your setting
    conf.save_path = '/mnt/cephfs/smartauto/users/guoli.wang/tao.cai/cvpr_model'
    # conf.save_path = './work_space/save'

    # path of the training list; change it according to your setting
    conf.train_list = '/mnt/cephfs/smartauto/users/guoli.wang/jiachen.xue/anti_spoofing/data/CASIA-CeFA/phase1/4@2_train.txt'
    # conf.train_list = '/tmp/yuxi.feng/[email protected]'
    # conf.train_list = '/tmp/yuxi.feng/IR/4@2_train.txt'

    # path of the test list; change it according to your setting
    conf.test_list = '/tmp/yuxi.feng/IR/4@2_test.txt'
    # conf.test_list = '/mnt/cephfs/smartauto/users/guoli.wang/jiachen.xue/anti_spoofing/data/CASIA-CeFA/phase2/4@2_test_res.txt'
    # conf.test_list = '/mnt/cephfs/smartauto/users/guoli.wang/jiachen.xue/anti_spoofing/data/CASIA-CeFA/phase1/4@2_dev_res.txt'
    # conf.test_list = '/tmp/yuxi.feng/4@1_test.det'

    conf.batch_size = 128

    # The model is saved in conf.save_path/conf.exp; use this parameter to distinguish different runs.
    conf.exp = 'phase1_ir_4@2_lr_0.01_epoch_30_input_size_256_emd_128_dropout_0.0_wo_se_crop_offset_32_smooth'
    # conf.exp = 'phase1_ir_4@2_lr_0.001_epoch_30_input_size_256_emd_128_dropout_0.0_triplet_ratio_0.01_wo_se_offset_32'
    # conf.exp = 'phase1_ir_4@2_lr_0.01_epoch_30_input_size_256_emd_128_dropout_0.0_wo_se_offset_32_smooth_fix_ir'
    # conf.exp = 'phase1_depth_4@1_lr_0.01_epoch_30_input_size_256_emd_128_dropout_0.0_wo_se_offset_32_smooth'
    # conf.exp = 'phase1_multi_4@3_lr_0.01_epoch_30_input_size_256_emd_128_dropout_0.0_wo_se_offset_32'
    # conf.exp = 'phase1_4@2_lr_0.001_epoch_30_input_size_256_emd_128_dropout_0.0_triplet_ratio_0.01_fix_wo_se_crop_offset_32'
    # conf.exp = 'phase1_4@2_lr_0.01_epoch_30_input_size_256_emd_128_dropout_0.0_wo_se_offset_32'
    # conf.exp = 'phase1_4@3_lr_0.01_epoch_30_input_size_384_emd_128_dropout_0.0_wo_se_crop_offset_32_smooth'
    # conf.exp = 'phase1_4@1_lr_0.01_epoch_30_input_size_256_emd_128_dropout_0.0_wo_se_crop_offset_32_smooth_twice'  # origin smooth
    # conf.exp = 'phase1_4@2_lr_0.1_epoch_30_input_size_256_emd_128_dropout_0.0_se_crop_offset_32_smooth'  # with se
    # conf.exp = 'phase1_4@2_lr_0.1_epoch_30_input_size_256_emd_128_dropout_0.0_wo_se_wo_crop_offset_32_smooth'  # without crop
    # conf.exp = 'phase1_4@3_lr_0.01_epoch_30_input_size_256_emd_128_dropout_0.0_triplet_raito_0.01_wo_se_crop_offset_32_smooth'  # add triplet loss
    # conf.exp = 'phase1_4@2_lr_0.01_epoch_30_input_size_256_emd_128_dropout_0.0_triplet_ratio_0.01_wo_se_offset_32'
    # conf.exp = 'phase1_4@3_lr_0.01_epoch_30_input_size_256_emd_128_dropout_0.0_triplet_ratio_0.01_fix_crop_offset_32'
    # conf.exp = 'phase1_4@3_lr_0.001_epoch_30_input_size_384_emd_128_dp_0_concat_128_bs_128_crop'
    # conf.exp = 'phase1_4@1_resnet18_lr_0.01_epoch_30_input_size_256_emd_512_bs_128_offset_32_fix'
    # conf.exp = 'phase1_4@1_lr_0.001_epoch_30_input_size_512_emd_128_bs_64'
    # conf.exp = 'phase1_4@1_resnet18_lr_0.001_epoch_30_input_size_512_emd_128_bs_64_test'
    # conf.exp = 'phase1_4@2_lr_0.001_epoch_30_input_size_320_emd_128_dp_0_concat_128_bs_128'
    # conf.exp = 'phase1_4@3_lr_0.001_epoch_30_input_size_384_emd_128_dp_0_concat_128_bs_128_fix_bug'
    # conf.exp = 'phase1_4@1_lr_0.001_epoch_30_input_size_384_emd_128_dp_0_wo_mo_concat_128_bs_64'
    # conf.exp = 'phase1_4@1_lr_0.001_epoch_30_input_size_384_emd_128_dp_0_wo_mo_add_128_bs_128_fix'

    conf.model.input_size = 256      # input size of our model
    conf.model.random_offset = 32    # for random crop
    conf.model.use_senet = False     # whether SENet blocks are used in our ResNet-18 model
    conf.model.se_reduction = 16     # SENet reduction parameter
    conf.model.drop_out = 0.0        # dropout layer added to our ResNet-18 model
    conf.model.embedding_size = 128  # feature size of our ResNet-18 model
    conf.pin_memory = True
    conf.num_workers = 3

    #-------------------- Training Config ------------------------
    if training:
        conf.train.lr = 0.01                 # initial learning rate
        conf.train.milestones = [10, 20, 25] # epochs at which the lr is decreased by a factor of 10
        conf.train.epoches = 30              # number of training epochs
        conf.train.momentum = 0.9            # SGD momentum
        conf.train.gamma = 0.1               # lr_scheduler decay factor
        conf.train.criterion_SL1 = nn.SmoothL1Loss()       # SmoothL1Loss used in the training stage
        conf.train.softmax_loss = nn.CrossEntropyLoss()    # cross-entropy loss for RGB classification
        conf.train.label_smoothing_loss = LabelSmoothing(
            size=2, padding_idx=0, smoothing=0.1)          # for label smoothing
        conf.train.label_smoothing_loss1 = LabelSmoothingLoss(0.1, 2, None)

        # convert input from PIL.Image to a normalized tensor
        conf.train.transform = trans.Compose([
            trans.Resize((conf.model.input_size, conf.model.input_size)),
            trans.RandomCrop((conf.model.input_size - conf.model.random_offset,
                              conf.model.input_size - conf.model.random_offset)),
            trans.ToTensor(),
            trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])
        # single-channel variants of the same pipeline
        conf.train.transform1 = trans.Compose([
            trans.Resize((conf.model.input_size, conf.model.input_size)),
            trans.RandomCrop((conf.model.input_size - conf.model.random_offset,
                              conf.model.input_size - conf.model.random_offset)),
            trans.ToTensor(),
            trans.Normalize([0.5], [0.5])
        ])
        conf.train.transform2 = trans.Compose([
            trans.Resize((conf.model.input_size, conf.model.input_size)),
            trans.RandomCrop((conf.model.input_size - conf.model.random_offset,
                              conf.model.input_size - conf.model.random_offset)),
            trans.ToTensor(),
            trans.Normalize([0.5], [0.5])
        ])

    #-------------------- Inference Config ------------------------
    else:
        conf.test = edict()
        conf.test.set = 'phase2_test'
        # conf.test.set = '4@2_test'
        conf.test.epoch_start = 9
        conf.test.epoch_end = 30
        conf.test.epoch_interval = 10  # range of epochs used for testing
        # path for saving prediction results; results go to conf.test.pred_path/conf.exp;
        # change it according to your setting
        conf.test.pred_path = '/mnt/cephfs/smartauto/users/guoli.wang/tao.cai/cvpr_results'
        # conf.test.pred_path = '/home/users/tao.cai/PAD/work_space/test_pred'

        # convert input from PIL.Image to a normalized tensor
        conf.test.transform = trans.Compose([
            # trans.Resize((conf.model.input_size, conf.model.input_size)),
            trans.Resize((conf.model.input_size - conf.model.random_offset,
                          conf.model.input_size - conf.model.random_offset)),
            trans.ToTensor(),
            trans.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])
        conf.test.transform1 = trans.Compose([
            # trans.Resize((conf.model.input_size, conf.model.input_size)),
            trans.Resize((conf.model.input_size - conf.model.random_offset,
                          conf.model.input_size - conf.model.random_offset)),
            trans.ToTensor(),
            trans.Normalize([0.5], [0.5])
        ])
        conf.test.transform2 = trans.Compose([
            # trans.Resize((conf.model.input_size, conf.model.input_size)),
            trans.Resize((conf.model.input_size - conf.model.random_offset,
                          conf.model.input_size - conf.model.random_offset)),
            trans.ToTensor(),
            trans.Normalize([0.5], [0.5])
        ])

    return conf
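The training branch of the config above fixes lr, milestones, momentum, gamma, and epoches, but not the optimizer wiring. A hedged sketch of how these values would typically drive SGD with a MultiStepLR schedule; the linear stand-in model and the dummy batch are placeholders, not the repository's ResNet-18 pipeline:

import torch
import torch.nn as nn
import torch.optim as optim

conf = get_config(training=True)
model = nn.Linear(conf.model.embedding_size, 2)   # stand-in for the repo's ResNet-18 classifier
optimizer = optim.SGD(model.parameters(), lr=conf.train.lr, momentum=conf.train.momentum)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                           milestones=conf.train.milestones,
                                           gamma=conf.train.gamma)

for epoch in range(conf.train.epoches):
    # one dummy step per epoch; a real loop would iterate a DataLoader built from conf.train_list
    feats = torch.randn(conf.batch_size, conf.model.embedding_size)
    labels = torch.randint(0, 2, (conf.batch_size,))
    optimizer.zero_grad()
    loss = conf.train.softmax_loss(model(feats), labels)
    loss.backward()
    optimizer.step()
    scheduler.step()   # lr drops by gamma at epochs 10, 20, 25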
        self.batches.append(sorted(b, key=self.sort_key))  # tail of the batching iterator's create_batches


def rebatch(pad_idx, batch):
    "Fix order in torchtext to match ours"
    src, trg = batch.src.transpose(0, 1), batch.trg.transpose(0, 1)
    return Batch(src, trg, pad_idx)


# GPUs to use
devices = [0, 1, 2, 3]
if True:
    pad_idx = TGT.vocab.stoi["<blank>"]
    model = make_model(len(SRC.vocab), len(TGT.vocab), N=6)
    model.cuda()
    criterion = LabelSmoothing(size=len(TGT.vocab), padding_idx=pad_idx, smoothing=0.1)
    criterion.cuda()
    BATCH_SIZE = 12000
    train_iter = MyIterator(train, batch_size=BATCH_SIZE, device=0, repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn, train=True)
    valid_iter = MyIterator(val, batch_size=BATCH_SIZE, device=0, repeat=False,
                            sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn, train=False)
    model_par = nn.DataParallel(model, device_ids=devices)

# Disabled toggle in the original snippet; enable to construct the training optimizer.
if False:
    model_opt = NoamOpt(model.src_embed[0].d_model, 1, 2000,
                        torch.optim.Adam(model.parameters(), lr=0,
                                         betas=(0.9, 0.98), eps=1e-9))
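NoamOpt(model.src_embed[0].d_model, 1, warmup, Adam(...)) recurs across these snippets. A sketch consistent with how it is called, implementing the warmup schedule lr = factor * d_model**-0.5 * min(step**-0.5, step * warmup**-1.5):

class NoamOpt:
    "Optimizer wrapper with the inverse-square-root warmup schedule from 'Attention Is All You Need'."
    def __init__(self, model_size, factor, warmup, optimizer):
        self.optimizer = optimizer
        self._step = 0
        self.warmup = warmup
        self.factor = factor
        self.model_size = model_size
        self._rate = 0

    def step(self):
        # advance the step counter, update the lr of every param group, then step the inner optimizer
        self._step += 1
        rate = self.rate()
        for p in self.optimizer.param_groups:
            p['lr'] = rate
        self._rate = rate
        self.optimizer.step()

    def rate(self, step=None):
        if step is None:
            step = self._step
        return self.factor * (self.model_size ** (-0.5) *
                              min(step ** (-0.5), step * self.warmup ** (-1.5)))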