import os
import time

import torch
import torch.nn as nn
import torch.optim as optim
import visdom
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter  # or: from tensorboardX import SummaryWriter
from tqdm import tqdm

# NOTE: the names Visdom_line, GraphAttenConfig, LSTMGraphAttn, get_word_embed,
# get_flag_embed, get_vocab, GraphDataset, graph_fn, SyntaxLSTM, SyntaxDataset,
# syntax_fn, LSTM3LayersConfig, LSTM3Layers, SeDiscriminatorConfig,
# SemanticDiscriminator, get_gen_trainloader, train_generator,
# train_discriminator, and train_ad_generator are project-local and must be
# imported from this repo's own modules; their paths are not shown in the
# original file.


def train(reload_dataset=False, pretrain_model_path=None, optim_fu='adam'):
    write = SummaryWriter()
    vis = visdom.Visdom(env="Graph_Attention_compression")
    viz = Visdom_line(vis=vis, win="Graph_Attention")

    # Configuration
    DATA_DIR = './data/train_pairs'
    DICT_PATH = './checkpoint/dict_20000.pkl'
    EMBEDDING_PATH_RANDOM = './model/save_embedding_97and3.ckpt'
    SAVE_EMBEDDING = False
    RELOAD_DATASET = reload_dataset
    SAVE_DATASET_OBJ = './data/dataset.pkl'
    SAVE_MODEL_PATH = './checkpoint/Graph_Attn/'
    PRINT_STEP = 10
    SAVE_STEP = 1
    GPU_NUM = 0

    torch.manual_seed(2)
    torch.cuda.set_device(GPU_NUM)

    config = GraphAttenConfig()
    model = LSTMGraphAttn(config)
    model.cuda()
    if not os.path.exists(SAVE_MODEL_PATH):
        os.makedirs(SAVE_MODEL_PATH)

    # Load the embeddings
    embed = get_word_embed().cuda()
    embed_flag = get_flag_embed().cuda()
    vocab = get_vocab()
    criterion = nn.CrossEntropyLoss(ignore_index=2)  # index 2 is padding
    optimizer = optim.Adam(model.parameters(), lr=0.0001)
    trainset = GraphDataset(vocab=vocab)
    trainloader = DataLoader(dataset=trainset, batch_size=config.batch_size,
                             collate_fn=graph_fn, pin_memory=True, shuffle=True)

    global_step = 0
    for epoch in range(config.epoch):
        epoch_loss = 0
        for index, (src, trg, neighbor, labels) in enumerate(tqdm(trainloader)):
            src = embed(src.cuda())
            trg = embed(trg.cuda())
            neighbor = embed(neighbor.cuda())
            # Pad the encoder input with three all-zero flag dimensions
            flag4encoder = torch.zeros(src.shape[0], src.shape[1], 3).cuda()
            src = torch.cat([src, flag4encoder], dim=2)
            # Teacher forcing: shift the gold labels right by one step and
            # embed them as the decoder's flag input
            flag4decoder = torch.zeros([labels.shape[0], 1]).long()
            flag4decoder = torch.cat([flag4decoder, labels[:, :-1]], dim=1).cuda()
            flag4decoder = embed_flag(flag4decoder)
            # Pad the neighbor input with the same three all-zero flag dims
            flag4neighbor = torch.zeros(neighbor.shape[0], neighbor.shape[1],
                                        neighbor.shape[2], 3).cuda()
            neighbor = torch.cat([neighbor, flag4neighbor], dim=-1)
            trg = torch.cat([trg, flag4decoder], dim=2)
            labels = labels.cuda()

            out = model(src, trg, neighbor)
            out = out.view(-1, 2)
            labels = labels.view(-1)
            loss = criterion(out, labels)
            epoch_loss += loss.item()
            print(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            write.add_scalar('loss', loss.item(), global_step)
            global_step += 1
        model.save(SAVE_MODEL_PATH + 'model-' + str(epoch) + '.ckpt')
        write.add_scalar('epoch_loss', epoch_loss, epoch)
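# The flag4decoder construction above implements teacher forcing for the
# binary keep/delete decoder: the flag embedded at step t is the gold decision
# from step t-1, with an all-zero flag prepended for the first step. Below is
# a minimal self-contained sketch of just that shift; the shapes and label
# values are illustrative assumptions, not data from this repo.
def _teacher_forcing_shift_demo():
    labels = torch.tensor([[1, 0, 1, 2]])  # (batch=1, seq_len=4); 2 is the padding index
    start = torch.zeros([labels.shape[0], 1], dtype=torch.long)  # all-zero start flag
    shifted = torch.cat([start, labels[:, :-1]], dim=1)  # drop last step, prepend start
    assert shifted.tolist() == [[0, 1, 0, 1]]
    return shifted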
def train(reload_dataset=False, pretrain_model_path=None, optim_fu='adam'):
    write = SummaryWriter()
    vis = visdom.Visdom(env="syntax_compression")
    viz = Visdom_line(vis=vis, win="syntax_geted_lstm")

    # Configuration
    DATA_DIR = '../data/train_pairs'
    DICT_PATH = '../checkpoint/dict_20000.pkl'
    EMBEDDING_PATH_RANDOM = '../model/save_embedding_97and3.ckpt'
    SAVE_EMBEDDING = False
    RELOAD_DATASET = reload_dataset
    SAVE_DATASET_OBJ = '../data/dataset.pkl'
    SAVE_MODEL_PATH = './checkpoint/syntax_gate_lstm/'
    PRINT_STEP = 10
    SAVE_STEP = 1
    GPU_NUM = 1

    torch.manual_seed(2)
    torch.cuda.set_device(GPU_NUM)

    model = SyntaxLSTM(100, 100, 10)
    model.cuda()
    if not os.path.exists(SAVE_MODEL_PATH):
        os.makedirs(SAVE_MODEL_PATH)

    # Load the embeddings
    embed = get_word_embed().cuda()
    embed_flag = get_flag_embed().cuda()
    vocab = get_vocab()
    criterion = nn.CrossEntropyLoss(ignore_index=2)  # index 2 is padding

    # if pretrain_model_path is not None:
    #     print('Loading the pre-trained model', pretrain_model_path)
    #     model.load(pretrain_model_path)
    #     model.embed.weight.requires_grad = True
    #     parameters = model.parameters()
    #     optimizer = optim.SGD(parameters, lr=0.000001)
    # else:
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=0.0001)

    trainset = SyntaxDataset(vocab=vocab, reverse_src=True)
    trainloader = DataLoader(dataset=trainset, batch_size=200, collate_fn=syntax_fn,
                             pin_memory=True, num_workers=5, shuffle=True)

    global_step = 0
    loss_print = 0
    step_print = 0
    for epoch in range(500):
        epoch_loss = 0
        for index, (src, trg, labels, tags) in enumerate(tqdm(trainloader)):
            src = embed(src.cuda())
            trg = embed(trg.cuda())
            tags = tags.cuda()
            flag4encoder = torch.zeros(src.shape[0], src.shape[1], 3).cuda()
            src = torch.cat([src, flag4encoder], dim=2)
            # Prepend an all-zero start flag to the shifted decoder labels
            flag4decoder = torch.zeros([labels.shape[0], 1]).long()
            flag4decoder = torch.cat([flag4decoder, labels[:, :-1]], dim=1).cuda()
            flag4decoder = embed_flag(flag4decoder)
            # Append the three flag dimensions to the decoder input
            trg = torch.cat([trg, flag4decoder], dim=2)
            labels = labels.cuda()

            out = model(src, trg, tags)
            out = out.view(-1, 2)
            labels = labels.view(-1)
            loss = criterion(out, labels)
            epoch_loss += loss.item()
            loss_print += loss.item()
            print(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            global_step += 1
            if global_step % PRINT_STEP == 0:
                write.add_scalar('loss', loss_print / PRINT_STEP, step_print)
                step_print += 1
                loss_print = 0
        model.save(SAVE_MODEL_PATH + 'model-' + str(epoch) + '.ckpt')
        write.add_scalar('epoch_loss', epoch_loss, epoch)
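# Both training loops build their loss with CrossEntropyLoss(ignore_index=2),
# so positions whose label is the padding index 2 contribute neither loss nor
# gradient. Below is a minimal self-contained sketch of that masking
# behaviour; the logits and labels are made-up illustrative values, not data
# from this repo.
def _ignore_index_demo():
    logits = torch.randn(4, 2)           # four positions, two classes (keep/delete)
    labels = torch.tensor([0, 1, 2, 2])  # the last two positions are padding
    masked = nn.CrossEntropyLoss(ignore_index=2)(logits, labels)
    unmasked = nn.CrossEntropyLoss()(logits[:2], labels[:2])
    assert torch.isclose(masked, unmasked)  # the padding positions were ignored
    return masked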
def rl_train_3layer():
    vis = visdom.Visdom(env="RL_compression")
    vis_g = Visdom_line(vis=vis, win="generator", name="Generator Loss")
    vis_d = Visdom_line(vis=vis, win="discriminator", name="Discriminator Loss")
    cuda_num = 1
    get_pretrain_generator = True  # True: load a pre-trained generator instead of pre-training one
    get_pretrain_discriminator = False
    g_save_dir = './checkpoint/GAN_train/generator/'
    d_save_dir = './checkpoint/GAN_train/discriminator/'
    # MODEL_PATH = './checkpoint/LSTM3Layers/model2.ckpt'
    BATCH_SIZE = 100
    torch.cuda.set_device(cuda_num)

    # Build the generator
    g_config = LSTM3LayersConfig()
    generator = LSTM3Layers(g_config).cuda()
    # generator.load(MODEL_PATH)

    # Load the embeddings
    embed = get_word_embed().cuda()
    embed_flag = get_flag_embed().cuda()  # embedding for the trailing three flag dimensions
    vocab = get_vocab()
    g_criterion = nn.CrossEntropyLoss(ignore_index=2)  # ignore the padding index
    g_optimizer = optim.Adam(generator.parameters(), g_config.lr, weight_decay=0.1)
    g_trainloader = get_gen_trainloader(vocab=vocab, batch_size=g_config.batch_size,
                                        reload=True)

    # Build the discriminator
    d_config = SeDiscriminatorConfig()
    discriminator = SemanticDiscriminator(d_config)
    d_trainloader = get_gen_trainloader(vocab=vocab, batch_size=d_config.batch_size,
                                        reload=False)
    if torch.cuda.is_available():
        discriminator.cuda()
    d_criterion = nn.CrossEntropyLoss(ignore_index=2)  # ignore the padding index
    d_optimizer = optim.Adam(discriminator.parameters(), lr=d_config.lr)

    if not get_pretrain_generator:
        print("Pre-training the generator...")
        vis.text('Pre-training the generator at {}'.format(time.strftime('%x %X')),
                 win='log')
        for i in range(g_config.epoch):
            train_generator(model=generator, embed=embed, flag_embed=embed_flag,
                            dataloader=g_trainloader, vis=vis_g, criterion=g_criterion,
                            optimizer=g_optimizer, cuda=True,
                            name="Training Generator Epoch {0}".format(i))
            generator.save(g_save_dir + 'pretrain_epoch{}'.format(i))

    if not get_pretrain_discriminator:
        if get_pretrain_generator:
            generator.load(g_save_dir + 'pretrain_epoch{}'.format(0))
        print("Pre-training the discriminator...")
        vis.text('Pre-training the discriminator at {}'.format(time.strftime('%x %X')),
                 win='log')
        for j in range(d_config.epoch):
            train_discriminator(gen=generator, dis=discriminator, embed=embed,
                                flag_embed=embed_flag, dataloader=d_trainloader,
                                vis=vis_d, criterion=d_criterion, optimizer=d_optimizer,
                                cuda=True,
                                name="Training Discriminator Epoch {0}".format(j))
            discriminator.save(d_save_dir + 'pretrain_epoch{}'.format(j))
    else:
        print("Loading the pre-trained models")
        vis.text("Loaded the pre-trained models at {}".format(time.strftime('%x %X')),
                 win='log')
        generator.load(g_save_dir + 'pretrain_epoch{}'.format(0))
        discriminator.load(d_save_dir + 'pretrain_epoch{}'.format(0))

    # Adversarial training: alternate one generator update pass with two
    # discriminator update passes per epoch
    ad_batch_size = 250
    ad_train_loader = get_gen_trainloader(vocab=vocab, batch_size=ad_batch_size,
                                          reload=False)
    ad_criterion = nn.NLLLoss(ignore_index=2)
    ad_optimizer = optim.Adam(generator.parameters(), lr=0.001)
    for ad_epoch in range(5):
        for g_step in range(1):
            train_ad_generator(g_model=generator, d_model=discriminator, embed=embed,
                               flag_embed=embed_flag, dataloader=ad_train_loader,
                               vis=vis_g,
                               name="Adversarial Training Epoch {}".format(ad_epoch),
                               test_criterion=g_criterion, criterion=ad_criterion,
                               optimizer=ad_optimizer,  # use the dedicated adversarial optimizer
                               cuda=True)
        for d_step in range(2):
            train_discriminator(dis=discriminator, gen=generator, embed=embed,
                                flag_embed=embed_flag, dataloader=d_trainloader,
                                vis=vis_d, criterion=d_criterion,
                                optimizer=d_optimizer)
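# Hedged usage sketch: a command-line entry point. The original file does not
# show how these routines are invoked, and the two train() definitions above
# presumably live in separate scripts (in a single module the second would
# shadow the first), so which function to call here is an assumption.
if __name__ == '__main__':
    rl_train_3layer()  # or: train(reload_dataset=False)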