                    score_fn=opt.score)
if opt.restore:
    model.load_state_dict(checkpoints['model'])
if use_cuda:
    model.cuda()
if len(opt.gpus) > 1:
    model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)

# optimizer
if opt.restore:
    optim = checkpoints['optim']
else:
    optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                  lr_decay=config.learning_rate_decay,
                  start_decay_at=config.start_decay_at)
optim.set_parameters(model.parameters())

if config.schedule:
    scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

# total number of parameters
param_count = 0
for param in model.parameters():
    param_count += param.view(-1).size()[0]

if not os.path.exists(config.log):
    os.mkdir(config.log)
if opt.log == '':
    log_path = config.log + utils.format_time(time.localtime()) + '/'
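# Editor's sketch (assumption, not part of the original script): the cosine-annealing
# scheduler built above with T_max=config.epoch is typically stepped once per epoch
# inside the training loop, which is not shown in this excerpt. `run_epochs` and
# `train_one_epoch` are hypothetical names used only for illustration.
def run_epochs(model, dataloader, optim, scheduler, num_epochs):
    for epoch in range(1, num_epochs + 1):
        train_one_epoch(model, dataloader, optim)  # hypothetical per-epoch helper
        if scheduler is not None:
            scheduler.step()  # anneal the learning rate once per epoch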
def main():
    # set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    # checkpoint
    if args.restore:
        # path where the existing model checkpoint is stored
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))

    contentfile = os.path.join(config.data, "segged_content.txt")
    # word2id, id2word, word2count = load_vocab(args.vocab_file, args.vocab_size)
    vocab = Vocab(config.vocab, contentfile, config.vocab_size)

    # Load data
    start_time = time.time()
    use_gnn = False
    if args.graph_model == 'GNN':
        use_gnn = True
    dataloader = DataLoader(config, config.data, config.batch_size, vocab, args.adj,
                            use_gnn, args.model, args.notrain, args.debug)
    print("DATA loaded!")
    torch.backends.cudnn.benchmark = True

    # data
    print('loading data...\n')
    print('loading time cost: %.3f' % (time.time() - start_time))

    # model
    print('building model...\n')
    # configure the model
    # Model and optimizer
    # added the graph2gru and graph2gru_noAtten models
    if args.model == 'graph2seq':
        model = graph2seq(config, vocab, use_cuda, args.use_copy, args.use_bert,
                          args.word_level_model, args.graph_model)
    elif args.model == 'graph2gru':
        model = graph2gru.graph2gru(config, vocab, use_cuda, args.use_copy, args.use_bert,
                                    args.word_level_model, args.graph_model)
    elif args.model == 'graph2gru_noAtten':
        model = graph2gru_noAtten.graph2gru_noAtten(config, vocab, use_cuda, args.use_copy,
                                                    args.use_bert, args.word_level_model,
                                                    args.graph_model)
    elif args.model == 'seq2seq':
        model = seq2seq(config, vocab, use_cuda, use_content=args.use_content)
    elif args.model == 'bow2seq':
        model = bow2seq(config, vocab, use_cuda)
    elif args.model == 'h_attention':
        model = hierarchical_attention(config, vocab, use_cuda)

    if args.restore:
        model.load_state_dict(checkpoints['model'])
    if use_cuda:
        model.cuda()
        # lm_model.cuda()
    if len(args.gpus) > 1:
        # run data-parallel across GPUs
        model = nn.DataParallel(model, device_ids=args.gpus, dim=1)
    logging(repr(model) + "\n\n")  # log the model architecture

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    logging('total number of parameters: %d\n\n' % param_count)

    # `updates` records how many epochs have already been run, so training can
    # resume if the program was interrupted midway.
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    if args.restore:
        optim = checkpoints['optim']
    else:
        optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                      lr_decay=config.learning_rate_decay,
                      start_decay_at=config.start_decay_at)
    # if opt.pretrain:
    #     pretrain_lm(lm_model, vocab)
    optim.set_parameters(model.parameters())
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    else:
        scheduler = None

    print("nana...")
    if not args.notrain:
        max_bleu = train(model, vocab, dataloader, scheduler, optim, updates)
        logging("Best bleu score: %.2f\n" % (max_bleu))
    else:
        assert args.restore is not None
        eval(model, vocab, dataloader, 0, updates, do_test=False)
    print("nana```")
def main(vocab, dataloader):
    # set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    # checkpoint
    if args.restore:
        # path where the existing model checkpoint is stored
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))

    torch.backends.cudnn.benchmark = True

    # model
    print('building model...\n')
    # configure the model
    # Model and optimizer
    model = GLSTM(config, vocab)
    # model = hierarchical_attention(config, vocab)
    # model = SLSTM(config, vocab)
    # model = Transformer(config, vocab)

    if args.restore:
        model.load_state_dict(checkpoints['model'])
    if use_cuda:
        model.cuda()
    if len(args.gpus) > 1:
        # run data-parallel across GPUs
        model = nn.DataParallel(model, device_ids=args.gpus, dim=1)
    logging(repr(model) + "\n\n")  # log the model architecture

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    logging('total number of parameters: %d\n\n' % param_count)

    # `updates` records how many epochs have already been run, so training can
    # resume if the program was interrupted midway.
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    if args.restore:
        optim = checkpoints['optim']
    else:
        optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                      lr_decay=config.learning_rate_decay,
                      start_decay_at=config.start_decay_at)
    optim.set_parameters(model.parameters())
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    else:
        scheduler = None

    if not args.notrain:
        max_acc, test_acc = train(model, dataloader, scheduler, optim, updates)
        logging("Best accuracy: %.2f, test accuracy: %.2f\n" % (max_acc * 100, test_acc * 100))
        return test_acc
    else:
        assert args.restore is not None
        eval(model, vocab, dataloader, 0, updates, do_test=True)
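# Calling pattern (editor's sketch, not in the original file): this main() expects the
# vocabulary and dataloader to be built by the caller and returns the test accuracy, so
# the expensive data loading can be shared across several runs. `build_vocab` and
# `build_dataloader` are hypothetical names standing in for the real setup code.
def run_once():
    vocab = build_vocab(config)                   # hypothetical vocab setup
    dataloader = build_dataloader(config, vocab)  # hypothetical data setup
    test_acc = main(vocab, dataloader)
    print('test accuracy: %.2f' % (test_acc * 100))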
if use_cuda:
    netG.cuda()
    netD.cuda()
if len(opt.gpus) > 1:
    netG = nn.DataParallel(netG, device_ids=opt.gpus, dim=1)
    netD = nn.DataParallel(netD, device_ids=opt.gpus, dim=1)

# optimizer
if opt.restore:
    optimG = checkpoints['optimG']
    optimD = checkpoints['optimD']
else:
    optimG = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                   lr_decay=config.learning_rate_decay,
                   start_decay_at=config.start_decay_at)
    optimD = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                   lr_decay=config.learning_rate_decay,
                   start_decay_at=config.start_decay_at)
optimG.set_parameters(netG.parameters())
optimD.set_parameters(netD.parameters())

if config.schedule:
    schedulerG = L.CosineAnnealingLR(optimG.optimizer, T_max=config.epoch)
    schedulerD = L.CosineAnnealingLR(optimD.optimizer, T_max=config.epoch)
def main():
    # set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    # checkpoint
    if args.restore:
        # path where the existing model checkpoint is stored
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))

    # word2id, id2word, word2count = load_vocab(args.vocab_file, args.vocab_size)
    vocab = Vocab(config.vocab, config.data, config.vocab_size)

    # Load data
    start_time = time.time()
    dataloader = DataLoader(config, args.task, config.has_dev, config.batch_size, vocab,
                            args.model, args.use_depparse, args.notrain, args.debug)
    print("DATA loaded!")
    torch.backends.cudnn.benchmark = True

    # data
    print('loading data...\n')
    print('loading time cost: %.3f' % (time.time() - start_time))

    # model
    print('building model...\n')
    # configure the model
    # Model and optimizer
    if args.model == 'h_attention':
        model = hierarchical_attention(config, vocab, use_cuda)
    elif args.model == 'slstm':
        model = SLSTM(config, vocab, use_cuda)
    elif args.model == 'glstm':
        model = GLSTM(config, vocab, use_cuda)
    elif args.model == 'hglstm':
        model = HGLSTM(config, vocab, use_cuda)

    if args.restore:
        model.load_state_dict(checkpoints['model'])
    if use_cuda:
        model.cuda()
        # lm_model.cuda()
    if len(args.gpus) > 1:
        # run data-parallel across GPUs
        model = nn.DataParallel(model, device_ids=args.gpus, dim=1)
    logging(repr(model) + "\n\n")  # log the model architecture

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    logging('total number of parameters: %d\n\n' % param_count)

    # `updates` records how many epochs have already been run, so training can
    # resume if the program was interrupted midway.
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    if args.restore:
        optim = checkpoints['optim']
    else:
        optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                      lr_decay=config.learning_rate_decay,
                      start_decay_at=config.start_decay_at)
    # if opt.pretrain:
    #     pretrain_lm(lm_model, vocab)
    optim.set_parameters(model.parameters())
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    else:
        scheduler = None

    if not args.notrain:
        max_acc, test_acc = train(model, vocab, dataloader, scheduler, optim, updates)
        logging("Best accuracy: %.2f, test accuracy: %.2f\n" % (max_acc * 100, test_acc * 100))
    else:
        assert args.restore is not None
        eval(model, vocab, dataloader, 0, updates, do_test=True)
# print('----')
#
# for i, p in enumerate(model.parameters()):
#     print(i)
#     print(p.shape)

if use_cuda:
    model.cuda()
if len(opt.gpus) > 1:
    model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)

# optimizer
if opt.restore:
    optim = checkpoints['optim']
else:
    optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                  lr_decay=config.learning_rate_decay,
                  start_decay_at=config.start_decay_at)
optim.set_parameters(filter(lambda p: p.requires_grad, model.parameters()))
if config.schedule:
    scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

# total number of parameters
param_count = 0
for param in model.parameters():
    param_count += param.view(-1).size()[0]

logging_csv = utils.logging_csv(log_path + 'record.csv')
for k, v in config.items():
    logging("%s:\t%s\n" % (str(k), str(v)))
logging("\n")
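# Editor's sketch (assumption, not in the original): set_parameters() above receives only
# the parameters with requires_grad=True, while param_count above counts every parameter.
# The trainable subset can be counted separately with the more concise Tensor.numel():
trainable_param_count = sum(p.numel() for p in model.parameters() if p.requires_grad)
logging('total number of trainable parameters: %d\n\n' % trainable_param_count)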
def main():
    # set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    vocab = Vocab(config.vocab_file, config.emb_size, use_pre_emb=False,
                  vocab_size=config.vocab_size)
    print('vocab clear')
    torch.backends.cudnn.benchmark = True

    # model
    print('building model...\n')
    # configure the model
    # Model and optimizer
    if args.model == 'seq2seq':
        model_generate = seq2seq(config, vocab, use_cuda, pretrain=None)
    elif args.model == 'transformer':
        model_generate = Transformer(config, vocab, use_cuda, pretrain=None)
    elif args.model == 'transformer_gcn':
        model_generate = Transformer_gcn(config, vocab, use_cuda, pretrain=None)

    if args.restore:
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))
        model_generate.load_state_dict(checkpoints['model_generate'])
        if args.train_type == 'sample_rl':
            emb = model_generate.embedding
            model_sample = graph2seq_rl(config, vocab, use_cuda, emb, pretrain=None)
        elif args.train_type == 'generate':
            model_sample = graph2seq_rl(config, vocab, use_cuda, 0, pretrain=None)
            model_sample.load_state_dict(checkpoints['model_sample'])
        else:
            print('err')
    else:
        # model_sample = graph2seq_rl(config, vocab, use_cuda, model_generate.embedding, pretrain=None)
        model_sample = graph2seq_rl(config, vocab, use_cuda, 0, pretrain=None)

    '''if args.restore:
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))
        model_sample.load_state_dict(checkpoints['model_sample'])
        model_generate.load_state_dict(checkpoints['model_generate'])'''

    if use_cuda:
        model_sample.cuda()
        model_generate.cuda()
    # if len(args.gpus) > 1:  # run data-parallel across GPUs
    #     model = nn.DataParallel(model, device_ids=args.gpus, dim=1)
    logging(repr(model_sample) + "\n\n")  # log the model architecture
    logging(repr(model_generate) + "\n\n")

    # total number of parameters
    sample_param_count = 0
    generate_param_count = 0
    for param in model_sample.parameters():
        sample_param_count += param.view(-1).size()[0]
    for param in model_generate.parameters():
        generate_param_count += param.view(-1).size()[0]
    logging('total number of sample parameters: %d\n\n' % sample_param_count)
    logging('total number of generate parameters: %d\n\n' % generate_param_count)
    print('# generator parameters:', sum(param.numel() for param in model_generate.parameters()))

    # `updates` records how many epochs have already been run, so training can
    # resume if the program was interrupted midway.
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    '''if args.restore:
        optim_sample = checkpoints['optim_sample']
        optim_generate = checkpoints['optim_generate']
    else:'''
    # optimizer = optim.Adam(self.params, lr=self.lr)
    optim_sample = Optim(config.optim, config.learning_rate_sample, config.max_grad_norm,
                         lr_decay=config.learning_rate_decay,
                         start_decay_at=config.start_decay_at)
    optim_generate = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                           lr_decay=config.learning_rate_decay,
                           start_decay_at=config.start_decay_at)
    optim_sample.set_parameters(model_sample.parameters())
    optim_generate.set_parameters(model_generate.parameters())

    if config.schedule:
        scheduler_sample = L.SetLR(optim_sample.optimizer)
        scheduler_generate = L.SetLR(optim_generate.optimizer)
    else:
        scheduler_sample = None
        scheduler_generate = None

    if args.type == 'train':
        start_time = time.time()
        # dataloader_train = get_dataloader(vocab, split='train', train_type='qk')
        dataloader_train = get_dataloader(vocab, split='train', train_type='k')
        dataloader_dev = get_dataloader(vocab, split='dev', train_type='k')
        dataloader_dev_qk = get_dataloader(vocab, split='dev', train_type='qk')
        print('loading data...\n')
        print('loading time cost: %.3f' % (time.time() - start_time))
        max_bleu = train(model_sample, model_generate, vocab, dataloader_train, dataloader_dev,
                         dataloader_dev_qk, scheduler_sample, scheduler_generate,
                         optim_sample, optim_generate, updates)
        logging("Best bleu score: %.2f\n" % (max_bleu))
    elif args.type == 'eval':
        # Load data
        start_time = time.time()
        dataloader_test = get_dataloader(vocab, split='test', train_type='k')
        dataloader_test_qk = get_dataloader(vocab, split='test', train_type='qk')
        print('loading data...\n')
        print('loading time cost: %.3f' % (time.time() - start_time))
        assert args.restore is not None
        eval(model_sample, model_generate, vocab, dataloader_test, dataloader_test_qk, 0, updates)
    else:
        print('error')
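# Minimal entry point (editor's sketch, not part of the original excerpt): the script is
# assumed to define `args`, `config`, `use_cuda`, `log_path`, and the logging helpers at
# module level before main() is invoked; only the guard itself is shown here.
if __name__ == '__main__':
    main()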