def main():
    # checkpoint
    if opt.restore:
        print('loading checkpoint...\n')
        checkpoints = torch.load(opt.restore)
    else:
        checkpoints = None

    data = load_data()
    print_log, log_path = build_log()
    model, optim, print_log = build_model(checkpoints, print_log)

    # scheduler
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

    params = {'updates': 0, 'report_loss': 0, 'report_total': 0,
              'report_correct': 0, 'report_time': time.time(),
              'log': print_log, 'log_path': log_path}
    for metric in config.metrics:
        params[metric] = []

    if opt.restore:
        params['updates'] = checkpoints['updates']

    if opt.mode == "train":
        for i in range(1, config.epoch + 1):
            if config.schedule:
                scheduler.step()
                print("Decaying learning rate to %g" % scheduler.get_lr()[0])
            train_model(model, data, optim, i, params)
        for metric in config.metrics:
            print_log("Best %s score: %.2f\n" % (metric, max(params[metric])))
    else:
        score = eval_model(model, data, params)
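# A minimal sketch (not part of the original code) of the checkpoint layout the
# restore path above expects: build_model() reads checkpoints['model'] and
# checkpoints['optim'], and main() reads checkpoints['updates']. The helper
# name save_checkpoint is hypothetical.
def save_checkpoint(model, optim, updates, path):
    torch.save({'model': model.state_dict(),
                'optim': optim,
                'updates': updates}, path)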
def test(input):
    # checkpoint
    # print('loading checkpoint...\n')
    checkpoints = torch.load(opt.restore, map_location='cpu')
    src_dict = load_dict_from_file(config.data + 'src.dict')

    # join the input tokens into a single whitespace-separated string
    input = ' '.join(input)

    with open(config.data + 'valid.src.str', 'w', encoding='utf-8') as f:
        for i in input.split():
            f.write(i)
            f.write(' ')
        f.write('\n')
    with open(config.data + 'valid.src.id', 'w', encoding='utf-8') as f:
        for i in input.split():
            if i in src_dict:
                f.write(src_dict[i])
                f.write(' ')
        f.write('\n')
    with open(config.data + 'valid.tgt.str', 'w', encoding='utf-8') as f:
        for i in input.split():
            f.write(i)
            f.write(' ')
        f.write('\n')
    with open(config.data + 'valid.tgt.id', 'w', encoding='utf-8') as f:
        for i in input.split():
            if i in src_dict:
                f.write(src_dict[i])
                f.write(' ')
        f.write('\n')

    data = load_data()
    print_log, log_path = build_log()
    model, optim, print_log = build_model(checkpoints, print_log)

    # scheduler
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

    params = {'updates': 0, 'report_loss': 0, 'report_total': 0,
              'report_correct': 0, 'report_time': time.time(),
              'log': print_log, 'log_path': log_path}
    for metric in config.metrics:
        params[metric] = []

    if opt.restore:
        params['updates'] = checkpoints['updates']

    score, ans = predict_eval(model, data, params)
    return ans
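# Hypothetical sketch of the src.dict format assumed by load_dict_from_file
# above: test() writes src_dict[token] directly into the id files, so the
# mapping is taken to be token -> id-as-string, one pair per line. The real
# loader may differ.
def load_dict_from_file(path):
    mapping = {}
    with open(path, encoding='utf-8') as f:
        for line in f:
            parts = line.split()
            if len(parts) >= 2:
                mapping[parts[0]] = parts[1]
    return mapping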
def build_model(checkpoints, print_log):
    # for k, v in config.items():
    #     print_log("%s:\t%s\n" % (str(k), str(v)))

    # model
    print('building model...\n')
    model = getattr(models, opt.model)(config)
    if checkpoints is not None:
        model.load_state_dict(checkpoints['model'])
    if opt.pretrain:
        print('loading checkpoint from %s' % opt.pretrain)
        pre_ckpt = torch.load(opt.pretrain)['model']
        # keep only encoder weights and strip the 'encoder.' prefix (8 chars)
        pre_ckpt = OrderedDict({key[8:]: pre_ckpt[key]
                                for key in pre_ckpt if key.startswith('encoder')})
        print(model.encoder.state_dict().keys())
        print(pre_ckpt.keys())
        model.encoder.load_state_dict(pre_ckpt)
    if use_cuda:
        model.cuda()

    # optimizer
    if checkpoints is not None:
        optim = checkpoints['optim']
    else:
        optim = models.Optim(config.optim, config.learning_rate,
                             config.max_grad_norm,
                             lr_decay=config.learning_rate_decay,
                             start_decay_at=config.start_decay_at)
    optim.set_parameters(model.parameters())

    # scheduler (note: created here but never returned or used)
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

    # print log
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    for k, v in config.items():
        print_log("%s:\t%s\n" % (str(k), str(v)))
    print_log("\n")
    print_log(repr(model) + "\n\n")
    print_log('total number of parameters: %d\n\n' % param_count)

    return model, optim, print_log
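# A hedged sketch of the Optim wrapper constructed above (an OpenNMT-style
# class whose constructor matches the call in build_model); the actual
# implementation in models/ may differ, e.g. in its decay logic.
import torch
import torch.nn.utils

class Optim(object):
    def __init__(self, method, lr, max_grad_norm, lr_decay=1.0, start_decay_at=None):
        self.method = method
        self.lr = lr
        self.max_grad_norm = max_grad_norm
        self.lr_decay = lr_decay
        self.start_decay_at = start_decay_at

    def set_parameters(self, params):
        # keep only trainable parameters and build the underlying optimizer
        self.params = [p for p in params if p.requires_grad]
        if self.method == 'sgd':
            self.optimizer = torch.optim.SGD(self.params, lr=self.lr)
        elif self.method == 'adam':
            self.optimizer = torch.optim.Adam(self.params, lr=self.lr)
        else:
            raise RuntimeError("invalid optim method: " + self.method)

    def step(self):
        # clip gradients, then apply the update
        if self.max_grad_norm:
            torch.nn.utils.clip_grad_norm_(self.params, self.max_grad_norm)
        self.optimizer.step()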
def main():
    # checkpoint
    if opt.restore:
        print('loading checkpoint...\n')
        checkpoints = torch.load(opt.restore,
                                 map_location='cuda:%d' % opt.gpus[0])
    else:
        checkpoints = None

    # load_data() returns the training and validation data plus the two vocabularies
    data = load_data()
    # build the log; returns a print_log function that writes its argument
    # to the log file, plus the log path
    print_log, log_path = build_log()
    # build the model
    model, optim, print_log = build_model(checkpoints, print_log)

    # scheduler (config.schedule is False by default)
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

    params = {'updates': 0, 'report_loss': 0, 'report_total': 0,
              'report_correct': 0, 'report_time': time.time(),
              'log': print_log, 'log_path': log_path}
    # config.metrics defaults to rouge, so this creates params['rouge']
    for metric in config.metrics:
        params[metric] = []

    # opt.restore is empty by default; resume the update count from the checkpoint
    if opt.restore:
        params['updates'] = checkpoints['updates']

    # training mode
    if opt.mode == "train":
        # train the model; 20 epochs by default
        for i in range(1, config.epoch + 1):
            # False by default
            if config.schedule:
                scheduler.step()
                print("Decaying learning rate to %g" % scheduler.get_lr()[0])
            train_model(model, data, optim, i, params)
        # print the best rouge score
        for metric in config.metrics:
            print_log("Best %s score: %s\n" % (metric, max(params[metric])))
    else:
        score = eval_model(model, data, params)
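# Note: stepping the scheduler before train_model() follows the pre-1.1
# PyTorch idiom. A hedged sketch of the equivalent loop on recent PyTorch,
# where the scheduler steps after the epoch (required since 1.1) and
# get_last_lr() (added in 1.4) replaces get_lr():
for i in range(1, config.epoch + 1):
    train_model(model, data, optim, i, params)
    if config.schedule:
        scheduler.step()
        print("Decaying learning rate to %g" % scheduler.get_last_lr()[0])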
def main():
    # checkpoint
    if opt.restore:
        print('loading checkpoint...\n')
        checkpoints = torch.load(opt.restore,
                                 map_location='cuda:%d' % opt.gpus[0])
    else:
        checkpoints = None

    # data = load_data()
    lcsts_path = '/Users/alvin/workspace/dataset_nlp/lcsts/LCSTS_DATA_XML/PART_I_10000.txt'
    train_iter, val_iter, test_iter, LCSTS_FIELD = load_dataset_lcsts(
        batch_size=5, filename=lcsts_path)
    config.src_vocab_size = len(LCSTS_FIELD['src'].vocab)
    config.tgt_vocab_size = len(LCSTS_FIELD['tgt'].vocab)

    print_log, log_path = build_log()
    model, optim, print_log = build_model(checkpoints, print_log)

    # scheduler
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

    params = {'updates': 0, 'report_loss': 0, 'report_total': 0,
              'report_correct': 0, 'report_time': time.time(),
              'log': print_log, 'log_path': log_path}
    for metric in config.metrics:
        params[metric] = []

    if opt.restore:
        params['updates'] = checkpoints['updates']

    if opt.mode == "train":
        for i in range(1, config.epoch + 1):
            if config.schedule:
                scheduler.step()
                print("Decaying learning rate to %g" % scheduler.get_lr()[0])
            train_model(model, train_iter, val_iter, LCSTS_FIELD, optim, i, params)
        for metric in config.metrics:
            print_log("Best %s score: %.2f\n" % (metric, max(params[metric])))
    else:
        score = eval_model(model, val_iter, LCSTS_FIELD, params)
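# Hypothetical sketch of consuming one batch from the torchtext-style
# iterators returned by load_dataset_lcsts above; the attribute names
# src/tgt are assumed from the LCSTS_FIELD keys and may differ in the
# real loader.
for batch in train_iter:
    src = batch.src  # LongTensor of source token ids
    tgt = batch.tgt  # LongTensor of target token ids
    print(src.shape, tgt.shape)
    break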
def main():
    # set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    # checkpoint
    if args.restore:  # path to a previously saved model
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))

    contentfile = os.path.join(config.data, "segged_content.txt")
    # word2id, id2word, word2count = load_vocab(args.vocab_file, args.vocab_size)
    vocab = Vocab(config.vocab, contentfile, config.vocab_size)

    # Load data
    start_time = time.time()
    use_gnn = False
    if args.graph_model == 'GNN':
        use_gnn = True
    dataloader = DataLoader(config, config.data, config.batch_size, vocab,
                            args.adj, use_gnn, args.model, args.notrain,
                            args.debug)
    print("DATA loaded!")

    torch.backends.cudnn.benchmark = True

    # data
    print('loading data...\n')
    print('loading time cost: %.3f' % (time.time() - start_time))

    # model
    print('building model...\n')
    # configure the model; graph2gru and graph2gru_noAtten were added later
    if args.model == 'graph2seq':
        model = graph2seq(config, vocab, use_cuda, args.use_copy, args.use_bert,
                          args.word_level_model, args.graph_model)
    elif args.model == 'graph2gru':
        model = graph2gru.graph2gru(config, vocab, use_cuda, args.use_copy,
                                    args.use_bert, args.word_level_model,
                                    args.graph_model)
    elif args.model == 'graph2gru_noAtten':
        model = graph2gru_noAtten.graph2gru_noAtten(config, vocab, use_cuda,
                                                    args.use_copy, args.use_bert,
                                                    args.word_level_model,
                                                    args.graph_model)
    elif args.model == 'seq2seq':
        model = seq2seq(config, vocab, use_cuda, use_content=args.use_content)
    elif args.model == 'bow2seq':
        model = bow2seq(config, vocab, use_cuda)
    elif args.model == 'h_attention':
        model = hierarchical_attention(config, vocab, use_cuda)

    if args.restore:
        model.load_state_dict(checkpoints['model'])
    if use_cuda:
        model.cuda()
        # lm_model.cuda()
    if len(args.gpus) > 1:
        # data-parallel across GPUs
        model = nn.DataParallel(model, device_ids=args.gpus, dim=1)

    logging(repr(model) + "\n\n")  # log the model architecture

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    logging('total number of parameters: %d\n\n' % param_count)

    # 'updates' counts completed updates so training can resume after an interruption
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    if args.restore:
        optim = checkpoints['optim']
    else:
        optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                      lr_decay=config.learning_rate_decay,
                      start_decay_at=config.start_decay_at)
    # if opt.pretrain:
    #     pretrain_lm(lm_model, vocab)
    optim.set_parameters(model.parameters())

    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    else:
        scheduler = None

    print("nana...")
    if not args.notrain:
        max_bleu = train(model, vocab, dataloader, scheduler, optim, updates)
        logging("Best bleu score: %.2f\n" % (max_bleu))
    else:
        assert args.restore is not None
        eval(model, vocab, dataloader, 0, updates, do_test=False)
    print("nana```")
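# Note on dim=1 in nn.DataParallel above: inputs are scattered along
# dimension 1, which matches sequence-first tensors of shape
# (seq_len, batch, ...). A minimal sketch with hypothetical GPU ids:
import torch.nn as nn
parallel_model = nn.DataParallel(model, device_ids=[0, 1], dim=1)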
model.cuda()
if len(opt.gpus) > 1:
    model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)

# optimizer
if opt.restore:
    optim = checkpoints['optim']
else:
    optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                  lr_decay=config.learning_rate_decay,
                  start_decay_at=config.start_decay_at)
optim.set_parameters(model.parameters())

if config.schedule:
    scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

# total number of parameters
param_count = 0
for param in model.parameters():
    param_count += param.view(-1).size()[0]

if not os.path.exists(config.log):
    os.mkdir(config.log)
if opt.log == '':
    log_path = config.log + utils.format_time(time.localtime()) + '/'
else:
    log_path = config.log + opt.log + '/'
if not os.path.exists(log_path):
    os.mkdir(log_path)
logging = utils.logging(log_path + 'log.txt')  # argument assumed, matching the sibling script
def main(vocab, dataloader):
    # set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    # checkpoint
    if args.restore:  # path to a previously saved model
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))

    torch.backends.cudnn.benchmark = True

    # model
    print('building model...\n')
    # configure the model
    model = GLSTM(config, vocab)
    # model = hierarchical_attention(config, vocab)
    # model = SLSTM(config, vocab)
    # model = Transformer(config, vocab)
    if args.restore:
        model.load_state_dict(checkpoints['model'])
    if use_cuda:
        model.cuda()
    if len(args.gpus) > 1:
        # data-parallel across GPUs
        model = nn.DataParallel(model, device_ids=args.gpus, dim=1)

    logging(repr(model) + "\n\n")  # log the model architecture

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    logging('total number of parameters: %d\n\n' % param_count)

    # 'updates' counts completed updates so training can resume after an interruption
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    if args.restore:
        optim = checkpoints['optim']
    else:
        optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                      lr_decay=config.learning_rate_decay,
                      start_decay_at=config.start_decay_at)
    optim.set_parameters(model.parameters())

    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    else:
        scheduler = None

    if not args.notrain:
        max_acc, test_acc = train(model, dataloader, scheduler, optim, updates)
        logging("Best accuracy: %.2f, test accuracy: %.2f\n"
                % (max_acc * 100, test_acc * 100))
        return test_acc
    else:
        assert args.restore is not None
        eval(model, vocab, dataloader, 0, updates, do_test=True)
model.load_state_dict(stateDict)
ploter = LinePlotter("RoboCup")

for iter in range(iters):
    limit = (iter + 1) * epochs
    optimizer = torch.optim.SGD([{'params': model.parameters()}],
                                lr=lr, momentum=momentum,
                                weight_decay=weight_decay)
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, limit, 1e-3)

    bestLoss = 100
    bestAcc = 0
    bestIoU = 0
    bestTAcc = 0
    bestConf = torch.zeros(numClass, numClass)

    pruneAm = 0.08
    pruneThreshLow = 500 if v2 else 1000
    pruneThreshHigh = 15000 if v2 else 50000

    if iter > 0:
        cb()
        with torch.no_grad():
            indices = pruneModel2(model.parameters(), (iter + 1) * pruneAm,
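# The loop above implements SGDR-style warm restarts by re-creating the
# optimizer and cosine schedule on every outer iteration. A hedged sketch of
# the built-in alternative PyTorch provides (behavior differs slightly, e.g.
# optimizer state is kept across restarts):
from torch.optim import lr_scheduler
scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=epochs, eta_min=1e-3)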
optimG = Optim(config.optim, config.learning_rate, config.max_grad_norm,
               lr_decay=config.learning_rate_decay,
               start_decay_at=config.start_decay_at)
optimD = Optim(config.optim, config.learning_rate, config.max_grad_norm,
               lr_decay=config.learning_rate_decay,
               start_decay_at=config.start_decay_at)
optimG.set_parameters(netG.parameters())
optimD.set_parameters(netD.parameters())

if config.schedule:
    schedulerG = L.CosineAnnealingLR(optimG.optimizer, T_max=config.epoch)
    schedulerD = L.CosineAnnealingLR(optimD.optimizer, T_max=config.epoch)

# logging
if not os.path.exists(config.log):
    os.makedirs(config.log)
if config.log.endswith('/'):
    log_path = config.log
else:
    log_path = config.log + '/'
if not os.path.exists(log_path):
    os.mkdir(log_path)
logging = utils.logging(log_path + 'log.txt')
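# A hedged sketch (not from the original) of how the paired generator /
# discriminator optimizers above are typically driven in alternation;
# train_D, train_G, and dataloader are hypothetical names.
for epoch in range(1, config.epoch + 1):
    if config.schedule:
        schedulerG.step()
        schedulerD.step()
    for batch in dataloader:  # hypothetical data iterator
        loss_d = train_D(netD, netG, batch, optimD)  # update the discriminator
        loss_g = train_G(netD, netG, batch, optimG)  # update the generator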
def main():
    # set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    # checkpoint
    if args.restore:  # path to a previously saved model
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))

    # word2id, id2word, word2count = load_vocab(args.vocab_file, args.vocab_size)
    vocab = Vocab(config.vocab, config.data, config.vocab_size)

    # Load data
    start_time = time.time()
    dataloader = DataLoader(config, args.task, config.has_dev, config.batch_size,
                            vocab, args.model, args.use_depparse, args.notrain,
                            args.debug)
    print("DATA loaded!")

    torch.backends.cudnn.benchmark = True

    # data
    print('loading data...\n')
    print('loading time cost: %.3f' % (time.time() - start_time))

    # model
    print('building model...\n')
    # configure the model
    if args.model == 'h_attention':
        model = hierarchical_attention(config, vocab, use_cuda)
    elif args.model == 'slstm':
        model = SLSTM(config, vocab, use_cuda)
    elif args.model == 'glstm':
        model = GLSTM(config, vocab, use_cuda)
    elif args.model == 'hglstm':
        model = HGLSTM(config, vocab, use_cuda)

    if args.restore:
        model.load_state_dict(checkpoints['model'])
    if use_cuda:
        model.cuda()
        # lm_model.cuda()
    if len(args.gpus) > 1:
        # data-parallel across GPUs
        model = nn.DataParallel(model, device_ids=args.gpus, dim=1)

    logging(repr(model) + "\n\n")  # log the model architecture

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]
    logging('total number of parameters: %d\n\n' % param_count)

    # 'updates' counts completed updates so training can resume after an interruption
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    if args.restore:
        optim = checkpoints['optim']
    else:
        optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                      lr_decay=config.learning_rate_decay,
                      start_decay_at=config.start_decay_at)
    # if opt.pretrain:
    #     pretrain_lm(lm_model, vocab)
    optim.set_parameters(model.parameters())

    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    else:
        scheduler = None

    if not args.notrain:
        max_acc, test_acc = train(model, vocab, dataloader, scheduler, optim, updates)
        logging("Best accuracy: %.2f, test accuracy: %.2f\n"
                % (max_acc * 100, test_acc * 100))
    else:
        assert args.restore is not None
        eval(model, vocab, dataloader, 0, updates, do_test=True)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9,
                      nesterov=True, weight_decay=1e-4)

# Number of restarts
numRest = 4
# Number of epochs per restart
numEpoch = 75

# Cosine annealing learning rate schedule
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, numEpoch, eta_min=5e-3)


def train(epoch):
    # variables for loss
    running_loss = 0.0
    correct = 0.0
    total = 0

    # set the network to train mode (for batchnorm and dropout)
    net.train()

    # Create progress bar
    bar = progressbar.ProgressBar(0, len(trainLoader),
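# A hedged sketch (not from the original) of the restart driver implied by
# numRest/numEpoch above: the cosine schedule is re-created after each block
# of numEpoch epochs, yielding warm restarts; train(epoch) is the function
# defined above.
for restart in range(numRest):
    scheduler = lr_scheduler.CosineAnnealingLR(optimizer, numEpoch, eta_min=5e-3)
    for epoch in range(numEpoch):
        train(epoch)
        scheduler.step()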