        predictions = output.view(-1, num_tokens)
        # Use .item() rather than the deprecated .data / 0-dim indexing.
        total_loss += len(X) * criterion(predictions, Y).item()
        hidden = repackage_hidden(hidden)
    final_loss = total_loss / len(data)
    print("Epoch: " + str(epoch) + " Val Loss: " + str(final_loss) +
          " Val Perplexity: " + str(math.exp(final_loss)))
    return final_loss


optimizer = torch.optim.SGD(model.parameters(), lr=INITIAL_LEARNING_RATE,
                            weight_decay=WEIGHT_DECAY)
stochastic = True


def saving_model(final_loss):
    global loss_least
    if final_loss < loss_least:
        with open(MODEL_SAVE_PATH, 'wb') as f:
            torch.save(model, f)
        loss_least = final_loss


resume = True
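# The evaluation code above calls repackage_hidden() without defining it. A
# minimal sketch of the usual PyTorch word-language-model helper (an
# assumption about this codebase, not taken from it): detach hidden states
# from the autograd graph so backprop is truncated at batch boundaries.
import torch

def repackage_hidden(h):
    """Detach hidden states from their training history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    # LSTM hidden state is a tuple (h_n, c_n); recurse into it.
    return tuple(repackage_hidden(v) for v in h)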
# Total context window size used for training. The context window consists of
# 1. conditioning_context, where input data points are available and network
#    predictions are conditioned on actual input data, and
# 2. prediction_context, where the network predictions are conditioned on
#    network output at the previous step.
# Covariates are assumed to be available for the entire context window.
ctx_win_len = cfg['ctx_win_len']
cond_win_len = cfg['cond_win_len']
pred_win_len = ctx_win_len - cond_win_len - 1
batch_size = cfg['batch_size']

model = model.model(num_lstms=cfg['num_lstms'], input_dim=input_dim,
                    output_dim=cfg['num_targets'],
                    hidden_dim=cfg['hidden_dim']).to(device)
criterion = torch.nn.MSELoss()
optimizer = optim.Adam(model.parameters(), cfg['lr'])
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=cfg['lr_step_size'],
                                      gamma=cfg['lr_gamma'])
train_sampler, test_sampler = dataset.get_train_test_samplers(
    cfg['train_test_split'])
train_dataloader = DataLoader(dataset, batch_size=batch_size,
                              sampler=train_sampler, shuffle=False,
                              num_workers=0)
test_dataloader = DataLoader(dataset, batch_size=1,
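# Hedged sketch (function and tensor names are assumptions, not this repo's
# code) of how one context window splits into the two regions described
# above. One step is lost because targets are the inputs shifted by one,
# which matches pred_win_len = ctx_win_len - cond_win_len - 1.
def split_window(window, cond_win_len):
    # window: (batch_size, ctx_win_len, num_features)
    cond_ctx = window[:, :cond_win_len, :]    # teacher forcing: ground-truth inputs
    pred_ctx = window[:, cond_win_len:-1, :]  # free running: model sees its own outputs
    targets = window[:, 1:, :]                # next-step targets for the whole window
    return cond_ctx, pred_ctx, targets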
        print('Missing model file for evaluating test set.')
        exit()
else:
    model = models.model.UNet(args.im_size, args.kernel_size)

# Datasets and dataloaders.
if not args.test:
    train_dataset = IGVCDataset(train_txt, im_size=args.im_size, split='train',
                                transform=transform, val_samples=args.val_samples)
    val_dataset = IGVCDataset(train_txt, im_size=args.im_size, split='val',
                              transform=transform, val_samples=args.val_samples)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                               shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size,
                                             shuffle=True, **kwargs)

    # Optimizer.
    lr = args.lr
    print('Initial lr: %f.' % lr)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=args.weight_decay)
else:
    test_dataset = IGVCDataset(test_txt, im_size=args.im_size, split='test',
                               transform=transform)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1,
                                              shuffle=True, **kwargs)

criterion = F.binary_cross_entropy

if args.cuda:
    model.cuda()


def train(epoch):
    iters = []
    lrs = []
    train_losses = []
    val_losses = []
    val_accuracies = []
    model.train()
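# train() above is cut off by the excerpt. A hedged sketch of a typical inner
# step with F.binary_cross_entropy for this setup (the shapes are an
# assumption; BCE expects sigmoid outputs and targets in [0, 1]):
for batch_idx, (data, target) in enumerate(train_loader):
    if args.cuda:
        data, target = data.cuda(), target.cuda()
    optimizer.zero_grad()
    output = model(data)              # UNet output, assumed sigmoid-activated
    loss = criterion(output, target)  # criterion = F.binary_cross_entropy
    loss.backward()
    optimizer.step()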
# from tqdm import tqdm

config = DefaultConfig()
if config.use_hyperboard:
    from hyperboard import Agent
    agent = Agent(username='******', password='******', port=5005)
    parameter = config.todict()
    validate_loss_record = agent.register(parameter, 'loss', overwrite=True)

train_dataset = dataset.MyDataset()
validate_dataset = dataset.MyDataset()
criticer = torch.nn.MSELoss()
model = model.Model()
optimizer = optim.Adam(model.parameters(), lr=config.lr)
if config.gpu >= 0:
    model.cuda(config.gpu)

max_loss = 0
no_gain = 0
global_step = 0
train_num = len(train_dataset)

model.train()
for epoch in range(config.epoch_num):
    train_loader = DataLoader(train_dataset, batch_size=config.batch_size,
                              shuffle=True)
    for step, (traindata, trainlabel) in enumerate(train_loader):
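# Hedged guess at what the max_loss / no_gain counters above drive:
# validation-based early stopping inside the epoch loop. A minimal sketch
# (validate_loss, best_loss, and config.patience are assumed names, not this
# repo's); when tracking a minimum, the running best would more naturally
# start at float('inf'):
if validate_loss < best_loss:
    best_loss = validate_loss
    no_gain = 0
else:
    no_gain += 1
if no_gain >= config.patience:
    break  # stop once validation has not improved for `patience` checks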
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

val_src_file = ''
val_tgt_file = ''
val_dataset = MSRDataset(val_src_file, val_tgt_file, max_length)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

writer = SummaryWriter(tensorboard_logdir)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
optimizer = AdamW(model.parameters(), lr=lr)

train_start = time.time()
for epoch in range(epochs):
    epoch_start = time.time()
    for batch_idx, batch in enumerate(train_dataloader):
        model.train()
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        # Dropping the last decoder input id to compensate for the
        # padding token added at the start to achieve the right shift.
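# Hedged sketch of the right shift the comment above describes (a BART-style
# convention; `labels` and `tokenizer` are assumptions): prepend the padding
# token and drop the final id so decoder inputs and labels stay aligned.
labels = batch['labels'].to(device)  # (batch, seq_len)
pad = torch.full((labels.size(0), 1), tokenizer.pad_token_id,
                 dtype=labels.dtype, device=device)
decoder_input_ids = torch.cat([pad, labels[:, :-1]], dim=1)  # right-shifted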
def train(model, criterion, converter, device, train_datasets,
          valid_datasets=None, pretrain=False):
    print('Device:', device)
    '''
    data_parallel = False
    if torch.cuda.device_count() > 1:
        print("Use", torch.cuda.device_count(), 'gpus')
        data_parallel = True
        model = nn.DataParallel(model)
    '''
    model = model.to(device)

    if pretrain:
        # print("Using pretrained model")
        '''
        state_dict = torch.load("/home/chen-ubuntu/Desktop/checks_dataset/pths/crnn_pertrain.pth",
                                map_location=device)
        cnn_modules = {}
        rnn_modules = {}
        for module in state_dict:
            if module.split('.')[1] == 'FeatureExtraction':
                key = module.replace("module.FeatureExtraction.", "")
                cnn_modules[key] = state_dict[module]
            elif module.split('.')[1] == 'SequenceModeling':
                key = module.replace("module.SequenceModeling.", "")
                rnn_modules[key] = state_dict[module]
        model.cnn.load_state_dict(cnn_modules)
        model.rnn.load_state_dict(rnn_modules)
        '''
        # model.load_state_dict(torch.load('/root/checks_recognize_v2/pths/hand_num_epoch278_acc0.995020.pth'))

    dataset_name = 'symbol'
    batch_dict = {
        'print_word': 32,
        'hand_num': 48,
        'print_num': 48,
        'symbol': 64,
        'hand_word': 64,
        'seal': 64,
        'catword': 32
    }
    dataset = train_datasets.get(dataset_name)
    dataloader = DataLoader(dataset, batch_size=batch_dict.get(dataset_name),
                            shuffle=True, num_workers=4, drop_last=False)

    lr = 1e-3
    params = model.parameters()
    optimizer = optim.Adam(params, lr)
    optimizer.zero_grad()

    batch_cnt = 0
    for epoch in range(config.epochs):
        epoch_loss = 0
        model.train()
        train_acc = 0
        train_acc_cnt = 0
        for i, (img, label, _) in enumerate(dataloader):
            n_correct = 0
            batch_cnt += 1
            train_acc_cnt += 1
            img = img.to(device)
            text, length = converter.encode(label)
            preds = model(img)
            preds_size = torch.IntTensor([preds.size(0)] * img.size(0))
            preds = preds.to('cpu')
            loss = criterion(preds, text, preds_size, length)

            _, preds = preds.max(2)
            preds = preds.transpose(1, 0).contiguous().view(-1)
            sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
            list1 = [x for x in label]
            for pred, target in zip(sim_preds, list1):
                if pred == target:
                    n_correct += 1

            # Accumulate gradients over 4 batches, then step. The original
            # condition was `if (i + 1) % 4:`, which stepped on every batch
            # *except* the fourth and contradicted the accumulation intent.
            loss.backward()
            if (i + 1) % 4 == 0:
                optimizer.step()
                optimizer.zero_grad()

            epoch_loss += loss.item()
            train_acc += n_correct / len(list1)
            if (i + 1) % 4 == 0:
                print("epoch: {:<3d}, dataset:{:<8}, batch: {:<3d}, batch loss: {:4f}, "
                      "epoch loss: {:4f}, acc: {}".format(
                          epoch, dataset_name, i, loss.item(), epoch_loss,
                          n_correct / len(list1)))
                # writer.add_scalar('data/train_loss', loss.item(), batch_cnt)
                # writer.add_scalar('data/train_acc', n_correct/len(list1), batch_cnt)

        print('==========train_average_acc is: {:.3f}'.format(train_acc / train_acc_cnt))
        # writer.add_scalar('data/valid_{}acc'.format(dataset_name), train_acc/train_acc_cnt, batch_cnt)

        if epoch % 3 == 0:
            dataset_names = [dataset_name]
            accs, valid_losses = valid(model, criterion, converter, device,
                                       valid_datasets, dataset_names)
            acc, valid_loss = accs.get(dataset_name), valid_losses.get(dataset_name)
            print('========== valid acc: ', acc, ' ============valid loss: ', valid_loss)
            # writer.add_scalar('data/valid_{}acc'.format(dataset_name), acc, batch_cnt)
            # writer.add_scalar('data/valid_{}loss'.format(dataset_name), valid_loss, batch_cnt)

        if epoch % 3 == 0:
            state_dict = model.state_dict()
            torch.save(state_dict,
                       '/root/last_dataset/crnn_char_pths/catword_lr3_epoch_{}_acc{:4f}.pth'
                       .format(epoch + 1, train_acc / train_acc_cnt))
        if train_acc / train_acc_cnt > 0.95:
            state_dict = model.state_dict()
            torch.save(state_dict,
                       '/root/last_dataset/crnn_char_pths/catword_lr3_epoch{}_acc{:4f}.pth'
                       .format(epoch + 1, train_acc / train_acc_cnt))
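# Distilled pattern for the accumulation logic above (hedged sketch;
# compute_loss is a placeholder, not this repo's function). Dividing by
# ACCUM_STEPS keeps gradient magnitudes comparable to a single large batch;
# the code above skips that division, which effectively sums gradients.
ACCUM_STEPS = 4
optimizer.zero_grad()
for i, (img, label, _) in enumerate(dataloader):
    loss = compute_loss(img, label)
    (loss / ACCUM_STEPS).backward()
    if (i + 1) % ACCUM_STEPS == 0:
        optimizer.step()
        optimizer.zero_grad()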
    hidden = repackage_hidden(hidden)
    final_loss = total_loss / len(data)  # total_loss accumulated as a float via .item()
    try:
        print("Epoch: " + str(epoch) + " Val Loss: " + str(final_loss) +
              " Val Perplexity: " + str(math.exp(final_loss)))
    except NameError:  # epoch is undefined when evaluating outside the training loop
        print("Val Loss: " + str(final_loss) +
              " Val Perplexity: " + str(math.exp(final_loss)))
    if final_loss < loss_least:
        with open(MODEL_SAVE_PATH, 'wb') as f:
            torch.save(model, f)
        loss_least = final_loss
    return final_loss


optimizer = torch.optim.ASGD(model.parameters(), lr=INITIAL_LEARNING_RATE,
                             t0=0, lambd=0, weight_decay=WEIGHT_DECAY)
stochastic = False


def saving_model(final_loss):
    global loss_least
    if final_loss < loss_least:
        with open(MODEL_SAVE_PATH, 'wb') as f:
            torch.save(model, f)
        loss_least = final_loss


def train(stochastic):
    global optimizer
    if MODEL_TYPE == "QRNN":
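# Hedged note: ASGD with lambd=0, t0=0 mirrors the NT-ASGD recipe from
# Merity et al., "Regularizing and Optimizing LSTM Language Models". There,
# training starts with plain SGD and switches to ASGD once validation loss
# stops improving for a window of epochs. A minimal sketch of that trigger
# (NONMONO and val_losses are assumptions, not this script's variables):
if len(val_losses) > NONMONO and val_loss > min(val_losses[:-NONMONO]):
    optimizer = torch.optim.ASGD(model.parameters(), lr=INITIAL_LEARNING_RATE,
                                 t0=0, lambd=0, weight_decay=WEIGHT_DECAY)
val_losses.append(val_loss)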
                              im_size=args.im_size, split='val', transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size,
                                               shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size,
                                             shuffle=True, **kwargs)

    # Optimizer.
    lr = args.lr
    print('Initial lr: %f.' % lr)
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=args.weight_decay)
else:
    test_dataset = NeuroDataset(test_txt, im_size=args.im_size, split='test',
                                transform=transform)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1,
                                              shuffle=True, **kwargs)

criterion = F.binary_cross_entropy

if args.cuda:
    model.cuda()
    parser.add_argument('--nhid1', default=128, type=int, help='The second dim')
    parser.add_argument('--step_size', default=10, type=int, help='The step size for lr')
    # gamma is fractional, so it must be parsed as float (was type=int,
    # which would reject a value like 0.9 on the command line).
    parser.add_argument('--gamma', default=0.9, type=float, help='The gamma for lr')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    G, Adj, Node = dataset.Read_graph(args.input)
    model = model.MNN(Node, args.nhid0, args.nhid1, args.dropout, args.alpha)
    opt = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=args.step_size,
                                                gamma=args.gamma)
    Data = dataset.Dataload(Adj, Node)
    Data = DataLoader(Data, batch_size=args.bs, shuffle=True)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.train()
    for epoch in range(1, args.epochs + 1):
        loss_sum, loss_L1, loss_L2, loss_reg = 0, 0, 0, 0
        for index in Data:
            adj_batch = Adj[index]
            adj_mat = adj_batch[:, index]
            # Weight matrix B: reconstruction errors on observed edges are
            # penalized beta times more heavily than on zero entries.
            b_mat = torch.ones_like(adj_batch)
            b_mat[adj_batch != 0] = args.beta
            opt.zero_grad()
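# Hedged sketch (the model output names are assumptions) of how adj_batch,
# adj_mat, and b_mat typically combine in an SDNE-style objective: the
# second-order loss reconstructs adjacency rows with B-weighted MSE, and the
# first-order loss pulls embeddings of linked nodes together.
recon, embed = model(adj_batch)
loss_2nd = torch.sum(((recon - adj_batch) * b_mat) ** 2)
loss_1st = torch.sum(adj_mat * torch.cdist(embed, embed) ** 2)
loss = loss_2nd + args.alpha * loss_1st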
num_epoch = 50

os.makedirs(models_path, exist_ok=True)
os.makedirs(os.path.join(images_path, f"faster_rcnn/{attempt}/images"), exist_ok=True)
os.makedirs(os.path.join(images_path, f"faster_rcnn/{attempt}/plots"), exist_ok=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Running on {device}")
print(f"This is attempt {attempt}")

model.to(device=device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# optimizer = torch.optim.Adam(params, lr=0.0005, weight_decay=0)

split = "stage1_train"
dataset = MyDataset(split=split,
                    transforms=get_transforms(train=True, rescale_size=(256, 256)))
trainset, evalset = random_split(dataset, [600, 70])
train_loader = DataLoader(trainset, batch_size=1, num_workers=0,
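# Hedged sketch of the torchvision detection training step that typically
# follows this setup (assumes train_loader uses a collate_fn that yields
# lists of image tensors and target dicts, as torchvision detection models
# expect; variable names are illustrative):
for images, targets in train_loader:
    images = [img.to(device) for img in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    loss_dict = model(images, targets)  # in train mode, returns a dict of losses
    loss = sum(loss_dict.values())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()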
# End-effector loss.
ee_criterion = nn.MSELoss() if cfg.LOSS.EE else None
# Vector similarity loss.
vec_criterion = nn.MSELoss() if cfg.LOSS.VEC else None
# Collision loss.
col_criterion = CollisionLoss(cfg.LOSS.COL_THRESHOLD) if cfg.LOSS.COL else None
# Joint limit loss.
lim_criterion = JointLimitLoss() if cfg.LOSS.LIM else None
# End-effector orientation loss.
ori_criterion = nn.MSELoss() if cfg.LOSS.ORI else None
# Regularization loss.
reg_criterion = RegLoss() if cfg.LOSS.REG else None

# Create optimizer.
optimizer = optim.Adam(model.parameters(), lr=cfg.HYPER.LEARNING_RATE)

best_loss = float('Inf')
for epoch in range(cfg.HYPER.EPOCHS):
    # Start training.
    train_loss = train_epoch(model, ee_criterion, vec_criterion, col_criterion,
                             lim_criterion, ori_criterion, reg_criterion,
                             optimizer, train_loader, train_target, epoch,
                             logger, cfg.OTHERS.LOG_INTERVAL, writer, device)
    # Start testing.
    test_loss = test_epoch(model, ee_criterion, vec_criterion, col_criterion,
                           lim_criterion, ori_criterion, reg_criterion,
                           test_loader, test_target, epoch,
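# Hedged sketch of how train_epoch presumably combines the optional criteria
# above into one objective: every enabled term is computed and summed. The
# prediction/target names below are assumptions, not this repo's code.
loss = torch.zeros((), device=device)
if ee_criterion is not None:
    loss = loss + ee_criterion(pred_ee_pos, target_ee_pos)
if vec_criterion is not None:
    loss = loss + vec_criterion(pred_vecs, target_vecs)
if col_criterion is not None:
    loss = loss + col_criterion(pred_joint_pos)
if lim_criterion is not None:
    loss = loss + lim_criterion(pred_joint_angles)
if ori_criterion is not None:
    loss = loss + ori_criterion(pred_ee_rot, target_ee_rot)
if reg_criterion is not None:
    loss = loss + reg_criterion(model)
optimizer.zero_grad()
loss.backward()
optimizer.step()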
def main():
    torch.manual_seed(1)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu_devices
    print(args)

    # GPU / CPU
    device = torch.device('cuda')

    print("Initializing dataset")
    dataset = data_manager.init_dataset('../imdb/dataset_GEI', 'id_list.csv',
                                        args.cooperative)
    transform = transforms.Compose([
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.02)),
        transforms.ToTensor()
    ])
    transform_test = transforms.Compose([transforms.ToTensor()])

    # trainLoader
    trainLoader = DataLoader(ImageDataset(dataset.train, sample='random',
                                          transform=transform),
                             sampler=RandomIdentitySampler(dataset.train,
                                                           num_instances=2),
                             batch_size=args.train_batch, num_workers=args.workers)
    # test/val queryLoader
    # test/val galleryLoader
    test_probeLoader = DataLoader(ImageDataset(dataset.test_probe, sample='dense',
                                               transform=transform_test),
                                  shuffle=False, batch_size=args.test_batch,
                                  drop_last=False)
    test_galleryLoader = DataLoader(ImageDataset(dataset.test_gallery, sample='dense',
                                                 transform=transform_test),
                                    shuffle=False, batch_size=args.test_batch,
                                    drop_last=False)

    model = models.model.ICDNet_group_mask_mask_early_8().to(device=device)
    # model = models.model.ICDNet_mask()
    # model = nn.DataParallel(model).cuda()
    # model = models.model.icdnet().to(device=device)
    print("Model size: {:.5f}M".format(
        sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_cont = OnlineContrastiveLoss(margin=3)
    # criterion_trip = OnlineTripletLoss(3)
    criterion_trip = TripletLoss(3)
    criterion_sim = OnlineSimLoss()
    criterion_l2 = nn.MSELoss()
    criterion_label = nn.CrossEntropyLoss()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, betas=(0.5, 0.999))
    # scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
    scheduler = lr_scheduler.MultiStepLR(optimizer, [140], gamma=0.1, last_epoch=-1)

    # checkpoint = torch.load('./save_group_mask_early8_ones2_0002_sa3_500l2_01label_resbottle_shift002_all190_coo0/ep87.pth.tar')
    # model.load_state_dict(checkpoint['state_dict'])

    start_time = time.time()
    best_rank1 = -np.inf
    # args.max_epoch = 1
    cont_iter = 1
    for epoch in range(args.start_epoch, args.max_epoch):
        print("==> {}/{}".format(epoch + 1, args.max_epoch))
        cont_iter = train(epoch, model, criterion_cont, criterion_trip,
                          criterion_sim, criterion_l2, criterion_label,
                          optimizer, scheduler, trainLoader, device, cont_iter)
        if cont_iter > 250000:
            break

        print("=============> Test")
        test_f.write("iter" + str(cont_iter) + '\n')
        rank1, correct_rate = test(model, test_probeLoader, test_galleryLoader, device)
        writer.add_scalar("Test/rank1", rank1, epoch)
        writer.add_scalar("Test/correct", correct_rate, epoch)

        is_best = rank1 > best_rank1
        if is_best:
            best_rank1 = rank1
            state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': epoch,
                'optimizer': optimizer.state_dict(),
            }, is_best, osp.join(args.save_dir, 'ep' + str(epoch + 1) + '.pth.tar'))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))
                  '| epoch {:3d} | {:5d}/{:5d} batches | lr ADAM | ms/batch {:5.2f} | '
                  'loss {:5.2f} | ppl {:8.2f}'.format(
                      epoch, batch, train_data.size()[1] // MAX_SEQ_LEN,
                      elapsed * 1000 / DEUBG_LOG_INTERVAL, cur_loss,
                      math.exp(cur_loss)))  # print a progress log line
            total_loss = 0  # reset the loss
            start_time = time.time()  # reset the timer

# Loop over epochs.
lr = INITIAL_LEARNING_RATE  # define our learning rate
best_val_loss = None  # track the best validation loss so we can do early stopping if we want to
# Initialize the optimizer here and pass in the model parameters so it can
# update them when it's time; start from the configured learning rate.
optimizer = optim.RMSprop(model.parameters(), lr=INITIAL_LEARNING_RATE)
optimizer.zero_grad()  # zero the gradients so training starts clean

# At any point you can hit Ctrl + C to break out of training early.
try:
    print("Starting training")
    for epoch in range(1, EPOCH_MAX + 1):
        epoch_start_time = time.time()
        train()  # train the model for one epoch
        val_loss = evaluate(val_data)  # check performance on validation data
        print('-' * 89)
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(
                  epoch, (time.time() - epoch_start_time), val_loss,
                  math.exp(val_loss)))
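# Hedged sketch of the early-stopping / annealing step the best_val_loss
# comment above hints at (the classic PyTorch word-language-model recipe,
# not necessarily this script's code):
if not best_val_loss or val_loss < best_val_loss:
    with open(MODEL_SAVE_PATH, 'wb') as f:
        torch.save(model, f)
    best_val_loss = val_loss
else:
    # Anneal the learning rate when validation stops improving.
    for group in optimizer.param_groups:
        group['lr'] /= 4.0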
                                 n_workers=n_workers, drop_val=True)
else:
    train_loader = TwitterDataset(train_file, shuffle=True, cache_size=100000,
                                  use_user_info=use_user_info, n_workers=n_workers)

time.sleep(0.5)
print("Loading validation data...")
test_loader = TwitterDataset(test_file, val_size, use_user_info=use_user_info)
print("%d entries have been loaded" % len(train_loader))

print("Preparing model...")
model.to(device)
optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
# sheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.95,
#                                                 patience=100, min_lr=1e-5, threshold=1e-4,
#                                                 threshold_mode='rel', cooldown=0)

step = 0
max_score = (0, -1e10, 0)

if load_checkpoint:
    checkpoint = torch.load(
        os.path.join(
            checkpoints_dir,
            model_name + ('_best.pt' if load_checkpoint == 'best' else '_latest.pt')))
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    # sheduler.load_state_dict(checkpoint['sheduler_state_dict'])
    step = checkpoint['step']
    max_score = checkpoint['max_score']
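# Hedged counterpart to the loading code above: how such a checkpoint is
# typically written (a sketch; the keys deliberately mirror the ones read
# above, everything else is an assumption):
torch.save({
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'step': step,
    'max_score': max_score,
}, os.path.join(checkpoints_dir, model_name + '_latest.pt'))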
                                                      random_state=0)
print(len(train_dataset))
print(len(valid_dataset))
# Batch size is the number of samples used per training step; it affects
# both how well and how fast the model optimizes.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=True,
                          num_workers=0)

model = model.MobileNetV3_large()
model.load_state_dict(torch.load('weigths/best.pkl'))
# Replace the final layer so it outputs the new number of classes.
model.conv4 = Conv2d(1280, 5, kernel_size=(1, 1), stride=(1, 1))
model.to(DEVICE)

optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
loss_func = torch.nn.CrossEntropyLoss()
avg_loss = []
avg_acc = []


def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_func(output, target)
        loss.backward()
        optimizer.step()
        if (batch_idx + 1) % 30 == 0:
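# The fine-tuning above updates every parameter. A common variant (hedged
# sketch, not this script's code) freezes the pretrained backbone and trains
# only the replacement head, which is cheaper and less prone to overfitting
# on a small dataset:
for p in model.parameters():
    p.requires_grad = False  # freeze pretrained weights
model.conv4 = Conv2d(1280, 5, kernel_size=(1, 1), stride=(1, 1))  # new head, trainable by default
optimizer = torch.optim.SGD((p for p in model.parameters() if p.requires_grad),
                            lr=0.0001, momentum=0.9)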