def train(**kwargs): # 1. configure model cfg._parse(kwargs) model = MyNet() if cfg.load_model_path: model.load_state_dict(torch.load(cfg.load_model_path)) if cfg.multi_gpu: model = parallel.DataParallel(model) if cfg.use_gpu: model.cuda() # 2. prepare data train_data = SN(root=cfg.train_data_root, crop_size=cfg.crop_size) train_loader = DataLoader(train_data, batch_size=cfg.batch_size, shuffle=True) # 3. criterion (already imported) and optimizer lr = cfg.lr # optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=cfg.weight_decay) optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=cfg.momentum) # 4. meters loss_meter = meter.AverageValueMeter() previous_loss = 1e10 # train for epoch in range(cfg.max_epoch): print('epoch %s: ===========================' % epoch) loss_meter.reset() for ii, (data, label_group) in tqdm(enumerate(train_loader)): # train model if cfg.use_gpu: data = data.cuda() label_group = [label.cuda() for label in label_group] data = Variable(data).float() label_group = [Variable(label) for label in label_group] optimizer.zero_grad() score = model(data) # for item in score: # print(item) loss = criterion(score, label_group, batch_size=cfg.batch_size, neg_pos_ratio=cfg.neg_pos_ratio) loss.backward() optimizer.step() # meters update and print loss_meter.add(loss.item()) if (ii + 1) % cfg.print_freq == 0: print(loss_meter.value()[0]) if (epoch + 1) % cfg.save_freq == 0: torch.save(model.module.state_dict(), f'./checkpoints/last.pth') # update learning rate if loss_meter.value()[0] > previous_loss: lr = lr * cfg.lr_decay # 第二种降低学习率的方法:不会有moment等信息的丢失 for param_group in optimizer.param_groups: param_group['lr'] = lr previous_loss = loss_meter.value()[0]
def main(): """ Main function wrapper for training script. """ matplotlib.use("Agg") random.seed(args["SEED"]) np.random.seed(args["SEED"]) torch.manual_seed(args["SEED"]) if torch.cuda.is_available(): device = torch.device("cuda") kwargs = {"num_workers": args["NUM_WORKERS"], "pin_memory": True} else: device = torch.device("cpu") kwargs = {} trainData = MyDataset("train", datadir=args["DATA_DIRECTORY"]) valSize = int(args["VALIDATION_SPLIT"] * len(trainData)) trainSize = len(trainData) - valSize trainData, valData = random_split(trainData, [trainSize, valSize]) trainLoader = DataLoader( trainData, batch_size=args["BATCH_SIZE"], shuffle=True, **kwargs ) valLoader = DataLoader( valData, batch_size=args["BATCH_SIZE"], shuffle=True, **kwargs ) model = MyNet() model.to(device) optimizer = optim.Adam( model.parameters(), lr=args["LEARNING_RATE"], betas=(args["MOMENTUM1"], args["MOMENTUM2"]), ) scheduler = optim.lr_scheduler.ExponentialLR( optimizer, gamma=args["LR_DECAY"] ) criterion = MyLoss() regularizer = L2Regularizer(lambd=args["LAMBDA"]) if os.path.exists(args["CODE_DIRECTORY"] + "/checkpoints"): while True: char = input( "Continue and remove the 'checkpoints' directory? y/n: " ) if char == "y": break if char == "n": sys.exit() else: print("Invalid input") shutil.rmtree(args["CODE_DIRECTORY"] + "/checkpoints") os.mkdir(args["CODE_DIRECTORY"] + "/checkpoints") os.mkdir(args["CODE_DIRECTORY"] + "/checkpoints/plots") os.mkdir(args["CODE_DIRECTORY"] + "/checkpoints/weights") if args["PRETRAINED_WEIGHTS_FILE"] is not None: print( "Pretrained Weights File: %s" % (args["PRETRAINED_WEIGHTS_FILE"]) ) print("Loading the pretrained weights ....") model.load_state_dict( torch.load( args["CODE_DIRECTORY"] + args["PRETRAINED_WEIGHTS_FILE"], map_location=device, ) ) model.to(device) print("Loading Done.") trainingLossCurve = list() validationLossCurve = list() trainingMetricCurve = list() validationMetricCurve = list() numTotalParams, numTrainableParams = num_params(model) print("Number of total parameters in the model = %d" % (numTotalParams)) print( "Number of trainable parameters in the model = %d" % (numTrainableParams) ) print("Training the model ....") for epoch in range(1, args["NUM_EPOCHS"] + 1): trainingLoss, trainingMetric = train( model, trainLoader, optimizer, criterion, regularizer, device ) trainingLossCurve.append(trainingLoss) trainingMetricCurve.append(trainingMetric) validationLoss, validationMetric = evaluate( model, valLoader, criterion, regularizer, device ) validationLossCurve.append(validationLoss) validationMetricCurve.append(validationMetric) print( ( "| Epoch: %03d |" "| Tr.Loss: %.6f Val.Loss: %.6f |" "| Tr.Metric: %.3f Val.Metric: %.3f |" ) % ( epoch, trainingLoss, validationLoss, trainingMetric, validationMetric, ) ) scheduler.step() if epoch % args["SAVE_FREQUENCY"] == 0: savePath = ( args["CODE_DIRECTORY"] + "/checkpoints/weights/epoch_{:04d}-metric_{:.3f}.pt" ).format(epoch, validationMetric) torch.save(model.state_dict(), savePath) plt.figure() plt.title("Loss Curves") plt.xlabel("Epoch No.") plt.ylabel("Loss value") plt.plot( list(range(1, len(trainingLossCurve) + 1)), trainingLossCurve, "blue", label="Train", ) plt.plot( list(range(1, len(validationLossCurve) + 1)), validationLossCurve, "red", label="Validation", ) plt.legend() plt.savefig( ( args["CODE_DIRECTORY"] + "/checkpoints/plots/epoch_{:04d}_loss.png" ).format(epoch) ) plt.close() plt.figure() plt.title("Metric Curves") plt.xlabel("Epoch No.") plt.ylabel("Metric") plt.plot( list(range(1, len(trainingMetricCurve) + 1)), trainingMetricCurve, "blue", label="Train", ) plt.plot( list(range(1, len(validationMetricCurve) + 1)), validationMetricCurve, "red", label="Validation", ) plt.legend() plt.savefig( ( args["CODE_DIRECTORY"] + "/checkpoints/plots/epoch_{:04d}_metric.png" ).format(epoch) ) plt.close() print("Training Done.") return
nx.draw(G, pos=graph_dataset.dataset_.location_getter(), labels=dataset.country_by_idx, with_labels=True) plt.show() pca = PCA(n_components=16) data = dataset.df[[ c for c in dataset.df.columns if isinstance(c, dt.datetime) ]].astype(float).T pca.fit(data) animator = AnimPlot('train loss', 'test loss') my_net = MyNet() optimizer = torch.optim.Adam(my_net.parameters(), lr=0.0001) train_loader = torch.utils.data.DataLoader(train, 64) test_loader = torch.utils.data.DataLoader(test, 10) for epoch_no in range(10000): with torch.no_grad(): test_loss = 0.0 for X, target in test_loader: transformed = torch.tensor(pca.transform(X), dtype=torch.float32) predicted = my_net(transformed) loss = cycle_loss(predicted.flatten(), target[:, 1].float(), 12)