Code Example #1
File: train.py Project: d6ms/pytorch-protonets
def train(epochs,
          n_train,
          k_train,
          q_train,
          n_eval=1,
          k_eval=3,
          q_eval=5,
          episodes_per_epoch=100,
          num_tasks=1,
          lr=1e-3,
          lr_step_size=20,
          lr_gamma=0.5):
    # print parameters
    print('================ parameters ================')
    print('epochs', epochs)
    print('train (n, k, q)', n_train, k_train, q_train)
    print('eval (n, k, q)', n_eval, k_eval, q_eval)
    print('episodes per epoch', episodes_per_epoch)
    print('num_tasks', num_tasks)
    print('learning rate', lr)
    print('learning rate step size', lr_step_size)
    print('learning rate decay (gamma)', lr_gamma)
    print('============================================')

    # dataloaders for train and eval
    train_set = OmniglotDataset(subset='background')
    train_loader = DataLoader(train_set,
                              num_workers=0,
                              batch_sampler=FewShotBatchSampler(
                                  train_set,
                                  episodes_per_epoch=episodes_per_epoch,
                                  n=n_train,
                                  k=k_train,
                                  q=q_train,
                                  num_tasks=num_tasks))
    eval_set = OmniglotDataset(subset='evaluation')
    eval_loader = DataLoader(eval_set,
                             num_workers=0,
                             batch_sampler=FewShotBatchSampler(
                                 eval_set,
                                 episodes_per_epoch=episodes_per_epoch,
                                 n=n_eval,
                                 k=k_eval,
                                 q=q_eval,
                                 num_tasks=num_tasks))

    # train settings
    model = protonet_embedding_model().to(config.DEVICE)
    optimizer = Adam(model.parameters(), lr=lr)
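    # StepLR scales the learning rate by lr_gamma after every lr_step_size scheduler steps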
    scheduler = StepLR(optimizer, step_size=lr_step_size, gamma=lr_gamma)
    loss_fn = torch.nn.NLLLoss().to(config.DEVICE)

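    # summarize the model for a single-channel 105x105 (Omniglot-sized) input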
    summary(model, (1, 105, 105))

    # train
    history = {'loss': list(), 'accuracy': list()}
    for epoch in range(1, epochs + 1):
        train_epoch(model, optimizer, scheduler, loss_fn, train_loader,
                    n_train, k_train, q_train, epoch)
        evaluate(model, history, loss_fn, eval_loader, n_eval, k_eval, q_eval,
                 epoch)

        # save model and history
        if epoch == 1 or history['accuracy'][-1] > max(
                history['accuracy'][:-1]):
            torch.save(model.state_dict(),
                       f'{config.MODEL_PATH}/protonets.ckpt')
        save_history(history)
Code Example #2
						transform=transform),
						batch_size=batch_size, shuffle=True)

test_loader = DataLoader(datasets.CIFAR10('../data', train=False,
						transform=transforms.Compose([ToTensor()])),
						batch_size=batch_size, shuffle=True)


model = BasicCNN()
model.cuda()


optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=weight_decay)	
#optimizer = optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss().cuda()
scheduler = StepLR(optimizer, step_size=10, gamma=0.3)



train_loss = np.zeros((epochs, 1), dtype=np.float32)
train_acc = np.zeros((epochs, 1), dtype=np.float32)
val_acc = np.zeros(shape=(epochs,1), dtype=np.float32)
val_loss = np.zeros(shape=(epochs, 1), dtype=np.float32)



def save_checkpoint(state, filename='saved/cifar10_checkpoint_%s.pth.tar'%(numb)):
	torch.save(state, filename)
	if state['is_best']==True:
		shutil.copyfile(filename, 'saved/cifar10_model_best_%s.pth.tar'%(numb))
Code Example #3
    topk = 20
    config = get_config(model_name, dataset_name)
    model = SASRec(config).to(device)
elif model_name == 'stamp':
    batch_size = 512
    epoch_number = 30
    lr = 0.001
    lr_dc = 0.1
    lr_dc_step = 80
    topk = 20
    config = get_config(model_name, dataset_name)
    model = STAMP(config).to(device)

# ----------------------------------------init model----------------------------------------
optimizer = optim.Adam(model.parameters(), lr)
scheduler = StepLR(optimizer, step_size=lr_dc_step, gamma=lr_dc)

# ----------------------------------------load data----------------------------------------
if dataset_name == 'yoochoose1_64':
    data_root = './data/yoochoose1_64/'
elif dataset_name == 'diginetica':
    data_root = './data/diginetica/'

train, test = Preprocess(data_root)
train_dataset = RecSysDataset(train)
test_dataset = RecSysDataset(test)
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          collate_fn=rec15_collate_fn)
test_loader = DataLoader(test_dataset,
Code Example #4
def train(args):
    start_epoch = 0
    data_loader = DataLoader(dataset=HellenDataset(True, 224), batch_size=args.batch, shuffle=True, num_workers=16)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    model = CnnAlign()
    print("add graph")
    writer.add_graph(model, torch.zeros((1, 3, 224, 224)))
    print("add graph over")
    if args.pretrained and os.path.exists(MODEL_SAVE_PATH):
        print("loading ...")
        state = torch.load(MODEL_SAVE_PATH)
        model.load_state_dict(state['net'])
        start_epoch = state['epoch']
        print("loading over")
    model = torch.nn.DataParallel(model, device_ids=[0, 1])  # multi-GPU
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-5)
    scheduler = StepLR(optimizer, step_size=args.step, gamma=args.gama)
    train_loss = 0
    to_pil_img = tfs.ToPILImage()
    to_tensor = tfs.ToTensor()

    for epoch in range(start_epoch, start_epoch+args.epoes):
        model.train()
        prefetcher = DataPrefetcher(data_loader)
        img_tensor, label_tensor = prefetcher.next()
        last_img_tensor = img_tensor
        last_label_tensor = label_tensor
        optimizer.zero_grad()
        i_batch = 0
        while img_tensor is not None:
            last_img_tensor = img_tensor
            last_label_tensor = label_tensor
            output = model(img_tensor)
            loss = torch.nn.functional.smooth_l1_loss(output, label_tensor.view(-1, output.size(1)))
            if loss is None:
                img_tensor, label_tensor = prefetcher.next()
                continue
            loss.backward()
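            # gradient accumulation: update the weights only once every args.mini_batch batches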
            if i_batch % args.mini_batch == 0:
                optimizer.step()
                optimizer.zero_grad()

            train_loss = loss.item()
            global_step = epoch*len(data_loader)+i_batch
            progress_bar(i_batch, len(data_loader), 'loss: %f, epoch: %d'%(train_loss, epoch))
            writer.add_scalar("loss", train_loss, global_step=global_step)
            img_tensor, label_tensor = prefetcher.next()
            i_batch += 1


        # draw the predicted landmarks on one image and log it to TensorBoard
        pil_img = to_pil_img(last_img_tensor[0].cpu())
        ann = output[0].cpu().detach().numpy()
        ann = np.resize(ann, (194, 2))
        draw_ann(pil_img, ann.tolist(), font1, font_size)
        writer.add_image("img: "+str(epoch), to_tensor(pil_img))
        scheduler.step()

        if epoch % 10 == 0:
            print('Saving..')
            state = {
                'net': model.module.state_dict(),
                'epoch': epoch,
            }
            torch.save(state, "./output/face_align"+str(epoch)+".pt")

    if not os.path.isdir('data'):
        os.mkdir('data')
    print('Saving..')
    state = {
        'net': model.module.state_dict(),
        'epoch': epoch,
    }
    torch.save(state, MODEL_SAVE_PATH)
    writer.close()
Code Example #5
def main():

    # Training settings
    batch_size = 8
    learning_rate = 0.0001
    gamma = 0.5
    epochs = 50
    lr_scheduler_step_size = 12
    adam_betas = (0.9, 0.999)
    pathToModel = os.path.join(BASEDIR, 'weights.pt')
    restart = True

    # attempt to use GPU if available
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # CPY ABOVE HERE

    train_folder = os.path.join(DATA, 'train')
    path_train_csv = os.path.join(DATA, 'labels', 'Train_labels.csv')

    print('Loading training data...')
    trainX, trainY = load_data(train_folder, path_train_csv)
    print('x train shape:', trainX.shape)

    print('Split the train/val data sets 80/20')
    num = int(trainX.shape[0] * 0.2)
    np.random.seed(1234567)
    idxs = np.random.choice(np.arange(trainX.shape[0]), num, replace=False)

    x_val_raw = trainX[idxs]
    y_val = trainY[idxs]
    x_train_raw = np.delete(trainX, idxs, axis=0)
    y_train = np.delete(trainY, idxs, axis=0)
    y_train = np.argmax(y_train, axis=1)
    y_val = np.argmax(y_val, axis=1)

    x_train = preprocess(x_train_raw)
    x_val = preprocess(x_val_raw)
    print('Reshaping to have channels first')
    x_train = reshapeInput(x_train)
    x_val = reshapeInput(x_val)

    print('Number of training data:', x_train_raw.shape[0])
    print('Number of validation data:', x_val_raw.shape[0])

    num = int(trainX.shape[0] * 0.2)
    np.random.seed(1234567)
    idxs = np.random.choice(np.arange(trainX.shape[0]), num, replace=False)
    x_val_raw = trainX[idxs]
    y_val = trainY[idxs]
    x_train_raw = np.delete(trainX, idxs, axis=0)
    y_train = np.delete(trainY, idxs, axis=0)

    y_train = np.argmax(y_train, axis=1)
    y_val = np.argmax(y_val, axis=1)

    # preprocess training and validation
    print('Preprocessing...')
    x_train = preprocess(np.copy(x_train_raw))
    x_val = preprocess(np.copy(x_val_raw))
    print('Reshaping to have channels first')
    x_train = reshapeInput(x_train)
    x_val = reshapeInput(x_val)
    # load the model
    model = model_generator()
    # model = PhoneLocator().to(device)
    if (use_cuda):
        model.cuda()
    # load the optimizer and setup schedule to reduce learning rate every 10 epochs
    optimizer = optim.Adam(model.parameters(),
                           lr=learning_rate,
                           betas=adam_betas)
    scheduler = StepLR(optimizer,
                       step_size=lr_scheduler_step_size,
                       gamma=gamma)

    train_dataset = (torch.FloatTensor(x_train), torch.FloatTensor(y_train))
    validation_dataset = (torch.FloatTensor(x_val), torch.FloatTensor(y_val))
    # create train and validation data loaders
    data_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomGrayscale(p=0.05),
        transforms.ToTensor()
    ])

    train_dataset = CustomTensorDataset(tensors=train_dataset,
                                        transform=data_transform)
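    # weight each sample by (1 - relative class frequency) so rarer classes are sampled more often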
    histcount = np.histogram(y_train, bins=7)[0]
    classWeight = 1.0 - histcount / histcount.sum()
    classWeight_tensor = torch.FloatTensor(classWeight).to(device)
    samples_weights = classWeight_tensor[y_train]
    sampler = torch.utils.data.WeightedRandomSampler(
        weights=samples_weights,
        num_samples=len(samples_weights),
        replacement=True)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               sampler=sampler,
                                               **kwargs)
    validation_loader = torch.utils.data.DataLoader(
        TensorDataset(*validation_dataset), shuffle=True, **kwargs)
    # load model if path exists
    if os.path.isfile(pathToModel) and not restart:
        print('restarting..')
        model.load_state_dict(torch.load(pathToModel))
    # each iteration gather the n=test_batch_size samples and their respective labels [0,9]
    best_loss = math.inf
    train_loss_save = np.zeros((epochs))
    val_loss_save = np.zeros((epochs))

    print('Beginning to train')
    for epoch in range(1, epochs + 1):
        train_loss = train(model, device, train_loader, optimizer, epoch)
        val_loss = validate(model, device, validation_loader)
        if (use_cuda):
            train_loss_save[epoch - 1] = train_loss.cpu().data.numpy()
            val_loss_save[epoch - 1] = val_loss.cpu().data.numpy()
        else:
            train_loss_save[epoch - 1] = train_loss.data.numpy()
            val_loss_save[epoch - 1] = val_loss.data.numpy()
        if (val_loss < best_loss):
            print('Loss improved from ', best_loss, 'to', val_loss,
                  ': Saving new model to', pathToModel)
            best_loss = val_loss
            torch.save(model.state_dict(), pathToModel)
        scheduler.step()
        np.save('./val_loss.npy', val_loss_save)
        np.save('./train_loss.npy', train_loss_save)
Code Example #6
        optimizer = torch.optim.SGD([{
            'params': model.parameters()
        }, {
            'params': metric_fc.parameters()
        }],
                                    lr=opt.lr,
                                    weight_decay=opt.weight_decay)
    else:
        optimizer = torch.optim.Adam([{
            'params': model.parameters()
        }, {
            'params': metric_fc.parameters()
        }],
                                     lr=opt.lr,
                                     weight_decay=opt.weight_decay)
    scheduler = StepLR(optimizer, step_size=opt.lr_step, gamma=0.1)

    start = time.time()
    for i in range(opt.max_epoch):
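        # pre-1.1 PyTorch convention: the LR scheduler is stepped at the start of each epoch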
        scheduler.step()

        model.train()
        for ii, data in enumerate(trainloader):
            data_input, label = data
            data_input = data_input.to(device)
            #print("data_input.shape",data_input.shape)
            label = label.to(device).long()
            feature = model(data_input)
            #print("feature.shape",feature.shape)
            output = metric_fc(feature, label)
            #print("output.shape",output.shape)
Code Example #7
File: eva4s5f4.py Project: 2anandjha/EVA5S5
    test_loss /= len(test_loader.dataset)
    test_losses.append(test_loss)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    
    test_acc.append(100. * correct / len(test_loader.dataset))

"""# Let's Train and test our model"""

from torch.optim.lr_scheduler import StepLR

model =  Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = StepLR(optimizer, step_size=6, gamma=0.1)
EPOCHS = 15
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

fig, axs = plt.subplots(2,2,figsize=(15,10))
axs[0, 0].plot(train_losses)
axs[0, 0].set_title("Training Loss")
axs[1, 0].plot(train_acc[4000:])
axs[1, 0].set_title("Training Accuracy")
axs[0, 1].plot(test_losses)
axs[0, 1].set_title("Test Loss")
axs[1, 1].plot(test_acc)
axs[1, 1].set_title("Test Accuracy")
Code Example #8
def main():
    # -----------------------------------  Model Build -------------------------
    model = UnwarpNet_cmap(combine_num=1)
    args = train_configs.args
    isTrain = True
    model = torch.nn.DataParallel(model.cuda()) 
    start_epoch = 1
    # Load Parameters
    # if args.pretrained:
    if True:
        print("Loading Pretrained model~")
        #""/home1/quanquan/code/film_code/output/train/aug20201129-210822-VktsHX/cmap_aug_19.pkl""
        # "/home1/quanquan/code/Film-Recovery/cmap_only_45.pkl"
        # "/home1/quanquan/code/Film-Recovery/output/train/new_data20201214-090229-F3z21O/cmap_aug_500.pkl"
        pretrained_dict = torch.load("/home1/quanquan/code/Film-Recovery/cmap_only_45.pkl", map_location=None)
        start_lr = pretrained_dict['lr']
        start_epoch = pretrained_dict['epoch'] if pretrained_dict['epoch'] < 100 else 100
        # -----------------------  Load partial model  ---------------------
        model_dict=model.state_dict()
        # 1. filter out unnecessary keys
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
        # 2. overwrite entries in the existing state dict
        model_dict.update(pretrained_dict)
        # -------------------------------------------------------------------
        # model.load_state_dict(pretrained_dict['model_state'])
        model.load_state_dict(model_dict)
    # ------------------------------------  Load Dataset  -------------------------
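    # 8 worker processes and pinned host memory to speed up host-to-GPU transfers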
    kwargs = {'num_workers': 8, 'pin_memory': True} 
    # dataset_test = filmDataset_3(npy_dir="/home1/quanquan/datasets/generate/mesh_film_small/")
    # dataset_test_loader = DataLoader(dataset_test,batch_size=args.test_batch_size, shuffle=False, **kwargs)
    dataset_train = filmDataset_3("/home1/quanquan/datasets/generate/mesh_film_hypo_alpha2/", load_mod="new_ab")
    dataset_train_loader = DataLoader(dataset_train, batch_size=args.batch_size, shuffle=True, **kwargs)
    
    # ------------------------------------  Optimizer  -------------------------
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    # model, optimizer = amp.initialize(model, optimizer,opt_level='O1',loss_scale="dynamic",verbosity=0)
    #criterion = torch.nn.MSELoss()  
    criterion = torch.nn.L1Loss()
    bc_critic = nn.BCELoss() 
    
    if args.visualize_para:
        for name, parameters in model.named_parameters():
            print(name, ':', parameters.size())
    start_lr = args.lr
    
    # -----------------------------------  Training  ---------------------------
    for epoch in range(start_epoch, max_epoch + 1):
        loss_value, loss_cmap_value, loss_ab_value, loss_uv_value, loss_bg_value = 0,0,0,0,0
        model.train()
        datalen = len(dataset_train)
        print("Output dir:", output_dir)
        for batch_idx, data in enumerate(dataset_train_loader):
            
            ori_gt = data[0].cuda()
            ab_gt  = data[1].cuda()
            dep_gt = data[2].cuda()
            nor_gt = data[3].cuda()
            cmap_gt= data[4].cuda()
            uv_gt  = data[5].cuda()
            bg_gt  = data[6].cuda()
            
            optimizer.zero_grad()
            uv, cmap, ab, bg = model(ori_gt)               
            # print("ab shapes: ", ab.shape, ab_gt.shape)
            
            loss_cmap = criterion(cmap, cmap_gt).float()
            loss_ab = criterion(ab, ab_gt).float()
            loss_uv   = criterion(uv, uv_gt).float()
            loss_bg   = criterion(bg, bg_gt).float()
            
            loss = loss_cmap + loss_bg # + loss_ab + loss_uv
            loss.backward()
            optimizer.step()
            
            loss_value      += loss.item()
            loss_cmap_value += loss_cmap.item()
            loss_ab_value   += loss_ab.item()
            loss_uv_value   += loss_uv.item()
            loss_bg_value   += loss_bg.item()
            print("\r Epoch[{}/{}] \t batch:{}/{} \t \t loss: {}".format(epoch, max_epoch, batch_idx,datalen, loss_value/(batch_idx+1)), end=" ") 
            
            lr = get_lr(optimizer)
            # w("check code")
            # break
        
        #scheduler.step()
            
        writer_tb((loss_value/(batch_idx+1), loss_ab_value/(batch_idx+1), loss_uv_value/(batch_idx+1), loss_cmap_value/(batch_idx+1),loss_bg_value/(batch_idx+1), lr), epoch=epoch)
        write_imgs_2((cmap[0,:,:,:], uv[0,:,:,:], ab[0,:,:,:],bg[0,:,:,:], ori_gt[0,:,:,:], cmap_gt[0,:,:,:], uv_gt[0,:,:,:], ab_gt[0,:,:,:], bg_gt[0,:,:,:]), epoch)

        if isTrain and args.save_model and epoch % 10 == 0:
            state = {'epoch': epoch + 1,
                     'lr': lr,
                     'model_state': model.state_dict(),
                     'optimizer_state': optimizer.state_dict()
                     }
            torch.save(state, tfilename(output_dir, "{}_{}.pkl".format("cmap_aug", epoch)))
Code Example #9
def train(data_dir,
          train_imdb,
          val_imdb,
          model_save_path="./model/",
          use_gpu=True):

    # initialize training configuration
    config = Config()
    config.pos_pair_range = 180

    # do data augmentation in PyTorch;
    # you can also do complex data augmentation as in the original paper
    center_crop_size = config.instance_size - config.stride
    random_crop_size = config.instance_size - 2 * config.stride

    train_z_transforms = transforms.Compose([
        RandomStretch(),
        CenterCrop((config.examplar_size, config.examplar_size)),
        ToTensor()
    ])
    train_x_transforms = transforms.Compose([
        RandomStretch(),
        CenterCrop((center_crop_size, center_crop_size)),
        RandomCrop((random_crop_size, random_crop_size)),
        ToTensor()
    ])
    valid_z_transforms = transforms.Compose([
        CenterCrop((config.examplar_size, config.examplar_size)),
        ToTensor(),
    ])
    valid_x_transforms = transforms.Compose([ToTensor()])

    # load data (see details in VIDDataset.py)
    train_dataset = VIDDataset(train_imdb,
                               data_dir,
                               config,
                               train_z_transforms,
                               train_x_transforms,
                               curriculum=True)
    val_dataset = VIDDataset(val_imdb,
                             data_dir,
                             Config(),
                             valid_z_transforms,
                             valid_x_transforms,
                             mode="Validation")

    # create dataloader
    train_loader = DataLoader(train_dataset,
                              batch_size=config.batch_size,
                              shuffle=True,
                              num_workers=config.train_num_workers,
                              drop_last=True)
    val_loader = DataLoader(val_dataset,
                            batch_size=config.batch_size,
                            shuffle=True,
                            num_workers=config.val_num_workers,
                            drop_last=True)

    # create SiamFC network architecture (see details in SiamNet.py)
    net = SiamNet()
    # move network to GPU if using GPU
    if use_gpu:
        net.cuda()

    # define training strategy;
    # the learning rate of adjust layer (i.e., a conv layer)
    # is set to 0 as in the original paper
    optimizer = torch.optim.SGD([
        {
            'params': net.feat_extraction.parameters()
        },
        {
            'params': net.adjust.bias
        },
        {
            'params': net.adjust.weight,
            'lr': 0
        },
    ], config.lr, config.momentum, config.weight_decay)

    # adjust the learning rate at each epoch
    scheduler = StepLR(optimizer, config.step_size, config.gamma)

    # used to control generating label for training;
    # once generated, they are fixed since the labels for each
    # pair of images (examplar z and search region x) are the same
    train_response_flag = False
    valid_response_flag = False

    f = open('./model/modified_loss/loss_data.txt', 'a')

    # ------------------------ training & validation process ------------------------
    for i in range(config.num_epoch):

        # adjusting learning rate
        scheduler.step()

        # ------------------------------ training ------------------------------
        # indicating training (very important for batch normalization)
        net.train()

        # used to collect loss
        train_loss = []
        train_dataset.set_epoch(i + 1)

        for j, data in enumerate(tqdm(train_loader)):

            # fetch data, i.e., B x C x W x H (batch size x channel x width x height)
            exemplar_imgs, instance_imgs = data

            # forward pass
            if use_gpu:
                exemplar_imgs = exemplar_imgs.cuda()
                instance_imgs = instance_imgs.cuda()
            output = net.forward(Variable(exemplar_imgs),
                                 Variable(instance_imgs))

            # create label for training (only do it one time)
            if not train_response_flag:
                # change control flag
                train_response_flag = True
                # get shape of output (i.e., response map)
                response_size = output.shape[2:4]
                # generate label and weight
                train_eltwise_label, train_instance_weight = create_label(
                    response_size, config, use_gpu)

            # clear the gradient
            optimizer.zero_grad()

            # loss
            loss = net.weight_loss(output, train_eltwise_label,
                                   train_instance_weight)

            # backward
            loss.backward()

            # update parameter
            optimizer.step()

            # collect training loss
            train_loss.append(loss.data.item())

        # ------------------------------ saving model ------------------------------
        if not os.path.exists(model_save_path):
            os.makedirs(model_save_path)
        torch.save(net,
                   model_save_path + "SiamFC_" + str(i + 1) + "_model.pth")

        # ------------------------------ validation ------------------------------
        # indicate validation
        net.eval()

        # used to collect validation loss
        val_loss = []
        val_dataset.set_epoch(i + 1)

        for j, data in enumerate(tqdm(val_loader)):

            exemplar_imgs, instance_imgs = data

            # forward pass
            if use_gpu:
                exemplar_imgs = exemplar_imgs.cuda()
                instance_imgs = instance_imgs.cuda()
            output = net.forward(Variable(exemplar_imgs),
                                 Variable(instance_imgs))

            # create label for validation (only do it one time)
            if not valid_response_flag:
                valid_response_flag = True
                response_size = output.shape[2:4]
                valid_eltwise_label, valid_instance_weight = create_label(
                    response_size, config, use_gpu)

            # loss
            loss = net.weight_loss(output, valid_eltwise_label,
                                   valid_instance_weight)

            # collect validation loss
            val_loss.append(loss.data.item())
        train_loss = np.array(train_loss)
        val_loss = np.array(val_loss)
        f.write('{}, {}\n'.format(np.mean(train_loss), np.mean(val_loss)))
        print("Epoch %d   training loss: %f, validation loss: %f" %
              (i + 1, np.mean(train_loss), np.mean(val_loss)))
    f.close()
Code Example #10
        DATASET_PATH = os.path.join('/tmp/pycharm_project562/16_tcls_movie')

    criterion_type = {
        'regression': nn.MSELoss(),
        'classification': nn.CrossEntropyLoss(),
        'bilstmwithattn': nn.CrossEntropyLoss(),
        'cnntext': nn.CrossEntropyLoss(),
        'ImgText2Vec': nn.CrossEntropyLoss()
    }

    criterion = criterion_type[config.model]
    reg_criterion = nn.MSELoss().cuda()
    optimizer = optim.Adam(model.parameters(),
                           weight_decay=config.l2,
                           lr=config.learning_rate)
    scheduler = StepLR(optimizer, step_size=3, gamma=config.lr_decay)

    # DONOTCHANGE: They are reserved for nsml
    if config.pause and config.nsml_use:
        nsml.paused(scope=locals())

    # Used in training mode (the default).
    if config.mode == 'train':

        print('train data loading...')
        # Load the data (note: the class ratios in the dataset are highly imbalanced).

        train_loader = DataLoader(
            dataset=dataset,
            batch_size=config.batch,
            shuffle=True,
Code Example #11
File: main.py Project: aayushjr/PytorchTemplate
def run_main():
    # Check if cuda is available
    use_cuda = torch.cuda.is_available()
    
    # Set proper device based on cuda availability 
    device = torch.device("cuda" if use_cuda else "cpu")
    print("Torch device selected: ", device)
    
    # Initialize the model and send to device 
    model = Net().to(device)
    
    # Initialize the criterion for loss computation 
    criterion = nn.CrossEntropyLoss(reduction='mean')
    
    # Initialize optimizer type 
    if config.optimizer_type == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
        print("Use optimizer type: {}, LR: {}".format(config.optimizer_type, config.learning_rate))
    elif config.optimizer_type == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)
        print("Use optimizer type: {}, LR: {}".format(config.optimizer_type, config.learning_rate))
    else:
        print("Select optimizer type from {SGD | Adam}")
        exit(0)
    
    # Create transformations to apply to each data sample 
    # Can specify variations such as image flip, color flip, random crop, ...
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])
    
    # Load datasets for training and testing
    # Inbuilt datasets available in torchvision (check documentation online)
    dataset1 = datasets.MNIST('./data/', train=True, download=True,
                       transform=transform)
    dataset2 = datasets.MNIST('./data/', train=False,
                       transform=transform)
    train_loader = DataLoader(dataset1, batch_size = config.batch_size, 
                                shuffle=True, num_workers=4)
    test_loader = DataLoader(dataset2, batch_size = config.batch_size, 
                                shuffle=False, num_workers=4)
    
    
    # Optionally, use a scheduler to change learning rate at certain interval manually
    # Used for step LR change, cyclic LR change or manual LR change after some epochs
    scheduler = StepLR(optimizer, step_size=config.step_size, gamma=0.1)
    
    # Init variable to store best loss, can use for saving best model 
    best_accuracy = 0.0
    
    # Create summary writer object in specified folder. 
    # Use same head folder and different sub_folder to easily compare between runs
    # Eg. SummaryWriter("my_logs/run1_Adam"), SummaryWriter("my_logs/run2_SGD")
    #     This allows tensorboard to easily compare between run1 and run2
    writer = SummaryWriter("my_logs/run1_Adam", comment="Test_01_LR_1e-3")
    
    # Run training for n_epochs specified in config 
    for epoch in range(1, config.n_epochs + 1):
        train_loss, train_accuracy = train(model, device, train_loader,
                                            optimizer, criterion, epoch, 
                                            log_interval = 50)
        test_loss, test_accuracy = test(model, device, test_loader)
        scheduler.step()
        
        writer.add_scalar('Loss/train', train_loss, epoch)
        writer.add_scalar('Loss/test', test_loss, epoch)
        writer.add_scalar('Accuracy/train', train_accuracy, epoch)
        writer.add_scalar('Accuracy/test', test_accuracy, epoch)        
        writer.add_scalar('LR', optimizer.param_groups[0]['lr'], epoch)
        
        if test_accuracy > best_accuracy and config.save:
            best_accuracy = test_accuracy
            save_file_path = os.path.join(config.save_dir, 'model_{}_{:2.2f}.pth'.format(epoch, best_accuracy))
            states = {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_accuracy': best_accuracy
            }

            try:
                os.mkdir(config.save_dir)
            except:
                pass

            torch.save(states, save_file_path)
            print('Model saved ', str(save_file_path))
            
            # Alternatively, save the entire model, but it takes more space
            # torch.save(model, save_file_path)
        
        #if epoch % 5 == 0:
        #    break 
    
    # Flush all log to writer and close 
    writer.flush()
    writer.close()
    
    print("Training finished")
Code Example #12
File: main.py Project: mlu415/MNIST-TRIAL
def main():
    epoches = 4
    gamma = 0.7
    log_interval = 10
    torch.manual_seed(1)
    save_model = True

    #RNN
    RNN = True
    N_STEPS = 28
    N_INPUTS = 28
    N_NEURONS = 150
    N_OUTPUTS = 10

    # Check whether you can use Cuda
    use_cuda = torch.cuda.is_available()
    # Use Cuda if you can
    device = torch.device("cuda" if use_cuda else "cpu")

    ######################   Torchvision    ###########################
    # Use data predefined loader
    # Pre-processing by using the transform.Compose
    # divide into batches
    # num_workers uses subprocesses to load data asynchronously, and pinned RAM (pin_memory) speeds up
    # RAM-to-GPU transfers (change 4: adjust num_workers)
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    #change 1 normalise the input images
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    train_dataset = datasets.MNIST('PATH_TO_STORE_TRAINSET',
                                   download=True,
                                   train=True,
                                   transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=64,
                                               shuffle=True,
                                               **kwargs)

    # change 2 normalise test images
    test_dataset = datasets.MNIST('PATH_TO_STORE_TRAINSET',
                                  download=True,
                                  train=False,
                                  transform=transform)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1000,
                                              shuffle=True,
                                              **kwargs)

    # get some random training images
    dataiter = iter(train_loader)
    images, labels = dataiter.next()
    #img = torchvision.utils.make_grid(images)
    #imsave(img)

    # #####################    Build your network and run   ############################
    if RNN:
        model = ImageRNN(64, N_STEPS, N_INPUTS, N_NEURONS, N_OUTPUTS,
                         device).to(device)
    else:
        model = ConvNet()

    if RNN:
        optimizer = optim.Adadelta(model.parameters(), lr=0.01)
    else:
        optimizer = optim.Adam(model.parameters(), lr=0.001)

    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

    for epoch in range(1, epoches + 1):
        if RNN:
            train_rnn(log_interval, model, device, train_loader, optimizer,
                      epoch)
        else:
            train_cnn(log_interval, model, device, train_loader, optimizer,
                      epoch)

        test(model, device, test_loader)
        scheduler.step()

    if save_model:
        torch.save(model.state_dict(), "./results/mnist_cnn.pt")
Code Example #13
def train_model(
        batch_size: int = 64,
        test_batch_size: int = 1000,
        epochs: int = 14,
        lr: float = 1.0,
        gamma: float = 0.7,
        no_cuda: bool = False,
        dry_run: bool = False,
        seed: int = 1,
        log_interval: int = 10,
        save_model: bool = False,
        checkpoint_period: int = 5,  # Period between checkpoints in minutes
        checkpoint_input: str = '',
        checkpoint_output: str = ''):

    use_cuda = not no_cuda and torch.cuda.is_available()
    torch.manual_seed(seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    args = argparse.Namespace()
    args.log_interval = log_interval
    args.batch_size = batch_size
    args.dry_run = dry_run

    kwargs = {'batch_size': batch_size}
    if use_cuda:
        kwargs.update({
            'num_workers': 1,
            'pin_memory': True,
            'shuffle': True
        }, )

    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize((0.1307, ), (0.3081, ))])
    dataset1 = datasets.MNIST('../data',
                              train=True,
                              download=True,
                              transform=transform)
    dataset2 = datasets.MNIST('../data', train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **kwargs)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

    last_checkpoint_time = time.time()

    epoch_start = 1

    if checkpoint_input:
        print(f"Attempt loading checkpoint from {checkpoint_input}")
        try:
            checkpoint = torch.load(checkpoint_input)
        except Exception as e:
            print("Skipping broken checkpoint")
        else:
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            epoch_start = checkpoint['epoch'] + 1  # Start from next epoch
            print(f"Resuming from checkpoint with epoch: {epoch_start}")

    for epoch in range(epoch_start, epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

        # The following code block writes a checkpoint if one has not been written
        # in the past checkpoint_period # of minutes
        if save_model and (time.time() -
                           last_checkpoint_time) > (checkpoint_period * 60):
            print("*************** Triggering checkpoint ***************")
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, checkpoint_output)
Code Example #14
    def __init__(self, optimizer, gamma=0.96, step_size=100000):
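        # with step_size=100000, the wrapped StepLR is presumably stepped per iteration rather than per epoch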
        self.scheduler = StepLR(optimizer, step_size, gamma)
Code Example #15
def main():
    # Trainset stats: 2072002577 items from 124950714 sessions
    print('Initializing dataloader...')
    mtrain_loader = SpotifyDataloader(
        config_fpath=args.config,
        mtrain_mode=True,
        data_sel=(0, 99965071),  # 80% as train
        batch_size=TR_BATCH_SZ,
        shuffle=True,
        seq_mode=True)  # seq_mode implemented

    mval_loader = SpotifyDataloader(
        config_fpath=args.config,
        mtrain_mode=True,  # True, because we use part of trainset as testset
        data_sel=(99965071, 104965071),  #(99965071, 124950714), # 20% as test
        batch_size=TS_BATCH_SZ,
        shuffle=False,
        seq_mode=True)

    # Load Teacher net
    SMT = SeqModel().cuda(GPU)
    checkpoint = torch.load(FPATH_T_NET_CHECKPOINT,
                            map_location='cuda:{}'.format(GPU))
    tqdm.write(
        "Loading saved teacher model from '{0:}'... loss: {1:.6f}".format(
            FPATH_T_NET_CHECKPOINT, checkpoint['loss']))
    SMT.load_state_dict(checkpoint['SM_state'])

    SMT_Enc = nn.Sequential(*list(SMT.children())[:1]).cuda(GPU)
    #SMT_EncFeat = nn.Sequential(*list(SMT.children())[:2])

    # Init Student net --> copy classifier from the Teacher net
    SM = SeqModel_Student().cuda(GPU)
    SM.feature = deepcopy(SMT.feature)
    for p in list(SM.feature.parameters()):
        p.requires_grad = False
    SM.classifier = deepcopy(SMT.classifier)
    SM.classifier.weight.requires_grad = False
    SM.classifier.bias.requires_grad = False
    SM = SM.cuda(GPU)

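    # optimize only the parameters that still require grad (the copied feature/classifier layers are frozen)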
    SM_optim = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                       SM.parameters()),
                                lr=LEARNING_RATE)
    SM_scheduler = StepLR(SM_optim, step_size=1, gamma=0.9)

    # Load checkpoint
    if args.load_continue_latest is None:
        START_EPOCH = 0
    else:
        latest_fpath = max(glob.iglob(MODEL_SAVE_PATH + "check*.pth"),
                           key=os.path.getctime)
        checkpoint = torch.load(latest_fpath,
                                map_location='cuda:{}'.format(GPU))
        tqdm.write("Loading saved model from '{0:}'... loss: {1:.6f}".format(
            latest_fpath, checkpoint['loss']))
        SM.load_state_dict(checkpoint['SM_state'])
        SM_optim.load_state_dict(checkpoint['SM_opt_state'])
        SM_scheduler.load_state_dict(checkpoint['SM_sch_state'])
        START_EPOCH = checkpoint['ep']

    # Train
    for epoch in trange(START_EPOCH,
                        EPOCHS,
                        desc='epochs',
                        position=0,
                        ascii=True):
        tqdm.write('Train...')
        tr_sessions_iter = iter(mtrain_loader)
        total_corrects = 0
        total_query = 0
        total_trloss = 0
        for session in trange(len(tr_sessions_iter),
                              desc='sessions',
                              position=1,
                              ascii=True):
            SMT.eval()  # Teacher-net
            SM.train()  # Student-net
            x, labels, y_mask, num_items, index = tr_sessions_iter.next()  # FIXED 13.Dec. SEPARATE LOGS. QUERY SHOULD NOT INCLUDE LOGS

            # Sample data for 'support' and 'query': ex) 15 items = 7 sup, 8 queries...
            num_support = num_items[:, 0].detach().numpy().flatten()  # if num_items is odd, the query gets one more item
            num_query = num_items[:, 1].detach().numpy().flatten()
            batch_sz = num_items.shape[0]

            # x: the first 10 items out of 20 are support items left-padded with zeros. The last 10 are queries right-padded.
            x = x.permute(0, 2, 1)  # bx70*20

            # x_feat_T: Teacher-net input, x_feat_S: Student-net input(que-log is excluded)
            x_feat_T = torch.zeros(batch_sz, 72, 20)
            x_feat_T[:, :70, :] = x.clone()
            x_feat_T[:, 70, :10] = 1  # Sup/Que state indicator
            x_feat_T[:, 71, :10] = labels[:, :10].clone()

            x_feat_S = x_feat_T.clone()
            x_feat_S[:, :41, 10:] = 0  # remove que-log

            x_feat_T = x_feat_T.cuda(GPU)
            x_feat_S = Variable(x_feat_S).cuda(GPU)

            # Target: Prepare Teacher's intermediate output
            enc_target = SMT_Enc(x_feat_T)
            #target = SMT_EncFeat(x_feat_T)

            # target mask
            target_mask = y_mask.clone().unsqueeze(1).repeat(1, 128,
                                                             1).cuda(GPU)
            target_mask_que = target_mask.clone().cuda(GPU)
            target_mask_que[:, :, :10] = 0

            # y_mask
            y_mask_que = y_mask.clone()
            y_mask_que[:, :10] = 0

            # Forward & update
            y_hat_enc, y_hat = SM(x_feat_S)  # y_hat: b*20

            # Calculate distillation loss: IN_10
            loss1 = F.binary_cross_entropy_with_logits(
                input=y_hat_enc * target_mask_que,
                target=torch.sigmoid(enc_target) * target_mask_que)
            loss2 = F.l1_loss(input=y_hat_enc * target_mask_que,
                              target=enc_target * target_mask_que)
            loss = loss1 + loss2
            total_trloss += loss.item()
            SM.zero_grad()
            loss.backward()
            # Gradient Clipping
            #torch.nn.utils.clip_grad_norm_(SM.parameters(), 0.5)
            SM_optim.step()

            # Decision
            SM.eval()
            y_prob = torch.sigmoid(
                y_hat * y_mask_que.cuda(GPU)).detach().cpu().numpy()  # bx20
            y_pred = (y_prob[:, 10:] > 0.5).astype(np.int)  # bx10
            y_numpy = labels[:, 10:].numpy()  # bx10
            # Acc
            total_corrects += np.sum(
                (y_pred == y_numpy) * y_mask_que[:, 10:].numpy())
            total_query += np.sum(num_query)

            # Restore GPU memory
            del loss, y_hat, y_hat_enc

            if (session + 1) % 500 == 0:
                hist_trloss.append(total_trloss / 900)
                hist_tracc.append(total_corrects / total_query)
                # Prepare display
                sample_sup = labels[0, (
                    10 - num_support[0]):10].long().numpy().flatten()
                sample_que = y_numpy[0, :num_query[0]].astype(int)
                sample_pred = y_pred[0, :num_query[0]]
                sample_prob = y_prob[0, 10:10 + num_query[0]]

                tqdm.write("S:" + np.array2string(sample_sup) + '\n' + "Q:" +
                           np.array2string(sample_que) + '\n' + "P:" +
                           np.array2string(sample_pred) + '\n' + "prob:" +
                           np.array2string(sample_prob))
                tqdm.write(
                    "tr_session:{0:}  tr_loss:{1:.6f}  tr_acc:{2:.4f}".format(
                        session, hist_trloss[-1], hist_tracc[-1]))
                total_corrects = 0
                total_query = 0
                total_trloss = 0

            if (session + 1) % 25000 == 0:
                # Validation
                validate(mval_loader, SM, eval_mode=True, GPU=GPU)
                # Save
                torch.save(
                    {
                        'ep': epoch,
                        'sess': session,
                        'SM_state': SM.state_dict(),
                        'loss': hist_trloss[-1],
                        'hist_vacc': hist_vacc,
                        'hist_vloss': hist_vloss,
                        'hist_trloss': hist_trloss,
                        'SM_opt_state': SM_optim.state_dict(),
                        'SM_sch_state': SM_scheduler.state_dict()
                    }, MODEL_SAVE_PATH +
                    "check_{0:}_{1:}.pth".format(epoch, session))
        # Validation
        validate(mval_loader, SM, eval_mode=True, GPU=GPU)
        # Save
        torch.save(
            {
                'ep': epoch,
                'sess': session,
                'SM_state': SM.state_dict(),
                'loss': hist_trloss[-1],
                'hist_vacc': hist_vacc,
                'hist_vloss': hist_vloss,
                'hist_trloss': hist_trloss,
                'SM_opt_state': SM_optim.state_dict(),
                'SM_sch_state': SM_scheduler.state_dict()
            }, MODEL_SAVE_PATH + "check_{0:}_{1:}.pth".format(epoch, session))
        SM_scheduler.step()
Code Example #16
def pre_train(hp, models, train_data, test_data):
    print("----------start pre-training models----------")
    view_num = len(models)
    par = []
    for i in range(view_num):
        models[i].cuda()
        models[i].train()
        par.append({'params': models[i].parameters()})

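    # a single Adam optimizer with one parameter group per view model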
    optimizer = optim.Adam(par, lr=hp['pre_lr'])
    scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
    batch_size = hp['pre_size']
    loss_func = nn.MSELoss()

    for epoch in range(hp['pre_epoch']):
        scheduler.step()
        running_loss = 0.0
        data_num = 0
        for i in range(view_num):
            models[i].train()
        for i in range(3):
            data = train_data[i]
            if data == None:
                continue
            bag_num = len(data)
            data_num += bag_num
            max_step = int(bag_num / batch_size)
            while max_step * batch_size < bag_num:
                max_step += 1

            for step in range(max_step):
                # get data
                step_data = get_batch(
                    data,
                    list(
                        range(step * batch_size,
                              min((step + 1) * batch_size, bag_num))), hp)
                x1, x2, bag1, bag2, y = step_data
                b_y = Variable(y).cuda()
                loss = 0
                if i == 0 or i == 2:
                    x_img = Variable(x1).cuda()
                    h1, _, _ = models[0](x_img, bag1)
                    loss += loss_func(h1, b_y)
                if i == 0 or i == 1:
                    x_text = Variable(x2).cuda()
                    h2, _, _ = models[1](x_text, bag2)
                    loss += loss_func(h2, b_y)

                running_loss += loss.data * x2.size(0)

                # backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        # epoch loss
        epoch_loss = running_loss / data_num
        print('epoch {}/{} | Loss: {:.9f}'.format(epoch, hp['pre_epoch'],
                                                  epoch_loss))

        rootpath = "{}{}/".format(hp['modelpath'], str(epoch + 1))
        os.makedirs(rootpath, exist_ok=True)
        save_model(models, rootpath)
        hp['rootdir'] = rootpath
        result = test(test_data, hp, models, 'pretrain')

    print("----------end pre-training models----------")
    return models
Code Example #17
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=5, metavar='N',
                        help='number of epochs to train (default: 5)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=123, metavar='S',
                        help='random seed (default: 123)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    print("device: ", device)

    train_kwargs = {'batch_size': args.batch_size}
    test_kwargs = {'batch_size': args.test_batch_size}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True,
                       'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])
    dataset1 = datasets.MNIST('./MNIST', train=True, download=True,
                       transform=transform)
    dataset2 = datasets.MNIST('./MNIST', train=False,
                       transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader, epoch)
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
Code Example #18
def train(hp, models, train_data):

    #if hp['pretrain'] == 1:
    #    models = pre_train(hp, models, train_data)

    print("----------start training models----------")
    view_num = len(models)  # number of views
    l = hp['label']  # number of labels
    # initialize the K0 and M matrices
    k_0 = torch.nn.Softmax()(torch.eye(l))
    k_0 = k_0.data.numpy()
    k_0 = k_0 / np.max(k_0)
    k_0_inv = np.linalg.inv(k_0)
    m = cal_distance_matrix(k_0)
    m = m / np.max(m)

    trade = hp['trade_off']  # trade-off coefficient
    lr = hp['lr']
    ae_coe = hp['ae']

    par = []
    for i in range(view_num):
        models[i].cuda()
        par.append({'params': models[i].parameters()})

    optimizer = optim.Adam(par, lr=lr[0])
    scheduler = StepLR(optimizer, step_size=10, gamma=0.5)
    ae_loss = torch.nn.MSELoss(reduction='elementwise_mean')

    batch_size = hp['batch_size'][0]

    def train_for_dataset(data, train_type):
        loss_record = np.zeros(5)
        if data == None:
            return loss_record
        if train_type in [4, 5] and hp['ae'] == 0:
            return loss_record
        if train_type == 3 and hp['semi'] == 0:
            return loss_record
        bag_num = len(data)
        max_step = int(bag_num / batch_size)
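        # round up so the final partial batch is also processed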
        while max_step * batch_size < bag_num:
            max_step += 1
        for step in range(max_step):
            step_data = get_batch(
                data,
                list(
                    range(step * batch_size,
                          min((step + 1) * batch_size, bag_num))), hp)
            x1, x2, bag1, bag2, y = step_data
            if train_type == 0:
                x_img = Variable(x1).cuda()
                x_text = Variable(x2).cuda()
                b_y = Variable(y).cuda()

                # forward
                h1, fea1, dec1 = models[0](x_img, bag1)
                h2, fea2, dec2 = models[1](x_text, bag2)

                # loss
                w_loss = WassersteinLoss(m, hp['reg'])
                ae_loss1 = ae_loss(fea1, dec1)
                loss1 = w_loss(h1, b_y)
                total_loss = loss1 + hp['ae'] * (ae_loss1)
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                w_loss = WassersteinLoss(m, hp['reg'])
                loss2 = w_loss(h2, b_y)
                ae_loss2 = ae_loss(fea2, dec2)
                total_loss = loss2 + hp['ae'] * (ae_loss2)
                loss_record[0] += loss1.data.cpu().numpy()[0] * x1.size(0)
                loss_record[1] += loss2.data.cpu().numpy()[0] * x1.size(0)
                loss_record[2] += ae_loss1.data.cpu().numpy() * x1.size(0)
                loss_record[3] += ae_loss2.data.cpu().numpy() * x1.size(0)

            elif train_type == 1:
                x_text = Variable(x2).cuda()
                b_y = Variable(y).cuda()

                # forward
                h2, fea2, dec2 = models[1](x_text, bag2)

                # loss
                w_loss = WassersteinLoss(m, hp['reg'])
                loss2 = w_loss(h2, b_y)
                ae_loss2 = ae_loss(fea2, dec2)

                total_loss = loss2 + hp['ae'] * (ae_loss2)

                loss_record[1] += loss2.data.cpu().numpy()[0] * x2.size(0)
                loss_record[3] += ae_loss2.data.cpu().numpy() * x2.size(0)

            elif train_type == 2:
                x_img = Variable(x1).cuda()
                b_y = Variable(y).cuda()

                # forward
                h1, fea1, dec1 = models[0](x_img, bag1)

                # loss
                w_loss = WassersteinLoss(m, hp['reg'])
                loss1 = w_loss(h1, b_y)
                ae_loss1 = ae_loss(fea1, dec1)

                total_loss = loss1 + hp['ae'] * (ae_loss1)

                loss_record[0] += loss1.data.cpu().numpy()[0] * x1.size(0)
                loss_record[2] += ae_loss1.data.cpu().numpy() * x1.size(0)

            elif train_type == 3 and hp['semi'] == 1:
                x_img = Variable(x1).cuda()
                x_text = Variable(x2).cuda()

                # forward
                h1, fea1, dec1 = models[0](x_img, bag1)
                h2, fea2, dec2 = models[1](x_text, bag2)

                # loss
                w_loss = WassersteinLoss(m, hp['reg'])
                semi_loss = w_loss(h1, h2)
                ae_loss1 = ae_loss(fea1, dec1)
                ae_loss2 = ae_loss(fea2, dec2)

                total_loss = semi_loss + hp['ae'] * (ae_loss1 + ae_loss2)

                loss_record[2] += ae_loss1.data.cpu().numpy() * x1.size(0)
                loss_record[3] += ae_loss2.data.cpu().numpy() * x1.size(0)
                loss_record[4] += semi_loss.data.cpu().numpy()[0] * x1.size(0)

            elif train_type == 4 and hp['ae'] != 0:
                x_text = Variable(x2).cuda()

                # forward
                h2, fea2, dec2 = models[1](x_text, bag2)

                # loss
                ae_loss2 = ae_loss(fea2, dec2)

                total_loss = hp['ae'] * ae_loss2

                loss_record[3] += ae_loss2.data.cpu().numpy() * x2.size(0)

            elif train_type == 5 and hp['ae'] != 0:
                x_img = Variable(x1).cuda()

                # forward
                h1, fea1, dec1 = models[0](x_img, bag1)

                # loss
                ae_loss1 = ae_loss(fea1, dec1)

                total_loss = hp['ae'] * ae_loss1

                loss_record[2] += ae_loss1.data.cpu().numpy() * x1.size(0)

            # backward
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
        return loss_record

    store_loss = np.zeros((hp['epoch'] * hp['epoch_1'], 5))
    K = 0
    for epoch in range(hp['epoch']):
        for epoch_1 in range(hp['epoch_1']):
            scheduler.step()
            for t in range(view_num):
                models[t].train()
            for i in range(len(train_data)):
                print(epoch, epoch_1, i)
                data = train_data[i]
                loss_for_dataset = train_for_dataset(data, i)
                store_loss[epoch * hp['epoch_1'] +
                           epoch_1] += loss_for_dataset.reshape((-1))

        # second stage
        K = 0
        if hp['fixed'] == 0:
            for i in range(view_num):
                models[i].eval()
            T = np.zeros((l, l))
            # calculate T
            for i in range(len(train_data)):
                # get data
                if i > 2:
                    continue
                data = train_data[i]
                if data is None:
                    continue
                for j in range(len(data)):
                    x1, x2, bag1, bag2, b_y = get_batch(data, [j], hp)
                    b_y = b_y.cpu().numpy().reshape((-1, ))
                    b_y[b_y <= 0] = 1e-9
                    b_y = b_y / np.sum(b_y)

                    x_img = None
                    x_text = None
                    if i == 0 or i == 2:
                        x_img = Variable(x1).cuda()
                        h = models[0](x_img, bag1)[0].cpu().data.numpy()
                        h[h <= 0] = 1e-9
                        h = h / np.sum(h)
                        Gs = ot.sinkhorn(h.reshape(-1), b_y.reshape(-1),
                                         m / np.max(m), hp['reg'])
                        T += Gs
                    if i == 0 or i == 1:
                        x_text = Variable(x2).cuda()
                        h = models[1](x_text, bag2)[0].cpu().data.numpy()
                        h[h <= 0] = 1e-9
                        h = h / np.sum(h)
                        Gs = ot.sinkhorn(h.reshape(-1), b_y.reshape(-1),
                                         m / np.max(m), hp['reg'])
                        T += Gs
            # T /= (bag_num * view_num)

            # calculate K
            G = np.zeros((l, l))
            for i in range(l):
                for j in range(l):
                    if i == j:
                        for k in range(l):
                            if k != i:
                                G[i][j] -= (T[i][k] + T[k][i])
                    else:
                        G[i][j] = 2 * T[i][j]

            K = np.linalg.inv(k_0_inv - G / trade)
            #K = k_0 + G / trade / np.max(G)
            K = (K + K.T) / 2  # symmetrize
            u, v = np.linalg.eig(K)
            u[u < 0] = 0  # clip negative eigenvalues
            K = np.dot(v, np.dot(np.diag(u), v.T))  # project K back onto the PSD cone

            # calculate M
            m = cal_distance_matrix(K)
            m = m / np.max(m)

    # save the loss history
    np.save("{}loss.npy".format(hp['rootdir']), store_loss)
    # save the corr matrices
    np.save("{}M.npy".format(hp['rootdir']), m)
    np.save("{}K.npy".format(hp['rootdir']), K)
    return models
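The second stage above refits the label ground metric from optimal-transport plans: Sinkhorn couplings are accumulated into T, converted into G, inverted against k_0_inv to obtain a new kernel K, projected back onto the positive semidefinite cone, and finally turned into the distance matrix m. A minimal, self-contained sketch of that eigenvalue-clipping projection (assuming only NumPy; not part of the original code):

import numpy as np

def project_psd(K):
    """Nearest PSD approximation of a (nearly) symmetric matrix."""
    K = (K + K.T) / 2            # symmetrize first
    u, v = np.linalg.eigh(K)     # eigendecomposition for symmetric matrices
    u[u < 0] = 0                 # clip negative eigenvalues to zero
    return v @ np.diag(u) @ v.T  # reassemble a positive semidefinite matrix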
コード例 #19
0
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=512,
                        metavar='N',
                        help='input batch size for training (default: 512)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr',
                        type=float,
                        default=1,
                        metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.7,
                        metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run',
                        action='store_true',
                        default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    parser.add_argument('--T',
                        type=int,
                        default=450,
                        metavar='N',
                        help='SNN time window')
    parser.add_argument('--resume',
                        type=str,
                        default=None,
                        metavar='RESUME',
                        help='Resume model from checkpoint')

    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'batch_size': args.batch_size}
    if use_cuda:
        kwargs.update({
            'num_workers': 1,
            'pin_memory': True,
            'shuffle': True
        }, )
    transform_train = transforms.Compose(
        [transforms.ToTensor(),
         AddGaussianNoise(std=0.01)])

    transform = transforms.Compose([transforms.ToTensor()])
    dataset1 = datasets.MNIST('../data',
                              train=True,
                              download=True,
                              transform=transform_train)

    for i in range(30):
        transform_train_1 = transforms.Compose([
            transforms.RandomRotation(10),
            #transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            AddGaussianNoise(std=0.01)
        ])
        dataset1 = dataset1 + datasets.MNIST(
            '../data', train=True, download=True, transform=transform_train_1)

    dataset2 = datasets.MNIST('../data', train=False, transform=transform)
    snn_dataset = SpikeDataset(dataset2, T=args.T)
    train_loader = torch.utils.data.DataLoader(dataset1, **kwargs)

    test_loader = torch.utils.data.DataLoader(dataset2, **kwargs)
    snn_loader = torch.utils.data.DataLoader(snn_dataset, **kwargs)

    model = Net().to(device)
    snn_model = CatNet(args.T).to(device)

    if args.resume is not None:
        load_model(torch.load(args.resume), model)
    for param_tensor in model.state_dict():
        print(param_tensor, "\t", model.state_dict()[param_tensor].size())
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    ACC = 0
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        ACC_ = test(model, device, test_loader)
        if ACC_ >= ACC:
            ACC = ACC_
            torch.save(model.state_dict(), "mnist_pretrained.pt")

        scheduler.step()
    # After retraining with Q function, you can transfer ANN to SNN.
    fuse_module(model)
    transfer_model(model, snn_model)
    test(snn_model, device, snn_loader)
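The 30-iteration loop above enlarges the MNIST training set by repeatedly adding freshly transformed copies; torchvision datasets support `+`, which builds a ConcatDataset. An equivalent, more explicit sketch (illustrative only; the custom AddGaussianNoise transform is omitted):

from torch.utils.data import ConcatDataset
from torchvision import datasets, transforms

base = datasets.MNIST('../data', train=True, download=True,
                      transform=transforms.ToTensor())
rotated = [datasets.MNIST('../data', train=True, download=True,
                          transform=transforms.Compose([
                              transforms.RandomRotation(10),
                              transforms.ToTensor()]))
           for _ in range(30)]
augmented = ConcatDataset([base] + rotated)  # same effect as the dataset1 + ... loop above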
コード例 #20
0
ファイル: cifar10_model.py プロジェクト: danielekp/Simple-CNN
def main():
    parser = argparse.ArgumentParser(description="My CNN")
    parser.add_argument("--batch-size",
                        type=int,
                        default=64,
                        metavar='N',
                        help="input batch sixe for training (default: 64)")
    parser.add_argument("--test-batch-size",
                        type=int,
                        default=1000,
                        metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs',
                        type=int,
                        default=14,
                        metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr',
                        type=float,
                        default=1.0,
                        metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.7,
                        metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda") if use_cuda else torch.device("cpu")
    print(device)
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    #transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    #trainset = torchvision.datasets.CIFAR10('../data', train=True, download=True, transform=transform)
    train_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        '../data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    test_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
        '../data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])),
                                              batch_size=args.test_batch_size,
                                              shuffle=True,
                                              **kwargs)

    classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
               'ship', 'truck')

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(args, model, device, test_loader)
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(), "cifar10_cnn.pt")
コード例 #21
0
ファイル: basic_pruners_torch.py プロジェクト: yinfupai/nni
def get_model_optimizer_scheduler(args, device, train_loader, test_loader,
                                  criterion):
    if args.model == 'lenet':
        model = LeNet().to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.Adadelta(model.parameters(), lr=1)
            scheduler = StepLR(optimizer, step_size=1, gamma=0.7)
    elif args.model == 'vgg16':
        model = VGG(depth=16).to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=0.1,
                                        momentum=0.9,
                                        weight_decay=5e-4)
            scheduler = MultiStepLR(optimizer,
                                    milestones=[
                                        int(args.pretrain_epochs * 0.5),
                                        int(args.pretrain_epochs * 0.75)
                                    ],
                                    gamma=0.1)
    elif args.model == 'vgg19':
        model = VGG(depth=19).to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=0.1,
                                        momentum=0.9,
                                        weight_decay=5e-4)
            scheduler = MultiStepLR(optimizer,
                                    milestones=[
                                        int(args.pretrain_epochs * 0.5),
                                        int(args.pretrain_epochs * 0.75)
                                    ],
                                    gamma=0.1)
    elif args.model == 'resnet18':
        model = ResNet18().to(device)
        if args.pretrained_model_dir is None:
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=0.1,
                                        momentum=0.9,
                                        weight_decay=5e-4)
            scheduler = MultiStepLR(optimizer,
                                    milestones=[
                                        int(args.pretrain_epochs * 0.5),
                                        int(args.pretrain_epochs * 0.75)
                                    ],
                                    gamma=0.1)
    else:
        raise ValueError("model not recognized")

    if args.pretrained_model_dir is None:
        print('start pre-training...')
        best_acc = 0
        for epoch in range(args.pretrain_epochs):
            train(args, model, device, train_loader, criterion, optimizer,
                  epoch)
            scheduler.step()
            acc = test(args, model, device, criterion, test_loader)
            if acc > best_acc:
                best_acc = acc
                state_dict = model.state_dict()

        model.load_state_dict(state_dict)
        acc = best_acc

        torch.save(
            state_dict,
            os.path.join(args.experiment_data_dir,
                         f'pretrain_{args.dataset}_{args.model}.pth'))
        print('Trained model saved to %s' % args.experiment_data_dir)

    else:
        model.load_state_dict(torch.load(args.pretrained_model_dir))
        best_acc = test(args, model, device, criterion, test_loader)

    # set up a new optimizer for pruning
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=0.01,
                                momentum=0.9,
                                weight_decay=5e-4)
    scheduler = MultiStepLR(optimizer,
                            milestones=[
                                int(args.pretrain_epochs * 0.5),
                                int(args.pretrain_epochs * 0.75)
                            ],
                            gamma=0.1)

    print('Pretrained model acc:', best_acc)
    return model, optimizer, scheduler
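For all three CNN configurations above, MultiStepLR drops the learning rate by a factor of 10 at 50% and 75% of the pretraining epochs. A minimal sketch of the resulting schedule (illustrative only; assumes pretrain_epochs = 160):

import torch
from torch.optim.lr_scheduler import MultiStepLR

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.1, momentum=0.9, weight_decay=5e-4)
scheduler = MultiStepLR(optimizer, milestones=[80, 120], gamma=0.1)
for epoch in range(160):
    optimizer.step()   # stand-in for one epoch of per-batch updates
    scheduler.step()   # lr: 0.1 for epochs 0-79, 0.01 for 80-119, 0.001 afterwards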
コード例 #22
0
    def __init__(self, args):
        super().__init__()
        self.args = args
        self.polar = args.polar

        self.act = nn.ELU()
        # (1, 60, 160)

        self.d1 = 32
        self.conv1 = nn.Conv2d(1,
                               self.d1,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               padding_mode='replicate')
        # (32, 60, 160)
        self.batchNorm1 = nn.BatchNorm2d(self.d1)
        self.pool1 = nn.MaxPool2d(2)
        self.dropout2d1 = nn.Dropout2d(0.9)
        # (32, 30, 80)

        self.d2 = 32
        self.conv2 = nn.Conv2d(self.d1,
                               self.d2,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               padding_mode='replicate')
        # (32, 30, 80)
        self.batchNorm2 = nn.BatchNorm2d(self.d2)
        self.pool2 = nn.MaxPool2d(2)
        self.dropout2d2 = nn.Dropout2d(0.9)
        # (32, 15, 40)

        self.d3 = 64
        self.conv3 = nn.Conv2d(self.d2,
                               self.d3,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               padding_mode='replicate')
        # (64, 15, 40)
        self.batchNorm3 = nn.BatchNorm2d(self.d3)
        self.pool3 = nn.MaxPool2d(2)
        self.dropout2d3 = nn.Dropout2d(0.9)
        # (64, 7, 20)

        self.d4 = 64
        self.conv4 = nn.Conv2d(self.d3,
                               self.d4,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               padding_mode='replicate')
        # (64, 7, 20)
        self.batchNorm4 = nn.BatchNorm2d(self.d4)
        self.pool4 = nn.MaxPool2d(2)
        self.dropout2d4 = nn.Dropout2d(0.9)
        # (64, 3, 10)

        self.fc1 = nn.Linear(1920, 16)
        self.dropout1 = nn.Dropout(0.8)
        self.fc2 = nn.Linear(16, 16)
        self.dropout2 = nn.Dropout(0.8)
        self.fc3 = nn.Linear(16, 4)

        self.forward_pass = nn.Sequential(
            self.conv1,
            self.batchNorm1,
            self.pool1,  #self.dropout2d1, 
            self.conv2,
            self.batchNorm2,
            self.pool2,  #self.dropout2d2, 
            self.conv3,
            self.batchNorm3,
            self.pool3,  #self.dropout2d3, 
            self.conv4,
            self.batchNorm4,
            self.pool4,  #self.dropout2d4, 
            nn.Flatten(),
            self.fc1,
            self.fc2,
            self.fc3)

        self.optimizer = optim.Adam(self.parameters(), lr=args.lr)
        self.scheduler = StepLR(self.optimizer,
                                step_size=args.step,
                                gamma=args.gamma)
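The in_features=1920 of fc1 follows from the shape comments: each MaxPool2d(2) halves the spatial size (with flooring), so (1, 60, 160) becomes (64, 3, 10) after the four blocks and flattens to 64 * 3 * 10 = 1920. A quick standalone check (illustrative only; batch norm, activations and dropout do not change the shapes):

import torch
import torch.nn as nn

blocks = nn.Sequential(
    nn.Conv2d(1, 32, 3, padding=1), nn.MaxPool2d(2),   # (32, 30, 80)
    nn.Conv2d(32, 32, 3, padding=1), nn.MaxPool2d(2),  # (32, 15, 40)
    nn.Conv2d(32, 64, 3, padding=1), nn.MaxPool2d(2),  # (64, 7, 20)
    nn.Conv2d(64, 64, 3, padding=1), nn.MaxPool2d(2),  # (64, 3, 10)
    nn.Flatten(),
)
print(blocks(torch.zeros(1, 1, 60, 160)).shape)  # torch.Size([1, 1920])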
コード例 #23
0
def train(data_loader, model_index, x_eval_train, gn_fp, dn_fp, ave_fp):
    ### Model Initiation
    gn = GN().cuda()
    dn = DN().cuda()

    ave_state_dict = tor.load(ave_fp)
    gn.load_ave_state(ave_state_dict)
    dn.load_ave_state(ave_state_dict)

    if gn_fp :
        gn_state_dict = tor.load(gn_fp)
        gn.load_state_dict(gn_state_dict)
    if dn_fp :
        dn_state_dict = tor.load(dn_fp)
        dn.load_state_dict(dn_state_dict)
    gn.cuda()
    dn.cuda()


    loss_func = tor.nn.BCELoss().cuda()

    #optim = tor.optim.SGD(fcn.parameters(), lr=LR, momentum=MOMENTUM)
    optim_gn = tor.optim.Adam(gn.parameters(), lr=LR)
    optim_dn = tor.optim.Adam(dn.parameters(), lr=LR)

    lr_step_gn = StepLR(optim_gn, step_size=LR_STEPSIZE, gamma=LR_GAMMA)
    lr_step_dn = StepLR(optim_dn, step_size=LR_STEPSIZE, gamma=LR_GAMMA)


    ### Training
    for epoch in range(EPOCH):
        print("|Epoch: {:>4} |".format(epoch + 1))

        for step, (x_batch, y_batch) in enumerate(data_loader):
            print("Process: {}/{}".format(step, int(AVAILABLE_SIZE[0] / BATCHSIZE)), end="\r")

            ### train the discriminator on alternating real / generated (true/false) pics
            if (step // PIVOT_STEPS) % 3 != 2 :
                out = Variable(x_batch).cuda() if step % 2 == 0 else gn(Variable(tor.randn(BATCHSIZE, 512)).cuda())
                ans = Variable(tor.ones(BATCHSIZE, 1)).cuda() if step % 2 == 0 else Variable(tor.zeros(BATCHSIZE, 1)).cuda()
                dis = dn(out)
                optim = optim_dn

            ### train the generator: label generated samples as real
            else :
                out = gn(Variable(tor.randn(BATCHSIZE, 512)).cuda()).cuda()
                ans = Variable(tor.ones(BATCHSIZE, 1)).cuda()
                dis = dn(out)
                optim = optim_dn

            loss = loss_func(dis, ans)
            print (loss.data)
            loss.backward()
            if (step // PIVOT_STEPS) % 3 != 2 :
                optim_dn.step()
            else :
                optim_gn.step()

            optim_dn.zero_grad()
            optim_gn.zero_grad()
            lr_step_dn.step()
            lr_step_gn.step()


            if step % RECORD_JSON_PERIOD == 0 :
                x_true = Variable(x_eval_train).cuda()
                out = dn(x_true)
                acc_true = round(int((out > 0.5).sum().data) / EVAL_SIZE, 5)
                x_false = gn(Variable(tor.randn((EVAL_SIZE, 512))).cuda())
                out = dn(x_false)
                acc_false = round(int((out <= 0.5).sum().data) / EVAL_SIZE, 5)

                print ("|Acc True: {}   |Acc False: {}".format(acc_true, acc_false))

                save_record(model_index, epoch, optim, loss, acc_true, acc_false)

            if step % RECORD_PIC_PERIOD == 0 :
                loss = float(loss.data)
                print("|Loss: {:<8}".format(loss))
                save_pic("output_{}".format(model_index), gn, 3)

            if step % (2 * PIVOT_STEPS) == 0 :
                pass


            ### Save model
            if step % RECORD_MODEL_PERIOD == 0:
                tor.save(gn.state_dict(), os.path.join(MODEL_ROOT, "gan_gn_{}_{}.pkl".format(model_index, epoch)))
コード例 #24
0
def main():
    parser = argparse.ArgumentParser(
        description='PyTorch training script for SUN397 dataset')
    parser.add_argument('conf_file')
    parser.add_argument('output_dir', help='Model save directory')
    parser.add_argument('-w',
                        '--workers',
                        default=4,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 4)')
    parser.add_argument('-b',
                        '--batch-size',
                        default=64,
                        type=int,
                        metavar='N',
                        help='mini-batch size')
    parser.add_argument('-T',
                        '--tensor-board-dir',
                        help='Tensor board log dir',
                        default='runs')
    parser.add_argument('--restart',
                        help='Restart',
                        default=False,
                        action='store_true')
    parser.add_argument('--checkpoint', help='checkpoint file')
    parser.add_argument('--eval',
                        default=False,
                        action='store_true',
                        help='run evaluation only on the validation set')

    args = parser.parse_args()

    conf = load_conf(args.conf_file)
    train_set, val_set, net, criterion, metrics_dict, (
        score_name, score_function) = task_factory(conf['task'])(conf)

    if args.restart:
        run_id = find_recent_output_dir(conf['tag'], args.output_dir)
    else:
        run_id = '%s_%s' % (conf['tag'], datetime.now().strftime('%Y%m%d%H%M'))
    output_dir = os.path.join(args.output_dir, run_id)

    checkpoint_handler = CheckpointManager(output_dir,
                                           'model',
                                           score_name=score_name,
                                           score_function=score_function,
                                           extra={
                                               'conf': conf,
                                               'args': vars(args)
                                           })
    shutil.copy(args.conf_file, os.path.join(output_dir, 'conf.json'))
    loader_pin_memory = torch.cuda.is_available()
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=loader_pin_memory,
                                               drop_last=False)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=loader_pin_memory,
                                             drop_last=False)

    writer = create_summary_writer(net, train_loader, args.tensor_board_dir,
                                   run_id)

    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda'
        criterion = criterion.cuda()

    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=conf['lr'],
                                 weight_decay=conf['weight_decay'])

    trainer = create_supervised_trainer(net,
                                        optimizer,
                                        criterion,
                                        device=device,
                                        gradient_clip=conf['clip_gradient'])
    train_evaluator = create_supervised_evaluator(net,
                                                  metrics=metrics_dict,
                                                  device=device)

    evaluator = create_supervised_evaluator(net,
                                            metrics=metrics_dict,
                                            device=device)

    step_scheduler = StepLR(optimizer,
                            step_size=conf['lr_step'],
                            gamma=conf['lr_decay'])
    scheduler = LRScheduler(step_scheduler)
    trainer.add_event_handler(Events.EPOCH_STARTED, scheduler)

    all_params = {
        'model': net,
        'optimizer': optimizer,
        'lr_scheduler': step_scheduler
    }
    evaluator.add_event_handler(Events.EPOCH_COMPLETED, checkpoint_handler,
                                all_params)
    desc = "ITERATION - loss: {:.2f}"
    pbar = tqdm(initial=0,
                leave=False,
                total=len(train_loader),
                desc=desc.format(0))
    log_interval = 10

    # load checkpoint
    if args.restart and checkpoint_handler.is_checkpoint_available():
        state_dicts = checkpoint_handler.load_last()
        load_model(all_params, state_dicts)
    elif args.checkpoint is not None:
        state_dicts = checkpoint_handler.load(args.checkpoint)
        load_model(all_params, state_dicts)

    @trainer.on(Events.EPOCH_STARTED)
    def setup_engine(engine):
        if engine.state.epoch == 1:
            engine.state.epoch = checkpoint_handler.epoch_ + 1

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        iter = (engine.state.iteration - 1) % len(train_loader) + 1

        if iter % log_interval == 0:
            pbar.desc = desc.format(engine.state.output)
            pbar.update(log_interval)
            writer.add_scalar("training/loss", engine.state.output,
                              engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_training_results(engine):
        pbar.refresh()
        train_evaluator.run(train_loader)
        log_results(engine, train_evaluator, "Training", writer)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        checkpoint_handler.epoch_ = engine.state.epoch
        evaluator.run(val_loader)
        log_results(engine, evaluator, "Validation", writer)
        pbar.n = pbar.last_print_n = 0

    if args.eval:
        evaluator.run(val_loader)
        log_results(evaluator, evaluator, "Validation", writer)
    else:
        trainer.run(train_loader, max_epochs=conf['epochs'])
    pbar.close()
    print("END")
コード例 #25
0
def main():
    # Training settings
    # Use the command line to modify the default settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=256, metavar='N',
                        help='input batch size for training (default: 256)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--validation-percentage', type=float, default=15., metavar='P',
                        help='percentage of training data used for validation (default: 15)')
    parser.add_argument('--training-division', type=float, default=1., metavar='D',
                        help='divide the remaining training data by this factor')
    parser.add_argument('--epochs', type=int, default=12, metavar='N',
                        help='number of epochs to train (default: 12)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--step', type=int, default=1, metavar='N',
                        help='number of epochs between learning rate reductions (default: 1)')
    parser.add_argument('--gamma', type=float, default=1, metavar='M',
                        help='Learning rate step gamma (default: 1)')
    parser.add_argument('--no-cuda', action='store_true',
                        help='disables CUDA training')
    parser.add_argument('--no-augmentation', action='store_true',
                        help='disables data augmentation')
    parser.add_argument('--seed', type=int, default=2020, metavar='S',
                        help='random seed (default: 2020)')
    parser.add_argument('--log-numbers', type=int, default=1, metavar='N',
                        help='how many entries of logging training status to show per epoch')
    parser.add_argument('--name', type=str, default='default', metavar='name',
                        help='name of the model')
    parser.add_argument('--root', type=str, default='../data/hw03_outputs/', metavar='path',
                        help='path to save all model and plots')
    parser.add_argument('--plot', action='store_true',
                        help='plot the training curve')
    parser.add_argument('--evaluate', action='store_true',
                        help='evaluate your model on the official test set')
    parser.add_argument('--save-model', action='store_true',
                        help='save the current model')

    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # Evaluate on the official test set
    if args.evaluate:
        path_model = args.root+args.name+'.pt'
        
        assert os.path.exists(path_model)

        # Set the test model
        model = Net().to(device)
        model.load_state_dict(torch.load(path_model))

        test_dataset = datasets.MNIST('../data', train=False,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))

        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=args.test_batch_size, shuffle=False, **kwargs)

        test_loss, correct, preds = test(model, device, test_loader)
        
        np.save(args.root+args.name+'_test_loss.npy', test_loss)
        np.save(args.root+args.name+'_test_accuracy.npy', correct/len(test_loader.sampler)*100)
        np.save(args.root+args.name+'_preds.npy', preds)

        return

    # Pytorch has default MNIST dataloader which loads data at each iteration
    train_dataset = datasets.MNIST('../data', train=True, download=True,
        transform=transforms.Compose([       # Data preprocessing
            transforms.ToTensor(),           # Add data augmentation here
            transforms.Normalize((0.1307,), (0.3081,))
        ]))

    train_dataset_augmented = datasets.MNIST('../data', train=True, download=True,
        transform=transforms.Compose([
            transforms.RandomAffine(4, translate=(.1, .1), scale=(.9, 1.1), shear=(2, 2, 2, 2)),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ]))
    
    train_labels = np.array([data[1] for data in train_dataset])
    labels = np.unique(train_labels)

    rng = np.random.default_rng(args.seed)
    train_label_idc = [rng.permutation(np.argwhere(train_labels==l)) for l in labels]

    subset_indices_train = [idx[0] for idc in train_label_idc for idx in idc[:np.round(len(idc)*(1-args.validation_percentage/100)/args.training_division).astype(int)]]
    subset_indices_valid = [idx[0] for idc in train_label_idc for idx in idc[np.round(len(idc)*(1-args.validation_percentage/100)).astype(int):]]
    
    if args.no_augmentation:
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size,
            sampler=SubsetRandomSampler(subset_indices_train)
        )
    else:
        train_loader = torch.utils.data.DataLoader(
            train_dataset_augmented, batch_size=args.batch_size,
            sampler=SubsetRandomSampler(subset_indices_train)
        )
    
    val_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        sampler=SubsetRandomSampler(subset_indices_valid)
    )

    # Load your model [fcNet, ConvNet, Net]
    model = Net().to(device)

    # Try different optimzers here [Adam, SGD, RMSprop]
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    # Set your learning rate scheduler
    scheduler = StepLR(optimizer, step_size=args.step, gamma=args.gamma)

    # Training loop
    train_loss = np.zeros((args.epochs,))
    val_loss = np.zeros((args.epochs,))
    train_correct = np.zeros((args.epochs,))
    val_correct = np.zeros((args.epochs,))
    for epoch in range(args.epochs):
        train(args, model, device, train_loader, optimizer, epoch)
        train_loss[epoch], train_correct[epoch] = test(model, device, train_loader, name='Training')
        val_loss[epoch], val_correct[epoch] = test(model, device, val_loader, name='Validation')
        print()
        scheduler.step()    # learning rate scheduler

    np.save(args.root+args.name+'_train_loss.npy', train_loss)
    np.save(args.root+args.name+'_val_loss.npy', val_loss)
    np.save(args.root+args.name+'_train_accuracy.npy', train_correct/len(train_loader.sampler)*100)
    np.save(args.root+args.name+'_val_accuracy.npy', val_correct/len(val_loader.sampler)*100)

    if args.save_model:
        torch.save(model.state_dict(), args.root+args.name+'.pt')
    
    if args.plot:
        fig = plt.figure(figsize=(8, 6), tight_layout=True)
        ax1 = plt.axes()
        ax1.plot(np.arange(args.epochs), train_loss, 'b-', label='Training Loss')
        ax1.plot(np.arange(args.epochs), val_loss, 'r-', label='Validation Loss')
        ax1.set_xlabel('Epochs', fontsize=14, fontweight='bold')
        ax1.set_ylabel('Negative Log Likelihood Loss', fontsize=14, fontweight='bold')
        ax2 = ax1.twinx()
        ax2.plot(np.arange(args.epochs), train_correct/len(train_loader.sampler)*100, 'b:', label='Training Accuracy')
        ax2.plot(np.arange(args.epochs), val_correct/len(val_loader.sampler)*100, 'r:', label='Validation Accuracy')
        # ax2.set_xlabel('Epochs')
        ax2.set_ylabel('Accuracy %', fontsize=14, fontweight='bold')
        lines1, line_labels1 = ax1.get_legend_handles_labels()
        lines2, line_labels2 = ax2.get_legend_handles_labels()
        ax2.legend(lines1 + lines2, line_labels1 + line_labels2, loc='right', fontsize=12)
        plt.savefig(args.root+args.name+'.pdf', pad_inches=0, bbox_inches='tight')
        plt.show()
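The split above is stratified: indices are permuted per class, the leading fraction goes to training (optionally reduced further by --training-division) and the tail to validation. A compact NumPy-only sketch of the same idea (illustrative; not the original helper):

import numpy as np

def stratified_split(labels, val_pct=15.0, seed=2020):
    """Per-class shuffled split into training / validation index lists."""
    rng = np.random.default_rng(seed)
    train_idx, val_idx = [], []
    for c in np.unique(labels):
        idx = rng.permutation(np.flatnonzero(labels == c))
        cut = int(round(len(idx) * (1 - val_pct / 100)))
        train_idx.extend(idx[:cut].tolist())
        val_idx.extend(idx[cut:].tolist())
    return train_idx, val_idx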
コード例 #26
0
def main():
    # Training settings
    parser = argparse.ArgumentParser(description="Pytorch MNIST Example")
    parser.add_argument("--batch-size",
                        type=int,
                        default=64,
                        metavar="N",
                        help="input batch size for training (default : 64)")
    parser.add_argument("--test-batch-size",
                        type=int,
                        default=1000,
                        metavar="N",
                        help="input batch size for testing (default : 1000)")
    parser.add_argument("--epochs",
                        type=int,
                        default=64,
                        metavar="N",
                        help="number of epochs to train (default : 64)")
    parser.add_argument("--learning-rate",
                        type=float,
                        default=0.1,
                        metavar="LR",
                        help="the learning rate (default : 0.1)")
    parser.add_argument("--gamma",
                        type=float,
                        default=0.5,
                        metavar="M",
                        help="Learning rate step gamma (default : 0.5)")
    parser.add_argument("--no-cuda",
                        action="store_true",
                        default=True,
                        help="disables CUDA training")
    parser.add_argument("--dry-run",
                        action="store_true",
                        default=False,
                        help="quickly check a single pass")
    parser.add_argument("--seed",
                        type=int,
                        default=1,
                        metavar="S",
                        help="random seed (default : 1)")
    parser.add_argument(
        "--log-interval",
        type=int,
        default=10,
        metavar="N",
        help="how many batches to wait before logging training status")
    parser.add_argument("--save-model",
                        action="store_true",
                        default=True,
                        help="For saving the current Model")
    parser.add_argument(
        "--load_state_dict",
        type=str,
        default="no",
        help="load the trained model weights or not (default: no)")
    parser.add_argument("--model",
                        type=str,
                        default="LeNet",
                        help="choose the model to train (default: LeNet)")
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()  # precedence: not > and > or
    print("use cuda: {}".format(use_cuda))
    torch.manual_seed(args.seed)  # set the random seed for reproducibility

    device = torch.device("cuda" if use_cuda else "cpu")

    train_kwargs = {"batch_size": args.batch_size}
    test_kwargs = {"batch_size": args.test_batch_size}
    '''
    *args and **kwargs are generally used in function definitions: they allow the
    function to accept an arbitrary number of arguments, i.e. we neither know nor
    restrict in advance how many arguments the function will receive when called.
    In such cases *args and **kwargs can be used.
    '''
    if use_cuda:
        cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose([
        transforms.ToTensor(),
        # Normalize(mean, std, inplace=False): mean and std are the per-channel
        # statistics of the dataset; inplace controls in-place operation
        # output = (input - mean) / std
        # this standardizes the data (roughly, but not exactly, into -1 ~ 1)
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])
    dataset1 = datasets.MNIST("./data",
                              train=True,
                              download=True,
                              transform=transform)
    dataset2 = datasets.MNIST("./data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model_name = args.model.lower()
    if model_name == "lenet":
        model = LeNet().to(device)
    elif model_name == "defaultnet":
        model = DefaultNet().to(device)
    elif model_name == "mynetv1":
        model = MyNetV1().to(device)
    elif model_name == "mynetv2":
        model = MyNetV2().to(device)
    elif model_name == "myfullconvnet":
        model = MyFullConvNet().to(device)
    elif model_name == "myvggnet":
        model = MyVggNet().to(device)

    #model = Net().to(device)
    model_path = Path("./model/weights/{}.pt".format(model_name))
    if model_path.exists() and args.load_state_dict == "yes":
        model.load_state_dict(torch.load(model_path))
        print("Load the last trained model.")
    optimizer = optim.Adadelta(model.parameters(), lr=args.learning_rate)
    #optimizer_path = Path("./model/weights/")

    # the scheduler adjusts the learning rate; besides LambdaLR there is StepLR: lr = lr * gamma^n, n = epoch // step_size
    scheduler = StepLR(optimizer, step_size=5, gamma=args.gamma)
    '''
    optimizer (Optimizer): the optimizer whose learning rate will be adjusted;
    step_size (int): the learning rate is updated once every step_size epochs;
    gamma (float): multiplicative factor applied to lr at each update;
    last_epoch (int): index of the last epoch; when resuming training after an
    interruption, this equals the epoch of the loaded model. The default -1 means
    training from scratch, i.e. starting from epoch 1.
    '''
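    # For illustration only (not part of the original script): with step_size=5,
    # the lr after k calls to scheduler.step() is args.learning_rate * gamma ** (k // 5),
    # e.g. lr=0.1, gamma=0.5 -> 0.1 for epochs 1-5, 0.05 for epochs 6-10, 0.025 for 11-15.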
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(),
                   "./model/weights/{}.pt".format(model_name))

    # record the training results
    create_loss_txt_path = "./model/result/{}_loss.txt".format(model_name)
    create_acc_txt_path = "./model/result/{}_acc.txt".format(model_name)
    with open(create_loss_txt_path, "w+") as f:
        for loss in graph_loss:
            f.write("{}\n".format(loss))
    with open(create_acc_txt_path, "w+") as f:
        for acc in graph_acc:
            f.write("{}\n".format(acc))
コード例 #27
0
def train_net(args):
    torch.manual_seed(7)
    np.random.seed(7)
    checkpoint = args.checkpoint
    start_epoch = 0
    best_acc = 0
    writer = SummaryWriter()
    epochs_since_improvement = 0

    # Initialize / load checkpoint
    if checkpoint is None:
        if args.network == 'r18':
            model = resnet18(args)
        elif args.network == 'r34':
            model = resnet34(args)
        elif args.network == 'r50':
            model = resnet50(args)
        elif args.network == 'r101':
            model = resnet101(args)
        elif args.network == 'r152':
            model = resnet152(args)
        else:
            model = resnet_face18(args.use_se)
        model = nn.DataParallel(model)
        metric_fc = ArcMarginModel(args)
        metric_fc = nn.DataParallel(metric_fc)

        if args.optimizer == 'sgd':
            optimizer = torch.optim.SGD([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                        lr=args.lr,
                                        momentum=args.mom,
                                        weight_decay=args.weight_decay)
        else:
            optimizer = torch.optim.Adam([{
                'params': model.parameters()
            }, {
                'params': metric_fc.parameters()
            }],
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

    else:
        checkpoint = torch.load(checkpoint)
        start_epoch = checkpoint['epoch'] + 1
        epochs_since_improvement = checkpoint['epochs_since_improvement']
        model = checkpoint['model']
        metric_fc = checkpoint['metric_fc']
        optimizer = checkpoint['optimizer']

    logger = get_logger()

    # Move to GPU, if available
    model = model.to(device)
    metric_fc = metric_fc.to(device)

    # Loss function
    if args.focal_loss:
        criterion = FocalLoss(gamma=args.gamma).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)

    # Custom dataloaders
    train_dataset = ArcFaceDataset('train')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True)

    scheduler = StepLR(optimizer, step_size=args.lr_step, gamma=0.1)

    # Epochs
    for epoch in range(start_epoch, args.end_epoch):
        scheduler.step()

        if args.full_log:
            lfw_acc, threshold = lfw_test(model)
            writer.add_scalar('LFW_Accuracy', lfw_acc, epoch)
            full_log(epoch)

        start = datetime.now()
        # One epoch's training
        train_loss, train_top5_accs = train(train_loader=train_loader,
                                            model=model,
                                            metric_fc=metric_fc,
                                            criterion=criterion,
                                            optimizer=optimizer,
                                            epoch=epoch,
                                            logger=logger)
        # train_dataset.shuffle()
        writer.add_scalar('Train_Loss', train_loss, epoch)
        writer.add_scalar('Train_Top5_Accuracy', train_top5_accs, epoch)

        end = datetime.now()
        delta = end - start
        print('{} seconds'.format(delta.seconds))

        # One epoch's validation
        if epoch > 10 and epoch % 2 == 0 and not args.full_log:
            start = datetime.now()
            lfw_acc, threshold = lfw_test(model)
            writer.add_scalar('LFW Accuracy', lfw_acc, epoch)

            # Check if there was an improvement
            is_best = lfw_acc > best_acc
            best_acc = max(lfw_acc, best_acc)
            if not is_best:
                epochs_since_improvement += 1
                print("\nEpochs since last improvement: %d\n" %
                      (epochs_since_improvement, ))
            else:
                epochs_since_improvement = 0

            # Save checkpoint
            save_checkpoint(epoch, epochs_since_improvement, model, metric_fc,
                            optimizer, best_acc, is_best)

            end = datetime.now()
            delta = end - start
            print('{} seconds'.format(delta.seconds))
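This example calls scheduler.step() at the top of each epoch, before the optimizer updates; since PyTorch 1.1 the recommended order is the reverse. A minimal sketch of that ordering (illustrative only, not the original training code):

import torch
from torch.optim.lr_scheduler import StepLR

param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.SGD([param], lr=0.1)
scheduler = StepLR(optimizer, step_size=10, gamma=0.1)
for epoch in range(30):
    for _ in range(5):           # stand-in for the per-batch training loop
        optimizer.zero_grad()
        param.sum().backward()
        optimizer.step()
    scheduler.step()             # decay once per epoch, after the optimizer updates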
コード例 #28
0
ファイル: main.py プロジェクト: princeton-vl/CoqGym
def main():

    # parse the options
    opts = parse_args()

    # create the dataloaders
    dataloader = {
        "train":
        create_dataloader("train_valid" if opts.no_validation else "train",
                          opts),
        "valid":
        create_dataloader("valid", opts),
    }

    # create the model
    model = Prover(opts)
    model.to(opts.device)

    # create the optimizer
    optimizer = torch.optim.RMSprop(
        model.parameters(),
        lr=opts.learning_rate,
        momentum=opts.momentum,
        weight_decay=opts.l2,
    )
    if opts.no_validation:
        scheduler = StepLR(optimizer,
                           step_size=opts.lr_reduce_steps,
                           gamma=0.1)
    else:
        scheduler = ReduceLROnPlateau(optimizer,
                                      patience=opts.lr_reduce_patience,
                                      verbose=True)

    # load the checkpoint
    start_epoch = 0
    if opts.resume is not None:
        log("loading model checkpoint from %s.." % opts.resume)
        if opts.device.type == "cpu":
            checkpoint = torch.load(opts.resume, map_location="cpu")
        else:
            checkpoint = torch.load(opts.resume)
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        start_epoch = checkpoint["n_epoch"] + 1
        model.to(opts.device)

    agent = Agent(model, optimizer, dataloader, opts)

    best_acc = -1.0
    for n_epoch in range(start_epoch, start_epoch + opts.num_epochs):
        log("EPOCH #%d" % n_epoch)

        # training
        loss_train = agent.train(n_epoch)

        # save the model checkpoint
        if n_epoch % opts.save_model_epochs == 0:
            agent.save(n_epoch, opts.checkpoint_dir)

        # validation
        if not opts.no_validation:
            loss_valid = agent.valid(n_epoch)

        # reduce the learning rate
        if opts.no_validation:
            scheduler.step()
        else:
            scheduler.step(loss_valid)
コード例 #29
0
ファイル: train.py プロジェクト: KevinHua/dvector
def train(train_dir, model_dir, config_path, checkpoint_path, n_steps,
          save_every, test_every, decay_every, n_speakers, n_valids,
          n_utterances, seg_len):
    """Train a d-vector network."""

    # setup
    total_steps = 0

    # load data
    dataset = SEDataset(train_dir, n_utterances, seg_len)
    train_set, valid_set = random_split(dataset,
                                        [len(dataset) - n_valids, n_valids])
    train_loader = DataLoader(train_set,
                              batch_size=n_speakers,
                              shuffle=True,
                              num_workers=4,
                              collate_fn=pad_batch,
                              drop_last=True)
    valid_loader = DataLoader(valid_set,
                              batch_size=n_speakers,
                              shuffle=True,
                              num_workers=4,
                              collate_fn=pad_batch,
                              drop_last=True)
    train_iter = iter(train_loader)

    print(f"Training starts with {len(train_set)} speakers. "
          f"(and {len(valid_set)} speakers for validation)")
    assert len(train_set) >= n_speakers
    assert len(valid_set) >= n_speakers

    # build network and training tools
    dvector = DVector().load_config_file(config_path)
    criterion = GE2ELoss()
    optimizer = SGD(list(dvector.parameters()) + list(criterion.parameters()),
                    lr=0.01)
    scheduler = StepLR(optimizer, step_size=decay_every, gamma=0.5)

    # load checkpoint
    if checkpoint_path is not None:
        ckpt = torch.load(checkpoint_path)
        total_steps = ckpt["total_steps"]
        dvector.load_state_dict(ckpt["state_dict"])
        criterion.load_state_dict(ckpt["criterion"])
        optimizer.load_state_dict(ckpt["optimizer"])
        scheduler.load_state_dict(ckpt["scheduler"])

    # prepare for training
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    dvector = dvector.to(device)
    criterion = criterion.to(device)
    writer = SummaryWriter(model_dir)
    pbar = tqdm.trange(n_steps)

    min_loss = 1e308

    # start training
    for step in pbar:

        total_steps += 1

        try:
            batch = next(train_iter)
        except StopIteration:
            train_iter = iter(train_loader)
            batch = next(train_iter)

        embd = dvector(batch.to(device)).view(n_speakers, n_utterances, -1)

        loss = criterion(embd)

        optimizer.zero_grad()
        loss.backward()

        grad_norm = torch.nn.utils.clip_grad_norm_(
            list(dvector.parameters()) + list(criterion.parameters()),
            max_norm=3)
        dvector.embedding.weight.grad.data *= 0.5
        criterion.w.grad.data *= 0.01
        criterion.b.grad.data *= 0.01

        optimizer.step()
        scheduler.step()

        pbar.set_description(f"global = {total_steps}, loss = {loss:.4f}")
        writer.add_scalar("Training loss", loss, total_steps)
        writer.add_scalar("Training min loss", min_loss, total_steps)
        writer.add_scalar("Gradient norm", grad_norm, total_steps)

        if (step + 1) % test_every == 0:
            batch = next(iter(valid_loader))
            embd = dvector(batch.to(device)).view(n_speakers, n_utterances, -1)
            valid_loss = criterion(embd)
            writer.add_scalar("validation loss", valid_loss, total_steps)

        if (step + 1) % save_every == 0:
            ckpt_path = os.path.join(model_dir, f"ckpt-{total_steps}.tar")
            ckpt_dict = {
                "total_steps": total_steps,
                "state_dict": dvector.state_dict(),
                "criterion": criterion.state_dict(),
                "optimizer": optimizer.state_dict(),
                "scheduler": scheduler.state_dict(),
            }
            torch.save(ckpt_dict, ckpt_path)

        if loss.item() < min_loss:
            min_loss = loss.item()
            ckpt_path = os.path.join(model_dir, f"dvector-ckpt-min-loss.tar")
            ckpt_dict = {
                "total_steps": total_steps,
                "state_dict": dvector.state_dict(),
                "criterion": criterion.state_dict(),
                "optimizer": optimizer.state_dict(),
                "scheduler": scheduler.state_dict(),
            }
            torch.save(ckpt_dict, ckpt_path)

            with open(os.path.join(model_dir, "min_loss_step.txt"),
                      "w",
                      encoding="utf-8") as f:
                import json
                json.dump({'total_steps': total_steps, 'loss': min_loss}, f)

    print("Training completed.")
コード例 #30
0
    #init_weight(model)

    if cuda:
        model = model.cuda()
        #loss_fn = loss_fn.cuda()

    # optimizer = Adam(
    #     [param for param in model.parameters() if param.requires_grad],
    #     lr=base_lr, weight_decay=1e-4)
    # scheduler = StepLR(optimizer, step_size=40, gamma=0.1)

    optimizer = Adam(
        [param for param in model.parameters() if param.requires_grad],
        lr=base_lr,
        weight_decay=config.weight_decay)
    scheduler = StepLR(optimizer, step_size=1, gamma=config.gamma)

    bind_nsml(model, optimizer, scheduler)
    if config.pause:
        nsml.paused(scope=locals())

    if mode == 'train':
        tr_loader, val_loader, val_label_file = data_loader_with_split(
            root=TRAIN_DATASET_PATH, train_split=train_split)

        num_batches = len(tr_loader)

        #local_eval(model, val_loader, val_label_file)

        #exit(0)