def train(train_loader, valid_loader, writer):
    """Train a VGG classifier and checkpoint it after every epoch.

    Args:
        train_loader: iterable of (batch_x, batch_y) training batches.
        valid_loader: validation loader (currently unused in this body).
        writer: logger exposing log_training(avg_loss, epoch).

    Side effects: saves './models/chkpt-<epoch>.pt' each epoch.
    Relies on module-level `cf` (config), `VGG`, and `to_convert_one_hot`.
    """
    model = VGG(batch_size=cf.batch_size)
    criterion = nn.BCELoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=cf.learning_rate)
    for e in range(cf.epoch):
        epoch_loss = 0.0
        num_batches = 0
        for batch_x, batch_y in train_loader:
            y_predict = model(batch_x)
            # NOTE(review): converting the *prediction* to one-hot before BCE
            # is unusual (hard one-hot outputs carry no gradient); typically
            # the target is converted — confirm to_convert_one_hot's intent.
            y_one_hot = to_convert_one_hot(y_predict)
            loss = criterion(y_one_hot, batch_y.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss = loss.item()
            epoch_loss += loss
            num_batches += 1
            print(f'batch loss: {loss:.3f}')
        # BUG FIX: the original divided the summed per-batch losses by
        # cf.batch_size, which is unrelated to the number of batches and so
        # reported a meaningless "training loss". Average over batches.
        avg_loss = epoch_loss / max(num_batches, 1)
        print(f'Epoch #{e}: --- Training loss: {avg_loss:.3f}')
        writer.log_training(avg_loss, e)
        save_path = './models/chkpt-%d.pt' % (e + 1)
        torch.save(
            {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'epoch': e
            }, save_path)
        print("Saved checkpoint to: %s" % save_path)
def run_train(opt, training_data_loader, validation_data_loader):
    """Run the full VGG training loop.

    Prepares the checkpoint directory and CSV log, places the network and
    MSE criterion on the selected device, builds an Adam optimizer, then
    alternates train/evaluate per epoch, appending metrics to the log and
    checkpointing each epoch.

    Relies on module-level helpers: VGG, load_model, train, evaluate,
    save_checkpoint.
    """
    # Ensure the checkpoint directory exists before touching the log file.
    if not os.path.exists(opt.checkpoint_dir):
        os.makedirs(opt.checkpoint_dir)
    log_path = os.path.join(opt.checkpoint_dir, 'vgg_log.csv')

    print('[Initialize networks for training]')
    network = VGG(opt)
    mse_criterion = nn.MSELoss()
    print(network)

    # Resuming keeps the existing log; a fresh run rewrites the CSV header.
    if opt.resume:
        opt.start_epoch, network = load_model(opt, opt.checkpoint_dir)
    else:
        with open(log_path, mode='w') as f:
            f.write('epoch, train_loss, train_acc, valid_loss, valid_acc\n')

    print('===> Setting GPU')
    print('CUDA Available', torch.cuda.is_available())
    cuda_ok = opt.use_cuda and torch.cuda.is_available()
    opt.use_cuda = cuda_ok
    opt.device = 'cuda' if cuda_ok else 'cpu'

    # Wrap in DataParallel when several GPUs are present and requested.
    if torch.cuda.device_count() > 1 and opt.multi_gpu:
        print("Use" + str(torch.cuda.device_count()) + 'GPUs')
        network = nn.DataParallel(network)
    if opt.use_cuda:
        network = network.to(opt.device)
        mse_criterion = mse_criterion.to(opt.device)

    print("===> Setting Optimizer")
    adam = torch.optim.Adam(network.parameters(), lr=opt.lr,
                            betas=(opt.b1, opt.b2))

    # One train + one validation pass per epoch; metrics appended to the CSV.
    for epoch in range(opt.start_epoch, opt.n_epochs):
        opt.epoch_num = epoch
        train_loss, train_acc = train(opt, network, adam,
                                      training_data_loader,
                                      loss_criterion=mse_criterion)
        valid_loss, valid_acc = evaluate(opt, network,
                                         validation_data_loader,
                                         loss_criterion=mse_criterion)
        with open(log_path, mode='a') as f:
            f.write("%d, %08f,%08f,%08f,%08f\n" %
                    (epoch, train_loss, train_acc, valid_loss, valid_acc))
        save_checkpoint(opt, network, epoch, valid_loss)
def train():
    """Fine-tune a VGG model from pretrained weights and save checkpoints.

    Loads pretrained parameters with strict=False, trains with Adam +
    CrossEntropyLoss, evaluates every 5 epochs, and keeps the best model
    ('best_VGG.pkl' / 'best_VGG_params.pkl') plus the final one.

    Relies on module-level loadData, VGG, evaluate, learning_rate, epochs.
    """
    train_dataloader, val_dataloader = loadData()
    pretrained_params = torch.load('VGG_pretrained.pth')
    model = VGG()
    # strict=False loads only the pretrained parameters that match the new
    # model; unmatched or missing ones are discarded.
    model.load_state_dict(pretrained_params.state_dict(), strict=False)
    if torch.cuda.is_available():
        model.cuda()
    # To fine-tune, freeze the parameters of selected layers:
    # for p in model.XXlayers.parameters():
    #     p.requires_grad = False
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    loss_func = nn.CrossEntropyLoss()
    best_acc = 0
    for epoch in range(epochs):
        epoch_loss = 0
        steps = 0
        for i, data in enumerate(train_dataloader):
            inputs, labels = data
            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()
            inputs, labels = Variable(inputs), Variable(labels)
            model.train()
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()
            # BUG FIX: loss.data[0] was removed in PyTorch >= 0.5; .item()
            # is the supported scalar accessor.
            epoch_loss += loss.item()
            steps += 1
        # Guard against an empty dataloader (steps == 0).
        print('epoch:%d loss:%.3f' % (epoch + 1, epoch_loss / max(steps, 1)))
        if epoch % 5 == 0:
            val_acc = evaluate(model, val_dataloader)
            if val_acc > best_acc:
                best_acc = val_acc
                torch.save(model, 'best_VGG.pkl')
                torch.save(model.state_dict(), 'best_VGG_params.pkl')
            # BUG FIX: the original 'test acc:'.format(val_acc) had no {}
            # placeholder and always printed just 'test acc:'.
            print('test acc:{}'.format(val_acc))
    print('Finished Training')
    torch.save(model, 'VGG.pkl')
    torch.save(model.state_dict(), 'VGG_params.pkl')
# Script fragment: CIFAR-style data loaders, VGG19 model, and the start of a
# per-epoch training function. The train() body is cut off right after
# optimizer.step() by the extraction — the metric accumulation that the
# train_loss/correct/total counters exist for presumably follows.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=4)
testset = Imagenet64x(root=args.data_path, train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=4)

# Model
print('==> Building model..')
net = VGG('VGG19')
net = net.to(device)
if device == 'cuda':
    # Multi-GPU wrapper; benchmark mode lets cuDNN pick the fastest kernels
    # for fixed input shapes.
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)


# Training
def train(epoch):
    # One full pass over trainloader: forward, loss, backward, SGD step.
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck') #--------------------Model----------------------------------# print('==> Building model..') net = VGG("VGG16") net = net.to(device) if device == 'cuda': net = torch.nn.DataParallel(net) cudnn.benchmark = True criterion = nn.CrossEntropyLoss() optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.weight_decay) scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150, 250], gamma=0.1) #--------------------Train and Test----------------------------------# def train(epoch): #print('\nEpoch: %d' % epoch) print('\nEpoch [{}/{}]'.format(epoch+1, args.epochs)) net.train() train_loss = 0 IXZ_loss = 0 correct = 0 total = 0 for batch_idx, (inputs, targets) in enumerate(trainloader): inputs, targets = inputs.to(device), targets.to(device)
def main():
    """Entry point for speech-command recognition.

    Parses CLI arguments, optionally trains the selected architecture with
    early stopping (checkpointing every epoch and keeping the best model by
    validation accuracy), then reloads the best checkpoint and evaluates it
    on the test set, writing per-sample results to a CSV.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='ConvNets for Speech Commands Recognition')
    parser.add_argument('--train_path', default='data/train_training',
                        help='path to the train data folder')
    parser.add_argument('--test_path', default='data/train_testing',
                        help='path to the test data folder')
    parser.add_argument('--valid_path', default='data/train_validation',
                        help='path to the valid data folder')
    parser.add_argument('--batch_size', type=int, default=100, metavar='N',
                        help='training and valid batch size')
    parser.add_argument('--test_batch_size', type=int, default=100, metavar='N',
                        help='batch size for testing')
    parser.add_argument(
        '--arc', default='VGG16',
        help='network architecture: LeNet, VGG11, VGG13, VGG16, VGG19')
    parser.add_argument('--epochs', type=int, default=100, metavar='N',
                        help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum, for SGD only')
    parser.add_argument('--optimizer', default='adam',
                        help='optimization method: sgd | adam')
    parser.add_argument('--no-cuda', dest='cuda', action='store_false')
    parser.add_argument('--seed', type=int, default=1234, metavar='S',
                        help='random seed')
    parser.add_argument(
        '--log-interval', type=int, default=10, metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument(
        '--patience', type=int, default=5, metavar='N',
        help=
        'how many epochs of no loss improvement should we wait before stop training'
    )
    parser.add_argument('--checkpoint', default='checkpoint',
                        metavar='CHECKPOINT', help='checkpoints directory')
    parser.add_argument('--no-train', dest='train', action='store_false')

    # feature extraction options
    parser.add_argument('--window_size', type=float, default=.02,
                        help='window size for the stft')
    parser.add_argument('--window_stride', type=float, default=.01,
                        help='window stride for the stft')
    parser.add_argument('--window_type', default='hamming',
                        help='window type for the stft')
    parser.add_argument('--no-normalize', dest='normalize',
                        action='store_false',
                        help='do not not to normalize the spect')
    parser.add_argument(
        '--num_workers', type=int, default=4,
        help='int, how many subprocesses to use for data loading')

    args = parser.parse_args()
    print(args)
    args.cuda = args.cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)
        print('Using CUDA with {0} GPUs'.format(torch.cuda.device_count()))

    # loading data
    if args.train:
        train_dataset = Loader(args.train_path,
                               window_size=args.window_size,
                               window_stride=args.window_stride,
                               window_type=args.window_type,
                               normalize=args.normalize)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
            pin_memory=args.cuda,
            sampler=None)
        valid_dataset = Loader(args.valid_path,
                               window_size=args.window_size,
                               window_stride=args.window_stride,
                               window_type=args.window_type,
                               normalize=args.normalize)
        valid_loader = torch.utils.data.DataLoader(
            valid_dataset,
            batch_size=args.batch_size,
            shuffle=None,  # NOTE(review): DataLoader treats None as False
            num_workers=args.num_workers,
            pin_memory=args.cuda,
            sampler=None)

        # build model
        if args.arc.startswith('VGG'):
            model = VGG(args.arc)
        elif args.arc == 'TDNN':
            model = TDNN()
        else:
            model = LeNet()
        if args.cuda:
            model = torch.nn.DataParallel(model).cuda()

        # define optimizer
        if args.optimizer.lower() == 'adam':
            optimizer = optim.Adam(model.parameters(), lr=args.lr)
        else:
            optimizer = optim.SGD(model.parameters(), lr=args.lr,
                                  momentum=args.momentum)

        best_valid_acc = 0
        iteration = 0  # consecutive epochs without validation improvement
        epoch = 1
        # training with early stopping: stop after args.patience epochs
        # without a validation-accuracy improvement.
        while (epoch < args.epochs + 1) and (iteration < args.patience):
            train(train_loader, model, optimizer, epoch, args.cuda,
                  args.log_interval, weight=train_dataset.weight)
            valid_loss, valid_acc = test(valid_loader, model, args.cuda,
                                         data_set='Validation')
            # A per-epoch model snapshot is always written; the 'best' state
            # dict below is only updated on improvement.
            if not os.path.isdir(args.checkpoint):
                os.mkdir(args.checkpoint)
            torch.save(model.module if args.cuda else model,
                       './{}/model{:03d}.t7'.format(args.checkpoint, epoch))
            if valid_acc <= best_valid_acc:
                iteration += 1
                print('Accuracy was not improved, iteration {0}'.format(
                    str(iteration)))
            else:
                print('Saving state')
                iteration = 0
                best_valid_acc = valid_acc
                state = {
                    'valid_acc': valid_acc,
                    'valid_loss': valid_loss,
                    'epoch': epoch,
                }
                if not os.path.isdir(args.checkpoint):
                    os.mkdir(args.checkpoint)
                torch.save(state, './{}/ckpt.t7'.format(args.checkpoint))
            epoch += 1

    # test model: reload the best epoch's full model and evaluate it.
    test_dataset = Loader(args.test_path,
                          window_size=args.window_size,
                          window_stride=args.window_stride,
                          window_type=args.window_type,
                          normalize=args.normalize)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.test_batch_size,
                                              shuffle=None,
                                              num_workers=args.num_workers,
                                              pin_memory=args.cuda,
                                              sampler=None)
    state = torch.load('./{}/ckpt.t7'.format(args.checkpoint))
    epoch = state['epoch']
    print("Testing model {} (epoch {})".format(args.checkpoint, epoch))
    model = torch.load('./{}/model{:03d}.t7'.format(args.checkpoint, epoch))
    if args.cuda:
        model = torch.nn.DataParallel(model).cuda()
    results = './{}/{}.csv'.format(args.checkpoint,
                                   os.path.basename(args.test_path))
    print("Saving results in {}".format(results))
    test(test_loader, model, args.cuda, save=results)
# Script fragment: manual epoch/step training loop. Cut off right after
# fetching a minibatch — the GPU transfer and forward/backward pass that the
# running_loss/running_error counters exist for presumably follow.
step = int(train_size / bs)  # batches per epoch (integer division)
print("train size: " + str(train_size))
print("step: " + str(step))
dataloader = torch.utils.data.DataLoader(dataset, batch_size=bs, shuffle=True)
start = time.time()
lost = []
errors = []
for epoch in range(start_epoch, 20):
    data_iter = iter(dataloader)
    # divide the learning rate by 2 at epoch 10, 14 and 18
    # create a new optimizer at the beginning of each epoch: give the current learning rate.
    optimizer = torch.optim.SGD(net.parameters(), lr=my_lr)
    # set the running quantities to zero at the beginning of the epoch
    running_loss = 0
    running_error = 0
    num_batches = 0
    for count in range(start_step, step + 1):
        # Set the gradients to zeros
        optimizer.zero_grad()
        # create a minibatch
        minibatch_data, minibatch_label, _ = next(data_iter)
        # send them to the gpu
# Script fragment: test loader, VGG19 with optional multi-GPU placement, and
# the opening of a training function. Cut off right after the device transfer
# by the extraction.
testing_data_loader = DataLoader(dataset=test_set, num_workers=2, batch_size=100, shuffle=False)

print("Building model...")
net = VGG('VGG19')
best_acc = 0
if use_cuda:
    # Pin the default device to the first requested GPU, then spread the
    # model across opt.gpuids with outputs gathered on that same device.
    torch.cuda.set_device(opt.gpuids[0])
    net = nn.DataParallel(net, device_ids=opt.gpuids,
                          output_device=opt.gpuids[0]).cuda()
net_optim = optim.Adam(net.parameters(), lr=opt.lr)
# NOTE(review): MSELoss alongside correct/total counters suggests a
# regression-style target — confirm against the (cut-off) rest of train().
criterion = nn.MSELoss()


def train(epoch):
    print('\n Epoch:', epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for i, batch in enumerate(training_data_loader, 1):
        input, target = Variable(batch[0]), Variable(batch[1])
        if use_cuda:
            input = input.cuda()
            target = target.cuda()
# Script fragment: tail of a transforms.Compose([...]) whose opening lines
# are cut off above; then ImageFolder dataset, 80/20 split, loaders, model,
# and the opening of train(). Cut off right after timing the forward pass.
    transforms.CenterCrop(size=224),
    transforms.ToTensor()
])
dataset = torchvision.datasets.ImageFolder(root=opt.rootDir, transform=TRANSFORM_IMG)
# 80% train / 20% validation split (lengths must sum to len(dataset)).
train_set, validation_set = torch.utils.data.random_split(
    dataset, [int(len(dataset)*0.8), len(dataset) - int(len(dataset)*0.8)])
classes = dataset.classes
# NOTE(review): the training loader uses shuffle=False — training data is
# normally shuffled; confirm this is intentional.
train_loader = DataLoader(dataset=train_set, num_workers=opt.threads,
                          batch_size=opt.batchSize, shuffle=False)
validation_loader = DataLoader(dataset=validation_set, num_workers=opt.threads,
                               batch_size=opt.batchSize, shuffle=False)
train_accuracy = []
validation_accuracy = []
model = CNN(num_classes=len(dataset.classes)).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr, weight_decay=0.0001)


def train(epoch):
    epoch_loss = 0
    train_acc = 0
    model.train()
    for iteration, (image, label) in enumerate(train_loader):
        images = image.to(device)
        labels = label.to(device)
        t0 = time.time()  # time the forward pass
        prediction = model(images)
        loss = criterion(prediction, labels)
        t1 = time.time()
def objective(params):
    """Hyperparameter-search objective.

    Builds a VGG from the 12-element `params` vector (10 kernel sizes
    followed by 2 dropout rates), trains it for 50 epochs on the
    module-level `trainloader`, and returns the summed validation loss
    over `validloader` after the last epoch (lower is better).

    Relies on module-level `trainloader`, `validloader`, `use_cuda`, `VGG`.
    """
    kernel1 = int(params[0])
    kernel2 = int(params[1])
    kernel3 = int(params[2])
    kernel4 = int(params[3])
    kernel5 = int(params[4])
    kernel6 = int(params[5])
    kernel7 = int(params[6])
    kernel8 = int(params[7])
    kernel9 = int(params[8])
    kernel10 = int(params[9])
    dropout5 = float(params[10])
    dropout6 = float(params[11])
    net = VGG(kernel1=kernel1, kernel2=kernel2, kernel3=kernel3,
              kernel4=kernel4, kernel5=kernel5, kernel6=kernel6,
              kernel7=kernel7, kernel8=kernel8, kernel9=kernel9,
              kernel10=kernel10, dropout5=dropout5, dropout6=dropout6)
    if use_cuda:
        if torch.cuda.device_count() > 1:
            net = nn.DataParallel(net)
        # BUG FIX: the original only moved the net to GPU in the multi-GPU
        # branch, so a single-GPU run fed .cuda() inputs to a CPU model.
        net.cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9,
                          weight_decay=5e-4)
    num_epochs = 50
    for _ in range(num_epochs):
        # Training pass
        net.train()
        train_loss = 0
        correct = 0
        total = 0
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            # BUG FIX: loss.data[0] was removed in PyTorch >= 0.5
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum()
        #print("Train loss: {}".format(train_loss))
        # Validation pass
        net.eval()
        val_loss = 0
        correct = 0
        total = 0
        # torch.no_grad() replaces the removed Variable(..., volatile=True)
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(validloader):
                if use_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = net(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += targets.size(0)
                correct += predicted.eq(targets.data).cpu().sum()
        #print("Validation loss: {}".format(val_loss))
    return val_loss
# Script fragment: tail of a transform.Compose([...]) for the training set
# (the dataset constructor's opening lines are cut off above), VGG19 setup,
# and the start of the training loop — cut off before the device transfer.
    transform.RandomCrop(32, padding=4),
    transform.ToTensor(),
    transform.Normalize(mean, std)
]))
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
vgg19 = VGG().to(device)
# loss function
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(vgg19.parameters(), lr=0.01, momentum=0.9, nesterov=True)
print('开始训练VGG19……')  # runtime string kept verbatim ("start training VGG19")
# image classes (duplicate of the tuple defined above)
classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
# load model
for epoch in range(50):
    running_loss = 0.0
    for i, data in enumerate(trainloader):
        # unpack the input batch
        inputs, labels = data
        # use gpu (fragment ends here — the transfer and training step are cut off)
def train(config):
    """Train two binary VGG classifiers sequentially.

    The first model separates normal vs. infection images; the second
    separates covid vs. non-covid. Both use BCEWithLogitsLoss plus an
    explicit L2 penalty, AdamW, and a StepLR schedule. Per-epoch metrics
    are appended to CSV files and the best model (by accuracy on the test
    loader) is checkpointed.

    Relies on module-level Infection_Dataset, Covid_Dataset, VGG, evaluate,
    now, tqdm, DataLoader, DictWriter.
    """
    os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
    device = torch.device(config.device)
    # Seed all RNGs for reproducibility.
    random.seed(config.seed)
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    if config.use_gpu:
        torch.cuda.manual_seed_all(config.seed)

    # Dataset
    infection_train = Infection_Dataset('train')
    infection_test = Infection_Dataset('test')
    infection_val = Infection_Dataset('val')
    covid_train = Covid_Dataset('train')
    covid_test = Covid_Dataset('test')
    covid_val = Covid_Dataset('val')

    # Dataloader from dataset
    infection_train_loader = DataLoader(infection_train, batch_size=config.batch_size, shuffle=True)
    infection_test_loader = DataLoader(infection_test, batch_size=config.batch_size, shuffle=True)
    infection_val_loader = DataLoader(infection_val, batch_size=config.batch_size, shuffle=True)
    covid_train_loader = DataLoader(covid_train, batch_size=config.batch_size, shuffle=True)
    covid_test_loader = DataLoader(covid_test, batch_size=config.batch_size, shuffle=True)
    covid_val_loader = DataLoader(covid_val, batch_size=config.batch_size, shuffle=True)

    # L2 regularization parameter
    l2_lambda = 0.001

    # Instantiate model, criterion and optimizer
    model = VGG()
    if config.use_gpu:
        model.to(device)
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.AdamW(model.parameters(), lr=config.lr)
    lr_sheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.7)

    best_acc = -0.1
    best_epoch = -1
    start_time = time.time()

    # Train the model that classifies normal and infection images
    print("***** Training the first classifier *****")
    for epoch in range(config.epochs):
        # BUG FIX: the original re-ran `total_loss = 0.0` and `model.train()`
        # inside the batch loop, wiping the accumulator every iteration (and
        # never adding to it at all in this first loop).
        total_loss = 0.0
        model.train()
        for images_data, target_labels in tqdm(infection_train_loader):
            # images_data: [batch_size, 1, 150, 150]
            # target_labels: [batch_size, 2]
            if config.use_gpu:
                images_data = images_data.cuda()
                target_labels = target_labels.cuda()
            optimizer.zero_grad()
            predicted_labels = model(images_data)
            loss = criterion(predicted_labels, target_labels)
            # L2 regularization: sum of parameter norms
            l2_reg = torch.tensor(0.)
            if config.use_gpu:
                l2_reg = l2_reg.cuda()
            for param in model.parameters():
                l2_reg += torch.norm(param)
            loss += l2_lambda * l2_reg
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Evaluate the performance and save the model parameters each epoch.
        # NOTE(review): "val" metrics come from the *test* loader while the
        # val loader is unused — confirm this split is intentional.
        train_acc, train_loss = evaluate(infection_train_loader, model)
        val_acc, val_loss = evaluate(infection_test_loader, model)
        torch.save(model.state_dict(),
                   './checkpoints/' + str(epoch) + '_params_infection.pth')
        # Save the best performing model parameters based on validation accuracy
        if val_acc > best_acc:
            best_acc = val_acc
            best_epoch = epoch
            torch.save(model.state_dict(),
                       './checkpoints/' + 'best_params_infection.pth')
        print(
            f"{now()} Epoch{epoch}: train_loss: {train_loss}, val_loss: {val_loss}, train_acc: {train_acc}, val_acc: {val_acc}"
        )
        lr_sheduler.step()

        # Record loss and accuracies for learning curve plots
        fieldnames = [
            'epoch', 'train_loss', 'val_loss', 'train_acc', 'val_acc'
        ]
        out_dict = {
            'epoch': epoch,
            'train_loss': train_loss,
            'val_loss': val_loss,
            'train_acc': train_acc,
            'val_acc': val_acc
        }
        with open('./outputs/infection.csv', 'a') as out_f:
            writer = DictWriter(out_f, fieldnames=fieldnames)
            writer.writerow(out_dict)

    end_time = time.time()
    print("*" * 20)
    print(
        f"{now()} finished; epoch {best_epoch} best_acc: {best_acc}, time/epoch: {(end_time-start_time)/config.epochs}"
    )
    print()

    # Instantiate another model for the covid / non-covid task
    model_covid = VGG()
    if config.use_gpu:
        model_covid.to(device)
    optimizer_covid = optim.AdamW(model_covid.parameters(), lr=config.lr)
    lr_sheduler_covid = optim.lr_scheduler.StepLR(optimizer_covid,
                                                  step_size=3, gamma=0.7)
    best_acc_covid = -0.1
    best_epoch_covid = -1
    start_time_covid = time.time()

    # Train another model that classifies covid and non-covid images
    print("***** Training the second classifier *****")
    for epoch_covid in range(config.epochs):
        # BUG FIX: same accumulator-reset-inside-loop bug as above.
        total_loss_covid = 0.0
        model_covid.train()
        for images_data, target_labels in tqdm(covid_train_loader):
            # images_data: [batch_size, 1, 150, 150]
            # target_labels: [batch_size, 2]
            if config.use_gpu:
                images_data = images_data.cuda()
                target_labels = target_labels.cuda()
            optimizer_covid.zero_grad()
            predicted_labels = model_covid(images_data)
            loss = criterion(predicted_labels, target_labels)
            # L2 regularization
            l2_reg = torch.tensor(0.)
            if config.use_gpu:
                l2_reg = l2_reg.cuda()
            for param in model_covid.parameters():
                l2_reg += torch.norm(param)
            loss += l2_lambda * l2_reg
            loss.backward()
            optimizer_covid.step()
            total_loss_covid += loss.item()

        # Evaluate the performance and save the model parameters each epoch
        train_acc_covid, train_loss_covid = evaluate(covid_train_loader, model_covid)
        val_acc_covid, val_loss_covid = evaluate(covid_test_loader, model_covid)
        torch.save(model_covid.state_dict(),
                   './checkpoints/' + str(epoch_covid) + '_params_covid.pth')
        # Save the best performing model parameters based on validation accuracy
        if val_acc_covid > best_acc_covid:
            best_acc_covid = val_acc_covid
            best_epoch_covid = epoch_covid
            torch.save(model_covid.state_dict(),
                       './checkpoints/' + 'best_params_covid.pth')
        print(
            f"{now()} epoch {epoch_covid}: train_loss: {train_loss_covid}, val_loss: {val_loss_covid}, train_acc: {train_acc_covid}, val_acc_covid: {val_acc_covid}"
        )
        lr_sheduler_covid.step()

        # Record loss and accuracies for learning curve plots
        fieldnames = [
            'epoch', 'train_loss', 'val_loss', 'train_acc', 'val_acc'
        ]
        out_dict = {
            'epoch': epoch_covid,
            'train_loss': train_loss_covid,
            'val_loss': val_loss_covid,
            'train_acc': train_acc_covid,
            'val_acc': val_acc_covid
        }
        # BUG FIX: the original appended the covid curves to infection.csv,
        # corrupting the first classifier's learning-curve log.
        with open('./outputs/covid.csv', 'a') as out_f:
            writer = DictWriter(out_f, fieldnames=fieldnames)
            writer.writerow(out_dict)

    end_time = time.time()
    print("*" * 20)
    # BUG FIX: time/epoch was computed from the first classifier's
    # start_time; use start_time_covid for the second run.
    print(
        f"{now()} finished; epoch {best_epoch_covid} best_acc_covid: {best_acc_covid}, time/epoch: {(end_time-start_time_covid)/config.epochs}"
    )
# Training-script preamble for a calcification classifier: hyperparameters,
# data loaders, VGG model with FocalLoss, Adam + StepLR, and the opening of
# the epoch loop (the fragment ends right after opt.zero_grad()).
init_lr = 0.0001
weight_decay = 0.0005
Epochs = 1000
train_dir = r"C:\Users\13249\Desktop\20200115-20200205\Calcification\Data\INBreast\Sample\train"
valid_dir = r"C:\Users\13249\Desktop\20200115-20200205\Calcification\Data\INBreast\Sample\valid"

# data_loader
train_loader = get_data_loader(train_dir, data_augment=True)
# BUG FIX: the validation loader was built from train_dir, so "validation"
# re-read the training folder while valid_dir sat unused.
valid_loader = get_data_loader(valid_dir, data_augment=False, batch_size=128)

# model
net = VGG(1).cuda()

# define loss function (criterion) and optimizer
loss_func = FocalLoss().cuda()
opt = optim.Adam(net.parameters(), init_lr, weight_decay=weight_decay)
# halve the LR every 5 scheduler steps
lr_decay = torch.optim.lr_scheduler.StepLR(opt, step_size=5, gamma=0.5)

# train
writer = SummaryWriter()
for epoch in range(0, Epochs + 1):
    net.train()
    losses = []
    for i, (images, target) in enumerate(train_loader):
        images = images.cuda()
        target = target.cuda()
        output = net(images)
        loss = loss_func(output, target)
        opt.zero_grad()