def train():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = MyDataset(data_path='D:/datasets/voc-custom/train.txt')
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=8,
        shuffle=True,
        num_workers=3,
        pin_memory=True,
        collate_fn=dataset.collate_fn,
    )

    model = YoloNet().to(device)
    model.apply(weights_init_normal)

    optimizer = torch.optim.Adam(model.parameters())
    # scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

    for epoch in range(10):
        model.train()
        # scheduler.step()
        for batch_i, (imgs, targets) in enumerate(dataloader):
            imgs = imgs.to(device)
            targets = targets.to(device)

            output, loss = model(imgs, targets)
            print(epoch, batch_i, loss.detach().cpu().item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch % 5 == 0:
            torch.save(model.state_dict(), f"yolov3_ckpt_{epoch}.pth")
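# `weights_init_normal` is applied above (and in the GAN scripts further down) but is not
# defined in these snippets. A minimal sketch following the common YOLOv3/CycleGAN
# convention (Conv weights ~ N(0, 0.02), BatchNorm weights ~ N(1.0, 0.02), biases zeroed);
# the project's actual helper may differ.
import torch.nn as nn

def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        nn.init.normal_(m.weight.data, 0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        nn.init.normal_(m.weight.data, 1.0, 0.02)
        nn.init.constant_(m.bias.data, 0.0)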
def evaluate_cnn_pytorch(model, batch_size):
    model.eval()
    test_data = MyDataset("digits/testDigits")
    test_loader = data.DataLoader(test_data,
                                  batch_size=batch_size,
                                  num_workers=0,
                                  shuffle=False)
    ok_predictions = 0
    with torch.no_grad():  # no gradients needed during evaluation
        for step, test in enumerate(test_loader):
            x = torch.clone(test[0]).float()
            target = torch.clone(test[1]).long()
            if torch.cuda.is_available():
                x = x.cuda()
                target = target.cuda()
            predictions = model(x)
            for i in range(len(predictions)):
                expected = torch.argmax(target[i])
                prediction = torch.argmax(predictions[i])
                if expected == prediction:
                    ok_predictions += 1

    accuracy = round((ok_predictions / len(test_data)) * 100, 2)
    wrong_numbers = len(test_data) - ok_predictions
    print("Accuracy on test data: " + str(accuracy) + "%")
    print(f"wrong_numbers: {wrong_numbers}")
    return accuracy, wrong_numbers
def run_performance_test(dataset: MyDataset, batch_size: int, epoch_num: int):
    loader = DataLoader(dataset, batch_size, num_workers=0, shuffle=True)
    print(f'Running for {dataset.name()}:')

    if isinstance(dataset, CachedDataset):
        # first epoch to load the data
        tic = perf_counter()
        for _ in loader:
            pass
        print(f'First epoch with caching the data took {perf_counter() - tic} seconds')
        dataset.use_cache = True
        epoch_num -= 1

    ep1 = perf_counter()
    for i in range(epoch_num):
        for batch in loader:
            pass
    print(f'Mean epoch time is {(perf_counter() - ep1) / epoch_num} seconds, '
          f'dataset contains {len(dataset)} images, '
          f'with batch size of {batch_size}\n')
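# `CachedDataset` is only referenced by the benchmark above. A minimal, hypothetical sketch
# of the caching pattern it relies on: transformed samples are stored on first access and
# served from memory once `use_cache` is flipped. The project's real class (which takes a
# directory, transform and img_limit) will differ in detail.
from torch.utils.data import Dataset

class SimpleCachedDataset(Dataset):  # illustration only, not the project's class
    def __init__(self, base_dataset, max_cache_size=1000):
        self.base = base_dataset
        self.max_cache_size = max_cache_size
        self.use_cache = False          # first epoch fills the cache
        self._cache = {}

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        if self.use_cache and idx in self._cache:
            return self._cache[idx]
        item = self.base[idx]           # load + transform as usual
        if len(self._cache) < self.max_cache_size:
            self._cache[idx] = item
        return item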
def main_worker(gpu, save_dir, ngpus_per_node, args):
    # basic setup
    cudnn.benchmark = True
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.distributed:
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    if args.log_name is not None:
        log_dir = "runs/%s" % args.log_name
    else:
        log_dir = "runs/time-%d" % time.time()

    if not args.distributed or (args.rank % ngpus_per_node == 0):
        writer = SummaryWriter(logdir=log_dir)
    else:
        writer = None

    if not args.use_latent_flow:  # auto-encoder only
        args.prior_weight = 0
        args.entropy_weight = 0

    # multi-GPU setup
    model = PointFlow(args)
    if args.distributed:  # Multiple processes, single GPU per process
        if args.gpu is not None:
            def _transform_(m):
                return nn.parallel.DistributedDataParallel(
                    m, device_ids=[args.gpu], output_device=args.gpu,
                    check_reduction=True)

            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            model.multi_gpu_wrapper(_transform_)
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = 0
        else:
            assert 0, "DistributedDataParallel constructor should always set the single device scope"
    elif args.gpu is not None:  # Single process, single GPU per process
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:  # Single process, multiple GPUs per process
        def _transform_(m):
            return nn.DataParallel(m)

        model = model.cuda()
        model.multi_gpu_wrapper(_transform_)

    # resume checkpoints
    start_epoch = 0
    optimizer = model.make_optimizer(args)
    if args.resume_checkpoint is None and os.path.exists(
            os.path.join(save_dir, 'checkpoint-latest.pt')):
        args.resume_checkpoint = os.path.join(
            save_dir, 'checkpoint-latest.pt')  # use the latest checkpoint
    if args.resume_checkpoint is not None:
        if args.resume_optimizer:
            model, optimizer, start_epoch = resume(
                args.resume_checkpoint, model, optimizer,
                strict=(not args.resume_non_strict))
        else:
            model, _, start_epoch = resume(
                args.resume_checkpoint, model, optimizer=None,
                strict=(not args.resume_non_strict))
        print('Resumed from: ' + args.resume_checkpoint)

    # initialize datasets and loaders
    tr_dataset = MyDataset(args.data_dir, istest=False)
    te_dataset = MyDataset(args.data_dir, istest=True)
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(tr_dataset)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(dataset=tr_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=0,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True,
                                               worker_init_fn=init_np_seed)
    test_loader = torch.utils.data.DataLoader(dataset=te_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=0,
                                              pin_memory=True,
                                              drop_last=False,
                                              worker_init_fn=init_np_seed)

    # save dataset statistics
    # if not args.distributed or (args.rank % ngpus_per_node == 0):
    #     np.save(os.path.join(save_dir, "train_set_mean.npy"), tr_dataset.all_points_mean)
    #     np.save(os.path.join(save_dir, "train_set_std.npy"), tr_dataset.all_points_std)
    #     np.save(os.path.join(save_dir, "train_set_idx.npy"), np.array(tr_dataset.shuffle_idx))
    #     np.save(os.path.join(save_dir, "val_set_mean.npy"), te_dataset.all_points_mean)
    #     np.save(os.path.join(save_dir, "val_set_std.npy"), te_dataset.all_points_std)
    #     np.save(os.path.join(save_dir, "val_set_idx.npy"), np.array(te_dataset.shuffle_idx))

    # load classification dataset if needed
    if args.eval_classification:
        from datasets import get_clf_datasets

        def _make_data_loader_(dataset):
            return torch.utils.data.DataLoader(dataset=dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=0,
                                               pin_memory=True,
                                               drop_last=False,
                                               worker_init_fn=init_np_seed)

        clf_datasets = get_clf_datasets(args)
        clf_loaders = {
            k: [_make_data_loader_(ds) for ds in ds_lst]
            for k, ds_lst in clf_datasets.items()
        }
    else:
        clf_loaders = None

    # initialize the learning rate scheduler
    if args.scheduler == 'exponential':
        scheduler = optim.lr_scheduler.ExponentialLR(optimizer, args.exp_decay)
    elif args.scheduler == 'step':
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=args.epochs // 2,
                                              gamma=0.1)
    elif args.scheduler == 'linear':
        def lambda_rule(ep):
            lr_l = 1.0 - max(0, ep - 0.5 * args.epochs) / float(0.5 * args.epochs)
            return lr_l

        scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    else:
        assert 0, "args.scheduler should be 'exponential', 'step' or 'linear'"

    # main training loop
    start_time = time.time()
    entropy_avg_meter = AverageValueMeter()
    latent_nats_avg_meter = AverageValueMeter()
    point_nats_avg_meter = AverageValueMeter()
    if args.distributed:
        print("[Rank %d] World size : %d" % (args.rank, dist.get_world_size()))

    print("Start epoch: %d End epoch: %d" % (start_epoch, args.epochs))
    for epoch in range(start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # adjust the learning rate
        if (epoch + 1) % args.exp_decay_freq == 0:
            scheduler.step(epoch=epoch)
            if writer is not None:
                writer.add_scalar('lr/optimizer', scheduler.get_lr()[0], epoch)

        # train for one epoch
        for bidx, data in enumerate(train_loader):
            idx_batch, tr_batch, te_batch = data['idx'], data['train_points'], data['test_points']
            step = bidx + len(train_loader) * epoch
            model.train()
            inputs = tr_batch.cuda(args.gpu, non_blocking=True)
            out = model(inputs, optimizer, step, writer)
            entropy, prior_nats, recon_nats = out['entropy'], out['prior_nats'], out['recon_nats']
            entropy_avg_meter.update(entropy)
            point_nats_avg_meter.update(recon_nats)
            latent_nats_avg_meter.update(prior_nats)
            if step % args.log_freq == 0:
                duration = time.time() - start_time
                start_time = time.time()
                print("[Rank %d] Epoch %d Batch [%2d/%2d] Time [%3.2fs] "
                      "Entropy %2.5f LatentNats %2.5f PointNats %2.5f"
                      % (args.rank, epoch, bidx, len(train_loader), duration,
                         entropy_avg_meter.avg, latent_nats_avg_meter.avg,
                         point_nats_avg_meter.avg))

        # evaluate on the validation set
        # if not args.no_validation and (epoch + 1) % args.val_freq == 0:
        #     from utils import validate
        #     validate(test_loader, model, epoch, writer, save_dir, args, clf_loaders=clf_loaders)

        # save visualizations
        if (epoch + 1) % args.viz_freq == 0:
            # reconstructions
            model.eval()
            samples = model.reconstruct(inputs)
            results = []
            for idx in range(min(10, inputs.size(0))):
                res = visualize_point_clouds(samples[idx], inputs[idx], idx)
                results.append(res)
            res = np.concatenate(results, axis=1)
            scipy.misc.imsave(
                os.path.join(save_dir, 'images',
                             'tr_vis_conditioned_epoch%d-gpu%s.png' % (epoch, args.gpu)),
                res.transpose((1, 2, 0)))
            if writer is not None:
                writer.add_image('tr_vis/conditioned', torch.as_tensor(res), epoch)

            # samples
            if args.use_latent_flow:
                num_samples = min(10, inputs.size(0))
                num_points = inputs.size(1)
                _, samples = model.sample(num_samples, num_points)
                results = []
                for idx in range(num_samples):
                    res = visualize_point_clouds(samples[idx], inputs[idx], idx)
                    results.append(res)
                res = np.concatenate(results, axis=1)
                scipy.misc.imsave(
                    os.path.join(save_dir, 'images',
                                 'tr_vis_sampled_epoch%d-gpu%s.png' % (epoch, args.gpu)),
                    res.transpose((1, 2, 0)))
                if writer is not None:
                    writer.add_image('tr_vis/sampled', torch.as_tensor(res), epoch)

        # save checkpoints
        if not args.distributed or (args.rank % ngpus_per_node == 0):
            if (epoch + 1) % args.save_freq == 0:
                save(model, optimizer, epoch + 1,
                     os.path.join(save_dir, 'checkpoint-%d.pt' % epoch))
                save(model, optimizer, epoch + 1,
                     os.path.join(save_dir, 'checkpoint-latest.pt'))
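# `init_np_seed` is passed as `worker_init_fn` to the loaders above but not defined in this
# snippet. A plausible minimal sketch: derive a per-worker NumPy seed from the torch seed so
# that random augmentations differ across DataLoader workers. The project's helper may differ.
import numpy as np
import torch

def init_np_seed(worker_id):
    seed = torch.initial_seed()
    np.random.seed(seed % 4294967296)  # NumPy seeds must fit in 32 bits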
args = p.parse_args()

train_x, train_y, test_x, test_y = get_mnist()
# normalize inputs to [0, 1]
train_x /= 255
test_x /= 255

if torch.cuda.is_available():
    dev = torch.device('cuda')
else:
    dev = torch.device('cpu')

numbers = range(10)  # subset of numbers to train on
train_mask = make_mask(train_y, numbers)
xs = train_x[train_mask]
dataset = MyDataset(xs)

m = GAN(28 * 28,
        gen_input_dim=args.gen_input_dim,
        disc_learning_rate=args.disc_learning_rate,
        gen_learning_rate=args.gen_learning_rate,
        device=dev)
m.fit(dataset,
      batch_size=args.batch_size,
      epochs=args.epochs,
      verbose=args.verbose)

# generate digits and save
if args.num_gen:
    noise = torch.randn(args.num_gen, args.gen_input_dim, device=dev)
    y = np.reshape(
        m.generate(noise).cpu().detach().numpy(),
        (args.num_gen, 28, 28))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        if verbose:
            print('epoch', epoch, cum_loss)

    def init_memory(self):
        return .01 * torch.randn(*self.memory_size)

    @torch.no_grad()
    def predict(self, x):
        yhat, self.memory = self.controller(x, self.memory)
        return yhat


if __name__ == '__main__':
    if torch.cuda.is_available():
        dev = torch.device('cuda')
    else:
        dev = torch.device('cpu')

    # Copy task from section 4.1
    vsize = 8
    x_train = get_rand_vector_sequence(1, 21, vsize=vsize, num_samples=100)
    y_train = deepcopy(x_train)
    dataset = MyDataset(x_train, y_train)

    mem_size = (100, 20)
    ntm = NeuralTuringMachine(
        vsize, vsize, mem_size, controller=FeedforwardController, device=dev)
    ntm.fit(dataset, verbose=True)
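# `get_rand_vector_sequence` is not defined in this snippet. A minimal sketch for the NTM
# copy task (random binary sequences of length 1..20, each vector of size `vsize`); the
# exact shapes, padding and delimiter handling of the project's real generator may differ.
import torch

def get_rand_vector_sequence(min_len, max_len, vsize=8, num_samples=100):
    sequences = []
    for _ in range(num_samples):
        length = torch.randint(min_len, max_len, (1,)).item()  # max_len is exclusive
        seq = torch.randint(0, 2, (length, vsize)).float()     # random binary vectors
        sequences.append(seq)
    return sequences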
def load_data(mode='cifar10', batch_size=16):
    assert mode in ['cifar10', 'mnist', 'faces'], 'unknown dataset'
    if mode == 'faces':
        root_path = 'G:/Dataset/celebAHQ/'
        image_list = [x for x in os.listdir(root_path) if is_image_file(x)]
        train_list = image_list[:int(0.8 * len(image_list))]
        test_list = image_list[int(0.8 * len(image_list)):]
        assert len(train_list) > 0
        assert len(test_list) >= 0
        trainset = MyDataset(train_list, root_path, input_height=None,
                             crop_height=None, output_height=32, is_mirror=True)
        testset = MyDataset(test_list, root_path, input_height=None,
                            crop_height=None, output_height=32, is_mirror=False)
        trainloader = MyDataLoader(trainset, batch_size)
        testloader = MyDataLoader(testset, batch_size, shuffle=False)
        classes = None
        return trainset, trainloader, testset, testloader, classes
    elif mode == 'cifar10':
        root_path = 'G:/Dataset/cifar10/'
        classes = ('plane', 'car', 'bird', 'cat', 'deer',
                   'dog', 'frog', 'horse', 'ship', 'truck')
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        trainset = torchvision.datasets.CIFAR10(root=root_path, train=True,
                                                download=False, transform=transform)
        testset = torchvision.datasets.CIFAR10(root=root_path, train=False,
                                               download=False, transform=transform)
    elif mode == 'mnist':
        root_path = 'G:/Dataset/mnist/'
        classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
        transform = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize((0.5, ), (0.5, ))])
        trainset = torchvision.datasets.MNIST(root=root_path, train=True,
                                              download=False, transform=transform)
        testset = torchvision.datasets.MNIST(root=root_path, train=False,
                                             download=False, transform=transform)

    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True,
                             pin_memory=True, drop_last=False, num_workers=2)
    # evaluate on the held-out test split, without shuffling
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=False,
                            pin_memory=True, drop_last=False, num_workers=2)
    return trainset, trainloader, testset, testloader, classes
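# `is_image_file` is used by the 'faces' branch above but not defined in this snippet.
# A minimal extension-based sketch; the project's version may accept more formats.
IMG_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')

def is_image_file(filename):
    return filename.lower().endswith(IMG_EXTENSIONS)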
if __name__ == '__main__':
    transform = tf.Compose([
        tf.Resize(512),            # rescale
        tf.RandomResizedCrop(256),
        tf.ToTensor(),             # convert to [0, 1] range
    ])
    slow_dataset = MyDataset('../data/wikiart', transform=transform, img_limit=1000)
    fast_dataset = CachedDataset('../data/wikiart', transform=transform,
                                 img_limit=1000, max_cache_size=1000)
    # run_performance_test(slow_dataset, batch_size=8, epoch_num=2)  # 25s per epoch for 1k images
    run_performance_test(fast_dataset, batch_size=8, epoch_num=2)    # 10s per epoch after loading the data
def train(opt):
    opt = dotDict(opt)
    if not os.path.exists(opt.checkpoints_dir):
        os.makedirs(opt.checkpoints_dir)
    if not os.path.exists(os.path.join(opt.out_dir, opt.run_name)):
        os.makedirs(os.path.join(opt.out_dir, opt.run_name))
    if torch.cuda.is_available() and not opt.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    ###### Definition of variables ######
    # Networks
    G0 = GeometrySynthesizer()
    G1 = Generator(opt.input_nc, opt.output_nc)
    G2 = Generator(opt.input_nc, opt.output_nc)
    D1 = Discriminator(opt.input_nc)
    D2 = Discriminator(opt.output_nc)

    if opt.cuda:
        G0.cuda()
        G1.cuda()
        G2.cuda()
        D1.cuda()
        D2.cuda()

    G1.apply(weights_init_normal)
    G2.apply(weights_init_normal)
    D2.apply(weights_init_normal)
    D1.apply(weights_init_normal)

    # Optimizers & LR schedulers
    optimizer_G0 = torch.optim.Adam(G0.parameters(), lr=opt.lr_GS, betas=(0.5, 0.999))
    optimizer_G = torch.optim.Adam(itertools.chain(G1.parameters(), G2.parameters()),
                                   lr=opt.lr_AS, betas=(0.5, 0.999))
    optimizer_D1 = torch.optim.Adam(D1.parameters(), lr=opt.lr_AS, betas=(0.5, 0.999))
    optimizer_D2 = torch.optim.Adam(D2.parameters(), lr=opt.lr_AS, betas=(0.5, 0.999))

    if opt.G0_checkpoint is not None:
        G0 = load_G0_ckp(opt.G0_checkpoint, G0)
    if opt.AS_checkpoint is not None:
        _, G1, D1, G2, D2, optimizer_G, optimizer_D1, optimizer_D2 = load_AS_ckp(
            opt.AS_checkpoint, G1, D1, G2, D2, optimizer_G, optimizer_D1, optimizer_D2)
    if opt.resume_checkpoint is not None:
        opt.epoch, G0, G1, D1, G2, D2, optimizer_G0, optimizer_G, optimizer_D1, optimizer_D2 = load_ckp(
            opt.resume_checkpoint, G0, G1, D1, G2, D2,
            optimizer_G0, optimizer_G, optimizer_D1, optimizer_D2)

    lr_scheduler_G0 = torch.optim.lr_scheduler.LambdaLR(
        optimizer_G0, lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)
    lr_scheduler_G = torch.optim.lr_scheduler.LambdaLR(
        optimizer_G, lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)
    lr_scheduler_D1 = torch.optim.lr_scheduler.LambdaLR(
        optimizer_D1, lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)
    lr_scheduler_D2 = torch.optim.lr_scheduler.LambdaLR(
        optimizer_D2, lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)

    # Inputs & targets memory allocation
    Tensor = torch.cuda.FloatTensor if opt.cuda else torch.Tensor
    background_t = Tensor(opt.batchSize, opt.input_nc, opt.size, opt.size)
    foreground_t = Tensor(opt.batchSize, opt.input_nc, opt.size, opt.size)
    real_t = Tensor(opt.batchSize, opt.output_nc, opt.size, opt.size)
    target_real = Variable(Tensor(opt.batchSize).fill_(1.0), requires_grad=False)
    target_fake = Variable(Tensor(opt.batchSize).fill_(0.0), requires_grad=False)

    composed_buffer = ReplayBuffer()
    fake_real_buffer = ReplayBuffer()
    fake_composed_buffer = ReplayBuffer()

    # Dataset loader
    transforms_dataset = [
        transforms.Resize(int(opt.size * 1.12), Image.BICUBIC),
        transforms.RandomCrop(opt.size),
        transforms.ToTensor(),
        # transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
    ]
    transforms_masks = [transforms.ToTensor()]
    text = TextUtils(opt.root, transforms_=transforms_masks)
    dataset = MyDataset(opt.root, transforms_=transforms_dataset)
    print("No. of Examples = ", len(dataset))
    dataloader = DataLoader(dataset, batch_size=opt.batchSize,
                            shuffle=True, num_workers=opt.n_cpu)

    # Loss plot
    logger = Logger(opt.n_epochs, len(dataloader),
                    os.path.join(opt.out_dir, opt.run_name), opt.epoch + 1)
    ###################################

    ###### Training ######
    for epoch in range(opt.epoch, opt.n_epochs):
        for i, batch in enumerate(dataloader):
            # Set model input
            background = Variable(background_t.copy_(batch['X']), requires_grad=True)
            # foreground = Variable(foreground_t.copy_(batch['Y']), requires_grad=True)
            real = Variable(real_t.copy_(batch['Z']), requires_grad=True)
            foreground = Variable(foreground_t.copy_(text.get_text_masks(opt.batchSize)),
                                  requires_grad=True)

            ###### Geometric Synthesizer ######
            composed_GS = G0(background, foreground)  # concatenate background and foreground object

            ## optimize G0 loss
            optimizer_G0.zero_grad()
            loss_G0 = criterion_discriminator(D2(composed_GS), target_fake)
            loss_G0.backward()
            optimizer_G0.step()

            ###### Appearance Synthesizer ######
            composed = composed_buffer.push_and_pop(composed_GS)

            ###### Generators G1 and G2 ######
            optimizer_G.zero_grad()

            ## Identity loss
            # G1(X) should equal X if X = real
            same_real = G1(real)
            loss_identity_1 = criterion_identity(real, same_real) * 5.0
            # G2(X) should equal X if X = composed
            same_composed = G2(composed)
            loss_identity_2 = criterion_identity(composed, same_composed) * 5.0
            loss_identity = loss_identity_1 + loss_identity_2

            ## GAN loss
            fake_real = G1(composed)
            loss_G1 = criterion_generator(D1(fake_real), target_real)
            fake_composed = G2(real)
            loss_G2 = criterion_generator(D2(fake_composed), target_real)
            loss_GAN = loss_G1 + loss_G2

            ## Cycle loss
            recovered_real = G1(fake_composed)
            loss_cycle_real = criterion_cycle(recovered_real, real) * 10.0
            recovered_composed = G2(fake_real)
            loss_cycle_composed = criterion_cycle(recovered_composed, composed) * 10.0
            loss_cycle = loss_cycle_composed + loss_cycle_real

            # Total loss
            loss_G = loss_identity + loss_GAN + loss_cycle
            loss_G.backward()
            optimizer_G.step()
            #####################################

            ###### Discriminator D1 ######
            optimizer_D1.zero_grad()
            # real loss
            loss_D1_real = criterion_discriminator(D1(real), target_real)
            # fake loss
            new_fake_real = fake_real_buffer.push_and_pop(fake_real)
            loss_D1_fake = criterion_discriminator(D1(new_fake_real.detach()), target_fake)
            loss_D1 = (loss_D1_real + loss_D1_fake) * 0.5
            loss_D1.backward()
            optimizer_D1.step()

            ###### Discriminator D2 ######
            optimizer_D2.zero_grad()
            # real loss
            new_composed = composed_buffer.push_and_pop(composed)
            loss_D2_real = criterion_discriminator(D2(new_composed.detach()), target_real)
            # fake loss
            new_fake_composed = fake_composed_buffer.push_and_pop(fake_composed)
            loss_D2_fake = criterion_discriminator(D2(new_fake_composed.detach()), target_fake)
            loss_D2 = (loss_D2_real + loss_D2_fake) * 0.5
            loss_D2.backward()
            optimizer_D2.step()
            #####################################

            # Progress report (http://localhost:8097)
            losses = {
                'loss_G0': loss_G0,
                'loss_G': loss_G,
                'loss_D1': loss_D1,
                'loss_D2': loss_D2
            }
            images = {
                'background': background,
                'foreground': foreground,
                'real': real,
                'composed_GS': composed_GS,
                'composed': composed,
                'synthesized': fake_real,
                'adapted_real': fake_composed
            }
            logger.log(losses, images)

        # Update learning rates
        lr_scheduler_G.step()
        lr_scheduler_D1.step()
        lr_scheduler_D2.step()

        # Save models checkpoints
        checkpoint = {
            'epoch': epoch + 1,
            'state_dict': {
                "G0": G0.state_dict(),
                "G1": G1.state_dict(),
                "D1": D1.state_dict(),
                "G2": G2.state_dict(),
                "D2": D2.state_dict()
            },
            'optimizer': {
                "G0": optimizer_G0.state_dict(),
                "G": optimizer_G.state_dict(),
                "D1": optimizer_D1.state_dict(),
                "D2": optimizer_D2.state_dict()
            }
        }
        save_ckp(checkpoint, os.path.join(opt.checkpoints_dir, opt.run_name + '.pth'))
        self.optimizer.step()
        if verbose:
            print('epoch', epoch, cum_loss)

    @torch.no_grad()
    def generate(self, mean, var):
        return self.ae.decode(mean, var)

    @torch.no_grad()
    def latent_representation(self, x):
        return self.ae.encode(x)


if __name__ == '__main__':
    train_x, train_y, test_x, test_y = get_mnist()
    train_x /= 255
    test_x /= 255

    if torch.cuda.is_available():
        dev = torch.device('cuda')
    else:
        dev = torch.device('cpu')

    dataset = MyDataset(train_x)
    m = VAE(28 * 28, device=dev)
    m.fit(dataset, epochs=5, verbose=True)

    # sample a latent code on the same device as the model
    mean = torch.randn(1, m.latent_dim, device=dev)
    var = 10 * torch.rand(1, m.latent_dim, device=dev)
    im = m.generate(mean, var)
    plt.imshow(im.detach().cpu().numpy().reshape(28, 28))
def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            # args.rank = int(os.environ["RANK"])
            args.rank = 1
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # Training dataset
    train_dataset = []
    if args.dataset == 'VOC':
        train_dataset = VOCDetection(root=args.dataset_root,
                                     transform=transforms.Compose(
                                         [Normalizer(), Augmenter(), Resizer()]))
        valid_dataset = VOCDetection(root=args.dataset_root,
                                     image_sets=[('2007', 'test')],
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
        args.num_class = train_dataset.num_classes()
    elif args.dataset == 'COCO':
        train_dataset = CocoDataset(root_dir=args.dataset_root,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(), Augmenter(), Resizer()]))
        valid_dataset = CocoDataset(root_dir=args.dataset_root,
                                    set_name='val2017',
                                    transform=transforms.Compose(
                                        [Normalizer(), Resizer()]))
        args.num_class = train_dataset.num_classes()
    elif args.dataset == 'MyDataset':
        train_dataset = MyDataset(root_dir=args.dataset_root,
                                  set_name='train',
                                  mode='train',
                                  transform=transforms.Compose(
                                      [Normalizer(), Augmenter(), Resizer()]))
        valid_dataset = MyDataset(root_dir=args.dataset_root,
                                  set_name='valid',
                                  mode='train',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
        args.num_class = train_dataset.num_classes()

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.workers,
                              shuffle=True,
                              collate_fn=collater,
                              pin_memory=True)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              num_workers=args.workers,
                              shuffle=False,
                              collate_fn=collater,
                              pin_memory=True)

    checkpoint = []
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
        params = checkpoint['parser']
        args.num_class = params.num_class
        args.network = params.network
        # args.start_epoch = checkpoint['epoch'] + 1
        args.start_epoch = 0
        del params

    model = EfficientDet(num_classes=args.num_class,
                         network=args.network,
                         W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
                         D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
                         D_class=EFFICIENTDET[args.network]['D_class'])

    if args.resume is not None:
        # model.load_state_dict(checkpoint['state_dict'])
        pretrained_dict = checkpoint['state_dict']
        model_dict = model.state_dict()
        # remove the keys corresponding to the classification head in the pretrained dict
        pretrained_dict.pop('bbox_head.retina_cls.weight')
        pretrained_dict.pop('bbox_head.retina_cls.bias')
        # now update the model dict with the pretrained dict and load it
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        del checkpoint

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
            print('Run with DistributedDataParallel with device_ids....')
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
            print('Run with DistributedDataParallel without device_ids....')
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        model = model.cuda()
        print('Run with DataParallel ....')
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion), optimizer, scheduler
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    cudnn.benchmark = True

    for epoch in range(args.start_epoch, args.num_epoch):
        train(train_loader, model, scheduler, optimizer, epoch, args)
        if (epoch + 1) % 5 == 0:
            test(valid_dataset, model, epoch, args)

        state = {
            'epoch': epoch,
            'parser': args,
            'state_dict': get_state_dict(model)
        }
        torch.save(
            state,
            os.path.join(args.save_folder, args.dataset, args.network,
                         "checkpoint_{}.pth".format(epoch)))
                    default=32,
                    help='Batch size for training')
parser.add_argument('--saved_model_path',
                    type=str,
                    default='./dumps/',
                    help='saved model path')
parser.add_argument('--logs_path',
                    type=str,
                    default='./logs',
                    help='directory for score logs')
opt = parser.parse_args()

nll_loss = nn.NLLLoss()

if __name__ == '__main__':
    train_dataset = MyDataset(opt.train_text_path, opt.train_label_path, opt.length)
    test_dataset = TestDataset(opt.valid_text_path,
                               opt.valid_label_path,
                               opt.length,
                               word2id=train_dataset.word2id,
                               id2word=train_dataset.id2word)
    if opt.model == 'GRU':
        model = GRUModel(
            vocab_size=train_dataset.vocab_size,
            embedding_size=opt.embedding_size,
            output_size=opt.output_dim,
            hidden_dim=opt.hidden_dim,
            n_layers=opt.n_layer,
        )
    elif opt.model == 'BiLSTM':
def train(opt):
    opt = dotDict(opt)
    if not os.path.exists(opt.checkpoints_dir):
        os.makedirs(opt.checkpoints_dir)
    if not os.path.exists(os.path.join(opt.out_dir, opt.run_name)):
        os.makedirs(os.path.join(opt.out_dir, opt.run_name))
    if torch.cuda.is_available() and not opt.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    ###### Definition of variables ######
    # Networks
    G0 = GeometrySynthesizer()
    D2 = Discriminator(opt.output_nc)

    if opt.cuda:
        G0.cuda()
        D2.cuda()

    D2.apply(weights_init_normal)

    # Optimizers & LR schedulers
    optimizer_G0 = torch.optim.Adam(G0.parameters(), lr=opt.lr, betas=(0.5, 0.999))
    optimizer_D2 = torch.optim.Adam(D2.parameters(), lr=opt.lr, betas=(0.5, 0.999))

    if opt.resume_checkpoint is not None:
        opt.epoch, G0, D2, optimizer_G0, optimizer_D2 = load_GS_ckp(
            opt.resume_checkpoint, G0, D2, optimizer_G0, optimizer_D2)

    lr_scheduler_G0 = torch.optim.lr_scheduler.LambdaLR(
        optimizer_G0, lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)
    lr_scheduler_D2 = torch.optim.lr_scheduler.LambdaLR(
        optimizer_D2, lr_lambda=LambdaLR(opt.n_epochs, opt.epoch, opt.decay_epoch).step)

    # Inputs & targets memory allocation
    Tensor = torch.cuda.FloatTensor if opt.cuda else torch.Tensor
    background_t = Tensor(opt.batchSize, opt.input_nc, opt.size, opt.size)
    foreground_t = Tensor(opt.batchSize, opt.input_nc, opt.size, opt.size)
    real_t = Tensor(opt.batchSize, opt.output_nc, opt.size, opt.size)
    target_real = Variable(Tensor(opt.batchSize).fill_(1.0), requires_grad=False)
    target_fake = Variable(Tensor(opt.batchSize).fill_(0.0), requires_grad=False)

    composed_buffer = ReplayBuffer()

    # Dataset loader
    transforms_dataset = [
        transforms.Resize(int(opt.size * 1.12), Image.BICUBIC),
        transforms.RandomCrop(opt.size),
        transforms.ToTensor(),
        # transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
    ]
    transforms_masks = [transforms.ToTensor()]
    text = TextUtils(opt.root, transforms_=transforms_masks)
    dataset = MyDataset(opt.root, transforms_=transforms_dataset)
    print("No. of Examples = ", len(dataset))
    dataloader = DataLoader(dataset, batch_size=opt.batchSize,
                            shuffle=True, num_workers=opt.n_cpu)

    # Loss plot
    logger = Logger(opt.n_epochs, len(dataloader),
                    os.path.join(opt.out_dir, opt.run_name), opt.epoch + 1)
    ###################################

    ###### Training ######
    for epoch in range(opt.epoch, opt.n_epochs):
        for i, batch in enumerate(dataloader):
            # Set model input
            background = Variable(background_t.copy_(batch['X']), requires_grad=True)
            # foreground = Variable(foreground_t.copy_(batch['Y']), requires_grad=True)
            real = Variable(real_t.copy_(batch['Z']), requires_grad=True)
            foreground = Variable(foreground_t.copy_(text.get_text_masks(opt.batchSize)),
                                  requires_grad=True)

            ###### Geometric Synthesizer ######
            composed = G0(background, foreground)  # concatenate background and foreground object

            ## optimize G0 loss
            optimizer_G0.zero_grad()
            loss_G0 = criterion_discriminator(D2(composed), target_real)
            loss_G0.backward()
            optimizer_G0.step()

            ## optimize D2 Geometry loss
            optimizer_D2.zero_grad()
            # real loss
            loss_D2_real = criterion_discriminator(D2(real), target_real)
            # composed loss
            new_composed = composed_buffer.push_and_pop(composed)
            loss_D2_composed = criterion_discriminator(D2(new_composed), target_fake)
            loss_D2 = (loss_D2_real + loss_D2_composed) * 0.5
            if i % 5 == 0:
                loss_D2.backward()
                optimizer_D2.step()

            # Progress report (http://localhost:8097)
            losses = {'loss_G0': loss_G0, 'loss_D2': loss_D2}
            images = {
                'background': background,
                'foreground': foreground,
                'real': real,
                'composed': composed
            }
            logger.log(losses, images)

        # Update learning rates
        lr_scheduler_G0.step()
        lr_scheduler_D2.step()

        # Save models checkpoints
        checkpoint = {
            'epoch': epoch + 1,
            'state_dict': {
                "G0": G0.state_dict(),
                "D2": D2.state_dict()
            },
            'optimizer': {
                "G0": optimizer_G0.state_dict(),
                "D2": optimizer_D2.state_dict()
            }
        }
        save_ckp(checkpoint, os.path.join(opt.checkpoints_dir, opt.run_name + '.pth'))
def train_cnn_pytorch():
    image_dim = 32
    hidden_dim = 200
    output_dim = 10
    kernel_dim = 3
    kernel_num = 64
    batch_size = 8
    lr = 0.01
    dp_rate = 0.3
    epochs = 1000
    best_result = [0, 0]
    no_update = 0
    # os.environ["CUDA_VISIBLE_DEVICES"] = 0

    print("Start training")
    model = CNN(batch_size=batch_size,
                input_dim=image_dim,
                hidden_dim=hidden_dim,
                output_dim=output_dim,
                kernel_num=kernel_num,
                kernel_dim=kernel_dim,
                dp_rate=dp_rate)
    if torch.cuda.is_available():
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=lr)

    for epoch in range(epochs):
        train_data = MyDataset("digits/trainingDigits")
        train_loader = data.DataLoader(train_data,
                                       batch_size=batch_size,
                                       num_workers=0,
                                       shuffle=True)
        model.train()
        start = time.time()
        print(f"Epoch {epoch} start ")
        avg_loss = 0
        count = 0
        for step, input_data in enumerate(train_loader):
            x = torch.clone(input_data[0]).float()
            target = torch.clone(input_data[1]).long()
            if torch.cuda.is_available():
                x = x.cuda()
                target = target.cuda()
            prediction = model(x)
            loss = F.cross_entropy(prediction, target.argmax(dim=1))
            avg_loss += loss.item()
            count += 1
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        avg_loss /= count  # average loss per batch
        end = time.time()
        print(f"Epoch {epoch} done, Train average loss: {avg_loss}, costing time: {end - start}")

        if epoch % 50 == 0:
            accuracy, wrong_numbers = evaluate_cnn_pytorch(model, batch_size)
            if accuracy > best_result[0]:
                best_result[0] = accuracy
                best_result[1] = wrong_numbers
                no_update = 0
            else:
                no_update += 1
            if no_update >= 5:
                print("Best Accuracy on test data: " + str(best_result[0]) + "%")
                print(f"Best wrong_numbers: {best_result[1]}")
                exit()

    print("Best Accuracy on test data: " + str(best_result[0]) + "%")
    print(f"Best wrong_numbers: {best_result[1]}")
def eval(valid_path):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    FloatTensor = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor

    class_names = load_classes('D:/datasets/voc-custom/classes.names')
    model = YoloNet().to(device)
    model.load_state_dict(torch.load('yolov3_ckpt_10.pth'))
    model.eval()

    dataset = MyDataset(valid_path)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=8,
                                             shuffle=False,
                                             num_workers=1,
                                             collate_fn=dataset.collate_fn)

    labels = []
    total_metrics = []
    for batch_i, (imgs, targets) in enumerate(dataloader):
        labels += targets[:, 1].tolist()
        targets[:, 2:] = xywh2xyxy(targets[:, 2:]) * 416
        imgs = imgs.type(FloatTensor)

        with torch.no_grad():
            outputs, _ = model(imgs)
            outputs = nms(outputs)

        # iterate over every image in the batch
        batch_metrics = []
        for img_i in range(len(outputs)):
            if outputs[img_i] is None:
                continue
            output = outputs[img_i]
            pred_boxes = output[:, :4]
            pred_scores = output[:, 4]
            pred_labels = output[:, -1]

            true_positives = np.zeros(pred_boxes.shape[0])
            target = targets[targets[:, 0] == img_i][:, 1:]
            target_labels = target[:, 0] if len(target) else []
            if len(target):
                detected_boxes = []  # indices of ground-truth boxes already matched
                target_boxes = target[:, 1:]
                # iterate over every predicted box
                for pred_i, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
                    if len(detected_boxes) == len(target):
                        break
                    if pred_label not in target_labels:
                        continue
                    iou, box_index = cal_iou(pred_box.unsqueeze(0), target_boxes).max(0)
                    if iou >= 0.5 and box_index not in detected_boxes:
                        true_positives[pred_i] = 1
                        detected_boxes += [box_index]
            batch_metrics.append([true_positives, pred_scores, pred_labels])
        total_metrics += batch_metrics

    true_positives, pred_scores, pred_labels = [
        np.concatenate(x, 0) for x in list(zip(*total_metrics))
    ]
    precision, recall, AP, f1, ap_class = ap_per_class(
        true_positives, pred_scores, pred_labels, labels)

    print("Average Precisions:")
    for i, c in enumerate(ap_class):
        print(f"+ Class '{c}' ({class_names[c]}) - AP: {AP[i]}")
    print(f"mAP: {AP.mean()}")
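# `xywh2xyxy` is used in eval() above but not defined in this snippet. A minimal sketch of
# the standard conversion from (center_x, center_y, width, height) to (x1, y1, x2, y2)
# corner format; the project's helper is assumed to do the same.
import torch

def xywh2xyxy(x):
    y = x.new_zeros(x.shape)
    y[..., 0] = x[..., 0] - x[..., 2] / 2  # x1 = cx - w/2
    y[..., 1] = x[..., 1] - x[..., 3] / 2  # y1 = cy - h/2
    y[..., 2] = x[..., 0] + x[..., 2] / 2  # x2 = cx + w/2
    y[..., 3] = x[..., 1] + x[..., 3] / 2  # y2 = cy + h/2
    return y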