def __init__(self, name: str, kind, mode, batch_size, epoch, lr):
    """Build the model/dataset pair for `name` and restore any saved checkpoint.

    :param name: model identifier; 'VGG*' selects MyVGG, 'BnOpt' selects MyBnOpt
    :param kind: dataset kind, forwarded to get_save_path/get_data_loader
    :param mode: dataset mode, forwarded to get_data_loader
    :param batch_size: loader batch size
    :param epoch: number of epochs this runner will train for
    :param lr: learning rate passed to the model wrapper
    :raises RuntimeError: if `name` matches no known model
    """
    self.max_correct = 0
    self.epoch = epoch
    self.kind = kind
    self.path = get_save_path(kind, name)
    self.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')
    # Build the network and the dataset.
    if name.startswith('VGG'):
        self.loader, self.dataset = get_data_loader(
            kind, mode, batch_size, 'VGG')
        self.model = MyVGG(name, len(self.dataset.name2label), lr)
    elif name == 'BnOpt':
        self.loader, self.dataset = get_data_loader(
            kind, mode, batch_size, 'BnOpt')
        self.model = MyBnOpt(len(self.dataset.name2label), lr)
    else:
        raise RuntimeError('None of model')
    if torch.cuda.device_count() > 1:
        print(f'We use {torch.cuda.device_count()} GPUs')
        self.model.model = nn.DataParallel(self.model.model)
    self.model.model.to(self.device)
    # Restore weights/optimizer state if a checkpoint exists.
    # Bug fix: map_location keeps GPU-saved checkpoints loadable on
    # CPU-only hosts (plain torch.load raised there).
    if os.path.exists(self.path):
        checkpoint = torch.load(self.path, map_location=self.device)
        self.model.model.load_state_dict(checkpoint['net'])
        self.model.optimizer.load_state_dict(checkpoint['optimizer'])
        self.max_correct = checkpoint['max_correct']
def main(): config = get_config(CONFIG_FILENAME) real_data_loader = get_data_loader(config, target='real') fake_data_loader = get_data_loader(config, target='fake') metrics: List[str] = config['metrics']['metric_type']
def main():
    """Train a HeatModel, with periodic evaluation and checkpointing."""
    opt, logger, stats, vis = utils.build(is_train=True, tb_dir='tb_train')
    np.save(os.path.join(opt.ckpt_path, 'opt.npy'), opt)

    data_loader = get_data_loader(opt)
    logger.print('Loading data from {}'.format(opt.dset_path))
    print('####### Data loaded #########')

    # Validation: same config, frozen to eval mode on a small subset.
    val_opt = copy.deepcopy(opt)
    val_opt.is_train = False
    val_opt.data_limit = 20
    val_loader = get_data_loader(val_opt)

    model = HeatModel(opt)
    batches_per_epoch = len(data_loader)

    for epoch in range(opt.start_epoch, opt.n_epochs):
        model.setup(is_train=True)
        for step, batch in enumerate(data_loader):
            bc, final, x = batch['bc'], batch['final'], batch['x']
            f = batch.get('f')
            x = utils.initialize(x, bc, opt.initialization)
            loss_dict = model.train(x, final, bc, f)
            if (step + 1) % opt.log_every == 0:
                print('Epoch {}, step {}'.format(epoch, step))
                vis.add_scalar(loss_dict, epoch * batches_per_epoch + step)
        logger.print(
            ['[Summary] Epoch {}/{}:'.format(epoch, opt.n_epochs - 1)])

        # Periodic evaluation.
        if opt.evaluate_every > 0 and (epoch + 1) % opt.evaluate_every == 0:
            model.setup(is_train=False)
            # Spectral diagnostics for iterative solvers only.
            if opt.iterator not in ('cg', 'unet'):
                w, _ = utils.calculate_eigenvalues(model, image_size=15)
                w = sorted(np.abs(w))
                eigenvalues = {'first': w[-2], 'second': w[-3], 'third': w[-4]}
                vis.add_scalar({'eigenvalues': eigenvalues}, epoch)
                logger.print('Eigenvalues: {:.2f}, {:.3f}, {:.3f}, {:.3f}'
                             .format(w[-1], w[-2], w[-3], w[-4]))
            # Full pass over the validation set.
            results, images = evaluate(opt, model, val_loader, logger)
            vis.add_image({'errors': images['error_curves'][0]}, epoch + 1)
            vis.add_scalar(
                {
                    'steps': {
                        'Jacobi': results['Jacobi'],
                        'model': results['model']
                    },
                    'ratio': results['ratio']
                }, epoch + 1)

        if (epoch + 1) % opt.save_every == 0 or epoch == opt.n_epochs - 1:
            model.save(opt.ckpt_path, epoch + 1)
def main(config, dataset_root, resume):
    """Entry point: build StarGAN with train/test loaders and run training.

    When `resume` is truthy, training continues after the last saved epoch;
    otherwise it starts from epoch 0.
    """
    set_print_precision()
    seed_random()
    config = get_config(config)
    loader_train = get_data_loader(config, dataset_root, is_train=True)
    loader_test = get_data_loader(config, dataset_root, is_train=False)
    model = StarGAN(config, loader_train, loader_test)
    if resume:
        # resume train after the last saved epoch
        model.resume_train()
    else:
        model.train_starGAN(init_epoch=0)
def main():
    """Evaluate DiveModel checkpoints listed in opt.which_epochs (-1 = latest)."""
    opt, logger, vis = cfg.build(is_train=False)
    val_loader = data.get_data_loader(opt)
    print('Val dataset: {}'.format(len(val_loader.dataset)))

    model = DiveModel(opt)
    model.setup_training()
    model.initialize_weights()

    for epoch in opt.which_epochs:
        if epoch == -1:
            # Resolve -1 to the newest checkpoint on disk.
            ckpt_files = glob.glob(os.path.join(opt.ckpt_path, 'net*.pth'))
            assert len(ckpt_files) > 0
            epoch = max(
                int(name.split('_')[-1].split('.')[0]) for name in ckpt_files)
        logger.print('Loading checkpoints from {}, epoch {}'.format(
            opt.ckpt_path, epoch))
        model.load(opt.ckpt_path, epoch)

        results = evaluate(opt, val_loader, model)
        for metric, value in results.items():
            logger.print('{}: {}'.format(metric, value))
def __init__(self, args, crition: nn.CrossEntropyLoss):
    """Set up a tester: build the network from its module, load data + weights.

    :param args: parsed CLI args; must provide `net` (model/module name)
        and `config` (path handed to self._parse_args)
    :param crition: classification loss used during evaluation
    """
    self.args = args
    self.model_name = args.net
    self.config = self._parse_args(args.config)
    # The model class lives in net/<model_name>.py under the same name.
    net_module = importlib.import_module(f"net.{self.model_name}")
    self.model_class = getattr(net_module, self.model_name)
    self.model: torch.nn.Module = self.model_class(*self._parse_model_args())
    self.numclass = self.config['numclass']
    self.save_path = self.config['save_path']
    self.batch_size = self.config['batch_size']
    self.crition = crition
    self.metricer = Metrics(self.numclass)
    self.test_dataloader = get_data_loader(
        self.config['test_data_path'],
        self.config['test_annot_path'],
        self.numclass,
        img_size=self.config['ori_size'],
        batch_size=8,
        name=self.config['dataset_name'],
        mode='test',
        return_name=True,
    )
    # Bug fix: the checkpoint was always mapped to "cuda:0", which crashes
    # on CPU-only machines; map to the device we will actually run on.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    self.model.load_state_dict(
        torch.load(self.save_path, map_location=device), strict=False)
    if torch.cuda.is_available():
        self.model = self.model.cuda()
def main():
    """Evaluate a trained HeatModel on a (possibly different) test geometry."""
    opt, logger, stats, vis = utils.build(is_train=False, tb_dir='tb_val')
    # Load the options the model was trained with. allow_pickle is required:
    # opt.npy stores a pickled object, and numpy >= 1.16.3 refuses pickled
    # loads by default (bug fix: the original call fails there).
    model_opt = np.load(
        os.path.join(opt.ckpt_path, 'opt.npy'), allow_pickle=True).item()
    model_opt.is_train = False
    # Change geometry to the testing one
    model_opt.geometry = opt.geometry
    model = HeatModel(model_opt)
    logger.print('Loading data from {}'.format(opt.dset_path))

    # For convenience, evaluate with the same init/iterator as training.
    opt.initialization = model_opt.initialization
    opt.iterator = model_opt.iterator
    data_loader = data.get_data_loader(opt)
    print('####### Data Loaded ########')

    for epoch in opt.which_epochs:
        if epoch < 0:
            # Pick last epoch
            checkpoints = glob.glob(os.path.join(opt.ckpt_path, 'net_*.pth'))
            assert len(checkpoints) > 0
            epochs = [int(path[:-4].split('_')[-1]) for path in checkpoints]
            epoch = sorted(epochs)[-1]
        model.load(opt.ckpt_path, epoch)
        logger.print('Checkpoint loaded from {}, epoch {}'.format(
            opt.ckpt_path, epoch))
        test(opt, model, data_loader, logger, vis)
def inference():
    """Run single-sample detection with a trained LDOPC net and visualize it.

    Loads config and weights, predicts on one fixed sample (image_id 25),
    thresholds the classification map, gathers box corners, applies NMS and
    plots ground truth vs. prediction in BEV. Blocks on cv2.waitKey(0).
    """
    config_name = 'config.json'
    config, _, _, _ = load_config(config_name)
    # Build the net on the available device; map_location below matches it.
    if torch.cuda.is_available():
        device = 'cuda'
        net = LDOPC(config['use_bn']).cuda()
    else:
        device = 'cpu'
        net = LDOPC(config['use_bn']).cpu()
    net.load_state_dict(
        torch.load(get_model_name(config['name']), map_location=device))
    net.set_decode(True)
    loader, _ = get_data_loader(batch_size=1,
                                use_npy=config['use_npy'],
                                frame_range=config['frame_range'])
    net.eval()
    image_id = 25  # fixed sample to visualize
    threshold = config['cls_threshold']
    with torch.no_grad():
        pc_feature, label_map = loader.dataset[image_id]
        pc_feature = pc_feature.to(device)
        label_map = label_map.to(device)
        label_map_unnorm, label_list = loader.dataset.get_label(image_id)
        t_start = time.time()
        # Add/remove the batch dimension around the forward pass.
        pred = net(pc_feature.unsqueeze(0)).squeeze_(0)
        print("Forward pass time", time.time() - t_start)
        # Last-dim channel 0 is the classification score; channels 1..8 are
        # gathered below into 4 corners x 2 coordinates per box.
        cls_pred = pred[..., 0]
        activation = cls_pred > threshold
        num_boxes = int(activation.sum())
        if num_boxes == 0:
            print("No bounding box found")
            return
        corners = torch.zeros((num_boxes, 8))
        for i in range(1, 9):
            corners[:, i - 1] = torch.masked_select(pred[..., i], activation)
        corners = corners.view(-1, 4, 2).numpy()
        scores = (torch.masked_select(pred[..., 0], activation)).cpu().numpy()
        t_start = time.time()
        # Drop overlapping detections.
        selected_ids = non_max_suppression(corners, scores,
                                           config['nms_iou_threshold'])
        corners = corners[selected_ids]
        scores = scores[selected_ids]
        print("Non max suppression time:", time.time() - t_start)
        pc_feature = pc_feature.cpu().numpy()
        plot_bev(pc_feature, label_list, window_name='GT')
        plot_bev(pc_feature, corners, window_name='Prediction')
        plot_label_map(cls_pred.cpu().numpy())
        cv2.waitKey(0)
def train():
    """Train FCSwitchedVAE from module-level `args`, checkpointing periodically.

    Returns the list of checkpoint paths written during training.
    """
    mkdir(args.save_dir)
    device = torch.device(args.cuda_id if torch.cuda.is_available() else 'cpu')
    train_loader, ds = get_data_loader(args.dataset, args.batch_size,
                                       args.n_steps)
    model = FCSwitchedVAE(args.y_ce_beta, args.y_hsic_beta, args.y_mmd_beta,
                          args.z_hsic_beta, args.z_kl_beta,
                          args.z2_kl_beta_max, args.z2_kl_stop_step,
                          args.channels, args.n_branches, args.n_switches,
                          args.n_dims_sm, args.fc_operator_type,
                          args.fc_switch_type, args.n_latent_z2).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           betas=(args.adam_beta1, args.adam_beta2))
    ckpt_paths = []
    model.train()
    # 1-based step counter comes straight from enumerate.
    for step, batch in enumerate(train_loader, start=1):
        _, inputs = batch
        x = inputs.to(device).float()
        recon_x, z2, z2_mean, z2_logvar, ys_logits, ys_logits2, ys_idx, ys, \
            zs_mean, zs_logvar, zs = model(x, backward_on_y=args.backward_on_y)
        loss, recon_loss, z2_kl_loss, z_hsic_loss, z_kl_loss, y_ce_loss, \
            y_hsic_loss, y_mmd_loss = model.loss(x, recon_x, z2_mean,
                                                 z2_logvar, ys_logits,
                                                 ys_logits2, ys, zs_mean,
                                                 zs_logvar, zs, step)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if step % args.print_freq == 0:
            print(
                '[Step %d] loss: %.4f, recon_loss: %.4f, z2_kl_loss: %.4f, z_hsic_loss: %.4f, z_kl_loss: %.4f, '
                'y_ce_loss: %.4f, y_hsic_loss: %.4f, y_mmd_loss: %.4f' %
                (step, loss, recon_loss, z2_kl_loss, z_hsic_loss, z_kl_loss,
                 y_ce_loss, y_hsic_loss, y_mmd_loss))
        if step % args.save_freq == 0:
            path = os.path.join(args.save_dir, 'step-%d.ckpt' % step)
            torch.save(
                {
                    'step': step,
                    'loss': loss,
                    'recon_loss': recon_loss,
                    'z2_kl_loss': z2_kl_loss,
                    'z_hsic_loss': z_hsic_loss,
                    'z_kl_loss': z_kl_loss,
                    'y_ce_loss': y_ce_loss,
                    'y_hsic_loss': y_hsic_loss,
                    'y_mmd_loss': y_mmd_loss,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, path)
            ckpt_paths.append(path)
    return ckpt_paths
def run(beta=4, seed=1234):
    """Train a BetaVAE on DATASET_NAME, then render per-checkpoint visuals."""
    # Start from an empty save directory.
    save_dir = os.path.join(SAVE_DIR, DATASET_NAME)
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    os.makedirs(save_dir)

    torch.manual_seed(seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_loader, ds = get_data_loader(DATASET_NAME, BATCH_SIZE, N_STEPS)
    model = BetaVAE(beta, IMG_CHANNELS, N_LATENTS).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=LEARNING_RATE,
                           betas=(ADAM_BETA1, ADAM_BETA2))
    ckpt_paths = train(train_loader, model, optimizer, device, save_dir)

    # Fresh output directory for the visualisations.
    visual_dir = os.path.join(OUTPUT_DIR, DATASET_NAME, 'visual')
    if os.path.exists(visual_dir):
        shutil.rmtree(visual_dir)
    os.makedirs(visual_dir)
    for ckpt in ckpt_paths:
        eval_loader, _ = get_data_loader(DATASET_NAME, 1, 100)
        eval_visual(eval_loader, model, device, ckpt, visual_dir)
def eval(self, cfg, net):
    """Run `net` over the validation set and log its accuracy.

    :param cfg: config forwarded to get_data_loader on first use
    :param net: model to evaluate (switched to eval mode)
    """
    # Lazily build the validation loader once.
    if self.valid_loader is None:
        self.valid_loader = get_data_loader(cfg)
    net.eval()
    correct = 0
    with torch.no_grad():
        for data, target in self.valid_loader:
            data = data.to(self.device)
            target = target.to(self.device)
            output = net(data)
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    # Bug fix: report accuracy over the set we actually iterated
    # (self.valid_loader), not self.data_set.test_loader.
    total = len(self.valid_loader.dataset)
    logging.info('\nTest set:Accuracy: {}/{} ({:.02f}%)\n'.format(
        correct, total, 100. * correct / total))
def train(opt):
    """Train DASGIL per `opt`, logging losses and saving periodic checkpoints."""
    train_loader = get_data_loader(opt)
    train_writer = tensorboardX.SummaryWriter(
        os.path.join(opt.output_path, opt.name))
    model = DASGIL(opt).cuda()

    total_steps = 0
    for epoch in range(opt.niter_epoch + 1):
        for batch_idx, batch in enumerate(train_loader):
            total_steps += opt.batch_size
            print('total_steps:', total_steps, ' epoch_iter:', epoch)
            model.set_input(batch)
            model.optimize_params()
            if total_steps % opt.print_iter == 0:
                print('Dis loss:', model.loss_f_D,
                      'Total gen loss:', model.total_loss,
                      'Gen loss:', model.loss_f_G)
        # Epoch-level logging and checkpointing.
        if (epoch + 1) % opt.log_epoch_freq == 0:
            write_loss_image(epoch, model, train_writer)
        if (epoch + 1) % opt.save_epoch_freq == 0:
            print('saving the model at the end of epoch %d, iters %d' %
                  (epoch + 1, total_steps))
            model.save(epoch + 1)
def run_eval_visual(beta=10, seed=1234):
    """Render visualisations for every saved BetaVAE checkpoint on disk."""
    save_dir = os.path.join(SAVE_DIR, DATASET_NAME)
    torch.manual_seed(seed)
    device = torch.device('cuda:3' if torch.cuda.is_available() else 'cpu')
    model = BetaVAE(beta, IMG_CHANNELS, N_LATENTS).to(device)

    # Recreate the visualisation output directory from scratch.
    visual_dir = os.path.join(OUTPUT_DIR, DATASET_NAME, 'visual')
    if os.path.exists(visual_dir):
        shutil.rmtree(visual_dir)
    os.makedirs(visual_dir)

    for entry in os.listdir(save_dir):
        ckpt_path = os.path.join(save_dir, entry)
        if ckpt_path.endswith('.ckpt'):
            eval_loader, _ = get_data_loader(DATASET_NAME, 1, 100)
            eval_visual(eval_loader, model, device, ckpt_path, visual_dir)
def train(self, cfg):
    """Train Net for cfg.TRAIN.epochs epochs and save the weights.

    :param cfg: config with use_cuda, TRAIN.{lr, momentum, epochs,
        log_interval}, checkpoint and model_name fields
    """
    device = torch.device("cuda" if cfg.use_cuda else "cpu")
    net = Net().to(device)
    train_loader = get_data_loader(cfg, phase="train")
    loss_func = CELoss()
    optimizer = optim.SGD(params=net.parameters(),
                          lr=cfg.TRAIN.lr,
                          momentum=cfg.TRAIN.momentum)
    for epoch in range(1, cfg.TRAIN.epochs + 1):
        # set training model
        net.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # load batch data and set the corresponding device mode
            data = data.to(device)
            target = target.to(device)
            # zero the gradient
            optimizer.zero_grad()
            # infer the data
            output = net(data)
            # loss calculation
            loss = loss_func(output, target)
            # gradient calculation
            loss.backward()
            # back propagation
            optimizer.step()
            if batch_idx % cfg.TRAIN.log_interval == 0:
                # Bug fix: the progress denominator must be the number of
                # samples (len(dataset)) to match the sample-count numerator
                # batch_idx * len(data); len(train_loader) is batch count.
                logging.info('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data),
                    len(train_loader.dataset), loss.item()))
    model_file = os.path.join(cfg.checkpoint, cfg.model_name)
    net.save_model(model_file)
lr=1e-5, eps=1e-8, weight_decay=5e-4, momentum=0.9) scheduler = optim.lr_scheduler.StepLR(optimizer, 5, 0.1) num_epochs = 20 image_transform = transforms.Compose([ transforms.Resize((155, 220)), ImageOps.invert, transforms.ToTensor(), # TODO: add normalize ]) trainloader = get_data_loader(is_train=True, batch_size=args.batch_size, image_transform=image_transform, dataset=args.dataset) testloader = get_data_loader(is_train=False, batch_size=args.batch_size, image_transform=image_transform, dataset=args.dataset) os.makedirs('checkpoints', exist_ok=True) model.train() print(model) for epoch in range(num_epochs): print('Epoch {}/{}'.format(epoch, num_epochs)) print('Training', '-' * 20) train(model, optimizer, criterion, trainloader) print('Evaluating', '-' * 20) loss, acc = eval(model, criterion, testloader)
def run(args):
    """Train a generative model + inference network on binarized MNIST.

    Dispatches on args.train_mode ('ws', 'ww', 'reinforce', 'vimco',
    'thermo', 'thermo_wake'), then evaluates on the validation set and
    saves a final checkpoint plus the training stats/callback object.
    """
    # set up args
    if args.cuda and torch.cuda.is_available():
        device = torch.device('cuda')
        args.cuda = True
    else:
        device = torch.device('cpu')
        args.cuda = False
    # Thermodynamic modes additionally need a partition of inverse
    # temperatures.
    if args.train_mode == 'thermo' or args.train_mode == 'thermo_wake':
        partition = util.get_partition(args.num_partitions,
                                       args.partition_type, args.log_beta_min,
                                       device)
    util.print_with_time('device = {}'.format(device))
    util.print_with_time(str(args))

    # save args
    save_dir = util.get_save_dir()
    args_path = util.get_args_path(save_dir)
    util.save_object(args, args_path)

    # data
    binarized_mnist_train, binarized_mnist_valid, binarized_mnist_test = \
        data.load_binarized_mnist(where=args.where)
    data_loader = data.get_data_loader(binarized_mnist_train, args.batch_size,
                                       device)
    valid_data_loader = data.get_data_loader(binarized_mnist_valid,
                                             args.valid_batch_size, device)
    test_data_loader = data.get_data_loader(binarized_mnist_test,
                                            args.test_batch_size, device)
    # Mean training observation, handed to model init.
    train_obs_mean = torch.tensor(np.mean(binarized_mnist_train, axis=0),
                                  device=device,
                                  dtype=torch.float)

    # init models
    util.set_seed(args.seed)
    generative_model, inference_network = util.init_models(
        train_obs_mean, args.architecture, device)

    # optim
    optim_kwargs = {'lr': args.learning_rate}

    # train
    # NOTE(review): if train_mode matches none of the branches below,
    # train_callback is undefined and the eval step raises NameError —
    # presumably argparse restricts the choices; confirm.
    if args.train_mode == 'ws':
        # NOTE(review): unlike the other modes, this callback receives
        # num_particles * batch_size — looks intentional but confirm.
        train_callback = train.TrainWakeSleepCallback(
            save_dir, args.num_particles * args.batch_size, test_data_loader,
            args.eval_num_particles, args.logging_interval,
            args.checkpoint_interval, args.eval_interval)
        train.train_wake_sleep(generative_model, inference_network,
                               data_loader, args.num_iterations,
                               args.num_particles, optim_kwargs,
                               train_callback)
    elif args.train_mode == 'ww':
        train_callback = train.TrainWakeWakeCallback(
            save_dir, args.num_particles, test_data_loader,
            args.eval_num_particles, args.logging_interval,
            args.checkpoint_interval, args.eval_interval)
        train.train_wake_wake(generative_model, inference_network, data_loader,
                              args.num_iterations, args.num_particles,
                              optim_kwargs, train_callback)
    elif args.train_mode == 'reinforce' or args.train_mode == 'vimco':
        train_callback = train.TrainIwaeCallback(
            save_dir, args.num_particles, args.train_mode, test_data_loader,
            args.eval_num_particles, args.logging_interval,
            args.checkpoint_interval, args.eval_interval)
        train.train_iwae(args.train_mode, generative_model, inference_network,
                         data_loader, args.num_iterations, args.num_particles,
                         optim_kwargs, train_callback)
    elif args.train_mode == 'thermo':
        train_callback = train.TrainThermoCallback(
            save_dir, args.num_particles, partition, test_data_loader,
            args.eval_num_particles, args.logging_interval,
            args.checkpoint_interval, args.eval_interval)
        train.train_thermo(generative_model, inference_network, data_loader,
                           args.num_iterations, args.num_particles, partition,
                           optim_kwargs, train_callback)
    elif args.train_mode == 'thermo_wake':
        train_callback = train.TrainThermoWakeCallback(
            save_dir, args.num_particles, test_data_loader,
            args.eval_num_particles, args.logging_interval,
            args.checkpoint_interval, args.eval_interval)
        train.train_thermo_wake(generative_model, inference_network,
                                data_loader, args.num_iterations,
                                args.num_particles, partition, optim_kwargs,
                                train_callback)

    # eval validation
    train_callback.valid_log_p, train_callback.valid_kl = train.eval_gen_inf(
        generative_model, inference_network, valid_data_loader,
        args.eval_num_particles)

    # save models and stats
    util.save_checkpoint(save_dir,
                         iteration=None,
                         generative_model=generative_model,
                         inference_network=inference_network)
    stats_path = util.get_stats_path(save_dir)
    util.save_object(train_callback, stats_path)
format(input_word_ids.shape, input_mask.shape, labels.shape)) # Batch encode input validation data validation_input = data.encode(validation_df, tokenizer, max_len=args.max_sequence_length) validation_word_ids = validation_input['input_word_ids'] validation_mask = validation_input['input_mask'] validation_labels = validation_input['labels'] print( "Validation input shape: input_word_ids=>{}, input_mask=>{}, labels=>{}" .format(validation_word_ids.shape, validation_mask.shape, validation_labels.shape)) # Load the input data train_dataloader, validation_dataloader = data.get_data_loader( train_input, validation_input, args.batch_size) # Build the model by passing in the input params model_class = XLMRobertaForSequenceClassification model = Classifier(model_class, args.model_name, num_labels=args.num_labels, output_attentions=False, output_hidden_states=False) # Send the model to the device model.to(device) # Train the model train_losses, train_accuracies, validation_losses, validation_accuracies = train_fn( model, train_dataloader,
[int(os.path.splitext(os.path.basename(x))[0]) for x in filelist]) max_epoch = np.max(epochs) resume_file = os.path.join(params.checkpoint_dir, '{:d}.tar'.format(max_epoch)) return resume_file if __name__ == '__main__': np.random.seed(10) params = parse_args() with open(params.traincfg, 'r') as f: train_data_params = yaml.load(f) with open(params.valcfg, 'r') as f: val_data_params = yaml.load(f) train_loader = data.get_data_loader(train_data_params) val_loader = data.get_data_loader(val_data_params) model = get_model(params.model, params.num_classes) model = model.cuda() model = torch.nn.DataParallel(model) loss_fn = losses.GenericLoss(params.aux_loss_type, params.aux_loss_wt, params.num_classes) if not os.path.isdir(params.checkpoint_dir): os.makedirs(params.checkpoint_dir) start_epoch = params.start_epoch stop_epoch = params.stop_epoch if params.allow_resume: resume_file = get_resume_file(params.resume_file) if resume_file is not None:
def main():
    """Train DiveModel with periodic visualization, evaluation and saving."""
    opt, logger, vis = cfg.build(is_train=True, tb_dir='train_log')

    # Training Set
    train_loader = get_data_loader(opt)
    print('Train dataset: {}'.format(len(train_loader.dataset)))
    # Validation set
    val_opt = copy.deepcopy(opt)
    val_opt.is_train = False
    val_loader = get_data_loader(val_opt)
    print('Val dataset: {}'.format(len(val_loader.dataset)))

    # Initialize model
    model = DiveModel(opt)
    model.setup_training()
    model.initialize_weights()

    # Load checkpoints
    if opt.load_ckpt_epoch != 0:
        opt.load_ckpt_dir = opt.ckpt_name
        ckpt_dir = os.path.join(opt.ckpt_dir, opt.dset_name, opt.load_ckpt_dir)
        assert os.path.exists(ckpt_dir)
        logger.print('Loading checkpoint from {}'.format(ckpt_dir))
        model.load(ckpt_dir, opt.load_ckpt_epoch)
        opt.start_epoch = opt.load_ckpt_epoch

    # Make sure at least opt.n_iters optimization steps are run.
    opt.n_epochs = max(opt.n_epochs, opt.n_iters // len(train_loader))
    logger.print('Total epochs: {}'.format(opt.n_epochs))

    for epoch in range(opt.start_epoch, opt.n_epochs):
        model.setup(is_train=True)
        print('Train epoch', epoch)
        hp_dict = model.update_hyperparameters(epoch, opt.n_epochs)
        vis.add_scalar(hp_dict, epoch)

        for step, data in enumerate(train_loader):
            # Renamed from `input`/`output` to avoid shadowing builtins.
            seq_in, seq_out, _, _ = data
            _, loss_dict = model.train(*data[:2])
            if step % opt.log_every == 0:
                # Write to tensorboard
                vis.add_scalar(loss_dict, epoch * len(train_loader) + step)
                # Visualization on the current training batch
                model.test(seq_in, seq_out)
                vis.add_images(model.get_visuals(),
                               epoch * len(train_loader) + step,
                               prefix='train')
                # Random sample test data
                seq_in, seq_out, _, _ = val_loader.dataset[np.random.randint(
                    len(val_loader.dataset))]
                seq_in = seq_in.unsqueeze(0)
                seq_out = seq_out.unsqueeze(0)
                model.test(seq_in, seq_out)
                vis.add_images(model.get_visuals(),
                               epoch * len(train_loader) + step,
                               prefix='test')
        logger.print('Epoch {}/{}:'.format(epoch, opt.n_epochs - 1))

        # Evaluate on val set
        if opt.evaluate_every > 0 and (epoch) % opt.evaluate_every == 0 and \
                opt.n_frames_output > 0:
            results = evaluate(val_opt, val_loader, model)
            vis.add_scalar(results, epoch)
            # Bug fix: use a context manager so the metrics file is closed
            # even when logging or writing raises (the original leaked the
            # handle on exception).
            with open(os.path.join(opt.ckpt_path, str(epoch)), "w+") as fout:
                for metric in results.keys():
                    logger.print('{}: {}'.format(metric, results[metric]))
                    fout.write('{}\t{}\n'.format(metric, results[metric]))

        # Save model checkpoints
        if (epoch + 1) % opt.save_every == 0 and epoch > 0 \
                or epoch == opt.n_epochs - 1:
            model.save(opt.ckpt_path, epoch + 1)
description="Train classification models on ImageNet", formatter_class=argparse.ArgumentDefaultsHelpFormatter) models.add_model_args(parser) fit.add_fit_args(parser) data.add_data_args(parser) dali.add_dali_args(parser) data.add_data_aug_args(parser) return parser.parse_args() def setup_logging(args): head = '{asctime}:{levelname}: {message}' logging.basicConfig(level=logging.DEBUG, format=head, style='{', handlers=[ logging.StreamHandler(sys.stderr), logging.FileHandler(args.log) ]) logging.info('Start with arguments {}'.format(args)) if __name__ == '__main__': args = parse_args() setup_logging(args) model = models.get_model(**vars(args)) data_loader = data.get_data_loader(args) fit.fit(args, model, data_loader)
import data # network models from networks import AudioDiscriminator, AudioGenerator # optimizers from optimizers import linear_adam_optimizer # training steps from train import train_discriminator, train_generator sample_size = 160000 # length of audio samples used to train # load the vctk utterance data vctk = data.vctk_data(max_len=sample_size) data_loader = data.get_data_loader(vctk, 100) # get the number of batches from the data loader num_batches = len(data_loader) # create the discriminator model + optimizer discriminator = AudioDiscriminator(sample_size) d_optimizer = linear_adam_optimizer(discriminator.parameters(), 0.0002) # create the generator model + optimizer generator = AudioGenerator(sample_size) g_optimizer = linear_adam_optimizer(generator.parameters(), 0.0002) # convert to gpu if available if torch.cuda.is_available(): generator.cuda()
def parse_args(): parser = argparse.ArgumentParser(description='Save features') parser.add_argument('--cfg', required=True, help='yaml file containing config for data') parser.add_argument('--outfile', required=True, help='save file') parser.add_argument('--modelfile', required=True, help='model file') parser.add_argument('--model', type=str, default='ResNet10', help='model') parser.add_argument('--num_classes', type=int,default=1000) return parser.parse_args() if __name__ == '__main__': params = parse_args() with open(params.cfg,'r') as f: data_params = yaml.load(f) data_loader = data.get_data_loader(data_params) model = get_model(params.model, params.num_classes) model = model.cuda() model = torch.nn.DataParallel(model) from torch.utils.serialization import load_lua #tmp = load_lua('/home/bharathh/local/cachedir/from_lua.t7') tmp = torch.load(params.modelfile) if ('module.classifier.bias' not in model.state_dict().keys()) and ('module.classifier.bias' in tmp['state'].keys()): tmp['state'].pop('module.classifier.bias') model.load_state_dict(tmp['state']) model.eval() dirname = os.path.dirname(params.outfile) if not os.path.isdir(dirname): os.makedirs(dirname)
def train(self, dataset, num_workers, epochs, batch_sizes, fade_in_percentage,
          logger, output, num_samples=36, start_depth=0, feedback_factor=100,
          checkpoint_factor=1):
    """
    Utility method for training the GAN. Note that you don't have to
    necessarily use this; you can use optimize_generator and
    optimize_discriminator for your own training routine.

    :param dataset: object of the dataset used for training.
                    Note that this is not the data loader (we create the data
                    loader in this method since the batch_sizes for
                    resolutions can be different)
    :param num_workers: number of workers for reading the data. def=3
    :param epochs: list of number of epochs to train the network for every
                   resolution
    :param batch_sizes: list of batch_sizes for every resolution
    :param fade_in_percentage: list of percentages of epochs per resolution
                               used for fading in the new layer; not used for
                               the first resolution, but a dummy value is
                               still needed.
    :param logger: logger used for progress messages
    :param output: output dir for samples, models and log
    :param num_samples: number of samples generated in sample_sheet. def=36
    :param start_depth: start training from this depth. def=0
    :param feedback_factor: number of logs per epoch. def=100
    :param checkpoint_factor: save checkpoints every this many epochs
    :return: None (writes multiple files to disk)
    """
    assert self.depth <= len(epochs), "epochs not compatible with depth"
    assert self.depth <= len(batch_sizes), "batch_sizes not compatible with depth"
    assert self.depth <= len(fade_in_percentage), "fade_in_percentage not compatible with depth"

    # turn the generator and discriminator into train mode
    self.gen.train()
    self.dis.train()
    if self.use_ema:
        self.gen_shadow.train()

    # create a global time counter
    global_time = time.time()

    # create fixed_input for debugging
    fixed_input = torch.randn(num_samples, self.latent_size).to(self.device)

    # config depend on structure
    logger.info("Starting the training process ... \n")
    if self.structure == 'fixed':
        start_depth = self.depth - 1
    step = 1  # counter for number of iterations
    for current_depth in range(start_depth, self.depth):
        # resolution at this depth: 4x4 at depth 0, doubling per depth
        current_res = np.power(2, current_depth + 2)
        logger.info("Currently working on depth: %d", current_depth + 1)
        logger.info("Current resolution: %d x %d" % (current_res, current_res))
        ticker = 1
        # Choose training parameters and configure training ops.
        # TODO
        data = get_data_loader(dataset, batch_sizes[current_depth],
                               num_workers)

        for epoch in range(1, epochs[current_depth] + 1):
            start = timeit.default_timer()  # record time at the start of epoch
            logger.info("Epoch: [%d]" % epoch)
            # total_batches = len(iter(data))
            total_batches = len(data)
            # number of iterations over which the new layer is faded in
            # NOTE(review): fade_point is 0 when fade_in_percentage[depth]
            # is 0 — the alpha division below would raise; confirm inputs.
            fade_point = int((fade_in_percentage[current_depth] / 100) *
                             epochs[current_depth] * total_batches)

            for (i, batch) in enumerate(data, 1):
                # calculate the alpha for fading in the layers
                alpha = ticker / fade_point if ticker <= fade_point else 1
                # extract current batch of data for training
                images = batch.to(self.device)
                gan_input = torch.randn(images.shape[0],
                                        self.latent_size).to(self.device)

                # optimize the discriminator:
                dis_loss = self.optimize_discriminator(gan_input, images,
                                                       current_depth, alpha)

                # optimize the generator:
                gen_loss = self.optimize_generator(gan_input, images,
                                                   current_depth, alpha)

                # provide a loss feedback
                if i % int(total_batches / feedback_factor + 1) == 0 or i == 1:
                    elapsed = time.time() - global_time
                    elapsed = str(
                        datetime.timedelta(seconds=elapsed)).split('.')[0]
                    self.writer.add_scalar("D_loss", dis_loss, step)
                    self.writer.add_scalar("G_loss", gen_loss, step)
                    logger.info(
                        "Elapsed: [%s] Step: %d Batch: %d D_Loss: %f G_Loss: %f"
                        % (elapsed, step, i, dis_loss, gen_loss))

                    # create a grid of samples and save it
                    os.makedirs(os.path.join(output, 'samples'), exist_ok=True)
                    gen_img_file = os.path.join(
                        output, 'samples', "gen_" + str(current_depth) + "_" +
                        str(epoch) + "_" + str(i) + ".png")
                    with torch.no_grad():
                        # sample from the EMA shadow generator when enabled
                        self.create_grid(
                            samples=self.gen(fixed_input, current_depth,
                                             alpha).detach()
                            if not self.use_ema else self.gen_shadow(
                                fixed_input, current_depth, alpha).detach(),
                            scale_factor=int(
                                np.power(2, self.depth - current_depth - 1))
                            if self.structure == 'linear' else 1,
                            img_file=gen_img_file,
                            writer=self.writer,
                            step=step)

                # increment the alpha ticker and the step
                ticker += 1
                step += 1

            elapsed = timeit.default_timer() - start
            elapsed = str(datetime.timedelta(seconds=elapsed)).split('.')[0]
            logger.info("Time taken for epoch: %s\n" % elapsed)

            if epoch % checkpoint_factor == 0 or epoch == 1 \
                    or epoch == epochs[current_depth]:
                save_dir = os.path.join(output, 'models')
                os.makedirs(save_dir, exist_ok=True)
                gen_save_file = os.path.join(
                    save_dir,
                    "GAN_GEN_" + str(current_depth) + "_" + str(epoch) +
                    ".pth")
                dis_save_file = os.path.join(
                    save_dir,
                    "GAN_DIS_" + str(current_depth) + "_" + str(epoch) +
                    ".pth")
                gen_optim_save_file = os.path.join(
                    save_dir, "GAN_GEN_OPTIM_" + str(current_depth) + "_" +
                    str(epoch) + ".pth")
                dis_optim_save_file = os.path.join(
                    save_dir, "GAN_DIS_OPTIM_" + str(current_depth) + "_" +
                    str(epoch) + ".pth")

                torch.save(self.gen.state_dict(), gen_save_file)
                logger.info("Saving the model to: %s\n" % gen_save_file)
                torch.save(self.dis.state_dict(), dis_save_file)
                torch.save(self.gen_optim.state_dict(), gen_optim_save_file)
                torch.save(self.dis_optim.state_dict(), dis_optim_save_file)

                # also save the shadow generator if use_ema is True
                if self.use_ema:
                    gen_shadow_save_file = os.path.join(
                        save_dir, "GAN_GEN_SHADOW_" + str(current_depth) +
                        "_" + str(epoch) + ".pth")
                    torch.save(self.gen_shadow.state_dict(),
                               gen_shadow_save_file)
                    logger.info("Saving the model to: %s\n" %
                                gen_shadow_save_file)

    logger.info('Training completed.\n')
criterion_pixelwise.to(device) if args.epoch != 0: # Load pretrained models generator.load_state_dict(torch.load("saved_models/%s/generator_%d.pth" % (args.dataset_name, args.epoch))) discriminator.load_state_dict(torch.load("saved_models/%s/discriminator_%d.pth" % (args.dataset_name, args.epoch))) else: # Initialize weights generator.apply(weights_init_normal) discriminator.apply(weights_init_normal) # Optimizers optimizer_G = torch.optim.Adam(generator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=args.lr, betas=(args.b1, args.b2)) dataloader, val_dataloader = get_data_loader(args.batch_size, args.target_hour) def sample_images(batches_done): """Saves a generated sample from the validation set""" imgs = next(iter(val_dataloader)) real_A = imgs[0].to(device) real_B = imgs[1].to(device) fake_B = generator(real_A) img_dir = "sample_images/%s/%s.png" % (args.dataset_name, batches_done) set_fig_settings((FIG_REG_WIDTH * 2, FIG_REG_WIDTH * 1.25)) fig = plt.figure() for i in range(2): plt.subplot(1, 2, i + 1) plt.title('Real') if i == 0 else plt.title('Fake')
parser = ArgumentParser() parser.add_argument('-bs', '--batch_size', type=int, default=1, help="batch size of the data") parser.add_argument('-e', '--epochs', type=int, default=100, help='epoch of the train') parser.add_argument('-lr', '--learning_rate', type=float, default=1e-3, help='learning rate') args = parser.parse_args() batch_size = args.batch_size learning_rate = args.learning_rate max_epochs = args.epochs use_cuda = torch.cuda.is_available() config_name = 'config.json' config, _, _, _ = load_config(config_name) train_data_loader, val_data_loader = get_data_loader(batch_size=batch_size, use_npy=config['use_npy'], frame_range=config['frame_range']) criterion = CustomLoss(device=device, num_classes=1) optimizer = Adam(net.parameters()) def train(epoch): net.train() total_loss = 0. for batch_idx, (pc_feature, label_map) in enumerate(train_data_loader): N = pc_feature.size(0) pc_feature = pc_feature.to(device) label_map = label_map.to(device)
import copy
import numpy as np
import os
import data
import models
import utils
import sys
from test import evaluate

# Build options, logger and tensorboard visualizer for a training run.
opt, logger, vis = utils.build(is_train=True, tb_dir='tb_train')
train_loader = data.get_data_loader(opt)

# Validation set
# Clone the training options and flip them to evaluation mode for the
# validation loader.
val_opt = copy.deepcopy(opt)
val_opt.is_train = False
val_opt.num_objects = [1]  # Only matters for MNIST
val_loader = data.get_data_loader(val_opt)
print('Val dataset: {}'.format(len(val_loader.dataset)))

model = models.get_model(opt)
save_every = 5

# Optionally resume from a previously saved checkpoint directory.
if opt.load_ckpt_dir != '':
    ckpt_dir = os.path.join(opt.ckpt_dir, opt.dset_name, opt.load_ckpt_dir)
    assert os.path.exists(ckpt_dir)
    logger.print('Loading checkpoint from {}'.format(ckpt_dir))
    model.load(ckpt_dir, opt.load_ckpt_epoch)

# Make sure there are enough epochs to cover the requested iteration count.
opt.n_epochs = max(opt.n_epochs, opt.n_iters // len(train_loader))
logger.print('Total epochs: {}'.format(opt.n_epochs))
def train(config):
    """Train an encoder/decoder seq2seq pair described by *config*.

    Builds the data loaders, models and optimizers, then drives an ignite
    training loop with per-epoch validation, a printed sample prediction,
    early stopping and best-model checkpointing.

    Args:
        config: experiment configuration; must expose train_data_path,
            val_data_path, learning_rate, vocab_size, cuda,
            use_teacher_forcing_perc and epochs.
    """
    train_dataloader = get_data_loader(config.train_data_path, config)
    val_dataloader = get_data_loader(config.val_data_path, config)
    encoder, decoder = get_model(config)
    encoder_optimizer = optim.Adam(encoder.parameters(),
                                   lr=config.learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(),
                                   lr=config.learning_rate)

    # One tensorboard run per invocation, keyed by UTC start time.
    dt = datetime.datetime.utcnow()
    writer = create_summary_writer(train_dataloader, log_dir=f"logs/{dt}")

    # Zero weight on PAD so padding tokens do not contribute to the loss.
    weight = torch.ones(config.vocab_size)
    weight[PAD_IDX] = 0
    criterion = maybe_cuda(nn.NLLLoss(weight), cuda=config.cuda)

    def train_step(engine, batch):
        """One optimization step on a single batch; returns the batch loss."""
        encoder.train()
        decoder.train()
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        (x, x_len), (y, y_len) = batch
        # BUGFIX: sample the teacher-forcing coin per batch. Previously it was
        # drawn once when train() started, so the whole run either always or
        # never used teacher forcing instead of using it with probability
        # config.use_teacher_forcing_perc.
        use_teacher_forcing = np.random.uniform(
            0, 1) < config.use_teacher_forcing_perc
        full_decoder_output, attn_ls = run_model_on_batch(
            encoder=encoder,
            decoder=decoder,
            config=config,
            use_teacher_forcing=use_teacher_forcing,
            batch_data=batch,
        )
        y_var = maybe_cuda(Variable(y, requires_grad=False), cuda=config.cuda)
        batch_loss = criterion(
            full_decoder_output.view(-1, full_decoder_output.size()[2]),
            y_var.view(-1))
        batch_loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()
        return batch_loss

    def validate_step(engine, batch):
        """Compute the loss on one validation batch (no parameter updates)."""
        encoder.eval()
        decoder.eval()
        (x, x_len), (y, y_len) = batch
        with torch.no_grad():  # no gradients needed during validation
            full_decoder_output, attn_ls = run_model_on_batch(
                encoder=encoder,
                decoder=decoder,
                config=config,
                use_teacher_forcing=False,
                batch_data=batch,
            )
            y_var = maybe_cuda(Variable(y, requires_grad=False),
                               cuda=config.cuda)
            batch_loss = criterion(
                full_decoder_output.view(-1, full_decoder_output.size()[2]),
                y_var.view(-1))
        return batch_loss

    def inspect_step(engine, batch):
        """Decode one element of *batch* and print input/target/prediction."""
        (x, x_len), (y, y_len) = batch
        num = 0  # We only inspect one element (default: first) of the batch
        single_elem_batch_data = (
            (x[num:num + 1], x_len[num:num + 1]),
            (y[num:num + 1], y_len[num:num + 1]),
        )
        encoder.eval()
        decoder.eval()
        with torch.no_grad():
            full_decoder_output, attn_ls = run_model_on_batch(
                encoder=encoder,
                decoder=decoder,
                config=config,
                use_teacher_forcing=False,
                batch_data=single_elem_batch_data,
            )
        topv, topi = full_decoder_output.data.topk(1)
        pred_y = topi.squeeze(2)
        # Renamed the lambda parameter (was `x`) so it no longer shadows the
        # batch input above.
        mapper = lambda t: val_dataloader.dataset.input_sequence.batch_tensor_to_string(
            t)
        print("Input string:\n {}\n".format(mapper(x)[0]))
        print("Expected output:\n {}\n".format(mapper(y)[0]))
        print("Predicted output:\n {}\n".format(mapper(pred_y)[0]))
        return

    trainer = Engine(train_step)
    evaluator = Engine(validate_step)
    inspect = Engine(inspect_step)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(engine):
        # Emit a machine-readable metric line plus a tensorboard scalar.
        print(
            json.dumps({
                "metric": "train_loss",
                "value": float(engine.state.output),
                "step": engine.state.epoch,
            }))
        writer.add_scalar("training/loss", engine.state.output,
                          engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(engine):
        evaluator.run(val_dataloader)
        print(
            json.dumps({
                "metric": "val_loss",
                "value": float(evaluator.state.output),
                "step": engine.state.epoch,
            }))
        writer.add_scalar("validation/loss", evaluator.state.output,
                          engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_inspect_results(engine):
        inspect.run(val_dataloader)

    # Early stopping: ignite maximizes the score, so negate the loss.
    def score_function(engine):
        val_loss = engine.state.output
        return -val_loss

    # Stop after 20 consecutive evaluations without improvement.
    handler = EarlyStopping(patience=20,
                            score_function=score_function,
                            trainer=trainer)
    evaluator.add_event_handler(Events.COMPLETED, handler)

    # Save the model when it beats the previous best (n_saved=1 keeps only
    # the single best checkpoint).
    checkpoint_handler = ModelCheckpoint(
        "models",
        "torch_add",
        score_function=score_function,
        n_saved=1,
        require_empty=False,
    )
    trainer.add_event_handler(
        Events.EPOCH_COMPLETED,
        checkpoint_handler,
        to_save={
            "encoder": encoder,
            "decoder": decoder
        },
    )

    # Average wall-clock time per batch, reset at each epoch boundary.
    timer = Timer(average=True)
    timer.attach(
        trainer,
        start=Events.EPOCH_STARTED,
        resume=Events.ITERATION_STARTED,
        pause=Events.ITERATION_COMPLETED,
        step=Events.ITERATION_COMPLETED,
    )

    @trainer.on(Events.EPOCH_COMPLETED)
    def print_times(engine):
        print(
            f"Epoch {engine.state.epoch} done. Time per batch: {timer.value():.3f}[s]"
        )
        timer.reset()

    trainer.run(train_dataloader, max_epochs=config.epochs)
    writer.close()
def infer_runner(img_set_folder,
                 model_file,
                 samples_limit=None,
                 tta=False,
                 batch_size=64,
                 write=True):
    """Run multilabel inference over an image set with a saved model.

    For 'train'/'validation' sets, per-class decision thresholds are
    re-calibrated on the fly and (when *write* is True) persisted. For
    full-set runs the raw confidences are saved for later ensembling, and
    for the 'test' set a Kaggle submission file is written.

    Args:
        img_set_folder: path whose last component names the set
            ('train', 'validation' or 'test').
        model_file: checkpoint path; also used to derive the model type.
        samples_limit: optional cap on the number of samples to infer.
        tta: enable test-time augmentation in the data loader.
        batch_size: inference batch size.
        write: when False, nothing is written to disk.
    """
    set_type = img_set_folder.split("/")[-1]
    model_type = model_type_from_model_file(model_file)
    image_dataset, dataloader = get_data_loader(img_set_folder,
                                                model_type,
                                                set_type,
                                                batch_size=batch_size,
                                                tta=tta,
                                                use_test_transforms=True)
    class_names = image_dataset.classes
    print("Is CUDA available?: {}".format(torch.cuda.is_available()))
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = get_model(model_type, len(class_names), model_file=model_file)
    model = model.to(device)
    #model.thresholds = np.load("thresholds.npy")

    image_ids, labels, preds, confidences, global_scores, per_class_scores = \
        infer(model, dataloader, device, samples_limit=samples_limit,
              threshold=model.thresholds)

    # The leading 'True and' is a manual toggle: flip to False to skip the
    # on-the-fly threshold calibration below.
    if True and set_type in ['validation', 'train']:
        print("Calculating thresholds on the fly.")
        model.thresholds = calculate_optimal_thresholds_one_by_one(
            labels, confidences, slices=250, old_thresholds=model.thresholds)
        # Re-derive the hard predictions with the freshly calibrated thresholds.
        vec_preds = np.array(
            confidences
        ) > model.thresholds  # updating prediction with new thresholds.
        preds = vector_to_index_list(vec_preds)
        global_scores = f1_score(*reduce_stats(*multilabel_stats(
            np.array(labels), np.array(confidences), model.thresholds)))
        if write:
            np.save("thresholds", model.thresholds)
            np.save(model_file + ".thresholds", model.thresholds)

    #if set_type in ['train', 'validation']:
    #    print("Global results for {}. F1: {:.3}, precision: {:.3}, recall: {:.3}".format(set_type, *global_scores))
    #    np.savetxt("{}_per_class_scores.csv".format(set_type),
    #               np.array([image_dataset.class_frequency()] + list(per_class_scores)).T,
    #               header="original_frequency, f1, precision, recall", delimiter=",")

    # BUGFIX: guard the samples_limit comparison. The original evaluated
    # 'samples_limit > 25000' even when samples_limit was None (e.g. a
    # full-set run on the 'train' split), which raises TypeError on Python 3.
    if write and \
            ((samples_limit is None and set_type in ['validation', 'test']) or
             (samples_limit is not None and samples_limit > 25000
              and set_type == 'train')):
        # Saving results just for full sets inference.
        # They can be used for ensembling.
        base_path = os.path.dirname(model_file)
        results_file = os.path.join(
            base_path,
            "inference_{}_{}.th".format(set_type, "tta" if tta else "no_tta"))
        torch.save(
            {
                "image_ids": image_ids,
                "thresholds": model.thresholds,
                "labels": labels,
                "confidences": confidences,
                "f1": global_scores[0]
            }, results_file)
        performance_file = os.path.join(
            base_path, "performance_{}_{}.txt".format(
                set_type, "tta" if tta else "no_tta"))
        with open(performance_file, "w") as f:
            f.write("{:.4}\n".format(global_scores[0]))

    if write and set_type == 'test':
        save_kaggle_submision("kaggle_submision.csv", image_ids, preds,
                              image_dataset.classes)
print("epoch,lr and loss are", i + 1, current_lr, loss) # Evaluate our model and add visualizations on tensorboard if i % cfg['val_every'] == 0: # Run the network on the test set, and get the loss and accuracy on the test set testloss, acc = run(net, i, test_loader, optimizer, criterion, scheduler, train=False) print("Epoch: %d, Test Accuracy:%2f , test loss: %f" % (i + 1, acc * 100.0, testloss)) if __name__ == '__main__': # TODO: Create a network object net = Network() #normal # net= Network('xavier') # net = Network('zero')#zero init #net = Network('one') # TODO: Create a tensorboard object for logging # TODO: Create train data loader train_loader = get_data_loader('train') # TODO: Create test data loader test_loader = get_data_loader('test') # Run the training! train(net, train_loader, test_loader)
# NOTE(review): fragment — the add_argument calls and the return statement
# below are the tail of an argument-parsing function whose `def` line lies
# above this chunk.
    parser.add_argument('--cfg', required=True, help='yaml file containing config for data')
    parser.add_argument('--outfile', required=True, help='save file')
    parser.add_argument('--modelfile', required=True, help='model file')
    parser.add_argument('--model', type=str, default='ResNet10', help='model')
    parser.add_argument('--num_classes', type=int, default=1000)
    return parser.parse_args()


if __name__ == '__main__':
    params = parse_args()
    with open(params.cfg, 'r') as f:
        # NOTE(review): yaml.load without an explicit Loader is deprecated and
        # unsafe on untrusted input — presumably safe_load would do; verify.
        data_params = yaml.load(f)
    data_loader = data.get_data_loader(data_params)
    model = get_model(params.model, params.num_classes)
    model = model.cuda()
    model = torch.nn.DataParallel(model)
    # from torch.utils.serialization import load_lua
    #tmp = load_lua('/home/bharathh/local/cachedir/from_lua.t7')
    tmp = torch.load(params.modelfile)
    # Drop the classifier bias from the checkpoint when the current model's
    # state dict has no such key (e.g. a different classification head).
    if ('module.classifier.bias' not in model.state_dict().keys()) and ('module.classifier.bias' in tmp['state'].keys()):
        tmp['state'].pop('module.classifier.bias')
    model.load_state_dict(tmp['state'])
    model.eval()
    dirname = os.path.dirname(params.outfile)
    # NOTE(review): script continues past this chunk.
# NOTE(review): fragment truncated at the start — these statements belong to
# an evaluation loop (`x`, `y`, `total`, `val_features` and `one_shot_model`
# are bound above this chunk).
x = Variable(x.cuda())
scores = one_shot_model(x)
# 1.0 when the argmax prediction matches the ground-truth label, else 0.0.
x = (np.argmax(scores.data) == y[0]).data.numpy()
total = total + x
acc = total / len(val_features)
print('\n---> mean accuracy : {:.2f}%'.format(acc * 100))


if __name__ == '__main__':
    with open(cfg, 'r') as f:
        # NOTE(review): yaml.load without an explicit Loader is deprecated and
        # unsafe on untrusted input — presumably safe_load would do; verify.
        data_params = yaml.load(f)
    data_loader = data.get_data_loader(data_params)
    with open(val_cfg, 'r') as f:
        val_params = yaml.load(f)
    val_loader = data.get_data_loader(val_params)
    model = get_model(model, num_classes)
    model = model.cuda()
    model = torch.nn.DataParallel(model)
    tmp = torch.load(modelfile)
    # Drop the classifier bias from the checkpoint when the current model's
    # state dict has no such key.
    if ('module.classifier.bias' not in model.state_dict().keys()) and ('module.classifier.bias' in tmp['state'].keys()):
        tmp['state'].pop('module.classifier.bias')
    # loading pretrained imagenet model
    pretrained_dict = tmp['state']
    # NOTE(review): script continues past this chunk (end of visible fragment).