def test_CRUD_dataset(capsys):
    datasets.create_dataset(
        service_account_json, api_key, project_id, cloud_region, dataset_id)

    datasets.get_dataset(
        service_account_json, api_key, project_id, cloud_region, dataset_id)

    datasets.list_datasets(
        service_account_json, api_key, project_id, cloud_region)

    # Test and also clean up
    datasets.delete_dataset(
        service_account_json, api_key, project_id, cloud_region, dataset_id)

    out, _ = capsys.readouterr()

    # Check that create/get/list/delete worked
    assert 'Created dataset' in out
    assert 'Time zone' in out
    assert 'Dataset' in out
    assert 'Deleted dataset' in out

def main(argv=None):  # pylint: disable=unused-argument
    assert args.detect or args.segment, "Either detect or segment should be True"

    if args.trunk == 'resnet50':
        net = ResNet
        depth = 50
    if args.trunk == 'vgg16':
        net = VGG
        depth = 16
    net = net(config=net_config, depth=depth, training=True,
              weight_decay=args.weight_decay)

    if args.dataset == 'voc07':
        dataset = get_dataset('voc07_trainval')
    if args.dataset == 'voc12-trainval':
        dataset = get_dataset('voc12-train-segmentation', 'voc12-val')
    if args.dataset == 'voc12-train':
        dataset = get_dataset('voc12-train-segmentation')
    if args.dataset == 'voc12-val':
        dataset = get_dataset('voc12-val-segmentation')
    if args.dataset == 'voc07+12':
        dataset = get_dataset('voc07_trainval', 'voc12_train', 'voc12_val')
    if args.dataset == 'voc07+12-segfull':
        dataset = get_dataset('voc07-trainval-segmentation',
                              'voc12-train-segmentation', 'voc12-val')
    if args.dataset == 'voc07+12-segmentation':
        dataset = get_dataset('voc07-trainval-segmentation',
                              'voc12-train-segmentation')
    if args.dataset == 'coco':
        # support by default for coco trainval35k split
        dataset = get_dataset('coco-train2014-*', 'coco-valminusminival2014-*')
    if args.dataset == 'coco-seg':
        # support by default for coco trainval35k split
        dataset = get_dataset('coco-seg-train2014-*',
                              'coco-seg-valminusminival2014-*')

    train(dataset, net, net_config)

def __init__(self, dset_name, imsize, nc, data_root='./data',
             results_root='./results', noise_dim=100, dout_dim=1,
             batch_size=64, clip_disc=True, max_giters=50000, lr=1e-4,
             disc_size=64, batch_norm=True, disc_net='flexible-dcgan',
             gen_net='flexible-dcgan'):
    """Initializer for the base GAN model.

    Arguments:
        dset_name {str} -- Name of the dataset.
        imsize {int} -- Size of the image.
        nc {int} -- Number of channels.

    Keyword Arguments:
        data_root {str} -- Directory where datasets are stored (default: {'./data'}).
        results_root {str} -- Directory where results will be saved (default: {'./results'}).
        noise_dim {int} -- Dimension of noise input to generator (default: {100}).
        dout_dim {int} -- Dimension of output from discriminator (default: {1}).
        batch_size {int} -- Batch size (default: {64}).
        clip_disc {bool} -- Whether to clip the parameters of the discriminator to
            [-0.01, 0.01]. This should be True when gradient penalty is not used
            (default: {True}).
        max_giters {int} -- Maximum number of generator iterations (default: {50000}).
        lr {float} -- Learning rate (default: {1e-4}).
        disc_size {int} -- Number of filters in the first Conv layer of the critic
            (default: {64}).
        batch_norm {bool} -- Whether to use batch norm in the discriminator. This
            should be False when gradient penalty is used (default: {True}).
        disc_net {str} -- Discriminator network type (default: {'flexible-dcgan'}).
        gen_net {str} -- Generator network type (default: {'flexible-dcgan'}).
    """
    self.imsize = imsize
    self.nc = nc
    self.noise_dim = noise_dim
    self.dout_dim = dout_dim
    self.disc_size = disc_size
    self.batch_norm = batch_norm
    self.disc_net = disc_net
    self.gen_net = gen_net
    self._build_model()

    self.g_optim = torch.optim.RMSprop(self.generator.parameters(), lr=lr)
    self.d_optim = torch.optim.RMSprop(self.discriminator.parameters(), lr=lr)

    self.giters = 1
    self.diters = 5
    self.max_giters = max_giters

    self.data_root = data_root
    suffix = self.__class__.__name__.lower()
    suffix += '_' + str(self.disc_size) if self.disc_size != 64 else ''
    self.results_root = os.path.join(results_root, dset_name, suffix)

    self.clip_disc = clip_disc
    self.model_save_interval = 1000
    self.fixed_im_interval = 100

    self.fixed_noise = torch.cuda.FloatTensor(
        batch_size, self.noise_dim, 1, 1).normal_(0, 1)
    self.noise_tensor = torch.cuda.FloatTensor(batch_size, self.noise_dim, 1, 1)

    train_dataset = get_dataset(dset_name, data_root=self.data_root,
                                imsize=self.imsize)
    self.train_dataloader = tdata.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True,
        num_workers=4, drop_last=True)
    self.real_data = self.get_real_batch()

cv2.imwrite(os.path.join(dirname, 'virtualization', 'image.jpg'), img)
cv2.imwrite(os.path.join(dirname, 'virtualization', 'bbox.jpg'), img1)
cv2.imwrite(os.path.join(dirname, 'virtualization', 'brec.jpg'), img2)
cv2.imwrite(os.path.join(dirname, 'virtualization', 'post_process.jpg'), img3)
cv2.imwrite(os.path.join(dirname, 'virtualization', 'label_mask.jpg'), label_mask)
cv2.imwrite(os.path.join(dirname, 'virtualization', 'pred_mask.jpg'), pred_mask)
cv2.imwrite(os.path.join(dirname, 'virtualization', 'result.jpg'), img4)
cv2.imwrite(os.path.join(dirname, 'virtualization', 'heat.jpg'), img5)
for i, chip in enumerate(chip_list):
    cv2.imwrite(os.path.join(dirname, 'virtualization', 'chip_%d.jpg' % i), chip)

plt.show()
cv2.waitKey(0)


if __name__ == '__main__':
    args = parse_args()

    dataset = get_dataset(args.dataset)
    dest_datadir = dataset.region_voc_dir
    image_dir = dest_datadir + '/JPEGImages'
    segmentation_dir = dest_datadir + '/SegmentationClass'
    list_folder = dest_datadir + '/ImageSets'
    pred_mask_dir = '../pytorch-deeplab-xception/run/mask-%s-val' % args.dataset.lower()

    val_list = dataset.get_imglist('val')

    for img_path in val_list[15:]:
        print(img_path)
        _vis(img_path, dataset)

]


def logger(info):
    fold, epoch = info['fold'] + 1, info['epoch']
    val_loss, test_acc = info['val_loss'], info['test_acc']
    print('{:02d}/{:03d}: Val Loss: {:.4f}, Test Accuracy: {:.3f}'.format(
        fold, epoch, val_loss, test_acc))


results = []
for dataset_name, Net in product(datasets, nets):
    best_result = (float('inf'), 0, 0)  # (loss, acc, std)
    print('-----\n{} - {}'.format(dataset_name, Net.__name__))
    for num_layers, hidden in product(layers, hiddens):
        dataset = get_dataset(dataset_name, sparse=Net != DiffPool)
        model = Net(dataset, num_layers, hidden)
        loss, acc, std = cross_validation_with_val_set(
            dataset,
            model,
            folds=10,
            epochs=args.epochs,
            batch_size=args.batch_size,
            lr=args.lr,
            lr_decay_factor=args.lr_decay_factor,
            lr_decay_step_size=args.lr_decay_step_size,
            weight_decay=0,
            logger=None,
        )
        if loss < best_result[0]:
            best_result = (loss, acc, std)

def train(self):
    source_dataset, source_test_dataset = get_dataset(self.args, self.config.source)
    source_loader = DataLoader(source_dataset,
                               batch_size=self.config.training.batch_size,
                               shuffle=True,
                               num_workers=self.config.source.data.num_workers,
                               drop_last=True)
    source_batches = iter(source_loader)

    target_dataset, target_test_dataset = get_dataset(self.args, self.config.target)
    target_loader = DataLoader(target_dataset,
                               batch_size=self.config.training.batch_size,
                               shuffle=True,
                               num_workers=self.config.target.data.num_workers,
                               drop_last=True)
    target_batches = iter(target_loader)

    cpat = get_compatibility(self.config)
    cpat_opt = get_optimizer(self.config, cpat.parameters())

    if self.args.resume_training:
        states = torch.load(os.path.join(self.args.log_path, 'checkpoint.pt'))
        cpat.load_state_dict(states[0])
        cpat_opt.load_state_dict(states[1])
        logging.info(f"Resuming training after {states[2]} steps.")

    logging.info("Optimizing the compatibility function.")
    with tqdm(total=self.config.training.n_iters) as progress:
        for d_step in range(self.config.training.n_iters):
            try:
                (Xs, ys) = next(source_batches)
                (Xt, yt) = next(target_batches)
            except StopIteration:
                # Refresh the iterators after one epoch
                source_batches = iter(source_loader)
                target_batches = iter(target_loader)
                (Xs, ys) = next(source_batches)
                (Xt, yt) = next(target_batches)

            Xs = data_transform(self.config.source, Xs)
            Xs = Xs.to(self.config.device)
            Xt = data_transform(self.config.target, Xt)
            Xt = Xt.to(self.config.device)

            obj = cpat_opt.step(lambda: self._cpat_closure(Xs, Xt, cpat, cpat_opt))
            avg_density = torch.mean(cpat.forward(Xs, Xt))

            obj_val = round(obj.item(), 5)
            avg_density_val = round(avg_density.item(), 5)
            progress.update(1)
            progress.set_description_str(f"Average Density: {avg_density_val}")
            self.config.tb_logger.add_scalars('Optimization', {
                'Objective': obj_val,
                'Average Density': avg_density_val
            }, d_step)

            if d_step % self.config.training.snapshot_freq == 0:
                states = [
                    cpat.state_dict(),
                    cpat_opt.state_dict(),
                    d_step
                ]
                torch.save(states, os.path.join(self.args.log_path,
                                                f'checkpoint_{d_step}.pth'))
                torch.save(states, os.path.join(self.args.log_path, 'checkpoint.pth'))

def main(args):
    train_set = get_dataset(
        args.dataset,
        args.data_dir,
        transform=get_aug(args.model, args.image_size, True),
        train=True,
        download=args.download  # default is False
    )

    if args.debug:
        args.batch_size = 2
        args.num_epochs = 1  # train only one epoch
        args.num_workers = 0
        # take only one batch
        train_set = torch.utils.data.Subset(train_set, range(0, args.batch_size))

    train_loader = torch.utils.data.DataLoader(
        dataset=train_set,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=True
    )

    # define model
    model = get_model(args.model, args.backbone).to(args.device)
    model = torch.nn.DataParallel(model)
    if torch.cuda.device_count() > 1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    # define optimizer
    optimizer = get_optimizer(
        args.optimizer, model,
        lr=args.base_lr * args.batch_size / 256,
        momentum=args.momentum,
        weight_decay=args.weight_decay)

    # TODO: linear lr warm up for byol simclr swav
    # args.warm_up_epochs

    # define lr scheduler
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.num_epochs, eta_min=0)

    loss_meter = AverageMeter(name='Loss')

    # Start training
    for epoch in tqdm(range(0, args.num_epochs), desc='Training'):
        loss_meter.reset()
        model.train()
        p_bar = tqdm(train_loader, desc=f'Epoch {epoch}/{args.num_epochs}')
        for idx, ((images1, images2), labels) in enumerate(p_bar):
            model.zero_grad()
            loss = model.forward(images1.to(args.device), images2.to(args.device))
            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item())
            p_bar.set_postfix({"loss": loss_meter.val, 'loss_avg': loss_meter.avg})
        lr_scheduler.step()

        # Save checkpoint
        os.makedirs(args.output_dir, exist_ok=True)
        model_path = os.path.join(
            args.output_dir, f'{args.model}-{args.dataset}-epoch{epoch+1}.pth')
        torch.save({
            'epoch': epoch + 1,
            'state_dict': model.module.state_dict(),
            # 'optimizer': optimizer.state_dict(),  # will double the checkpoint file size
            'lr_scheduler': lr_scheduler.state_dict(),
            'args': args,
            'loss_meter': loss_meter
        }, model_path)
        print(f"Model saved to {model_path}")

def train_multi_task(param_file):
    with open('configs.json') as config_params:
        configs = json.load(config_params)

    with open(param_file) as json_params:
        params = json.load(json_params)

    exp_identifier = []
    for (key, val) in params.items():
        if 'tasks' in key:
            continue
        exp_identifier += ['{}={}'.format(key, val)]
    exp_identifier = '|'.join(exp_identifier)
    params['exp_id'] = exp_identifier

    writer = SummaryWriter(log_dir='runs/{}_{}'.format(
        params['exp_id'],
        datetime.datetime.now().strftime("%I:%M%p on %B %d, %Y")))

    train_loader, train_dst, val_loader, val_dst = datasets.get_dataset(params, configs)
    loss_fn = losses.get_loss(params)
    metric = metrics.get_metrics(params)

    model = model_selector.get_model(params)
    model_params = []
    for m in model:
        model_params += model[m].parameters()

    if 'RMSprop' in params['optimizer']:
        optimizer = torch.optim.RMSprop(model_params, lr=params['lr'])
    elif 'Adam' in params['optimizer']:
        optimizer = torch.optim.Adam(model_params, lr=params['lr'])
    elif 'SGD' in params['optimizer']:
        optimizer = torch.optim.SGD(model_params, lr=params['lr'], momentum=0.9)

    tasks = params['tasks']
    all_tasks = configs[params['dataset']]['all_tasks']
    print('Starting training with parameters \n \t{} \n'.format(str(params)))

    if 'mgda' in params['algorithm']:
        approximate_norm_solution = params['use_approximation']
        if approximate_norm_solution:
            print('Using approximate min-norm solver')
        else:
            print('Using full solver')

    n_iter = 0
    loss_init = {}
    for epoch in tqdm(range(NUM_EPOCHS)):
        start = timer()
        print('Epoch {} Started'.format(epoch))
        if (epoch + 1) % 10 == 0:
            # Every 10 epochs, decay the learning rate by a factor of 0.85
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.85
            print('Decayed the learning rate at iteration {}'.format(n_iter))

        for m in model:
            model[m].train()

        for batch in train_loader:
            n_iter += 1
            # First member is always images
            images = batch[0]
            images = Variable(images.cuda())

            labels = {}
            # Read all targets of all tasks
            for i, t in enumerate(all_tasks):
                if t not in tasks:
                    continue
                labels[t] = batch[i + 1]
                labels[t] = Variable(labels[t].cuda())

            # Scaling the loss functions based on the algorithm choice
            loss_data = {}
            grads = {}
            scale = {}
            mask = None
            masks = {}
            if 'mgda' in params['algorithm']:
                # Will use MGDA_UB if approximate_norm_solution is True;
                # otherwise, will use MGDA.
                if approximate_norm_solution:
                    optimizer.zero_grad()
                    # First compute representations (z)
                    images_volatile = Variable(images.data, volatile=True)
                    rep, mask = model['rep'](images_volatile, mask)
                    # As an approximate solution we only need gradients for input
                    if isinstance(rep, list):
                        # This is a hack to handle psp-net
                        rep = rep[0]
                        rep_variable = [Variable(rep.data.clone(), requires_grad=True)]
                        list_rep = True
                    else:
                        rep_variable = Variable(rep.data.clone(), requires_grad=True)
                        list_rep = False

                    # Compute gradients of each loss function wrt z
                    for t in tasks:
                        optimizer.zero_grad()
                        out_t, masks[t] = model[t](rep_variable, None)
                        loss = loss_fn[t](out_t, labels[t])
                        loss_data[t] = loss.data[0]
                        loss.backward()
                        grads[t] = []
                        if list_rep:
                            grads[t].append(Variable(rep_variable[0].grad.data.clone(),
                                                     requires_grad=False))
                            rep_variable[0].grad.data.zero_()
                        else:
                            grads[t].append(Variable(rep_variable.grad.data.clone(),
                                                     requires_grad=False))
                            rep_variable.grad.data.zero_()
                else:
                    # This is MGDA
                    for t in tasks:
                        # Compute gradients of each loss function wrt parameters
                        optimizer.zero_grad()
                        rep, mask = model['rep'](images, mask)
                        out_t, masks[t] = model[t](rep, None)
                        loss = loss_fn[t](out_t, labels[t])
                        loss_data[t] = loss.data[0]
                        loss.backward()
                        grads[t] = []
                        for param in model['rep'].parameters():
                            if param.grad is not None:
                                grads[t].append(Variable(param.grad.data.clone(),
                                                         requires_grad=False))

                # Normalize all gradients. This is optional and not included in
                # the paper; see the notebook for details.
                gn = gradient_normalizers(grads, loss_data, params['normalization_type'])
                for t in tasks:
                    for gr_i in range(len(grads[t])):
                        grads[t][gr_i] = grads[t][gr_i] / gn[t]

                # Frank-Wolfe iteration to compute scales.
                sol, min_norm = MinNormSolver.find_min_norm_element(
                    [grads[t] for t in tasks])
                for i, t in enumerate(tasks):
                    scale[t] = float(sol[i])
            else:
                for t in tasks:
                    masks[t] = None
                    scale[t] = float(params['scales'][t])

            # Scaled back-propagation
            optimizer.zero_grad()
            rep, _ = model['rep'](images, mask)
            for i, t in enumerate(tasks):
                out_t, _ = model[t](rep, masks[t])
                loss_t = loss_fn[t](out_t, labels[t])
                loss_data[t] = loss_t.data[0]
                if i > 0:
                    loss = loss + scale[t] * loss_t
                else:
                    loss = scale[t] * loss_t
            loss.backward()
            optimizer.step()

            writer.add_scalar('training_loss', loss.data[0], n_iter)
            for t in tasks:
                writer.add_scalar('training_loss_{}'.format(t), loss_data[t], n_iter)

        for m in model:
            model[m].eval()

        tot_loss = {}
        tot_loss['all'] = 0.0
        met = {}
        for t in tasks:
            tot_loss[t] = 0.0
            met[t] = 0.0

        num_val_batches = 0
        for batch_val in val_loader:
            val_images = Variable(batch_val[0].cuda(), volatile=True)
            labels_val = {}
            for i, t in enumerate(all_tasks):
                if t not in tasks:
                    continue
                labels_val[t] = batch_val[i + 1]
                labels_val[t] = Variable(labels_val[t].cuda(), volatile=True)

            val_rep, _ = model['rep'](val_images, None)
            for t in tasks:
                out_t_val, _ = model[t](val_rep, None)
                loss_t = loss_fn[t](out_t_val, labels_val[t])
                tot_loss['all'] += loss_t.data[0]
                tot_loss[t] += loss_t.data[0]
                metric[t].update(out_t_val, labels_val[t])
            num_val_batches += 1

        for t in tasks:
            writer.add_scalar('validation_loss_{}'.format(t),
                              tot_loss[t] / num_val_batches, n_iter)
            metric_results = metric[t].get_result()
            for metric_key in metric_results:
                writer.add_scalar('metric_{}_{}'.format(metric_key, t),
                                  metric_results[metric_key], n_iter)
            metric[t].reset()
        writer.add_scalar('validation_loss', tot_loss['all'] / len(val_dst), n_iter)

        if epoch % 3 == 0:
            # Save a checkpoint every 3 epochs
            state = {
                'epoch': epoch + 1,
                'model_rep': model['rep'].state_dict(),
                'optimizer_state': optimizer.state_dict()
            }
            for t in tasks:
                key_name = 'model_{}'.format(t)
                state[key_name] = model[t].state_dict()

            torch.save(state, "saved_models/{}_{}_model.pkl".format(
                params['exp_id'], epoch + 1))

        end = timer()
        print('Epoch ended in {}s'.format(end - start))

from train import (
    get_trainer,
    loop,
)
from datasets import (
    get_dataset,
    get_gabe_planktons,
)
from pylearn2.models import mlp

warnings.filterwarnings("ignore")

if __name__ == '__main__':
    train, valid, test = get_dataset()
    trainer = get_trainer(train, valid, test)

    in_space = Conv2DSpace(
        shape=[IMG_SIZE, IMG_SIZE],
        num_channels=1,
        # axes=['c', 0, 1, 'b']
    )

    net = mlp.MLP(
        layers=[conv0, conv1, conv2, rect0, rect1, smax],
        input_space=in_space,
        # nvis=784,
    )

    net = loop(trainer, net)

# parse arguments
args, model_args = parse_args()

# define logger
logdir = args.logdir
logger = Logger(logdir, read_only=args.test_only)
logger.log('args: %s' % str(args))
logger.log('model args: %s' % str(model_args))

# define model
model = models.get_model(args.model, model_args).cuda()
# logger.log('full-model FLOPs: %d' % measure(model, torch.zeros(1, 3, 32, 32).cuda(), k=-1)[0])

# define datasets - 0: train, 1: val, 2: test
datasets = get_dataset(args.dataset, val_size=args.valsize)
dataloaders = []
for d in datasets:
    dataloaders.append(
        DataLoader(d, batch_size=args.batch_size, shuffle=True, num_workers=4))

# define loss
criterion = nn.CrossEntropyLoss().cuda()

# define optimizer
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                      weight_decay=args.wd, nesterov=args.nesterov)

def main(opt):
    # make folder
    base_path = 'result'
    os.makedirs(base_path, exist_ok=True)
    result_path = make_folder(base_path, opt.save_folder)

    # Dataset
    print(f'Preparing Dataset....{opt.dataset}')
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    train_set, test_set = get_dataset(opt.dataset, train_transform, test_transform)

    # Load Dataset
    train_loader = DataLoader(train_set, batch_size=opt.train_batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=opt.test_batch_size, shuffle=False)

    # GPU
    if torch.cuda.is_available() and opt.cuda:
        device = 'cuda'
        torch.backends.cudnn.benchmark = True
    else:
        device = 'cpu'
    print(f'Using {device}')

    # Model
    print(f'Preparing Model....{opt.model}')
    model = get_model(opt.model, opt.num_classes)
    model.to(device)

    # Resuming
    if opt.resume:
        print('Resuming from checkpoint')
        assert os.path.isdir(f'{opt.resume}')
        checkpoint = torch.load(f'{opt.resume}/{opt.model}_ckpt.pth')
        model.load_state_dict(checkpoint['model'])
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
        train_result = checkpoint['train_result']
        test_result = checkpoint['test_result']
    else:
        start_epoch = 0
        best_acc = 0
        train_result, test_result = [], []

    # Optimizer
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=0.0001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    # Training
    start = time.time()
    for e in range(start_epoch, start_epoch + opt.epoch):
        train_result += train(model, train_loader, optimizer, loss_func, device,
                              start_epoch, scheduler, e)
        test_result += test(model, test_loader, loss_func, device, start_epoch, e)
        scheduler.step()

        # Save checkpoint
        if test_result[1::2][-1] > best_acc:
            print(f'Saving Model....({result_path})')
            state = {
                'model': model.state_dict(),
                'epoch': e + 1,
                'acc': test_result[1::2][-1],
                'train_result': train_result,
                'test_result': test_result
            }
            torch.save(state, f'{result_path}/{opt.model}_ckpt.pth')
            best_acc = test_result[1::2][-1]  # update the best accuracy so far

    # Save Result
    if opt.save_result:
        print(f'Saving Result....({result_path})')
        save_result(train_result, test_result, result_path)

    end = time.time()
    with open(f'{result_path}/time_log.txt', 'w') as f:
        f.write(str(datetime.timedelta(seconds=end - start)))

def fit_predict_categorical_encoding(datasets, str_preprocess, encoders,
                                     classifiers, test_size, n_splits, n_jobs,
                                     results_path):
    '''
    Learning with dirty categorical variables.
    '''
    results_path = os.path.join(BENCHMARK_HOME, results_path)
    if not os.path.exists(results_path):
        os.makedirs(results_path)

    for dataset in datasets:
        n_rows = choose_nrows(dataset_name=dataset)
        for encoder in encoders:
            print('Dataset: %s' % dataset)
            data = get_dataset(dataset).get_df()
            data.preprocess(n_rows=n_rows, str_preprocess=str_preprocess)
            print('Data shape: %d, %d' % data.df.shape)
            n_cats = len(np.unique(data.df[data.special_column]))
            if (n_cats > 5000) and 'OneHotEncoder' in encoder:
                print('Skipping this encoder, too many categories '
                      '({0})'.format(n_cats))
                continue
            cv = select_cross_val(data.clf_type, n_splits, test_size)
            scaler = preprocessing.StandardScaler(with_mean=False)

            # Define classifiers
            clfs = instantiate_estimators(
                data.clf_type, classifiers, clf_seed,
                y=data.df.loc[:, data.ycol].values)

            for i, clf in enumerate(clfs):
                # import pdb; pdb.set_trace()
                # print(
                #     '{}: {} \n{}: {} \n{}: {} \n{}: {} \n{}: {}'.format(
                #         'Prediction column', data.ycol,
                #         'Task type', str(data.clf_type),
                #         'Classifier', clf,
                #         'Encoder', encoder))
                try:
                    try:
                        clf2 = clf.estimator
                    except AttributeError:
                        clf2 = clf
                    clf_name = clf2.__class__.__name__
                    results_dict = {
                        'dataset': data.name,
                        'n_splits': n_splits,
                        'test_size': test_size,
                        'n_rows': n_rows,
                        'encoder': encoder,
                        'str_preprocess': str_preprocess,
                        'clf': [classifiers[i], clf_name, clf2.get_params()],
                        'ShuffleSplit': [cv.__class__.__name__],
                        'scaler': [scaler.__class__.__name__, scaler.get_params()],
                        'sample_seed': sample_seed,
                        'shuffleseed': shuffle_seed,
                        'col_action': data.col_action,
                        'clf_type': data.clf_type,
                    }

                    if verify_if_exists(results_path, results_dict):
                        print('Prediction already exists.\n')
                        continue

                    start = time.time()
                    column_action = get_column_action(
                        data.col_action, data.xcols, encoder, data.clf_type)
                    pred = Parallel(n_jobs=n_jobs)(
                        delayed(fit_predict_fold)(
                            data, scaler, column_action, clf, encoder,
                            fold, cv.n_splits, train_index, test_index)
                        for fold, (train_index, test_index) in enumerate(
                            cv.split(data.df, data.df[data.ycol].values)))
                    pred = np.array(pred)
                    results = {
                        'fold': list(pred[:, 0]),
                        'n_train_samples': list(pred[:, 1]),
                        'n_train_features': list(pred[:, 2]),
                        'score': list(pred[:, 3]),
                        'train_score': list(pred[:, 4]),
                        'encoding_time': list(pred[:, 5]),
                        'training_time': list(pred[:, 6])
                    }
                    results_dict['results'] = results

                    # Saving results
                    pc_name = socket.gethostname()
                    now = ''.join([c for c in str(datetime.datetime.now())
                                   if c.isdigit()])
                    filename = ('%s_%s_%s_%s_%s.json' % (
                        pc_name, data.name, classifiers[i], encoder, now))
                    results_file = os.path.join(results_path, filename)
                    results_dict = array2list(results_dict)
                    write_json(results_dict, results_file)
                    print('prediction time: %.1f s.' % (time.time() - start))
                    print('Saving results to: %s\n' % results_file)
                except Exception as e:  # noqa
                    print('Prediction failed: ', str(e))

from utils.utils import load_checkpoint, set_random_seed

P = parse_args()

### Set torch device ###
if torch.cuda.is_available():
    torch.cuda.set_device(P.local_rank)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

P.n_gpus = torch.cuda.device_count()
assert P.n_gpus <= 1  # no multi GPU
set_random_seed(P.seed)

### Initialize dataset ###
train_set, test_set, image_size, n_classes = get_dataset(
    P, dataset=P.dataset, augment=P.augment_type)
P.image_size = image_size
P.n_classes = n_classes

### Define data loader ###
kwargs = {'pin_memory': True, 'num_workers': 8}
train_loader = DataLoader(train_set, shuffle=True, batch_size=P.batch_size, **kwargs)
test_loader = DataLoader(test_set, shuffle=False, batch_size=P.test_batch_size, **kwargs)

if P.augment_type == 'autoaug_sche':
    train_set_second, _, _, _ = get_dataset(P, dataset=P.dataset, augment='autoaug')
    P.train_second_loader = DataLoader(train_set_second, shuffle=True,
                                       batch_size=P.batch_size, **kwargs)

### Initialize model ###
model = C.get_classifier(P, n_classes=P.n_classes).to(device)
optimizer, lr_decay_gamma = get_optimizer(P, model)

# load image
train_img_shape = tuple([int(x) for x in args.train_img_shape])
img_transform = Compose([
    Scale(train_img_shape, Image.BILINEAR),
    ToTensor(),
    Normalize([.485, .456, .406], [.229, .224, .225])
])
label_transform = Compose([
    Scale(train_img_shape, Image.NEAREST),
    ToLabel(),
    ReLabel(255, args.n_class - 1),  # convert label
])

source_dataset = get_dataset(dataset_name='source', img_lists=args.source_list,
                             label_lists=args.source_label_list,
                             img_transform=img_transform,
                             label_transform=label_transform, test=False)
target_dataset = get_dataset(dataset_name='target', img_lists=args.target_list,
                             label_lists=None, img_transform=img_transform,
                             label_transform=None, test=False)

train_loader = torch.utils.data.DataLoader(
    ConcatDataset(source_dataset, target_dataset),
    batch_size=args.batch_size, shuffle=True, pin_memory=True)

SAMPLING_MODE = args.sampling_mode
# Pre-computed weights to restore
CHECKPOINT = args.restore
# Learning rate for the SGD
LEARNING_RATE = args.lr
# Automated class balancing
CLASS_BALANCING = args.class_balancing
# Training ground truth file
TRAIN_GT = args.train_set
# Testing ground truth file
TEST_GT = args.test_set
TEST_STRIDE = args.test_stride

if args.download is not None and len(args.download) > 0:
    for dataset in args.download:
        get_dataset(dataset, target_folder=FOLDER)
    quit()

viz = visdom.Visdom(env=DATASET + ' ' + MODEL)
if not viz.check_connection():
    print("Visdom is not connected. Did you run 'python -m visdom.server' ?")

hyperparams = vars(args)
# Load the dataset
img, gt, LABEL_VALUES, IGNORED_LABELS, RGB_BANDS, palette = get_dataset(DATASET, FOLDER)
# Number of classes
N_CLASSES = len(LABEL_VALUES)
# Number of bands (last dimension of the image tensor)
N_BANDS = img.shape[-1]

params = HParams(args.cfg_file)
pprint(params.dict)

os.environ['CUDA_VISIBLE_DEVICES'] = params.gpu
np.random.seed(params.seed)
tf.set_random_seed(params.seed)

############################################################
logging.basicConfig(filename=params.exp_dir + '/train.log',
                    filemode='w',
                    level=logging.INFO,
                    format='%(message)s')
logging.info(pformat(params.dict))
############################################################

trainset = get_dataset('train', params)
validset = get_dataset('valid', params)
testset = get_dataset('test', params)
logging.info(f"trainset: {trainset.size} validset: {validset.size} testset: {testset.size}")

x_ph = tf.placeholder(tf.float32, [None, params.dimension])
y_ph = tf.placeholder(tf.float32, [None])
b_ph = tf.placeholder(tf.float32, [None, params.dimension])
m_ph = tf.placeholder(tf.float32, [None, params.dimension])

model = get_model(params)
model.build(x_ph, y_ph, b_ph, m_ph)

total_params = 0

if args['display']:
    plt.ion()
else:
    plt.ioff()
    plt.switch_backend("agg")

if args['save']:
    if not os.path.exists(args['save_dir']):
        os.makedirs(args['save_dir'])

# set device
device = torch.device("cuda:0" if args['cuda'] else "cpu")

# dataloader
dataset = get_dataset(args['dataset']['name'], args['dataset']['kwargs'])
dataset_it = torch.utils.data.DataLoader(
    dataset, batch_size=1, shuffle=False, drop_last=False,
    num_workers=4, pin_memory=True if args['cuda'] else False)

# load model
model = get_model(args['model']['name'], args['model']['kwargs'])
model = torch.nn.DataParallel(model).to(device)

# load snapshot
if os.path.exists(args['checkpoint_path']):
    state = torch.load(args['checkpoint_path'])

def get_dataset_path(self, dset_name):
    return get_dataset(dset_name)

def run_exp_lib(dataset_feat_net_triples,
                get_model=get_model_with_default_configs):
    results = []
    exp_nums = len(dataset_feat_net_triples)
    print("-----\nTotal %d experiments in this run:" % exp_nums)
    for exp_id, (dataset_name, feat_str, net) in enumerate(dataset_feat_net_triples):
        print('{}/{} - {} - {} - {}'.format(
            exp_id + 1, exp_nums, dataset_name, feat_str, net))
    print("Here we go..")
    sys.stdout.flush()

    for exp_id, (dataset_name, feat_str, net) in enumerate(dataset_feat_net_triples):
        print('-----\n{}/{} - {} - {} - {}'.format(
            exp_id + 1, exp_nums, dataset_name, feat_str, net))
        sys.stdout.flush()
        dataset = get_dataset(
            dataset_name, sparse=True, feat_str=feat_str, root=args.data_root)
        model_func = get_model(net)
        if 'MNIST' in dataset_name or 'CIFAR' in dataset_name:
            train_dataset, test_dataset = dataset
            train_acc, acc, duration = single_train_test(
                train_dataset,
                test_dataset,
                model_func,
                epochs=args.epochs,
                batch_size=args.batch_size,
                lr=args.lr,
                lr_decay_factor=args.lr_decay_factor,
                lr_decay_step_size=args.lr_decay_step_size,
                weight_decay=0,
                epoch_select=args.epoch_select,
                with_eval_mode=args.with_eval_mode)
            std = 0
        else:
            train_acc, acc, std, duration = cross_validation_with_val_set(
                dataset,
                model_func,
                folds=10,
                epochs=args.epochs,
                batch_size=args.batch_size,
                lr=args.lr,
                lr_decay_factor=args.lr_decay_factor,
                lr_decay_step_size=args.lr_decay_step_size,
                weight_decay=0,
                epoch_select=args.epoch_select,
                with_eval_mode=args.with_eval_mode,
                logger=logger,
                model_PATH=model_PATH,
                semi_split=args.semi_split)
        with open(log_PATH, "a+") as f:
            f.write(args.model_lr + " " + args.model_epoch + ": ")
            f.write(str(acc) + " " + str(std))
            f.write("\n")
        summary1 = 'data={}, model={}, feat={}, eval={}'.format(
            dataset_name, net, feat_str, args.epoch_select)
        summary2 = 'train_acc={:.2f}, test_acc={:.2f} ± {:.2f}, sec={}'.format(
            train_acc * 100, acc * 100, std * 100, round(duration, 2))
        results += ['{}: {}, {}'.format('fin-result', summary1, summary2)]
        print('{}: {}, {}'.format('mid-result', summary1, summary2))
        sys.stdout.flush()
    print('-----\n{}'.format('\n'.join(results)))
    sys.stdout.flush()