import json
import os
import pickle


def restore_session(logdir, device='auto'):
    """Load a model and its training artifacts from a saved session directory."""
    if logdir[-1] == '/':
        logdir = logdir[:-1]
    run_id = logdir.split('/')[-1]

    # load the hyperparameters and arguments
    hyperparameters = pickle.load(open(os.path.join(logdir, "hyperparameters.p"), "rb"))
    opt = json.load(open(os.path.join(logdir, "config.json")))

    # instantiate the model and move it to the target device
    model = DeepVae(**hyperparameters)
    device = available_device() if device == 'auto' else device
    model.to(device)

    # load the pretrained weights
    load_model(model, logdir)

    # define the likelihood and the evaluator
    likelihood = {
        'cifar10': DiscretizedMixtureLogits(opt['nr_mix']),
        'binmnist': Bernoulli,
    }[opt['dataset']]
    evaluator = VariationalInference(likelihood, iw_samples=1)

    # load the dataset
    if opt['dataset'] == 'binmnist':
        train_dataset, valid_dataset, test_dataset = get_binmnist_datasets(opt['data_root'])
    elif opt['dataset'] == 'cifar10':
        from torchvision.transforms import Lambda
        transform = Lambda(lambda x: x * 2 - 1)  # map pixel values from [0, 1] to [-1, 1]
        train_dataset, valid_dataset, test_dataset = get_cifar10_datasets(
            opt['data_root'], transform=transform)
    else:
        raise NotImplementedError

    return {
        'model': model,
        'device': device,
        'run_id': run_id,
        'hyperparameters': hyperparameters,
        'opt': opt,
        'likelihood': likelihood,
        'evaluator': evaluator,
        'train_dataset': train_dataset,
        'valid_dataset': valid_dataset,
        'test_dataset': test_dataset,
    }
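# Usage sketch (hypothetical: the run path, the batch size, and the evaluator
# call signature `evaluator(model, x)` are assumptions, not confirmed by this
# file; only the dictionary keys come from `restore_session` above):
#
#   session = restore_session("runs/<run_id>")
#   model, evaluator = session['model'], session['evaluator']
#   loader = DataLoader(session['test_dataset'], batch_size=32)
#   model.eval()
#   with torch.no_grad():
#       x = next(iter(loader)).to(session['device'])
#       loss, diagnostics, output = evaluator(model, x)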
        x = self.fc2(x)
        return x


# check GPUs
n_gpus = torch.cuda.device_count()
device_ids = list(range(n_gpus)) if n_gpus else None
print(logging_sep("="))
print(f"N gpus = {n_gpus}, Devices = {device_ids}")
if n_gpus == 1:
    print("Use more than one GPU to test multi-GPU capabilities.")

# init model and evaluator
model = Classifier()
evaluator = Classification(10)
model.to(available_device())

# fuse model + evaluator into a single module
pipeline = Pipeline(model, evaluator)

# wrap as DataParallel so the fused forward pass is scattered across GPUs
parallel_pipeline = DataParallelPipeline(pipeline, device_ids=device_ids)

# evaluate the model on one batch
data = next(iter(loader))
print(logging_sep("-"))
print("x.shape =", next(iter(data)).shape)
loss, diagnostics, output = parallel_pipeline(data)
print(logging_sep("-"))
print(f"Loss = {loss:.3f}, device = {loss.device}")
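# For reference, the same fuse-then-parallelize pattern written with vanilla
# PyTorch only (a minimal sketch: `ToyPipeline` is hypothetical and stands in
# for `Pipeline`/`DataParallelPipeline`, which presumably also gather the
# diagnostics across devices):
import torch
from torch import nn


class ToyPipeline(nn.Module):
    """Fuse a model with its loss so `DataParallel` scatters the whole step."""

    def __init__(self, model):
        super().__init__()
        self.model = model
        self.loss_fn = nn.CrossEntropyLoss(reduction='none')

    def forward(self, x, y):
        logits = self.model(x)
        # per-sample losses are computed on each replica and gathered on device 0
        return self.loss_fn(logits, y)


toy = ToyPipeline(nn.Linear(10, 3))
x, y = torch.randn(8, 10), torch.randint(0, 3, (8,))
if torch.cuda.device_count() > 1:
    toy = nn.DataParallel(toy.cuda())  # replicas are created on every visible GPU
    x, y = x.cuda(), y.cuda()  # inputs are scattered along the batch dimension
loss = toy(x, y).mean()
print(f"loss = {loss:.3f}, device = {loss.device}")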
if opt['silent']:
    tqdm = notqdm

# defining the run identifier
deterministic_id = get_hash_from_opt(opt)
run_id = f"asymptotic-{opt['estimators']}-iw{opt['iw_min']}-{opt['iw_max']}-{opt['iw_steps']}-seed{opt['seed']}-eps{opt['epsilon']}-alpha{opt['alpha']}"
if opt['exp'] != "":
    run_id += f"-{opt['exp']}"
run_id += f"{deterministic_id}"
_exp_id = f"asymptotic-{opt['exp']}-{opt['seed']}"

# defining the run directory
logdir = init_logging_directory(opt, run_id)

# device
device = available_device()

# save the run configuration
with open(os.path.join(logdir, 'config.json'), 'w') as fp:
    fp.write(json.dumps(opt, default=lambda x: str(x), indent=4))

# wrap the training loop inside `Success` to write the outcome of the run to a file
with Success(logdir=logdir):
    # define logger
    base_logger, *_ = get_loggers(logdir, keys=['base'])
    print_info(logdir=logdir, device=device, run_id=run_id, logger=base_logger)

    # setting the random seed
    torch.manual_seed(opt['seed'])
    np.random.seed(opt['seed'])
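# A minimal sketch of what a `Success` context manager can look like; the
# class below is hypothetical and only illustrates the "write the outcome of
# the run to a file" behaviour described above:
import os
import traceback


class SuccessSketch:
    def __init__(self, logdir):
        self.logdir = logdir

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, tb):
        outcome = "Success." if exc_type is None else f"Failed: {exc_value}"
        with open(os.path.join(self.logdir, "success.txt"), "w") as f:
            f.write(outcome)
            if exc_type is not None:
                f.write("\n" + "".join(traceback.format_exception(exc_type, exc_value, tb)))
        return False  # do not suppress the exception: re-raise after logging it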
    'latents': latents,
    'nonlinearity': 'elu',
    'q_dropout': opt.q_dropout,
    'p_dropout': opt.p_dropout,
    'type': opt.model_type,
    'features_out': features_out,
    'no_skip': opt.no_skip,
    'log_var_act': log_var_act,
}

# save the hyperparameters for easy loading
pickle.dump(hyperparameters, open(os.path.join(logdir, "hyperparameters.p"), "wb"))

# instantiate the model and move it to the target device
model = DeepVae(**hyperparameters)
device = available_device() if opt.device == 'auto' else opt.device
model.to(device)

# define the evaluator
evaluator = VariationalInference(likelihood, iw_samples=1)

# define the evaluation model with Exponential Moving Average (EMA) weights
ema = EMA(model, opt.ema)

# data-dependent init for weight normalization (done automatically during the first forward pass)
with torch.no_grad():
    model.train()
    x = next(iter(train_loader)).to(device)
    model(x)

# print stages
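# A minimal sketch of the EMA idea used above: keep a frozen shadow copy of
# the model whose weights are an exponential moving average of the training
# weights. `EmaSketch` is hypothetical; the repo's `EMA` class may differ
# (e.g. it may also track buffers or expose the averaged model differently):
import copy

import torch


class EmaSketch:
    def __init__(self, model, decay=0.999):
        self.decay = decay
        self.shadow = copy.deepcopy(model).eval()
        for p in self.shadow.parameters():
            p.requires_grad_(False)

    @torch.no_grad()
    def update(self, model):
        # shadow <- decay * shadow + (1 - decay) * model, parameter-wise
        for p_ema, p in zip(self.shadow.parameters(), model.parameters()):
            p_ema.mul_(self.decay).add_(p, alpha=1.0 - self.decay)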
def main():
    parser = argparse.ArgumentParser()
    add_base_args(parser, exp="efficiency", dataset="binmnist")
    add_iw_sweep_args(parser, min=10, max=5e3, steps=10)
    add_model_architecture_args(parser, model='sbm', depth=1, nlayers=0, N=200)
    parser.add_argument('--load', default='', help='path to an existing experiment to load the results from')
    parser.add_argument('--num_runs', default=5, type=int, help='number of runs per (estimator, iw) pair')
    parser.add_argument('--max_epoch_length', default=1e9, type=int,
                        help='maximum number of iterations per epoch')

    # estimators
    parser.add_argument('--estimators',
                        default='vimco-arithmetic,ovis-gamma1,reinforce,tvo-part2-config1',
                        help='comma-separated list of estimators')
    parser.add_argument('--bs', default=24, type=int, help='batch size')
    parser.add_argument('--filter', default='',
                        help='exclude estimators whose name contains this string from the plot')
    opt = vars(parser.parse_args())

    # defining the run identifier
    deterministic_id = get_hash_from_opt(opt)
    run_id = f"efficiency-{opt['estimators']}-iw{opt['iw_min']}-{opt['iw_max']}-{opt['iw_steps']}-seed{opt['seed']}"
    if opt['exp'] != "":
        run_id += f"-{opt['exp']}"
    run_id += f"{deterministic_id}"
    exp_id = f"efficiency-{opt['exp']}-{opt['seed']}"

    # number of particles: log-spaced between iw_min and iw_max, largest first
    # (with the defaults, approximately [5000, 2506, 1256, 629, 315, 158, 79, 39, 19, 10])
    iws = [int(k) for k in np.geomspace(start=opt['iw_min'], stop=opt['iw_max'], num=opt['iw_steps'])[::-1]]

    # estimator ids
    estimator_ids = opt['estimators'].replace(" ", "").split(",")

    # defining the run directory
    logdir = init_logging_directory(opt, run_id)

    # save the run configuration to the log directory
    with open(os.path.join(logdir, 'config.json'), 'w') as fp:
        opt['hash'] = deterministic_id
        fp.write(json.dumps(opt, default=lambda x: str(x), indent=4))

    # wrap the training loop inside `Success` to write the outcome of the run to a file
    with Success(logdir=logdir):
        if opt['load'] == '':
            # get the device (cuda/cpu)
            device = available_device()
            assert 'cuda' in device, "No CUDA device detected."
            # define logger
            base_logger, *_ = get_loggers(logdir, keys=[exp_id])
            print_info(logdir=logdir, device=device, run_id=run_id, logger=base_logger)

            # setting the random seed
            torch.manual_seed(opt['seed'])
            np.random.seed(opt['seed'])

            # dataset & loader
            assert opt['dataset'] == 'binmnist', "Only implemented for Binarized MNIST"
            dset, *_ = get_binmnist_datasets(opt['data_root'], transform=ToTensor())
            loader = DataLoader(dset, batch_size=opt['bs'], shuffle=True, num_workers=1)

            # model
            model, hyperparameters = init_model(opt, dset[0], loader)
            model.to(device)
            model.train()

            # optimizer
            optimizer = Adam(model.parameters(), lr=1e-3)

            data = []
            iter_per_epoch = min(opt['max_epoch_length'], -(-len(dset) // opt['bs']))  # ceil(len(dset) / bs)
            num_of_iterations = len(estimator_ids) * len(iws) * opt['num_runs'] * iter_per_epoch
            pbar = tqdm(total=num_of_iterations)
            for e, estimator_id in enumerate(estimator_ids):
                for i, iw in enumerate(iws):
                    # estimator
                    Estimator, config = parse_estimator_id(estimator_id)
                    estimator = Estimator(baseline=None, mc=1, iw=iw, **config)
                    estimator.to(device)

                    for run_i in range(opt['num_runs']):
                        pbar.set_description(f"{estimator_id} [{e + 1}/{len(estimator_ids)}], "
                                             f"K={iw} [{i + 1}/{len(iws)}], "
                                             f"[{run_i + 1}/{opt['num_runs']}]")

                        # reset the wall-clock and peak-memory trackers
                        torch.cuda.reset_max_memory_allocated(device=device)
                        start = time.time()

                        # one training epoch
                        for step, batch in enumerate(loader):
                            x, y = preprocess(batch, device)
                            training_step(x, model, estimator, [optimizer], y=y, return_diagnostics=False)
                            pbar.update(1)
                            if step >= opt['max_epoch_length']:
                                break

                        # read the trackers
                        elapsed_time = time.time() - start
                        max_memory = torch.cuda.max_memory_allocated(device=device) / 1e6  # bytes -> MB

                        # store data
                        data += [{'estimator': estimator_id,
                                  'iw': iw,
                                  'run_i': run_i,
                                  'max_memory': max_memory,
                                  'elapsed_time': elapsed_time}]

            # compile data into a DataFrame and save to .csv
            data = pd.DataFrame(data)
            data.to_csv(os.path.join(logdir, 'efficiency.csv'))
        else:
            data = pd.read_csv(os.path.join(opt['load'], 'efficiency.csv'))

        data['estimator'] = data['estimator'].map(format_estimator_name)

        # plotting
        set_matplotlib_style()
        keys = ['max_memory', 'elapsed_time']
        fig, axes = plt.subplots(nrows=1, ncols=len(keys),
                                 figsize=(2 * PLOT_WIDTH, 2 * PLOT_HEIGHT), dpi=DPI)
        hue_order = list(data['estimator'].unique())
        if opt['filter'] != "":
            hue_order = [e for e in hue_order if opt['filter'] not in e]
        sort_estimator_keys(hue_order)
        legend = Legend(fig)
        for ax, key in zip(axes, keys):
            for e, estimator in enumerate(hue_order):
                sub_df = data[data['estimator'] == estimator]

                # color and marker
                if estimator in ESTIMATOR_STYLE:
                    style = {'color': ESTIMATOR_STYLE[estimator]['color'],
                             'marker': ESTIMATOR_STYLE[estimator]['marker']}
                else:
                    style = {'color': sns.color_palette()[e], 'marker': MARKERS[e]}

                # extract mean and std over runs
                series = sub_df[['iw', key]].groupby('iw').agg(['mean', 'std'])
                series.reset_index(inplace=True)

                # shaded area for the confidence interval (mean +/- 0.5 * std)
                ax.fill_between(series['iw'],
                                series[key]['mean'] - 0.5 * series[key]['std'],
                                series[key]['mean'] + 0.5 * series[key]['std'],
                                color=style['color'], alpha=0.2)

                # plot the mean value
                ax.plot(series['iw'], series[key]['mean'], markersize=0, alpha=0.5, **style)
                ax.plot(series['iw'], series[key]['mean'], label=estimator, alpha=1, **style)

            # labels and axis scales
            ax.set_yscale('log', base=10)  # `basey`/`basex` were renamed to `base` in matplotlib >= 3.3
            ax.set_xscale('log', base=10)
            ax.set_ylabel(key)
            ax.set_xlabel("iw")
            legend.update(ax)

        update_labels(axes, METRIC_DISPLAY_NAME)
        legend.draw(group=True)
        plt.savefig(os.path.join(logdir, "efficiency.png"))
        plt.close()

        with Header(f"Data [Logging Directory: {os.path.abspath(logdir)}]"):
            print(data.pivot_table(values=['max_memory', 'elapsed_time'],
                                   index=['iw', 'estimator'],
                                   aggfunc=np.mean))
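# The per-run measurement pattern above, in isolation (a minimal sketch; note
# that it adds explicit `torch.cuda.synchronize` calls, which the training
# loop above omits, so that asynchronous CUDA kernels are included in the
# wall-clock time):
import time

import torch


def measure(fn, device):
    """Return (elapsed seconds, peak CUDA memory in MB) for one call to `fn`."""
    torch.cuda.reset_max_memory_allocated(device=device)  # reset the peak-memory counter
    torch.cuda.synchronize(device)  # flush pending kernels before starting the clock
    start = time.time()
    fn()
    torch.cuda.synchronize(device)  # wait for asynchronous CUDA work to finish
    elapsed = time.time() - start
    return elapsed, torch.cuda.max_memory_allocated(device=device) / 1e6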