def get_full_data(dataset, scaling=False, n_train=None):
    """Load or build the full train/test tensors for *dataset*, caching to disk.

    Args:
        dataset: name of the dataset folder under ``datasets/``.
        scaling: forwarded to ``get_data`` to control feature scaling.
        n_train: optional cap on the number of training samples kept.

    Returns:
        dict with keys ``train`` and ``test`` (each ``dict(X=..., y=...)`` of
        concatenated tensors) plus ``n_class``.
    """
    data_path = os.path.dirname(os.path.abspath(__file__))
    data_cfg = dict(scaling=scaling)
    if n_train is not None:
        data_cfg.update(n_train=n_train)
    file_path = os.path.join(
        data_path, 'datasets', dataset,
        var_to_str(data_cfg) + '_full_data') + format_files
    if os.path.exists(file_path):
        # Cache hit: reuse the previously saved tensors.
        with open(file_path, 'rb') as f:
            full_data = load(f)
    else:
        train_data, test_data = get_data(dataset, scaling=scaling)
        train_loader = DataLoader(train_data, batch_size=128)
        test_loader = DataLoader(test_data, batch_size=128)
        full_data = dict(train=dict(X=[], y=[]), test=dict(X=[], y=[]))
        # Materialize each split into two stacked tensors (X, y).
        for datasplit, loader in zip(full_data.values(),
                                     [train_loader, test_loader]):
            for x, y in loader:
                datasplit['X'].append(x)
                datasplit['y'].append(y)
            datasplit['X'] = torch.cat(datasplit['X'])
            datasplit['y'] = torch.cat(datasplit['y'])
        if n_train is not None:
            full_data['train']['X'] = full_data['train']['X'][:n_train]
            full_data['train']['y'] = full_data['train']['y'][:n_train]
        full_data.update(n_class=n_class_datasets[dataset])
        # Fix: create the cache directory before writing — the other cached
        # helpers in this file do this, but it was missing here, so the very
        # first run would fail with FileNotFoundError.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'wb') as f:
            save(full_data, f)
    return full_data
def get_hyper_params_ref(dataset, scaling=True):
    """Compute (or load cached) reference hyper-parameters for *dataset*.

    The reference regularization is ``lambda_max(cov) / trace(cov)`` of the
    subsampled train covariance; the reference RBF bandwidth follows the
    median heuristic on pairwise squared distances.

    Returns:
        dict(reg_ref=float, sigma_ref=float), each rounded to two
        significant digits.
    """
    # Thin the train set: the pairwise distance matrix below is O(n^2).
    subsample = 5
    dataset_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                'datasets', dataset)
    data_cfg = dict(scaling=scaling)
    file_path = os.path.join(
        dataset_path, var_to_str(data_cfg) + '_hyper_params_ref') + format_files
    if os.path.exists(file_path):
        with open(file_path, 'rb') as f:
            hyperparams_ref = load(f)
    else:
        full_data = get_full_data(dataset, scaling=scaling)
        X_train = full_data['train']['X']
        X_train = X_train[::subsample]
        del full_data  # free the full tensors before the heavy algebra below
        cov = (X_train.t().mm(X_train) / len(X_train)).numpy()
        # Largest eigenvalue via sparse eigensolver (k=1).
        lambda_max = np.real(splinalg.eigs(cov, 1)[0])
        reg_ref = (lambda_max / np.trace(cov)).item()
        reg_ref = float('{:1.2e}'.format(reg_ref))
        print(f'reg ref: {reg_ref}')
        del cov
        dists = torch.cdist(X_train, X_train)**2
        del X_train
        # Upper triangle so each pair is counted once; median heuristic over
        # the non-zero entries.
        dists = torch.triu(dists)
        sigma_ref = torch.sqrt(torch.median(dists[dists != 0]) / 2).item()
        sigma_ref = float('{:1.2e}'.format(sigma_ref))
        print(f'sigma ref: {sigma_ref}')
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        hyperparams_ref = dict(reg_ref=reg_ref, sigma_ref=sigma_ref)
        with open(file_path, 'wb') as f:
            save(hyperparams_ref, f)
    return hyperparams_ref
def get_reg_ref_rbf(dataset, scaling, nb_train, sigma):
    """Compute (or load cached) the reference regularization for an RBF kernel.

    Defined as ``lambda_max(K) / trace(K)`` of the subsampled, normalized
    train Gram matrix, rounded to two significant digits.

    Args:
        dataset: name of the dataset folder under ``datasets/``.
        scaling: forwarded to ``preprocess_gram``.
        nb_train: number of training samples used to build the Gram matrix.
        sigma: RBF bandwidth used to build the Gram matrix.

    Returns:
        float reference regularization value.
    """
    # Thin the Gram matrix before the eigensolve to keep it cheap.
    subsample = 5
    dataset_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                'datasets', dataset)
    data_cfg = dict(scaling=scaling, nb_train=nb_train, sigma=sigma)
    file_path = os.path.join(
        dataset_path, var_to_str(data_cfg) + '_reg_rbf_ref') + format_files
    if os.path.exists(file_path):
        with open(file_path, 'rb') as f:
            reg_ref_rbf = load(f)
    else:
        gram_data = preprocess_gram(dataset, scaling=scaling, n_train=nb_train,
                                    kernel='rbf', sigma=sigma)
        gram_train = gram_data['train']['gram']
        del gram_data
        gram_train = gram_train[::subsample, ::subsample] / gram_train.size(0)
        # NOTE(review): gram_train looks like a torch tensor (``.size(0)``)
        # but is passed straight to scipy/numpy without ``.numpy()`` — unlike
        # get_hyper_params_ref above. Confirm this conversion is intended.
        lambda_max = np.real(splinalg.eigs(gram_train, 1)[0])
        reg_ref_rbf = (lambda_max / np.trace(gram_train)).item()
        reg_ref_rbf = float('{:1.2e}'.format(reg_ref_rbf))
        print(f'reg_ref_rbf: {reg_ref_rbf}')
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'wb') as f:
            save(reg_ref_rbf, f)
    return reg_ref_rbf
def preprocess_gram(dataset, scaling, n_train=None, kernel='linear', reg=None, sigma=None):
    """Build (or load cached) train/test Gram matrices for *dataset*.

    Args:
        dataset: name of the dataset folder under ``datasets/``.
        scaling: forwarded to ``get_full_data``.
        n_train: optional cap on training samples.
        kernel: kernel name forwarded to ``compute_gram``.
        reg: optional kernel regularization, included in the cache key.
        sigma: optional kernel bandwidth, included in the cache key.

    Returns:
        dict with ``train``/``test`` entries (each ``dict(gram=..., y=...)``)
        and ``n_class``.
    """
    data_path = os.path.dirname(os.path.abspath(__file__))
    data_cfg = dict(scaling=scaling, kernel=kernel)
    # Only include the optional knobs in the cache key when they are set.
    if reg is not None:
        data_cfg.update(reg=reg)
    if sigma is not None:
        data_cfg.update(sigma=sigma)
    if n_train is not None:
        data_cfg.update(n_train=n_train)
    # NOTE(review): unlike the other caches there is no '_' before
    # 'gram_data'; kept as-is so existing cache files remain valid.
    file_path = os.path.join(data_path, 'datasets', dataset,
                             var_to_str(data_cfg) + 'gram_data') + format_files
    if os.path.exists(file_path):
        with open(file_path, 'rb') as f:
            gram_data = load(f)
    else:
        full_data = get_full_data(dataset, scaling, n_train)
        gram_train = compute_gram(full_data['train']['X'],
                                  full_data['train']['X'], kernel, reg, sigma)
        # Test Gram is cross-computed against the *train* samples.
        gram_test = compute_gram(full_data['test']['X'],
                                 full_data['train']['X'], kernel, reg, sigma)
        gram_data = dict(train=dict(gram=gram_train, y=full_data['train']['y']),
                         test=dict(gram=gram_test, y=full_data['test']['y']),
                         n_class=full_data['n_class'])
        # Fix: create the cache directory before writing, consistent with the
        # other cached helpers in this file; otherwise the first run fails.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'wb') as f:
            save(gram_data, f)
    return gram_data
def wrapped_exp_core(data_cfg, optim_cfg):
    """Run one experiment (or load its cached result) and return it as a row.

    Args:
        data_cfg: dict of data-related keyword arguments for ``exp_core``.
        optim_cfg: dict of optimizer-related keyword arguments for ``exp_core``.

    Returns:
        A single-row pandas ``DataFrame`` holding both configs plus
        ``test_acc`` (as a Python float via ``.item()``).
    """
    exp_cfg = dict(data_cfg=data_cfg, optim_cfg=optim_cfg)
    print(*['{0}:{1}'.format(key, value) for key, value in exp_cfg.items()],
          sep='\n')
    exp_path = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(exp_path, 'results', var_to_str(data_cfg),
                             var_to_str(optim_cfg)) + format_files
    if os.path.exists(file_path):
        # Cached result is stored as [exp_cfg, test_acc]; config is discarded.
        with open(file_path, 'rb') as f:
            _, test_acc = load(f)
    else:
        test_acc = exp_core(**data_cfg, **optim_cfg)
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'wb') as f:
            save([exp_cfg, test_acc], f)
    result = {**data_cfg, **optim_cfg}
    result.update(test_acc=test_acc.item())
    result = DataFrame(result, index=[0])
    return result