import os
import pickle

import numpy as np
from sklearn.cross_decomposition import CCA

# Project-local helpers; the module paths below are assumed from the usual
# repository layout and may need adjusting.
from data.imca import generate_synthetic_data
from metrics.mcc import mean_corr_coef, mean_corr_coef_out_of_sample
from models.ivae.ivae_wrapper import IVAE_wrapper
from models.icebeem_wrapper import ICEBEEM_wrapper
from models.tcl.tcl_wrapper_gpu import TCL_wrapper


def run_ivae_exp(args, config):
    """run iVAE simulations"""
    data_dim = config.data_dim
    n_segments = config.n_segments
    n_layers = config.n_layers
    n_obs_per_seg = config.n_obs_per_seg
    data_seed = config.data_seed

    max_iter = config.ivae.max_iter
    lr = config.ivae.lr
    cuda = config.ivae.cuda

    results = {l: {n: [] for n in n_obs_per_seg} for l in n_layers}

    nSims = args.nSims
    dataset = args.dataset
    test = args.test

    for l in n_layers:
        for n in n_obs_per_seg:
            # generate data once per (l, n); only the model seed varies below
            x, y, s = generate_synthetic_data(data_dim, n_segments, n, l, seed=data_seed,
                                              simulationMethod=dataset, one_hot_labels=True,
                                              varyMean=True)
            for seed in range(nSims):
                print('Running exp with L={} and n={}; seed={}'.format(l, n, seed))
                # run iVAE
                ckpt_file = os.path.join(args.checkpoints,
                                         'ivae_{}_l{}_n{}_s{}.pt'.format(dataset, l, n, seed))
                res_iVAE = IVAE_wrapper(X=x, U=y, n_layers=l + 1, hidden_dim=data_dim * 2,
                                        cuda=cuda, max_iter=max_iter, lr=lr,
                                        ckpt_file=ckpt_file, seed=seed, test=test)
                # store results
                mcc = mean_corr_coef(res_iVAE[0].detach().numpy(), s)
                results[l][n].append(mcc)
                print(mcc)

    # prepare output
    Results = {
        'data_dim': data_dim,
        'data_segments': n_segments,
        'CorrelationCoef': results
    }
    return Results
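

# For reference: a minimal sketch of the MCC metric used throughout these
# runners, assuming the standard definition (absolute Pearson cross-correlations
# between the two sets of signals, matched with the Hungarian algorithm).
# `_mcc_sketch` is illustrative only; the repo's mean_corr_coef may differ in
# detail, e.g. by offering a Spearman option.
from scipy.optimize import linear_sum_assignment


def _mcc_sketch(x, y):
    d = x.shape[1]
    cc = np.corrcoef(x, y, rowvar=False)[:d, d:]   # (d, d) cross-correlation block
    row, col = linear_sum_assignment(-np.abs(cc))  # assignment maximising total |corr|
    return np.abs(cc[row, col]).mean()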


def compute_mcc(args, config):
    # note: `config` is unused here; the signature mirrors the other runners
    rep1 = pickle.load(open(os.path.join(args.checkpoints, 'seed{}'.format(args.seed),
                                         'test_representations.p'), 'rb'))['rep']
    rep2 = pickle.load(open(os.path.join(args.checkpoints, 'seed{}'.format(args.second_seed),
                                         'test_representations.p'), 'rb'))['rep']

    # cutoff = 50 if args.dataset == 'CIFAR100' else 5
    # ii = np.where(res_cond[0]['lab'] < cutoff)[0]  # in sample points to learn from
    # iinot = np.where(res_cond[0]['lab'] >= cutoff)[0]  # out of sample points
    cutoff = 5000  # half the test dataset
    ii = np.arange(cutoff)
    iinot = np.arange(cutoff, 2 * cutoff)

    # strong MCC: match channels of the two seeds' representations directly
    mcc_strong_out = mean_corr_coef_out_of_sample(x=rep1[ii], y=rep2[ii],
                                                  x_test=rep1[iinot], y_test=rep2[iinot])
    mcc_strong_in = mean_corr_coef(x=rep1[ii], y=rep2[ii])
    pickle.dump({'in': mcc_strong_in, 'out': mcc_strong_out},
                open(os.path.join(args.output,
                                  'mcc_strong_{}_{}.p'.format(args.seed, args.second_seed)), 'wb'))

    # weak MCC: compare the representations after aligning them with CCA
    cca_dim = 20
    cca = CCA(n_components=cca_dim)
    cca.fit(rep1[ii], rep2[ii])
    res_out = cca.transform(rep1[iinot], rep2[iinot])
    mcc_weak_out = mean_corr_coef(res_out[0], res_out[1])
    res_in = cca.transform(rep1[ii], rep2[ii])
    mcc_weak_in = mean_corr_coef(res_in[0], res_in[1])
    pickle.dump({'in': mcc_weak_in, 'out': mcc_weak_out},
                open(os.path.join(args.output,
                                  'mcc_weak_{}_{}.p'.format(args.seed, args.second_seed)), 'wb'))
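

# A sketch of the out-of-sample variant used above, under the assumption that
# mean_corr_coef_out_of_sample learns the channel assignment on the in-sample
# split and scores the held-out split with that fixed assignment.
# `_mcc_out_of_sample_sketch` is illustrative, not the repo's implementation.
def _mcc_out_of_sample_sketch(x, y, x_test, y_test):
    d = x.shape[1]
    cc = np.abs(np.corrcoef(x, y, rowvar=False)[:d, d:])
    row, col = linear_sum_assignment(-cc)  # assignment fit on the in-sample split
    cc_test = np.abs(np.corrcoef(x_test, y_test, rowvar=False)[:d, d:])
    return cc_test[row, col].mean()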


def run_icebeem_exp(args, config):
    """run ICE-BeeM simulations"""
    data_dim = config.data_dim
    n_segments = config.n_segments
    n_layers = config.n_layers
    n_obs_per_seg = config.n_obs_per_seg
    data_seed = config.data_seed

    lr_flow = config.icebeem.lr_flow
    lr_ebm = config.icebeem.lr_ebm
    n_layers_flow = config.icebeem.n_layers_flow
    ebm_hidden_size = config.icebeem.ebm_hidden_size

    results = {l: {n: [] for n in n_obs_per_seg} for l in n_layers}

    nSims = args.nSims
    dataset = args.dataset
    test = args.test

    for l in n_layers:
        for n in n_obs_per_seg:
            # generate data once per (l, n); only the model seed varies below
            x, y, s = generate_synthetic_data(data_dim, n_segments, n, l, seed=data_seed,
                                              simulationMethod=dataset, one_hot_labels=True)
            for seed in range(nSims):
                print('Running exp with L={} and n={}; seed={}'.format(l, n, seed))
                n_layers_ebm = l + 1
                ckpt_file = os.path.join(args.checkpoints,
                                         'icebeem_{}_l{}_n{}_s{}.pt'.format(dataset, l, n, seed))
                recov_sources = ICEBEEM_wrapper(X=x, Y=y, ebm_hidden_size=ebm_hidden_size,
                                                n_layers_ebm=n_layers_ebm,
                                                n_layers_flow=n_layers_flow,
                                                lr_flow=lr_flow, lr_ebm=lr_ebm, seed=seed,
                                                ckpt_file=ckpt_file, test=test)
                # store results: keep the best MCC across the recovered source estimates
                mcc = np.max([mean_corr_coef(z, s) for z in recov_sources])
                results[l][n].append(mcc)
                print(mcc)

    # prepare output
    Results = {
        'data_dim': data_dim,
        'data_segments': n_segments,
        'CorrelationCoef': results
    }
    return Results
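

# Hypothetical convenience helper (not part of the original file): collapse the
# nested dict returned by the runners above into mean/std MCC per
# (n_layers, n_obs_per_seg) cell, e.g. before tabulating or plotting.
def summarize_results(Results):
    summary = {}
    for l, by_n in Results['CorrelationCoef'].items():
        for n, mccs in by_n.items():
            summary[(l, n)] = (float(np.mean(mccs)), float(np.std(mccs)))
    return summary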


def run_tcl_exp(args, config):
    """run TCL simulations"""
    stepDict = {1: [int(5e3), int(5e3)],
                2: [int(1e4), int(1e4)],
                3: [int(1e4), int(1e4)],
                4: [int(1e4), int(1e4)],
                5: [int(1e4), int(1e4)]}

    data_dim = config.data_dim
    n_segments = config.n_segments
    n_layers = config.n_layers
    n_obs_per_seg = config.n_obs_per_seg
    data_seed = config.data_seed

    results = {l: {n: [] for n in n_obs_per_seg} for l in n_layers}
    results_no_ica = {l: {n: [] for n in n_obs_per_seg} for l in n_layers}
    num_comp = data_dim

    nSims = args.nSims
    dataset = args.dataset
    test = args.test

    for l in n_layers:
        for n in n_obs_per_seg:
            # generate data
            x, y, s = generate_synthetic_data(data_dim, n_segments, n, l, seed=data_seed,
                                              simulationMethod=dataset, one_hot_labels=False)
            for seed in range(nSims):
                print('Running exp with L={} and n={}; seed={}'.format(l, n, seed))
                # checkpointing in TF is more involved than in pytorch, so create
                # a separate folder per argument tuple
                ckpt_folder = os.path.join(args.checkpoints, args.dataset,
                                           str(l), str(n), str(seed))
                # run TCL
                res_TCL = TCL_wrapper(sensor=x.T, label=y, random_seed=seed,
                                      list_hidden_nodes=[num_comp * 2] * (l - 1) + [num_comp],
                                      max_steps=stepDict[l][0] * 2,
                                      max_steps_init=stepDict[l][1],
                                      ckpt_dir=ckpt_folder, test=test)
                # store results: TCL identifies the sources only up to an
                # element-wise squaring, hence the comparison against s ** 2
                mcc_no_ica = mean_corr_coef(res_TCL[0].T, s ** 2)
                mcc_ica = mean_corr_coef(res_TCL[1].T, s ** 2)
                print('TCL mcc (no ICA): {}\t mcc: {}'.format(mcc_no_ica, mcc_ica))
                results[l][n].append(mcc_ica)
                results_no_ica[l][n].append(mcc_no_ica)

    # prepare output
    Results = {
        'data_dim': data_dim,
        'data_segments': n_segments,
        'CorrelationCoef': results,
        'CorrelationCoef_no_ica': results_no_ica,
    }
    return Results
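

# Example invocation (illustrative only; all values and paths below are
# placeholders). The runners only assume attribute access on `args` and
# `config`, so outside the repo's CLI a nested SimpleNamespace suffices:
#
#   from types import SimpleNamespace
#
#   config = SimpleNamespace(data_dim=5, n_segments=40,
#                            n_layers=[2, 4], n_obs_per_seg=[1000, 2000],
#                            data_seed=1,
#                            ivae=SimpleNamespace(max_iter=int(7e4), lr=1e-3, cuda=False))
#   args = SimpleNamespace(nSims=10, dataset='TCL', test=False,
#                          checkpoints='run/checkpoints')
#   print(run_ivae_exp(args, config)['CorrelationCoef'])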