def test_emd_equivalence(M1, M2, norm, R):
    """Check that batched ``emd.emds`` matches element-wise ``emd.emd`` calls.

    Two scenarios are exercised on random events with ``gdim = 2``:
    distances between two different event sets, and distances within a
    single event set (where only the strict lower triangle is computed by
    hand and then mirrored).
    """
    gdim = 2
    events1 = np.random.rand(nev, M1, gdim + 1)
    events2 = np.random.rand(nev, M2, gdim + 1)
    emd_kwargs = dict(R=R, norm=norm, gdim=gdim)

    # Scenario 1: two different sets -- fill the full matrix pair by pair.
    pairwise = np.zeros((nev, nev))
    for row in range(nev):
        for col in range(nev):
            pairwise[row, col] = emd.emd(events1[row], events2[col],
                                         **emd_kwargs)
    batched = emd.emds(events1, events2, verbose=0, n_jobs=1, **emd_kwargs)
    assert epsilon_diff(pairwise, batched, 10**-12)

    # Scenario 2: same set -- compute the strict lower triangle only and
    # mirror it across the (zero) diagonal.
    lower = np.zeros((nev, nev))
    for row in range(1, nev):
        for col in range(row):
            lower[row, col] = emd.emd(events1[row], events1[col],
                                      **emd_kwargs)
    symmetric = lower + lower.T
    batched = emd.emds(events1, verbose=0, n_jobs=1, **emd_kwargs)
    assert epsilon_diff(symmetric, batched, 10**-12)
def test_periodic_phi(gdim, M):
    """Check the ``periodic_phi`` option of ``emd.emds`` / ``emd.emd``.

    For every candidate periodic column this test

    1. shifts that column by random integer multiples of ``2*pi`` and checks
       that the periodic EMDs equal the unshifted, non-periodic ones;
    2. builds ground-distance matrices by hand (plain and periodic) for two
       random events, solves them with ``ot.emd2`` and compares against
       ``emd.emd`` with ``periodic_phi`` off and on.
    """
    two_pi = 2 * np.pi
    events = np.random.rand(nev, M, 1 + gdim)

    for phi_col in range(1, gdim + 1):

        # Part 1: shifting by whole periods must not change the result.
        reference = emd.emds(events, R=1.0, gdim=gdim, n_jobs=1, verbose=0)
        shifted = np.copy(events)
        shifted[:, :, phi_col] += two_pi * np.random.randint(-10, 10,
                                                             size=(nev, M))
        periodic = emd.emds(shifted, R=1.0, gdim=gdim, periodic_phi=True,
                            phi_col=phi_col, n_jobs=1, verbose=0)
        assert epsilon_diff(reference, periodic, 10**-12)

        # Part 2: compare against an explicit optimal-transport computation.
        ev1 = np.random.rand(10, 1 + gdim) * 4 * np.pi
        ev2 = np.random.rand(20, 1 + gdim) * 4 * np.pi
        n1, n2 = len(ev1), len(ev2)
        thetaw = np.zeros((n1, n2))  # plain Euclidean ground distance
        thetar = np.zeros((n1, n2))  # ground distance with phi_col wrapped

        for i in range(n1):
            p1 = ev1[i]
            for j in range(n2):
                p2 = ev2[j]
                dw, dr = 0., 0.
                # Column 0 is the weight; only columns 1..gdim are coordinates.
                for m in range(1, 1 + gdim):
                    k1, k2 = p1[m], p2[m]
                    dw += (k1 - k2)**2
                    if m == phi_col:
                        # Smallest separation over a few periodic images.
                        images = [abs(k1 - (k2 + 2 * np.pi * n))
                                  for n in range(-3, 3)]
                        dr += np.min(images)**2
                    else:
                        dr += (k1 - k2)**2
                thetaw[i, j] = np.sqrt(dw)
                thetar[i, j] = np.sqrt(dr)

        # Normalised weights, matching norm=True in the emd.emd calls below.
        w1, w2 = ev1[:, 0], ev2[:, 0]
        zs1 = np.ascontiguousarray(w1 / np.sum(w1))
        zs2 = np.ascontiguousarray(w2 / np.sum(w2))

        ot_w = ot.emd2(zs1, zs2, thetaw)
        ot_r = ot.emd2(zs1, zs2, thetar)

        ef_w = emd.emd(ev1, ev2, norm=True, gdim=gdim, periodic_phi=False,
                       phi_col=phi_col)
        ef_r = emd.emd(ev1, ev2, norm=True, gdim=gdim, periodic_phi=True,
                       phi_col=phi_col)

        assert epsilon_diff(ot_w, ef_w, 10**-14)
        assert epsilon_diff(ot_r, ef_r, 10**-14)
def test_gdim(gdim, evdim, M, norm, R):
    """Check that ``gdim`` selects the same columns as slicing the events.

    EMDs computed on full events with ``gdim=gdim`` must equal EMDs computed
    on events truncated to their first ``1 + gdim`` columns with an
    oversized ``gdim=100``. The case is skipped when ``R`` is below
    ``sqrt(gdim) / 2``.
    """
    min_radius = np.sqrt(gdim) / 2
    if R < min_radius:
        pytest.skip('R too small')

    events = np.random.rand(nev, M, 1 + evdim)
    shared = dict(norm=norm, R=R, n_jobs=1, verbose=0)

    via_gdim = emd.emds(events, gdim=gdim, **shared)
    truncated = events[:, :, :1 + gdim]
    via_slice = emd.emds(truncated, gdim=100, **shared)

    assert epsilon_diff(via_gdim, via_slice, 10**-13)
def test_n_jobs(n_jobs, M, norm, R):
    """Check that ``emd.emds`` with ``n_jobs`` matches pairwise ``emd.emd``.

    The reference matrix is built from the strict lower triangle of
    single-pair calls and mirrored across the (zero) diagonal.
    """
    events = np.random.rand(nev, M, 3)

    lower = np.zeros((nev, nev))
    for row in range(1, nev):
        for col in range(row):
            lower[row, col] = emd.emd(events[row], events[col], R=R,
                                      norm=norm)
    expected = lower + lower.T

    batched = emd.emds(events, R=R, norm=norm, verbose=0, n_jobs=n_jobs)
    assert epsilon_diff(expected, batched, 10**-12)
def _to_efp_format(jets):
    """Reorder the last axis to (col 2, col 0, col 1) and append a zero column."""
    # presumably (pT, eta, phi, mass=0) as expected by the EMD code -- TODO confirm
    zeros = np.zeros((jets.shape[0], jets.shape[1], 1))
    return np.concatenate(
        (np.expand_dims(jets[:, :, 2], 2), jets[:, :, :2], zeros), axis=2)


def _cov_mmd(dists, num_samples):
    """Return ``(coverage, mmd)`` for a (generated x real) distance matrix."""
    # Coverage: fraction of distinct real samples that are the nearest
    # neighbour of at least one generated sample.
    cov = np.unique(np.argmin(dists, axis=1)).size / num_samples
    # MMD: mean over real samples of the distance to the closest generated one.
    mmd = np.mean(np.min(dists, axis=0))
    return cov, mmd


def calc_cov_mmd(args, X, gen_out, losses, labels=None):
    """Compute coverage and MMD between real and generated samples.

    For ``args.cov_mmd_num_batches`` batches, ``args.cov_mmd_num_samples``
    real and generated samples are drawn at random (with the module-level
    ``rng``), their pairwise distances are computed with ``emds`` and
    reduced to a coverage and an MMD value. The batch means are appended to
    ``losses['coverage']`` and ``losses['mmd']``. When ``args.clabels == 1``
    the same is done separately inside each bin delimited by the
    module-level ``pt_regions``, and the per-bin batch means are appended to
    ``losses['intra_coverage']`` and ``losses['intra_mmd']``.

    Args:
        args: Run configuration; reads ``eval_tot_samples``, ``clabels``,
            ``maxjf``, ``cov_mmd_num_batches`` and ``cov_mmd_num_samples``.
        X: Real samples; must support ``.cpu().detach().numpy()``.
        gen_out: Generated samples, sliced and passed to
            ``utils.unnorm_data``.
        losses: Mapping of metric name to list; mutated in place.
        labels: Conditioning labels, required when ``args.clabels == 1``.
            Column 0 is rescaled by ``args.maxjf[0]``.

    Raises:
        TypeError: If ``args.clabels == 1`` and ``labels`` is ``None``.
    """
    # The masks returned by unnorm_data are not needed here.
    X_rn, _ = utils.unnorm_data(
        args, X.cpu().detach().numpy()[:args.eval_tot_samples], real=True)
    gen_out_rn, _ = utils.unnorm_data(
        args, gen_out[:args.eval_tot_samples], real=False)

    # converting into EFP format
    X_rn = _to_efp_format(X_rn)
    gen_out_rn = _to_efp_format(gen_out_rn)

    conditional = args.clabels == 1
    num_samples = args.cov_mmd_num_samples

    if conditional:
        if labels is None:
            raise TypeError("labels must be provided when args.clabels == 1")
        # .cpu() mirrors the handling of X: Tensor.numpy() only works on
        # host tensors, so labels living on an accelerator would fail.
        abs_labels = (labels[:args.eval_tot_samples, 0] *
                      args.maxjf[0]).detach().cpu().numpy()
        num_regions = len(pt_regions) - 1

    logging.info("Calculating coverage and MMD")

    covs = []
    mmds = []
    intra_covs = []
    intra_mmds = []

    for _ in range(args.cov_mmd_num_batches):
        G_rand_sample = rng.choice(args.eval_tot_samples, size=num_samples)
        X_rand_sample = rng.choice(args.eval_tot_samples, size=num_samples)

        dists = emds(gen_out_rn[G_rand_sample], X_rn[X_rand_sample])
        cov, mmd = _cov_mmd(dists, num_samples)
        covs.append(cov)
        mmds.append(mmd)

        # Intra-W1
        if conditional:
            covs_all = []
            mmds_all = []
            for i in range(num_regions):
                # Samples whose label lies strictly inside the i-th bin.
                cut = ((abs_labels > pt_regions[i]) &
                       (abs_labels < pt_regions[i + 1]))
                # NOTE(review): an empty bin makes rng.choice fail; confirm
                # that every bin is always populated.
                tot_cut = np.sum(cut)

                Gcut = gen_out_rn[cut]
                Xcut = X_rn[cut]

                G_rand_sample = rng.choice(tot_cut, size=num_samples)
                X_rand_sample = rng.choice(tot_cut, size=num_samples)

                dists = emds(Gcut[G_rand_sample], Xcut[X_rand_sample])
                cov, mmd = _cov_mmd(dists, num_samples)
                covs_all.append(cov)
                mmds_all.append(mmd)

            intra_covs.append(covs_all)
            intra_mmds.append(mmds_all)

    losses['coverage'].append(np.mean(np.array(covs)))
    losses['mmd'].append(np.mean(np.array(mmds)))

    if conditional:
        # axis=0 averages over batches, leaving one value per bin.
        losses['intra_coverage'].append(np.mean(np.array(intra_covs), axis=0))
        losses['intra_mmd'].append(np.mean(np.array(intra_mmds), axis=0))
# Interactive scratch: inspect memory, reload helpers, plot jet features and
# prototype the coverage / MMD computation on the first 10 samples.

# NOTE(review): `h` is presumably a heap profiler handle created elsewhere
# -- confirm it is still needed.
h.heap()
importlib.reload(utils)

realefp = utils.efp(args, X_rn, mask=mask_real, real=True)
genefp = utils.efp(args, gen_out_rn, mask=mask_gen, real=False)

save_outputs.plot_jet_feats(args, realjf, genjf, realefp, genefp, 'j',
                            show=True)

Gsample = utils.ef_format(gen_out_rn[:10])
Xsample = utils.ef_format(X_rn[:10])

# The bare `Gsample` / `dists` expressions that followed the assignments only
# displayed values in an interactive session; they are no-ops in a script and
# have been dropped.
dists = emds(Gsample, Xsample)

mmd = np.mean(np.min(dists, axis=0))
cov = np.unique(np.argmin(dists, axis=1)).size / 10

# NOTE(review): this overwrites the axis=0 value above with the minimum taken
# along the other axis; calc_cov_mmd uses axis=0 -- confirm which is wanted.
mmd = np.mean(np.min(dists, axis=1))

# Removed two leftover statements that always raised: `mmd.append(...)`
# (mmd is a NumPy scalar, which has no append) and `covs.append()` (append
# requires an argument).
2), samples_dict[dataset][0][:, :, :2], np.zeros((samples_dict[dataset][0].shape[0], samples_dict[dataset][0].shape[1], 1))), axis=2) covs = [] mmds = [] for j in range(10): X_rand_sample = rng.choice(50000, size=100) X_rand_sample2 = rng.choice(50000, size=100) Xsample = X_rn[X_rand_sample] Xsample2 = X_rn[X_rand_sample2] dists = emds(Xsample, Xsample2) mmds.append(np.mean(np.min(dists, axis=0))) covs.append(np.unique(np.argmin(dists, axis=1)).size / 100) print(f"mmds: {np.mean(mmds)}") print(f"cov: {np.mean(covs)}") args_txt = { 'g': 'args/236_g30_dea_no_pos_diffs_graphcnngang_mpgand.txt', 't': 'args/237_t30_lrx2_dea_no_pos_diffs_graphcnngang_mpgand.txt', 'q': 'args/238_q30_lrx05_dea_no_pos_diffs_graphcnngang_mpgand.txt' } for dataset in samples_dict.keys(): print(dataset)