Example #1
def get_embeddings(loader, model):
    zf, labels = [], []
    for x, y in tqdm(loader):
        z_ = model.flow(x)[1]
        zf.append(utils.tonp(z_))
        labels.append(utils.tonp(y))
    return np.concatenate(zf), np.concatenate(labels)
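Every example on this page calls utils.tonp, whose definition is not shown here. A minimal sketch consistent with these call sites (an assumption; the real helper may also handle awkward arrays, as in the last example) would be:

import numpy as np
import torch

def tonp(x):
    # Hypothetical sketch: detach a torch.Tensor (possibly on GPU) and
    # convert it to a NumPy array; pass other array-likes through np.asarray.
    if isinstance(x, torch.Tensor):
        return x.detach().cpu().numpy()
    return np.asarray(x)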
Example #2
def get_embeddings(loader, model):
    zf, zh, labels = [], [], []
    for x, y in tqdm(loader):
        z_ = model.flow(x)[1]
        zf.append(utils.tonp(z_))
        zh.append(utils.tonp(model.prior.flow(z_[:, -args.ssl_dim:, None, None], y)[1]))
        labels.append(utils.tonp(y))
    return np.concatenate(zf), np.concatenate(zh), np.concatenate(labels)
Example #3
def plot_gmm(gmm, colors, ax):
    pi = utils.tonp(torch.softmax(gmm.weights, 0))
    for i, (covar, mean, w, c) in enumerate(
            zip(gmm.covariances, gmm.means, pi, colors[1:])):
        draw_ellipse(utils.tonp(covar),
                     utils.tonp(mean),
                     c,
                     ax,
                     alpha=w,
                     label=r'$\pi_{}$ = {:.3f}'.format(i + 1, w))
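draw_ellipse is assumed by this example (and by Example #6 below) but not defined on this page. A minimal sketch in the spirit of the common GMM-plotting recipe (an assumption, not the original helper) could be:

import numpy as np
from matplotlib.patches import Ellipse

def draw_ellipse(covar, mean, color, ax, alpha=1.0, label=None, n_std=2.0):
    # Hypothetical sketch: draw an n_std covariance ellipse on the given axes.
    vals, vecs = np.linalg.eigh(covar)
    angle = np.degrees(np.arctan2(vecs[1, 0], vecs[0, 0]))
    width, height = 2 * n_std * np.sqrt(vals)
    ax.add_patch(Ellipse(mean, width, height, angle=angle,
                         color=color, alpha=alpha, label=label))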
Example #4
def get_metrics(model, loader):
    logp, acc = [], []
    for x, y in loader:
        x = x.to(device)
        log_det, z = model.flow(x)
        log_prior_full = model.prior.log_prob_full(z)
        pred = torch.softmax(log_prior_full, dim=1).argmax(1)
        logp.append(utils.tonp(log_det + model.prior.log_prob(z)))
        acc.append(utils.tonp(pred) == utils.tonp(y))
    return np.mean(np.concatenate(logp)), np.mean(np.concatenate(acc))
Example #5
def train():
    logger.info('start building vocab data')

    vocab = Vocab(hps.vocab_file, hps.vocab_size)

    logger.info('end building vocab data')
    logger.info('vocab size: %s' % vocab.size())

    model = Model(vocab, hps)
    if hps.use_cuda:
        model = model.cuda()
    if hps.restore is not None:
        # raise ValueError('No data to restore')
        model.load_state_dict(torch.load(hps.restore))

    logger.info('----Start training----')
    timer = Timer()
    timer.start()
    for step in range(hps.start_step, hps.num_iters + 1):

        # Forward -------------------------------------------------------------
        outputs = model(None, None, 'infer')
        _, pred = torch.max(outputs, 1)
        pred = tonp(pred)
        logger.info('pred: %s' % restore_text(pred, vocab, [vocab.pad_id()]))

    lap, _ = timer.lap('end')
    print('pred time: %f' % lap)
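restore_text is a project helper not shown here. A plausible sketch matching the call above (hypothetical; it assumes a vocab.id2word lookup), mapping predicted ids back to tokens while skipping padding ids:

def restore_text(ids, vocab, skip_ids=()):
    # Hypothetical sketch: turn a sequence of token ids into a string,
    # dropping any ids listed in skip_ids (e.g. the pad id).
    return ' '.join(vocab.id2word(i) for i in ids if i not in skip_ids)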
Example #6
def plot(title='', data=None):
    x, y = map(utils.tonp, [x_test, y_test])
    z = flow.f(torch.FloatTensor(x).to(d))[0].detach().cpu().numpy()

    plt.figure(figsize=(12, 12))
    for t, c in zip(np.unique(y), colors):
        idxs = (y == t)
        plt.subplot(2, 2, 1)
        plt.title('Z space (Data map)')
        plt.scatter(z[idxs, 0], z[idxs, 1], c=[c], alpha=.6)
        plt.subplot(2, 2, 2)
        plt.title('X space (Data)')
        plt.scatter(x[idxs, 0], x[idxs, 1], c=[c], alpha=.6)

    z, y = flow.prior.sample((10000, ), labels=True)
    x = flow.g(z).detach().cpu().numpy()
    splot = plt.subplot(2, 2, 3)
    plt.title('Z space (Prior samples)')
    z = utils.tonp(z)
    plt.scatter(z[:, 0], z[:, 1], s=5, c=colors[0], alpha=.0)
    pi = utils.tonp(flow.prior.get_weights())
    arg = np.arange(args.k_prior)
    covs = utils.tonp(flow.prior.covariances)
    mu = utils.tonp(torch.stack([m for m in flow.prior.means]))
    for i, [covar, mean, w, c] in enumerate(
            zip(covs[arg[:10]], mu[arg[:10]], pi[arg[:10]], colors[1:])):
        draw_ellipse(covar,
                     mean,
                     c,
                     splot,
                     alpha=w,
                     label=r'$\pi_{}$ = {:.3f}'.format(i + 1, w))
    splot.legend()

    splot = plt.subplot(2, 2, 4)
    plt.title('X space (Flow samples)')
    plt.scatter(x[:, 0], x[:, 1], s=5, alpha=.2, c=colors[1:][y.clip(0, 9)])
    plt.suptitle(title, size=25)
Example #7
    def auto_attacker2(self, team, role, opp, ball, side=0, tic=0):
        """Strategy for an attacker working with DQN."""
        angle_to_ball = angle_to(team[role].pos, ball.pos)
        angle_to_goal = angle_to(team[role].pos, goals[not side])
        angle_ball_to_goal = angle_to(ball.pos, goals[not side])
        dist_ball_to_goal = dist(ball.pos, goals[not side])
        dist_to_goal = dist(team[role].pos, goals[not side])
        dist_to_ball = dist(team[role].pos, ball.pos)

        print "angle_to_ball", angle_to_ball
        print "angle_to_goal", angle_to_goal
        print "angle_ball_to_goal", angle_ball_to_goal
        print "dist_ball_to_goal", dist_ball_to_goal
        print "dist_to_goal", dist_to_goal
        print "dist_to_ball", dist_to_ball

        # Decision tree
        target = None
        if abs(angle_diff(angle_to_goal, angle_ball_to_goal)) < 1 and \
                dist_ball_to_goal < dist_to_goal:
            # run to score!
            target = ball.pos
            print "GO KICK"
        else:
            if team[role].pos[0] < ball.pos[0]:
                # go behind ball, aligned with goal
                vec = vector_to(goals[not side], ball.pos)
                vec = normalize_vector(vec, 0.7)
                target = tonp(ball.pos) + vec
                print "PREPARE TO KICK"
            else:
                # go behind ball
                target = list(ball.pos)
                if angle_to_ball > 0:
                    target[1] -= 0.5
                else:
                    target[0] += 0.5
                target[0] += 0.1
                print "GO BEHIND BALL"

        print(team[role].pos, ball.pos, target)
        team[role].move_to(target)
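The helpers angle_to, dist, vector_to, normalize_vector, and angle_diff are assumed by this strategy. Minimal 2-D sketches of their likely semantics, inferred from the call sites above (assumptions, not the original implementations):

import numpy as np

def dist(a, b):
    # Euclidean distance between two 2-D points.
    return float(np.hypot(b[0] - a[0], b[1] - a[1]))

def angle_to(a, b):
    # Heading from point a to point b, in radians.
    return float(np.arctan2(b[1] - a[1], b[0] - a[0]))

def vector_to(a, b):
    # Vector pointing from a to b.
    return np.array([b[0] - a[0], b[1] - a[1]])

def normalize_vector(v, length=1.0):
    # Rescale v to the requested length (unchanged if zero).
    n = np.linalg.norm(v)
    return v * (length / n) if n > 0 else v

def angle_diff(a, b):
    # Signed difference between two angles, wrapped to [-pi, pi].
    return (a - b + np.pi) % (2 * np.pi) - np.pi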
Example #8
        if args.odenet:
            model.set_nfe(0)
        logits = model(x)
        if args.odenet:
            train_forward_nfe.update(model.get_nfe())
            model.set_nfe(0)
        loss = criterion(logits, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if args.odenet:
            train_backward_nfe.update(model.get_nfe())
            model.set_nfe(0)
        train_loss += loss.item() * x.size(0)
        prediction = F.softmax(logits, dim=1).argmax(dim=1)
        train_acc += np.sum(utils.tonp(prediction) == utils.tonp(y))
    train_loss /= len(train_loader.sampler.indices)
    train_acc /= len(train_loader.sampler.indices)

    if epoch > args.warm:
        scheduler.step(train_loss)

    if epoch % args.log_each == 0 or epoch == 1 or epoch == args.epochs:
        with torch.no_grad():
            model.eval()
            val_loss, val_acc, val_nfe = utils.get_classification_metrics(
                model, val_loader, device, args.odenet)
        # Save best model
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save(model.state_dict(),
Example #9
def load_dataset(data,
                 train_bs,
                 test_bs,
                 num_examples=None,
                 data_root=DATA_ROOT,
                 shuffle=True,
                 seed=42,
                 supervised=-1,
                 logs_root='',
                 sup_sample_weight=-1,
                 sup_only=False,
                 device=None):
    bits = None
    sampler = None
    if data in ['moons', 'circles']:
        if data == 'moons':
            x, y = datasets.make_moons(n_samples=int(num_examples * 1.5),
                                       noise=0.1,
                                       random_state=seed)
            train_x, test_x, train_y, test_y = train_test_split(
                x, y, train_size=num_examples, random_state=seed)
        elif data == 'circles':
            x, y = datasets.make_circles(n_samples=int(num_examples * 1.5),
                                         noise=0.1,
                                         factor=0.2,
                                         random_state=seed)
            train_x, test_x, train_y, test_y = train_test_split(
                x, y, train_size=num_examples, random_state=seed)

        if supervised not in [-1, len(train_y), 0]:
            unsupervised_idxs, _ = train_test_split(np.arange(len(train_y)),
                                                    test_size=supervised,
                                                    stratify=train_y)
            train_y[unsupervised_idxs] = -1
        elif supervised == 0:
            train_y[:] = -1

        torch.save(
            {
                'train_x': train_x,
                'train_y': train_y,
                'test_x': test_x,
                'test_y': test_y,
            }, os.path.join(logs_root, 'data.torch'))

        trainset = torch.utils.data.TensorDataset(
            torch.FloatTensor(train_x[..., None, None]),
            torch.LongTensor(train_y))
        testset = torch.utils.data.TensorDataset(
            torch.FloatTensor(test_x[..., None, None]),
            torch.LongTensor(test_y))
        data_shape = [2, 1, 1]
        bits = np.nan
    elif data == 'mnist':
        train_transform = transforms.Compose([
            transforms.ToTensor(),
            UniformNoise(),
        ])

        test_transform = transforms.Compose([
            transforms.ToTensor(),
            UniformNoise(),
        ])
        trainset = torchvision.datasets.MNIST(root=data_root,
                                              train=True,
                                              download=True,
                                              transform=train_transform)
        testset = torchvision.datasets.MNIST(root=data_root,
                                             train=False,
                                             download=True,
                                             transform=test_transform)

        if num_examples not in (-1, None) and num_examples != len(trainset):
            idxs, _ = train_test_split(np.arange(len(trainset)),
                                       train_size=num_examples,
                                       random_state=seed,
                                       stratify=utils.tonp(trainset.targets))
            trainset.data = trainset.data[idxs]
            trainset.targets = trainset.targets[idxs]

        if supervised == 0:
            trainset.targets[:] = -1
        elif supervised != -1:
            unsupervised_idxs, _ = train_test_split(np.arange(
                len(trainset.targets)),
                                                    test_size=supervised,
                                                    stratify=trainset.targets)
            trainset.targets[unsupervised_idxs] = -1

        if sup_only:
            mask = trainset.targets != -1
            trainset.targets = trainset.targets[mask]
            trainset.data = trainset.data[mask]

        data_shape = (1, 28, 28)
        bits = 256
    else:
        raise NotImplementedError

    nw = 2
    if sup_sample_weight == -1:
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=train_bs,
                                                  shuffle=shuffle,
                                                  num_workers=nw,
                                                  pin_memory=True)
    else:
        sampler = ImbalancedDatasetSampler(trainset,
                                           sup_weight=sup_sample_weight)
        trainloader = torch.utils.data.DataLoader(trainset,
                                                  batch_size=train_bs,
                                                  sampler=sampler,
                                                  num_workers=nw,
                                                  pin_memory=True)

    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=test_bs,
                                             shuffle=False,
                                             num_workers=nw,
                                             pin_memory=True)
    return trainloader, testloader, data_shape, bits
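A minimal call, assuming the two-moons branch above (all argument values are illustrative):

trainloader, testloader, data_shape, bits = load_dataset(
    'moons', train_bs=128, test_bs=512,
    num_examples=1000, logs_root='./logs', supervised=100)
# data_shape == [2, 1, 1]; labels of unsupervised points are set to -1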
Example #10
# Create model
if args.prior == 'gmm':
    if args.gmm_k == 1 and args.prior_train_algo == 'no':
        prior = torch.distributions.MultivariateNormal(torch.zeros((2,)), torch.eye(2))
    else:
        prior = distributions.GMM(k=args.gmm_k, dim=2, normalize=args.prior_train_algo == 'GD')
else:
    raise NotImplementedError

if args.model == 'toy':
    flow = realnvp.get_toy_nvp(prior=prior, device=device)
else:
    raise NotImplementedError

if args.gmm_init == 'GMM':
    z = utils.batch_eval(lambda x: utils.tonp(flow.f(x[0].to(device))[0]), trainloader)
    z = np.concatenate(z)

    gmm = GaussianMixture(n_components=args.gmm_k, covariance_type=args.gmm_cov).fit(z)
    for i in range(args.gmm_k):
        flow.prior.means[i].data = torch.FloatTensor(gmm.means_[i]).to(device)
        flow.prior.set_covariance(i, torch.FloatTensor(gmm.covariances_[i]).to(device))
    flow.prior.weights.data = torch.FloatTensor(np.log(gmm.weights_)).to(device)

torch.save(flow.state_dict(), os.path.join('../data/', 'model.torch'))

if args.gmm_k == 1:
    pass
elif args.prior_train_algo == 'no':
    for p in flow.prior.parameters():
        p.requires_grad = False
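utils.batch_eval, used above for the GMM initialization, is not defined on this page. A plausible sketch, assuming it simply maps a function over loader batches without tracking gradients:

import torch

def batch_eval(fn, loader):
    # Hypothetical sketch: apply fn to each batch and collect the results.
    outputs = []
    with torch.no_grad():
        for batch in loader:
            outputs.append(fn(batch))
    return outputs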
Example #11
            log_prior[y != -1] = model.prior.log_prob(z[y != -1],
                                                      y=y[y != -1].to(
                                                          x.device))
        elbo = log_det + log_prior

        weights = torch.ones((elbo.size(0), )).to(elbo)
        weights[y != -1] = args.sup_weight
        weights /= weights.sum()

        gen_loss = -(elbo * weights.detach()).sum()

        cl_loss = 0
        if n_sup != 0:
            logp_full = model.prior.log_prob_full(z[y != -1])
            prediction = logp_full
            train_acc.add(utils.tonp(prediction.argmax(1).to(y) == y[y != -1]))
            if args.cl_weight != 0:
                cl_loss = F.cross_entropy(prediction,
                                          y[y != -1].to(prediction.device),
                                          reduction='none')
                train_cl.add(utils.tonp(cl_loss))
                cl_loss = cl_loss.mean()

        loss = gen_loss + args.cl_weight * cl_loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_elbo.add(utils.tonp(elbo))
        train_loss += loss.item() * x.size(0)
Example #12
def get_logp(model, loader):
    logp = []
    for x, _ in loader:
        x = x.to(device)
        logp.append(utils.tonp(model.log_prob(x)))
    return np.concatenate(logp)
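Typical use, assuming model and a test loader from the surrounding script:

logp = get_logp(model, test_loader)
print('mean log-likelihood: %.3f' % logp.mean())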
Example #13
    def one_sample(self, labels=False):
        k = np.random.choice(len(self.weights), p=utils.tonp(self.weights))
        if labels:
            return self.base_ditributions[k].sample(), k
        return self.base_ditributions[k].sample()
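For many draws at once, the per-sample np.random.choice call can be replaced by a vectorized alternative. A sketch using torch.distributions (not the class's own API; it assumes self.weights already holds mixture probabilities, as the p=... call above suggests):

import torch

def sample_n(self, n, labels=False):
    # Hypothetical sketch: draw n component indices in one call, then
    # sample each selected base distribution.
    ks = torch.distributions.Categorical(probs=self.weights).sample((n,))
    xs = torch.stack([self.base_ditributions[int(k)].sample() for k in ks])
    return (xs, ks) if labels else xs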
Example #14
    encoder = vae_module.Encoder5x5(args.z_dim, args.hidden_dim)

vae = vae_module.VAE(encoder, decoder, device=device)
vae.load_state_dict(torch.load(args.model))

# Reconstructions
n, m = 10, 5
np.random.seed(42)
fig, axes = plt.subplots(figsize=(15, 12), nrows=n, ncols=m)
img_idxs = np.random.randint(0, len(data), size=(n * m))

for i, ax in enumerate(axes.flat):
    c = data[img_idxs[i]][np.newaxis]
    inp = torch.FloatTensor(c).to(device)
    _, [rec, _] = vae(inp)
    rec = tonp(rec)
    c = np.concatenate([c, rec], 3)[0, 0]
    sns.heatmap(c, ax=ax)
    ax.axis('off')

plt.savefig(os.path.join(args.log_dir, 'reconstruction'), dpi=200)
plt.clf()

# Samples
n, m = 10, 10
z = torch.randn(n * m, args.z_dim, 1, 1).to(device)
mu, var = vae.decode(z)

x = tonp(mu)[:, 0]

fig, axes = plt.subplots(figsize=(5, 5), nrows=n, ncols=m)
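The example cuts off after creating the grid. A plausible continuation, mirroring the reconstruction loop above (the 'samples' filename is an assumption):

for i, ax in enumerate(axes.flat):
    sns.heatmap(x[i], ax=ax)
    ax.axis('off')

plt.savefig(os.path.join(args.log_dir, 'samples'), dpi=200)
plt.clf()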
Example #15
    def process_try_except(self, lazy_df):
        accumulator = self.accumulator.identity()        
        df = MaskedLazyDataFrame.from_lazy(lazy_df)
        
        sample_name = df['dataset']
        accumulator['cutflow'][f'{sample_name}_start'] += df.shape[0]

        # compute PU weights
        if sample_name.startswith('Single'):
            df['weight'] = np.ones(df.shape[0])
        else:
            key = f'pu_{sample_name}'
            if key not in self.weights_lookup:
                key = 'pu_mc'
            if key not in self.weights_lookup:
                print('Probably this is just a test, assigning a random PU weight')
                key = list(self.weights_lookup.keys())[0]
            central = self.weights_lookup['pu_data'](df['pu_trueInteraction']) / self.weights_lookup[key](df['pu_trueInteraction'])
            up = self.weights_lookup['pu_data_up'](df['pu_trueInteraction']) / self.weights_lookup[key](df['pu_trueInteraction'])
            down = self.weights_lookup['pu_data_down'](df['pu_trueInteraction']) / self.weights_lookup[key](df['pu_trueInteraction'])
            df['weight'] = central
            df['pileup_up'] = up / central
            df['pileup_down'] = down / central

        # Make objects (electrons, muons, jets, SVs...) here
        df['muons'] = objects.make_muons(df)
        df['svs'] = objects.make_svs(df)
        df['jets'] = objects.make_jets(df)
        
        # count loose muons (minus the tight prompt one)
        df['nLooseMu'] = df.muons.isLoose.sum() - 1
        # Get prompt muon and select events
        prompt_mu_mask = (df.muons.p4.pt > 25) & (np.abs(df.muons.p4.eta) < 2.5) & (df.muons.dbIso < 0.1) & df.muons.isTight
        all_prompt_mus = df['muons'][prompt_mu_mask]
        df['prompt_mu'] = all_prompt_mus[:,:1] # pick only the first one
        trig_to_use = 'passIsoMu27All' if '2017' in self.jobid else 'passIsoMu24'
        trig_and_prompt = df[trig_to_use] & (all_prompt_mus.counts > 0) 
        accumulator['cutflow'][f'{sample_name}_trigg&promptMu'] += trig_and_prompt.sum()
        
        # Get trailing muon, here we need some gym due to broadcasting issues
        displ_mu_mask = (df['muons'].p4.pt > 5) & (np.abs(df['muons'].p4.eta) < 2.5) & df['muons'].isLoose
        # check that we are not selecting the same muon
        lft, rht = df['muons'].p4.cross(df['prompt_mu'].p4, nested = True).unzip() # make combinations
        min_dr = lft.delta_r(rht).min() # min DR
        displ_mu_mask = trig_and_prompt & displ_mu_mask & (min_dr > 0.0000001)
        all_displ_mu = df['muons'][displ_mu_mask]

        # select SVs
        good_svs = df['svs'][df['svs'].pt > 0] \
                   if (df['svs'].counts > 0).any() else \
                      df['svs']

        # make skimming and attach trailing mu and sv
        presel = trig_and_prompt & (all_displ_mu.counts > 0) & (good_svs.counts > 0) 
        df['n_displaced_mu'] = all_displ_mu.counts
        df['second_mu'] = all_displ_mu[:,:1]
        df['goodsv'] = good_svs[:,:1]
        skim = df[presel]
        accumulator['cutflow'][f'{sample_name}_skimming'] += presel.sum()

        # flatten objects to avoid messing around
        skim['prompt_mu'] = skim['prompt_mu'].flatten()
        skim['second_mu'] = skim['second_mu'].flatten()
        skim['goodsv'] = skim['goodsv'].flatten()

        # select and match jet to the second muon
        jets = skim['jets'] # just a shortcut to avoid writing it constantly
        # jet ID selection by bins, useful given that muons are only up to 2.4? 
        selection_eta_bins = [
            # |eta| < 2.4
            (np.abs(jets.p4.eta) <= 2.4) & \
            (jets.charHadEnFrac > 0.) & (jets.chargedMult > 0) & (jets.charEmEnFrac <= 0.99),
            # |eta| < 2.7
            (np.abs(jets.p4.eta) <= 2.7) & (jets.neutHadEnFrac <= 0.9) & \
            (jets.neutEmEnFrac <= 0.90) & ((jets.chargedMult + jets.neutMult) >= 1),
            # |eta| < 3.0
            (np.abs(jets.p4.eta) <= 2.7) & (jets.neutHadEnFrac <= 0.98) & \
            (jets.neutEmEnFrac <= 0.01) & (jets.neutMult >= 2),
            # any eta \_(-.-)_/
            (jets.neutEmEnFrac <= 0.9) & (jets.neutMult >= 10),
        ]
        # compute the or of the masks
        jet_id = reduce(lambda x, y: x | y, selection_eta_bins)
        
        # compute DR w.r.t. the prompt and second muon
        dr_prompt = utils.tonp(
            skim['prompt_mu'].p4.delta_r(jets.p4)
        )
        dr_second = utils.tonp(
            skim['second_mu'].p4.delta_r(jets.p4)
        )

        selection_dr = (dr_prompt >= 0.4) & (dr_second <= 0.7) & \
                       (jets.p4.pt > 20) & jet_id
        
        selected_jets = jets[selection_dr]
        selected_dr_second = dr_second[selection_dr]
        ##skim['n_selected_jets'] = None  TO ADD when we understand what Mohamed did
        
        # pick the closest jet, keep jaggedness to avoid 
        # messing up skim. This BADLY needs awkward v2.0
        # to fix this mess
        min_dr = selected_dr_second.argmin()
        matched_jets = selected_jets[min_dr]
        at_least_one_jet = (selected_dr_second.count() > 0)

        # make preselection variables and cuts
        skim['m1_vtx_mass'] = (skim.prompt_mu.p4 + skim.goodsv.p4).mass
        skim['ll_mass'] = (skim.prompt_mu.p4 + skim.second_mu.p4).mass
        skim['ll_dr'] = skim.prompt_mu.p4.delta_r(skim.second_mu.p4)

        #transverse mass
        skim['tmass_goodsv'] = np.sqrt(pow(skim.goodsv.p4.pt  + skim.pfMet_pt,2) - \
                                       pow(skim.goodsv.p4.x + skim.pfMet_px,2) - \
                                       pow(skim.goodsv.p4.y + skim.pfMet_py,2))
        skim['tmass_promptmu'] = np.sqrt(pow(skim.prompt_mu.p4.pt  + skim.pfMet_pt,2) - \
                                         pow(skim.prompt_mu.p4.x + skim.pfMet_px,2) - \
                                         pow(skim.prompt_mu.p4.y + skim.pfMet_py,2))
        svPlusmu_p4 = skim.goodsv.p4 + skim.prompt_mu.p4
        skim['tmass_svmu'] = np.sqrt(pow(svPlusmu_p4.pt  + skim.pfMet_pt,2) - \
                                     pow(svPlusmu_p4.x + skim.pfMet_px,2) - \
                                     pow(svPlusmu_p4.y + skim.pfMet_py,2))

        skim['dimu_deltaphi'] = np.abs(skim.prompt_mu.p4.delta_phi(skim.second_mu.p4))
        
        goodsv_pt2 = (skim.goodsv.position.cross(skim.goodsv.p3).mag2)/(skim.goodsv.position.mag2)
        skim['mass_corr'] = np.sqrt(skim.goodsv.p4.mass * skim.goodsv.p4.mass + goodsv_pt2) + \
                            np.sqrt(goodsv_pt2)

        # make preslection cut
        preselection_mask = (skim.prompt_mu.absdxy < 0.005) & (skim.prompt_mu.absdz < 0.1) & \
                    (skim.second_mu.absdxy > 0.02) & \
                    (0.3 < skim.ll_dr) & at_least_one_jet
        #(40 < skim.m1_vtx_mass) & (skim.m1_vtx_mass < 90) & \ # removed from preselection_mask

        preselection = skim[preselection_mask]
        matched_jet = matched_jets[preselection_mask][:,0] # cannot attach to preselection for some reason FIXME!
        
        selection_mask = (1 < preselection.ll_dr) & (preselection.ll_dr < 5) & \
                         (preselection.jet_pt.counts > 0) & (preselection.jet_pt.max() > 20)

        #(20 < preselection.ll_mass) & (preselection.ll_mass < 85) & \ # removed from selection_mask

        same_sign = (preselection.prompt_mu.charge * preselection.second_mu.charge) > 0.
        opp_sign = np.invert(same_sign)
        preselection['isOS'] = opp_sign
        
        accumulator['cutflow'][f'{sample_name}_preselection'] += preselection.shape[0]
        accumulator['cutflow'][f'{sample_name}_selection'] += selection_mask.sum()

        #print(preselection.shape[0], preselection['weight'].sum())
        # fill preselection histograms

        for category, mask in [
            ('preselection_SS', same_sign),
            ('preselection_OS', opp_sign),
            ('selection_SS', selection_mask & same_sign),
            ('selection_OS', selection_mask & opp_sign),]:

            masked_df = preselection[mask]
            masked_jets = matched_jet[mask]
            accumulator[category]['prompt_pt'  ].fill(
                weight = masked_df['weight'], sample = sample_name, 
                pt = utils.tonp(masked_df.prompt_mu.p4.pt)
            )
            accumulator[category]['diplaced_pt'].fill(
                weight = masked_df['weight'], sample = sample_name, 
                pt = utils.tonp(masked_df.second_mu.p4.pt)
            )
            accumulator[category]['di_mu_M'    ].fill(
                weight = masked_df['weight'], sample = sample_name, 
                mass = utils.tonp(masked_df['ll_mass'])
            )
            accumulator[category]['di_mu_DR'   ].fill(
                weight = masked_df['weight'], sample = sample_name, 
                dr = utils.tonp(masked_df.ll_dr)
            )
            accumulator[category]['sv_lxy'   ].fill(
                weight = masked_df['weight'], sample = sample_name,
                lxy = utils.tonp(masked_df['goodsv']['lxy'])
            )
            accumulator[category]['sv_mass'   ].fill(
                weight = masked_df['weight'], sample = sample_name,
                mass = utils.tonp(masked_df.goodsv.p4.mass)
            )
            accumulator[category]['m1_vtx_mass'   ].fill(
                weight = masked_df['weight'], sample = sample_name,
                mass = utils.tonp(masked_df['m1_vtx_mass'])  
            )
            
            if category == 'preselection_SS' or category == 'preselection_OS':

                accumulator[category]['sv_tM'].fill(
                    weight = masked_df['weight'], sample = sample_name,
                    mass = utils.tonp(masked_df.tmass_goodsv)
                )

                accumulator[category]['mu_tM'].fill(
                    weight = masked_df['weight'], sample = sample_name,
                    mass = utils.tonp(masked_df.tmass_promptmu)
                )

                accumulator[category]['musv_tM'].fill(
                    weight = masked_df['weight'], sample = sample_name,
                    mass = utils.tonp(masked_df.tmass_svmu)
                )

                accumulator[category]['corr_M'].fill(
                    weight = masked_df['weight'], sample = sample_name,
                    mass = utils.tonp(masked_df.mass_corr)
                )

            if category.startswith('selection'):
                # variables for CNN
                accumulator['columns'][sample_name]['isOS'] += utils.tonp(masked_df['isOS'])
                accumulator['columns'][sample_name]['pu_weight'] += utils.tonp(masked_df['weight'])
                accumulator['columns'][sample_name]['m1_vtx_mass'] += utils.tonp(masked_df['m1_vtx_mass'])
                accumulator['columns'][sample_name]['mu2_absdxy'] += utils.tonp(masked_df['second_mu']['absdxy'])
                accumulator['columns'][sample_name]['mu2_absdz'] += utils.tonp(masked_df['second_mu']['absdz'])
                accumulator['columns'][sample_name]['mu2_phi'] += utils.tonp(masked_df['second_mu'].p4.phi)
                accumulator['columns'][sample_name]['mu2_ptBT'] += utils.tonp(masked_df['second_mu']['pt_BT'])
                accumulator['columns'][sample_name]['mu2_etaBT'] += utils.tonp(masked_df['second_mu']['eta_BT'])
                accumulator['columns'][sample_name]['mu2_absdxySig'] += utils.tonp(masked_df['second_mu']['absdxySig'])
                accumulator['columns'][sample_name]['mu2_absdzSig'] += utils.tonp(masked_df['second_mu']['absdzSig'])
                accumulator['columns'][sample_name]['mu2_deltaBeta'] += utils.tonp(masked_df['second_mu']['deltaBeta'])
                accumulator['columns'][sample_name]['mu2_nDof'] += utils.tonp(masked_df['second_mu']['nDof'])
                accumulator['columns'][sample_name]['mu2_timeAtIpInOut'] += utils.tonp(masked_df['second_mu']['timeAtIpInOut'])
                accumulator['columns'][sample_name]['mu2_timeAtIpInOutErr'] += utils.tonp(masked_df['second_mu']['timeAtIpInOutErr'])
                accumulator['columns'][sample_name]['mu2_timeAtIpOutIn'] += utils.tonp(masked_df['second_mu']['timeAtIpOutIn'])
                accumulator['columns'][sample_name]['mu2_timeAtIpOutInErr'] += utils.tonp(masked_df['second_mu']['timeAtIpOutInErr'])
                accumulator['columns'][sample_name]['mu2_segmentComp'] += utils.tonp(masked_df['second_mu']['segmentComp'])
                accumulator['columns'][sample_name]['mu2_trkKink'] += utils.tonp(masked_df['second_mu']['trkKink'])
                accumulator['columns'][sample_name]['mu2_chi2LocalPosition'] += utils.tonp(masked_df['second_mu']['chi2LocalPosition'])
                accumulator['columns'][sample_name]['mu2_rhoRelIso'] += utils.tonp(masked_df['second_mu']['rho_rel_iso'])
                accumulator['columns'][sample_name]['sv_mass'] += utils.tonp(masked_df['goodsv'].p4.mass)
                accumulator['columns'][sample_name]['sv_pt'] += utils.tonp(masked_df['goodsv'].p4.pt)
                accumulator['columns'][sample_name]['sv_lxySig'] += utils.tonp(masked_df['goodsv']['lxySig'])
                accumulator['columns'][sample_name]['sv_lxyzSig'] += utils.tonp(masked_df['goodsv']['lxyzSig'])
                accumulator['columns'][sample_name]['sv_lxy'] += utils.tonp(masked_df['goodsv']['lxy'])
                accumulator['columns'][sample_name]['sv_lxyz'] += utils.tonp(masked_df['goodsv']['lxyz'])
                accumulator['columns'][sample_name]['sv_angle3D'] += utils.tonp(masked_df['goodsv']['angle3D'])
                accumulator['columns'][sample_name]['sv_angle2D'] += utils.tonp(masked_df['goodsv']['angle2D'])
                accumulator['columns'][sample_name]['sv_gamma'] += utils.tonp(masked_df['goodsv']['gamma'])
                accumulator['columns'][sample_name]['sv_chi2'] += utils.tonp(masked_df['goodsv']['chi2'])                
                accumulator['columns'][sample_name]['sv_sum_tracks_dxySig'] += utils.tonp(masked_df['goodsv']['sum_tracks_dxySig']).astype(np.float64)
                accumulator['columns'][sample_name]['mujet_eta'] += utils.tonp(masked_jets.p4.eta)
                accumulator['columns'][sample_name]['mujet_phi'] += utils.tonp(masked_jets.p4.phi)
                accumulator['columns'][sample_name]['mujet_neutHadEnFrac'] += utils.tonp(masked_jets['neutHadEnFrac'])
                accumulator['columns'][sample_name]['mujet_neutEmEnFrac'] += utils.tonp(masked_jets['neutEmEnFrac'])
                accumulator['columns'][sample_name]['mujet_charHadEnFrac'] += utils.tonp(masked_jets['charHadEnFrac'])
                accumulator['columns'][sample_name]['mujet_charEmEnFrac'] += utils.tonp(masked_jets['charEmEnFrac'])
                accumulator['columns'][sample_name]['mujet_neutMult'] += utils.tonp(masked_jets['neutMult'])
                accumulator['columns'][sample_name]['mujet_smeared_pt'] += utils.tonp(masked_jets['smeared_pt'])
                accumulator['columns'][sample_name]['mujet_dCsv_bb'] += utils.tonp(masked_jets['dCsv_bb'])
                accumulator['columns'][sample_name]['mujet_charEmEn'] += utils.tonp(masked_jets['charEmEn'])
                accumulator['columns'][sample_name]['mujet_charHadEn'] += utils.tonp(masked_jets['charHadEn'])
                accumulator['columns'][sample_name]['mujet_charMuEn'] += utils.tonp(masked_jets['charMuEn'])
                accumulator['columns'][sample_name]['mujet_charMuEnFrac'] += utils.tonp(masked_jets['charMuEnFrac'])
                accumulator['columns'][sample_name]['mujet_muonEn'] += utils.tonp(masked_jets['muonEn'])
                accumulator['columns'][sample_name]['mujet_muonEnFrac'] += utils.tonp(masked_jets['muonEnFrac'])
                accumulator['columns'][sample_name]['mujet_neutEmEn'] += utils.tonp(masked_jets['neutEmEn'])
                accumulator['columns'][sample_name]['mujet_neutHadEn'] += utils.tonp(masked_jets['neutHadEn'])
                accumulator['columns'][sample_name]['sv_tM'] += utils.tonp(masked_df['tmass_goodsv'])
                accumulator['columns'][sample_name]['mu1_tM'] += utils.tonp(masked_df['tmass_promptmu'])
                accumulator['columns'][sample_name]['mu2_tM'] += utils.tonp(masked_df['tmass_svmu'])
                accumulator['columns'][sample_name]['corr_M'] += utils.tonp(masked_df['mass_corr'])
                accumulator['columns'][sample_name]['dimu_deltaphi'] += utils.tonp(masked_df['dimu_deltaphi'])
                accumulator['columns'][sample_name]['dimu_mass'] += utils.tonp(masked_df['ll_mass'])
                accumulator['columns'][sample_name]['dimu_dr'] += utils.tonp(masked_df['ll_dr'])
                accumulator['columns'][sample_name]['nLooseMu'] += utils.tonp(masked_df['nLooseMu'])
                accumulator['columns'][sample_name]['nDisplacedMu'] += utils.tonp(masked_df['n_displaced_mu'])

                #######
                accumulator['columns'][sample_name]['sv_tracks_charge'] += utils.tonp(masked_df['goodsv']['tracks_charge'])
                accumulator['columns'][sample_name]['sv_tracks_eta'] += utils.tonp(masked_df['goodsv']['tracks_eta'] )
                accumulator['columns'][sample_name]['sv_tracks_phi'] += utils.tonp(masked_df['goodsv']['tracks_phi'])
                accumulator['columns'][sample_name]['sv_tracks_pt'] += utils.tonp(masked_df['goodsv']['tracks_pt'])
                accumulator['columns'][sample_name]['sv_tracks_p'] += utils.tonp(masked_df['goodsv']['tracks_p'])
                accumulator['columns'][sample_name]['sv_tracks_dxySig'] += utils.tonp(masked_df['goodsv']['tracks_dxySig'])
                accumulator['columns'][sample_name]['sv_tracks_dxy'] += utils.tonp(masked_df['goodsv']['tracks_dxy'])
                accumulator['columns'][sample_name]['sv_tracks_dxyz'] += utils.tonp(masked_df['goodsv']['tracks_dxyz'])
        
        return accumulator
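The three transverse-mass assignments above repeat one pattern; a small helper factoring it out (a refactoring sketch, not part of the original processor) could read:

import numpy as np

def transverse_mass(p4, met_pt, met_px, met_py):
    # m_T^2 = (pt + MET_pt)^2 - (px + MET_px)^2 - (py + MET_py)^2
    return np.sqrt((p4.pt + met_pt) ** 2
                   - (p4.x + met_px) ** 2
                   - (p4.y + met_py) ** 2)

# e.g. skim['tmass_goodsv'] = transverse_mass(
#     skim.goodsv.p4, skim.pfMet_pt, skim.pfMet_px, skim.pfMet_py)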