# Shared imports for the snippets below. Model, FKSPartition, the
# train_*_network(s) helpers and the infer_on_* functions are assumed to come
# from this package's own modules; the Keras import path may differ by version.
import os
import pickle

import numpy as np
from keras.models import load_model


def test__infer_on_cut(dummy_data_training):
    (momenta, cut_mom, near_mom, labels, cut_labs, near_labs,
     delta_cut, delta_near) = dummy_data_training

    fks = FKSPartition(momenta=momenta, labels=labels, all_legs=False)
    cut_momenta, near_momenta, cut_labels, near_labels = fks.cut_near_split(
        delta_cut, delta_near)

    n_gluon = 1
    NN = Model((n_gluon + 2 - 1) * 4,
               cut_momenta,
               cut_labels,
               all_jets=False,
               all_legs=False)
    _, _, _, _, _, _, _, _ = NN.process_training_data()

    model_cut, x_mean_cut, x_std_cut, y_mean_cut, y_std_cut = train_cut_network(
        cut_momenta=cut_momenta,
        NJ_cut=cut_labels,
        order='LO',
        n_gluon=1,
        delta_cut=delta_cut,
        points=len(cut_momenta) * 2,
        model_dir='',
        epochs=1)

    y_pred_cuts = infer_on_cut(NN=NN,
                               moms=cut_momenta,
                               model=model_cut,
                               x_mean_cut=x_mean_cut,
                               x_std_cut=x_std_cut,
                               y_mean_cut=y_mean_cut,
                               y_std_cut=y_std_cut)
def train_cut_network_general(input_size,
                              cut_momenta,
                              NJ_cut,
                              delta_cut,
                              model_dir='',
                              all_jets=False,
                              all_legs=False,
                              model_dataset=False,
                              **kwargs):
    scaling = kwargs.get('scaling', 'standardise')
    lr = kwargs.get('lr', 0.001)
    layers = kwargs.get('layers', [20, 40, 20])
    print('Using learning rate {}'.format(lr))
    activation = kwargs.get('activation', 'tanh')
    loss = kwargs.get('loss', 'mean_squared_error')
    epochs = kwargs.get('epochs', 1000000)
    high_precision = kwargs.get('high_precision', False)

    NN_cut = Model(input_size=input_size,
                   momenta=cut_momenta,
                   labels=NJ_cut,
                   all_jets=all_jets,
                   all_legs=all_legs,
                   model_dataset=model_dataset,
                   high_precision=high_precision)

    model_cut, x_mean_cut, x_std_cut, y_mean_cut, y_std_cut = NN_cut.fit(
        scaling=scaling,
        layers=layers,
        lr=lr,
        activation=activation,
        loss=loss,
        epochs=epochs)

    if model_dir != '':
        cut_dir = model_dir + 'cut_{}'.format(delta_cut)
        if not os.path.exists(cut_dir):
            os.mkdir(cut_dir)

        model_cut.save(cut_dir + '/model')
        with open(cut_dir + '/model_arch.json', 'w') as fout:
            fout.write(model_cut.to_json())
        model_cut.save_weights(cut_dir + '/model_weights.h5')

        metadata = {
            'x_mean': x_mean_cut,
            'x_std': x_std_cut,
            'y_mean': y_mean_cut,
            'y_std': y_std_cut
        }
        with open(cut_dir + '/dataset_metadata.pickle', 'wb') as pickle_out:
            pickle.dump(metadata, pickle_out)

    return model_cut, x_mean_cut, x_std_cut, y_mean_cut, y_std_cut
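# A minimal usage sketch for train_cut_network_general, not taken from the
# package itself: the .npy file names, deltas and model_dir below are
# hypothetical, while the FKSPartition calls mirror the tests above.
momenta = np.load('PS_points.npy', allow_pickle=True).tolist()
labels = np.load('NJ_values.npy', allow_pickle=True)

fks = FKSPartition(momenta=momenta, labels=labels, all_legs=True)
cut_momenta, near_momenta, cut_labels, near_labels = fks.cut_near_split(
    0.01, 0.02)

os.makedirs('./models/', exist_ok=True)  # parent directory for the cut network
model_cut, x_mean_cut, x_std_cut, y_mean_cut, y_std_cut = train_cut_network_general(
    input_size=len(cut_momenta[0]) * 4,
    cut_momenta=cut_momenta,
    NJ_cut=cut_labels,
    delta_cut=0.01,
    model_dir='./models/',
    all_legs=True,
    epochs=1000)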
def test_networks(self,
                  momenta,
                  nj,
                  models,
                  x_means,
                  x_stds,
                  y_means,
                  y_stds,
                  return_predictions=False):
    try:
        momenta = momenta.tolist()
    except AttributeError:
        pass

    if self.all_legs:
        all_jets = False
        nlegs = self.nlegs + 2
    else:
        all_jets = True
        nlegs = self.nlegs

    NN = Model(input_size=nlegs * 4,
               momenta=momenta,
               labels=nj,
               all_jets=all_jets,
               all_legs=self.all_legs,
               model_dataset=self.model_dataset,
               high_precision=self.high_precision)
    _, _, _, _, _, _, _, _ = NN.process_training_data()

    for i in range(self.training_reruns):
        print('Predicting on model {}'.format(i))
        model_dir_new = self.model_base_dir + self.model_dir + '_{}/'.format(i)

        x_standard = NN.process_testing_data(moms=momenta,
                                             x_mean=x_means[i],
                                             x_std=x_stds[i],
                                             y_mean=y_means[i],
                                             y_std=y_stds[i])
        pred = models[i].predict(x_standard)
        y_pred = NN.destandardise_data(y_pred=pred.reshape(-1),
                                       x_mean=x_means[i],
                                       x_std=x_stds[i],
                                       y_mean=y_means[i],
                                       y_std=y_stds[i])

        if not return_predictions:
            np.save(model_dir_new + '/pred_{}'.format(len(momenta)), y_pred)
        else:
            return y_pred

    print('############### Finished ###############')
def load_models(self, momenta, nj):
    try:
        momenta = momenta.tolist()
    except AttributeError:
        pass

    if self.all_legs:
        all_jets = False
        nlegs = self.nlegs + 2
    else:
        all_jets = True
        nlegs = self.nlegs

    NN = Model(input_size=nlegs * 4,
               momenta=momenta,
               labels=nj,
               all_jets=all_jets,
               all_legs=self.all_legs,
               model_dataset=self.model_dataset,
               high_precision=self.high_precision)

    models = []
    x_means = []
    y_means = []
    x_stds = []
    y_stds = []
    for i in range(self.training_reruns):
        print('Working on model {}'.format(i))
        model_dir_new = self.model_base_dir + self.model_dir + '_{}/'.format(i)
        print('Looking for directory {}'.format(model_dir_new))
        if not os.path.exists(model_dir_new):
            os.mkdir(model_dir_new)
            print('Directory created')
        else:
            print('Directory already exists')

        model = load_model(model_dir_new + 'model',
                           custom_objects={
                               'root_mean_squared_error':
                               NN.root_mean_squared_error
                           })
        models.append(model)

        with open(model_dir_new + "/dataset_metadata.pickle", "rb") as pickle_out:
            metadata = pickle.load(pickle_out)

        x_means.append(metadata['x_mean'])
        y_means.append(metadata['y_mean'])
        x_stds.append(metadata['x_std'])
        y_stds.append(metadata['y_std'])

    print('############### All models loaded ###############')

    return models, x_means, x_stds, y_means, y_stds
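# A sketch of the intended load-then-predict flow for the two methods above,
# assuming `tester` is an instance of the (not shown) class they belong to,
# and momenta/nj are the test-set arrays.
models, x_means, x_stds, y_means, y_stds = tester.load_models(momenta, nj)
y_pred = tester.test_networks(momenta,
                              nj,
                              models,
                              x_means,
                              x_stds,
                              y_means,
                              y_stds,
                              return_predictions=True)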
def train_cut_network(cut_momenta,
                      NJ_cut,
                      order,
                      n_gluon,
                      delta_cut,
                      points,
                      model_dir='',
                      **kwargs):
    lr = kwargs.get('lr', 0.001)
    layers = kwargs.get('layers', [20, 40, 20])
    print('Using learning rate {}'.format(lr))
    epochs = kwargs.get('epochs', 1000000)

    NN_cut = Model((n_gluon + 2 - 1) * 4, cut_momenta, NJ_cut)
    model_cut, x_mean_cut, x_std_cut, y_mean_cut, y_std_cut = NN_cut.fit(
        layers=layers, lr=lr, epochs=epochs)

    if model_dir != '':
        cut_dir = model_dir + '{}_cut_{}_{}_{}'.format(
            order, n_gluon + 2, delta_cut, points)
        if not os.path.exists(cut_dir):
            os.mkdir(cut_dir)
        model_cut.save(cut_dir + '/model')

        metadata = {
            'x_mean': x_mean_cut,
            'x_std': x_std_cut,
            'y_mean': y_mean_cut,
            'y_std': y_std_cut
        }
        with open(cut_dir + '/dataset_metadata.pickle', 'wb') as pickle_out:
            pickle.dump(metadata, pickle_out)

    return model_cut, x_mean_cut, x_std_cut, y_mean_cut, y_std_cut
def test__infer_on_near_splits_separate(dummy_data_training):
    (momenta, cut_mom, near_mom, labels, cut_labs, near_labs,
     delta_cut, delta_near) = dummy_data_training

    fks = FKSPartition(momenta=momenta, labels=labels, all_legs=False)
    cut_momenta, near_momenta, cut_labels, near_labels = fks.cut_near_split(
        delta_cut, delta_near)
    pairs, labs_split = fks.weighting()

    n_gluon = 1
    NN = Model((n_gluon + 2 - 1) * 4,
               near_momenta,
               labs_split[0],
               all_jets=False,
               all_legs=False)
    _, _, _, _, _, _, _, _ = NN.process_training_data()

    model_near, x_mean_near, x_std_near, y_mean_near, y_std_near = train_near_networks(
        pairs=pairs,
        near_momenta=near_momenta,
        NJ_split=labs_split,
        order='LO',
        n_gluon=1,
        delta_near=delta_near,
        points=len(near_momenta) * 2,
        model_dir='',
        epochs=1)

    y_preds_nears, y_pred_nears = infer_on_near_splits_separate(
        NN=NN,
        moms=near_momenta,
        models=model_near,
        x_mean_near=x_mean_near,
        x_std_near=x_std_near,
        y_mean_near=y_mean_near,
        y_std_near=y_std_near)
def create_model_all_legs_dataset(dummy_data_all_legs_training):
    (momenta, cut_mom, near_mom, labels, cut_labs, near_labs,
     delta_cut, delta_near) = dummy_data_all_legs_training

    nlegs = len(momenta[0]) - 2

    fks = FKSPartition(momenta=momenta, labels=labels, all_legs=True)
    cut_momenta, near_momenta, cut_labels, near_labels = fks.cut_near_split(
        delta_cut, delta_near)
    pairs, labs_split = fks.weighting()

    model = Model(input_size=(nlegs + 2) * 4,
                  momenta=cut_momenta,
                  labels=cut_labels,
                  all_jets=False,
                  all_legs=True,
                  model_dataset=True)
    return model
def create_model_high_precision(dummy_data_training):
    (momenta, cut_mom, near_mom, labels, cut_labs, near_labs,
     delta_cut, delta_near) = dummy_data_training

    nlegs = len(momenta[0]) - 2
    n_gluon = 1

    fks = FKSPartition(momenta=momenta, labels=labels, all_legs=False)
    cut_momenta, near_momenta, cut_labels, near_labels = fks.cut_near_split(
        delta_cut, delta_near)
    pairs, labs_split = fks.weighting()

    model = Model(input_size=(n_gluon + 2 - 1) * 4,
                  momenta=cut_momenta,
                  labels=cut_labels,
                  all_jets=False,
                  all_legs=False,
                  high_precision=True)
    return model
def train_networks(self, momenta, nj):
    indices = np.arange(len(nj))
    np.random.shuffle(indices)
    momenta = momenta[indices]
    nj = nj[indices]
    momenta = momenta.tolist()

    if self.model_base_dir == "":
        pass
    elif not os.path.exists(self.model_base_dir):
        os.mkdir(self.model_base_dir)
        print('Creating base directory')
    else:
        print('Base directory already exists')

    for i in range(self.training_reruns):
        print('Working on model {}'.format(i))
        if self.model_base_dir == "" or self.model_dir == "":
            model_dir_new = ""
        else:
            model_dir_new = self.model_base_dir + self.model_dir + '_{}/'.format(i)
            print('Looking for directory {}'.format(model_dir_new))
            if not os.path.exists(model_dir_new):
                os.mkdir(model_dir_new)
                print('Directory created')
            else:
                print('Directory already exists')

        if self.all_legs:
            all_jets = False
            nlegs = self.nlegs + 2
        else:
            all_jets = True
            nlegs = self.nlegs

        NN = Model(input_size=nlegs * 4,
                   momenta=momenta,
                   labels=nj,
                   all_jets=all_jets,
                   all_legs=self.all_legs,
                   model_dataset=self.model_dataset,
                   high_precision=self.high_precision)

        model, x_mean, x_std, y_mean, y_std = NN.fit(scaling=self.scaling,
                                                     layers=self.layers,
                                                     epochs=self.epochs,
                                                     lr=self.lr,
                                                     activation=self.activation,
                                                     loss=self.loss)

        if model_dir_new != "":
            model.save(model_dir_new + '/model')
            with open(model_dir_new + '/model_arch.json', 'w') as fout:
                fout.write(model.to_json())
            model.save_weights(model_dir_new + '/model_weights.h5')

            metadata = {
                'x_mean': x_mean,
                'x_std': x_std,
                'y_mean': y_mean,
                'y_std': y_std
            }
            with open(model_dir_new + "/dataset_metadata.pickle", "wb") as pickle_out:
                pickle.dump(metadata, pickle_out)
def train_near_networks_general(input_size,
                                pairs,
                                near_momenta,
                                NJ_split,
                                delta_near,
                                model_dir='',
                                all_jets=False,
                                all_legs=False,
                                model_dataset=False,
                                **kwargs):
    '''
    Train 'near' networks on pairs of jets.
    '''
    scaling = kwargs.get('scaling', 'standardise')
    lr = kwargs.get('lr', 0.001)
    layers = kwargs.get('layers', [20, 40, 20])
    print('Using learning rate {}'.format(lr))
    activation = kwargs.get('activation', 'tanh')
    loss = kwargs.get('loss', 'mean_squared_error')
    epochs = kwargs.get('epochs', 1000000)
    high_precision = kwargs.get('high_precision', False)

    if not isinstance(near_momenta, list):
        raise AssertionError('Momenta must be in the form of a list')

    NN_near = []
    model_near = []
    x_mean_near = []
    x_std_near = []
    y_mean_near = []
    y_std_near = []
    for idx, i in enumerate(pairs):
        # train one network per jet pair, weighted by its partition function
        NN = Model(input_size=input_size,
                   momenta=near_momenta,
                   labels=NJ_split[idx],
                   all_jets=all_jets,
                   all_legs=all_legs,
                   model_dataset=model_dataset,
                   high_precision=high_precision)

        model, x_mean, x_std, y_mean, y_std = NN.fit(scaling=scaling,
                                                     layers=layers,
                                                     lr=lr,
                                                     activation=activation,
                                                     loss=loss,
                                                     epochs=epochs)

        NN_near.append(NN)
        model_near.append(model)
        x_mean_near.append(x_mean)
        x_std_near.append(x_std)
        y_mean_near.append(y_mean)
        y_std_near.append(y_std)

        if model_dir != '':
            pair_dir = model_dir + 'pair_{}_{}'.format(delta_near, idx)
            if not os.path.exists(pair_dir):
                os.mkdir(pair_dir)

            model.save(pair_dir + '/model')
            with open(pair_dir + '/model_arch.json', 'w') as fout:
                fout.write(model.to_json())
            model.save_weights(pair_dir + '/model_weights.h5')

            metadata = {
                'x_mean': x_mean,
                'x_std': x_std,
                'y_mean': y_mean,
                'y_std': y_std
            }
            with open(pair_dir + "/dataset_metadata.pickle", "wb") as pickle_out:
                pickle.dump(metadata, pickle_out)

    return model_near, x_mean_near, x_std_near, y_mean_near, y_std_near
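# A companion sketch for the 'near' networks, continuing the hypothetical
# example above: pairs and labs_split come from fks.weighting() as in the
# tests, and one network is trained per jet pair.
pairs, labs_split = fks.weighting()

model_near, x_mean_near, x_std_near, y_mean_near, y_std_near = train_near_networks_general(
    input_size=len(near_momenta[0]) * 4,
    pairs=pairs,
    near_momenta=near_momenta,
    NJ_split=labs_split,
    delta_near=0.02,
    model_dir='./models/',
    all_legs=True,
    epochs=1000)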
def train_near_networks(pairs,
                        near_momenta,
                        NJ_split,
                        order,
                        n_gluon,
                        delta_near,
                        points,
                        model_dir='',
                        **kwargs):
    '''
    Train 'near' networks on pairs of jets.

    :param pairs: array of pairs of jet positions
    :param near_momenta: list of PS points between delta_near and delta_cut
    :param NJ_split: array of NJet results weighted by different partition functions
    :param model_dir: the directory in which to create sub-directories to save the networks
    '''
    lr = kwargs.get('lr', 0.001)
    layers = kwargs.get('layers', [20, 40, 20])
    print('Using learning rate {}'.format(lr))
    epochs = kwargs.get('epochs', 1000000)

    if not isinstance(near_momenta, list):
        raise AssertionError('Momenta must be in the form of a list')

    NN_near = []
    model_near = []
    x_mean_near = []
    x_std_near = []
    y_mean_near = []
    y_std_near = []
    for idx, i in enumerate(pairs):
        NN = Model((n_gluon + 2 - 1) * 4, near_momenta, NJ_split[idx])
        model, x_mean, x_std, y_mean, y_std = NN.fit(layers=layers,
                                                     lr=lr,
                                                     epochs=epochs)

        NN_near.append(NN)
        model_near.append(model)
        x_mean_near.append(x_mean)
        x_std_near.append(x_std)
        y_mean_near.append(y_mean)
        y_std_near.append(y_std)

        if model_dir != '':
            pair_dir = model_dir + '/{}_near_{}_{}_{}_{}_{}/'.format(
                order, i[0], i[1], n_gluon + 2, delta_near, points)
            if not os.path.exists(pair_dir):
                os.mkdir(pair_dir)
            model.save(pair_dir + '/model')

            metadata = {
                'x_mean': x_mean,
                'x_std': x_std,
                'y_mean': y_mean,
                'y_std': y_std
            }
            with open(pair_dir + "/dataset_metadata.pickle", "wb") as pickle_out:
                pickle.dump(metadata, pickle_out)

    return model_near, x_mean_near, x_std_near, y_mean_near, y_std_near
try:
    test_momenta = np.load(mom_file, allow_pickle=True)
    test_nj = np.load(nj_file, allow_pickle=True)
except Exception:
    # fall back for arrays pickled under Python 2
    test_momenta = np.load(mom_file, allow_pickle=True, encoding="latin1")
    test_nj = np.load(nj_file, allow_pickle=True, encoding="latin1")

test_momenta = test_momenta.tolist()
nlegs = len(test_momenta[0]) - 2

NN = Model(
    input_size=(nlegs + 2) * 4,
    momenta=test_momenta,
    labels=test_nj,
    all_jets=False,
    all_legs=True,
    high_precision=False,
)
_, _, _, _, _, _, _, _ = NN.process_training_data()

model = load_model(
    model_dir + "model",
    custom_objects={"root_mean_squared_error": NN.root_mean_squared_error},
)

with open(model_dir + "dataset_metadata.pickle", "rb") as pickle_out:
    metadata = pickle.load(pickle_out)
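# A sketch of running inference with the model and metadata loaded above,
# mirroring the process_testing_data / destandardise_data calls used in
# test_networks.
x_standard = NN.process_testing_data(moms=test_momenta,
                                     x_mean=metadata['x_mean'],
                                     x_std=metadata['x_std'],
                                     y_mean=metadata['y_mean'],
                                     y_std=metadata['y_std'])
pred = model.predict(x_standard)
y_pred = NN.destandardise_data(y_pred=pred.reshape(-1),
                               x_mean=metadata['x_mean'],
                               x_std=metadata['x_std'],
                               y_mean=metadata['y_mean'],
                               y_std=metadata['y_std'])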
def test_networks(self,
                  near_momenta,
                  cut_momenta,
                  near_nj_split,
                  model_nears,
                  model_cuts,
                  x_mean_nears,
                  x_mean_cuts,
                  x_std_nears,
                  x_std_cuts,
                  y_mean_nears,
                  y_mean_cuts,
                  y_std_nears,
                  y_std_cuts,
                  return_predictions=False):
    if self.all_legs:
        all_jets = False
        nlegs = self.nlegs + 2
    else:
        all_jets = True
        nlegs = self.nlegs

    NN = Model(input_size=nlegs * 4,
               momenta=near_momenta,
               labels=near_nj_split[0],
               all_jets=all_jets,
               all_legs=self.all_legs,
               model_dataset=self.model_dataset,
               high_precision=self.high_precision)
    _, _, _, _, _, _, _, _ = NN.process_training_data()

    for i in range(self.training_reruns):
        print('Predicting on model {}'.format(i))
        model_dir_new = self.model_base_dir + self.model_dir + '_{}/'.format(i)

        y_pred_near = infer_on_near_splits(NN=NN,
                                           scaling=self.scaling,
                                           moms=near_momenta,
                                           models=model_nears[i],
                                           x_mean_near=x_mean_nears[i],
                                           x_std_near=x_std_nears[i],
                                           y_mean_near=y_mean_nears[i],
                                           y_std_near=y_std_nears[i])
        if not return_predictions:
            np.save(
                model_dir_new +
                '/pred_near_{}'.format(len(near_momenta + cut_momenta)),
                y_pred_near)

    for i in range(self.training_reruns):
        print('Predicting on model {}'.format(i))
        model_dir_new = self.model_base_dir + self.model_dir + '_{}/'.format(i)

        y_pred_cut = infer_on_cut(NN=NN,
                                  scaling=self.scaling,
                                  moms=cut_momenta,
                                  model=model_cuts[i],
                                  x_mean_cut=x_mean_cuts[i],
                                  x_std_cut=x_std_cuts[i],
                                  y_mean_cut=y_mean_cuts[i],
                                  y_std_cut=y_std_cuts[i])
        if not return_predictions:
            np.save(
                model_dir_new +
                '/pred_cut_{}'.format(len(near_momenta + cut_momenta)),
                y_pred_cut)

    if return_predictions:
        return y_pred_near, y_pred_cut
def load_models(self, cut_momenta, near_momenta, cut_nj, near_nj, pairs,
                near_nj_split):
    if self.all_legs:
        all_jets = False
        nlegs = self.nlegs + 2
    else:
        all_jets = True
        nlegs = self.nlegs

    NN = Model(input_size=nlegs * 4,
               momenta=near_momenta,
               labels=near_nj_split[0],
               all_jets=all_jets,
               all_legs=self.all_legs,
               model_dataset=self.model_dataset,
               high_precision=self.high_precision)
    _, _, _, _, _, _, _, _ = NN.process_training_data()

    model_nears = []
    model_cuts = []
    x_mean_nears = []
    x_std_nears = []
    y_mean_nears = []
    y_std_nears = []
    x_mean_cuts = []
    x_std_cuts = []
    y_mean_cuts = []
    y_std_cuts = []
    for i in range(self.training_reruns):
        print('Working on model {}'.format(i))
        model_dir_new = self.model_base_dir + self.model_dir + '_{}/'.format(i)
        print('Looking for directory {}'.format(model_dir_new))
        if not os.path.exists(model_dir_new):
            os.mkdir(model_dir_new)
            print('Directory created')
        else:
            print('Directory already exists')

        model_near, x_mean_near, x_std_near, y_mean_near, y_std_near = get_near_networks_general(
            NN=NN,
            pairs=pairs,
            delta_near=self.delta_near,
            model_dir=model_dir_new)
        assert len(model_near) == len(pairs)

        model_cut, x_mean_cut, x_std_cut, y_mean_cut, y_std_cut = get_cut_network_general(
            NN=NN, delta_cut=self.delta_cut, model_dir=model_dir_new)

        model_nears.append(model_near)
        model_cuts.append(model_cut)
        x_mean_nears.append(x_mean_near)
        x_std_nears.append(x_std_near)
        y_mean_nears.append(y_mean_near)
        y_std_nears.append(y_std_near)
        x_mean_cuts.append(x_mean_cut)
        x_std_cuts.append(x_std_cut)
        y_mean_cuts.append(y_mean_cut)
        y_std_cuts.append(y_std_cut)

    print('############### All models loaded ###############')

    return (model_nears, model_cuts, x_mean_nears, x_mean_cuts, x_std_nears,
            x_std_cuts, y_mean_nears, y_mean_cuts, y_std_nears, y_std_cuts)
try:
    test_momenta = np.load(mom_file, allow_pickle=True)
    test_nj = np.load(nj_file, allow_pickle=True)
except Exception:
    # fall back for arrays pickled under Python 2
    test_momenta = np.load(mom_file, allow_pickle=True, encoding='latin1')
    test_nj = np.load(nj_file, allow_pickle=True, encoding='latin1')

test_momenta = test_momenta.tolist()
nlegs = len(test_momenta[0]) - 2

NN = Model(
    input_size=(nlegs + 2) * 4,
    momenta=test_momenta,
    labels=test_nj,
    all_jets=False,
    all_legs=True,
    high_precision=args.high_precision  # args assumed to come from an argparse parser
)

X_train, X_test, y_train, y_test, _, _, _, _ = NN.process_training_data()

lr = 0.01
model, x_mean, x_std, y_mean, y_std = NN.fit(layers=[20, 40, 20],
                                             lr=lr,
                                             epochs=1)

metadata = {
    'x_mean': x_mean,
    'x_std': x_std,
    'y_mean': y_mean,
    'y_std': y_std
}
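# A sketch of persisting the freshly trained model and its metadata, following
# the same save pattern as the training functions above (model_dir here is
# hypothetical).
model_dir = './model/'
if not os.path.exists(model_dir):
    os.mkdir(model_dir)

model.save(model_dir + 'model')
with open(model_dir + 'model_arch.json', 'w') as fout:
    fout.write(model.to_json())
model.save_weights(model_dir + 'model_weights.h5')

with open(model_dir + 'dataset_metadata.pickle', 'wb') as pickle_out:
    pickle.dump(metadata, pickle_out)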