def __init__(self, **kwargs):
    self.name = kwargs.get('name', 'validation')
    self.no_prep = kwargs.get('no_prep', True)

    if not self.no_prep:
        self.global_features = numpy.transpose(numpy.array(kwargs.get('global_features')))
        self.objects = utils.pad_array(kwargs.get('objects'))
    else:
        self.global_features = numpy.array(kwargs.get('global_features'))
        self.objects = numpy.array(kwargs.get('objects'))

    if 'leptons' in kwargs:
        if not kwargs.get('no_prep'):
            self.leptons = utils.pad_array(kwargs.get('leptons', []))
        else:
            self.leptons = numpy.array(kwargs.get('leptons'))
        self.features = [self.global_features, self.objects, self.leptons]
        self.channel = "Leptonic"
    else:
        self.features = [self.global_features, self.objects]
        self.channel = "Hadronic"

    self.label = numpy.array(kwargs.get('label', []))
    self.weights = numpy.array(kwargs.get('weights', []))
    self.references = kwargs.get('references', {})  # dictionary of reference BDT/DNN values for comparisons
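A minimal usage sketch, assuming this `__init__` belongs to the `DNN_Features` class instantiated in the later snippets; the input lists below are made-up placeholders, not real analysis features.

```python
# Hypothetical example inputs: 4 events with 3 global features each,
# fixed-size object arrays, and per-event lepton arrays.
example = DNN_Features(
    name='validation',
    no_prep=True,                              # arrays are taken as already preprocessed/padded
    global_features=[[0.1, 0.2, 0.3]] * 4,
    objects=[[[1.0, 2.0]] * 8] * 4,
    leptons=[[[0.5, 0.7]] * 2] * 4,
    label=[1, 0, 1, 0],
    weights=[1.0, 1.0, 0.9, 1.1],
)
print(example.channel)        # "Leptonic", since 'leptons' was supplied
print(len(example.features))  # 3 input arrays: global, objects, leptons
```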
def __getitem__(self, index):
    q_id, doc_id, begin, end = self.samples[index]

    # word-level indices, padded to fixed lengths; -1 entries are remapped to the last index
    p = pad_array(np.load(self.data_root + "paragraphs/words_{}.npy".format(doc_id)), self.par_padding)
    p[p == -1] = self.last_index
    q = pad_array(np.load(self.data_root + "questions/words_{}.npy".format(q_id)), self.query_padding)
    q[q == -1] = self.last_index

    # character-level indices, same padding scheme
    p_char = pad_array(np.load(self.data_root + "paragraphs/chars_{}.npy".format(doc_id)), self.par_padding)
    p_char[p_char == -1] = self.last_index
    q_char = pad_array(np.load(self.data_root + "questions/chars_{}.npy".format(q_id)), self.query_padding)
    q_char[q_char == -1] = self.last_index

    # target: one-hot begin/end positions of the answer span
    target = np.zeros((2, self.par_padding))
    target[0, begin] = 1.
    target[1, end] = 1.

    if self.dictionary_like:
        return {"paragraph": p, "query": q, "par_char": p_char, "query_char": q_char}, target
    else:
        return [p, q, p_char, q_char], target
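`pad_array` is not defined in this snippet; a plausible minimal version, consistent with how it is used above (fixed target length along the first axis, -1 fill that is later remapped to `last_index`), could look like the following. This is an assumption, not the repository's actual helper.

```python
import numpy as np

def pad_array(arr, length, fill=-1):
    """Pad (or truncate) an index array to `length` along axis 0 with `fill`.

    Hypothetical helper; the real implementation may differ.
    """
    arr = np.asarray(arr)
    if arr.shape[0] >= length:
        return arr[:length].copy()
    pad_width = [(0, length - arr.shape[0])] + [(0, 0)] * (arr.ndim - 1)
    return np.pad(arr, pad_width, mode='constant', constant_values=fill)
```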
def get_x_train_test_syntax(self, komn, pad=False):
    x_train, x_test = [], []
    for i in range(len(self.ready_tagged)):
        sc = komn.get_syntactic_concatenation(self.ready_tagged[i])
        if pad:
            sc = pad_array(sc, 80)
        # first TRAIN_SENTENCES sentences go to the training split, the rest to the test split
        if i < TRAIN_SENTENCES:
            x_train.append(sc)
        else:
            x_test.append(sc)
    # return (train, test), matching the order in the function name
    return np.array(x_train), np.array(x_test)
def get_embeddings(sentences):
    all_embeddings = []
    for s in sentences:
        embs = embedding.get_word_emb_list(s)
        if len(embs) == 0:
            # No embeddings found for sentence, ignore it
            continue
        if pad:
            embs = pad_array(embs, pad_size)
        all_embeddings.append(embs)  # append the (optionally padded) sentence embeddings
    return np.array(all_embeddings)
def evaluate(self, image, return_detailed=False):
    """
    Apply the model to an input image, represented as a 2D numpy array.
    """
    # use zero padding if there is no constant border value
    if not border_is_constant(image):
        pad_value = 0
    else:
        pad_value = image[0, 0]

    # compute filter output for every orientation
    orientation_output = np.empty((image.shape[0], image.shape[1], len(self.multiscale_filters)))
    for i, multiscale_filter in enumerate(self.multiscale_filters):
        scale_output = np.empty((image.shape[0], image.shape[1], len(self.spatial_scales)))

        # convolve filters at all spatial scales, within one orientation,
        # with the image
        for j, kernel in enumerate(multiscale_filter):
            # integer division so the padding widths can be used as slice indices
            y_padding = (kernel.shape[0] + 1) // 2
            x_padding = (kernel.shape[1] + 1) // 2
            tmp_img = pad_array(image, np.array(((y_padding, y_padding), (x_padding, x_padding))), pad_value)
            scale_output[:, :, j] = fftconvolve(tmp_img, kernel, 'same')[
                y_padding:tmp_img.shape[0] - y_padding,
                x_padding:tmp_img.shape[1] - x_padding]

        # compute the weighted sum over different spatial scales
        orientation_output[:, :, i] = np.dot(scale_output, self.scale_weights)

        # normalize filter response within each orientation with its std
        normalization = 1. / orientation_output[:, :, i].std()
        # set filters with practically no signal to 0 (rather arbitrary)
        if normalization > 1e10:
            normalization = 0
        orientation_output[:, :, i] *= normalization

    if return_detailed:
        return (orientation_output.sum(2), orientation_output)
    return orientation_output.sum(2)
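`border_is_constant` and the 2D `pad_array` used here are not shown; minimal versions consistent with how they are called above might look like this (assumptions, not the original helpers).

```python
import numpy as np

def border_is_constant(image):
    """True if every pixel on the image border has the same value (hypothetical helper)."""
    border = np.concatenate((image[0, :], image[-1, :], image[:, 0], image[:, -1]))
    return np.all(border == border[0])

def pad_array(image, padding, pad_value):
    """Pad a 2D array with `pad_value`; `padding` is ((top, bottom), (left, right))."""
    return np.pad(image, np.asarray(padding, dtype=int), mode='constant', constant_values=pad_value)
```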
print("Calculating dnn scores") print((len(dnn_features))) print([feat for feat in dnn_features]) i = 0 print(dnn_features) for dnn_model in dnn_models: with open(dnn_model, "r") as f_in: model = json.load(f_in) dnn_features_data = dnn_helper.DNN_Features( name='data', global_features=utils.create_array(features_data, dnn_features, model["preprocess_scheme"], True), objects=utils.preprocess_array( utils.pad_array(features_data["objects_"]), model["preprocess_scheme"])) #print dnn_features_data.global_features #print dnn_features_data.objects #print dnn_features_data.features dnn_features_validation = dnn_helper.DNN_Features( name='validation', global_features=utils.create_array(features_validation, dnn_features, model["preprocess_scheme"], True), objects=utils.preprocess_array( utils.pad_array(features_validation["objects_"]), model["preprocess_scheme"])) dnn_features_final_fit = dnn_helper.DNN_Features( name='final_fit',
preprocess_dict = {}
if args.z_score:
    for feat in training_features:
        mean, std = utils.get_mean_and_std(features_train[feat])
        preprocess_dict[feat] = {"mean": float(mean), "std_dev": float(std)}
    with open(z_score_json, "w") as f_out:
        json.dump(preprocess_dict, f_out, indent=4, sort_keys=True)

f_out = h5py.File(output_file, "w")
f_out.create_dataset("feature_names", data=training_features)

for set in features.keys():
    global_features, label = prep_utils.create_features_and_label(
            features[set], training_features, signal, bkg, preprocess_dict, args.z_score)
    f_out.create_dataset("global_%s" % set, data=global_features)
    f_out.create_dataset("label_%s" % set, data=label)
    for misc in default_branches:
        array = features[set][misc]
        f_out.create_dataset("%s_%s" % (misc, set), data=array)
    if args.objects:
        objects = utils.pad_array(features[set]["objects_"])
        f_out.create_dataset("objects_%s" % set, data=objects)

f_out.close()
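For reference, a hedged sketch of reading the datasets written above back out of the HDF5 file; the dataset names follow the `global_%s` / `label_%s` / `objects_%s` pattern used by the writer, and `output_file` and the `"train"` set name are assumed to still apply.

```python
import h5py

# Hypothetical read-back of the file written above; "train" is assumed to be one of the keys of `features`.
with h5py.File(output_file, "r") as f_in:
    feature_names = [name.decode() if isinstance(name, bytes) else name
                     for name in f_in["feature_names"][:]]
    X_train = f_in["global_train"][:]
    y_train = f_in["label_train"][:]
    if "objects_train" in f_in:
        objects_train = f_in["objects_train"][:]
print(feature_names, X_train.shape, y_train.shape)
```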
with open(preprocess_scheme) as f_in:
    preprocess_scheme = json.load(f_in)
print("Preprocessing scheme: ", preprocess_scheme)

dnn_branches = []
if do_dnn:
    print("Calculating dnn scores")
    print(len(dnn_features))
    print([feat for feat in dnn_features])
    i = 0
    for dnn_model in dnn_models:
        with open(dnn_model, "r") as f_in:
            model = json.load(f_in)

        dnn_features_data = dnn_helper.DNN_Features(
                name='data',
                global_features=utils.create_array(features_data, dnn_features, model["preprocess_scheme"], True),
                objects=utils.preprocess_array(utils.pad_array(features_data["objects_"]), model["preprocess_scheme"]))

        dnn_features_validation = dnn_helper.DNN_Features(
                name='test',
                global_features=utils.create_array(features_test, dnn_features, model["preprocess_scheme"], True),
                objects=utils.preprocess_array(utils.pad_array(features_test["objects_"]), model["preprocess_scheme"]),
                lumi=features_test["lumi_"],
                run=features_test["run_"],
                evt=features_test["evt_"])

        dnn_features_train = dnn_helper.DNN_Features(
                name='train',
                global_features=utils.create_array(features_train, dnn_features, model["preprocess_scheme"], True),
                objects=utils.preprocess_array(utils.pad_array(features_train["objects_"]), model["preprocess_scheme"]))

        dnn = dnn_helper.DNN_Helper(
                features_validation=dnn_features_validation,
                features_train=dnn_features_train,
                features_data=dnn_features_data,
                metadata=model,
                weights_file="dnn_weights/" + model["weights"],
                train_mode=False)

        dnn_predictions.append(dnn.predict(debug=True))
        training_features.append("dnn_score_%d" % i)
        dnn_branches.append("dnn_score_%d" % i)
        i += 1
    print(dnn_predictions)

preprocess_dict = {}
if args.z_score:
    for feat in training_features:
        mean, std = utils.get_mean_and_std(features_train[feat])
#if args.do_top_tag:
#    dnn_features += ["top_tag_score_"]
if (args.fcnc_hut or args.fcnc_hct) and args.channel == "Hadronic" and not args.no_mass_constraint:
    dnn_features += ["m_ggj_", "m_jjj_"]

if do_dnn:
    print("Calculating dnn scores")
    print(len(dnn_features))
    print([feat for feat in dnn_features])
    i = 0
    print(dnn_features)
    for dnn_model in dnn_models:
        with open(dnn_model, "r") as f_in:
            model = json.load(f_in)

        dnn_features_data = dnn_helper.DNN_Features(
                name='data',
                global_features=utils.create_array(features_data, dnn_features, model["preprocess_scheme"], True),
                objects=utils.preprocess_array(utils.pad_array(features_data["objects_"]), model["preprocess_scheme"]))
        #print dnn_features_data.global_features
        #print dnn_features_data.objects
        #print dnn_features_data.features

        dnn_features_validation = dnn_helper.DNN_Features(
                name='validation',
                global_features=utils.create_array(features_validation, dnn_features, model["preprocess_scheme"], True),
                objects=utils.preprocess_array(utils.pad_array(features_validation["objects_"]), model["preprocess_scheme"]))

        dnn_features_final_fit = dnn_helper.DNN_Features(
                name='final_fit',
                global_features=utils.create_array(features_final_fit, dnn_features, model["preprocess_scheme"], True),
                objects=utils.preprocess_array(utils.pad_array(features_final_fit["objects_"]), model["preprocess_scheme"]))

        dnn_features_train = dnn_helper.DNN_Features(
                name='train',
                global_features=utils.create_array(features, dnn_features, model["preprocess_scheme"], True),
                objects=utils.preprocess_array(utils.pad_array(features["objects_"]), model["preprocess_scheme"]))

        dnn = dnn_helper.DNN_Helper(
                features_validation=dnn_features_validation,
                features_train=dnn_features_train,
                features_data=dnn_features_data,
                features_final_fit=dnn_features_final_fit,
                metadata=model,
                weights_file="dnn_weights/" + model["weights"],
                train_mode=False)

        #dnn.predict()
        #dnn_predictions.append([dnn.predictions["train"], dnn.predictions["validation"], dnn.predictions["data"]])
        dnn_predictions.append(dnn.predict(debug=True))
        feature_names.append("dnn_score_%d" % i)
        i += 1
    print(dnn_predictions)
"mean": float(mean), "std_dev": float(stddev) } global_features = utils.create_array(features, feature_names, preprocess_dict, args.z_score) global_features_validation = utils.create_array(features_validation, feature_names, preprocess_dict, args.z_score) global_features_data = utils.create_array(features_data, feature_names, preprocess_dict, args.z_score) global_features_final_fit = utils.create_array(features_final_fit, feature_names, preprocess_dict, args.z_score) object_features = utils.pad_array(object_features) object_features_validation = utils.pad_array(object_features_validation) object_features_data = utils.pad_array(object_features_data) object_features_final_fit = utils.pad_array(object_features_final_fit) if args.z_score: n_object_features = len(object_features[0][0]) for i in range(n_object_features): mean, stddev = utils.get_mean_and_std(object_features[:, :, i]) preprocess_dict["objects_" + str(i)] = { "mean": mean, "std_dev": stddev } with open("preprocess_scheme_%s_%s.json" % (args.channel, args.tag), "w") as f_out: