Example 1
  def __init__(self, **kwargs):
    self.name = kwargs.get('name', 'validation')
    self.no_prep = kwargs.get('no_prep', True)
    # unless no_prep is set, transpose the global features and pad the
    # per-event object arrays to a fixed length
    if not self.no_prep:
      self.global_features = numpy.transpose(numpy.array(kwargs.get('global_features')))
      self.objects = utils.pad_array(kwargs.get('objects'))
    else:
      self.global_features = numpy.array(kwargs.get('global_features'))
      self.objects = numpy.array(kwargs.get('objects'))

    if 'leptons' in kwargs:
      if not self.no_prep:
        self.leptons = utils.pad_array(kwargs.get('leptons', []))
      else:
        self.leptons = numpy.array(kwargs.get('leptons'))
      self.features = [self.global_features, self.objects, self.leptons]
      self.channel = "Leptonic"
    else:
      self.features = [self.global_features, self.objects]
      self.channel = "Hadronic"
 
    self.label = numpy.array(kwargs.get('label', []))
    self.weights = numpy.array(kwargs.get('weights', []))

    self.references = kwargs.get('references', {}) # dictionary of reference BDT/DNN values for comparisons
Example 2
    def __getitem__(self, index):
        q_id, doc_id, begin, end = self.samples[index]

        # Load padded word/char index arrays for the paragraph and the
        # question, remapping the -1 pad value to the last embedding row.
        p = pad_array(np.load(self.data_root + "paragraphs/words_{}.npy".format(doc_id)), self.par_padding)
        p[p == -1] = self.last_index
        q = pad_array(np.load(self.data_root + "questions/words_{}.npy".format(q_id)), self.query_padding)
        q[q == -1] = self.last_index
        p_char = pad_array(np.load(self.data_root + "paragraphs/chars_{}.npy".format(doc_id)), self.par_padding)
        p_char[p_char == -1] = self.last_index
        q_char = pad_array(np.load(self.data_root + "questions/chars_{}.npy".format(q_id)), self.query_padding)
        q_char[q_char == -1] = self.last_index
        # Two one-hot rows marking the answer span's begin and end indices.
        target = np.zeros((2, self.par_padding))
        target[0, begin] = 1.
        target[1, end] = 1.
        if self.dictionary_like:
            return {"paragraph": p, "query": q, "par_char": p_char, "query_char": q_char}, target
        else:
            return [p, q, p_char, q_char], target
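
Examples 2 through 4 call a two-argument pad_array(arr, length) helper that is not shown here. A minimal sketch, assuming it fixes the first dimension to length with a fill value of -1 (consistent with the p[p == -1] remapping above; other call sites may use a different fill) and truncates longer inputs; the real helper may differ:

import numpy as np

def pad_array(arr, length, fill=-1):
    # Hypothetical sketch: fix the first dimension of arr to length,
    # padding with fill and truncating anything longer.
    arr = np.asarray(arr)
    out = np.full((length,) + arr.shape[1:], fill, dtype=arr.dtype)
    out[:min(len(arr), length)] = arr[:length]
    return out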
Example 3
 def get_x_train_test_syntax(self, komn, pad=False):
     x_train, x_test = [], []
     for i in range(len(self.ready_tagged)):
         sc = komn.get_syntactic_concatenation(self.ready_tagged[i])
         if pad:
             sc = pad_array(sc, 80)
         if i < TRAIN_SENTENCES:
             x_train.append(sc)
         else:
             x_test.append(sc)
     return np.array(x_train), np.array(x_test)
Example 4
 def get_embeddings(sentences, pad=False, pad_size=None):
     all_embeddings = []
     for s in sentences:
         embs = embedding.get_word_emb_list(s)
         if len(embs) == 0:
             # No embeddings found for sentence, ignore it
             continue
         if pad:
             embs = pad_array(embs, pad_size)
         all_embeddings.append(embs)
     return np.array(all_embeddings)
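
With pad and pad_size promoted to parameters (they were undefined free names in the original), a hypothetical call, with sentence text and sizes made up for illustration, might look like:

padded = get_embeddings(["the quick brown fox"], pad=True, pad_size=50)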
Example 5
 def evaluate(self, image, return_detailed=False):
     """
     Apply the model to an input image, represented as a 2D numpy array.
     """
     # use the constant border value for padding if there is one,
     # otherwise fall back to zero padding
     if border_is_constant(image):
         pad_value = image[0, 0]
     else:
         pad_value = 0
     # compute filter output for every orientation
     orientation_output = np.empty((image.shape[0], image.shape[1],
                                    len(self.multiscale_filters)))
     for i, multiscale_filter in enumerate(self.multiscale_filters):
         scale_output = np.empty((image.shape[0], image.shape[1],
                                  len(self.spatial_scales)))
         # convolve filters at all spatial scales, within one orientation,
         # with the image
         for j, kernel in enumerate(multiscale_filter):
             # integer division: the paddings are used as slice bounds
             y_padding = (kernel.shape[0] + 1) // 2
             x_padding = (kernel.shape[1] + 1) // 2
             tmp_img = pad_array(image, np.array(((y_padding, y_padding),
                                                  (x_padding, x_padding))),
                                 pad_value)
             scale_output[:, :, j] = fftconvolve(tmp_img, kernel, 'same')[
                 y_padding:tmp_img.shape[0] - y_padding,
                 x_padding:tmp_img.shape[1] - x_padding]
         # compute the weighted sum over different spatial scales
         orientation_output[:, :, i] = np.dot(scale_output, self.scale_weights)
         # normalize filter response within each orientation with its std
         normalization = 1. / orientation_output[:, :, i].std()
         # set filters with practically no signal to 0 (rather arbitrary)
         if normalization > 1e10:
             normalization = 0
         orientation_output[:, :, i] *= normalization
     if return_detailed:
         return (orientation_output.sum(2), orientation_output)
     return orientation_output.sum(2)
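
The border_is_constant check used above is not shown. A minimal sketch, assuming it tests whether all border pixels of the 2D image share a single value; the actual implementation may differ:

import numpy as np

def border_is_constant(image):
    # Hypothetical sketch: gather the four edges and compare them to a
    # single reference value.
    border = np.concatenate([image[0, :], image[-1, :],
                             image[:, 0], image[:, -1]])
    return np.all(border == border[0])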
Example 6
    print("Calculating dnn scores")
    print(len(dnn_features))
    print(dnn_features)
    i = 0
    for dnn_model in dnn_models:
        with open(dnn_model, "r") as f_in:
            model = json.load(f_in)

        dnn_features_data = dnn_helper.DNN_Features(
            name='data',
            global_features=utils.create_array(features_data, dnn_features,
                                               model["preprocess_scheme"],
                                               True),
            objects=utils.preprocess_array(
                utils.pad_array(features_data["objects_"]),
                model["preprocess_scheme"]))
        #print dnn_features_data.global_features
        #print dnn_features_data.objects
        #print dnn_features_data.features
        dnn_features_validation = dnn_helper.DNN_Features(
            name='validation',
            global_features=utils.create_array(features_validation,
                                               dnn_features,
                                               model["preprocess_scheme"],
                                               True),
            objects=utils.preprocess_array(
                utils.pad_array(features_validation["objects_"]),
                model["preprocess_scheme"]))
        dnn_features_final_fit = dnn_helper.DNN_Features(
            name='final_fit',
Example 7
preprocess_dict = {}
if args.z_score:
    for feat in training_features:
        mean, std = utils.get_mean_and_std(features_train[feat])
        preprocess_dict[feat] = {"mean": float(mean), "std_dev": float(std)}

    with open(z_score_json, "w") as f_out:
        json.dump(preprocess_dict, f_out, indent=4, sort_keys=True)

f_out = h5py.File(output_file, "w")
f_out.create_dataset("feature_names", data=training_features)

# write one group of datasets per feature split
for split in features:
    global_features, label = prep_utils.create_features_and_label(
        features[split], training_features, signal, bkg, preprocess_dict,
        args.z_score)

    f_out.create_dataset("global_%s" % split, data=global_features)
    f_out.create_dataset("label_%s" % split, data=label)

    for misc in default_branches:
        array = features[split][misc]
        f_out.create_dataset("%s_%s" % (misc, split), data=array)

    if args.objects:
        objects = utils.pad_array(features[split]["objects_"])
        f_out.create_dataset("objects_%s" % split, data=objects)

f_out.close()
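
The saved mean/std_dev entries are later consumed by utils.preprocess_array (Examples 6, 8 and 9). A minimal sketch of such a z-score transform, assuming the scheme stores per-object-feature statistics under keys like objects_0 (as Example 10 writes them); the real utility may differ:

import numpy as np

def preprocess_array(array, scheme):
    # Hypothetical sketch: z-score each feature along the trailing axis
    # using the saved statistics, skipping features with zero spread.
    out = np.array(array, dtype=np.float64)
    for i in range(out.shape[-1]):
        entry = scheme.get("objects_%d" % i)
        if entry and entry["std_dev"] > 0:
            out[..., i] = (out[..., i] - entry["mean"]) / entry["std_dev"]
    return out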
Example 8
    with open(preprocess_scheme) as f_in:
        preprocess_scheme = json.load(f_in)

print("Preprocessing scheme: ", preprocess_scheme)

dnn_branches = []

if do_dnn:
    print("Calculating dnn scores")
    print(len(dnn_features))
    print(dnn_features)
    i = 0
    for dnn_model in dnn_models:
        with open(dnn_model, "r") as f_in:
            model = json.load(f_in)
        dnn_features_data = dnn_helper.DNN_Features(
            name='data',
            global_features=utils.create_array(features_data, dnn_features, model["preprocess_scheme"], True),
            objects=utils.preprocess_array(utils.pad_array(features_data["objects_"]), model["preprocess_scheme"]))
        dnn_features_validation = dnn_helper.DNN_Features(
            name='test',
            global_features=utils.create_array(features_test, dnn_features, model["preprocess_scheme"], True),
            objects=utils.preprocess_array(utils.pad_array(features_test["objects_"]), model["preprocess_scheme"]),
            lumi=features_test["lumi_"],
            run=features_test["run_"],
            evt=features_test["evt_"])
        dnn_features_train = dnn_helper.DNN_Features(
            name='train',
            global_features=utils.create_array(features_train, dnn_features, model["preprocess_scheme"], True),
            objects=utils.preprocess_array(utils.pad_array(features_train["objects_"]), model["preprocess_scheme"]))

        dnn = dnn_helper.DNN_Helper(
            features_validation=dnn_features_validation,
            features_train=dnn_features_train,
            features_data=dnn_features_data,
            metadata=model,
            weights_file="dnn_weights/" + model["weights"],
            train_mode=False)
        dnn_predictions.append(dnn.predict(debug=True))
        training_features.append("dnn_score_%d" % i)
        dnn_branches.append("dnn_score_%d" % i)
        i += 1

print(dnn_predictions)

preprocess_dict = {}
if args.z_score:
    for feat in training_features:
        mean, std = utils.get_mean_and_std(features_train[feat])
Example 9
#if args.do_top_tag:
#    dnn_features += ["top_tag_score_"]
if (args.fcnc_hut or args.fcnc_hct) and args.channel == "Hadronic" and not args.no_mass_constraint:
    dnn_features += ["m_ggj_", "m_jjj_"] 

if do_dnn:
  print("Calculating dnn scores")
  print(len(dnn_features))
  print(dnn_features)
  i = 0
  for dnn_model in dnn_models:
    with open(dnn_model, "r") as f_in:
      model = json.load(f_in)

    dnn_features_data = dnn_helper.DNN_Features(
        name='data',
        global_features=utils.create_array(features_data, dnn_features, model["preprocess_scheme"], True),
        objects=utils.preprocess_array(utils.pad_array(features_data["objects_"]), model["preprocess_scheme"]))
    #print dnn_features_data.global_features
    #print dnn_features_data.objects
    #print dnn_features_data.features
    dnn_features_validation = dnn_helper.DNN_Features(
        name='validation',
        global_features=utils.create_array(features_validation, dnn_features, model["preprocess_scheme"], True),
        objects=utils.preprocess_array(utils.pad_array(features_validation["objects_"]), model["preprocess_scheme"]))
    dnn_features_final_fit = dnn_helper.DNN_Features(
        name='final_fit',
        global_features=utils.create_array(features_final_fit, dnn_features, model["preprocess_scheme"], True),
        objects=utils.preprocess_array(utils.pad_array(features_final_fit["objects_"]), model["preprocess_scheme"]))
    dnn_features_train = dnn_helper.DNN_Features(
        name='train',
        global_features=utils.create_array(features, dnn_features, model["preprocess_scheme"], True),
        objects=utils.preprocess_array(utils.pad_array(features["objects_"]), model["preprocess_scheme"]))

    dnn = dnn_helper.DNN_Helper(
        features_validation=dnn_features_validation,
        features_train=dnn_features_train,
        features_data=dnn_features_data,
        features_final_fit=dnn_features_final_fit,
        metadata=model,
        weights_file="dnn_weights/" + model["weights"],
        train_mode=False)
    #dnn.predict()
    #dnn_predictions.append([dnn.predictions["train"], dnn.predictions["validation"], dnn.predictions["data"]])
    dnn_predictions.append(dnn.predict(debug=True))
    feature_names.append("dnn_score_%d" % i)
    i += 1 

print(dnn_predictions)
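
utils.get_mean_and_std (used in Examples 7, 8 and 10) is likewise not shown; a plausible sketch, assuming it simply computes the sample statistics over all entries:

import numpy as np

def get_mean_and_std(values):
    # Hypothetical sketch: mean and standard deviation over the flattened
    # input.
    values = np.asarray(values, dtype=np.float64)
    return values.mean(), values.std()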
Example 10
                "mean": float(mean),
                "std_dev": float(stddev)
            }

global_features = utils.create_array(features, feature_names, preprocess_dict,
                                     args.z_score)
global_features_validation = utils.create_array(features_validation,
                                                feature_names, preprocess_dict,
                                                args.z_score)
global_features_data = utils.create_array(features_data, feature_names,
                                          preprocess_dict, args.z_score)
global_features_final_fit = utils.create_array(features_final_fit,
                                               feature_names, preprocess_dict,
                                               args.z_score)

object_features = utils.pad_array(object_features)
object_features_validation = utils.pad_array(object_features_validation)
object_features_data = utils.pad_array(object_features_data)
object_features_final_fit = utils.pad_array(object_features_final_fit)

if args.z_score:
    n_object_features = len(object_features[0][0])
    for i in range(n_object_features):
        mean, stddev = utils.get_mean_and_std(object_features[:, :, i])
        preprocess_dict["objects_" + str(i)] = {
            "mean": mean,
            "std_dev": stddev
        }

    with open("preprocess_scheme_%s_%s.json" % (args.channel, args.tag),
              "w") as f_out: