Example #1
    def sub_test_store(self, readWrite):
        td = TrainData()
        x, y, w = self.createSimpleArray('int32'), self.createSimpleArray('float32'), self.createSimpleArray('int32')
        x_orig = x.copy()
        x2, y2, _ = self.createSimpleArray('float32'), self.createSimpleArray('float32'), self.createSimpleArray('int32')
        x2_orig = x2.copy()
        y_orig = y.copy()

        # pack two feature arrays, two truth arrays and one weight array
        td._store([x, x2], [y, y2], [w])

        if readWrite:
            # round-trip through disk; the content must survive unchanged
            td.writeToFile("testfile.tdjctd")
            td = TrainData()
            td.readFromFile("testfile.tdjctd")
            os.system('rm -f testfile.tdjctd')

        shapes = td.getNumpyFeatureShapes()
        self.assertEqual([[3, 5, 6], [1], [3, 5, 6], [1]], shapes, "shapes")

        self.assertEqual(2, td.nFeatureArrays())
        self.assertEqual(2, td.nTruthArrays())
        self.assertEqual(1, td.nWeightArrays())

        f = td.transferFeatureListToNumpy(False)
        t = td.transferTruthListToNumpy(False)
        w = td.transferWeightListToNumpy(False)

        # each transferred array comes back as (data, row splits); rebuild and compare
        xnew = SimpleArray(f[0], np.array(f[1], dtype='int64'))
        self.assertEqual(x_orig, xnew)

        xnew = SimpleArray(f[2], np.array(f[3], dtype='int64'))
        self.assertEqual(x2_orig, xnew)

        ynew = SimpleArray(t[0], np.array(t[1], dtype='int64'))
        self.assertEqual(y_orig, ynew)
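
The createSimpleArray helper used above is not part of this excerpt. A minimal sketch of a test-class method consistent with the shape assertion (ragged elements of shape [3, 5, 6] plus int64 row splits); the element count, the split points, and the SimpleArray import path are assumptions:

import numpy as np
from DeepJetCore import SimpleArray  # assumed import path; may differ by version

def createSimpleArray(self, dtype):
    # hypothetical payload: 10 ragged elements, each of shape (3, 5, 6)
    data = np.arange(10 * 3 * 5 * 6, dtype=dtype).reshape(10, 3, 5, 6)
    row_splits = np.array([0, 4, 10], dtype='int64')  # assumed split points
    return SimpleArray(data, row_splits)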
Example #2
    def test_TrainDataRead(self):
        print('TestCompatibility TrainData')
        td = TrainData()
        td.readFromFile('trainData_previous.djctd')

        self.assertEqual(td.nFeatureArrays(), 1)

        arr = np.load("np_arr.npy")
        rs = np.load("np_rs.npy")

        b = SimpleArray(arr, rs)

        a = td.transferFeatureListToNumpy(False)
        a, rs = a[0], a[1]

        a = SimpleArray(a, np.array(rs, dtype='int64'))

        self.assertEqual(a, b)
Example #3
    def test_AddToFile(self):
        print('TestTrainData: AddToFile')

        td = TrainData()
        x, y, w = self.createSimpleArray('int32'), self.createSimpleArray(
            'float32'), self.createSimpleArray('int32')
        xo, yo, wo = x.copy(), y.copy(), w.copy()
        x2, y2, _ = self.createSimpleArray('float32'), self.createSimpleArray(
            'float32'), self.createSimpleArray('int32')
        x2o, y2o = x2.copy(), y2.copy()
        td._store([x, x2], [y, y2], [w])

        td.writeToFile("testfile.tdjctd")
        td.addToFile("testfile.tdjctd")

        td2 = TrainData()
        td2._store([xo, x2o], [yo, y2o], [wo])
        td2.append(td)

        td.readFromFile("testfile.tdjctd")
        os.system('rm -f testfile.tdjctd')

        self.assertEqual(td, td2)
Example #4
    def __init__(
            self,
            samplefile,
            function_to_apply=None,  # needs to be function(counter, [model_input], [predict_output], [truth])
            after_n_batches=50,
            batchsize=10,
            on_epoch_end=False,
            use_event=0,
            decay_function=None,
            offset=0):
        super(PredictCallback, self).__init__()
        self.samplefile = samplefile
        self.function_to_apply = function_to_apply
        self.counter = 0
        self.call_counter = offset
        self.decay_function = decay_function

        self.after_n_batches = after_n_batches
        self.run_on_epoch_end = on_epoch_end

        if self.run_on_epoch_end and self.after_n_batches >= 0:
            print(
                'PredictCallback: can only be used on epoch end OR after n batches, falling back to epoch end'
            )
            self.after_n_batches = 0

        td = TrainData()
        td.readFromFile(samplefile)
        if use_event >= 0:
            td.skim(use_event)

        self.batchsize = 1
        self.td = td
        self.gen = trainDataGenerator()
        self.gen.setBatchSize(batchsize)
        self.gen.setSkipTooLargeBatches(False)
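
For context, a minimal sketch of how this callback could be attached to a Keras training loop; the sample file, the function to apply, and the model object are placeholders, not part of the original source:

def dump_predictions(counter, model_input, predict_output, truth):
    # placeholder function matching the documented signature above
    print('call', counter, 'first prediction shape', predict_output[0].shape)

cb = PredictCallback('validation_sample.djctd',  # hypothetical sample file
                     function_to_apply=dump_predictions,
                     after_n_batches=100,
                     batchsize=10)
# model.fit(x_train, y_train, callbacks=[cb])  # hypothetical model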
Example #5
from argparse import ArgumentParser

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
import math
from numba import jit

from DeepJetCore.TrainData import TrainData
from inference import collect_condensates, make_inference_dict

parser = ArgumentParser('make plots')
parser.add_argument('inputFile')

args = parser.parse_args()

#use traindata as data storage
td = TrainData()
td.readFromFile(args.inputFile)

td.x = td.transferFeatureListToNumpy()

data = make_inference_dict(td.x[0], td.x[1], td.x[2])

betaselection = collect_condensates(data, 0.1, 0.8)  #0.2/2.0

print('betaselection', betaselection.shape)


def makeRectangle(size, pos, edgecolor='y'):
    return patches.Rectangle([pos[0] - size[0] / 2., pos[1] - size[1] / 2.],
                             size[0],
                             size[1],
                             linewidth=1,
                             edgecolor=edgecolor,
                             facecolor='none')
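
A short usage note (an assumption, not from the original script): rectangles built this way are typically overlaid on an existing axes via add_patch; the sizes and positions here are placeholders.

fig, ax = plt.subplots()
ax.add_patch(makeRectangle((4., 4.), (10., 10.)))  # placeholder size and position
ax.set_xlim(0, 20)
ax.set_ylim(0, 20)
plt.show()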
Example #6
parser.add_argument("-e", help="event number ", default="0")

args = parser.parse_args()

import DeepJetCore
from keras.models import load_model
from DeepJetCore.compiled.c_trainDataGenerator import trainDataGenerator
from DeepJetCore.evaluation import predict_from_TrainData
from DeepJetCore.customObjects import get_custom_objects
from DeepJetCore.TrainData import TrainData
import matplotlib.pyplot as plt
from ragged_plotting_tools import make_cluster_coordinates_plot, make_original_truth_shower_plot
from index_dicts import create_index_dict, create_feature_dict

td = TrainData()
td.readFromFile(args.i)
td.skim(int(args.e))
#td=td.split(int(args.e)+1)#get the first e+1 elements
#if int(args.e)>0:
#    td.split(1) #reduce to the last element (the e'th one)

model = load_model(args.inputModel, custom_objects=get_custom_objects())

predicted = predict_from_TrainData(model, td, batchsize=100000)

pred = predicted[0]
feat = td.transferFeatureListToNumpy()
rs = feat[1]
feat = feat[0]
#weights = td.transferWeightListToNumpy()
truth = td.transferTruthListToNumpy()[0]
Example #7
from argparse import ArgumentParser

import numpy as np
from DeepJetCore.TrainData import TrainData
# assumed to come from the same inference module used in the other examples
from inference import collect_condensates, make_particle_inference_dict

# argument definition reconstructed from the usage below (args.inputFile lists one input file per line)
parser = ArgumentParser('collect condensate predictions')
parser.add_argument('inputFile')
args = parser.parse_args()

allparticles = []
all_ev_prop = []
names = ""

with open(args.inputFile) as file:
    for inputfile in file:
        inputfile = inputfile.replace('\n', '')
        if len(inputfile) < 1: continue

        print('inputfile', inputfile)

        td = TrainData()
        td.readFromFile(inputfile)
        indata = td.transferFeatureListToNumpy()
        pred, feat, truth = indata[0], indata[1], indata[2]
        del td

        d = make_particle_inference_dict(pred, feat, truth)
        condensate_mask = np.squeeze(collect_condensates(d, 0.1, 0.8),
                                     axis=2)  #B x V x 1

        pred_E = d['f_E'] * d['p_E_corr']
        pred_pos = d['f_pos'] + d['p_pos_offs']
        calo_energy = None  # not supported by the data format
        #np.sum(d['f_E'][:,0:16*16,0],axis=-1)#calo energy
        #loop over events here.. easier

        nevents = pred.shape[0]
Example #8
from DeepJetCore.TrainData import TrainData
from DeepJetCore.dataPipeline import TrainDataGenerator
from LayersRagged import RaggedConstructTensor
import index_dicts
import tensorflow as tf
import os

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "4"

td = TrainData()
td.readFromFile(
    '/eos/cms/store/cmst3/group/hgcal/CMG_studies/pepr/50_part_with_noise_Jul2020/converted/HGCalML_data/50_part_with_noise_Jul2020/988_windowntup.djctd'
)
gen = TrainDataGenerator()
gen.setBatchSize(100000)
gen.setSkipTooLargeBatches(False)
gen.setBuffer(td)

with tf.device('/CPU:0'):
    ragged_constructor = RaggedConstructTensor()

while True:
    feat, truth = next(
        gen.feedNumpyData())  # this is  [ [features],[truth],[None] ]

    if gen.lastBatch():
        break

    row_splits = feat[1][:, 0]
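    # the row splits delimit the per-event slices of the flat, ragged feature array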
Example #9
import numpy as np
from DeepJetCore.TrainData import TrainData
from argparse import ArgumentParser
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.patches as patches
import math
from numba import jit
from inference import collect_condensates, make_inference_dict

td = TrainData()
#td.readFromFile("../results_partial/predictions/pred_9.djctd")
td.readFromFile("../data/test_data/9.djctd")
td.x = td.transferFeatureListToNumpy()
td.y = td.transferTruthListToNumpy()
td.z = td.transferWeightListToNumpy()

x = td.x
y = td.y
z = td.z

print(len(x))

print(x[0].shape)
print(x[1].shape)
print(x[2].shape)
#print(y.shape)
#print(z.shape)

data = make_inference_dict(td.x[0], td.x[1], td.x[2])
Example #10
from argparse import ArgumentParser
from plotting_tools import plotevent
import numpy as np

parser = ArgumentParser('Make some plots')
parser.add_argument('inputFile')
args = parser.parse_args()

infile = str(args.inputFile)

from DeepJetCore.TrainData import TrainData
import matplotlib.pyplot as plt

td = TrainData()
td.readFromFile(infile)

feat = td.transferFeatureListToNumpy()[0]
truth = td.transferTruthListToNumpy()[0]
nevents = min(len(feat),10)


for e in range(nevents):
    
    print('true energy', truth[e])
    print('reco sum   ', np.sum(feat[e,:,:,:,0]))
    
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # fig.gca(projection='3d') was removed in matplotlib 3.6
    ax.set_xlabel("x [idx]")
    ax.set_zlabel("y [idx]")
Example #11
    def __init__(self,
                 samplefile,
                 accumulate_after_batches=5,
                 plot_after_batches=50,
                 batchsize=10,
                 beta_threshold=0.6,
                 distance_threshold=0.6,
                 iou_threshold=0.1,
                 n_windows_for_plots=5,
                 n_windows_for_scalar_metrics=5000000,
                 outputdir=None,
                 publish = None,
                 n_ccoords=None,
                 n_average_over_samples=5,
                 ):
        """

        :param samplefile: the file to pick validation data from
        :param accumulate_after_batches: run performance metrics after n batches (a good value is 5)
        :param plot_after_batches: update and upload plots after n batches
        :param batchsize: batch size
        :param beta_threshold: beta threshold for running prediction on obc
        :param distance_threshold: distance threshold for running prediction on obc
        :param iou_threshold: iou threshold to use to match both for obc and for ticl
        :param n_windows_for_plots: how many windows to average to do running performance plots
        :param n_windows_for_scalar_metrics: the maximum windows to store data for scalar performance metrics as a function of iteration
        :param outputdir: the output directory where to store results
        :param publish: where to publish, could be ssh'able path
        :param n_ccoords: n coords for plots
        :param n_average_over_samples: average scalar metrics over samples
        """
        super(plotRunningPerformanceMetrics, self).__init__()
        self.samplefile = samplefile
        self.counter = 0
        self.call_counter = 0
        self.decay_function = None
        self.outputdir = outputdir
        self.n_ccoords = n_ccoords
        self.publish = publish

        self.accumulate_after_batches = accumulate_after_batches
        self.plot_after_batches = plot_after_batches
        self.run_on_epoch_end = False

        if self.run_on_epoch_end and self.accumulate_after_batches >= 0:
            print('plotRunningPerformanceMetrics: can only be used on epoch end OR after n batches, falling back to epoch end')
            self.accumulate_after_batches = 0

        td = TrainData()
        td.readFromFile(samplefile)
        # td_selected = td.split(self.n_events)  # check if this works in ragged out of the box
        # if use_event >= 0:
        #     if use_event < td.nElements():
        #         td.skim(use_event)
        #     else:
        #         td.skim(use_event % td.nElements())
        self.batchsize = batchsize
        self.td = td
        self.gen = TrainDataGenerator()
        self.gen.setBatchSize(self.batchsize)
        self.gen.setSkipTooLargeBatches(False)
        self.gen.setBuffer(td)

        self.n_batches = self.gen.getNBatches()


        with tf.device('/CPU:0'):
            self.ragged_constructor = RaggedConstructTensor()
        self.window_id = 0
        self.window_analysis_dicts = []
        self.n_windows_for_plots = n_windows_for_plots
        self.n_windows_for_scalar_metrics = n_windows_for_scalar_metrics
        self.beta_threshold = beta_threshold
        self.distance_threshold = distance_threshold
        self.iou_threshold = iou_threshold

        self.scalar_metrics = dict()
        self.scalar_metrics['efficiency'] = []
        self.scalar_metrics['efficiency_ticl'] = []
        self.scalar_metrics['fake_rate'] = []
        self.scalar_metrics['fake_rate_ticl'] = []
        self.scalar_metrics['var_response'] = []
        self.scalar_metrics['var_response_ticl'] = []
        self.scalar_metrics['iteration'] = []

        self.n_average_over_samples = n_average_over_samples

        self.plot_process = None
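
As with PredictCallback above, a minimal usage sketch; the sample file and output directory are placeholders and the model is an already-compiled Keras model:

cb = plotRunningPerformanceMetrics('validation_sample.djctd',  # hypothetical file
                                   accumulate_after_batches=5,
                                   plot_after_batches=50,
                                   outputdir='running_metrics',  # placeholder
                                   n_ccoords=2)
# model.fit(x_train, y_train, callbacks=[cb])  # hypothetical model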