Example #1
0
def dcToDf(dc_file, df_out):
    dc = DataCollection()
    dc.readFromFile(dc_file)

    NENT = 1  # Can skip some events
    filelist = []
    i = 0
    storeInputs = True
    count = 0

    feature_names = dc.dataclass.branches[1]
    spectator_names = dc.dataclass.branches[0]
    labels_names = dc.getUsedTruth()
    labels_names = ['truth' + l for l in labels_names]

    for s in dc.samples:
        if count > 1000000: break
        spath = dc.getSamplePath(s)
        filelist.append(spath)
        h5File = h5py.File(spath)
        f = h5File
        features_val_i = [
            h5File['x%i' % j][()]
            for j in range(0, h5File['x_listlength'][()][0])
        ]
        features_val_i = features_val_i[0][::NENT, 0, :]
        #predict_test_i = model.predict(features_val)
        weights_val_i = h5File['w0'][()]
        labels_val_i = h5File['y0'][()][::NENT, :]
        spectators_val_i = h5File['z0'][()][::NENT, 0, :]
        if storeInputs: raw_features_val_i = h5File['z1'][()][::NENT, 0, :]
        if i == 0:
            #predict_test = predict_test_i
            weights_val = weights_val_i
            labels_val = labels_val_i
            spectators_val = spectators_val_i
            features_val = features_val_i
            if storeInputs: raw_features_val = raw_features_val_i
        else:
            #predict_test = np.concatenate((predict_test,predict_test_i))
            weights_val = np.concatenate((weights_val, weights_val_i))
            labels_val = np.concatenate((labels_val, labels_val_i))
            features_val = np.concatenate((features_val, features_val_i))
            spectators_val = np.concatenate((spectators_val, spectators_val_i))
            if storeInputs:
                raw_features_val = np.concatenate(
                    (raw_features_val, raw_features_val_i))
        i += 1
        count += labels_val.shape[0]

    entries = np.hstack((raw_features_val, spectators_val, labels_val,
                         weights_val.reshape((len(weights_val), 1))))
    df = pd.DataFrame(entries,
                      columns=feature_names + spectator_names + labels_names +
                      ['weight'])
    #df = pd.DataFrame(raw_features_val+spectators_val , columns = feature_names+spectator_names)
    #print df
    if df_out != None:
        df.to_pickle(df_out)
        print "Saved df to", df_out
Example #2
0
class TrainingInfo:

    def __init__( self, directory ):

        filename = os.path.join( directory, 'dataCollection.dc')
        file_    = open( filename, 'rb')

        self.samples    =   pickle.load(file_)
        sampleentries   =   pickle.load(file_)
        originRoots     =   pickle.load(file_)
        nsamples        =   pickle.load(file_)
        useweights      =   pickle.load(file_)
        batchsize       =   pickle.load(file_)
        dataclass       =   pickle.load(file_)
        weighter        =   pickle.load(file_)
        self._means     =   pickle.load(file_)
        file_.close()


        # Get means dictionary
        self.means = {name : (self._means[0][i], self._means[1][i]) for i, name in enumerate( self._means.dtype.names) }

        # Get DeepJetCore DataCollection
        self.dataCollection = DataCollection()
        self.dataCollection.readFromFile(filename) 

        # Reading first sample & get branch structure
        fullpath = self.dataCollection.getSamplePath(self.samples[0])
        self.dataCollection.dataclass.readIn(fullpath)
        self.branches = self.dataCollection.dataclass.branches

        print "Branches:"
        for i in range(len(self.branches)):
            print "Collection", i
            for i_b, b in enumerate(self.branches[i]):
                print "  branch %2i/%2i %40s   mean %8.5f var %8.5f" %( i, i_b, b, self.means[b][0], self.means[b][1])
            print

    def dump( self, filename):
        pickle.dump( [ self.branches, self.means], file( filename, 'w' ) )
        print "Written", filename
Example #3
0
              kernel_initializer=keras.initializers.RandomNormal(
                  mean=0.0, stddev=0.01))(x)

    x = Concatenate(name="concatlast",
                    axis=-1)([x] + coords + [n_showers] + [etas_phis])
    x = Multiply()([x, mask])
    predictions = [x]
    return Model(inputs=Inputs, outputs=predictions)


# Set up the DeepJetCore training session (argument parsing, output dir,
# checkpoint resume) -- training_base drives everything from here on.
train = training_base(testrun=False, resumeSilently=True, renewtokens=True)

# Test-set DataCollection next to the training input data.
# NOTE(review): assumes merged_test.dc sits in the same directory as
# train.inputData -- confirm against the preprocessing step.
plotdc = DataCollection(
    os.path.dirname(os.path.realpath(train.inputData)) + '/merged_test.dc')

# First test sample file, used by the plotting callbacks defined below.
samplefile = plotdc.getSamplePath(plotdc.samples[0])


#gets called every epoch
def decay_function(aftern_batches):
    """Per-epoch schedule hook; currently the identity function.

    Returns the batch count unchanged (a commented alternative in the
    original was ``int(aftern_batches + 5)``).
    """
    schedule_value = aftern_batches
    return schedule_value


ppdts = [
    plot_truth_pred_plus_coords_during_training(
        samplefile=samplefile,
        output_file=train.outputDir + '/train_progress' + str(0),
        use_event=use_event,
        x_index=5,
        y_index=6,
        z_index=7,