def dcToDf(dc_file, df_out):
    """Flatten the samples of a DataCollection into a single pandas DataFrame.

    Every sample file listed in the DataCollection stored at ``dc_file`` is
    opened with h5py and the following datasets are read from it:

    * ``'z1'`` -- raw input features (columns named after
      ``dc.dataclass.branches[1]``),
    * ``'z0'`` -- spectators (columns named after
      ``dc.dataclass.branches[0]``),
    * ``'y0'`` -- truth labels (columns named ``'truth' + <label>`` for each
      entry of ``dc.getUsedTruth()``),
    * ``'w0'`` -- per-entry weights (column ``'weight'``).

    The ``'x<j>'`` feature arrays are not loaded: only the ``'z1'`` raw
    inputs end up in the DataFrame.

    Args:
        dc_file: Path of the DataCollection file to read.
        df_out: Path the DataFrame is pickled to, or ``None`` to skip saving.

    Returns:
        The assembled ``pandas.DataFrame`` (also available when ``df_out``
        is ``None``).

    Raises:
        ValueError: If the DataCollection contains no samples.
    """
    dc = DataCollection()
    dc.readFromFile(dc_file)

    NENT = 1  # Keep every NENT-th entry; values > 1 skip events
    MAX_ENTRIES = 1000000  # No further sample is opened once this is exceeded

    feature_names = dc.dataclass.branches[1]
    spectator_names = dc.dataclass.branches[0]
    labels_names = ['truth' + l for l in dc.getUsedTruth()]

    # Per-sample arrays are collected and concatenated once after the loop;
    # concatenating inside the loop re-copies everything read so far.
    raw_features_parts = []
    spectators_parts = []
    labels_parts = []
    weights_parts = []

    count = 0
    for s in dc.samples:
        if count > MAX_ENTRIES:
            break
        spath = dc.getSamplePath(s)
        # Read-only access; the context manager closes the file even if one
        # of the dataset reads fails.
        with h5py.File(spath, 'r') as h5File:
            # Stride the weights like the other arrays so rows stay aligned
            # when NENT != 1.
            weights_i = h5File['w0'][()][::NENT]
            labels_i = h5File['y0'][()][::NENT, :]
            spectators_i = h5File['z0'][()][::NENT, 0, :]
            raw_features_i = h5File['z1'][()][::NENT, 0, :]

        weights_parts.append(weights_i)
        labels_parts.append(labels_i)
        spectators_parts.append(spectators_i)
        raw_features_parts.append(raw_features_i)

        # Count only the rows contributed by this sample (not the running
        # total), so MAX_ENTRIES really bounds the number of entries read.
        count += labels_i.shape[0]

    if not labels_parts:
        raise ValueError("DataCollection %r contains no samples" % (dc_file,))

    weights_val = np.concatenate(weights_parts)
    labels_val = np.concatenate(labels_parts)
    spectators_val = np.concatenate(spectators_parts)
    raw_features_val = np.concatenate(raw_features_parts)

    # One row per entry: raw features | spectators | labels | weight
    entries = np.hstack((raw_features_val, spectators_val, labels_val,
                         weights_val.reshape((len(weights_val), 1))))
    df = pd.DataFrame(entries,
                      columns=feature_names + spectator_names +
                      labels_names + ['weight'])

    if df_out is not None:
        df.to_pickle(df_out)
        # Single-argument form prints identically on Python 2 and Python 3.
        print("Saved df to %s" % df_out)

    return df
class TrainingInfo:
    """Branch names and per-branch statistics of a training directory.

    Attributes set by the constructor:
        samples: First object pickled in ``dataCollection.dc``.
        _means: Ninth object pickled in ``dataCollection.dc``; accessed as
            ``_means[0][i]`` / ``_means[1][i]`` with field names taken from
            ``_means.dtype.names`` (presumably a NumPy structured array --
            confirm against the writer of the file).
        means: Dict mapping each field name to the pair
            ``(_means[0][i], _means[1][i])``; the two values are printed
            as "mean" and "var" respectively.
        dataCollection: ``DataCollection`` read from the same file.
        branches: ``dataCollection.dataclass.branches`` after reading in
            the first sample.
    """

    def __init__(self, directory):
        """Load ``<directory>/dataCollection.dc`` and print its branches.

        Args:
            directory: Directory containing ``dataCollection.dc`` and the
                sample files it refers to.
        """
        filename = os.path.join(directory, 'dataCollection.dc')

        # NOTE(security): pickle.load can execute arbitrary code; only use
        # this on dataCollection.dc files from a trusted source.
        with open(filename, 'rb') as file_:
            self.samples = pickle.load(file_)
            # These objects are not used here, but they are stored between
            # `samples` and the means, so they must be consumed in order.
            skipped = ('sampleentries', 'originRoots', 'nsamples',
                       'useweights', 'batchsize', 'dataclass', 'weighter')
            for _ in skipped:
                pickle.load(file_)
            self._means = pickle.load(file_)

        # Get means dictionary
        self.means = {
            name: (self._means[0][i], self._means[1][i])
            for i, name in enumerate(self._means.dtype.names)
        }

        # Get DeepJetCore DataCollection
        self.dataCollection = DataCollection()
        self.dataCollection.readFromFile(filename)

        # Reading first sample & get branch structure
        fullpath = self.dataCollection.getSamplePath(self.samples[0])
        self.dataCollection.dataclass.readIn(fullpath)
        self.branches = self.dataCollection.dataclass.branches

        # Single-argument print(...) calls behave the same on Python 2 and 3.
        print("Branches:")
        for i, collection in enumerate(self.branches):
            print("Collection %s" % i)
            for i_b, b in enumerate(collection):
                print(" branch %2i/%2i %40s mean %8.5f var %8.5f" % (
                    i, i_b, b, self.means[b][0], self.means[b][1]))
        print("")

    def dump(self, filename):
        """Pickle ``[self.branches, self.means]`` to ``filename``.

        Args:
            filename: Path of the output file; overwritten if it exists.
        """
        # Binary mode is what pickle expects, and the context manager makes
        # sure the data is flushed and the handle closed.
        with open(filename, 'wb') as out:
            pickle.dump([self.branches, self.means], out)
        print("Written %s" % filename)
kernel_initializer=keras.initializers.RandomNormal( mean=0.0, stddev=0.01))(x) x = Concatenate(name="concatlast", axis=-1)([x] + coords + [n_showers] + [etas_phis]) x = Multiply()([x, mask]) predictions = [x] return Model(inputs=Inputs, outputs=predictions) train = training_base(testrun=False, resumeSilently=True, renewtokens=True) plotdc = DataCollection( os.path.dirname(os.path.realpath(train.inputData)) + '/merged_test.dc') samplefile = plotdc.getSamplePath(plotdc.samples[0]) #gets called every epoch def decay_function(aftern_batches): return aftern_batches # int(aftern_batches+5) ppdts = [ plot_truth_pred_plus_coords_during_training( samplefile=samplefile, output_file=train.outputDir + '/train_progress' + str(0), use_event=use_event, x_index=5, y_index=6, z_index=7,