Exemples Python de DataCollection.getUsedTruth

Langage de programmation: Python

Espace de noms/Paquet : DeepJetCore.DataCollection

Class/Type: DataCollection

Méthode/Fonction: getUsedTruth

Exemples sur hotexamples.com : 3

Exemples Python de DataCollection.getUsedTruth – 3 exemples trouvés. Ce sont les exemples réels les mieux notés de DeepJetCore.DataCollection.DataCollection.getUsedTruth, extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Méthodes fréquemment utilisées

Afficher / Masquer

DataCollection(28)

readFromFile(11)

writeToFile(6)

dataclass(5)

invokeGenerator(5)

convertListOfRootFiles(4)

setBatchSize(4)

getAllFeatures(3)

samples(3)

getAllLabels(3)

getUsedTruth(3)

createTestDataForDataCollection(3)

getSamplePath(3)

prependToSampleFiles(2)

getAllWeights(2)

getInputShapes(2)

getNClassificationTargets(2)

getNRegressionTargets(2)

split(2)

sourceList(2)

dataDir(2)

validate(1)

setDataClass(1)

removeEntry(1)

recoverCreateDataFromRootFromSnapshot(1)

generatorFunction(1)

batch_mode(1)

nprocs(1)

generator(1)

meansnormslimit(1)

istestdata(1)

defineCustomPredictionLabels(1)

createDataFromRoot(1)

__nsamples(1)

getBatchSize(1)

no_copy_on_convert(1)

Méthodes fréquemment utilisées

DataCollection (28)

readFromFile (11)

writeToFile (6)

dataclass (5)

invokeGenerator (5)

convertListOfRootFiles (4)

setBatchSize (4)

getAllFeatures (3)

samples (3)

getAllLabels (3)

Méthodes fréquemment utilisées

getUsedTruth (3)

createTestDataForDataCollection (3)

getSamplePath (3)

prependToSampleFiles (2)

getAllWeights (2)

getInputShapes (2)

getNClassificationTargets (2)

getNRegressionTargets (2)

split (2)

sourceList (2)

dataDir (2)

validate (1)

setDataClass (1)

removeEntry (1)

recoverCreateDataFromRootFromSnapshot (1)

generatorFunction (1)

batch_mode (1)

nprocs (1)

generator (1)

meansnormslimit (1)

Méthodes fréquemment utilisées

dataDir (2)

validate (1)

setDataClass (1)

removeEntry (1)

recoverCreateDataFromRootFromSnapshot (1)

generatorFunction (1)

batch_mode (1)

nprocs (1)

generator (1)

meansnormslimit (1)

istestdata (1)

defineCustomPredictionLabels (1)

createDataFromRoot (1)

__nsamples (1)

getBatchSize (1)

no_copy_on_convert (1)

Méthodes fréquemment utilisées

istestdata (1)

defineCustomPredictionLabels (1)

createDataFromRoot (1)

__nsamples (1)

getBatchSize (1)

no_copy_on_convert (1)

Exemple #1

0

Afficher le fichier

Fichier : dcToDf.py Projet : rsyarif/DeepJet

def dcToDf(dc_file, df_out):
    """Flatten the samples of a DataCollection into one pandas DataFrame.

    Loads the DataCollection stored at ``dc_file``, reads the ``z1``,
    ``z0``, ``y0`` and ``w0`` datasets of every sample file, stacks them
    row-wise and builds a DataFrame whose columns are
    ``dataclass.branches[1]`` (feature names), ``dataclass.branches[0]``
    (spectator names), the used truth labels prefixed with ``'truth'`` and
    a trailing ``'weight'`` column.

    Args:
        dc_file: Path handed to ``DataCollection.readFromFile``.
        df_out: Target path for ``DataFrame.to_pickle``. ``None`` skips
            writing the pickle.

    Raises:
        ValueError: If the collection does not list any sample.
    """
    NENT = 1  # Stride over events; values > 1 skip events
    MAX_COUNT = 1000000  # Stop opening new sample files beyond this count

    dc = DataCollection()
    dc.readFromFile(dc_file)

    feature_names = dc.dataclass.branches[1]
    spectator_names = dc.dataclass.branches[0]
    labels_names = ['truth' + name for name in dc.getUsedTruth()]

    # Collect the per-file arrays and concatenate once at the end instead of
    # re-copying the growing arrays for every file.
    raw_features_parts = []
    spectators_parts = []
    labels_parts = []
    weights_parts = []

    rows_so_far = 0
    count = 0
    for s in dc.samples:
        if count > MAX_COUNT:
            break
        spath = dc.getSamplePath(s)
        # The context manager closes the HDF5 handle even if a dataset is
        # missing; ``[()]`` copies each dataset into memory, so the arrays
        # stay valid after the file is closed.
        with h5py.File(spath, 'r') as h5File:
            # The weights get the same stride as the other arrays so that
            # all columns keep the same number of rows if NENT is changed.
            weights_parts.append(h5File['w0'][()][::NENT])
            labels_parts.append(h5File['y0'][()][::NENT, :])
            spectators_parts.append(h5File['z0'][()][::NENT, 0, :])
            raw_features_parts.append(h5File['z1'][()][::NENT, 0, :])

        rows_so_far += labels_parts[-1].shape[0]
        # NOTE(review): this adds the *cumulative* number of rows on every
        # iteration (kept as in the original code), so ``count`` grows
        # faster than the number of rows read - confirm that is intended.
        count += rows_so_far

    if not labels_parts:
        raise ValueError("DataCollection %r contains no samples" % (dc_file,))

    raw_features_val = np.concatenate(raw_features_parts)
    spectators_val = np.concatenate(spectators_parts)
    labels_val = np.concatenate(labels_parts)
    weights_val = np.concatenate(weights_parts)

    entries = np.hstack((raw_features_val,
                         spectators_val,
                         labels_val,
                         weights_val.reshape((len(weights_val), 1))))
    df = pd.DataFrame(entries,
                      columns=feature_names + spectator_names + labels_names + ['weight'])

    if df_out is not None:
        df.to_pickle(df_out)
        # Single-argument form: prints the same text on Python 2 and 3.
        print("%s %s" % ("Saved df to", df_out))

Exemple #2

0

Afficher le fichier

Fichier : eval_functions.py Projet : rsyarif/DeepJet

def evaluate(testd, trainData, model, outputDir, storeInputs=False, adv=False):
    """Run ``model`` on every test sample and store the results as a DataFrame.

    For each sample file of ``testd`` the input arrays ``x0..x{n-1}`` are
    fed to ``model.predict``; the ``y0`` labels and ``z0`` spectators (and
    the ``z1`` raw inputs when ``storeInputs`` is true) are read alongside.
    The stacked results are written to ``outputDir + '/output.pkl'``.

    Args:
        testd: Collection providing ``samples``, ``getSamplePath`` and
            ``dataclass.branches``.
        trainData: Path of the training DataCollection; its
            ``getUsedTruth()`` supplies the truth-label names.
        model: Object with a ``predict`` method accepting the list of
            input arrays.
        outputDir: Directory that receives ``output.pkl``.
        storeInputs: Also add one column per feature name, filled from
            the ``z1`` dataset.
        adv: Read the per-label prediction at column ``NBINS + i`` and add
            ``predict_massbin<j>`` columns.

    Returns:
        The DataFrame that was pickled.

    Raises:
        ValueError: If ``testd`` does not list any sample.
    """
    # NOTE(review): predictions are not strided, so NENT has to stay 1 for
    # labels/spectators to line up with them - confirm before changing it.
    NENT = 1  # Stride over events; values > 1 skip events

    # Collect the per-file arrays and concatenate once after the loop.
    predict_parts = []
    labels_parts = []
    spectators_parts = []
    raw_features_parts = []

    for s in testd.samples:
        spath = testd.getSamplePath(s)
        # ``[()]`` copies each dataset into memory, so the file can be closed
        # before the (potentially slow) prediction runs.
        with h5py.File(spath, 'r') as h5File:
            n_inputs = h5File['x_listlength'][()][0]
            features_val = [h5File['x%i' % j][()] for j in range(0, n_inputs)]
            labels_parts.append(h5File['y0'][()][::NENT, :])
            spectators_parts.append(h5File['z0'][()][::NENT, 0, :])
            if storeInputs:
                raw_features_parts.append(h5File['z1'][()][::NENT, 0, :])
        predict_parts.append(model.predict(features_val))

    if not predict_parts:
        raise ValueError("testd contains no samples to evaluate")

    predict_test = np.concatenate(predict_parts)
    labels_val = np.concatenate(labels_parts)
    spectators_val = np.concatenate(spectators_parts)
    if storeInputs:
        raw_features_val = np.concatenate(raw_features_parts)

    # Labels
    # All prints below use a single argument so that they emit the same
    # text on Python 2 and Python 3.
    print(testd.dataclass.branches)
    feature_names = testd.dataclass.branches[1]
    spectator_names = testd.dataclass.branches[0]

    # The truth names are taken from the training collection, not from testd.
    from DeepJetCore.DataCollection import DataCollection
    traind = DataCollection()
    traind.readFromFile(trainData)
    truthnames = traind.getUsedTruth()

    # Store features
    print("%s %s" % ("Coulmns", spectator_names))
    df = pd.DataFrame(spectators_val, columns=spectator_names)
    if storeInputs:
        for i, tname in enumerate(feature_names):
            df[tname] = raw_features_val[:, i]

    # Add predictions
    print(truthnames)
    print(predict_test.shape)
    for i, tname in enumerate(truthnames):
        df['truth' + tname] = labels_val[:, i]
        if adv:
            # NOTE(review): NBINS is not defined in this function; it is
            # presumably a module-level constant - verify. The mass-bin
            # columns are rewritten on every pass with an offset of ``i``,
            # kept exactly as in the original code.
            df['predict' + tname] = predict_test[:, NBINS + i]
            for j in range(NBINS):
                df['predict_massbin%i' % j] = predict_test[:, j + i]
        else:
            df['predict' + tname] = predict_test[:, i]

    print("Testing prediction:")
    print("%s %s" % ("Total: ", len(predict_test[:, 0])))
    for lab in truthnames:
        print("%s %s %s" % (lab, ":", sum(df['truth' + lab].values)))

    df.to_pickle(outputDir + '/output.pkl')
    # This message used to sit after the ``return`` and was never printed.
    print("Finished storing dataframe")
    return df

Exemple #3

0

Afficher le fichier

def evaluate(testd, trainData, model, outputDir):
    """Predict on all test samples, pickle the results and plot them.

    Runs ``model.predict`` on the ``x0..x{n-1}`` arrays of every sample
    file of ``testd``, combines the predictions with the labels and
    spectators returned by ``testd``, writes the table to
    ``outputDir + '/output.pkl'`` and saves one stacked histogram per truth
    label as ``outputDir + '/dist<label>.png'``.

    Args:
        testd: Collection providing ``samples``, ``getSamplePath``,
            ``getAllLabels``, ``getAllSpectators`` and
            ``dataclass.branches``.
        trainData: Path of the training DataCollection; its
            ``getUsedTruth()`` supplies the truth-label names.
        model: Object with a ``predict`` method accepting the list of
            input arrays.
        outputDir: Directory that receives the pickle and the plots.

    Raises:
        ValueError: If ``testd`` does not list any sample.
    """
    # NOTE(review): predictions are not strided, so NENT has to stay 1 for
    # labels/spectators to line up with them - confirm before changing it.
    NENT = 1  # Stride over events; values > 1 skip events

    predict_parts = []
    for s in testd.samples:
        spath = testd.getSamplePath(s)
        # ``[()]`` copies each dataset into memory, so the file can be closed
        # before the prediction runs.
        with h5py.File(spath, 'r') as h5File:
            n_inputs = h5File['x_listlength'][()][0]
            features_val = [h5File['x%i' % j][()] for j in range(0, n_inputs)]
        predict_parts.append(model.predict(features_val))

    if not predict_parts:
        raise ValueError("testd contains no samples to evaluate")
    predict_test = np.concatenate(predict_parts)

    # Value
    labels_val = testd.getAllLabels()[0][::NENT, :]
    # One call serves both the spectators (first entry) and the raw
    # features (last entry).
    all_spectators = testd.getAllSpectators()
    spectators_val = all_spectators[0][::NENT, 0, :]
    raw_features_val = all_spectators[-1][::NENT, 0, :]

    # Labels
    # All prints below use a single argument so that they emit the same
    # text on Python 2 and Python 3.
    print(testd.dataclass.branches)
    feature_names = testd.dataclass.branches[1]
    spectator_names = testd.dataclass.branches[0]

    # The truth names are taken from the training collection, not from testd.
    from DeepJetCore.DataCollection import DataCollection
    traind = DataCollection()
    traind.readFromFile(trainData)
    truthnames = traind.getUsedTruth()

    # Store features
    # The names are passed as a flat list; wrapping them in another list
    # (``[spectator_names]``) does not give one plain label per column.
    df = pd.DataFrame(spectators_val, columns=spectator_names)
    for i, tname in enumerate(feature_names):
        df[tname] = raw_features_val[:, i]

    # Add predictions
    print(truthnames)
    print(predict_test.shape)
    for i, tname in enumerate(truthnames):
        df['truth' + tname] = labels_val[:, i]
        df['predict' + tname] = predict_test[:, i]

    df.to_pickle(outputDir + '/output.pkl')
    print(df)
    # Read the pickle back and print it as a round-trip check.
    dt = pd.read_pickle(outputDir + '/output.pkl')
    print(dt)

    def dists(xdf, truthnames):
        """Save one stacked histogram of predicted scores per truth label."""
        truths = truthnames
        print(truths)

        def distribution(xdf, predict="Hcc"):
            """Plot the ``predict`` score, weighted by each truth column."""
            plt.figure(figsize=(10, 7))
            bins = np.linspace(0, 1, 70)
            trus = [xdf['truth' + tru].values for tru in truths]
            # The same score array is repeated once per truth label; the
            # truth columns act as weights that pick the matching entries.
            preds = [xdf['predict' + predict].values] * len(truths)
            # NOTE(review): newer matplotlib releases replace ``normed``
            # with ``density`` - confirm the version this runs against.
            plt.hist(preds,
                     bins=bins,
                     weights=trus,
                     alpha=0.8,
                     normed=True,
                     label=truths,
                     stacked=True)
            plt.xlabel("Probability " + predict)
            plt.title("Stacked Distributions")
            plt.semilogy()
            plt.legend(title="True labels:")
            plt.savefig(outputDir + '/dist' + predict + '.png', dpi=300)
            # Release the figure; otherwise one stays open per label.
            plt.close()

        for pred in truths:
            distribution(xdf, predict=pred)

    dists(df, truthnames)

    print("Testing prediction:")
    print("%s %s" % ("Total: ", len(predict_test[:, 0])))
    for lab in truthnames:
        print("%s %s %s" % (lab, ":", sum(df['truth' + lab].values)))
    print("Finished")