def BraveSt(report, classifier, cut, signal, bck):
    """Return the summed 'DiskSize' of all files scoring at least ``cut``.

    The predictions of ``classifier`` are passed through the callable
    returned by ``calc_util.classifier_flatten`` (built from the signal
    predictions) before being compared with ``cut``.

    Parameters
    ----------
    report : object exposing ``prediction_sig`` / ``prediction_bck``,
        both indexable by classifier name.
    classifier : key used to look up the predictions.
    cut : threshold applied to the flattened predictions (``>=``).
    signal, bck : datasets exposing ``get_data(columns)``; the result is
        indexed with a boolean row mask and summed through ``.values``.

    Returns
    -------
    Sum of 'DiskSize' over the selected signal rows plus the selected
    background rows.
    """
    flatten = calc_util.classifier_flatten(report.prediction_sig[classifier])

    # Row masks: one entry per file, True where the flattened score passes.
    picked_sig = flatten(report.prediction_sig[classifier]) >= cut
    picked_bck = flatten(report.prediction_bck[classifier]) >= cut

    sig_total = signal.get_data(['DiskSize'])[picked_sig].values.sum()
    bck_total = bck.get_data(['DiskSize'])[picked_bck].values.sum()
    return sig_total + bck_total
def SafeSt(report, classifier, cut, signal, bck):
    """Return summed ('DiskSize' - 'LFNSize') of files scoring at least ``cut``.

    Only rows whose 'Nb Replicas' value is >= 1 are counted. The scores
    compared with ``cut`` are the classifier predictions passed through the
    callable returned by ``calc_util.classifier_flatten``.

    Parameters
    ----------
    report : object exposing ``prediction_sig`` / ``prediction_bck``,
        both indexable by classifier name.
    classifier : key used to look up the predictions.
    cut : threshold applied to the flattened predictions (``>=``).
    signal, bck : datasets exposing ``get_data(columns)``.

    Returns
    -------
    DiskSize(signal) + DiskSize(bck) - LFNSize(signal) - LFNSize(bck),
    each summed over the selected rows.
    NOTE(review): presumably the space freed when one copy of each file is
    kept -- confirm the meaning of 'LFNSize' with the data owner.
    """
    flatten = calc_util.classifier_flatten(report.prediction_sig[classifier])

    # get_data() returns a 2-D table for a one-column request, so [:, 0]
    # reduces the comparison result to a 1-D row mask.
    has_replica_sig = (signal.get_data(['Nb Replicas']).values >= 1)[:, 0]
    has_replica_bck = (bck.get_data(['Nb Replicas']).values >= 1)[:, 0]

    rows_sig = (flatten(report.prediction_sig[classifier]) >= cut) & has_replica_sig
    rows_bck = (flatten(report.prediction_bck[classifier]) >= cut) & has_replica_bck

    disk_sig = signal.get_data(['DiskSize'])[rows_sig].values.sum()
    disk_bck = bck.get_data(['DiskSize'])[rows_bck].values.sum()
    lfn_sig = signal.get_data(['LFNSize'])[rows_sig].values.sum()
    lfn_bck = bck.get_data(['LFNSize'])[rows_bck].values.sum()

    return disk_sig + disk_bck - lfn_sig - lfn_bck
def RFiles(report, signal_test, bck_test, classifier='xgboost', mincut=0.01, maxcut=1, N=100, pq=95): print "Total number of the 'signal' files is ", signal_test.get_indices().shape[0] print "Total number of files is ", signal_test.get_indices().shape[0]+bck_test.get_indices().shape[0] step = (maxcut - mincut)/N cuts = [mincut + step*i for i in range(0, N+1)] iron = calc_util.classifier_flatten(report.prediction_sig[classifier]) x=cuts nb_signals = [] nb_true_signals = [] nb_rels = [] cut_pq = 1 for i in cuts: nb_signal=((iron(report.prediction[classifier]) >= i)*1).sum() nb_true_signal=((iron(report.prediction_sig[classifier]) >= i)*1).sum() if nb_signal!=0: nb_rel=float(nb_true_signal)/float(nb_signal)*100 else: nb_rel=100 if cut_pq==1 and nb_rel>=pq: cut_pq=i nb_signals.append(nb_signal) nb_true_signals.append(nb_true_signal) nb_rels.append(nb_rel) plt.figure(figsize=(5, 3)) plt.subplot(1,1,1) plt.plot(x, nb_signals, 'b', label = 'nb signal files') plt.plot(x, nb_true_signals, 'r', label = 'nb true signal files') plt.legend(loc = 'best') plt.show() plt.figure(figsize=(5, 3)) plt.subplot(1,1,1) plt.plot(x, nb_rels, 'r', label = 'ratio of the true signals to the signals(%)') plt.legend(loc = 'best') plt.show() return cut_pq
def CondSize(report, signal_test, bck_test, classifier='xgboost', cut=0.6, peaks=5, imax=26):
    """Return summed ('DiskSize' - 'LFNSize') of low-score files that also
    pass the 'nb_peaks' and 'inter_max' conditions.

    A row is selected when all of the following hold:
      * its flattened prediction is strictly below ``cut``;
      * its 'nb_peaks' value is <= ``peaks``;
      * its 'inter_max' value is >= ``imax``;
      * its 'Nb Replicas' value is >= 1.

    Parameters
    ----------
    report : object exposing ``prediction_sig`` / ``prediction_bck``,
        both indexable by classifier name.
    signal_test, bck_test : datasets exposing ``get_data(columns)``.
    classifier : key used to look up the predictions.
    cut : upper (exclusive) bound on the flattened prediction.
    peaks : upper (inclusive) bound on 'nb_peaks'.
    imax : lower (inclusive) bound on 'inter_max'.

    Returns
    -------
    DiskSize(signal) + DiskSize(bck) - LFNSize(signal) - LFNSize(bck),
    each summed over the selected rows.
    """
    flatten = calc_util.classifier_flatten(report.prediction_sig[classifier])

    # Signal-side filter, built up one condition at a time. The [:, 0]
    # turns each one-column comparison into a 1-D row mask.
    keep_sig = flatten(report.prediction_sig[classifier]) < cut
    keep_sig = keep_sig & (signal_test.get_data(['nb_peaks']).values <= peaks)[:, 0]
    keep_sig = keep_sig & (signal_test.get_data(['inter_max']).values >= imax)[:, 0]

    # Background-side filter, same conditions.
    keep_bck = flatten(report.prediction_bck[classifier]) < cut
    keep_bck = keep_bck & (bck_test.get_data(['nb_peaks']).values <= peaks)[:, 0]
    keep_bck = keep_bck & (bck_test.get_data(['inter_max']).values >= imax)[:, 0]

    # Restrict to rows that have at least one replica.
    rows_sig = keep_sig & (signal_test.get_data(['Nb Replicas']).values >= 1)[:, 0]
    rows_bck = keep_bck & (bck_test.get_data(['Nb Replicas']).values >= 1)[:, 0]

    disk_sig = signal_test.get_data(['DiskSize'])[rows_sig].values.sum()
    disk_bck = bck_test.get_data(['DiskSize'])[rows_bck].values.sum()
    lfn_sig = signal_test.get_data(['LFNSize'])[rows_sig].values.sum()
    lfn_bck = bck_test.get_data(['LFNSize'])[rows_bck].values.sum()

    return disk_sig + disk_bck - lfn_sig - lfn_bck
def CombineSt(report, classifier, s_cut, cut, signal, bck):
    """Combine two thresholds into a single size figure.

    Computes ``A(s_cut) - A(cut) + B(cut)`` where

      * ``A(t)`` is the summed ('DiskSize' - 'LFNSize') over rows with a
        flattened prediction >= ``t`` and 'Nb Replicas' >= 1;
      * ``B(t)`` is the summed 'DiskSize' over rows with a flattened
        prediction >= ``t`` (no replica condition).

    Both are evaluated on the signal and background datasets and added.

    Parameters
    ----------
    report : object exposing ``prediction_sig`` / ``prediction_bck``,
        both indexable by classifier name.
    classifier : key used to look up the predictions.
    s_cut : threshold used for the first ``A`` term.
    cut : threshold used for the subtracted ``A`` term and the ``B`` term.
    signal, bck : datasets exposing ``get_data(columns)``.

    Returns
    -------
    The combined value described above.
    """
    flatten = calc_util.classifier_flatten(report.prediction_sig[classifier])
    has_replica_sig = (signal.get_data(['Nb Replicas']).values >= 1)[:, 0]
    has_replica_bck = (bck.get_data(['Nb Replicas']).values >= 1)[:, 0]

    def _replica_aware_size(threshold):
        # DiskSize minus LFNSize over rows passing `threshold` that have
        # at least one replica.
        rows_sig = (flatten(report.prediction_sig[classifier]) >= threshold) & has_replica_sig
        rows_bck = (flatten(report.prediction_bck[classifier]) >= threshold) & has_replica_bck
        disk_sig = signal.get_data(['DiskSize'])[rows_sig].values.sum()
        disk_bck = bck.get_data(['DiskSize'])[rows_bck].values.sum()
        lfn_sig = signal.get_data(['LFNSize'])[rows_sig].values.sum()
        lfn_bck = bck.get_data(['LFNSize'])[rows_bck].values.sum()
        return disk_sig + disk_bck - lfn_sig - lfn_bck

    def _full_size(threshold):
        # Plain DiskSize over every row passing `threshold`.
        rows_sig = flatten(report.prediction_sig[classifier]) >= threshold
        rows_bck = flatten(report.prediction_bck[classifier]) >= threshold
        disk_sig = signal.get_data(['DiskSize'])[rows_sig].values.sum()
        disk_bck = bck.get_data(['DiskSize'])[rows_bck].values.sum()
        return disk_sig + disk_bck

    above_s_cut = _replica_aware_size(s_cut)
    above_cut = _replica_aware_size(cut)
    full_above_cut = _full_size(cut)
    return above_s_cut - above_cut + full_above_cut
def RSize(report, signal_test, bck_test, classifier='xgboost', mincut=0.01, maxcut=1, N=100, cond=0.9, Flag=False, pq=95): print "Total memory can be released is ", signal_test.get_data(['DiskSize']).values.sum() print "Total memory is ", signal_test.get_data(['DiskSize']).values.sum()+bck_test.get_data(['DiskSize']).values.sum() step = (maxcut - mincut)/N cuts = [mincut + step*i for i in range(0, N+1)] iron = calc_util.classifier_flatten(report.prediction_sig[classifier]) x=cuts sz_signals = [] sz_true_signals = [] sz_rels = [] cut_pq = 1 nzrs = (signal_test.get_data(['Nb Replicas']).values >= 1)[:,0] nzrb = (bck_test.get_data(['Nb Replicas']).values >= 1)[:,0] for i in cuts: if i>=cond: sz_signal=signal_test.get_data(['DiskSize'])[(iron(report.prediction_sig[classifier]) >= i)].values.sum()\ +bck_test.get_data(['DiskSize'])[(iron(report.prediction_bck[classifier]) >= i)].values.sum() sz_true_signal=signal_test.get_data(['DiskSize'])[(iron(report.prediction_sig[classifier]) >= i)].values.sum() if sz_signal!=0: sz_rel=float(sz_true_signal)/float(sz_signal)*100. else: sz_rel=100 if cut_pq==1 and sz_rel>=pq: cut_pq=i else: sz_signal=signal_test.get_data(['DiskSize'])[(iron(report.prediction_sig[classifier]) >= i)&nzrs].values.sum()\ +bck_test.get_data(['DiskSize'])[(iron(report.prediction_bck[classifier]) >= i)&nzrb].values.sum()\ -signal_test.get_data(['LFNSize'])[(iron(report.prediction_sig[classifier]) >= i)&nzrs].values.sum()\ -bck_test.get_data(['LFNSize'])[(iron(report.prediction_bck[classifier]) >= i)&nzrb].values.sum() sz_true_signal=signal_test.get_data(['DiskSize'])[(iron(report.prediction_sig[classifier]) >= i)&nzrs].values.sum()\ -signal_test.get_data(['LFNSize'])[(iron(report.prediction_sig[classifier]) >= i)&nzrs].values.sum() if sz_signal!=0: sz_rel=float(sz_true_signal)/float(sz_signal)*100. 
else: sz_rel=100 if cut_pq==1 and sz_rel>=pq: cut_pq=i sz_signals.append(sz_signal) sz_true_signals.append(sz_true_signal) sz_rels.append(sz_rel) if Flag==True: plt.figure(figsize=(5, 3)) plt.subplot(1,1,1) plt.plot(x, sz_signals, 'b', label = 'signal files size') plt.plot(x, sz_true_signals, 'r', label = 'true signal files size') plt.legend(loc = 'best') plt.show() plt.figure(figsize=(5, 3)) plt.subplot(1,1,1) plt.plot(x, sz_rels, 'r') plt.title('Ratio(%)') plt.legend(loc = 'best') plt.show() else: plt.figure(figsize=(5, 3)) plt.subplot(1,1,1) plt.plot(x, sz_signals, 'b', label = 'released memory') plt.legend(loc = 'best') plt.show() return cut_pq
def precision(s, b, t_s, t_b, s_NORM=1., b_NORM = 1.): return 1- b/t_b report.metrics_vs_cut({'precision': precision, 'accuracy': accuracy}).plot(new_plot=True, figsize=(8, 4)) # <codecell> figure(figsize=(10, 6)) report.prediction_pdf(bins = 20, normed = True, plot_type='bar').plot() # <codecell> #Normed signal %pylab inline from cern_utils import calc_util iron = calc_util.classifier_flatten(report.prediction_sig['xgboost']) _ = hist(iron(report.prediction_sig['xgboost']), histtype='bar', bins=20, alpha=0.5, label='signal') _ = hist(iron(report.prediction_bck['xgboost']), histtype='bar', bins=20, alpha=0.5, label='bck') legend(loc='best') # <codecell> from cern_utils import calc_util def CondSize(report, signal_test, bck_test, classifier='xgboost', cut=0.6, peaks=5, imax=26): iron = calc_util.classifier_flatten(report.prediction_sig[classifier]) cond_sig = (iron(report.prediction_sig[classifier]) < cut)\ &(signal_test.get_data(['nb_peaks']).values<=peaks)[:,0]\