def __init__(self, options):
    """Gather statistics for every cascade model found by FindVUBags.

    Annotations are parsed from ``options.root_results_dir`` and the bags
    are located with ``PlottingUtils.FindVUBags(options, useRoot=True)``.
    Every cascade model is then parsed with ``BagParse`` across all
    datasets and the result is passed to ``self.AppendStatsFromParse``.

    Args:
        options: parsed command line options. This method reads
            ``root_results_dir``, ``frame_subset_rate``, ``min_overlap``,
            ``imageid_regex`` and ``hog_thresh`` from it.
    """
    self.VERSION = int(VERSION)
    self.options = options  # To record all the settings

    # One independent, initially empty list per recorded statistic.
    # Presumably these are filled by AppendStatsFromParse -- verify there.
    for statName in ('accuracy', 'precision', 'recall', 'errorRate',
                     'nWindows', 'meanProcessingTime', 'stdProcessingTime',
                     'stdErrorProcTime', 'vuRecall', 'vuPrecision',
                     'meanVuProcessingTime', 'stdVuProcessingTime',
                     'stdVuErrorProcTime'):
        setattr(self, statName, [])

    annotationsMap = PlottingUtils.ParseAnnotationFiles(
        options, annotationDir=options.root_results_dir)
    bags, datasets, cascadeModels = PlottingUtils.FindVUBags(options,
                                                             useRoot=True)

    for model in cascadeModels:
        # Bags and annotations are paired up dataset by dataset.
        modelBags = [bags[(model, dataset)] for dataset in datasets]
        modelAnnotations = [annotationsMap[dataset] for dataset in datasets]
        bagParse = BagParse(modelBags,
                            modelAnnotations,
                            options.frame_subset_rate,
                            options.min_overlap,
                            imageIdRegex=options.imageid_regex,
                            frameSubsetOffset=1)

        rospy.loginfo('Calculating stats for model: %s' % model)
        self.AppendStatsFromParse(bagParse, options.min_overlap,
                                  options.hog_thresh)
def Run(self, processList):
    """Draw the stacked histograms for every process file in processList.

    Args:
        processList: path to a comma separated file. Each row is unpacked
            as ``(pFile, tagText)``: the path of a process file and the
            tag text handed to the plotting routine.

    For every process file, one stacked plot is produced per entry of
    ``self.t_histoList``. The histograms are read from ``self.histFile``
    at ``<process column 1> + <histogram column 1>``.
    """
    # The original also re-opened processList, never closed it and never
    # used what it read; that dead read (and its leaked handle) is gone.
    t_tagList = PythonUtils.makeTupleFromFile(processList, ',')

    for pFile, tagText in t_tagList:
        PythonUtils.doesFileExist(pFile)
        t_processList = PythonUtils.makeTupleFromFile(pFile, ',')

        for j, histo in enumerate(self.t_histoList):
            name = histo[2]
            t_plot = [[
                ROOTUtils.retrieveHistogram(self.histFile,
                                            process[1] + histo[1], name)
            ] for process in t_processList]

            # The file name suffix comes from column 7 of the LAST process
            # row. The original relied on the loop index surviving the
            # loop, which is undefined or stale for an empty process file.
            saveString = (self.saveDir + histo[0] + '_' +
                          t_processList[-1][7])
            PlottingUtils.stackHistograms(t_plot, t_processList, histo,
                                          self.t_legendList[j],
                                          self.t_textList[j],
                                          self.t_axisList[j], self.l_saveAs,
                                          self.t_ratioList[j], saveString,
                                          tagText)
def Run(self):
    """Produce one scatter plot per row of the scatter list file.

    Row ``i`` of the scatter list is combined with row ``i`` of the file,
    histogram, axis and text lists and passed to
    ``PlottingUtils.scatterPlot`` together with the tree file, the save
    directory and the list of output formats.
    """
    ## Output formats from the config file
    l_saveAs = PythonUtils.makeListFromString(self.saveAs, ',')

    ## Check the .list and tree files exist
    for path in (self.treeFile, self.axisList, self.histoList,
                 self.scatterList, self.textList, self.fileList):
        PythonUtils.doesFileExist(path)

    ## Make the tuples from the list files
    t_axisList = PythonUtils.makeTupleFromFile(self.axisList, ',')
    t_fileList = PythonUtils.makeTupleFromFile(self.fileList, ',')
    t_histoList = PythonUtils.makeTupleFromFile(self.histoList, ',')
    t_scatterList = PythonUtils.makeTupleFromFile(self.scatterList, ',')
    t_textList = PythonUtils.makeTupleFromFile(self.textList, ',')

    ## Every list is checked against the axis list via its first item
    for other in (t_fileList, t_histoList, t_scatterList, t_textList):
        PythonUtils.firstItemMatching(t_axisList, other)

    for i, scatter in enumerate(t_scatterList):
        PlottingUtils.scatterPlot(scatter, t_fileList[i], t_histoList[i],
                                  t_axisList[i], t_textList[i],
                                  self.treeFile, self.saveDir, l_saveAs)
def Run(self, processList):
    """Draw the stacked histograms for every process file in processList.

    Args:
        processList: path to a comma separated file. Each row is unpacked
            as ``(pFile, tagText)``: the path of a process file and the
            tag text handed to the plotting routine.

    For every process file, one stacked plot is produced per entry of
    ``self.t_histoList``. The histograms are read from ``self.histFile``
    at ``<process column 1> + <histogram column 1>``.
    """
    # The original also re-opened processList, never closed it and never
    # used what it read; that dead read (and its leaked handle) is gone.
    t_tagList = PythonUtils.makeTupleFromFile(processList, ',')

    for pFile, tagText in t_tagList:
        PythonUtils.doesFileExist(pFile)
        t_processList = PythonUtils.makeTupleFromFile(pFile, ',')

        for j, histo in enumerate(self.t_histoList):
            name = histo[2]
            t_plot = [[
                ROOTUtils.retrieveHistogram(self.histFile,
                                            process[1] + histo[1], name)
            ] for process in t_processList]

            # The file name suffix comes from column 7 of the LAST process
            # row. The original relied on the loop index surviving the
            # loop, which is undefined or stale for an empty process file.
            saveString = (self.saveDir + histo[0] + '_' +
                          t_processList[-1][7])
            PlottingUtils.stackHistograms(t_plot, t_processList, histo,
                                          self.t_legendList[j],
                                          self.t_textList[j],
                                          self.t_axisList[j], self.l_saveAs,
                                          self.t_ratioList[j], saveString,
                                          tagText)
def calculateSensitivity(t_plot, l_histo, h_title):
    ''' Calculate the bin by bin sensitivity for our stacked plots

    For every bin the signal and background contents are summed over all
    entries of t_plot, and (signal / sqrt(background))**2 is accumulated
    over the bins that have a positive background. The sensitivity is the
    square root of that sum.

    t_plot  -- list of entries; index 0 is the histogram, index 2 is the
               string "SIGNAL" or "BACKGROUND" (anything else is
               ignored), index 3 is the scale factor
    l_histo -- 4-item sequence (h_st, location, v_rebin, pTVbin); only
               v_rebin (the rebin factor) is used here
    h_title -- unused, kept for the callers

    Returns (sensitivity, totalErr). When the sensitivity is zero (no
    entries, no bins, or no bin with both signal and background) the
    error is reported as 0.0 instead of dividing by zero.
    '''
    h_st, location, v_rebin, pTVbin = l_histo
    rebinFactor = float(v_rebin)

    # First pass: scale every histogram and pick up the bin count (the
    # value from the last entry wins).
    # NOTE(review): whether the scaling is still visible in the second
    # pass depends on PlottingUtils.rebin returning the same object or a
    # copy -- confirm against that helper.
    nBins = 0
    for entry in t_plot:
        histogram = PlottingUtils.rebin(entry[0], rebinFactor)
        histogram.Scale(float(entry[3]))
        nBins = histogram.GetXaxis().GetNbins()

    totSen = 0.0
    totErr = 0.0
    for binIdx in range(1, nBins + 1):
        sigNum = 0
        sigErr = 0
        bkgNum = 0
        bkgErr = 0
        for entry in t_plot:
            histogram = PlottingUtils.rebin(entry[0], rebinFactor)
            if entry[2] == "SIGNAL":
                sigNum += histogram.GetBinContent(binIdx)
                sigErr += histogram.GetBinError(binIdx)
            elif entry[2] == "BACKGROUND":
                bkgNum += histogram.GetBinContent(binIdx)
                bkgErr += histogram.GetBinError(binIdx)

        # Bins without background cannot contribute (division by zero).
        if bkgNum > 0:
            sen = sigNum / math.sqrt(bkgNum)
            totSen += sen * sen
            if sigNum > 0:
                relSig = 2 * sigErr / sigNum
                relBkg = bkgErr / bkgNum
                senErr = sen * sen * math.sqrt(relSig * relSig +
                                               relBkg * relBkg)
                totErr += senErr * senErr

    sensitivity = math.sqrt(totSen)
    if sensitivity == 0:
        # Nothing contributed: the relative error is undefined, report 0.
        return sensitivity, 0.0
    totalErr = (0.5 * math.sqrt(totErr)) / sensitivity
    return sensitivity, totalErr
def Run(self):
    """Overlay the histograms listed in each overlay file onto one plot.

    ``self.overlayList`` is a text file with one overlay-file path per
    line; lines starting with ``#`` are skipped. Overlay file ``i`` is
    plotted with row ``i`` of the histogram, legend, text and axis lists.
    Histograms are read from ``self.histFile`` at
    ``<overlay column 1> + <histogram column 1>`` under the name in
    overlay column 2.
    """
    # Create lists from config file info
    l_saveAs = PythonUtils.makeListFromString(self.saveAs, ',')

    ## Check the .list files exist!
    for path in (self.axisList, self.histoList, self.legendList,
                 self.overlayList, self.textList, self.histFile):
        PythonUtils.doesFileExist(path)

    ## Make the tuples from the input files
    t_axisList = PythonUtils.makeTupleFromFile(self.axisList, ',')
    t_histoList = PythonUtils.makeTupleFromFile(self.histoList, ',')
    t_legendList = PythonUtils.makeTupleFromFile(self.legendList, ',')
    t_textList = PythonUtils.makeTupleFromFile(self.textList, ',')

    ## match the first item to make sure they're compatible
    for item in (t_histoList, t_legendList, t_textList):
        PythonUtils.firstItemMatching(t_axisList, item)

    # 'with' closes the list file; the original left the handle open.
    with open(self.overlayList, 'r') as f:
        l_files = [line.strip() for line in f if not line.startswith('#')]

    for i, overlayFile in enumerate(l_files):
        PythonUtils.doesFileExist(overlayFile)
        t_overlayList = PythonUtils.makeTupleFromFile(overlayFile, ',')

        t_plots = [[
            ROOTUtils.retrieveHistogram(self.histFile,
                                        overlay[1] + t_histoList[i][1],
                                        overlay[2])
        ] for overlay in t_overlayList]

        saveString = self.saveDir + t_histoList[i][0]
        PlottingUtils.overlayHistograms(t_plots, t_overlayList,
                                        t_histoList[i], t_legendList[i],
                                        t_textList[i], t_axisList[i],
                                        l_saveAs, saveString)
def calculateSensitivity(t_plot, l_histo, h_title):
    ''' Calculate the bin by bin sensitivity for our stacked plots

    For every bin the signal and background contents are summed over all
    entries of t_plot, and (signal / sqrt(background))**2 is accumulated
    over the bins that have a positive background. The sensitivity is the
    square root of that sum.

    t_plot  -- list of entries; index 0 is the histogram, index 2 is the
               string "SIGNAL" or "BACKGROUND" (anything else is
               ignored), index 3 is the scale factor
    l_histo -- 4-item sequence (h_st, location, v_rebin, pTVbin); only
               v_rebin (the rebin factor) is used here
    h_title -- unused, kept for the callers

    Returns (sensitivity, totalErr). When the sensitivity is zero (no
    entries, no bins, or no bin with both signal and background) the
    error is reported as 0.0 instead of dividing by zero.
    '''
    h_st, location, v_rebin, pTVbin = l_histo
    rebinFactor = float(v_rebin)

    # First pass: scale every histogram and pick up the bin count (the
    # value from the last entry wins).
    # NOTE(review): whether the scaling is still visible in the second
    # pass depends on PlottingUtils.rebin returning the same object or a
    # copy -- confirm against that helper.
    nBins = 0
    for entry in t_plot:
        histogram = PlottingUtils.rebin(entry[0], rebinFactor)
        histogram.Scale(float(entry[3]))
        nBins = histogram.GetXaxis().GetNbins()

    totSen = 0.0
    totErr = 0.0
    for binIdx in range(1, nBins + 1):
        sigNum = 0
        sigErr = 0
        bkgNum = 0
        bkgErr = 0
        for entry in t_plot:
            histogram = PlottingUtils.rebin(entry[0], rebinFactor)
            if entry[2] == "SIGNAL":
                sigNum += histogram.GetBinContent(binIdx)
                sigErr += histogram.GetBinError(binIdx)
            elif entry[2] == "BACKGROUND":
                bkgNum += histogram.GetBinContent(binIdx)
                bkgErr += histogram.GetBinError(binIdx)

        # Bins without background cannot contribute (division by zero).
        if bkgNum > 0:
            sen = sigNum / math.sqrt(bkgNum)
            totSen += sen * sen
            if sigNum > 0:
                relSig = 2 * sigErr / sigNum
                relBkg = bkgErr / bkgNum
                senErr = sen * sen * math.sqrt(relSig * relSig +
                                               relBkg * relBkg)
                totErr += senErr * senErr

    sensitivity = math.sqrt(totSen)
    if sensitivity == 0:
        # Nothing contributed: the relative error is undefined, report 0.
        return sensitivity, 0.0
    totalErr = (0.5 * math.sqrt(totErr)) / sensitivity
    return sensitivity, totalErr
def Run(self):
    """Overlay the histograms listed in each overlay file onto one plot.

    ``self.overlayList`` is a text file with one overlay-file path per
    line; lines starting with ``#`` are skipped. Overlay file ``i`` is
    plotted with row ``i`` of the histogram, legend, text and axis lists.
    Histograms are read from ``self.histFile`` at
    ``<overlay column 1> + <histogram column 1>`` under the name in
    overlay column 2.
    """
    # Create lists from config file info
    l_saveAs = PythonUtils.makeListFromString(self.saveAs, ',')

    ## Check the .list files exist!
    for path in (self.axisList, self.histoList, self.legendList,
                 self.overlayList, self.textList, self.histFile):
        PythonUtils.doesFileExist(path)

    ## Make the tuples from the input files
    t_axisList = PythonUtils.makeTupleFromFile(self.axisList, ',')
    t_histoList = PythonUtils.makeTupleFromFile(self.histoList, ',')
    t_legendList = PythonUtils.makeTupleFromFile(self.legendList, ',')
    t_textList = PythonUtils.makeTupleFromFile(self.textList, ',')

    ## match the first item to make sure they're compatible
    for item in (t_histoList, t_legendList, t_textList):
        PythonUtils.firstItemMatching(t_axisList, item)

    # 'with' closes the list file; the original left the handle open.
    with open(self.overlayList, 'r') as f:
        l_files = [line.strip() for line in f if not line.startswith('#')]

    for i, overlayFile in enumerate(l_files):
        PythonUtils.doesFileExist(overlayFile)
        t_overlayList = PythonUtils.makeTupleFromFile(overlayFile, ',')

        t_plots = [[
            ROOTUtils.retrieveHistogram(self.histFile,
                                        overlay[1] + t_histoList[i][1],
                                        overlay[2])
        ] for overlay in t_overlayList]

        saveString = self.saveDir + t_histoList[i][0]
        PlottingUtils.overlayHistograms(t_plots, t_overlayList,
                                        t_histoList[i], t_legendList[i],
                                        t_textList[i], t_axisList[i],
                                        l_saveAs, saveString)
def PlotCosts(self, SMOOTH_STEP=20, MAXSIZE=500):
    """Plots and saves costs at batch- and epoch- level

    Batch-level curves are plotted when ``self.BATCHES_RUN > 0`` and the
    epoch-level curve when ``self.EPOCHS_RUN > 1``. All plots are written
    under ``self.RESULTPATH + 'costs/'``.

    Args:
        SMOOTH_STEP: window length of the median filter applied to the
            batch-level curves.
        MAXSIZE: maximum number of trailing batch-level samples plotted.
    """

    def _PreprocessCurve(arr, SMOOTH_STEP=SMOOTH_STEP, MAXSIZE=MAXSIZE):
        """Truncates and smoothes a 1-D cost curve - arg: list

        Returns a new list; the input is never modified. (The original
        smoothed short curves in place, so the stored error history was
        smoothed again on every call.)
        """
        # Truncating excessively large cost curve (keep the tail)
        if len(arr) > MAXSIZE:
            arr = arr[len(arr) - MAXSIZE:len(arr)]
        arr = list(arr)

        # Using a median sliding filter to smooth out 1-D signal. The
        # window looks forward, so the last SMOOTH_STEP samples are left
        # as they are.
        if len(arr) > 2 * SMOOTH_STEP:
            for i in range(len(arr) - SMOOTH_STEP):
                arr[i] = np.median(arr[i:i + SMOOTH_STEP])

        return arr

    costPath = self.RESULTPATH + 'costs/'

    # Plot cost and save - batch_level
    if self.BATCHES_RUN > 0:
        c_batches_train = np.array(
            _PreprocessCurve(self.Errors_batchLevel_train))
        c_batches_valid = np.array(
            _PreprocessCurve(self.Errors_batchLevel_valid))

        plotutils.PlotCost(Cost_train=c_batches_train,
                           savename='CostvsBatch_train',
                           RESULTPATH=costPath,
                           Level="batch")

        # The validation curve is also passed through the Cost_train
        # keyword, exactly as before.
        plotutils.PlotCost(Cost_train=c_batches_valid,
                           savename='CostvsBatch_valid',
                           RESULTPATH=costPath,
                           Level="batch")

    # Plot cost and save - epoch_level
    if self.EPOCHS_RUN > 1:
        Errs_train = np.array(self.Errors_epochLevel_train)
        Errs_valid = np.array(self.Errors_epochLevel_valid)
        # Column 1 is plotted -- presumably the cost, with the epoch
        # number in column 0; verify against where these are recorded.
        plotutils.PlotCost(Cost_train=Errs_train[:, 1],
                           Cost_valid=Errs_valid[:, 1],
                           savename='CostvsEpoch',
                           RESULTPATH=costPath,
                           Level="epoch")
def PlotConfusionMat(self, labelNames=None, predNames=None, SCALEFACTOR=1):
    """Plots confusion matrix using saved predictions

    Args:
        labelNames: names of the label images. When omitted or empty,
            the names returned by ``self._get_PredNames()`` are used.
        predNames: names of the prediction images. When omitted or
            empty, the names returned by ``self._get_PredNames()`` are
            used.
        SCALEFACTOR: forwarded to ``plotutils.PlotConfusionMatrix``.

    The original declared mutable ``[]`` defaults and then overwrote both
    arguments unconditionally, so names passed by a caller were ignored.
    Calls without arguments behave exactly as before.
    """
    needLabels = labelNames is None or len(labelNames) == 0
    needPreds = predNames is None or len(predNames) == 0

    if needLabels or needPreds:
        # Get names of images, labels, and preds
        _, savedLabelNames, savedPredNames = self._get_PredNames()
        if needLabels:
            labelNames = savedLabelNames
        if needPreds:
            predNames = savedPredNames

    plotutils.PlotConfusionMatrix(PREDPATH=self.RESULTPATH + 'preds/',
                                  LABELPATH=self.LABELPATH,
                                  RESULTPATH=self.RESULTPATH + 'costs/',
                                  labelNames=labelNames,
                                  predNames=predNames,
                                  SCALEFACTOR=SCALEFACTOR,
                                  CLASSLABELS=self.CLASSLABELS,
                                  label_mapping=self.label_mapping,
                                  IGNORE_EXCLUDED=True,
                                  EXCLUDE_LBL=self.EXCLUDE_LBL,
                                  cMap=self.cMap,
                                  cMap_lbls=self.cMap_lbls)
def PlotComparisons(self, SCALEFACTOR=1):
    """Saves side-by-side comparisons of images, labels and predictions

    The file names come from ``self._get_PredNames()``; predictions are
    read from ``self.RESULTPATH + 'preds/'`` and the comparisons are
    written to ``self.RESULTPATH + 'comparisons/'``.
    """
    # Names of the images, labels and predictions to compare
    imNames, labelNames, predNames = self._get_PredNames()

    predPath = self.RESULTPATH + 'preds/'
    comparisonPath = self.RESULTPATH + 'comparisons/'

    plotutils.SaveComparisons(
        IMAGEPATH=self.IMAGEPATH,
        LABELPATH=self.LABELPATH,
        PREDPATH=predPath,
        RESULTPATH=comparisonPath,
        imNames=imNames,
        labelNames=labelNames,
        predNames=predNames,
        SCALEFACTOR=SCALEFACTOR,
        CLASSLABELS=self.CLASSLABELS,
        label_mapping=self.label_mapping,
        EXCLUDE_LBL=self.EXCLUDE_LBL,
        cMap=self.cMap,
        cMap_lbls=self.cMap_lbls)
def CalculateResamplingModel(options, windowCount, baselineTime=None):
    """Model recall and speedup when only a fraction of windows is kept.

    The window count is divided by each factor in
    ``exp(arange(0, 6, 0.01))``. For every factor the expected number of
    people found is ``sum(min(count / factor, 1))`` over the people with
    a non-zero window count.

    Args:
        options: passed to ``PlottingUtils.ParseHogTiming``.
        windowCount: sequence with one window count per person; its
            length is used as the number of people.
        baselineTime: time the speedup is measured against. Defaults to
            the maximum time returned by ``ParseHogTiming``.

    Returns:
        Tuple ``(speedup, meanRecall, stdRecall, windowCount,
        processingTime)`` of arrays with one entry per factor, where
        ``windowCount`` is the resampled number of windows.
    """
    nWindows, maxHogTime, EstimateTime = PlottingUtils.ParseHogTiming(options)
    if baselineTime is None:
        baselineTime = maxHogTime

    nPeople = len(windowCount)
    fracWindows = np.exp(np.arange(0.0, 6, 0.01))

    # A real list, not filter(): the counts are walked once per factor,
    # and a Python 3 filter object would be empty after the first pass.
    nonZeroCounts = [count for count in windowCount if count > 0]

    nFound = np.array([
        sum([min(count / frac, 1) for count in nonZeroCounts])
        for frac in fracWindows
    ])
    meanRecall = np.divide(nFound, nPeople)

    nVariance = np.array([
        sum([GetVarianceOfOnePerson(count, frac) for count in nonZeroCounts])
        for frac in fracWindows
    ])
    stdRecall = np.divide(np.sqrt(nVariance), nPeople)

    # Kept apart from the windowCount argument, which the original rebound.
    resampledWindowCount = np.array(
        [nWindows / frac for frac in fracWindows])

    processingTime = EstimateTime(resampledWindowCount)
    speedup = np.divide(baselineTime, processingTime)

    return (speedup, meanRecall, stdRecall, resampledWindowCount,
            processingTime)
def Run(self):
    """Build and overlay 2D points from per-variable histogram statistics.

    Row ``j`` of the axis list is paired with row ``j`` of the fit,
    histogram, legend and text lists and with line ``j`` of
    ``self.variableList``. That line names a file which in turn lists one
    config file per variable. Each variable config has an ``Options``
    section with ``title``, ``variable``, ``colour``, ``marker``,
    ``markerSize``, ``fitMin`` and ``fitMax``. One 2D point is created
    per variable, and all points of row ``j`` are overlaid on one canvas.
    """

    def statFlags(statName):
        # [getMean, getRMS, getRES]: exactly one flag is set for a known
        # statistic name, none for anything else.
        return [
            1 if statName == 'MEAN' else 0,
            1 if statName == 'RMS' else 0,
            1 if statName == 'RESOLUTION' else 0,
        ]

    ## Create lists from config file options
    l_saveAs = PythonUtils.makeListFromString(self.saveAs, ',')

    ## Check the .list files exist
    for path in (self.axisList, self.fitList, self.histoList,
                 self.legendList, self.textList, self.variableList,
                 self.histFile):
        PythonUtils.doesFileExist(path)

    ## Create the tuples from the .list files
    t_axisList = PythonUtils.makeTupleFromFile(self.axisList, ',')
    t_fitList = PythonUtils.makeTupleFromFile(self.fitList, ',')
    t_histoList = PythonUtils.makeTupleFromFile(self.histoList, ',')
    t_legendList = PythonUtils.makeTupleFromFile(self.legendList, ',')
    t_textList = PythonUtils.makeTupleFromFile(self.textList, ',')

    ## Match the first items
    for item in (t_fitList, t_histoList, t_legendList, t_textList):
        PythonUtils.firstItemMatching(t_axisList, item)

    # Read the variable list once and close it. The original re-opened
    # it on every iteration and never closed any of the handles.
    with open(self.variableList, 'r') as variableFile:
        l_files = [line.strip() for line in variableFile]

    ## Loop over the variable files to get the relevant info
    for j in range(len(t_axisList)):
        (f_str, vRebin, vdoNorm, vXMin, vXMax, vFit, vXValue,
         vYValue) = t_fitList[j]

        l_twoD = []
        t_legend = []

        PythonUtils.doesFileExist(l_files[j])
        with open(l_files[j], 'r') as configListFile:
            l_configs = [line.strip() for line in configListFile]

        for vFile in l_configs:
            PythonUtils.doesFileExist(vFile)
            vFileParser = ConfigParser.SafeConfigParser()
            vFileParser.read(vFile)

            ## Options
            vTitle = vFileParser.get('Options', 'title')
            vVariable = vFileParser.get('Options', 'variable')
            vColour = vFileParser.getint('Options', 'colour')
            vMarker = vFileParser.getint('Options', 'marker')
            vMarkerSize = vFileParser.getfloat('Options', 'markerSize')
            vFitMin = vFileParser.get('Options', 'fitMin')
            vFitMax = vFileParser.get('Options', 'fitMax')

            ## Create l_statInfo
            # NOTE(review): histogram column 1 is deliberately kept in
            # both of the first two slots, as in the original.
            l_statInfo = [
                t_histoList[j][1], t_histoList[j][1],
                float(vRebin),
                int(vdoNorm),
                float(vXMin),
                float(vXMax), vFitMin, vFitMax
            ]

            l_xOption = statFlags(vXValue)
            l_yOption = statFlags(vYValue)

            ## Get the list of stats
            l_plot = [
                ROOTUtils.retrieveHistogram(self.histFile,
                                            t_histoList[j][1], vVariable)
            ]
            l_xValue, l_xErr = ROOTUtils.getHistoStat(
                l_plot, l_statInfo, l_xOption, vFit, vVariable)
            l_yValue, l_yErr = ROOTUtils.getHistoStat(
                l_plot, l_statInfo, l_yOption, vFit, vVariable)

            ## Get the value we want from the list so we can plot it!
            # -1 marks "not computed". Both values start at -1; the
            # original left yValue unset, giving a NameError or a value
            # left over from the previous variable.
            xValue = -1
            yValue = -1
            for stat in l_xValue:
                if stat != -1:
                    xValue = stat
            for stat in l_yValue:
                if stat != -1:
                    yValue = stat

            ## Create a 2D plot and save to a list so can overlay them
            twoDPlot = PlottingUtils.createTwoD(t_histoList[j], xValue,
                                                yValue, vMarker,
                                                vMarkerSize, vColour)
            l_twoD.append(twoDPlot)
            if vTitle:
                t_legend.append([twoDPlot, vTitle, 'p'])

        ## Overlay our 2D plots onto one canvas
        PlottingUtils.overlayTwoD(l_twoD, t_legend, t_histoList[j],
                                  t_legendList[j], t_textList[j],
                                  t_axisList[j], self.saveDir, l_saveAs)
def Run(self, doProfile, createFile):
    """Create the histogram file and/or the profile plots.

    Args:
        doProfile: when true, draw one overlaid profile plot per file
            listed in ``self.varList``.
        createFile: when true, create, fill and save the histograms for
            every tree listed in ``self.treeList``.
    """
    ## Create the lists from the main configFile
    t_select = PythonUtils.makeTupleFromString(self.selectBins, ',', '?')
    t_muon = PythonUtils.makeTupleFromString(self.muBins, ',', '?')

    if createFile:
        PythonUtils.Info('Creating Histogram File for Profile Plots')

        ## Check the list files exist
        for path in (self.variableList, self.treeList):
            PythonUtils.doesFileExist(path)

        ## Make the lists from the list files
        t_var = PythonUtils.makeTupleFromFile(self.variableList, ',')
        t_tree = PythonUtils.makeTupleFromFile(self.treeList, ',')

        for tFile, tName, sDir in t_tree:
            PythonUtils.doesFileExist(tFile)
            d_histType = self.createHistograms(t_var, sDir, tFile, t_muon,
                                               self.selectOn, t_select)
            d_filledHists = self.fillHistograms(d_histType, t_var, tFile,
                                                tName, sDir, t_muon,
                                                t_select, self.selectOn,
                                                self.nEvents)
            self.saveHistograms(d_filledHists, t_var, self.outHist, sDir,
                                t_muon, self.selectOn, t_select)

    if doProfile:
        PythonUtils.Info("Creating Profile Plots!")

        ## Create the Lists from profile cfg
        l_saveAs = PythonUtils.makeListFromString(self.plotType, ',')

        ## Check the list files exist
        for path in (self.axisList, self.histoList, self.legendList,
                     self.textList, self.varList, self.inFile):
            PythonUtils.doesFileExist(path)

        ## Create the list of lists from each list file
        t_axisList = PythonUtils.makeTupleFromFile(self.axisList, ',')
        t_histoList = PythonUtils.makeTupleFromFile(self.histoList, ',')
        t_legendList = PythonUtils.makeTupleFromFile(self.legendList, ',')
        t_textList = PythonUtils.makeTupleFromFile(self.textList, ',')

        ## Check the first item matches:
        for item in (t_histoList, t_legendList, t_textList):
            PythonUtils.firstItemMatching(item, t_axisList)

        ## Add the var list files to a list ('#' lines are comments).
        ## 'with' closes the file; the original left the handle open.
        with open(self.varList, 'r') as varListFile:
            l_varList = [
                line.strip() for line in varListFile
                if not line.startswith('#')
            ]

        for i, varFile in enumerate(l_varList):
            PythonUtils.doesFileExist(varFile)
            t_varList = PythonUtils.makeTupleFromFile(varFile, ',')

            l_prof = []
            t_legend = []
            for var in t_varList:
                d_pTMean = {}
                l_err = []
                for selBin in t_select:
                    # The inclusive and the open-ended bins have no
                    # finite centre and are skipped.
                    if selBin[0] == 'all' or selBin[1] == 'Inf':
                        continue
                    if selBin[1]:
                        select = (self.selectOn + '-' + selBin[0] + '-' +
                                  selBin[1])
                    else:
                        select = self.selectOn + '-' + selBin[0]
                    location = (var[2] + '/' + var[6] + '/' + select +
                                '/' + var[7])

                    # Centre of the selection bin
                    pTbin = float(selBin[0]) + (float(selBin[1]) -
                                                float(selBin[0])) / 2

                    # Retrieve the histogram once for both statistics
                    # (the original fetched it twice).
                    histogram = ROOTUtils.retrieveHistogram(
                        self.inFile, location, var[1])
                    pTMean = histogram.GetMean()
                    pTMeanErr = histogram.GetMeanError()

                    l_err.append(pTMeanErr)
                    d_pTMean[pTbin] = pTMean

                h_prof, l_legend = PlottingUtils.twoDprofile(
                    d_pTMean, t_histoList[i], var, l_err)
                l_prof.append(h_prof)
                t_legend.append(l_legend)

            saveString = self.plotDir + t_histoList[i][0]
            PlottingUtils.overlayProfile(l_prof, t_axisList[i],
                                         t_histoList[i], t_legendList[i],
                                         t_textList[i], t_legend, l_saveAs,
                                         saveString)
# Remaining command line options, then compute the statistics and pickle
# them to the requested output file.
parser.add_option('--hog_thresh',
                  type='float',
                  help='Minimum HOG value required to label a hit',
                  default=0.0)
parser.add_option(
    '--frame_subset_rate',
    type='int',
    default=1,
    help=('In order to run quicker, you can only run the detector on a '
          'subset of the frames. This specifies how often to run the '
          'detector. So, for example if it is 5, it will run the detector '
          'once every 5 frames.'))
parser.add_option('--output_data',
                  '-o',
                  default='CascadeAccuracy.stats',
                  help='Filename where the stats will be pickled to')

parser = PlottingUtils.AddCommandLineOptions(parser)

(options, args) = parser.parse_args()

stats = CascadeAccuracyStats(options)

rospy.loginfo('Saving calculated statistics to: %s' % options.output_data)

# Create the output directory if needed. A bare file name has an empty
# dirname, which must not be passed to makedirs. '!=' replaces the '<>'
# operator, which no longer exists in Python 3.
dirName = os.path.dirname(options.output_data)
if dirName != '' and not os.path.exists(dirName):
    os.makedirs(dirName)

# The context manager closes the file even when pickling fails.
with open(options.output_data, 'wb') as outputFile:
    pickle.dump(stats, outputFile)
def main(dataset2="diabetic.csv", dataset1="earthquake_processed.csv", run_part_1=True):
    """Cluster one or two datasets with K-Means and EM and report scores.

    Each dataset is a CSV file under DATA_PATH with a ``class`` column.
    It is split 80/20, the training part is preprocessed with
    ``DataPreprocessor.preprocess_data``, and, when ``run_part_1`` is
    true, clustered with K-Means and a Gaussian mixture. Homogeneity,
    completeness, V-measure and adjusted mutual information against the
    true classes are printed, and plots are produced via PlottingUtils.

    Args:
        dataset2: file name handled by the second branch (one-hot encoded
            categorical features, t-SNE plot). Empty string to skip.
        dataset1: file name handled by the first branch (pair plots).
            Empty string to skip.
        run_part_1: whether to run the clustering experiments.

    NOTE(review): the first branch labels its output "DR Dataset" and the
    second "Earthquake Dataset", while the default file names suggest the
    opposite assignment -- confirm which default belongs where.
    """

    def report_scores(algorithm, true_labels, cluster_labels, trailing_blank=True):
        # scikit-learn expects (labels_true, labels_pred). The original
        # passed them the other way round, which exchanges the
        # homogeneity and completeness values.
        homogeneity, completeness, v_measure = homogeneity_completeness_v_measure(
            true_labels, cluster_labels)
        print(algorithm)
        print("homogeneity:" + str(homogeneity))
        print("completeness:" + str(completeness))
        print("v measure:" + str(v_measure))
        print("Adjusted mutual info score:" +
              str(adjusted_mutual_info_score(true_labels, cluster_labels)))
        if trailing_blank:
            print()

    if dataset1 != "":
        print("Loading dataset " + dataset1, flush=True)

        # Load the data.
        dataset = pd.read_csv(os.path.join(DATA_PATH, dataset1))
        X = dataset.drop("class", axis=1)
        y = dataset["class"].copy()

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
        y_train = y_train.tolist()

        pipe = DataPreprocessor.preprocess_data(X_train)
        X_train_transformed = pipe.fit_transform(X_train)

        def pair_plot(title, point_labels):
            # Axis labels are every column except the last one.
            PlottingUtils.generate_pair_plot(title, X_train_transformed, point_labels,
                                             columns=dataset.columns,
                                             x_labels=dataset.columns.tolist()[:-1],
                                             y_labels=dataset.columns.tolist()[:-1])

        if run_part_1:
            pair_plot("Feature Pair Plot - True Labels - DR Dataset", np.array(y_train))

            k_values = np.arange(2, 10, 1)
            PlottingUtils.plot_k_means_scores(
                k_values, X_train_transformed,
                "Normalized Scores of Various Metrics vs K - DR Dataset")

            # By inspection, 3 was the best number of clusters.
            kmeans = KMeans(n_clusters=3, max_iter=500)
            kmeans.fit_predict(X_train_transformed)
            print("Scores for DR Dataset")
            report_scores("K Means", y_train, kmeans.labels_)
            pair_plot("Feature Pair Plot - KMeans - DR Dataset", kmeans.labels_)

            k_values = np.arange(2, 15, 1)
            PlottingUtils.plot_gmm_scores(k_values, X_train_transformed, "EM - DR Dataset")

            # By inspection, 4 clusters were best.
            gmm = GaussianMixture(4, max_iter=500, n_init=10)
            gmm.fit(X_train_transformed)
            labels = np.array(gmm.predict(X_train_transformed))
            report_scores("EM", y_train, labels)
            pair_plot("Feature Pair Plot - EM - DR Dataset", labels)

    if dataset2 != "":
        print("Loading dataset " + dataset2)

        # Load the data.
        dataset = pd.read_csv(os.path.join(DATA_PATH, dataset2))
        X = dataset.drop("class", axis=1)
        y = dataset["class"].copy()

        numeric_features = list(X.select_dtypes(include=np.number))
        cat_features = list(X.select_dtypes(exclude=np.number))

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

        pipe = DataPreprocessor.preprocess_data(X_train)
        X_train_transformed = pipe.fit_transform(X_train)

        # Column names of the transformed data: the numeric features
        # followed by the encoded categorical ones, plus the class.
        enc_cat_features = pipe.named_transformers_['cat'].get_feature_names()
        labels = np.concatenate([numeric_features, enc_cat_features])
        transformed_df_columns = pd.DataFrame(pipe.transform(X_train), columns=labels).columns.tolist()
        transformed_df_columns.append("class")

        if run_part_1:
            k_values = np.arange(2, 100, 1)
            #PlottingUtils.plot_k_means_scores(k_values, X_train_transformed, "Normalized Scores of Various Metrics vs K - Earthquake Dataset")

            kmeans = KMeans(n_clusters=21, max_iter=500)
            kmeans.fit_predict(X_train_transformed)
            print("Scores for Earthquake Dataset")
            report_scores("K Means", y_train, kmeans.labels_)
            #PlottingUtils.generate_tsne("TSNE Visualization of K-Means Clusters - Earthquake Dataset", X_train_transformed, kmeans.labels_)

            k_values = np.arange(2, 50, 1)
            #PlottingUtils.plot_gmm_scores(k_values, X_train_transformed, "BIC & AIC Scores EM - Earthquake Dataset")

            gmm = GaussianMixture(17, max_iter=500, n_init=10)
            gmm.fit(X_train_transformed)
            labels = np.array(gmm.predict(X_train_transformed))
            report_scores("EM", y_train, labels, trailing_blank=False)
            PlottingUtils.generate_tsne("TSNE Visualization of EM Clusters - Earthquake Dataset",
                                        X_train_transformed, labels)

    # stdout is not redirected to a log file in this function, so it is
    # flushed but left open. Closing it, as the original did, breaks
    # every later print in the process.
    sys.stdout.flush()
def _RunBigBatch(self, sess, Bigbatch, \
                 istesting=False, SavePreds=False):
    '''
    Runs big batch in small sub-batches - can be used to train optimizer
    as well as to obtain cost and save predictions

    sess - initialized tf session
    Bigbatch - dict with the keys 'imNames', 'fovBounds', 'imgs' and
               'lbls'; 'imgs' and 'lbls' are sliced along their first axis
               with four indices, 'fovBounds' is indexed as
               [image, 0..3] = rowmin, rowmax, colmin, colmax
    istesting - if False the cost of every sub-batch is evaluated and the
                optimizer is run once at the end; if True cost and
                predictions are fetched and the optimizer is not run
    SavePreds - only looked at when istesting is True; saves one
                prediction file per image

    Returns the summed sub-batch cost divided by the number of sub-batches.
    '''

    bigbatch_imnames = Bigbatch['imNames']
    bigbatch_fovbounds = Bigbatch['fovBounds']
    batch_imgs = Bigbatch['imgs']
    batch_lbls = Bigbatch['lbls']
    # Drops this function's own reference to the dict only
    Bigbatch = None

    # Get batch indices
    # Sub-batch boundaries 0, S, 2S, ...; the image count is appended as a
    # final boundary when it is not a multiple of SUBBATCH_SIZE.
    batch_idx = list(
        np.arange(0, len(bigbatch_imnames) + 1, self.SUBBATCH_SIZE))
    if batch_idx[len(batch_idx) - 1] < len(bigbatch_imnames):
        batch_idx.append(len(bigbatch_imnames))
    N_batches = len(batch_idx) - 1

    cost_batch = 0

    # subbatch = 0
    for subbatch in range(N_batches):

        putils.Log_and_print("Sub-batch = {} of {}"\
                             .format(subbatch+1, N_batches))

        # isolate batch
        idxmin = batch_idx[subbatch]
        idxmax = batch_idx[subbatch + 1]
        # The running cost so far is fed in as cumloss
        feed_dict_subbatch = {self.vgg.images: batch_imgs[idxmin:idxmax,:,:,:], \
                              self.vgg.labels: batch_lbls[idxmin:idxmax,:,:,:], \
                              self.vgg.cumloss: cost_batch}

        if not istesting:
            # evaluate cost and add to cumulative cost for big batch
            cost_batch = cost_batch + self.vgg.cost.eval(
                feed_dict=feed_dict_subbatch)

        else:
            if self.SOFTPREDS:
                # fetch soft predictions
                fetches = [self.vgg.cost, self.vgg.upscore32]
            else:
                # fetch final predicted class (argmax)
                fetches = [self.vgg.cost, self.vgg.pred_up]

            # Evaluate cost and fetch
            c_test, pred_batch = \
                sess.run(fetches, feed_dict=feed_dict_subbatch)
            cost_batch = cost_batch + c_test

            if SavePreds:
                # save batch predictions
                subbatch_imnames = bigbatch_imnames[idxmin:idxmax]
                subbatch_fovbounds = bigbatch_fovbounds[idxmin:idxmax, :]

                # imidx = 0
                for imidx in range(len(subbatch_imnames)):

                    # Soft predictions keep one extra trailing axis
                    # NOTE(review): the label fix-up is assumed to apply to
                    # hard predictions only -- confirm against the
                    # original layout of this branch.
                    if self.SOFTPREDS:
                        pred_label = pred_batch[imidx, :, :, :]
                    else:
                        pred_label = pred_batch[imidx, :, :]
                        pred_label = self._FixPredictionLabels(pred_label)

                    # File name suffix built from the four FOV bounds
                    fovindices = "_rowmin{}".format(subbatch_fovbounds[imidx, :][0]) + \
                                 "_rowmax{}".format(subbatch_fovbounds[imidx, :][1]) + \
                                 "_colmin{}".format(subbatch_fovbounds[imidx, :][2]) + \
                                 "_colmax{}".format(subbatch_fovbounds[imidx, :][3])

                    basename = subbatch_imnames[imidx]
                    # fix numpy string array type (extract pure string)
                    if len(basename) < 2:
                        basename = basename[0]

                    if not self.IS_UNLABELED:
                        savename = self.Model.RESULTPATH+"preds/pred_" + \
                                   basename.split(self.EXT_IMGS)[0]
                    else:
                        # save in main result folder and maintain naming
                        # convention if predicting unlabeled images
                        savename = self.Model.RESULTPATH+ \
                                   basename.split(self.EXT_IMGS)[0]

                    if self.SOFTPREDS:
                        ext = ".mat"
                    else:
                        ext = self.EXT_IMGS

                    # The FOV suffix is added only when the name does not
                    # already contain "rowmin"
                    if "rowmin" in savename:
                        savename = savename + ext
                    else:
                        savename = savename + fovindices + ext

                    # Exclude white mask (empty regions) to improve prediction
                    im = batch_imgs[idxmin + imidx, :, :, :]
                    whiteMask = plotutils.getWhiteMask(im, THRESH=220)
                    pred_label[whiteMask == 1] = 0

                    # save while preserving pixel values
                    if self.SOFTPREDS:
                        savemat(savename, {'pred_label': pred_label})
                    else:
                        # NOTE(review): scipy.misc.toimage is no longer
                        # available in recent SciPy releases.
                        pred_label = scipy.misc.toimage(pred_label, high=np.max(pred_label),\
                                                        low=np.min(pred_label), mode='I')
                        pred_label.save(savename)

    # Get mean cost
    # NOTE(review): an empty big batch gives N_batches == 0 and this
    # division fails.
    cost_batch = cost_batch / N_batches

    # Now update weights with new cost
    if not istesting:
        putils.Log_and_print(
            "Updating weights with mean loss over all sub-batches ...")
        # Define dict
        # NOTE(review): idxmin/idxmax still hold the bounds of the LAST
        # sub-batch of the loop above; neither the reset of subbatch nor
        # the extra batch_idx entry below changes what is fed here.
        subbatch = 0  # Doesn't matter what subbatch you use
        if self.SUBBATCH_SIZE == 1:
            batch_idx.append(batch_idx[subbatch] + 1)
        feed_dict_batch = \
            {self.vgg.images: batch_imgs[idxmin:idxmax,:,:,:], \
             self.vgg.labels: batch_lbls[idxmin:idxmax,:,:,:], \
             self.vgg.cumloss: cost_batch}
        # Now run optimizer
        sess.run(self.vgg.optimizer, feed_dict=feed_dict_batch)

    return cost_batch
def __init__(self, options):
    """Compute the statistics for every VU type found by FindVUBags.

    For every VU type a ScoreMatrix is built from the HOG bag and the VU
    bags, and the results of its GetStats call are appended to the
    per-statistic lists. Optional extra rows are added for resampled HOG
    boxes (options.do_resampling), and baseline statistics for scaled HOG
    bags (options.do_scaling) are stored separately. At the end the
    per-statistic lists are concatenated along axis 0 into arrays.

    Uses dict.iterkeys(), so this is Python 2 code.
    """
    self.VERSION = int(
        VERSION
    )  # Version number if we change how the stats are calculated.
    self.options = options  # To record all the settings
    self.accuracy = []
    self.precision = []
    self.recall = []
    self.errorRate = []
    self.groundTruths = []
    self.resampleHogStats = []
    self.scaledHogStats = []
    self.nWindows = []
    self.processingTime = []
    self.matthewsCoefficients = []
    self.hogPRCurves = []
    self.vuNegPrecision = []
    self.vuRecall = []
    self.vuPrecision = []
    self.peopleWindowCount = {
    }  # (imageFile, personId)-># positive windows

    PrepareCache(options)

    # hogTiming is called further down with window counts and is handed
    # to GetStats; the first returned value is not used.
    garb, self.hogProcessingTime, hogTiming = \
        PlottingUtils.ParseHogTiming(options)

    annotationsMap = PlottingUtils.ParseAnnotationFiles(options)
    bags, datasets, vuTypes = PlottingUtils.FindVUBags(options)
    scaledHogBags = FindScaledHogBags(options)

    # Resampling contributes one extra row next to the VU types
    self.nVUEstimators = len(vuTypes)
    if options.do_resampling:
        self.nVUEstimators += 1

    # scaledHogBags is keyed by pairs whose second item is the scaling
    # factor
    scalingFactors = sorted(
        set([
            scalingFactor for x, scalingFactor in scaledHogBags.iterkeys()
        ]))

    timingData = PlottingUtils.FindTimingData(vuTypes, options)

    hogBag = CreateVUScore([bags[(options.hog_name, x)] for x in datasets],
                           [annotationsMap[x] for x in datasets],
                           options.frame_subset_rate,
                           options.do_nms,
                           options.hog_thresh,
                           options.min_overlap,
                           cacheDir=options.cache_dir,
                           imageIdRegex=options.imageid_regex)
    # With frame subsetting, the bag built above becomes the "offset" bag
    # and the main bag is rebuilt with frameSubsetOffset=1.
    hogOffsetBag = None
    if options.frame_subset_rate >= 2:
        hogOffsetBag = hogBag
        hogBag = CreateVUScore(
            [bags[(options.hog_name, x)] for x in datasets],
            [annotationsMap[x] for x in datasets],
            options.frame_subset_rate,
            options.do_nms,
            options.hog_thresh,
            options.min_overlap,
            cacheDir=options.cache_dir,
            imageIdRegex=options.imageid_regex,
            frameSubsetOffset=1)

    for vuType in vuTypes:
        offsetVuBags = None
        vuBags = [
            CreateVUScore([bags[(vuType, x)] for x in datasets],
                          [annotationsMap[x] for x in datasets],
                          options.frame_subset_rate,
                          thresholdTimes=timingData[vuType],
                          cacheDir=options.cache_dir,
                          imageIdRegex=options.imageid_regex)
        ]
        # Same offset/main swap as for the HOG bag above
        if options.frame_subset_rate >= 2:
            offsetVuBags = vuBags
            vuBags = [
                CreateVUScore([bags[(vuType, x)] for x in datasets],
                              [annotationsMap[x] for x in datasets],
                              options.frame_subset_rate,
                              thresholdTimes=timingData[vuType],
                              cacheDir=options.cache_dir,
                              imageIdRegex=options.imageid_regex,
                              frameSubsetOffset=1)
            ]

        scoreMatrix = ScoreMatrix(hogBag, vuBags, options.min_overlap,
                                  options.hog_thresh, options.cache_dir,
                                  options.do_vu_nms, options.prop_pos,
                                  offsetVuBags, hogOffsetBag)

        (curAccuracy, curPrecision, curRecall, curErrorRate, curNWindows,
         curProcessingTime, curMatCoefs, curVuNegPrecision, curVuPrecision,
         curVuRecall) = \
            scoreMatrix.GetStats(options.min_overlap, options.hog_thresh,
                                 20, hogTiming)

        self.accuracy.append(curAccuracy)
        self.precision.append(curPrecision)
        self.recall.append(curRecall)
        self.errorRate.append(curErrorRate)
        self.nWindows.append(curNWindows)
        self.processingTime.append(curProcessingTime)
        self.matthewsCoefficients.append(curMatCoefs)
        self.vuNegPrecision.append(curVuNegPrecision)
        self.vuPrecision.append(curVuPrecision)
        self.vuRecall.append(curVuRecall)

        # Taken from the first score matrix only
        if len(self.peopleWindowCount) == 0:
            self.peopleWindowCount = scoreMatrix.peopleWindowCount

        # Release the large objects before the next VU type is loaded
        del vuBags
        del scoreMatrix
        gc.collect()

    # Do the calculator for resampling the boxes
    if options.do_resampling:
        # 4 scale steps x 4 stride steps = 16 resampling settings
        for scaleStep in range(1, 8, 2):
            for strideStep in range(1, 8, 2):
                self.resampleHogStats.append(
                    CalculateHOGStatistics(hogBag, options.min_overlap,
                                           options.hog_thresh, strideStep,
                                           scaleStep, options.cache_dir))

        # Pad with 10-item NaN tuples up to the column count of the VU
        # results.
        # NOTE(review): curAccuracy comes from the last pass of the VU
        # loop above and is undefined when vuTypes is empty.
        self.resampleHogStats.extend([
            tuple([float('nan') for x in range(10)])
            for y in range(curAccuracy.shape[1] -
                           len(self.resampleHogStats))
        ])

        # Item 4 of every stats tuple (the value appended to nWindows
        # below) is converted to a processing time; the resampling row is
        # ordered by that time.
        resampleProcessingTime = hogTiming(
            [x[4] for x in self.resampleHogStats])
        sortIdx = np.argsort(resampleProcessingTime)
        self.accuracy.append(
            [[self.resampleHogStats[x][0] for x in sortIdx]])
        self.precision.append(
            [[self.resampleHogStats[x][1] for x in sortIdx]])
        self.recall.append([[self.resampleHogStats[x][2] for x in sortIdx]])
        self.errorRate.append(
            [[self.resampleHogStats[x][3] for x in sortIdx]])
        self.nWindows.append(
            [[self.resampleHogStats[x][4] for x in sortIdx]])
        self.matthewsCoefficients.append(
            [[self.resampleHogStats[x][5] for x in sortIdx]])
        self.processingTime.append([np.sort(resampleProcessingTime)])
        # Items 7, 9 and 8 are used below; item 6 is not read here
        self.vuNegPrecision.append(
            [[self.resampleHogStats[x][7] for x in sortIdx]])
        self.vuRecall.append(
            [[self.resampleHogStats[x][9] for x in sortIdx]])
        self.vuPrecision.append(
            [[self.resampleHogStats[x][8] for x in sortIdx]])

    # Calculate the statistics for no VU filter
    self.groundTruths.append(
        CalculateHOGStatistics(hogBag,
                               options.min_overlap,
                               options.hog_thresh,
                               cacheDir=options.cache_dir))

    self.hogPRCurves.append(CalculatePRCurve(hogBag, options.min_overlap))

    del hogBag
    gc.collect()

    # Now cycle through all the scaled hog bags to get a baseline
    # assuming resizing.
    if options.do_scaling:
        for scalingFactor in scalingFactors:
            scaledHogBag = CreateVUScore(
                [scaledHogBags[(x, scalingFactor)] for x in datasets],
                [annotationsMap[x] for x in datasets],
                options.frame_subset_rate,
                options.do_nms,
                options.hog_thresh,
                options.min_overlap,
                cacheDir=options.cache_dir,
                imageIdRegex=options.imageid_regex)
            self.scaledHogStats.append(
                CalculateHOGStatistics(scaledHogBag,
                                       options.min_overlap,
                                       options.hog_thresh,
                                       cacheDir=options.cache_dir))
            del scaledHogBag
            gc.collect()

    # Stack the per-estimator entries along axis 0
    self.accuracy = np.concatenate(self.accuracy, 0)
    self.precision = np.concatenate(self.precision, 0)
    self.recall = np.concatenate(self.recall, 0)
    self.errorRate = np.concatenate(self.errorRate, 0)
    self.nWindows = np.concatenate(self.nWindows, 0)
    self.processingTime = np.concatenate(self.processingTime, 0)
    self.matthewsCoefficients = np.concatenate(self.matthewsCoefficients, 0)
    self.vuNegPrecision = np.concatenate(self.vuNegPrecision, 0)
    self.vuPrecision = np.concatenate(self.vuPrecision, 0)
    self.vuRecall = np.concatenate(self.vuRecall, 0)
    self.vuTypes = vuTypes
def Run(self, doProfile, createFile):
    """Optionally build the histogram file and/or draw the profile plots.

    Args:
        doProfile: When truthy, read the plot configuration lists and draw
            one overlaid profile plot per entry of ``self.varList``.
        createFile: When truthy, create, fill and save the histograms for
            every tree listed in ``self.treeList``.
    """
    ## Create the lists from the main configFile
    t_select = PythonUtils.makeTupleFromString(self.selectBins, ',', '?')
    t_muon = PythonUtils.makeTupleFromString(self.muBins, ',', '?')

    if createFile:
        PythonUtils.Info('Creating Histogram File for Profile Plots')

        ## Check the list files exist
        for listFile in (self.variableList, self.treeList):
            PythonUtils.doesFileExist(listFile)

        ## Make the lists from the list files
        t_var = PythonUtils.makeTupleFromFile(self.variableList, ',')
        t_tree = PythonUtils.makeTupleFromFile(self.treeList, ',')

        for tFile, tName, sDir in t_tree:
            PythonUtils.doesFileExist(tFile)
            d_histType = self.createHistograms(t_var, sDir, tFile, t_muon,
                                               self.selectOn, t_select)
            d_filledHists = self.fillHistograms(d_histType, t_var, tFile,
                                                tName, sDir, t_muon,
                                                t_select, self.selectOn,
                                                self.nEvents)
            self.saveHistograms(d_filledHists, t_var, self.outHist, sDir,
                                t_muon, self.selectOn, t_select)

    if doProfile:
        PythonUtils.Info("Creating Profile Plots!")

        ## Create the Lists from profile cfg
        l_saveAs = PythonUtils.makeListFromString(self.plotType, ',')

        ## Check the list files exist
        l_pList = [
            self.axisList, self.histoList, self.legendList, self.textList,
            self.varList, self.inFile
        ]
        for listFile in l_pList:
            PythonUtils.doesFileExist(listFile)

        ## Create the list of lists from each list file
        t_axisList = PythonUtils.makeTupleFromFile(self.axisList, ',')
        t_histoList = PythonUtils.makeTupleFromFile(self.histoList, ',')
        t_legendList = PythonUtils.makeTupleFromFile(self.legendList, ',')
        t_textList = PythonUtils.makeTupleFromFile(self.textList, ',')

        ## Check the first item matches
        for t_list in (t_histoList, t_legendList, t_textList):
            PythonUtils.firstItemMatching(t_list, t_axisList)

        ## Add the var list files to a list, skipping '#' comment lines.
        ## The context manager closes the handle, which used to be leaked.
        with open(self.varList, 'r') as varFile:
            l_varList = [line.strip() for line in varFile
                         if not line.startswith('#')]

        ## Entry i of the var list is paired with entry i of every plot list
        for i, varListFile in enumerate(l_varList):
            PythonUtils.doesFileExist(varListFile)
            t_varList = PythonUtils.makeTupleFromFile(varListFile, ',')
            l_prof = []
            t_legend = []

            for varInfo in t_varList:
                d_pTMean = {}
                l_err = []

                for selBin in t_select:
                    if selBin[0] == 'all' or selBin[1] == 'Inf':
                        continue

                    if selBin[1]:
                        select = (self.selectOn + '-' + selBin[0] + '-' +
                                  selBin[1])
                    else:
                        select = self.selectOn + '-' + selBin[0]

                    location = (varInfo[2] + '/' + varInfo[6] + '/' +
                                select + '/' + varInfo[7])

                    ## Bin centre, used as the x position of the point.
                    ## NOTE(review): when selBin[1] is empty the float()
                    ## conversion below raises, so the single-edge branch
                    ## above cannot currently complete - confirm what the
                    ## bin centre should be in that case.
                    pTbin = float(
                        selBin[0]) + (float(selBin[1]) - float(selBin[0])) / 2

                    ## Fetch the histogram once and read both numbers off it
                    hist = ROOTUtils.retrieveHistogram(self.inFile, location,
                                                       varInfo[1])
                    pTMean = hist.GetMean()
                    pTMeanErr = hist.GetMeanError()

                    l_err.append(pTMeanErr)
                    d_pTMean[pTbin] = pTMean

                h_prof, l_legend = PlottingUtils.twoDprofile(
                    d_pTMean, t_histoList[i], varInfo, l_err)
                l_prof.append(h_prof)
                t_legend.append(l_legend)

            saveString = self.plotDir + t_histoList[i][0]
            PlottingUtils.overlayProfile(l_prof, t_axisList[i],
                                         t_histoList[i], t_legendList[i],
                                         t_textList[i], t_legend, l_saveAs,
                                         saveString)
scores = VUScores(options) # Plot a histogram of the number of positive windows per person nonZeroCounts = filter(lambda x: x > 0, scores.windowCount.values()) figure() hist(nonZeroCounts, 50, histtype='step') # Plot a histogram of the size of people figure() hist([math.sqrt(x.Area()) for x in scores.personLoc.itervalues()], 50, histtype='step') # Plot a model for the recall vs. speedup assuming resampling nWindows, maxHogTime, EstimateTime = PlottingUtils.ParseHogTiming(options) nPeople = len(scores.windowCount) fracWindows = np.arange(1.0, 40.0) nFound = np.array([ sum([min(count / frac, 1) for count in nonZeroCounts]) for frac in fracWindows ]) meanRecall = np.divide(nFound, nPeople) nVariance = np.array([ sum([GetVarianceOfOnePerson(count, frac) for count in nonZeroCounts]) for frac in fracWindows ]) stdRecall = np.sqrt(np.divide(nVariance, nPeople * nPeople)) speedup = [ maxHogTime / EstimateTime(nWindows / frac) for frac in fracWindows ]
def getHistoStat(t_plot, l_histoList, l_option, fit, name):
    """Return summary statistics (mean, RMS, resolution) for a histogram.

    Args:
        t_plot: Sequence whose first entry is the histogram to analyse.
        l_histoList: Eight-item sequence unpacked as
            (h_str, location_suff, v_rebin, doNorm, xMin, xMax, fMin, fMax).
        l_option: Three truthy/falsy flags selecting which of
            [mean, RMS, resolution] to compute.
        fit: 'GAUSS' or 'BUKIN' to read the values from a fit; any other
            value reads them straight from the histogram.
        name: Variable name; the value 't' selects the placeholder
            branches for locations containing 're' or 'b'.

    Returns:
        Normally a pair ``(l_stat, l_err)`` of three-item lists ordered
        [mean, RMS, resolution]; entries that were not requested keep the
        sentinel -1.  When 'crea' occurs in ``location_suff`` a single
        number (integral divided by number of entries) is returned instead.
    """
    # h_str belongs to the record layout but is not used here.
    h_str, location_suff, v_rebin, doNorm, xMin, xMax, fMin, fMax = l_histoList

    if 'crea' in location_suff:
        # Only integral / nEntries is wanted for these locations.  The
        # previous code indexed t_plot with an undefined name and returned
        # a misspelled variable, so this branch always raised NameError.
        hist = t_plot[0]
        if not hasattr(hist, 'Integral'):
            # NOTE(review): the old indexing (t_plot[i][0]) suggests the
            # entries may be one-element lists wrapping the histogram;
            # accept that shape too - confirm against the callers.
            hist = hist[0]
        return hist.Integral() / hist.GetEntries()

    if 're' in location_suff and name == 't':
        return [0, 0, 0], [0, 0, 0]

    if 'b' in location_suff and name == 't':
        l_stat = [-1, -1, -1]
        l_sErr = [-1, -1, -1]
        # Checked in order, so the last enabled option wins.
        if l_option[0]:
            l_stat = [1, -1, -1]
            l_sErr = [0, 0, 0]
        if l_option[1]:
            l_stat = [-1, 0, -1]
            l_sErr = [0, 0, 0]
        if l_option[2]:
            l_stat = [-1, -1, 0]
            l_sErr = [0, 0, 0]
        return l_stat, l_sErr

    # -1 marks "not requested" in the returned lists.
    mean = eMean = rms = eRMS = res = eRes = -1

    h = PlottingUtils.rebin(t_plot[0], v_rebin)
    h.GetXaxis().SetRangeUser(float(xMin), float(xMax))
    if int(doNorm):
        h.Scale(1.0 / h.Integral())

    gROOT.SetBatch(1)
    # The canvas is bound to a local for the duration of the drawing calls;
    # presumably this keeps it alive while the fit is drawn - do not remove.
    c = TCanvas('c', 'c', 800, 600)
    h.Draw()

    ## Recalculate fMin and fMax for SD values
    if 'SIGMA' in fMin or 'SIGMA' in fMax:
        fMin, fMax = getSigmaFitRange(h, fMin, fMax)

    ## Now we add the fit and get the mean, RMS and Resolution
    fh = h
    if fit == 'GAUSS':
        fh.Fit("gaus", "Q", "")
        fh = fh.GetFunction('gaus')
        fh.Draw('SAME')
    elif fit == 'BUKIN':
        fh = drawBukin(h, float(fMin), float(fMax))
        fh.Draw('SAME')

    # Central value and width come from fit parameters 1 and 2 when a fit
    # was made, otherwise from the histogram itself.  The errors are always
    # taken from the histogram, as before.
    if fit == 'GAUSS' or fit == 'BUKIN':
        central = lambda: fh.GetParameter(1)
        width = lambda: fh.GetParameter(2)
    else:
        central = fh.GetMean
        width = fh.GetRMS

    if l_option[0]:
        mean = central()
        eMean = h.GetMeanError()
    if l_option[1]:
        rms = width()
        eRMS = h.GetRMSError()
    if l_option[2]:
        res_mean = central()
        res_eMean = h.GetMeanError()
        res_rms = width()
        res_eRMS = h.GetRMSError()
        res = res_rms / res_mean
        relMean = res_eMean / res_mean
        relRMS = res_eRMS / res_rms
        # NOTE(review): standard error propagation for a ratio would use a
        # single factor of res here; the res * res prefactor is kept
        # unchanged from the original - confirm which is intended.
        eRes = res * res * math.sqrt(relMean * relMean + relRMS * relRMS)

    l_stat = [mean, rms, res]
    l_err = [eMean, eRMS, eRes]
    return l_stat, l_err
stats.nWindows[0:1], groupColors, groupMarkers, groupNames, 'Recall', 'Number of Windows', stats.nVUEstimators, validEntries=validEntries) AddLineToPlot(curFig, resamplingModel[1], resamplingModel[3], '.', 'chocolate', 'Resampling Initial Boxes', 2) if options.output_prefix is not None: savefig(options.output_prefix + '_winVrecall.eps') savefig(options.output_prefix + '_winVrecall.png') # Calculate the timing of just using the visual utility estimator garb1, garb2, EstimateTime = PlottingUtils.ParseHogTiming(options) vuProcessingTime = stats.processingTime - EstimateTime(stats.nWindows) # Bayes risk vs. speedup PlotAccuracyScatter([slowHogProcessingTime / x[6] for x in stats.scaledHogStats], [x[1] for x in stats.scaledHogStats], (stats.accuracy / np.max(stats.accuracy)) + (stats.processingTime / slowHogProcessingTime), stats.recall, groupColors, groupMarkers, groupNames, 'Bayes Risk * Processing Time', 'Recall', stats.nVUEstimators,
import roslib
# load_manifest is called before the remaining imports.
# NOTE(review): presumably this sets up the ROS package paths that the
# imports below rely on - keep this ordering unless confirmed otherwise.
roslib.load_manifest('hima_experiment')
import rospy
import rosbag
import numpy as np
import PlottingUtils
import scipy.interpolate
import re
import HimaDataLoader
from optparse import OptionParser
import VUAccuracy

if __name__ == '__main__':
    # Build the command line parser: start from the options added by
    # PlottingUtils and append the ones declared below.
    parser = OptionParser()
    parser = PlottingUtils.AddCommandLineOptions(parser)

    # Score threshold for counting a HOG detection as a hit.
    parser.add_option('--hog_thresh',
                      type='float',
                      help='Minimum HOG value required to label a hit',
                      default=0.0)
    # Frame subsampling rate; the default of 1 means every frame.
    parser.add_option(
        '--frame_subset_rate',
        type='int',
        default=1,
        help=
        'In order to run quicker, you can only run the detector on a subset of the frames. This specifies how often to run the detector. So, for example if it is 5, it will run the detector once every 5 frames.'
    )
    # The default pattern captures a run of digits followed by a
    # .png / .jpg / .bmp extension.
    parser.add_option('--imageid_regex',
                      default='([0-9]+)\.((png)|(jpg)|(bmp))',
                      help='Regex to extract the image id from a filename')
def Run(self):
    """Draw one overlaid 2D summary plot per entry of the axis list.

    For entry ``j`` the ``j``-th line of ``self.variableList`` names a file
    that in turn lists one config file per point.  Each config selects a
    histogram from ``self.histFile``; the x and y statistic requested in
    the fit list (MEAN, RMS or RESOLUTION) are read from it via
    ``ROOTUtils.getHistoStat`` and drawn as a single marker.
    """
    STAT_KEYS = ('MEAN', 'RMS', 'RESOLUTION')

    def _lastValid(values):
        """Return the last entry that is not the -1 sentinel, else -1."""
        picked = -1
        for value in values:
            if value != -1:
                picked = value
        return picked

    ## Create lists from config file options
    l_saveAs = PythonUtils.makeListFromString(self.saveAs, ',')

    ## Check the .list files exist
    l_listFile = [self.axisList, self.fitList, self.histoList,
                  self.legendList, self.textList, self.variableList,
                  self.histFile]
    for listFile in l_listFile:
        PythonUtils.doesFileExist(listFile)

    ## Create the tuples from the .list files
    t_axisList = PythonUtils.makeTupleFromFile(self.axisList, ',')
    t_fitList = PythonUtils.makeTupleFromFile(self.fitList, ',')
    t_histoList = PythonUtils.makeTupleFromFile(self.histoList, ',')
    t_legendList = PythonUtils.makeTupleFromFile(self.legendList, ',')
    t_textList = PythonUtils.makeTupleFromFile(self.textList, ',')

    ## Match the first items
    for item in (t_fitList, t_histoList, t_legendList, t_textList):
        PythonUtils.firstItemMatching(t_axisList, item)

    ## Read the variable list once (it used to be re-opened on every pass
    ## of the loop below and never closed)
    with open(self.variableList, 'r') as f:
        l_files = [line.strip() for line in f]

    ## Loop over the variable files to get the relevant info
    for j in xrange(len(t_axisList)):
        (f_str, vRebin, vdoNorm, vXMin, vXMax, vFit, vXValue,
         vYValue) = t_fitList[j]

        ## One flag per statistic; an unrecognised value leaves all at 0
        l_xOption = [int(vXValue == key) for key in STAT_KEYS]
        l_yOption = [int(vYValue == key) for key in STAT_KEYS]

        l_twoD = []
        t_legend = []

        PythonUtils.doesFileExist(l_files[j])
        with open(l_files[j], 'r') as g:
            for vLine in g:
                vFile = vLine.strip()
                PythonUtils.doesFileExist(vFile)
                vFileParser = ConfigParser.SafeConfigParser()
                vFileParser.read(vFile)

                ## Options
                vTitle = vFileParser.get('Options', 'title')
                vVariable = vFileParser.get('Options', 'variable')
                vColour = vFileParser.getint('Options', 'colour')
                vMarker = vFileParser.getint('Options', 'marker')
                vMarkerSize = vFileParser.getfloat('Options', 'markerSize')
                vFitMin = vFileParser.get('Options', 'fitMin')
                vFitMax = vFileParser.get('Options', 'fitMax')

                ## Create l_statInfo
                l_statInfo = [t_histoList[j][1], t_histoList[j][1],
                              float(vRebin), int(vdoNorm), float(vXMin),
                              float(vXMax), vFitMin, vFitMax]

                ## Get the list of stats
                l_plot = [ROOTUtils.retrieveHistogram(self.histFile,
                                                      t_histoList[j][1],
                                                      vVariable)]
                l_xValue, l_xErr = ROOTUtils.getHistoStat(
                    l_plot, l_statInfo, l_xOption, vFit, vVariable)
                l_yValue, l_yErr = ROOTUtils.getHistoStat(
                    l_plot, l_statInfo, l_yOption, vFit, vVariable)

                ## Get the value we want from the list so we can plot it!
                ## Both default to -1; yValue previously had no default and
                ## was either undefined or left over from the previous file
                ## when no y statistic was available.
                xValue = _lastValid(l_xValue)
                yValue = _lastValid(l_yValue)

                ## Create a 2D plot and save to a list so can overlay them
                twoDPlot = PlottingUtils.createTwoD(t_histoList[j], xValue,
                                                    yValue, vMarker,
                                                    vMarkerSize, vColour)
                l_twoD.append(twoDPlot)

                ## Only titled points get a legend entry
                if vTitle:
                    t_legend.append([twoDPlot, vTitle, 'p'])

        ## Overlay our 2D plots onto one canvas
        PlottingUtils.overlayTwoD(l_twoD, t_legend, t_histoList[j],
                                  t_legendList[j], t_textList[j],
                                  t_axisList[j], self.saveDir, l_saveAs)
def getHistoStat(t_plot, l_histoList, l_option, fit, name):
    """Return summary statistics (mean, RMS, resolution) for a histogram.

    Args:
        t_plot: Sequence whose first entry is the histogram to analyse.
        l_histoList: Eight-item sequence unpacked as
            (h_str, location_suff, v_rebin, doNorm, xMin, xMax, fMin, fMax).
        l_option: Three truthy/falsy flags selecting which of
            [mean, RMS, resolution] to compute.
        fit: 'GAUSS' or 'BUKIN' to read the values from a fit; any other
            value reads them straight from the histogram.
        name: Variable name; the value 't' selects the placeholder
            branches for locations containing 're' or 'b'.

    Returns:
        Normally a pair ``(l_stat, l_err)`` of three-item lists ordered
        [mean, RMS, resolution]; entries that were not requested keep the
        sentinel -1.  When 'crea' occurs in ``location_suff`` a single
        number (integral divided by number of entries) is returned instead.
    """
    # h_str belongs to the record layout but is not used here.
    h_str, location_suff, v_rebin, doNorm, xMin, xMax, fMin, fMax = l_histoList

    if 'crea' in location_suff:
        # Only integral / nEntries is wanted for these locations.  The
        # previous code indexed t_plot with an undefined name and returned
        # a misspelled variable, so this branch always raised NameError.
        hist = t_plot[0]
        if not hasattr(hist, 'Integral'):
            # NOTE(review): the old indexing (t_plot[i][0]) suggests the
            # entries may be one-element lists wrapping the histogram;
            # accept that shape too - confirm against the callers.
            hist = hist[0]
        return hist.Integral() / hist.GetEntries()

    if 're' in location_suff and name == 't':
        return [0, 0, 0], [0, 0, 0]

    if 'b' in location_suff and name == 't':
        l_stat = [-1, -1, -1]
        l_sErr = [-1, -1, -1]
        # Checked in order, so the last enabled option wins.
        if l_option[0]:
            l_stat = [1, -1, -1]
            l_sErr = [0, 0, 0]
        if l_option[1]:
            l_stat = [-1, 0, -1]
            l_sErr = [0, 0, 0]
        if l_option[2]:
            l_stat = [-1, -1, 0]
            l_sErr = [0, 0, 0]
        return l_stat, l_sErr

    # -1 marks "not requested" in the returned lists.
    mean = eMean = rms = eRMS = res = eRes = -1

    h = PlottingUtils.rebin(t_plot[0], v_rebin)
    h.GetXaxis().SetRangeUser(float(xMin), float(xMax))
    if int(doNorm):
        h.Scale(1.0 / h.Integral())

    gROOT.SetBatch(1)
    # The canvas is bound to a local for the duration of the drawing calls;
    # presumably this keeps it alive while the fit is drawn - do not remove.
    c = TCanvas('c', 'c', 800, 600)
    h.Draw()

    ## Recalculate fMin and fMax for SD values
    if 'SIGMA' in fMin or 'SIGMA' in fMax:
        fMin, fMax = getSigmaFitRange(h, fMin, fMax)

    ## Now we add the fit and get the mean, RMS and Resolution
    fh = h
    if fit == 'GAUSS':
        fh.Fit("gaus", "Q", "")
        fh = fh.GetFunction('gaus')
        fh.Draw('SAME')
    elif fit == 'BUKIN':
        fh = drawBukin(h, float(fMin), float(fMax))
        fh.Draw('SAME')

    # Central value and width come from fit parameters 1 and 2 when a fit
    # was made, otherwise from the histogram itself.  The errors are always
    # taken from the histogram, as before.
    if fit == 'GAUSS' or fit == 'BUKIN':
        central = lambda: fh.GetParameter(1)
        width = lambda: fh.GetParameter(2)
    else:
        central = fh.GetMean
        width = fh.GetRMS

    if l_option[0]:
        mean = central()
        eMean = h.GetMeanError()
    if l_option[1]:
        rms = width()
        eRMS = h.GetRMSError()
    if l_option[2]:
        res_mean = central()
        res_eMean = h.GetMeanError()
        res_rms = width()
        res_eRMS = h.GetRMSError()
        res = res_rms / res_mean
        relMean = res_eMean / res_mean
        relRMS = res_eRMS / res_rms
        # NOTE(review): standard error propagation for a ratio would use a
        # single factor of res here; the res * res prefactor is kept
        # unchanged from the original - confirm which is intended.
        eRes = res * res * math.sqrt(relMean * relMean + relRMS * relRMS)

    l_stat = [mean, rms, res]
    l_err = [eMean, eRMS, eRes]
    return l_stat, l_err