def finalize( self ):
    """Dump one compressed numpy file per (et, eta) bin with the collected patterns.

    For every bin key 'et<i>_eta<j>' that holds events, writes a dict with the
    feature labels, the bin edges/indices and the pattern matrix to
    <OutputFile>/<OutputFile>_<key>.  Empty bins are skipped.
    """
    from Gaugi import save, mkdir_p
    outputname = self.getProperty("OutputFile")
    n_et = len(self._etbins) - 1
    n_eta = len(self._etabins) - 1
    for et_idx in range(n_et):
        for eta_idx in range(n_eta):
            key = 'et%d_eta%d' % (et_idx, eta_idx)
            # ensure the output directory exists (called per bin, as before)
            mkdir_p( outputname )
            if self._event[key] is None:
                continue
            payload = {
                "features": self._event_label,
                "etBins": self._etbins,
                "etaBins": self._etabins,
                "etBinIdx": et_idx,
                "etaBinIdx": eta_idx,
            }
            patterns = np.array( self._event[key] )
            payload['pattern_' + key] = patterns
            MSG_INFO( self, 'Saving %s with : (%d, %d)', key,
                      patterns.shape[0], patterns.shape[1] )
            save( payload, outputname + '/' + outputname + "_" + key,
                  protocol='savez_compressed' )
    return StatusCode.SUCCESS
def finalize(self):
    """Save the collected event patterns and per-layer calorimeter cell samples.

    Writes a single compressed numpy file (self._outputname) holding the
    feature labels, the event data matrix, and one array per EM/hadronic
    sampling layer taken from self._cells.
    """
    d = {
        "features": self._event_label,
        "data": np.array(self._event),
        "cells_em1": np.array(self._cells['em1']),
        "cells_em2": np.array(self._cells['em2']),
        "cells_em3": np.array(self._cells['em3']),
        # BUG FIX: the original mapped cells_had1 <- 'had3' and
        # cells_had3 <- 'had1' (copy-paste swap); the EM layers map 1:1,
        # so the hadronic layers must as well.
        "cells_had1": np.array(self._cells['had1']),
        "cells_had2": np.array(self._cells['had2']),
        "cells_had3": np.array(self._cells['had3']),
    }
    save(
        d,
        self._outputname,
        protocol='savez_compressed',
    )
    return StatusCode.SUCCESS
def save(self, ofile):
    """Serialize this object's raw representation to *ofile* (compressed)."""
    from Gaugi import save
    raw = self.toRawObj()
    save(raw, ofile, compress=True)
def save(self, fname):
    """Serialize the raw representation plus a '__version' tag to *fname*."""
    from Gaugi import save
    raw = self.toRawObj()
    # record the schema version alongside the payload
    raw['__version'] = self.__version
    save( raw, fname, compress=True)
def __call__(self, sgnFileList, bkgFileList, ofile, dump_csv=False):
    """Merge signal and background pattern files per (et, eta) bin and save
    one labelled dataset file per bin.

    Parameters
    ----------
    sgnFileList, bkgFileList : folders/files holding the per-bin signal and
        background pattern dumps (file names must contain the bin ID).
    ofile : output path prefix; each bin is saved as <ofile>_<binID>.
    dump_csv : when True, also dump a flat CSV (one column per feature plus
        the target column) for each bin.
    """
    # Discover every bin ID (e.g. 'et0_eta1') present in the signal file names.
    paths = expandFolders(sgnFileList)
    jobIDs = sorted(list(set([self._pat.match(f).group('binID')
                              for f in paths
                              if self._pat.match(f) is not None])))
    npatterns = {}
    etBins = None
    etaBins = None
    debug = False

    for bin_id in jobIDs:  # renamed from `id` (shadowed the builtin)
        sgnDict = self._read_and_merge(sgnFileList, bin_id, debug,
                                       "Reading signal files...",
                                       'Mearging signal files: ')
        bkgDict = self._read_and_merge(bkgFileList, bin_id, debug,
                                       "Reading background files...",
                                       'Mearging background files: ')

        sgnData = sgnDict['pattern_' + bin_id]
        bkgData = bkgDict['pattern_' + bin_id]

        # Report shapes first, then skip empty regions.
        # BUG FIX: the original called np.concatenate on the patterns BEFORE
        # these None checks, so an empty bin crashed (TypeError) instead of
        # ever reaching the 'empty' log branch.
        if sgnData is not None:
            MSG_INFO(self, 'sgnData_%s : (%d, %d)', bin_id,
                     sgnData.shape[0], sgnData.shape[1])
        else:
            MSG_INFO(self, 'sgnData_%s : empty', bin_id)
        if bkgData is not None:
            MSG_INFO(self, 'bkgData_%s : (%d, %d)', bin_id,
                     bkgData.shape[0], bkgData.shape[1])
        else:
            MSG_INFO(self, 'bkgData_%s : empty', bin_id)
        if sgnData is None or bkgData is None:
            continue

        # Remember the binning for the summary plot (same for every region).
        etBins = sgnDict["etBins"]
        etaBins = sgnDict["etaBins"]

        d = {
            "features": sgnDict["features"],
            "etBins": sgnDict["etBins"],
            "etaBins": sgnDict["etaBins"],
            "etBinIdx": sgnDict["etBinIdx"],
            "etaBinIdx": sgnDict["etaBinIdx"],
            # signal rows first, labelled 1; background rows after, labelled 0
            "data": np.concatenate((sgnData, bkgData)).astype('float32'),
            "target": np.concatenate((np.ones((sgnData.shape[0],)),
                                      np.zeros((bkgData.shape[0],)))).astype('int16'),
        }

        MSG_INFO(self, "Saving: %s", ofile + '_' + bin_id)
        npatterns['sgnPattern_' + bin_id] = int(sgnData.shape[0])
        npatterns['bkgPattern_' + bin_id] = int(bkgData.shape[0])
        save(d, ofile + '_' + bin_id, protocol='savez_compressed')

        if dump_csv:
            # Flat CSV view for pandas: one column per feature + target.
            dd = {key: d['data'][:, ikey]
                  for ikey, key in enumerate(d['features'])}
            dd['target'] = d['target']
            pd.DataFrame(dd).to_csv(ofile + '_' + bin_id + '.csv')

    self.plotNSamples(npatterns, etBins, etaBins)

def _read_and_merge(self, fileList, bin_id, debug, read_msg, merge_msg):
    """Read every file whose name contains *bin_id* and merge them into one dict.

    Factored out of __call__, where the exact same read/pool/merge sequence
    was duplicated for signal and background inputs.
    """
    subFileList = [f for f in expandFolders(fileList) if bin_id in f]
    if debug:
        subFileList = subFileList[0:11]
    reader = ReaderPool(subFileList, DataReader(self._skip_these_keys),
                        self._nFilesPerJob, self._nthreads)
    MSG_INFO(self, read_msg)
    outputs = reader()
    merged = outputs.pop()
    if len(outputs) > 0:
        for from_dict in progressbar(outputs, len(outputs), merge_msg, 60,
                                     logger=self._logger):
            DataReader.merge(from_dict, merged, self._skip_these_keys)
    return merged