Ejemplo n.º 1
0
  def finalize( self ):
    """
    Save the collected patterns, one output file per (et, eta) bin.

    For every pair of bin indices the key 'et<i>_eta<j>' is looked up in
    self._event. Bins whose entry is None are skipped. Each saved dictionary
    holds the feature labels, the et/eta bin definitions, the two bin indices
    and the pattern array under 'pattern_<key>'. Files are written inside a
    directory named after the "OutputFile" property using the
    'savez_compressed' protocol.

    Returns:
      StatusCode.SUCCESS, unconditionally.
    """

    from Gaugi import save, mkdir_p

    outputname = self.getProperty("OutputFile")

    # The target directory is the same for every bin, so create it once
    # up front rather than on every iteration of the nested loops.
    mkdir_p( outputname )

    for etBinIdx in range(len(self._etbins)-1):
      for etaBinIdx in range(len(self._etabins)-1):

        key = 'et%d_eta%d' % (etBinIdx,etaBinIdx)
        events = self._event[key]
        if events is None:
          # Nothing was collected for this bin.
          continue

        pattern = np.array( events )
        d = {
            "features"  : self._event_label,
            "etBins"    : self._etbins,
            "etaBins"   : self._etabins,
            "etBinIdx"  : etBinIdx,
            "etaBinIdx" : etaBinIdx
            }
        d[ 'pattern_'+key ] = pattern

        MSG_INFO( self, 'Saving %s with : (%d, %d)', key, pattern.shape[0], pattern.shape[1] )
        # NOTE(review): outputname is used both as the directory and as the
        # file-name prefix, so this assumes "OutputFile" is a bare name with
        # no path separators -- confirm against the job configuration.
        save( d, outputname+'/'+outputname+"_"+key , protocol = 'savez_compressed')
    return StatusCode.SUCCESS
Ejemplo n.º 2
0
    def finalize(self):
        """
        Save the collected events and per-layer cells to self._outputname.

        The saved dictionary contains the feature labels, the event table as
        an array under 'data', and one 'cells_<layer>' array for each of the
        layers em1, em2, em3, had1, had2 and had3. It is written with the
        'savez_compressed' protocol.

        Returns:
            StatusCode.SUCCESS, unconditionally.
        """
        d = {
            "features": self._event_label,
            "data": np.array(self._event),
        }

        # Derive each output key from the layer name itself so that a key can
        # never be filled from a different layer (previously 'cells_had1' and
        # 'cells_had3' were read from each other's buffers).
        for layer in ('em1', 'em2', 'em3', 'had1', 'had2', 'had3'):
            d['cells_' + layer] = np.array(self._cells[layer])

        save(
            d,
            self._outputname,
            protocol='savez_compressed',
        )
        return StatusCode.SUCCESS
Ejemplo n.º 3
0
 def save(self, ofile):
     """
     Write this object to `ofile`.

     The raw representation returned by self.toRawObj() is handed to
     Gaugi's save() together with compress=True.
     """
     rawObj = self.toRawObj()
     from Gaugi import save as gaugiSave
     gaugiSave(rawObj, ofile, compress=True)
Ejemplo n.º 4
0
 def save(self, fname):
   """
   Write this object, tagged with its version, to `fname`.

   The raw representation returned by self.toRawObj() gets an extra
   '__version' entry before being handed to Gaugi's save() together with
   compress=True.
   """
   rawObj = self.toRawObj()
   # Store the version next to the payload in the same dictionary.
   rawObj['__version'] = self.__version
   from Gaugi import save as gaugiSave
   gaugiSave( rawObj, fname, compress=True )
Ejemplo n.º 5
0
    def __call__(self, sgnFileList, bkgFileList, ofile, dump_csv=False):
        """
        Merge signal and background pattern files bin by bin and save them.

        Bin identifiers are taken from the 'binID' group of self._pat matched
        against the expanded signal paths. For each identifier, the signal and
        background files whose path contains it are read and merged, stacked
        into one 'data' array (float32) with a matching 'target' array
        (int16: 1 for signal rows, 0 for background rows), and saved to
        ofile + '_' + <binID> with the 'savez_compressed' protocol.

        A pattern that is None is reported as empty and counted as zero
        samples; if both patterns of a bin are None the bin is not saved.

        Args:
            sgnFileList: signal input, forwarded to expandFolders.
            bkgFileList: background input, forwarded to expandFolders.
            ofile: output path prefix; the bin identifier is appended to it.
            dump_csv: when true, additionally write every saved bin to
                ofile + '_' + <binID> + '.csv' through a pandas DataFrame.

        Returns:
            None. The per-bin sample counts and the bin definitions of the
            last processed bin are passed to self.plotNSamples.
        """
        # Expand each input once; previously both were re-expanded per bin.
        sgnPaths = list(expandFolders(sgnFileList))
        bkgPaths = list(expandFolders(bkgFileList))

        # Collect the distinct bin identifiers with a single match per path.
        matches = (self._pat.match(f) for f in sgnPaths)
        jobIDs = sorted({m.group('binID') for m in matches if m is not None})

        npatterns = {}
        etBins = None
        etaBins = None

        # Development switch: when enabled, only the first 11 files of each
        # sample are read.
        debug = False

        def readMerged(paths, binID, readMsg, mergeMsg):
            """Read the files of one bin and fold every reader output into one dict."""
            # NOTE(review): this is a substring test, so with string paths an
            # identifier such as 'et0_eta1' also selects 'et0_eta10' files --
            # confirm the naming scheme cannot produce such overlaps.
            subFileList = [f for f in paths if binID in f]
            if debug:
                subFileList = subFileList[0:11]

            reader = ReaderPool(subFileList,
                                DataReader(self._skip_these_keys),
                                self._nFilesPerJob, self._nthreads)
            MSG_INFO(self, readMsg)
            outputs = reader()
            # The last output becomes the accumulator for the remaining ones.
            merged = outputs.pop()
            if len(outputs) > 0:
                for from_dict in progressbar(outputs,
                                             len(outputs),
                                             mergeMsg,
                                             60,
                                             logger=self._logger):
                    DataReader.merge(from_dict, merged, self._skip_these_keys)
            return merged

        for binID in jobIDs:

            sgnDict = readMerged(sgnPaths, binID,
                                 "Reading signal files...",
                                 'Mearging signal files: ')
            bkgDict = readMerged(bkgPaths, binID,
                                 "Reading background files...",
                                 'Mearging background files: ')

            # Bin metadata is taken from the signal sample.
            d = {
                "features": sgnDict["features"],
                "etBins": sgnDict["etBins"],
                "etaBins": sgnDict["etaBins"],
                "etBinIdx": sgnDict["etBinIdx"],
                "etaBinIdx": sgnDict["etaBinIdx"],
            }

            etBins = sgnDict["etBins"]
            etaBins = sgnDict["etaBins"]

            sgnPattern = sgnDict['pattern_' + binID]
            bkgPattern = bkgDict['pattern_' + binID]

            # Check for missing patterns BEFORE touching .shape or
            # concatenating; the original performed these checks afterwards,
            # which made the 'empty' branches unreachable.
            if sgnPattern is not None:
                MSG_INFO(self, 'sgnData_%s : (%d, %d)', binID,
                         sgnPattern.shape[0], sgnPattern.shape[1])
            else:
                MSG_INFO(self, 'sgnData_%s : empty', binID)
            if bkgPattern is not None:
                MSG_INFO(self, 'bkgData_%s : (%d, %d)', binID,
                         bkgPattern.shape[0], bkgPattern.shape[1])
            else:
                MSG_INFO(self, 'bkgData_%s : empty', binID)

            nSgn = 0 if sgnPattern is None else int(sgnPattern.shape[0])
            nBkg = 0 if bkgPattern is None else int(bkgPattern.shape[0])
            npatterns['sgnPattern_' + binID] = nSgn
            npatterns['bkgPattern_' + binID] = nBkg

            available = [p for p in (sgnPattern, bkgPattern) if p is not None]
            if not available:
                MSG_INFO(self, 'No patterns for %s, nothing to save', binID)
                continue

            # Signal rows first, then background rows; the target vector
            # follows the same order.
            d['data'] = np.concatenate(available).astype('float32')
            d['target'] = np.concatenate(
                (np.ones((nSgn, )), np.zeros((nBkg, )))).astype('int16')

            MSG_INFO(self, "Saving: %s", ofile + '_' + binID)
            save(d, ofile + '_' + binID, protocol='savez_compressed')

            if dump_csv:
                # One column per feature plus the target, for use with pandas.
                dd = {
                    key: d['data'][:, ikey]
                    for ikey, key in enumerate(d['features'])
                }
                dd['target'] = d['target']
                pd.DataFrame(dd).to_csv(ofile + '_' + binID + '.csv')

        self.plotNSamples(npatterns, etBins, etaBins)