def interface(self, set, indir, outdir):
    """Prepare the variable list and the training samplings inside outdir.

    The variable list is copied to ``outdir/<Common.Inputvars>`` and then
    read back, one variable name per line.  Using that list, the training
    signal and background files are loaded and a sampling of each is saved
    through ``saveRandomSampling`` to ``outdir/signals.txt`` and
    ``outdir/backgrounds.txt``.

    Parameters:
      set    -- channel description, passed through to channelName().
      indir  -- directory holding ``Common.Selectvars``; when false the list
                named by the 'topovars' parameter under the Neat scratch
                area is used instead.
      outdir -- directory receiving the variable list and both samplings.
    """
    # Pick the source of the variable list and copy it into outdir
    vfilename = '%s/%s' % (outdir, Common.Inputvars)
    if indir:
        source = '%s/%s' % (indir, Common.Selectvars)
    else:
        # No input directory: fall back to the topovars list of the channel
        source = '%s/scratch/%s/%s.txt' % (
            Common.NeatDirectory, self.getParameter('topovars'), channelName(set)
        )
    shutil.copyfile(source, vfilename)

    # Reading variable list (the handle is closed as soon as it is read)
    with open(vfilename) as listing:
        variables = [line.rstrip() for line in listing]

    # Signal first, then background: same steps, different inputs/outputs
    categories = (
        ('signal', Common.TrainingSignals, 'signals.txt'),
        ('background', Common.TrainingBackgrounds, 'backgrounds.txt'),
    )
    for label, samples, outname in categories:
        reader = TopovarReader(variables)
        self.message('Reading %s files' % label)

        # Adding the training files of this category
        for sample in samples:
            reader.add('%s/%s/%s_zero_Topo_%s.root' % (
                Common.SampleLocation, Common.TrainingSample,
                channelName(set, sample), Common.TrainingSample
            ))

        # Create and save a random sampling
        reader.saveRandomSampling(
            '%s/%s' % (outdir, outname), Common.RuleFitTrainingEvents
        )
Example #2
0
  def process(self, set, lock=None):
    """Select the fittest NEAT training of a channel and save winner.info.

    Scans the directories Training00001 .. Training<Common.NeatNumberTries>
    of the channel, skipping any that lacks neat.config or winner.dat, and
    keeps the pickled network with the highest fitness.  The selected
    candidate (training name, fitness, variable list, averages and standard
    deviations) is pickled to winner.info in the channel training directory.
    None is pickled when no complete training was found.

    Parameters:
      set  -- channel description; set['channel'] names the channel.
      lock -- accepted but not used by this method.
    """
    # Print channel being processed
    self.message('Processing channel %s.' % set['channel'])

    # Output directory.  It does not depend on the training being examined,
    # so it is built once and stays defined even if the loop never runs.
    outdir = '%s/scratch/%s/Trainings/%s' % (
      Common.NeatDirectory, self.getParameter('input'), set['channel']
    )

    # Placeholder for winner
    winner = None
    # Variable list, shared by every candidate once it has been read
    variables = None
    # Variable Normalization
    aves = {}; stds = {}

    # Loop over the training sets
    for counter in xrange(1, Common.NeatNumberTries + 1):

      # Setting the indir directory
      indir = '%s/scratch/%s/Trainings/%s/Training%05d' % (
        Common.NeatDirectory, self.getParameter('input'), set['channel'], counter
      )

      # Look for missing files (report all of them before skipping)
      missing = False
      for required in ('neat.config', 'winner.dat'):
        if not os.path.isfile("%s/%s" % (indir, required)):
          self.message('%s does not exist in %s.' % (required, indir))
          missing = True
      if missing: continue

      normalize = len(aves) == 0 and self.getParameter('normalization','true') == 'true'

      # The variable list is needed by every candidate, not only when the
      # normalization is computed, so read it even with normalization off.
      if normalize or variables is None:
        with open('%s/inputvars.txt' % indir) as listing:
          variables = [line.rstrip() for line in listing]

      # Compute the normalization if needed
      if normalize:
        # Reading training sample for computing normalization
        training = TopovarReader(variables)

        # Adding training files
        for sample in Common.TrainingBackgrounds + Common.TrainingSignals:
          training.add('%s/%s/%s.root' % (
              Common.SampleLocation, Common.TrainingSample, filename(set, sample)
            )
          )

        normalizer = VariableNormalizer(variables)
        normalizer.add(training.sample(True))
        normalizer.report()

        for variable in variables:
          ave, std = normalizer(variable)
          aves[variable] = ave
          stds[variable] = std

      # Read the winner of this training
      # NOTE(review): unpickling runs code stored in the file; this assumes
      # winner.dat was produced by our own training step -- confirm.
      with open('%s/winner.dat' % indir) as stored:
        net = pickle.load(stored)

      # Winner information
      candidate = {
        'training' : 'Training%05d' % counter,
        'fitness' : net.fitness,
        'variables' : variables,
        'aves' : aves,
        'stds' : stds
      }

      if not winner or candidate['fitness'] > winner['fitness']:
        self.message('Winner candidate for channel %s found in training %s with fitness %.4g' % (
            set['channel'], candidate['training'], candidate['fitness']
          )
        )
        winner = candidate

    # Write the winner file, closing it so the pickle is fully flushed
    with open('%s/winner.info' % outdir, 'w') as output:
      pickle.dump(winner, output)
Example #3
0
def Evaluate(set):
  """Train a NEAT population on one training set and save its best network.

  Steps, all relative to set['directory']:
    * read the variable names from inputvars.txt (one per line);
    * rewrite neat.config in place, substituting input_nodes with the
      number of variables and output_nodes with 1, then load it;
    * read the training signal and background samples, accumulate them in a
      VariableNormalizer and normalize both samples;
    * run the population for int(set['number_generations']) generations;
    * pickle the winner to winner.dat and write its fitness to
      winner-fitness.txt.

  Side effects: assigns the module globals signalYield, signalSample,
  backgroundYield and backgroundSample (presumably read by
  FitnessFunctionWrapper -- confirm) and replaces
  population.Population.evaluate and chromosome.node_gene_type.
  """
  global signalYield, signalSample, backgroundYield, backgroundSample

  directory = set['directory']
  configname = '%s/neat.config' % directory

  # Reading variable list
  with open('%s/inputvars.txt' % directory) as listing:
    variables = [line.rstrip() for line in listing]

  # Setting the number of input and output in the neat config file.
  # The substituted text is built before the file is reopened for writing.
  with open(configname) as source:
    template = string.Template(source.read())
  settings = template.safe_substitute(input_nodes = len(variables), output_nodes = 1)
  with open(configname, 'w') as target:
    target.write(settings)

  # Read neat configuration file
  config.load(configname)

  print('Training: Reading signal files')

  # Signal topovars
  signals = TopovarReader(variables)

  # Adding signal files
  for sample in Common.TrainingSignals:
    signals.add('%s/%s/%s.root' % (
        Common.SampleLocation, Common.TrainingSample, Common.filename(set, sample)
      )
    )

  # Creating a variable normalizer
  normalizer = VariableNormalizer(variables)

  # Saving the population in buffer
  signalSample = signals.sample(compress = True)

  # Adding the sample to the normalizer
  normalizer.add(signalSample)

  # Total weight so far, i.e. the signal only
  signalYield = normalizer.getTotalWeight()

  print('Training: Reading background files')

  # Background topovars
  backgrounds = TopovarReader(variables)

  # Adding background files
  for sample in Common.TrainingBackgrounds:
    backgrounds.add('%s/%s/%s.root' % (
        Common.SampleLocation, Common.TrainingSample, Common.filename(set, sample)
      )
    )

  # Saving the population in buffer
  backgroundSample = backgrounds.sample(compress = True)

  # Adding the sample to the normalizer
  normalizer.add(backgroundSample)

  # The normalizer now holds signal + background: subtract the signal part
  backgroundYield = normalizer.getTotalWeight() - signalYield

  # Reporting the normalization
  normalizer.report()

  # Normalization of the variables
  normalizer.normalizeSample(signalSample)
  normalizer.normalizeSample(backgroundSample)

  # NEAT training
  chromosome.node_gene_type = genome.NodeGene
  population.Population.evaluate = FitnessFunctionWrapper
  pop = population.Population()
  pop.epoch(int(set['number_generations']), report=True, save_best=False, checkpoint_interval = None)
  # Last entry of the first statistics series, used as the best network
  winner = pop.stats[0][-1]
  # Single-argument print(...) gives the same output on Python 2 and 3
  print('Training: Number of evaluations: %d' % winner.id)
  print('Training: Best NN fitness: %0.2f' % winner.fitness)

  # Save the best network
  with open('%s/winner.dat' % directory, 'w') as output:
    pickle.dump(winner, output)
  # Save the fitness of the best network
  with open('%s/winner-fitness.txt' % directory, 'w') as output:
    output.write('%f' % winner.fitness)
Example #4
0
    def interface(self, set, indir, outdir):
        """Prepare the variable list and the training samplings in outdir.

        Copies the variable list to ``outdir/<Common.Inputvars>``, reads it
        back (one variable name per line), then loads the training signal
        and background files with that list and saves a sampling of each,
        via ``saveRandomSampling``, to ``outdir/signals.txt`` and
        ``outdir/backgrounds.txt``.

        Parameters:
          set    -- channel description, passed through to channelName().
          indir  -- directory holding ``Common.Selectvars``; when false the
                    list named by the 'topovars' parameter under the Neat
                    scratch area is copied instead.
          outdir -- directory receiving the variable list and samplings.
        """
        # Copy the variable list from whichever source applies
        vfilename = '%s/%s' % (outdir, Common.Inputvars)
        if indir:
            origin = '%s/%s' % (indir, Common.Selectvars)
        else:
            # No input directory: use the topovars list of the channel
            origin = '%s/scratch/%s/%s.txt' % (Common.NeatDirectory,
                                               self.getParameter('topovars'),
                                               channelName(set))
        shutil.copyfile(origin, vfilename)

        # Reading variable list; the file is closed once it has been read
        with open(vfilename) as listing:
            variables = [line.rstrip() for line in listing]

        # The signal and background steps are identical apart from the
        # sample list and the output name, so they share one loop.
        categories = (
            ('signal', Common.TrainingSignals, 'signals.txt'),
            ('background', Common.TrainingBackgrounds, 'backgrounds.txt'),
        )
        for label, samples, outname in categories:
            reader = TopovarReader(variables)
            self.message('Reading %s files' % label)

            # Adding the training files of this category
            for sample in samples:
                reader.add('%s/%s/%s_zero_Topo_%s.root' %
                           (Common.SampleLocation, Common.TrainingSample,
                            channelName(set, sample), Common.TrainingSample))

            # Create and save a random sampling
            reader.saveRandomSampling('%s/%s' % (outdir, outname),
                                      Common.RuleFitTrainingEvents)
  def process(self, set):
    """Histogram the NEAT output of every sample tree of a channel.

    The 'sample' parameter (yield, training or testing; default yield)
    selects the <Type>Trees input and <Type>Histograms output directories
    under the Neat scratch area.  When the 'xcheck' parameter is set the
    XCheckTrees/<xcheck> and XCheckHistograms/<xcheck> directories are used
    instead and only the nominal (empty) systematic is processed.  For each
    existing input file one output file is written with the NEAT output
    booked in 400, 200, 100, 50 and 25 bins over [0, 1], filled with the
    event weight.

    Parameters:
      set -- channel description; set['channel'] names the channel.

    Raises:
      ProcessorError -- when the 'sample' parameter is not one of
                        training, testing or yield.
    """
    self.message('Processing channel %s' % set['channel'])

    base = '%s/scratch/%s' % (Common.NeatDirectory, self.getParameter('input'))

    # Directory name prefix for each allowed sample type
    prefixes = {'yield': 'Yield', 'training': 'Training', 'testing': 'Testing'}
    sampletype = self.getParameter('sample','yield')
    if sampletype not in prefixes:
      raise ProcessorError('Unknown sample option %s (allowed options: training, testing and yield).' % sampletype)

    # Setting the indir and outdir directories; xcheck overrides the type
    xcheck = self.isParameter('xcheck')
    if xcheck:
      indir = '%s/XCheckTrees/%s' % (base, self.getParameter('xcheck'))
      outdir = '%s/XCheckHistograms/%s' % (base, self.getParameter('xcheck'))
    else:
      indir = '%s/%sTrees' % (base, prefixes[sampletype])
      outdir = '%s/%sHistograms' % (base, prefixes[sampletype])

    # Check for output directory
    if not os.path.exists(outdir):
      os.makedirs(outdir)

    # File mode for writing histograms
    mode = 'recreate'

    # Create the list of samples: backgrounds, signals, the combined signal
    # (only when there are several signals) and finally the data
    if isinstance(Common.YieldSignals, list):
      samples = Common.YieldBackgrounds + Common.YieldSignals
      if len(Common.YieldSignals) > 1:
        samples = samples + [''.join(Common.YieldSignals)]
    else:
      samples = Common.YieldBackgrounds + [Common.YieldSignals]
    samples = samples + [Common.Data]

    # Loop over all the samples with trees
    for systematic in Common.Systematics + ['']:
      for sample in samples:

        # No systematics in case of xchecks, nor for the samples listed in
        # Common.NoSystematics (QCD and DATA)
        if systematic != '' and (xcheck or sample in Common.NoSystematics):
          continue

        self.message('Processing systematic %s samples %s.' % (systematic, sample))

        basename = Common.filename(set, sample, systematic)
        infile = '%s/%s.root' % (indir, basename)

        # Check that the input file exists
        if not os.path.isfile(infile):
          self.message('Warning missing input file %s skipping ...' % infile)
          continue

        # Create a topovar reader only for neat output
        topovars = TopovarReader([Common.NeatOutputName], infile)

        # Create a histogram producer
        # NOTE(review): the writer is never explicitly closed here;
        # presumably HistogramWriter saves on its own -- confirm.
        histograms = HistogramWriter('%s/%s.root' % (outdir, basename), mode)
        # Histogram booking (hardcoded not many options really)
        for bins in (400, 200, 100, 50, 25):
          histograms.book('%s_%d' % (Common.NeatOutputName, bins), bins, 0., 1.)

        # Loop over the tree producing histograms of neat output
        for entry in xrange(topovars.getEntries()):
          if entry % 5000 == 0 and entry != 0:
            self.message('Reading %d events.' % entry)
          # Read one event
          event = topovars.read(entry)
          # Fill the histogram
          histograms.fill(getattr(event,Common.NeatOutputName), getattr(event,Common.EventWeight))
Example #6
0
def Evaluate(set):
    """Train a NEAT population on one training set and save the best net.

    Steps, all relative to set['directory']:
      * read the variable names from inputvars.txt (one per line);
      * rewrite neat.config in place, substituting input_nodes with the
        number of variables and output_nodes with 1, then load it;
      * read the training signal and background samples, accumulate them
        in a VariableNormalizer and normalize both samples;
      * run the population for int(set['number_generations']) generations;
      * pickle the winner to winner.dat and write its fitness to
        winner-fitness.txt.

    Side effects: assigns the module globals signalYield, signalSample,
    backgroundYield and backgroundSample (presumably read by
    FitnessFunctionWrapper -- confirm) and replaces
    population.Population.evaluate and chromosome.node_gene_type.
    """
    global signalYield, signalSample, backgroundYield, backgroundSample

    directory = set['directory']
    configname = '%s/neat.config' % directory

    # Reading variable list
    with open('%s/inputvars.txt' % directory) as listing:
        variables = [line.rstrip() for line in listing]

    # Setting the number of input and output in the neat config file.
    # The substituted text is built before the file is reopened for writing.
    with open(configname) as source:
        template = string.Template(source.read())
    settings = template.safe_substitute(input_nodes=len(variables),
                                        output_nodes=1)
    with open(configname, 'w') as target:
        target.write(settings)

    # Read neat configuration file
    config.load(configname)

    print('Training: Reading signal files')

    # Signal topovars
    signals = TopovarReader(variables)

    # Adding signal files
    for sample in Common.TrainingSignals:
        signals.add('%s/%s/%s.root' %
                    (Common.SampleLocation, Common.TrainingSample,
                     Common.filename(set, sample)))

    # Creating a variable normalizer
    normalizer = VariableNormalizer(variables)

    # Saving the population in buffer
    signalSample = signals.sample(compress=True)

    # Adding the sample to the normalizer
    normalizer.add(signalSample)

    # Total weight so far, i.e. the signal only
    signalYield = normalizer.getTotalWeight()

    print('Training: Reading background files')

    # Background topovars
    backgrounds = TopovarReader(variables)

    # Adding background files
    for sample in Common.TrainingBackgrounds:
        backgrounds.add('%s/%s/%s.root' %
                        (Common.SampleLocation, Common.TrainingSample,
                         Common.filename(set, sample)))

    # Saving the population in buffer
    backgroundSample = backgrounds.sample(compress=True)

    # Adding the sample to the normalizer
    normalizer.add(backgroundSample)

    # The normalizer now holds signal + background: subtract the signal part
    backgroundYield = normalizer.getTotalWeight() - signalYield

    # Reporting the normalization
    normalizer.report()

    # Normalization of the variables
    normalizer.normalizeSample(signalSample)
    normalizer.normalizeSample(backgroundSample)

    # NEAT training
    chromosome.node_gene_type = genome.NodeGene
    population.Population.evaluate = FitnessFunctionWrapper
    pop = population.Population()
    pop.epoch(int(set['number_generations']),
              report=True,
              save_best=False,
              checkpoint_interval=None)
    # Last entry of the first statistics series, used as the best network
    winner = pop.stats[0][-1]
    # Single-argument print(...) gives the same output on Python 2 and 3
    print('Training: Number of evaluations: %d' % winner.id)
    print('Training: Best NN fitness: %0.2f' % winner.fitness)

    # Save the best network
    with open('%s/winner.dat' % directory, 'w') as output:
        pickle.dump(winner, output)
    # Save the fitness of the best network
    with open('%s/winner-fitness.txt' % directory, 'w') as output:
        output.write('%f' % winner.fitness)
    def process(self, set):
        """Histogram the NEAT output of every sample tree of a channel.

        The 'sample' parameter (yield, training or testing; default yield)
        selects the <Type>Trees input and <Type>Histograms output
        directories under the Neat scratch area.  When the 'xcheck'
        parameter is set the XCheckTrees/<xcheck> and
        XCheckHistograms/<xcheck> directories are used instead and only the
        nominal (empty) systematic is processed.  For each existing input
        file one output file is written with the NEAT output booked in 400,
        200, 100, 50 and 25 bins over [0, 1], filled with the event weight.

        Parameters:
          set -- channel description; set['channel'] names the channel.

        Raises:
          ProcessorError -- when the 'sample' parameter is not one of
                            training, testing or yield.
        """
        self.message('Processing channel %s' % set['channel'])

        base = '%s/scratch/%s' % (Common.NeatDirectory,
                                  self.getParameter('input'))

        # Directory name prefix for each allowed sample type
        prefixes = {
            'yield': 'Yield',
            'training': 'Training',
            'testing': 'Testing',
        }
        sampletype = self.getParameter('sample', 'yield')
        if sampletype not in prefixes:
            raise ProcessorError(
                'Unknown sample option %s (allowed options: training, testing and yield).'
                % sampletype)

        # Setting the indir and outdir directories; xcheck overrides the type
        xcheck = self.isParameter('xcheck')
        if xcheck:
            indir = '%s/XCheckTrees/%s' % (base, self.getParameter('xcheck'))
            outdir = '%s/XCheckHistograms/%s' % (base,
                                                 self.getParameter('xcheck'))
        else:
            indir = '%s/%sTrees' % (base, prefixes[sampletype])
            outdir = '%s/%sHistograms' % (base, prefixes[sampletype])

        # Check for output directory
        if not os.path.exists(outdir):
            os.makedirs(outdir)

        # File mode for writing histograms
        mode = 'recreate'

        # Create the list of samples: backgrounds, signals, the combined
        # signal (only when there are several signals) and finally the data
        if isinstance(Common.YieldSignals, list):
            samples = Common.YieldBackgrounds + Common.YieldSignals
            if len(Common.YieldSignals) > 1:
                samples = samples + [''.join(Common.YieldSignals)]
        else:
            samples = Common.YieldBackgrounds + [Common.YieldSignals]
        samples = samples + [Common.Data]

        # Loop over all the samples with trees
        for systematic in Common.Systematics + ['']:
            for sample in samples:

                # No systematics in case of xchecks, nor for the samples
                # listed in Common.NoSystematics (QCD and DATA)
                if systematic != '' and (xcheck
                                         or sample in Common.NoSystematics):
                    continue

                self.message('Processing systematic %s samples %s.' %
                             (systematic, sample))

                basename = Common.filename(set, sample, systematic)
                infile = '%s/%s.root' % (indir, basename)

                # Check that the input file exists
                if not os.path.isfile(infile):
                    self.message('Warning missing input file %s skipping ...' %
                                 infile)
                    continue

                # Create a topovar reader only for neat output
                topovars = TopovarReader([Common.NeatOutputName], infile)

                # Create a histogram producer
                # NOTE(review): the writer is never explicitly closed here;
                # presumably HistogramWriter saves on its own -- confirm.
                histograms = HistogramWriter('%s/%s.root' % (outdir, basename),
                                             mode)
                # Histogram booking (hardcoded not many options really)
                for bins in (400, 200, 100, 50, 25):
                    histograms.book('%s_%d' % (Common.NeatOutputName, bins),
                                    bins, 0., 1.)

                # Loop over the tree producing histograms of neat output
                for entry in xrange(topovars.getEntries()):
                    if entry % 5000 == 0 and entry != 0:
                        self.message('Reading %d events.' % entry)
                    # Read one event
                    event = topovars.read(entry)
                    # Fill the histogram
                    histograms.fill(getattr(event, Common.NeatOutputName),
                                    getattr(event, Common.EventWeight))