def interface(self, set, indir, outdir):
    """Prepare the variable list and random training samplings in ``outdir``.

    Parameters:
        set: channel description, passed through to ``channelName``.
        indir: when truthy, the variable list is copied from
            ``indir/Common.Selectvars``; otherwise the default list under
            the scratch area named by the ``topovars`` parameter is used.
        outdir: directory receiving ``Common.Inputvars``, ``signals.txt``
            and ``backgrounds.txt``.
    """
    # Copy the variable list into the output directory
    vfilename = '%s/%s' % (outdir, Common.Inputvars)
    if indir:
        source = '%s/%s' % (indir, Common.Selectvars)
    else:
        # No input directory: fall back to the per-channel topovars list
        source = '%s/scratch/%s/%s.txt' % (
            Common.NeatDirectory, self.getParameter('topovars'), channelName(set)
        )
    shutil.copyfile(source, vfilename)

    # Reading variable list (the handle is closed as soon as it is consumed)
    with open(vfilename) as handle:
        variables = [line.rstrip() for line in handle]

    # Signal and background follow the same recipe; only the progress
    # message, the sample list and the output file name differ.
    recipes = (
        ('Reading signal files', Common.TrainingSignals, '%s/signals.txt'),
        ('Reading background files', Common.TrainingBackgrounds, '%s/backgrounds.txt'),
    )
    for text, samples, target in recipes:
        reader = TopovarReader(variables)
        self.message(text)
        # Adding the input files of every sample
        for sample in samples:
            reader.add('%s/%s/%s_zero_Topo_%s.root' % (
                Common.SampleLocation, Common.TrainingSample,
                channelName(set, sample), Common.TrainingSample
            ))
        # Create and save a random sampling
        reader.saveRandomSampling(target % outdir, Common.RuleFitTrainingEvents)
def process(self, set, lock=None):
    """Select the fittest training of a channel and store it in ``winner.info``.

    Every ``TrainingNNNNN`` directory of the channel (1..Common.NeatNumberTries)
    is inspected; trainings lacking ``neat.config`` or ``winner.dat`` are
    reported and skipped. The candidate with the highest ``fitness`` is
    pickled to ``<channel directory>/winner.info`` together with the variable
    list and the normalization constants.

    Parameters:
        set: mapping describing the channel; ``set['channel']`` is read here.
        lock: unused, kept for interface compatibility.
    """
    channel = set['channel']
    # Print channel being processed
    self.message('Processing channel %s.' % channel)

    # Placeholder for winner
    winner = None
    # Variable list, read once from the first usable training.
    # NOTE(review): assumes all trainings of a channel share the same
    # inputvars.txt -- confirm.
    variables = None
    # Variable normalization (filled once, shared by every candidate)
    aves = {}
    stds = {}

    # Output directory; it does not depend on the training counter
    outdir = '%s/scratch/%s/Trainings/%s' % (
        Common.NeatDirectory, self.getParameter('input'), channel
    )

    # Loop over the training sets
    for counter in xrange(1, Common.NeatNumberTries + 1):
        indir = '%s/Training%05d' % (outdir, counter)

        # Look for missing files
        missing = [
            name for name in ('neat.config', 'winner.dat')
            if not os.path.isfile('%s/%s' % (indir, name))
        ]
        for name in missing:
            self.message('%s does not exist in %s.' % (name, indir))
        if missing:
            continue

        # Read the variable list regardless of the normalization setting, so
        # that the candidate record below can always reference it.
        if variables is None:
            with open('%s/inputvars.txt' % indir) as handle:
                variables = [line.rstrip() for line in handle]

        # Compute the normalization if needed
        if not aves and self.getParameter('normalization', 'true') == 'true':
            # Reading training sample for computing normalization
            training = TopovarReader(variables)
            for sample in Common.TrainingBackgrounds + Common.TrainingSignals:
                training.add('%s/%s/%s.root' % (
                    Common.SampleLocation, Common.TrainingSample, filename(set, sample)
                ))
            normalizer = VariableNormalizer(variables)
            normalizer.add(training.sample(True))
            normalizer.report()
            for variable in variables:
                aves[variable], stds[variable] = normalizer(variable)

        # Read the winner of this training.
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # the scratch area is presumably trusted -- confirm.
        with open('%s/winner.dat' % indir) as handle:
            net = pickle.load(handle)

        # Winner information
        candidate = {
            'training': 'Training%05d' % counter,
            'fitness': net.fitness,
            'variables': variables,
            'aves': aves,
            'stds': stds
        }

        if winner is None or candidate['fitness'] > winner['fitness']:
            self.message('Winner candidate for channel %s found in training %s with fitness %.4g' % (
                channel, candidate['training'], candidate['fitness']
            ))
            winner = candidate

    # Nothing usable was found: do not write an empty winner record
    if winner is None:
        self.message('No valid training found for channel %s, winner.info not written.' % channel)
        return

    # Write winner file
    with open('%s/winner.info' % outdir, 'w') as handle:
        pickle.dump(winner, handle)
def Evaluate(set):
    """Run one NEAT training in ``set['directory']`` and save its winner.

    Steps: read ``inputvars.txt``, fill the node counts into ``neat.config``,
    load signal and background samples, normalize them, evolve the population
    for ``set['number_generations']`` generations, then write ``winner.dat``
    (pickled best network) and ``winner-fitness.txt``.

    The module-level globals ``signalYield``, ``signalSample``,
    ``backgroundYield`` and ``backgroundSample`` are assigned here
    (presumably consumed by ``FitnessFunctionWrapper`` -- confirm).

    Parameters:
        set: mapping with at least ``'directory'`` and
            ``'number_generations'``; also passed to ``Common.filename``.
    """
    global signalYield, signalSample, backgroundYield, backgroundSample

    directory = set['directory']

    # Reading variable list
    with open('%s/inputvars.txt' % directory) as handle:
        variables = [line.rstrip() for line in handle]

    # Setting the number of input and output in the neat config file.
    # The new content is rendered before the file is reopened for writing,
    # so a failure cannot leave a truncated configuration behind.
    config_path = '%s/neat.config' % directory
    with open(config_path) as handle:
        template = string.Template(handle.read())
    rendered = template.safe_substitute(input_nodes=len(variables), output_nodes=1)
    with open(config_path, 'w') as handle:
        handle.write(rendered)

    # Read neat configuration file
    config.load(config_path)

    print('Training: Reading signal files')

    # Signal topovars
    signals = TopovarReader(variables)
    for sample in Common.TrainingSignals:
        signals.add('%s/%s/%s.root' % (
            Common.SampleLocation, Common.TrainingSample, Common.filename(set, sample)
        ))

    # Creating a variable normalizer
    normalizer = VariableNormalizer(variables)

    # Saving the population in buffer and adding it to the normalizer
    signalSample = signals.sample(compress=True)
    normalizer.add(signalSample)

    # Total weight so far is the signal yield
    signalYield = normalizer.getTotalWeight()

    print('Training: Reading background files')

    # Background topovars
    backgrounds = TopovarReader(variables)
    for sample in Common.TrainingBackgrounds:
        backgrounds.add('%s/%s/%s.root' % (
            Common.SampleLocation, Common.TrainingSample, Common.filename(set, sample)
        ))

    # Saving the population in buffer and adding it to the normalizer
    backgroundSample = backgrounds.sample(compress=True)
    normalizer.add(backgroundSample)

    # The normalizer now holds signal + background; subtract the signal part
    backgroundYield = normalizer.getTotalWeight() - signalYield

    # Reporting the normalization
    normalizer.report()

    # Normalization of the variables
    normalizer.normalizeSample(signalSample)
    normalizer.normalizeSample(backgroundSample)

    # NEAT training
    chromosome.node_gene_type = genome.NodeGene
    population.Population.evaluate = FitnessFunctionWrapper
    pop = population.Population()
    pop.epoch(int(set['number_generations']), report=True, save_best=False,
              checkpoint_interval=None)

    winner = pop.stats[0][-1]

    # Single-argument print(...) behaves the same as the print statement
    print('Training: Number of evaluations: %d' % winner.id)
    print('Training: Best NN fitness: %0.2f' % winner.fitness)

    # Save the best network
    with open('%s/winner.dat' % directory, 'w') as handle:
        pickle.dump(winner, handle)

    # Save the fitness of the best network
    with open('%s/winner-fitness.txt' % directory, 'w') as handle:
        handle.write('%f' % winner.fitness)
def interface(self, set, indir, outdir):
    """Prepare the variable list and random training samplings in ``outdir``.

    Parameters:
        set: channel description, passed through to ``channelName``.
        indir: when truthy, the variable list is copied from
            ``indir/Common.Selectvars``; otherwise the default list under
            the scratch area named by the ``topovars`` parameter is used.
        outdir: directory receiving ``Common.Inputvars``, ``signals.txt``
            and ``backgrounds.txt``.
    """
    # Copy the variable list into the output directory
    vfilename = '%s/%s' % (outdir, Common.Inputvars)
    if indir:
        source = '%s/%s' % (indir, Common.Selectvars)
    else:
        # No input directory: fall back to the per-channel topovars list
        source = '%s/scratch/%s/%s.txt' % (
            Common.NeatDirectory, self.getParameter('topovars'), channelName(set)
        )
    shutil.copyfile(source, vfilename)

    # Reading variable list (the handle is closed as soon as it is consumed)
    with open(vfilename) as handle:
        variables = [line.rstrip() for line in handle]

    # Signal and background follow the same recipe; only the progress
    # message, the sample list and the output file name differ.
    recipes = (
        ('Reading signal files', Common.TrainingSignals, '%s/signals.txt'),
        ('Reading background files', Common.TrainingBackgrounds, '%s/backgrounds.txt'),
    )
    for text, samples, target in recipes:
        reader = TopovarReader(variables)
        self.message(text)
        # Adding the input files of every sample
        for sample in samples:
            reader.add('%s/%s/%s_zero_Topo_%s.root' % (
                Common.SampleLocation, Common.TrainingSample,
                channelName(set, sample), Common.TrainingSample
            ))
        # Create and save a random sampling
        reader.saveRandomSampling(target % outdir, Common.RuleFitTrainingEvents)
def process(self, set):
    """Histogram the NEAT output of every sample tree of a channel.

    The ``sample`` parameter (``yield`` by default, or ``training`` /
    ``testing``) selects the tree and histogram directories; an ``xcheck``
    parameter overrides both. For each systematic and sample whose input
    file exists, an output file is opened through ``HistogramWriter`` and
    filled with the NEAT output weighted by the event weight.

    Parameters:
        set: mapping describing the channel; ``set['channel']`` is read
            here and the mapping is passed to ``Common.filename``.

    Raises:
        ProcessorError: when ``sample`` is not training, testing or yield.
    """
    self.message('Processing channel %s' % set['channel'])

    # Values substituted into every directory pattern below
    location = (Common.NeatDirectory, self.getParameter('input'))

    # Pick the input/output directory patterns from the sample type
    sampletype = self.getParameter('sample', 'yield')
    if sampletype == 'training':
        tree_pattern = '%s/scratch/%s/TrainingTrees'
        histogram_pattern = '%s/scratch/%s/TrainingHistograms'
    elif sampletype == 'testing':
        tree_pattern = '%s/scratch/%s/TestingTrees'
        histogram_pattern = '%s/scratch/%s/TestingHistograms'
    elif sampletype == 'yield':
        tree_pattern = '%s/scratch/%s/YieldTrees'
        histogram_pattern = '%s/scratch/%s/YieldHistograms'
    else:
        raise ProcessorError('Unknown sample option %s (allowed options: training, testing and yield).' % sampletype)
    indir = tree_pattern % location
    outdir = histogram_pattern % location

    # A cross check replaces both directories
    if self.isParameter('xcheck'):
        xcheck_location = location + (self.getParameter('xcheck'),)
        indir = '%s/scratch/%s/XCheckTrees/%s' % xcheck_location
        outdir = '%s/scratch/%s/XCheckHistograms/%s' % xcheck_location

    # Make sure the output directory is there
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # File mode for writing histograms
    mode = 'recreate'

    # Sample list: backgrounds, signals (plus their combination when there
    # are several), and finally the data
    signals = Common.YieldSignals
    if type(signals) == list:
        samples = Common.YieldBackgrounds + signals
        if len(signals) > 1:
            samples = samples + [''.join(signals)]
    else:
        samples = Common.YieldBackgrounds + [signals]
    samples = samples + [Common.Data]

    output_name = Common.NeatOutputName

    # Loop over all the samples with trees
    for systematic in Common.Systematics + ['']:
        for sample in samples:
            # No systematics for the samples listed in NoSystematics,
            # and none at all in case of xchecks
            if systematic != '' and (sample in Common.NoSystematics
                                     or self.isParameter('xcheck')):
                continue

            self.message('Processing systematic %s samples %s.' % (systematic, sample))

            basename = Common.filename(set, sample, systematic)
            infile = '%s/%s.root' % (indir, basename)

            # Skip samples without an input file
            if not os.path.isfile(infile):
                self.message('Warning missing input file %s skipping ...' % infile)
                continue

            # Topovar reader restricted to the neat output
            topovars = TopovarReader([output_name], infile)

            # Histogram producer for this sample
            outfile = '%s/%s.root' % (outdir, Common.filename(set, sample, systematic))
            histograms = HistogramWriter(outfile, mode)

            # Histogram booking: same [0, 1] range at several binnings
            for bins in (400, 200, 100, 50, 25):
                histograms.book('%s_%d' % (output_name, bins), bins, 0., 1.)

            # Loop over the tree producing histograms of neat output
            for entry in xrange(topovars.getEntries()):
                # Progress report every 5000 events (not at entry 0)
                if entry != 0 and entry % 5000 == 0:
                    self.message('Reading %d events.' % entry)
                event = topovars.read(entry)
                histograms.fill(getattr(event, Common.NeatOutputName),
                                getattr(event, Common.EventWeight))
def Evaluate(set):
    """Run one NEAT training in ``set['directory']`` and save its winner.

    Steps: read ``inputvars.txt``, fill the node counts into ``neat.config``,
    load signal and background samples, normalize them, evolve the population
    for ``set['number_generations']`` generations, then write ``winner.dat``
    (pickled best network) and ``winner-fitness.txt``.

    The module-level globals ``signalYield``, ``signalSample``,
    ``backgroundYield`` and ``backgroundSample`` are assigned here
    (presumably consumed by ``FitnessFunctionWrapper`` -- confirm).

    Parameters:
        set: mapping with at least ``'directory'`` and
            ``'number_generations'``; also passed to ``Common.filename``.
    """
    global signalYield, signalSample, backgroundYield, backgroundSample

    directory = set['directory']

    # Reading variable list
    with open('%s/inputvars.txt' % directory) as handle:
        variables = [line.rstrip() for line in handle]

    # Setting the number of input and output in the neat config file.
    # The new content is rendered before the file is reopened for writing,
    # so a failure cannot leave a truncated configuration behind.
    config_path = '%s/neat.config' % directory
    with open(config_path) as handle:
        template = string.Template(handle.read())
    rendered = template.safe_substitute(input_nodes=len(variables), output_nodes=1)
    with open(config_path, 'w') as handle:
        handle.write(rendered)

    # Read neat configuration file
    config.load(config_path)

    print('Training: Reading signal files')

    # Signal topovars
    signals = TopovarReader(variables)
    for sample in Common.TrainingSignals:
        signals.add('%s/%s/%s.root' % (
            Common.SampleLocation, Common.TrainingSample, Common.filename(set, sample)
        ))

    # Creating a variable normalizer
    normalizer = VariableNormalizer(variables)

    # Saving the population in buffer and adding it to the normalizer
    signalSample = signals.sample(compress=True)
    normalizer.add(signalSample)

    # Total weight so far is the signal yield
    signalYield = normalizer.getTotalWeight()

    print('Training: Reading background files')

    # Background topovars
    backgrounds = TopovarReader(variables)
    for sample in Common.TrainingBackgrounds:
        backgrounds.add('%s/%s/%s.root' % (
            Common.SampleLocation, Common.TrainingSample, Common.filename(set, sample)
        ))

    # Saving the population in buffer and adding it to the normalizer
    backgroundSample = backgrounds.sample(compress=True)
    normalizer.add(backgroundSample)

    # The normalizer now holds signal + background; subtract the signal part
    backgroundYield = normalizer.getTotalWeight() - signalYield

    # Reporting the normalization
    normalizer.report()

    # Normalization of the variables
    normalizer.normalizeSample(signalSample)
    normalizer.normalizeSample(backgroundSample)

    # NEAT training
    chromosome.node_gene_type = genome.NodeGene
    population.Population.evaluate = FitnessFunctionWrapper
    pop = population.Population()
    pop.epoch(int(set['number_generations']), report=True, save_best=False,
              checkpoint_interval=None)

    winner = pop.stats[0][-1]

    # Single-argument print(...) behaves the same as the print statement
    print('Training: Number of evaluations: %d' % winner.id)
    print('Training: Best NN fitness: %0.2f' % winner.fitness)

    # Save the best network
    with open('%s/winner.dat' % directory, 'w') as handle:
        pickle.dump(winner, handle)

    # Save the fitness of the best network
    with open('%s/winner-fitness.txt' % directory, 'w') as handle:
        handle.write('%f' % winner.fitness)
def process(self, set):
    """Histogram the NEAT output of every sample tree of a channel.

    The ``sample`` parameter (``yield`` by default, or ``training`` /
    ``testing``) selects the tree and histogram directories; an ``xcheck``
    parameter overrides both. For each systematic and sample whose input
    file exists, an output file is opened through ``HistogramWriter`` and
    filled with the NEAT output weighted by the event weight.

    Parameters:
        set: mapping describing the channel; ``set['channel']`` is read
            here and the mapping is passed to ``Common.filename``.

    Raises:
        ProcessorError: when ``sample`` is not training, testing or yield.
    """
    self.message('Processing channel %s' % set['channel'])

    # Values substituted into every directory pattern below
    location = (Common.NeatDirectory, self.getParameter('input'))

    # Pick the input/output directory patterns from the sample type
    sampletype = self.getParameter('sample', 'yield')
    if sampletype == 'training':
        tree_pattern = '%s/scratch/%s/TrainingTrees'
        histogram_pattern = '%s/scratch/%s/TrainingHistograms'
    elif sampletype == 'testing':
        tree_pattern = '%s/scratch/%s/TestingTrees'
        histogram_pattern = '%s/scratch/%s/TestingHistograms'
    elif sampletype == 'yield':
        tree_pattern = '%s/scratch/%s/YieldTrees'
        histogram_pattern = '%s/scratch/%s/YieldHistograms'
    else:
        raise ProcessorError(
            'Unknown sample option %s (allowed options: training, testing and yield).'
            % sampletype)
    indir = tree_pattern % location
    outdir = histogram_pattern % location

    # A cross check replaces both directories
    if self.isParameter('xcheck'):
        xcheck_location = location + (self.getParameter('xcheck'),)
        indir = '%s/scratch/%s/XCheckTrees/%s' % xcheck_location
        outdir = '%s/scratch/%s/XCheckHistograms/%s' % xcheck_location

    # Make sure the output directory is there
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # File mode for writing histograms
    mode = 'recreate'

    # Sample list: backgrounds, signals (plus their combination when there
    # are several), and finally the data
    signals = Common.YieldSignals
    if type(signals) == list:
        samples = Common.YieldBackgrounds + signals
        if len(signals) > 1:
            samples = samples + [''.join(signals)]
    else:
        samples = Common.YieldBackgrounds + [signals]
    samples = samples + [Common.Data]

    output_name = Common.NeatOutputName

    # Loop over all the samples with trees
    for systematic in Common.Systematics + ['']:
        for sample in samples:
            # No systematics for the samples listed in NoSystematics,
            # and none at all in case of xchecks
            if systematic != '' and (sample in Common.NoSystematics
                                     or self.isParameter('xcheck')):
                continue

            self.message('Processing systematic %s samples %s.' % (systematic, sample))

            basename = Common.filename(set, sample, systematic)
            infile = '%s/%s.root' % (indir, basename)

            # Skip samples without an input file
            if not os.path.isfile(infile):
                self.message('Warning missing input file %s skipping ...' % infile)
                continue

            # Topovar reader restricted to the neat output
            topovars = TopovarReader([output_name], infile)

            # Histogram producer for this sample
            outfile = '%s/%s.root' % (outdir, Common.filename(set, sample, systematic))
            histograms = HistogramWriter(outfile, mode)

            # Histogram booking: same [0, 1] range at several binnings
            for bins in (400, 200, 100, 50, 25):
                histograms.book('%s_%d' % (output_name, bins), bins, 0., 1.)

            # Loop over the tree producing histograms of neat output
            for entry in xrange(topovars.getEntries()):
                # Progress report every 5000 events (not at entry 0)
                if entry != 0 and entry % 5000 == 0:
                    self.message('Reading %d events.' % entry)
                event = topovars.read(entry)
                histograms.fill(getattr(event, Common.NeatOutputName),
                                getattr(event, Common.EventWeight))