def manageAttrs(self):
    """Rebalance the total number of attributes (statements) held by the trees.

    The budget ``self.maxAttrsCount`` is recomputed as
    ``maxAvgAttrsPerTree * len(treesPool)``.  A deletion pass then asks trees
    to drop their worst statements until the total would be down to 80% of
    the budget, and an extension pass asks trees to expand their best
    statements back up to the full budget.  Trees are drawn with
    ``helpers.proportionalrandom``; a tree whose ``removeWorstStates`` call
    reports failure is removed from the pool altogether.
    """
    reservedShare = .2
    self.maxAttrsCount = self.maxAvgAttrsPerTree * len(self.treesPool)

    def popTrailingRun(sequence):
        # Pop the run of equal items at the end of `sequence` and return
        # (item, run length).  `sequence` must not be empty.
        item = sequence[-1]
        runLength = 0
        while sequence[-1] == item:
            sequence.pop()
            runLength += 1
            if len(sequence) == 0:
                break
        return item, runLength

    def deletionWeight(tree):
        # Many statements per unit of (fitness * size) -> more likely to shrink.
        return tree.lenStatements() / (tree.fitness * tree.numNodes)

    def extensionWeight(tree):
        # Inverse of the deletion weight -> more likely to grow.
        return tree.fitness * tree.numNodes / tree.lenStatements()

    if self.writeLog:
        self.logWriter.writeLine(
            '\nManaging attrs. Initial attrs count = ',
            self.currentAttrsCount, '/', self.maxAttrsCount)
        self.logWriter.timeMarker('manageAttrs', False)

    # --- deletion pass -------------------------------------------------
    excess = int(self.currentAttrsCount
                 - self.maxAttrsCount * (1 - reservedShare))
    deletionSequence = helpers.proportionalrandom(
        self.treesPool, deletionWeight, max(0, excess))
    if self.writeLog:
        self.logWriter.writeLine(
            'Deletion sequence length = ', len(deletionSequence))

    emptiedTrees = 0
    while len(deletionSequence) != 0:
        tree, requested = popTrailingRun(deletionSequence)
        succeeded, _ = tree.removeWorstStates(requested)
        if succeeded:
            # If an entire tree is removed instead, removeTree does the
            # corresponding bookkeeping itself.
            self.currentAttrsCount -= requested
        else:
            self.removeTree(tree)
            emptiedTrees += 1
    if self.writeLog:
        self.logWriter.writeLine(
            emptiedTrees, ' trees removed due to zero attrs')

    # --- extension pass ------------------------------------------------
    shortfall = max(self.maxAttrsCount - self.currentAttrsCount, 0)
    extensionSequence = helpers.proportionalrandom(
        self.treesPool, extensionWeight, shortfall)
    if self.writeLog:
        self.logWriter.writeLine(
            'Extension sequence length = ', len(extensionSequence))

    while len(extensionSequence) != 0:
        tree, requested = popTrailingRun(extensionSequence)
        # The tree reports how many statements it actually added.
        actuallyAdded = tree.expandBestStates(requested)
        self.currentAttrsCount += actuallyAdded

    if self.writeLog:
        self.logWriter.timeMarker('manageAttrs')
        self.logWriter.writeLine(
            'OnAfterSubStep attrs count = ',
            self.currentAttrsCount, '/', self.maxAttrsCount)
        self.logWriter.writeLine('End of attrs management.\n')
def processSamples(self, samples, sumAdditionRates=5.):
    """Process a list of samples and distribute them among the trees.

    Every sample is passed to ``updateWithSample`` with a rate of
    ``sumAdditionRates / len(samples)`` and gets a fresh (empty) entry in
    ``self.samplesPool``.  Trees are then drawn with
    ``helpers.proportionalrandom`` weighted by ``fitness / len(samples)`` of
    the tree; each drawn tree receives as many distinct samples as the
    number of times it was drawn (capped by ``len(samples)``).  Samples that
    ended up in no tree are dropped from the pool again.
    """
    def popTrailingRun(sequence):
        # Pop the run of equal items at the end of `sequence` and return
        # (item, run length).  `sequence` must not be empty.
        item = sequence[-1]
        runLength = 0
        while sequence[-1] == item:
            sequence.pop()
            runLength += 1
            if len(sequence) == 0:
                break
        return item, runLength

    def treeWeight(tree):
        return tree.fitness / len(tree.samples)

    def uniformWeight(_sample):
        # NOTE: weighting samples by the summed fitness of the trees that
        # failed on them was observed to make results drop; the cause is
        # not understood, so uniform weights are used.
        return 1

    for sample in samples:
        # The rate is computed per iteration on purpose: an empty `samples`
        # never evaluates the division.
        self.updateWithSample(sample, sumAdditionRates / len(samples))
        self.samplesPool[sample] = set()

    drawCount = int(round(sumAdditionRates * self.maxSamplesCount))
    treeseq = helpers.proportionalrandom(self.treesPool, treeWeight, drawCount)

    while len(treeseq) != 0:
        tree, timesDrawn = popTrailingRun(treeseq)
        quota = min(timesDrawn, len(samples))
        self.currentSamplesCount += quota
        for samp in helpers.proportionalrandomNoRep(samples, uniformWeight, quota):
            tree.addSample(samp)
            self.samplesPool[samp].add(tree)

    # Forget samples that no tree picked up.
    for sample in samples:
        if len(self.samplesPool[sample]) == 0:
            del self.samplesPool[sample]
def processSamples(self, samples, sumAdditionRates=5.):
    """Process a list of new samples and distribute them among the trees.

    Every sample is passed to ``updateWithSample`` with a rate of
    ``sumAdditionRates / len(samples)`` and gets a fresh (empty) entry in
    ``self.samplesPool``.  Trees are then drawn with
    ``helpers.proportionalrandom`` weighted by ``fitness / len(samples)`` of
    the tree; each drawn tree receives as many distinct samples as the
    number of times it was drawn (capped by ``len(samples)``).  Samples that
    ended up in no tree are dropped from the pool again.

    Raises:
        AssertionError: if a sample is already present in ``samplesPool``
            or a tree refuses a sample (``addSample`` returns a falsy
            value).
    """
    for sample in samples:
        self.updateWithSample(sample, sumAdditionRates / len(samples))
        # If you strongly want to reprocess existing samples, you have at
        # least 4 options:
        #  1) make sure the sample is not added to trees that already have it
        #  2) use lists instead of sets to store samples
        #  3) assign sample weights
        #  4) make a shallow copy of repeated samples right before this loop
        assert sample not in self.samplesPool
        self.samplesPool[sample] = set()

    treeseq = helpers.proportionalrandom(
        self.treesPool,
        lambda x: x.fitness / len(x.samples),
        int(round(sumAdditionRates * self.maxSamplesCount)))

    while len(treeseq) != 0:
        # Equal trees are consumed as one run from the end of the sequence;
        # the run length is the number of samples this tree should receive.
        elem = treeseq[-1]
        count = 0
        while treeseq[-1] == elem:
            treeseq.pop()
            count += 1
            if len(treeseq) == 0:
                break
        count = min(count, len(samples))
        self.currentSamplesCount += count
        # NOTE: weighting samples by the summed fitness of the trees that
        # failed on them was observed to make results drop; the cause is not
        # understood, so uniform weights are used.
        for samp in helpers.proportionalrandomNoRep(samples, lambda x: 1, count):
            # The call must live outside the assert: under `python -O`
            # asserts are stripped and the sample would never be added.
            added = elem.addSample(samp)
            assert added
            self.samplesPool[samp].add(elem)

    # Forget samples that no tree picked up.
    for sample in samples:
        if len(self.samplesPool[sample]) == 0:
            del self.samplesPool[sample]
def generateChromo(self, numNodes, numSamples, numStatements, fitness=1):
    """Build a new chromo (tree) of roughly the requested size.

    The chromo is seeded with up to ``numSamples`` samples drawn uniformly
    from ``self.samplesPool``, trimmed to at most ``numStatements``
    statements via ``removeWorstStates`` and then expanded by up to
    ``numNodes`` nodes.

    Args:
        numNodes: maximal number of ``expandBestNode`` calls.  A float is
            truncated to an int so that averages computed with true
            division can be passed directly.
        numSamples: maximal number of seed samples.
        numStatements: statement count to trim the chromo down to.
        fitness: fitness assigned to the new chromo.

    Returns:
        The new chromo, or ``False`` if it has no nodes after expansion.
    """
    seedCount = min(numSamples, len(self.samplesPool))
    seedSamples = set(helpers.proportionalrandom(
        self.samplesPool.keys(), lambda x: 1, seedCount))
    newCh = chromo(self, seedSamples, self.boolStatements, self.numStatements, 1)

    # Drop surplus statements; never request a negative amount.
    newCh.removeWorstStates(max(newCh.lenStatements() - numStatements, 0))

    # range() rejects floats, so coerce explicitly (no-op for ints).
    for _ in range(int(numNodes)):
        if not newCh.expandBestNode():
            break  # nothing left to expand

    if newCh.numNodes == 0:
        return False
    newCh.fitness = fitness
    return newCh
def manageSamples(self):
    """Remove samples from trees when the total exceeds ``maxSamplesCount``.

    Trees are drawn with ``helpers.proportionalrandom`` weighted by
    ``len(tree.samples) / tree.fitness``; each drawn tree is asked to drop
    as many samples as the number of times it was drawn.  Samples that no
    tree references any more are deleted from ``self.samplesPool``.  A tree
    whose ``removeSamples`` call reports failure is removed from the pool.
    Recombination and spreading of samples are planned but not implemented.
    """
    # TODO: try clustering here. It may be good to cluster pairs with
    #   p ~ distance_between_samples / sum_of_importances, but the distance
    #   must be measured in terms of statements of the statement pool,
    #   proportionally to their importance... maybe.
    # TODO: a place for spreading of existing samples, if it is necessary
    #   given that they already spread via tree recombination.
    # TODO: a rubbish sample may be kept because it happened to be in one
    #   good chromo's list; test it and maybe fix it somehow, e.g. expand
    #   'good' samples in proportion to fitness / len(self.samplesPool[sample]),
    #   but not to the 'best' nodes as that won't fix the initial problem
    #   (maybe uniformly?).

    def popTrailingRun(sequence):
        # Pop the run of equal items at the end of `sequence` and return
        # (item, run length).  `sequence` must not be empty.
        item = sequence[-1]
        runLength = 0
        while sequence[-1] == item:
            sequence.pop()
            runLength += 1
            if len(sequence) == 0:
                break
        return item, runLength

    def deletionWeight(tree):
        return len(tree.samples) / tree.fitness

    if self.writeLog:
        self.logWriter.writeLine(
            '\nManaging samples. Initial samples count = ',
            self.currentSamplesCount, '/', self.maxSamplesCount)
        self.logWriter.timeMarker('manageSamples', False)

    surplus = max(0, self.currentSamplesCount - self.maxSamplesCount)
    deletionSequence = helpers.proportionalrandom(
        self.treesPool, deletionWeight, surplus)
    if self.writeLog:
        self.logWriter.writeLine(
            'Deletion sequence length = ', len(deletionSequence))

    emptiedTrees = 0
    while len(deletionSequence) != 0:
        tree, requested = popTrailingRun(deletionSequence)
        succeeded, removedSamples = tree.removeSamples(requested)
        if not succeeded:
            # removeTree does the sample bookkeeping for a whole tree.
            self.removeTree(tree)
            emptiedTrees += 1
            continue
        self.currentSamplesCount -= requested
        for sample in removedSamples:
            holders = self.samplesPool[sample]
            holders.remove(tree)
            if len(holders) == 0:
                del self.samplesPool[sample]

    if self.writeLog:
        self.logWriter.writeLine(
            emptiedTrees, ' trees removed due to zero samples')
        self.logWriter.timeMarker('manageSamples')
        self.logWriter.writeLine(
            'OnAfterStep samples count = ',
            self.currentSamplesCount, '/', self.maxSamplesCount)
        self.logWriter.writeLine('End of samples management.\n')
def initialise(self):
    """Create the initial pool of trees.

    ``int(maxNodesCount * 0.3)`` chromos are created, each seeded with up to
    ``samplesPerTree`` uniformly drawn samples and expanded by up to
    ``int(1 / 0.3)`` nodes.  Chromos that end up without nodes are skipped.
    For every kept chromo the node/sample/attribute counters and the
    sample -> trees index (``self.samplesPool``) are updated.  Finally,
    samples that no tree references are removed from ``self.samplesPool``.
    ``self.maxAttrsCount`` is set to the planned number of trees.
    """
    rateTrees = .3
    baseFitnessForIdealTree = 1

    if self.writeLog:
        self.logWriter.writeLine('starting initialisation')
        self.logWriter.timeMarker('init', False)

    # "+ 0.49" biases the rounding upwards.
    samplesPerTree = int(round(
        self.maxSamplesCount / (rateTrees * self.maxNodesCount) + 0.49))
    nodesPerTree = int(1 / rateTrees)
    countTrees = int(self.maxNodesCount * rateTrees)
    self.maxAttrsCount = countTrees
    # TODO: may need some details for samples/statements pools once they
    # are implemented.

    for _treeIndex in range(countTrees):
        seedSamples = set(helpers.proportionalrandom(
            self.samplesPool.keys(),
            lambda x: 1,
            min(samplesPerTree, len(self.samplesPool))))
        newCh = chromo(self, seedSamples, self.boolStatements, self.numStatements, 1)

        for _nodeIndex in range(nodesPerTree):
            if not newCh.expandBestNode():
                break  # nothing left to expand
        if newCh.numNodes == 0:
            continue

        newCh.fitness = (
            helpers.getSuccessProbability(newCh, self.samplesPool.keys())
            * baseFitnessForIdealTree)
        self.currentNodesCount += newCh.numNodes
        self.currentSamplesCount += len(newCh.samples)
        self.currentAttrsCount += len(newCh.boolStatements) + len(newCh.numStatements)
        self.treesPool.add(newCh)
        for sample in newCh.samples:
            self.samplesPool[sample].add(newCh)

    # Drop samples no tree uses.  Iterate over a snapshot of the keys:
    # deleting from a dict while iterating its live key view raises
    # RuntimeError on Python 3.
    for sample in list(self.samplesPool.keys()):
        if len(self.samplesPool[sample]) == 0:
            del self.samplesPool[sample]

    if self.writeLog:
        self.logWriter.timeMarker('init')
        self.logWriter.writeLine(
            'init phase end.', len(self.treesPool), 'trees created with',
            nodesPerTree, 'nodes and', samplesPerTree, 'samples each')
def manageTrees(self):
    """Update the pool of trees.

    Runs four phases in order:

    1. remove every tree whose fitness is <= 0;
    2. recombination: draw ``int(maxNodesCount * 0.025 * 2)`` parents
       proportionally to fitness, shuffle them and recombine them in pairs;
    3. expansion: expand distinct trees, drawn proportionally to
       ``fitness * getBestReplacementPotential()``, up to 1.2 times
       ``maxNodesCount`` nodes in total;
    4. pruning: prune nodes from trees drawn proportionally to
       ``numNodes / fitness`` for every node above ``maxNodesCount``; a tree
       whose ``pruneWorstNode`` call fails is removed from the pool.
    """
    recombinationRate = .025
    expandedPoolCapacityRate = 1.2

    if self.writeLog:
        self.logWriter.writeLine(
            '\nManaging trees pool. Initial nodes count = ',
            self.currentNodesCount, '/', self.maxNodesCount,
            ', trees count = ', len(self.treesPool))
        self.logWriter.timeMarker('manageTrees', False)
        self.logWriter.timeMarker('filterNegFitness', False)

    # --- remove all trees with fitness <= 0 ----------------------------
    # Iterate over a copy because removeTree is called during the loop.
    removedCount = 0
    for tree in copy.copy(self.treesPool):
        # TODO: better be a deadline; check if this one is ever reached.
        if tree.fitness <= 0:
            self.removeTree(tree)
            removedCount += 1
    if self.writeLog:
        self.logWriter.timeMarker('filterNegFitness')
        self.logWriter.writeLine(
            removedCount, ' trees removed due to negative fitness')
        self.logWriter.writeLine('\nRecombination phase')
        self.logWriter.timeMarker('recombination', False)

    # --- recombination: add some new trees -----------------------------
    mutationSequence = helpers.proportionalrandom(
        self.treesPool,
        lambda x: (x.fitness),
        int(self.maxNodesCount * recombinationRate * 2))
    if self.writeLog:
        self.logWriter.writeLine(
            'Recombination phase: sequence len =', len(mutationSequence))
    random.shuffle(mutationSequence)
    # TODO: actual mutation for trees may be implemented either here or
    # separately, or not at all since it is simulated by all the
    # alterations with samples and nodes.
    while len(mutationSequence) > 1:
        self.recombine(mutationSequence.pop(), mutationSequence.pop())
    if self.writeLog:
        self.logWriter.timeMarker('recombination')
        self.logWriter.writeLine('End of recombination phase')
        self.logWriter.writeLine('\nExpansion phase')
        self.logWriter.timeMarker('expansion', False)

    # --- expansion -----------------------------------------------------
    # Expand nodes up to maxNodesCount * expandedPoolCapacityRate (actually
    # fewer because repeated draws collapse in the set), prioritised by tree
    # fitness times the expected benefit from expansion.
    expandees = set(helpers.proportionalrandom(
        self.treesPool,
        lambda x: x.fitness * x.getBestReplacementPotential(),
        max(int((self.maxNodesCount * expandedPoolCapacityRate
                 - self.currentNodesCount)), 0)))
    if self.writeLog:
        self.logWriter.writeLine(
            'Expansion phase: sequence len =', len(expandees))
    for tree in expandees:
        if tree.expandBestNode():
            self.currentNodesCount += 1
    if self.writeLog:
        self.logWriter.timeMarker('expansion')
        self.logWriter.writeLine('End of expansion phase')
        self.logWriter.writeLine('\nPruning phase')
        self.logWriter.timeMarker('pruning', False)

    # --- pruning -------------------------------------------------------
    # Remove up to all nodes above maxNodesCount (actually fewer, because a
    # tree may have fewer nodes than requested) from the least effective
    # trees in terms of nodes per fitness; remove emptied trees.
    reducees = helpers.proportionalrandom(
        self.treesPool,
        lambda x: x.numNodes / (x.fitness),
        max(self.currentNodesCount - self.maxNodesCount, 0))
    if self.writeLog:
        self.logWriter.writeLine(
            'Pruning phase: sequence len =', len(reducees))
    prunedAway = 0
    for tree in reducees:
        # A tree may have been removed earlier in this very loop.
        if tree in self.treesPool:
            if tree.pruneWorstNode():
                self.currentNodesCount -= 1
            else:
                # If pruning fails, the node is accounted for in removeTree.
                prunedAway += 1
                self.removeTree(tree)
    if self.writeLog:
        self.logWriter.writeLine(
            prunedAway, ' trees removed in pruning phase')
        self.logWriter.timeMarker('pruning')
        self.logWriter.writeLine('End of pruning phase')

    if self.writeLog:
        self.logWriter.timeMarker('manageTrees')
        # The ', trees count = ' literal must stay on one physical line.
        self.logWriter.writeLine(
            'OnAfterStep nodes count = ', self.currentNodesCount,
            ', trees count = ', len(self.treesPool))
        self.logWriter.writeLine('End of tree management.\n')
def manageTrees(self):
    """Update the pool of trees.

    Runs four phases in order:

    1. remove every tree whose fitness is <= 0, then generate replacement
       chromos (sized after the current pool averages) until the pool holds
       at least 10 trees;
    2. recombination: draw 10 parents proportionally to fitness, shuffle
       them, recombine them in pairs and add every successful child;
    3. expansion: expand trees drawn by ``helpers.proportionalrandomRounds``
       (weight ``fitness * getBestReplacementPotential()``) up to 1.2 times
       ``maxNodesCount`` nodes in total;
    4. pruning: while there are more than ``maxNodesCount`` nodes, prune
       trees drawn proportionally to ``numNodes / fitness``; a tree whose
       ``pruneWorstNode`` call fails is removed from the pool.
    """
    expandedPoolCapacityRate = 1.2
    minTreesCount = 10

    if self.writeLog:
        self.logWriter.writeLine(
            '\nManaging trees pool. Initial nodes count = ',
            self.currentNodesCount, '/', self.maxNodesCount,
            ', trees count = ', len(self.treesPool))
        self.logWriter.timeMarker('manageTrees', False)
        self.logWriter.timeMarker('negative fitness removal', False)

    # --- negative fitness removal and refill ---------------------------
    removedCount = 0
    replacedCount = 0
    # Iterate over a copy because removeTree is called during the loop.
    for tree in copy.copy(self.treesPool):
        if tree.fitness <= 0:
            self.removeTree(tree)
            removedCount += 1

    # NOTE(review): if the pool is empty here the averages below raise
    #   ZeroDivisionError, and if generateChromo keeps returning a falsy
    #   value this loop never ends - confirm whether either can happen.
    while len(self.treesPool) < minTreesCount:
        poolSize = len(self.treesPool)
        # Floor division keeps the averages integral on Python 3 as well
        # (generateChromo feeds the first one to range()).
        # NOTE(review): generateChromo is declared as (numNodes, numSamples,
        #   numStatements, fitness); the second argument here is the tree
        #   count and the remaining averages look shifted by one position -
        #   confirm the intended order.  Kept as in the original.
        newCh = self.generateChromo(
            self.currentNodesCount // poolSize,
            poolSize,
            self.currentSamplesCount // poolSize,
            self.currentAttrsCount // poolSize)
        if newCh:
            newCh.fitness = helpers.getSuccessProbability(
                newCh, self.samplesPool.keys())
            self.addChromo(newCh)
            replacedCount += 1
    if self.writeLog:
        self.logWriter.timeMarker('negative fitness removal')
        self.logWriter.writeLine(
            removedCount, 'trees removed for negative fitness, ',
            replacedCount, ' replaced')
        self.logWriter.writeLine('\nRecombination phase')
        self.logWriter.timeMarker('recombination', False)

    # --- recombination: add some new trees -----------------------------
    treeGrowthRate = 5
    mutationSequence = helpers.proportionalrandom(
        self.treesPool, lambda x: (x.fitness), 2 * treeGrowthRate)
    if self.writeLog:
        self.logWriter.writeLine(
            'Recombination phase: sequence len =', len(mutationSequence))
    random.shuffle(mutationSequence)
    # TODO: actual mutation for trees may be implemented either here or
    # separately, or not at all since it is simulated by all the
    # alterations with samples and nodes.
    while len(mutationSequence) > 1:
        newChromo = self.recombine(
            mutationSequence.pop(), mutationSequence.pop())
        if newChromo:
            self.addChromo(newChromo)
    if self.writeLog:
        self.logWriter.timeMarker('recombination')
        self.logWriter.writeLine('End of recombination phase')
        self.logWriter.writeLine('\nExpansion phase')
        self.logWriter.timeMarker('expansion', False)

    # --- expansion -----------------------------------------------------
    # Expand nodes up to maxNodesCount * expandedPoolCapacityRate,
    # prioritised by tree fitness times the expected benefit from expansion.
    # Per the original author's note, proportionalrandomRounds recalculates
    # the priorities every round (it replaced a set(proportionalrandom(...))).
    expandees = helpers.proportionalrandomRounds(
        self.treesPool,
        lambda x: x.fitness * x.getBestReplacementPotential(),
        max(int((self.maxNodesCount * expandedPoolCapacityRate
                 - self.currentNodesCount)), 0))
    if self.writeLog:
        self.logWriter.writeLine(
            'Expansion phase: sequence len =', len(expandees))
    for tree in expandees:
        if tree.expandBestNode():
            self.currentNodesCount += 1
    if self.writeLog:
        self.logWriter.timeMarker('expansion')
        self.logWriter.writeLine('End of expansion phase')
        self.logWriter.writeLine('\nPruning phase')
        self.logWriter.timeMarker('pruning', False)

    # --- pruning -------------------------------------------------------
    # Remove all nodes above maxNodesCount from the least effective trees
    # in terms of nodes per fitness; remove emptied trees.
    if self.writeLog:
        self.logWriter.writeLine(
            'Pruning phase: initial nodes count =',
            self.currentNodesCount, '/', self.maxNodesCount)
    prunedAway = 0
    while self.currentNodesCount > self.maxNodesCount:
        reducees = helpers.proportionalrandom(
            self.treesPool,
            lambda x: x.numNodes / (x.fitness),
            max(self.currentNodesCount - self.maxNodesCount, 0))
        if not reducees:
            # No candidates means no progress is possible; without this
            # guard the loop would spin forever.
            break
        for tree in reducees:
            # A tree may have been removed earlier in this very loop.
            if tree in self.treesPool:
                if tree.pruneWorstNode():
                    self.currentNodesCount -= 1
                else:
                    # If pruning fails, removeTree is expected to account
                    # for the removed nodes.
                    prunedAway += 1
                    self.removeTree(tree)
    if self.writeLog:
        self.logWriter.writeLine(
            prunedAway, ' trees removed in pruning phase')
        self.logWriter.timeMarker('pruning')
        self.logWriter.writeLine('End of pruning phase')

    if self.writeLog:
        self.logWriter.timeMarker('manageTrees')
        self.logWriter.writeLine(
            'OnAfterStep nodes count = ', self.currentNodesCount,
            ', trees count = ', len(self.treesPool))
        self.logWriter.writeLine('End of tree management.\n')