Esempio n. 1
0
	def _CPARdfs(self,objPNDatacopy,lstAntecedent,lstConsequent):
		"""Grow a rule antecedent depth-first and emit a rule where growth stops.

		Args:
			objPNDatacopy: working data object for this branch; its gains are
				recalculated here and its weights are updated when a rule is
				emitted.
			lstAntecedent: attributes chosen so far on this branch. Sorted in
				place when a rule is emitted from it.
			lstConsequent: consequent passed unchanged to every emitted rule.

		Returns:
			None. Rules are recorded through self.addRule.
		"""
		# Evaluated before the gains are touched, exactly as the size check
		# has always been ordered.
		size_cap_hit = len(lstAntecedent) == self.MAX_RULE_SIZE

		# Fetch the best gain together with the attributes selected for
		# self.dblGainSimilarityRatio.
		objPNDatacopy.recalculateGains()
		best_gain, candidates = objPNDatacopy.getBestGainAttributes(self.dblGainSimilarityRatio)

		if best_gain > self.dblMinGainThreshold and not size_cap_hit:
			# NOTE(review): an older comment here described re-checking that
			# the attribute is still within tolerance after deeper searches;
			# no such check is performed by this code.
			for attribute in candidates:
				extended = deepcopy(lstAntecedent)
				extended.append(attribute)
				# Each branch works on its own copy with the chosen attribute
				# zeroed in lstAprime and restricted to the extended antecedent.
				branch_data = objPNDatacopy.copyPrimes()
				branch_data.lstAprime[attribute] = 0
				branch_data.removeExamplesNotSatisfying(extended)
				self._CPARdfs(branch_data,extended,lstConsequent)
			return

		if len(lstAntecedent) == 0:
			log("Empty antecedent.")
			return

		# Growth stopped on a non-empty antecedent: turn it into a rule.
		lstAntecedent.sort()
		# Accuracy comes from self.objPNData, not from the branch copy.
		laplace = self.objPNData.getLaPlaceAccuracy(lstAntecedent,lstConsequent)
		rule = AssociationRule(lstAntecedent, lstConsequent, {"laplace":laplace})
		objPNDatacopy.updateWeights(rule,self.dblDecayFactor)
		self.addRule(rule)
Esempio n. 2
0
    def train(self, samples, maxRuleSize=9999, mineOnlyClass=None):
        """Train with CPAR on the sample set and return the mined rule set.

        Args:
            samples: training samples; passed to PNData and later to
                self.remap_index_to_feature.
            maxRuleSize: stored as self.MAX_RULE_SIZE, the antecedent length
                at which the depth-first search stops extending a rule.
            mineOnlyClass: when not None, only the class equal to this value
                is mined; every other class is skipped.

        Returns:
            Whatever self.remap_index_to_feature returns for an
            AssociationRuleSet holding the de-duplicated rules with
            "laplace" as target accuracy.
        """
        self.MAX_RULE_SIZE = maxRuleSize
        self.objPNData = PNData(samples)
        self.lstRules = []
        classes = self.objPNData.getClassList()

        log("Dataset has %d classes over %d samples." %
            (len(classes), len(samples)))
        for current_class in classes:
            # Identity test against None so a label with unusual equality
            # semantics cannot be mistaken for "no filter".
            if mineOnlyClass is not None and current_class != mineOnlyClass:
                continue
            # One-element tuple: a tuple-valued class label would otherwise
            # be unpacked as the format arguments and raise TypeError.
            log("Processing class %s" % (current_class,))
            self.objPNData.setCurrentClass(current_class)
            # Mining for this class stops once the total weight has dropped
            # to this fraction of its starting value.
            dblMinTotalWeight = (self.dblTotalWeightFactor *
                                 self.objPNData.getTotalWeight())
            while self.objPNData.getTotalWeight() > dblMinTotalWeight:
                self.objPNData.refreshPNAData()
                if self.objPNData.noValidGainsinPNarray(
                        self.dblMinGainThreshold):
                    break
                # Every search starts from its own empty antecedent, so the
                # in-place sort done by _CPARdfs never touches shared state.
                self._CPARdfs(self.objPNData.copyPrimes(), [],
                              [current_class])
        self.removeDuplicateRules()
        arset = AssociationRuleSet()
        arset.extend(self.lstRules)
        arset.set_target_accuracy("laplace")
        return self.remap_index_to_feature(arset, samples)
Esempio n. 3
0
	def train(self,samples,maxRuleSize=9999,mineOnlyClass=None):
		"""Train with CPAR on the sample set and return the mined rule set.

		Args:
			samples: training samples; passed to PNData and later to
				self.remap_index_to_feature.
			maxRuleSize: stored as self.MAX_RULE_SIZE for the rule search.
			mineOnlyClass: when not None, only the class equal to this value
				is mined; every other class is skipped.

		Returns:
			Whatever self.remap_index_to_feature returns for an
			AssociationRuleSet holding the de-duplicated rules with
			"laplace" as target accuracy.
		"""
		self.MAX_RULE_SIZE = maxRuleSize
		self.objPNData = PNData(samples)
		self.lstRules = []
		classes = self.objPNData.getClassList()

		log("Dataset has %d classes over %d samples."%(len(classes),len(samples)))
		for current_class in classes:
			# Compare with None by identity; the label comparison only runs
			# when a filter was actually supplied.
			if mineOnlyClass is not None and current_class != mineOnlyClass:
				continue
			# Wrap the label in a 1-tuple so a tuple-valued class label is
			# formatted as a single value instead of raising TypeError.
			log("Processing class %s"%(current_class,))
			self.objPNData.setCurrentClass(current_class)
			# Keep mining this class until the total weight has fallen to
			# this fraction of its starting value.
			dblMinTotalWeight = self.dblTotalWeightFactor * self.objPNData.getTotalWeight()
			while self.objPNData.getTotalWeight() > dblMinTotalWeight:
				self.objPNData.refreshPNAData()
				if self.objPNData.noValidGainsinPNarray(self.dblMinGainThreshold):
					break
				# A fresh empty antecedent per search keeps the searches
				# independent of one another.
				self._CPARdfs(self.objPNData.copyPrimes(),[],[current_class])
		self.removeDuplicateRules()
		arset = AssociationRuleSet()
		arset.extend(self.lstRules)
		arset.set_target_accuracy("laplace")
		return self.remap_index_to_feature(arset,samples)
Esempio n. 4
0
    def _CPARdfs(self, objPNDatacopy, lstAntecedent, lstConsequent):
        """Depth-first search step that extends an antecedent or emits a rule.

        Args:
            objPNDatacopy: data object private to this search branch; gains
                are recalculated on it and its weights are updated when a
                rule is produced.
            lstAntecedent: attributes selected so far; sorted in place if a
                rule is produced from it.
            lstConsequent: consequent handed to each produced rule as-is.

        Returns:
            None. Produced rules are registered via self.addRule.
        """
        at_size_limit = (len(lstAntecedent) == self.MAX_RULE_SIZE)

        # Ask for the top gain and the attributes selected for
        # self.dblGainSimilarityRatio, after refreshing the gains.
        objPNDatacopy.recalculateGains()
        top_gain, near_best = objPNDatacopy.getBestGainAttributes(
            self.dblGainSimilarityRatio)

        if top_gain > self.dblMinGainThreshold and not at_size_limit:
            # Recurse once per candidate attribute, each on its own copies.
            # NOTE(review): the previous comment spoke of re-checking that the
            # attribute is still within 1% here; the code makes no such check.
            for candidate in near_best:
                grown = deepcopy(lstAntecedent)
                grown.append(candidate)
                child_data = objPNDatacopy.copyPrimes()
                child_data.lstAprime[candidate] = 0
                child_data.removeExamplesNotSatisfying(grown)
                self._CPARdfs(child_data, grown, lstConsequent)
        elif len(lstAntecedent) > 0:
            # Search ended on a usable antecedent: record it as a rule.
            lstAntecedent.sort()
            # Accuracy is taken from self.objPNData, not the branch copy.
            accuracy = self.objPNData.getLaPlaceAccuracy(
                lstAntecedent, lstConsequent)
            new_rule = AssociationRule(
                lstAntecedent, lstConsequent, {"laplace": accuracy})
            objPNDatacopy.updateWeights(new_rule, self.dblDecayFactor)
            self.addRule(new_rule)
        else:
            log("Empty antecedent.")