def train(self, samples, maxRuleSize=9999, mineOnlyClass=None):
    """Train with CPAR on the sample set, returning an AssociationRuleSet.

    Parameters:
        samples: training samples; passed straight to PNData and later to
            remap_index_to_feature.
        maxRuleSize: stored on self.MAX_RULE_SIZE as an upper bound on
            rule size (presumably enforced inside _CPARdfs — confirm).
        mineOnlyClass: when not None, rules are mined only for this class;
            all other classes are skipped.

    Returns:
        An AssociationRuleSet (target accuracy "laplace") with feature
        indices remapped back to features via remap_index_to_feature.
    """
    self.MAX_RULE_SIZE = maxRuleSize
    self.objPNData = PNData(samples)
    self.lstRules = []

    classes = self.objPNData.getClassList()
    log("Dataset has %d classes over %d samples." % (len(classes), len(samples)))

    for current_class in classes:
        # Optional restriction: mine rules for a single class only.
        if mineOnlyClass is not None and current_class != mineOnlyClass:
            continue
        log("Processing class %s" % (current_class))
        self.objPNData.setCurrentClass(current_class)

        # CPAR stopping rule: keep mining this class until the remaining
        # total example weight falls below this fraction of the initial
        # weight for the class.
        dblMinTotalWeight = (self.dblTotalWeightFactor
                             * self.objPNData.getTotalWeight())
        lstAntecedent = []
        while self.objPNData.getTotalWeight() > dblMinTotalWeight:
            self.objPNData.refreshPNAData()
            # No attribute clears the gain threshold: further depth-first
            # search cannot produce a useful rule, so stop early.
            if self.objPNData.noValidGainsinPNarray(self.dblMinGainThreshold):
                break
            self._CPARdfs(self.objPNData.copyPrimes(),
                          lstAntecedent, [current_class])

    self.removeDuplicateRules()

    arset = AssociationRuleSet()
    arset.extend(self.lstRules)
    arset.set_target_accuracy("laplace")
    return self.remap_index_to_feature(arset, samples)