예제 #1
0
파일: CPARTrainer.py 프로젝트: pythseq/PICA
    def train(self, samples, maxRuleSize=9999, mineOnlyClass=None):
        """Train with CPAR on the sample set, returning an AssociationRuleSet.

        Rules are mined one class at a time and accumulated in
        ``self.lstRules``; duplicates are removed before the rules are
        packaged into the returned rule set.

        Side effects: overwrites ``self.MAX_RULE_SIZE``, ``self.objPNData``
        and ``self.lstRules``.

        Args:
            samples: Training samples. They are wrapped in a ``PNData`` and
                later handed to ``remap_index_to_feature``.
            maxRuleSize: Stored on ``self.MAX_RULE_SIZE``.
                NOTE(review): presumably the cap on rule length honoured by
                ``_CPARdfs`` -- confirm there.
            mineOnlyClass: When not ``None``, only the class equal to this
                value is mined; all other classes are skipped.

        Returns:
            The result of ``self.remap_index_to_feature`` applied to an
            ``AssociationRuleSet`` holding the de-duplicated rules, with its
            target accuracy set to ``"laplace"``.
        """
        self.MAX_RULE_SIZE = maxRuleSize
        self.objPNData = PNData(samples)
        self.lstRules = []
        classes = self.objPNData.getClassList()

        log("Dataset has %d classes over %d samples." %
            (len(classes), len(samples)))
        for current_class in classes:
            # Identity test against None: a class label type may overload
            # ``!=``, which would make an equality comparison unreliable.
            if mineOnlyClass is not None and current_class != mineOnlyClass:
                continue
            # One-tuple so that a tuple-valued class label formats correctly.
            log("Processing class %s" % (current_class,))
            self.objPNData.setCurrentClass(current_class)
            # The stopping threshold is a fixed fraction of the total weight
            # measured right after switching to this class.
            dblMinTotalWeight = (self.dblTotalWeightFactor *
                                 self.objPNData.getTotalWeight())
            lstAntecedent = []
            # NOTE(review): the loop relies on _CPARdfs lowering the total
            # weight (or on the gain check below) to terminate -- confirm.
            while self.objPNData.getTotalWeight() > dblMinTotalWeight:
                self.objPNData.refreshPNAData()
                # Stop mining this class once no gain passes the threshold.
                if self.objPNData.noValidGainsinPNarray(
                        self.dblMinGainThreshold):
                    break
                self._CPARdfs(self.objPNData.copyPrimes(), lstAntecedent,
                              [current_class])
        self.removeDuplicateRules()
        arset = AssociationRuleSet()
        arset.extend(self.lstRules)
        arset.set_target_accuracy("laplace")
        return self.remap_index_to_feature(arset, samples)