Esempio n. 1
0
    def learn3(self, k, intBaseFeat, boolBaseFeat, baseFeatureValues, exclude,
               call):
        """Learn a formula over the samples using up to ``k`` nested splits.

        Houdini is run over the base boolean features plus freshly
        synthesized derived features. When ``k`` is 0 the Houdini formula is
        returned as is. Otherwise the features Houdini reported are removed,
        a split predicate ``f`` is obtained from ``chooseFeatureImplication``
        and both sides of the split are learned recursively with ``k - 1``.
        The combined result is
        ``allTrue AND ((pos AND f) OR (neg AND NOT f))``.

        Args:
            k: remaining split depth; 0 stops the recursion.
            intBaseFeat: integer base features (concatenated with
                ``boolBaseFeat``, so presumably a tuple -- confirm).
            boolBaseFeat: boolean base features.
            baseFeatureValues: feature vectors over the base features.
            exclude: features passed to ``synthesizeUniqueFeatures``
                (presumably so they are not synthesized again -- confirm);
                it is extended before recursing.
            call: string label of this node used in log/print output;
                " Left" / " Right" is appended for the children.

        Returns:
            ``PrecisFormula(BoolVal(False))`` when ``baseFeatureValues`` is
            empty, the formula returned by ``Houdini.learn2`` when ``k`` is 0
            or no split predicate is found, otherwise a ``PrecisFormula``
            wrapping the combined split formula.
        """
        # No samples at all: report it and answer with the constant False.
        if len(baseFeatureValues) == 0:
            emptyMsg = "called learn3 with 0 feature vectors"
            print(emptyMsg)
            logger.info(emptyMsg)
            return PrecisFormula(BoolVal(False))

        # Separate the int columns from the bool columns of every sample.
        intVectors, boolVectors = Featurizer.getBoolAndIntFeatureVectors(
            intBaseFeat, boolBaseFeat, baseFeatureValues)

        derivFeats = self.synthesizeUniqueFeatures(
            intBaseFeat, boolBaseFeat, baseFeatureValues, exclude)
        derivVectors: List[FeatureVector] = (
            Featurizer.generateDerivedFeatureVectors(
                derivFeats, intBaseFeat + boolBaseFeat, baseFeatureValues))
        mergedBoolVectors = Featurizer.mergeFeatureVectors(
            boolVectors, derivVectors)

        allTrueFormula, alwaysTrueIdx = Houdini().learn2(
            boolBaseFeat + derivFeats, mergedBoolVectors, call)
        logger.info("Houdini AlwaysTrue for k=" + str(k) + " : " +
                    allTrueFormula.toInfix() + "\n")

        if k == 0:
            return allTrueFormula

        # Drop the features Houdini reported together with their entries in
        # the boolean feature vectors.
        keptBaseFeat, keptDerivFeat, houdiniFeats = \
            self.removeFeatureFromFeaturelist(
                boolBaseFeat, derivFeats, alwaysTrueIdx)
        keptBaseEntries, keptDerivEntries = \
            self.removeFeatureEntryInFeatureVectors(
                boolVectors, derivVectors, alwaysTrueIdx)

        # Features already reported at this node are not handed down again.
        exclude = exclude + houdiniFeats
        lookAhead = len(intVectors[0])

        # NOTE(review): the returned idx is used below against the feature
        # lists that chooseFeatureImplication returns, not the ones passed in.
        candidateBaseFv = Featurizer.mergeFeatureVectors(
            intVectors, keptBaseEntries)
        (f, idx, posBaseFv, negBaseFv, keptBaseFeat, keptDerivFeat) = \
            self.chooseFeatureImplication(
                allTrueFormula, intBaseFeat, keptBaseFeat, keptDerivFeat,
                candidateBaseFv, keptDerivEntries, lookAhead, call)

        nodeTag = " for k = " + str(k) + " : "
        if idx < 0:
            # No usable split predicate: the Houdini formula is the answer.
            noneMsg = "Predicate: " + call + nodeTag + "None"
            print(noneMsg)
            logger.info(noneMsg + "\n")
            return allTrueFormula

        logger.info("Predicate: " + call + nodeTag + str(f) + "\n")
        print("Predicate chosen at " + call + " : " + str(f))

        childBoolFeat, _unusedDerivFeat, _unusedSplitFeat = \
            self.removeFeatureFromFeaturelist(
                keptBaseFeat, keptDerivFeat, [idx])
        if len(keptBaseFeat) != len(childBoolFeat):
            # The split predicate was a base feature: remove its entry
            # (offset by the int entries) from both halves of the samples.
            posBaseFv = self.removeFeatureEntryInBaseFv(
                posBaseFv, [idx + lookAhead])
            negBaseFv = self.removeFeatureEntryInBaseFv(
                negBaseFv, [idx + lookAhead])
        else:
            # The split predicate was a derived feature: exclude it below.
            exclude = exclude + (f, )

        posPost = self.learn3(k - 1, intBaseFeat, childBoolFeat, posBaseFv,
                              exclude, call + " Left")
        leftMsg = call + " Left: " + nodeTag + posPost.toInfix()
        logger.info(leftMsg)
        print(leftMsg)

        negPost = self.learn3(k - 1, intBaseFeat, childBoolFeat, negBaseFv,
                              exclude, call + " Right")
        rightMsg = call + " Right: " + nodeTag + negPost.toInfix()
        logger.info(rightMsg)
        print(rightMsg)

        whenTrue = And(posPost.formulaZ3, f.varZ3)
        whenFalse = And(negPost.formulaZ3, Not(f.varZ3))
        return PrecisFormula(
            And(allTrueFormula.formulaZ3, Or(whenTrue, whenFalse)))
Esempio n. 2
0
    def chooseFeatureImplication(self, alwaysTrueFormula, intBaseFeatures,
                                 baseBoolFeatures, derivBoolFeatures, baseFv,
                                 derivFv, lookAhead, call):
        """Pick a split predicate after pruning features that add nothing.

        For every boolean feature ``f`` (base features first, then derived)
        the samples are split with ``splitSamplesImplication``, Houdini is
        run on each side, and z3 is asked whether
        ``alwaysTrueFormula => (pos AND f) OR (neg AND NOT f)`` is valid
        (i.e. its negation is ``unsat``). If it is, splitting on ``f`` adds
        no new information and ``f`` is pruned. The surviving features and
        the matching feature-vector entries are handed to ``chooseFeature2``.

        Args:
            alwaysTrueFormula: formula object exposing ``formulaZ3``.
            intBaseFeatures: integer base features.
            baseBoolFeatures: boolean base features.
            derivBoolFeatures: derived boolean features.
            baseFv: feature vectors over the int + bool base features.
            derivFv: feature vectors over the derived features.
            lookAhead: offset added to a feature index when splitting
                (presumably the number of int entries preceding the bool
                entries in ``baseFv`` -- confirm against the caller).
            call: string label used in log messages.

        Returns:
            ``(f, idx, posBaseFv, negBaseFv, remainingBaseBoolFeat,
            remainingDerivBoolFeat)`` where the first four come from
            ``chooseFeature2``; or ``(None, -1, None, None, None, None)``
            when every feature was pruned.

        Raises:
            AssertionError: if a feature's z3 variable is an integer, or if
                the solver answers anything other than ``sat``/``unsat``.
                Raised explicitly (not via ``assert``) so the checks are not
                stripped when Python runs with ``-O``.
        """
        houdini = Houdini()
        irrelevantIndices = []
        boolFeatures = baseBoolFeatures + derivBoolFeatures

        for idx, feature in enumerate(boolFeatures):
            if is_int(feature.varZ3):
                raise AssertionError(
                    "integer feature found among boolean features: " +
                    str(feature))

            (fvPos, fvPosDeriv, fvNeg,
             fvNegDeriv) = self.splitSamplesImplication(
                 feature, idx + lookAhead, baseFv, derivFv)

            # Only the bool part of each side is needed for Houdini.
            (_posIntBaseFv,
             posBoolBaseFv) = Featurizer.getBoolAndIntFeatureVectors(
                 intBaseFeatures, baseBoolFeatures, fvPos)
            (_negIntBaseFv,
             negBoolBaseFv) = Featurizer.getBoolAndIntFeatureVectors(
                 intBaseFeatures, baseBoolFeatures, fvNeg)

            posFvs = Featurizer.mergeFeatureVectors(posBoolBaseFv, fvPosDeriv)
            negFvs = Featurizer.mergeFeatureVectors(negBoolBaseFv, fvNegDeriv)

            label = (call + " from implication check-- split from pred " +
                     str(feature))
            (posAllTrueFormula, _posIndices) = houdini.learn2(
                boolFeatures, posFvs, label)
            (negAllTrueFormula, _negIndices) = houdini.learn2(
                boolFeatures, negFvs, label)

            if len(fvPos) != 0 and len(fvNeg) != 0:
                logger.info(call + " implication check-- split pred: " +
                            str(feature))
                logger.info(call + " implication check-- featurePos: " +
                            str(posAllTrueFormula.toInfix()))
                logger.info(call + " implication check-- featureNeg: " +
                            str(negAllTrueFormula.toInfix()) + "\n")

            disjunct = Or(And(posAllTrueFormula.formulaZ3, feature.varZ3),
                          And(negAllTrueFormula.formulaZ3, Not(feature.varZ3)))
            implication = Implies(alwaysTrueFormula.formulaZ3, disjunct)

            # The implication is valid iff its negation is unsatisfiable.
            solver = Solver()
            solver.add(Not(implication))
            verdict = str(solver.check())
            if verdict == 'unsat':
                # Splitting on this feature adds no new information.
                irrelevantIndices.append(idx)
            elif verdict != 'sat':
                # Solver could not decide; do not silently keep the feature.
                raise AssertionError(
                    "solver returned '" + verdict +
                    "' during implication check for " + str(feature))

        # Work on tuple copies of the feature collections.
        copyBaseIntFeat = tuple(intBaseFeatures)
        copyBaseBoolFeat = tuple(baseBoolFeatures)
        copyDerivFeat = tuple(derivBoolFeatures)

        (intBaseFv, boolBaseFv) = Featurizer.getBoolAndIntFeatureVectors(
            copyBaseIntFeat, copyBaseBoolFeat, baseFv)

        (remainingBaseBoolFeat, remainingDerivBoolFeat, _removed) = \
            self.removeFeatureFromFeaturelist(
                copyBaseBoolFeat, copyDerivFeat, irrelevantIndices)

        (remainingBaseBoolFv, remainingDerivBoolFv) = \
            self.removeFeatureEntryInFeatureVectors(
                boolBaseFv, derivFv, irrelevantIndices)

        # Every candidate was pruned: signal "no predicate" to the caller.
        if len(remainingBaseBoolFeat) + len(remainingDerivBoolFeat) == 0:
            return (None, -1, None, None, None, None)

        skipAhead = len(intBaseFv[0])
        newBaseFv = Featurizer.mergeFeatureVectors(intBaseFv,
                                                   remainingBaseBoolFv)

        (f, idx, posBaseFv, negBaseFv) = self.chooseFeature2(
            remainingBaseBoolFeat + remainingDerivBoolFeat, newBaseFv,
            remainingDerivBoolFv, call, skipAhead)

        return (f, idx, posBaseFv, negBaseFv, remainingBaseBoolFeat,
                remainingDerivBoolFeat)