def findCombination(word, lstFunc, alphabet, offset, reprs):
    """Expand a tree of value spaces until one representation is reachable.

    The root of the tree carries ``offset`` as its ``space`` feature. In each
    round ("mutation") every current leaf is expanded once per function in
    ``lstFunc``: ``generateSpaceEx(f, leaf.space, alphabet)`` produces the
    entries of the new space, and the distinct first elements of those
    entries become the ``space`` of a new child node named after ``f`` (the
    full entries are kept as the child's ``history``).

    As soon as ``contains(r, space)`` holds for some ``r`` in ``reprs``, the
    path from the new node up to the root is handed to ``getSolution`` and
    the process is terminated with ``exit()``.

    Parameters
    ----------
    word :
        the target word; if ``contains(word, alphabet)`` is true it is
        reported directly and the process exits
    lstFunc :
        the functions tried at every expansion step
    alphabet :
        passed to ``contains`` and ``generateSpaceEx``
    offset :
        the initial space stored on the root node
    reprs :
        candidate representations checked against each generated space

    Notes
    -----
    This function does not return normally: it either calls ``exit()`` or
    keeps expanding the tree.
    """

    debug("findCombination(%s,%s,%s)" % (word, lstFunc, alphabet))

    mutation = 1
    spaceTree = Tree()
    spaceTree.add_features(space=offset)

    if contains(word, alphabet):
        info("Alphabet contains Word")
        info("PUSH %s" % word)
        exit()

    # The only way out of this loop is the exit() call in the match branch.
    while True:
        info("Mutation: %d !" % mutation)

        # get_leaves() is evaluated once per round, so children added below
        # are only expanded in the next round.
        for n in spaceTree.get_leaves():
            for f in lstFunc:
                # generate space from the leaf's current space
                space = generateSpaceEx(f, n.space, alphabet)
                tmpSpace = list(set([c[0] for c in space]))
                debugListHex(tmpSpace, "SPACE")

                # check whether any word representation exists in the space
                for r in reprs:
                    if contains(r, tmpSpace):
                        info("FOUND : %s" % r)
                        nodeF = n.add_child(name=f)
                        nodeF.add_features(space=tmpSpace, history=space)
                        # new node first, followed by its ancestors
                        lstAncestors = [nodeF]
                        lstAncestors.extend(nodeF.get_ancestors())
                        getSolution(r, offset, lstAncestors)
                        exit()

                nodeF = n.add_child(name=f)
                nodeF.add_features(space=tmpSpace, history=space)
        mutation = mutation + 1
Ejemplo n.º 2
0
def getSolution(reprs,offset,his):
    """Reconstruct and report the operations that turn ``offset`` into ``reprs``.

    For every value in ``reprs`` a small tree is grown backwards through the
    ``history`` of each node in ``his[:-1]``: whenever a history entry's
    result equals the value of a current leaf, a child is added carrying the
    entry's operand (``name``), the function name (``function``) and the
    previous value (``value``). The first leaf of that tree and its
    ancestors (excluding the tree root) form the step list for that value.

    The steps are then logged with ``info`` as 32-bit words built from the
    first four values, and the list of words is passed to ``testResult``
    together with the packed target.

    Parameters
    ----------
    reprs :
        the sequence of target values; indices 0..3 are packed into the
        reported result word
    offset :
        the starting values; indices 0..3 are packed into the PUSH word
    his :
        tree nodes exposing a ``history`` attribute whose entries are indexed
        as (result, operand, previous value, function); the last element is
        skipped. ``findCombination`` passes the newly added node followed by
        its ancestors.
    """

    def _pack32(b):
        # Combine the first four values into one big-endian 32-bit word.
        return b[0] * 0x01000000 + b[1] * 0x00010000 + b[2] * 0x00000100 + b[3] * 0x00000001

    debugListHex(reprs,"reprs:",2)
    debugListHex(offset,"offset:",2)
    sol = dict()
    sol2 = dict()
    for rg, target in enumerate(reprs):
        tPath = Tree(name=target)
        tPath.add_features(value=target)
        for h in his[:-1]:
            # get_leaves() is evaluated before children are attached, so each
            # history level extends the path by at most one generation.
            for leaf in tPath.get_leaves():
                r = leaf.value
                for line in h.history:
                    # __name__ is available on Python 2 and 3 (func_name is 2-only)
                    res, alph, past, method = line[0], line[1], line[2], line[3].__name__
                    if res == r:
                        n = leaf.add_child(name=alph)
                        n.add_features(function=method,value=past)
        lf = tPath.get_leaves()[0]
        # first leaf followed by its ancestors, without the tree root
        llf = [lf,]
        llf.extend(lf.get_ancestors()[:-1])
        sol[rg] = llf
    for i in sol:
        vls = [(c.name, c.function) for c in sol[i]]
        # Label each step with its own function. Previously every step was
        # labelled with the function of the first node in the path.
        sol2["method"] = [fn for _, fn in vls]
        for j, (operand, _) in enumerate(vls):
            # sol2[j] collects the operand of step j for every target value
            sol2.setdefault(j, []).append(operand)
    print(prettyText("Solution:","red"))
    info("PUSH\t\t0x%02x%02x%02x%02x" % (offset[0],offset[1],offset[2],offset[3]))

    test = [_pack32(offset)]

    for m, method in enumerate(sol2["method"]):
        step = sol2[m]
        test.append(_pack32(step))
        info("%s\t\t\t0x%02x%02x%02x%02x" % (method,step[0],step[1],step[2],step[3]))

    expected = _pack32(reprs)
    info("RESULT\t\t0x%08x" % expected)
    testResult(test,expected)
Ejemplo n.º 3
0
def build_tree(data,
               feature_info,
               sens,
               expl,
               output,
               metric,
               conf,
               max_depth,
               min_leaf_size=100,
               agg_type='avg',
               max_bins=10,
               subsample_frac=1.0):
    """
    Builds a decision tree guided towards nodes with high bias

    Parameters
    ----------
    data :
        the dataset

    feature_info :
        information about user features

    sens :
        name of the sensitive feature

    expl :
        name of the explanatory feature

    output :
        the target feature

    metric :
        the fairness metric to use

    conf :
        the confidence level

    max_depth :
        maximum depth of the decision-tree

    min_leaf_size :
        minimum size of a leaf

    agg_type :
        aggregation method for children scores

    max_bins :
        maximum number of bins to use when binning continuous features

    subsample_frac :
        forwarded unchanged to SplitParams (presumably the fraction of the
        data to subsample when evaluating splits -- TODO confirm)

    Returns
    -------
    tree :
        the tree built by the algorithm
    """
    logging.info('Building a Guided Decision Tree')
    tree = Tree()

    # Check if there are multiple labeled outputs
    # targets = data.columns[-output.num_labels:].tolist()
    targets = output.names.tolist()
    logging.debug('Targets: %s', targets)

    # contextual features: every column that is neither the sensitive
    # feature, the explanatory feature, nor a target
    features = set(data.columns.tolist()) - set([sens, expl]) - set(targets)
    logging.debug('Contextual Features: %s', features)

    # check the data dimensions
    if metric.dataType == Metric.DATATYPE_CORR:
        if expl:
            dim = (feature_info[expl].arity, 6)
        else:
            dim = 6
    else:
        # get the dimensions of the OUTPUT x SENSITIVE contingency table
        if expl:
            dim = (feature_info[expl].arity, output.arity,
                   feature_info[sens].arity)
        else:
            dim = (output.arity, feature_info[sens].arity)

    logging.debug('Data Dimension for Metric: %s', dim)

    # bin the continuous features
    cont_thresholds = find_thresholds(data, features, feature_info, max_bins)

    score_params = ScoreParams(metric, agg_type, conf)
    split_params = SplitParams(targets, sens, expl, dim, feature_info,
                               cont_thresholds, min_leaf_size, subsample_frac)

    # get a measure for the root
    if metric.dataType == Metric.DATATYPE_CT:
        stats = [count_values(data, sens, targets[0], expl, dim)[0]]
    elif metric.dataType == Metric.DATATYPE_CORR:
        stats = [corr_values(data, sens, targets[0], expl, dim)[0]]
    else:
        stats = [data[targets + [sens]]]

    _, root_metric = score(stats, score_params)
    tree.add_features(metric=root_metric[0])

    #
    # Builds up the tree recursively. Selects the best feature to split on,
    # in order to maximize the average bias (mutual information) in all
    # sub-trees.
    def rec_build_tree(node_data, node, pred, split_features, depth,
                       parent_score, pool):
        """
        Recursive tree building. Children are attached to ``node`` in place.

        Parameters
        ----------
        node_data :
            the data for the current node

        node :
            the tree node to annotate and, if a split is found, extend

        pred :
            the predicate defining the current context

        split_features :
            the features on which a split can occur

        depth :
            the current depth

        parent_score :
            the metric score at the parent

        pool :
            the multiprocessing pool handed to select_best_feature

        Returns
        -------
        None
        """

        node.add_features(size=len(node_data))

        # make a new leaf if recursion is stopped
        if (depth == max_depth) or (len(split_features) == 0):
            return

        logging.debug('looking for splits at pred %s', pred)

        # select the best feature to split on
        split_score, best_feature, threshold, to_drop, child_metrics = \
            select_best_feature(node_data, split_features, split_params,
                                score_params, parent_score, pool)

        # no split found, make a leaf
        if best_feature is None:
            return

        logging.info('splitting on %s (score=%s) with threshold %s at pred %s',
                     best_feature, split_score, threshold, pred)

        # NOTE(review): a truthiness test sends a threshold of exactly 0 to
        # the categorical branch; confirm what select_best_feature returns
        # for categorical features before tightening this to `is not None`.
        if threshold:
            # binary split
            data_left = node_data[node_data[best_feature] <= threshold]
            data_right = node_data[node_data[best_feature] > threshold]

            # predicates for sub-trees
            pred_left = "{} <= {}".format(best_feature, threshold)
            pred_right = "{} > {}".format(best_feature, threshold)

            # add new nodes to the underlying tree structure
            left_child = node.add_child(name=str(pred_left))
            left_child.add_features(feature_type='continuous',
                                    feature=best_feature,
                                    threshold=threshold,
                                    is_left=True,
                                    metric=child_metrics['left'])

            right_child = node.add_child(name=str(pred_right))
            right_child.add_features(feature_type='continuous',
                                     feature=best_feature,
                                     threshold=threshold,
                                     is_left=False,
                                     metric=child_metrics['right'])

            # recursively build the tree; the split feature itself stays
            # available so it can be split again at another threshold
            rec_build_tree(data_left, left_child, pred + [pred_left],
                           split_features - set(to_drop), depth + 1,
                           split_score, pool)
            rec_build_tree(data_right, right_child, pred + [pred_right],
                           split_features - set(to_drop), depth + 1,
                           split_score, pool)

        else:
            # categorical split
            for val in node_data[best_feature].unique():

                # check if this child was pruned or not
                if val in child_metrics:
                    # predicate for the current sub-tree
                    new_pred = "{} = {}".format(best_feature, val)

                    # add a node to the underlying tree structure
                    child = node.add_child(name=str(new_pred))
                    child.add_features(feature_type='categorical',
                                       feature=best_feature,
                                       category=val,
                                       metric=child_metrics[val])

                    child_data = node_data[node_data[best_feature] == val]

                    # recursively build the tree; a categorical feature is
                    # removed from the candidates once it has been used
                    rec_build_tree(
                        child_data, child, pred + [new_pred],
                        split_features - set(to_drop + [best_feature]),
                        depth + 1, split_score, pool)

    #
    # When contextual features are just a few there is
    # no actual benefit out of parallelization. In fact,
    # contention introduces a slight overhead. The larger setting
    # max(1, multiprocessing.cpu_count() - 2) is currently disabled,
    # so a single worker is used regardless of the number of features.
    #
    pool_size = 1

    pool = multiprocessing.Pool(pool_size)
    try:
        rec_build_tree(data, tree, [], features, 0, 0, pool)
    finally:
        # release the worker processes even if tree building fails
        pool.close()
        pool.join()

    return tree
Ejemplo n.º 4
0
    w, v = LA.eig(matrizQ)
    print(w)
    print(v)
    print(np.diag(w))
    print(np.exp(np.diag(w)))
    print(np.diag(np.exp(w)))
    print(v.T.I)
    print("aqui")
    matrizP = v.T.I * np.diag(np.exp(w)) * v.T
    print(sum(matrizP[1, :]))
    print(matrizP)
    # t = Tree("((a,b),c);")

#print(samplerAC)
# Build a small example tree from a Newick string with named internal nodes
# and branch lengths, attach a custom "size" feature to its root, and print it.
t = Tree("((A, B)Internal_1:0.7, (C, D)Internal_2:0.5)root:1.3;", format=1)
t.add_features(size=4)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(t.get_ascii(attributes=["name", "dist", "size"]))

#tree = Phylo.parse("/Users/patricioburchard/Downloads/Particiones_grupos_de_especies/part1.nex.con.tre", "nexus").next()
#tree = Phylo.read("/Users/patricioburchard/Downloads/Cercosaura_FinalPAUP.tre", "nexus")
#tree.rooted = True
#Phylo.draw(tree)
#infile = open('/Users/patricioburchard/Downloads/Particiones_grupos_de_especies/part1.nex.run1.p', 'r')
#t.show()

# NOTE(review): the same path is passed to the constructor and to read_file();
# confirm whether the second call is needed. The path is machine-specific.
n = NexusReader(
    '/Users/patricioburchard/Downloads/Particiones_grupos_de_especies/part1.nex.con.tre'
)
n.read_file(
    '/Users/patricioburchard/Downloads/Particiones_grupos_de_especies/part1.nex.con.tre'
)