def supergraphs_in_eq(g, g2, rate=1):
    '''Enumerate all supergraphs of g that remain in the same equivalence
    class as g with respect to g2 at the given undersampling rate.
    Currently works only for bfu.undersample by 1.
    Returns a set of g2num-encoded graphs.
    '''
    if bfu.undersample(g, rate) != g2:
        raise ValueError('g is not in equivalence class of g2')

    found = set()

    def grow(g, g2, candidates):
        # Recursively try adding each edge that keeps g in the class.
        if not candidates:
            return
        # Keep only edges whose addition is compatible with g2.
        viable = [e for e in candidates
                  if ok2addanedge(e[0], e[1], g, g2, rate=rate)]
        for i, edge in enumerate(viable):
            mask = addanedge(g, edge)
            found.add(g2num(g))
            # Recurse on the remaining viable edges (excluding this one).
            grow(g, g2, viable[:i] + viable[i + 1:])
            delanedge(g, edge, mask)

    # Candidate edges are exactly those absent from g.
    grow(g, g2, gk.edgelist(gk.complement(g)))
    return found
Example #2
0
def estOE(d):
    """Error of d['estimate'] vs. the once-undersampled ground truth,
    normalized by the ground truth's edge count.

    Uses the first component of gk.OCE for directed and bidirected edges;
    presumably the omission counts -- confirm against gk.OCE.
    """
    true_g = bfu.undersample(d['gt']['graph'], 1)
    err = gk.OCE(d['estimate'], true_g)
    # Normalize by total number of directed + bidirected edges in truth.
    total_edges = np.double(len(gk.edgelist(true_g))) + \
        np.double(len(gk.bedgelist(true_g)))
    return (err['directed'][0] + err['bidirected'][0]) / total_edges
Example #3
0
def estCOE(d):
    """Error of d['estimate'] vs. the once-undersampled ground truth,
    normalized by the number of ABSENT edge slots in the truth.

    Uses the second component of gk.OCE for directed and bidirected edges;
    presumably the commission counts -- confirm against gk.OCE.
    """
    true_g = bfu.undersample(d['gt']['graph'], 1)
    err = gk.OCE(d['estimate'], true_g)
    n = len(true_g)
    # NOTE(review): udensity below uses n*(n-1)/2 for bidirected pairs,
    # while this uses (n-1)**2/2 -- confirm which denominator is intended.
    denom = np.double(n ** 2 + (n - 1) ** 2 / 2.0
                      - len(gk.edgelist(true_g))
                      - len(gk.bedgelist(true_g)))
    return (err['directed'][1] + err['bidirected'][1]) / denom
def g22g1(g2, capsize=None):
    '''
    computes all g1 that are in the equivalence class for g2

    Returns a set of g2num-encoded graphs; {-1} is a sentinel for the
    superclique case, and 0 is added when enumeration is cut off at capsize.
    '''
    # A superclique's class is not enumerable here: any SCC with GCD 1 fits.
    if bfu.isSclique(g2):
        print 'Superclique - any SCC with GCD = 1 fits'
        return set([-1])

    # (n, e) pairs shown below to conflict with g2 on their own; used to
    # prune branches of the search.
    single_cache = {}

    @memo  # memoize the search
    def nodesearch(g, g2, edges, s):
        # g: candidate g1 built so far; edges: edges of g2 left to explain;
        # s: accumulator of solutions (mutated in place).
        if edges:
            if bfu.increment(g) == g2:
                s.add(g2num(g))
                if capsize and len(s) > capsize:
                    raise ValueError('Too many elements')
                return g
            e = edges[0]
            for n in g2:
                # n presumably acts as the intermediate node of the 2-path
                # realizing edge e -- see add2edges; TODO confirm.
                if (n, e) in single_cache:
                    continue
                if not edge_increment_ok(e[0], n, e[1], g, g2):
                    continue

                mask = add2edges(g, e, n)
                r = nodesearch(g, g2, edges[1:], s)  # NOTE(review): r unused
                del2edges(g, e, n, mask)

        elif bfu.increment(g) == g2:
            # All edges placed and the undersampled graph matches exactly.
            s.add(g2num(g))
            if capsize and len(s) > capsize:
                raise ValueError('Too many elements in eqclass')
            return g

    # find all directed g1's not conflicting with g2
    n = len(g2)
    edges = gk.edgelist(g2)
    random.shuffle(edges)
    g = cloneempty(g2)

    # Precompute single-pair conflicts: if placing (e, n) alone already
    # creates an edge outside g2, cache it so the search skips it.
    for e in edges:
        for n in g2:

            mask = add2edges(g, e, n)
            if not gk.isedgesubset(bfu.increment(g), g2):
                single_cache[(n, e)] = False
            del2edges(g, e, n, mask)

    s = set()
    try:
        nodesearch(g, g2, edges, s)
    except ValueError:
        # capsize exceeded: 0 marks a truncated enumeration.
        s.add(0)
    return s
def checker(n, ee):
    """Log10-scale profile of the candidate-set sizes produced by
    checkable() on a random ring graph with n nodes and ee extra edges.

    Returns a list of cumulative log10 products, each offset by
    ee*log10(n) (presumably a baseline search-space size -- confirm).
    """
    g1 = gk.ringmore(n, ee)
    g2 = bfu.increment(g1)
    candidates = checkable(g2)
    sizes = [len(candidates[k]) for k in candidates]
    # Rebind n / ee to the undersampled graph's node and edge counts,
    # mirroring the original implementation.
    n = len(g2)
    ee = len(gk.edgelist(g2))
    baseline = ee * np.log10(n)
    return [sum(np.log10(sizes[:i])) - baseline
            for i in range(1, len(sizes))]
def checkerDS(n, ee):
    """Like checker(), but profiles the pools returned by conformanceDS
    for a random ring graph with n nodes and ee extra edges.
    """
    g1 = gk.ringmore(n, ee)
    g2 = bfu.increment(g1)
    gg = checkable(g2)
    d, pools, idx = conformanceDS(g2, gg, gg.keys())
    sizes = [len(pool) for pool in pools]
    # Rebind n / ee to the undersampled graph's node and edge counts,
    # mirroring the original implementation.
    n = len(g2)
    ee = len(gk.edgelist(g2))
    baseline = ee * np.log10(n)
    return [sum(np.log10(sizes[:i])) - baseline
            for i in range(1, len(sizes))]
def makediscrete(graph, data, numvalues, ss):
    """Discretize `data` into `numvalues` quantile bins, fit a dynamic
    discrete BN on `graph`, and sample `ss` observations from it.
    """
    n = len(data)
    binned = DiscretizeDataQuantiles(data, numvalues)
    ddata = MakeDataDictForBNPackage(binned)
    params = getBNparams(graph, ddata, n)
    probtable = GenProbTable(params)
    initialVdata, bnV_data = alterinputsforBNtoDynBN(params, probtable,
                                                     n, numvalues)
    vertices = map(str, range(1, n + 1))
    dynbn = CreateDynDiscBN(vertices, gk.edgelist(graph),
                            initialVdata, bnV_data)
    return sampleBN(dynbn, ss)
def edge_backtrack2g1_directed(g2, capsize=None):
    '''
    computes all g1 that are in the equivalence class for g2

    Returns a set of g2num-encoded graphs; {-1} is a sentinel for the
    superclique case, and 0 is added when enumeration is cut off at capsize.
    '''
    # A superclique's class is not enumerable here: any SCC with GCD 1 fits.
    if bfu.isSclique(g2):
        print 'Superclique - any SCC with GCD = 1 fits'
        return set([-1])

    # (n, e) pairs shown below to conflict with g2 on their own.
    single_cache = {}

    def edgeset(g):
        # Directed edges of g as a set, for exact comparison below.
        return set(gk.edgelist(g))

    @memo  # memoize the search
    def nodesearch(g, g2, edges, s):
        # NOTE: `edges` is shared and mutated via pop/append around the
        # recursion -- the order of these calls is load-bearing.
        if edges:
            e = edges.pop()
            ln = [n for n in g2]
            for n in ln:
                if (n, e) in single_cache:
                    continue
                mask = add2edges(g, e, n)
                # Only descend while the partial graph stays a subgraph of g2.
                if gk.isedgesubset(bfu.increment(g), g2):
                    r = nodesearch(g, g2, edges, s)
                    # Record only exact matches of the undersampled graph.
                    if r and edgeset(bfu.increment(r)) == edgeset(g2):
                        s.add(g2num(r))
                        if capsize and len(s) > capsize:
                            raise ValueError('Too many elements in eqclass')
                del2edges(g, e, n, mask)
            edges.append(e)  # restore the shared list before returning
        else:
            # All edges placed: hand the candidate back to the caller.
            return g
    # find all directed g1's not conflicting with g2
    n = len(g2)
    edges = gk.edgelist(g2)
    random.shuffle(edges)
    g = cloneempty(g2)

    # Precompute single-pair conflicts to prune the search.
    for e in edges:
        for n in g2:
            mask = add2edges(g, e, n)
            if not gk.isedgesubset(bfu.increment(g), g2):
                single_cache[(n, e)] = False
            del2edges(g, e, n, mask)

    s = set()
    try:
        nodesearch(g, g2, edges, s)
    except ValueError:
        # capsize exceeded: 0 marks a truncated enumeration.
        s.add(0)
    return s
def getBNparams(graph, ddata, n):
    """Estimate discrete BN parameters by MLE given a graph skeleton.

    The skeleton has 2n nodes: '1'..'n' for the t-1 slice and
    'n+1'..'2n' for the t slice; each edge of `graph` is rewired to go
    from its tail in the t-1 slice to its head shifted into the t slice.

    graph -- project graph structure accepted by gk.edgelist
    ddata -- discrete data dict in the format the BN package expects
    n     -- number of variables per time slice
    """
    nodes = map(str, range(1, (n * 2) + 1))
    # Shift each edge's head by n so edges run from slice t-1 to slice t.
    # (Replaces an index loop that rebuilt edges[i] in place.)
    edges = [[e[0], str(n + int(e[1]))] for e in gk.edgelist(graph)]
    skel = GraphSkeleton()
    skel.V = nodes
    skel.E = edges
    learner = PGMLearner()
    return learner.discrete_mle_estimateparams(skel, ddata)
 def BackwardDirected():
     """Greedy backward pruning of directed edges.

     Scores each edge with Comparescore('backward', ...); repeatedly
     deletes the worst (most negative) scoring edge from the enclosing
     `g`/`fr`, then re-scores candidates whose head was affected.
     Returns the pruned (g, fr).
     """
     edges = gk.edgelist(g)
     CandidateSet = []
     for e in edges:
         score = Comparescore('backward', g, int(e[0]), int(e[1]))
         if score < 0:
             CandidateSet.append((score, (int(e[0]), int(e[1]))))
     while CandidateSet:
         CandidateSet.sort(reverse=True)
         # pop() after a descending sort yields the lowest score.
         edgetoDel = CandidateSet.pop()[1]
         fr[edgetoDel[1]][edgetoDel[0]] = 0
         g[str(edgetoDel[0])][str(edgetoDel[1])] = set()
         reeval_node = edgetoDel[1]
         newCandidateSet = []
         # BUGFIX: iterate over a snapshot -- the original removed items
         # from CandidateSet while iterating it, which skips the element
         # following each removal and could leave stale scores behind.
         for a, e in list(CandidateSet):
             if e[1] == reeval_node:
                 CandidateSet.remove((a, e))
                 score = Comparescore('backward', g, e[0], e[1])
                 if score < 0:
                     newCandidateSet.append((score, (e[0], e[1])))
         CandidateSet = newCandidateSet + CandidateSet
     return g, fr
Example #11
0
def vedgelist(g, pathtoo=False):
    """Return a list of tuples for the edges of g and its forks.

    A superugly, organically grown function that badly needs refactoring.
    When pathtoo is true, long paths are collected as well.
    """
    directed = gk.edgelist(g)
    bidirected = gk.bedgelist(g)

    collected = []
    if pathtoo:
        collected.extend(make_longpaths(g, directed))
    forks, rest = make_allforks_and_rest(g, directed, bidirected,
                                         dofullforks=True)
    collected.extend(forks)

    chains, singles = makechains(rest)

    if singles:
        sinks, singles = makesinks(singles)
    else:
        sinks, singles = [], []

    return (longpaths_pick(collected) + threedges_pick(collected)
            + chains + sinks + singles)
Example #12
0
def eqsearch(g2, rate=1):
    '''Find all graphs g in the equivalence class of g2 at the given
    undersampling rate, starting from the empty graph and adding edges.
    Returns a set of g2num-encoded graphs.
    '''
    results = set()
    noop = set()  # sentinel return so memo1 can cache fruitless branches

    @memo1
    def grow(g, g2, candidates):
        if not candidates:
            return noop
        # Keep only edges whose addition stays compatible with g2.
        viable = [e for e in candidates
                  if ok2addanedge_(e[0], e[1], g, g2, rate=rate)]
        if not viable:
            return noop
        for i, edge in enumerate(viable):
            mask = addanedge(g, edge)
            # Record exact matches of the undersampled graph.
            if bfu.undersample(g, rate) == g2:
                results.add(g2num(g))
            grow(g, g2, viable[:i] + viable[i + 1:])
            delanedge(g, edge, mask)
        return results

    g = cloneempty(g2)
    # Every edge is a candidate in the empty graph's complement.
    grow(g, g2, gk.edgelist(gk.complement(g)))
    return results
Example #13
0
def density(g):
    """Fraction of the n**2 possible directed edges present in g."""
    edge_count = len(gk.edgelist(g))
    return edge_count / np.double(len(g) ** 2)
Example #14
0
def backtrack_more(g2, rate=1, capsize=None):
    '''
    computes all g1 that are in the equivalence class for g2

    Generalizes the rate-1 search to arbitrary undersampling rates by
    placing V-paths instead of single intermediate nodes. Returns a set of
    g2num-encoded graphs; {-1} is a sentinel for the superclique case, and
    0 is added when enumeration is cut off at capsize.
    '''
    # A superclique's class is not enumerable here: any SCC with GCD 1 fits.
    if bfu.isSclique(g2):
        print 'Superclique - any SCC with GCD = 1 fits'
        return set([-1])

    # (n, e) pairs shown below to conflict with g2 on their own.
    single_cache = {}
    if rate == 1:
        ln = [n for n in g2]
    else:
        # Candidate intermediate node sequences for a rate-length V-path.
        ln = []
        for x in itertools.combinations_with_replacement(g2.keys(), rate):
            ln.extend(itertools.permutations(x, rate))
        ln = set(ln)

    @memo  # memoize the search
    def nodesearch(g, g2, edges, s):
        # g: candidate g1 built so far; edges: edges of g2 left to explain;
        # s: accumulator of solutions (mutated in place).
        if edges:
            if bfu.undersample(g, rate) == g2:
                s.add(g2num(g))
                if capsize and len(s) > capsize:
                    raise ValueError('Too many elements')
                return g
            e = edges[0]
            for n in ln:

                if (n, e) in single_cache:
                    continue
                if not ok2addaVpath(e, n, g, g2, rate=rate):
                    continue

                mask = addaVpath(g, e, n)
                r = nodesearch(g, g2, edges[1:], s)  # NOTE(review): r unused
                delaVpath(g, e, n, mask)

        elif bfu.undersample(g, rate) == g2:
            # All edges placed and the undersampled graph matches exactly.
            s.add(g2num(g))
            if capsize and len(s) > capsize:
                raise ValueError('Too many elements in eqclass')
            return g

    # find all directed g1's not conflicting with g2
    n = len(g2)
    edges = gk.edgelist(g2)
    random.shuffle(edges)
    g = cloneempty(g2)

    # Precompute single-pair conflicts to prune the search.
    for e in edges:
        for n in ln:

            mask = addaVpath(g, e, n)
            if not gk.isedgesubset(bfu.undersample(g, rate), g2):
                single_cache[(n, e)] = False
            delaVpath(g, e, n, mask)

    s = set()
    try:
        nodesearch(g, g2, edges, s)
    except ValueError:
        # capsize exceeded: 0 marks a truncated enumeration.
        s.add(0)
    return s
Example #15
0
def backtrack_more2(g2, rate=2, capsize=None):
    '''
    computes all g1 that are in the equivalence class for g2
    '''
    if bfu.isSclique(g2):
        print 'Superclique - any SCC with GCD = 1 fits'
        return set([-1])

    f = [(addaVpath, delaVpath, maskaVpath)]
    c = [ok2addaVpath]

    def predictive_check(g, g2, pool, checks_ok, key):
        s = set()
        for u in pool:
            if not checks_ok(key, u, g, g2, rate=rate):
                continue
            s.add(u)
        return s

    @memo2  # memoize the search
    def nodesearch(g, g2, order, inlist, s, cds, pool, pc):
        if order:
            if bfu.undersample(g, rate) == g2:
                s.add(g2num(g))
                if capsize and len(s) > capsize:
                    raise ValueError('Too many elements')
                s.update(supergraphs_in_eq(g, g2, rate=rate))
                return g

            key = order[0]
            if pc:
                tocheck = [x for x in pc if x in cds[len(inlist) - 1][inlist[0]]]
            else:
                tocheck = cds[len(inlist) - 1][inlist[0]]

            if len(order) > 1:
                kk = order[1]
                pc = predictive_check(g, g2, pool[len(inlist)],
                                      c[edge_function_idx(kk)], kk)
            else:
                pc = set()

            adder, remover, masker = f[edge_function_idx(key)]
            checks_ok = c[edge_function_idx(key)]

            for n in tocheck:
                if not checks_ok(key, n, g, g2, rate=rate):
                    continue
                masked = np.prod(masker(g, key, n))
                if masked:
                    nodesearch(g, g2, order[1:], [n] + inlist, s, cds, pool, pc)
                else:
                    mask = adder(g, key, n)
                    nodesearch(g, g2, order[1:], [n] + inlist, s, cds, pool, pc)
                    remover(g, key, n, mask)

        elif bfu.undersample(g, rate) == g2:
            s.add(g2num(g))
            if capsize and len(s) > capsize:
                raise ValueError('Too many elements')
            return g

    # find all directed g1's not conflicting with g2

    startTime = int(round(time.time() * 1000))
    ln = [x for x in itertools.permutations(g2.keys(), rate)] + \
         [(n, n) for n in g2]
    gg = {x: ln for x in gk.edgelist(g2)}
    keys = gg.keys()
    cds, order, idx = conformanceDS(g2, gg, gg.keys(), f=f, c=c)
    endTime = int(round(time.time() * 1000))
    print "precomputed in {:10} seconds".format(round((endTime - startTime) / 1000., 3))
    if 0 in [len(x) for x in order]:
        return set()
    g = cloneempty(g2)

    s = set()
    try:
        nodesearch(g, g2, [keys[i] for i in idx], ['0'], s, cds, order, set())
    except ValueError, e:
        print e
        s.add(0)
def dpc(data, varst, pval=0.1):
    """PC-style structure learning over two time slices via chi-squared
    conditional-independence tests.

    data  -- 2-D array of samples; columns are consecutive time points
    varst -- number of values each discrete variable can take
    pval  -- independence threshold for the chi-squared p-value

    Starts from the superclique and prunes directed edges (then bidirected
    ones) whose endpoints test conditionally independent. Returns the
    pruned graph in the project's dict format.
    """
    n = data.shape[0]
    #    if n<200:
    #        pval=.05
    #    if n <1000:
    #        pval=.1
    #    elif n<2000:
    #        pval=.1
    # stack the data: first n rows is t-1 slice, the next n are slice t
    data = np.asarray(np.r_[data[:, :-1], data[:, 1:]])

    def cindependent(y, x, counter, parents=[], pval=pval):
        # True if x and y are independent given ANY size-`counter`
        # subset of `parents`.
        for S in [j for j in combinations(parents, counter)]:
            print S
            if ChiSquaredTest(x, y, condset=list(S)):
                return True
        return False

    def bindependent(y, x, parents=[], pval=pval):
        # Same-slice (shifted) test conditioning on all parents at once.
        print "done"
        return ChiSquaredTest(x, y, condset=parents, shift=n)

    def dir_prune(elist, mask, g):
        # Drop the directed mark (0, 1) from every edge in `mask`.
        for e in mask:
            sett = copy.deepcopy(g[e[0]][e[1]])
            sett.remove((0, 1))
            g[e[0]][e[1]] = sett
            elist.remove(e)

    def bi_prune(mask, g):
        # Drop the bidirected mark (2, 0) from both directions of each edge.
        for e in mask:
            sett = copy.deepcopy(g[e[0]][e[1]])
            sett.remove((2, 0))
            g[e[0]][e[1]] = sett
            g[e[1]][e[0]] = sett

    def chisq_of_df_cols(df, c1, c2):
        groupsizes = df.groupby([c1, c2]).size()
        ctsum = groupsizes.unstack(c1)
        # fillna(0) is necessary to remove any NAs which will cause exceptions
        return (chi2_contingency(ctsum.fillna(0)))

    def ChiSquaredTest(x, y, condset, shift=0):
        # Returns True (independent) when the pooled p-value exceeds pval.
        if condset:
            X = data[[shift + int(x) - 1] + [n + int(y) - 1] + condset, :].T
            df = makeDF(X)
            condnum = df.shape[1] - 2
            # Collect the observed values of each conditioning column.
            for i in range(condnum):
                if i == 0:
                    v = pd.Series.unique(df[i + 2])
                else:
                    v = np.vstack([v, pd.Series.unique(df[i + 2])])
            if condnum == 1:
                condvalues = [v]
            else:
                condvalues = list(itertools.product(*v))
            chis = 0
            dofs = 0
            # Sum chi-square statistics over every conditioning stratum.
            for i in condvalues:
                count = 1
                for j in range(condnum):
                    if count == 1:
                        newdf = df[df[j + 2] == i[j]]
                        count = 2
                    else:
                        newdf = newdf[newdf[j + 2] == i[j]]
                try:
                    chi2, p, dof, ex = chisq_of_df_cols(newdf, 0, 1)
                except:
                    # Empty/degenerate stratum: contribute no statistic but
                    # the nominal degrees of freedom.
                    chi2 = 0
                    dof = (varst - 1)**2
                chis += chi2
                dofs += dof
            val = chisqprob(chis, dofs)
        else:
            X = data[[shift + int(x) - 1] + [n + int(y) - 1], :].T
            df = makeDF(X)
            chi2, p, dof, ex = chisq_of_df_cols(df, 0, 1)
            val = chisqprob(chi2, dof)
        return val > pval

    def stringify(array):
        # Convert edge pairs to the string node labels the graph dict uses.
        d = []
        for i in array:
            d.append((str(i[0]), str(i[1])))
        return d

    num_g = gk.superclique(n)
    el = gk.edgelist(num_g)
    el = stringify(el)
    print(el)
    num_gtr = gk.gtranspose(num_g)
    gtr = conv.ian2g(num_gtr)
    g = conv.ian2g(num_g)
    # PC-style sweep: grow the conditioning-set size each round and prune
    # directed edges whose endpoints test independent.
    for counter in range(n):
        to_remove = []
        for e in el:
            ppp = [int(k) - 1 for k in gtr[e[1]] if k != e[0]]
            if counter <= len(ppp):
                if cindependent(e[1], e[0], counter, parents=ppp, pval=pval):
                    to_remove.append(e)
                    gtr[e[1]].pop(e[0], None)
        dir_prune(el, to_remove, g)
    print(g)
    # Test every unordered node pair for a bidirected (confounded) link.
    bel = [map(lambda k: str(k + 1), x) for x in combinations(range(n), 2)]
    bi_list = []
    for e in bel:
        ppp = list(set(gtr[e[0]].keys()) | set(gtr[e[1]].keys()))
        ppp = map(lambda x: int(x) - 1, ppp)
        if bindependent(e[0], e[1], parents=ppp, pval=pval):
            bi_list.append(e)
    bi_prune(bi_list, g)
    g = conv.dict_format_converter(g)
    gk.clean_leaf_nodes(g)
    return g
Example #17
0
def udensity(g):
    """Density of g counting directed edges plus half the bidirected
    edge list, over n**2 directed slots plus n*(n-1)/2 bidirected pairs."""
    n = len(g)
    edge_count = len(gk.edgelist(g)) + len(gk.bedgelist(g)) / 2.
    slots = np.double(n ** 2 + n * (n - 1) / 2.)
    return edge_count / slots
Example #18
0
 def edgeset(g):
     """Return the directed edges of g as a set of pairs."""
     return {e for e in gk.edgelist(g)}
Example #19
0
def checkcedge(c, g2):
    """Nodes to check to merge the virtual nodes of c (a->b->c).

    Returns the deduplicated edge list of g2; `c` is currently unused.
    """
    return list(set(gk.edgelist(g2)))