def iteqclass(H, verbose=True, capsize=100):
    '''Enumerate the equivalence class of graph H over any
    undersampling rate by growing candidate graphs one edge at a time.

    Returns a set of numeric graph ids, or None for a superclique.
    '''
    if cmp.isSclique(H):
        print('not running on superclique')
        return None
    empty = {node: {} for node in H}
    result = set()
    hnum = bfu.ug2num(H)
    if hnum[1] == 0:
        # H carries no bidirected edges, so H itself belongs to the class
        result.add(hnum[0])
    pairs = confpairs(H)
    conf = conflictors(H)
    free_edges = gk.edgelist(gk.complement(empty))
    ds = {bfu.g2num(empty): free_edges}
    if verbose:
        print('%3s' % 'i' + '%10s' % ' graphs')
    for step in range(len(H) ** 2):
        ds, found = add2set_(ds, H, pairs, conf, iter=step,
                             verbose=verbose, capsize=capsize)
        result |= found
        if capsize <= len(found):
            break
        if not ds:
            break
    return result
def dpc(data, pval=0.05): n = data.shape[0] # stack the data: first n rows is t-1 slice, the next n are slice t data = np.asarray(np.r_[data[:,:-1],data[:,1:]]) def tetrad_cind_(y,x,condset=[], alpha=0.01, shift=0): y = data[n+int(y)-1,:] x = data[shift+int(x)-1,:] if condset: X = data[condset,:] ry, rx = residuals_(y,x,X) else: ry, rx = [y,x] return independent_(ry, rx, alpha = alpha) def cind_(y,x, condset=[], pval=pval, shift=0): yd = data[n+int(y)-1,:].T X = data[[shift+int(x)-1]+condset,:].T return independent(yd, X, pval=pval) def cindependent(y, x, counter, parents=[], pval=pval): for S in [j for j in iter.combinations(parents,counter)]: if cind_(y, x, condset=list(S), pval=pval): return True #if tetrad_cind_(x, y, condset=list(S), alpha=pval): return True return False def bindependent(y, x, parents=[], pval=pval): return cind_(y, x, condset=parents, pval=pval, shift=n) #return tetrad_cind_(y, x, condset=parents, alpha=pval, shift=n) def prune(elist, mask, g): for e in mask: g[e[0]][e[1]].remove((0,1)) elist.remove(e) gk.clean_leaf_nodes(g) g = gk.superclique(n) gtr= bfu.gtranspose(g) el = gk.edgelist(g) for counter in range(n): to_remove = [] for e in el: ppp = [int(k)-1 for k in gtr[e[1]] if k != e[0]] if counter <= len(ppp): if cindependent(e[1], e[0], counter, parents=ppp, pval=pval): to_remove.append(e) gtr[e[1]].pop(e[0],None) prune(el, to_remove, g) bel = [map(lambda k: str(k+1), x) for x in iter.combinations(range(n),2)] for e in bel: ppp = list(set(gtr[e[0]].keys()) | set(gtr[e[1]].keys())) ppp = map(lambda x: int(x)-1, ppp) if bindependent(e[0], e[1], parents=ppp, pval=pval): g[e[0]][e[1]].remove((2,0)) g[e[1]][e[0]].remove((2,0)) gk.clean_leaf_nodes(g) return g
def iteqclass(H, verbose=True, capsize=100):
    '''
    Find all graphs in the same equivalence class with respect to
    graph H and any undersampling rate.
    '''
    if cmp.isSclique(H):
        print('not running on superclique')
        return None
    start = {v: {} for v in H}
    found = set()
    hn = bfu.ug2num(H)
    if hn[1] == 0:
        # no bidirected edges: H itself is in its own class
        found.add(hn[0])
    cp = confpairs(H)
    ccf = conflictors(H)
    candidates = gk.edgelist(gk.complement(start))
    ds = {bfu.g2num(start): candidates}
    if verbose:
        print('%3s' % 'i' + '%10s' % ' graphs')
    i = 0
    while i < len(H) ** 2:
        ds, ss = add2set_(ds, H, cp, ccf, iter=i,
                          verbose=verbose, capsize=capsize)
        found = found | ss
        if capsize <= len(ss) or not ds:
            break
        i += 1
    return found
def estOE(d):
    """Omission-error rate of d['estimate'] against the once-undersampled
    ground-truth graph d['gt']['graph']."""
    truth = bfu.undersample(d['gt']['graph'], 1)
    err = gk.OCE(d['estimate'], truth)
    total = np.double(len(gk.edgelist(truth))) + \
        np.double(len(gk.bedgelist(truth)))
    return (err['directed'][0] + err['bidirected'][0]) / total
def eqsearch(g2, rate=1):
    '''Find all g that are also in the equivalence class with respect
    to g2 and the rate.
    '''
    s = set()      # accumulates numeric ids of member graphs
    noop = set()   # shared empty result for dead branches

    @memo1
    def addnodes(g, g2, edges):
        # NOTE(review): g is mutated in place around the recursive call;
        # memo1 presumably keys on its current state -- do not reorder
        # the add/del pair below.
        if edges:
            masks = []
            for e in edges:
                if ok2addanedge_(e[0], e[1], g, g2, rate=rate):
                    masks.append(True)
                else:
                    masks.append(False)
            # keep only edges that may still be added without conflict
            nedges = [edges[i] for i in range(len(edges)) if masks[i]]
            n = len(nedges)
            if n:
                for i in range(n):
                    mask = addanedge(g, nedges[i])
                    if bfu.undersample(g, rate) == g2:
                        s.add(bfu.g2num(g))
                    addnodes(g, g2, nedges[:i] + nedges[i + 1:])
                    delanedge(g, nedges[i], mask)
                return s
            else:
                return noop
        else:
            return noop

    g = cloneempty(g2)
    edges = gk.edgelist(gk.complement(g))
    addnodes(g, g2, edges)
    return s
def estOE(d):
    """Return the omission-error rate: omitted directed plus bidirected
    edges of the estimate, normalized by the ground truth's edge count."""
    gtruth = bfu.undersample(d['gt']['graph'], 1)
    oce = gk.OCE(d['estimate'], gtruth)
    n_edges = np.double(len(gk.edgelist(gtruth)))
    n_bedges = np.double(len(gk.bedgelist(gtruth)))
    return (oce['directed'][0] + oce['bidirected'][0]) / (n_edges + n_bedges)
def supergraphs_in_eq(g, g2, rate=1):
    '''Find all supergraphs of g that are also in the same equivalence
    class with respect to g2 and the rate.
    Currently works only for bfu.undersample by 1
    '''
    if bfu.undersample(g, rate) != g2:
        raise ValueError('g is not in equivalence class of g2')
    found = set()

    def grow(g, g2, edges):
        # recursively add admissible edges; g is mutated and restored
        if not edges:
            return
        keep = [e for e in edges
                if ok2addanedge(e[0], e[1], g, g2, rate=rate)]
        for i, edge in enumerate(keep):
            mask = addanedge(g, edge)
            found.add(bfu.g2num(g))
            grow(g, g2, keep[:i] + keep[i + 1:])
            delanedge(g, edge, mask)

    grow(g, g2, gk.edgelist(gk.complement(g)))
    return found
def eqclass(H):
    '''
    Find all graphs in the same equivalence class with respect to
    graph H and any undersampling rate.
    '''
    g = {n: {} for n in H}   # start from the empty graph on H's nodes
    s = set()                # numeric ids of graphs found in the class

    @memo
    def addedges(g, H, edges):
        # NOTE(review): g is mutated in place around the recursive call;
        # the memo decorator presumably keys on its current state.
        if edges:
            nedges = prune_conflicts(H, g, edges)
            n = len(nedges)
            if n == 0:
                return None
            for i in range(n):
                gk.addanedge(g, nedges[i])
                if bfu.call_u_equals(g, H):
                    s.add(bfu.g2num(g))
                addedges(g, H, nedges[:i] + nedges[i + 1:])
                gk.delanedge(g, nedges[i])

    edges = gk.edgelist(gk.complement(g))
    addedges(g, H, edges)
    # the recursion may have produced a stray None; strip it
    return s - set([None])
def eqclass(H):
    '''
    Find all graphs in the same equivalence class with respect to
    graph H and any undersampling rate.

    Returns a set of numeric graph ids (bfu.g2num encoding).
    '''
    g = {n: {} for n in H}   # start from the empty graph on H's nodes
    s = set()                # numeric ids of graphs found in the class

    @memo
    def addedges(g, H, edges):
        # g is mutated in place: add an edge, recurse, remove it again
        if edges:
            nedges = prune_conflicts(H, g, edges)
            n = len(nedges)
            if n == 0:
                return None
            for i in range(n):
                gk.addanedge(g, nedges[i])
                if bfu.call_u_equals(g, H):
                    s.add(bfu.g2num(g))
                # FIX: the recursive call returns None on every path, so
                # the original s.add(addedges(...)) only inserted None
                # (filtered out again below); call it for side effects.
                addedges(g, H, nedges[:i] + nedges[i + 1:])
                gk.delanedge(g, nedges[i])

    edges = gk.edgelist(gk.complement(g))
    addedges(g, H, edges)
    return s - set([None])
def estCOE(d):
    """Commission-error rate of d['estimate'] against the
    once-undersampled ground truth, normalized by the count of absent
    edges."""
    truth = bfu.undersample(d['gt']['graph'], 1)
    err = gk.OCE(d['estimate'], truth)
    n = len(truth)
    # NOTE(review): (n - 1)**2 / 2.0 looks like a typo for the
    # n * (n - 1) / 2.0 bidirected-pair count used by udensity -- confirm.
    denom = np.double(n**2 + (n - 1)**2 / 2.0
                      - len(gk.edgelist(truth))
                      - len(gk.bedgelist(truth)))
    return (err['directed'][1] + err['bidirected'][1]) / denom
def estCOE(d):
    """Return the commission-error rate: extra directed plus bidirected
    edges of the estimate, normalized by the number of possible-but-absent
    edges in the undersampled ground truth."""
    gtruth = bfu.undersample(d['gt']['graph'], 1)
    oce = gk.OCE(d['estimate'], gtruth)
    n = len(gtruth)
    # NOTE(review): (n-1)**2/2.0 is suspicious; the bidirected-pair count
    # elsewhere (udensity) is n*(n-1)/2.0 -- verify before relying on it.
    absent = np.double(n**2 + (n - 1)**2 / 2.0
                       - len(gk.edgelist(gtruth))
                       - len(gk.bedgelist(gtruth)))
    return (oce['directed'][1] + oce['bidirected'][1]) / absent
def prune(g):
    """Recursively delete directed edges of g while the structure still
    forms `loop`; add every unprunable graph's numeric id to `s`.

    NOTE(review): relies on free variables `loop` and `s` from the
    enclosing scope (this def appears to be a closure fragment).
    """
    # FIX: removed unused local `numh = bfu.g2num(g)` -- it was never read
    cannotprune = True
    for l in gk.edgelist(gk.digonly(g)):
        gk.delanedge(g, l)
        if bfu.forms_loop(g, loop):
            cannotprune = False
            prune(g)
        gk.addanedge(g, l)
    if cannotprune:
        s.add(bfu.g2num(g))
def checker(n, ee):
    """Build a ring graph on n nodes with ee extra edges, undersample it
    once, and return partial sums of log10 checkable-set sizes offset by
    ee * log10(n)."""
    g = bfu.ringmore(n, ee)
    g2 = bfu.increment(g)
    d = checkable(g2)
    sizes = [len(d[key]) for key in d]
    n = len(g2)                       # note: shadows the parameter
    ee = len(gk.edgelist(g2))         # note: shadows the parameter
    return [sum(np.log10(sizes[:i])) - ee * np.log10(n)
            for i in range(1, len(sizes))]
def g22g1(g2, capsize=None):
    '''
    Compute all g1 that are in the equivalence class for g2.

    Returns a set of numeric graph ids; set([-1]) for a superclique,
    and a set containing 0 when capsize was exceeded.
    '''
    if ecj.isSclique(g2):
        print('Superclique - any SCC with GCD = 1 fits')
        return set([-1])

    single_cache = {}

    @memo  # memoize the search
    def nodesearch(g, g2, edges, s):
        # g is mutated in place: add two edges, recurse, remove them
        if edges:
            if bfu.increment(g) == g2:
                s.add(bfu.g2num(g))
                if capsize and len(s) > capsize:
                    raise ValueError('Too many elements')
                return g
            e = edges[0]
            for n in g2:
                if (n, e) in single_cache:
                    continue
                if not edge_increment_ok(e[0], n, e[1], g, g2):
                    continue
                mask = add2edges(g, e, n)
                # FIX: result was bound to an unused local `r`
                nodesearch(g, g2, edges[1:], s)
                del2edges(g, e, n, mask)
        elif bfu.increment(g) == g2:
            s.add(bfu.g2num(g))
            if capsize and len(s) > capsize:
                raise ValueError('Too many elements in eqclass')
            return g

    # find all directed g1's not conflicting with g2
    n = len(g2)
    edges = gk.edgelist(g2)
    random.shuffle(edges)
    g = cloneempty(g2)
    for e in edges:
        for n in g2:
            mask = add2edges(g, e, n)
            if not isedgesubset(bfu.increment(g), g2):
                single_cache[(n, e)] = False
            del2edges(g, e, n, mask)

    s = set()
    try:
        nodesearch(g, g2, edges, s)
    except ValueError:
        s.add(0)
    return s
def checkerDS(n, ee):
    """Build a ring graph on n nodes with ee extra edges, undersample it
    once, and return partial sums of log10 conformance-pool sizes offset
    by ee * log10(n)."""
    g = bfu.ringmore(n, ee)
    g2 = bfu.increment(g)
    gg = checkable(g2)
    d, p, idx = conformanceDS(g2, gg, gg.keys())
    sizes = [len(x) for x in p]
    n = len(g2)                       # note: shadows the parameter
    ee = len(gk.edgelist(g2))         # note: shadows the parameter
    return [sum(np.log10(sizes[:i])) - ee * np.log10(n)
            for i in range(1, len(sizes))]
def edge_backtrack2g1_directed(g2, capsize=None):
    '''
    computes all g1 that are in the equivalence class for g2
    '''
    if ecj.isSclique(g2):
        print 'Superclique - any SCC with GCD = 1 fits'
        return set([-1])
    single_cache = {}   # single-edge placements known to conflict

    def edgeset(g):
        return set(gk.edgelist(g))

    @memo  # memoize the search
    def nodesearch(g, g2, edges, s):
        # NOTE(review): `edges` is a shared list mutated via pop/append
        # around the recursion -- statement order here is critical.
        if edges:
            e = edges.pop()
            ln = [n for n in g2]
            for n in ln:
                if (n, e) in single_cache:
                    continue
                mask = add2edges(g, e, n)
                if isedgesubsetD(bfu.increment(g), g2):
                    r = nodesearch(g, g2, edges, s)
                    if r and edgeset(bfu.increment(r)) == edgeset(g2):
                        s.add(bfu.g2num(r))
                        if capsize and len(s) > capsize:
                            raise ValueError('Too many elements in eqclass')
                del2edges(g, e, n, mask)
            edges.append(e)
        else:
            return g

    # find all directed g1's not conflicting with g2
    n = len(g2)
    edges = gk.edgelist(g2)
    random.shuffle(edges)
    g = cloneempty(g2)
    for e in edges:
        for n in g2:
            mask = add2edges(g, e, n)
            if not isedgesubsetD(bfu.increment(g), g2):
                single_cache[(n, e)] = False
            del2edges(g, e, n, mask)

    s = set()
    try:
        nodesearch(g, g2, edges, s)
    except ValueError:
        s.add(0)
    return s
def edge_backtrack2g1_directed(g2, capsize=None):
    '''
    computes all g1 that are in the equivalence class for g2
    '''
    if ecj.isSclique(g2):
        print 'Superclique - any SCC with GCD = 1 fits'
        return set([-1])
    single_cache = {}   # single-edge placements known to conflict

    def edgeset(g):
        return set(gk.edgelist(g))

    @memo  # memoize the search
    def nodesearch(g, g2, edges, s):
        # NOTE(review): `edges` is a shared list mutated via pop/append
        # around the recursion -- statement order here is critical.
        if edges:
            e = edges.pop()
            ln = [n for n in g2]
            for n in ln:
                if (n, e) in single_cache:
                    continue
                mask = add2edges(g, e, n)
                if isedgesubsetD(bfu.increment(g), g2):
                    r = nodesearch(g, g2, edges, s)
                    if r and edgeset(bfu.increment(r)) == edgeset(g2):
                        s.add(bfu.g2num(r))
                        if capsize and len(s) > capsize:
                            raise ValueError('Too many elements in eqclass')
                del2edges(g, e, n, mask)
            edges.append(e)
        else:
            return g

    # find all directed g1's not conflicting with g2
    n = len(g2)
    edges = gk.edgelist(g2)
    random.shuffle(edges)
    g = cloneempty(g2)
    for e in edges:
        for n in g2:
            mask = add2edges(g, e, n)
            if not isedgesubsetD(bfu.increment(g), g2):
                single_cache[(n, e)] = False
            del2edges(g, e, n, mask)

    s = set()
    try:
        nodesearch(g, g2, edges, s)
    except ValueError:
        s.add(0)
    return s
def dceqclass2(H):
    """Find all graphs in the same equivalence class with respect to H

    Arguments:
    - `H`: an undersampled graph
    """
    if cmp.isSclique(H):
        print('not running on superclique')
        return set()

    n = len(H)
    found = set()
    cp = confpairs(H)
    confs = conflictor_set(H)
    ccf = conflictors(H)

    def prune_loops(gl, H):
        # drop self-loop candidates unless H itself carries that self edge
        kept = []
        for e in gl:
            if e[0] == e[1] and not (e[1] in H[e[0]] and (1, 0) in H[e[0]][e[1]]):
                continue
            kept.append(e)
        return kept

    edges = gk.edgelist(gk.complement(bfu.num2CG(0, n)))
    edges = prune_loops(edges, H)
    glist = [e2num(x, n) for x in edges]
    #glist = list(2**np.arange(n**2))

    # phase 1: repeated quad-merging of candidate graphs
    i = 0
    while glist != []:
        print('%s %s' % (2**i, len(glist)))
        glist_prev = glist
        glist, ss = quadmerge21(glist, H, confs)
        found |= ss
        i += 1

    # phase 2: edge-by-edge growth from the last non-empty candidate pool
    ds = {x: edges for x in glist_prev}
    for j in range(i, len(H)**2):
        ds, ss = add2set_(ds, H, cp, ccf, iter=j, verbose=True)
        found |= ss
        if not ds:
            break
    return found
def confpairs(H):
    """Map each candidate single edge (numeric id) to the set of other
    single edges that conflict with it when both are added to an empty
    graph compared against H."""
    n = len(H)
    g = {node: {} for node in H}
    conflicts = {}
    edges = prune_conflicts(H, g, gk.edgelist(gk.complement(g)))
    for pair in combinations(edges, 2):
        gk.addedges(g, pair)
        if bfu.call_u_conflicts(g, H):
            a = e2num(pair[0], n)
            b = e2num(pair[1], n)
            conflicts.setdefault(a, set()).add(b)
            conflicts.setdefault(b, set()).add(a)
        gk.deledges(g, pair)
    return conflicts
def confpairs(H):
    """Build a symmetric conflict map over candidate single edges: edge
    id -> ids of edges whose joint addition conflicts with H."""
    n = len(H)
    base = {v: {} for v in H}
    d = {}
    candidates = prune_conflicts(H, base, gk.edgelist(gk.complement(base)))
    for e1, e2 in combinations(candidates, 2):
        gk.addedges(base, (e1, e2))
        if bfu.call_u_conflicts(base, H):
            id1, id2 = e2num(e1, n), e2num(e2, n)
            d.setdefault(id1, set()).add(id2)
            d.setdefault(id2, set()).add(id1)
        gk.deledges(base, (e1, e2))
    return d
def eqclass_list(H):
    '''
    Find all graphs in the same equivalence class with respect to
    graph H and any undersampling rate.
    '''
    start = {node: {} for node in H}
    result = set()
    edges = gk.edgelist(gk.complement(start))
    #edges = prune_conflicts(H, g, edges)
    gset = set([bfu.g2num(start)])
    for step in range(len(H)**2):
        print(step)
        gset, ss, edges = add2set(gset, edges, H)
        result |= ss
        if not edges:
            break
    return result
def eqclass_list(H):
    '''Enumerate the equivalence class of H over any undersampling rate
    by breadth-first growth over the free edges of the empty graph.'''
    g = {v: {} for v in H}
    s = set()
    free = gk.edgelist(gk.complement(g))
    #edges = prune_conflicts(H, g, edges)
    gset = set([bfu.g2num(g)])
    i = 0
    while i < len(H)**2:
        print(i)
        gset, ss, free = add2set(gset, free, H)
        s = s | ss
        if not free:
            break
        i += 1
    return s
def dceqclass2(H):
    """Find all graphs in the same equivalence class with respect to H

    Arguments:
    - `H`: an undersampled graph
    """
    if cmp.isSclique(H):
        print('not running on superclique')
        return set()

    n = len(H)
    s = set()
    cp = confpairs(H)
    confs = conflictor_set(H)
    ccf = conflictors(H)

    def prune_loops(gl, H):
        # keep a self-loop candidate only if H has that (1, 0) self edge
        return [e for e in gl
                if not (e[0] == e[1]
                        and not (e[1] in H[e[0]] and (1, 0) in H[e[0]][e[1]]))]

    edges = prune_loops(gk.edgelist(gk.complement(bfu.num2CG(0, n))), H)
    glist = [e2num(e, n) for e in edges]
    #glist = list(2**np.arange(n**2))

    # phase 1: quad-merge candidate graphs until none remain
    i = 0
    while glist != []:
        print('%s %s' % (2**i, len(glist)))
        glist_prev = glist
        glist, ss = quadmerge21(glist, H, confs)
        s = s | ss
        i += 1

    # phase 2: continue edge-by-edge from the last non-empty pool
    ds = {x: edges for x in glist_prev}
    for j in range(i, len(H)**2):
        ds, ss = add2set_(ds, H, cp, ccf, iter=j, verbose=True)
        s = s | ss
        if not ds:
            break
    return s
def vedgelist(g, pathtoo=False):
    """ Return a list of tuples for edges of g and forks
    a superugly organically grown function that badly needs refactoring
    """
    el = gk.edgelist(g)
    bl = gk.bedgelist(g)
    parts = []
    if pathtoo:
        parts.extend(make_longpaths(g, el))
    forks, rest = make_allforks_and_rest(g, el, bl, dofullforks=True)
    parts.extend(forks)
    chains, singles = makechains(rest)
    if singles:
        sinks, singles = makesinks(singles)
    else:
        sinks, singles = [], []
    return longpaths_pick(parts) + threedges_pick(parts) + chains + sinks + singles
def eqsearch(g2, rate=1):
    '''Find all g that are also in the equivalence class with respect
    to g2 and the rate.
    '''
    s = set()      # accumulates numeric ids of member graphs
    noop = set()   # shared empty result for dead branches

    @memo1
    def addnodes(g, g2, edges):
        # NOTE(review): g is mutated in place around the recursive call;
        # memo1 presumably keys on its current state -- do not reorder
        # the add/del pair below.
        if edges:
            masks = []
            for e in edges:
                if ok2addanedge_(e[0], e[1], g, g2, rate=rate):
                    masks.append(True)
                else:
                    masks.append(False)
            # keep only edges that may still be added without conflict
            nedges = [edges[i] for i in range(len(edges)) if masks[i]]
            n = len(nedges)
            if n:
                for i in range(n):
                    mask = addanedge(g, nedges[i])
                    if bfu.undersample(g, rate) == g2:
                        s.add(bfu.g2num(g))
                    addnodes(g, g2, nedges[:i] + nedges[i + 1:])
                    delanedge(g, nedges[i], mask)
                return s
            else:
                return noop
        else:
            return noop

    g = cloneempty(g2)
    edges = gk.edgelist(gk.complement(g))
    addnodes(g, g2, edges)
    return s
def backtrack_more(g2, rate=1, capsize=None):
    '''
    computes all g1 that are in the equivalence class for g2
    '''
    if ecj.isSclique(g2):
        print 'Superclique - any SCC with GCD = 1 fits'
        return set([-1])
    single_cache = {}   # V-path placements known to conflict

    # candidate node sequences for a V-path of length `rate`
    if rate == 1:
        ln = [n for n in g2]
    else:
        ln = []
        for x in itertools.combinations_with_replacement(g2.keys(), rate):
            ln.extend(itertools.permutations(x, rate))
        ln = set(ln)

    @memo  # memoize the search
    def nodesearch(g, g2, edges, s):
        # NOTE(review): g is mutated in place around the recursive call;
        # the memo decorator presumably keys on its current state.
        if edges:
            if bfu.undersample(g, rate) == g2:
                s.add(bfu.g2num(g))
                if capsize and len(s) > capsize:
                    raise ValueError('Too many elements')
                return g
            e = edges[0]
            for n in ln:
                if (n, e) in single_cache:
                    continue
                if not ok2addaVpath(e, n, g, g2, rate=rate):
                    continue
                mask = addaVpath(g, e, n)
                r = nodesearch(g, g2, edges[1:], s)
                delaVpath(g, e, n, mask)
        elif bfu.undersample(g, rate) == g2:
            s.add(bfu.g2num(g))
            if capsize and len(s) > capsize:
                raise ValueError('Too many elements in eqclass')
            return g

    # find all directed g1's not conflicting with g2
    n = len(g2)
    edges = gk.edgelist(g2)
    random.shuffle(edges)
    g = cloneempty(g2)
    for e in edges:
        for n in ln:
            mask = addaVpath(g, e, n)
            if not isedgesubset(bfu.undersample(g, rate), g2):
                single_cache[(n, e)] = False
            delaVpath(g, e, n, mask)

    s = set()
    try:
        nodesearch(g, g2, edges, s)
    except ValueError:
        s.add(0)
    return s
def edgeset(g):
    """Return g's directed edges as a set of tuples."""
    return {e for e in gk.edgelist(g)}
def density(g):
    """Fraction of the len(g)**2 possible directed edges present in g."""
    possible = np.double(len(g) ** 2)
    return len(gk.edgelist(g)) / possible
def dpc(data, pval=0.05): n = data.shape[0] # stack the data: first n rows is t-1 slice, the next n are slice t data = np.asarray(np.r_[data[:, :-1], data[:, 1:]]) def tetrad_cind_(y, x, condset=[], alpha=0.01, shift=0): y = data[n + int(y) - 1, :] x = data[shift + int(x) - 1, :] if condset: X = data[condset, :] ry, rx = residuals_(y, x, X) else: ry, rx = [y, x] return independent_(ry, rx, alpha=alpha) def cind_(y, x, condset=[], pval=pval, shift=0): yd = data[n + int(y) - 1, :].T X = data[[shift + int(x) - 1] + condset, :].T return independent(yd, X, pval=pval) def cindependent(y, x, counter, parents=[], pval=pval): for S in [j for j in iter.combinations(parents, counter)]: if cind_(y, x, condset=list(S), pval=pval): return True #if tetrad_cind_(x, y, condset=list(S), alpha=pval): return True return False def bindependent(y, x, parents=[], pval=pval): return cind_(y, x, condset=parents, pval=pval, shift=n) #return tetrad_cind_(y, x, condset=parents, alpha=pval, shift=n) def prune(elist, mask, g): for e in mask: g[e[0]][e[1]].remove((0, 1)) elist.remove(e) gk.clean_leaf_nodes(g) g = gk.superclique(n) gtr = bfu.gtranspose(g) el = gk.edgelist(g) for counter in range(n): to_remove = [] for e in el: ppp = [int(k) - 1 for k in gtr[e[1]] if k != e[0]] if counter <= len(ppp): if cindependent(e[1], e[0], counter, parents=ppp, pval=pval): to_remove.append(e) gtr[e[1]].pop(e[0], None) prune(el, to_remove, g) bel = [ map(lambda k: str(k + 1), x) for x in iter.combinations(range(n), 2) ] for e in bel: ppp = list(set(gtr[e[0]].keys()) | set(gtr[e[1]].keys())) ppp = map(lambda x: int(x) - 1, ppp) if bindependent(e[0], e[1], parents=ppp, pval=pval): g[e[0]][e[1]].remove((2, 0)) g[e[1]][e[0]].remove((2, 0)) gk.clean_leaf_nodes(g) return g
def udensity(g):
    """Fraction of possible directed plus bidirected edges present in g.

    gk.bedgelist presumably lists each bidirected edge in both
    directions, hence the division by 2 -- TODO confirm.
    """
    return (len(gk.edgelist(g)) + len(gk.bedgelist(g)) / 2.) / np.double(len(g)**2 + len(g) * (len(g) - 1) / 2.)


# NOTE(review): definition truncated in this chunk -- body not visible
def esig(l, n):
def density(g):
    """Fraction of the len(g)**2 possible directed edges present in g."""
    return len(gk.edgelist(g)) / np.double(len(g)**2)


# NOTE(review): definition truncated in this chunk -- body not visible
def esig(l, n):
def printedges(g):
    """Print each directed edge of g as 'tail -> head', shifting the
    node labels down by one."""
    for edge in gk.edgelist(g):
        print('%s -> %s' % (edge[0] - 1, edge[1] - 1))
def checkcedge(c, g2):
    """ Nodes to check to merge the virtual nodes of c ( a->b->c ) """
    # deduplicate the edge list of g2 before returning it
    return list(set(gk.edgelist(g2)))
def backtrack_more2(g2, rate=2, capsize=None): ''' computes all g1 that are in the equivalence class for g2 ''' if ecj.isSclique(g2): print 'Superclique - any SCC with GCD = 1 fits' return set([-1]) f = [(addaVpath,delaVpath,maskaVpath)] c = [ok2addaVpath] def predictive_check(g,g2,pool,checks_ok, key): s = set() for u in pool: if not checks_ok(key,u,g,g2,rate=rate): continue s.add(u) return s @memo2 # memoize the search def nodesearch(g, g2, order, inlist, s, cds, pool, pc): if order: if bfu.undersample(g,rate) == g2: s.add(bfu.g2num(g)) if capsize and len(s)>capsize: raise ValueError('Too many elements') s.update(supergraphs_in_eq(g, g2, rate=rate)) return g key = order[0] if pc: tocheck = [x for x in pc if x in cds[len(inlist)-1][inlist[0]]] else: tocheck = cds[len(inlist)-1][inlist[0]] if len(order) > 1: kk = order[1] pc = predictive_check(g,g2,pool[len(inlist)], c[edge_function_idx(kk)],kk) else: pc = set() adder, remover, masker = f[edge_function_idx(key)] checks_ok = c[edge_function_idx(key)] for n in tocheck: if not checks_ok(key,n,g,g2,rate=rate): continue masked = np.prod(masker(g,key,n)) if masked: nodesearch(g,g2,order[1:], [n]+inlist, s, cds, pool, pc) else: mask = adder(g,key,n) nodesearch(g,g2,order[1:], [n]+inlist, s, cds, pool, pc) remover(g,key,n,mask) elif bfu.undersample(g,rate)==g2: s.add(bfu.g2num(g)) if capsize and len(s)>capsize: raise ValueError('Too many elements') return g # find all directed g1's not conflicting with g2 startTime = int(round(time.time() * 1000)) ln = [x for x in itertools.permutations(g2.keys(),rate)] + \ [(n,n) for n in g2] gg = {x:ln for x in gk.edgelist(g2)} keys = gg.keys() cds, order, idx = conformanceDS(g2, gg, gg.keys(), f=f, c=c) endTime = int(round(time.time() * 1000)) print "precomputed in {:10} seconds".format(round((endTime-startTime)/1000.,3)) if 0 in [len(x) for x in order]: return set() g = cloneempty(g2) s = set() try: nodesearch(g, g2, [keys[i] for i in idx], ['0'], s, cds, order, set()) except 
ValueError, e: print e s.add(0)