def __init__(self, regexp):
    """Build the transition digraph for the pattern *regexp*.

    One vertex is created per pattern character plus one extra vertex
    (index ``len(regexp)``).  Edges are added only for the operators
    ``(``, ``)``, ``|`` and ``*``.  Only a single ``|`` per parenthesised
    group is supported; a group with more leaves entries on the operator
    stack and is rejected.

    Args:
        regexp: the pattern, indexable character by character.

    Raises:
        ValueError: if the parentheses / alternations do not balance.
    """
    self.regexp = regexp
    self.m = len(regexp)
    self.graph = Digraph(self.m + 1)
    ops = []  # positions of '(' and '|' still waiting for their ')'
    for i in range(self.m):
        ch = regexp[i]
        lp = i
        if ch == '(' or ch == '|':
            ops.append(i)
        elif ch == ')':
            # An empty stack means a ')' without a partner; report it the
            # same way as every other malformed pattern instead of leaking
            # an IndexError from list.pop().
            if not ops:
                raise ValueError("Invalid regular expression")
            or_ = ops.pop()
            if regexp[or_] == '|':
                if not ops:
                    raise ValueError("Invalid regular expression")
                lp = ops.pop()
                self.graph.addEdge(lp, or_ + 1)
                self.graph.addEdge(or_, i)
            else:
                # Only '(' and '|' are ever pushed, so this is a '('.
                lp = or_
        # Look ahead one character for a closure on the current unit.
        if i < self.m - 1 and regexp[i + 1] == '*':
            self.graph.addEdge(lp, i + 1)
            self.graph.addEdge(i + 1, lp)
        if ch == '(' or ch == '*' or ch == ')':
            self.graph.addEdge(i, i + 1)
    if ops:
        raise ValueError("Invalid regular expression")
def read_graph(filename):
    """Load a weighted digraph from the text file *filename*.

    The first line is converted to ``int`` and passed to ``Digraph``.
    Every following non-blank line must hold three whitespace-separated
    fields ``source destination weight``, which are inserted with
    ``insert_arc(int, int, float)``.

    Args:
        filename: path of the file to read.

    Returns:
        The populated ``Digraph``.

    Raises:
        ValueError: if a non-blank line does not hold exactly three fields
            or a field cannot be converted.
    """
    with open(filename, 'r') as reader:
        g = Digraph(int(reader.readline()))
        for line in reader:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. an extra newline at the end
                # of the file) instead of failing on the unpack below.
                continue
            s, d, w = fields
            g.insert_arc(int(s), int(d), float(w))
    return g
class NFA:
    """Pattern matcher that simulates an automaton over a transition digraph.

    Vertex ``i`` stands for ``regexp[i]``; vertex ``len(regexp)`` is the
    state that ``recognizes`` tests for at the end of the text.
    """

    regexp = None
    m = 0

    def __init__(self, regexp):
        """Build the transition digraph for the pattern *regexp*.

        Edges are added only for the operators ``(``, ``)``, ``|`` and
        ``*``.  Only a single ``|`` per parenthesised group is supported.

        Raises:
            ValueError: if the parentheses / alternations do not balance.
        """
        self.regexp = regexp
        self.m = len(regexp)
        self.graph = Digraph(self.m + 1)
        ops = []  # positions of '(' and '|' still waiting for their ')'
        for i in range(self.m):
            ch = regexp[i]
            lp = i
            if ch == '(' or ch == '|':
                ops.append(i)
            elif ch == ')':
                # A ')' with nothing on the stack is a malformed pattern;
                # raise the same ValueError as the final balance check
                # rather than an IndexError from list.pop().
                if not ops:
                    raise ValueError("Invalid regular expression")
                or_ = ops.pop()
                if regexp[or_] == '|':
                    if not ops:
                        raise ValueError("Invalid regular expression")
                    lp = ops.pop()
                    self.graph.addEdge(lp, or_ + 1)
                    self.graph.addEdge(or_, i)
                else:
                    # Only '(' and '|' are ever pushed, so this is a '('.
                    lp = or_
            # Look ahead one character for a closure on the current unit.
            if i < self.m - 1 and regexp[i + 1] == '*':
                self.graph.addEdge(lp, i + 1)
                self.graph.addEdge(i + 1, lp)
            if ch == '(' or ch == '*' or ch == ')':
                self.graph.addEdge(i, i + 1)
        if ops:
            raise ValueError("Invalid regular expression")

    def _reachable(self, sources):
        """Return, in ascending order, the vertices marked by a DFS from *sources*."""
        dfs = DirectedDFS(self.graph, sources)
        return [v for v in range(self.graph.Vertex()) if dfs.marked[v]]

    def recognizes(self, txt):
        """Return True if *txt* drives the automaton to vertex ``self.m``.

        Args:
            txt: the text to test, character by character.

        Raises:
            ValueError: if *txt* contains ``*``, ``|``, ``(`` or ``)``.
        """
        pc = self._reachable([0])
        for ch in txt:
            if ch == '*' or ch == '|' or ch == '(' or ch == ')':
                raise ValueError("text contains the metacharacter %s" % (ch))
            # Vertices entered by consuming ch ('.' matches any character).
            match = [v + 1 for v in pc
                     if v != self.m
                     and (self.regexp[v] == ch or self.regexp[v] == '.')]
            pc = self._reachable(match)
            if not pc:
                # No live state left: the remaining text cannot match.
                return False
        return self.m in pc
def addEdge(self, edge):
    """
    Insert an edge together with its mirror image.

    Requires: edge Edge
    Ensures: edge[src] = dest and edge[dest] = src
    """
    Digraph.addEdge(self, edge)
    # Build the opposite direction from the same endpoints.
    destination = edge.getDestination()
    source = edge.getSource()
    Digraph.addEdge(self, Edge(destination, source))
class SymbolDigraph:
    """Digraph whose vertices are addressed by string names.

    ``self.st`` maps name -> vertex index, ``self.keys`` maps vertex index
    -> name, and ``self.graph`` is the underlying ``Digraph``.
    """

    def __init__(self, filename, delimiter):
        """Read *filename* twice: first to index names, then to add edges.

        On each line the first token is a source vertex and the remaining
        tokens are its successors.

        Args:
            filename: path handed to ``In``.
            delimiter: accepted for interface compatibility.
                NOTE(review): the value is overwritten with ``None`` below,
                so tokens are always split on runs of whitespace.  Left
                as-is because honouring it would also change what is passed
                to ``In`` - confirm the intended behaviour.
        """
        self.st = dict()
        self.keys = dict()
        delimiter = None
        myin = In(filename, delimiter)
        # Pass 1: give every distinct token the next free index.
        while myin.hasNextLine():
            for token in myin.readLine().split(delimiter):
                if token not in self.st:
                    self.st[token] = len(self.st)
        # Inverted index: vertex number -> name.
        for token, number in self.st.items():
            self.keys[number] = token
        self.graph = Digraph(len(self.st))
        # Pass 2: add the edges.  Lines are split exactly as in pass 1
        # (the original split on a single ' ' here, so tabs, repeated
        # spaces or a trailing newline produced tokens missing from
        # self.st and edges to None).
        myin = In(filename, '\n')
        while myin.hasNextLine():
            a = myin.readLine().split(delimiter)
            if not a:
                continue  # blank line: nothing to add
            v = self.st.get(a[0])
            for token in a[1:]:
                self.graph.addEdge(v, self.st.get(token))

    def contains(self, s):
        """Return True if a vertex named *s* exists."""
        return self.st.get(s) is not None

    def index(self, s):
        """Return the vertex index of name *s*, or None if unknown."""
        return self.st.get(s)

    def indexOf(self, s):
        """Return the vertex index of name *s*, or None if unknown."""
        # Was ``self.get(s)``, which raised AttributeError on every call.
        return self.st.get(s)

    def name(self, v):
        """Return the name of vertex *v* (validated first)."""
        self.validateVertex(v)
        return self.keys[v]

    def nameOf(self, v):
        """Return the name of vertex *v* (validated first)."""
        self.validateVertex(v)
        return self.keys[v]

    def G(self):
        """Return the underlying digraph."""
        return self.graph

    def digraph(self):
        """Return the underlying digraph."""
        return self.graph

    def validateVertex(self, v):
        """Raise ValueError unless ``0 <= v < self.graph.Vertex()``."""
        Ver = self.graph.Vertex()
        if v < 0 or v >= Ver:
            # Raising a bare string is itself a TypeError in Python 3.
            raise ValueError("vertex %d is not between 0 and %d " % (v, Ver - 1))
def __init__(self, s):
    """Build the transition digraph for the regular expression *s*.

    *s* is first rewritten by ``__convert``; the result is stored
    character by character in ``self._re``.  One vertex is created per
    character plus one extra vertex, and edges are added for ``(``, ``)``,
    ``|``, the closures ``*``, ``+``, ``?`` and for a backslash that
    precedes one of the characters ``\\ . * + ? | ( ) [ ] { }``.

    NOTE(review): the character following a backslash still passes through
    the operator branches of this loop (an escaped '(' is pushed on the
    operator stack, an escaped '*' still gets its forward edge) - confirm
    whether escaped operators are meant to be literals.
    """
    self._ops = list()
    self._re = list(self.__convert(s))
    self._M = len(self._re)
    self._G = Digraph(self._M + 1)
    escape = '\\.*+?|()[]{}'
    escaped = False  # True while self._re[i] is the target of a backslash
    for i in range(self._M):
        lp = i  # left end of the unit a following closure applies to
        # (), |
        if self._re[i] == '(' or self._re[i] == '|':
            self._ops.append(i)
        elif self._re[i] == ')':
            or_pos = list()
            _or = self._ops.pop()
            # Collect every '|' of this group, then reach its '('.
            while self._re[_or] == '|':
                or_pos.append(_or)
                _or = self._ops.pop()
            lp = _or
            for pos in or_pos:
                self._G.add_edge(lp, pos + 1)
                self._G.add_edge(pos, i)
        # Escape sequences.  A backslash that is itself the target of an
        # escape is a literal and must not start another escape.
        if escaped:
            escaped = False
        elif i < self._M - 1 and self._re[i] == '\\':
            # ``str.find`` returns -1 (truthy) when absent and 0 (falsy)
            # for a match at index 0, so the original truth test was wrong
            # in both directions; use a membership test instead.
            if self._re[i + 1] in escape:
                self._G.add_edge(i, i + 1)
            else:
                print("please don't use only one \\ "
                      "or \\(special character) like \\s,"
                      " which is not finish")
            escaped = True
        # Closures, detected by looking one character ahead.
        if i < self._M - 1 and self._re[i + 1] == '*':
            # zero or more
            self._G.add_edge(lp, i + 1)
            self._G.add_edge(i + 1, lp)
        if i < self._M - 1 and self._re[i + 1] == '+':
            # one or more
            self._G.add_edge(i + 1, lp)
        if i < self._M - 1 and self._re[i + 1] == '?':
            # zero or one
            self._G.add_edge(lp, i + 1)
        # Operators do not consume text: step straight to the next vertex.
        if self._re[i] in ('(', '*', ')', '+', '?'):
            self._G.add_edge(i, i + 1)
def test_Digraph(self):
    """Feed the same edges to Digraph and Digraph_AM and compare them.

    The two graphs must agree on ``V``, ``E`` and ``adjlist(1)``.
    """
    list_graph = Digraph(5)
    other_graph = Digraph_AM(5)
    for item in self.G:
        tail, head = item[0], item[1]
        list_graph.addEdge(tail, head)
        other_graph.addEdge(tail, head)
    self.assertTrue(list_graph.V == other_graph.V)
    self.assertTrue(list_graph.E == other_graph.E)
    neighbours = list_graph.adjlist(1)
    other_neighbours = other_graph.adjlist(1)
    self.assertTrue(neighbours == other_neighbours)
def main():
    """Ask for a graph file, then print DFS orderings of it and its reverse.

    For the graph and for ``G.reverse()`` a ``Depth_First_Order`` is built;
    its ``topo_list()`` is printed and ``show_topo_order()`` is called.
    """
    filename = raw_input('input the graph path: ')
    G = Digraph(filename)
    print('The adjacent table of the input Graph is: \n')
    print(G)
    print('')

    # Orderings of the graph as read.
    DFS = Depth_First_Order(G)
    print('\nThe order used for the SCC search of orininal G is:: ')
    print(DFS.topo_list())
    print('\nThe toplocical order the the graph is: ')
    DFS.show_topo_order()

    # Same report for the reversed graph.
    print('----------------the reversed G------------------')
    print(G.reverse())
    DFS = Depth_First_Order(G.reverse())
    print('\nThe order used for the SCC of reversed G is: ')
    print(DFS.topo_list())
    print('\nThe toplocical order the the reversed graph is: ')
    DFS.show_topo_order()
def __init__(self, filename, delimiter):
    """Read *filename* twice: first to index names, then to add edges.

    After construction ``self.st`` maps name -> vertex index,
    ``self.keys`` maps vertex index -> name and ``self.graph`` is the
    ``Digraph``.  On each line the first token is a source vertex and the
    remaining tokens are its successors.

    Args:
        filename: path handed to ``In``.
        delimiter: accepted for interface compatibility.
            NOTE(review): the value is overwritten with ``None`` below, so
            tokens are always split on runs of whitespace.  Left as-is
            because honouring it would also change what is passed to
            ``In`` - confirm the intended behaviour.
    """
    self.st = dict()
    self.keys = dict()
    delimiter = None
    myin = In(filename, delimiter)
    # Pass 1: give every distinct token the next free index.
    while myin.hasNextLine():
        for token in myin.readLine().split(delimiter):
            if token not in self.st:
                self.st[token] = len(self.st)
    # Inverted index: vertex number -> name.
    for token, number in self.st.items():
        self.keys[number] = token
    self.graph = Digraph(len(self.st))
    # Pass 2: add the edges.  Lines are split exactly as in pass 1 (the
    # original split on a single ' ' here, so tabs, repeated spaces or a
    # trailing newline produced tokens missing from self.st and edges to
    # None).
    myin = In(filename, '\n')
    while myin.hasNextLine():
        a = myin.readLine().split(delimiter)
        if not a:
            continue  # blank line: nothing to add
        v = self.st.get(a[0])
        for token in a[1:]:
            self.graph.addEdge(v, self.st.get(token))
return self.count def processVertexEarly(self, s): #print "Discovered vertex ", s return def processVertexLate(self, s): return def processEdge(self,s, n): #print "Processed Edge ", s, " -> ", n return if __name__ == '__main__': inputFile = sys.argv[1] sourceId = int(sys.argv[2]) g = Digraph(inputFile) d = DirectedDFS(g, sourceId) for vid in g.getVertexIds(): print sourceId, "to", vid, ":", print d.pathTo(vid) print "" # if d.getCount() == g.getNoOfVertices(): # print "connected" # else: # print "NOT connected"
def openTwoPathOpenTimes(numNodes, transaction_list, verbose): """Given a list of transactions which are in the form of a list of (sender, receiver, timestamp) tuples, return the list of times at open two-paths are created. Parameters: numNodes - number of actors (nodes) transaction_list - list of (sender, receiver, timestamp) tuples. Sender and receiver are intergers in 0..numNodes-1 and timestamps are numeric values. The list must be ordered by timestamp ascending. verbose - if True write debug output to stdout Return value: dict { (i, j, k) : t } where (i, j, k) is an open directed two-path (note this means it is not part of a transitive triad i.e. i -> k is not present, but it may be part of a cyclic triad i.e. k -> i may be present), and t is the time the two-path was created (open). BUT only if the second arc in two-path has higher timestamp than the first (i.e. we don't count a two-path that goes backward in time along the path) """ G = Digraph(numNodes) lastTime = None pathdict = {} # dict mapping (i,j,k) two-path tuple to open time for trans in transaction_list: assert (trans[TSENDER] >= 0 and trans[TSENDER] < numNodes) assert (trans[TRECEIVER] >= 0 and trans[TRECEIVER] < numNodes) assert (lastTime is None or trans[TTIME] >= lastTime) if verbose: print trans[TSENDER], '->', trans[TRECEIVER], ' at time ', trans[ TTIME], (ulist, vlist) = openTwoPaths(G, trans[TSENDER], trans[TRECEIVER]) i = trans[TSENDER] j = trans[TRECEIVER] if len(ulist) > 0 or len(vlist) > 0: if verbose: print 'opened', len(ulist) + len(vlist), 'two-paths' for u in ulist: # u -> i -> j path_1st_time = G.G[u][i] path_2nd_time = trans[TTIME] # will be G.G[i][j] when inserted if path_2nd_time > path_1st_time: if verbose: print ' path from', u, 'is forward in time (', path_1st_time, ',', path_2nd_time, '), including' if not pathdict.has_key((u, i, j)): pathdict[(u, i, j)] = path_2nd_time else: if verbose: print ' two-path ', u, i, j, 'already present from time', pathdict[ (u, i, j)], ' not 
updating' else: if verbose: print ' path from', u, 'is backwards in time (', path_1st_time, ',', path_2nd_time, '), skipping' if len(vlist) > 0: if verbose: print ' ', len(vlist), ' are backward in time, skipping' for v in vlist: # i -> j -> v path_1st_time = trans[TTIME] # will be G.G[i][j] when inserted path_2nd_time = G.G[j][v] # as the transactions are ordered in time we cannot # have this a two-path ordered in time, as the 2nd is older assert (path_1st_time > path_2nd_time) else: if verbose: print # now check if the new arc i -> j would close any currently open # two-paths. If so, remove those from the dictionary of open # two paths. for v in closedTwoPaths(G, i, j): # For each v, i -> v -> j is now a two-path closed by i -> j if verbose: print ' removing ', i, v, j, ' as it is now a transitive triad' # note (i, v, j) might exist in pathdict as it was an open two-path # but NOT NECESSARILY as it might have been ignored as backward in time if pathdict.has_key((i, v, j)): pathdict.pop((i, v, j)) else: if verbose: print ' (did not exist in dict)' # add this new arc i -> j to the graph # note there is a potential inconsistency here in that this # arc might already exist in which case we update the time with # the new (later) time, however in the pathdict dictionary # we do not update times of open two-paths but keep the first # opening time. Need to decide which one really is correct. G.insertArc(trans[TSENDER], trans[TRECEIVER], trans[TTIME]) lastTime = trans[TTIME] return pathdict
'''WinPython 3.4.4 64-bit'''

import time
from Digraph import Digraph

# Phase 1: load the edge list, one "tail head" pair per line.
# (timing noted by the original author: 14.1 seconds)
start = time.time()
digraph = Digraph()
with open("SCC.txt", "rt") as f:
    for line in f:
        tail, head = map(int, line.split())
        digraph.add_edge(tail, head)
finish = time.time()
elapsed = 1.0 * (finish - start)
print("Time to read file: %.3f" % (elapsed,))
print("Number of graph vertices: %d" % (digraph.num_nodes(),))
print()

# Phase 2: compute the component sizes.
# (timing noted by the original author: 9.100 seconds)
start = time.time()
scc_sizes = digraph.scc_sizes()
finish = time.time()
elapsed = 1.0 * (finish - start)
print("Calculation time: %.3f" % (elapsed,))

# Report the first five entries on a single line.
print("Component sizes: ", end="")
i = 0
while i < 5:
    print("%d " % (scc_sizes[i],), end="")
    i += 1
# The original ``for i in range(5)`` leaves i at 4 at module level; the
# while loop exits with 5, so put the same final value back.
i = 4
print()
def addEdge(self, edge):
    """Add *edge* and then the edge running the opposite way."""
    Digraph.addEdge(self, edge)
    Digraph.addEdge(self, Edge(edge.getDestination(), edge.getSource()))
self.edgeTo[w] = v def hasPathTo(self, v): return self.marked[v] def pathTo(self, v): if (not self.hasPathTo(v)): return None else: path = [v] x = v while not (x == self.s): path.append(self.edgeTo[x]) x = self.edgeTo[x] # path.append(v) return list(reversed(path)) if __name__ == '__main__': from Digraph import Digraph from In import In import sys in_ = In(sys.argv[1]) G = Digraph(fileobject=in_) bfp = BreadthFirstPaths(G, 0) print(bfp.edgeTo) print(bfp.marked) print(bfp.pathTo(3)) print(bfp.distTo)
class NFA:
    """This class provides a data type for creating a non-deterministic
    finite state automaton (NFA) from a regular expression and testing
    whether a given string is matched by that regular expression.
    """

    def __init__(self, s):
        """Build the transition digraph for the regular expression *s*.

        *s* is first rewritten by ``__convert``; the result is stored
        character by character in ``self._re``.  One vertex is created per
        character plus one extra vertex, and edges are added for ``(``,
        ``)``, ``|``, the closures ``*``, ``+``, ``?`` and for a backslash
        that precedes one of ``\\ . * + ? | ( ) [ ] { }``.

        NOTE(review): the character following a backslash still passes
        through the operator branches of this loop (an escaped '(' is
        pushed on the operator stack, an escaped '*' still gets its forward
        edge) - confirm whether escaped operators are meant to be literals.
        """
        self._ops = list()
        self._re = list(self.__convert(s))
        self._M = len(self._re)
        self._G = Digraph(self._M + 1)
        escape = '\\.*+?|()[]{}'
        escaped = False  # True while self._re[i] is the target of a backslash
        for i in range(self._M):
            lp = i  # left end of the unit a following closure applies to
            # (), |
            if self._re[i] == '(' or self._re[i] == '|':
                self._ops.append(i)
            elif self._re[i] == ')':
                or_pos = list()
                _or = self._ops.pop()
                # Collect every '|' of this group, then reach its '('.
                while self._re[_or] == '|':
                    or_pos.append(_or)
                    _or = self._ops.pop()
                lp = _or
                for pos in or_pos:
                    self._G.add_edge(lp, pos + 1)
                    self._G.add_edge(pos, i)
            # Escape sequences.  A backslash that is itself the target of
            # an escape is a literal and must not start another escape.
            if escaped:
                escaped = False
            elif i < self._M - 1 and self._re[i] == '\\':
                # ``str.find`` returns -1 (truthy) when absent and 0
                # (falsy) for a match at index 0, so the original truth
                # test was wrong in both directions.
                if self._re[i + 1] in escape:
                    self._G.add_edge(i, i + 1)
                else:
                    print("please don't use only one \\ "
                          "or \\(special character) like \\s,"
                          " which is not finish")
                escaped = True
            # Closures, detected by looking one character ahead.
            if i < self._M - 1 and self._re[i + 1] == '*':
                # zero or more
                self._G.add_edge(lp, i + 1)
                self._G.add_edge(i + 1, lp)
            if i < self._M - 1 and self._re[i + 1] == '+':
                # one or more
                self._G.add_edge(i + 1, lp)
            if i < self._M - 1 and self._re[i + 1] == '?':
                # zero or one
                self._G.add_edge(lp, i + 1)
            # Operators do not consume text: step to the next vertex.
            if self._re[i] in ('(', '*', ')', '+', '?'):
                self._G.add_edge(i, i + 1)

    def recognizes(self, txt):
        """Return True if *txt* drives the automaton to vertex ``self._M``."""
        # Vertices marked by a DFS from the start vertex 0.
        dfs = DirectedDFS(self._G, [0])
        pc = [v for v in range(self._G.V) if dfs.marked(v)]
        for ch in txt:
            # Vertices entered by consuming ch ('.' matches any character).
            recognizes = [v + 1 for v in pc
                          if v < self._M
                          and (self._re[v] == ch or self._re[v] == '.')]
            # Close the new set under the digraph's edges.
            dfs = DirectedDFS(self._G, recognizes)
            pc = [v for v in range(self._G.V) if dfs.marked(v)]
        return self._M in pc

    def __convert(self, s):
        """Rewrite the extended syntax in *s* into the core operators.

        ``[ABC]`` becomes ``(A|B|C)``; ``A{3}`` becomes ``AAA``;
        ``A{3,5}`` becomes ``AAAA?A?``; ``A{3,}`` becomes ``AAAA*``.
        A backslash and the character after it are copied through
        unchanged.

        Returns:
            The rewritten pattern as a string.
        """
        seq = deque()
        i = 0
        length = len(s)
        while i < length:
            if s[i] == '\\':
                seq.append(s[i])      # the backslash
                seq.append(s[i + 1])  # the escaped character
                i += 1
            elif s[i] == '[':
                # [ABC] -> (A|B|C)
                seq.append('(')
                i += 1
                while s[i] != ']':
                    seq.append(s[i])
                    seq.append('|')
                    i += 1
                seq.pop()  # drop the '|' appended after the last member
                seq.append(')')
            elif s[i] == '{':
                # A{3}->AAA, A{3,5}->AAAA?A?, A{3,}->AAAA*
                in_brace = ''
                num1, num2 = 0, 0
                multiple, _range, more = False, False, False
                # Collect everything between the braces.
                i += 1
                while s[i] != '}':
                    in_brace += s[i]
                    i += 1
                # Classify: {n}, {n,m} or {n,}.
                if ',' in in_brace:
                    nums = in_brace.split(',')
                    num1 = int(nums[0])
                    if nums[1] == '':
                        more = True
                    else:
                        _range = True
                        num2 = int(nums[1])
                else:
                    multiple = True
                    num1 = int(in_brace)
                # Pop the unit being repeated off the output (stored in
                # reverse order): a whole group, an escaped ')' or a
                # single character.
                unit = list()
                if seq[-1] == ')':
                    unit.append(seq.pop())  # the ')'
                    if seq[-1] == '\\':
                        unit.append(seq.pop())  # it was an escaped ')'
                    else:
                        lp_count = 0
                        rp_count = 1
                        while lp_count != rp_count:
                            if seq[-1] == ')':
                                rp_count += 1
                            elif seq[-1] == '(':
                                lp_count += 1
                            unit.append(seq.pop())
                else:
                    unit.append(seq.pop())
                # Mandatory copies.
                for _ in range(num1):
                    seq.extend(reversed(unit))
                if multiple:
                    pass
                elif _range:
                    # Optional copies: num2 - num1.  The original counted
                    # num1 down to 0 before this subtraction and therefore
                    # emitted num2 optional copies instead.
                    for _ in range(num2 - num1):
                        seq.extend(reversed(unit))
                        seq.append('?')
                elif more:
                    seq.extend(reversed(unit))
                    seq.append('*')
            else:
                seq.append(s[i])
            i += 1
        return ''.join(seq)
def run_on_network_attr(edgelist_filename, param_func_list, labels,
                        outcome_bin_filename,
                        binattr_filename=None,
                        contattr_filename=None,
                        catattr_filename=None,
                        EEiterations = 50000,
                        run = None,
                        learningRate = 0.01,
                        sampler_func = basicALAAMsampler,
                        zone_filename= None,
                        directed = False):
    """Run on specified network with binary and/or continuous
    and categorical attributes.

    Parameters:
         edgelist_filename - filename of Pajek format edgelist
         param_func_list   - list of change statistic functions corresponding
                             to parameters to estimate
         labels            - list of strings corresponding to param_func_list
                             to label output (header line)
         outcome_bin_filename - filename of binary attribute (node per line)
                                of outcome variable for ALAAM
         binattr_filename - filename of binary attributes (node per line)
                            Default None, in which case no binary attr.
         contattr_filename - filename of continuous attributes (node per line)
                            Default None, in which case no continuous attr.
         catattr_filename - filename of categorical attributes (node per line)
                            Default None, in which case no categorical attr.
         EEiterations     - Number of iterations of the EE algorithm.
                            Default 50000.
         run              - run number for parallel runs, used as suffix on
                            output filenames. Default None
                            in which case no suffix added to output files.
         learningRate     - learning rate (step size multiplier, a)
                            default 0.01
         sampler_func     - ALAAM sampler function with signature
                            (G, A, changestats_func_list, theta, performMove,
                             sampler_m); see basicALAAMsampler.py
                            default basicALAAMsampler
         zone_filename    - filename of snowball sampling zone file
                            (header line 'zone' then zone number for nodes,
                            one per line)
                            Default None, in which case no snowball zones.
                            If not None then the sampler_func should take
                            account of snowball sample zones i.e.
                            conditionalALAAMsampler()
         directed         - Default False.
                            True for directed network else undirected.

    Write output to ifd_theta_values_<basename>_<run>.txt and
                    ifd_dzA_values_<basename>_<run>.txt
    where <basename> is the basename of edgelist filename e.g.
    if edgelist_filename is edges.txt then ifd_theta_values_edges_0.txt
    and ifd_dzA_values_edges_0.txt etc.
    WARNING: these files are overwritten.
    """
    assert(len(param_func_list) == len(labels))
    basename = os.path.splitext(os.path.basename(edgelist_filename))[0]
    THETA_OUTFILENAME = THETA_PREFIX + basename
    DZA_OUTFILENAME = DZA_PREFIX + basename
    if run is not None:
        THETA_OUTFILENAME += '_' + str(run)
        DZA_OUTFILENAME += '_' + str(run)
    THETA_OUTFILENAME += os.extsep + 'txt'
    DZA_OUTFILENAME += os.extsep + 'txt'

    if directed:
        G = Digraph(edgelist_filename, binattr_filename, contattr_filename,
                    catattr_filename, zone_filename)
    else:
        G = Graph(edgelist_filename, binattr_filename, contattr_filename,
                  catattr_filename, zone_filename)

    G.printSummary()

    # The first whitespace-separated token of the outcome file is skipped
    # (header); the file is closed as soon as it has been read.
    with open(outcome_bin_filename) as outcome_file:
        outcome_binvar = list(map(int_or_na, outcome_file.read().split()[1:]))
    assert(len(outcome_binvar) == G.numNodes())
    A = outcome_binvar
    print('positive outcome attribute = ', (float(A.count(1))/len(A))*100.0, '%')
    assert( all([x in [0,1,NA_VALUE] for x in A]) )

    if NA_VALUE in A:
        print('Warning: outcome variable has', A.count(NA_VALUE), 'NA values')

    A = np.array(A)  # convert list to numpy vector

    # steps of Alg 1
    M1 = 100

    print('M1 = ', M1, ' EEiterations = ', EEiterations, end=' ')
    print('learningRate = ', learningRate, end=' ')

    # Both output files are managed by ``with`` so they are flushed and
    # closed even if one of the algorithms raises.
    with open(THETA_OUTFILENAME, 'w', 1) as theta_outfile:  # 1 means line buffering
        theta_outfile.write('t ' + ' '.join(labels) + ' ' + 'AcceptanceRate' + '\n')
        print('Running Algorithm S...', end=' ')
        start = time.time()
        (theta, Dmean) = algorithm_S(G, A, param_func_list, M1, theta_outfile,
                                     sampler_func)
        print(time.time() - start, 's')
        print('after Algorithm S:')
        print('theta = ', theta)
        print('Dmean = ', Dmean)

        with open(DZA_OUTFILENAME, 'w', 1) as dzA_outfile:
            dzA_outfile.write('t ' + ' '.join(labels) + '\n')
            print('Running Algorithm EE...', end=' ')
            start = time.time()
            theta = algorithm_EE(G, A, param_func_list, theta,
                                 EEiterations, theta_outfile, dzA_outfile,
                                 learningRate, sampler_func)
            print(time.time() - start, 's')
    print('at end theta = ', theta)
from DepthFirstOrder import DepthFirstOrder
from DigraphCycle import DirectedCycle


class Topological:
    """Holds the ``reversePost`` of a digraph when no directed cycle is found."""

    def __init__(self,digraph):
        """Store ``DepthFirstOrder(digraph).reversePost``, or None if
        ``DirectedCycle`` reports a cycle."""
        cycleFinder = DirectedCycle(digraph)
        if cycleFinder.hasCycle():
            self._order = None
        else:
            self._order = DepthFirstOrder(digraph).reversePost

    # Directed Acyclical Graph: a graph with no cycles
    def isDAG(self):
        """Return True when an order was stored at construction time."""
        return self._order is not None

    def order(self):
        """Return the stored order (None if a cycle was found)."""
        return self._order


# Demo run on a six-vertex graph.
j = Digraph(6)
for tail, head in ((1, 2), (1, 3), (0, 2), (5, 1), (3, 4), (0, 4)):
    j.addEdge(tail, head)
z = Topological(j)
print(z.isDAG())
def dfs(self, G, v):
    """Mark *v*, then recurse into every unmarked vertex in ``G.adj(v)``."""
    self._marked[v] = True
    for neighbour in G.adj(v):
        if self._marked[neighbour]:
            continue
        self.dfs(G, neighbour)


def marked(self, v):
    """Return the mark stored for vertex *v*."""
    return self._marked[v]


# unittest
if __name__ == '__main__':
    with open('tinyDG.txt', 'r') as g:
        # The first two lines hold the vertex count and the edge count.
        V = int(g.readline().split()[0])
        E = int(g.readline().split()[0])
        DG = Digraph(V)
        for e in range(E):
            v, w = g.readline().split()
            DG.add_edge(v, w)
    # sources: 1, 2, 6
    # reachable: 0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12
    # don't have 7
    sources = [1, 2, 6]
    reachable = DirectedDFS(DG, sources)
    for i in range(V):
        expected = (i != 7)
        assert reachable.marked(i) is expected
print(s) if self.marked[s] is False: self._dfs(digraph,s) self.count += 1 def _dfs(self,digraph,v): self.marked[v] = True self.id[v] = self.count for w in digraph.adj(v): if self.marked[w] is False: self._dfs(digraph,w) def stronglyConnected(self,v,w): print("id of V:",self.id[v]) print("id of W:",self.id[w]) return self.id[v] == self.id[w] j = Digraph(6) j.addEdge(1,2) j.addEdge(1,3) j.addEdge(0,2) j.addEdge(5,1) j.addEdge(3,4) j.addEdge(0,4) z = KosarajuSCC(j) print(z.stronglyConnected(3,4)) for x in z.id: print(x)
def run_on_network_attr(edgelist_filename, param_func_list, labels,
                        binattr_filename=None,
                        catattr_filename=None):
    """
    Run on specified network with binary and/or categorical attributes.

    Parameters:
         edgelist_filename - filename of Pajek format edgelist
         param_func_list   - list of change statistic functions corresponding
                             to parameters to estimate
         labels            - list of strings corresponding to param_func_list
                             to label output (header line)
         binattr_filename - filename of binary attributes (node per line)
                            Default None, in which case no binary attr.
         catattr_filename - filename of categorical attributes (node per line)
                            Default None, in which case no categorical attr.

    Write output to ifd_theta_values_<basename>.txt and
                    ifd_dzA_values_<basename>.txt
    where <basename> is the basename of edgelist filename e.g.
    if edgelist_filename is edges.txt then ifd_theta_values_edges.txt
    and ifd_dzA_values_edges.txt
    WARNING: these files are overwritten.
    """
    assert (len(param_func_list) == len(labels))
    basename = os.path.splitext(os.path.basename(edgelist_filename))[0]
    THETA_OUTFILENAME = THETA_PREFIX + basename + os.extsep + 'txt'
    DZA_OUTFILENAME = DZA_PREFIX + basename + os.extsep + 'txt'

    G = Digraph(edgelist_filename, binattr_filename, catattr_filename)

    M1_steps = 500
    # steps of Alg 1
    M1 = int(M1_steps * G.density() * (1 - G.density()) *
             G.numNodes()**2 / sampler_m)

    Mouter = 500  # outer iterations of Algorithm EE
    Msteps = 100  # multiplier for number of inner steps of Algorithm EE
    # inner steps of EE
    M = int(Msteps * G.density() * (1 - G.density()) *
            G.numNodes()**2 / sampler_m)

    print('M1 = ', M1, ' Mouter = ', Mouter, ' M = ', M)

    # Both output files are managed by ``with`` so they are flushed and
    # closed even if one of the algorithms raises.
    with open(THETA_OUTFILENAME, 'w', 1) as theta_outfile:  # 1 means line buffering
        theta_outfile.write('t ' + ' '.join(labels) + ' ' + 'AcceptanceRate' + '\n')
        print('Running Algorithm S...', end=' ')
        start = time.time()
        (theta, Dmean) = algorithm_S(G, param_func_list, M1, theta_outfile)
        print(time.time() - start, 's')
        print('after Algorithm S:')
        print('theta = ', theta)
        print('Dmean = ', Dmean)

        with open(DZA_OUTFILENAME, 'w', 1) as dzA_outfile:
            dzA_outfile.write('t ' + ' '.join(labels) + '\n')
            print('Running Algorithm EE...', end=' ')
            start = time.time()
            theta = algorithm_EE(G, param_func_list, theta, Dmean,
                                 Mouter, M, theta_outfile, dzA_outfile)
            print(time.time() - start, 's')
    print('at end theta = ', theta)
def run_on_network_attr(edgelist_filename, param_func_list, labels,
                        outcome_bin_filename,
                        binattr_filename=None,
                        contattr_filename=None,
                        catattr_filename=None,
                        sampler_func = basicALAAMsampler,
                        zone_filename = None,
                        directed = False):
    """Run on specified network with binary and/or continuous
    and categorical attributes.

    Parameters:
         edgelist_filename - filename of Pajek format edgelist
         param_func_list   - list of change statistic functions corresponding
                             to parameters to estimate
         labels            - list of strings corresponding to param_func_list
                             to label output (header line)
         outcome_bin_filename - filename of binary attribute (node per line)
                                of outcome variable for ALAAM
         binattr_filename - filename of binary attributes (node per line)
                            Default None, in which case no binary attr.
         contattr_filename - filename of continuous attributes (node per line)
                            Default None, in which case no continuous attr.
         catattr_filename - filename of categorical attributes (node per line)
                            Default None, in which case no categorical attr.
         sampler_func     - ALAAM sampler function with signature
                            (G, A, changestats_func_list, theta, performMove,
                             sampler_m); see basicALAAMsampler.py
                            default basicALAAMsampler
         zone_filename    - filename of snowball sampling zone file
                            (header line 'zone' then zone number for nodes,
                            one per line)
                            Default None, in which case no snowball zones.
                            If not None then the sampler_func should take
                            account of snowball sample zones i.e.
                            conditionalALAAMsampler()
         directed         - Default False.
                            True for directed network else undirected.

    Write output to stdout.

    NOTE(review): the goodness-of-fit section is placed inside the
    ``if converged:`` branch; it uses theta, which is None after a failed
    run - confirm against the original indentation.
    """
    assert(len(param_func_list) == len(labels))

    if directed:
        G = Digraph(edgelist_filename, binattr_filename, contattr_filename,
                    catattr_filename, zone_filename)
    else:
        G = Graph(edgelist_filename, binattr_filename, contattr_filename,
                  catattr_filename, zone_filename)

    G.printSummary()

    # The first whitespace-separated token of the outcome file is skipped
    # (header); the file is closed as soon as it has been read.
    with open(outcome_bin_filename) as outcome_file:
        outcome_binvar = list(map(int_or_na, outcome_file.read().split()[1:]))
    assert(len(outcome_binvar) == G.numNodes())
    A = outcome_binvar

    assert( all([x in [0,1,NA_VALUE] for x in A]) )
    print('positive outcome attribute = ', (float(A.count(1))/len(A))*100.0, '%')
    if NA_VALUE in A:
        print('Warning: outcome variable has', A.count(NA_VALUE), 'NA values')

    # Calculate observed statistics by summing change stats for each 1 variable
    Zobs = computeObservedStatistics(G, A, param_func_list)
    print('Zobs = ', Zobs)

    theta = np.zeros(len(param_func_list))

    estimation_start = time.time()
    max_runs = 20
    i = 0
    converged = False
    # Each run starts from the theta returned by the previous one.
    while i < max_runs and not converged:
        i += 1
        print('Running stochastic approximation (run', i,' of at most',max_runs,')...')
        start = time.time()
        (theta, std_error, t_ratio) = stochasticApproximation(G, A,
                                                              param_func_list,
                                                              theta, Zobs,
                                                              sampler_func)
        print('Stochastic approximation took',time.time() - start, 's')
        if theta is None:
            print('Failed.')
            break
        print(' ',labels)
        print('theta =', theta)
        print('std_error =', std_error)
        print('t_ratio =', t_ratio)

        converged = np.all(np.abs(t_ratio) < 0.1)

    print('Total estimation time (',i,'runs) was',time.time() - estimation_start, 's')
    if converged:
        print('Converged.')
        significant = np.abs(theta) > 2 * std_error
        sys.stdout.write(20*' ' + ' Parameter Std.Error t-ratio\n')
        for j in range(len(theta)):
            sys.stdout.write('%20.20s % 6.3f % 6.3f % 6.3f %c\n' %
                             (labels[j], theta[j], std_error[j], t_ratio[j],
                              ('*' if significant[j] else ' ')))
        print()

        # Do goodness-of-fit test

        # change stats functions to add to GoF if not already in estimation
        if directed:
            # TODO GoF statistics for directed
            gof_param_func_list = list(param_func_list)
            goflabels = list(labels)
        else:
            statfuncs = [changeTwoStar, changeThreeStar,
                         changePartnerActivityTwoPath,
                         changeTriangleT1, changeContagion,
                         changeIndirectPartnerAttribute,
                         changePartnerAttributeActivity,
                         changePartnerPartnerAttribute,
                         changeTriangleT2,
                         changeTriangleT3]
            statlabels = ['Two-Star', 'Three-Star', 'Alter-2Star1A',
                          'T1', 'Contagion', 'Alter-2Star2A',
                          'Partner-Activity',
                          'Partner-Resource','T2', 'T3']
            gof_param_func_list = (list(param_func_list) +
                                   [f for f in statfuncs
                                    if f not in param_func_list])
            goflabels = (list(labels) + [f for f in statlabels
                                         if f not in labels])
        n = len(gof_param_func_list)
        assert len(goflabels) == n
        # pad theta vector with zeros for the added parameters
        gof_theta = np.array(list(theta) + (n-len(theta))*[0])

        Ainitial = None  # default: use random initialization
        if zone_filename is not None:  # conditional estimation
            # For snowball conditional estimation, we must not start with
            # random initial outcome vector, but rather make sure the
            # nodes in the outermost zone have the same outcome attributes
            # as the observed vector
            Ainitial = np.copy(A)  # copy of observed vector
            # make vector of 50% ones, size of number of inner nodes
            Arandom_inner = rand_bin_array(int(0.5*len(G.inner_nodes)),
                                           len(G.inner_nodes))
            # set the outcome for inner nodes to random values, leaving
            # value of outermost nodes at the original observed values
            Ainitial[G.inner_nodes] = Arandom_inner

        print('Running goodness-of-fit test...')
        start = time.time()
        gofresult = gof(G, A, gof_param_func_list, gof_theta,
                        sampler_func = sampler_func, Ainitial = Ainitial)
        print('GoF took',time.time() - start, 's')
        print(' ',goflabels)
        print('t_ratios = ',gofresult)
        sys.stdout.write(20*' ' + ' t-ratio\n')
        for j in range(n):
            sys.stdout.write('%20.20s % 6.3f\n' %
                             (goflabels[j], gofresult[j]))
        print()
def lastTransitiveClosureTime(numNodes, transaction_list, verbose): """ Given a list of transactions which are in the form of a list of (sender, receiver, timestamp) tuples, return the list of times at which open two-paths are closed. Parameters: numNodes - number of actors (nodes) transaction_list - list of (sender, receiver, timestamp) tuples. Sender and receiver are intergers in 0..numNodes-1 and timestamps are numeric values. The list must be ordered by timestamp ascending. verbose - if True write debug output to stdout Return value: List of tuples (open_time, delta_time) where open_time is second timestamp in open two-path and and delta_time is it took open two-paths to be closed (the difference in timestamp between the closing transaction (arc) and the second (along arc) timestamp in the open two-path, BUT only if the second arc in two-path has higher timestamp than the first (i.e. we don't count a two-path that goes backward in time along the path) """ G = Digraph(numNodes) lastTime = None delta_time_list = [] for trans in transaction_list: assert (trans[TSENDER] >= 0 and trans[TSENDER] < numNodes) assert (trans[TRECEIVER] >= 0 and trans[TRECEIVER] < numNodes) assert (lastTime is None or trans[TTIME] >= lastTime) if verbose: print trans[TSENDER], '->', trans[TRECEIVER], ' at time ', trans[ TTIME], closed_2paths_v = closedTwoPaths(G, trans[TSENDER], trans[TRECEIVER]) if len(closed_2paths_v) > 0: if verbose: print 'closed', len( closed_2paths_v), 'two-paths via', closed_2paths_v path_2nd_time_list = [] for v in closed_2paths_v: path_1st_time = G.G[trans[TSENDER]][v] path_2nd_time = G.G[v][trans[TRECEIVER]] if path_2nd_time > path_1st_time: if verbose: print ' path via', v, 'is forward in time (', path_1st_time, ',', path_2nd_time, '), considering' path_2nd_time_list.append(path_2nd_time) else: if verbose: print ' path via', v, 'is backwards in time (', path_1st_time, ',', path_2nd_time, '), skipping' if len(path_2nd_time_list) > 0: path_2nd_time_max = 
max(path_2nd_time_list) delta_time = trans[TTIME] - path_2nd_time_max if verbose: print ' ', len( path_2nd_time_list ), 'paths considered as forward in time, max 2nd time is', path_2nd_time_max, ' appending delta_time =', delta_time delta_time_list.append((path_2nd_time_max, delta_time)) else: if verbose: print ' (no paths forward in time)' else: if verbose: print if not G.isArc(trans[TSENDER], trans[TRECEIVER]): # only insert arc if not already one there, to keep first # time on transactions, not subsequent times. G.insertArc(trans[TSENDER], trans[TRECEIVER], trans[TTIME]) lastTime = trans[TTIME] return delta_time_list
def vid(self, v):
    """Return ``self.id[v]`` after validating *v*.

    Raises:
        ValueError: if *v* is outside ``0 .. len(self.marked) - 1``.
    """
    self.validateVertex(v)
    return self.id[v]


def check(self, G):
    """Placeholder; performs no check."""
    pass


def validateVertex(self, v):
    """Raise ValueError unless ``0 <= v < len(self.marked)``."""
    V = len(self.marked)
    if v < 0 or v >= V:
        # Raising a bare string is itself a TypeError in Python 3; raise a
        # real exception that carries the message.
        raise ValueError("vertex %d is not between 0 and %d" % (v, V - 1))


if __name__ == '__main__':
    myin = In(sys.argv[1], None)
    G = Digraph(myin)
    scc = KosarajuSharirSCC(G)
    m = scc.mcount()
    print("%d strong components" % m)
    # Group the vertices by the value returned from scc.vid().
    components = [None] * m
    for i in range(0, m):
        components[i] = []
    for v in range(0, G.Vertex()):
        components[scc.vid(v)].append(v)
    # One line of vertex numbers per group.
    # NOTE(review): s is built but not printed in the visible part of the
    # script - confirm whether a print(s) follows.
    s = ""
    for i in range(0, m):
        for v in components[i]:
            s += "%d " % v
        s += '\n'
self.cycle = [] x = v while x != w: self.cycle.append(x) x = self.edgeTo[x] self.cycle.append(w) self.cycle.append(v) self.onStack[v] = False def hasCycle(self): return self.cycle != [] def cycle_l(self): return self.cycle if __name__ == '__main__': myin = In(sys.argv[1]) G = Digraph(myin) finder = DirectedCycle(G) s = "" if finder.hasCycle(): print("Directed cycle: ") for v in finder.cycle_l(): s += "%d " % v print(s) else: print("No directed cycle") print("\n")