def test_search():
    """Exercise ``TimeCollection.search_time`` on continuous, discrete and instantaneous data."""
    # Continuous: keys are (time, flag) pairs. Probe with a bare time and
    # with an explicit (time, False) key.
    a = TimeCollection([((1, False), 3), ((4, True), 4)], discrete=False)
    bare_expected = [0, 0, 0, 0, 1, 1]
    keyed_expected = [0, 0, 0, 0, 0, 1]
    for i in range(6):
        assert_equal(a.search_time(i), bare_expected[i])
        assert_equal(a.search_time((i, False)), keyed_expected[i])

    # Discrete: keys are plain time-stamps.
    a = TimeCollection([(1, 3), (4, 4)], discrete=True)
    for i, v1 in enumerate([0, 0, 0, 0, 1, 1]):
        assert_equal(a.search_time(i), v1)

    # Instantaneous: only the exact stored instants are expected to match.
    a = TimeCollection([(1, 3), (4, 4)], instantaneous=True)
    for i, v1 in enumerate([None, 0, None, None, 1, None]):
        assert_equal(a.search_time(i), v1)
def nodes_at(self, t=None):
    """Return the nodes present at a time instant, or at every instant.

    Parameters
    ----------
    t : Real or None, default=None
        The instant to query. If None, all instants are reported.

    Returns
    -------
    nodes : NodeSetS or TimeGenerator or TimeCollection
        For a given ``t``, the set of nodes found at that instant.
        For ``t=None``, pairs of (time-stamp, NodeSetS) in ascending time
        (an empty TimeCollection if this object is empty).

    Raises
    ------
    ValueError
        If this object is non-empty and ``t`` is neither None nor a real number.
    """
    if not bool(self):
        if t is None:
            # An empty list is passed explicitly: a bare ``iter()`` call
            # raises TypeError because it requires an argument.
            return TimeCollection([], instantaneous=True, discrete=self.discrete)
        return NodeSetS()

    if t is None:
        def generate(iter_):
            # Rows arrive sorted on time: group consecutive rows sharing a
            # time-stamp and emit one node-set per group.
            prev, active_set = None, set()
            for u, ts in iter_:
                if prev is None:
                    active_set, prev = {u}, ts
                elif ts != prev:
                    yield (prev, NodeSetS(set(active_set)))
                    active_set, prev = {u}, ts
                else:
                    active_set.add(u)
            # Emit the last group (nothing is emitted for an empty iterator).
            if len(active_set):
                yield (prev, NodeSetS(set(active_set)))

        # Iterate in ascending time and yield the node-set of each instant lazily.
        return TimeGenerator(generate(self.sort_df('ts').itertuples()),
                             instantaneous=True, discrete=self.discrete)

    if isinstance(t, Real):
        # Collect the nodes stored for this single time-stamp.
        return NodeSetS(self.df.df_at(t).u)

    raise ValueError(
        'Input can either be a real number or an ascending interval of two real numbers'
    )
def link_contribution_at(self, t=None):
    """Calculate the fraction of possible links that exist at a time.

    Parameters
    ----------
    t : time or None

    Returns
    -------
    link_contribution : Real or TimeCollection
        Returns :math:`l_{t}=\\frac{|E_{t}|}{|V|(|V|-1)}`, where the
        numerator is ``temporal_linkset_.m_at(t)`` and :math:`|V|` is the
        size of the node-set. If ``t`` is None, the same ratio is returned
        for every entry of ``temporal_linkset_.m_at()``.
        When the node-set has fewer than two nodes the result is ``0.0``
        (or an empty TimeCollection for ``t=None``).
    """
    n_nodes = float(self.nodeset_.size)
    denom = n_nodes * (n_nodes - 1)
    has_pairs = denom > .0

    if t is not None:
        if has_pairs:
            return self.temporal_linkset_.m_at(t) / denom
        return .0

    if not has_pairs:
        # No ordered pair of distinct nodes exists, hence nothing to normalise.
        return TimeCollection(instants=self.temporal_linkset_.instantaneous)

    def normalise(time, count):
        return count / denom

    return self.temporal_linkset_.m_at().map(normalise)
def _m_at_unweighted(self, t): if t is None: from stream_graph.collections import TimeCollection time_links = self.links_at() return TimeCollection([(time, l.size) for time, l in time_links], time_links.instants) else: return self.links_at(t).size
def _m_at_weighted(self, t): if t is None: return TimeCollection(self._build_time_generator( Counter, sum_counter_), instantaneous=False, discrete=self.discrete) else: return self.df.count_at(t, weights=True)
def _m_at_weighted(self, t): if t is None: ct = Counter() for ts, w in self.df[['ts', 'w']].itertuples(weights=True): ct[ts] += w # Add up weigths for each time-stamp and sort on time. return TimeCollection(sorted(iteritems(ct)), instantaneous=True, discrete=self.discrete) else: return self.links_at(t).weighted_size
def _m_at_unweighted(self, t): if t is None: return TimeCollection(self._build_time_generator(set, len_set_, weighted=False), instantaneous=False, discrete=self.discrete) else: return self.df.count_at(t)
def _degree_at_weighted(self, u, t, direction):
    """Weighted degree of one node or of all nodes, at one time or over time.

    Parameters
    ----------
    u : node or None
        The node to query; None means every node.
    t : time or None
        The time to query; None means the whole time-line.
    direction : str
        One of ``'out'``, ``'in'`` or ``'both'``.

    Returns
    -------
    degree : NodeCollection, TimeCollection or number
        ``u=None, t=None``: NodeCollection mapping each node to a TimeCollection.
        ``u=None, t`` given: the result of ``LinkSetDF.degree`` on the links at ``t``.
        ``u`` given, ``t=None``: a TimeCollection for that node.
        ``u`` and ``t`` given: the sum of the weights of the matching rows at ``t``.

    Raises
    ------
    UnrecognizedDirection
        If ``u`` is given and ``direction`` is not one of the accepted values.
    """
    if u is None:
        if t is not None:
            snapshot = LinkSetDF(self.df.df_at(t)[['u', 'v', 'w']], weighted=True,
                                 merge_function=self.df_.merge_function, no_duplicates=False)
            return snapshot.degree(u=None, direction=direction, weights=True)

        per_node = dict()
        events = self._build_time_generator(Counter, sum_counter_n, direction=direction,
                                            get_key=get_key_first)
        for node, val in events:
            # Values are grouped per node, in the order the generator emits them.
            if node in per_node:
                per_node[node].append(val)
            else:
                per_node[node] = TimeCollection([val], discrete=self.discrete,
                                                instantaneous=False)
        return NodeCollection(per_node)

    # Keep only the rows touching ``u``; the remaining end-point is always named 'u'.
    if direction == 'out':
        rows = self.df[self.df.u == u]
        df = rows.drop(columns=['u'], merge=False).rename(columns={'v': 'u'})
    elif direction == 'in':
        rows = self.df[self.df.v == u]
        df = rows.drop(columns=['v'], merge=False)
    elif direction == 'both':
        outgoing = self.df[self.df.u == u].drop(columns=['u'], merge=False)
        outgoing = outgoing.rename(columns={'v': 'u'})
        incoming = self.df[self.df.v == u].drop(columns=['v'], merge=True)
        df = outgoing.append(incoming, ignore_index=True)
    else:
        raise UnrecognizedDirection()

    if t is not None:
        return df.w[df.index_at(t)].sum()
    return TimeCollection(
        self._build_time_generator(Counter, sum_counter_, direction=direction, df=df),
        discrete=self.discrete, instantaneous=False)
def neighbors_at(self, u=None, t=None, direction='out'):
    """Neighbors of one node or of all nodes, at one time or over time.

    Parameters
    ----------
    u : node or None, default=None
        The node to query; None means every node.
    t : time or None, default=None
        The time to query; None means the whole time-line.
    direction : str, default='out'
        One of ``'out'``, ``'in'`` or ``'both'``.

    Returns
    -------
    neighbors : NodeCollection, TimeCollection, TimeSparseCollection or NodeSetS
        ``u=None, t=None``: NodeCollection mapping each node to a TimeSparseCollection.
        ``u=None, t`` given: the result of ``LinkSetDF.neighbors_of`` on the links at ``t``.
        ``u`` given, ``t=None``: the result of ``TemporalNodeSetDF.nodes_at``.
        ``u`` and ``t`` given: a NodeSetS of the neighbors at ``t``.
        Empty containers of the matching kind are returned when this object is empty.

    Raises
    ------
    UnrecognizedDirection
        If ``u`` is given and ``direction`` is not one of the accepted values.
    """
    if not bool(self):
        if u is None:
            return NodeCollection()
        return TimeCollection() if t is None else NodeSetS()

    mf = self.df_.merge_function if self.weighted else None

    if u is None:
        if t is not None:
            snapshot = LinkSetDF(self.df.df_at(t)[['u', 'v'] + self._wc],
                                 merge_function=mf, no_duplicates=False)
            return snapshot.neighbors_of(u=None, direction=direction)

        per_node = dict()
        events = self._build_time_generator(set, set_unweighted_n_sparse, weighted=False,
                                            direction=direction, get_key=get_key_first,
                                            sparse=True)
        for node, val in events:
            # Values are grouped per node, in the order the generator emits them.
            if node in per_node:
                per_node[node].append(val)
            else:
                per_node[node] = TimeSparseCollection([val], discrete=self.discrete,
                                                      caster=NodeSetS)
        return NodeCollection(per_node)

    # Keep only the rows touching ``u``; the remaining end-point is always named 'u'.
    if direction == 'out':
        rows = self.df[self.df.u == u]
        df = rows.drop(columns=['u'] + self._wc, merge=False).rename(columns={'v': 'u'})
    elif direction == 'in':
        rows = self.df[self.df.v == u]
        df = rows.drop(columns=['v'] + self._wc, merge=False)
    elif direction == 'both':
        outgoing = self.df[self.df.u == u].drop(columns=['u'] + self._wc)
        outgoing = outgoing.rename(columns={'v': 'u'})
        incoming = self.df[self.df.v == u].drop(columns=['v'] + self._wc, merge=True)
        df = outgoing.append(incoming, ignore_index=True)
    else:
        raise UnrecognizedDirection()

    if t is None:
        return TemporalNodeSetDF(df).nodes_at(t=None)
    return NodeSetS(df[df.index_at(t)].u.values.flat)
def n_at(self, t=None):
    """Number of nodes at time ``t``, or over all time when ``t`` is None.

    An empty object yields ``0`` (or an empty TimeCollection for ``t=None``).
    Otherwise the count comes from ``self.df.count_at(t)``, or from
    ``_build_time_generator`` when ``t`` is None.
    """
    if not bool(self):
        return TimeCollection([], False) if t is None else 0
    if t is None:
        return self._build_time_generator(set, len_set_nodes, TimeCollection)
    return self.df.count_at(t)
def n_at(self, t=None):
    """Return the number of nodes at a time instant, or at every instant.

    Parameters
    ----------
    t : Real or None, default=None
        The instant to query. If None, all instants are reported.

    Returns
    -------
    n : int or TimeCollection
        For a given ``t``, the number of distinct nodes stored at that
        instant (``0`` if this object is empty).
        For ``t=None``, an instantaneous TimeCollection holding, for each
        time-stamp in ascending order, how many rows carry it.

    Raises
    ------
    ValueError
        If this object is non-empty and ``t`` is neither None nor a real number.
    """
    if not bool(self):
        if t is None:
            # An empty list is passed explicitly: a bare ``iter()`` call
            # raises TypeError because it requires an argument.
            return TimeCollection([], instantaneous=True, discrete=self.discrete)
        # A count is expected here, not a node-set.
        return 0

    if t is None:
        # Count how many times each time-stamp occurs and sort on time.
        per_stamp = Counter(iter(self.df.ts))
        return TimeCollection(sorted(iteritems(per_stamp)),
                              instantaneous=True, discrete=self.discrete)

    if isinstance(t, Real):
        # Count the distinct nodes of a single time-stamp.
        return len(set(self.df.df_at(t).u))

    raise ValueError(
        'Input can either be a real number or an ascending interval of two real numbers'
    )
def n_at(self, t=None):
    """Number of nodes at time ``t``, or over all time when ``t`` is None.

    For a given ``t`` the result is ``self.n`` when this object is
    non-empty and ``t`` belongs to ``self.timeset_``, and ``0`` otherwise.
    For ``t=None`` the result comes from ``constant_time_generator``
    (an empty TimeCollection if this object is empty).
    """
    non_empty = bool(self)
    if t is not None:
        return self.n if non_empty and (t in self.timeset_) else 0
    if not non_empty:
        return TimeCollection()
    return constant_time_generator(self.timeset, self.n, 0, self.discrete, self.instantaneous)
def n_at(self, t=None):
    """Return the number of nodes of the TemporalNodeSet at a certain time.

    Parameters
    ----------
    t : Real or None, default=None

    Returns
    -------
    n : Int or TimeCollection(Int)
        The ``size`` of ``nodes_at(t)`` for a given ``t``.
        If ``t`` is None, a TimeCollection pairing every time-stamp
        reported by ``nodes_at()`` with the size of its node-set.
    """
    if t is not None:
        return self.nodes_at(t).size

    # Imported locally, as in the original implementation.
    from stream_graph.collections import TimeCollection
    time_nodes = self.nodes_at()
    sizes = [(ts, ns.size) for (ts, ns) in time_nodes]
    return TimeCollection(sizes, time_nodes.instants)
def _degree_at_unweighted(self, u, t, direction):
    """Degree of one node or of all nodes, at one time or over time.

    Parameters
    ----------
    u : node or None
        The node to query; None means every node.
    t : time or None
        The time to query; None means the whole time-line.
    direction : str
        One of ``'out'``, ``'in'`` or ``'both'``.

    Returns
    -------
    degree : NodeCollection, TimeCollection or int
        ``u=None, t=None``: NodeCollection mapping each node to a TimeCollection.
        ``u=None, t`` given: the result of ``LinkSetDF.degree`` on the links at ``t``.
        ``u`` given, ``t=None``: the result of ``TemporalNodeSetDF.n_at``.
        ``u`` and ``t`` given: the number of distinct neighbors at ``t``.

    Raises
    ------
    UnrecognizedDirection
        If ``u`` is given and ``direction`` is not one of the accepted values.
    """
    if u is None:
        if t is not None:
            snapshot = LinkSetDF(self.df.df_at(t)[['u', 'v'] + self._wc], no_duplicates=False)
            return snapshot.degree(u=None, direction=direction)

        per_node = dict()
        events = self._build_time_generator(set, len_set_n, direction=direction,
                                            get_key=get_key_first)
        for node, val in events:
            # Values are grouped per node, in the order the generator emits them.
            if node in per_node:
                per_node[node].append(val)
            else:
                per_node[node] = TimeCollection([val], discrete=self.discrete,
                                                instantaneous=False)
        return NodeCollection(per_node)

    # Weights play no role here: drop the weight column first when there is one.
    df = self.df.drop(columns='w', merge=False) if self.weighted else self.df

    # Keep only the rows touching ``u``; the remaining end-point is always named 'u'.
    if direction == 'out':
        df = df[df.u == u].drop(columns=['u'], merge=self.weighted).rename(columns={'v': 'u'})
    elif direction == 'in':
        df = df[df.v == u].drop(columns=['v'], merge=self.weighted)
    elif direction == 'both':
        outgoing = df[df.u == u].drop(columns=['u'], merge=False).rename(columns={'v': 'u'})
        incoming = df[df.v == u].drop(columns=['v'], merge=False)
        df = outgoing.append(incoming, ignore_index=True, merge=True)
    else:
        raise UnrecognizedDirection()

    if t is None:
        return TemporalNodeSetDF(df).n_at(t=None)
    return len(set(df.df_at(t).u.values.flat))
def m_at(self, t=None, weights=False):
    """Return the number of links appearing at a certain time.

    Parameters
    ----------
    t : time or None, default=None
        The time to query; None means the whole time-line.
    weights : bool, default=False
        Use the weighted count. Only honoured when the object is weighted.

    Returns
    -------
    m : number or TimeCollection
        The number of links at time ``t``, or a collection over time when
        ``t`` is None. An empty object yields ``0.0`` (or an empty
        TimeCollection for ``t=None``).
    """
    if bool(self):
        use_weights = weights and self.weighted
        counter = self._m_at_weighted if use_weights else self._m_at_unweighted
        return counter(t)
    if t is None:
        return TimeCollection(discrete=self.discrete_)
    return .0
def _degree_at_unweighted(self, u=None, t=None, direction='out'): if not bool(self): if u is None: return NodeCollection() if t is None: return TimeCollection(discrete=self.discrete, instantaneous=True) return 0 if u is None: if t is None: out = dict() if direction == 'out': def add(d, u, v): d[u].add(v) elif direction == 'in': def add(d, u, v): d[v].add(u) elif direction == 'both': def add(d, u, v): d[u].add(v) d[v].add(u) else: raise UnrecognizedDirection() prev = None for u, v, ts in self.sort_df('ts').itertuples(): # Collect neighbors at each time-stamp if prev is None: cache = defaultdict(set) prev = ts elif ts != prev: for z, s in iteritems(cache): if z in out: # and calculate their size out[z].it.append((prev, len(s))) else: # in a TimeCollection of ascending time for each node out[z] = TimeCollection([(prev, len(s))], discrete=self.discrete, instantaneous=True) cache = defaultdict(set) prev = ts add(cache, u, v) # Add the remaining, from the cache for u, s in iteritems(cache): if u in out: out[u].it.append((prev, len(s))) else: out[u] = TimeCollection([(prev, len(s))], discrete=self.discrete, instantaneous=True) return NodeCollection(out) else: return LinkSetDF(self.df.df_at(t).drop(columns=['ts']), weighted=self.weighted).degree(u=None, direction=direction) else: if direction == 'out': df = self.df[self.df.u == u].drop(columns=['u'], merge=False).rename(columns={'v': 'u'}) elif direction == 'in': df = self.df[self.df.v == u].drop(columns=['v'], merge=False) elif direction == 'both': df = self.df[self.df.u == u].drop(columns=['u'], merge=False).rename(columns={'v': 'u'}) df = df.append(self.df[self.df.v == u].drop(columns=['v'], merge=False), ignore_index=True, merge=True) else: raise UnrecognizedDirection() if t is None: dt = defaultdict(set) # Collect all nodes for each time-stamp for u, ts in df.itertuples(): dt[ts].add(u) return TimeCollection(sorted(list((ts, len(us)) for ts, us in iteritems(dt))), discrete=self.discrete, instantaneous=True) else: 
return len(set(df.df_at(t).u.values.flat))
def neighbors_at(self, u=None, t=None, direction='out'):
    """Neighbors of one node or of all nodes, at one instant or at every instant.

    Parameters
    ----------
    u : node or None, default=None
        The node to query; None means every node.
    t : time or None, default=None
        The instant to query; None means every instant.
    direction : str, default='out'
        One of ``'out'``, ``'in'`` or ``'both'``.

    Returns
    -------
    neighbors : NodeCollection, TimeCollection or NodeSetS
        ``u=None, t=None``: NodeCollection mapping each node to an
        instantaneous TimeCollection of (time-stamp, NodeSetS).
        ``u=None, t`` given: the result of ``LinkSetDF.neighbors_of`` on the links at ``t``.
        ``u`` given, ``t=None``: the result of ``ITemporalNodeSetDF.nodes_at``.
        ``u`` and ``t`` given: a NodeSetS of the neighbors at ``t``.
        Empty containers of the matching kind are returned when this object is empty.

    Raises
    ------
    UnrecognizedDirection
        If ``direction`` is not one of the accepted values (not checked
        for ``u=None`` with ``t`` given, nor when this object is empty).
    """
    if not bool(self):
        if u is None:
            return NodeCollection()
        if t is None:
            return TimeCollection(discrete=self.discrete, instantaneous=True)
        return NodeSetS()

    if u is None:
        if t is not None:
            snapshot = LinkSetDF(self.df.df_at(t).drop(columns=['ts']), weighted=self.weighted)
            return snapshot.neighbors_of(u=None, direction=direction)

        out = dict()

        # Pick how a link (a, b) is credited to its end-points.
        if direction == 'out':
            def add(d, a, b):
                d[a].add(b)
        elif direction == 'in':
            def add(d, a, b):
                d[b].add(a)
        elif direction == 'both':
            def add(d, a, b):
                d[a].add(b)
                d[b].add(a)
        else:
            raise UnrecognizedDirection()

        def flush(stamp, neighbors):
            # Store the neighbor set of every node seen at ``stamp``:
            # append to the node's TimeCollection, or create it.
            for node, seen in iteritems(neighbors):
                if node in out:
                    out[node].it.append((stamp, NodeSetS(seen)))
                else:
                    out[node] = TimeCollection([(stamp, NodeSetS(seen))],
                                               instantaneous=True, discrete=self.discrete)

        prev = None
        for a, b, ts in self.sort_df('ts').itertuples():
            # Iterate in ascending time, collecting for each node its neighbors.
            if prev is None:
                prev, cache = ts, defaultdict(set)
            elif ts != prev:
                flush(prev, cache)
                prev, cache = ts, defaultdict(set)
            add(cache, a, b)
        # Add also the elements remaining for the last time-stamp.
        flush(prev, cache)
        return NodeCollection(out)

    # Keep only the rows touching ``u``; the remaining end-point is always named 'u'.
    # ``di`` is forwarded as ``no_duplicates``; it is switched off for 'both'.
    di = True
    if direction == 'out':
        df = self.df[self.df.u == u].drop(columns=['u']).rename(columns={'v': 'u'})
    elif direction == 'in':
        df = self.df[self.df.v == u].drop(columns=['v'])
    elif direction == 'both':
        outgoing = self.df[self.df.u == u].drop(columns=['u']).rename(columns={'v': 'u'})
        incoming = self.df[self.df.v == u].drop(columns=['v'])
        df = outgoing.append(incoming, ignore_index=True)
        di = False
    else:
        raise UnrecognizedDirection()

    if t is not None:
        return NodeSetS(df.df_at(t).u.values.flat)
    node_stream = ITemporalNodeSetDF(df[['u', 'ts']], no_duplicates=di, discrete=self.discrete)
    return node_stream.nodes_at(t=None)
def _m_at_unweighted(self, t): if t is None: # Count how many times each time-stamp occurs and sort on time. return TimeCollection(sorted(list(iteritems(Counter(iter(self.df.ts))))), instantaneous=True, discrete=self.discrete) else: return self.links_at(t).size
def test_merge():
    """Check ``TimeCollection.merge`` with ``add`` on continuous, discrete and instantaneous data.

    Every scenario checks that the merge is symmetric, that it yields the
    expected items and that the ``instants`` flag of the result is right.
    """
    def merged(left, right):
        # A fresh merge on every call, exactly as a direct ``.merge`` call.
        return left.merge(right, add, missing_value=0)

    # Continuous
    tc_a = TimeCollection([((1, True), 4), ((2, True), 3), ((4, False), 5),
                           ((5, False), 0), ((6, False), 1), ((7, True), 0)])
    tc_b = TimeCollection([((1.5, False), 1), ((2, False), 2), ((3, False), 1),
                           ((5.5, True), 0), ((8, True), 1)])
    assert_equal(list(merged(tc_a, tc_b)), list(merged(tc_b, tc_a)))
    expected = [((1, True), 4), ((1.5, False), 5), ((2, False), 6), ((2, True), 5),
                ((3, False), 4), ((4, False), 6), ((5, False), 1), ((5.5, True), 0),
                ((6, False), 1), ((7, True), 0), ((8, True), 1)]
    assert_equal(list(merged(tc_a, tc_b)), expected)
    assert not merged(tc_a, tc_b).instants

    # future fix
    # tc_a = TimeCollection([((1, True), 4), ((2, True), 3), ((4, False), 5), ((5, False), 0), ((6, False), 1), ((7, True), 0)])
    # tc_b = TimeCollection([(1.5, 1), (2, 2), (3, 1), (5.5, 0), (8, 1)], instantaneous=True)
    # print(tc_a.merge(tc_b, add, missing_value=0))
    # assert list(tc_a.merge(tc_b, add, missing_value=0)) == list(tc_b.merge(tc_a, add, missing_value=0))
    # assert list(tc_a.merge(tc_b, add, missing_value=0)) == [(1.5, 5), (2, 5), (3, 4), (5.5, 0), (8, 1)]
    # assert not tc_a.merge(tc_b, add, missing_value=0).instants

    # tc_a = TimeCollection([(1, 4), (2, 3), (4, 5), ((5, False), 0), ((6, False), 1), ((7, True), 0)], instantaneous=True)
    # tc_b = TimeCollection([(1.5, 1), (2, 2), (3, 1), (5.5, 0), (8, 1)], instantaneous=True)
    # assert list(tc_a.merge(tc_b, add, missing_value=0)) == list(tc_b.merge(tc_a, add, missing_value=0))
    # assert list(tc_a.merge(tc_b, add, missing_value=0)) == [(1, 4), (1.5, 1), (2, 5), (3, 1), (4, 5), (5, 0), (5.5, 0), (6, 1), (7, 0), (8, 1)]
    # assert tc_a.merge(tc_b, add, missing_value=0).instants

    # Discrete
    tc_a = TimeCollection([(2, 4), (4, 3), (8, 5), (10, 0), (12, 1), (14, 0)], discrete=True)
    tc_b = TimeCollection([(3, 1), (4, 2), (6, 1), (11, 0), (16, 1)], discrete=True)
    assert_equal(list(merged(tc_a, tc_b)), list(merged(tc_b, tc_a)))
    expected = [(2, 4), (3, 5), (6, 4), (8, 6), (10, 1), (11, 0), (12, 1), (14, 0), (16, 1)]
    assert_equal(list(merged(tc_a, tc_b)), expected)
    assert not merged(tc_a, tc_b).instants

    # tc_a = TimeCollection([(2, 4), (3, 3), (4, 5), (10, 0), (12, 1), (14, 0)], instantaneous=True)
    # tc_b = TimeCollection([(3, 1), (4, 2), (6, 1), (11, 0), (16, 1)], discrete=True, instantaneous=True)
    # assert list(tc_a.merge(tc_b, add, missing_value=0)) == list(tc_b.merge(tc_a, add, missing_value=0))
    # assert list(tc_a.merge(tc_b, add, missing_value=0)) == [(1.5, 5), (2, 5), (3, 4), (5.5, 0), (8, 1)]
    # assert not tc_a.merge(tc_b, add, missing_value=0).instants

    # Discrete and instantaneous
    tc_a = TimeCollection([(2, 4), (4, 3), (8, 5), (10, 0), (12, 1), (14, 0)],
                          discrete=True, instantaneous=True)
    tc_b = TimeCollection([(3, 1), (4, 2), (6, 1), (11, 0), (16, 1)],
                          discrete=True, instantaneous=True)
    assert_equal(list(merged(tc_a, tc_b)), list(merged(tc_b, tc_a)))
    expected = [(2, 4), (3, 1), (4, 5), (6, 1), (8, 5), (10, 0), (11, 0), (12, 1),
                (14, 0), (16, 1)]
    assert_equal(list(merged(tc_a, tc_b)), expected)
    assert merged(tc_a, tc_b).instants
def ego(e, ne, l, both, detailed, discrete):
    """Compute a time-varying score for node ``e`` relative to its neighbors ``ne``.

    NOTE(review): presumably an ego-betweenness style measure; confirm
    against the calling code.

    Parameters
    ----------
    e : node
        The ego node.
    ne : set
        Neighbors of ``e``. Used with set operators (``ne - {i}``, ``ne | {e}``).
    l : sequence of (u, v, t)
        Links, indexed by position; grouped by equal consecutive ``t``.
        NOTE(review): both loops stop at ``len(l) - 1``, so the final entry
        is only read for its time-stamp; looks like a sentinel, confirm.
    both : bool
        If true, every link is recorded in both directions.
    detailed : bool
        If true, a value is recorded for every processed batch; otherwise
        only when the value differs from the last recorded one.
    discrete : bool
        Forwarded to the TimeCollection. When not ``detailed`` and not
        ``discrete``, recorded keys are ``(time, True)`` pairs.

    Returns
    -------
    TimeCollection
        The recorded (time, value) pairs, built with
        ``instantaneous=detailed`` and ``discrete=discrete``.
    """
    u, v, t, index, times = 0, 0, 0, 0, list()
    # ce: contribution of each ordered pair; info: last known time of each
    # ordered pair (-1 when never seen).
    ce, info = dict(), dict()
    for i in ne:
        info[(e, i)] = -1
        for x in ne - {i}:
            info[(i, x)] = -1
        info[(i, e)] = -1
    index = 0
    time = l[index][2]  # starting time.
    # lines: links read in the current batch, mapped to their time.
    # paths: per ordered pair, a (time, set of intermediate nodes) tuple.
    ne_x, lines, paths = ne | {e}, dict(), dict()
    if both:
        def add_lines(u, v, t):
            lines[(u, v)] = t
            lines[(v, u)] = t
    else:
        def add_lines(u, v, t):
            lines[(u, v)] = t
    # ``take`` records a value at the current ``time``; it reads ``time``
    # from the enclosing scope, so it follows the rebinding done below.
    if detailed:
        def take(times, prev, val):
            times.append((time, val))
    else:
        if discrete:
            def take(times, prev, val):
                if prev[0] is None or prev[0] != val:
                    times.append((time, val))
                    prev[0] = val
        else:
            def take(times, prev, val):
                if prev[0] is None or prev[0] != val:
                    times.append(((time, True), val))
                    prev[0] = val
    # One-element list so that ``take`` can update the last recorded value.
    prev = [None]
    while (index < len(l) - 1):
        # get all links of time stamp
        # NOTE(review): the link whose time differs from ``time`` is added
        # to ``lines`` before the break; confirm this is intended.
        while (index < len(l) - 1):
            u, v, t = l[index]
            add_lines(u, v, t)
            index += 1
            if (t != time):
                break
        for u in ne:
            for v in ne - {u}:
                ce[(u, v)] = 0.0
                Q = set()
                if (u, v) not in lines:
                    # No direct link in this batch: look for intermediates x
                    # whose (u, x) time precedes the (x, v) time, keeping
                    # those with the latest (u, x) time.
                    news = info[(u, v)]
                    for x in ne_x - {u, v}:
                        ux = info[(u, x)]
                        if (x, v) in lines:
                            xv = lines[(x, v)]
                        else:
                            xv = info[(x, v)]
                        if ux != -1 and xv != -1 and ux < xv:
                            if ux == news:
                                Q.add(x)
                            if ux > news:
                                Q = {x}
                                news = ux
                    if (u, v) in paths:
                        old_paths = paths[(u, v)]
                        if old_paths[0] == news:
                            # Same time: merge the intermediates.
                            paths[(u, v)] = (news, paths[(u, v)][1] | Q)
                        elif old_paths[0] < news:
                            # Later time: replace the intermediates.
                            paths[(u, v)] = (news, Q)
                    else:
                        paths[(u, v)] = (news, Q)
                    # ``e`` shares the pair equally with the other intermediates.
                    if e in paths[(u, v)][1]:
                        ce[(u, v)] = 1.0 / len(paths[(u, v)][1])
                else:
                    # Direct link in this batch: the pair's contribution stays 0.
                    paths[(u, v)] = (t, {u})
        val = sum(ce.values())
        take(times, prev, val)
        # Remember the links of this batch, then start the next one.
        for k in lines:
            info[k] = lines[k]
        time, lines = l[index][2], {}
    return TimeCollection(times, instantaneous=detailed, discrete=discrete)
def _degree_at_weighted(self, u, t, direction):
    """Weighted degree of one node or of all nodes, at one instant or at every instant.

    Parameters
    ----------
    u : node or None
        The node to query; None means every node.
    t : time or None
        The instant to query; None means every instant.
    direction : str
        One of ``'out'``, ``'in'`` or ``'both'``.

    Returns
    -------
    degree : NodeCollection, TimeCollection or number
        ``u=None, t=None``: NodeCollection mapping each node to an
        instantaneous TimeCollection of (time-stamp, summed weight).
        ``u=None, t`` given: the result of ``LinkSetDF.degree`` on the links at ``t``.
        ``u`` given, ``t=None``: instantaneous TimeCollection sorted on time.
        ``u`` and ``t`` given: the sum of the weights of the matching rows at ``t``.

    Raises
    ------
    UnrecognizedDirection
        If ``direction`` is not one of the accepted values (not checked
        for ``u=None`` with ``t`` given).
    """
    if u is None:
        if t is not None:
            snapshot = LinkSetDF(self.df.df_at(t).drop(columns=['ts']), weighted=self.weighted,
                                 merge_function=self.df_.merge_function)
            return snapshot.degree(u=None, direction=direction, weights=True)

        out = dict()

        # Pick how the weight of a link (a, b) is credited to its end-points.
        if direction == 'out':
            def add(d, a, b, w):
                d[a] += w
        elif direction == 'in':
            def add(d, a, b, w):
                d[b] += w
        elif direction == 'both':
            def add(d, a, b, w):
                d[a] += w
                d[b] += w
        else:
            raise UnrecognizedDirection()

        def flush(stamp, totals):
            # Store the summed weight of every node seen at ``stamp``,
            # appending in ascending time to the node's TimeCollection.
            for node, weight in iteritems(totals):
                if node in out:
                    out[node].it.append((stamp, weight))
                else:
                    out[node] = TimeCollection([(stamp, weight)],
                                               discrete=self.discrete, instantaneous=True)

        prev = None
        for a, b, ts, w in self.sort_df('ts').itertuples(weights=True):
            # Iterate in ascending time, adding up the weights per node.
            if prev is None:
                cache, prev = Counter(), ts
            elif ts != prev:
                flush(prev, cache)
                cache, prev = Counter(), ts
            add(cache, a, b, w)
        # Add what remains in the cache for the last time-stamp.
        flush(prev, cache)
        return NodeCollection(out)

    # Keep only the rows touching ``u``; the remaining end-point is always named 'u'.
    if direction == 'out':
        rows = self.df[self.df.u == u]
        df = rows.drop(columns=['u'], merge=False).rename(columns={'v': 'u'})
    elif direction == 'in':
        rows = self.df[self.df.v == u]
        df = rows.drop(columns=['v'], merge=False)
    elif direction == 'both':
        outgoing = self.df[self.df.u == u].drop(columns=['u'], merge=False)
        outgoing = outgoing.rename(columns={'v': 'u'})
        incoming = self.df[self.df.v == u].drop(columns=['v'], merge=False)
        df = outgoing.append(incoming, ignore_index=True, merge=True)
    else:
        raise UnrecognizedDirection()

    if t is not None:
        return df.df_at(t).w.sum()

    # Collect all the weights of each time-stamp.
    totals = Counter()
    for node, ts, w in df.itertuples(weights=True):
        totals[ts] += w
    return TimeCollection(sorted(iteritems(totals)), discrete=self.discrete, instantaneous=True)