def test_happy_days(self):
    '''
    Every valid window size over ``range(5)`` yields the expected tuples
    '''
    # (window size, windows expected from range(5)), smallest size first
    cases = (
        (1, [(0,), (1,), (2,), (3,), (4,)]),
        (2, [(0, 1), (1, 2), (2, 3), (3, 4)]),
        (3, [(0, 1, 2), (1, 2, 3), (2, 3, 4)]),
        (4, [(0, 1, 2, 3), (1, 2, 3, 4)]),
        (5, [(0, 1, 2, 3, 4)]),
    )
    for window_size, expected_windows in cases:
        actual_windows = list(sliding_window(range(5), size=window_size))
        assert actual_windows == expected_windows
def test_invalid_size(self):
    '''
    Out-of-range window sizes are rejected:

    - size < 1 raises ValueError
    - size > ilen(iterable) raises ValueError
    '''
    # 0 is below the minimum size; 6 exceeds the 5 items of range(5)
    for bad_size in (0, 6):
        with pytest.raises(ValueError):
            # list() forces iteration of the returned windows
            list(sliding_window(range(5), size=bad_size))
def connected_components2(sets):
    """
    Group named collections that (transitively) share at least one item.

    Parameters
    ----------
    sets : dict
        Maps a name to a collection of items. Empty collections are ignored.
        Items are sorted together with their names, so items (and, for equal
        items, names) must be mutually orderable; items must also be hashable.

    Returns
    -------
    dict
        Maps ``frozenset`` of the names in a connected component to the
        ``set`` union of the items of those names. Empty when no non-empty
        collection was given.
    """
    g = nx.Graph()
    sets = {name: items for name, items in sets.items() if items}
    g.add_nodes_from(sets.keys())

    # After sorting, equal items are adjacent, so comparing each neighbouring
    # pair is enough to link every pair of names sharing an item.
    tagged_items = sorted(
        (item, name)
        for name, items in sets.items()
        for item in items
    )

    # zip simply yields nothing when there are fewer than two entries, so empty
    # input and input with a single item in total are handled without raising.
    for (item_a, name_a), (item_b, name_b) in zip(tagged_items, tagged_items[1:]):
        if item_a == item_b:
            g.add_edge(name_a, name_b)

    return {
        frozenset(component): set.union(*(set(sets[name]) for name in component))
        for component in nx.connected_components(g)
    }
def create_input(overlap, list_size_distribution, list_size_mean, set_count):
    """
    Create shuffled lists of integers with a controlled amount of overlap.

    First ``set_count`` disjoint sets of consecutive integers are created with
    sizes drawn from the requested distribution. Then overlap is introduced
    between neighbouring sets, occasionally skipping ahead to start a new
    family of overlapping sets.

    Parameters
    ----------
    overlap : float
        ``round(set_count * overlap)`` sets take part in overlap; presumably a
        fraction in [0, 1] (TODO confirm against callers).
    list_size_distribution : str
        One of ``'constant'``, ``'uniform'``, ``'left_triangular'`` (more small
        lists) or ``'right_triangular'`` (more large lists).
    list_size_mean : int
        Size of every set for ``'constant'``; otherwise sizes range from 1 to
        ``2 * list_size_mean - 1``.
    set_count : int
        Number of sets to create.

    Returns
    -------
    sets : list of list
        The generated sets as lists, in shuffled order.
    expected_output : list of set
        The unions of each family of overlapping sets followed by the sets
        left untouched.

    Raises
    ------
    ValueError
        If ``list_size_distribution`` is not one of the supported names.
    """
    # create without overlap
    min_ = 1
    max_ = 2 * list_size_mean - min_
    if list_size_distribution == 'constant':  # 1 list size
        list_sizes = np.full(set_count, list_size_mean, dtype=int)
    elif list_size_distribution == 'uniform':  # all sizes equally possible
        # randint excludes its upper bound, hence the +1 to keep max_ reachable
        list_sizes = np.random.randint(min_, int(max_) + 1, set_count)
    elif list_size_distribution == 'left_triangular':  # more small lists
        list_sizes = np.random.triangular(min_, min_, max_, set_count).round()
    elif list_size_distribution == 'right_triangular':  # more large lists
        list_sizes = np.random.triangular(min_, max_, max_, set_count).round()
    else:
        # Explicit exception rather than `assert`, which is stripped under -O
        raise ValueError(
            'Unknown list_size_distribution: {!r}'.format(list_size_distribution)
        )

    # Cumulative sizes give the boundaries of consecutive, disjoint ranges
    indices = np.insert(list_sizes.astype(int).cumsum(), 0, 0)
    sets = [set(range(start, end)) for start, end in sliding_window(indices)]

    # add overlap, with a skip every so often
    overlap_to_create = round(set_count * overlap)
    i = 0

    def get_next_skip_distance():
        return round(np.random.standard_exponential()) + 1

    next_skip = get_next_skip_distance()
    expected_output = []
    current_overlap = sets[0]
    overlap_to_create -= 1  # sets[0] itself counts as part of the overlap
    while overlap_to_create > 0:
        if i == next_skip and i < len(sets) - 1 and overlap_to_create >= 2:
            # start new family of overlapping sets
            i += 1
            overlap_to_create -= 1
            expected_output.append(current_overlap)
            current_overlap = sets[i]
            next_skip = i + get_next_skip_distance()

        # create overlap: swap one element of the next set for one of this set
        sets[i + 1].pop()
        sets[i + 1].add(first(sets[i]))
        overlap_to_create -= 1
        current_overlap = current_overlap | sets[i + 1]
        i += 1
    else:
        # The loop has no `break`, so this always runs once the loop finishes:
        # flush the last family and append the sets that were left untouched.
        expected_output.append(current_overlap)
        expected_output.extend(sets[i + 1:])

    # final things
    sets = [list(x) for x in sets]
    random.shuffle(sets)

    # debug info of this func
    # print()
    # print('{} overlap, {} set sizes with mean {}, {} sets'.format(overlap, list_size_distribution, list_size_mean, set_count))
    # print(pd.Series(len(x) for x in sets).describe())
    #
    # overlap = 1 - len(expected_output)/len(sets)
    # print('{:.2}% of sets removed due to overlap'.format(overlap))
    # Note this is only a rough under-estimate of actual number of sets overlapping

    return sets, expected_output
def toset_from_tosets(*tosets):  # Note: a setlist is perfect representation of a toset as it's totally ordered and it's a set, i.e. a toset
    """
    Create totally ordered set (toset) from tosets.

    These tosets, when merged, form a partially ordered set. The linear
    extension of this poset, a toset, is returned.

    .. warning:: untested

    Parameters
    ----------
    tosets : iterable of setlist
        Tosets to merge. Tosets with fewer than two elements contribute their
        elements but no ordering constraints.

    Raises
    ------
    ValueError
        If the tosets (derived from the lists) contradict each other. E.g.
        ``[a, b]`` and ``[b, c, a]`` contradict each other.

    Returns
    -------
    setlist
        Totally ordered set
    """
    # Construct directed graph with: a <-- b iff a < b and adjacent in a list
    graph = nx.DiGraph()
    for toset in tosets:
        graph.add_nodes_from(toset)
        descending = list(reversed(toset))
        # Pair neighbours with zip: it yields nothing for empty or
        # single-element tosets instead of raising.
        graph.add_edges_from(zip(descending, descending[1:]))

    # No cycles allowed
    if not nx.is_directed_acyclic_graph(graph):  # TODO could rely on NetworkXUnfeasible https://networkx.github.io/documentation/networkx-1.9/reference/generated/networkx.algorithms.dag.topological_sort.html
        raise ValueError("Given tosets contradict each other")  # each cycle is a contradiction, e.g. a > b > c > a

    # Topological sort. Edges point from greater to smaller, so the sort yields
    # a descending order; reverse it to get the ascending toset. The reversal is
    # done here rather than via the `reverse` keyword, which newer networkx
    # releases no longer accept.
    ascending = list(nx.topological_sort(graph))
    ascending.reverse()
    return setlist(ascending)