def test_basic_iter_random_walk_weighted():
    # not deterministic but practically OK
    assert set(
        take(10**5, iter_random_walk(graph_abc_weighted, "a", weight="weight"))
    ) == {"a", "b", "c"}
    count = Counter(
        take(10**5, iter_random_walk(graph_abc_weighted, "a", weight="weight"))
    )
    assert count["c"] > count["b"]
def get_user_requests(self):
    """Return a list of user-requested items.

    Each item is a dict with the following keys:
    'date': the date and time the command was run
    'cmd': a list of argv of the actual command which was run
    'action': install/remove/update
    'specs': the specs being used
    """
    res = []
    for dt, unused_cont, comments in self.parse():
        item = {'date': dt}
        for line in comments:
            comment_items = self._parse_comment_line(line)
            item.update(comment_items)

        if 'cmd' in item:
            res.append(item)

        dists = groupby(itemgetter(0), unused_cont)
        item['unlink_dists'] = dists.get('-', ())
        item['link_dists'] = dists.get('+', ())

    conda_versions_from_history = tuple(x['conda_version'] for x in res
                                        if 'conda_version' in x)
    if conda_versions_from_history:
        minimum_conda_version = sorted(conda_versions_from_history,
                                       key=VersionOrder)[-1]
        minimum_major_minor = '.'.join(take(2, minimum_conda_version.split('.')))
        current_major_minor = '.'.join(take(2, CONDA_VERSION.split('.')))
        if VersionOrder(current_major_minor) < VersionOrder(minimum_major_minor):
            message = dals("""
            This environment has previously been operated on by a conda version that's newer
            than the conda currently being used. A newer version of conda is required.
              target environment location: %(target_prefix)s
              current conda version: %(conda_version)s
              minimum conda version: %(minimum_version)s
            """) % {
                "target_prefix": self.prefix,
                "conda_version": CONDA_VERSION,
                "minimum_version": minimum_major_minor,
            }
            if not paths_equal(self.prefix, context.root_prefix):
                message += dedent("""
                Update conda and try again.
                    $ conda install -p "%(base_prefix)s" "conda>=%(minimum_version)s"
                """) % {
                    "base_prefix": context.root_prefix,
                    "minimum_version": minimum_major_minor,
                }
            raise CondaUpgradeError(message)
    return res
def _execute(cls, all_action_groups):
    with signal_handler(conda_signal_handler):
        pkg_idx = 0
        try:
            for pkg_idx, axngroup in enumerate(all_action_groups):
                cls._execute_actions(pkg_idx, axngroup)
        except Exception as execute_multi_exc:
            # reverse all executed packages except the one that failed
            rollback_excs = []
            if context.rollback_enabled:
                failed_pkg_idx = pkg_idx
                reverse_actions = reversed(tuple(enumerate(
                    take(failed_pkg_idx, all_action_groups)
                )))
                for pkg_idx, axngroup in reverse_actions:
                    excs = cls._reverse_actions(pkg_idx, axngroup)
                    rollback_excs.extend(excs)
            raise CondaMultiError(tuple(concatv(
                (execute_multi_exc.errors
                 if isinstance(execute_multi_exc, CondaMultiError)
                 else (execute_multi_exc,)),
                rollback_excs,
            )))
        else:
            for axngroup in all_action_groups:
                for action in axngroup.actions:
                    action.cleanup()
def aoc15(a_start=A_START, b_start=B_START, a_mult=A_MULT, b_mult=B_MULT,
          n=40_000_000):
    a_gen = value_gen(a_start, A_FACTOR, a_mult)
    b_gen = value_gen(b_start, B_FACTOR, b_mult)
    return sum(a == b for a, b in take(n, zip(a_gen, b_gen)))
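# aoc15 relies on a value_gen helper that is not shown in this listing. A
# minimal hypothetical sketch, assuming the usual Advent of Code 2017 day 15
# rules (the modulus, the multiple-of filter, and the low-16-bit comparison
# are assumptions, not taken from this file):
def value_gen(start, factor, mult=1):
    value = start
    while True:
        value = (value * factor) % 2147483647
        if value % mult == 0:
            yield value & 0xFFFF  # only the lowest 16 bits are compared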
def read_mod_and_etag(path):
    with open(path, 'rb') as f:
        try:
            with closing(mmap(f.fileno(), 0, access=ACCESS_READ)) as m:
                match_objects = take(2, re.finditer(b'"(_etag|_mod)":[ ]?"(.*)"', m))
                result = dict(map(ensure_text_type, mo.groups())
                              for mo in match_objects)
                return result
        except ValueError:
            # ValueError: cannot mmap an empty file
            return {}
def _weirdo_game(*decks):
    seen = set()
    d1, d2 = decks
    while all(decks):
        ts = tuple(d1), tuple(d2)
        if ts in seen:
            return 0, d1
        seen.add(ts)
        c1, c2 = d1.popleft(), d2.popleft()
        if len(d1) >= c1 and len(d2) >= c2:
            winner, _ = _weirdo_game(deque(take(c1, d1)), deque(take(c2, d2)))
        else:
            winner = c1 < c2
        decks[winner].extend((c2, c1) if winner else (c1, c2))
    return bool(d2), decks[bool(d2)]
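# A usage sketch for _weirdo_game, assuming it is the recursive-combat game
# from Advent of Code 2020 day 22; the decks below are the puzzle's published
# sample, chosen here purely for illustration:
from collections import deque

winner_index, winning_deck = _weirdo_game(
    deque([9, 2, 6, 3, 1]), deque([5, 8, 4, 7, 10])
)
# score the winning deck: bottom card counts 1x, the next 2x, and so on
print(sum(i * c for i, c in enumerate(reversed(winning_deck), start=1)))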
def read_mod_and_etag(path):
    with open(path, 'rb') as f:
        try:
            with closing(mmap(f.fileno(), 0, access=ACCESS_READ)) as m:
                match_objects = take(3, re.finditer(REPODATA_HEADER_RE, m))
                result = dict(map(ensure_unicode, mo.groups())
                              for mo in match_objects)
                return result
        except ValueError:
            # ValueError: cannot mmap an empty file
            return {}
def read_mod_and_etag(path):
    with open(path, 'rb') as f:
        try:
            with closing(mmap(f.fileno(), 0, access=ACCESS_READ)) as m:
                match_objects = take(3, re.finditer(REPODATA_HEADER_RE, m))
                result = dict(map(ensure_unicode, mo.groups())
                              for mo in match_objects)
                return result
        except (BufferError, ValueError):
            # BufferError: cannot close exported pointers exist
            #   https://github.com/conda/conda/issues/4592
            # ValueError: cannot mmap an empty file
            return {}
def get_major_minor_version(string, with_dot=True):
    # returns None if not found, otherwise two digits as a string
    # should work for
    #   - 3.5.2
    #   - 27
    #   - bin/python2.7
    #   - lib/python34/site-packages/
    # the last two are dangerous because windows doesn't have version
    # information there
    assert isinstance(string, string_types)
    digits = tuple(take(2, (c for c in string if c.isdigit())))
    if len(digits) == 2:
        return '.'.join(digits) if with_dot else ''.join(digits)
    return None
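# Illustrative expected behaviour on the examples listed in the comment above
# (these asserts are not part of the original file):
assert get_major_minor_version("3.5.2") == "3.5"
assert get_major_minor_version("27") == "2.7"
assert get_major_minor_version("bin/python2.7") == "2.7"
assert get_major_minor_version("lib/python34/site-packages/") == "3.4"
assert get_major_minor_version("3.5.2", with_dot=False) == "35"
assert get_major_minor_version("noversion") is None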
def test_embedding_graph():
    forward, reverse = lookup_tables(graph_abcd)
    walks = take(10, iter_random_walks(graph_abcd, 5))
    node_embeddings = initial_deepwalk_embedding(walks, forward, 100)
    initial_embedding = to_embedding_matrix(node_embeddings, 100, reverse)
    embedding = SplitterEmbedding(
        node_count=10,
        persona_node_count=15,
        embedding_dimension=100,
        initial_embedding=initial_embedding,
    )
    persona_batch = torch.ones(5).long()
    output = embedding(persona_batch)
    assert output.shape == (5, 100)
def _execute(cls, all_action_groups):
    with signal_handler(conda_signal_handler), time_recorder("unlink_link_execute"):
        pkg_idx = 0
        try:
            with spinner("Executing transaction",
                         not context.verbosity and not context.quiet,
                         context.json):
                for pkg_idx, axngroup in enumerate(all_action_groups):
                    cls._execute_actions(pkg_idx, axngroup)
        except CondaMultiError as e:
            action, is_unlink = (None, axngroup.type == 'unlink')
            prec = axngroup.pkg_data
            log.error("An error occurred while %s package '%s'.\n"
                      "%r\n"
                      "Attempting to roll back.\n",
                      'uninstalling' if is_unlink else 'installing',
                      prec and prec.dist_str(), e.errors[0])
            # reverse all executed packages except the one that failed
            rollback_excs = []
            if context.rollback_enabled:
                with spinner("Rolling back transaction",
                             not context.verbosity and not context.quiet,
                             context.json):
                    failed_pkg_idx = pkg_idx
                    reverse_actions = reversed(tuple(enumerate(
                        take(failed_pkg_idx, all_action_groups)
                    )))
                    for pkg_idx, axngroup in reverse_actions:
                        excs = cls._reverse_actions(pkg_idx, axngroup)
                        rollback_excs.extend(excs)
            raise CondaMultiError(tuple(concatv(
                (e.errors if isinstance(e, CondaMultiError) else (e,)),
                rollback_excs,
            )))
        else:
            for axngroup in all_action_groups:
                for action in axngroup.actions:
                    action.cleanup()
def iter_random_walks(
    G: nx.Graph, length: int, weight: Optional[str] = None
) -> Iterable[List[Hashable]]:
    """
    Given an input graph, repeatedly yield random walks of a fixed maximum
    length starting at random nodes; if the node is disconnected then the
    walk will consist of the node itself.

    :param G: input graph
    :param length: maximum length of walk
    :param weight: name of weight attribute to use, or None to disable, default None
    :return: yields lists of walks
    """
    while True:
        yield list(
            take(
                length,
                iter_random_walk(G, random.choice(list(G.nodes())), weight=weight),
            )
        )
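# A usage sketch for iter_random_walks: draw five bounded-length walks from a
# toy graph (the karate-club graph is chosen here for illustration, and take
# is assumed to be the toolz helper used throughout this listing):
import networkx as nx
from toolz import take

for walk in take(5, iter_random_walks(nx.karate_club_graph(), length=10)):
    print(walk)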
def _execute(cls, all_action_groups):
    with signal_handler(conda_signal_handler), time_recorder("unlink_link_execute"):
        pkg_idx = 0
        try:
            with Spinner("Executing transaction",
                         not context.verbosity and not context.quiet,
                         context.json):
                for pkg_idx, axngroup in enumerate(all_action_groups):
                    cls._execute_actions(pkg_idx, axngroup)
        except CondaMultiError as e:
            action, is_unlink = (None, axngroup.type == 'unlink')
            prec = axngroup.pkg_data
            log.error("An error occurred while %s package '%s'.\n"
                      "%r\n"
                      "Attempting to roll back.\n",
                      'uninstalling' if is_unlink else 'installing',
                      prec and prec.dist_str(), e.errors[0])
            # reverse all executed packages except the one that failed
            rollback_excs = []
            if context.rollback_enabled:
                with Spinner("Rolling back transaction",
                             not context.verbosity and not context.quiet,
                             context.json):
                    failed_pkg_idx = pkg_idx
                    reverse_actions = reversed(tuple(enumerate(
                        take(failed_pkg_idx, all_action_groups)
                    )))
                    for pkg_idx, axngroup in reverse_actions:
                        excs = cls._reverse_actions(pkg_idx, axngroup)
                        rollback_excs.extend(excs)
            raise CondaMultiError(tuple(concatv(
                (e.errors if isinstance(e, CondaMultiError) else (e,)),
                rollback_excs,
            )))
        else:
            for axngroup in all_action_groups:
                for action in axngroup.actions:
                    action.cleanup()
def test_concatv():
    assert list(concatv([], [], [])) == []
    assert list(take(5, concatv(["a", "b"], range(1000000000)))) == ["a", "b", 0, 1, 2]
def test_iterate():
    assert list(itertools.islice(iterate(inc, 0), 0, 5)) == [0, 1, 2, 3, 4]
    assert list(take(4, iterate(double, 1))) == [1, 2, 4, 8]
import networkx as nx
from toolz import take

# the module path for these three is assumed from usage elsewhere in this
# listing (lookup_tables is imported from ptsplitter.deepwalk below)
from ptsplitter.deepwalk import (
    initial_deepwalk_embedding,
    iter_random_walks,
    lookup_tables,
)
from ptsplitter.persona import persona_graph
from ptsplitter.splitter import SplitterEmbedding
from ptsplitter.utils import (
    embedding_groups,
    positive_edges,
    negative_edges,
    iter_get_scores,
)

# TODO this dataset is directed
print("Reading in dataset.")
G = nx.read_edgelist("data_input/wiki-Vote.txt")
sample_number = G.number_of_edges() // 2
G_original = nx.Graph(G)
positive_samples = list(take(sample_number, positive_edges(G)))
negative_samples = list(take(sample_number, negative_edges(G)))
G.remove_edges_from(positive_samples)

print("Constructing persona graph.")
PG = persona_graph(G)

print("Constructing lookups.")
forward_persona, reverse_persona = lookup_tables(PG)
forward, reverse = lookup_tables(G)

print("Generating random walks and initial embeddings.")
walks = take(10000, iter_random_walks(G, length=10))
base_embedding = initial_deepwalk_embedding(
    walks=walks, forward_lookup=forward, embedding_dimension=100, window=10
)
def test_basic_initial_deepwalk_embedding_oov():
    forward, reverse = lookup_tables(graph_ab)
    walks = take(100, cycle([["a"]]))
    embedding = initial_deepwalk_embedding(walks, forward, 10)
    assert len(embedding) == 2
    assert set(embedding.keys()) == {"a", "b"}
def test_basic_iter_random_walks():
    for walk in map(set, take(10, iter_random_walks(graph_ab, 2))):
        assert walk == {"a", "b"}
def test_take():
    assert list(take(3, 'ABCDE')) == list('ABC')
    assert list(take(2, (3, 2, 1))) == list((3, 2))
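# The take helper exercised throughout this listing follows toolz semantics;
# a minimal equivalent sketch, assuming nothing beyond those semantics:
from itertools import islice

def take(n, seq):
    """Return an iterator over the first n items of seq."""
    return islice(seq, n)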
def write_head(fo):
    fo.write("==> %s <==\n" % time.strftime('%Y-%m-%d %H:%M:%S'))
    fo.write("# cmd: %s\n" % (' '.join(ensure_text_type(s) for s in sys.argv)))
    fo.write("# conda version: %s\n" % '.'.join(take(3, CONDA_VERSION.split('.'))))
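# For illustration, write_head emits a history header shaped like the
# following (the timestamp, argv, and version values are hypothetical):
#
#   ==> 2019-01-01 12:00:00 <==
#   # cmd: /opt/conda/bin/conda install numpy
#   # conda version: 4.6.14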
def range_(a, b):
    return take(b - a, integers(a))
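# range_ builds on an integers helper not shown in this listing; a minimal
# sketch, with the name and behaviour assumed from usage:
from itertools import count

def integers(start):
    """Yield start, start + 1, start + 2, ..."""
    return count(start)

# so that, e.g., list(range_(3, 7)) == [3, 4, 5, 6]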
def test_basic_iter_random_walk():
    # not deterministic but practically OK
    assert set(take(10**5, iter_random_walk(graph_abcd, "a"))) == {"a", "b", "c", "d"}
    assert set(take(2, iter_random_walk(graph_ab, "a"))) == {"a", "b"}
    assert set(take(2, iter_random_walk(graph_ab, "b"))) == {"a", "b"}
def test_basic_initial_deepwalk_embedding():
    forward, reverse = lookup_tables(graph_ab)
    walks = take(100, iter_random_walks(graph_ab, 2))
    embedding = initial_deepwalk_embedding(walks, forward, 10)
    assert len(embedding) == 2
    assert set(embedding.keys()) == {"a", "b"}
def test_to_embedding_matrix():
    forward, reverse = lookup_tables(graph_ab)
    walks = take(100, iter_random_walks(graph_ab, 2))
    node_embedding = initial_deepwalk_embedding(walks, forward, 10)
    embedding = to_embedding_matrix(node_embedding, 10, reverse)
    assert embedding.shape == (2, 10)
import networkx as nx
from networkx.algorithms.link_prediction import jaccard_coefficient, adamic_adar_index
from sklearn.metrics import roc_auc_score
from toolz.curried import nth, take  # curried forms assumed, given map(nth(2), ...)

from ptsplitter.deepwalk import lookup_tables
from ptsplitter.persona import persona_graph
from ptsplitter.utils import positive_edges, negative_edges, iter_get_scores_networkx

print("Reading in dataset.")
G = max(
    nx.connected_component_subgraphs(nx.read_edgelist("data_input/CA-AstroPh.txt")),
    key=len,
)
sample_number = G.number_of_edges() // 2
G_original = nx.Graph(G)
positive_samples = list(take(sample_number, positive_edges(G)))
negative_samples = list(take(sample_number, negative_edges(G)))
G.remove_edges_from(positive_samples)

positive_scores_non_persona = list(
    map(nth(2), jaccard_coefficient(G, positive_samples))
)
negative_scores_non_persona = list(
    map(nth(2), jaccard_coefficient(G, negative_samples))
)
print(sum(positive_scores_non_persona))
print(sum(negative_scores_non_persona))
print(
    roc_auc_score(
        # call completed from context: 1/0 labels over the concatenated
        # positive and negative score lists
        [1] * sample_number + [0] * sample_number,
        positive_scores_non_persona + negative_scores_non_persona,
    )
)