def test_partition(self):
    def is_odd(x):
        return x % 2 == 1
    before, after = iters.partition(is_odd, iters.range(5))
    self.assertEqual([0, 2, 4], list(before))
    self.assertEqual([1, 3], list(after))
def _collect(start_url, limit, do_head_fn, do_get_fn):
    '''
    Recursively collect incoming and outgoing link information starting from the specified URL
    @note: If one of the pages cannot be reached, the limit doesn't decrease
    @note: The algorithm below is very similar to BFS
    @types: str, int, callable, callable -> dict[str, UrlInfo]
    @type do_head_fn: (iterable[str] -> iterable[requests.Response])
    @param do_head_fn: function that takes an iterable of URLs and returns a list of responses to HEAD requests
    @type do_get_fn: (iterable[str] -> iterable[requests.Response])
    @param do_get_fn: function that takes an iterable of URLs and returns a list of responses to GET requests
    '''
    logger.info("starting url with limit %s: %s" % (limit, start_url))
    info_by_url = defaultdict(new_url_info)
    # queue of (parent URL, URL to visit) pairs; the start URL has no parent
    parent_to_url_queue = [(None, _normalize_url(start_url))]
    while parent_to_url_queue and limit > 0:
        # take at most `limit` queued URLs and process them as one batch
        urls_to_process = parent_to_url_queue[:limit]
        del parent_to_url_queue[:limit]
        urls = (url for _, url in urls_to_process)
        results = _get_outgoings(urls, do_head_fn, do_get_fn)
        results = izip(urls_to_process, results)
        for (parent_url, url), (is_page_reached, outgoing_urls) in results:
            if is_page_reached:
                limit = limit - 1
                # drop in-page fragment references and resolve relative URLs
                outgoing_urls = ifilterfalse(_is_fragment_ref, outgoing_urls)
                outgoing_urls = [_normalize_url(url, u) for u in outgoing_urls]
                info = info_by_url[url]
                info.outgoing.update(outgoing_urls)
                if parent_url:
                    info.incomming.add(parent_url)
                # already visited URLs only get an incoming link recorded;
                # unvisited ones are queued for the next iterations
                candidates, visited = partition(info_by_url.has_key, outgoing_urls)
                for u in visited:
                    info_by_url[u].incomming.add(url)
                parent_to_url_queue.extend(izip(repeat(url), candidates))
                logger.debug("OK %s <-- %s" % (url, parent_url))
            else:
                logger.debug("FAILED %s <-- %s" % (url, parent_url))
    return info_by_url
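# Usage sketch (not part of the original module, assuming the `requests`
# library is available): _collect can be driven by simple batch helpers that
# issue one HEAD/GET request per URL. The helper names `do_head` and `do_get`
# below are hypothetical.
#
#     import requests
#
#     def do_head(urls):
#         return [requests.head(u, allow_redirects=True) for u in urls]
#
#     def do_get(urls):
#         return [requests.get(u) for u in urls]
#
#     info_by_url = _collect("http://example.com", 100, do_head, do_get)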
def extract_args(name_space):
    def extract(nt, args):
        # read each namedtuple field from the namespace and build the namedtuple
        vals = (getattr(name_space, arg) for arg in args)
        args_dict = dict(zip(args, vals))
        return nt(**args_dict)

    # split the `types` mapping into plain entries and namedtuple entries
    non_nt_types, nts = partition(compose(is_namedtuple, get(1)), types.items())
    nt_names, nt_types = zip(*nts)
    nt_args = map(lambda x: x._fields, nt_types)
    args_dict = dict(zip(nt_names, map(extract, nt_types, nt_args)))
    # plain (non-namedtuple) attributes are copied from the namespace directly
    non_nt_types = list(non_nt_types)
    non_nts = dict(
        (attr, getattr(name_space, attr)) for (attr, _) in non_nt_types)
    args_dict.update(non_nts)
    return args_dict
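# Usage sketch (assumptions: a module-level `types` dict maps attribute names
# to their expected types, `is_namedtuple` recognises namedtuple classes, and
# `name_space` comes from argparse). The names below are illustrative only.
#
#     Point = namedtuple('Point', ['x', 'y'])
#     types = {'point': Point, 'verbose': bool}
#     ns = argparse.Namespace(x=1, y=2, verbose=True)
#     extract_args(ns)  # -> {'point': Point(x=1, y=2), 'verbose': True}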