# print_ is used throughout these snippets; the pip scripts below pull it
# from pip._vendor.six, so importing it from six here is an assumption.
from six import print_


def record_user_click(index, keyword, url):
    urls = lookup(index, keyword)
    if urls:
        for entry in urls:
            print_(urls)  # debug output
            print_(url)   # debug output
            if entry[0] == url:
                entry[1] += 1
def crawl_web(seed, max_depth):
    to_crawl = [seed]
    crawled = []
    index = {}
    next_depth = []
    depth = 0
    while to_crawl and depth <= max_depth:
        page = to_crawl.pop()
        if page not in crawled:
            content = get_page(page)
            add_page_to_index(index, page, content)
            union(next_depth, get_all_links(content))  # something looks bugged here, need to check
            crawled.append(page)  # @TODO: run code and check
        if not to_crawl:
            to_crawl, next_depth = next_depth, []
            depth += 1
    print_("index: " + str(index))
    return index
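# crawl_web relies on get_page, get_all_links, and union, none of which are
# defined in these snippets. A minimal sketch of what they are assumed to do,
# with behaviour inferred from the calls above rather than from the original
# source:

def get_page(url):
    # Assumed helper: fetch a page and return its source as a string,
    # or an empty string on failure.
    try:
        from six.moves.urllib.request import urlopen
        return urlopen(url).read().decode('utf-8', errors='replace')
    except Exception:
        return ''


def get_all_links(content):
    # Assumed helper: extract href targets from the page source.
    import re
    return re.findall(r'href="([^"]+)"', content)


def union(a, b):
    # Assumed helper: add every element of b to a, in place, skipping
    # elements already present (crawl_web relies on the mutation).
    for e in b:
        if e not in a:
            a.append(e)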
#!/usr/bin/env python
import json

# NOTE: these are internal pip APIs from the pre-pip-10 layout; pip 10+
# moved them under pip._internal.
from pip.req import parse_requirements
from pip.download import PipSession
from pip._vendor import pkg_resources
from pip._vendor.six import print_

requirements = [
    req.req
    for req in parse_requirements('requirements.txt', session=PipSession())
]

transform = lambda dist: {
    'name': dist.project_name,
    'version': dist.version,
    'location': dist.location,
    'dependencies': list(
        map(lambda dependency: dependency.project_name, dist.requires())
    ),
}

packages = [
    transform(dist)
    for dist in pkg_resources.working_set.resolve(requirements)
]

print_(json.dumps(packages))
print_("index: " + str(index)) return index def add_to_index(index, keyword, url): if keyword in index: index[keyword].append([url, 0]) else: index[keyword] = [url, 0] def lookup(index, keyword): if keyword in index: return index[keyword] else: return None def add_page_to_index(index, url, content): content_list = content.split() for entry in content_list: add_to_index(index, entry, url) index = crawl_web('http://xkcd.com/353', 1) print_("lookup") print_(lookup(index, 'comic')) record_user_click(index, 'comic', 'http://xkcd.com/353') print_("new index") print_(index)
#!/usr/bin/env python
import json

# NOTE: internal pip APIs from the pre-pip-10 layout; pip 10+ moved these
# under pip._internal.
from pip.utils import get_installed_distributions
from pip._vendor.six import print_

packages = []
for dist in get_installed_distributions():
    packages.append({
        "name": dist.project_name,
        "version": dist.version,
        "location": dist.location,
        "dependencies": list(
            map(lambda dependency: dependency.project_name, dist.requires())
        ),
    })

print_(json.dumps(packages))
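# get_installed_distributions is a private pip helper that later pip releases
# removed. A rough equivalent of the script above (a sketch, not the original)
# can be built on pkg_resources alone:
import json

import pkg_resources


def describe(dist):
    # Same fields as the script above: name, version, install location,
    # and the project names of the direct dependencies.
    return {
        "name": dist.project_name,
        "version": dist.version,
        "location": dist.location,
        "dependencies": [req.project_name for req in dist.requires()],
    }


print(json.dumps([describe(dist) for dist in pkg_resources.working_set]))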