# Example #1
def crawl(conn, crawl_count=1, new_only=False):
    """Crawl every package listed on PyPI and persist its data.

    Args:
        conn: Open database connection exposing a ``packages`` table
            with a ``name`` column.
        crawl_count: Ordinal number of this crawl, forwarded to
            ``Progress`` for display.
        new_only: When True, skip packages already present in the
            database (useful for the first crawl; subsequent crawls
            with ``new_only=False`` refresh existing data).
    """
    # Build the membership set directly from the cursor: no intermediate
    # list, and `in` tests below are O(1). Rows are (name,) tuples.
    existing_packages = {row[0] for row in conn.execute("SELECT name FROM packages")}
    client = PyPI()._client
    packages = client.list_packages()
    # Remove DB rows for packages that no longer exist on PyPI.
    remove_deleted_packages(conn, existing_packages, packages)
    progress = Progress("packages", crawl_count, len(packages))
    progress.start()
    for package in packages:
        progress.parse_count += 1
        # For the first crawl skip packages we already know of; only the
        # second crawl onward replaces data.
        if new_only and package in existing_packages:
            progress.preloaded += 1
            continue
        package_data = per_package(package)
        save_package_data(conn, package, package_data)
    progress.stop()