def main():
    """
    Load the edges of the dependency graph by downloading PyPI.

    Yep. All of it. Make sure you have at least 50GB disk space.

    The MySQL credentials are read from ``secret.json`` in the current
    working directory; the keys ``host``, ``user``, ``passwd`` and ``db``
    are required. Every row of ``packages`` is visited in ascending ``id``
    order. For each package the URL of its most recently uploaded release
    is looked up and handed to ``package_analysis.main`` together with the
    package name. Packages without any release are skipped silently.

    The cursor and the connection are always closed, even when the
    analysis of a package raises.
    """
    with open("secret.json") as f:
        mysql = json.load(f)

    packages_sql = ("SELECT `packages`.`id`, `name` FROM `packages` "
                    "ORDER BY `id` ASC")
    # Loop-invariant, so it is built once instead of once per package.
    latest_release_sql = ("SELECT `url` FROM `releases` "
                          "WHERE `package_id` = %s "
                          "ORDER BY `upload_time` DESC LIMIT 1")

    connection = pymysql.connect(host=mysql['host'],
                                 user=mysql['user'],
                                 passwd=mysql['passwd'],
                                 db=mysql['db'],
                                 cursorclass=pymysql.cursors.DictCursor,
                                 charset='utf8')
    try:
        with connection.cursor() as cursor:
            cursor.execute(packages_sql)
            packages = cursor.fetchall()
            for pkg in packages:
                cursor.execute(latest_release_sql, (pkg['id'], ))
                release = cursor.fetchone()
                # fetchone() yields None when the package has no releases.
                if release is not None and 'url' in release:
                    package_analysis.main(pkg['name'], release['url'])
                    logging.info("Package '%s' done.", pkg['name'])
    finally:
        # Guard with ``open`` so that closing a connection that was already
        # dropped does not mask the exception that caused the drop.
        if connection.open:
            connection.close()
def main():
    """
    Load the edges of the dependency graph by downloading PyPI.

    Yep. All of it. Make sure you have at least 50GB disk space.

    The MySQL credentials are read from ``secret.json`` in the current
    working directory; the keys ``host``, ``user``, ``passwd`` and ``db``
    are required. A single query selects, per package, the release(s) with
    the latest ``upload_time`` whose ``downloaded_bytes`` is still 0,
    ordered by package name. Each selected release is handed to
    ``package_analysis.main`` as ``(name, url, release id)``.

    All rows are fetched up front, so the connection is closed before the
    per-package processing starts instead of being held open throughout.
    """
    with open("secret.json") as f:
        mysql = json.load(f)

    # Self anti-join: a row of `o` survives only if no release `b` of the
    # same package has a strictly later upload time, i.e. `o` is the newest.
    sql = """SELECT
    `o`.`id`,
    `packages`.`name`,
    `o`.`url`,
    `o`.`upload_time`,
    `o`.`release_number`,
    `o`.`downloaded_bytes`
FROM
    `releases` o
LEFT JOIN
    `releases` b  ON `o`.`package_id` = `b`.`package_id`
    AND `o`.`upload_time` < `b`.`upload_time`
Left JOIN
    `packages` ON `packages`.`id` = `o`.`package_id`
WHERE
    `b`.`upload_time` is NULL
    AND `o`.`downloaded_bytes` = 0
ORDER BY
    `packages`.`name`
"""

    connection = pymysql.connect(
        host=mysql["host"],
        user=mysql["user"],
        passwd=mysql["passwd"],
        db=mysql["db"],
        cursorclass=pymysql.cursors.DictCursor,
        charset="utf8",
    )
    try:
        with connection.cursor() as cursor:
            logging.info("Start fetching packages...")
            cursor.execute(sql)
            packages = cursor.fetchall()
    finally:
        # Guard with ``open`` so that closing a connection that was already
        # dropped does not mask the exception that caused the drop.
        if connection.open:
            connection.close()

    logging.info("Fetched %i packages.", len(packages))
    for pkg in packages:
        package_analysis.main(pkg["name"], pkg["url"], pkg["id"])
        logging.info("Package '%s' done.", pkg["name"])
def main():
    """
    Load the edges of the dependency graph by downloading PyPI.

    Yep. All of it. Make sure you have at least 50GB disk space.

    The MySQL credentials are read from ``secret.json`` in the current
    working directory; the keys ``host``, ``user``, ``passwd`` and ``db``
    are required. A single query selects, per package, the release(s) with
    the latest ``upload_time`` whose ``downloaded_bytes`` is still 0,
    ordered by package name. Each selected release is handed to
    ``package_analysis.main`` as ``(name, url, release id)``.

    All rows are fetched up front, so the connection is closed before the
    per-package processing starts instead of being held open throughout.
    """
    with open("secret.json") as f:
        mysql = json.load(f)

    # Self anti-join: a row of `o` survives only if no release `b` of the
    # same package has a strictly later upload time, i.e. `o` is the newest.
    sql = """SELECT
    `o`.`id`,
    `packages`.`name`,
    `o`.`url`,
    `o`.`upload_time`,
    `o`.`release_number`,
    `o`.`downloaded_bytes`
FROM
    `releases` o
LEFT JOIN
    `releases` b  ON `o`.`package_id` = `b`.`package_id`
    AND `o`.`upload_time` < `b`.`upload_time`
Left JOIN
    `packages` ON `packages`.`id` = `o`.`package_id`
WHERE
    `b`.`upload_time` is NULL
    AND `o`.`downloaded_bytes` = 0
ORDER BY
    `packages`.`name`
"""

    connection = pymysql.connect(host=mysql['host'],
                                 user=mysql['user'],
                                 passwd=mysql['passwd'],
                                 db=mysql['db'],
                                 cursorclass=pymysql.cursors.DictCursor,
                                 charset='utf8')
    try:
        with connection.cursor() as cursor:
            cursor.execute(sql)
            packages = cursor.fetchall()
    finally:
        # Guard with ``open`` so that closing a connection that was already
        # dropped does not mask the exception that caused the drop.
        if connection.open:
            connection.close()

    logging.info("Fetched %i packages.", len(packages))
    for pkg in packages:
        package_analysis.main(pkg['name'], pkg['url'], pkg['id'])
        logging.info("Package '%s' done.", pkg['name'])