Exemple #1
0
# Report the size of the duplicate set before pruning anything.  A single
# %-formatted string prints the same text as the comma-separated form.
print("%d duplicated users found with %d total ci unique" % (
    len(users), len(duplicated)))

# Every value in ``users`` is indexed at [0] to obtain the name that is later
# matched against users.name (presumably the account's user name -- confirm
# against the code that builds ``users``).
total_users = users.values()
total_names = set(x[0] for x in total_users)

# Start from every candidate name and subtract anything that must be kept.
delete = set(total_names)

# Exclude any user who has ever submitted a journal from deletion
cursor.execute("SELECT DISTINCT ON (submitted_by) submitted_by FROM journals")
journaled = set(x[0] for x in cursor.fetchall())
delete -= journaled

# Exclude any user who is assigned a role on a package
cursor.execute("SELECT DISTINCT ON (user_name) user_name FROM roles")
roles = set(x[0] for x in cursor.fetchall())
delete -= roles

# Exclude any user who has logged in.
# NULL must be tested with IS NOT NULL: ``last_login != NULL`` evaluates to
# NULL for every row, so the previous query returned nothing and logged-in
# users were never protected from deletion.
cursor.execute(
    "SELECT DISTINCT ON (name) name FROM users WHERE last_login IS NOT NULL")
logged_in = set(x[0] for x in cursor.fetchall())
delete -= logged_in

# Skip the statement entirely when nothing is left to delete, so the IN
# clause is never given an empty tuple.
if delete:
    cursor.execute("DELETE FROM users WHERE name in %s", (tuple(delete), ))

store.commit()
store.close()
# For each desired hosting mode, the modes a package may currently be in
# without needing to be switched.
accepted_modes = {
    "pypi-explicit": ["pypi-explicit"],
    "pypi-scrape": ["pypi-scrape", "pypi-explicit"],
    "pypi-scrape-crawl": ["pypi-scrape-crawl", "pypi-scrape", "pypi-explicit"],
}


store.open()
for desired_mode, names in data.iteritems():
    # Nothing has to be migrated for pypi-scrape-crawl.
    if desired_mode != "pypi-scrape-crawl":
        for requested_name in names:
            matches = store.find_package(requested_name)
            if matches:
                # Switch to the name the store returned, after checking that
                # it only differs from the requested one by normalisation.
                canonical_name = matches[0]
                assert (safe_name(requested_name).lower()
                        == safe_name(canonical_name).lower())

                existing_mode = store.get_package_hosting_mode(canonical_name)
                if existing_mode in accepted_modes[desired_mode]:
                    continue  # Already in an acceptable mode

                store.set_package_hosting_mode(canonical_name, desired_mode)
                # Record what was changed (``processed`` is defined outside
                # this excerpt; presumably a mapping of mode -> set of names).
                processed[desired_mode].add(canonical_name)
store.commit()

# Persist the record of migrated packages.
with open("migrated.pkl", "wb") as out:
    pickle.dump(processed, out)