def do_import_languages(file_data: List[dict]):
    imported = set()
    print("Importing languages ... ", flush=True)
    with progressbar.ProgressBar(max_value=len(file_data)) as bar:
        for idx, p in enumerate(file_data):
            info = p.get('info')
            classifiers = info.get('classifiers')
            for c in classifiers:
                if 'Programming Language' not in c:
                    continue

                original = c

                c = c.replace('Implementation ::', '').replace('::', ':')
                text = c
                parts = c.split(':')
                if len(parts) > 1:
                    text = ' '.join(parts[-2:]).strip().replace('  ', ' ')

                if text not in imported:
                    imported.add(text)
                    session = db_session.create_session()

                    lang = ProgrammingLanguage()
                    lang.description = original
                    lang.id = text
                    session.add(lang)
                    session.commit()

            bar.update(idx)

    sys.stderr.flush()
    sys.stdout.flush()
def find_user_by_id(user_id: int) -> Optional[User]:
    session = db_session.create_session()
    try:
        user = session.query(User).get(user_id)
        return user
    finally:
        session.close()
Example #3
0
def login_user(email: str, password: str) -> Optional[User]:
    session = db_session.create_session()
    user = session.query(User).filter(User.email == email).first()
    if not user:
        return None
    if not verify_hash(user.hashed_password, password):
        return None
    return user
def do_summary():
    session = db_session.create_session()

    print("Final numbers:")
    print("Users: {:,}".format(session.query(User).count()))
    print("Packages: {:,}".format(session.query(Package).count()))
    print("Releases: {:,}".format(session.query(Release).count()))
    print("Maintainers: {:,}".format(session.query(Maintainer).count()))
    print("Languages: {:,}".format(session.query(ProgrammingLanguage).count()))
    print("Licenses: {:,}".format(session.query(License).count()))
def get_latest_releases(limit=10) -> List[Release]:
    session = db_session.create_session()

    releases = session.query(Release). \
        options(sqlalchemy.orm.joinedload(Release.package)). \
        order_by(Release.created_date.desc()). \
        limit(limit). \
        all()
    session.close()

    return releases
def do_user_import(user_lookup: Dict[str, str]) -> Dict[str, User]:
    print("Importing users ... ", flush=True)
    with progressbar.ProgressBar(max_value=len(user_lookup)) as bar:
        for idx, (email, name) in enumerate(user_lookup.items()):
            session = db_session.create_session()
            session.expire_on_commit = False

            user = User()
            user.email = email
            user.name = name
            session.add(user)

            session.commit()
            bar.update(idx)

    print()
    sys.stderr.flush()
    sys.stdout.flush()

    session = db_session.create_session()
    return {u.email: u for u in session.query(User)}
def load_package(data: dict, user_lookup: Dict[str, User]):
    try:
        info = data.get('info', {})

        p = Package()
        p.id = data.get('package_name', '').strip()
        if not p.id:
            return

        p.author = info.get('author')
        p.author_email = info.get('author_email')

        releases = build_releases(p.id, data.get("releases", {}))

        if releases:
            p.created_date = releases[0].created_date

        maintainers_lookup = get_email_and_name_from_text(info.get('maintainer'), info.get('maintainer_email'))
        maintainers = []
        for email, name in maintainers_lookup.items():
            user = user_lookup.get(email)
            if not user:
                continue

            m = Maintainer()
            m.package_id = p.id
            m.user_id = user.id
            maintainers.append(m)

        p.summary = info.get('summary')
        p.description = info.get('description')

        p.home_page = info.get('home_page')
        p.docs_url = info.get('docs_url')
        p.package_url = info.get('package_url')

        p.author = info.get('author')
        p.author_email = info.get('author_email')
        p.license = detect_license(info.get('license'))

        session = db_session.create_session()
        session.add(p)
        session.add_all(releases)
        if maintainers:
            session.add_all(maintainers)
        session.commit()
        session.close()
    except OverflowError:
        # What the heck, people just putting fake data in here
        # Size is terabytes...
        pass
    except Exception:
        raise
Example #8
0
def migrate_users():
    if MongoUser.objects().count():
        return

    session = db_session.create_session()
    sql_users = session.query(SqlUser).all()
    for sut in sql_users:
        su: SqlUser = sut
        u = MongoUser()
        u.created_date = su.created_date
        u.hashed_password = su.hashed_password
        u.name = su.name
        u.email = su.email
        u.save()
Example #9
0
def create_user(name: str, email: str, password: str) -> Optional[User]:
    if find_user_by_email(email):
        return None

    user = User()
    user.email = email
    user.name = name
    user.hashed_password = hash_text(password)

    session = db_session.create_session()
    session.add(user)
    session.commit()

    return user
Example #10
0
def main():
    init_db()
    session = db_session.create_session()
    user_count = session.query(User).count()
    session.close()
    if user_count == 0:
        file_data = do_load_files()
        users = find_users(file_data)

        db_users = do_user_import(users)
        do_import_packages(file_data, db_users)

        do_import_languages(file_data)
        do_import_licenses(file_data)

    do_summary()
def get_package_by_id(package_id: str) -> Optional[Package]:
    if not package_id:
        return None

    package_id = package_id.strip().lower()

    session = db_session.create_session()

    package = session.query(Package) \
        .options(sqlalchemy.orm.joinedload(Package.releases)) \
        .filter(Package.id == package_id) \
        .first()

    session.close()

    return package
Example #12
0
def migrate_releases():
    if MongoRelease.objects().count():
        return

    session = db_session.create_session()
    sql_releases = session.query(SqlRelease).all()
    for srp in sql_releases:
        sr: SqlRelease = srp
        r = MongoRelease()
        r.created_date = sr.created_date
        r.comment = sr.comment
        r.major_ver = sr.major_ver
        r.minor_ver = sr.minor_ver
        r.build_ver = sr.build_ver
        r.url = sr.url
        r.size = sr.size
        r.package_id = sr.package_id

        r.save()
Example #13
0
def do_import_licenses(file_data: List[dict]):
    imported = set()
    print("Importing licenses ... ", flush=True)
    with progressbar.ProgressBar(max_value=len(file_data)) as bar:
        for idx, p in enumerate(file_data):
            info = p.get('info')
            license_text = detect_license(info.get('license'))

            if license_text and license_text not in imported:
                imported.add(license_text)
                session = db_session.create_session()

                package_license = License()
                package_license.id = license_text
                package_license.description = info.get('license')

                session.add(package_license)
                session.commit()

            bar.update(idx)

    sys.stderr.flush()
    sys.stdout.flush()
Example #14
0
def migrate_packages():
    if MongoPackage.objects().count():
        return

    session = db_session.create_session()
    sql_packages = session.query(SqlPackage).all()
    for sup in sql_packages:
        sp: SqlPackage = sup
        p = MongoPackage()
        p.id = sp.id
        p.created_date = sp.created_date
        p.summary = sp.summary
        p.description = sp.description
        p.home_page = sp.home_page
        p.docs_url = sp.docs_url
        p.package_url = sp.package_url

        p.author = sp.author_name
        p.author_email = sp.author_email

        p.license = sp.license
        p.maintainers = []  # Find, load, and copy over maintainer IDs.

        p.save()
def insert_a_package():
    p = Package()
    p.id = input("Package id / name: ").strip().lower()
    p.summary = input("Package summary: ").strip()
    p.author_name = input("Author: ").strip()
    p.license = input("License: ").strip()

    r = Release()
    r.major_ver = int(input("Major version: "))
    r.minor_ver = int(input("Minor version: "))
    r.build_ver = int(input("Build version: "))
    r.size = int(input("Size in bytes: "))
    p.releases.append(r)

    r = Release()
    r.major_ver = int(input("Major version: "))
    r.minor_ver = int(input("Minor version: "))
    r.build_ver = int(input("Build version: "))
    r.size = int(input("Size in bytes: "))
    p.releases.append(r)

    session = db_session.create_session()
    session.add(p)
    session.commit()
Example #16
0
def all_packages(limit: int) -> List[Package]:
    session: Session = db_session.create_session()
    try:
        return list(session.query(Package).limit(limit))
    finally:
        session.close()
Example #17
0
def get_release_count() -> int:
    session = db_session.create_session()
    try:
        return session.query(Release).count()
    finally:
        session.close()
Example #18
0
def get_package_count() -> int:
    session = db_session.create_session()
    try:
        return session.query(Package).count()
    finally:
        session.close()
def get_user_count() -> int:
    session = db_session.create_session()
    try:
        return session.query(User).count()
    finally:
        session.close()
def find_user_by_email(email: str) -> Optional[User]:
    session = db_session.create_session()
    try:
        return session.query(User).filter(User.email == email).first()
    finally:
        session.close()
Example #21
0
def get_user_count() -> int:
    session = db_session.create_session()
    return session.query(User).count()
def get_package_count() -> int:
    session = db_session.create_session()
    return session.query(Package).count()
def get_release_count() -> int:
    session = db_session.create_session()
    return session.query(Release).count()