Beispiel #1
0
def do_import_languages(file_data: List[dict]):
    """Store one ``ProgrammingLanguage`` row per distinct language classifier.

    For every package record, each entry of ``info['classifiers']`` that
    contains ``'Programming Language'`` is reduced to a short id built from
    its last two ``::``-separated segments (with ``'Implementation ::'``
    removed first). The first classifier that produces a given id supplies
    the row's description; later duplicates are skipped.

    Args:
        file_data: Package records as dicts. Records whose ``'info'`` or
            ``'classifiers'`` entry is missing or ``None`` contribute nothing.
    """
    imported = set()
    print("Importing languages ... ", flush=True)

    # A single session for the whole import, closed in ``finally``, instead of
    # opening a fresh session per language and never releasing it.
    session = db_session.create_session()
    try:
        with progressbar.ProgressBar(max_value=len(file_data)) as bar:
            for idx, p in enumerate(file_data):
                info = p.get('info') or {}
                for original in info.get('classifiers') or []:
                    if 'Programming Language' not in original:
                        continue

                    c = original.replace('Implementation ::', '').replace('::', ':')
                    text = c
                    parts = c.split(':')
                    if len(parts) > 1:
                        # Only the two most specific segments form the id.
                        text = ' '.join(parts[-2:]).strip().replace('  ', ' ')

                    if text in imported:
                        continue
                    imported.add(text)

                    lang = ProgrammingLanguage()
                    lang.description = original
                    lang.id = text
                    session.add(lang)
                    # Commit per row, as before, so each language is stored
                    # independently of later failures.
                    session.commit()

                bar.update(idx)
    finally:
        session.close()

    sys.stderr.flush()
    sys.stdout.flush()
Beispiel #2
0
def insert_a_package():
    """Interactively build a package with two releases and save it.

    Reads the package id, summary, author and license from stdin, then the
    version numbers and size of two releases, and commits everything in one
    transaction.

    Raises:
        ValueError: If a version or size answer is not a valid integer.
    """
    p = Package()
    p.id = input('Package id / name: ').strip().lower()

    p.summary = input("Package summary: ").strip()
    p.author_name = input("Author: ").strip()
    p.license = input("License: ").strip()

    # The two release prompts are identical apart from the heading.
    for release_number in (1, 2):
        print("Release {}:".format(release_number))
        r = Release()
        r.major_ver = int(input("Major version: "))
        r.minor_ver = int(input("Minor version: "))
        r.build_ver = int(input("Build version: "))
        r.size = int(input("Size in bytes: "))
        p.releases.append(r)

    session = db_session.create_session()
    try:
        session.add(p)
        session.commit()
    finally:
        # Release the session even when the commit fails.
        session.close()
Beispiel #3
0
def do_summary():
    """Print the number of stored rows for each model to stdout."""
    session = db_session.create_session()
    try:
        print("Final numbers:")
        # (label, model) pairs in the order they are reported.
        for label, model in (
            ("Users", User),
            ("Packages", Package),
            ("Releases", Release),
            ("Maintainers", Maintainer),
            ("Languages", ProgrammingLanguage),
            ("Licenses", License),
        ):
            print("{}: {:,}".format(label, session.query(model).count()))
    finally:
        # The original never closed this session.
        session.close()
Beispiel #4
0
def load_package(data: dict, user_lookup: Dict[str, User]):
    """Persist one package together with its releases and maintainers.

    Args:
        data: Package record. Reads ``'package_name'``, ``'info'`` and
            ``'releases'``.
        user_lookup: Maps an e-mail address to its ``User``. Maintainers
            whose e-mail is not in this mapping are skipped.

    Records without a (non-blank) package name are ignored. An
    ``OverflowError`` raised while building or saving the package is
    deliberately swallowed, so the record is dropped instead of aborting
    the import; every other exception propagates.
    """
    try:
        info = data.get('info') or {}

        package_id = (data.get('package_name') or '').strip()
        if not package_id:
            return

        p = Package()
        p.id = package_id

        releases = build_releases(p.id, data.get("releases", {}))

        if releases:
            p.created_date = releases[0].created_date

        maintainers_lookup = get_email_and_name_from_text(
            info.get('maintainer'), info.get('maintainer_email'))
        maintainers = []
        for email, _name in maintainers_lookup.items():
            user = user_lookup.get(email)
            if not user:
                continue

            m = Maintainer()
            m.package_id = p.id
            m.user_id = user.id
            maintainers.append(m)

        p.summary = info.get('summary')
        p.description = info.get('description')

        p.home_page = info.get('home_page')
        p.docs_url = info.get('docs_url')
        p.package_url = info.get('package_url')

        p.author = info.get('author')
        p.author_email = info.get('author_email')
        p.license = detect_license(info.get('license'))

        session = db_session.create_session()
        try:
            session.add(p)
            session.add_all(releases)
            if maintainers:
                session.add_all(maintainers)
            session.commit()
        finally:
            # Also runs when commit raises (including the OverflowError
            # handled below), so the session is never left open.
            session.close()
    except OverflowError:
        # Some records carry fake data (sizes in the terabyte range) that
        # overflows; skip such packages on purpose.
        pass
Beispiel #5
0
def do_user_import(user_lookup: Dict[str, str]) -> Dict[str, User]:
    """Insert one ``User`` per entry and return the stored users by e-mail.

    Args:
        user_lookup: Maps e-mail address to display name.

    Returns:
        A dict mapping each stored user's e-mail to its ``User`` object, as
        read back from the database after the import.
    """
    print("Importing users ... ", flush=True)

    # One session for all inserts, closed in ``finally``, instead of a new
    # never-closed session per user.
    session = db_session.create_session()
    session.expire_on_commit = False
    try:
        with progressbar.ProgressBar(max_value=len(user_lookup)) as bar:
            for idx, (email, name) in enumerate(user_lookup.items()):
                user = User()
                user.email = email
                user.name = name
                session.add(user)

                session.commit()
                bar.update(idx)
    finally:
        session.close()

    print()
    sys.stderr.flush()
    sys.stdout.flush()

    session = db_session.create_session()
    try:
        # The dict is fully built before the session is closed; the returned
        # users are detached but keep the attributes the query loaded.
        return {u.email: u for u in session.query(User)}
    finally:
        session.close()
def get_latest_releases(limit=10) -> List[Release]:
    """Return up to *limit* releases ordered by creation date, newest first.

    The query applies a joined-load option for ``Release.package`` and the
    session is closed before the result list is returned.
    """
    session = db_session.create_session()

    newest_first = (
        session.query(Release)
        .options(sqlalchemy.orm.joinedload(Release.package))
        .order_by(Release.created_date.desc())
        .limit(limit)
    )
    latest = newest_first.all()

    session.close()
    return latest
Beispiel #7
0
def main():
    """Initialise the database, import data if it is empty, print a summary.

    The import (users, packages, languages, licenses) only runs when the
    user table contains no rows; the summary is printed either way.
    """
    init_db()

    session = db_session.create_session()
    database_is_empty = session.query(User).count() == 0
    session.close()

    if database_is_empty:
        records = do_load_files()

        # Users go in first because the package import needs the stored users.
        stored_users = do_user_import(find_users(records))
        do_import_packages(records, stored_users)

        do_import_languages(records)
        do_import_licenses(records)

    do_summary()
def get_package_by_id(package_id: str) -> Optional[Package]:
    """Look up a package by id, ignoring case and surrounding whitespace.

    Returns ``None`` for a falsy *package_id* or when no package matches.
    The query applies a joined-load option for ``Package.releases``.
    """
    if not package_id:
        return None

    normalized_id = package_id.strip().lower()

    session = db_session.create_session()
    lookup = (
        session.query(Package)
        .options(sqlalchemy.orm.joinedload(Package.releases))
        .filter(Package.id == normalized_id)
    )
    found = lookup.first()
    session.close()

    return found
Beispiel #9
0
def do_import_licenses(file_data: List[dict]):
    """Store one ``License`` row per distinct detected license.

    ``detect_license`` is applied to each record's ``info['license']``; the
    result becomes the row id and the raw license text of the first record
    that produced it becomes the description. Records for which no license
    is detected are skipped.

    Args:
        file_data: Package records as dicts. A missing or ``None`` ``'info'``
            entry is treated as having no license text.
    """
    imported = set()
    print("Importing licenses ... ", flush=True)

    # A single session for the whole import, closed in ``finally``, instead of
    # opening a fresh session per license and never releasing it.
    session = db_session.create_session()
    try:
        with progressbar.ProgressBar(max_value=len(file_data)) as bar:
            for idx, p in enumerate(file_data):
                info = p.get('info') or {}
                raw_license = info.get('license')
                license_text = detect_license(raw_license)

                if license_text and license_text not in imported:
                    imported.add(license_text)

                    package_license = License()
                    package_license.id = license_text
                    package_license.description = raw_license

                    session.add(package_license)
                    # Commit per row, as before, so each license is stored
                    # independently of later failures.
                    session.commit()

                bar.update(idx)
    finally:
        session.close()

    sys.stderr.flush()
    sys.stdout.flush()
def get_user_count() -> int:
    """Return the number of stored ``User`` rows."""
    session = db_session.create_session()
    try:
        return session.query(User).count()
    finally:
        # The original never closed this session.
        session.close()
def get_release_count() -> int:
    """Return the number of stored ``Release`` rows."""
    session = db_session.create_session()
    try:
        return session.query(Release).count()
    finally:
        # The original never closed this session.
        session.close()
def get_package_count() -> int:
    """Return the number of stored ``Package`` rows."""
    session = db_session.create_session()
    try:
        return session.query(Package).count()
    finally:
        # The original never closed this session.
        session.close()