def do_import_licenses(file_data: List[dict]):
    """Store every distinct license found in ``file_data`` as a ``License`` row.

    For each package dict the raw ``info['license']`` value is passed through
    ``detect_license``; the result becomes the row ``id`` and the raw value
    becomes its ``description``. A license is skipped when it was already
    handled during this run or when a row with that id already exists.

    Args:
        file_data: Package dicts; each may carry an ``'info'`` dict.
    """
    imported = set()
    print("Importing licenses ... ", flush=True)
    pbar = progressbar.ProgressBar(maxval=len(file_data)).start()

    for idx, p in enumerate(file_data):
        # Tolerate packages that have no 'info' section at all.
        info = p.get('info') or {}
        raw_license = info.get('license')
        license_text = detect_license(raw_license)

        if license_text and license_text not in imported:
            imported.add(license_text)

            # Skip licenses that are already stored instead of aborting the
            # whole import: a row inserted earlier in this same run would
            # otherwise stop processing at the first repeated license.
            if not general_service.get_licenses_by_id(license_text):
                session = db_session.create_session()
                try:
                    package_license = License()
                    package_license.id = license_text
                    package_license.description = raw_license

                    session.add(package_license)
                    session.commit()
                finally:
                    # Release the session even when the commit fails.
                    session.close()

        pbar.update(idx)

    sys.stderr.flush()
    sys.stdout.flush()
def _prompt_release() -> Release:
    """Prompt on stdin for one release's version numbers and size."""
    r = Release()
    r.major_ver = int(input("Major version: "))
    r.minor_ver = int(input("Minor version: "))
    r.build_ver = int(input("Build version: "))
    r.size = int(input("Size in bytes: "))
    return r


def insert_a_package():
    """Interactively create a package with two releases and save it.

    All values are read from stdin with ``input``. The numeric release fields
    are converted with ``int``, so non-numeric input raises ``ValueError``.
    """
    p = Package()
    p.id = input('Package id / name: ').strip().lower()
    p.summary = input("Package summary: ").strip()
    p.author_name = input("Author: ").strip()
    p.license = input("License: ").strip()

    for number in (1, 2):
        print("Release {}:".format(number))
        p.releases.append(_prompt_release())

    session = db_session.create_session()
    try:
        session.add(p)
        session.commit()
    finally:
        # Release the session whether or not the commit succeeded.
        session.close()
def get_new_users(limit=10) -> List[User]:
    """Return up to ``limit`` users ordered by ``created_date`` descending.

    Args:
        limit: Maximum number of users to return.

    Returns:
        The matching ``User`` objects, loaded before the session is closed.
    """
    session = db_session.create_session()
    try:
        return (
            session.query(User)
            .order_by(User.created_date.desc())
            .limit(limit)
            .all()
        )
    finally:
        # Close the session even when the query raises.
        session.close()
def get_latest_logged(limit=10) -> List[User]:
    """Return up to ``limit`` users ordered by ``last_login`` descending.

    Args:
        limit: Maximum number of users to return.

    Returns:
        The matching ``User`` objects, loaded before the session is closed.
    """
    session = db_session.create_session()
    try:
        return (
            session.query(User)
            .order_by(User.last_login.desc())
            .limit(limit)
            .all()
        )
    finally:
        # Close the session even when the query raises.
        session.close()
def load_package(data: dict, user_lookup: Dict[str, User]):
    """Create a ``Package`` with its releases and maintainers from ``data``.

    Nothing is stored when ``data`` has no usable ``'package_name'`` or when
    ``package_service.get_package_by_id`` already finds that package.
    Maintainers are only recorded for e-mail addresses present in
    ``user_lookup``.

    Args:
        data: Package dict with ``'package_name'``, ``'info'`` and
            ``'releases'`` entries.
        user_lookup: Known users keyed by e-mail address.
    """
    # Validate the id before touching the database or building any objects.
    package_id = (data.get('package_name') or '').strip()
    if not package_id:
        return

    session = None
    try:
        if package_service.get_package_by_id(package_id):
            return

        info = data.get('info') or {}

        p = Package()
        p.id = package_id
        p.author = info.get('author')
        p.author_email = info.get('author_email')
        p.summary = info.get('summary')
        p.description = info.get('description')
        p.home_page = info.get('home_page')
        p.docs_url = info.get('docs_url')
        p.package_url = info.get('package_url')
        p.license = detect_license(info.get('license'))

        releases = build_releases(p.id, data.get("releases", {}))
        if releases:
            p.created_date = releases[0].created_date

        maintainers_lookup = get_email_and_name_from_text(
            info.get('maintainer'), info.get('maintainer_email'))
        maintainers = []
        for email, _name in maintainers_lookup.items():
            user = user_lookup.get(email)
            if not user:
                # Only link maintainers that exist as users.
                continue

            m = Maintainer()
            m.package_id = p.id
            m.user_id = user.id
            maintainers.append(m)

        session = db_session.create_session()
        session.add(p)
        session.add_all(releases)
        if maintainers:
            session.add_all(maintainers)
        session.commit()
    except OverflowError:
        # Some packages carry fake data (e.g. sizes in the terabytes) that
        # overflows; such packages are deliberately skipped.
        pass
    finally:
        # Always release the session, including on the skipped-package path.
        if session is not None:
            session.close()
def do_summary():
    """Print the row count of each imported table to stdout."""
    # Label / model pairs, printed in this order.
    tables = (
        ("Users", User),
        ("Packages", Package),
        ("Releases", Release),
        ("Maintainers", Maintainer),
        ("Languages", ProgrammingLanguage),
        ("Licenses", License),
    )

    session = db_session.create_session()
    try:
        print("Final numbers:")
        for label, model in tables:
            print("{}: {:,}".format(label, session.query(model).count()))
    finally:
        # The session is only needed for the counts above.
        session.close()
def get_latest_releases(limit=10) -> List[Release]:
    """Return up to ``limit`` releases ordered by ``created_date`` descending.

    ``Release.package`` is loaded with a joined load in the same query, so it
    is already populated when the session is closed.

    Args:
        limit: Maximum number of releases to return.
    """
    session = db_session.create_session()
    try:
        return (
            session.query(Release)
            .options(sqlalchemy.orm.joinedload(Release.package))
            .order_by(Release.created_date.desc())
            .limit(limit)
            .all()
        )
    finally:
        # Close the session even when the query raises.
        session.close()
def search_packages_by_keyword(query: str) -> Optional[List[Package]]:
    """Find packages whose id or description matches ``query``.

    The query is stripped and lower-cased, then matched with SQL ``LIKE``
    (``%query%``) against ``Package.id`` and ``Package.description``.

    Args:
        query: The search text.

    Returns:
        ``None`` when ``query`` is empty or only whitespace, otherwise the
        list of matching packages.
    """
    if not query:
        return None

    query = query.strip().lower()
    if not query:
        # A blank pattern would become '%%' and match every package.
        return None

    search = "%{}%".format(query)

    session = db_session.create_session()
    try:
        # Materialise the results so the session can be closed here; the
        # declared return type is a list, not a lazy query.
        return (
            session.query(Package)
            .filter(or_(Package.id.like(search),
                        Package.description.like(search)))
            .all()
        )
    finally:
        session.close()
def login_user(email: str, password: str) -> Optional[User]:
    """Check a user's credentials and record the login time.

    Args:
        email: E-mail address used to look the user up.
        password: Plain-text password, checked with ``crypto.verify_hash``
            against the stored ``hashed_password``.

    Returns:
        The ``User`` with ``last_login`` updated and committed, or ``None``
        when no user has that e-mail or the password does not verify.
    """
    session = db_session.create_session()
    # Keep loaded attributes after the commit so the returned user stays
    # readable once the session has been closed.
    session.expire_on_commit = False
    try:
        user = session.query(User).filter(User.email == email).first()
        if not user:
            return None

        if not crypto.verify_hash(user.hashed_password, password):
            return None

        # Update the login time.
        user.last_login = datetime.datetime.now()
        session.commit()
        return user
    finally:
        # Runs on every path, including the failed-login early returns.
        session.close()
def create_user(name: str, email: str, password: str) -> Optional[User]:
    """Create and store a new user.

    Args:
        name: Display name of the user.
        email: E-mail address; must not belong to an existing user.
        password: Plain-text password; only ``crypto.hash_text(password)`` is
            stored.

    Returns:
        The new ``User``, or ``None`` when ``find_user_by_email`` already
        finds a user with that e-mail.
    """
    if find_user_by_email(email):
        return None

    user = User()
    user.name = name
    user.email = email
    user.hashed_password = crypto.hash_text(password)

    session = db_session.create_session()
    # Keep attribute values after the commit so the returned user stays
    # readable once the session has been closed.
    session.expire_on_commit = False
    try:
        session.add(user)
        session.commit()
    finally:
        session.close()

    return user
def load(drop_all: bool):
    """Run the full import: users, packages, languages, licenses, summary.

    Args:
        drop_all: Passed straight to ``init_db``.
    """
    init_db(drop_all)

    packages_data = do_load_files()
    imported_users = do_user_import(find_users(packages_data))

    do_import_packages(packages_data, imported_users)
    do_import_languages(packages_data)
    do_import_licenses(packages_data)

    do_summary()

    # Finish by asking a fresh session to close all sessions.
    db_session.create_session().close_all()
def do_user_import(user_lookup: Dict[str, str]) -> Dict[str, User]:
    """Insert the users in ``user_lookup`` that are not stored yet.

    Args:
        user_lookup: Names keyed by e-mail address.

    Returns:
        Every ``User`` in the database after the import, keyed by e-mail.
    """
    print("Importing users ... ", flush=True)
    pbar = progressbar.ProgressBar(maxval=len(user_lookup)).start()

    for idx, (email, name) in enumerate(user_lookup.items()):
        session = db_session.create_session()
        session.expire_on_commit = False
        try:
            user_exists = session.query(User) \
                .filter(User.email == email).first()
            if not user_exists:
                user = User()
                user.email = email
                user.name = name
                session.add(user)
                session.commit()
        finally:
            # Close the per-user session on both branches and on errors.
            session.close()

        pbar.update(idx)

    print()
    sys.stderr.flush()
    sys.stdout.flush()

    # NOTE(review): this session is left open, as in the original, so the
    # returned users stay attached to it - confirm whether callers need that.
    session = db_session.create_session()
    return {u.email: u for u in session.query(User)}
def get_package_by_id(package_id: str) -> Optional[Package]:
    """Look up one package by id, with its releases loaded.

    The id is stripped and lower-cased before the lookup.
    ``Package.releases`` is fetched with a joined load in the same query.

    Args:
        package_id: Id of the package to find.

    Returns:
        The matching ``Package``, or ``None`` when ``package_id`` is empty or
        no package has that id.
    """
    if not package_id:
        return None

    package_id = package_id.strip().lower()

    session = db_session.create_session()
    try:
        return (
            session.query(Package)
            .options(sqlalchemy.orm.joinedload(Package.releases))
            .filter(Package.id == package_id)
            .first()
        )
    finally:
        # Close the session even when the query raises.
        session.close()
def do_import_languages(file_data: List[dict]):
    """Store every distinct programming-language classifier in ``file_data``.

    Only classifiers containing ``'Programming Language'`` are considered.
    The row ``id`` is built from the last two ``:``-separated parts of the
    classifier and the original classifier string becomes the
    ``description``. A language is skipped when it was already handled during
    this run or when a row with that id already exists.

    Args:
        file_data: Package dicts; each may carry an ``'info'`` dict with a
            ``'classifiers'`` list.
    """
    imported = set()
    print("Importing languages ... ", flush=True)
    pbar = progressbar.ProgressBar(maxval=len(file_data)).start()

    for idx, p in enumerate(file_data):
        # Tolerate packages without 'info' or without classifiers.
        info = p.get('info') or {}
        classifiers = info.get('classifiers') or []

        for c in classifiers:
            if 'Programming Language' not in c:
                continue

            original = c
            c = c.replace('Implementation ::', '').replace('::', ':')
            text = c
            parts = c.split(':')
            if len(parts) > 1:
                # NOTE(review): as written this replace() is a no-op; it
                # presumably was meant to collapse repeated spaces - confirm.
                text = ' '.join(parts[-2:]).strip().replace(' ', ' ')

            if text in imported:
                continue
            imported.add(text)

            # Skip languages that are already stored instead of aborting the
            # whole import at the first one found.
            if general_service.get_language_by_id(text):
                continue

            session = db_session.create_session()
            try:
                lang = ProgrammingLanguage()
                lang.description = original
                lang.id = text
                session.add(lang)
                session.commit()
            finally:
                # Release the session even when the commit fails.
                session.close()

        pbar.update(idx)

    sys.stderr.flush()
    sys.stdout.flush()
def find_user_by_id(user_id: str) -> Optional[User]:
    """Return the user whose ``id`` equals ``user_id``, or ``None``."""
    session = db_session.create_session()
    try:
        return session.query(User).filter(User.id == user_id).first()
    finally:
        # Close the session like the other lookup helpers do.
        session.close()
def get_release_count() -> int:
    """Return the number of ``Release`` rows."""
    session = db_session.create_session()
    try:
        return session.query(Release).count()
    finally:
        # Only an int is returned, so the session can always be closed.
        session.close()
def get_package_count() -> int:
    """Return the number of ``Package`` rows."""
    session = db_session.create_session()
    try:
        return session.query(Package).count()
    finally:
        # Only an int is returned, so the session can always be closed.
        session.close()
def get_user_count() -> int:
    """Return the number of ``User`` rows."""
    session = db_session.create_session()
    try:
        return session.query(User).count()
    finally:
        # Only an int is returned, so the session can always be closed.
        session.close()
def find_user_by_email(email: str) -> Optional[User]:
    """Return the user whose ``email`` equals ``email``, or ``None``."""
    session = db_session.create_session()
    try:
        return session.query(User).filter(User.email == email).first()
    finally:
        # Close the session like the other lookup helpers do.
        session.close()
def get_language_by_id(lang_id: str) -> Optional[ProgrammingLanguage]:
    """Return the ``ProgrammingLanguage`` with id ``lang_id``, or ``None``."""
    session = db_session.create_session()
    try:
        return (
            session.query(ProgrammingLanguage)
            .filter(ProgrammingLanguage.id == lang_id)
            .first()
        )
    finally:
        # Close the session even when the query raises.
        session.close()
def get_licenses_by_id(licenses_id: str) -> Optional[License]:
    """Return the ``License`` with id ``licenses_id``, or ``None``."""
    session = db_session.create_session()
    try:
        return (
            session.query(License)
            .filter(License.id == licenses_id)
            .first()
        )
    finally:
        # Close the session even when the query raises.
        session.close()