def find_user_by_id(user_id: int) -> Optional[User]:
    """Look up a single user by primary key; None when no row matches."""
    session = db_session.create_session()
    try:
        return session.query(User).filter(User.id == user_id).first()
    finally:
        # Release the connection whether or not the query raised.
        session.close()
def do_import_languages(file_data: List[dict]):
    """Import each distinct 'Programming Language' trove classifier.

    Walks every package's classifiers, normalizes each language entry via
    _create_language_id, and inserts each distinct id exactly once.
    """
    imported = set()  # language ids already written during this run
    print("Importing languages ... ", flush=True)
    # BUG FIX: the original created a brand-new session for every language
    # and never closed any of them; reuse one session and close it in finally.
    session = db_session.create_session()
    try:
        with progressbar.ProgressBar(max_value=len(file_data)) as bar:
            for idx, p in enumerate(file_data):
                info = p.get("info")
                classifiers = info.get("classifiers")
                for c in classifiers:
                    if "Programming Language" not in c:
                        continue
                    original = c
                    text = _create_language_id(original)
                    if text in imported:
                        continue
                    # Cache what we've added already locally, but also
                    # commit it to the db right now.
                    imported.add(text)
                    lang = ProgrammingLanguage()
                    lang.description = original
                    lang.id = text
                    session.add(lang)
                    session.commit()
                bar.update(idx)
    finally:
        session.close()
    # Flush std streams so progressbar output doesn't interleave with
    # subsequent prints.
    sys.stderr.flush()
    sys.stdout.flush()
def insert_a_package():
    """Interactively prompt for a package plus two releases and persist them."""

    def prompt_release() -> Release:
        # Local helper: read one release's version numbers and size from stdin.
        # (Replaces the original's copy-pasted duplicate prompt code.)
        r = Release()
        r.major_ver = int(input("Major version: "))
        r.minor_ver = int(input("Minor version: "))
        r.build_ver = int(input("Build version: "))
        r.size = int(input("Size in bytes: "))
        return r

    p = Package()
    p.id = input('Package id / name: ').strip().lower()
    p.summary = input("Package summary: ").strip()
    p.author_name = input("Author: ").strip()
    p.license = input("License: ").strip()

    print("Release 1: ")
    p.releases.append(prompt_release())
    print("Release 2: ")
    p.releases.append(prompt_release())

    session = db_session.create_session()
    try:
        session.add(p)
        session.commit()
    finally:
        # BUG FIX: the original never closed the session.
        session.close()
def insert_a_package():
    """Interactively prompt for a package plus two releases and persist them."""

    def prompt_release() -> Release:
        # Local helper: read one release's version numbers and size from stdin.
        # (Replaces the original's copy-pasted duplicate prompt code.)
        r = Release()
        r.major_ver = int(input("Major Version"))
        r.minor_ver = int(input("Minor Version"))
        r.build_ver = int(input("Build"))
        r.size = int(input("Size:"))
        return r

    p = Package()
    p.id = input("Package ID/Name:").strip().lower()
    p.summary = input("Summary: ").strip()
    p.license = input("Licence").strip()
    p.author_name = input("Author name").strip()

    print("Release 1")
    p.releases.append(prompt_release())
    print("Release 2")
    p.releases.append(prompt_release())

    session = db_session.create_session()
    try:
        session.add(p)
        session.commit()
    finally:
        # BUG FIX: the original never closed the session.
        session.close()
def create_user(name: str, email: str, password: str, profile_image_url: str = None) -> Optional[User]:
    """Create and persist a new user.

    Returns the new User, or None when input is invalid or the email is
    already registered. Uniqueness is enforced by the DB's integrity
    constraint rather than a pre-check query, avoiding a
    check-then-insert race.
    """
    if not name or not email or not password:
        return None

    user = User()
    user.name = name
    user.email = email
    user.hashed_password = hash_text(password)
    user.profile_image_url = profile_image_url

    s = create_session()
    s.add(user)
    try:
        # Commit but keep attribute values loaded so the caller can use
        # the returned object after the session is closed.
        s.expire_on_commit = False
        s.commit()
    except exc.IntegrityError:
        # Record already exists or cannot be added.
        user = None
    finally:
        # BUG FIX: the original closed the session outside any finally, so
        # an unexpected exception during commit leaked the connection.
        s.close()
    return user
def main():
    """Entry point: seed an empty database from the data files."""
    init_db()

    # Cheap emptiness probe: any previously seeded database has users.
    session = db_session.create_session()
    user_count = session.query(User).count()
    session.close()

    if user_count:
        # Database already populated - refuse to overwrite it.
        print(
            "WARNING: load_data.py called on database that already has data. No data written"
        )
    else:
        file_data = do_load_files()
        users = find_users(file_data)
        db_users = do_user_import(users)
        do_import_packages(file_data, db_users)
        do_import_languages(file_data)
        do_import_licenses(file_data)

    do_summary()
def insert_a_package():
    """Interactively prompt for a package plus two releases and persist them."""

    def prompt_release() -> Release:
        # Local helper: read one release's version numbers and size from stdin.
        # (Replaces the original's copy-pasted duplicate prompt code.)
        r = Release()
        r.major_ver = int(input('Major Version: '))
        r.minor_ver = int(input('Minor Version: '))
        r.build_ver = int(input('Build Version: '))
        r.size = int(input('Size in bytes: '))
        return r

    p = Package()
    p.id = input('Package id / name: ').strip().lower()
    p.summary = input('Package summary: ').strip()
    p.author_name = input('Author: ').strip()
    p.license = input('License: ').strip()

    print('Release 1:')
    p.releases.append(prompt_release())
    print('Release 2:')
    p.releases.append(prompt_release())

    session = db_session.create_session()
    try:
        session.add(p)
        session.commit()
    finally:
        # BUG FIX: the original never closed the session.
        session.close()
def get_redirect_by_id(redirect_id: int) -> Optional[Redirect]:
    """Fetch one redirect by primary key; None when no row matches."""
    session = db_session.create_session()
    try:
        query = session.query(Redirect).filter(Redirect.id == redirect_id)
        return query.first()
    finally:
        # Always release the connection.
        session.close()
def all_redirects() -> List[Redirect]:
    """Return every redirect, newest first."""
    session = db_session.create_session()
    try:
        query = session.query(Redirect).order_by(Redirect.created_date.desc())
        return query.all()
    finally:
        # Always release the connection.
        session.close()
def do_import_languages(file_data: List[dict]):
    """Import each distinct 'Programming Language' trove classifier.

    Normalizes a classifier such as
    'Programming Language :: Python :: 3' down to its last two path
    segments, then inserts each distinct normalized id exactly once.
    """
    imported = set()  # language ids already written during this run
    print("Importing languages ... ", flush=True)
    # BUG FIX: the original created a brand-new session for every language
    # and never closed any of them; reuse one session and close it in finally.
    session = db_session.create_session()
    try:
        with progressbar.ProgressBar(max_value=len(file_data)) as bar:
            for idx, p in enumerate(file_data):
                info = p.get('info')
                classifiers = info.get('classifiers')
                for c in classifiers:
                    if 'Programming Language' not in c:
                        continue
                    original = c
                    # Normalize: drop the 'Implementation ::' marker,
                    # collapse separators, keep the last two segments.
                    # NOTE(review): the double-space replace restores the
                    # presumably intended collapse of runs of spaces left
                    # by the join - confirm against the original source.
                    c = c.replace('Implementation ::', '').replace('::', ':')
                    text = c
                    parts = c.split(':')
                    if len(parts) > 1:
                        text = ' '.join(parts[-2:]).strip().replace('  ', ' ')
                    if text not in imported:
                        imported.add(text)
                        lang = ProgrammingLanguage()
                        lang.description = original
                        lang.id = text
                        session.add(lang)
                        session.commit()
                bar.update(idx)
    finally:
        session.close()
    # Flush std streams so progressbar output doesn't interleave with
    # subsequent prints.
    sys.stderr.flush()
    sys.stdout.flush()
def get_latest_releases(limit=10) -> List[Release]:
    """Return the newest `limit` releases with their packages eagerly loaded.

    Without joinedload, a single query gets the releases but then one
    extra query fires per associated package; joinedload collapses that
    into a single joined query.
    """
    session = create_session()
    try:
        releases = session.query(Release) \
            .options(sqlalchemy.orm.joinedload(Release.package)) \
            .order_by(Release.created_date.desc()) \
            .limit(limit) \
            .all()
        # Detach everything so later attribute access (e.g. during HTML
        # rendering) can't attempt lazy loads against a closed session.
        session.expunge_all()
        return releases
    finally:
        # BUG FIX: close in finally so a failed query doesn't leak the
        # connection. Dead commented-out experiments removed.
        session.close()
def get_latest_releases(limit=5) -> List[Release]:
    """Return the newest `limit` releases, packages eagerly joined in."""
    session = db_session.create_session()
    try:
        # joinedload avoids one lazy query per release's package.
        return session.query(Release) \
            .options(sqlalchemy.orm.joinedload(Release.package)) \
            .order_by(Release.created_date.desc()) \
            .limit(limit) \
            .all()
    finally:
        # BUG FIX: close in finally so a failed query doesn't leak the
        # connection (original closed only on the success path).
        session.close()
def get_package_by_id(package_id: str) -> Optional[Package]:
    """Fetch one package (with releases eagerly loaded) by normalized id."""
    if not package_id:
        # BUG FIX: .strip() on a None id raised AttributeError; treat a
        # missing/empty id as "not found" (consistent with the guard-style
        # variant of this function elsewhere in the file).
        return None
    package_id = package_id.strip().lower()
    session = db_session.create_session()
    try:
        return session.query(Package) \
            .options(sqlalchemy.orm.joinedload(Package.releases)) \
            .filter(Package.id == package_id) \
            .first()
    finally:
        # BUG FIX: close in finally so a failed query doesn't leak the
        # connection.
        session.close()
def get_package_by_id(package_name):
    """Fetch one package by id, eager-loading its releases."""
    session = db_session.create_session()
    try:
        # NOTE(review): ordering a Package query by Release.created_date
        # without an explicit join relies on an implicit cross join -
        # verify the generated SQL is actually what's intended here.
        return session.query(Package) \
            .options(sqlalchemy.orm.joinedload(Package.releases)) \
            .filter_by(id=package_name) \
            .order_by(Release.created_date.desc()) \
            .first()
    finally:
        # BUG FIX: close in finally so a failed query doesn't leak the
        # connection.
        session.close()
def login_user(email: str, password: str) -> Optional[User]:
    """Authenticate by email and password; return the User or None."""
    session = db_session.create_session()
    try:
        user = session.query(User).filter(User.email == email).first()
    finally:
        # BUG FIX: the original never closed the session (and leaked it on
        # every early return). Column attributes are already loaded by
        # first(), so the checks below work on the detached instance.
        session.close()
    if not user:
        return None
    if not verify_hash(user.hashed_password, password):
        return None
    # (Unreachable trailing `return None` from the original removed.)
    return user
def do_summary():
    """Print row counts for every imported table."""
    session = db_session.create_session()
    try:
        print('Final numbers:')
        print('Users: {:,}'.format(session.query(User).count()))
        print('Packages: {:,}'.format(session.query(Package).count()))
        print('Releases: {:,}'.format(session.query(Release).count()))
        print('Maintainers: {:,}'.format(session.query(Maintainer).count()))
        print('Languages: {:,}'.format(session.query(ProgrammingLanguage).count()))
        print('Licenses: {:,}'.format(session.query(License).count()))
    finally:
        # BUG FIX: the original never closed the session.
        session.close()
def do_user_import(user_lookup: Dict[str, str]) -> Dict[str, User]:
    """Insert one User per (email, name) pair; return email -> User mapping."""
    print('Importing users ... ', flush=True)
    # BUG FIX: the original opened a new session per user (plus one more for
    # the final query) and never closed any of them; use one session.
    session = db_session.create_session()
    # Keep attributes loaded after commit so the returned mapping's objects
    # remain usable once the session is closed.
    session.expire_on_commit = False
    try:
        for email, name in user_lookup.items():
            user = User()
            user.email = email
            user.name = name
            session.add(user)
            session.commit()
        print()
        sys.stderr.flush()
        sys.stdout.flush()
        # Re-query so the mapping reflects DB state (generated ids populated).
        return {u.email: u for u in session.query(User)}
    finally:
        session.close()
def do_summary():
    """Print row counts for every imported table."""
    session = db_session.create_session()
    try:
        print("Final numbers:")
        print("Users: {:,}".format(session.query(User).count()))
        print("Packages: {:,}".format(session.query(Package).count()))
        print("Releases: {:,}".format(session.query(Release).count()))
        print("Maintainers: {:,}".format(session.query(Maintainer).count()))
        print("Languages: {:,}".format(session.query(ProgrammingLanguage).count()))
        print("Licenses: {:,}".format(session.query(License).count()))
    finally:
        # BUG FIX: the original never closed the session.
        session.close()
def get_redirect(base_url: str) -> Optional[Redirect]:
    """Resolve a short URL to its Redirect row; None when missing or blank."""
    if not base_url or not base_url.strip():
        return None

    normalized = base_url.strip().lower()
    session = db_session.create_session()
    try:
        query = session.query(Redirect).filter(Redirect.short_url == normalized)
        return query.first()
    finally:
        # Always release the connection.
        session.close()
def create_user(name: str, email: str, password: str) -> Optional[User]:
    """Create a user unless the email is already registered.

    Returns the persisted User, or None when the email is taken.
    """
    if find_user_by_email(email):
        return None

    u: User = User()
    u.email = email
    u.name = name
    u.hashed_password = hash_text(password)

    session = db_session.create_session()
    # Keep u's attributes loaded after commit so the caller can read them
    # once the session is closed (consistent with the other create_user).
    session.expire_on_commit = False
    try:
        session.add(u)
        session.commit()
    finally:
        # BUG FIX: the original never closed the session.
        session.close()
    return u
def load_package(data: dict, user_lookup: Dict[str, User]):
    """Persist one package record plus its releases and maintainers.

    Silently skips records with no package name and records whose numeric
    fields overflow the DB columns (bad upstream data).
    """
    try:
        info = data.get('info', {})
        p = Package()
        p.id = data.get('package_name', '').strip()
        if not p.id:
            return
        p.author = info.get('author')
        p.author_email = info.get('author_email')

        releases = build_releases(p.id, data.get("releases", {}))
        if releases:
            # The oldest release timestamp doubles as the package creation date.
            p.created_date = releases[0].created_date

        maintainers_lookup = get_email_and_name_from_text(
            info.get('maintainer'), info.get('maintainer_email'))
        maintainers = []
        for email, _name in maintainers_lookup.items():
            user = user_lookup.get(email)
            if not user:
                # Maintainer email not among imported users - skip the link.
                continue
            m = Maintainer()
            m.package_id = p.id
            m.user_id = user.id
            maintainers.append(m)

        p.summary = info.get('summary')
        p.description = info.get('description')
        p.home_page = info.get('home_page')
        p.docs_url = info.get('docs_url')
        p.package_url = info.get('package_url')
        p.author = info.get('author')
        p.author_email = info.get('author_email')
        p.license = detect_license(info.get('license'))

        session = db_session.create_session()
        try:
            session.add(p)
            session.add_all(releases)
            if maintainers:
                session.add_all(maintainers)
            session.commit()
        finally:
            # BUG FIX: close even when the commit fails (original only
            # closed on success).
            session.close()
    except OverflowError:
        # What the heck, people just putting fake data in here.
        # Size is terabytes...
        pass
    # (The original's `except Exception: raise` was a no-op and was removed.)
def get_page(base_url: str) -> Optional[Page]:
    """Fetch the Page stored under a normalized URL; None when missing/blank."""
    if not base_url or not base_url.strip():
        return None

    normalized = base_url.strip().lower()
    session = db_session.create_session()
    try:
        query = session.query(Page).filter(Page.url == normalized)
        return query.first()
    finally:
        # Always release the connection.
        session.close()
def do_user_import(user_lookup: Dict[str, str]) -> Dict[str, User]:
    """Insert one User per (email, name) pair; return email -> User mapping."""
    print("Importing users ... ", flush=True)
    # BUG FIX: the original opened a new session per user (plus one more for
    # the final query) and never closed any of them; use one session.
    session = db_session.create_session()
    # Keep attributes loaded after commit so the returned mapping's objects
    # remain usable once the session is closed.
    session.expire_on_commit = False
    try:
        with progressbar.ProgressBar(max_value=len(user_lookup)) as bar:
            for idx, (email, name) in enumerate(user_lookup.items()):
                user = User()
                user.email = email
                user.name = name
                session.add(user)
                session.commit()
                bar.update(idx)
        print()
        sys.stderr.flush()
        sys.stdout.flush()
        # Re-query so the mapping reflects DB state (generated ids populated).
        return {u.email: u for u in session.query(User)}
    finally:
        session.close()
def find_user_by_id(user_id) -> Optional[User]:
    """Find one user by id (accepts str or int); None when absent/invalid.

    Raises ValueError if the id somehow matches multiple rows.
    """
    user_id = try_int(user_id)
    if not user_id:
        return None
    s = create_session()
    try:
        user = s.query(User).filter(User.id == user_id).all()
    finally:
        # BUG FIX: close in finally so a failed query doesn't leak the
        # connection.
        s.close()
    if len(user) > 1:
        raise ValueError(f"Found {len(user)} users - expected to find 0 or 1")
    elif len(user) == 0:
        return None
    else:
        return user[0]
def find_user_by_email(email) -> Optional[User]:
    """Find one user by (normalized) email; None when absent or blank.

    Raises ValueError if the email somehow matches multiple rows.
    """
    if not email:
        return None
    email = email.strip().lower()
    s = create_session()
    try:
        user = s.query(User).filter(User.email == email).all()
    finally:
        # BUG FIX: close in finally so a failed query doesn't leak the
        # connection.
        s.close()
    if len(user) > 1:
        raise ValueError(f"Found {len(user)} users - expected to find 0 or 1")
    elif len(user) == 0:
        return None
    else:
        return user[0]
def get_latest_releases(limit=10) -> List[Release]:
    """Return the newest `limit` releases, packages eagerly joined in."""
    session = db_session.create_session()
    try:
        # If you get a release, also get its package via joinedload so no
        # lazy loads fire after the session is closed.
        return session.query(Release) \
            .options(sqlalchemy.orm.joinedload(Release.package)) \
            .order_by(Release.created_date.desc()) \
            .limit(limit) \
            .all()
    finally:
        # BUG FIX: close in finally so a failed query doesn't leak the
        # connection (original closed only on the success path).
        session.close()
def get_package_by_id(package_id: str) -> Optional[Package]:
    """Fetch one package (with releases eagerly loaded) by normalized id."""
    if not package_id:
        return None
    session = create_session()
    try:
        # joinedload eagerly pulls in the releases; otherwise rendering the
        # html later would try to lazy-load them on this (closed) session.
        return session.query(Package) \
            .options(sqlalchemy.orm.joinedload(Package.releases)) \
            .filter(Package.id == package_id.strip().lower()) \
            .first()
    finally:
        # BUG FIX: close in finally so a failed query doesn't leak the
        # connection.
        session.close()
def do_user_import(user_lookup: Dict[str, str]) -> Dict[str, User]:
    """Insert one User per (email, name) pair; return email -> User mapping."""
    print("Importing users ... ", flush=True)
    # BUG FIX (answers the original's open question): sessions do NOT
    # reliably close themselves on going out of scope; the original opened
    # one session per user plus a final one and never closed any. A single
    # session, closed in finally, serves both the inserts and the re-query.
    session = db_session.create_session()
    # Keep attributes loaded after commit so the returned mapping's objects
    # remain usable once the session is closed.
    session.expire_on_commit = False
    try:
        with progressbar.ProgressBar(max_value=len(user_lookup)) as bar:
            for idx, (email, name) in enumerate(user_lookup.items()):
                user = User()
                user.email = email
                user.name = name
                session.add(user)
                session.commit()
                bar.update(idx)
        print()
        sys.stderr.flush()
        sys.stdout.flush()
        # Re-query so the mapping reflects DB state (generated ids populated).
        return {u.email: u for u in session.query(User)}
    finally:
        session.close()
def migrate_users():
    """One-time copy of users from the SQL store into Mongo.

    No-op if Mongo already has users (migration already ran).
    """
    if MongoUser.objects().count():
        return
    session = db_session.create_session()
    try:
        for su in session.query(SqlUser).all():
            u = MongoUser()
            u.created_date = su.created_date
            u.hashed_password = su.hashed_password
            u.name = su.name
            u.email = su.email
            u.save()
    finally:
        # BUG FIX: the original never closed the SQL session.
        session.close()
def update_redirect(redirect_id, name, short_url, url) -> Redirect:
    """Update a redirect's name/short_url/url and return the updated row.

    Raises Exception when no redirect with that id exists.
    """
    session = db_session.create_session()
    try:
        redirect = session.query(Redirect).filter(Redirect.id == redirect_id).first()
        if not redirect:
            # Single in-session lookup replaces the original's extra round
            # trip (and extra session) through get_redirect_by_id.
            raise Exception("Cannot update redirect, does not exist!")
        redirect.name = name
        redirect.short_url = short_url
        redirect.url = url
        # BUG FIX: without this, commit expires the instance's attributes
        # and the caller would hit DetachedInstanceError after close.
        session.expire_on_commit = False
        session.commit()
        return redirect
    finally:
        session.close()