def get_package_version_link_id_query(
        session: sqlalchemy.orm.Session,
        link: Tuple[int, int]) -> sqlalchemy.orm.query.Query:
    """Build a query selecting the ``PackageLink.id`` for a dependency edge.

    :param session: active SQLAlchemy session
    :param link: ``(parent_package_id, child_package_id)`` pair
    :return: unexecuted query for the matching link id
    """
    parent_id, child_id = link
    return session.query(PackageLink.id).filter_by(
        parent_package_id=parent_id,
        child_package_id=child_id,
    )
def test_does_not_lose_history(ssn: sa.orm.Session):
    """ Extensive test of InstanceHistoryProxy with query counters and lazy loads """
    # autoflush would flush pending changes mid-test and skew the query counters
    assert ssn.autoflush is False, 'this test relies on Session.autoflush=False'
    engine = ssn.get_bind()

    # Prepare
    ssn.add(User(id=1, name='John', age=18))
    ssn.add(Article(id=1, title='Python', author_id=1))
    ssn.commit()

    # === Test 1: ModelHistoryProxy does not lose history when flushing a session
    ssn.expunge_all()  # got to reset; otherwise, the session might reuse loaded objects
    user = ssn.query(User).get(1)

    with ExpectedQueryCounter(engine, 0, 'Expected no queries here'):
        old_user_hist = InstanceHistoryProxy(user)  # issues no queries

    # Modify
    user.name = 'CHANGED'

    # History works
    assert old_user_hist.name == 'John'

    # Flush
    ssn.flush()

    # History is NOT broken!
    assert old_user_hist.name == 'John'

    # Change another column after flush; history is still NOT broken!
    user.age = 1800
    assert old_user_hist.age == 18  # correct

    # Undo
    ssn.rollback()

    # === Test 2: ModelHistoryProxy does not lose history when lazyloading a column
    ssn.expunge_all()  # got to reset; otherwise, the session might reuse loaded objects
    user = ssn.query(User).options(load_only('name')).get(1)

    with ExpectedQueryCounter(engine, 0, 'Expected no queries here'):
        old_user_hist = InstanceHistoryProxy(user)  # issues no queries
        user.name = 'CHANGED'
        assert old_user_hist.name == 'John'

    # Load a column
    with ExpectedQueryCounter(engine, 1, 'Expected 1 lazyload query'):
        user.age  # get an unloaded column

    # History is NOT broken!
    assert old_user_hist.name == 'John'

    # === Test 3: ModelHistoryProxy does not lose history when lazyloading a one-to-many relationship
    ssn.expunge_all()  # got to reset; otherwise, the session might reuse loaded objects
    user = ssn.query(User).get(1)

    with ExpectedQueryCounter(engine, 0, 'Expected no queries here'):
        old_user_hist = InstanceHistoryProxy(user)
        user.name = 'CHANGED'
        assert old_user_hist.name == 'John'  # History works

    # Load a relationship
    with ExpectedQueryCounter(engine, 1, 'Expected 1 lazyload query'):
        list(user.articles)

    # History is NOT broken!
    assert old_user_hist.name == 'John'

    # === Test 4: ModelHistoryProxy does not lose history when lazyloading a one-to-one relationship
    ssn.expunge_all()  # got to reset; otherwise, the session might reuse loaded objects
    article = ssn.query(Article).get(1)

    with ExpectedQueryCounter(engine, 0, 'Expected no queries here'):
        old_article_hist = InstanceHistoryProxy(article)
        article.title = 'CHANGED'
        assert old_article_hist.title == 'Python'  # works

    # Load a relationship
    with ExpectedQueryCounter(engine, 1, 'Expected 1 lazyload query'):
        article.author

    # History is NOT broken!
    assert old_article_hist.title == 'Python'  # works
def insert_npmsio_data(session: sqlalchemy.orm.Session,
                       source: Generator[Dict[str, Any], None, None]) -> None:
    """Insert npms.io score documents as NPMSIOScore rows.

    Rows already present for the same (package_name, package_version,
    analyzed_at) triple are skipped; each new row is committed immediately.
    """
    # NPMSIOScore column name -> path into the npms.io JSON document
    field_paths = {
        "package_name": ["collected", "metadata", "name"],
        "package_version": ["collected", "metadata", "version"],
        "analyzed_at": ["analyzedAt"],  # e.g. "2019-11-27T19:31:42.541Z"
        # overall score from .score.final on the interval [0, 1]
        "score": ["score", "final"],
        # score components on the interval [0, 1]
        "quality": ["score", "detail", "quality"],
        "popularity": ["score", "detail", "popularity"],
        "maintenance": ["score", "detail", "maintenance"],
        # score subcomponent/detail fields from .evaluation.<component>.<subcomponent>
        # generally frequencies and subscores are decimals between [0, 1]
        # or counts of downloads, stars, etc.
        # acceleration is signed (+/-)
        "branding": ["evaluation", "quality", "branding"],
        "carefulness": ["evaluation", "quality", "carefulness"],
        "health": ["evaluation", "quality", "health"],
        "tests": ["evaluation", "quality", "tests"],
        "community_interest": ["evaluation", "popularity", "communityInterest"],
        "dependents_count": ["evaluation", "popularity", "dependentsCount"],
        "downloads_acceleration": ["evaluation", "popularity", "downloadsAcceleration"],
        "downloads_count": ["evaluation", "popularity", "downloadsCount"],
        "commits_frequency": ["evaluation", "maintenance", "commitsFrequency"],
        "issues_distribution": ["evaluation", "maintenance", "issuesDistribution"],
        "open_issues": ["evaluation", "maintenance", "openIssues"],
        "releases_frequency": ["evaluation", "maintenance", "releasesFrequency"],
    }

    for doc in source:
        fields = extract_nested_fields(doc, field_paths)
        fields["source_url"] = f"https://api.npms.io/v2/package/{fields['package_name']}"

        # only insert new rows
        existing_id = session.query(NPMSIOScore.id).filter_by(
            package_name=fields["package_name"],
            package_version=fields["package_version"],
            analyzed_at=fields["analyzed_at"],
        ).one_or_none()
        if existing_id:
            log.debug(
                f"skipping inserting npms.io score for {fields['package_name']}@{fields['package_version']}"
                f" analyzed at {fields['analyzed_at']}")
            continue

        session.add(NPMSIOScore(**fields))
        session.commit()
        log.info(
            f"added npms.io score for {fields['package_name']}@{fields['package_version']}"
            f" analyzed at {fields['analyzed_at']}")
def insert_npm_registry_data(
        session: sqlalchemy.orm.Session,
        source: Generator[Dict[str, Any], None, None]) -> None:
    """Insert npm registry documents as NPMRegistryEntry rows, one per version.

    Rows already present for the same (package_name, package_version, shasum,
    tarball) tuple are skipped; each new row is committed immediately.
    """
    for line in source:
        # save version specific data
        for version, version_data in line["versions"].items():
            # NPMRegistryEntry column name -> path into the per-version JSON object
            fields = extract_nested_fields(
                version_data,
                {
                    "package_name": ["name"],
                    "package_version": ["version"],
                    "shasum": ["dist", "shasum"],
                    "tarball": ["dist", "tarball"],
                    "git_head": ["gitHead"],
                    "repository_type": ["repository", "type"],
                    "repository_url": ["repository", "url"],
                    "description": ["description"],
                    "url": ["url"],
                    "license_type": ["license"],
                    "keywords": ["keywords"],
                    "has_shrinkwrap": ["_hasShrinkwrap"],
                    "bugs_url": ["bugs", "url"],
                    "bugs_email": ["bugs", "email"],
                    "author_name": ["author", "name"],
                    "author_email": ["author", "email"],
                    "author_url": ["author", "url"],
                    "maintainers": ["maintainers"],
                    "contributors": ["contributors"],
                    "publisher_name": ["_npmUser", "name"],
                    "publisher_email": ["_npmUser", "email"],
                    "publisher_node_version": ["_nodeVersion"],
                    "publisher_npm_version": ["_npmVersion"],
                },
            )
            # license can be a string e.g. 'MIT'
            # or dict e.g. {'type': 'MIT', 'url': 'https://github.com/jonschlinkert/micromatch/blob/master/LICENSE'}
            fields["license_url"] = None
            if isinstance(fields["license_type"], dict):
                # unpack the dict form into the two flat columns
                fields["license_url"] = fields["license_type"].get("url", None)
                fields["license_type"] = fields["license_type"].get(
                    "type", None)

            # looking at you [email protected].{3,4} with:
            # [{"name": "StrongLoop", "url": "http://strongloop.com/license/"}, "MIT"],
            # anything still not a str-or-None after unpacking is dropped
            if not ((isinstance(fields["license_type"], str)
                     or fields["license_type"] is None)
                    and (isinstance(fields["license_url"], str)
                         or fields["license_url"] is None)):
                log.warning(
                    f"skipping weird license format {fields['license_type']}")
                fields["license_url"] = None
                fields["license_type"] = None

            # published_at .time[<version>] e.g. '2014-05-23T21:21:04.170Z' (not from
            # the version info object)
            # where time: an object mapping versions to the time published, along with created and modified timestamps
            fields["published_at"] = get_in(line, ["time", version])
            fields["package_modified_at"] = get_in(line, ["time", "modified"])
            fields[
                "source_url"] = f"https://registry.npmjs.org/{fields['package_name']}"

            # only insert new rows: skip when an identical entry already exists
            if (session.query(NPMRegistryEntry.id).filter_by(
                    package_name=fields["package_name"],
                    package_version=fields["package_version"],
                    shasum=fields["shasum"],
                    tarball=fields["tarball"],
            ).one_or_none()):
                log.debug(
                    f"skipping inserting npm registry entry for {fields['package_name']}@{fields['package_version']}"
                    f" from {fields['tarball']} with sha {fields['shasum']}")
            else:
                session.add(NPMRegistryEntry(**fields))
                session.commit()
                log.info(
                    f"added npm registry entry for {fields['package_name']}@{fields['package_version']}"
                    f" from {fields['tarball']} with sha {fields['shasum']}")
def get_node_advisory_id_query(session: sqlalchemy.orm.Session,
                               advisory: Dict) -> sqlalchemy.orm.query.Query:
    """Build a query for the ``Advisory.id`` of a node advisory.

    Advisories are looked up by their URL within the "node" language.
    """
    id_query = session.query(Advisory.id)
    return id_query.filter_by(language="node", url=advisory["url"])
def insert_package_audit(session: sqlalchemy.orm.Session,
                         task_data: Dict) -> None:
    """Persist advisories from an npm/yarn audit run and link them to the
    PackageVersion rows they impact.

    Commits once per advisory after upserting the Advisory row, and once more
    after merging the impacted version ids into
    ``Advisory.vulnerable_package_version_ids``.
    """
    is_yarn_cmd = bool("yarn" in task_data["command"])
    # NB: yarn has .advisory and .resolution
    # the same advisory JSON (from the npm DB) is
    # at .advisories{k, v} for npm and .advisories[].advisory for yarn
    advisories = ((item.get("advisory", None)
                   for item in task_data.get("advisories", []))
                  if is_yarn_cmd else task_data.get("advisories",
                                                    dict()).values())
    non_null_advisories = (adv for adv in advisories if adv)

    for advisory in non_null_advisories:
        # Advisory column name -> path into the advisory JSON object
        advisory_fields = extract_nested_fields(
            advisory,
            {
                "package_name": ["module_name"],
                "npm_advisory_id": ["id"],
                "vulnerable_versions": ["vulnerable_versions"],
                "patched_versions": ["patched_versions"],
                "created": ["created"],
                "updated": ["updated"],
                "url": ["url"],
                "severity": ["severity"],
                "cves": ["cves"],
                "cwe": ["cwe"],
                "exploitability": ["metadata", "exploitability"],
                "title": ["title"],
            },
        )
        # normalize e.g. "CWE-79" -> 79
        # NOTE(review): assumes .cwe is always a "CWE-<n>" string — a missing
        # or list-valued cwe would raise here; confirm against audit output
        advisory_fields["cwe"] = int(advisory_fields["cwe"].lower().replace(
            "cwe-", ""))
        advisory_fields["language"] = "node"
        advisory_fields["vulnerable_package_version_ids"] = []

        # insert the advisory only if no row with this URL exists yet
        get_node_advisory_id_query(
            session, advisory_fields).one_or_none() or session.add(
                Advisory(**advisory_fields))
        session.commit()

        # TODO: update other advisory fields too

        # versions reported as impacted by the audit findings
        impacted_versions = set(
            finding.get("version", None)
            for finding in advisory.get("findings", [])
            if finding.get("version", None))
        # re-read the (possibly just-inserted) advisory row
        db_advisory = (session.query(
            Advisory.id, Advisory.vulnerable_package_version_ids).filter_by(
                language="node", url=advisory["url"]).first())
        # resolve impacted version strings to PackageVersion ids
        impacted_version_package_ids = list(
            vid for result in session.query(PackageVersion.id).filter(
                PackageVersion.name == advisory_fields["package_name"],
                PackageVersion.version.in_(impacted_versions),
            ).all() for vid in result)
        if len(impacted_versions) != len(impacted_version_package_ids):
            log.warning(
                f"missing package versions for {advisory_fields['package_name']!r}"
                f" in the db or misparsed audit output version:"
                f" {impacted_versions} {impacted_version_package_ids}")

        # initialize the id list column for legacy rows stored as NULL
        if db_advisory.vulnerable_package_version_ids is None:
            session.query(Advisory.id).filter_by(id=db_advisory.id).update(
                dict(vulnerable_package_version_ids=list()))

        # TODO: lock the row?
        # merge the new ids into the stored list (read-modify-write; not
        # concurrency-safe without row locking)
        vpvids = set(
            list(
                session.query(Advisory).filter_by(
                    id=db_advisory.id).first().vulnerable_package_version_ids))
        vpvids.update(set(impacted_version_package_ids))
        session.query(Advisory.id).filter_by(id=db_advisory.id).update(
            dict(vulnerable_package_version_ids=sorted(vpvids)))
        session.commit()
def get_balance_sheet(
    fund_id: int = Path(..., ge=1),
    datetime: dt.datetime = None,
    session: sa.orm.Session = sess,
):
    """
    Returns the balance sheet for a given fund

    The datetime default value is "now"

    A sample response would be:
    {
        "id": 1, "name": "root", "balance": 0.0, "parent_id": None,
        "childs": [
            {
                "id": 2, "name": "Assets", "balance": 1000.0, "parent_id": 1,
                "childs": [
                    {"id": 3, "name": "Cash", "balance": 500.0, "parent_id": 2, "childs": []},
                    {"id": 4, "name": "Receivables", "balance": 500.0, "parent_id": 2, "childs": []},
                    {"id": 5, "name": "Inventory", "balance": 0, "parent_id": 2, "childs": []},
                ],
            },
            {
                "id": 6, "name": "Liabilities", "balance": -1000.0, "parent_id": 1,
                "childs": [
                    {"id": 7, "name": "Payables", "balance": 0, "parent_id": 6, "childs": []},
                    {"id": 8, "name": "Shares Issued", "balance": -1000.0, "parent_id": 6, "childs": []},
                    {"id": 9, "name": "Retained Earnings", "balance": 0, "parent_id": 6, "childs": []},
                ],
            },
            {
                "id": 10, "name": "Income", "balance": 0, "parent_id": 1,
                "childs": [
                    {"id": 11, "name": "Trade", "balance": 0, "parent_id": 10, "childs": []},
                    {"id": 12, "name": "Interest", "balance": 0, "parent_id": 10, "childs": []},
                ],
            },
            {
                "id": 13, "name": "Expenses", "balance": 0, "parent_id": 1,
                "childs": [
                    {
                        "id": 14, "name": "Fees", "balance": 0, "parent_id": 13,
                        "childs": [
                            {"id": 15, "name": "Broker", "balance": 0, "parent_id": 14, "childs": []},
                            {"id": 16, "name": "Administration", "balance": 0, "parent_id": 14, "childs": []},
                        ],
                    },
                    {"id": 17, "name": "Tax", "balance": 0, "parent_id": 13, "childs": []},
                    {"id": 18, "name": "Other", "balance": 0, "parent_id": 13, "childs": []},
                ],
            },
        ],
    }
    """
    # NOTE(review): fund_id is declared as a path parameter but is not
    # referenced anywhere in this body — confirm whether filtering by fund
    # was intended.
    if datetime is None:
        # NOTE(review): utcnow() is naive (no tzinfo); assumes Entry.datetime
        # values are stored naive-UTC as well — confirm.
        datetime = dt.datetime.utcnow()

    # get all accounts that have a balance: sum of entry values per account,
    # restricted to entries at or before the cutoff datetime
    s = (sa.select(
        [models.Account.id,
         sa.func.sum(models.Entry.value).label("value")]).select_from(
             sa.join(
                 models.Account,
                 models.Entry,
                 models.Account.id == models.Entry.account_id,
             )).group_by(models.Account.id).where(
                 models.Entry.datetime <= datetime))
    accounts_balance = session.execute(s).fetchall()
    # account id -> summed balance
    accounts_balance = {a.id: a.value for a in accounts_balance}

    # get all accounts (including those with no entries, which default to 0)
    accounts_list = (session.query(models.Account).order_by(
        models.Account.parent_id.desc()).all())
    accounts_list = [
        Account(
            id=acc.id,
            name=acc.name,
            balance=accounts_balance.get(acc.id, Decimal(0)),
            childs=[],
            parent_id=acc.parent_id,
        ) for acc in accounts_list
    ]
    # assemble the flat list into the nested parent/child tree
    accounts_tree = _create_acc_tree(accounts_list)
    return accounts_tree
def get_package_version_id_query(session: sqlalchemy.orm.Session,
                                 pkg: Dict) -> sqlalchemy.orm.query.Query:
    """Build a query selecting the ``PackageVersion.id`` for a node package.

    :param pkg: dict carrying at least ``name`` and ``version`` keys
    """
    criteria = dict(name=pkg["name"], version=pkg["version"], language="node")
    return session.query(PackageVersion.id).filter_by(**criteria)
def clear(session: sa.orm.Session):
    """Delete every row from every mapped table, then commit once."""
    tables = models.Base.metadata.tables.values()
    for tbl in tables:
        session.query(tbl).delete()
    session.commit()
def try_to_commit(session: sa.orm.Session):
    """Commit the session; on an integrity violation, roll back and surface
    the error as an HTTP 422 with a one-frame traceback as detail."""
    try:
        session.commit()
    except sa.exc.IntegrityError:
        session.rollback()
        detail = tb.format_exc(limit=0)
        raise HTTPException(status_code=422, detail=detail)
def get_user(userId: int, session: sqlalchemy.orm.Session):
    """Return the list of User rows whose userID matches the given id."""
    matches = session.query(User).filter(User.userID == userId)
    return matches.all()
def get_user_tweets(userId: int, session: sqlalchemy.orm.Session):
    """Return the list of Tweet rows belonging to the given user id."""
    by_user = session.query(Tweet).filter(Tweet.userID == userId)
    return by_user.all()
def get_user_tweet_count(userId: int, session: sqlalchemy.orm.Session):
    """Return the number of stored tweets for the given user id.

    :type userId: int
    :type session: sqlalchemy.orm.Session
    """
    by_user = session.query(Tweet).filter(Tweet.userID == userId)
    return by_user.count()
def get_boosters(
        session: sa.orm.Session) -> Iterable[models.hardware.BoostStage]:
    """Lazily yield every BoostStage row."""
    yield from session.query(models.hardware.BoostStage)
def process_jobs(jobs: List[TaskJobs], config: ConfigHolder,
                 session: sqlalchemy.orm.Session):
    """Solve every job in ``jobs``, either locally or via celery, storing each
    result on ``job.solution`` and committing through ``session``.

    Jobs are dispatched to celery in slices sized by ``config.slice_size``;
    a tqdm progress bar tracks completion in both modes.
    """
    if not jobs:
        return
    processbar = tqdm.tqdm(total=len(jobs),
                           desc=f"Task {jobs[0].task_id}: Process jobs")

    # celery on_message callback: advance the bar on terminal states
    def _on_message(body):
        if body["status"] in ['SUCCESS', 'FAILURE']:
            if body["status"] == 'FAILURE':
                print("Found an error:", body)
            try:
                processbar.update()
            except AttributeError:
                pass

    # ToDo: To speed up solving time, maybe use bulksolve
    # split jobs into slice_amount contiguous slices; the last slice absorbs
    # the remainder up to len(jobs)
    slice_size, slice_amount = _get_slicing(jobs, config.slice_size)
    slices = [(i * slice_size, (i + 1) * slice_size)
              for i in range(slice_amount - 1)]
    if slice_amount > 0:
        slices.append(tuple([(slice_amount - 1) * slice_size, len(jobs)]))
    solver_args = config.solver_args
    # default time limit of 900s when none configured
    if "time_limit" in solver_args:
        time_limit = solver_args["time_limit"]
    else:
        time_limit = 900
    if config.local:
        # solve sequentially in-process; commit once at the end
        for job in jobs:
            sol = solve(
                job.graph,
                config.solver,
                solver_config=config.solver_args,
                solve_config={
                    # warm-start from the previous job's solution and deduct
                    # the time it already consumed from the limit
                    "start_solution": (None if job.prev_job is None else
                                       job.prev_job.solution.order),
                    "time_limit": (time_limit if job.prev_job is None else
                                   time_limit - job.prev_job.solution.runtime)
                })
            job.solution = sol
            processbar.update()
        session.commit()
    else:
        # dispatch each slice as a celery group and wait for its results
        for start, end in slices:
            results = group(
                solve.s(
                    job.graph,
                    config.solver,
                    solver_config=config.solver_args,
                    solve_config={
                        "start_solution": (None if job.prev_job is None else
                                           job.prev_job.solution.order),
                        "time_limit":
                        (time_limit if job.prev_job is None else
                         time_limit - job.prev_job.solution.runtime)
                    }) for job in jobs[start:end])().get(
                        on_message=_on_message)
            # This is just for local testing
            #results = [solve(
            #    job.graph,
            #    config.solver,
            #    solver_config=config.solver_args,
            #    solve_config={"start_solution":(None if job.prev_job is None else job.prev_job.solution.order)}
            #    )
            #    for job in jobs[start:end]]
            # attach results back onto their jobs; runtimes accumulate across
            # chained jobs
            for job, result in zip(jobs[start:end], results):
                result.graph = job.graph
                if job.prev_job is not None:
                    result.runtime = result.runtime + job.prev_job.runtime
                job.solution = result
            if session:
                session.commit()