def db_api(client: TestClient, postgres_db: sa.engine.Engine) -> ProjectDBAPI: db_api = _create_project_db(client) yield db_api # clean the projects postgres_db.execute("DELETE FROM projects")
def setup_guids_postgresql(engine: sa.engine.Engine) -> None: # pragma: no cover """ Set up UUID generation using the pgcrypto extension for postgres This query only needs to be executed once when the database is created """ engine.execute('create EXTENSION if not EXISTS "pgcrypto"')
async def test_create_cluster( enable_dev_features: None, client: TestClient, postgres_db: sa.engine.Engine, logged_user: Dict[str, Any], faker: Faker, user_role: UserRole, cluster_authentication: Callable[[], Dict[str, Any]], expected: ExpectedResponse, ): # check we can create a cluster assert client.app url = client.app.router["create_cluster_handler"].url_for() cluster_data = json.loads( ClusterCreate( endpoint=faker.uri(), authentication=cluster_authentication(), name=faker.name(), type=random.choice(list(ClusterType)), ).json(by_alias=True, exclude_unset=True) ) rsp = await client.post(f"{url}", json=cluster_data) data, error = await assert_status( rsp, expected.forbidden if user_role == UserRole.USER else expected.created, # only accessible for TESTER ) if error: # we are done here return created_cluster = Cluster.parse_obj(data) assert created_cluster # check database entry was correctly created result: ResultProxy = postgres_db.execute( sa.select([clusters]).where(clusters.c.name == cluster_data["name"]) ) assert result, "could not find cluster in database" row = result.fetchone() assert row, "could not find cluster in database" assert row[clusters.c.name] == cluster_data["name"] assert row[clusters.c.owner] == logged_user["primary_gid"] assert ( Cluster( id=row[clusters.c.id], name=cluster_data["name"], type=row[clusters.c.type], endpoint=row[clusters.c.endpoint], authentication=row[clusters.c.authentication], owner=logged_user["primary_gid"], access_rights={logged_user["primary_gid"]: CLUSTER_ADMIN_RIGHTS}, ) == created_cluster ) # cleanup postgres_db.execute(clusters.delete().where(clusters.c.id == row[clusters.c.id]))
def truncate_table(db_engine: sqlalchemy.engine.Engine, table: str, schema: str = 'public', cascade: bool = False) -> None: """Truncate given table""" db_engine.execute( f"TRUNCATE TABLE {schema + '.' + table} {'CASCADE' if cascade else ''}" )
def establish_connection(bind: sa.engine.Engine) -> sa.engine.Engine: for _ in range(100): try: bind.connect() break except exc.OperationalError: time.sleep(0.05) return bind
def assign_ds_to_user(eng: sa.engine.Engine, drw_srcs: sa.engine.ResultProxy, u_id): relations_to_insert = [{ 'user_id': u_id, 'draw_source_id': ds.id } for ds in drw_srcs] eng.execute( db.user_draw_source_relationship.insert().values(relations_to_insert))
def drop_table(tbl_name: str, eng: sa.engine.Engine, dispose_eng=False): try: if eng.has_table(tbl_name): with eng.begin() as con: con.execute(f"DROP TABLE {tbl_name}") finally: if dispose_eng: eng.dispose()
def db_api(client: TestClient, postgres_db: sa.engine.Engine) -> Iterator[ProjectDBAPI]: assert client.app db_api = client.app[APP_PROJECT_DBAPI] yield db_api # clean the projects postgres_db.execute("DELETE FROM projects")
def run_sql(sql_path: t.Union[Path, str], engine: sqlalchemy.engine.Engine): sql_path = Path(sql_path) if not sql_path.is_file(): raise ValueError(f"There is no file at path {sql_path!s}") with sql_path.open("r") as fd: sql = fd.read() engine.execute(sql)
def insert_into_table(eng: sa.engine.Engine, df: pd.DataFrame, table_name: str, dtypes: dict = None, unique_columns=None, index_columns=None, hash_index_columns=None, dispose_eng=False): """ Adds df to a new table called $table_name Args: eng: An engine object connecting the db df: The dataframe we want to insert to the DB table_name: The new table's name, assuming it is not in the DB dtypes: The data-types for each column in the DB unique_columns: Optional param for adding a unique key index for several columns, needed for using merge_to_db in postgresql. If set, $dtypes also needs to be set dispose_eng: Whether to dispose of the engine after the read Returns: None """ table_name = table_name.lower() if unique_columns is not None: assert dtypes is not None, "if unique_columns is set, dtypes cannot be none, to handle gis columns correctly" if dtypes is None: dtypes = {} with eng.begin() as con: df.to_sql(table_name, con, if_exists="append", index=False, dtype=dtypes) # for some reason oracle does problems with this, it is only needed in postgres so whatever if unique_columns is not None and eng.dialect.name == "postgresql": from coord2vec.common.db.postgres import get_index_str_for_unique con.execute( f"CREATE UNIQUE INDEX {table_name}_uind " f"ON {table_name} ({get_index_str_for_unique(unique_columns, dtypes)});" ) if index_columns is not None: for col in index_columns: con.execute( f"CREATE INDEX {table_name}_{col}_ind ON {table_name} (col);" ) if hash_index_columns is not None: for col in hash_index_columns: con.execute( f"CREATE INDEX {table_name}_{col}_ind ON {table_name} using hash(col);" ) if dispose_eng: eng.dispose()
def run_sql_file(sql_subpath: Union[str, Path], engine: sqlalchemy.engine.Engine) -> None: sql_path = Path(PACKAGE_PATH, 'sql', sql_subpath).resolve() if not sql_path.is_file(): raise ValueError(f"There is no file at path {sql_path!s}") with sql_path.open("r") as fd: sql = fd.read() engine.execute(sql)
def setup_guids(engine: sa.engine.Engine) -> None: """ Set up UUID generation using the uuid-ossp extension for postgres """ database_backend = DatabaseBackend.from_engine(engine) # TODO: Add some way to run postgres-specific tests if database_backend == DatabaseBackend.postgresql: # pragma: no cover # noinspection SqlDialectInspection,SqlNoDataSourceInspection uuid_generation_setup_query = 'create EXTENSION if not EXISTS "pgcrypto"' engine.execute(uuid_generation_setup_query)
def sqlalchemy_hunter_session( sqlalchemy_hunter_bind: sa.engine.Engine, app: Starlette, ): meta = app.state.hunter_service.metadata() sqlalchemy_hunter_bind.execute("CREATE SCHEMA " + meta.schema) meta.create_all(sqlalchemy_hunter_bind) try: yield sqlalchemy_hunter_bind finally: meta.drop_all(sqlalchemy_hunter_bind) sqlalchemy_hunter_bind.execute("DROP SCHEMA " + meta.schema)
def project_id(user_id: int, postgres_db: sa.engine.Engine) -> Iterable[str]: # inject project for user in db. This will give user_id, the full project's ownership # pylint: disable=no-value-for-parameter stmt = (projects.insert().values(**random_project( prj_owner=user_id)).returning(projects.c.uuid)) print(str(stmt)) with postgres_db.connect() as conn: result = conn.execute(stmt) [prj_uuid] = result.fetchone() yield prj_uuid with postgres_db.connect() as conn: conn.execute(projects.delete().where(projects.c.uuid == prj_uuid))
def cluster( postgres_db: sa.engine.Engine, faker: Faker ) -> Iterable[Callable[[GroupID, Dict[GroupID, ClusterAccessRights]], Coroutine[Any, Any, Cluster]]]: list_of_created_cluster_ids = [] async def creator( gid: GroupID, cluster_access_rights: Dict[GroupID, ClusterAccessRights] = None ) -> Cluster: new_cluster = ClusterCreate( **{ "name": faker.name(), "type": random.choice(list(ClusterType)), "owner": gid, "access_rights": cluster_access_rights or {}, }) result = postgres_db.execute(clusters.insert().values( new_cluster.dict(by_alias=True, exclude={"id", "access_rights" })).returning(literal_column("*"))) cluster_in_db = result.first() assert cluster_in_db is not None new_cluster_id = cluster_in_db[clusters.c.id] list_of_created_cluster_ids.append(new_cluster_id) # when a cluster is created, the DB automatically creates the owner access rights for group_id, access_rights in new_cluster.access_rights.items(): result = postgres_db.execute( insert(cluster_to_groups).values( **{ "cluster_id": new_cluster_id, "gid": group_id, "read": access_rights.read, "write": access_rights.write, "delete": access_rights.delete, }).on_conflict_do_nothing()) return Cluster(id=new_cluster_id, **new_cluster.dict(by_alias=True, exclude={"id"})) yield creator # clean up postgres_db.execute(clusters.delete().where( clusters.c.id.in_(list_of_created_cluster_ids)))
async def test_delete_cluster( enable_dev_features: None, client: TestClient, postgres_db: sa.engine.Engine, logged_user: Dict[str, Any], second_user: Dict[str, Any], cluster: Callable[..., Coroutine[Any, Any, Cluster]], faker: Faker, user_role: UserRole, expected: ExpectedResponse, ): # deleting a non-existing cluster returns not found url = client.app.router["delete_cluster_handler"].url_for(cluster_id=f"{25}") rsp = await client.delete(f"{url}") data, error = await assert_status(rsp, expected.not_found) if error and user_role in [UserRole.ANONYMOUS, UserRole.GUEST]: return assert data is None # create our own cluster allows us to delete it admin_cluster: Cluster = await cluster(GroupID(logged_user["primary_gid"])) url = client.app.router["delete_cluster_handler"].url_for( cluster_id=f"{admin_cluster.id}" ) rsp = await client.delete(f"{url}") data, error = await assert_status(rsp, expected.no_content) assert data is None # check it was deleted result: ResultProxy = postgres_db.execute( sa.select([clusters]).where(clusters.c.id == admin_cluster.id) ) assert result.rowcount == 0
def list_dbs(connection: sqlalchemy.engine.Engine): """List databases from a connectionection.""" response = connection.execute(""" SELECT datname FROM pg_database """).fetchall() return [r[0] for r in response]
def runs( postgres_db: sa.engine.Engine ) -> Iterator[Callable[..., CompRunsAtDB]]: created_run_ids: List[int] = [] def creator(user: Dict[str, Any], project: ProjectAtDB, **run_kwargs) -> CompRunsAtDB: run_config = { "project_uuid": f"{project.uuid}", "user_id": f"{user['id']}", "iteration": 1, "result": StateType.NOT_STARTED, } run_config.update(**run_kwargs) with postgres_db.connect() as conn: result = conn.execute(comp_runs.insert().values( **run_config).returning(sa.literal_column("*"))) new_run = CompRunsAtDB.parse_obj(result.first()) created_run_ids.append(new_run.run_id) return new_run yield creator # cleanup with postgres_db.connect() as conn: conn.execute(comp_runs.delete().where( comp_runs.c.run_id.in_(created_run_ids)))
def cluster( postgres_db: sa.engine.Engine, ) -> Iterator[Callable[..., Cluster]]: created_cluster_ids: List[str] = [] def creator(user: Dict[str, Any], **cluster_kwargs) -> Cluster: cluster_config = Cluster.Config.schema_extra["examples"][0] cluster_config["owner"] = user["primary_gid"] cluster_config.update(**cluster_kwargs) new_cluster = Cluster.parse_obj(cluster_config) assert new_cluster with postgres_db.connect() as conn: # insert basic cluster created_cluster = conn.execute( sa.insert(clusters).values( new_cluster.to_clusters_db(only_update=False)).returning( sa.literal_column("*"))).one() created_cluster_ids.append(created_cluster.id) if "access_rights" in cluster_kwargs: for gid, rights in cluster_kwargs["access_rights"].items(): conn.execute( pg_insert(cluster_to_groups).values( cluster_id=created_cluster.id, gid=gid, **rights.dict()).on_conflict_do_update( index_elements=["gid", "cluster_id"], set_=rights.dict())) access_rights_in_db = {} for row in conn.execute( sa.select([ cluster_to_groups.c.gid, cluster_to_groups.c.read, cluster_to_groups.c.write, cluster_to_groups.c.delete, ]).select_from(clusters.join(cluster_to_groups)).where( clusters.c.id == created_cluster.id)): access_rights_in_db[row.gid] = { "read": row[cluster_to_groups.c.read], "write": row[cluster_to_groups.c.write], "delete": row[cluster_to_groups.c.delete], } return Cluster.construct( id=created_cluster.id, name=created_cluster.name, description=created_cluster.description, type=created_cluster.type, owner=created_cluster.owner, endpoint=created_cluster.endpoint, authentication=created_cluster.authentication, access_rights=access_rights_in_db, ) yield creator # cleanup with postgres_db.connect() as conn: conn.execute( # pylint: disable=no-value-for-parameter clusters.delete().where(clusters.c.id.in_(created_cluster_ids)))
def _assert_project_db_row(postgres_db: sa.engine.Engine, project: Dict[str, Any], **kwargs): row: Optional[Row] = postgres_db.execute( f"SELECT * FROM projects WHERE \"uuid\"='{project['uuid']}'").fetchone( ) expected_db_entries = { "type": "STANDARD", "uuid": project["uuid"], "name": project["name"], "description": project["description"], "thumbnail": project["thumbnail"], "prj_owner": None, "workbench": project["workbench"], "published": False, "access_rights": {}, "dev": project["dev"], "classifiers": project["classifiers"], "ui": project["ui"], "quality": project["quality"], "creation_date": to_datetime(project["creationDate"]), "last_change_date": to_datetime(project["lastChangeDate"]), } expected_db_entries.update(kwargs) for k in expected_db_entries: assert (row[k] == expected_db_entries[k] ), f"project column [{k}] does not correspond" assert row["last_change_date"] >= row["creation_date"]
def project(postgres_db: sa.engine.Engine, user_db: Dict) -> Callable: created_project_ids = [] def creator(**overrides) -> ProjectAtDB: project_config = { "uuid": uuid4(), "name": "my test project", "type": ProjectType.STANDARD.name, "description": "my test description", "prj_owner": user_db["id"], "workbench": {}, } project_config.update(**overrides) with postgres_db.connect() as con: result = con.execute( projects.insert() .values(**project_config) .returning(literal_column("*")) ) project = ProjectAtDB.parse_obj(result.first()) created_project_ids.append(project.uuid) return project yield creator # cleanup with postgres_db.connect() as con: for pid in created_project_ids: con.execute(projects.delete().where(projects.c.uuid == str(pid)))
def pipeline( postgres_db: sa.engine.Engine, ) -> Iterable[Callable[..., CompPipelineAtDB]]: created_pipeline_ids: List[str] = [] def creator(**overrides) -> CompPipelineAtDB: pipeline_config = { "project_id": f"{uuid4()}", "dag_adjacency_list": {}, "state": StateType.NOT_STARTED, } pipeline_config.update(**overrides) with postgres_db.connect() as conn: result = conn.execute(comp_pipeline.insert().values( **pipeline_config).returning(literal_column("*"))) new_pipeline = CompPipelineAtDB.parse_obj(result.first()) created_pipeline_ids.append(f"{new_pipeline.project_id}") return new_pipeline yield creator # cleanup with postgres_db.connect() as conn: conn.execute(comp_pipeline.delete().where( comp_pipeline.c.project_id.in_(created_pipeline_ids)))
def make_geography_db( data: Mapping[str, Any], con: sa.engine.Engine, ) -> None: metadata = sa.MetaData(bind=con) with con.begin() as bind: for table_name, schema in SCHEMAS.items(): table = sa.Table( table_name, metadata, *(sa.Column(col_name, col_type) for col_name, col_type in schema), ) table_columns = table.c.keys() post_parse = POST_PARSE_FUNCTIONS.get(table_name, toolz.identity) table.drop(bind=bind, checkfirst=True) table.create(bind=bind) bind.execute( table.insert().values(), [ post_parse(dict(zip(table_columns, row))) for row in data[table_name] ], )
def get_send_off_diff(shots_df: pd.DataFrame, engine: sqlalchemy.engine.Engine) -> pd.Series: """ Returns the player advantage based on the red cards given :param shots_df: data frame of shots taken :param engine: sqlalchemy engine :return: the player differential at the time of each shot """ diffs = pd.Series(data=0, index=shots_df.index) reds = [] with engine.connect() as cxn: for league in shots_df['league'].unique(): # this is only 7 leagues query = """SELECT * FROM events_{} WHERE "1701" = True or "1703" = True;""".format(league) # TAG: numbers reds.append(pd.read_sql(query, cxn)) reds_df = pd.concat(reds) for match in reds_df['matchId'].unique(): # 305 matches with red cards for ind, row in shots_df[shots_df['matchId'] == match].iterrows(): # 50 max same_team_slice = reds_df[(reds_df['eventSec'] < row['eventSec']) & (reds_df['teamId'] == row['teamId'])] other_team_slice = reds_df[(reds_df['eventSec'] < row['eventSec']) & (reds_df['teamId'] != row['teamId'])] reds_same = len( same_team_slice[same_team_slice['matchId'] == match]) reds_other = len( other_team_slice[other_team_slice['matchId'] == match]) diffs[ind] = reds_other - reds_same return diffs
def task(postgres_db: sa.engine.Engine) -> Callable[..., str]: created_task_ids: List[int] = [] def creator(project_id: str, node_uuid: str, **overrides) -> str: task_config = { "project_id": project_id, "node_id": node_uuid, } task_config.update(**overrides) with postgres_db.connect() as conn: result = conn.execute( comp_tasks.insert() # pylint: disable=no-value-for-parameter .values(**task_config) .returning(comp_tasks.c.task_id) ) new_task_id = result.first()[comp_tasks.c.task_id] created_task_ids.append(new_task_id) return node_uuid yield creator # cleanup with postgres_db.connect() as conn: conn.execute( comp_tasks.delete().where( # pylint: disable=no-value-for-parameter comp_tasks.c.task_id.in_(created_task_ids) ) )
def registered_user(postgres_db: sa.engine.Engine, faker: Faker) -> Iterator[Callable[..., Dict]]: created_user_ids = [] def creator(**user_kwargs) -> Dict[str, Any]: with postgres_db.connect() as con: # removes all users before continuing user_config = { "id": len(created_user_ids) + 1, "name": faker.name(), "email": faker.email(), "password_hash": faker.password(), "status": UserStatus.ACTIVE, "role": UserRole.USER, } user_config.update(user_kwargs) con.execute(users.insert().values(user_config).returning( sa.literal_column("*"))) # this is needed to get the primary_gid correctly result = con.execute( sa.select([users]).where(users.c.id == user_config["id"])) user = result.first() assert user created_user_ids.append(user["id"]) return dict(user) yield creator with postgres_db.connect() as con: con.execute(users.delete().where(users.c.id.in_(created_user_ids)))
def write_iostream_to_database(engine: sqlalchemy.engine.Engine, tablename: str, dataframe: pd.DataFrame) -> None: """ More effective way of saving dataframe into database using io stream - StringIO. :param sqlalchemy.engine.Engine engine: SQL Alchemy engine for connection to databse. :param str tablename: Name of the table in database, where will be content of dataframe appended. :param pd.Datafram dataframe: Dataframe, whose content will be appended to databse. :return None: No return value. """ store = io.StringIO() dataframe.to_csv(store, index=False, header=False) store.seek(0) conn = engine.connect().connection cursor = conn.cursor() cursor.copy_from(store, tablename, columns=dataframe.columns, sep=',', null='null') conn.commit() conn.close()
def cluster( user_db: Dict, postgres_db: sa.engine.Engine, ) -> Iterable[Callable[..., Cluster]]: created_cluster_ids: List[str] = [] def creator(**overrides) -> Cluster: cluster_config = Cluster.Config.schema_extra["examples"][0] cluster_config["owner"] = user_db["primary_gid"] cluster_config.update(**overrides) new_cluster = Cluster.parse_obj(cluster_config) assert new_cluster with postgres_db.connect() as conn: created_cluser_id = conn.scalar( # pylint: disable=no-value-for-parameter clusters.insert().values( new_cluster.to_clusters_db(only_update=False) ).returning(clusters.c.id)) created_cluster_ids.append(created_cluser_id) result = conn.execute( sa.select([ clusters, cluster_to_groups.c.gid, cluster_to_groups.c.read, cluster_to_groups.c.write, cluster_to_groups.c.delete, ]).select_from( clusters.join( cluster_to_groups, clusters.c.id == cluster_to_groups.c.cluster_id, )).where(clusters.c.id == created_cluser_id)) row = result.fetchone() assert row return Cluster.construct( id=row[clusters.c.id], name=row[clusters.c.name], description=row[clusters.c.description], type=row[clusters.c.type], owner=row[clusters.c.owner], endpoint=row[clusters.c.endpoint], authentication=row[clusters.c.authentication], access_rights={ row[clusters.c.owner]: { "read": row[cluster_to_groups.c.read], "write": row[cluster_to_groups.c.write], "delete": row[cluster_to_groups.c.delete], } }, ) yield creator # cleanup with postgres_db.connect() as conn: conn.execute( # pylint: disable=no-value-for-parameter clusters.delete().where(clusters.c.id.in_(created_cluster_ids)))
def auto_migrate(engine: sqlalchemy.engine.Engine): """Compares the current database with all defined models and applies the diff""" ddl = get_migration_ddl(engine) with engine.begin() as connection: for statement in ddl: sys.stdout.write('\033[1;32m' + statement + '\033[0;0m') connection.execute(statement)
def inject_tables(postgres_db: sa.engine.Engine): stmt = text("""\ INSERT INTO "group_classifiers" ("id", "bundle", "created", "modified", "gid", "uses_scicrunch") VALUES (2, '{"vcs_ref": "asdfasdf", "vcs_url": "https://foo.classifiers.git", "build_date": "2021-01-20T15:19:30Z", "classifiers": {"project::dak": {"url": null, "logo": null, "aliases": [], "related": [], "markdown": "", "released": null, "classifier": "project::dak", "created_by": "Nicolas Chavannes", "github_url": null, "display_name": "DAK", "wikipedia_url": null, "short_description": null}, "organization::zmt": {"url": "https://zmt.swiss/", "logo": null, "aliases": ["Zurich MedTech AG"], "related": [], "markdown": "Zurich MedTech AG (ZMT) offers tools and best practices for targeted life sciences applications to simulate, analyze, and predict complex and dynamic biological processes and interactions. ZMT is a member of Zurich43", "released": null, "classifier": "organization::zmt", "created_by": "crespo", "github_url": null, "display_name": "ZMT", "wikipedia_url": null, "short_description": "ZMT is a member of Zurich43"}}, "collections": {"jupyterlab-math": {"items": ["crespo/osparc-demo"], "markdown": "Curated collection of repositories with examples of notebooks to run in jupyter-python-octave-math service", "created_by": "crespo", "display_name": "jupyterlab-math"}}}', '2021-03-04 23:17:43.373258', '2021-03-04 23:17:43.373258', 1, '0'); """) with postgres_db.connect() as conn: conn.execute(stmt)
def insert_into_table(table: sqlalchemy.Table, db_connection: sqlalchemy.engine.Engine, key_value_dict: dict): assert isinstance(table, sqlalchemy.Table) insert_clause = table.insert().values(**key_value_dict) db_connection.execute(insert_clause)