def delete_product_block(conn: sa.engine.Connection, name: str) -> None:
    """Delete a product block and its occurrences in resource types and products.

    Args:
        conn: DB connection as available in migration main file
        name: a product_block name you want to delete

    Example:
        >>> obsolete_stuff = "name_1"
        >>> delete_product_block(conn, obsolete_stuff)
    """
    # One round trip: the first CTE deletes the block and RETURNING feeds its
    # id into the two link-table deletes, so no orphaned rows are left behind.
    conn.execute(
        sa.text("""
            WITH deleted_pb AS (
                DELETE FROM product_blocks WHERE name=:name
                RETURNING product_block_id
            ),
            deleted_p_pb AS (
                DELETE FROM product_product_blocks WHERE product_block_id IN (SELECT product_block_id FROM deleted_pb)
            ),
            deleted_pb_rt AS (
                DELETE FROM product_block_resource_types WHERE product_block_id IN (SELECT product_block_id FROM deleted_pb)
            )
            SELECT * from deleted_pb;
            """),
        name=name,
    )
def remove_products_from_workflow_by_product_tag(
        conn: sa.engine.Connection, workflow_name: str, product_tag: str,
        product_name_like: str = "%%") -> None:
    """Delete products from a workflow by product tag.

    Args:
        conn: DB connection as available in migration main file.
        workflow_name: Name of the workflow that the products need to be removed from.
        product_tag: Tag of the product to remove from the workflow.
        product_name_like (optional): Part of the product name to get more specific products (necessary for fw v2)

    Usage:
        ```python
        product_tag = "product_tag"
        workflow_name = "workflow_name"
        remove_products_from_workflow_by_product_tag(conn, workflow_name, product_tag)
        ```
    """
    # Unlink (not delete) the products: only rows in the join table
    # products_workflows are removed; products and workflows stay intact.
    conn.execute(
        sa.text("""
            DELETE FROM products_workflows
            WHERE workflow_id = (
                SELECT workflow_id FROM workflows where name=:workflow_name
            )
            AND product_id IN (
                SELECT product_id FROM products WHERE tag=:product_tag AND name LIKE :product_name_like
            )
            """),
        workflow_name=workflow_name,
        product_tag=product_tag,
        product_name_like=product_name_like,
    )
def ensure_default_workflows(conn: sa.engine.Connection) -> None:
    """Ensure products_workflows table contains a link between all 'active' workflows and the set of workflows identified in the DEFAULT_PRODUCT_WORKFLOWS app_setting.

    Note that the 0th element of the uuids are taken when generating product_workflow_table_rows because sqlalchemy returns a row tuple even if selecting for a single column.
    """
    # Reflect the tables from the live connection so this helper can run inside
    # a migration without importing ORM models.
    products = sa.Table("products", sa.MetaData(), autoload_with=conn)
    workflows = sa.Table("workflows", sa.MetaData(), autoload_with=conn)
    product_workflows_table = sa.Table("products_workflows", sa.MetaData(), autoload_with=conn)

    all_product_uuids = conn.execute(sa.select(products.c.product_id)).fetchall()
    default_workflow_ids = conn.execute(
        sa.select(workflows.c.workflow_id).where(
            workflows.c.name.in_(app_settings.DEFAULT_PRODUCT_WORKFLOWS))).fetchall()

    # Cartesian product: link every product to every default workflow.
    product_workflow_table_rows = [(product_uuid[0], workflow_uuid[0])
                                   for product_uuid in all_product_uuids
                                   for workflow_uuid in default_workflow_ids]

    # PostgreSQL upsert: ON CONFLICT DO NOTHING keeps this idempotent when
    # some of the links already exist.
    conn.execute(
        sa.dialects.postgresql.insert(product_workflows_table, bind=conn).values(
            product_workflow_table_rows).on_conflict_do_nothing(
                index_elements=("product_id", "workflow_id")))
def add_product_block_relation_between_products_by_id(
        conn: sa.engine.Connection, in_use_by_id: Union[UUID, UUIDstr],
        depends_on_id: Union[UUID, UUIDstr]) -> None:
    """Create a product block relation linking two product blocks by their ids.

    Args:
        conn: DB connection as available in migration main file.
        in_use_by_id: ID of the product block that uses another product block.
        depends_on_id: ID of the product block that is used as dependency.

    Usage:
        ```python
        in_use_by_id = "in_use_by_id"
        depends_on_id = "depends_on_id"
        add_product_block_relation_between_products_by_id(conn, in_use_by_id, depends_on_id)
        ```
    """
    insert_relation = sa.text("""
        INSERT INTO product_block_relations (in_use_by_id, depends_on_id)
        VALUES (:in_use_by_id, :depends_on_id)
        """)
    conn.execute(insert_relation, in_use_by_id=in_use_by_id, depends_on_id=depends_on_id)
def test_sql_tracker_store_with_login_db_db_already_exists(
    postgres_login_db_connection: sa.engine.Connection,
):
    """SQLTrackerStore must tolerate the tracker database already existing when a login DB is used."""
    # Pre-create the tracker database so the store hits the "already exists"
    # path. CREATE DATABASE cannot run inside a transaction, hence AUTOCOMMIT.
    postgres_login_db_connection.execution_options(
        isolation_level="AUTOCOMMIT"
    ).execute(f"CREATE DATABASE {POSTGRES_TRACKER_STORE_DB}")
    tracker_store = SQLTrackerStore(
        dialect="postgresql",
        host=POSTGRES_HOST,
        port=POSTGRES_PORT,
        username=POSTGRES_USER,
        password=POSTGRES_PASSWORD,
        db=POSTGRES_TRACKER_STORE_DB,
        login_db=POSTGRES_LOGIN_DB,
    )
    # Exactly one database of that name should exist: the store must not have
    # tried to create a duplicate (and must not have failed).
    matching_rows = (
        postgres_login_db_connection.execution_options(isolation_level="AUTOCOMMIT")
        .execute(
            sa.text(
                "SELECT 1 FROM pg_catalog.pg_database WHERE datname = :database_name"
            ),
            database_name=POSTGRES_TRACKER_STORE_DB,
        )
        .rowcount
    )
    assert matching_rows == 1
    # Release the store's pooled connections so the test DB can be dropped.
    tracker_store.engine.dispose()
def create_products(conn: sa.engine.Connection, new: Dict) -> Dict[str, UUIDstr]:
    """Create new products with their fixed inputs.

    Args:
        conn: DB connection as available in migration main file
        new: an dict of your workflow data

    Returns:
        Mapping of product name to the product_id that was used (given or generated).

    Example:
        >>> new = {
                "Example Product": {
                    "product_id": "c9dc2374-514c-11eb-b685-acde48001122",
                    "product_type": "ProductType1",
                    "description": "Product description",
                    "tag": "ProductType",
                    "status": "active",
                    "fixed_inputs": {
                        "fixed_input_1": "value",
                        "fixed_input_2": "value2"
                    }
                },
                "Example Product 2": {
                    "product_type": "ProductType1",
                    "description": "Product description",
                    "tag": "ProductType",
                    "status": "active",
                    "product_block_ids": [
                        "37afe017-5a04-4d87-96b0-b8f88a328d7a"
                    ]
                }
            }
    """
    from uuid import uuid4  # local import: keeps the file-level import block untouched

    uuids = {}
    for name, product in new.items():
        product["name"] = name
        # "product_id" is optional (see "Example Product 2" above): generate a
        # fresh UUID instead of raising KeyError when it is not supplied.
        current_uuid = str(product.get("product_id") or uuid4())
        product["product_id"] = current_uuid
        uuids[name] = current_uuid
        conn.execute(
            sa.text("""
                INSERT INTO products (product_id, name, description, product_type, tag, status, created_at)
                VALUES (:product_id, :name, :description, :product_type, :tag, :status, now())
                ON CONFLICT DO NOTHING;
                """),
            product,
        )
        if "product_block_ids" in product:
            for product_block_uuid in product["product_block_ids"]:
                # Link many-to-many if product blocks are given.
                conn.execute(
                    sa.text(
                        "INSERT INTO product_product_blocks VALUES (:product_id, :product_block_id)"
                    ),
                    {
                        "product_id": current_uuid,
                        "product_block_id": product_block_uuid,
                    },
                )
        if "fixed_inputs" in product:
            create_fixed_inputs(conn, current_uuid, product["fixed_inputs"])
    return uuids
def delete_resource_type(conn: sa.engine.Connection, resource_type: str) -> None:
    """Delete a resource type and its occurrences in product blocks and products.

    Args:
        conn: DB connection as available in migration main file
        resource_type: a resource_type name you want to delete

    Example:
        >>> resource_type = "name_1"
        >>> delete_resource_type(conn, resource_type)
    """
    # One round trip: the CTE deletes the resource type and RETURNING feeds
    # its id into the link-table delete, so no orphaned rows remain.
    conn.execute(
        sa.text("""
            WITH deleted_pb AS (
                DELETE FROM resource_types WHERE resource_type=:resource_type
                RETURNING resource_type_id
            ),
            deleted_pb_rt AS (
                DELETE FROM product_block_resource_types WHERE resource_type_id IN (SELECT resource_type_id FROM deleted_pb)
            )
            SELECT * from deleted_pb;
            """),
        resource_type=resource_type,
    )
def remove_product_block_relation_between_products_by_id(
        conn: sa.engine.Connection, in_use_by_id: Union[UUID, UUIDstr],
        depends_on_id: Union[UUID, UUIDstr]) -> None:
    """Delete the product block relation between the two given product block ids.

    Args:
        conn: DB connection as available in migration main file.
        in_use_by_id: ID of the product block that uses another product block.
        depends_on_id: ID of the product block that is used as dependency.

    Usage:
        >>> remove_product_block_relation_between_products_by_id(conn, in_use_by_id, depends_on_id)
    """
    delete_relation = sa.text("""
        DELETE FROM product_block_relations
        WHERE in_use_by_id=:in_use_by_id AND depends_on_id=:depends_on_id
        """)
    conn.execute(delete_relation, in_use_by_id=in_use_by_id, depends_on_id=depends_on_id)
def delete_resource_types_from_product_blocks(conn: sa.engine.Connection, delete: Dict) -> None:
    """Delete resource type from product blocks.

    Note: the resource_type itself will not be deleted.

    Args:
        conn: DB connection as available in migration main file
        delete: dict of product_blocks and resource_types names that you want to unlink

    Example:
        >>> obsolete_stuff = {
                "ProductBlockName1": {
                    "resource_type1": "Resource description"
                },
                "ProductBlockName2": {
                    "resource_type1": "Resource description",
                    "resource_type2": "Resource description"
                }
            }
        >>> delete_resource_types_from_product_blocks(conn, obsolete_stuff)
    """
    # Only the link rows in product_block_resource_types are removed; the
    # descriptions in the mapping are informational and unused here.
    for product_block_name, resource_types in delete.items():
        conn.execute(
            sa.text("""DELETE FROM product_block_resource_types
                       USING resource_types
                       WHERE product_block_id = (SELECT product_block_id FROM product_blocks WHERE name=:product_block_name)
                       AND resource_types.resource_type_id = product_block_resource_types.resource_type_id
                       AND resource_types.resource_type IN :obsolete_resource_types"""
                    ),
            product_block_name=product_block_name,
            # IN expects a tuple for expanding bind parameters.
            obsolete_resource_types=tuple(resource_types.keys()),
        )
def create_fixed_inputs(conn: sa.engine.Connection, product_id: Union[UUID, UUIDstr], new: Dict) -> Dict[str, str]:
    """Create fixed inputs for a given product.

    Args:
        conn: DB connection as available in migration main file
        product_id: UUID of the product to link to
        new: an dict of your workflow data

    Returns:
        Mapping of fixed input name to its fixed_input_id.

    Example:
        >>> product_id = "id"
        >>> new = {
                "fixed_input_1": ("value", "f6a4f529-ad17-4ad8-b8ba-45684e2354ba"),
                "fixed_input_2": ("value", "5a67321d-45d5-4921-aa93-b8708b5d74c6")
            }
        >>> create_fixed_inputs(conn, product_id, new)

        without extra ID's you don't need the tuple:

        >>> product_id = "id"
        >>> new = {
                "fixed_input_1": "value",
                "fixed_input_2": "value",
            }
        >>> create_fixed_inputs(conn, product_id, new)
    """
    # Two prepared statements: one for callers that supply an explicit
    # fixed_input_id (tuple values), one that lets the DB assign the id.
    insert_fixed_input_with_id = sa.text(
        """INSERT INTO fixed_inputs (fixed_input_id, name, value, created_at, product_id)
           VALUES (:fixed_input_id, :key, :value, now(), :product_id)
           ON CONFLICT DO NOTHING;""")
    insert_fixed_input_without_id = sa.text(
        """INSERT INTO fixed_inputs (name, value, created_at, product_id)
           VALUES (:key, :value, now(), :product_id)
           ON CONFLICT DO NOTHING;""")

    uuids = {}
    for key, values in new.items():
        if isinstance(values, tuple):
            # (value, fixed_input_id) form: the caller chose the id.
            value, fixed_input_id = values
            uuids[key] = fixed_input_id
            conn.execute(
                insert_fixed_input_with_id,
                {
                    "fixed_input_id": fixed_input_id,
                    "key": key,
                    "value": value,
                    "product_id": product_id
                },
            )
        else:
            # Bare value form: insert, then read back the DB-generated id.
            conn.execute(insert_fixed_input_without_id, key=key, value=values, product_id=product_id)
            uuids[key] = get_fixed_input_id_by_name(conn, key)
    return uuids
def insert_review(conn: sqlalchemy.engine.Connection, bookk, review_text: str, rating: float):
    """Persist a review for the given book and return it as a Review."""
    # TODO: make status an enum or something
    insert_stmt = text('insert into reviews values (:book_id, :review_text, :rating)')
    conn.execute(insert_stmt, book_id=bookk.id, review_text=review_text, rating=rating)
    return Review(bookk.id, review_text, rating)
def _onSqlite3Begin(connection: sqlalchemy.engine.Connection) -> sqlalchemy.engine.Connection: assert connection.dialect.name == "sqlite" # Replace pysqlite's buggy transaction handling that never BEGINs with our # own that does, and tell SQLite to try to acquire a lock as soon as we # start a transaction (this should lead to more blocking and fewer # deadlocks). connection.execute("BEGIN IMMEDIATE") return connection
def delete_files_on_disk(connection: sa.engine.Connection):
    """Deletes files from disk and their references in database."""
    # First null out depot_file on every 'file' revision that still references
    # stored content, so the DB no longer points at files we are about to delete.
    delete_query = revision_helper.update() \
        .where(revision_helper.c.type == 'file') \
        .where(revision_helper.c.depot_file.isnot(None)) \
        .values(depot_file=None)
    connection.execute(delete_query)
    # Then remove the whole depot storage directory from disk.
    # ignore_errors: best-effort cleanup — an already-missing directory is fine.
    depot_storage_path = context.config.get_main_option('depot_storage_dir')
    shutil.rmtree(depot_storage_path, ignore_errors=True)
def insert_resource_type(conn: sa.engine.Connection, resource_type: str, description: str) -> None:
    """Insert a new resource type; a pre-existing one is left untouched (idempotent)."""
    insert_stmt = sa.text(
        """INSERT INTO resource_types (resource_type, description)
           VALUES (:resource_type, :description) ON CONFLICT DO NOTHING;""")
    conn.execute(insert_stmt, resource_type=resource_type, description=description)
def apply_migrations(conn: sqlalchemy.engine.Connection, paths: List[str]):
    """Execute every SQL statement found in the .sql files under *paths*.

    NOTE(review): statements are split naively on ';' — a semicolon inside a
    string literal or procedure body would be split incorrectly. Confirm the
    migration files only contain simple statements.
    """
    files = _find_sql_files(paths)
    for file in files:
        with open(file, "r") as fd:
            blob = fd.read()
        stmts = blob.split(";")
        for stmt in stmts:
            # Skip the empty fragments produced by trailing/consecutive ';'.
            if stmt.strip():
                conn.execute(sqlalchemy.text(stmt))
def insert_book_into_series(conn: sqlalchemy.engine.Connection, bookk: Book,
                            seriess: series.Series, position=None):
    """Link a book into a series, appending at the end when no position is given."""
    if position is None:
        position = series.series_len(conn, seriess)
    link_stmt = text('insert into book_series values (:book_id, :series_id, :position)')
    conn.execute(link_stmt, book_id=bookk.id, series_id=seriess.id, position=position)
def search_authors( conn: sqlalchemy.engine.Connection, keys: str, item_ids: typing.Optional[typing.Sequence[typing.Union[str, int]]] = None ) -> pandas.DataFrame: """Using full-text search table to search in authors' names. Allow searching multiple words. Notes ----- May not be efficient if the database is very huge. Parameters ---------- conn : sqlalchemy.engine.Connection Connection object from `sqlalchemy`. keys : str A single string containing all tokens/keys. Tokens/keys are separated by spaces. item_ids : None or a list-like of int/str Limit the candidate items to these item_ids. If None, search all items. Returns ------- pandas.DataFrame A dataframe with only `itemID`s. """ # delete residual table from past failed operations conn.execute("DROP TABLE IF EXISTS temp.searchable;") keys = " ".join(["\"{}\"".format(key) for key in keys.split()]) if item_ids is None: partial_table = "itemCreators" else: item_ids = ", ".join(map(str, item_ids)) partial_table = "(SELECT * FROM itemCreators WHERE itemID IN ({0}))".format( item_ids) # create a temporary FTS table conn.execute( "CREATE VIRTUAL TABLE temp.searchable USING FTS5(itemID UNINDEXED, firstName, lastName);" ) # copy to the FTS table conn.execute("""INSERT INTO temp.searchable SELECT itemID, GROUP_CONCAT(firstName) AS firstNAme, GROUP_CONCAT(lastName) AS lastName FROM {0} INNER JOIN creators USING(creatorID) GROUP BY itemID; """.format(partial_table)) # conduct the search results = pandas.read_sql_query( """SELECT itemID FROM temp.searchable WHERE searchable MATCH '{0}' ORDER BY rank; """.format(keys), conn) # delete the temporary table conn.execute("DROP TABLE IF EXISTS temp.searchable;") return results
def scrape_geojson(conn: sa.engine.Connection, dataset_id: str) -> None:
    """Download a Socrata dataset as GeoJSON and load its attribute table into RAW_SCHEMA.

    The geometry column is dropped; remaining columns are coerced to
    int/float/datetime where possible, then written with pandas `to_sql`.

    Args:
        conn: open SQLAlchemy connection to the target database.
        dataset_id: Socrata dataset identifier; also used as the table name.

    Raises:
        exceptions.SocrataParseError: when the downloaded file is not valid GeoJSON.
    """
    log = logger.bind(dataset_id=dataset_id, method="scrape_geojson")
    params = {"method": "export", "format": "GeoJSON"}
    url = f"{BASE}/geospatial/{dataset_id}"
    with utils.download_file(url, params=params) as fname:
        try:
            df = gpd.read_file(fname)
        except ValueError as e:
            raise exceptions.SocrataParseError from e
        # Best-effort type coercion, narrowest first: int, float, datetime.
        for column in df.columns:
            if column == "geometry":
                continue
            # Bad type inference
            try:
                df[column] = df[column].astype(int)
                continue
            except (ValueError, TypeError):
                pass
            try:
                df[column] = df[column].astype(float)
                continue
            except (ValueError, TypeError):
                pass
            try:
                df[column] = pd.to_datetime(df[column])
                continue
            except (ValueError, TypeError):
                pass
        log.info("Inserting")
        del df["geometry"]
        trans = conn.begin()
        try:
            # BUGFIX: quote schema and table *separately*. Quoting the dotted
            # name as one identifier targeted a table literally named
            # "<schema>.<dataset_id>" in the default schema, so the old table
            # was never dropped.
            conn.execute(f'DROP TABLE IF EXISTS "{RAW_SCHEMA}"."{dataset_id}"')
            df.to_sql(
                f"{dataset_id}",
                conn,
                if_exists="replace",
                index=False,
                schema=RAW_SCHEMA,
            )
        except Exception:
            trans.rollback()
            raise
        trans.commit()
def insert_book(conn: sqlalchemy.engine.Connection, title: str,
                authors: typing.List[author.Author],
                status: reading_status.Status):
    """Insert a book and its author links; returns the newly created Book."""
    result = conn.execute(
        text('insert into books values (NULL, :title, :status)'),
        title=title, status=status.id)
    new_book_id = result.lastrowid
    link_stmt = text('insert into book_author values (:book_id, :author_id, :index)')
    # Author order is preserved via the index column.
    for position, writer in enumerate(authors):
        conn.execute(link_stmt, book_id=new_book_id, author_id=writer.id, index=position)
    return Book(new_book_id, title, status)
def trim_to_max_pk(
    table_name: str,
    pk: str,
    conn: sqla.engine.Connection,
) -> int:
    """Delete the row holding the current maximum value of column *pk* and return that value.

    Returns -1 when the table is empty (nothing deleted).

    NOTE(review): despite the name, this removes only the single max-pk row,
    not a range of rows — confirm that is the intended behavior.
    """
    # Reflect the table definition from the live connection.
    metadata = sqla.MetaData(bind=conn)
    table = sqla.Table(table_name, metadata, autoload=True)
    start_id = conn.execute(func.max(table.c[pk])).scalar()
    if start_id is not None:
        conn.execute(table.delete().where(table.c[pk] == start_id))
        return start_id
    else:
        return -1
def _load_via_parent(self, connection: sa.engine.Connection, our_states: list[SARowDict], q: sa.sql.Select) -> abc.Iterator[SARowDict]:
    """Load the related rows for *our_states* in CHUNKSIZE batches and attach them under self.key.

    Adapted from SQLAlchemy's SelectInLoader; `# [o]` marks the original
    lines, `# [CUSTOMIZED]`/`# [ADDED]` mark this module's changes.
    """
    # mypy says it might be `None`. We don't want undefined behavior. Configure your relationships first.
    assert self.relation_property.uselist is not None

    # Use SelectInLoader
    query_info = self.loader._query_info
    # NOTE(review): both branches produce '' — looks like a leftover; confirm.
    label_prefix = f'' if query_info.load_with_join else ''

    # [o] uselist = self.uselist
    uselist: bool = self.relation_property.uselist
    # [o] _empty_result = () if uselist else None
    _empty_result: abc.Callable[[], Union[list, None]] = lambda: [] if uselist else None

    # [o]
    while our_states:
        # [o]
        chunk = our_states[0: self.CHUNKSIZE]
        our_states = our_states[self.CHUNKSIZE:]

        # [o]
        primary_keys = [
            key[0] if query_info.zero_idx
            else key
            for key, state_dict in chunk
        ]

        # [o] data = collections.defaultdict(list)
        data: dict[tuple, list[dict]] = collections.defaultdict(list)
        # Group fetched rows by their foreign-key tuple so each parent state
        # can pick up its own related rows.
        for k, v in itertools.groupby(  # type: ignore[call-overload]
            # [o] context.session.execute(
            # [o]     q, params={"primary_keys": primary_keys}
            # [CUSTOMIZED]
            connection.execute(q, {"primary_keys": primary_keys}),
            lambda row: get_foreign_key_tuple(row, query_info.pk_cols, self.fk_label_prefix),  # type: ignore[arg-type]
        ):
            # [o] data[k].extend(vv[1] for vv in v)
            # [CUSTOMIZED] convert MappingResult to an actual, mutable dict() to which we'll add keys
            data[k].extend(row_without_fk_columns(row, self.fk_label_prefix) for row in v)

        # [o] for key, state, state_dict, overwrite in chunk:
        for key, state_dict in chunk:
            # [o]
            collection = data.get(key, _empty_result())

            # [o]
            if not uselist and collection:
                if len(collection) > 1:
                    sa.util.warn(f"Multiple rows returned with uselist=False for attribute {self.relation_property}")

                # [o] state.get_impl(self.key).set_committed_value(state, state_dict, collection[0])
                state_dict[self.key] = collection[0]  # [CUSTOMIZED]
            else:
                # [o] state.get_impl(self.key).set_committed_value(state, state_dict, collection)
                state_dict[self.key] = collection  # [CUSTOMIZED]

            # [ADDED] Return loaded objects
            if uselist:
                yield from collection  # type: ignore[misc]
            else:
                yield collection  # type: ignore[misc]
def get_review_for_book(conn: sqlalchemy.engine.Connection, bookk) -> typing.Optional[Review]:
    """Return the book's review, or None when no review exists."""
    query = text('select book_id, review_text, rating from '
                 'books join reviews on book_id = id '
                 'where book_id = :book_id')
    row = conn.execute(query, book_id=bookk.id).fetchone()
    if row is None:
        return None
    return Review.from_db_row(row)
def get_books_by_reading_status(
        conn: sqlalchemy.engine.Connection,
        status: reading_status.Status) -> typing.List[Book]:
    """Return every book whose reading status matches *status*."""
    query = text('select * from books '
                 'where status = :status')
    result = conn.execute(query, status=status.id)
    return list(map(Book.from_db_row, result))
def get_resource_type_id_by_name(conn: sa.engine.Connection, name: str) -> UUID:
    """Look up the resource_type_id for the given resource type name.

    Raises IndexError when no such resource type exists.
    """
    query = sa.text(
        "SELECT resource_type_id FROM resource_types WHERE resource_types.resource_type=:name"
    )
    rows = conn.execute(query, name=name).fetchall()
    return rows[0][0]
def get_product_block_id_by_name(conn: sa.engine.Connection, name: str) -> UUID:
    """Look up the product_block_id for the given product block name.

    Raises IndexError when no such product block exists.
    """
    query = sa.text(
        "SELECT product_block_id FROM product_blocks WHERE product_blocks.name=:name"
    )
    rows = conn.execute(query, name=name).fetchall()
    return rows[0][0]
def convert_resource_type_relations_to_instance_relations(
        conn: sa.engine.Connection, resource_type_id: Union[UUID, UUIDstr],
        domain_model_attr: str, cleanup: bool = True) -> None:
    """Move resource type relations to instance type relations using resource type id.

    Note: It removes the resource type relations after moving! (set cleanup=False to try without the removal)

    Args:
        conn: DB connection as available in migration main file.
        resource_type_id: ID of the resource type that you want to move to instance relations.
        domain_model_attr: Name of the domain model attribute that connects the product blocks together.
        cleanup: remove old resource type relations after the migrate?

    Usage:
        >>> resource_type_id = "id"
        >>> domain_model_attr = "domain_model_attr"
        >>> convert_resource_type_relations_to_instance_relations(
                conn, resource_type_id, domain_model_attr
            )
    """
    # The CTE collects every subscription_instance_value for the resource type
    # (value = the referenced subscription id) together with its owning
    # instance; the outer SELECT turns each into an in_use_by/depends_on pair
    # pointing at the instances of the referenced subscription.
    conn.execute(
        sa.text("""
            INSERT INTO subscription_instance_relations (in_use_by_id, depends_on_id, order_id, domain_model_attr)
            WITH dependencies AS (
                SELECT siv.value as subscription_id, siv.subscription_instance_id as in_use_by_instance_id, si.product_block_id
                FROM subscription_instance_values AS siv
                left join subscription_instances as si on siv.subscription_instance_id = si.subscription_instance_id
                WHERE siv.resource_type_id=:resource_type_id
            )
            SELECT in_use_by_instance_id AS in_use_by_id, si.subscription_instance_id AS depends_on_id, '0' AS order_id, :domain_model_attr AS domain_model_attr
            FROM subscription_instances AS si
            INNER JOIN dependencies AS dep ON si.subscription_id=uuid(dep.subscription_id)
            ON CONFLICT DO NOTHING
            """),
        resource_type_id=resource_type_id,
        domain_model_attr=domain_model_attr,
    )
    if cleanup:
        # Remove the now-migrated resource type values.
        conn.execute(
            sa.text("""
                DELETE FROM subscription_instance_values WHERE resource_type_id=:resource_type_id
                """),
            resource_type_id=resource_type_id,
        )
def insert(connection: sa.engine.Connection, Model: Union[sa.sql.Selectable, type], *values: dict):
    """Helper: insert many rows into Model's table with one low-level INSERT statement.

    Args:
        connection: open SQLAlchemy connection.
        Model: the model (or selectable) whose table receives the rows.
        *values: one dict per row. The FIRST dict must contain every key that
            appears in any row, because it determines the generated columns.

    Raises:
        ValueError: when no rows are given, or the first row does not contain
            all keys used across the rows.

    Example:
        insert(connection, Model,
               dict(id=1),
               dict(id=2),
               dict(id=3),
               )
    """
    # Validate with real exceptions, not `assert`: asserts are stripped under -O.
    if not values:
        raise ValueError("insert() requires at least one row dict")
    all_keys = set(chain.from_iterable(d.keys() for d in values))
    if set(values[0].keys()) != all_keys:
        raise ValueError('The first dict() must contain all possible keys')
    stmt = sa.insert(Model).values(values)
    connection.execute(stmt)
def insert_series(conn: sqlalchemy.engine.Connection, series_name: str, authors, books):
    """Create a series, link its authors and books, and return the new Series."""
    created = conn.execute(text('insert into series values (NULL, :series_name)'),
                           series_name=series_name)
    new_series_id = created.lastrowid
    author_link = text('insert into series_author values (:series_id, :author_id)')
    for writer in authors:
        conn.execute(author_link, series_id=new_series_id, author_id=writer.id)
    book_link = text('insert into book_series values (:book_id, :series_id, :position)')
    # Book order within the series is preserved via the position column.
    for position, bk in enumerate(books):
        conn.execute(book_link, book_id=bk.id, series_id=new_series_id, position=position)
    return Series(new_series_id, series_name)
def read_sql_tmpfile(sql: str, con: sqla.engine.Connection, *args, **kwargs) -> pd.DataFrame:
    """Run *sql*, spool the result to a CSV via MySQL's INTO OUTFILE, and read it with pandas.

    Extra *args/**kwargs are forwarded to pd.read_csv.

    NOTE(review): INTO OUTFILE is written by the *server* process — this only
    works when the server shares a filesystem with this process and has FILE
    privilege; confirm deployment.
    NOTE(review): *sql* is concatenated into larger statements, so it must come
    from trusted code, never from user input.
    """
    with tempfile.TemporaryDirectory() as tmp:
        path = os.path.join(tmp, "mycsv")
        # "limit 0" probe: fetch only the column names of the query.
        curr = con.execute(sql + " limit 0")
        # Header row: column names as string literals, prepended via UNION ALL
        # because INTO OUTFILE does not write a header line itself.
        cols = ",".join((f"'{i}'" for i in curr.keys()))
        to_csv = f"""
            SELECT * FROM
            (SELECT {cols}
            UNION ALL
            {sql}) as tmp
            INTO OUTFILE '{path}'
            FIELDS TERMINATED BY ','
            OPTIONALLY ENCLOSED BY '"'
            LINES TERMINATED BY '\n';
            """
        con.execute(to_csv)
        return pd.read_csv(path, *args, **kwargs)
def delete_resource_type_by_id(conn: sa.engine.Connection, id: Union[UUID, UUIDstr]) -> None:
    """Delete the resource type with the given resource type id.

    Args:
        conn: DB connection as available in migration main file.
        id: ID of the resource type to delete.

    Usage:
        ```python
        resource_type_id = "id"
        delete_resource_type_by_id(conn, resource_type_id)
        ```
    """
    delete_stmt = sa.text("DELETE FROM resource_types WHERE resource_type_id=:id")
    conn.execute(delete_stmt, id=id)