def store_data(cur: Cursor, config: dict, table: str, csv_file: Path):
    """Store the CSV data in the configured MySQL table."""
    LOGGER.debug('Create database if it does not exist')
    database = config['mysql']['database']
    cur.execute(f"CREATE DATABASE IF NOT EXISTS {database}")
    cur.execute(f"USE {database}")
    schema = read_schema(csv_file)
    types = ', '.join([f'{name} {col_type}' for name, col_type in schema])
    query_create_table = f"CREATE TABLE IF NOT EXISTS {table} ({types})"
    LOGGER.debug('Query> %s', query_create_table)
    cur.execute(query_create_table)
    LOGGER.debug('Check whether table %r is empty', table)
    cur.execute(f'SELECT COUNT(*) FROM {table}')
    if data := cur.fetchall()[0][0]:
        raise Csv2MysqlError(f'Table {table} already has {data} rows')
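
# store_data above depends on a read_schema() helper that is not shown in this
# collection. The sketch below is a minimal, hypothetical version, assuming it
# returns one (column name, MySQL type) pair per CSV column; the real helper
# may infer richer types than the TEXT fallback used here.
import csv
from pathlib import Path
from typing import List, Tuple


def read_schema(csv_file: Path) -> List[Tuple[str, str]]:
    """Hypothetical sketch: derive (column name, MySQL type) pairs from a CSV."""
    with csv_file.open(newline='') as handle:
        header = next(csv.reader(handle))
    # Without further type inference, default every column to TEXT.
    return [(name.strip(), 'TEXT') for name in header]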
def _flush_mappings_batch(
    cur: Cursor,
    firewall_table: str,
    msource_id: int,
    mappings: Sequence[Tuple[int, str]],
) -> None:
    """Flush accumulated mappings batch to Firewall database."""
    query = (
        f"INSERT INTO {firewall_table}"
        f" (MEASUREMENT_SOURCE_ID, ID, NAME, CREATED_BY, CREATED_ON)"
        f" VALUES"
    ) + ','.join(
        " (" + ', '.join([
            str(msource_id),
            str(mapping[0]),
            _sql_str_lit(mapping[1]),
            str(-2),
            'CURRENT_TIMESTAMP()',
        ]) + ")"
        for mapping in mappings
    ) + (
        " ON DUPLICATE KEY UPDATE"
        " NAME = VALUES(NAME), MODIFIED_BY = -2, LAST_MODIFIED = CURRENT_TIMESTAMP()"
    )
    cur.execute(query)
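
# _flush_mappings_batch interpolates names through a _sql_str_lit() helper that
# is not shown here. A minimal sketch, assuming its only job is to produce a
# single-quoted MySQL string literal with backslashes and quotes escaped; the
# real helper might instead rely on the driver's escaping or parameter binding.
def _sql_str_lit(value: str) -> str:
    """Hypothetical sketch of the string-literal helper used above."""
    escaped = value.replace('\\', '\\\\').replace("'", "''")
    return f"'{escaped}'"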
def DumpCurrentSchema(cursor: Cursor) -> Text:
    """Dumps current database schema."""
    cursor.execute("SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES "
                   "WHERE table_schema = (SELECT DATABASE())")
    defs = []
    for table, in sorted(cursor.fetchall()):
        cursor.execute("SHOW CREATE TABLE `{}`".format(table))
        rows = cursor.fetchall()
        defs.append(rows[0][1])
    return "\n\n".join(defs)
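
# A hypothetical caller of DumpCurrentSchema; `conn` is assumed to be an open
# DB-API connection to the database whose schema should be captured.
cursor = conn.cursor()
try:
    schema_sql = DumpCurrentSchema(cursor)
finally:
    cursor.close()

with open('schema_dump.sql', 'w') as out:
    out.write(schema_sql + '\n')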
def _update_firewall_db(
    *,
    cur: Cursor,
    log: Logger,
    extracted_mappings: Sequence[Mapping[str, int]],
) -> Tuple[Set[int], Set[Tuple[int, int, int]]]:
    """Update firewall DB with the extracted campaign id mappings."""
    # Map adv entity ids from events to IAS campaign ids.
    ias_adv_entity_ids_sql = ', '.join(str(v) for v in set(
        mapping['ias_adv_entity_id'] for mapping in extracted_mappings
    ))
    query = (
        f"SELECT ID, CAMPAIGN_ID"
        f" FROM ADV_ENTITY"
        f" WHERE ID IN ({ias_adv_entity_ids_sql})"
        f" LOCK IN SHARE MODE"
    )
    log.info("getting IAS campaign ids for adv entities: %s", query)
    cur.execute(query)
    ias_campaign_ids = {row[0]: row[1] for row in cur.fetchall()}
    log.info("result (adv entity id/IAS campaign id): %s", ias_campaign_ids)

    # Load existing campaign mappings.
    partner_campaign_ids_sql = ', '.join(str(v) for v in set(
        mapping['partner_measured_campaign_id'] for mapping in extracted_mappings
    ))
    query = (
        f"SELECT ID, CAMPAIGN_ID"
        f" FROM PARTNER_MEASURED_CAMPAIGN"
        f" WHERE ID IN ({partner_campaign_ids_sql})"
        f" AND MEASUREMENT_SOURCE_ID = {MSOURCE_ID}"
        f" FOR UPDATE"
    )
    log.info("getting existing mappings: %s", query)
    cur.execute(query)
    existing_ias_campaign_ids = {row[0]: row[1] for row in cur.fetchall()}
    log.info("result (partner campaign id/IAS campaign id): %s",
             existing_ias_campaign_ids)

    # Collect exceptions.
    invalid_ias_adv_entity_ids = set()
    overriding_campaign_id_mappings = set()

    # Update mappings.
    for mapping in extracted_mappings:
        partner_campaign_id = mapping['partner_measured_campaign_id']
        ias_adv_entity_id = mapping['ias_adv_entity_id']
        ias_campaign_id = ias_campaign_ids.get(ias_adv_entity_id)
        existing_ias_campaign_id = existing_ias_campaign_ids.get(partner_campaign_id)
        if ias_campaign_id is None:
            log.warning("invalid adv entity id %s", ias_adv_entity_id)
            invalid_ias_adv_entity_ids.add(ias_adv_entity_id)
        elif (
            existing_ias_campaign_id is not None
            and existing_ias_campaign_id != ias_campaign_id
        ):
            log.warning(
                "partner campaign %s is already linked to a different"
                " IAS campaign %s",
                partner_campaign_id, existing_ias_campaign_id
            )
            overriding_campaign_id_mappings.add((
                partner_campaign_id, existing_ias_campaign_id, ias_campaign_id
            ))
        elif existing_ias_campaign_id == ias_campaign_id:
            log.info(
                "partner campaign %s is already linked to IAS campaign %s,"
                " skipping it",
                partner_campaign_id, existing_ias_campaign_id
            )
        elif partner_campaign_id in existing_ias_campaign_ids:
            query = (
                f"UPDATE PARTNER_MEASURED_CAMPAIGN"
                f" SET CAMPAIGN_ID = {ias_campaign_id}"
                f" WHERE ID = {partner_campaign_id}"
                f" AND MEASUREMENT_SOURCE_ID = {MSOURCE_ID}"
            )
            log.info("setting campaign mapping: %s", query)
            cur.execute(query)
        else:
            query = (
                f"INSERT INTO PARTNER_MEASURED_CAMPAIGN"
                f" (MEASUREMENT_SOURCE_ID, ID, NAME, CAMPAIGN_ID)"
                f" VALUES ({MSOURCE_ID}, {partner_campaign_id}"
                f", 'DATA NOT RECEIVED', {ias_campaign_id})"
            )
            log.info("creating new mapping: %s", query)
            cur.execute(query)

    # Return exceptions.
    return (invalid_ias_adv_entity_ids, overriding_campaign_id_mappings)
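
# The expected shape of extracted_mappings and the two returned exception sets
# are easiest to see in a usage sketch. Everything below is hypothetical: the
# `connection` object, the commit handling, and the example ids are assumptions,
# not taken from the original code; MSOURCE_ID is assumed to be defined elsewhere.
import logging

extracted_mappings = [
    {'partner_measured_campaign_id': 1001, 'ias_adv_entity_id': 42},
    {'partner_measured_campaign_id': 1002, 'ias_adv_entity_id': 43},
]

with connection.cursor() as cur:
    invalid_ids, overrides = _update_firewall_db(
        cur=cur,
        log=logging.getLogger(__name__),
        extracted_mappings=extracted_mappings,
    )
    connection.commit()

# invalid_ids: adv entity ids with no matching ADV_ENTITY row.
# overrides: (partner campaign id, existing IAS campaign id, new IAS campaign id)
# tuples where a different link already existed and was therefore not changed.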
def __init__(self, cursor: Cursor):
    self.cursor = cursor
    cursor.execute('SET NAMES utf8;')
    cursor.execute('SET CHARACTER SET utf8;')
    cursor.execute('SET character_set_connection=utf8;')
def GetLatestMigrationNumber(cursor: Cursor) -> int:
    """Returns the number of the latest migration done."""
    cursor.execute("SELECT MAX(migration_id) FROM _migrations")
    rows = cursor.fetchall()
    return rows[0][0]
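
# MAX() over an empty _migrations table yields NULL, so GetLatestMigrationNumber
# can return None despite its int annotation. A hedged caller-side sketch; the
# -1 sentinel below is illustrative and not taken from the original code.
latest = GetLatestMigrationNumber(cursor)
if latest is None:
    latest = -1  # No migrations recorded yet.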
def __init__(self, connection):
    self.thread_delay = 0.0
    Cursor.__init__(self, connection)