def _run_query(query: str, conn: pyodbc.Connection) -> None:
    """
    Run query without fetching results.

    Args:
        query (str): SQL statement (example: 'select x, y from table_XY')
        conn (pyodbc.Connection): query engine connection object
    """
    conn.cursor().execute(query)
def route(connection: pyodbc.Connection):
    sql = SqlChain(
        """INSERT INTO test(id, x, ch) VALUES ('a', 1, 'a'), ('b', 2, 'b')""",
        cursor=connection.cursor())
    res_sql = sql.chain("""SELECT * FROM test;""")
    sql.execute()
    return res_sql.get()
def fetch(query: str, conn: pyodbc.Connection, chunksize: Optional[int] = None):
    """
    Run query and fetch results.

    Args:
        query (str): SQL statement
        conn (pyodbc.Connection): query engine connection object
        chunksize (int): number of rows to fetch per batch; fetch everything
            at once when None

    Returns:
        results: list of row objects

    Raises:
        Exception: if the query fails to execute
    """
    try:
        curr = conn.cursor().execute(query)
    except Exception as e:
        msg = str(e) + '----' + 'The failed query: {query}'.format(query=query)
        raise Exception(msg)
    else:
        if chunksize is None:
            results = curr.fetchall()
        else:
            results = _fetch_many(curr, chunksize)
    finally:
        # close the connection when one was supplied
        if conn is not None:
            conn.close()
    return results
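# fetch() above delegates batched retrieval to _fetch_many(), which is not defined in
# this collection. A minimal sketch of what such a helper might look like, assuming
# chunksize is the number of rows pulled per cursor.fetchmany() call:
def _fetch_many(cursor: pyodbc.Cursor, chunksize: int) -> list:
    """Fetch all rows from the cursor in batches of `chunksize` rows."""
    results = []
    while True:
        batch = cursor.fetchmany(chunksize)
        if not batch:
            break
        results.extend(batch)
    return results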
def get_table_row_counts(self, conn: pyodbc.Connection) -> Dict:
    qry = self.get_table_row_counts_qry(self.source)
    with conn.cursor() as cursor:
        data = cursor.execute(qry).fetchall()
        columns = [key[0] for key in cursor.description]
        data = rows_to_json(data, columns)
        return {row['table_name']: row['row_count'] for row in data}
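# get_table_row_counts() above (and get_table_columns() further below) rely on a
# rows_to_json() helper that is not defined here. A plausible sketch, assuming it
# simply pairs each row with the column names taken from cursor.description:
def rows_to_json(rows: list, columns: list) -> list:
    """Convert pyodbc rows into a list of dicts keyed by column name."""
    return [dict(zip(columns, row)) for row in rows]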
def load_manifest(json_data: dict, conn: pyodbc.Connection) -> Optional[Exception]:
    """
    Use SQL to load records into the data structure.

    Args:
        json_data: manifest in a Python dictionary
        conn: database connection object

    Returns:
        None if the inserts succeed, otherwise the exception that was raised
    """
    cursor = conn.cursor()
    fieldnamesql = ("INSERT INTO ICO.inventory "
                    "(vendor_id, vendor_product_code, quantity_kg, date_arrival)")
    today = date.today()
    mydate = today.strftime("%Y-%m-%d")
    try:
        items = json_data.get("items")
        for item in items:
            # Build the VALUES clause from the manifest item. Note: string
            # concatenation like this is vulnerable to SQL injection;
            # parameterized queries would be safer.
            valsql = fieldnamesql + " VALUES ("
            valsql += "'" + item.get("vendor_id") + "', "
            valsql += "'" + item.get("vendor_product_code") + "', "
            valsql += str(item.get("quantity_kg")) + ", "
            valsql += "'" + mydate + "')"
            print("Inserting: " + valsql)
            cursor.execute(valsql)
            conn.commit()
    except Exception as exp:
        return exp
    return None
def get_all_locations(conn: pyodbc.Connection) -> List[Location]:
    """
    Queries the SQL database for every X, Y, latitude, longitude, address and
    UPRN under a specific constraint and creates a list of Location objects
    containing each one.

    Args:
        conn (pyodbc.Connection): The connection to the database to query

    Returns:
        List[Location]: A list of Location objects containing information
            about the locations returned by the SQL query
    """
    locations = []
    with open('.\\get_all_locations_per_round.sql', 'r') as loc_query_f:
        loc_query_all = loc_query_f.read()
    cursor = conn.cursor()
    cursor.execute(loc_query_all)
    locs = cursor.fetchall()
    for loc in locs:
        locations.append(
            Location('{:.2f}'.format(loc.x),
                     '{:.2f}'.format(loc.y),
                     '{:.15f}'.format(loc.lat),
                     '{:.15f}'.format(loc.lng),
                     loc.addr, loc.street, loc.town, loc.postcode,
                     str(loc.uprn),
                     [loc.REF, loc.RECY, loc.MIX, loc.GLASS, loc.GW]))
    return locations
def get_table_columns(self, conn: pyodbc.Connection) -> Dict:
    qry = self.get_table_meta_qry(self.source)
    with conn.cursor() as cursor:
        data = cursor.execute(qry).fetchall()
        columns = [key[0] for key in cursor.description]
        data = rows_to_json(data, columns)
        return self.process_table_columns(data)
def get_ims_notifications(conn: pyodbc.Connection) -> List[dict]:
    sql = """
    SELECT n.CreationDate, n.RequiredEndDate, n.NotificationNumber,
           i.ID as IMSTag_ID, i.tag, mel.*
    FROM TechnicalInfoTag as t,
         TechnicalInfoTag_IMSTag as ti,
         Notification as n,
         IMSTag i,
         ModelElement me
         left join aas.vModelElementLimitPivot mel on me.ID = mel.ModelElement_ID
    where t.ID = ti.TechnicalInfoTag_ID
      and ti.IMSTag_ID = i.ID
      and t.ID = n.TechnicalInfoTag_ID
      and i.ID = me.IMSTag_ID
      and (n.FailureImpact IN ('D','S') or NOTIFICATIONTYPE IN ('M2','M3','M4'))
    order by n.NotificationNumber desc
    """
    cursor = conn.cursor()
    cursor.execute(sql)
    desc = cursor.description
    column_names = [col[0] for col in desc]
    data = [dict(zip(column_names, row)) for row in cursor.fetchall()]
    cursor.close()
    return data
def pyodbc_query_fetchall(conn: pyodbc.Connection, query: str) -> Union[List[Any], None]:
    """Execute pyodbc query and fetchall, see https://github.com/mkleehammer/pyodbc/wiki/Cursor.

    Args:
        conn (pyodbc.Connection): database connection object
        query (str): sql query

    Returns:
        Union[List[Any], None]: list of zero or more result rows
    """
    cursor = conn.cursor()
    result = cursor.execute(query).fetchall()
    cursor.close()
    return result
def get_location_from_uprn(conn: pyodbc.Connection, uprn: str) -> Location:
    """
    Queries the SQL database for the X, Y, latitude, longitude and address
    associated with a UPRN and creates a Location object out of it.

    Args:
        conn (pyodbc.Connection): The connection to the database to query
        uprn (str): The UPRN to query with

    Returns:
        Location: The Location object containing the latitude, longitude and
            address of the given UPRN, as well as the UPRN itself
    """
    with open('.\\get_location.sql', 'r') as loc_query_f:
        loc_query = loc_query_f.read()
    cursor = conn.cursor()
    cursor.execute(loc_query, uprn)
    loc = cursor.fetchone()
    return Location(str(loc.x), str(loc.y), str(loc.lat), str(loc.lng),
                    loc.addr, loc.street, loc.town, loc.postcode, uprn,
                    [loc.REF, loc.RECY, loc.MIX, loc.GLASS, loc.GW])
def get_objects_id(self, cnx: pyodbc.Connection, query: str) -> list:
    """
    Run the selectID query. The query must return only one column named "id".

    :return: list of object IDs to use in the select query that fetches the fields
    """
    cur = cnx.cursor()
    self.log.debug(query)
    try:
        cur.execute(query)
        rows = cur.fetchall()
        if not rows:
            return []
        return [i[0] for i in rows]
    except pyodbc.Error as err:
        self.log.error(str(err))
        return []
    finally:
        cur.close()
def select_lista_registros_database(
        conn: db.Connection,
        nm_tabela: str,
        lt_campos_select: list,
        condicao_where: str = None,
        condicao_groupby: str = None,
        condicao_orderby: str = None,
        str_select_completo_formatado: str = None) -> list:
    """
    Runs a SELECT against the specified database.

    Args:
        conn: open database connection
        nm_tabela: table name
        lt_campos_select: list of columns to select
        condicao_where / condicao_groupby / condicao_orderby: optional clauses
        str_select_completo_formatado: fully formatted SELECT statement; when
            provided it is executed as-is

    Returns:
        list: selected rows, each converted to its string representation

    TODOs:
        implement exception handling.
    """
    str_linha_resultado_select = ''
    lt_registros_selecionados = []

    if conn is not None:
        print("[INFO]: connection established successfully.")
    else:
        print("[ERRO]: failed to obtain the connection.")
        return lt_registros_selecionados

    try:
        cursor = conn.cursor()
        if str_select_completo_formatado:
            cursor.execute(str_select_completo_formatado)
        linha = cursor.fetchone()
        while linha:
            str_linha_resultado_select = str(linha)
            lt_registros_selecionados.append(str_linha_resultado_select)
            linha = cursor.fetchone()
    except db.DatabaseError as de:
        print("[EXCEPT]: database_util.get_lista_registros_database() -> {}".format(de))

    return lt_registros_selecionados
def update_dolphin_database(dbname: pyodbc.Connection, updatesql: str) -> int:
    dolphincursor = dbname.cursor()
    try:
        if DolphinTestModeEnabled:
            logging.info(
                "Dolphin Test mode is enabled. We would update Dolphin with {0}"
                .format(updatesql))
        else:
            dolphincursor.execute(updatesql)
            dbname.commit()
    except Exception as e:
        logging.error(
            "Dolphin database update failed with {0}, sql = {1}".format(
                e, updatesql))
        dbname.rollback()
        dolphincursor.close()
        return -1
    dolphincursor.close()
    return 0
def get_latest_capture_instance_names(
        db_conn: pyodbc.Connection, capture_instance_version_strategy: str,
        capture_instance_version_regex: str) -> List[str]:
    if capture_instance_version_strategy == 'regex' and not capture_instance_version_regex:
        raise Exception(
            'Please provide a capture_instance_version_regex when specifying the `regex` '
            'capture_instance_version_strategy.')

    result = []
    table_to_capture_instances = collections.defaultdict(list)
    capture_instance_version_regex = capture_instance_version_regex and re.compile(
        capture_instance_version_regex)

    with db_conn.cursor() as cursor:
        cursor.execute(constants.CDC_CAPTURE_INSTANCES_QUERY)
        for row in cursor.fetchall():
            as_dict = {
                'source_object_id': row[0],
                'capture_instance': row[1],
                'create_date': row[2],
            }
            if capture_instance_version_regex:
                match = capture_instance_version_regex.match(row[1])
                as_dict['regex_matched_group'] = match and match.group(1) or ''
            table_to_capture_instances[as_dict['source_object_id']].append(as_dict)

    for source_id, capture_instances in table_to_capture_instances.items():
        if capture_instance_version_strategy == 'create_date':
            latest_instance = sorted(capture_instances,
                                     key=lambda x: x['create_date'])[-1]
        elif capture_instance_version_strategy == 'regex':
            latest_instance = sorted(capture_instances,
                                     key=lambda x: x['regex_matched_group'])[-1]
        else:
            raise Exception(
                f'Capture instance version strategy "{capture_instance_version_strategy}" not recognized.')
        result.append(latest_instance['capture_instance'])

    return result
def genTrials(conn: pyodbc.Connection, amount: int):
    fake = Faker()
    cursor = conn.cursor()
    conclusions = [
        "'innocent'", "'sent to 5 years in prison'",
        "'sent to 10 years in prison'", "'sent to 15 years in prison'",
        "'sent to 2 years in prison'", "'sent to 20 years in prison'",
        "'sent to death'"
    ]
    teams = ["'Offense'", "'Defense'"]
    # generate trial:
    for i in range(amount):
        # subject id
        sid = sqlexec(conn, return_=True, query='''
            select top 1 [specialization id]
            from [specializations]
            order by newid();''')[0][0]
        # date
        date = f"'{fake.date_between(start_date='-8y', end_date='today')}'"
        text = f"'{fake.paragraph(nb_sentences=5, variable_nb_sentences=False)}'"
        ctid = getCity(conn)
        crid = sqlexec(conn, f'''
            select top 1 [court id]
            from [courts]
            where [city id] = {ctid}
            order by newid();
            ''', return_=True)[0][0]
        try:
            cursor.execute(f'''
                insert into [trials]([subject id], [date], [description], [court id])
                values({sid}, {date}, {text}, {crid});
                ''')
            cursor.commit()
        except pyodbc.IntegrityError:
            continue
        trid = sqlexec(conn, return_=True, query=f'''
            select top 1 [trial id]
            from [trials]
            where [subject id] = {sid} and [date] = {date}
              and [description] = {text} and [court id] = {crid};
            ''')[0][0]
        print(trid)
        # generate defendants:
        for pid in getPerson(conn, ctid, int(np.random.uniform(1.1, 10))):
            pid = pid[0]
            conclusion = choice(conclusions)
            cursor.execute(f'''
                insert into [defendants]([trial id], [person id], [conclusion])
                values({trid}, {pid}, {conclusion});''')
            cursor.commit()
        # generate judges in trial:
        for (jid, cridt) in getJudges(conn, ctid, int(np.random.uniform(1.1, 6))):
            cursor.execute(f'''
                insert into [judges in trial]([trial id], [person id], [court id])
                values({trid}, {jid}, {cridt});''')
            cursor.commit()
        # generate lawyers in trial:
        i = 0
        team = teams[0]
        for (lid, brid) in getLawyers(
                conn, getCity(conn),
                int(np.random.uniform(2.1, np.random.uniform(2.2, 10)))):
            if i == 0:
                team = teams[0]
            elif i == 1:
                team = teams[1]
            else:
                team = choice(teams)
            cursor.execute(f'''
                insert into [lawyers in trial]([trial id], [person id], [bar id], [team])
                values({trid}, {lid}, {brid}, {team});''')
            cursor.commit()
            i += 1
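# genTrials() leans on a sqlexec() helper (plus getCity/getPerson/getJudges/getLawyers)
# that is not defined in this collection. A minimal sketch of sqlexec() alone, assuming
# return_=True means "fetch and return all rows" and that writes are committed otherwise:
def sqlexec(conn: pyodbc.Connection, query: str, return_: bool = False):
    """Execute a statement; return the fetched rows when return_ is True."""
    cursor = conn.cursor()
    cursor.execute(query)
    if return_:
        return cursor.fetchall()
    cursor.commit()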
def sql_parse(self, cnx: pyodbc.Connection, lootId) -> dict:
    """
    Execute the configured queries.

    :param cnx: current cnx
    :param lootId: document identifier from source
    """

    def parse_rows(select: str, fetchOne=False, group=False) -> [dict]:
        """
        Parse result rows. Each row is converted into a dictionary.

        :param select: the query itself
        :param fetchOne: fetch a single row, return a single dictionary
        :param group: if more than one row comes back, pack them into a single
            dictionary, i.e. each field will hold a list of values.
        :return: list of dictionaries, each with its own set of fields
        """

        def row_as_dict(cur, rows):
            columns = [column[0] for column in cur.description]
            for row in rows:
                yield dict(zip(columns, row))

        try:
            self.log.debug(select)
            cur.execute(select)
        except Exception as e:
            self.log.error(f"Fail to execute query: {e}")
            self.syncCount[5] += 1
            return [{}]
        try:
            if fetchOne:
                rows = cur.fetchone()
            else:
                rows = cur.fetchall()
        except MemoryError:
            # guards against runaway queries returning billions of rows
            self.log.error("Not enough memory to fetch rows. Simplify your query")
            self.syncCount[5] += 1
            cur.close()
            return [{}]
        if not rows:
            if not self.task['skipEmptyRows']:
                # this way a row is always produced, even if the query returns nothing
                self.log.warning('Query has returned empty row. Will be replaced with <None>')
                body = {}
                try:
                    curDes = cur.description
                    for col in curDes:
                        body[col[0]] = None
                except Exception as e:
                    self.syncCount[5] += 1
                    self.log.error(f'Fail to parse empty row: {e}')
                finally:
                    return [body]
        if fetchOne:
            return [dict(zip([column[0] for column in cur.description], rows))]
        else:
            rows = row_as_dict(cur, rows)
            ls = []
            groupBody = {}
            try:
                isFirst = True
                for row in rows:
                    body = {}
                    for col, val in row.items():
                        if val is not None:  # TODO allow get null value?
                            if group and not isFirst:
                                try:
                                    groupBody[col].append(val)
                                except KeyError:
                                    groupBody[col] = [val]
                                except AttributeError:
                                    groupBody[col] = [groupBody[col], val]
                            else:
                                body[col] = val
                    isFirst = False
                    if not group:
                        ls.append(body)
                if group:
                    ls.append(groupBody)
                return ls
            except Exception as e:
                self.log.error(f'Fail to parse row: {e}')
                self.syncCount[5] += 1
                return [{}]

    cur = cnx.cursor()
    fields = {}
    for select in self.task['simpleQuery']:
        select = select.replace('@loot_id@', str(lootId), -1)
        for i in fields:
            select = select.replace(f'@{i}@', str(fields[i]), -1)
        fields = {**fields, **parse_rows(select, fetchOne=True)[0]}
    # nested queries
    for bundle in self.task['bundleQuery']:
        for name, selectList in bundle.items():
            select = selectList[0].replace('@loot_id@', str(lootId), -1)
            for i in fields:
                select = select.replace(f'@{i}@', str(fields[i]), -1)
            subFields = parse_rows(select)
            if subFields:
                fields[name] = []
                if len(selectList) > 1:
                    for sub in subFields:
                        for select in selectList[1:]:
                            select = select.replace('@loot_id@', str(lootId), -1)
                            for i in sub:
                                select = select.replace(f'@{i}@', str(sub[i]), -1)
                            for i in fields:
                                select = select.replace(f'@{i}@', str(fields[i]), -1)
                            sub = {**sub, **parse_rows(select, group=True)[0]}
                        fields[name].append(sub)
                else:
                    fields[name].append(subFields)
    return fields
def should_terminate_due_to_capture_instance_change(
        db_conn: pyodbc.Connection,
        progress_tracker: progress_tracking.ProgressTracker,
        capture_instance_version_strategy: str,
        capture_instance_version_regex: str,
        capture_instance_to_topic_map: Dict[str, str],
        current_capture_instances: Dict[str, Dict[str, Any]],
        table_whitelist_regex: str,
        table_blacklist_regex: str) -> bool:
    new_capture_instances = get_latest_capture_instances_by_fq_name(
        db_conn, capture_instance_version_strategy,
        capture_instance_version_regex, table_whitelist_regex,
        table_blacklist_regex)

    current = {
        k: v['capture_instance_name']
        for k, v in current_capture_instances.items()
    }
    new = {
        k: v['capture_instance_name']
        for k, v in new_capture_instances.items()
    }

    if new == current:
        logger.debug('Capture instances unchanged; continuing...')
        return False

    def better_json_serialize(obj):
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        if isinstance(obj, (bytes,)):
            return f'0x{obj.hex()}'
        raise TypeError("Type %s not serializable" % type(obj))

    logger.info(
        'Change detected in capture instances. Current: %s New: %s',
        json.dumps(current_capture_instances, default=better_json_serialize),
        json.dumps(new_capture_instances, default=better_json_serialize))

    for fq_name, current_ci in current_capture_instances.items():
        if fq_name in new_capture_instances:
            new_ci = new_capture_instances[fq_name]
            last_recorded_progress = progress_tracker.get_last_recorded_progress_for_topic(
                capture_instance_to_topic_map[current_ci['capture_instance_name']])
            current_idx = (last_recorded_progress and last_recorded_progress.change_index) \
                or change_index.LOWEST_CHANGE_INDEX
            new_ci_min_index = change_index.ChangeIndex(
                new_ci['start_lsn'], b'\x00' * 10, 0)
            if not last_recorded_progress or (
                    last_recorded_progress.change_index < new_ci_min_index):
                with db_conn.cursor() as cursor:
                    ci_table_name = f"[{constants.CDC_DB_SCHEMA_NAME}].[{current_ci['capture_instance_name']}_CT]"
                    cursor.execute(
                        f"SELECT TOP 1 1 FROM {ci_table_name} WITH (NOLOCK)")
                    has_rows = cursor.fetchval() is not None
                if has_rows:
                    logger.info(
                        'Progress against existing capture instance ("%s") for table "%s" has reached index %s, '
                        'but the new capture instance ("%s") does not begin until index %s. Deferring termination '
                        'to maintain data integrity and will try again on next capture instance evaluation '
                        'iteration.', current_ci['capture_instance_name'],
                        fq_name, current_idx, new_ci['capture_instance_name'],
                        new_ci_min_index)
                    return False

    logger.warning(
        'Terminating process due to change in capture instances. This behavior can be controlled by '
        'changing option TERMINATE_ON_CAPTURE_INSTANCE_CHANGE.')
    return True
def run_queries_in_file(*, con: pyodbc.Connection, fp: pathlib.Path) -> None:
    with con.cursor() as cur:
        for sql in read_sql(fp):
            cur.execute(sql)
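# run_queries_in_file() depends on a read_sql() helper that is not shown here. A minimal
# sketch, assuming the file simply contains statements separated by semicolons:
def read_sql(fp: pathlib.Path) -> List[str]:
    """Return the non-empty SQL statements contained in the file."""
    text = fp.read_text()
    return [stmt.strip() for stmt in text.split(';') if stmt.strip()]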
def build_tracked_tables_from_cdc_metadata(
        db_conn: pyodbc.Connection, topic_name_template: str,
        table_whitelist_regex: str, table_blacklist_regex: str,
        snapshot_table_whitelist_regex: str,
        snapshot_table_blacklist_regex: str,
        capture_instance_version_strategy: str,
        capture_instance_version_regex: str) -> List[TrackedTable]:
    result = []

    latest_names = get_latest_capture_instance_names(
        db_conn, capture_instance_version_strategy,
        capture_instance_version_regex)
    logger.debug(
        'Latest capture instance names determined by "%s" strategy: %s',
        capture_instance_version_strategy, sorted(latest_names))

    table_whitelist_regex = table_whitelist_regex and re.compile(
        table_whitelist_regex, re.IGNORECASE)
    table_blacklist_regex = table_blacklist_regex and re.compile(
        table_blacklist_regex, re.IGNORECASE)
    snapshot_table_whitelist_regex = snapshot_table_whitelist_regex and re.compile(
        snapshot_table_whitelist_regex, re.IGNORECASE)
    snapshot_table_blacklist_regex = snapshot_table_blacklist_regex and re.compile(
        snapshot_table_blacklist_regex, re.IGNORECASE)

    meta_query = constants.CDC_METADATA_QUERY.replace(
        '?', ', '.join(['?'] * len(latest_names)))
    name_to_meta_fields = collections.defaultdict(list)

    with db_conn.cursor() as cursor:
        cursor.execute(meta_query, latest_names)
        for row in cursor.fetchall():
            # 0:4 gets schema name, table name, capture instance name, min captured LSN:
            name_to_meta_fields[tuple(row[0:4])].append(row[4:])

    for (schema_name, table_name, capture_instance_name,
         min_lsn), fields in name_to_meta_fields.items():
        fq_table_name = f'{schema_name}.{table_name}'
        can_snapshot = False

        if table_whitelist_regex and not table_whitelist_regex.match(fq_table_name):
            logger.debug('Table %s excluded by whitelist', fq_table_name)
            continue
        if table_blacklist_regex and table_blacklist_regex.match(fq_table_name):
            logger.debug('Table %s excluded by blacklist', fq_table_name)
            continue
        if snapshot_table_whitelist_regex and snapshot_table_whitelist_regex.match(fq_table_name):
            logger.debug('Table %s WILL be snapshotted due to whitelisting', fq_table_name)
            can_snapshot = True
        if snapshot_table_blacklist_regex and snapshot_table_blacklist_regex.match(fq_table_name):
            logger.debug('Table %s will NOT be snapshotted due to blacklisting', fq_table_name)
            can_snapshot = False

        topic_name = topic_name_template.format(
            schema_name=schema_name,
            table_name=table_name,
            capture_instance_name=capture_instance_name)
        tracked_table = TrackedTable(db_conn, schema_name, table_name,
                                     capture_instance_name, topic_name,
                                     min_lsn, can_snapshot)

        for (change_table_ordinal, column_name, sql_type_name,
             primary_key_ordinal, decimal_precision, decimal_scale) in fields:
            tracked_table.add_field(
                TrackedField(column_name, sql_type_name, change_table_ordinal,
                             primary_key_ordinal, decimal_precision,
                             decimal_scale))

        result.append(tracked_table)

    return result
def insert_registros_database(conn: db.Connection,
                              nm_database: str,
                              nm_tabela: str,
                              lt_campos_insert: list,
                              lt_valores_insert: list,
                              verbose: bool = False,
                              log: bool = False,
                              lt_registros_log: list = None):
    """
    Runs an INSERT against the specified database.

    Args:
        conn: open database connection
        nm_database: database name
        nm_tabela: table name
        lt_campos_insert: list of columns for the INSERT clause
        lt_valores_insert: list of values for the VALUES clause
        verbose: print progress and error messages when True

    Returns:
        dict: success or error messages keyed by pgcst.SUCESSO_KEY / pgcst.ERRO_KEY

    TODOs:
        (a) implement exception handling.
        (b) implement the insert generically.
    """
    if conn is not None:
        if verbose:
            print("[INFO]: the connection was found.")
    else:
        if verbose:
            print("[ERRO]: the connection was not found.")
        return

    # format the method output into errors and success
    lt_msg_erros = []
    dict_return_metodo: dict = {}

    str_clausula_insert = "INSERT INTO " + nm_database + ".dbo." + nm_tabela
    str_clausula_insert += monta_clausula_insert_by_array(lt_campos_insert)
    str_clausula_values = 'VALUES ('
    len_lt_valores_insert = len(lt_valores_insert)
    for idx, valor in enumerate(lt_valores_insert):
        if isinstance(valor, str):
            # handle special characters
            valor = valor.replace('\\', '')
            valor = valor.replace('\'', '')
            str_clausula_values += "'{}'".format(valor)
        elif isinstance(valor, (int, float)):
            str_clausula_values += "{}".format(valor)
        if idx < (len_lt_valores_insert - 1):
            str_clausula_values += ","

    # format the insert VALUES string
    str_clausula_values += ')'
    str_clausula_values = str_clausula_values.replace('[', '')
    str_clausula_values = str_clausula_values.replace(']', '')
    str_insert_formatado = str_clausula_insert + str_clausula_values

    try:
        cursor = conn.cursor()
        cursor.execute(str_insert_formatado)
        conn.commit()
        dict_return_metodo[pgcst.SUCESSO_KEY] = "the record was inserted successfully."
        if verbose:
            print("[INFO]: the record was inserted successfully.")
            print("[INFO]: {}".format(str_insert_formatado))
    except db.DataError as de:
        msg_except = "[ERRO]: \n {}".format(str_insert_formatado)
        lt_msg_erros.append(msg_except)
        print(msg_except)
        msg_except = "[EXCEPT]: database_util.insert_registros_database() -> {}".format(de)
        lt_msg_erros.append(msg_except)
        print(msg_except)

    if len(lt_msg_erros) > 0:
        dict_return_metodo[pgcst.ERRO_KEY] = lt_msg_erros
    return dict_return_metodo
def get_latest_capture_instances_by_fq_name(
        db_conn: pyodbc.Connection, capture_instance_version_strategy: str,
        capture_instance_version_regex: str, table_whitelist_regex: str,
        table_blacklist_regex: str) -> Dict[str, Dict[str, Any]]:
    if capture_instance_version_strategy == options.CAPTURE_INSTANCE_VERSION_STRATEGY_REGEX \
            and not capture_instance_version_regex:
        raise Exception(
            'Please provide a capture_instance_version_regex when specifying the `regex` '
            'capture_instance_version_strategy.')

    result: Dict[str, Dict[str, Any]] = {}
    fq_name_to_capture_instances: Dict[str, List[Dict[str, Any]]] = collections.defaultdict(list)
    capture_instance_version_regex = capture_instance_version_regex and re.compile(
        capture_instance_version_regex)
    table_whitelist_regex = table_whitelist_regex and re.compile(
        table_whitelist_regex, re.IGNORECASE)
    table_blacklist_regex = table_blacklist_regex and re.compile(
        table_blacklist_regex, re.IGNORECASE)

    with db_conn.cursor() as cursor:
        q, p = sql_queries.get_cdc_capture_instances_metadata()
        cursor.execute(q)
        for row in cursor.fetchall():
            fq_table_name = f'{row[0]}.{row[1]}'

            if table_whitelist_regex and not table_whitelist_regex.match(fq_table_name):
                logger.debug('Table %s excluded by whitelist', fq_table_name)
                continue
            if table_blacklist_regex and table_blacklist_regex.match(fq_table_name):
                logger.debug('Table %s excluded by blacklist', fq_table_name)
                continue

            as_dict = {
                'fq_name': fq_table_name,
                'capture_instance_name': row[2],
                'start_lsn': row[3],
                'create_date': row[4],
            }
            if capture_instance_version_regex:
                match = capture_instance_version_regex.match(row[1])
                as_dict['regex_matched_group'] = match and match.group(1) or ''
            fq_name_to_capture_instances[as_dict['fq_name']].append(as_dict)

    for fq_name, capture_instances in fq_name_to_capture_instances.items():
        if capture_instance_version_strategy == options.CAPTURE_INSTANCE_VERSION_STRATEGY_CREATE_DATE:
            latest_instance = sorted(capture_instances,
                                     key=lambda x: x['create_date'])[-1]
        elif capture_instance_version_strategy == options.CAPTURE_INSTANCE_VERSION_STRATEGY_REGEX:
            latest_instance = sorted(capture_instances,
                                     key=lambda x: x['regex_matched_group'])[-1]
        else:
            raise Exception(
                f'Capture instance version strategy "{capture_instance_version_strategy}" not recognized.')
        result[fq_name] = latest_instance

    logger.debug(
        'Latest capture instance names determined by "%s" strategy: %s',
        capture_instance_version_strategy,
        sorted([v['capture_instance_name'] for v in result.values()]))

    return result
def build_tracked_tables_from_cdc_metadata(
        db_conn: pyodbc.Connection, clock_syncer: 'clock_sync.ClockSync',
        metrics_accumulator: 'accumulator.Accumulator',
        topic_name_template: str, snapshot_table_whitelist_regex: str,
        snapshot_table_blacklist_regex: str, truncate_fields: Dict[str, int],
        capture_instance_names: List[str],
        sql_query_processor: 'sql_query_subprocess.SQLQueryProcessor'
) -> List[tracked_tables.TrackedTable]:
    result: List[tracked_tables.TrackedTable] = []

    truncate_fields = {k.lower(): v for k, v in truncate_fields.items()}

    snapshot_table_whitelist_regex = snapshot_table_whitelist_regex and re.compile(
        snapshot_table_whitelist_regex, re.IGNORECASE)
    snapshot_table_blacklist_regex = snapshot_table_blacklist_regex and re.compile(
        snapshot_table_blacklist_regex, re.IGNORECASE)

    name_to_meta_fields: Dict[Tuple, List[Tuple]] = collections.defaultdict(list)

    with db_conn.cursor() as cursor:
        q, p = sql_queries.get_cdc_tracked_tables_metadata(capture_instance_names)
        cursor.execute(q)
        for row in cursor.fetchall():
            # 0:4 gets schema name, table name, capture instance name, min captured LSN:
            name_to_meta_fields[tuple(row[0:4])].append(row[4:])

    for (schema_name, table_name, capture_instance_name,
         min_lsn), fields in name_to_meta_fields.items():
        fq_table_name = f'{schema_name}.{table_name}'
        can_snapshot = False

        if snapshot_table_whitelist_regex and snapshot_table_whitelist_regex.match(fq_table_name):
            logger.debug('Table %s WILL be snapshotted due to whitelisting', fq_table_name)
            can_snapshot = True
        if snapshot_table_blacklist_regex and snapshot_table_blacklist_regex.match(fq_table_name):
            logger.debug('Table %s will NOT be snapshotted due to blacklisting', fq_table_name)
            can_snapshot = False

        topic_name = topic_name_template.format(
            schema_name=schema_name,
            table_name=table_name,
            capture_instance_name=capture_instance_name)
        tracked_table = tracked_tables.TrackedTable(
            db_conn, clock_syncer, metrics_accumulator, sql_query_processor,
            schema_name, table_name, capture_instance_name, topic_name,
            min_lsn, can_snapshot)

        for (change_table_ordinal, column_name, sql_type_name,
             primary_key_ordinal, decimal_precision, decimal_scale) in fields:
            truncate_after = truncate_fields.get(
                f'{schema_name}.{table_name}.{column_name}'.lower())
            tracked_table.append_field(
                tracked_tables.TrackedField(column_name, sql_type_name,
                                            change_table_ordinal,
                                            primary_key_ordinal,
                                            decimal_precision, decimal_scale,
                                            truncate_after))

        result.append(tracked_table)

    return result
def query(connection: pyodbc.Connection, query: str) -> List[Dict[str, Any]]:
    cursor = connection.cursor().execute(query)
    return list(_RowAsDict(cursor))
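# query() wraps the cursor in a _RowAsDict iterator that is not defined in this
# collection. A minimal sketch, assuming it yields each row as a dict keyed by the
# cursor's column names:
class _RowAsDict:
    """Iterate over a pyodbc cursor, yielding each row as a column-name -> value dict."""

    def __init__(self, cursor: pyodbc.Cursor):
        self._cursor = cursor
        self._columns = [col[0] for col in cursor.description]

    def __iter__(self):
        for row in self._cursor:
            yield dict(zip(self._columns, row))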