Example #1
def _run_query(query: str, conn: pyodbc.Connection) -> None:
    """
    Run query without fetching results.

    Args:
        query (str): SQL statement (example: 'select x, y from table_XY')
        conn (pyodbc.Connection): query engine connection object
    """
    conn.cursor().execute(query)
Example #2
def route(connection: pyodbc.Connection):
    sql = SqlChain(
        """INSERT INTO test(id, x, ch) VALUES ('a', 1, 'a'), ('b', 2, 'b')""",
        cursor=connection.cursor())
    res_sql = sql.chain("""SELECT * FROM test;""")
    sql.execute()
    return res_sql.get()
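The SqlChain class is not part of this listing. A minimal sketch consistent with how it is used above (chain() queues a follow-up statement and returns a handle, execute() runs everything, get() reads the last result); the internals here are assumptions:

class SqlChain:
    """Hypothetical sketch: queue SQL statements and run them on one cursor."""

    def __init__(self, sql: str, cursor=None):
        self.cursor = cursor
        self.statements = [sql]

    def chain(self, sql: str) -> 'SqlChain':
        # Queue a follow-up statement; return self so its result can be read later.
        self.statements.append(sql)
        return self

    def execute(self) -> None:
        for stmt in self.statements:
            self.cursor.execute(stmt)

    def get(self):
        # Rows produced by the most recently executed statement.
        return self.cursor.fetchall()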
Example #3
def fetch(query: str,
          conn: pyodbc.Connection,
          chunksize: Optional[int] = None):
    """
    Run query and fetch results.

    Args:
        query (str): SQL statement
        conn (pyodbc.Connection): query engine connection object
        chunksize (int): number of rows to fetch per batch

    Returns:
        results: list of row objects (an exception is raised if the query fails)
    """
    try:
        curr = conn.cursor().execute(query)
    except Exception as e:
        msg = '{e} ---- The failed query: {query}'.format(e=e, query=query)
        raise Exception(msg) from e
    else:
        if chunksize is None:
            results = curr.fetchall()
        else:
            results = _fetch_many(curr, chunksize)
    finally:
        if conn is not None:
            conn.close()
    return results
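The _fetch_many helper is not shown in this listing. A minimal sketch, assuming chunksize counts rows per fetchmany() batch:

def _fetch_many(cursor, chunksize: int) -> list:
    # Accumulate rows in fixed-size batches to cap per-call memory use.
    results = []
    while True:
        batch = cursor.fetchmany(chunksize)
        if not batch:
            break
        results.extend(batch)
    return results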
Example #4
    def get_table_row_counts(self, conn: pyodbc.Connection) -> Dict:
        qry = self.get_table_row_counts_qry(self.source)
        with conn.cursor() as cursor:
            data = cursor.execute(qry).fetchall()
            columns = [key[0] for key in cursor.description]
        data = rows_to_json(data, columns)
        return {row['table_name']: row['row_count'] for row in data}
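rows_to_json is defined elsewhere; given that its output is consumed as a list of dicts keyed by column name, a plausible sketch is:

def rows_to_json(rows, columns):
    # Pair each row tuple with the column names to build one dict per row.
    return [dict(zip(columns, row)) for row in rows]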
Example #5
def load_manifest(json_data: dict, conn: pyodbc.Connection) -> Optional[Exception]:
    """ Use SQL to load records into the data structure
    Args:
        json_data: manifest in a Python dictionary
        conn: database connection object
    Returns:
        None if the load succeeds, otherwise the raised exception
    """
    cursor = conn.cursor()
    insertsql = ("INSERT INTO ICO.inventory "
                 "(vendor_id, vendor_product_code, quantity_kg, date_arrival) "
                 "VALUES (?, ?, ?, ?)")
    today = date.today()
    mydate = today.strftime("%Y-%m-%d")
    try:
        items = json_data.get("items")
        for item in items:
            # Parameterized insert avoids quoting bugs and SQL injection.
            params = (item.get("vendor_id"),
                      item.get("vendor_product_code"),
                      item.get("quantity_kg"),
                      mydate)
            print("Inserting: {}".format(params))
            cursor.execute(insertsql, params)

        conn.commit()
    except Exception as exp:
        return exp

    return None
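A hypothetical call illustrating the manifest shape this function expects; the values are made up, and conn is assumed to be an open pyodbc connection:

manifest = {
    "items": [
        {"vendor_id": "V001", "vendor_product_code": "P-100", "quantity_kg": 25.5},
    ]
}
error = load_manifest(manifest, conn)
if error is not None:
    print("Load failed: {}".format(error))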
Example #6
def get_all_locations(conn: pyodbc.Connection) -> List[Location]:
    """
    Queries the SQL database for every X, Y, latitude, longitude, address
    and UPRN under a specific constraint and creates a list of location
    objects containing each one
    Args:
        conn (pyodbc.Connection): The connection to the database to query
    Returns:
        List[Location]: A list of Location objects containing information
        about the locations returned by the SQL query
    """
    locations = []
    with open('.\\get_all_locations_per_round.sql', 'r') as loc_query_f:
        loc_query_all = loc_query_f.read()
    cursor = conn.cursor()
    cursor.execute(loc_query_all)
    locs = cursor.fetchall()
    for loc in locs:
        locations.append(
            Location('{:.2f}'.format(loc.x), '{:.2f}'.format(loc.y),
                     '{:.15f}'.format(loc.lat), '{:.15f}'.format(loc.lng),
                     loc.addr, loc.street, loc.town, loc.postcode,
                     str(loc.uprn),
                     [loc.REF, loc.RECY, loc.MIX, loc.GLASS, loc.GW]))
    return locations
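The Location class is not included in this listing. A minimal sketch matching the positional arguments used here and in Example #10 (the field name after uprn is an assumption):

from dataclasses import dataclass
from typing import List

@dataclass
class Location:
    x: str
    y: str
    lat: str
    lng: str
    addr: str
    street: str
    town: str
    postcode: str
    uprn: str
    services: List  # assumed name for the [REF, RECY, MIX, GLASS, GW] flags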
Example #7
    def get_table_columns(self, conn: pyodbc.Connection) -> Dict:
        qry = self.get_table_meta_qry(self.source)
        with conn.cursor() as cursor:
            data = cursor.execute(qry).fetchall()
            columns = [key[0] for key in cursor.description]
        data = rows_to_json(data, columns)
        return self.process_table_columns(data)
Example #8
def get_ims_notifications(conn: pyodbc.Connection) -> List[dict]:
    sql = """
        SELECT n.CreationDate, n.RequiredEndDate, n.NotificationNumber, 
            i.ID as IMSTag_ID, i.tag, mel.*
        FROM TechnicalInfoTag as t,
             TechnicalInfoTag_IMSTag as ti,
             Notification as n,
             IMSTag i,
             ModelElement me left join aas.vModelElementLimitPivot mel 
                on me.ID = mel.ModelElement_ID
        where 
            t.ID = ti.TechnicalInfoTag_ID
            and ti.IMSTag_ID = i.ID
            and t.ID = n.TechnicalInfoTag_ID
            and i.ID = me.IMSTag_ID
            and (n.FailureImpact IN ('D','S') or NOTIFICATIONTYPE IN ('M2','M3','M4'))
        order by n.NotificationNumber desc 
    """

    cursor = conn.cursor()
    cursor.execute(sql)
    desc = cursor.description
    column_names = [col[0] for col in desc]
    data = [dict(zip(column_names, row)) for row in cursor.fetchall()]
    cursor.close()
    return data
Example #9
def pyodbc_query_fetchall(conn: pyodbc.Connection,
                          query: str) -> Union[List[Any], None]:
    """Execute pyodbc query and fetchone, see
    https://github.com/mkleehammer/pyodbc/wiki/Cursor.

    Args:
        conn (pyodbc.Connection): database connection object
        query (str): sql query

    Returns:
        Union[List[Any], None]: list of fetched rows (may be empty)
    """

    cursor = conn.cursor()
    result = cursor.execute(query).fetchall()
    cursor.close()

    return result
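A hedged usage sketch; the DSN and credentials below are placeholders:

conn = pyodbc.connect("DSN=mydsn;UID=user;PWD=secret")
rows = pyodbc_query_fetchall(conn, "SELECT 1 AS one")
print(rows)  # e.g. [(1, )]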
Example #10
def get_location_from_uprn(conn: pyodbc.Connection, uprn: str) -> Location:
    """
    Queries the SQL database for the X, Y, latitude, longitude and address
    associated with a UPRN and creates a Location object out of it
    Args:
        conn (pyodbc.Connection): The connection to the database to query
        uprn (str): The UPRN to query with
    Returns:
        Location: The Location object containing the latitude, longitude and
        address of the given UPRN, as well as the UPRN itself
    """
    with open('.\\get_location.sql', 'r') as loc_query_f:
        loc_query = loc_query_f.read()
    cursor = conn.cursor()
    cursor.execute(loc_query, uprn)
    loc = cursor.fetchone()
    return Location(str(loc.x), str(loc.y), str(loc.lat), str(loc.lng),
                    loc.addr, loc.street, loc.town, loc.postcode, uprn,
                    [loc.REF, loc.RECY, loc.MIX, loc.GLASS, loc.GW])
Example #11
    def get_objects_id(self, cnx: pyodbc.Connection, query: str) -> list:
        """
        Query selectID. Query must return only one column with name "id".
        :return: list with objects ID that you use in select query to get fields
        """
        cur = cnx.cursor()
        self.log.debug(query)
        try:
            cur.execute(query)
            rows = cur.fetchall()
            if not rows:
                return []
            return [i[0] for i in rows]
        except pyodbc.Error as err:
            self.log.error(str(err))
            return []
        finally:
            cur.close()
Example #12
def select_lista_registros_database(
        conn: db.Connection,
        nm_tabela: str,
        lt_campos_select: list,
        condicao_where: str = None,
        condicao_groupby: str = None,
        condicao_orderby: str = None,
        str_select_completo_formatado: str = None) -> list:
    """ Manipula registros de SELECT no banco de dados especificado.
    Args:
    Returns:
    Raises:
    IOError:    
    TODOs: implementar o controle de excecoes.
    """
    str_linha_resultado_select = ''
    lt_registros_selecionados = []

    if conn != None:
        print("[INFO]: conexão efetuada com sucesso.")
    else:
        print("[ERRO]: falha ao obter a conexão.")

    try:
        cursor = conn.cursor()

        if str_select_completo_formatado != None and str_select_completo_formatado != "":
            cursor.execute(str_select_completo_formatado)
        else:
            pass

        linha = cursor.fetchone()
    except db.DatabaseError as de:
        print("[EXCEPT]: database_util.get_lista_registros_database() -> {}".
              format(de))

    while linha:
        str_linha_resultado_select = str(linha)
        lt_registros_selecionados.append(str_linha_resultado_select)
        linha = cursor.fetchone()

    return lt_registros_selecionados
Example #13
def update_dolphin_database(dbname: pyodbc.Connection, updatesql: str) -> int:
    dolphincursor = dbname.cursor()
    try:
        if DolphinTestModeEnabled:
            logging.info(
                "Dolphin Test mode is enabled. We would update Dolphin with {0}"
                .format(updatesql))
        else:
            dolphincursor.execute(updatesql)
            dbname.commit()
    except Exception as e:
        logging.error(
            "Dolphin database update failed with {0}, sql = {1}".format(
                e, updatesql))
        dbname.rollback()
        return -1
    finally:
        # Close the cursor on both the success and failure paths.
        dolphincursor.close()

    return 0
Example #14
def get_latest_capture_instance_names(
        db_conn: pyodbc.Connection, capture_instance_version_strategy: str,
        capture_instance_version_regex: str) -> List[str]:
    if capture_instance_version_strategy == 'regex' and not capture_instance_version_regex:
        raise Exception(
            'Please provide a capture_instance_version_regex when specifying the `regex` '
            'capture_instance_version_strategy.')
    result = []
    table_to_capture_instances = collections.defaultdict(list)
    capture_instance_version_regex = capture_instance_version_regex and re.compile(
        capture_instance_version_regex)

    with db_conn.cursor() as cursor:
        cursor.execute(constants.CDC_CAPTURE_INSTANCES_QUERY)
        for row in cursor.fetchall():
            as_dict = {
                'source_object_id': row[0],
                'capture_instance': row[1],
                'create_date': row[2],
            }
            if capture_instance_version_regex:
                match = capture_instance_version_regex.match(row[1])
                as_dict['regex_matched_group'] = match and match.group(1) or ''
            table_to_capture_instances[as_dict['source_object_id']].append(
                as_dict)

    for source_id, capture_instances in table_to_capture_instances.items():
        if capture_instance_version_strategy == 'create_date':
            latest_instance = sorted(capture_instances,
                                     key=lambda x: x['create_date'])[-1]
        elif capture_instance_version_strategy == 'regex':
            latest_instance = sorted(
                capture_instances, key=lambda x: x['regex_matched_group'])[-1]
        else:
            raise Exception(
                f'Capture instance version strategy "{capture_instance_version_strategy}" not recognized.'
            )
        result.append(latest_instance['capture_instance'])

    return result
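The constants.CDC_CAPTURE_INSTANCES_QUERY text is not included here. A plausible definition, assuming it reads SQL Server's cdc.change_tables catalog view (those column names exist on that view):

CDC_CAPTURE_INSTANCES_QUERY = '''
    SELECT source_object_id, capture_instance, create_date
    FROM cdc.change_tables
    ORDER BY source_object_id
'''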
Example #15
def genTrials(conn: pyodbc.Connection, amount: int):

    fake = Faker()
    cursor = conn.cursor()
    conclusions = [
        '\'innocent\'', '\'sent to 5 years in prison\'',
        '\'sent to 10 years in prison\'', '\'sent to 15 years in prison\'',
        '\'sent to 2 years in prison\'', '\'sent to 20 years in prison\'',
        '\'sent to death\''
    ]
    teams = ['\'Offense\'', '\'Defense\'']
    # generate trial:
    for i in range(amount):
        # subject id
        sid = sqlexec(conn,
                      return_=True,
                      query=f'''
                    select top 1 [specialization id] from [specializations]
                    order by newid();''')[0][0]

        # date
        date = f'''\'{fake.date_between(start_date='-8y', end_date='today')}\''''
        text = f'''\'{fake.paragraph(nb_sentences=5, variable_nb_sentences=False)}\''''

        ctid = getCity(conn)
        crid = sqlexec(conn,
                       f'''
                             select top 1 [court id] from [courts]
                             where [city id] = {ctid}
                             order by newid();
                             ''',
                       return_=True)[0][0]
        try:
            cursor.execute(f'''
                        insert into [trials]([subject id], [date], [description], [court id])
                        values({sid}, {date}, {text}, {crid});
                        ''')
            cursor.commit()
        except pyodbc.IntegrityError:
            continue

        trid = sqlexec(conn,
                       return_=True,
                       query=f'''
                       select top 1 [trial id] from [trials]
                       where [subject id] = {sid} and 
                       [date] = {date} and 
                       [description] = {text} and
                       [court id] = {crid};
                       ''')[0][0]
        print(trid)
        # generate defendants:
        for pid in getPerson(conn, ctid, int(np.random.uniform(1.1, 10))):
            pid = pid[0]
            conclusion = choice(conclusions)
            cursor.execute(f'''
                           insert into [defendants]([trial id], [person id], [conclusion])
                           values({trid}, {pid}, {conclusion});''')
            cursor.commit()

        # generate judges in trial:
        for (jid, cridt) in getJudges(conn, ctid,
                                      int(np.random.uniform(1.1, 6))):
            cursor.execute(f'''
                           insert into [judges in trial]([trial id], [person id], [court id])
                           values({trid}, {jid}, {cridt});''')
            cursor.commit()

        # generate lawyers in trial:
        i = 0
        team = teams[0]
        for (lid, brid) in getLawyers(
                conn, getCity(conn),
                int(np.random.uniform(2.1, np.random.uniform(2.2, 10)))):
            if i == 0:
                team = teams[0]
            elif i == 1:
                team = teams[1]
            else:
                team = choice(teams)
            cursor.execute(f'''
                           insert into [lawyers in trial]([trial id], [person id], [bar id], [team])
                           values({trid}, {lid}, {brid}, {team});''')
            cursor.commit()
            i += 1
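The sqlexec helper used throughout this example is not shown. A minimal sketch consistent with both call styles above (query passed positionally or by keyword, return_=True to fetch rows); the commit on the write path is an assumption:

def sqlexec(conn: pyodbc.Connection, query: str = None, return_: bool = False):
    # Execute one statement; fetch and return rows only when asked to.
    cursor = conn.cursor()
    cursor.execute(query)
    if return_:
        return cursor.fetchall()
    conn.commit()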
Example #16
    def sql_parse(self, cnx: pyodbc.Connection, lootId) -> dict:
        """
        Выполнение запросов

        :param cnx: current cnx
        :param lootId: document identifier from source
        """
        def parse_rows(select:str, fetchOne=False, group=False) -> [dict]:
            """
            Разбор строк. Каждая строка переводится в словарь.

            :param select: сам запрос
            :param fetchOne: заберёт одну строку, вернёт один словарь
            :param group: если придёт >1 строк, то упакует в один словарь,
                т.е. каждое поле будет содержать массив значений.
            :return: список со словорями, каждый со своим набором полей
            """
            def row_as_dict(cur, rows):
                columns = [column[0] for column in cur.description]
                for row in rows:
                    yield dict(zip(columns, row))

            try:
                self.log.debug(select)
                cur.execute(select)
            except Exception as e:
                self.log.error(f"Fail to execute query: {e}")
                self.syncCount[5] += 1
                return [{}]

            try:
                if fetchOne:
                    rows = cur.fetchone()
                else:
                    rows = cur.fetchall()
            except MemoryError:  # guard against runaway queries returning billions of rows
                self.log.error("Not enough memory to fetch rows. Simplify your query")
                self.syncCount[5] += 1
                cur.close()
                return [{}]

            if not rows:
                if not self.task['skipEmptyRows']:
                    # This way a row is always produced, even if the query returns nothing.
                    self.log.warning('Query has returned an empty row. Missing values will be replaced with <None>')
                    body = {}
                    try:
                        curDes = cur.description
                        for col in curDes:
                            body[col[0]] = None

                    except Exception as e:
                        self.syncCount[5] += 1
                        self.log.error(f'Fail to parse empty row: {e}')
                    finally:
                        return [body]
                # Empty result that may be skipped; avoid zipping None below.
                return []


            if fetchOne:
                return [dict(zip([column[0] for column in cur.description], rows))]
            else:
                rows = row_as_dict(cur, rows)

            ls = []
            groupBody = {}
            try:
                isFirst = True
                for row in rows:
                    body = {}
                    for col, val in row.items():
                        if val is not None:
                            # TODO: allow null values through?
                            if group and not isFirst:
                                try:
                                    groupBody[col].append(val)
                                except KeyError:
                                    groupBody[col] = [val]
                                except AttributeError:
                                    groupBody[col] = [groupBody[col], val]
                            else:
                                body[col] = val

                    isFirst = False
                    if not group:
                        ls.append(body)
                if group:
                    ls.append(groupBody)
                return ls
            except Exception as e:
                self.log.error(f'Fail to parse row: {e}')
                self.syncCount[5] += 1
                return [{}]

        cur = cnx.cursor()
        fields = {}

        for select in self.task['simpleQuery']:
            select = select.replace('@loot_id@', str(lootId), -1)

            for i in fields:
                select = select.replace(f'@{i}@', str(fields[i]), -1)
            fields = {**fields, **parse_rows(select, fetchOne=True)[0]}

        # nested queries
        for bundle in self.task['bundleQuery']:
            for name, selectList in bundle.items():
                select = selectList[0].replace('@loot_id@', str(lootId), -1)
                for i in fields:
                    select = select.replace(f'@{i}@', str(fields[i]), -1)

                subFields = parse_rows(select)
                if subFields:
                    fields[name] = []

                    if len(selectList) > 1:
                        for sub in subFields:
                            for select in selectList[1:]:
                                select = select.replace('@loot_id@', str(lootId), -1)

                                for i in sub:
                                    select = select.replace(f'@{i}@', str(sub[i]), -1)
                                for i in fields:
                                    select = select.replace(f'@{i}@', str(fields[i]), -1)

                                sub = {**sub, **parse_rows(select, group=True)[0]}
                            fields[name].append(sub)
                    else:
                        fields[name].append(subFields)
        return fields
Example #17
def should_terminate_due_to_capture_instance_change(
        db_conn: pyodbc.Connection,
        progress_tracker: progress_tracking.ProgressTracker,
        capture_instance_version_strategy: str,
        capture_instance_version_regex: str,
        capture_instance_to_topic_map: Dict[str, str],
        current_capture_instances: Dict[str, Dict[str, Any]],
        table_whitelist_regex: str, table_blacklist_regex: str) -> bool:
    new_capture_instances = get_latest_capture_instances_by_fq_name(
        db_conn, capture_instance_version_strategy,
        capture_instance_version_regex, table_whitelist_regex,
        table_blacklist_regex)

    current = {
        k: v['capture_instance_name']
        for k, v in current_capture_instances.items()
    }
    new = {
        k: v['capture_instance_name']
        for k, v in new_capture_instances.items()
    }

    if new == current:
        logger.debug('Capture instances unchanged; continuing...')
        return False

    def better_json_serialize(obj):
        if isinstance(obj, (datetime.datetime, datetime.date)):
            return obj.isoformat()
        if isinstance(obj, (bytes, )):
            return f'0x{obj.hex()}'
        raise TypeError("Type %s not serializable" % type(obj))

    logger.info(
        'Change detected in capture instances. Current: %s New: %s',
        json.dumps(current_capture_instances, default=better_json_serialize),
        json.dumps(new_capture_instances, default=better_json_serialize))

    for fq_name, current_ci in current_capture_instances.items():
        if fq_name in new_capture_instances:
            new_ci = new_capture_instances[fq_name]
            last_recorded_progress = progress_tracker.get_last_recorded_progress_for_topic(
                capture_instance_to_topic_map[
                    current_ci['capture_instance_name']])
            current_idx = last_recorded_progress and last_recorded_progress.change_index or \
                change_index.LOWEST_CHANGE_INDEX
            new_ci_min_index = change_index.ChangeIndex(
                new_ci['start_lsn'], b'\x00' * 10, 0)
            if not last_recorded_progress or (
                    last_recorded_progress.change_index < new_ci_min_index):
                with db_conn.cursor() as cursor:
                    ci_table_name = f"[{constants.CDC_DB_SCHEMA_NAME}].[{current_ci['capture_instance_name']}_CT]"
                    cursor.execute(
                        f"SELECT TOP 1 1 FROM {ci_table_name} WITH (NOLOCK)")
                    has_rows = cursor.fetchval() is not None
                if has_rows:
                    logger.info(
                        'Progress against existing capture instance ("%s") for table "%s" has reached index %s, '
                        'but the new capture instance ("%s") does not begin until index %s. Deferring termination '
                        'to maintain data integrity and will try again on next capture instance evaluation '
                        'iteration.', current_ci['capture_instance_name'],
                        fq_name, current_idx, new_ci['capture_instance_name'],
                        new_ci_min_index)
                    return False

    logger.warning(
        'Terminating process due to change in capture instances. This behavior can be controlled by '
        'changing option TERMINATE_ON_CAPTURE_INSTANCE_CHANGE.')
    return True
Example #18
def run_queries_in_file(*, con: pyodbc.Connection, fp: pathlib.Path) -> None:
    with con.cursor() as cur:
        for sql in read_sql(fp):
            cur.execute(sql)
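read_sql is defined elsewhere; a minimal sketch, assuming the file holds semicolon-separated statements:

import pathlib
from typing import Iterator

def read_sql(fp: pathlib.Path) -> Iterator[str]:
    # Yield each non-empty statement in the file.
    for stmt in fp.read_text().split(';'):
        stmt = stmt.strip()
        if stmt:
            yield stmt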
Example #19
def build_tracked_tables_from_cdc_metadata(
        db_conn: pyodbc.Connection, topic_name_template: str,
        table_whitelist_regex: str, table_blacklist_regex: str,
        snapshot_table_whitelist_regex: str,
        snapshot_table_blacklist_regex: str,
        capture_instance_version_strategy: str,
        capture_instance_version_regex: str) -> List[TrackedTable]:
    result = []

    latest_names = get_latest_capture_instance_names(
        db_conn, capture_instance_version_strategy,
        capture_instance_version_regex)
    logger.debug(
        'Latest capture instance names determined by "%s" strategy: %s',
        capture_instance_version_strategy, sorted(latest_names))

    table_whitelist_regex = table_whitelist_regex and re.compile(
        table_whitelist_regex, re.IGNORECASE)
    table_blacklist_regex = table_blacklist_regex and re.compile(
        table_blacklist_regex, re.IGNORECASE)
    snapshot_table_whitelist_regex = snapshot_table_whitelist_regex and re.compile(
        snapshot_table_whitelist_regex, re.IGNORECASE)
    snapshot_table_blacklist_regex = snapshot_table_blacklist_regex and re.compile(
        snapshot_table_blacklist_regex, re.IGNORECASE)

    meta_query = constants.CDC_METADATA_QUERY.replace(
        '?', ', '.join(['?'] * len(latest_names)))
    name_to_meta_fields = collections.defaultdict(list)

    with db_conn.cursor() as cursor:
        cursor.execute(meta_query, latest_names)
        for row in cursor.fetchall():
            # 0:4 gets schema name, table name, capture instance name, min captured LSN:
            name_to_meta_fields[tuple(row[0:4])].append(row[4:])

    for (schema_name, table_name, capture_instance_name,
         min_lsn), fields in name_to_meta_fields.items():
        fq_table_name = f'{schema_name}.{table_name}'

        can_snapshot = False

        if table_whitelist_regex and not table_whitelist_regex.match(
                fq_table_name):
            logger.debug('Table %s excluded by whitelist', fq_table_name)
            continue

        if table_blacklist_regex and table_blacklist_regex.match(
                fq_table_name):
            logger.debug('Table %s excluded by blacklist', fq_table_name)
            continue

        if snapshot_table_whitelist_regex and snapshot_table_whitelist_regex.match(
                fq_table_name):
            logger.debug('Table %s WILL be snapshotted due to whitelisting',
                         fq_table_name)
            can_snapshot = True

        if snapshot_table_blacklist_regex and snapshot_table_blacklist_regex.match(
                fq_table_name):
            logger.debug(
                'Table %s will NOT be snapshotted due to blacklisting',
                fq_table_name)
            can_snapshot = False

        topic_name = topic_name_template.format(
            schema_name=schema_name,
            table_name=table_name,
            capture_instance_name=capture_instance_name)

        tracked_table = TrackedTable(db_conn, schema_name, table_name,
                                     capture_instance_name, topic_name,
                                     min_lsn, can_snapshot)

        for (change_table_ordinal, column_name, sql_type_name,
             primary_key_ordinal, decimal_precision, decimal_scale) in fields:
            tracked_table.add_field(
                TrackedField(column_name, sql_type_name, change_table_ordinal,
                             primary_key_ordinal, decimal_precision,
                             decimal_scale))

        result.append(tracked_table)

    return result
Example #20
def insert_registros_database(conn: db.Connection,
                              nm_database: str,
                              nm_tabela: str,
                              lt_campos_insert: list,
                              lt_valores_insert: list,
                              verbose: bool = False,
                              log: bool = False,
                              lt_registros_log: list = None):
    """ Manipula registros de INSERT no banco de dados especificado.
    Args:
    Returns:
    Raises:
    IOError:
    TODOs: (a) implementar o controle de excecoes.
           (b) implementar o insert de maneira generica
    """
    if conn is not None:
        if verbose:
            print("[INFO]: connection found.")
    else:
        if verbose:
            print("[ERROR]: connection not found.")
        return

    # collect the method's outcome: errors and success message
    lt_msg_erros = []
    dict_return_metodo: dict = {}

    str_clausula_insert = "INSERT INTO " + nm_database + ".dbo." + nm_tabela
    str_clausula_insert += monta_clausula_insert_by_array(lt_campos_insert)
    str_clausula_values = 'VALUES ('

    len_lt_valores_insert = len(lt_valores_insert)
    for idx, valor in enumerate(lt_valores_insert):
        if isinstance(valor, (str)):
            # strip special characters
            valor = valor.replace('\\', '')
            valor = valor.replace('\'', '')

            str_clausula_values += "{}{}{}".format('\'', valor, '\'')

        elif isinstance(valor, (int)):
            str_clausula_values += "{}".format(valor)
        elif isinstance(valor, (float)):
            str_clausula_values += "{}".format(valor)

        if idx < (len_lt_valores_insert - 1):
            str_clausula_values += ","

    # finish formatting the VALUES string for the insert
    str_clausula_values += ')'
    str_clausula_values = str_clausula_values.replace('[', '')
    str_clausula_values = str_clausula_values.replace(']', '')
    str_insert_formatado = str_clausula_insert + str_clausula_values

    try:
        cursor = conn.cursor()
        cursor.execute(str_insert_formatado)
        conn.commit()
        dict_return_metodo[
            pgcst.SUCESSO_KEY] = "record inserted successfully."

        if verbose:
            print("[INFO]: record inserted successfully.")
            print("[INFO]: {}".format(str_insert_formatado))

    except db.DataError as de:
        msg_except = "[ERRO]: \n {}".format(str_insert_formatado)
        lt_msg_erros.append(msg_except)
        print(msg_except)

        msg_except = "[EXCEPT]: database_util.insert_registros_database() -> {}".format(
            de)
        lt_msg_erros.append(msg_except)
        print(msg_except)

    if len(lt_msg_erros) > 0:
        dict_return_metodo[pgcst.ERRO_KEY] = lt_msg_erros

    return dict_return_metodo
Example #21
def get_latest_capture_instances_by_fq_name(
        db_conn: pyodbc.Connection, capture_instance_version_strategy: str,
        capture_instance_version_regex: str, table_whitelist_regex: str,
        table_blacklist_regex: str) -> Dict[str, Dict[str, Any]]:
    if capture_instance_version_strategy == options.CAPTURE_INSTANCE_VERSION_STRATEGY_REGEX \
            and not capture_instance_version_regex:
        raise Exception(
            'Please provide a capture_instance_version_regex when specifying the `regex` '
            'capture_instance_version_strategy.')
    result: Dict[str, Dict[str, Any]] = {}
    fq_name_to_capture_instances: Dict[str, List[Dict[
        str, Any]]] = collections.defaultdict(list)
    capture_instance_version_regex = capture_instance_version_regex and re.compile(
        capture_instance_version_regex)
    table_whitelist_regex = table_whitelist_regex and re.compile(
        table_whitelist_regex, re.IGNORECASE)
    table_blacklist_regex = table_blacklist_regex and re.compile(
        table_blacklist_regex, re.IGNORECASE)

    with db_conn.cursor() as cursor:
        q, p = sql_queries.get_cdc_capture_instances_metadata()
        cursor.execute(q)
        for row in cursor.fetchall():
            fq_table_name = f'{row[0]}.{row[1]}'

            if table_whitelist_regex and not table_whitelist_regex.match(
                    fq_table_name):
                logger.debug('Table %s excluded by whitelist', fq_table_name)
                continue

            if table_blacklist_regex and table_blacklist_regex.match(
                    fq_table_name):
                logger.debug('Table %s excluded by blacklist', fq_table_name)
                continue

            as_dict = {
                'fq_name': fq_table_name,
                'capture_instance_name': row[2],
                'start_lsn': row[3],
                'create_date': row[4],
            }
            if capture_instance_version_regex:
                match = capture_instance_version_regex.match(row[1])
                as_dict['regex_matched_group'] = match and match.group(1) or ''
            fq_name_to_capture_instances[as_dict['fq_name']].append(as_dict)

    for fq_name, capture_instances in fq_name_to_capture_instances.items():
        if capture_instance_version_strategy == options.CAPTURE_INSTANCE_VERSION_STRATEGY_CREATE_DATE:
            latest_instance = sorted(capture_instances,
                                     key=lambda x: x['create_date'])[-1]
        elif capture_instance_version_strategy == options.CAPTURE_INSTANCE_VERSION_STRATEGY_REGEX:
            latest_instance = sorted(
                capture_instances, key=lambda x: x['regex_matched_group'])[-1]
        else:
            raise Exception(
                f'Capture instance version strategy "{capture_instance_version_strategy}" not recognized.'
            )
        result[fq_name] = latest_instance

    logger.debug(
        'Latest capture instance names determined by "%s" strategy: %s',
        capture_instance_version_strategy,
        sorted([v['capture_instance_name'] for v in result.values()]))

    return result
Example #22
def build_tracked_tables_from_cdc_metadata(
    db_conn: pyodbc.Connection, clock_syncer: 'clock_sync.ClockSync',
    metrics_accumulator: 'accumulator.Accumulator', topic_name_template: str,
    snapshot_table_whitelist_regex: str, snapshot_table_blacklist_regex: str,
    truncate_fields: Dict[str, int], capture_instance_names: List[str],
    sql_query_processor: 'sql_query_subprocess.SQLQueryProcessor'
) -> List[tracked_tables.TrackedTable]:
    result: List[tracked_tables.TrackedTable] = []

    truncate_fields = {k.lower(): v for k, v in truncate_fields.items()}

    snapshot_table_whitelist_regex = snapshot_table_whitelist_regex and re.compile(
        snapshot_table_whitelist_regex, re.IGNORECASE)
    snapshot_table_blacklist_regex = snapshot_table_blacklist_regex and re.compile(
        snapshot_table_blacklist_regex, re.IGNORECASE)

    name_to_meta_fields: Dict[Tuple,
                              List[Tuple]] = collections.defaultdict(list)

    with db_conn.cursor() as cursor:
        q, p = sql_queries.get_cdc_tracked_tables_metadata(
            capture_instance_names)
        cursor.execute(q)
        for row in cursor.fetchall():
            # 0:4 gets schema name, table name, capture instance name, min captured LSN:
            name_to_meta_fields[tuple(row[0:4])].append(row[4:])

    for (schema_name, table_name, capture_instance_name,
         min_lsn), fields in name_to_meta_fields.items():
        fq_table_name = f'{schema_name}.{table_name}'

        can_snapshot = False

        if snapshot_table_whitelist_regex and snapshot_table_whitelist_regex.match(
                fq_table_name):
            logger.debug('Table %s WILL be snapshotted due to whitelisting',
                         fq_table_name)
            can_snapshot = True

        if snapshot_table_blacklist_regex and snapshot_table_blacklist_regex.match(
                fq_table_name):
            logger.debug(
                'Table %s will NOT be snapshotted due to blacklisting',
                fq_table_name)
            can_snapshot = False

        topic_name = topic_name_template.format(
            schema_name=schema_name,
            table_name=table_name,
            capture_instance_name=capture_instance_name)

        tracked_table = tracked_tables.TrackedTable(
            db_conn, clock_syncer, metrics_accumulator, sql_query_processor,
            schema_name, table_name, capture_instance_name, topic_name,
            min_lsn, can_snapshot)

        for (change_table_ordinal, column_name, sql_type_name,
             primary_key_ordinal, decimal_precision, decimal_scale) in fields:
            truncate_after = truncate_fields.get(
                f'{schema_name}.{table_name}.{column_name}'.lower())
            tracked_table.append_field(
                tracked_tables.TrackedField(column_name, sql_type_name,
                                            change_table_ordinal,
                                            primary_key_ordinal,
                                            decimal_precision, decimal_scale,
                                            truncate_after))

        result.append(tracked_table)

    return result
Example #23
def query(connection: pyodbc.Connection, query: str) -> List[Dict[str, Any]]:
    cursor = connection.cursor().execute(query)
    return list(_RowAsDict(cursor))
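_RowAsDict is not shown; given how it is consumed, a plausible sketch is a generator that maps each row of an executed cursor to a column-name dict:

def _RowAsDict(cursor):
    # Zip the executed cursor's column names with each fetched row.
    columns = [col[0] for col in cursor.description]
    for row in cursor.fetchall():
        yield dict(zip(columns, row))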