Python get_db_connectionの例、etl.context.get_db_connection Pythonの例

コード例 #1

0

ファイルを表示

def test_db_connection_passwd_base64(mock_db):
    """context: test DB connection with Base64-encoded password"""
    with configure('db_conn_base64.json', None, False):
        context.get_db_connection("dbBase64")

        # Verify the password is correctly Base64-decoded
        mock_db.assert_called_once_with('Secret', 'Facility', 'TopSecret')

コード例 #2

0

ファイルを表示

def test_db_connection(mock_db):
    """context: test establishing DB connection"""
    with configure('db_conn_a.json', None, False):
        context.get_db_connection("db")

        # The DB connection must be created exactly one time
        mock_db.assert_called_once_with('a', 'b', 'c')

        # Repeated requests to the same DB should not recreate the connection
        context.get_db_connection("db")
        assert mock_db.call_count == 1

コード例 #3

0

ファイルを表示

def test_db_conn_external_ok(mock_db):
    """context: test DB connection defined in an external file"""
    with configure('db_conn_ext.json', None, False):
        context.get_db_connection("dbExt")

        # The DB connection's parameters must be read from the external file, then it must be created
        mock_db.assert_called_once_with('TheConnection', 'Mickey', 'Mouse')

        # Repeated requests to the same DB should not recreate the connection
        context.get_db_connection("dbExt")
        assert mock_db.call_count == 1

コード例 #4

0

ファイルを表示

    def run(self, config):
        """Override the abstract method of the base class."""
        # Get attributes from config
        output_param = config['output_param', 'data']
        db_name = config['database']
        fld_delimiter = config['field_delimiter', '']
        rec_delimiter = config['record_delimiter', '\n']
        col_headers = config['col_headers', False]
        quotechar = config['quotechar', None]
        sql = config['sql']
        params = config['params', None]

        # Substitute params in the DB connection and find it
        db_name = db_name.format(**config)
        db_conn = context.get_db_connection(db_name)

        # Prepare quote strings
        qopen = self.QUOTE_OPEN[
            quotechar] if quotechar in self.QUOTE_OPEN else quotechar
        qclose = self.QUOTE_CLOSE[
            quotechar] if quotechar in self.QUOTE_CLOSE else quotechar

        # Prepare parameters, if needed
        db_params = {}
        if params is not None:
            for param in params:
                p_name = param['name']
                p_value = param['value']
                db_params[p_name] = p_value.format(**config)

        # Execute the query
        cur = db_conn.cursor()
        records = []
        try:
            cur.execute(sql, db_params)

            # Output column headers, if needed
            if col_headers:
                records.append(
                    fld_delimiter.join(
                        [qopen + d[0] + qclose for d in cur.description]))

            # Read the returned data
            for record in cur:
                records.append(
                    fld_delimiter.join(
                        [qopen + str(v) + qclose for v in record]))
        finally:
            cur.close()

        logger.log('Done. Read {} rows from {}'.format(len(records), db_name))
        return {output_param: rec_delimiter.join(records) + rec_delimiter}

コード例 #5

0

ファイルを表示

    def run(self, config):
        """Override the abstract method of the base class."""
        # Get attributes from config
        db_name     = config['database']
        sql         = config['sql']
        params      = config['params', None]
        commit_stmt = config['commit_stmt', 'each']

        if commit_stmt == 'none':
            commit_policy = self.COMMIT_NONE
        elif commit_stmt == 'each':
            commit_policy = self.COMMIT_EACH
        elif commit_stmt == 'all':
            commit_policy = self.COMMIT_ALL
        else:
            raise errors.ConfigError('Invalid value for commit_stmt: "{}"'.format(commit_stmt))

        # Substitute params in the DB connection and find it
        db_name = db_name.format(**config)
        db_conn = context.get_db_connection(db_name)

        # Initiate a transaction, if all statements are committed at once
        if not context.dry_run_mode and commit_policy == self.COMMIT_ALL:
            db_conn.begin()

        # Prepare statement(s). If a single statement is given, transform it into a single-element list
        if type(sql) is not list:
            sql = [sql]

        # Prepare parameters, if needed
        db_params = {}
        if params is not None:
            for param in params:
                p_name  = param['name']
                p_value = param['value']
                db_params[p_name] = p_value.format(**config)

        # Execute the statement(s)
        if not context.dry_run_mode:
            for stmt in sql:
                db_conn.execute(stmt, db_params)
                # Commit if needed
                if commit_policy == self.COMMIT_EACH:
                    db_conn.commit()

        # Execute a commit, if all statements are committed at once
        if not context.dry_run_mode and commit_policy == self.COMMIT_ALL:
            db_conn.commit()

        logger.log(context.dry_run_prefix + 'Done. {} statement(s) executed on {}'.format(len(sql), db_name))

コード例 #6

0

ファイルを表示

    def run(self, config):
        """Override the abstract method of the base class."""
        # Get attributes from config
        input_param = config['input_param', 'data']
        data = config.lines(input_param)
        delimiter = None
        quotechar = None

        # Get and validate data format
        data_format = config['format']
        fmt_fixed = data_format == 'fixed'
        fmt_delimited = data_format == 'delimited'
        if not fmt_fixed and not fmt_delimited:
            raise errors.ConfigError(
                'Invalid format value: "{}".'.format(data_format))
        if fmt_delimited:
            delimiter = config['delimiter']
            quotechar = config['quotechar', None]

        # Fetch other config attributes
        start_line = int(config['start_line', 1])
        target_database = config['target_database']
        target_table = config['target_table']
        truncate_target = bool(config['truncate_target', False])
        column_mappings = config['column_mappings']

        # Substitute params in the DB connection
        target_database = target_database.format(**config)
        logger.log(
            context.dry_run_prefix +
            'Loading data to {}@{}'.format(target_table, target_database))

        # Find the DB connection
        db_conn = context.get_db_connection(target_database)

        # Truncate the target table, if required
        if truncate_target:
            if not context.dry_run_mode:
                db_conn.execute(
                    'truncate table {} drop storage'.format(target_table))
            logger.log(context.dry_run_prefix +
                       'Table {}@{} is truncated'.format(
                           target_table, target_database))

        # Create an inserter
        inserter = self.get_inserter(db_conn, target_table, column_mappings,
                                     fmt_fixed, fmt_delimited, config)

        # Fixed-width file: use the input data as is
        if fmt_fixed:
            input_data = data

        # Delimited file: open the input data as a CSV file
        else:
            args = {'delimiter': delimiter, 'strict': True}
            if quotechar is not None:
                args['quotechar'] = quotechar
            input_data = csv.reader(data, **args)

        # Iterate through the data
        count_src_lines = 0
        count_tgt_rows = 0
        for src_row in input_data:
            # Skip up to start_line
            count_src_lines += 1
            if count_src_lines < start_line:
                continue

            # Fetch data values
            try:
                target_row = []
                for cm in column_mappings:
                    # If a source value is used
                    if (fmt_fixed and 'source_pos'
                            in cm) or (fmt_delimited and 'source_index' in cm):
                        col_name = cm['name']
                        datatype = cm['datatype']
                        trim = cm['source_trim', 'none']

                        # Fixed-width
                        if fmt_fixed:
                            pos = cm['source_pos'].partition(':')
                            # Validate the position format
                            if pos[0] == '' or pos[2] == '':
                                raise errors.ConfigError(
                                    'Invalid position specifier "{}" for column "{}"'
                                    .format(cm['source_pos'], col_name))
                            # Validate left boundary
                            try:
                                pos_l = int(pos[0]) - 1
                            except ValueError as e:
                                raise errors.ConfigError(
                                    'Left boundary specification for column "{}": {}'
                                    .format(col_name, str(e)))
                            if pos_l < 0:
                                raise errors.ConfigError(
                                    'Left boundary must be positive (column "{}")'
                                    .format(col_name))
                            # Validate right boundary
                            try:
                                pos_r = int(pos[2])
                            except ValueError as e:
                                raise errors.ConfigError(
                                    'Right boundary specification for column "{}": {}'
                                    .format(col_name, str(e)))
                            if pos_r <= pos_l:
                                raise errors.ConfigError(
                                    'Right boundary must be greater than or equal to the left one (column "{}")'
                                    .format(col_name))
                            # Chomp
                            src_row = src_row.rstrip('\r\n')
                            # Extract column value
                            value = src_row[pos_l:pos_r]

                        # Delimited
                        else:
                            value = src_row[int(cm['source_index'])]

                        # Apply trimming
                        if trim == 'none':
                            pass
                        elif trim == 'left':
                            value = value.lstrip()
                        elif trim == 'right':
                            value = value.rstrip()
                        elif trim == 'both':
                            value = value.strip()
                        else:
                            raise errors.ConfigError(
                                'Invalid trim value for column "{}": "{}"'.
                                format(col_name, trim))

                        # All types: handle null values
                        val_len = len(value)
                        if val_len == 0:
                            value = None
                        # String: validate value length
                        elif datatype == 'string':
                            max_len = int(cm['length'])
                            # If the length exceeds the allowed size
                            if val_len > max_len:
                                # No truncation - raise an error
                                if not bool(cm['truncate', False]):
                                    raise errors.DataError(
                                        'String column "{}": value length ({}) exceeds allowed maximum ({})'
                                        .format(col_name, val_len, max_len))
                                # Otherwise truncate the value
                                value = value[:max_len]
                        # Integer: convert to actual int
                        elif datatype == 'integer':
                            try:
                                value = int(value)
                            except ValueError as e:
                                raise errors.DataError(
                                    'Integer column "{}": {}'.format(
                                        col_name, str(e)))
                        # Number: convert to actual number
                        elif datatype == 'number':
                            try:
                                value = float(value)
                            except ValueError as e:
                                raise errors.DataError(
                                    'Float value error for column "{}": {}'.
                                    format(col_name, str(e)))

                        # Add the value to the target row
                        target_row.append(value)

                    # Otherwise we're using a target expression
                    else:
                        target_expr = cm['target_expr']

                        # If it uses row number value, add it to the row
                        if '{rownum}' in target_expr:
                            target_row.append(count_tgt_rows + 1)

            # Reraise ConfigErrors as is
            except errors.ConfigError:
                raise

            # Assume all other errors are coming from the data
            except Exception as e:
                raise errors.DataError(
                    'Input data error at line {}: {}'.format(
                        count_src_lines, str(e)))

            # Push the row to the target table (unless we're in dry-run mode)
            if not context.dry_run_mode:
                inserter.push_row(target_row)
            count_tgt_rows += 1

        # Finalise
        if not context.dry_run_mode:
            inserter.flush()
            db_conn.commit()
        logger.info(context.dry_run_prefix +
                    'Loading {}@{} finished, read {} rows, inserted {} rows.'.
                    format(target_table, target_database, count_src_lines,
                           count_tgt_rows))

コード例 #7

0

ファイルを表示

def test_uninitialised_db_connection_access():
    """context: test accessing DB connections with uninitialised context"""
    context.get_db_connection('dummy')

コード例 #8

0

ファイルを表示

def test_db_connection_undefined():
    """context: test fetching undefined DB connection"""
    with configure('db_empty.json', None, False):
        context.get_db_connection("fake")

コード例 #9

0

ファイルを表示

def test_db_conn_external_nonexistent():
    """context: test DB connection referring to a nonexistent file"""
    with configure('db_conn_ext_err.json', None, False):
        context.get_db_connection("dbExtErr")

コード例 #10

0

ファイルを表示

def test_db_conn_external_wglob(mock_db):
    """context: test external DB connection defined via a global"""
    with configure('db_conn_ext_glob.json', None, False,
                   {'PATH_FROM_GLOBAL': 'db_conn_extdef.json'}):
        context.get_db_connection("dbExtGlob")
        mock_db.assert_called_once_with('TheConnection', 'Mickey', 'Mouse')

コード例 #11

0

ファイルを表示

def test_db_connection_incomplete():
    """context: test DB connection with incomplete spec"""
    with configure('db_conn_incomplete.json', None, False):
        context.get_db_connection("dbIncomplete")