def test_db_connection_passwd_base64(mock_db): """context: test DB connection with Base64-encoded password""" with configure('db_conn_base64.json', None, False): context.get_db_connection("dbBase64") # Verify the password is correctly Base64-decoded mock_db.assert_called_once_with('Secret', 'Facility', 'TopSecret')
def test_db_connection(mock_db): """context: test establishing DB connection""" with configure('db_conn_a.json', None, False): context.get_db_connection("db") # The DB connection must be created exactly one time mock_db.assert_called_once_with('a', 'b', 'c') # Repeated requests to the same DB should not recreate the connection context.get_db_connection("db") assert mock_db.call_count == 1
def test_db_conn_external_ok(mock_db): """context: test DB connection defined in an external file""" with configure('db_conn_ext.json', None, False): context.get_db_connection("dbExt") # The DB connection's parameters must be read from the external file, then it must be created mock_db.assert_called_once_with('TheConnection', 'Mickey', 'Mouse') # Repeated requests to the same DB should not recreate the connection context.get_db_connection("dbExt") assert mock_db.call_count == 1
def run(self, config): """Override the abstract method of the base class.""" # Get attributes from config output_param = config['output_param', 'data'] db_name = config['database'] fld_delimiter = config['field_delimiter', ''] rec_delimiter = config['record_delimiter', '\n'] col_headers = config['col_headers', False] quotechar = config['quotechar', None] sql = config['sql'] params = config['params', None] # Substitute params in the DB connection and find it db_name = db_name.format(**config) db_conn = context.get_db_connection(db_name) # Prepare quote strings qopen = self.QUOTE_OPEN[ quotechar] if quotechar in self.QUOTE_OPEN else quotechar qclose = self.QUOTE_CLOSE[ quotechar] if quotechar in self.QUOTE_CLOSE else quotechar # Prepare parameters, if needed db_params = {} if params is not None: for param in params: p_name = param['name'] p_value = param['value'] db_params[p_name] = p_value.format(**config) # Execute the query cur = db_conn.cursor() records = [] try: cur.execute(sql, db_params) # Output column headers, if needed if col_headers: records.append( fld_delimiter.join( [qopen + d[0] + qclose for d in cur.description])) # Read the returned data for record in cur: records.append( fld_delimiter.join( [qopen + str(v) + qclose for v in record])) finally: cur.close() logger.log('Done. Read {} rows from {}'.format(len(records), db_name)) return {output_param: rec_delimiter.join(records) + rec_delimiter}
def run(self, config): """Override the abstract method of the base class.""" # Get attributes from config db_name = config['database'] sql = config['sql'] params = config['params', None] commit_stmt = config['commit_stmt', 'each'] if commit_stmt == 'none': commit_policy = self.COMMIT_NONE elif commit_stmt == 'each': commit_policy = self.COMMIT_EACH elif commit_stmt == 'all': commit_policy = self.COMMIT_ALL else: raise errors.ConfigError('Invalid value for commit_stmt: "{}"'.format(commit_stmt)) # Substitute params in the DB connection and find it db_name = db_name.format(**config) db_conn = context.get_db_connection(db_name) # Initiate a transaction, if all statements are committed at once if not context.dry_run_mode and commit_policy == self.COMMIT_ALL: db_conn.begin() # Prepare statement(s). If a single statement is given, transform it into a single-element list if type(sql) is not list: sql = [sql] # Prepare parameters, if needed db_params = {} if params is not None: for param in params: p_name = param['name'] p_value = param['value'] db_params[p_name] = p_value.format(**config) # Execute the statement(s) if not context.dry_run_mode: for stmt in sql: db_conn.execute(stmt, db_params) # Commit if needed if commit_policy == self.COMMIT_EACH: db_conn.commit() # Execute a commit, if all statements are committed at once if not context.dry_run_mode and commit_policy == self.COMMIT_ALL: db_conn.commit() logger.log(context.dry_run_prefix + 'Done. {} statement(s) executed on {}'.format(len(sql), db_name))
def run(self, config): """Override the abstract method of the base class.""" # Get attributes from config input_param = config['input_param', 'data'] data = config.lines(input_param) delimiter = None quotechar = None # Get and validate data format data_format = config['format'] fmt_fixed = data_format == 'fixed' fmt_delimited = data_format == 'delimited' if not fmt_fixed and not fmt_delimited: raise errors.ConfigError( 'Invalid format value: "{}".'.format(data_format)) if fmt_delimited: delimiter = config['delimiter'] quotechar = config['quotechar', None] # Fetch other config attributes start_line = int(config['start_line', 1]) target_database = config['target_database'] target_table = config['target_table'] truncate_target = bool(config['truncate_target', False]) column_mappings = config['column_mappings'] # Substitute params in the DB connection target_database = target_database.format(**config) logger.log( context.dry_run_prefix + 'Loading data to {}@{}'.format(target_table, target_database)) # Find the DB connection db_conn = context.get_db_connection(target_database) # Truncate the target table, if required if truncate_target: if not context.dry_run_mode: db_conn.execute( 'truncate table {} drop storage'.format(target_table)) logger.log(context.dry_run_prefix + 'Table {}@{} is truncated'.format( target_table, target_database)) # Create an inserter inserter = self.get_inserter(db_conn, target_table, column_mappings, fmt_fixed, fmt_delimited, config) # Fixed-width file: use the input data as is if fmt_fixed: input_data = data # Delimited file: open the input data as a CSV file else: args = {'delimiter': delimiter, 'strict': True} if quotechar is not None: args['quotechar'] = quotechar input_data = csv.reader(data, **args) # Iterate through the data count_src_lines = 0 count_tgt_rows = 0 for src_row in input_data: # Skip up to start_line count_src_lines += 1 if count_src_lines < start_line: continue # Fetch data values try: target_row = [] for cm in column_mappings: # If a source value is used if (fmt_fixed and 'source_pos' in cm) or (fmt_delimited and 'source_index' in cm): col_name = cm['name'] datatype = cm['datatype'] trim = cm['source_trim', 'none'] # Fixed-width if fmt_fixed: pos = cm['source_pos'].partition(':') # Validate the position format if pos[0] == '' or pos[2] == '': raise errors.ConfigError( 'Invalid position specifier "{}" for column "{}"' .format(cm['source_pos'], col_name)) # Validate left boundary try: pos_l = int(pos[0]) - 1 except ValueError as e: raise errors.ConfigError( 'Left boundary specification for column "{}": {}' .format(col_name, str(e))) if pos_l < 0: raise errors.ConfigError( 'Left boundary must be positive (column "{}")' .format(col_name)) # Validate right boundary try: pos_r = int(pos[2]) except ValueError as e: raise errors.ConfigError( 'Right boundary specification for column "{}": {}' .format(col_name, str(e))) if pos_r <= pos_l: raise errors.ConfigError( 'Right boundary must be greater than or equal to the left one (column "{}")' .format(col_name)) # Chomp src_row = src_row.rstrip('\r\n') # Extract column value value = src_row[pos_l:pos_r] # Delimited else: value = src_row[int(cm['source_index'])] # Apply trimming if trim == 'none': pass elif trim == 'left': value = value.lstrip() elif trim == 'right': value = value.rstrip() elif trim == 'both': value = value.strip() else: raise errors.ConfigError( 'Invalid trim value for column "{}": "{}"'. format(col_name, trim)) # All types: handle null values val_len = len(value) if val_len == 0: value = None # String: validate value length elif datatype == 'string': max_len = int(cm['length']) # If the length exceeds the allowed size if val_len > max_len: # No truncation - raise an error if not bool(cm['truncate', False]): raise errors.DataError( 'String column "{}": value length ({}) exceeds allowed maximum ({})' .format(col_name, val_len, max_len)) # Otherwise truncate the value value = value[:max_len] # Integer: convert to actual int elif datatype == 'integer': try: value = int(value) except ValueError as e: raise errors.DataError( 'Integer column "{}": {}'.format( col_name, str(e))) # Number: convert to actual number elif datatype == 'number': try: value = float(value) except ValueError as e: raise errors.DataError( 'Float value error for column "{}": {}'. format(col_name, str(e))) # Add the value to the target row target_row.append(value) # Otherwise we're using a target expression else: target_expr = cm['target_expr'] # If it uses row number value, add it to the row if '{rownum}' in target_expr: target_row.append(count_tgt_rows + 1) # Reraise ConfigErrors as is except errors.ConfigError: raise # Assume all other errors are coming from the data except Exception as e: raise errors.DataError( 'Input data error at line {}: {}'.format( count_src_lines, str(e))) # Push the row to the target table (unless we're in dry-run mode) if not context.dry_run_mode: inserter.push_row(target_row) count_tgt_rows += 1 # Finalise if not context.dry_run_mode: inserter.flush() db_conn.commit() logger.info(context.dry_run_prefix + 'Loading {}@{} finished, read {} rows, inserted {} rows.'. format(target_table, target_database, count_src_lines, count_tgt_rows))
def test_uninitialised_db_connection_access(): """context: test accessing DB connections with uninitialised context""" context.get_db_connection('dummy')
def test_db_connection_undefined(): """context: test fetching undefined DB connection""" with configure('db_empty.json', None, False): context.get_db_connection("fake")
def test_db_conn_external_nonexistent(): """context: test DB connection referring to a nonexistent file""" with configure('db_conn_ext_err.json', None, False): context.get_db_connection("dbExtErr")
def test_db_conn_external_wglob(mock_db): """context: test external DB connection defined via a global""" with configure('db_conn_ext_glob.json', None, False, {'PATH_FROM_GLOBAL': 'db_conn_extdef.json'}): context.get_db_connection("dbExtGlob") mock_db.assert_called_once_with('TheConnection', 'Mickey', 'Mouse')
def test_db_connection_incomplete(): """context: test DB connection with incomplete spec""" with configure('db_conn_incomplete.json', None, False): context.get_db_connection("dbIncomplete")