def run(self, parsed_args: argparse.Namespace) -> Iterable[str]:
    """Execute the requested connector command and yield serialized messages.

    This is a generator, so nothing in the body (including the
    missing-command check) runs until the result is iterated.

    :param parsed_args: parsed CLI namespace. ``command`` selects the action;
        ``config``, ``catalog`` and ``state`` are handed to the source's
        ``read_config`` / ``read_catalog`` / ``read_state`` as needed.
    :return: yields every outgoing message as a JSON string produced with
        ``.json(exclude_unset=True)``.
    :raises Exception: if no command was given, or if the command is not one
        of ``spec``, ``check``, ``discover`` or ``read``.
    """
    command = parsed_args.command
    if not command:
        raise Exception("No command passed")

    # todo: add try catch for exceptions with different exit codes
    spec: ConnectorSpecification = self.source.spec(self.logger)
    self.configure_sentry(spec.connectionSpecification, parsed_args)

    with tempfile.TemporaryDirectory() as workdir:
        # "spec" is the only command that does not need a config.
        if command == "spec":
            yield AirbyteMessage(type=Type.SPEC, spec=spec).json(exclude_unset=True)
            return

        loaded_config = self.source.read_config(parsed_args.config)
        config = self.source.configure(loaded_config, workdir)

        # With the config available, collect its airbyte_secret values and
        # register them with the log formatter so they are filtered from
        # logging and do not leak.
        AirbyteLogFormatter.update_secrets(
            get_secrets(self.source, config, self.logger)
        )

        # Strip internal flags before validating; otherwise jsonschema's
        # additionalProperties flag would fail the validation.
        config, internal_flags = split_config(config)
        if self.source.check_config_against_spec or command == "check":
            check_config_against_spec_or_exit(config, spec)
        # Validation is done: merge the internal flags back into the config.
        config.update(internal_flags.dict())

        if command == "check":
            outcome = self.source.check(self.logger, config)
            if outcome.status == Status.SUCCEEDED:
                self.logger.info("Check succeeded")
            else:
                self.logger.error("Check failed")
            yield AirbyteMessage(
                type=Type.CONNECTION_STATUS, connectionStatus=outcome
            ).json(exclude_unset=True)
            return

        if command == "discover":
            discovered = self.source.discover(self.logger, config)
            yield AirbyteMessage(type=Type.CATALOG, catalog=discovered).json(
                exclude_unset=True
            )
            return

        if command == "read":
            configured_catalog = self.source.read_catalog(parsed_args.catalog)
            current_state = self.source.read_state(parsed_args.state)
            records = self.source.read(
                self.logger, config, configured_catalog, current_state
            )
            for record in records:
                yield record.json(exclude_unset=True)
            return

        raise Exception("Unexpected command " + command)
def test_formatter(logger, caplog):
    """Format one captured INFO record and check the resulting JSON envelope.

    The formatted output must parse as JSON with ``type == "LOG"`` and a
    ``log`` mapping holding the level and the original message.
    """
    formatter = AirbyteLogFormatter()
    logger.info("Test formatter")

    payload = json.loads(formatter.format(caplog.records[0]))
    assert payload.get("type") == "LOG"

    log_entry = payload.get("log")
    assert isinstance(log_entry, Dict)

    # Read both fields before asserting on either of them.
    observed_level, observed_message = log_entry.get("level"), log_entry.get("message")
    assert observed_level == "INFO"
    assert observed_message == "Test formatter"
def test_airbyte_secret_is_masked_on_logger_output(source_spec, mocker, config, caplog):
    """Run a ``read`` through the entrypoint and inspect the captured log text.

    Config values whose spec property is flagged ``airbyte_secret`` must not
    appear in the log output; all other config values must appear.
    """
    caplog.set_level(logging.DEBUG, logger="airbyte.test")
    caplog.handler.setFormatter(AirbyteLogFormatter())
    entrypoint = AirbyteEntrypoint(MockSource())
    parsed_args = Namespace(command="read", config="", state="", catalog="")

    # Stub the source methods the entrypoint calls on the way to ``read``.
    stubbed_returns = (
        ("spec", ConnectorSpecification(connectionSpecification=source_spec)),
        ("configure", config),
        ("read_config", None),
        ("read_state", {}),
        ("read_catalog", {}),
    )
    for method_name, value in stubbed_returns:
        mocker.patch.object(MockSource, method_name, return_value=value)

    list(entrypoint.run(parsed_args))
    log_result = caplog.text

    # Split the config values by the ``airbyte_secret`` flag of their property.
    secret_values, plain_values = [], []
    for prop_name, prop_schema in source_spec["properties"].items():
        bucket = secret_values if prop_schema.get("airbyte_secret") else plain_values
        bucket.append(config[prop_name])

    assert all(str(value) not in log_result for value in secret_values)
    assert all(str(value) in log_result for value in plain_values)
def test_level_transform(logger, caplog):
    """Check the level names the formatter emits for warning and critical records.

    A ``warning`` record must be reported as ``"WARN"`` and a ``critical``
    record as ``"FATAL"`` in the formatted JSON.
    """
    formatter = AirbyteLogFormatter()
    logger.warning("Test level transform warn")
    logger.critical("Test level transform critical")

    def emitted_level(record):
        # Format the record, parse the JSON and pull out log.level.
        return json.loads(formatter.format(record)).get("log").get("level")

    warn_record = caplog.records[0]
    critical_record = caplog.records[1]

    # Compute both levels first so formatting happens before any assertion.
    warn_level = emitted_level(warn_record)
    critical_level = emitted_level(critical_record)

    assert warn_level == "WARN"
    assert critical_level == "FATAL"
def test_non_airbyte_secrets_are_not_masked_on_uncaught_exceptions(mocker, caplog, capsys):
    """Check that a non-secret config value is left intact in exception output.

    A source whose ``read`` raises with the non-secret value in its message is
    run through the entrypoint. The exception is passed to ``sys.excepthook``,
    and the value must then be present both on captured stdout and in the
    captured log text.
    """
    source_spec = {
        "type": "object",
        "required": ["api_token"],
        "additionalProperties": False,
        "properties": {
            SECRET_PROPERTY: {"type": "string", "airbyte_secret": True},
            NOT_SECRET_PROPERTY: {"type": "string", "airbyte_secret": False},
        },
    }
    simple_config = {
        SECRET_PROPERTY: I_AM_A_SECRET_VALUE,
        NOT_SECRET_PROPERTY: NOT_A_SECRET_VALUE,
    }

    caplog.set_level(logging.DEBUG, logger="airbyte.test")
    caplog.handler.setFormatter(AirbyteLogFormatter())

    class BrokenSource(MockSource):
        def read(
            self,
            logger: logging.Logger,
            config: Mapping[str, Any],
            catalog: ConfiguredAirbyteCatalog,
            state: MutableMapping[str, Any] = None,
        ):
            # Always fail, embedding the non-secret value in the message.
            raise Exception("Exception:" + NOT_A_SECRET_VALUE)

    entrypoint = AirbyteEntrypoint(BrokenSource())
    parsed_args = Namespace(command="read", config="", state="", catalog="")

    # Stub the source methods used before ``read`` is reached.
    stubbed_returns = (
        ("spec", ConnectorSpecification(connectionSpecification=source_spec)),
        ("configure", simple_config),
        ("read_config", None),
        ("read_state", {}),
        ("read_catalog", {}),
    )
    for method_name, value in stubbed_returns:
        mocker.patch.object(MockSource, method_name, return_value=value)
    mocker.patch.object(
        MockSource,
        "read",
        side_effect=Exception("Exception:" + NOT_A_SECRET_VALUE),
    )

    try:
        list(entrypoint.run(parsed_args))
    except Exception as error:
        # Hand the failure to whichever excepthook is currently installed.
        sys.excepthook(type(error), error, error.__traceback__)

    captured_stdout = capsys.readouterr().out
    assert NOT_A_SECRET_VALUE in captured_stdout, "Should not have filtered non-secret value from exception trace message"
    assert NOT_A_SECRET_VALUE in caplog.text, "Should not have filtered non-secret value from exception log message"