def __init__(
    self,
    config: Union[Dict[str, Any], Path],
    state: Union[None, Dict[str, Any], Path] = None,
    catalog: Union[None, Dict[str, Any], Catalog, Path] = None,
    discover: bool = False,
    **kwargs,
):
    """Store config, state and catalog, loading any that were passed as paths.

    Args:
        config: Parsed config dict, or a ``Path`` that is read with
            ``load_json``.
        state: Parsed state dict, a ``Path`` read with ``load_json``,
            or ``None``.
        catalog: A ``Catalog`` instance, a dict converted through
            ``Catalog.from_dict``, a ``Path`` read with ``Catalog.load``,
            or ``None``.
        discover: Stored unchanged on ``self.discover``.
        **kwargs: Each extra keyword is set as an instance attribute after
            the standard attributes, so it can override them.
    """
    # The *_path attributes stay None unless the matching argument is a Path.
    self.catalog_path = None
    self.state_path = None
    self.config_path = None

    if isinstance(catalog, Path):
        self.catalog_path = str(catalog)
        catalog = Catalog.load(catalog)
    elif isinstance(catalog, dict):
        catalog = Catalog.from_dict(catalog)

    if isinstance(config, Path):
        self.config_path = str(config)
        config = load_json(config)

    if isinstance(state, Path):
        # NOTE(review): state_path keeps the Path object, whereas catalog_path
        # and config_path are stored as str -- confirm whether callers rely
        # on this difference before unifying.
        self.state_path = state
        state = load_json(state)

    self.config = config
    self.state = state
    self.catalog = catalog
    self.discover = discover

    for attr_name, attr_value in kwargs.items():
        setattr(self, attr_name, attr_value)
def test_write_schema(mock_write_schema):
    """Check that write_schema() emits the fixture schema and key properties."""
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.processor import TapProcessor

    catalog_file = f"{FIXTURES_PATH}/processor_catalog.json"
    catalog = Catalog.load(catalog_file)
    client_mock = MagicMock()
    processor = TapProcessor(
        catalog=catalog,
        stream_name="loan_accounts",
        client=client_mock,
        config=config_json,
        state={'currently_syncing': 'loan_accounts'},
        sub_type="self",
        generators=[GeneratorMock([])],
    )
    processor.write_schema()

    # Read the expected values straight from the raw fixture file.
    with open(catalog_file, "r") as fd:
        schema_json = json.load(fd)

    # First stream whose id matches; None when the fixture lacks it.
    matching_stream = next(
        (entry for entry in schema_json["streams"]
         if entry["tap_stream_id"] == "loan_accounts"),
        None,
    )
    if matching_stream is None:
        schema = None
        stream_key_properties = None
    else:
        schema = matching_stream["schema"]
        stream_key_properties = matching_stream["key_properties"]

    assert schema is not None
    mock_write_schema.assert_called_with(
        "loan_accounts", schema, stream_key_properties)
def parse_args():
    """Parse the tap's command-line arguments and load the referenced files.

    Stand-in for singer's default ``singer_utils.parse_args()``
    (https://github.com/singer-io/singer-python/blob/master/singer/utils.py).

    Recognized options:

      -c,--config       Config file (required)
      -s,--state        State file
      -p,--properties   Property selections (deprecated, use --catalog)
      --catalog         Catalog file
      -d,--discover     Run in discover mode
      --start_datetime  Inclusive start date time
      --end_datetime    Exclusive end date time

    Returns the argparse namespace. ``config``, ``state`` and ``properties``
    are replaced by their parsed JSON content (``state`` becomes ``{}`` when
    not given) and ``catalog`` by the result of ``Catalog.load``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument('-c', '--config', help='Config file', required=True)
    parser.add_argument('-s', '--state', help='State file')
    parser.add_argument(
        '-p', '--properties',
        help='Property selections: DEPRECATED, Please use --catalog instead')
    parser.add_argument('--catalog', help='Catalog file')
    parser.add_argument(
        '-d', '--discover', action='store_true', help='Do schema discovery')

    # Capture additional args
    parser.add_argument(
        "--start_datetime",
        type=str,
        help="Inclusive start date time in ISO8601-Date-String format: 2019-04-11T00:00:00Z")
    parser.add_argument(
        "--end_datetime",
        type=str,
        help="Exclusive end date time in ISO8601-Date-String format: 2019-04-12T00:00:00Z")

    parsed = parser.parse_args()

    # Swap each file path for the content it points to.
    if parsed.config:
        parsed.config = singer_utils.load_json(parsed.config)

    parsed.state = singer_utils.load_json(parsed.state) if parsed.state else {}

    if parsed.properties:
        parsed.properties = singer_utils.load_json(parsed.properties)

    if parsed.catalog:
        parsed.catalog = Catalog.load(parsed.catalog)

    return parsed
def test_get_selected_streams():
    """Check that get_selected_streams() returns the three selected fixture streams."""
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.helpers import get_selected_streams

    catalog = Catalog.load(f"{FIXTURES_PATH}/catalog.json")
    selected_streams = get_selected_streams(catalog)
    expected_streams = ["loan_accounts", "loan_repayments", "audit_trail"]

    # Same length plus same members: order is not checked.
    assert len(selected_streams) == len(expected_streams)
    assert set(selected_streams) == set(expected_streams)
def test_write_exceptions(mock_write_schema, mock_write_record):
    """Check that errors raised while writing records or schemas propagate.

    Both mocks succeed on their first call and raise ``OSError`` on their
    second one. The first processing run is expected to fail with the record
    error and the second run with the schema error.
    """
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.processor import TapProcessor

    catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")
    mock_write_record.side_effect = [None, OSError("Mock Record Exception")]
    mock_write_schema.side_effect = [None, OSError("Mock Schema Exception")]
    client_mock = MagicMock()

    records = [
        {"id": "1", "last_modified_date": "2022-01-01T00:00:00+03:00"},
        {"id": "2", "last_modified_date": "2022-01-01T01:00:00+03:00"},
        {"id": "3", "last_modified_date": "2022-01-01T02:00:00+03:00"},
    ]
    processor = TapProcessor(
        catalog=catalog,
        stream_name="loan_accounts",
        client=client_mock,
        config=config_json,
        state={'currently_syncing': 'loan_accounts'},
        sub_type="self",
        generators=[GeneratorMock(records)],
    )
    processor.endpoint_deduplication_key = "id"

    # First run: the second write_record call raises.
    with pytest.raises(OSError) as err:
        processor.process_streams_from_generators()
    assert err.value.args[0] == "Mock Record Exception"
    expected_record_calls = [
        call("loan_accounts",
             IsInstanceMatcher(dict),
             time_extracted=IsInstanceMatcher(datetime.datetime))
        for _ in range(2)
    ]
    mock_write_record.assert_has_calls(expected_record_calls)

    # Second run: the second write_schema call raises.
    with pytest.raises(OSError) as err:
        processor.process_streams_from_generators()
    assert err.value.args[0] == "Mock Schema Exception"
    expected_schema_calls = [
        call("loan_accounts", IsInstanceMatcher(dict), IsInstanceMatcher(list))
        for _ in range(2)
    ]
    mock_write_schema.assert_has_calls(expected_schema_calls)
def parse_args(required_config_keys):
    """Parse standard command-line args, keeping the original file paths.

    Fork of singer's ``parse_args`` made to be able to grab the path of the
    state file.

    Recognized options:

      -c,--config      Config file (required)
      -s,--state       State file
      -d,--discover    Run in discover mode
      -p,--properties  Properties file: DEPRECATED, please use --catalog
      --catalog        Catalog file

    For every file option that is given, ``<option>_path`` is set to the raw
    path and the option itself is replaced by the loaded content. The state
    path is also exposed as ``state_file`` (``None`` when no state file is
    given, in which case ``state`` is ``{}``). The config is finally checked
    with ``utils.check_config`` against ``required_config_keys``.

    Returns the argparse namespace.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', help='Config file', required=True)
    parser.add_argument('-s', '--state', help='State file')
    parser.add_argument(
        '-p', '--properties',
        help='Property selections: DEPRECATED, Please use --catalog instead')
    parser.add_argument('--catalog', help='Catalog file')
    parser.add_argument(
        '-d', '--discover', action='store_true', help='Do schema discovery')

    parsed = parser.parse_args()

    if parsed.config:
        parsed.config_path = parsed.config
        parsed.config = utils.load_json(parsed.config)

    if not parsed.state:
        parsed.state_file = None
        parsed.state = {}
    else:
        parsed.state_path = parsed.state
        parsed.state_file = parsed.state
        parsed.state = utils.load_json(parsed.state)

    if parsed.properties:
        parsed.properties_path = parsed.properties
        parsed.properties = utils.load_json(parsed.properties)

    if parsed.catalog:
        parsed.catalog_path = parsed.catalog
        parsed.catalog = Catalog.load(parsed.catalog)

    utils.check_config(parsed.config, required_config_keys)
    return parsed
def _prep_config():
    """Build the config, catalog and streams for the USGS earthquakes example.

    The example files are looked up in ``../examples/usgs`` relative to the
    directory of this file.

    Returns:
        A ``(config, catalog, streams)`` tuple:

        * ``config`` -- the content of ``config/tap_config.json`` with
          ``schema_dir``, ``catalog_dir`` and ``start_datetime`` (one hour
          before now, naive local time, ISO format) filled in;
        * ``catalog`` -- ``earthquakes.json`` loaded from the catalog dir;
        * ``streams`` -- a dict holding the single ``"earthquakes"`` stream.
    """
    cwd = os.path.dirname(__file__)
    usgs_dir = os.path.join(cwd, "../examples/usgs")

    config = utils.load_json(os.path.join(usgs_dir, "config/tap_config.json"))
    config["schema_dir"] = os.path.join(usgs_dir, "schema")
    config["catalog_dir"] = os.path.join(usgs_dir, "catalog")

    # catalog_dir already starts with usgs_dir. Prefixing usgs_dir again only
    # worked when the path was absolute (os.path.join then drops the earlier
    # components); with a relative __file__ it duplicated the prefix and
    # pointed at a non-existent location.
    catalog = Catalog.load(
        os.path.join(config["catalog_dir"], "earthquakes.json"))

    one_hour_ago = datetime.datetime.now() - datetime.timedelta(hours=1)
    config["start_datetime"] = one_hour_ago.isoformat()

    streams = {"earthquakes": Stream("earthquakes", config)}
    return config, catalog, streams
def test_loan_accounts_processor():
    """Check the endpoint settings exposed by LoanAccountsProcessor."""
    from singer.catalog import Catalog
    catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")
    client_mock = MagicMock()

    from tap_mambu.tap_mambu_refactor.tap_processors.loan_accounts_processor import LoanAccountsProcessor
    processor_kwargs = {
        "catalog": catalog,
        "stream_name": "loan_accounts",
        "client": client_mock,
        "config": config_json,
        "state": {'currently_syncing': 'loan_accounts'},
        "sub_type": "self",
        "generators": [GeneratorMock([])],
    }
    processor = LoanAccountsProcessor(**processor_kwargs)

    assert processor.endpoint_deduplication_key == "id"
    assert processor.endpoint_child_streams == ["loan_repayments"]
def test_loan_repayments_processor_endpoint_config_init():
    """Check the parent stream and parent id set on LoanRepaymentsProcessor."""
    from singer.catalog import Catalog
    catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")
    client_mock = MagicMock()

    from tap_mambu.tap_mambu_refactor.tap_processors.loan_repayments_processor import LoanRepaymentsProcessor
    processor_kwargs = {
        "catalog": catalog,
        "stream_name": "loan_repayments",
        "client": client_mock,
        "config": config_json,
        "state": {'currently_syncing': 'loan_repayments'},
        "sub_type": "self",
        "generators": [GeneratorMock([])],
        "parent_id": 'TEST',
    }
    processor = LoanRepaymentsProcessor(**processor_kwargs)

    assert processor.endpoint_parent == 'loan_accounts'
    assert processor.endpoint_parent_id == 'TEST'
def test_deposit_cards_processor_endpoint_config_init():
    """Check the endpoint settings exposed by DepositCardsProcessor."""
    from singer.catalog import Catalog
    catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")
    client_mock = MagicMock()

    from tap_mambu.tap_mambu_refactor.tap_processors.deposit_cards_processor import DepositCardsProcessor
    processor_kwargs = {
        "catalog": catalog,
        "stream_name": "cards",
        "client": client_mock,
        "config": config_json,
        "state": {'currently_syncing': 'cards'},
        "sub_type": "self",
        "generators": [GeneratorMock([])],
        "parent_id": 'TEST',
    }
    processor = DepositCardsProcessor(**processor_kwargs)

    assert processor.endpoint_deduplication_key == 'reference_token'
    assert processor.endpoint_id_field == 'reference_token'
    assert processor.endpoint_parent == 'deposit'
    assert processor.endpoint_parent_id == 'TEST'
def parse_args(required_config_keys):
    """Parse standard command-line args, including the select-all switch.

    Recognized options:

      -c,--config      Config file (required)
      -s,--state       State file
      -d,--discover    Run in discover mode
      -a,--select_all  Select all streams and fields for discover mode
      -p,--properties  Properties file: DEPRECATED, please use --catalog
      --catalog        Catalog file

    ``config`` and ``state`` are replaced by their parsed JSON content
    (``state`` becomes ``{}`` when not given) and ``catalog`` by the result
    of ``Catalog.load``; the original paths are kept in ``config_path``,
    ``state_path`` and ``catalog_path`` when the option is given. The
    properties file is only loaded when no catalog is given.

    Exits through ``parser.error`` when ``--select_all`` is used without
    ``--discover``. The config is checked with ``check_config`` against
    ``required_config_keys``.

    Returns the argparse namespace.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', help='Config file', required=True)
    parser.add_argument('--state', '-s', help='State file')
    parser.add_argument('--catalog', help='Catalog file')
    parser.add_argument(
        '--discover', '-d', action='store_true', help='Do schema discovery')
    parser.add_argument(
        '--select_all', '-a', action='store_true',
        help='Select all streams and fields in discover mode')
    parser.add_argument(
        '-p', '--properties',
        help='Property selections: DEPRECATED, Please use --catalog instead')

    parsed = parser.parse_args()

    if parsed.config:
        parsed.config_path = parsed.config
        parsed.config = load_json(parsed.config)

    if not parsed.state:
        parsed.state = {}
    else:
        parsed.state_path = parsed.state
        parsed.state = load_json(parsed.state)

    # The catalog takes precedence over the deprecated properties file.
    if parsed.catalog:
        parsed.catalog_path = parsed.catalog
        parsed.catalog = Catalog.load(parsed.catalog)
    elif parsed.properties:
        parsed.properties = load_json(parsed.properties)

    select_all_outside_discovery = parsed.select_all and not parsed.discover
    if select_all_outside_discovery:
        parser.error('Select all only available for discovery mode')

    check_config(parsed.config, required_config_keys)
    return parsed
def test_bookmarks(mock_write_state):
    """Check that write_bookmark() writes the state with the stream's bookmark."""
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.processor import TapProcessor

    catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")
    client_mock = MagicMock()
    processor_kwargs = {
        "catalog": catalog,
        "stream_name": "loan_accounts",
        "client": client_mock,
        "config": config_json,
        "state": {'currently_syncing': 'loan_accounts'},
        "sub_type": "self",
        "generators": [GeneratorMock([])],
    }
    processor = TapProcessor(**processor_kwargs)

    processor.write_bookmark()

    expected_bookmarks = {'loan_accounts': '2021-06-01T00:00:00Z'}
    expected_state = {
        'currently_syncing': 'loan_accounts',
        'bookmarks': expected_bookmarks,
    }
    mock_write_state.assert_called_once_with(expected_state)
def catalog(shared_datadir):
    """Return the catalog loaded from ``test.catalog.json`` in ``shared_datadir``."""
    catalog_file = shared_datadir / "test.catalog.json"
    return Catalog.load(catalog_file)
def parse_args(spec_file, required_config_keys):
    """Parse command-line args described by a spec file plus the standard ones.

    Stand-in for singer's default ``utils.parse_args()``
    (https://github.com/singer-io/singer-python/blob/master/singer/utils.py).

    The module-level ``SPEC`` is updated from ``spec_file``; any argument
    found in ``default_spec.json`` that ``SPEC["args"]`` does not define is
    copied over. One ``--<name>`` option is registered per entry of
    ``SPEC["args"]``, followed by the standard options:

      -c,--config        Config file (required)
      -s,--state         State file
      --catalog          Catalog file
      -d,--discover      Run in discover mode
      -i,--infer_schema  Run schema inference
      --url              REST API endpoint

    ``config`` and ``state`` are replaced by their parsed JSON content
    (``state`` becomes ``{}`` when not given). ``catalog`` is loaded with
    ``Catalog.load`` only when it names an existing file. The config is
    checked with ``utils.check_config`` against ``required_config_keys``.

    Returns the argparse namespace.
    """
    # Read default spec file
    with open(get_abs_path("default_spec.json"), "r") as f:
        default_spec = dict(json.load(f))

    # Read spec file
    with open(spec_file, "r") as f:
        SPEC.update(json.load(f))

    # TODO: What about the fields other than arg
    for arg_name, default_arg in default_spec["args"].items():
        if SPEC["args"].get(arg_name) is None:
            SPEC["args"][arg_name] = default_arg

    parser = argparse.ArgumentParser(SPEC["application"])
    parser.add_argument("spec_file", type=str, help="Specification file")

    # Capture additional args
    for arg_name, arg_spec in SPEC["args"].items():
        parser.add_argument(
            "--" + arg_name,
            type=TYPES[arg_spec["type"]],
            default=arg_spec.get("default"),
            help=arg_spec.get("help"),
            required=arg_spec.get("required", False))

    # Default arguments
    parser.add_argument('-c', '--config', help='Config file', required=True)

    # Disabled option, kept for reference:
    # parser.add_argument(
    #     "--schema_dir", type=str,
    #     help="Path to the schema directory.", required=True)

    parser.add_argument('-s', '--state', help='State file')
    parser.add_argument('--catalog', help='Catalog file')
    parser.add_argument(
        '-d', '--discover', action='store_true', help='Do schema discovery')
    parser.add_argument(
        '-i', '--infer_schema', action='store_true', help='Do infer schema')
    parser.add_argument(
        "--url",
        type=str,
        help="REST API endpoint with {params}. Required in config.")

    parsed = parser.parse_args()

    if parsed.config:
        parsed.config = utils.load_json(parsed.config)

    parsed.state = utils.load_json(parsed.state) if parsed.state else {}

    # A catalog value that is not an existing file is left as given.
    catalog_is_file = parsed.catalog and os.path.isfile(parsed.catalog)
    if catalog_is_file:
        parsed.catalog = Catalog.load(parsed.catalog)

    utils.check_config(parsed.config, required_config_keys)
    return parsed
def test_tap_processor_process_child_records(
        mock_sync_endpoint_refactor,
        mock_write_bookmark,
        mock_write_schema,  # Mock write_schema so we don't pollute the output
        mock_get_selected_streams,
        capsys):
    """Check record counting and output when a parent stream syncs children.

    The mocked child sync reports a fixed number of records per call; the
    total returned by the processor must account for every child stream of
    every parent record plus the parent record itself, and stdout must hold
    exactly the parent RECORD messages.
    """
    from singer.catalog import Catalog
    from tap_mambu.tap_mambu_refactor.tap_processors.parent_processor import ParentProcessor

    fake_children_record_count = 4
    mock_get_selected_streams.return_value = ["child_1", "child_2"]
    mock_sync_endpoint_refactor.return_value = fake_children_record_count
    catalog = Catalog.load(f"{FIXTURES_PATH}/processor_catalog.json")

    generator_data = [
        {
            "encoded_key": "12345678901234567890123456789012",
            "last_modified_date": "2022-01-01T00:00:00.000000Z",
            "id": "2",
        },
        {
            "encoded_key": "12345678901234567890123456789013",
            "last_modified_date": "2022-01-01T00:00:00.000000Z",
            "id": "3",
        },
        {
            "encoded_key": "12345678901234567890123456789014",
            "last_modified_date": "2022-01-01T00:00:00.000000Z",
            "id": "4",
        },
        {
            "encoded_key": "12345678901234567890123456789015",
            "last_modified_date": "2022-01-01T00:00:00.000000Z",
            "id": "5",
        },
    ]
    # Hand the generator its own copy of the list.
    generator = GeneratorMock(list(generator_data))
    generator.time_extracted = 0

    client_mock = MagicMock()
    processor = ParentProcessor(
        catalog=catalog,
        stream_name="loan_accounts",
        client=client_mock,
        config=config_json,
        state={'currently_syncing': 'loan_accounts'},
        sub_type="self",
        generators=[generator],
    )
    processor.endpoint_child_streams = ["child_1", "child_2"]

    actual_records_count = processor.process_streams_from_generators()

    # sync_endpoint_refactor called for every record (len(generator_data)) for every child_stream + once for parent
    records_per_parent = \
        fake_children_record_count * len(processor.endpoint_child_streams) + 1
    expected_records_count = len(generator_data) * records_per_parent
    assert actual_records_count == expected_records_count, \
        "Record count mismatch when adding child records"

    mock_sync_endpoint_refactor.assert_called_with(
        client=client_mock,
        catalog=processor.catalog,
        state={'currently_syncing': 'loan_accounts'},
        stream_name=processor.endpoint_child_streams[-1],
        sub_type="self",
        config=config_json,
        parent_id="5")

    captured = capsys.readouterr()
    stdout_list = [
        json.loads(line) for line in captured.out.split("\n") if line
    ]
    expected_messages = [
        {"type": "RECORD", "stream": "loan_accounts", "record": record}
        for record in generator_data
    ]

    # noinspection PyTypeChecker
    assert stdout_list == expected_messages, \
        "Output should contain mocked records"
def parse_args(spec_file, required_config_keys):
    """Parse command-line args described by a spec file plus the standard ones.

    Stand-in for singer's default ``utils.parse_args()``
    (https://github.com/singer-io/singer-python/blob/master/singer/utils.py).

    The module-level ``SPEC`` is first updated from ``default_spec.json``.
    The custom ``spec_file`` may then override ``application`` and add to or
    override entries of ``SPEC["args"]``. One ``--<name>`` option is
    registered per entry of ``SPEC["args"]``, followed by the standard
    options:

      -c,--config        Config file (required)
      -s,--state         State file
      --catalog          Catalog file
      -d,--discover      Run in discover mode
      -i,--infer_schema  Run schema inference
      -o,--offline       Offline test mode

    ``config`` and ``state`` are replaced by their parsed JSON content
    (``state`` becomes ``{}`` when not given). ``catalog`` is loaded with
    ``Catalog.load`` only when it names an existing file. The config is
    checked with ``utils.check_config`` against ``required_config_keys``.

    Returns the argparse namespace.
    """
    # Read default spec file
    with open(get_abs_path("default_spec.json"), "r") as f:
        default_spec = dict(json.load(f))
    SPEC.update(default_spec)

    # Overwrite with the custom spec file
    with open(spec_file, "r") as f:
        custom_spec = dict(json.load(f))

    SPEC["application"] = custom_spec.get("application", SPEC["application"])
    custom_args = custom_spec.get("args")
    if custom_args:
        SPEC["args"].update(custom_args)

    parser = argparse.ArgumentParser(SPEC["application"])
    parser.add_argument("spec_file", type=str, help="Specification file")

    # Capture additional args
    for arg_name, arg_spec in SPEC["args"].items():
        parser.add_argument(
            "--" + arg_name,
            type=TYPES[arg_spec["type"]],
            default=arg_spec.get("default"),
            help=arg_spec.get("help"),
            required=arg_spec.get("required", False))

    # Default singer arguments, commands, and required args
    parser.add_argument('-c', '--config', help='Config file', required=True)
    parser.add_argument('-s', '--state', help='State file')
    parser.add_argument('--catalog', help='Catalog file')

    # commands
    parser.add_argument(
        '-d', '--discover', action='store_true', help='Do schema discovery')
    parser.add_argument(
        '-i', '--infer_schema', action='store_true', help='Do infer schema')
    parser.add_argument(
        "--offline", "-o", action="store_true", help="Offline test mode")

    parsed = parser.parse_args()

    if parsed.config:
        parsed.config = utils.load_json(parsed.config)

    parsed.state = utils.load_json(parsed.state) if parsed.state else {}

    # A catalog value that is not an existing file is left as given.
    catalog_is_file = parsed.catalog and os.path.isfile(parsed.catalog)
    if catalog_is_file:
        parsed.catalog = Catalog.load(parsed.catalog)

    utils.check_config(parsed.config, required_config_keys)
    return parsed