Example #1
    def test_timeout_backoff__get_file_handle(self, mocked_sftp, mocked_sleep):
        """
            Test case to verify we backoff and retry for 'get_file_handle' function
        """
        # mock 'open' and raise 'socket.timeout' error
        mocked_open = mock.Mock()
        mocked_open.side_effect = socket.timeout
        mocked_sftp.open.side_effect = mocked_open

        config = {
            "host": "10.0.0.1",
            "port": 22,
            "username": "******",
            "password": "",
            "start_date": "2020-01-01"
        }
        # create connection
        conn = client.connection(config=config)

        with self.assertRaises(socket.timeout):
            # function call
            conn.get_file_handle({"filepath": "/root/file.csv"})

        # verify that the tap retried 5 times
        self.assertEqual(mocked_sftp.open.call_count, 5)
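
The five retries asserted above are normally produced by a decorator on the client method rather than by logic in the test. A minimal sketch of what such a decorator could look like, assuming the tap uses the backoff library (the method body and retry parameters here are illustrative, not the tap's actual code):

import socket

import backoff

class connection:
    @backoff.on_exception(backoff.expo,    # exponential wait between tries
                          socket.timeout,  # retry only on socket timeouts
                          max_tries=5)     # matches the call_count assertion
    def get_file_handle(self, f):
        # illustrative body: open the remote file through the SFTP client
        return self.sftp.open(f["filepath"], "rb")
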
Example #2
    def test_timeout_backoff__get_files_by_prefix(self, mocked_sftp):
        """
            Test case to verify we backoff and retry for 'get_files_by_prefix' function
        """
        # mock 'listdir_attr' and raise 'socket.timeout' error
        mocked_listdir_attr = mock.Mock()
        mocked_listdir_attr.side_effect = socket.timeout
        mocked_sftp.listdir_attr.side_effect = mocked_listdir_attr

        config = {
            "host": "10.0.0.1",
            "port": 22,
            "username": "******",
            "password": "",
            "start_date": "2020-01-01"
        }
        # create connection
        conn = client.connection(config=config)

        before_time = datetime.now()
        with self.assertRaises(socket.timeout):
            # function call
            conn.get_files_by_prefix(".")
        after_time = datetime.now()

        # verify that the tap backed off for at least 60 seconds in total
        time_difference = (after_time - before_time).total_seconds()
        self.assertGreaterEqual(time_difference, 60)
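
Unlike Example #1, this test does not patch time.sleep, so the elapsed-time assertion measures real backoff delays. A sketch of a decorator that would satisfy it, assuming constant retry intervals (the interval and try count are illustrative, not the tap's actual values):

import socket

import backoff

class connection:
    # 7 tries => 6 waits of 10 seconds each => at least 60 seconds in total
    @backoff.on_exception(backoff.constant,
                          socket.timeout,
                          max_tries=7,
                          interval=10)
    def get_files_by_prefix(self, prefix):
        # illustrative body: list remote files through the SFTP client
        return self.sftp.listdir_attr(prefix)
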
Example #3
    def test_timeout_backoff__sync_file(self, mocked_get_row_iterators,
                                        mocked_get_file_handle, mocked_sleep):
        """
            Test case to verify we backoff and retry for 'sync_file' function
        """
        # mock 'get_row_iterators' and raise 'socket.timeout' error
        mocked_get_row_iterators.side_effect = socket.timeout
        # mock 'get_file_handle'
        mocked_get_file_handle.return_value = None

        config = {
            "host": "10.0.0.1",
            "port": 22,
            "username": "******",
            "password": "",
            "start_date": "2020-01-01"
        }
        table_spec = {
            "key_properties": "test_key_properties",
            "delimiter": ","
        }
        file = {"filepath": "/root/file.csv"}
        # create connection
        conn = client.connection(config=config)
        with self.assertRaises(socket.timeout):
            # function call
            sync.sync_file(conn=conn,
                           f=file,
                           stream="test_stream",
                           table_spec=table_spec)

        # verify that the tap retried 5 times
        self.assertEqual(mocked_get_row_iterators.call_count, 5)
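
The three mock arguments in the test signature imply stacked @mock.patch decorators that the excerpt omits. A hedged reconstruction, shown only to make the fixture order clear: decorators apply bottom-up, so the bottom-most patch becomes the first argument after self. The patch targets below are hypothetical module paths, not taken from the source:

    @mock.patch("time.sleep")                                  # hypothetical target
    @mock.patch("tap_sftp.client.connection.get_file_handle")  # hypothetical target
    @mock.patch("tap_sftp.sync.get_row_iterators")             # hypothetical target
    def test_timeout_backoff__sync_file(self, mocked_get_row_iterators,
                                        mocked_get_file_handle, mocked_sleep):
        ...
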
Example #4
def discover_streams(config):
    streams = []

    conn = client.connection(config)
    prefix = format(config.get("user_dir", "./"))

    tables = json.loads(config['tables'])
    for table_spec in tables:
        schema, stream_md = get_schema(conn, table_spec)

        streams.append({
            'stream': table_spec['table_name'],
            'tap_stream_id': table_spec['table_name'],
            'schema': schema,
            'metadata': stream_md
        })

    return streams
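
discover_streams expects config['tables'] to be a JSON string holding a list of table specifications. A hedged sketch of a minimal config, with field values that are purely illustrative:

import json

config = {
    "host": "10.0.0.1",
    "port": 22,
    "username": "user",
    "password": "",
    "start_date": "2020-01-01",
    # JSON-encoded list of table specs, decoded by json.loads(config['tables'])
    "tables": json.dumps([{
        "table_name": "file_stream",
        "search_prefix": "/root",
        "search_pattern": "file.*\\.csv",
        "key_properties": ["id"],
        "delimiter": ","
    }])
}

streams = discover_streams(config)
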
Example #5
def sync_stream(config, state, stream):
    table_name = stream.tap_stream_id
    modified_since = utils.strptime_to_utc(
        singer.get_bookmark(state, table_name, 'modified_since')
        or config['start_date'])

    LOGGER.info('Syncing table "%s".', table_name)
    LOGGER.info('Getting files modified since %s.', modified_since)

    conn = client.connection(config)
    table_spec = [
        c for c in json.loads(config["tables"])
        if c["table_name"] == table_name
    ]
    if len(table_spec) == 0:
        LOGGER.info("No table configuration found for '%s', skipping stream",
                    table_name)
        return 0
    if len(table_spec) > 1:
        LOGGER.info(
            "Multiple table configurations found for '%s', skipping stream",
            table_name)
        return 0
    table_spec = table_spec[0]

    files = conn.get_files(table_spec["search_prefix"],
                           table_spec["search_pattern"], modified_since)

    LOGGER.info('Found %s files to be synced.', len(files))

    records_streamed = 0
    if not files:
        return records_streamed

    for f in files:
        records_streamed += sync_file(conn, f, stream, table_spec)
        state = singer.write_bookmark(state, table_name, 'modified_since',
                                      f['last_modified'].isoformat())
        singer.write_state(state)

    LOGGER.info('Wrote %s records for table "%s".', records_streamed,
                table_name)

    return records_streamed
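
For reference, singer.write_bookmark nests the value under a top-level 'bookmarks' key, so after the first file is synced the state passed to singer.write_state looks roughly like this (stream name and timestamp illustrative):

state = {
    "bookmarks": {
        "file_stream": {
            "modified_since": "2020-06-01T00:00:00+00:00"
        }
    }
}
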
Example #6
    def test_timeout_value_not_passed_in_config(self):
        """
            Test case to verify that the timeout value is 300 as
            we have not passed 'request_timeout' in config
        """
        # create config
        config = {
            "host": "10.0.0.1",
            "port": 22,
            "username": "******",
            "password": "",
            "start_date": "2020-01-01"
        }

        # create connection
        conn = client.connection(config=config)

        # verify the expected timeout value is set
        self.assertEqual(conn.request_timeout, 300)
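
A minimal sketch of the default handling this test implies, assuming the constructor falls back to 300 when 'request_timeout' is missing or empty (the constant name and fallback logic are illustrative):

REQUEST_TIMEOUT_DEFAULT = 300  # seconds

class connection:
    def __init__(self, config):
        self.config = config
        # fall back to the default when 'request_timeout' is absent or falsy
        self.request_timeout = float(
            config.get("request_timeout") or REQUEST_TIMEOUT_DEFAULT)
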
Example #7
def discover_streams(config):
    streams = []

    conn = client.connection(config)
    prefix = format(config.get("user_dir", "./"))

    tables = json.loads(config['tables'])
    for table_spec in tables:
        LOGGER.info('Sampling records to determine table JSON schema "%s".', table_spec['table_name'])
        schema = json_schema.get_schema_for_table(conn, table_spec)
        stream_md = metadata.get_standard_metadata(schema,
                                                   key_properties=table_spec.get('key_properties'),
                                                   replication_method='INCREMENTAL')
        streams.append(
            {
                'stream': table_spec['table_name'],
                'tap_stream_id': table_spec['table_name'],
                'schema': schema,
                'metadata': stream_md
            }
        )

    return streams
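
get_standard_metadata comes from singer-python's metadata module and returns the stream metadata as a list of breadcrumb entries, roughly shaped like this (shown for orientation only; key properties and field names are illustrative):

stream_md = [
    {
        "breadcrumb": (),
        "metadata": {
            "table-key-properties": ["id"],
            "forced-replication-method": "INCREMENTAL",
            "inclusion": "available"
        }
    },
    # plus one entry per schema property, e.g.:
    {
        "breadcrumb": ("properties", "id"),
        "metadata": {"inclusion": "automatic"}
    }
]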