Code example #1
 def test_load_empty(self, fixturesfolder):
     loaderfolder = join(fixturesfolder, "loader")
     with pytest.raises(LoadError):
         load_file_to_str(join(loaderfolder, "empty.yml"))
     with pytest.raises(LoadError):
         load_yaml(join(loaderfolder, "empty.yml"))
     with pytest.raises(LoadError):
         load_json(join(loaderfolder, "empty.json"))
Code example #2
 def test_load_file_to_str(self):
     with temp_dir(folder="test_text") as tmpdir:
         text_file = join(tmpdir, "text_file.txt")
         save_str_to_file(TestLoader.text, text_file)
         result = load_file_to_str(text_file)
         assert result == TestLoader.text
         result = load_file_to_str(text_file, strip=True)
         assert result == TestLoader.expected_text_strip
         result = load_file_to_str(text_file, replace_newlines=" ")
         assert result == TestLoader.expected_text_newlines_to_spaces
         with pytest.raises(IOError):
             load_file_to_str(join(tmpdir, "NOTEXIST.txt"))
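The two tests above pin down the behaviour of load_file_to_str that the rest of the examples on this page rely on: optional stripping, optional newline replacement, an error on empty files and an ordinary IOError on missing ones. Below is a minimal sketch of a function with that contract; the keyword names and the order in which replace_newlines and strip are applied are assumptions taken from the calls above, not the real hdx-python-utilities implementation.

class LoadError(Exception):
    """Raised when a file loads but contains nothing (assumption based on test_load_empty above)."""


def load_file_to_str_sketch(path, encoding="utf-8", strip=False, replace_newlines=None):
    # Read the whole file; a missing file propagates the usual IOError/OSError
    with open(path, encoding=encoding) as file:
        text = file.read()
    # Optionally flatten newlines into the given replacement string
    if replace_newlines is not None:
        text = text.replace("\n", replace_newlines)
    # Optionally strip leading and trailing whitespace
    if strip:
        text = text.strip()
    # Empty content is treated as an error, as in test_load_empty above
    if not text:
        raise LoadError(f"{path} is empty!")
    return text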
Code example #3
    def retrieve_text(
        self, url, filename, logstr=None, fallback=False, **kwargs
    ):
        """Retrieve text

        Args:
            url (str): URL to download
            filename (str): Filename to use for saved file
            logstr (Optional[str]): Text to use in log string to describe download. Defaults to filename.
            fallback (bool): Whether to use static fallback if download fails. Defaults to False.
            **kwargs: Parameters to pass to download call

        Returns:
            str: The text from the file

        """
        if not logstr:
            logstr = filename
        saved_path = join(self.saved_dir, filename)
        if self.use_saved:
            logger.info(f"Using saved {logstr} in {saved_path}")
            text = load_file_to_str(saved_path)
        else:
            try:
                logger.info(
                    f"Downloading {logstr} from {self.get_url_logstr(url)}"
                )
                self.downloader.download(url, **kwargs)
                text = self.downloader.get_text()
                if self.save:
                    logger.info(f"Saving {logstr} in {saved_path}")
                    save_str_to_file(text, saved_path)
            except DownloadError:
                if not fallback:
                    raise
                fallback_path = join(self.fallback_dir, filename)
                logger.exception(
                    f"{logstr} download failed, using static data {fallback_path}!"
                )
                text = load_file_to_str(fallback_path)
        return text
Code example #4
    def load_api_key(path):
        # type: (str) -> str
        """
        Load HDX api key

        Args:
            path (str): Path to HDX key

        Returns:
            str: HDX api key

        """
        logger.info('Loading HDX api key from: %s' % path)
        apikey = load_file_to_str(path)
        return apikey
Code example #5
def read_or_create_batch(folder: str, batch: Optional[str] = None) -> str:
    """Get batch or create it if it doesn't exist

    Args:
        folder (str): Folder in which to look for or create batch file.
        batch (Optional[str]): Batch to use if there isn't one in a file already.

    Returns:
        str: Batch
    """
    batch_file = join(folder, "batch.txt")
    if exists(batch_file):
        batch = load_file_to_str(batch_file, strip=True)
        logger.info(f"File BATCH = {batch}")
    else:
        if not batch:
            batch = get_uuid()
            logger.info(f"Generated BATCH = {batch}")
        save_str_to_file(batch, batch_file)
    return batch
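A short usage sketch for read_or_create_batch as defined above: the first call in an empty folder generates and stores a batch code, later calls read the same code back, and an explicitly supplied batch only matters while no batch.txt exists yet. The temporary directory below is for illustration only and assumes the function above (with its helpers) is in scope.

import tempfile

with tempfile.TemporaryDirectory() as folder:
    first = read_or_create_batch(folder)               # generates a UUID and writes batch.txt
    second = read_or_create_batch(folder)              # reads the same code back from batch.txt
    assert first == second
    third = read_or_create_batch(folder, batch="abc")  # ignored because batch.txt already exists
    assert third == first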
Code example #6
 def __init__(self, auth=None, basicauth=None, basicauthfile=None):
     # type: (Optional[Tuple[str, str]], Optional[str], Optional[str]) -> None
     s = requests.Session()
     if basicauthfile is not None:
         if basicauth is not None:
             raise DownloadError('Both basicauth and basicauthfile supplied!')
         elif auth is not None:
             raise DownloadError('Both auth and basicauthfile supplied!')
         else:
             basicauth = load_file_to_str(basicauthfile)
     if basicauth is not None:
         if auth is None:
             auth = decode(basicauth)
         else:
             raise DownloadError('Both auth and basicauth supplied!')
     s.auth = auth
     retries = Retry(total=5, backoff_factor=0.4, status_forcelist=[429, 500, 502, 503, 504], raise_on_redirect=True,
                     raise_on_status=True)
     s.mount('http://', HTTPAdapter(max_retries=retries, pool_connections=100, pool_maxsize=100))
     s.mount('https://', HTTPAdapter(max_retries=retries, pool_connections=100, pool_maxsize=100))
     self.session = s
     self.response = None
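decode in the constructor above turns a basic authentication string into the (user, password) tuple that requests expects. The helper below is a standard-library illustration of that decoding only, not the decode function actually used in the example, whose validation details may differ.

import base64

def decode_basic_auth(basic_auth):
    # Split "Basic dXNlcjpwYXNz" into the scheme and the base64 payload
    scheme, _, encoded = basic_auth.strip().partition(" ")
    if scheme != "Basic" or not encoded:
        raise ValueError("Not a basic authentication string!")
    # The payload is base64("user:password")
    user, _, password = base64.b64decode(encoded).decode("utf-8").partition(":")
    return user, password

print(decode_basic_auth("Basic dXNlcjpwYXNz"))  # ('user', 'pass')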
Code example #7
File: setup.py  Project: OCHA-DAP/hdx-python-scraper
extras_requirements = {'pandas': ['pandas>=1.2.4']}

classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Natural Language :: English",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Topic :: Software Development :: Libraries :: Python Modules",
]

PublishCommand.version = load_file_to_str(join('src', 'hdx', 'scraper',
                                               'version.txt'),
                                          strip=True)

setup(
    name='hdx-python-scraper',
    description='HDX Python Scraper Library',
    license='MIT',
    url='https://github.com/OCHA-DAP/hdx-python-scraper',
    version=PublishCommand.version,
    author='Michael Rans',
    author_email='*****@*****.**',
    keywords=['HDX', 'API', 'library'],
    long_description=load_file_to_str('README.md'),
    long_description_content_type='text/markdown',
    packages=find_packages(where='src'),
    package_dir={'': 'src'},
Code example #8
 def htmltext(self, fixturesfolder):
     return load_file_to_str(join(fixturesfolder, "html", "response.html"))
Code example #9
def get_session(
    user_agent: Optional[str] = None,
    user_agent_config_yaml: Optional[str] = None,
    user_agent_lookup: Optional[str] = None,
    use_env: bool = True,
    fail_on_missing_file: bool = True,
    **kwargs: Any,
) -> requests.Session:
    """Set up and return Session object that is set up with retrying. Requires either global user agent to be set or
    appropriate user agent parameter(s) to be completed. If the EXTRA_PARAMS or BASIC_AUTH environment variable is
    supplied, the extra_params* parameters will be ignored.

    Args:
        user_agent (Optional[str]): User agent string. HDXPythonUtilities/X.X.X- is prefixed.
        user_agent_config_yaml (Optional[str]): Path to YAML user agent configuration. Ignored if user_agent supplied. Defaults to ~/.useragent.yml.
        user_agent_lookup (Optional[str]): Lookup key for YAML. Ignored if user_agent supplied.
        use_env (bool): Whether to read environment variables. Defaults to True.
        fail_on_missing_file (bool): Raise an exception if any specified configuration files are missing. Defaults to True.
        **kwargs: See below
        auth (Tuple[str, str]): Authorisation information in tuple form (user, pass) OR
        basic_auth (str): Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR
        basic_auth_file (str): Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx)
        extra_params_dict (Dict): Extra parameters to put on end of url as a dictionary OR
        extra_params_json (str): Path to JSON file containing extra parameters to put on end of url OR
        extra_params_yaml (str): Path to YAML file containing extra parameters to put on end of url
        extra_params_lookup (str): Lookup key for parameters. If not given assumes parameters are at root of the dict.
        headers (Dict): Additional headers to add to request.
        status_forcelist (iterable): HTTP statuses for which to force retry. Defaults to [429, 500, 502, 503, 504].
        allowed_methods (iterable): HTTP methods for which to force retry. Defaults to frozenset(["HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE"]).
    """
    s = requests.Session()

    ua = kwargs.get("full_agent")
    if not ua:
        ua = UserAgent.get(user_agent, user_agent_config_yaml,
                           user_agent_lookup, **kwargs)
    s.headers["User-Agent"] = ua

    auths_found = list()
    headers = kwargs.get("headers")
    if headers is not None:
        s.headers.update(headers)
        if "Authorization" in headers:
            auths_found.append("headers")

    extra_params_found = False
    extra_params_dict = None
    basic_auth = None
    if use_env:
        basic_auth_env = os.getenv("BASIC_AUTH")
        if basic_auth_env:
            basic_auth = basic_auth_env
            auths_found.append("basic_auth environment variable")
        extra_params = os.getenv("EXTRA_PARAMS")
        if extra_params:
            if "=" in extra_params:
                extra_params_dict = dict()
                logger.info(
                    "Loading extra parameters from environment variable")
                for extra_param in extra_params.split(","):
                    key, value = extra_param.split("=")
                    extra_params_dict[key] = value
            extra_params_found = True
    if not extra_params_found:
        # only do this if extra params env vars not supplied
        extra_params_dict = kwargs.get("extra_params_dict")
        if extra_params_dict:
            extra_params_found = True
            logger.info("Loading extra parameters from dictionary")

        extra_params_json = kwargs.get("extra_params_json", "")
        if extra_params_json:
            if extra_params_found:
                raise SessionError(
                    "More than one set of extra parameters given!")
            extra_params_found = True
            logger.info(f"Loading extra parameters from: {extra_params_json}")
            try:
                extra_params_dict = load_json(extra_params_json)
            except OSError:
                if fail_on_missing_file:
                    raise
        extra_params_yaml = kwargs.get("extra_params_yaml", "")
        if extra_params_yaml:
            if extra_params_found:
                raise SessionError(
                    "More than one set of extra parameters given!")
            logger.info(f"Loading extra parameters from: {extra_params_yaml}")
            try:
                extra_params_dict = load_yaml(extra_params_yaml)
            except OSError:
                if fail_on_missing_file:
                    raise
        extra_params_lookup = kwargs.get("extra_params_lookup")
        if extra_params_lookup and extra_params_dict:
            extra_params_dict = extra_params_dict.get(extra_params_lookup)
            if extra_params_dict is None:
                raise SessionError(
                    f"{extra_params_lookup} does not exist in extra_params!")
    if extra_params_dict:
        basic_auth_param = extra_params_dict.get("basic_auth")
        if basic_auth_param:
            basic_auth = basic_auth_param
            auths_found.append("basic_auth parameter")
            del extra_params_dict["basic_auth"]

    s.params = extra_params_dict

    basic_auth_arg = kwargs.get("basic_auth")
    if basic_auth_arg:
        basic_auth = basic_auth_arg
        auths_found.append("basic_auth argument")

    auth = kwargs.get("auth")
    if auth:
        auths_found.append("auth argument")
    basic_auth_file = kwargs.get("basic_auth_file")
    if basic_auth_file:
        logger.info(f"Loading basic auth from: {basic_auth_file}")
        try:
            basic_auth = load_file_to_str(basic_auth_file, strip=True)
            auths_found.append(f"file {basic_auth_file}")
        except OSError:
            if fail_on_missing_file:
                raise
    if len(auths_found) > 1:
        auths_found_str = ", ".join(auths_found)
        raise SessionError(
            f"More than one authorisation given! ({auths_found_str})")
    if "headers" not in auths_found:
        if basic_auth:
            auth = decode(basic_auth)
        s.auth = auth

    status_forcelist = kwargs.get("status_forcelist",
                                  [429, 500, 502, 503, 504])
    allowed_methods = kwargs.get(
        "allowed_methods",
        frozenset(["HEAD", "TRACE", "GET", "PUT", "OPTIONS", "DELETE"]),
    )

    retries = Retry(
        total=5,
        backoff_factor=0.4,
        status_forcelist=status_forcelist,
        allowed_methods=allowed_methods,
        raise_on_redirect=True,
        raise_on_status=True,
    )
    s.mount("file://", FileAdapter())
    s.mount(
        "http://",
        HTTPAdapter(max_retries=retries,
                    pool_connections=100,
                    pool_maxsize=100),
    )
    s.mount(
        "https://",
        HTTPAdapter(max_retries=retries,
                    pool_connections=100,
                    pool_maxsize=100),
    )
    return s
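A hedged usage sketch for get_session as defined above. Exactly one source of authorisation and one source of extra parameters may be supplied, otherwise SessionError is raised. The user agent string, YAML file name and lookup key below are placeholders invented for illustration, and the call assumes get_session above is in scope.

session = get_session(
    user_agent="my_scraper",               # placeholder user agent
    extra_params_yaml="extra_params.yml",  # hypothetical YAML file of URL parameters
    extra_params_lookup="my_scraper",      # parameters nested under this key in the YAML
    fail_on_missing_file=False,            # tolerate the YAML file being absent
)
response = session.get("https://example.com/api")  # retries on 429/5xx per the Retry configuration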
Code example #10
    def test_progress_storing_tempdir(self, monkeypatch):
        tempfolder = "papa"
        expected_dir = join(gettempdir(), tempfolder)
        rmtree(expected_dir, ignore_errors=True)
        iterator = [
            {
                "iso3": "AFG",
                "name": "Afghanistan"
            },
            {
                "iso3": "SDN",
                "name": "Sudan"
            },
            {
                "iso3": "YEM",
                "name": "Yemen"
            },
            {
                "iso3": "ZAM",
                "name": "Zambia"
            },
        ]
        expected_batch_file = join(expected_dir, "batch.txt")
        result = list()
        for info, nextdict in progress_storing_tempdir(tempfolder, iterator,
                                                       "iso3"):
            assert info["folder"] == expected_dir
            expected_batch = load_file_to_str(expected_batch_file, strip=True)
            result.append(nextdict)
        assert result == iterator
        assert expected_batch == info["batch"]
        assert exists(expected_dir) is False

        monkeypatch.setenv("WHERETOSTART", "iso3=SDN")
        result = list()
        for info, nextdict in progress_storing_tempdir(tempfolder, iterator,
                                                       "iso3"):
            assert exists(info["folder"]) is True
            assert info["folder"] == expected_dir
            expected_batch = load_file_to_str(expected_batch_file, strip=True)
            result.append(nextdict)
        assert result == iterator[1:]
        assert expected_batch == info["batch"]
        assert exists(expected_dir) is False
        monkeypatch.delenv("WHERETOSTART")

        try:
            for info, nextdict in progress_storing_tempdir(
                    tempfolder, iterator, "iso3"):
                if nextdict["iso3"] == "YEM":
                    start_batch = info["batch"]
                    raise ValueError("Problem!")
        except ValueError:
            pass
        assert exists(expected_dir) is True
        result = list()
        for info, nextdict in progress_storing_tempdir(tempfolder, iterator,
                                                       "iso3"):
            assert exists(info["folder"]) is True
            assert info["folder"] == expected_dir
            assert info["batch"] == start_batch
            result.append(nextdict)
        assert result == iterator[2:]
        assert exists(expected_dir) is False

        try:
            for info, nextdict in progress_storing_tempdir(
                    tempfolder, iterator, "iso3"):
                if nextdict["iso3"] == "YEM":
                    start_batch = info["batch"]
                    raise ValueError("Problem!")
        except ValueError:
            pass
        assert exists(expected_dir) is True
        monkeypatch.setenv("WHERETOSTART", "RESET")
        result = list()
        for info, nextdict in progress_storing_tempdir(tempfolder, iterator,
                                                       "iso3"):
            assert exists(info["folder"]) is True
            assert info["folder"] == expected_dir
            assert info["batch"] != start_batch
            result.append(nextdict)
        assert result == iterator
        assert exists(expected_dir) is False
        monkeypatch.delenv("WHERETOSTART")

        try:
            for info, nextdict in progress_storing_tempdir(
                    tempfolder, iterator, "iso3"):
                if nextdict["iso3"] == "YEM":
                    start_batch = info["batch"]
                    raise ValueError("Problem!")
        except ValueError:
            pass
        assert exists(expected_dir) is True
        monkeypatch.setenv("WHERETOSTART", "iso3=SDN")
        result = list()
        for info, nextdict in progress_storing_tempdir(tempfolder, iterator,
                                                       "iso3"):
            assert exists(info["folder"]) is True
            assert info["folder"] == expected_dir
            assert info["batch"] == start_batch
            result.append(nextdict)
        assert result == iterator[1:]
        assert exists(expected_dir) is False
        monkeypatch.delenv("WHERETOSTART")

        try:
            for info, nextdict in progress_storing_tempdir(
                    tempfolder, iterator, "iso3"):
                if nextdict["iso3"] == "YEM":
                    start_batch = info["batch"]
                    raise ValueError("Problem!")
        except ValueError:
            pass
        monkeypatch.setenv("WHERETOSTART", "iso3=NOTFOUND")
        found = False
        for _ in progress_storing_tempdir(tempfolder, iterator, "iso3"):
            found = True
        assert found is False
        assert exists(expected_dir) is True
        batch = load_file_to_str(expected_batch_file, strip=True)
        assert batch == start_batch
        monkeypatch.delenv("WHERETOSTART")

        monkeypatch.setenv("WHERETOSTART", "NOTFOUND=SDN")
        found = False
        for _ in progress_storing_tempdir(tempfolder, iterator, "iso3"):
            found = True
        assert found is False
        assert exists(expected_dir) is True
        batch = load_file_to_str(expected_batch_file, strip=True)
        assert batch == start_batch
        monkeypatch.delenv("WHERETOSTART")

        rmtree(expected_dir, ignore_errors=True)
Code example #11
 def htmltext(self, fixturesfolder):
     return load_file_to_str(join(fixturesfolder, 'html', 'response.html'))
Code example #12
def get_session(**kwargs):
    # type: (Any) -> requests.Session
    """Set up and return Session object that is set up with retrying

    Args:
        **kwargs: See below
        auth (Tuple[str, str]): Authorisation information in tuple form (user, pass) OR
        basic_auth (str): Authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx) OR
        basic_auth_file (str): Path to file containing authorisation information in basic auth string form (Basic xxxxxxxxxxxxxxxx)
        extra_params_dict (Dict): Extra parameters to put on end of url as a dictionary OR
        extra_params_json (str): Path to JSON file containing extra parameters to put on end of url OR
        extra_params_yaml (str): Path to YAML file containing extra parameters to put on end of url
        extra_params_lookup (str): Lookup key for parameters. If not given assumes parameters are at root of the dict.
        status_forcelist (iterable): HTTP statuses for which to force retry. Defaults to [429, 500, 502, 503, 504].
        method_whitelist (iterable): HTTP methods for which to force retry. Defaults to frozenset(['HEAD', 'TRACE', 'GET', 'PUT', 'OPTIONS', 'DELETE']).
    """
    s = requests.Session()

    extra_params_found = False
    extra_params_dict = kwargs.get('extra_params_dict', None)
    if extra_params_dict:
        extra_params_found = True
        logger.info('Loading extra parameters from dictionary')

    extra_params_json = kwargs.get('extra_params_json', '')
    if extra_params_json:
        if extra_params_found:
            raise SessionError('More than one set of extra parameters given!')
        extra_params_found = True
        logger.info('Loading extra parameters from: %s' % extra_params_json)
        extra_params_dict = load_json(extra_params_json)

    extra_params_yaml = kwargs.get('extra_params_yaml', '')
    if extra_params_found:
        if extra_params_yaml:
            raise SessionError('More than one set of extra parameters given!')
    else:
        if extra_params_yaml:
            logger.info('Loading extra parameters from: %s' %
                        extra_params_yaml)
            extra_params_dict = load_yaml(extra_params_yaml)
        else:
            extra_params_dict = dict()
    extra_params_lookup = kwargs.get('extra_params_lookup')
    if extra_params_lookup:
        extra_params_dict = extra_params_dict.get(extra_params_lookup)
        if extra_params_dict is None:
            raise SessionError('%s does not exist in extra_params!' %
                               extra_params_lookup)

    auth_found = False
    basic_auth = extra_params_dict.get('basic_auth')
    if basic_auth:
        logger.info('Loading authorisation from basic_auth parameter')
        auth_found = True
        del extra_params_dict['basic_auth']
    s.params = extra_params_dict

    auth = kwargs.get('auth')
    if auth:
        if auth_found:
            raise SessionError('More than one authorisation given!')
        logger.info('Loading authorisation from auth argument')
        auth_found = True
    bauth = kwargs.get('basic_auth')
    if bauth:
        if auth_found:
            raise SessionError('More than one authorisation given!')
        logger.info('Loading authorisation from basic_auth argument')
        basic_auth = bauth
        auth_found = True
    basic_auth_file = kwargs.get('basic_auth_file')
    if basic_auth_file:
        if auth_found:
            raise SessionError('More than one authorisation given!')
        logger.info('Loading authorisation from: %s' % basic_auth_file)
        basic_auth = load_file_to_str(basic_auth_file)
    if basic_auth:
        auth = decode(basic_auth)
    s.auth = auth

    status_forcelist = kwargs.get('status_forcelist',
                                  [429, 500, 502, 503, 504])
    method_whitelist = kwargs.get(
        'method_whitelist',
        frozenset(['HEAD', 'TRACE', 'GET', 'PUT', 'OPTIONS', 'DELETE']))

    retries = Retry(total=5,
                    backoff_factor=0.4,
                    status_forcelist=status_forcelist,
                    method_whitelist=method_whitelist,
                    raise_on_redirect=True,
                    raise_on_status=True)
    s.mount(
        'http://',
        HTTPAdapter(max_retries=retries,
                    pool_connections=100,
                    pool_maxsize=100))
    s.mount(
        'https://',
        HTTPAdapter(max_retries=retries,
                    pool_connections=100,
                    pool_maxsize=100))
    return s
Code example #13
def multiple_progress_storing_tempdir(
    folder: str,
    iterators: List[Iterable[Dict]],
    keys: List[str],
    batch: Optional[str] = None,
) -> Tuple[int, Dict, Dict]:
    """Store progress in temporary directory. The folder persists until the final iteration of the last iterator
    allowing which iteration to start at and the batch code to be persisted between runs. Yields 2 dictionaries. The
    first contains key folder which is the temporary directory optionally with folder appended (and created if it
    doesn't exist). In key progress is held the current position in the iterator. It also contains the key batch
    containing a batch code to be passed as the batch parameter in create_in_hdx or update_in_hdx calls. The second
    dictionary is the next dictionary in the iterator. The WHERETOSTART environment variable can be set to RESET to
    force the deletion and recreation of the temporary directory or to a key value pair in the form key=value eg.
    iso3=PAK indicating where to start.

    Args:
        folder (str): Folder to create in temporary folder
        iterators (List[Iterable[Dict]]): Iterate over each iterator in the list consecutively, persisting progress
        keys (List[str]): Key to examine from the dictionaries of the corresponding iterator in the list above
        batch (Optional[str]): Batch to use if there isn't one in a file already.

    Returns:
        Tuple[int, Dict, Dict]: A tuple of the form (iterator index, info dictionary, next object in iterator)
    """
    delete_if_exists = False
    wheretostartenv = getenv("WHERETOSTART")
    if wheretostartenv:
        if wheretostartenv.upper() == "RESET":
            delete_if_exists = True
            logger.info(
                "Removing progress file and will start from beginning!"
            )
    with temp_dir_batch(
        folder,
        delete_if_exists,
        delete_on_success=True,
        delete_on_failure=False,
        batch=batch,
    ) as info:
        tempdir = info["folder"]
        batch = info["batch"]
        for i, key in enumerate(keys):
            progress_file = join(tempdir, "progress.txt")
            if wheretostartenv:
                wheretostart = get_wheretostart(
                    wheretostartenv, "Environment variable", key
                )
            else:
                if exists(progress_file):
                    contents = load_file_to_str(progress_file, strip=True)
                    wheretostart = get_wheretostart(contents, "File", key)
                else:
                    wheretostart = None
            with temp_dir_batch(
                str(i),
                False,
                delete_on_success=True,
                delete_on_failure=False,
                batch=batch,
                tempdir=tempdir,
            ) as info:
                for info, nextdict in progress_storing_folder(
                    info, iterators[i], key, wheretostart
                ):
                    save_str_to_file(info["progress"], progress_file)
                    yield i, info, nextdict
                if exists(progress_file):
                    remove(progress_file)
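A usage sketch for multiple_progress_storing_tempdir as defined above: two iterators walked consecutively, with the progress file and a shared batch code persisted under the temporary folder until the final iteration completes. The folder name and the iterator contents are invented for illustration, and the loop assumes the generator above is in scope.

countries = [{"iso3": "AFG"}, {"iso3": "SDN"}]
indicators = [{"code": "population"}, {"code": "gdp"}]

for index, info, nextdict in multiple_progress_storing_tempdir(
    "example_scraper", [countries, indicators], ["iso3", "code"]
):
    if index == 0:
        print(f"Country {nextdict['iso3']} using batch {info['batch']}")
    else:
        print(f"Indicator {nextdict['code']} using batch {info['batch']}")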
Code example #14
def progress_storing_folder(
    info: Dict,
    iterator: Iterable[Dict],
    key: str,
    wheretostart: Optional[str] = None,
) -> Tuple[Dict, Dict]:
    """Store progress in folder in key folder of info dictionary parameter. Yields 2 dictionaries. The first is the
    info dictionary. It contains in key folder the folder being used to store progress and in key progress the current
    position in the iterator. If store_batch is True, that dictionary will also contain the key batch containing a batch
    code to be passed as the batch parameter in create_in_hdx or update_in_hdx calls. The second dictionary is the next
    dictionary in the iterator.

    Args:
        info (Dict): Dictionary containing folder and anything else to be yielded
        iterator (Iterable[Dict]): Iterate over this object persisting progress
        key (str): Key to examine from dictionary from iterator
        wheretostart (Optional[str]): Where in iterator to start

    Returns:
        Tuple[Dict,Dict]: A tuple of the form (info dictionary, next object in iterator)
    """
    folder = info["folder"]
    progress_file = join(folder, "progress.txt")

    if not wheretostart:
        contents = getenv("WHERETOSTART")
        if contents:
            wheretostart = get_wheretostart(
                contents, "Environment variable", key
            )
        else:
            if exists(progress_file):
                contents = load_file_to_str(progress_file, strip=True)
                wheretostart = get_wheretostart(contents, "File", key)
            else:
                wheretostart = None
    found = False
    for nextdict in iterator:
        current = nextdict[key]
        if wheretostart:
            if wheretostart == "IGNORE":
                continue
            if not found:
                if current == wheretostart:
                    found = True
                    logger.info(
                        f"Starting run from WHERETOSTART {wheretostart}"
                    )
                else:
                    logger.info(
                        f"Run not started. Ignoring {current}. WHERETOSTART ({wheretostart}) not matched."
                    )
                    continue
        output = f"{key}={current}"
        info["progress"] = output
        save_str_to_file(output, progress_file)
        yield info, nextdict
    if wheretostart and not found:
        raise NotFoundError(
            f"WHERETOSTART ({wheretostart}) not matched in iterator with key {key} and no run started!"
        )
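progress_storing_folder can also be driven directly with a hand-built info dictionary, which is what the wrappers above do internally. A sketch, assuming the generator above is in scope; the temporary folder and the rows are invented for illustration.

import tempfile

rows = [{"iso3": "AFG"}, {"iso3": "SDN"}, {"iso3": "YEM"}]
with tempfile.TemporaryDirectory() as folder:
    info = {"folder": folder}
    for info, nextdict in progress_storing_folder(info, rows, "iso3"):
        print(info["progress"], nextdict)  # e.g. iso3=AFG {'iso3': 'AFG'}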