def test_argument_which_accept_files_relative_and_abs_paths(
        mock_run_download,
        yamlarg,
        filepath_is_abs,
        # fixtures:
        pytestdir):
    '''test that arguments accepting files are properly processed and the relative paths
    are resolved relative to the yaml config file'''
    # Build the config overrides once: the only difference between the absolute-path
    # and relative-path cases is the value written for `yamlarg` in the config
    # (absolute tmp file path vs. the bare relative name 'abc'):
    if filepath_is_abs:
        yamlarg_file = pytestdir.newfile()
        yamlarg_value = yamlarg_file
    else:
        yamlarg_value = 'abc'
    overrides = {
        # dburl values must be valid db urls, hence the 'sqlite:///' prefix:
        yamlarg: ('sqlite:///' if yamlarg == 'dburl' else '') + yamlarg_value
    }
    # provide a sqlite memory if we are not testing dburl, otherwise run would fail:
    if yamlarg != 'dburl':
        overrides['dburl'] = 'sqlite:///:memory:'
    yamlfile = pytestdir.yamlfile(get_templates_fpath('download.yaml'),
                                  **overrides)
    if not filepath_is_abs:
        # resolve the relative path against the yaml config file directory (where
        # the program is expected to look it up):
        yamlarg_file = join(dirname(yamlfile), 'abc')

    # create the file the config points to. If we are testing the token file, write
    # valid token content to avoid errors; otherwise an empty file suffices:
    with open(yamlarg_file, 'w') as opn:
        if yamlarg == 'restricted_data':
            opn.write('BEGIN PGP MESSAGE ABC')

    runner = CliRunner()
    result = runner.invoke(cli, ['download', '-c', yamlfile])
    assert result.exit_code == 0
    run_download_args = mock_run_download.call_args_list[-1][1]

    if yamlarg == 'restricted_data':
        # assert we read the correct file:
        assert run_download_args[
            'authorizer'].token == b'BEGIN PGP MESSAGE ABC'
    elif yamlarg == 'dburl':
        # assert we have the right db url:
        assert str(run_download_args['session'].bind.engine.url
                   ) == 'sqlite:///' + yamlarg_file
    else:
        assert run_download_args[yamlarg] == yamlarg_file
# Esempio n. 2
# 0
 def download_setup_func(filename, **params):
     """Load the yaml template named `filename`, apply `params` on it (a ``None``
     value removes the key, any other value sets it), write the result under
     `basedir` and return ``(written_path, resulting_dict)``."""
     yamldic = yaml_load(get_templates_fpath(filename))
     # drop keys explicitly set to None, then apply the remaining overrides:
     for key in [k for k, v in params.items() if v is None]:
         yamldic.pop(key, None)
     yamldic.update({k: v for k, v in params.items() if v is not None})
     path = os.path.join(basedir, filename)
     with open(path, 'w') as fpoint:
         yaml.safe_dump(yamldic, fpoint)
     return path, yamldic
# Esempio n. 3
# 0
class clickutils(object):  #pylint: disable=invalid-name, too-few-public-methods
    """Container for Options validations, default settings so as not to pollute the click
    decorators"""

    TERMINAL_HELP_WIDTH = 110  # control width of help. 80 should be the default (roughly)
    DEFAULTDOC = yaml_load_doc(get_templates_fpath("download.yaml"))
    EQA = "(event search parameter)"
    # keyword arguments shared by the Options accepting a db url or a yaml path:
    DBURL_OR_YAML_ATTRS = {
        'type': inputargs.extract_dburl_if_yamlpath,
        'metavar': 'TEXT or PATH',
        'help': ("Database url where data has been saved. "
                 "It can also be the path of a yaml file "
                 "containing the property 'dburl' "
                 "(e.g., the config file used for "
                 "downloading)"),
        'required': True,
    }
    # click type for arguments denoting an existing, readable file path:
    ExistingPath = click.Path(exists=True, file_okay=True, dir_okay=False,
                              writable=False, readable=True)

    @classmethod
    def set_help_from_yaml(cls, ctx, param, value):
        """Click callback setting an automatic help for all Options of the current
        command which have no `help` and whose name is found in the default
        download config file (currently `download.yaml`).
        The Option using this function as `callback` must also set `is_eager=True`.
        Example: assuming opt1, opt2, opt3 are variables of the config yaml file,
        and opt4 not, this sets the default help for opt1 and opt2:
        ```
        click.option('--opt1', ..., callback=set_help_from_yaml, is_eager=True,...)
        click.option('--opt2'...)
        click.option('--opt3'..., help='my custom help. Do not fetch help from config')
        click.option('--opt4'...)
        ...
        ```
        """
        cfg_doc = cls.DEFAULTDOC
        for option in ctx.command.params:
            # only touch Options (not Arguments) with no help explicitly set:
            if option.param_type_name != 'option' or option.help is not None:
                continue
            help_text = cfg_doc.get(option.name, "")
            # remove implementation details from the cli (avoid too much information,
            # or information specific to the yaml file and not the cli):
            idx = help_text.find('Implementation details:')
            option.help = help_text if idx < 0 else help_text[:idx]

        return value
 def func(*args, **yaml_overrides):
     """Invoke the `download` cli command with `args`, defaulting the db url to
     the currently tested one and the config file to the download.yaml template
     (with `yaml_overrides` applied) when not given, and return the click result."""
     cli_args = list(args)
     dburl_missing = False
     # override the db path with our currently tested one:
     if '-d' not in cli_args and '--dburl' not in cli_args and 'dburl' not in yaml_overrides:
         yaml_overrides['dburl'] = db.dburl
         dburl_missing = True
     # if -c or configfile is not specified, add it (with overrides applied);
     # otherwise pass the db url explicitly when it was missing:
     if "-c" in cli_args or "--configfile" in cli_args:
         if dburl_missing:
             cli_args += ['-d', str(db.dburl)]
     else:
         cli_args.extend(['-c',
                          pytestdir.yamlfile(get_templates_fpath("download.yaml"),
                                             **yaml_overrides)])
     # process inputs:
     return CliRunner().invoke(cli, ['download'] + cli_args)
    def test_download_bad_values(
            self,
            # fixtures:
            db,
            run_cli_download):
        '''test different scenarios where values in the download.yaml config are not
        well formatted, checking both the emitted error message and that each bad
        value prevents writing a new Download row to the db (the Download count
        asserts below track this across the whole test)'''
        result = run_cli_download(networks={'a': 'b'})  # conflict
        assert result.exit_code != 0
        assert 'Error: Conflicting names "network" / "networks"' in result.output
        result = run_cli_download(network={'a': 'b'})
        assert result.exit_code == 0
        # thus providing dict is actually fine and will iterate over its keys:
        assert self.mock_run_download.call_args_list[0][1]['network'] == ['a']
        # do some asserts only for this case to test how we print the arguments to string:
        # assert "tt_table: <TTTable object, " in result.output
        assert "starttime: 2006-01-01 00:00:00" in result.output
        assert "traveltimes_model:" in result.output
        _dburl = db.dburl
        if not db.is_sqlite:
            _dburl = secure_dburl(_dburl)
        # assert dburl is in result.output (sqlite:memory is quotes, postgres not. we do not
        # care to investigate why, just assert either string is there:
        assert "dburl: '%s'" % _dburl in result.output or "dburl: %s" % _dburl in result.output

        # check the session:
        # assert we did write to the db:
        assert db.session.query(Download).count() == 1

        result = run_cli_download(networks='!*')  # conflicting names
        assert result.exit_code != 0
        assert 'Error: Conflicting names "network" / "networks"' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 1

        result = run_cli_download(network='!*')  # invalid value
        assert result.exit_code != 0
        assert 'Error: Invalid value for "network": ' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 1

        result = run_cli_download(net='!*')  # conflicting names
        assert result.exit_code != 0
        assert 'Error: Conflicting names "network" / "net"' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 1

        # test error from the command line. Result is the same as above as the check is made
        # AFTER click
        result = run_cli_download('-n', '!*')  # invalid value
        assert result.exit_code != 0
        assert 'Error: Invalid value for "network": ' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 1

        # no such option:
        result = run_cli_download('--zrt', '!*')
        assert result.exit_code != 0
        assert 'Error: no such option: --zrt' in result.output  # why -z and not -zz? whatever...
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 1

        # no such option from within the yaml:
        result = run_cli_download(zz='!*')
        assert result.exit_code != 0
        assert 'Error: No such option "zz"' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 1

        # what about conflicting arguments?
        result = run_cli_download(networks='!*', net='opu')  # invalid value
        assert result.exit_code != 0
        assert 'Conflicting names "network" / "net" / "networks"' in result.output or \
            'Conflicting names "network" / "networks" / "net"' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 1

        result = run_cli_download(starttime=[])  # invalid type
        assert result.exit_code != 0
        assert 'Error: Invalid type for "starttime":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 1

        # mock implementing conflicting names in the yaml file:
        result = run_cli_download(start='wat')  # invalid value
        assert result.exit_code != 0
        assert 'Error: Conflicting names "starttime" / "start"' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 1

        # mock implementing bad value in the cli: (cf with the previous test):
        # THE MESSAGE BELOW IS DIFFERENT BECAUSE WE PROVIDE A CLI VALIDATION FUNCTION
        # See the case of travetimes model below where, without a cli validation function,
        # the message is the same when we provide a bad argument in the yaml or from the cli
        result = run_cli_download('--starttime', 'wat')  # invalid value
        assert result.exit_code != 0
        assert 'Error: Invalid value for "-s" / "--start" / "--starttime": wat' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 1

        # This should work:
        result = run_cli_download('--start', '2006-03-14')  # invalid value
        assert result.exit_code == 0
        run_download_kwargs = self.mock_run_download.call_args_list[-1][1]
        assert run_download_kwargs['starttime'] == datetime(2006, 3, 14)
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        # now test the same as above BUT with a cli-only argument (-t0):
        result = run_cli_download(
            '-s', 'wat')  # invalid value typed from the command line
        assert result.exit_code != 0
        assert 'Error: Invalid value for "-s" / "--start" / "--starttime":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        result = run_cli_download(endtime='wat')  # try with end
        assert result.exit_code != 0
        assert 'Error: Invalid value for "endtime":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        result = run_cli_download(end='wat')  # try with end
        assert result.exit_code != 0
        assert 'Error: Conflicting names "endtime" / "end"' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        # now test the same as above BUT with the wrong value from the command line:
        result = run_cli_download(
            '-e', 'wat')  # invalid value typed from the command line
        assert result.exit_code != 0
        assert 'Error: Invalid value for "-e" / "--end" / "--endtime":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        result = run_cli_download(traveltimes_model=[])  # invalid type
        assert result.exit_code != 0
        assert 'Error: Invalid type for "traveltimes_model":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        result = run_cli_download(traveltimes_model='wat')  # invalid value
        assert result.exit_code != 0
        assert 'Error: Invalid value for "traveltimes_model":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        # same as above but with error from the cli, not from within the config yaml:
        result = run_cli_download('--traveltimes-model',
                                  'wat')  # invalid value
        assert result.exit_code != 0
        assert 'Error: Invalid value for "traveltimes_model":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        result = run_cli_download(removals=['inventory'])  # invalid value
        assert result.exit_code != 0
        assert 'Error: Missing value for "inventory"' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        d_yaml_file = get_templates_fpath("download.yaml")

        result = run_cli_download(
            dburl=d_yaml_file)  # existing file, invalid db url
        assert result.exit_code != 0
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        result = run_cli_download(dburl="sqlite:/whatever")  # invalid db url
        assert result.exit_code != 0
        assert 'Error: Invalid value for "dburl":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        result = run_cli_download(dburl="sqlite://whatever")  # invalid db url
        assert result.exit_code != 0
        assert 'Error: Invalid value for "dburl":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        result = run_cli_download(dburl=[])  # invalid type
        assert result.exit_code != 0
        assert 'Error: Invalid type for "dburl":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        # Test an invalid configfile. This can be done only via command line
        result = run_cli_download('-c', 'frjkwlag5vtyhrbdd_nleu3kvshg w')
        assert result.exit_code != 0
        assert 'Error: Invalid value for "-c" / "--config":' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 2

        result = run_cli_download(removals=['maxmagnitude'
                                            ])  # remove an opt. param.
        assert result.exit_code == 0
        # check maxmagnitude is NOT in the eventws params:
        eventws_params = self.mock_run_download.call_args_list[-1][1][
            'eventws_params']
        assert 'maxmagnitude' not in eventws_params
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 3

        result = run_cli_download(removals=['advanced_settings'
                                            ])  # remove an opt. param.
        assert result.exit_code != 0
        assert 'Error: Missing value for "advanced_settings"' in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 3

        result = run_cli_download(
            advanced_settings={})  # remove an opt. param.
        assert result.exit_code != 0
        assert ('Error: Invalid value for "advanced_settings": '
                'Missing value for "download_blocksize"') in result.output
        # assert we did not write to the db, cause the error threw before setting up db:
        assert db.session.query(Download).count() == 3

        # search radius: every invalid combination below must be rejected with an
        # explanatory message:
        for search_radius in [{'min': 5}, {'min': 5, 'max': 6, 'minmag': 7}]:
            result = run_cli_download(search_radius=search_radius)
            assert result.exit_code != 0
            assert ('Error: Invalid value for "search_radius": '
                    "provide either 'min', 'max' or "
                    "'minmag', 'maxmag', 'minmag_radius', 'maxmag_radius'"
                    ) in result.output

        result = run_cli_download(search_radius={'min': 5, 'max': '6'})
        assert result.exit_code != 0
        assert ('Error: Invalid value for "search_radius": '
                "numeric values expected") in result.output

        result = run_cli_download(search_radius={
            'minmag': 15,
            'maxmag': 7,
            'minmag_radius': 5,
            'maxmag_radius': 4
        })
        assert result.exit_code != 0
        assert ('Error: Invalid value for "search_radius": '
                'minmag should not be greater than maxmag') in result.output

        result = run_cli_download(search_radius={
            'minmag': 7,
            'maxmag': 8,
            'minmag_radius': -1,
            'maxmag_radius': 0
        })
        assert result.exit_code != 0
        assert ('Error: Invalid value for "search_radius": '
                'minmag_radius and maxmag_radius should be greater than 0'
                ) in result.output

        result = run_cli_download(search_radius={
            'minmag': 5,
            'maxmag': 5,
            'minmag_radius': 4,
            'maxmag_radius': 4
        })
        assert result.exit_code != 0
        assert ('Error: Invalid value for "search_radius": '
                'To supply a constant radius, '
                'set "min: 0" and specify the radius with the "max" argument'
                ) in result.output

        result = run_cli_download(search_radius={'min': -1, 'max': 5})
        assert result.exit_code != 0
        assert ('Error: Invalid value for "search_radius": '
                'min should not be lower than 0') in result.output

        result = run_cli_download(search_radius={'min': 0, 'max': 0})
        assert result.exit_code != 0
        assert ('Error: Invalid value for "search_radius": '
                'max should be greater than 0') in result.output

        result = run_cli_download(search_radius={'min': 4, 'max': 3})
        assert result.exit_code != 0
        assert ('Error: Invalid value for "search_radius": '
                'min should be lower than max') in result.output
class Test(object):

    # path to the template processing module used by every test of this class:
    pyfile = get_templates_fpath("paramtable.py")

    @property
    def logfilecontent(self):
        """Return the full text of the log file written by the last run
        (the file, stored in `self._logfilename`, must exist)."""
        logfile = self._logfilename
        assert os.path.isfile(logfile)
        with open(logfile) as fle:
            return fle.read()

    # The class-level `init` fixture is marked with autouse=true which implies that all test
    # methods in the class will use this fixture without a need to state it in the test
    # function signature or with a class-level usefixtures decorator. For info see:
    # https://docs.pytest.org/en/latest/fixture.html#autouse-fixtures-xunit-setup-on-steroids
    @pytest.fixture(autouse=True)
    def init(self, request, pytestdir, db4process):
        """Set up each test of this class: create the test db (to file) and patch
        session retrieval, session closing and log configuration so that tests run
        against the fixture session and the log goes to a pytest-managed temp file
        (whose path is stored in `self._logfilename` for later inspection)."""
        db4process.create(to_file=True)
        session = db4process.session
        # sets up the mocked functions: db session handling (using the already created session)
        # and log file handling:
        with patch('stream2segment.utils.inputargs.get_session', return_value=session):
            with patch('stream2segment.main.closesession',
                       side_effect=lambda *a, **v: None):
                with patch('stream2segment.main.configlog4processing') as mock2:

                    def clogd(logger, logfilebasepath, verbose):
                        # config logger as usual, but redirects to a temp file
                        # that will be deleted by pytest, instead of polluting the program
                        # package:
                        ret = o_configlog4processing(logger,
                                                     pytestdir.newfile('.log') \
                                                     if logfilebasepath else None,
                                                     verbose)

                        self._logfilename = ret[0].baseFilename
                        return ret

                    mock2.side_effect = clogd

                    yield

    def inlogtext(self, string):
        '''Return True if `string` is a substring of the log text, False otherwise.
        The assertion `string in self.logfilecontent` fails in py3.5, although the
        differences between characters at the same position are zero; we thus keep
        the explicit sliding-window comparison.

        BUGFIX: the original window check compared the *sum* of the per-character
        ordinal differences to zero, which yields false positives whenever positive
        and negative differences cancel out (e.g. "ab" vs "ba"); we now require
        every character pair to match exactly.'''
        logtext = self.logfilecontent
        size = len(string)
        i = 0
        # slide a window of len(string) characters over the log text:
        while len(logtext[i:i + size]) == size:
            if all(a == b for a, b in zip(string, logtext[i:i + size])):
                return True
            i += 1
        return False

# ## ======== ACTUAL TESTS: ================================


    # Recall: we have 6 segments, issued from all combination of
    # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
    # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
    # those segments in case. For info see db4process in conftest.py
    @mock.patch('stream2segment.main.run_process', side_effect=process_main_run)
    def test_simple_run_no_outfile_provided(self, mock_run,
                                            # fixtures:
                                            db4process, yamlfile):
        '''test a process run with no output file: the `ondone` callback passed to
        run_process must be None or a no-op BaseWriter, and the program output must
        report "Output file: n/a"'''
        # values overriding the yaml config in the templates folder:
        yaml_file = yamlfile(**{'snr_threshold': 0,
                                'segment_select': {'has_data': 'true'}})

        result = CliRunner().invoke(cli, ['process', '--dburl', db4process.dburl,
                                    '-p', self.pyfile, '-c', yaml_file])

        assert not result.exception

        calls = mock_run.call_args_list
        assert len(calls) == 1
        pargs = calls[0][0]
        # the third positional argument (the `ondone` callback) must be None or a
        # no-op BaseWriter instance:
        assert pargs[2] is None or \
            type(pargs[2]) == BaseWriter  # pylint: disable=unidiomatic-typecheck
        # assert "Output file:  n/a" in result output:
        assert re.search('Output file:\\s+n/a', result.output)

        # Note that apparently CliRunner() puts stderr and stdout together
        # (https://github.com/pallets/click/pull/868)
        # So we should test that we have these string twice:
        for subs in ("Processing function: ", "Config. file: "):
            assert result.output.find(subs) > -1

    # Recall: we have 6 segments, issued from all combination of
    # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
    # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
    # those segments in case. For info see db4process in conftest.py
    @pytest.mark.parametrize("file_extension, options",
                             product(['.h5', '.csv'], [({}, []),
                                                       ({'segments_chunksize': 1}, []),
                                                       ({'segments_chunksize': 1}, ['--multi-process']),
                                                       ({}, ['--multi-process']),
                                                       ({'segments_chunksize': 1}, ['--multi-process', '--num-processes', '1']),
                                                       ({}, ['--multi-process', '--num-processes', '1'])]))
    def test_simple_run_retDict_complex_select(self, file_extension, options,
                                               # fixtures:
                                               pytestdir, db4process, yamlfile):
        '''test a case where we have a more complex select involving joins'''
        advanced_settings, cmdline_opts = options
        session = db4process.session
        # select the event times for the segments with data:
        etimes = sorted(_[1] for _ in session.query(Segment.id, Event.time).
                        join(Segment.event).filter(Segment.has_data))

        config_overrides = {'snr_threshold': 0,
                            'segment_select': {'has_data': 'true',
                                               'event.time': '<=%s' % (max(etimes).isoformat())}}
        if advanced_settings:
            config_overrides['advanced_settings'] = advanced_settings
        # the selection above should be the same as the previous test:
        # test_simple_run_retDict_saveinv,
        # as segment_select[event.time] includes all segments in segment_select['has_data'],
        # thus the code is left as it was in the method above
        yaml_file = yamlfile(**config_overrides)

        _seg = db4process.segments(with_inventory=True, with_data=True, with_gap=False).one()
        expected_first_row_seg_id = _seg.id
        station_id_whose_inventory_is_saved = _seg.station.id

        runner = CliRunner()
        filename = pytestdir.newfile(file_extension)
        result = runner.invoke(cli, ['process', '--dburl', db4process.dburl,
                               '-p', self.pyfile, '-c', yaml_file, filename] + cmdline_opts)

        assert not result.exception
        # check file has been correctly written:
        if file_extension == '.csv':
            csv1 = readcsv(filename)
            assert len(csv1) == 1
            assert csv1.loc[0, csv1.columns[0]] == expected_first_row_seg_id
        else:
            dfr = pd.read_hdf(filename)
            assert len(dfr) == 1
            assert dfr.iloc[0][SEGMENT_ID_COLNAME] == expected_first_row_seg_id

        self.inlogtext("""3 segment(s) found to process

segment (id=3): 4 traces (probably gaps/overlaps)
segment (id=2): Station inventory (xml) error: no data

1 of 3 segment(s) successfully processed
2 of 3 segment(s) skipped with error message (check log or details)""")
        # assert logfile exists:
        assert os.path.isfile(self._logfilename)

    # Recall: we have 6 segments, issued from all combination of
    # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
    # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
    # those segments in case. For info see db4process in conftest.py
    def test_simple_run_retDict_high_snr_threshold(self,
                                                   # fixtures:
                                                   pytestdir, db4process, yamlfile):
        '''same as `test_simple_run_retDict_saveinv` above
        but with a very high snr threshold => no rows processed'''
        # setup inventories:
        session = db4process.session
        # set values which will override the yaml config in templates folder:
        config_overrides = {  # snr_threshold 3 is high enough to discard the only segment
                              # we would process otherwise:
                            'snr_threshold': 3,
                            'segment_select': {'has_data': 'true'}}
        yaml_file = yamlfile(**config_overrides)

        runner = CliRunner()
        filename = pytestdir.newfile('.csv')
        result = runner.invoke(cli, ['process', '--dburl', db4process.dburl,
                               '-p', self.pyfile, '-c', yaml_file, filename])

        assert not result.exception
        # no file written (see next comment for details). Check outfile is empty:
        with pytest.raises(EmptyDataError):
            csv1 = readcsv(filename)
        # check file has been correctly written: 2 segments have no data, thus they are skipped
        # and not logged
        # 2 segments have gaps/overlaps, thus they are skipped and logged
        # 1 segment has data but no inventory, thus skipped and logged
        # 1 segment with data and inventory, but snr is too low: skipped and logged
        assert self.inlogtext("""4 segment(s) found to process

segment (id=1): low snr 1.350154
segment (id=2): 4 traces (probably gaps/overlaps)
segment (id=4): Station inventory (xml) error: no data
segment (id=5): 4 traces (probably gaps/overlaps)

0 of 4 segment(s) successfully processed
4 of 4 segment(s) skipped with error message (check log or details)""")

    # Recall: we have 6 segments, issued from all combination of
    # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
    # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
    # those segments in case. For info see db4process in conftest.py
    @pytest.mark.parametrize('select_with_data, seg_chunk',
                             [(True, None), (True, 1), (False, None), (False, 1)])
    def test_simple_run_retDict_seg_select_empty_and_err_segments(self,
                                                                  select_with_data, seg_chunk,
                                                                  # fixtures:
                                                                  pytestdir,
                                                                  db4process, yamlfile):
        '''test a segment selection that takes only non-processable segments'''
        # set values which will override the yaml config in templates folder:
        config_overrides = {'snr_threshold': 0,  # take all segments
                            # the following will select the station with no inventory.
                            # There are three segments associated with it:
                            # one with data and no gaps, one with data and gaps,
                            # the third with no data
                            'segment_select': {'station.latitude': '<10',
                                               'station.longitude': '<10'}}
        if select_with_data:
            config_overrides['segment_select']['has_data'] = 'true'
        if seg_chunk is not None:
            config_overrides['advanced_settings'] = {'segments_chunksize': seg_chunk}

        yaml_file = yamlfile(**config_overrides)

        runner = CliRunner()
        filename = pytestdir.newfile('.csv')
        result = runner.invoke(cli, ['process', '--dburl', db4process.dburl,
                                     '-p', self.pyfile,
                                     '-c', yaml_file,
                                     filename])
        assert not result.exception
        # check file has not been written (no data):
        with pytest.raises(EmptyDataError):
            csv1 = readcsv(filename)

        # see comment aboive on segments_select
        if select_with_data:
            # selecting only with data means out of the three candidate segments, one
            # is discarded prior to processing:
            assert self.inlogtext("""2 segment(s) found to process

segment (id=4): Station inventory (xml) error: no data
segment (id=5): 4 traces (probably gaps/overlaps)

0 of 2 segment(s) successfully processed
2 of 2 segment(s) skipped with error message (check log or details)""")
        else:
            assert self.inlogtext("""3 segment(s) found to process

segment (id=4): Station inventory (xml) error: no data
segment (id=5): 4 traces (probably gaps/overlaps)
segment (id=6): MiniSeed error: no data

0 of 3 segment(s) successfully processed
3 of 3 segment(s) skipped with error message (check log or details)""")

    # Recall: we have 6 segments, issued from all combination of
    # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
    # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
    # those segments in case. For info see db4process in conftest.py
    @pytest.mark.parametrize("advanced_settings, cmdline_opts",
                             [({}, []),
                              ({'segments_chunksize': 1}, []),
                              ({'segments_chunksize': 1}, ['--multi-process']),
                              ({}, ['--multi-process']),
                              ({'segments_chunksize': 1}, ['--multi-process', '--num-processes', '1']),
                              ({}, ['--multi-process', '--num-processes', '1'])])
    def test_simple_run_ret_list(self, advanced_settings, cmdline_opts,
                                 # fixtures:
                                 pytestdir,
                                 db4process, yamlfile):
        '''test processing when the python function returns a list instead of a
        dict, and also that we can specify a different main function name
        (via the '-f' command line option)'''

        # set values which will override the yaml config in templates folder:
        config_overrides = {'snr_threshold': 0,  # take all segments
                            'segment_select': {'has_data': 'true'}}
        if advanced_settings:
            config_overrides['advanced_settings'] = advanced_settings

        yaml_file = yamlfile(**config_overrides)

        # the only processable segment (with inventory, with data, no gaps):
        _seg = db4process.segments(with_inventory=True, with_data=True, with_gap=False).one()
        expected_first_row_seg_id = _seg.id
        station_id_whose_inventory_is_saved = _seg.station.id

        pyfile = self.pyfile

        # Now write pyfile into a named temp file, with the method:
        # def main_retlist(segment, config):
        #    return main(segment, config).keys()
        # the method returns a list (which is what we want to test)
        # and this way, we do not need to keep synchronized any additional file
        filename = pytestdir.newfile('.csv')
        pyfile2 = pytestdir.newfile('.py')
        if not os.path.isfile(pyfile2):

            with open(pyfile, 'r') as opn:
                content = opn.read()

            cont2 = content.replace("def main(segment, config):", """def main_retlist(segment, config):
    return list(main(segment, config).values())
def main(segment, config):""")
            with open(pyfile2, 'wb') as _opn:
                _opn.write(cont2.encode('utf8'))

        runner = CliRunner()
        result = runner.invoke(cli, ['process', '--dburl', db4process.dburl,
                                     '-p', pyfile2, '-f', "main_retlist",
                                     '-c', yaml_file,
                                     filename] + cmdline_opts)

        assert not result.exception
        # check file has been correctly written. Since main_retlist returns a
        # list, the CSV has no header row: reading WITH header interprets the
        # single data row as the header, yielding an empty frame:
        csv1 = readcsv(filename)  # read first with header:
        # assert no rows:
        assert csv1.empty
        # now read without header:
        csv1 = readcsv(filename, header=False)
        assert len(csv1) == 1
        assert csv1.loc[0, csv1.columns[0]] == expected_first_row_seg_id

        assert self.inlogtext("""4 segment(s) found to process

segment (id=2): 4 traces (probably gaps/overlaps)
segment (id=4): Station inventory (xml) error: no data
segment (id=5): 4 traces (probably gaps/overlaps)

1 of 4 segment(s) successfully processed
3 of 4 segment(s) skipped with error message (check log or details)""")
        # assert logfile exists:
        assert os.path.isfile(self._logfilename)

    # Even though we are not interested here to check what is there on the created db,
    # because we test errors,
    # Recall: we have 6 segments, issued from all combination of
    # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
    # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
    # those segments in case. For info see db4process in conftest.py
    @pytest.mark.parametrize("cmdline_opts",
                             [[], ['--multi-process'],
                              ['--multi-process', '--num-processes', '1']])
    @pytest.mark.parametrize("err_type, expects_log_2_be_configured",
                             [(None, False),
                              (ImportError, False),
                              (AttributeError, True),
                              (TypeError, True)])
    def test_errors_process_not_run(self,
                                    err_type, expects_log_2_be_configured, cmdline_opts,
                                    # fixtures:
                                    pytestdir, db4process, yamlfile):
        '''test processing in case of several 'critical' errors (which prevent the
          processing from running). err_type=None means simply a bad argument
          (funcname 'main2' missing from the unmodified module)'''
        pyfile = self.pyfile

        # REMEMBER THAT BY DEFAULT LEAVING THE segment_select IMPLEMENTED in conffile
        # WE WOULD HAVE NO SEGMENTS, as maxgap_numsamples is None for all segments of this test
        # Thus provide config overrides:
        yaml_file = yamlfile(segment_select={'has_data': 'true'})

        runner = CliRunner()
        # Now write pyfile into a named temp file, BUT DO NOT SUPPLY EXTENSION
        # This seems to fail in python3 (FIXME: python2?)
        filename = pytestdir.newfile('.csv')
        pyfile2 = pytestdir.newfile('.py')

        with open(pyfile, 'r') as opn:
            content = opn.read()

        # here replace the stuff we need:
        if err_type == ImportError:
            # create the exception: implement a fake import
            content = content.replace("def main(", """import abcdefghijk_blablabla_456isjfger
def main2(""")
        elif err_type == AttributeError:
            # create the exception: implement a 'main2' whose body raises an
            # AttributeError (attribute access on a str):
            content = content.replace("def main(", """def main2(segment, config):
    return "".attribute_that_does_not_exist_i_guess_blabla()

def main(""")
        elif err_type == TypeError:
            # create the exception: implement a bad signature which raises a TypeError
            content = content.replace("def main(", """def main2(segment, config, wrong_argument):
    return int(None)

def main(""")
        else:  # err_type is None
            # this case does not do anything, but since we will call 'main2' as funcname
            # in `runner.invoke` (see below), we should raise a BadArgument
            pass

        with open(pyfile2, 'wb') as _opn:
            _opn.write(content.encode('utf8'))

        result = runner.invoke(cli, ['process', '--dburl', db4process.dburl, '--no-prompt',
                                     '-p', pyfile2, '-f', "main2",
                                     '-c', yaml_file,
                                     filename])

        assert result.exception
        assert result.exit_code != 0
        stdout = result.output
        if expects_log_2_be_configured:
            # these cases (AttributeError, TypeError) raise AFTER the log has
            # been configured and the output file opened (see self.init):
            assert self._logfilename is not None
            # we did open the output file:
            assert os.path.isfile(filename)
            # and we never wrote on it:
            assert os.stat(filename).st_size == 0
            # check correct outputs, in both log and output:
            outputs = [stdout, self.logfilecontent]
            for output in outputs:
                # Try to assert the messages on standard output being compatible with PY2,
                # as the messages might change
                assert err_type.__name__ in output \
                    and 'Traceback' in output and ' line ' in output
        else:
            # these cases (None, ImportError) raise BEFORE running pyfile, thus
            # the log config has not been called (see self.init):
            with pytest.raises(Exception):
                # basically, assert we do not have the log file
                _ = self.logfilecontent
            assert 'Invalid value for "pyfile": ' in stdout
            further_string = 'main2' if err_type is None else 'No module named'
            assert further_string in stdout
            # we did NOT open the output file:
            assert not os.path.isfile(filename)

    # Recall: we have 6 segments, issued from all combination of
    # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
    # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
    # those segments in case. For info see db4process in conftest.py
    @pytest.mark.parametrize("err_type", [None, ValueError])
    def test_errors_process_completed(self, err_type,
                                      # fixtures:
                                      pytestdir, db4process, yamlfile):
        '''test processing in case of non 'critical' errors i.e., which do not prevent the process
          to be completed. None means we do not override segment_select which, with the current
          templates, causes no segment to be selected'''
        pyfile = self.pyfile

        # REMEMBER THAT BY DEFAULT LEAVING THE segment_select IMPLEMENTED in conffile
        # WE WOULD HAVE NO SEGMENTS, as maxgap_numsamples is None for all segments of this test
        # Thus provide config overrides:
        if err_type is not None:
            yaml_file = yamlfile(segment_select={'has_data': 'true'})
        else:
            yaml_file = yamlfile()

        runner = CliRunner()
        # Now write pyfile into a named temp file, BUT DO NOT SUPPLY EXTENSION
        # This seems to fail in python3 (FIXME: python2?)
        filename = pytestdir.newfile('.csv')
        pyfile2 = pytestdir.newfile('.py')

        with open(pyfile, 'r') as opn:
            content = opn.read()

        if err_type == ValueError:
            # create the exception: implement a 'main2' which raises a ValueError
            # (int('4d') is invalid) on every processed segment:
            content = content.replace("def main(", """def main2(segment, config):
    return int('4d')

def main(""")
        else:
            # rename main to main2, as we will call 'main2' as funcname in 'runner.invoke' below
            # REMEMBER THAT THIS CASE HAS ACTUALLY NO SEGMENTS TO BE PROCESSED, see
            # 'yamlfile' fixture above
            content = content.replace("def main(", """def main2(""")

        with open(pyfile2, 'wb') as _opn:
            _opn.write(content.encode('utf8'))

        result = runner.invoke(cli, ['process', '--dburl', db4process.dburl, '--no-prompt',
                                     '-p', pyfile2, '-f', "main2",
                                     '-c', yaml_file,
                                     filename])

        assert not result.exception
        assert result.exit_code == 0
        stdout = result.output
        # in both cases the process completed: the log has been configured
        # (see self.init) and the output file was opened (but nothing written):
        assert self._logfilename is not None
        # we did open the output file:
        assert os.path.isfile(filename)
        # and we never wrote on it:
        assert os.stat(filename).st_size == 0
        # check correct outputs, in both log and output:
        logfilecontent = self.logfilecontent
        if err_type is None:  # no segments processed
            # we want to check that a particular string (str2check) is in the stdout
            # However, str2check newlines count is not constant through
            # libraries and python versions. It might be due to click progressbar not showing on
            # eclipse. Therefore, assert a regex, where we relax the condition on newlines (\n+)
            str2check = \
                (r"0 segment\(s\) found to process\n"
                 r"\n+"
                 r"0 of 0 segment\(s\) successfully processed\n"
                 r"0 of 0 segment\(s\) skipped with error message \(check log or details\)")
            assert re.search(str2check, stdout)
            assert re.search(str2check, logfilecontent)
        else:
            # we want to check that a particular string (str2check) is in the stdout
            # However, str2check newlines count is not constant through
            # libraries and python versions. It might be due to click progressbar not showing on
            # eclipse. Therefore, assert a regex, where we relax the condition on newlines (\n+)
            str2check = \
                (r'4 segment\(s\) found to process\n'
                 r'\n+'
                 r'0 of 4 segment\(s\) successfully processed\n'
                 r'4 of 4 segment\(s\) skipped with error message \(check log or details\)')
            assert re.search(str2check, stdout)

            # logfile has also the messages of what was wrong. Note that
            # py2 prints:
            # "invalid literal for long() with base 10: '4d'"
            # and PY3 prints:
            # ""invalid literal for int() with base 10: '4d'"
            # instead of writing:
            # if PY2:
            #     assert "invalid literal for long() with base 10: '4d'" in logfilecontent
            # else:
            #     assert "invalid literal for int() with base 10: '4d'" in logfilecontent
            # let's be more relaxed (use .*). Also, use a regexp for cross-versions
            # compatibility about newlines (see comments above)
            str2check = \
                (r"4 segment\(s\) found to process\n"
                 r"\n+"
                 r"segment \([^\)]+\)\: invalid literal for .* with base 10: '4d'\n"
                 r"segment \([^\)]+\)\: invalid literal for .* with base 10: '4d'\n"
                 r"segment \([^\)]+\)\: invalid literal for .* with base 10: '4d'\n"
                 r"segment \([^\)]+\)\: invalid literal for .* with base 10: '4d'\n"
                 r"\n+"
                 r"0 of 4 segment\(s\) successfully processed\n"
                 r"4 of 4 segment\(s\) skipped with error message \(check log or details\)")
            assert re.search(str2check, logfilecontent)
 def func(**overridden_pars):
     # build a yaml config from the 'paramtable.yaml' template, with the
     # given parameters overridden:
     template_path = get_templates_fpath('paramtable.yaml')
     return pytestdir.yamlfile(template_path, **overridden_pars)
# Esempio n. 8 ("Example n. 8"), rating: 0 -- non-code residue from the
# original (scraped) source, commented out to keep the file parseable.
def load_config_for_download(config, parseargs, **param_overrides):
    '''loads download arguments from the given config (yaml file or dict) after parsing and
    checking some of the dict keys.

    :param config: a yaml file path, or a dict, forwarded to `yaml_load`
    :param parseargs: boolean. If True, the loaded dict keys are checked,
        possibly renamed (e.g. 'dburl' -> 'session'), and their values
        converted into the objects expected by the download routine
    :param param_overrides: optional keyword arguments overriding the
        values loaded from `config`

    :return: a dict loaded from the given `config` and with parsed arguments (dict keys)

    Raises BadArgument in case of parsing errors, missing arguments, conflicts etcetera
    '''
    try:
        config_dict = yaml_load(config, **param_overrides)
    except Exception as exc:
        raise BadArgument('config', exc)

    if parseargs:
        # few variables:
        # configfile (None if `config` is not a file path) is used below to
        # resolve paths relative to the yaml file location (see 'restricted_data'
        # and 'eventws' checkers):
        configfile = config if (isinstance(config, string_types) and os.path.isfile(config))\
            else None

        # define first default event params in order to avoid typos
        def_evt_params = EVENTWS_SAFE_PARAMS

        # now, what we want to do here is basically convert config_dict keys
        # into suitable arguments for stream2segment functions: this includes
        # renaming params, parsing/converting their values, raising
        # BadArgument exceptions and so on

        # Let's configure a 'params' list, a list of dicts where each dict is a 'param checker'
        # with the following keys (at least one should be provided):
        # names: list of strings. provide it in order to check for optional names,
        #        check that only one param is provided, and
        #        replace whatever is found with the first item in the list
        # newname: string, provide it if you want to replace names above with this value
        #          instead first item in 'names'
        # defvalue: if provided, then the parameter is optional and will be set to this value
        #           if not provided, then the parameter is mandatory (BadArgument is raised in case)
        # newvalue: function accepting a value (the parameter value) raising whatever is
        #           needed if the parameter is invalid, and returning the correct parameter value
        params = [
            {
             'names': def_evt_params[:2],  # ['minlatitude', 'minlat'],
             'defvalue': None,
             'newvalue': between(-90.0, 90.0)
            },
            {
             'names': def_evt_params[2:4],  # ['maxlatitude', 'maxlat'],
             'defvalue': None,
             'newvalue': between(-90.0, 90.0)
            },
            {
             'names': def_evt_params[4:6],  # ['minlongitude', 'minlon'],
             'defvalue': None,
             'newvalue': between(-180.0, 180.0)
            },
            {
             'names': def_evt_params[6:8],  # ['maxlongitude', 'maxlon'],
             'defvalue': None,
             'newvalue': between(-180.0, 180.0)
            },
            {
             'names': def_evt_params[8:10],  # ['minmagnitude', 'minmag'],
             'defvalue': None
            },
            {
             'names': def_evt_params[10:12],  # ['maxmagnitude', 'maxmag'],
             'defvalue': None
            },
            {
             'names': def_evt_params[12:13],  # ['mindepth'],
             'defvalue': None
            },
            {
             'names': def_evt_params[13:14],  # ['maxdepth'],
             'defvalue': None
            },
            {
             'names': ['update_metadata'],
             'newvalue': parse_update_metadata
             },
            {
             'names': ['restricted_data'],
             'newname': 'authorizer',
             'newvalue': lambda val: create_auth(val, config_dict['dataws'], configfile)
            },
            {
             'names': ['dburl'],
             'newname': 'session',
             'newvalue': get_session
            },
            {
             'names': ['traveltimes_model'],
             'newname': 'tt_table',
             'newvalue': load_tt_table
            },
            {
             'names': ('starttime', 'start'),
             'newvalue': valid_date
            },
            {
             'names': ('endtime', 'end'),
             'newvalue': valid_date
            },
            {
             'names': ['eventws'],
             'newvalue': lambda url: valid_fdsn(url, is_eventws=True, configfile=configfile)
            },
            {
             'names': ['dataws'],
             'newvalue': lambda url: valid_fdsn(url, is_eventws=False)
            },
            {
             'names': ('network', 'net', 'networks'),
             'defvalue': [],
             'newvalue': nslc_param_value_aslist
            },
            {
             'names': ('station', 'sta', 'stations'),
             'defvalue': [],
             'newvalue': nslc_param_value_aslist
            },
            {
             'names': ('location', 'loc', 'locations'),
             'defvalue': [],
             'newvalue': nslc_param_value_aslist
            },
            {
             'names': ('channel', 'cha', 'channels'),
             'defvalue': [],
             'newvalue': nslc_param_value_aslist
            },
            {'names': ['eventws_params', 'eventws_query_args']},
            {
             'names': ['advanced_settings'],
             'newvalue': parse_download_advanced_settings
            },
            {
             'names': ['search_radius'],
             'newvalue': check_search_radius
            }
        ]

        # store all keys now because we might change them (see below):
        all_keys = set(config_dict)
        # do the check (this MODIFIES config_dict in place!):
        parse_arguments(config_dict, *params)

        # Now check for:
        # 1a. parameter supplied here NOT in the default config
        # 1b. parameter supplied here with different type of the default config
        # 2. Parameters in the default config not supplied here

        # First, create some sets of params names:
        # the parsed keys (all names defined above):
        parsed_keys = set(chain(*(_['names'] for _ in params)))
        # load original configuration (default in this package):
        orig_config = yaml_load(get_templates_fpath("download.yaml"))

        # Check 1a. and 1b.:
        for key in all_keys - parsed_keys:
            try:
                other_value = orig_config[key]
            except KeyError:
                raise BadArgument(key, '', 'No such option')
            try:
                typesmatch(config_dict[key], other_value)
            except Exception as exc:
                raise BadArgument(key, exc)

        # Check 2. :
        # NOTE(review): parsed_keys is subtracted as well, so a 'parsed'
        # parameter missing from the config is not reported here — presumably
        # already handled by parse_arguments above (mandatory vs. 'defvalue');
        # confirm:
        missing_keys = set(orig_config) - all_keys - parsed_keys
        if missing_keys:
            raise BadArgument(list(missing_keys), KeyError())

        # At last, put all event-related parameters (except starttime and endtime):
        # and in the eventws_params dict (the latter is an OPTIONAL dict
        # which can be set in the config for ADDITIONAL eventws parameters)
        # and check for conflicts:
        eventsearchparams = config_dict['eventws_params']
        # eventsearchparams might be none
        if not eventsearchparams:
            config_dict['eventws_params'] = eventsearchparams = {}
        for par in def_evt_params:
            if par in eventsearchparams:  # conflict:
                raise BadArgument('eventws_params',
                                  'conflicting parameter "%s"' % par)
            value = config_dict.pop(par, None)
            if value is not None:
                eventsearchparams[par] = value

    return config_dict
def test_download_verbosity(
        mock_run_download,
        mock_configlog,
        mock_closesess,
        mock_getsess,
        # fixtures:
        db,
        capsys,
        pytestdir):
    '''test what is written to standard output and to the db log for all
    combinations of `log2file` and `verbose`, with and without exceptions
    raised by the (mocked) download routine, checking also how many loggers
    get configured in each case'''
    if not db.is_sqlite:
        pytest.skip("Skipping postgres test (only sqlite memory used)")

    db.create(to_file=False)
    dburl = db.dburl
    sess = db.session
    # mock get_session in order to return always the same session object:
    mock_getsess.side_effect = lambda *a, **kw: sess
    # close session should not close session, otherwise with a memory db we lose the data
    mock_closesess.side_effect = lambda *a, **v: None

    # handlers should be removed each run_download call, otherwise we end up
    # appending them
    numloggers = [0]

    def clogd(logger, logfilebasepath, verbose):
        # mock of configlog4download: same behavior, but logs to a pytest temp
        # file and records how many loggers were configured (in numloggers):
        for h in logger.handlers[:]:
            logger.removeHandler(h)
        # config logger as usual, but redirects to a temp file
        # that will be deleted by pytest, instead of polluting the program
        # package:
        ret = o_configlog4download(
            logger,
            pytestdir.newfile('.log') if logfilebasepath else None, verbose)
        numloggers[0] = len(ret)
        return ret

    mock_configlog.side_effect = clogd

    last_known_id = [
        None
    ]  # stupid hack to assign to out-of-scope var (py2 compatible)

    def dblog_err_warn():
        # return (log, errors, warnings) of the newest Download row written
        # since the previous call:
        qry = sess.query(Download.id, Download.log, Download.warnings,
                         Download.errors)
        if last_known_id[0] is not None:
            qry = qry.filter(Download.id > last_known_id[0])
        tup = qry.first()
        last_known_id[0] = tup[0]
        return tup[1], tup[2], tup[3]

    d_yaml_file = get_templates_fpath("download.yaml")

    # run verbosity = 0. As this does not configure loggers, previous loggers will not be removed
    # (see mock above). Thus launch all tests in increasing verbosity order (from 0 on)
    mock_run_download.side_effect = lambda *a, **v: None
    ret = o_download(d_yaml_file, log2file=False, verbose=False, dburl=dburl)
    out, err = capsys.readouterr()
    assert not out  # assert empty (avoid comparing to strings and potential py2 py3 headache)
    # NOTE(review): 'err' (the captured stderr) is shadowed right below by the
    # db 'errors' count; the captured stderr is never checked:
    log, err, warn = dblog_err_warn()
    assert "N/A: either logger not configured, or " in log
    assert err == 0
    assert warn == 0
    assert numloggers[0] == 0

    # now let's see that if we raise an exception we also get no logger configured
    mock_run_download.side_effect = KeyError('a')
    # (log2file=False, verbose=False: still no logger configured)
    with pytest.raises(KeyError) as kerr:
        ret = o_download(d_yaml_file,
                         log2file=False,
                         verbose=False,
                         dburl=dburl)
    out, err = capsys.readouterr()
    assert not out
    log, err, warn = dblog_err_warn()
    assert "N/A: either logger not configured, or " in log
    assert err == 0
    assert warn == 0
    assert numloggers[0] == 0

    # log2file=True configures loggers, but only the Db logger
    mock_run_download.side_effect = lambda *a, **v: None
    ret = o_download(d_yaml_file, log2file=True, verbose=False, dburl=dburl)
    out, err = capsys.readouterr()
    # this is also empty cause mock_run_download is no-op
    assert not out  # assert empty
    log, err, warn = dblog_err_warn()
    assert "Completed in " in log
    assert 'No errors' in log  # 0 total errors
    assert 'No warnings' in log  # 0 total warnings
    assert numloggers[0] == 1

    # now let's see that if we raise an exception we also get it in the db log
    mock_run_download.side_effect = KeyError('a')
    with pytest.raises(KeyError) as kerr:
        ret = o_download(d_yaml_file,
                         log2file=True,
                         verbose=False,
                         dburl=dburl)
    out, err = capsys.readouterr()
    assert not out
    log, err, warn = dblog_err_warn()
    assert "Traceback (most recent call last):" in log
    assert err == 0
    assert warn == 0
    assert numloggers[0] == 1

    # log2file=True and verbose=True: db logger plus the stdout logger:
    mock_run_download.side_effect = lambda *a, **v: None
    ret = o_download(d_yaml_file, log2file=True, verbose=True, dburl=dburl)
    out, err = capsys.readouterr()
    assert out  # assert non empty
    log, err, warn = dblog_err_warn()
    assert "Completed in " in log
    assert 'No errors' in log  # 0 total errors
    assert 'No warnings' in log  # 0 total warnings
    assert numloggers[0] == 2

    # now let's see that if we raise an exception we also get it on stdout
    mock_run_download.side_effect = KeyError('a')
    with pytest.raises(KeyError) as kerr:
        ret = o_download(d_yaml_file, log2file=True, verbose=True, dburl=dburl)
    out, err = capsys.readouterr()
    # Now out is not empty cause the logger which prints to stdout infos errors and critical is set:
    assert "Traceback (most recent call last):" in out
    assert "KeyError" in out
    log, err, warn = dblog_err_warn()
    assert "Traceback (most recent call last):" in log
    assert err == 0
    assert warn == 0
    assert numloggers[0] == 2
    def test_simple_run_no_outfile_provided_good_argslists(
            self,
            mock_get_chunksize_defaults,
            mock_process_segments_mp,
            mock_process_segments,
            mock_get_advanced_settings,
            mock_mp_Pool,
            advanced_settings,
            cmdline_opts,
            def_chunksize,
            # fixtures:
            pytestdir,
            db4process,
            clirunner,
            yamlfile):
        '''test that arguments are correctly passed to the processing
        subroutines, and that those are called the expected number of times,
        when no output file is provided. Mock Pool.imap_unordered as we do not
        want to confuse pytest in case of multi-processing.

        :param advanced_settings: dict overriding the config 'advanced_settings'
        :param cmdline_opts: additional command line options
        :param def_chunksize: mocked default chunksize, or None to use the
            real default
        '''
        # mock the default chunksize, if explicitly given:
        if def_chunksize is None:
            mock_get_chunksize_defaults.side_effect = _o_get_chunksize_defaults
        else:
            mock_get_chunksize_defaults.side_effect = \
                lambda *a, **v: (def_chunksize, _o_get_chunksize_defaults()[1])

        class MockPool(object):
            '''mock of multiprocessing.Pool running everything sequentially'''
            def __init__(self, *a, **kw):
                pass

            def imap_unordered(self, *a, **kw):
                # run sequentially in the current process:
                return map(*a, **kw)

            def close(self, *a, **kw):
                pass

            def join(self, *a, **kw):
                pass

        mock_mp_Pool.return_value = MockPool()

        # set values which will override the yaml config in templates folder:
        dir_ = pytestdir.makedir()
        config_overrides = {
            'snr_threshold': 0,
            'segment_select': {},  # take everything
            'root_dir': os.path.abspath(dir_)
        }
        if advanced_settings:
            config_overrides['advanced_settings'] = advanced_settings

        yaml_file = yamlfile(**config_overrides)

        # need to reset this global variable: FIXME: better handling?
        # process.main._inventories = {}

        pyfile = get_templates_fpath("save2fs.py")

        result = clirunner.invoke(cli, [
            'process', '--dburl', db4process.dburl, '-p', pyfile, '-c',
            yaml_file
        ] + cmdline_opts)
        assert clirunner.ok(result)

        # test some stuff and get configarg, the REAL config passed in the processing
        # subroutines:
        assert mock_get_advanced_settings.called
        # assert there is no "skipped without messages" message, as it should be the case
        # when there is no function processing the output:
        assert "skipped without messages" not in result.output.lower()
        assert len(mock_get_advanced_settings.call_args_list) == 1
        configarg = mock_get_advanced_settings.call_args_list[0][0][
            0]  # positional argument

        seg_processed_count = query4process(
            db4process.session, configarg.get('segment_select', {})).count()
        # seg_processed_count is 6. 'segments_chunksize' in advanced_settings is not given or 1.
        # def_chunksize can be None (i.e., 1200) or given (2)
        # See stream2segment.process.core._get_chunksize_defaults to see how we calculated
        # the expected calls to mock_process_segments*:
        if 'segments_chunksize' in advanced_settings or def_chunksize is None:
            expected_callcount = seg_processed_count
        else:
            # ceiling integer division (replaces the float-based computation,
            # avoiding rounding issues):
            expected_callcount = -(-seg_processed_count // def_chunksize)

        # assert we called the functions the specified amount of times
        if '--multi-process' in cmdline_opts and not advanced_settings:
            # remember that when we have advanced_settings it OVERRIDES
            # the original advanced_settings key in config, thus also multi-process flag
            assert mock_process_segments_mp.called
            assert mock_process_segments_mp.call_count == expected_callcount
            # process_segments_mp calls process_segments:
            assert mock_process_segments_mp.call_count == mock_process_segments.call_count
        else:
            assert mock_process_segments_mp.called == ('--multi-process'
                                                       in cmdline_opts)
            assert mock_process_segments.called
            assert mock_process_segments.call_count == expected_callcount
        # test that advanced settings were correctly written:
        real_advanced_settings = configarg.get('advanced_settings', {})
        assert ('segments_chunksize' in real_advanced_settings) == \
            ('segments_chunksize' in advanced_settings)

        # 'advanced_settings', if present HERE, will REPLACE 'advanced_settings' in config. Thus:
        if advanced_settings and '--multi-process' not in cmdline_opts:
            assert sorted(real_advanced_settings.keys()) == sorted(
                advanced_settings.keys())
            for k in advanced_settings.keys():
                assert advanced_settings[k] == real_advanced_settings[k]
        else:
            if 'segments_chunksize' in advanced_settings:
                assert real_advanced_settings['segments_chunksize'] == \
                    advanced_settings['segments_chunksize']
            assert ('multi_process' in real_advanced_settings) == \
                ('--multi-process' in cmdline_opts)
            if '--multi-process' in cmdline_opts:
                assert real_advanced_settings['multi_process'] is True
            assert ('num_processes' in real_advanced_settings) == \
                ('--num-processes' in cmdline_opts)
            if '--num-processes' in cmdline_opts:
                val = cmdline_opts[cmdline_opts.index('--num-processes') + 1]
                assert str(real_advanced_settings['num_processes']) == val
                # assert real_advanced_settings['num_processes'] is an int.
                # As we import int from futures in templates, we might end-up having
                # futures.newint. The type check is made by checking we have an integer
                # type as the native type. For info see:
                # http://python-future.org/what_else.html#passing-data-to-from-python-2-libraries
                assert isinstance(real_advanced_settings['num_processes'],
                                  integer_types)
 def func(**overridden_pars):
     # build a yaml config from the 'download.yaml' template, with the
     # given parameters overridden:
     template_path = get_templates_fpath('download.yaml')
     return pytestdir.yamlfile(template_path, **overridden_pars)
# Esempio n. 12 ("Example n. 12"), rating: 0 -- non-code residue from the
# original (scraped) source, commented out to keep the file parseable.
def test_click_template(mock_main_init, mock_input, pytestdir):
    """Test the `init` CLI command: template files are copied to the target
    directory and are well formed; re-running on the same directory prompts
    the user ('1' = overwrite all, '2' = overwrite non-existing only,
    '' = skip) and file modification times confirm what was (re)written."""
    runner = CliRunner()
    # assert help works:
    result = runner.invoke(cli, ['init', '--help'])
    assert not mock_main_init.called
    assert result.exit_code == 0

    expected_files = [
        'download.yaml', 'paramtable.py', 'paramtable.yaml', 'save2fs.py',
        'save2fs.yaml', 'jupyter.example.ipynb', 'jupyter.example.db'
    ]
    non_python_files = [
        _ for _ in expected_files
        if os.path.splitext(_)[1] not in ('.py', '.yaml')
    ]

    dir_ = pytestdir.makedir()
    path = os.path.join(dir_, 'abc')

    def max_mod_time():
        # latest modification time among all files copied into `path`:
        return max(
            os.path.getmtime(os.path.join(path, f)) for f in os.listdir(path))

    result = runner.invoke(cli, ['init', path])
    # FIXME: check how to mock os.path.isfile properly. This does not work:
    # assert mock_isfile.call_count == 5
    assert result.exit_code == 0
    assert mock_main_init.called
    files = os.listdir(path)
    assert sorted(files) == sorted(expected_files)
    assert not mock_input.called

    # assert we correctly wrote the files
    for fle in files:
        sourcepath = get_templates_fpath(fle)
        destpath = os.path.join(path, fle)
        if os.path.splitext(fle)[1] == '.yaml':
            # check loaded yaml, which also assures our templates are well formed:
            sourceconfig = yaml_load(sourcepath)
            destconfig = yaml_load(destpath)
            if os.path.basename(sourcepath) == 'download.yaml':
                assert sorted(sourceconfig.keys()) == sorted(destconfig.keys())
            else:
                # assert we have all keys. Note that 'advanced_settings' is not in
                # sourceconfig (it is added via jinja2 templating system):
                assert sorted(['advanced_settings'] + list(sourceconfig.keys())) \
                    == sorted(destconfig.keys())
            for key in sourceconfig.keys():
                assert type(sourceconfig[key]) == type(destconfig[key])
        elif os.path.splitext(fle)[1] == '.py':
            # check loaded python modules, which also assures our templates are well formed:
            sourcepy = load_source(sourcepath)
            destpy = load_source(destpath)
            # avoid comparing "__blabla__" methods as they are intended to be python
            # 'private' attributes and there are differences between py2 and py3
            # we want to test OUR stuff is the same
            sourcekeys = [
                a for a in dir(sourcepy) if (a[:2] + a[-2:]) != "____"
            ]
            destkeys = [a for a in dir(destpy) if (a[:2] + a[-2:]) != "____"]
            assert sorted(sourcekeys) == sorted(destkeys)
            for key in sourcekeys:
                assert type(getattr(sourcepy,
                                    key)) == type(getattr(destpy, key))
        elif fle not in non_python_files:
            raise ValueError(
                'The file "%s" is not supposed to be copied by `init`' % fle)

    # try to write to the same dir (1)
    mock_input.reset_mock()
    mock_input.side_effect = lambda arg: '1'  # overwrite all files
    maxmodtime = max_mod_time()
    # we'll test that files are modified, but on mac timestamps are rounded to seconds
    # so wait 1 second to be safe
    time.sleep(1)
    result = runner.invoke(cli, ['init', path])
    assert mock_input.called
    assert max_mod_time() > maxmodtime
    assert '%d file(s) copied in' % len(expected_files) in result.output

    # try to write to the same dir (2)
    for click_prompt_ret_val in ('', '2'):
        # '' => skip overwrite
        # '2' => overwrite only non existing
        # in this case, both the above returned values produce the same result
        mock_input.reset_mock()
        mock_input.side_effect = lambda arg: click_prompt_ret_val
        maxmodtime = max_mod_time()
        time.sleep(1)  # see comment above
        result = runner.invoke(cli, ['init', path])
        assert mock_input.called
        assert max_mod_time() == maxmodtime
        assert 'No file copied' in result.output

    os.remove(os.path.join(path, expected_files[0]))
    # try to write to the same dir (3): one file is now missing, so answering
    # '2' (overwrite non-existing) must copy exactly that one file back:
    mock_input.reset_mock()
    mock_input.side_effect = lambda arg: '2'  # overwrite non-existing (1) file
    maxmodtime = max_mod_time()
    time.sleep(1)  # see comment above
    result = runner.invoke(cli, ['init', path])
    assert mock_input.called
    assert max_mod_time() > maxmodtime
    assert '1 file(s) copied in' in result.output
    def init(self, request, db, data):
        """Set up the test harness: (re)create the db with a Download run,
        prepare the url-read side effects, mock `urlopen` and `ThreadPool`
        (for deterministic results), capture query logging into a StringIO,
        and register a finalizer undoing all patches and settings."""
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)
        # setup a run_id:
        rdw = Download()
        db.session.add(rdw)
        db.session.commit()
        self.run = rdw

        # side effects:
        self._dc_urlread_sideeffect = """http://geofon.gfz-potsdam.de/fdsnws/dataselect/1/query
ZZ * * * 2002-09-01T00:00:00 2005-10-20T00:00:00
UP ARJ * * 2013-08-01T00:00:00 2017-04-25

http://ws.resif.fr/fdsnws/dataselect/1/query
ZU * * HHZ 2015-01-01T00:00:00 2016-12-31T23:59:59.999999

"""
        self._mintraveltime_sideeffect = cycle([1])
        self._seg_data = data.read("GE.FLT1..HH?.mseed")
        self._seg_data_gaps = data.read("IA.BAKI..BHZ.D.2016.004.head")
        self._seg_data_empty = b''
        # simulate a mix of successful, truncated, empty and error responses:
        self._seg_urlread_sideeffect = [
            self._seg_data, self._seg_data_gaps, 413, 500, self._seg_data[:2],
            self._seg_data_empty, 413,
            URLError("++urlerror++"),
            socket.timeout()
        ]
        self.service = ''  # so get_datacenters_df accepts any row by default
        self.db_buf_size = 1
        self.routing_service = yaml_load(get_templates_fpath("download.yaml"))\
            ['advanced_settings']['routing_service_url']

        # NON db stuff (logging, patchers, pandas...):
        self.logout = StringIO()
        handler = StreamHandler(stream=self.logout)
        self._logout_cache = ""
        # THIS IS A HACK:
        query_logger.setLevel(logging.INFO)  # necessary to forward to handlers
        # if we called closing (we are testing the whole chain) the level will be reset
        # (to level.INFO) otherwise it stays what we set two lines above. Problems might arise
        # if closing sets a different level, but for the moment who cares
        query_logger.addHandler(handler)

        # when debugging, I want the full dataframe with to_string(), not truncated
        # NOTE: this messes up right alignment of numbers in DownloadStats (see utils.py)
        # FIRST, remember current settings and restore them in cleanup:
        _pd_display_maxcolwidth = pd.get_option('display.max_colwidth')
        pd.set_option('display.max_colwidth', -1)

        # define class level patchers (we do not use a yield as we need to do more stuff in the
        # finalizer, see below)
        patchers = []

        patchers.append(patch('stream2segment.utils.url.urlopen'))
        self.mock_urlopen = patchers[-1].start()

        # mock ThreadPool (tp) to run one instance at a time, so we get deterministic results:
        class MockThreadPool(object):
            def __init__(self, *a, **kw):
                pass

            def imap(self, func, iterable, *args):
                # make imap deterministic: same as standard python map:
                # everything is executed in a single thread the right input order
                return map(func, iterable)

            def imap_unordered(self, func_, iterable, *args):
                # make imap_unordered deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func_, iterable)

            def close(self, *a, **kw):
                pass

        # assign patches and mocks:
        patchers.append(patch('stream2segment.utils.url.ThreadPool'))
        self.mock_tpool = patchers[-1].start()
        self.mock_tpool.side_effect = MockThreadPool

        # add finalizer:
        def delete():
            # restore pandas option, stop all patchers and detach our handler:
            pd.set_option('display.max_colwidth', _pd_display_maxcolwidth)

            for patcher in patchers:
                patcher.stop()

            hndls = query_logger.handlers[:]
            handler.close()
            for h in hndls:
                if h is handler:
                    query_logger.removeHandler(h)

        request.addfinalizer(delete)
class Test(object):
    """Integration tests for the `s2s process` CLI sub-command, focused on
    output-file handling: no output file provided, pre-existing (empty)
    output file, and append mode with csv/hdf output and dict/list
    processing-function return values."""

    pyfile = get_templates_fpath("paramtable.py")

    @property
    def logfilecontent(self):
        # full text of the log file configured in the `init` fixture below:
        assert os.path.isfile(self._logfilename)
        with open(self._logfilename) as opn:
            return opn.read()

    # The class-level `init` fixture is marked with autouse=true which implies that all test
    # methods in the class will use this fixture without a need to state it in the test
    # function signature or with a class-level usefixtures decorator. For info see:
    # https://docs.pytest.org/en/latest/fixture.html#autouse-fixtures-xunit-setup-on-steroids
    @pytest.fixture(autouse=True)
    def init(self, request, pytestdir, db4process):
        db4process.create(to_file=True)
        session = db4process.session
        # sets up the mocked functions: db session handling (using the already created session)
        # and log file handling:
        with patch('stream2segment.utils.inputargs.get_session',
                   return_value=session):
            with patch('stream2segment.main.closesession',
                       side_effect=lambda *a, **v: None):
                with patch(
                        'stream2segment.main.configlog4processing') as mock2:

                    def clogd(logger, logfilebasepath, verbose):
                        # config logger as usual, but redirects to a temp file
                        # that will be deleted by pytest, instead of polluting the program
                        # package:
                        ret = o_configlog4processing(logger,
                                                     pytestdir.newfile('.log') \
                                                     if logfilebasepath else None,
                                                     verbose)

                        self._logfilename = ret[0].baseFilename
                        return ret

                    mock2.side_effect = clogd

                    yield

# ## ======== ACTUAL TESTS: ================================

# Recall: we have 6 segments, issued from all combination of
# station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
# use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
# those segments in case. For info see db4process in conftest.py

    @mock.patch('stream2segment.main.run_process',
                side_effect=process_main_run)
    def test_simple_run_no_outfile_provided(
            self,
            mock_run,
            # fixtures:
            db4process,
            clirunner,
            yamlfile):
        '''test a case where save inventory is True, and that we saved inventories'''
        # set values which will override the yaml config in templates folder:
        config_overrides = {
            'snr_threshold': 0,
            'segment_select': {
                'has_data': 'true'
            }
        }
        result = clirunner.invoke(cli, [
            'process', '--dburl', db4process.dburl, '-p', self.pyfile, '-c',
            yamlfile(**config_overrides), '-a'
        ])
        assert clirunner.ok(result)

        lst = mock_run.call_args_list
        assert len(lst) == 1
        args, kwargs = lst[0][0], lst[0][1]
        # assert third argument (`ondone` callback) is None 'ondone' or is a BaseWriter (no-op)
        # class:
        assert args[2] is None or \
            type(args[2]) == BaseWriter  # pylint: disable=unidiomatic-typecheck
        # assert "Output file:  n/a" in result output:
        assert re.search('Output file:\\s+n/a', result.output)
        # assert the append-ignored message is in the result output:
        assert re.search(
            'Ignoring `append` functionality: output file does not exist '
            'or not provided', result.output)

    # Recall: we have 6 segments, issued from all combination of
    # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
    # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
    # those segments in case. For info see db4process in conftest.py
    @pytest.mark.parametrize("advanced_settings, cmdline_opts", [
        ({}, ['-a']),
    ])
    def test_simple_run_retDict_saveinv_emptyfile(
            self,
            advanced_settings,
            cmdline_opts,
            # fixtures:
            pytestdir,
            db4process,
            clirunner,
            yamlfile):
        '''test a case where we create a temporary file, empty but opened before writing'''
        # set values which will override the yaml config in templates folder:
        config_overrides = {
            'snr_threshold': 0,
            'segment_select': {
                'has_data': 'true'
            }
        }
        if advanced_settings:
            config_overrides['advanced_settings'] = advanced_settings

        _seg = db4process.segments(with_inventory=True,
                                   with_data=True,
                                   with_gap=False).one()
        expected_first_row_seg_id = _seg.id
        station_id_whose_inventory_is_saved = _seg.station.id

        # pre-create the (empty) output file, so `process` finds it existing:
        filename = pytestdir.newfile('output.csv', create=True)
        result = clirunner.invoke(cli, [
            'process', '--dburl', db4process.dburl, '-p', self.pyfile, '-c',
            yamlfile(**config_overrides), filename
        ] + cmdline_opts)
        assert clirunner.ok(result)

        # check file has been correctly written:
        csv1 = readcsv(filename)
        assert len(csv1) == 1
        assert csv1.loc[0, csv1.columns[0]] == expected_first_row_seg_id
        logtext = self.logfilecontent
        assert len(logtext) > 0
        assert "Appending results to existing file" in logtext

    # Recall: we have 6 segments, issued from all combination of
    # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
    # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
    # those segments in case. For info see db4process in conftest.py
    @pytest.mark.parametrize('hdf', [True, False])
    @pytest.mark.parametrize('return_list', [True, False])
    @pytest.mark.parametrize("advanced_settings, cmdline_opts", [
        ({}, ['-a']),
        ({}, ['-a', '--multi-process']),
    ])
    @mock.patch('stream2segment.cli.click.confirm', return_value=True)
    def test_append(
            self,
            mock_click_confirm,
            advanced_settings,
            cmdline_opts,
            return_list,
            hdf,
            # fixtures:
            pytestdir,
            db4process,
            clirunner,
            yamlfile):
        '''test a typical case where we supply the append option'''
        if return_list and hdf:
            # hdf does not support returning lists
            return

        # set values which will override the yaml config in templates folder:
        config_overrides = {
            'snr_threshold': 0,
            'segment_select': {
                'has_data': 'true'
            }
        }
        if advanced_settings:
            config_overrides['advanced_settings'] = advanced_settings
        yaml_file = yamlfile(**config_overrides)

        _seg = db4process.segments(with_inventory=True,
                                   with_data=True,
                                   with_gap=False).one()
        expected_first_row_seg_id = _seg.id
        station_id_whose_inventory_is_saved = _seg.station.id

        session = db4process.session

        filename = pytestdir.newfile('.hdf' if hdf else '.csv')

        pyfile = self.pyfile
        if return_list:
            # modify python so that 'main' returns a list by calling the default 'main'
            # and returning its keys:
            with open(self.pyfile, 'r') as opn:
                content = opn.read()

            pyfile = pytestdir.newfile('.py')
            cont2 = content.replace(
                "def main(segment, config):", """def main(segment, config):
    return list(main2(segment, config).values())
def main2(segment, config):""")
            with open(pyfile, 'wb') as _opn:
                _opn.write(cont2.encode('utf8'))

        mock_click_confirm.reset_mock()
        result = clirunner.invoke(cli, [
            'process', '--dburl', db4process.dburl, '-p', pyfile, '-c',
            yaml_file, filename
        ] + cmdline_opts)
        assert clirunner.ok(result)

        def read_hdf(filename):
            return pd.read_hdf(filename).reset_index(drop=True, inplace=False)

        # check file has been correctly written:
        csv1 = read_hdf(filename) if hdf else readcsv(filename,
                                                      header=not return_list)
        assert len(csv1) == 1
        segid_column = SEGMENT_ID_COLNAME if hdf else csv1.columns[0]
        assert csv1.loc[0, segid_column] == expected_first_row_seg_id
        logtext1 = self.logfilecontent
        assert "4 segment(s) found to process" in logtext1
        assert "Skipping 1 already processed segment(s)" not in logtext1
        assert "Ignoring `append` functionality: output file does not exist or not provided" \
            in logtext1
        assert "1 of 4 segment(s) successfully processed" in logtext1
        assert not mock_click_confirm.called

        # now test a second call, the same as before:
        mock_click_confirm.reset_mock()
        result = clirunner.invoke(cli, [
            'process', '--dburl', db4process.dburl, '-p', pyfile, '-c',
            yaml_file, filename
        ] + cmdline_opts)
        # check file has been correctly written:
        csv2 = read_hdf(filename) if hdf else readcsv(filename,
                                                      header=not return_list)
        assert len(csv2) == 1
        segid_column = SEGMENT_ID_COLNAME if hdf else csv1.columns[0]
        assert csv2.loc[0, segid_column] == expected_first_row_seg_id
        logtext2 = self.logfilecontent
        assert "3 segment(s) found to process" in logtext2
        assert "Skipping 1 already processed segment(s)" in logtext2
        assert "Appending results to existing file" in logtext2
        assert "0 of 3 segment(s) successfully processed" in logtext2
        assert not mock_click_confirm.called
        # assert two rows are equal:
        assert_frame_equal(csv1, csv2, check_dtype=True)

        # change the segment id of the written segment
        seg = session.query(Segment).filter(Segment.id == expected_first_row_seg_id).\
            first()
        new_seg_id = seg.id * 100
        seg.id = new_seg_id
        session.commit()

        # now test a second call, the same as before:
        mock_click_confirm.reset_mock()
        result = clirunner.invoke(cli, [
            'process', '--dburl', db4process.dburl, '-p', pyfile, '-c',
            yaml_file, filename
        ] + cmdline_opts)
        # check file has been correctly written:
        csv3 = read_hdf(filename) if hdf else readcsv(filename,
                                                      header=not return_list)
        assert len(csv3) == 2
        segid_column = SEGMENT_ID_COLNAME if hdf else csv1.columns[0]
        assert csv3.loc[0, segid_column] == expected_first_row_seg_id
        assert csv3.loc[1, segid_column] == new_seg_id
        logtext3 = self.logfilecontent
        assert "4 segment(s) found to process" in logtext3
        assert "Skipping 1 already processed segment(s)" in logtext3
        assert "Appending results to existing file" in logtext3
        assert "1 of 4 segment(s) successfully processed" in logtext3
        assert not mock_click_confirm.called
        # assert two rows are equal:
        assert_frame_equal(csv1, csv3[:1], check_dtype=True)

        # last try: no append (also set no-prompt to test that we did not prompt the user)
        mock_click_confirm.reset_mock()
        result = clirunner.invoke(cli, [
            'process', '--dburl', db4process.dburl, '-p', pyfile, '-c',
            yaml_file, filename
        ] + cmdline_opts[1:])
        # check file has been correctly written:
        csv4 = read_hdf(filename) if hdf else readcsv(filename,
                                                      header=not return_list)
        assert len(csv4) == 1
        segid_column = SEGMENT_ID_COLNAME if hdf else csv1.columns[0]
        assert csv4.loc[0, segid_column] == new_seg_id
        logtext4 = self.logfilecontent
        assert "4 segment(s) found to process" in logtext4
        assert "Skipping 1 already processed segment(s)" not in logtext4
        assert "Appending results to existing file" not in logtext4
        assert "1 of 4 segment(s) successfully processed" in logtext4
        assert 'Overwriting existing output file' in logtext4
        assert mock_click_confirm.called

        # last try: prompt return False
        mock_click_confirm.reset_mock()
        mock_click_confirm.return_value = False
        result = clirunner.invoke(cli, [
            'process', '--dburl', db4process.dburl, '-p', pyfile, '-c',
            yaml_file, filename
        ] + cmdline_opts[1:])
        assert result.exception
        assert type(result.exception) == SystemExit
        assert result.exception.code == 1
 def func(**overridden_pars):
     """Copy the 'save2fs.yaml' template applying `overridden_pars` on top of
     its values, and return the path of the newly created yaml file."""
     return pytestdir.yamlfile(get_templates_fpath('save2fs.yaml'),
                               **overridden_pars)
# Esempio n. 16 / 0  (scraper artifact: separator between pasted snippets)
def test_download_eventws_query_args(
        mock_isfile,
        mock_run_download,
        # fixtures:
        run_cli_download):  # pylint: disable=redefined-outer-name
    '''test different scenarios where we provide eventws query args from the command line'''

    d_yaml_file = get_templates_fpath("download.yaml")
    # FIRST SCENARIO: no eventws_params provided
    mock_run_download.reset_mock()
    def_yaml_dict = yaml_load(d_yaml_file)['eventws_params']
    assert not def_yaml_dict  # None or empty dict
    result = run_cli_download()  # no event-related args supplied
    assert result.exit_code == 0
    # assert the yaml (as passed to the download function) has the correct value:
    real_eventws_params = mock_run_download.call_args_list[0][1][
        'eventws_params']
    # just assert it has keys merged from the global event-related yaml keys
    assert 'maxmagnitude' not in real_eventws_params
    assert real_eventws_params

    # test by providing an eventsws param which is not optional:
    mock_run_download.reset_mock()
    def_yaml_dict = yaml_load(d_yaml_file)['eventws_params']
    assert not def_yaml_dict  # None or empty dict
    result = run_cli_download('--minmagnitude', '15.5')
    assert result.exit_code == 0
    # assert the yaml (as passed to the download function) has the correct value:
    real_eventws_params = mock_run_download.call_args_list[0][1][
        'eventws_params']
    # just assert it has keys merged from the global event-related yaml keys
    assert real_eventws_params['minmagnitude'] == 15.5

    # test by providing a eventsws param which is optional:
    mock_run_download.reset_mock()
    def_yaml_dict = yaml_load(d_yaml_file)['eventws_params']
    assert not def_yaml_dict  # None or empty dict
    result = run_cli_download('--minmagnitude',
                              '15.5',
                              eventws_params={'format': 'abc'})
    assert result.exit_code == 0
    # assert the yaml (as passed to the download function) has the correct value:
    real_eventws_params = mock_run_download.call_args_list[0][1][
        'eventws_params']
    # just assert it has keys merged from the global event-related yaml keys
    assert real_eventws_params['minmagnitude'] == 15.5
    assert real_eventws_params['format'] == 'abc'

    # conflicting args (supplying a global non-optional param in eventws's config):
    for pars in [['--minlatitude', '-minlat'], ['--maxlatitude', '-maxlat'],
                 ['--minlongitude', '-minlon'], ['--maxlongitude', '-maxlon'],
                 ['--minmagnitude', '-minmag'], ['--maxmagnitude', '-maxmag'],
                 ['--mindepth'], ['--maxdepth']]:
        for par1, par2 in product(pars, pars):
            mock_run_download.reset_mock()
            result = run_cli_download(
                par1, '15.5', eventws_params={par2.replace('-', ''): 15.5})
            # NOTE(review): '!= 1' looks suspicious: the conflict asserted
            # below implies a failed run, which would normally be asserted
            # with 'exit_code != 0' — confirm against click's exit codes
            assert result.exit_code != 1
            assert 'conflict' in result.output
            assert 'Invalid value for "eventws_params"' in result.output

    # test a eventws supplied as non existing file and not valid fdsnws:
    mock_isfile.reset_mock()
    assert not mock_isfile.called
    result = run_cli_download('--eventws', 'myfile')
    assert result.exit_code != 0
    assert 'eventws' in result.output
    assert mock_isfile.called
    def test_simple_run_no_outfile_provided(
            self,
            mock_run,
            advanced_settings,
            cmdline_opts,
            # fixtures:
            pytestdir,
            db4process,
            clirunner,
            yamlfile):
        '''test a case where save inventory is True, and that we saved inventories
        db is a fixture implemented in conftest.py and setup here in self.transact fixture
        '''
        # set values which will override the yaml config in templates folder:
        dir_ = pytestdir.makedir()
        config_overrides = {
            'snr_threshold': 0,
            'segment_select': {
                'has_data': 'true'
            },
            'root_dir': os.path.abspath(dir_)
        }
        if advanced_settings:
            config_overrides['advanced_settings'] = advanced_settings

        yaml_file = yamlfile(**config_overrides)
        # query data for testing now as the program will expunge all data from the session
        # and thus we want to avoid DetachedInstanceError(s):
        expected_only_written_segment = \
            db4process.segments(with_inventory=True, with_data=True, with_gap=False).one()
        # get seiscomp path of OK segment before the session is closed:
        path = os.path.join(dir_, expected_only_written_segment.sds_path())

        pyfile = get_templates_fpath("save2fs.py")

        result = clirunner.invoke(cli, [
            'process', '--dburl', db4process.dburl, '-p', pyfile, '-c',
            yaml_file
        ] + cmdline_opts)
        assert clirunner.ok(result)

        # the save2fs processing module writes two miniSEED files per segment:
        filez = os.listdir(os.path.dirname(path))
        assert len(filez) == 2
        stream1 = read(os.path.join(os.path.dirname(path), filez[0]),
                       format='MSEED')
        stream2 = read(os.path.join(os.path.dirname(path), filez[1]),
                       format='MSEED')
        assert len(stream1) == len(stream2) == 1
        assert not np.allclose(stream1[0].data, stream2[0].data)

        lst = mock_run.call_args_list
        assert len(lst) == 1
        args, kwargs = lst[0][0], lst[0][1]
        # assert third argument (`ondone` callback) is None 'ondone' or is a BaseWriter (no-op)
        # class:
        assert args[2] is None or \
            type(args[2]) == BaseWriter  # pylint: disable=unidiomatic-typecheck
        # assert "Output file:  n/a" in result output:
        assert re.search('Output file:\\s+n/a', result.output)

        # Note that apparently CliRunner() (see clirunner fixture) puts stderr and stdout
        # together (https://github.com/pallets/click/pull/868)
        # Reminder: previously, log errors were redirected to stderr
        # This is dangerous as we use a redirect to avoid external libraries to print to stderr
        # and logging to stderr might cause 'operation on closed file'.
        for subs in ["Processing function: ", "Config. file: "]:
            idx = result.output.find(subs)
            assert idx > -1
# Esempio n. 18 / 0  (scraper artifact: separator between pasted snippets)
def test_process_bad_types(pytestdir):
    '''bad types must be passed directly to download as click does a preliminary check'''

    p_yaml_file, p_py_file = \
        get_templates_fpaths("paramtable.yaml", "paramtable.py")

    # Our inputargs-module functions emit messages SIMILAR to click's,
    # not byte-identical ones
    runner = CliRunner()

    # non-existing pyfile:
    res = runner.invoke(cli, ['process', '--pyfile', 'nrvnkenrgdvf'])
    assert res.exit_code != 0
    assert 'Error: Invalid value for "-p" / "--pyfile":' in res.output

    # garbage dburl with valid config and pyfile:
    res = runner.invoke(cli, [
        'process', '--dburl', 'nrvnkenrgdvf', '-c', p_yaml_file, '-p',
        p_py_file
    ])
    assert res.exit_code != 0
    assert 'Error: Invalid value for "dburl":' in res.output

    # if we do not provide click default values, they have invalid values and they take priority
    # (the --dburl arg is skipped):
    res = runner.invoke(
        cli, ['process', '--dburl', 'nrvnkenrgdvf', '-c', p_yaml_file])
    assert res.exit_code != 0
    assert 'Missing option "-p" / "--pyfile"' in res.output

    # missing config altogether:
    res = runner.invoke(cli, ['process', '--dburl', 'nrvnkenrgdvf'])
    assert res.exit_code != 0
    assert 'Missing option "-c" / "--config"' in res.output

    # garbage dburl again, additionally asserting it is echoed in the output:
    res = runner.invoke(cli, [
        'process', '--dburl', 'nrvnkenrgdvf', '-c', p_yaml_file, '-p',
        p_py_file
    ])
    assert res.exit_code != 0
    assert 'Error: Invalid value for "dburl":' in res.output
    assert "nrvnkenrgdvf" in res.output

    # a yaml file path passed as dburl (dburl pointing to a non-existing sqlite):
    d_yaml_file = get_templates_fpath('download.yaml')
    d_yaml_file = pytestdir.yamlfile(
        d_yaml_file, dburl='sqlite:///./path/to/my/db/sqlite.sqlite')
    res = runner.invoke(cli, [
        'process', '--dburl', d_yaml_file, '-c', p_yaml_file, '-p', p_py_file
    ])
    assert res.exit_code != 0
    assert 'Error: Invalid value for "dburl":' in res.output

    # non-existing function name in an otherwise valid pyfile:
    d_yaml_file = pytestdir.yamlfile(d_yaml_file, dburl='sqlite:///:memory:')
    res = runner.invoke(cli, [
        'process', '--funcname', 'nrvnkenrgdvf', '-c', p_yaml_file, '-p',
        p_py_file, '-d', d_yaml_file
    ])
    assert res.exit_code != 0
    assert 'Error: Invalid value for "pyfile": function "nrvnkenrgdvf" not found in' \
        in res.output

    # non-existing config path:
    res = runner.invoke(
        cli,
        ['process', '-c', 'nrvnkenrgdvf', '-p', p_py_file, '-d', d_yaml_file])
    assert res.exit_code != 0
    # this is issued by click (see comment above)
    assert 'Invalid value for "-c" / "--config"' in res.output

    # a python file passed as config:
    res = runner.invoke(
        cli, ['process', '-c', p_py_file, '-p', p_py_file, '-d', d_yaml_file])
    assert res.exit_code != 0
    assert 'Error: Invalid value for "config"' in res.output
# Esempio n. 19 / 0  (scraper artifact: separator between pasted snippets)
class Test(object):
    """Test suite for segment processing: exercises `Segment` stream/inventory
    handling and test setup against a pre-populated database (see the
    `db4process`/`db` fixtures used by the `init` methods below)."""

    # path of the template processing module, used as the '-p' CLI argument:
    pyfile = get_templates_fpath("paramtable.py")

    @property
    def logfilecontent(self):
        '''Return the full text content of the log file whose path is stored
        in ``self._logfilename`` (the file must exist).'''
        fname = self._logfilename
        assert os.path.isfile(fname)
        with open(fname) as fhandle:
            content = fhandle.read()
        return content

    # The class-level `init` fixture is marked with autouse=true which implies that all test
    # methods in the class will use this fixture without a need to state it in the test
    # function signature or with a class-level usefixtures decorator. For info see:
    # https://docs.pytest.org/en/latest/fixture.html#autouse-fixtures-xunit-setup-on-steroids
    @pytest.fixture(autouse=True)
    def init(self, request, pytestdir, db4process):
        '''Create a file-backed test database and patch db-session and
        log-file handling for every test method of this class (autouse
        yield-fixture: the patches stay active for the whole test).'''
        db4process.create(to_file=True)
        session = db4process.session
        # sets up the mocked functions: db session handling (using the already created session)
        # and log file handling:
        with patch('stream2segment.utils.inputargs.get_session',
                   return_value=session):
            with patch('stream2segment.main.closesession',
                       side_effect=lambda *a, **v: None):
                with patch(
                        'stream2segment.main.configlog4processing') as mock2:

                    def clogd(logger, logfilebasepath, verbose):
                        # config logger as usual, but redirects to a temp file
                        # that will be deleted by pytest, instead of polluting the program
                        # package:
                        ret = o_configlog4processing(logger,
                                                     pytestdir.newfile('.log') \
                                                     if logfilebasepath else None,
                                                     verbose)

                        # remember the log file path so tests can read it
                        # back via the `logfilecontent` property:
                        self._logfilename = ret[0].baseFilename
                        return ret

                    mock2.side_effect = clogd

                    yield

    def inlogtext(self, string):
        '''Return True if `string` occurs in the log file content, False
        otherwise.

        The assertion `string in self.logfilecontent` reportedly fails in
        py3.5 even when the characters match, hence this manual scan.
        Fix over the original implementation: each candidate window used to
        be compared by summing the signed differences ``ord(a) - ord(b)``,
        which can cancel out and report a match for unequal strings (e.g.
        "ab" vs "ba"); comparing characters for equality removes that false
        positive while preserving the char-by-char scan.
        '''
        logtext = self.logfilecontent
        i = 0
        # slide a window of len(string) over the log text; stop as soon as
        # the remaining text is shorter than the searched string:
        while len(logtext[i:i + len(string)]) == len(string):
            if all(a == b
                   for a, b in zip(string, logtext[i:i + len(string)])):
                return True
            i += 1
        return False

# ## ======== ACTUAL TESTS: ================================

# Recall: we have 6 segments, issued from all combination of
# station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
# use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
# those segments in case. For info see db4process in conftest.py

    @patch('stream2segment.process.db.get_inventory',
           side_effect=get_inventory)
    @patch('stream2segment.process.db.get_stream', side_effect=get_stream)
    def test_segwrapper(
            self,
            mock_getstream,
            mock_getinv,
            # fixtures:
            db4process,
            data):
        '''Test `Segment.inventory` / `Segment.stream` caching and reload
        behaviour on the segments created by `db4process`, then add three
        segments sharing station and location (channels AB1/AB2/AB3) and
        check `Segment.siblings` on them.'''
        session = db4process.session
        segids = query4process(session, {}).all()
        # the only segment with an inventory, well-formed data and no gaps:
        seg_with_inv = \
            db4process.segments(with_inventory=True, with_data=True, with_gap=False).one()
        sta_with_inv_id = seg_with_inv.station.id
        # station id -> inventory, used to simulate/check inventory caching:
        invcache = {}

        def read_stream(segment, reload=False):
            '''calls segment.stream(reload) asserting that if segment has no
            data it raises. This function never raises'''
            if segment.data:
                segment.stream(reload)
            else:
                with pytest.raises(
                        Exception) as exc:  # all inventories are None
                    segment.stream(reload)

        prev_staid = None
        for segid in [_[0] for _ in segids]:
            segment = session.query(Segment).filter(
                Segment.id == segid).first()
            sta = segment.station
            staid = sta.id
            # segments are apparently yielded grouped by station id — the
            # caching assertions below rely on this ordering:
            assert prev_staid is None or staid >= prev_staid
            staequal = prev_staid is not None and staid == prev_staid
            prev_staid = staid
            segment.station._inventory = invcache.get(sta.id, None)

            mock_getinv.reset_mock()
            if sta.id != sta_with_inv_id:
                with pytest.raises(Exception):  # all inventories are None
                    segment.inventory()
                assert mock_getinv.called
                # re-call it and assert we raise the previous Exception:
                ccc = mock_getinv.call_count
                with pytest.raises(Exception):  # all inventories are None
                    segment.inventory()
                assert mock_getinv.call_count == ccc
                # re-call it with reload=True and assert we raise the previous
                # exception, and that we called get_inv:
                with pytest.raises(Exception):  # all inventories are None
                    segment.inventory(True)
                assert mock_getinv.call_count == ccc + 1
            else:
                invcache[sta.id] = segment.inventory()
                if staequal:
                    assert not mock_getinv.called
                else:
                    assert mock_getinv.called
                assert len(segment.station.inventory_xml) > 0
                # re-call it with reload=True and assert we raise the previous
                # exception, and that we called get_inv:
                ccc = mock_getinv.call_count
                segment.inventory(True)
                assert mock_getinv.call_count == ccc + 1

            # call segment.stream
            assert not mock_getstream.called
            read_stream(segment)
            assert mock_getstream.call_count == 1
            # second call must hit the cached stream (no new get_stream call):
            read_stream(segment)
            assert mock_getstream.call_count == 1
            # with reload flag:
            read_stream(segment, True)
            assert mock_getstream.call_count == 2
            mock_getstream.reset_mock()

            segs = segment.siblings().all()
            # as channel's channel is either 'ok' or 'err' we should never have
            # other components
            assert len(segs) == 0

        # NOW TEST OTHER ORIENTATION PROPERLY. WE NEED TO ADD WELL FORMED SEGMENTS WITH CHANNELS
        # WHOSE ORIENTATION CAN BE DERIVED:
        staid = session.query(Station.id).first()[0]
        dcid = session.query(DataCenter.id).first()[0]
        eid = session.query(Event.id).first()[0]
        dwid = session.query(Download.id).first()[0]
        # add channels
        c_1 = Channel(station_id=staid,
                      location='ok',
                      channel="AB1",
                      sample_rate=56.7)
        c_2 = Channel(station_id=staid,
                      location='ok',
                      channel="AB2",
                      sample_rate=56.7)
        c_3 = Channel(station_id=staid,
                      location='ok',
                      channel="AB3",
                      sample_rate=56.7)
        session.add_all([c_1, c_2, c_3])
        session.commit()
        # add segments. Create attributes (although not strictly necessary to have bytes data)
        atts = data.to_segment_dict('trace_GE.APE.mseed')
        # build three segments with data:
        # "normal" segment
        sg1 = Segment(channel_id=c_1.id,
                      datacenter_id=dcid,
                      event_id=eid,
                      download_id=dwid,
                      event_distance_deg=35,
                      **atts)
        sg2 = Segment(channel_id=c_2.id,
                      datacenter_id=dcid,
                      event_id=eid,
                      download_id=dwid,
                      event_distance_deg=35,
                      **atts)
        sg3 = Segment(channel_id=c_3.id,
                      datacenter_id=dcid,
                      event_id=eid,
                      download_id=dwid,
                      event_distance_deg=35,
                      **atts)
        session.add_all([sg1, sg2, sg3])
        session.commit()
        # start testing:
        segids = query4process(session, {}).all()

        for segid in [_[0] for _ in segids]:
            segment = session.query(Segment).filter(
                Segment.id == segid).first()
            # staid = segment.station.id
            segs = segment.siblings()
            if segs.all():
                # only the three segments just added (same station/location,
                # channels AB1/AB2/AB3) should have derivable siblings:
                assert segment.id in (sg1.id, sg2.id, sg3.id)
                assert len(segs.all()) == 2
    def init(self, request, db, data):
        '''Set up the database, mocked url-read side effects, logging and
        patchers for download tests; cleanup is registered via
        `request.addfinalizer`.

        NOTE(review): this plain method re-binds the class attribute `init`
        after the `@pytest.fixture(autouse=True)`-decorated `init` defined
        earlier in this class, so the earlier fixture is overridden —
        confirm this concatenation of examples is intended.
        '''
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)
        # setup a run_id:
        rdw = Download()
        db.session.add(rdw)
        db.session.commit()
        self.run = rdw

        # side effects:
        self._evt_urlread_sideeffect = """#EventID | Time | Latitude | Longitude | Depth/km | Author | Catalog | Contributor | ContributorID | MagType | Magnitude | MagAuthor | EventLocationName
20160508_0000129|2016-05-08 05:17:11.500000|1|1|60.0|AZER|EMSC-RTS|AZER|505483|ml|3|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN
20160508_0000004|2016-05-08 01:45:30.300000|90|90|2.0|EMSC|EMSC-RTS|EMSC|505183|ml|4|EMSC|CROATIA
"""
        self._dc_urlread_sideeffect = """http://geofon.gfz-potsdam.de/fdsnws/dataselect/1/query
ZZ * * * 2002-09-01T00:00:00 2005-10-20T00:00:00
UP ARJ * * 2013-08-01T00:00:00 2017-04-25

http://ws.resif.fr/fdsnws/dataselect/1/query
ZU * * HHZ 2015-01-01T00:00:00 2016-12-31T23:59:59.999999

"""

        # Note: by default we set sta_urlsideeffect to return such a channels which result in 12
        # segments (see lat and lon of channels vs lat and lon of events above)
        self._sta_urlread_sideeffect = [
            """#Network|Station|Location|Channel|Latitude|Longitude|Elevation|Depth|Azimuth|Dip|SensorDescription|Scale|ScaleFreq|ScaleUnits|SampleRate|StartTime|EndTime
GE|FLT1||HHE|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
GE|FLT1||HHN|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
GE|FLT1||HHZ|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
n1|s||c1|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n1|s||c2|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n1|s||c3|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
""", """#Network|Station|Location|Channel|Latitude|Longitude|Elevation|Depth|Azimuth|Dip|SensorDescription|Scale|ScaleFreq|ScaleUnits|SampleRate|StartTime|EndTime
IA|BAKI||BHE|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
IA|BAKI||BHN|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
IA|BAKI||BHZ|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
n2|s||c1|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n2|s||c2|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n2|s||c3|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
"""
        ]

        self._mintraveltime_sideeffect = cycle([1])
        self._seg_data = data.read("GE.FLT1..HH?.mseed")
        self._seg_data_gaps = data.read("IA.BAKI..BHZ.D.2016.004.head")
        self._seg_data_empty = b''
        # mix of successful data, gaps, HTTP error codes and exceptions:
        self._seg_urlread_sideeffect = [
            self._seg_data, self._seg_data_gaps, 413, 500, self._seg_data[:2],
            self._seg_data_empty, 413,
            URLError("++urlerror++"),
            socket.timeout()
        ]
        self.service = ''  # so get_datacenters_df accepts any row by default
        self.db_buf_size = 1
        self.routing_service = yaml_load(get_templates_fpath("download.yaml"))\
            ['advanced_settings']['routing_service_url']

        # NON db stuff (logging, patchers, pandas...):
        self.logout = StringIO()
        handler = StreamHandler(stream=self.logout)
        self._logout_cache = ""
        # THIS IS A HACK:
        query_logger.setLevel(logging.INFO)  # necessary to forward to handlers
        # if we called closing (we are testing the whole chain) the level will be reset
        # (to level.INFO) otherwise it stays what we set two lines above. Problems might arise
        # if closing sets a different level, but for the moment who cares
        query_logger.addHandler(handler)

        # when debugging, I want the full dataframe with to_string(), not truncated
        # NOTE: this messes up right alignment of numbers in DownloadStats (see utils.py)
        # FIRST, remember current settings and restore them in cleanup:
        _pd_display_maxcolwidth = pd.get_option('display.max_colwidth')
        pd.set_option('display.max_colwidth', -1)

        # define class level patchers (we do not use a yield as we need to do more stuff in the
        # finalizer, see below
        patchers = []

        patchers.append(patch('stream2segment.utils.url.urlopen'))
        self.mock_urlopen = patchers[-1].start()

        # mock ThreadPool (tp) to run one instance at a time, so we get deterministic results:
        class MockThreadPool(object):
            def __init__(self, *a, **kw):
                pass

            def imap(self, func, iterable, *args):
                # make imap deterministic: same as standard python map:
                # everything is executed in a single thread the right input order
                return map(func, iterable)

            def imap_unordered(self, func_, iterable, *args):
                # make imap_unordered deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func_, iterable)

            def close(self, *a, **kw):
                pass

        # assign patches and mocks:
        patchers.append(patch('stream2segment.utils.url.ThreadPool'))
        self.mock_tpool = patchers[-1].start()
        self.mock_tpool.side_effect = MockThreadPool

        # add finalizer:
        def delete():
            # restore pandas options, stop patchers, detach the log handler:
            pd.set_option('display.max_colwidth', _pd_display_maxcolwidth)

            for patcher in patchers:
                patcher.stop()

            hndls = query_logger.handlers[:]
            handler.close()
            for h in hndls:
                if h is handler:
                    query_logger.removeHandler(h)

        request.addfinalizer(delete)
    def init(self, request, db, data):
        '''Set up the database, mocked url-read side effects, logging and
        patchers for download tests; cleanup is registered via
        `request.addfinalizer`.

        NOTE(review): this is the third method named `init` in this class;
        being defined last it re-binds the name and overrides the previous
        definitions (including the autouse fixture) — confirm this
        concatenation of examples is intended.
        '''
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)
        # setup a run_id:
        rdw = Download()
        db.session.add(rdw)
        db.session.commit()
        self.run = rdw

        # side effects:
        self._evt_urlread_sideeffect = """#EventID | Time | Latitude | Longitude | Depth/km | Author | Catalog | Contributor | ContributorID | MagType | Magnitude | MagAuthor | EventLocationName
20160508_0000129|2016-05-08 05:17:11.500000|1|1|60.0|AZER|EMSC-RTS|AZER|505483|ml|3|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN
20160508_0000004|2016-05-08 01:45:30.300000|90|90|2.0|EMSC|EMSC-RTS|EMSC|505183|ml|4|EMSC|CROATIA
"""
        self._mintraveltime_sideeffect = cycle([1])
        self._seg_data = data.read("GE.FLT1..HH?.mseed")
        self._seg_data_gaps = data.read("IA.BAKI..BHZ.D.2016.004.head")
        self._seg_data_empty = b''
        # mix of successful data, gaps, HTTP error codes and exceptions:
        self._seg_urlread_sideeffect = [
            self._seg_data, self._seg_data_gaps, 413, 500, self._seg_data[:2],
            self._seg_data_empty, 413,
            URLError("++urlerror++"),
            socket.timeout()
        ]
        self.service = ''  # so get_datacenters_df accepts any row by default
        self.db_buf_size = 1
        self.routing_service = yaml_load(get_templates_fpath("download.yaml"))\
            ['advanced_settings']['routing_service_url']

        # NON db stuff (logging, patchers, pandas...):
        self.loghandler = StreamHandler(stream=StringIO())

        # THIS IS A HACK:
        query_logger.setLevel(logging.INFO)  # necessary to forward to handlers
        # if we called closing (we are testing the whole chain) the level will be reset
        # (to level.INFO) otherwise it stays what we set two lines above. Problems might arise
        # if closing sets a different level, but for the moment who cares
        query_logger.addHandler(self.loghandler)

        # when debugging, I want the full dataframe with to_string(), not truncated
        # NOTE: this messes up right alignment of numbers in DownloadStats (see utils.py)
        # FIRST, remember current settings and restore them in cleanup:
        _pd_display_maxcolwidth = pd.get_option('display.max_colwidth')
        pd.set_option('display.max_colwidth', -1)

        # define class level patchers (we do not use a yield as we need to do more stuff in the
        # finalizer, see below
        patchers = []

        patchers.append(patch('stream2segment.utils.url.urlopen'))
        self.mock_urlopen = patchers[-1].start()

        # mock ThreadPool (tp) to run one instance at a time, so we get deterministic results:
        class MockThreadPool(object):
            def __init__(self, *a, **kw):
                pass

            def imap(self, func, iterable, *args):
                # make imap deterministic: same as standard python map:
                # everything is executed in a single thread the right input order
                return map(func, iterable)

            def imap_unordered(self, func_, iterable, *args):
                # make imap_unordered deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func_, iterable)

            def close(self, *a, **kw):
                pass

        # assign patches and mocks:
        patchers.append(patch('stream2segment.utils.url.ThreadPool'))
        self.mock_tpool = patchers[-1].start()
        self.mock_tpool.side_effect = MockThreadPool

        # add finalizer:
        def delete():
            # restore pandas options, stop patchers, detach the log handler:
            pd.set_option('display.max_colwidth', _pd_display_maxcolwidth)

            for patcher in patchers:
                patcher.stop()

            hndls = query_logger.handlers[:]
            for h in hndls:
                if h is self.loghandler:
                    self.loghandler.close()
                    query_logger.removeHandler(h)

        request.addfinalizer(delete)