def test_argument_which_accept_files_relative_and_abs_paths(
        mock_run_download,
        yamlarg, filepath_is_abs,
        # fixtures:
        pytestdir):
    '''test that arguments accepting files are properly processed and the relative paths
    are resolved relative to the yaml config file'''
    # setup files and relative paths depending on whether we passed relative path or absolute
    # in the config
    if filepath_is_abs:
        # absolute path: point the yaml argument directly at a fresh temp file
        yamlarg_file = pytestdir.newfile()
        overrides = {yamlarg: ('sqlite:///' if yamlarg == 'dburl' else '') + yamlarg_file}
        # provide a sqlite memory if we are not testing dburl, otherwise run would fail:
        if yamlarg != 'dburl':
            overrides['dburl'] = 'sqlite:///:memory:'
        yamlfile = pytestdir.yamlfile(get_templates_fpath('download.yaml'), **overrides)
    else:
        # relative path: write 'abc' in the yaml, expected to resolve relative to the yaml
        # file's own directory
        overrides = {yamlarg: ('sqlite:///' if yamlarg == 'dburl' else '') + 'abc'}
        # provide a sqlite memory if we are not testing dburl, otherwise run would fail:
        if yamlarg != 'dburl':
            overrides['dburl'] = 'sqlite:///:memory:'
        yamlfile = pytestdir.yamlfile(get_templates_fpath('download.yaml'), **overrides)
        # and now create the file (resolved relative path):
        yamlarg_file = join(dirname(yamlfile), 'abc')
    # create the (abs or relative-resolved) file on disk:
    with open(yamlarg_file, 'w') as opn:
        if yamlarg == 'restricted_data':  # avoid errors if we are testing token file
            opn.write('BEGIN PGP MESSAGE ABC')
    runner = CliRunner()
    result = runner.invoke(cli, ['download', '-c', yamlfile])
    assert result.exit_code == 0
    # kwargs of the last (mocked) call to run_download:
    run_download_args = mock_run_download.call_args_list[-1][1]
    if yamlarg == 'restricted_data':
        # assert we read the correct file:
        assert run_download_args['authorizer'].token == b'BEGIN PGP MESSAGE ABC'
    elif yamlarg == 'dburl':
        # assert we have the right db url:
        assert str(run_download_args['session'].bind.engine.url) == \
            'sqlite:///' + yamlarg_file
    else:
        assert run_download_args[yamlarg] == yamlarg_file
def download_setup_func(filename, **params):
    """Load the yaml template `filename`, apply the `params` overrides and write the
    result to `basedir`. A `None` override value removes the key from the config.
    Return the tuple `(written file path, resulting config dict)`.
    """
    cfg = yaml_load(get_templates_fpath(filename))
    for name, value in params.items():
        if value is None:
            # None means: drop the key (if present) from the template config
            cfg.pop(name, None)
        else:
            cfg[name] = value
    outpath = os.path.join(basedir, filename)
    with open(outpath, 'w') as fhandle:
        yaml.safe_dump(cfg, fhandle)
    return outpath, cfg
class clickutils(object):  # pylint: disable=invalid-name, too-few-public-methods
    """Container for Options validations, default settings so as not to pollute the
    click decorators"""

    # width (in characters) of the help printed on the terminal (80 should be roughly
    # the click default):
    TERMINAL_HELP_WIDTH = 110
    # parameter name -> doc string, parsed from the default download config template:
    DEFAULTDOC = yaml_load_doc(get_templates_fpath("download.yaml"))
    EQA = "(event search parameter)"
    # common attributes for click Options accepting either a db url or a yaml file path:
    DBURL_OR_YAML_ATTRS = dict(type=inputargs.extract_dburl_if_yamlpath,
                               metavar='TEXT or PATH',
                               help=("Database url where data has been saved. "
                                     "It can also be the path of a yaml file "
                                     "containing the property 'dburl' "
                                     "(e.g., the config file used for "
                                     "downloading)"),
                               required=True)
    # click Path type denoting an existing, readable, non-directory file:
    ExistingPath = click.Path(exists=True, file_okay=True, dir_okay=False,
                              writable=False, readable=True)

    @classmethod
    def set_help_from_yaml(cls, ctx, param, value):
        """Click callback setting an automatic help for all Options of the same command
        which do not have an `help` specified and whose name is found in the default
        download config (`download.yaml`). The Option using this function as `callback`
        must also have `is_eager=True`.

        Example: assuming opt1, opt2, opt3 are variables of the config yaml file,
        and opt4 not, this sets the default help for opt1 and opt2:
        ```
        click.option('--opt1', ..., callback=set_help_from_yaml, is_eager=True,...)
        click.option('--opt2'...)
        click.option('--opt3'..., help='my custom help. Do not fetch help from config')
        click.option('--opt4'...)
        ...
        ```
        """
        cfg_doc = cls.DEFAULTDOC
        for opt in ctx.command.params:
            # skip non-options and options with an explicitly provided help:
            if opt.param_type_name != 'option' or opt.help is not None:
                continue
            doc = cfg_doc.get(opt.name, "")
            # remove implementation details from the cli (avoid too much information,
            # or information specific to the yaml file and not the cli). `partition`
            # keeps everything before the marker (the whole string if it is missing):
            opt.help, _, _ = doc.partition('Implementation details:')
        return value
def func(*args, **yaml_overrides):
    """Invoke the `download` cli command with the given command-line `args` and the
    yaml config overrides `yaml_overrides`, injecting the currently tested db url and a
    config file when the caller did not provide them. Return the click invoke result.
    """
    cli_args = list(args)
    dburl_added = False
    # override the db path with our currently tested one, unless the caller gave one:
    caller_has_dburl = ('-d' in cli_args or '--dburl' in cli_args
                        or 'dburl' in yaml_overrides)
    if not caller_has_dburl:
        yaml_overrides['dburl'] = db.dburl
        dburl_added = True
    # if -c or configfile is not specified, add it (with the overrides baked in);
    # otherwise, pass the injected db url on the command line:
    if '-c' in cli_args or '--configfile' in cli_args:
        if dburl_added:
            cli_args += ['-d', str(db.dburl)]
    else:
        cli_args.extend(['-c',
                         pytestdir.yamlfile(get_templates_fpath("download.yaml"),
                                            **yaml_overrides)])
    # process inputs:
    return CliRunner().invoke(cli, ['download'] + cli_args)
def test_download_bad_values(self,
                             # fixtures:
                             db, run_cli_download):
    '''test different scenarios where the values in the download.yaml are not well
    formatted. Note: the Download table row count tracks whether each scenario wrote
    to the db (errors must be raised BEFORE the db is set up)'''
    result = run_cli_download(networks={'a': 'b'})  # conflict
    assert result.exit_code != 0
    assert 'Error: Conflicting names "network" / "networks"' in result.output
    result = run_cli_download(network={'a': 'b'})
    assert result.exit_code == 0
    # thus providing dict is actually fine and will iterate over its keys:
    assert self.mock_run_download.call_args_list[0][1]['network'] == ['a']
    # do some asserts only for this case to test how we print the arguments to string:
    # assert "tt_table: <TTTable object, " in result.output
    assert "starttime: 2006-01-01 00:00:00" in result.output
    assert "traveltimes_model:" in result.output
    _dburl = db.dburl
    if not db.is_sqlite:
        _dburl = secure_dburl(_dburl)
    # assert dburl is in result.output (sqlite:memory is quoted, postgres not. we do not
    # care to investigate why, just assert either string is there):
    assert "dburl: '%s'" % _dburl in result.output or \
        "dburl: %s" % _dburl in result.output
    # check the session:
    # assert we did write to the db:
    assert db.session.query(Download).count() == 1

    result = run_cli_download(networks='!*')  # conflicting names
    assert result.exit_code != 0
    assert 'Error: Conflicting names "network" / "networks"' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 1

    result = run_cli_download(network='!*')  # invalid value
    assert result.exit_code != 0
    assert 'Error: Invalid value for "network": ' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 1

    result = run_cli_download(net='!*')  # conflicting names
    assert result.exit_code != 0
    assert 'Error: Conflicting names "network" / "net"' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 1

    # test error from the command line. Result is the same as above as the check is made
    # AFTER click
    result = run_cli_download('-n', '!*')  # invalid value
    assert result.exit_code != 0
    assert 'Error: Invalid value for "network": ' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 1

    # no such option:
    result = run_cli_download('--zrt', '!*')
    assert result.exit_code != 0
    # why -z and not -zz? whatever...
    assert 'Error: no such option: --zrt' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 1

    # no such option from within the yaml:
    result = run_cli_download(zz='!*')
    assert result.exit_code != 0
    assert 'Error: No such option "zz"' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 1

    # what about conflicting arguments?
    result = run_cli_download(networks='!*', net='opu')  # invalid value
    assert result.exit_code != 0
    assert 'Conflicting names "network" / "net" / "networks"' in result.output or \
        'Conflicting names "network" / "networks" / "net"' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 1

    result = run_cli_download(starttime=[])  # invalid type
    assert result.exit_code != 0
    assert 'Error: Invalid type for "starttime":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 1

    # mock implementing conflicting names in the yaml file:
    result = run_cli_download(start='wat')  # invalid value
    assert result.exit_code != 0
    assert 'Error: Conflicting names "starttime" / "start"' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 1

    # mock implementing bad value in the cli: (cf with the previous test):
    # THE MESSAGE BELOW IS DIFFERENT BECAUSE WE PROVIDE A CLI VALIDATION FUNCTION
    # See the case of traveltimes model below where, without a cli validation function,
    # the message is the same when we provide a bad argument in the yaml or from the cli
    result = run_cli_download('--starttime', 'wat')  # invalid value
    assert result.exit_code != 0
    assert 'Error: Invalid value for "-s" / "--start" / "--starttime": wat' \
        in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 1

    # This should work:
    result = run_cli_download('--start', '2006-03-14')  # valid value
    assert result.exit_code == 0
    run_download_kwargs = self.mock_run_download.call_args_list[-1][1]
    assert run_download_kwargs['starttime'] == datetime(2006, 3, 14)
    # assert we did write to the db (successful run):
    assert db.session.query(Download).count() == 2

    # now test the same as above BUT with a cli-only argument (-t0):
    result = run_cli_download('-s', 'wat')  # invalid value typed from the command line
    assert result.exit_code != 0
    assert 'Error: Invalid value for "-s" / "--start" / "--starttime":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    result = run_cli_download(endtime='wat')  # try with end
    assert result.exit_code != 0
    assert 'Error: Invalid value for "endtime":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    result = run_cli_download(end='wat')  # try with end
    assert result.exit_code != 0
    assert 'Error: Conflicting names "endtime" / "end"' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    # now test the same as above BUT with the wrong value from the command line:
    result = run_cli_download('-e', 'wat')  # invalid value typed from the command line
    assert result.exit_code != 0
    assert 'Error: Invalid value for "-e" / "--end" / "--endtime":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    result = run_cli_download(traveltimes_model=[])  # invalid type
    assert result.exit_code != 0
    assert 'Error: Invalid type for "traveltimes_model":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    result = run_cli_download(traveltimes_model='wat')  # invalid value
    assert result.exit_code != 0
    assert 'Error: Invalid value for "traveltimes_model":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    # same as above but with error from the cli, not from within the config yaml:
    result = run_cli_download('--traveltimes-model', 'wat')  # invalid value
    assert result.exit_code != 0
    assert 'Error: Invalid value for "traveltimes_model":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    result = run_cli_download(removals=['inventory'])  # invalid value
    assert result.exit_code != 0
    assert 'Error: Missing value for "inventory"' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    d_yaml_file = get_templates_fpath("download.yaml")
    result = run_cli_download(dburl=d_yaml_file)  # existing file, invalid db url
    assert result.exit_code != 0
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    result = run_cli_download(dburl="sqlite:/whatever")  # invalid db url
    assert result.exit_code != 0
    assert 'Error: Invalid value for "dburl":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    result = run_cli_download(dburl="sqlite://whatever")  # invalid db url
    assert result.exit_code != 0
    assert 'Error: Invalid value for "dburl":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    result = run_cli_download(dburl=[])  # invalid type
    assert result.exit_code != 0
    assert 'Error: Invalid type for "dburl":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    # Test an invalid configfile. This can be done only via command line
    result = run_cli_download('-c', 'frjkwlag5vtyhrbdd_nleu3kvshg w')
    assert result.exit_code != 0
    assert 'Error: Invalid value for "-c" / "--config":' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 2

    result = run_cli_download(removals=['maxmagnitude'])  # remove an opt. param.
    assert result.exit_code == 0
    # check maxmagnitude is NOT in the eventws params:
    eventws_params = self.mock_run_download.call_args_list[-1][1]['eventws_params']
    assert 'maxmagnitude' not in eventws_params
    # assert we did write to the db (successful run):
    assert db.session.query(Download).count() == 3

    result = run_cli_download(removals=['advanced_settings'])  # remove a mandatory param.
    assert result.exit_code != 0
    assert 'Error: Missing value for "advanced_settings"' in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 3

    result = run_cli_download(advanced_settings={})  # empty mandatory param.
    assert result.exit_code != 0
    assert ('Error: Invalid value for "advanced_settings": '
            'Missing value for "download_blocksize"') in result.output
    # assert we did not write to the db, cause the error threw before setting up db:
    assert db.session.query(Download).count() == 3

    # search radius:
    for search_radius in [{'min': 5}, {'min': 5, 'max': 6, 'minmag': 7}]:
        result = run_cli_download(search_radius=search_radius)
        assert result.exit_code != 0
        assert ('Error: Invalid value for "search_radius": '
                "provide either 'min', 'max' or "
                "'minmag', 'maxmag', 'minmag_radius', 'maxmag_radius'") in result.output

    result = run_cli_download(search_radius={'min': 5, 'max': '6'})
    assert result.exit_code != 0
    assert ('Error: Invalid value for "search_radius": '
            "numeric values expected") in result.output

    result = run_cli_download(search_radius={'minmag': 15, 'maxmag': 7,
                                             'minmag_radius': 5,
                                             'maxmag_radius': 4})
    assert result.exit_code != 0
    assert ('Error: Invalid value for "search_radius": '
            'minmag should not be greater than maxmag') in result.output

    result = run_cli_download(search_radius={'minmag': 7, 'maxmag': 8,
                                             'minmag_radius': -1,
                                             'maxmag_radius': 0})
    assert result.exit_code != 0
    assert ('Error: Invalid value for "search_radius": '
            'minmag_radius and maxmag_radius should be greater than 0') in result.output

    result = run_cli_download(search_radius={'minmag': 5, 'maxmag': 5,
                                             'minmag_radius': 4,
                                             'maxmag_radius': 4})
    assert result.exit_code != 0
    assert ('Error: Invalid value for "search_radius": '
            'To supply a constant radius, '
            'set "min: 0" and specify the radius with the "max" argument') in result.output

    result = run_cli_download(search_radius={'min': -1, 'max': 5})
    assert result.exit_code != 0
    assert ('Error: Invalid value for "search_radius": '
            'min should not be lower than 0') in result.output

    result = run_cli_download(search_radius={'min': 0, 'max': 0})
    assert result.exit_code != 0
    assert ('Error: Invalid value for "search_radius": '
            'max should be greater than 0') in result.output

    result = run_cli_download(search_radius={'min': 4, 'max': 3})
    assert result.exit_code != 0
    assert ('Error: Invalid value for "search_radius": '
            'min should be lower than max') in result.output
class Test(object): pyfile = get_templates_fpath("paramtable.py") @property def logfilecontent(self): assert os.path.isfile(self._logfilename) with open(self._logfilename) as opn: return opn.read() # The class-level `init` fixture is marked with autouse=true which implies that all test # methods in the class will use this fixture without a need to state it in the test # function signature or with a class-level usefixtures decorator. For info see: # https://docs.pytest.org/en/latest/fixture.html#autouse-fixtures-xunit-setup-on-steroids @pytest.fixture(autouse=True) def init(self, request, pytestdir, db4process): db4process.create(to_file=True) session = db4process.session # sets up the mocked functions: db session handling (using the already created session) # and log file handling: with patch('stream2segment.utils.inputargs.get_session', return_value=session): with patch('stream2segment.main.closesession', side_effect=lambda *a, **v: None): with patch('stream2segment.main.configlog4processing') as mock2: def clogd(logger, logfilebasepath, verbose): # config logger as usual, but redirects to a temp file # that will be deleted by pytest, instead of polluting the program # package: ret = o_configlog4processing(logger, pytestdir.newfile('.log') \ if logfilebasepath else None, verbose) self._logfilename = ret[0].baseFilename return ret mock2.side_effect = clogd yield def inlogtext(self, string): '''Checks that `string` is in log text. The assertion `string in self.logfilecontent` fails in py3.5, although the differences between characters is the same position is zero. 
We did not find any better way than fixing it via this cumbersome function''' logtext = self.logfilecontent i = 0 while len(logtext[i:i+len(string)]) == len(string): if (sum(ord(a)-ord(b) for a, b in zip(string, logtext[i:i+len(string)]))) == 0: return True i += 1 return False # ## ======== ACTUAL TESTS: ================================ # Recall: we have 6 segments, issued from all combination of # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty] # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for # those segments in case. For info see db4process in conftest.py @mock.patch('stream2segment.main.run_process', side_effect=process_main_run) def test_simple_run_no_outfile_provided(self, mock_run, # fixtures: db4process, yamlfile): '''test a case where save inventory is True, and that we saved inventories''' # set values which will override the yaml config in templates folder: config_overrides = {'snr_threshold': 0, 'segment_select': {'has_data': 'true'}} yaml_file = yamlfile(**config_overrides) runner = CliRunner() result = runner.invoke(cli, ['process', '--dburl', db4process.dburl, '-p', self.pyfile, '-c', yaml_file]) assert not result.exception lst = mock_run.call_args_list assert len(lst) == 1 args, kwargs = lst[0][0], lst[0][1] # assert third argument (`ondone` callback) is None 'ondone' or is a BaseWriter (no-op) # class: assert args[2] is None or \ type(args[2]) == BaseWriter # pylint: disable=unidiomatic-typecheck # assert "Output file: n/a" in result output: assert re.search('Output file:\\s+n/a', result.output) # Note that apparently CliRunner() puts stderr and stdout together # (https://github.com/pallets/click/pull/868) # So we should test that we have these string twice: for subs in ["Processing function: ", "Config. 
file: "]: idx = result.output.find(subs) assert idx > -1 # Recall: we have 6 segments, issued from all combination of # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty] # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for # those segments in case. For info see db4process in conftest.py @pytest.mark.parametrize("file_extension, options", product(['.h5', '.csv'], [({}, []), ({'segments_chunksize': 1}, []), ({'segments_chunksize': 1}, ['--multi-process']), ({}, ['--multi-process']), ({'segments_chunksize': 1}, ['--multi-process', '--num-processes', '1']), ({}, ['--multi-process', '--num-processes', '1'])])) def test_simple_run_retDict_complex_select(self, file_extension, options, # fixtures: pytestdir, db4process, yamlfile): '''test a case where we have a more complex select involving joins''' advanced_settings, cmdline_opts = options session = db4process.session # select the event times for the segments with data: etimes = sorted(_[1] for _ in session.query(Segment.id, Event.time). 
join(Segment.event).filter(Segment.has_data)) config_overrides = {'snr_threshold': 0, 'segment_select': {'has_data': 'true', 'event.time': '<=%s' % (max(etimes).isoformat())}} if advanced_settings: config_overrides['advanced_settings'] = advanced_settings # the selection above should be the same as the previous test: # test_simple_run_retDict_saveinv, # as segment_select[event.time] includes all segments in segment_select['has_data'], # thus the code is left as it was in the method above yaml_file = yamlfile(**config_overrides) _seg = db4process.segments(with_inventory=True, with_data=True, with_gap=False).one() expected_first_row_seg_id = _seg.id station_id_whose_inventory_is_saved = _seg.station.id runner = CliRunner() filename = pytestdir.newfile(file_extension) result = runner.invoke(cli, ['process', '--dburl', db4process.dburl, '-p', self.pyfile, '-c', yaml_file, filename] + cmdline_opts) assert not result.exception # check file has been correctly written: if file_extension == '.csv': csv1 = readcsv(filename) assert len(csv1) == 1 assert csv1.loc[0, csv1.columns[0]] == expected_first_row_seg_id else: dfr = pd.read_hdf(filename) assert len(dfr) == 1 assert dfr.iloc[0][SEGMENT_ID_COLNAME] == expected_first_row_seg_id self.inlogtext("""3 segment(s) found to process segment (id=3): 4 traces (probably gaps/overlaps) segment (id=2): Station inventory (xml) error: no data 1 of 3 segment(s) successfully processed 2 of 3 segment(s) skipped with error message (check log or details)""") # assert logfile exists: assert os.path.isfile(self._logfilename) # Recall: we have 6 segments, issued from all combination of # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty] # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for # those segments in case. 
For info see db4process in conftest.py def test_simple_run_retDict_high_snr_threshold(self, # fixtures: pytestdir, db4process, yamlfile): '''same as `test_simple_run_retDict_saveinv` above but with a very high snr threshold => no rows processed''' # setup inventories: session = db4process.session # set values which will override the yaml config in templates folder: config_overrides = { # snr_threshold 3 is high enough to discard the only segment # we would process otherwise: 'snr_threshold': 3, 'segment_select': {'has_data': 'true'}} yaml_file = yamlfile(**config_overrides) runner = CliRunner() filename = pytestdir.newfile('.csv') result = runner.invoke(cli, ['process', '--dburl', db4process.dburl, '-p', self.pyfile, '-c', yaml_file, filename]) assert not result.exception # no file written (see next comment for details). Check outfile is empty: with pytest.raises(EmptyDataError): csv1 = readcsv(filename) # check file has been correctly written: 2 segments have no data, thus they are skipped # and not logged # 2 segments have gaps/overlaps, thus they are skipped and logged # 1 segment has data but no inventory, thus skipped and logged # 1 segment with data and inventory, but snr is too low: skipped and logged assert self.inlogtext("""4 segment(s) found to process segment (id=1): low snr 1.350154 segment (id=2): 4 traces (probably gaps/overlaps) segment (id=4): Station inventory (xml) error: no data segment (id=5): 4 traces (probably gaps/overlaps) 0 of 4 segment(s) successfully processed 4 of 4 segment(s) skipped with error message (check log or details)""") # Recall: we have 6 segments, issued from all combination of # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty] # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for # those segments in case. 
For info see db4process in conftest.py @pytest.mark.parametrize('select_with_data, seg_chunk', [(True, None), (True, 1), (False, None), (False, 1)]) def test_simple_run_retDict_seg_select_empty_and_err_segments(self, select_with_data, seg_chunk, # fixtures: pytestdir, db4process, yamlfile): '''test a segment selection that takes only non-processable segments''' # set values which will override the yaml config in templates folder: config_overrides = {'snr_threshold': 0, # take all segments # the following will select the station with no inventory. # There are three segments associated with it: # one with data and no gaps, one with data and gaps, # the third with no data 'segment_select': {'station.latitude': '<10', 'station.longitude': '<10'}} if select_with_data: config_overrides['segment_select']['has_data'] = 'true' if seg_chunk is not None: config_overrides['advanced_settings'] = {'segments_chunksize': seg_chunk} yaml_file = yamlfile(**config_overrides) runner = CliRunner() filename = pytestdir.newfile('.csv') result = runner.invoke(cli, ['process', '--dburl', db4process.dburl, '-p', self.pyfile, '-c', yaml_file, filename]) assert not result.exception # check file has not been written (no data): with pytest.raises(EmptyDataError): csv1 = readcsv(filename) # see comment aboive on segments_select if select_with_data: # selecting only with data means out of the three candidate segments, one # is discarded prior to processing: assert self.inlogtext("""2 segment(s) found to process segment (id=4): Station inventory (xml) error: no data segment (id=5): 4 traces (probably gaps/overlaps) 0 of 2 segment(s) successfully processed 2 of 2 segment(s) skipped with error message (check log or details)""") else: assert self.inlogtext("""3 segment(s) found to process segment (id=4): Station inventory (xml) error: no data segment (id=5): 4 traces (probably gaps/overlaps) segment (id=6): MiniSeed error: no data 0 of 3 segment(s) successfully processed 3 of 3 segment(s) skipped 
with error message (check log or details)""") # Recall: we have 6 segments, issued from all combination of # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty] # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for # those segments in case. For info see db4process in conftest.py @pytest.mark.parametrize("advanced_settings, cmdline_opts", [({}, []), ({'segments_chunksize': 1}, []), ({'segments_chunksize': 1}, ['--multi-process']), ({}, ['--multi-process']), ({'segments_chunksize': 1}, ['--multi-process', '--num-processes', '1']), ({}, ['--multi-process', '--num-processes', '1'])]) def test_simple_run_ret_list(self, advanced_settings, cmdline_opts, # fixtures: pytestdir, db4process, yamlfile): '''test processing returning list, and also when we specify a different main function''' # set values which will override the yaml config in templates folder: config_overrides = {'snr_threshold': 0, # take all segments 'segment_select': {'has_data': 'true'}} if advanced_settings: config_overrides['advanced_settings'] = advanced_settings yaml_file = yamlfile(**config_overrides) _seg = db4process.segments(with_inventory=True, with_data=True, with_gap=False).one() expected_first_row_seg_id = _seg.id station_id_whose_inventory_is_saved = _seg.station.id pyfile = self.pyfile # Now wrtite pyfile into a named temp file, with the method: # def main_retlist(segment, config): # return main(segment, config).keys() # the method returns a list (which is what we want to test # and this way, we do not need to keep synchronized any additional file filename = pytestdir.newfile('.csv') pyfile2 = pytestdir.newfile('.py') if not os.path.isfile(pyfile2): with open(pyfile, 'r') as opn: content = opn.read() cont2 = content.replace("def main(segment, config):", """def main_retlist(segment, config): return list(main(segment, config).values()) def main(segment, config):""") with open(pyfile2, 'wb') as _opn: _opn.write(cont2.encode('utf8')) runner 
= CliRunner() result = runner.invoke(cli, ['process', '--dburl', db4process.dburl, '-p', pyfile2, '-f', "main_retlist", '-c', yaml_file, filename] + cmdline_opts) assert not result.exception # check file has been correctly written: csv1 = readcsv(filename) # read first with header: # assert no rows: assert csv1.empty # now read without header: csv1 = readcsv(filename, header=False) assert len(csv1) == 1 assert csv1.loc[0, csv1.columns[0]] == expected_first_row_seg_id assert self.inlogtext("""4 segment(s) found to process segment (id=2): 4 traces (probably gaps/overlaps) segment (id=4): Station inventory (xml) error: no data segment (id=5): 4 traces (probably gaps/overlaps) 1 of 4 segment(s) successfully processed 3 of 4 segment(s) skipped with error message (check log or details)""") # assert logfile exists: assert os.path.isfile(self._logfilename) # Even though we are not interested here to check what is there on the created db, # because we test errors, # Recall: we have 6 segments, issued from all combination of # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty] # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for # those segments in case. 
For info see db4process in conftest.py @pytest.mark.parametrize("cmdline_opts", [[], ['--multi-process'], ['--multi-process', '--num-processes', '1']]) @pytest.mark.parametrize("err_type, expects_log_2_be_configured", [(None, False), (ImportError, False), (AttributeError, True), (TypeError, True)]) def test_errors_process_not_run(self, err_type, expects_log_2_be_configured, cmdline_opts, # fixtures: pytestdir, db4process, yamlfile): '''test processing in case of severla 'critical' errors (which do not launch the process None means simply a bad argument (funcname missing)''' pyfile = self.pyfile # REMEMBER THAT BY DEFAULT LEAVING THE segment_select IMPLEMENTED in conffile # WE WOULD HAVE NO SEGMENTS, as maxgap_numsamples is None for all segments of this test # Thus provide config overrides: yaml_file = yamlfile(segment_select={'has_data': 'true'}) runner = CliRunner() # Now wrtite pyfile into a named temp file, BUT DO NOT SUPPLY EXTENSION # This seems to fail in python3 (FIXME: python2?) filename = pytestdir.newfile('.csv') pyfile2 = pytestdir.newfile('.py') with open(pyfile, 'r') as opn: content = opn.read() # here replace the stuff we need: if err_type == ImportError: # create the exception: implement a fake import content = content.replace("def main(", """import abcdefghijk_blablabla_456isjfger def main2(""") elif err_type == AttributeError: # create the exception. Implement a bad signature whci hraises a TypeError content = content.replace("def main(", """def main2(segment, config): return "".attribute_that_does_not_exist_i_guess_blabla() def main(""") elif err_type == TypeError: # create the exception. 
Implement a bad signature whci hraises a TypeError content = content.replace("def main(", """def main2(segment, config, wrong_argument): return int(None) def main(""") else: # err_type is None # this case does not do anything, but since we will call 'main2' as funcname # in `runner.invoke` (see below), we should raise a BadArgument pass with open(pyfile2, 'wb') as _opn: _opn.write(content.encode('utf8')) result = runner.invoke(cli, ['process', '--dburl', db4process.dburl, '--no-prompt', '-p', pyfile2, '-f', "main2", '-c', yaml_file, filename] + cmdline_opts) assert result.exception assert result.exit_code != 0 stdout = result.output if expects_log_2_be_configured: # these cases raise BEFORE running pyfile # assert log config has not been called: (see self.init): assert self._logfilename is not None # we did open the output file: assert os.path.isfile(filename) # and we never wrote on it: assert os.stat(filename).st_size == 0 # check correct outputs, in both log and output: outputs = [stdout, self.logfilecontent] for output in outputs: # Try to assert the messages on standard output being compatible with PY2, # as the messages might change assert err_type.__name__ in output \ and 'Traceback' in output and ' line ' in output else: # these cases raise BEFORE running pyfile # assert log config has not been called: (see self.init): with pytest.raises(Exception): # basically, assert we do not have the log file _ = self.logfilecontent assert 'Invalid value for "pyfile": ' in stdout further_string = 'main2' if err_type is None else 'No module named' assert further_string in stdout # we did NOt open the output file: assert not os.path.isfile(filename) # Recall: we have 6 segments, issued from all combination of # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty] # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for # those segments in case. 
    # For info see db4process in conftest.py
    @pytest.mark.parametrize("err_type", [None, ValueError])
    def test_errors_process_completed(self, err_type,
                                      # fixtures:
                                      pytestdir, db4process, yamlfile):
        '''test processing in case of non 'critical' errors, i.e. errors which do not
        prevent the process to be completed. None means we do not override
        segment_select which, with the current templates, causes no segment to be
        selected'''
        pyfile = self.pyfile

        # REMEMBER THAT BY DEFAULT LEAVING THE segment_select IMPLEMENTED in conffile
        # WE WOULD HAVE NO SEGMENTS, as maxgap_numsamples is None for all segments of
        # this test. Thus provide config overrides:
        if err_type is not None:
            yaml_file = yamlfile(segment_select={'has_data': 'true'})
        else:
            yaml_file = yamlfile()
        runner = CliRunner()

        # Now write pyfile into a named temp file, BUT DO NOT SUPPLY EXTENSION
        # This seems to fail in python3 (FIXME: python2?)
        filename = pytestdir.newfile('.csv')
        pyfile2 = pytestdir.newfile('.py')
        with open(pyfile, 'r') as opn:
            content = opn.read()

        if err_type == ValueError:
            # create the exception: int('4d') raises a ValueError at runtime
            content = content.replace("def main(", """def main2(segment, config):
    return int('4d')
def main(""")
        else:
            # rename main to main2, as we will call 'main2' as funcname in
            # 'runner.invoke' below.
            # REMEMBER THAT THIS CASE HAS ACTUALLY NO SEGMENTS TO BE PROCESSED, see
            # 'yamlfile' fixture above
            content = content.replace("def main(", """def main2(""")

        with open(pyfile2, 'wb') as _opn:
            _opn.write(content.encode('utf8'))

        result = runner.invoke(cli, ['process', '--dburl', db4process.dburl,
                                     '--no-prompt',
                                     '-p', pyfile2, '-f', "main2",
                                     '-c', yaml_file,
                                     filename])
        assert not result.exception
        assert result.exit_code == 0
        stdout = result.output
        # the process completed, so log config has been called (see self.init):
        assert self._logfilename is not None
        # we did open the output file:
        assert os.path.isfile(filename)
        # and we never wrote on it:
        assert os.stat(filename).st_size == 0
        # check correct outputs, in both log and output:
        logfilecontent = self.logfilecontent
        if err_type is None:  # no segments processed
            # we want to check that a particular string (str2check) is in the stdout
            # However, str2check newlines count is not constant through
            # libraries and python versions. It might be due to click progressbar not
            # showing on eclipse. Therefore, assert a regex, where we relax the
            # condition on newlines (\n+)
            str2check = \
                (r"0 segment\(s\) found to process\n"
                 r"\n+"
                 r"0 of 0 segment\(s\) successfully processed\n"
                 r"0 of 0 segment\(s\) skipped with error message \(check log or details\)")
            assert re.search(str2check, stdout)
            assert re.search(str2check, logfilecontent)
        else:
            # we want to check that a particular string (str2check) is in the stdout
            # However, str2check newlines count is not constant through
            # libraries and python versions. It might be due to click progressbar not
            # showing on eclipse. Therefore, assert a regex, where we relax the
            # condition on newlines (\n+)
            str2check = \
                (r'4 segment\(s\) found to process\n'
                 r'\n+'
                 r'0 of 4 segment\(s\) successfully processed\n'
                 r'4 of 4 segment\(s\) skipped with error message \(check log or details\)')
            assert re.search(str2check, stdout)
            # logfile has also the messages of what was wrong. Note that
            # py2 prints:
            # "invalid literal for long() with base 10: '4d'"
            # and PY3 prints:
            # "invalid literal for int() with base 10: '4d'"
            # instead of writing:
            # if PY2:
            #     assert "invalid literal for long() with base 10: '4d'" in logfilecontent
            # else:
            #     assert "invalid literal for int() with base 10: '4d'" in logfilecontent
            # let's be more relaxed (use .*). Also, use a regexp for cross-versions
            # compatibility about newlines (see comments above)
            str2check = \
                (r"4 segment\(s\) found to process\n"
                 r"\n+"
                 r"segment \([^\)]+\)\: invalid literal for .* with base 10: '4d'\n"
                 r"segment \([^\)]+\)\: invalid literal for .* with base 10: '4d'\n"
                 r"segment \([^\)]+\)\: invalid literal for .* with base 10: '4d'\n"
                 r"segment \([^\)]+\)\: invalid literal for .* with base 10: '4d'\n"
                 r"\n+"
                 r"0 of 4 segment\(s\) successfully processed\n"
                 r"4 of 4 segment\(s\) skipped with error message \(check log or details\)")
            assert re.search(str2check, logfilecontent)
def func(**overridden_pars): return pytestdir.yamlfile(get_templates_fpath('paramtable.yaml'), **overridden_pars)
def load_config_for_download(config, parseargs, **param_overrides):
    '''loads download arguments from the given config (yaml file or dict) after
    parsing and checking some of the dict keys.

    :param config: a yaml file path, or a dict, with the download configuration
    :param parseargs: boolean; if True, parse/convert/rename the loaded keys into
        arguments suitable for the download routine, and validate them against the
        default template config
    :param param_overrides: additional keyword arguments forwarded to `yaml_load`
        to override loaded values

    :return: a dict loaded from the given `config` and with parsed arguments
        (dict keys)

    Raises BadArgument in case of parsing errors, missing arguments, conflicts
    etcetera
    '''
    try:
        config_dict = yaml_load(config, **param_overrides)
    except Exception as exc:
        raise BadArgument('config', exc)

    if parseargs:
        # few variables: the config file path, if `config` is a path (and not a dict):
        configfile = config if (isinstance(config, string_types) and os.path.isfile(config))\
            else None
        # define first default event params in order to avoid typos
        def_evt_params = EVENTWS_SAFE_PARAMS

        # now, what we want to do here is basically convert config_dict keys
        # into suitable arguments for stream2segment functions: this includes
        # renaming params, parsing/converting their values, raising
        # BadArgument exceptions and so on

        # Let's configure a 'params' list, a list of dicts where each dict is a
        # 'param checker' with the following keys (at least one should be provided):
        # names: list of strings. provide it in order to check for optional names,
        #     check that only one param is provided, and replace whatever is found
        #     with the first item in the list
        # newname: string, provide it if you want to replace names above with this
        #     value instead of the first item in 'names'
        # defvalue: if provided, then the parameter is optional and will be set to
        #     this value; if not provided, then the parameter is mandatory
        #     (BadArgument is raised in case)
        # newvalue: function accepting a value (the parameter value) raising whatever
        #     is needed if the parameter is invalid, and returning the correct
        #     parameter value
        params = [
            {
                'names': def_evt_params[:2],  # ['minlatitude', 'minlat'],
                'defvalue': None,
                'newvalue': between(-90.0, 90.0)
            },
            {
                'names': def_evt_params[2:4],  # ['maxlatitude', 'maxlat'],
                'defvalue': None,
                'newvalue': between(-90.0, 90.0)
            },
            {
                'names': def_evt_params[4:6],  # ['minlongitude', 'minlon'],
                'defvalue': None,
                'newvalue': between(-180.0, 180.0)
            },
            {
                'names': def_evt_params[6:8],  # ['maxlongitude', 'maxlon'],
                'defvalue': None,
                'newvalue': between(-180.0, 180.0)
            },
            {
                'names': def_evt_params[8:10],  # ['minmagnitude', 'minmag'],
                'defvalue': None
            },
            {
                'names': def_evt_params[10:12],  # ['maxmagnitude', 'maxmag'],
                'defvalue': None
            },
            {
                'names': def_evt_params[12:13],  # ['mindepth'],
                'defvalue': None
            },
            {
                'names': def_evt_params[13:14],  # ['maxdepth'],
                'defvalue': None
            },
            {
                'names': ['update_metadata'],
                'newvalue': parse_update_metadata
            },
            {
                'names': ['restricted_data'],
                'newname': 'authorizer',
                'newvalue': lambda val: create_auth(val, config_dict['dataws'],
                                                    configfile)
            },
            {
                'names': ['dburl'],
                'newname': 'session',
                'newvalue': get_session
            },
            {
                'names': ['traveltimes_model'],
                'newname': 'tt_table',
                'newvalue': load_tt_table
            },
            {
                'names': ('starttime', 'start'),
                'newvalue': valid_date
            },
            {
                'names': ('endtime', 'end'),
                'newvalue': valid_date
            },
            {
                'names': ['eventws'],
                'newvalue': lambda url: valid_fdsn(url, is_eventws=True,
                                                  configfile=configfile)
            },
            {
                'names': ['dataws'],
                'newvalue': lambda url: valid_fdsn(url, is_eventws=False)
            },
            {
                'names': ('network', 'net', 'networks'),
                'defvalue': [],
                'newvalue': nslc_param_value_aslist
            },
            {
                'names': ('station', 'sta', 'stations'),
                'defvalue': [],
                'newvalue': nslc_param_value_aslist
            },
            {
                'names': ('location', 'loc', 'locations'),
                'defvalue': [],
                'newvalue': nslc_param_value_aslist
            },
            {
                'names': ('channel', 'cha', 'channels'),
                'defvalue': [],
                'newvalue': nslc_param_value_aslist
            },
            {'names': ['eventws_params', 'eventws_query_args']},
            {
                'names': ['advanced_settings'],
                'newvalue': parse_download_advanced_settings
            },
            {
                'names': ['search_radius'],
                'newvalue': check_search_radius
            }
        ]

        # store all keys now because we might change them (see below):
        all_keys = set(config_dict)

        # do the check (this MODIFIES config_dict in place!):
        parse_arguments(config_dict, *params)

        # Now check for:
        # 1a. parameter supplied here NOT in the default config
        # 1b. parameter supplied here with different type of the default config
        # 2. Parameters in the default config not supplied here

        # First, create some sets of params names:
        # the parsed keys (all names defined above):
        parsed_keys = set(chain(*(_['names'] for _ in params)))
        # load original configuration (default in this package):
        orig_config = yaml_load(get_templates_fpath("download.yaml"))

        # Check 1a. and 1b.:
        for key in all_keys - parsed_keys:
            try:
                other_value = orig_config[key]
            except KeyError:
                raise BadArgument(key, '', 'No such option')
            try:
                typesmatch(config_dict[key], other_value)
            except Exception as exc:
                raise BadArgument(key, exc)

        # Check 2.:
        missing_keys = set(orig_config) - all_keys - parsed_keys
        if missing_keys:
            raise BadArgument(list(missing_keys), KeyError())

        # At last, put all event-related parameters (except starttime and endtime)
        # in the eventws_params dict (the latter is an OPTIONAL dict
        # which can be set in the config for ADDITIONAL eventws parameters)
        # and check for conflicts:
        eventsearchparams = config_dict['eventws_params']
        if not eventsearchparams:  # eventsearchparams might be none
            config_dict['eventws_params'] = eventsearchparams = {}
        for par in def_evt_params:
            if par in eventsearchparams:  # conflict:
                raise BadArgument('eventws_params',
                                  'conflicting parameter "%s"' % par)
            value = config_dict.pop(par, None)
            if value is not None:
                eventsearchparams[par] = value

    return config_dict
def test_download_verbosity(mock_run_download, mock_configlog, mock_closesess,
                            mock_getsess,
                            # fixtures:
                            db, capsys, pytestdir):
    '''test the verbosity/logging behaviour of the download routine: which messages
    end up on stdout vs. on the db log, depending on the `log2file` and `verbose`
    flags, in both the no-op and the raising (KeyError) cases'''
    if not db.is_sqlite:
        pytest.skip("Skipping postgres test (only sqlite memory used)")
    db.create(to_file=False)
    dburl = db.dburl
    sess = db.session
    # mock get_session in order to return always the same session object:
    mock_getsess.side_effect = lambda *a, **kw: sess
    # close session should not close session, otherwise with a memory db we loose
    # the data
    mock_closesess.side_effect = lambda *a, **v: None

    # handlers should be removed each run_download call, otherwise we end up
    # appending them
    numloggers = [0]

    def clogd(logger, logfilebasepath, verbose):
        # remove pre-existing handlers (see note above):
        for h in logger.handlers[:]:
            logger.removeHandler(h)
        # config logger as usual, but redirects to a temp file
        # that will be deleted by pytest, instead of polluting the program
        # package:
        ret = o_configlog4download(logger,
                                   pytestdir.newfile('.log')
                                   if logfilebasepath else None,
                                   verbose)
        numloggers[0] = len(ret)
        return ret

    mock_configlog.side_effect = clogd

    last_known_id = [None]  # stupid hack to assign to out-of-scope var (py2 compatible)

    def dblog_err_warn():
        # return (log, errors, warnings) of the newest Download row since the last call
        qry = sess.query(Download.id, Download.log, Download.warnings,
                         Download.errors)
        if last_known_id[0] is not None:
            qry = qry.filter(Download.id > last_known_id[0])
        tup = qry.first()
        last_known_id[0] = tup[0]
        return tup[1], tup[2], tup[3]

    d_yaml_file = get_templates_fpath("download.yaml")

    # run verbosity = 0. As this does not configure loggers, previous loggers will
    # not be removed (see mock above). Thus launch all tests in increasing verbosity
    # order (from 0 on)
    mock_run_download.side_effect = lambda *a, **v: None
    ret = o_download(d_yaml_file, log2file=False, verbose=False, dburl=dburl)
    out, err = capsys.readouterr()
    assert not out  # assert empty (avoid comparing to strings and potential py2 py3 headache)
    log, err, warn = dblog_err_warn()
    assert "N/A: either logger not configured, or " in log
    assert err == 0
    assert warn == 0
    assert numloggers[0] == 0

    # now let's see that if we raise an exception we also get no output:
    mock_run_download.side_effect = KeyError('a')
    with pytest.raises(KeyError) as kerr:
        ret = o_download(d_yaml_file, log2file=False, verbose=False, dburl=dburl)
    out, err = capsys.readouterr()
    assert not out
    log, err, warn = dblog_err_warn()
    assert "N/A: either logger not configured, or " in log
    assert err == 0
    assert warn == 0
    assert numloggers[0] == 0

    # log2file=True configures loggers, but only the Db logger
    mock_run_download.side_effect = lambda *a, **v: None
    ret = o_download(d_yaml_file, log2file=True, verbose=False, dburl=dburl)
    out, err = capsys.readouterr()
    # this is also empty cause mock_run_download is no-op
    assert not out  # assert empty
    log, err, warn = dblog_err_warn()
    assert "Completed in " in log
    assert 'No errors' in log  # 0 total errors
    assert 'No warnings' in log  # 0 total warnings
    assert numloggers[0] == 1

    # now let's see that if we raise an exception the traceback goes to the db log:
    mock_run_download.side_effect = KeyError('a')
    with pytest.raises(KeyError) as kerr:
        ret = o_download(d_yaml_file, log2file=True, verbose=False, dburl=dburl)
    out, err = capsys.readouterr()
    assert not out
    log, err, warn = dblog_err_warn()
    assert "Traceback (most recent call last):" in log
    assert err == 0
    assert warn == 0
    assert numloggers[0] == 1

    # log2file=True and verbose=True configures both the Db and the stdout logger:
    mock_run_download.side_effect = lambda *a, **v: None
    ret = o_download(d_yaml_file, log2file=True, verbose=True, dburl=dburl)
    out, err = capsys.readouterr()
    assert out  # assert non empty
    log, err, warn = dblog_err_warn()
    assert "Completed in " in log
    assert 'No errors' in log  # 0 total errors
    assert 'No warnings' in log  # 0 total warnings
    assert numloggers[0] == 2

    # now let's see that if we raise an exception we also get it on stdout:
    mock_run_download.side_effect = KeyError('a')
    with pytest.raises(KeyError) as kerr:
        ret = o_download(d_yaml_file, log2file=True, verbose=True, dburl=dburl)
    out, err = capsys.readouterr()
    # Now out is not empty cause the logger which prints to stdout infos errors and
    # critical is set:
    assert "Traceback (most recent call last):" in out
    assert "KeyError" in out
    log, err, warn = dblog_err_warn()
    assert "Traceback (most recent call last):" in log
    assert err == 0
    assert warn == 0
    assert numloggers[0] == 2
    def test_simple_run_no_outfile_provided_good_argslists(
            self, mock_get_chunksize_defaults, mock_process_segments_mp,
            mock_process_segments, mock_get_advanced_settings, mock_mp_Pool,
            advanced_settings, cmdline_opts, def_chunksize,
            # fixtures:
            pytestdir, db4process, clirunner, yamlfile):
        '''test arguments and calls are ok. Mock Pool imap_unordered as we do not
        want to confuse pytest in case'''
        if def_chunksize is None:
            mock_get_chunksize_defaults.side_effect = _o_get_chunksize_defaults
        else:
            # force the chunksize default, keep the original second return value:
            mock_get_chunksize_defaults.side_effect = \
                lambda *a, **v: (def_chunksize, _o_get_chunksize_defaults()[1])

        class MockPool(object):
            # a multiprocessing.Pool replacement running everything synchronously
            def __init__(self, *a, **kw):
                pass

            def imap_unordered(self, *a, **kw):
                return map(*a, **kw)

            def close(self, *a, **kw):
                pass

            def join(self, *a, **kw):
                pass

        mock_mp_Pool.return_value = MockPool()

        # set values which will override the yaml config in templates folder:
        dir_ = pytestdir.makedir()
        config_overrides = {'snr_threshold': 0,
                            'segment_select': {},  # take everything
                            'root_dir': os.path.abspath(dir_)}
        if advanced_settings:
            config_overrides['advanced_settings'] = advanced_settings

        yaml_file = yamlfile(**config_overrides)
        # need to reset this global variable: FIXME: better handling?
        # process.main._inventories = {}

        pyfile = get_templates_fpath("save2fs.py")

        result = clirunner.invoke(cli, ['process', '--dburl', db4process.dburl,
                                        '-p', pyfile, '-c', yaml_file]
                                  + cmdline_opts)
        assert clirunner.ok(result)

        # test some stuff and get configarg, the REAL config passed in the processing
        # subroutines:
        assert mock_get_advanced_settings.called
        # assert there is no "skipped without messages" message, as it should be the
        # case when there is no function processing the output:
        assert "skipped without messages" not in result.output.lower()
        assert len(mock_get_advanced_settings.call_args_list) == 1
        configarg = mock_get_advanced_settings.call_args_list[0][0][0]  # positional argument

        seg_processed_count = query4process(db4process.session,
                                            configarg.get('segment_select', {})).count()
        # seg_process_count is 6. 'segments_chunksize' in advanced_settings is not
        # given or 1. def_chunksize can be None (i,e., 1200) or given (2)
        # See stream2segment.process.core._get_chunksize_defaults to see how we
        # calculated the expected calls to mock_process_segments*:
        if 'segments_chunksize' in advanced_settings:
            expected_callcount = seg_processed_count
        elif def_chunksize is None:
            expected_callcount = seg_processed_count
        else:
            # ceil division of the segment count by the chunksize:
            _1 = seg_processed_count / def_chunksize
            if _1 == int(_1):
                expected_callcount = int(_1)
            else:
                expected_callcount = int(_1) + 1

        # assert we called the functions the specified amount of times
        if '--multi-process' in cmdline_opts and not advanced_settings:
            # remember that when we have advanced_settings it OVERRIDES
            # the original advanced_settings key in config, thus also multi-process
            # flag
            assert mock_process_segments_mp.called
            assert mock_process_segments_mp.call_count == expected_callcount
            # process_segments_mp calls process_segments:
            assert mock_process_segments_mp.call_count == \
                mock_process_segments.call_count
        else:
            assert mock_process_segments_mp.called == \
                ('--multi-process' in cmdline_opts)
            assert mock_process_segments.called
            assert mock_process_segments.call_count == expected_callcount

        # test that advanced settings where correctly written:
        real_advanced_settings = configarg.get('advanced_settings', {})
        assert ('segments_chunksize' in real_advanced_settings) == \
            ('segments_chunksize' in advanced_settings)

        # 'advanced_settings', if present HERE, will REPLACE 'advanced_settings' in
        # config. Thus:
        if advanced_settings and '--multi-process' not in cmdline_opts:
            assert sorted(real_advanced_settings.keys()) == \
                sorted(advanced_settings.keys())
            for k in advanced_settings.keys():
                assert advanced_settings[k] == real_advanced_settings[k]
        else:
            if 'segments_chunksize' in advanced_settings:
                assert real_advanced_settings['segments_chunksize'] == \
                    advanced_settings['segments_chunksize']
            assert ('multi_process' in real_advanced_settings) == \
                ('--multi-process' in cmdline_opts)
            if '--multi-process' in cmdline_opts:
                assert real_advanced_settings['multi_process'] is True
            assert ('num_processes' in real_advanced_settings) == \
                ('--num-processes' in cmdline_opts)
            if '--num-processes' in cmdline_opts:
                val = cmdline_opts[cmdline_opts.index('--num-processes') + 1]
                assert str(real_advanced_settings['num_processes']) == val
                # assert real_advanced_settings['num_processes'] is an int.
                # As we import int from futures in templates, we might end-up having
                # futures.newint. The type check is made by checking we have an
                # integer type as the native type. For info see:
                # http://python-future.org/what_else.html#passing-data-to-from-python-2-libraries
                # assert type(native(real_advanced_settings['num_processes'])) in integer_types
                assert isinstance(real_advanced_settings['num_processes'],
                                  integer_types)
def func(**overridden_pars): return pytestdir.yamlfile(get_templates_fpath('download.yaml'), **overridden_pars)
def test_click_template(mock_main_init, mock_input, pytestdir):
    '''test the `init` CLI command: template files are copied to the destination
    directory, yaml/py templates are well-formed, and the overwrite prompt behaves
    as expected'''
    runner = CliRunner()
    # assert help works:
    result = runner.invoke(cli, ['init', '--help'])
    assert not mock_main_init.called
    assert result.exit_code == 0

    expected_files = ['download.yaml', 'paramtable.py', 'paramtable.yaml',
                      'save2fs.py', 'save2fs.yaml', 'jupyter.example.ipynb',
                      'jupyter.example.db']
    non_python_files = [_ for _ in expected_files
                        if os.path.splitext(_)[1] not in ('.py', '.yaml')]

    dir_ = pytestdir.makedir()
    path = os.path.join(dir_, 'abc')

    def max_mod_time():
        # the newest modification time among the copied files:
        return max(os.path.getmtime(os.path.join(path, f))
                   for f in os.listdir(path))

    result = runner.invoke(cli, ['init', path])
    # FIXME: check how to mock os.path.isfile properly. This doesnot work:
    # assert mock_isfile.call_count == 5
    assert result.exit_code == 0
    assert mock_main_init.called
    files = os.listdir(path)
    assert sorted(files) == sorted(expected_files)
    assert not mock_input.called

    # assert we correctly wrote the files
    for fle in files:
        sourcepath = get_templates_fpath(fle)
        destpath = os.path.join(path, fle)
        if os.path.splitext(fle)[1] == '.yaml':
            # check loaded yaml, which also assures our templates are well formed:
            sourceconfig = yaml_load(sourcepath)
            destconfig = yaml_load(destpath)
            if os.path.basename(sourcepath) == 'download.yaml':
                assert sorted(sourceconfig.keys()) == sorted(destconfig.keys())
            else:
                # assert we have all keys. Note that 'advanced_settings' is not in
                # sourceconfig (it is added via jinja2 templating system):
                assert sorted(['advanced_settings'] + list(sourceconfig.keys())) \
                    == sorted(destconfig.keys())
            for key in sourceconfig.keys():
                assert type(sourceconfig[key]) == type(destconfig[key])
        elif os.path.splitext(fle)[1] == '.py':
            # check loaded python modules, which also assures our templates are well
            # formed:
            sourcepy = load_source(sourcepath)
            destpy = load_source(destpath)
            # avoid comparing "__blabla__" methods as they are intended to be python
            # 'private' attributes and there are differences between py2 and py3
            # we want to test OUR stuff is the same
            sourcekeys = [a for a in dir(sourcepy) if (a[:2] + a[-2:]) != "____"]
            destkeys = [a for a in dir(destpy) if (a[:2] + a[-2:]) != "____"]
            assert sorted(sourcekeys) == sorted(destkeys)
            for key in sourcekeys:
                assert type(getattr(sourcepy, key)) == type(getattr(destpy, key))
        elif fle not in non_python_files:
            raise ValueError('The file "%s" is not supposed to be copied by `init`'
                             % fle)

    # try to write to the same dir (1)
    mock_input.reset_mock()
    mock_input.side_effect = lambda arg: '1'  # overwrite all files
    maxmodtime = max_mod_time()
    # we'll test that files are modified, but on mac timestamps are rounded to
    # seconds so wait 1 second to be safe
    time.sleep(1)
    result = runner.invoke(cli, ['init', path])
    assert mock_input.called
    assert max_mod_time() > maxmodtime
    assert '%d file(s) copied in' % len(expected_files) in result.output

    # try to write to the same dir (2)
    for click_prompt_ret_val in ('', '2'):
        # '' => skip overwrite
        # '2' => overwrite only non existing
        # in this case, both the above returned values produce the same result
        mock_input.reset_mock()
        mock_input.side_effect = lambda arg: click_prompt_ret_val
        maxmodtime = max_mod_time()
        time.sleep(1)  # see comment above
        result = runner.invoke(cli, ['init', path])
        assert mock_input.called
        assert max_mod_time() == maxmodtime
        assert 'No file copied' in result.output
    def init(self, request, db, data):
        '''class-level setup fixture: initializes the db, the mocked url side
        effects, the test logger and the patchers (urlopen, ThreadPool), registering
        a finalizer which undoes everything'''
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)
        # setup a run_id:
        rdw = Download()
        db.session.add(rdw)
        db.session.commit()
        self.run = rdw

        # side effects:
        self._dc_urlread_sideeffect = """http://geofon.gfz-potsdam.de/fdsnws/dataselect/1/query
ZZ * * * 2002-09-01T00:00:00 2005-10-20T00:00:00
UP ARJ * * 2013-08-01T00:00:00 2017-04-25

http://ws.resif.fr/fdsnws/dataselect/1/query
ZU * * HHZ 2015-01-01T00:00:00 2016-12-31T23:59:59.999999
"""
        self._mintraveltime_sideeffect = cycle([1])
        self._seg_data = data.read("GE.FLT1..HH?.mseed")
        self._seg_data_gaps = data.read("IA.BAKI..BHZ.D.2016.004.head")
        self._seg_data_empty = b''
        # sequence of responses returned by the mocked segment-url reader (data,
        # gaps, HTTP errors, truncated data, empty data, url/socket errors):
        self._seg_urlread_sideeffect = [self._seg_data, self._seg_data_gaps,
                                        413, 500, self._seg_data[:2],
                                        self._seg_data_empty, 413,
                                        URLError("++urlerror++"),
                                        socket.timeout()]
        self.service = ''  # so get_datacenters_df accepts any row by default
        self.db_buf_size = 1
        self.routing_service = yaml_load(get_templates_fpath("download.yaml"))\
            ['advanced_settings']['routing_service_url']

        # NON db stuff (logging, patchers, pandas...):
        self.logout = StringIO()
        handler = StreamHandler(stream=self.logout)
        self._logout_cache = ""
        # THIS IS A HACK:
        query_logger.setLevel(logging.INFO)  # necessary to forward to handlers
        # if we called closing (we are testing the whole chain) the level will be
        # reset (to level.INFO) otherwise it stays what we set two lines above.
        # Problems might arise if closing sets a different level, but for the moment
        # who cares
        query_logger.addHandler(handler)

        # when debugging, I want the full dataframe with to_string(), not truncated
        # NOTE: this messes up right alignment of numbers in DownloadStats (see
        # utils.py)
        # FIRST, remember current settings and restore them in cleanup:
        _pd_display_maxcolwidth = pd.get_option('display.max_colwidth')
        pd.set_option('display.max_colwidth', -1)

        # define class level patchers (we do not use a yield as we need to do more
        # stuff in the finalizer, see below
        patchers = []

        patchers.append(patch('stream2segment.utils.url.urlopen'))
        self.mock_urlopen = patchers[-1].start()

        # mock ThreadPool (tp) to run one instance at a time, so we get
        # deterministic results:
        class MockThreadPool(object):

            def __init__(self, *a, **kw):
                pass

            def imap(self, func, iterable, *args):
                # make imap deterministic: same as standard python map:
                # everything is executed in a single thread the right input order
                return map(func, iterable)

            def imap_unordered(self, func_, iterable, *args):
                # make imap_unordered deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func_, iterable)

            def close(self, *a, **kw):
                pass

        # assign patches and mocks:
        patchers.append(patch('stream2segment.utils.url.ThreadPool'))
        self.mock_tpool = patchers[-1].start()
        self.mock_tpool.side_effect = MockThreadPool

        # add finalizer:
        def delete():
            # restore pandas options, stop patchers and remove the log handler:
            pd.set_option('display.max_colwidth', _pd_display_maxcolwidth)

            for patcher in patchers:
                patcher.stop()

            hndls = query_logger.handlers[:]
            handler.close()
            for h in hndls:
                if h is handler:
                    query_logger.removeHandler(h)

        request.addfinalizer(delete)
class Test(object): pyfile = get_templates_fpath("paramtable.py") @property def logfilecontent(self): assert os.path.isfile(self._logfilename) with open(self._logfilename) as opn: return opn.read() # The class-level `init` fixture is marked with autouse=true which implies that all test # methods in the class will use this fixture without a need to state it in the test # function signature or with a class-level usefixtures decorator. For info see: # https://docs.pytest.org/en/latest/fixture.html#autouse-fixtures-xunit-setup-on-steroids @pytest.fixture(autouse=True) def init(self, request, pytestdir, db4process): db4process.create(to_file=True) session = db4process.session # sets up the mocked functions: db session handling (using the already created session) # and log file handling: with patch('stream2segment.utils.inputargs.get_session', return_value=session): with patch('stream2segment.main.closesession', side_effect=lambda *a, **v: None): with patch( 'stream2segment.main.configlog4processing') as mock2: def clogd(logger, logfilebasepath, verbose): # config logger as usual, but redirects to a temp file # that will be deleted by pytest, instead of polluting the program # package: ret = o_configlog4processing(logger, pytestdir.newfile('.log') \ if logfilebasepath else None, verbose) self._logfilename = ret[0].baseFilename return ret mock2.side_effect = clogd yield # ## ======== ACTUAL TESTS: ================================ # Recall: we have 6 segments, issued from all combination of # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty] # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for # those segments in case. 
For info see db4process in conftest.py @mock.patch('stream2segment.main.run_process', side_effect=process_main_run) def test_simple_run_no_outfile_provided( self, mock_run, # fixtures: db4process, clirunner, yamlfile): '''test a case where save inventory is True, and that we saved inventories''' # set values which will override the yaml config in templates folder: config_overrides = { 'snr_threshold': 0, 'segment_select': { 'has_data': 'true' } } result = clirunner.invoke(cli, [ 'process', '--dburl', db4process.dburl, '-p', self.pyfile, '-c', yamlfile(**config_overrides), '-a' ]) assert clirunner.ok(result) lst = mock_run.call_args_list assert len(lst) == 1 args, kwargs = lst[0][0], lst[0][1] # assert third argument (`ondone` callback) is None 'ondone' or is a BaseWriter (no-op) # class: assert args[2] is None or \ type(args[2]) == BaseWriter # pylint: disable=unidiomatic-typecheck # assert "Output file: n/a" in result output: assert re.search('Output file:\\s+n/a', result.output) # assert "Output file: n/a" in result output: assert re.search( 'Ignoring `append` functionality: output file does not exist ' 'or not provided', result.output) # Recall: we have 6 segments, issued from all combination of # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty] # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for # those segments in case. 
For info see db4process in conftest.py @pytest.mark.parametrize("advanced_settings, cmdline_opts", [ ({}, ['-a']), ]) def test_simple_run_retDict_saveinv_emptyfile( self, advanced_settings, cmdline_opts, # fixtures: pytestdir, db4process, clirunner, yamlfile): '''test a case where we create a temporary file, empty but opened before writing''' # set values which will override the yaml config in templates folder: config_overrides = { 'snr_threshold': 0, 'segment_select': { 'has_data': 'true' } } if advanced_settings: config_overrides['advanced_settings'] = advanced_settings _seg = db4process.segments(with_inventory=True, with_data=True, with_gap=False).one() expected_first_row_seg_id = _seg.id station_id_whose_inventory_is_saved = _seg.station.id filename = pytestdir.newfile('output.csv', create=True) result = clirunner.invoke(cli, [ 'process', '--dburl', db4process.dburl, '-p', self.pyfile, '-c', yamlfile(**config_overrides), filename ] + cmdline_opts) assert clirunner.ok(result) # check file has been correctly written: csv1 = readcsv(filename) assert len(csv1) == 1 assert csv1.loc[0, csv1.columns[0]] == expected_first_row_seg_id logtext = self.logfilecontent assert len(logtext) > 0 assert "Appending results to existing file" in logtext # Recall: we have 6 segments, issued from all combination of # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty] # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for # those segments in case. 
    # For info see db4process in conftest.py
    @pytest.mark.parametrize('hdf', [True, False])
    @pytest.mark.parametrize('return_list', [True, False])
    @pytest.mark.parametrize("advanced_settings, cmdline_opts", [
        ({}, ['-a']),
        ({}, ['-a', '--multi-process']),
    ])
    @mock.patch('stream2segment.cli.click.confirm', return_value=True)
    def test_append(self, mock_click_confirm, advanced_settings, cmdline_opts,
                    return_list, hdf,
                    # fixtures:
                    pytestdir, db4process, clirunner, yamlfile):
        '''test a typical case where we supply the append option'''
        if return_list and hdf:
            # hdf does not support returning lists
            return
        # set values which will override the yaml config in templates folder:
        config_overrides = {'snr_threshold': 0,
                            'segment_select': {'has_data': 'true'}}
        if advanced_settings:
            config_overrides['advanced_settings'] = advanced_settings
        yaml_file = yamlfile(**config_overrides)

        _seg = db4process.segments(with_inventory=True, with_data=True,
                                   with_gap=False).one()
        expected_first_row_seg_id = _seg.id
        station_id_whose_inventory_is_saved = _seg.station.id

        session = db4process.session

        filename = pytestdir.newfile('.hdf' if hdf else '.csv')

        pyfile = self.pyfile
        if return_list:
            # modify python so that 'main' returns a list by calling the default
            # 'main' and returning its keys:
            with open(self.pyfile, 'r') as opn:
                content = opn.read()
            pyfile = pytestdir.newfile('.py')
            cont2 = content.replace("def main(segment, config):", """def main(segment, config):
    return list(main2(segment, config).values())
def main2(segment, config):""")
            with open(pyfile, 'wb') as _opn:
                _opn.write(cont2.encode('utf8'))

        mock_click_confirm.reset_mock()
        result = clirunner.invoke(cli, ['process', '--dburl', db4process.dburl,
                                        '-p', pyfile, '-c', yaml_file, filename]
                                  + cmdline_opts)
        assert clirunner.ok(result)

        def read_hdf(filename):
            # read the hdf output, resetting the index for positional access:
            return pd.read_hdf(filename).reset_index(drop=True, inplace=False)

        # check file has been correctly written:
        csv1 = read_hdf(filename) if hdf else readcsv(filename,
                                                      header=not return_list)
        assert len(csv1) == 1
        segid_column = SEGMENT_ID_COLNAME if hdf else csv1.columns[0]
        assert csv1.loc[0, segid_column] == expected_first_row_seg_id
        logtext1 = self.logfilecontent
        assert "4 segment(s) found to process" in logtext1
        assert "Skipping 1 already processed segment(s)" not in logtext1
        assert "Ignoring `append` functionality: output file does not exist or not provided" \
            in logtext1
        assert "1 of 4 segment(s) successfully processed" in logtext1
        assert not mock_click_confirm.called

        # now test a second call, the same as before:
        mock_click_confirm.reset_mock()
        result = clirunner.invoke(cli, ['process', '--dburl', db4process.dburl,
                                        '-p', pyfile, '-c', yaml_file, filename]
                                  + cmdline_opts)
        # check file has been correctly written:
        csv2 = read_hdf(filename) if hdf else readcsv(filename,
                                                      header=not return_list)
        assert len(csv2) == 1
        segid_column = SEGMENT_ID_COLNAME if hdf else csv1.columns[0]
        assert csv2.loc[0, segid_column] == expected_first_row_seg_id
        logtext2 = self.logfilecontent
        assert "3 segment(s) found to process" in logtext2
        assert "Skipping 1 already processed segment(s)" in logtext2
        assert "Appending results to existing file" in logtext2
        assert "0 of 3 segment(s) successfully processed" in logtext2
        assert not mock_click_confirm.called
        # assert two rows are equal:
        assert_frame_equal(csv1, csv2, check_dtype=True)

        # change the segment id of the written segment
        seg = session.query(Segment).\
            filter(Segment.id == expected_first_row_seg_id).first()
        new_seg_id = seg.id * 100
        seg.id = new_seg_id
        session.commit()

        # now test a second call, the same as before:
        mock_click_confirm.reset_mock()
        result = clirunner.invoke(cli, ['process', '--dburl', db4process.dburl,
                                        '-p', pyfile, '-c', yaml_file, filename]
                                  + cmdline_opts)
        # check file has been correctly written:
        csv3 = read_hdf(filename) if hdf else readcsv(filename,
                                                      header=not return_list)
        assert len(csv3) == 2
        segid_column = SEGMENT_ID_COLNAME if hdf else csv1.columns[0]
        assert csv3.loc[0, segid_column] == expected_first_row_seg_id
        assert csv3.loc[1, segid_column] == new_seg_id
        logtext3 = self.logfilecontent
        assert "4 segment(s) found to process" in logtext3
        assert "Skipping 1 already processed segment(s)" in logtext3
        assert "Appending results to existing file" in logtext3
        assert "1 of 4 segment(s) successfully processed" in logtext3
        assert not mock_click_confirm.called
        # assert two rows are equal:
        assert_frame_equal(csv1, csv3[:1], check_dtype=True)

        # last try: no append (also set no-prompt to test that we did not prompt the
        # user)
        mock_click_confirm.reset_mock()
        result = clirunner.invoke(cli, ['process', '--dburl', db4process.dburl,
                                        '-p', pyfile, '-c', yaml_file, filename]
                                  + cmdline_opts[1:])
        # check file has been correctly written:
        csv4 = read_hdf(filename) if hdf else readcsv(filename,
                                                      header=not return_list)
        assert len(csv4) == 1
        segid_column = SEGMENT_ID_COLNAME if hdf else csv1.columns[0]
        assert csv4.loc[0, segid_column] == new_seg_id
        logtext4 = self.logfilecontent
        assert "4 segment(s) found to process" in logtext4
        assert "Skipping 1 already processed segment(s)" not in logtext4
        assert "Appending results to existing file" not in logtext4
        assert "1 of 4 segment(s) successfully processed" in logtext4
        assert 'Overwriting existing output file' in logtext4
        assert mock_click_confirm.called

        # last try: prompt return False
        mock_click_confirm.reset_mock()
        mock_click_confirm.return_value = False
        result = clirunner.invoke(cli, ['process', '--dburl', db4process.dburl,
                                        '-p', pyfile, '-c', yaml_file, filename]
                                  + cmdline_opts[1:])
        assert result.exception
        assert type(result.exception) == SystemExit
        assert result.exception.code == 1
def func(**overridden_pars):
    '''Write a copy of the 'save2fs.yaml' template with the given parameters
    overridden, returning the path of the newly created yaml file'''
    template_path = get_templates_fpath('save2fs.yaml')
    return pytestdir.yamlfile(template_path, **overridden_pars)
def test_download_eventws_query_args(
        mock_isfile, mock_run_download,
        # fixtures:
        run_cli_download):  # pylint: disable=redefined-outer-name
    '''test different scenarios where we provide eventws query args from the command
    line.

    Checks that command-line event parameters are merged into the `eventws_params`
    dict passed to the download function, that conflicting parameters are rejected,
    and that a non-existing / non-fdsnws `eventws` value is rejected.
    '''
    d_yaml_file = get_templates_fpath("download.yaml")

    # FIRST SCENARIO: no eventws_params provided
    mock_run_download.reset_mock()
    def_yaml_dict = yaml_load(d_yaml_file)['eventws_params']
    assert not def_yaml_dict  # None or empty dict
    result = run_cli_download()
    assert result.exit_code == 0
    # assert the yaml (as passed to the download function) has the correct value:
    real_eventws_params = mock_run_download.call_args_list[0][1]['eventws_params']
    # just assert it has keys merged from the global event-related yaml keys
    assert 'maxmagnitude' not in real_eventws_params
    assert real_eventws_params

    # test by providing an eventws param which is not optional:
    mock_run_download.reset_mock()
    def_yaml_dict = yaml_load(d_yaml_file)['eventws_params']
    assert not def_yaml_dict  # None or empty dict
    result = run_cli_download('--minmagnitude', '15.5')
    assert result.exit_code == 0
    # assert the yaml (as passed to the download function) has the correct value:
    real_eventws_params = mock_run_download.call_args_list[0][1]['eventws_params']
    # just assert it has keys merged from the global event-related yaml keys
    assert real_eventws_params['minmagnitude'] == 15.5

    # test by providing an eventws param which is optional:
    mock_run_download.reset_mock()
    def_yaml_dict = yaml_load(d_yaml_file)['eventws_params']
    assert not def_yaml_dict  # None or empty dict
    result = run_cli_download('--minmagnitude', '15.5',
                              eventws_params={'format': 'abc'})
    assert result.exit_code == 0
    # assert the yaml (as passed to the download function) has the correct value:
    real_eventws_params = mock_run_download.call_args_list[0][1]['eventws_params']
    # just assert it has keys merged from the global event-related yaml keys
    assert real_eventws_params['minmagnitude'] == 15.5
    assert real_eventws_params['format'] == 'abc'

    # conflicting args (supplying a global non-optional param in eventws's config):
    for pars in [['--minlatitude', '-minlat'], ['--maxlatitude', '-maxlat'],
                 ['--minlongitude', '-minlon'], ['--maxlongitude', '-maxlon'],
                 ['--minmagnitude', '-minmag'], ['--maxmagnitude', '-maxmag'],
                 ['--mindepth'], ['--maxdepth']]:
        for par1, par2 in product(pars, pars):
            mock_run_download.reset_mock()
            result = run_cli_download(
                par1, '15.5', eventws_params={par2.replace('-', ''): 15.5})
            # the invocation must FAIL (note: `!= 0`, not `!= 1`, which would also
            # pass on success):
            assert result.exit_code != 0
            assert 'conflict' in result.output
            assert 'Invalid value for "eventws_params"' in result.output

    # test a eventws supplied as non existing file and not valid fdsnws:
    mock_isfile.reset_mock()
    assert not mock_isfile.called
    result = run_cli_download('--eventws', 'myfile')
    assert result.exit_code != 0
    assert 'eventws' in result.output
    assert mock_isfile.called
    def test_simple_run_no_outfile_provided(
            self, mock_run, advanced_settings, cmdline_opts,
            # fixtures:
            pytestdir, db4process, clirunner, yamlfile):
        '''test a case where save inventory is True, and that we saved inventories
        db is a fixture implemented in conftest.py and setup here in self.transact
        fixture.

        Runs the `process` sub-command with no output file argument and checks that the
        save2fs template writes the miniSEED files to the configured root_dir instead.
        '''
        # set values which will override the yaml config in templates folder:
        dir_ = pytestdir.makedir()
        config_overrides = {
            'snr_threshold': 0,
            'segment_select': {
                'has_data': 'true'
            },
            'root_dir': os.path.abspath(dir_)
        }
        if advanced_settings:
            config_overrides['advanced_settings'] = advanced_settings
        yaml_file = yamlfile(**config_overrides)
        # query data for testing now as the program will expunge all data from the session
        # and thus we want to avoid DetachedInstanceError(s):
        expected_only_written_segment = \
            db4process.segments(with_inventory=True, with_data=True, with_gap=False).one()
        # get seiscomp path of OK segment before the session is closed:
        path = os.path.join(dir_, expected_only_written_segment.sds_path())

        pyfile = get_templates_fpath("save2fs.py")

        result = clirunner.invoke(cli, [
            'process', '--dburl', db4process.dburl, '-p', pyfile, '-c', yaml_file
        ] + cmdline_opts)
        assert clirunner.ok(result)

        # two files expected: one per processed stream/window written by save2fs.py
        # (assumption from the assertions below - TODO confirm against the template):
        filez = os.listdir(os.path.dirname(path))
        assert len(filez) == 2
        stream1 = read(os.path.join(os.path.dirname(path), filez[0]), format='MSEED')
        stream2 = read(os.path.join(os.path.dirname(path), filez[1]), format='MSEED')
        assert len(stream1) == len(stream2) == 1
        # the two written traces must differ (different processing output):
        assert not np.allclose(stream1[0].data, stream2[0].data)

        lst = mock_run.call_args_list
        assert len(lst) == 1
        args, kwargs = lst[0][0], lst[0][1]
        # assert third argument (`ondone` callback) is None 'ondone' or is a BaseWriter
        # (no-op) class:
        assert args[2] is None or \
            type(args[2]) == BaseWriter  # pylint: disable=unidiomatic-typecheck
        # assert "Output file: n/a" in result output:
        assert re.search('Output file:\\s+n/a', result.output)

        # Note that apparently CliRunner() (see clirunner fixture) puts stderr and stdout
        # together (https://github.com/pallets/click/pull/868)
        # Reminder: previously, log errors were redirected to stderr
        # This is dangerous as we use a redirect to avoid external libraries to print to
        # stderr and logging to stderr might cause 'operation on closed file'.
        for subs in ["Processing function: ", "Config. file: "]:
            idx = result.output.find(subs)
            assert idx > -1
def test_process_bad_types(pytestdir):
    '''bad types must be passed directly to download as click does a preliminary check.

    Exercises the `process` sub-command with invalid / missing options and asserts the
    exact click (or inputargs) error messages.
    '''
    p_yaml_file, p_py_file = \
        get_templates_fpaths("paramtable.yaml", "paramtable.py")

    # Note that our functions in inputargs module return SIMILAR messages as click
    # not exactly the same one

    # invalid --pyfile (non-existing path), caught by click:
    result = CliRunner().invoke(cli, ['process', '--pyfile', 'nrvnkenrgdvf'])
    assert result.exit_code != 0
    assert 'Error: Invalid value for "-p" / "--pyfile":' in result.output

    # invalid dburl string:
    result = CliRunner().invoke(cli, [
        'process', '--dburl', 'nrvnkenrgdvf', '-c', p_yaml_file, '-p', p_py_file
    ])
    assert result.exit_code != 0
    assert 'Error: Invalid value for "dburl":' in result.output

    # if we do not provide click default values, they have invalid values and they take
    # priority (the --dburl arg is skipped):
    result = CliRunner().invoke(
        cli, ['process', '--dburl', 'nrvnkenrgdvf', '-c', p_yaml_file])
    assert result.exit_code != 0
    assert 'Missing option "-p" / "--pyfile"' in result.output

    result = CliRunner().invoke(cli, ['process', '--dburl', 'nrvnkenrgdvf'])
    assert result.exit_code != 0
    assert 'Missing option "-c" / "--config"' in result.output

    # invalid dburl string again, with all required options supplied:
    result = CliRunner().invoke(cli, [
        'process', '--dburl', 'nrvnkenrgdvf', '-c', p_yaml_file, '-p', p_py_file
    ])
    assert result.exit_code != 0
    assert 'Error: Invalid value for "dburl":' in result.output
    assert "nrvnkenrgdvf" in result.output

    # --dburl given a yaml FILE path (not a db url) must also fail:
    d_yaml_file = get_templates_fpath('download.yaml')
    d_yaml_file = pytestdir.yamlfile(
        d_yaml_file, dburl='sqlite:///./path/to/my/db/sqlite.sqlite')
    result = CliRunner().invoke(cli, [
        'process', '--dburl', d_yaml_file, '-c', p_yaml_file, '-p', p_py_file
    ])
    assert result.exit_code != 0
    assert 'Error: Invalid value for "dburl":' in result.output

    # non-existing function name in the processing pyfile:
    d_yaml_file = pytestdir.yamlfile(d_yaml_file, dburl='sqlite:///:memory:')
    result = CliRunner().invoke(cli, [
        'process', '--funcname', 'nrvnkenrgdvf', '-c', p_yaml_file, '-p', p_py_file,
        '-d', d_yaml_file
    ])
    assert result.exit_code != 0
    assert 'Error: Invalid value for "pyfile": function "nrvnkenrgdvf" not found in' \
        in result.output

    # non-existing config path:
    result = CliRunner().invoke(
        cli, ['process', '-c', 'nrvnkenrgdvf', '-p', p_py_file, '-d', d_yaml_file])
    assert result.exit_code != 0
    # this is issued by click (see comment above)
    assert 'Invalid value for "-c" / "--config"' in result.output

    # a python file passed as config (not parsable as yaml):
    result = CliRunner().invoke(
        cli, ['process', '-c', p_py_file, '-p', p_py_file, '-d', d_yaml_file])
    assert result.exit_code != 0
    assert 'Error: Invalid value for "config"' in result.output
class Test(object):
    '''Tests for the `process` machinery: patches db session handling and log file
    creation (see `init` fixture) and checks segment wrapping behavior.'''

    # path of the processing python module (template) used by the tests below:
    pyfile = get_templates_fpath("paramtable.py")

    @property
    def logfilecontent(self):
        '''Return the content of the (mocked, temporary) processing log file'''
        assert os.path.isfile(self._logfilename)
        with open(self._logfilename) as opn:
            return opn.read()

    # The class-level `init` fixture is marked with autouse=true which implies that all test
    # methods in the class will use this fixture without a need to state it in the test
    # function signature or with a class-level usefixtures decorator. For info see:
    # https://docs.pytest.org/en/latest/fixture.html#autouse-fixtures-xunit-setup-on-steroids
    @pytest.fixture(autouse=True)
    def init(self, request, pytestdir, db4process):
        '''Create the test db and patch session / log-file handling for every test'''
        db4process.create(to_file=True)
        session = db4process.session
        # sets up the mocked functions: db session handling (using the already created
        # session) and log file handling:
        with patch('stream2segment.utils.inputargs.get_session',
                   return_value=session):
            with patch('stream2segment.main.closesession',
                       side_effect=lambda *a, **v: None):
                with patch('stream2segment.main.configlog4processing') as mock2:

                    def clogd(logger, logfilebasepath, verbose):
                        # config logger as usual, but redirects to a temp file
                        # that will be deleted by pytest, instead of polluting the
                        # program package:
                        ret = o_configlog4processing(logger,
                                                     pytestdir.newfile('.log')
                                                     if logfilebasepath else None,
                                                     verbose)
                        self._logfilename = ret[0].baseFilename
                        return ret

                    mock2.side_effect = clogd

                    yield

    def inlogtext(self, string):
        '''Checks that `string` is in log text.
        The assertion `string in self.logfilecontent` fails in py3.5, although the
        differences between characters is the same position is zero.
        We did not find any better way than fixing it via this cumbersome function'''
        logtext = self.logfilecontent
        i = 0
        # slide a window of len(string) over logtext and compare char-by-char:
        while len(logtext[i:i + len(string)]) == len(string):
            if (sum(ord(a) - ord(b)
                    for a, b in zip(string, logtext[i:i + len(string)]))) == 0:
                return True
            i += 1
        return False

    # ## ======== ACTUAL TESTS: ================================

    # Recall: we have 6 segments, issued from all combination of
    # station_inventory in [true, false] and segment.data in [ok, with_gaps, empty]
    # use db4process(with_inventory, with_data, with_gap) to return sqlalchemy query for
    # those segments in case. For info see db4process in conftest.py
    @patch('stream2segment.process.db.get_inventory', side_effect=get_inventory)
    @patch('stream2segment.process.db.get_stream', side_effect=get_stream)
    def test_segwrapper(
            self, mock_getstream, mock_getinv,
            # fixtures:
            db4process, data):
        '''Test the Segment wrapper: inventory/stream caching, reload flags, siblings'''
        session = db4process.session
        segids = query4process(session, {}).all()
        seg_with_inv = \
            db4process.segments(with_inventory=True, with_data=True,
                                with_gap=False).one()
        sta_with_inv_id = seg_with_inv.station.id

        invcache = {}

        def read_stream(segment, reload=False):
            '''calls segment.stream(reload) asserting that if segment has no data it
            raises. This function never raises'''
            if segment.data:
                segment.stream(reload)
            else:
                with pytest.raises(Exception) as exc:  # all inventories are None
                    segment.stream(reload)

        prev_staid = None
        for segid in [_[0] for _ in segids]:
            segment = session.query(Segment).filter(
                Segment.id == segid).first()
            sta = segment.station
            staid = sta.id
            # segments must be iterated in (at least) station order:
            assert prev_staid is None or staid >= prev_staid
            staequal = prev_staid is not None and staid == prev_staid
            prev_staid = staid
            # simulate a previously cached inventory (if any):
            segment.station._inventory = invcache.get(sta.id, None)
            mock_getinv.reset_mock()
            if sta.id != sta_with_inv_id:
                with pytest.raises(Exception):  # all inventories are None
                    segment.inventory()
                assert mock_getinv.called
                # re-call it and assert we raise the previous Exception:
                ccc = mock_getinv.call_count
                with pytest.raises(Exception):  # all inventories are None
                    segment.inventory()
                assert mock_getinv.call_count == ccc
                # re-call it with reload=True and assert we raise the previous
                # exception, and that we called get_inv:
                with pytest.raises(Exception):  # all inventories are None
                    segment.inventory(True)
                assert mock_getinv.call_count == ccc + 1
            else:
                invcache[sta.id] = segment.inventory()
                if staequal:
                    # same station as before: inventory was cached, no new fetch:
                    assert not mock_getinv.called
                else:
                    assert mock_getinv.called
                assert len(segment.station.inventory_xml) > 0
                # re-call it with reload=True and assert we raise the previous
                # exception, and that we called get_inv:
                ccc = mock_getinv.call_count
                segment.inventory(True)
                assert mock_getinv.call_count == ccc + 1

            # call segment.stream
            assert not mock_getstream.called
            read_stream(segment)
            assert mock_getstream.call_count == 1
            read_stream(segment)
            assert mock_getstream.call_count == 1
            # with reload flag:
            read_stream(segment, True)
            assert mock_getstream.call_count == 2
            mock_getstream.reset_mock()

            segs = segment.siblings().all()
            # as channel's channel is either 'ok' or 'err' we should never have
            # other components
            assert len(segs) == 0

        # NOW TEST OTHER ORIENTATION PROPERLY. WE NEED TO ADD WELL FORMED SEGMENTS WITH
        # CHANNELS WHOSE ORIENTATION CAN BE DERIVED:
        staid = session.query(Station.id).first()[0]
        dcid = session.query(DataCenter.id).first()[0]
        eid = session.query(Event.id).first()[0]
        dwid = session.query(Download.id).first()[0]
        # add channels
        c_1 = Channel(station_id=staid, location='ok', channel="AB1", sample_rate=56.7)
        c_2 = Channel(station_id=staid, location='ok', channel="AB2", sample_rate=56.7)
        c_3 = Channel(station_id=staid, location='ok', channel="AB3", sample_rate=56.7)
        session.add_all([c_1, c_2, c_3])
        session.commit()
        # add segments. Create attributes (although not strictly necessary to have
        # bytes data)
        atts = data.to_segment_dict('trace_GE.APE.mseed')
        # build three segments with data:
        # "normal" segment
        sg1 = Segment(channel_id=c_1.id, datacenter_id=dcid, event_id=eid,
                      download_id=dwid, event_distance_deg=35, **atts)
        sg2 = Segment(channel_id=c_2.id, datacenter_id=dcid, event_id=eid,
                      download_id=dwid, event_distance_deg=35, **atts)
        sg3 = Segment(channel_id=c_3.id, datacenter_id=dcid, event_id=eid,
                      download_id=dwid, event_distance_deg=35, **atts)
        session.add_all([sg1, sg2, sg3])
        session.commit()
        # start testing:
        segids = query4process(session, {}).all()
        for segid in [_[0] for _ in segids]:
            segment = session.query(Segment).filter(
                Segment.id == segid).first()
            # staid = segment.station.id
            segs = segment.siblings()
            if segs.all():
                # only the three newly added segments have derivable orientations,
                # hence siblings:
                assert segment.id in (sg1.id, sg2.id, sg3.id)
                assert len(segs.all()) == 2
    def init(self, request, db, data):
        '''Per-test setup: create the db, define URL-read side effects, patch
        urlopen/ThreadPool and pandas display options, and register cleanup'''
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)
        # setup a run_id:
        rdw = Download()
        db.session.add(rdw)
        db.session.commit()
        self.run = rdw

        # side effects:
        # events in FDSN text format (one event per line):
        self._evt_urlread_sideeffect = """#EventID | Time | Latitude | Longitude | Depth/km | Author | Catalog | Contributor | ContributorID | MagType | Magnitude | MagAuthor | EventLocationName
20160508_0000129|2016-05-08 05:17:11.500000|1|1|60.0|AZER|EMSC-RTS|AZER|505483|ml|3|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN
20160508_0000004|2016-05-08 01:45:30.300000|90|90|2.0|EMSC|EMSC-RTS|EMSC|505183|ml|4|EMSC|CROATIA
"""
        # routing-service response: datacenter url followed by its channel rows:
        self._dc_urlread_sideeffect = """http://geofon.gfz-potsdam.de/fdsnws/dataselect/1/query
ZZ * * * 2002-09-01T00:00:00 2005-10-20T00:00:00
UP ARJ * * 2013-08-01T00:00:00 2017-04-25

http://ws.resif.fr/fdsnws/dataselect/1/query
ZU * * HHZ 2015-01-01T00:00:00 2016-12-31T23:59:59.999999

"""
        # Note: by default we set sta_urlsideeffect to return such a channels which
        # result in 12 segments (see lat and lon of channels vs lat and lon of events
        # above)
        self._sta_urlread_sideeffect = [
            """#Network|Station|Location|Channel|Latitude|Longitude|Elevation|Depth|Azimuth|Dip|SensorDescription|Scale|ScaleFreq|ScaleUnits|SampleRate|StartTime|EndTime
GE|FLT1||HHE|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
GE|FLT1||HHN|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
GE|FLT1||HHZ|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
n1|s||c1|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n1|s||c2|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n1|s||c3|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
""",
            """#Network|Station|Location|Channel|Latitude|Longitude|Elevation|Depth|Azimuth|Dip|SensorDescription|Scale|ScaleFreq|ScaleUnits|SampleRate|StartTime|EndTime
IA|BAKI||BHE|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
IA|BAKI||BHN|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
IA|BAKI||BHZ|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
n2|s||c1|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n2|s||c2|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n2|s||c3|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
"""
        ]

        self._mintraveltime_sideeffect = cycle([1])
        self._seg_data = data.read("GE.FLT1..HH?.mseed")
        self._seg_data_gaps = data.read("IA.BAKI..BHZ.D.2016.004.head")
        self._seg_data_empty = b''
        # segment responses: good data, gapped data, http errors, truncated data,
        # empty data, url/socket errors:
        self._seg_urlread_sideeffect = [
            self._seg_data, self._seg_data_gaps, 413, 500, self._seg_data[:2],
            self._seg_data_empty, 413,
            URLError("++urlerror++"),
            socket.timeout()
        ]
        self.service = ''  # so get_datacenters_df accepts any row by default
        self.db_buf_size = 1
        self.routing_service = yaml_load(get_templates_fpath("download.yaml"))\
            ['advanced_settings']['routing_service_url']

        # NON db stuff (logging, patchers, pandas...):
        self.logout = StringIO()
        handler = StreamHandler(stream=self.logout)
        self._logout_cache = ""
        # THIS IS A HACK:
        query_logger.setLevel(logging.INFO)  # necessary to forward to handlers
        # if we called closing (we are testing the whole chain) the level will be reset
        # (to level.INFO) otherwise it stays what we set two lines above. Problems might
        # arise if closing sets a different level, but for the moment who cares
        query_logger.addHandler(handler)

        # when debugging, I want the full dataframe with to_string(), not truncated
        # NOTE: this messes up right alignment of numbers in DownloadStats (see utils.py)
        # FIRST, remember current settings and restore them in cleanup:
        _pd_display_maxcolwidth = pd.get_option('display.max_colwidth')
        pd.set_option('display.max_colwidth', -1)

        # define class level patchers (we do not use a yield as we need to do more stuff
        # in the finalizer, see below
        patchers = []

        patchers.append(patch('stream2segment.utils.url.urlopen'))
        self.mock_urlopen = patchers[-1].start()

        # mock ThreadPool (tp) to run one instance at a time, so we get deterministic
        # results:
        class MockThreadPool(object):

            def __init__(self, *a, **kw):
                pass

            def imap(self, func, iterable, *args):
                # make imap deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func, iterable)

            def imap_unordered(self, func_, iterable, *args):
                # make imap_unordered deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func_, iterable)

            def close(self, *a, **kw):
                pass

        # assign patches and mocks:
        patchers.append(patch('stream2segment.utils.url.ThreadPool'))
        self.mock_tpool = patchers[-1].start()
        self.mock_tpool.side_effect = MockThreadPool

        # add finalizer:
        def delete():
            # restore pandas option, stop all patchers and detach our log handler:
            pd.set_option('display.max_colwidth', _pd_display_maxcolwidth)
            for patcher in patchers:
                patcher.stop()

            hndls = query_logger.handlers[:]
            handler.close()
            for h in hndls:
                if h is handler:
                    query_logger.removeHandler(h)

        request.addfinalizer(delete)
    def init(self, request, db, data):
        '''Per-test setup (variant without station/datacenter side effects): create the
        db, define URL-read side effects, patch urlopen/ThreadPool and pandas display
        options, and register cleanup'''
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)
        # setup a run_id:
        rdw = Download()
        db.session.add(rdw)
        db.session.commit()
        self.run = rdw

        # side effects:
        # events in FDSN text format (one event per line):
        self._evt_urlread_sideeffect = """#EventID | Time | Latitude | Longitude | Depth/km | Author | Catalog | Contributor | ContributorID | MagType | Magnitude | MagAuthor | EventLocationName
20160508_0000129|2016-05-08 05:17:11.500000|1|1|60.0|AZER|EMSC-RTS|AZER|505483|ml|3|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN
20160508_0000004|2016-05-08 01:45:30.300000|90|90|2.0|EMSC|EMSC-RTS|EMSC|505183|ml|4|EMSC|CROATIA
"""

        self._mintraveltime_sideeffect = cycle([1])
        self._seg_data = data.read("GE.FLT1..HH?.mseed")
        self._seg_data_gaps = data.read("IA.BAKI..BHZ.D.2016.004.head")
        self._seg_data_empty = b''
        # segment responses: good data, gapped data, http errors, truncated data,
        # empty data, url/socket errors:
        self._seg_urlread_sideeffect = [
            self._seg_data, self._seg_data_gaps, 413, 500, self._seg_data[:2],
            self._seg_data_empty, 413,
            URLError("++urlerror++"),
            socket.timeout()
        ]
        self.service = ''  # so get_datacenters_df accepts any row by default
        self.db_buf_size = 1
        self.routing_service = yaml_load(get_templates_fpath("download.yaml"))\
            ['advanced_settings']['routing_service_url']

        # NON db stuff (logging, patchers, pandas...):
        self.loghandler = StreamHandler(stream=StringIO())
        # THIS IS A HACK:
        query_logger.setLevel(logging.INFO)  # necessary to forward to handlers
        # if we called closing (we are testing the whole chain) the level will be reset
        # (to level.INFO) otherwise it stays what we set two lines above. Problems might
        # arise if closing sets a different level, but for the moment who cares
        query_logger.addHandler(self.loghandler)

        # when debugging, I want the full dataframe with to_string(), not truncated
        # NOTE: this messes up right alignment of numbers in DownloadStats (see utils.py)
        # FIRST, remember current settings and restore them in cleanup:
        _pd_display_maxcolwidth = pd.get_option('display.max_colwidth')
        pd.set_option('display.max_colwidth', -1)

        # define class level patchers (we do not use a yield as we need to do more stuff
        # in the finalizer, see below
        patchers = []

        patchers.append(patch('stream2segment.utils.url.urlopen'))
        self.mock_urlopen = patchers[-1].start()

        # mock ThreadPool (tp) to run one instance at a time, so we get deterministic
        # results:
        class MockThreadPool(object):

            def __init__(self, *a, **kw):
                pass

            def imap(self, func, iterable, *args):
                # make imap deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func, iterable)

            def imap_unordered(self, func_, iterable, *args):
                # make imap_unordered deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func_, iterable)

            def close(self, *a, **kw):
                pass

        # assign patches and mocks:
        patchers.append(patch('stream2segment.utils.url.ThreadPool'))
        self.mock_tpool = patchers[-1].start()
        self.mock_tpool.side_effect = MockThreadPool

        # add finalizer:
        def delete():
            # restore pandas option, stop all patchers and detach our log handler:
            pd.set_option('display.max_colwidth', _pd_display_maxcolwidth)
            for patcher in patchers:
                patcher.stop()

            hndls = query_logger.handlers[:]
            for h in hndls:
                if h is self.loghandler:
                    self.loghandler.close()
                    query_logger.removeHandler(h)

        request.addfinalizer(delete)