Example #1
def test_click_process(mock_process):
    runner = CliRunner()

    d_conffile, conffile, pyfile = \
        get_templates_fpaths("download.yaml", "paramtable.yaml", "paramtable.py")

    # test no dburl supplied
    mock_process.reset_mock()
    result = runner.invoke(cli, ['process', '-c', conffile, '-p', pyfile, 'c'])
    assert "Missing option" in result.output
    assert result.exc_info

    # test dburl supplied
    mock_process.reset_mock()
    result = runner.invoke(
        cli, ['process', '-d', 'd', '-c', conffile, '-p', pyfile, 'c'])
    lst = list(mock_process.call_args_list[0][0])
    assert lst == ['d', pyfile, None, conffile, 'c']
    assert result.exit_code == 0

    # test dburl supplied via config
    mock_process.reset_mock()
    result = runner.invoke(
        cli, ['process', '-d', d_conffile, '-c', conffile, '-p', pyfile, 'c'])
    lst = list(mock_process.call_args_list[0][0])
    assert lst == [yaml_load(d_conffile)['dburl'], pyfile, None, conffile, 'c']
    assert result.exit_code == 0

    # test funcname supplied via cli:
    mock_process.reset_mock()
    result = runner.invoke(cli, [
        'process', '--funcname', 'wat?', '-d', d_conffile, '-c', conffile,
        '-p', pyfile, 'c'
    ])
    lst = list(mock_process.call_args_list[0][0])
    assert lst == [
        yaml_load(d_conffile)['dburl'], pyfile, 'wat?', conffile, 'c'
    ]
    assert result.exit_code == 0

    # test an error in params: -dburl instead of --dburl:
    mock_process.reset_mock()
    result = runner.invoke(
        cli,
        ['process', '-dburl', d_conffile, '-c', conffile, '-p', pyfile, 'c'])
    assert not mock_process.called
    assert result.exit_code != 0

    # assert help works:
    mock_process.reset_mock()
    result = runner.invoke(cli, ['process', '--help'])
    assert not mock_process.called
    assert result.exit_code == 0
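For reference, a minimal sketch of how the `mock_process` argument above could be provided, assuming the test patches the `process` function imported by the module defining `cli` (the exact import path is not shown in this snippet and is hypothetical):

from unittest.mock import patch
import pytest

@pytest.fixture
def mock_process():
    # patch the process entry point so `cli` invocations never run a real job;
    # 'stream2segment.cli.process' is a guessed path, adjust to the real module
    with patch('stream2segment.cli.process') as mock_:
        yield mock_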
Example #2
def test_click_show(mock_create_s2s_show_app, mock_open_in_browser, mock_show):
    runner = CliRunner()
    d_conffile, conffile, pyfile = \
        get_templates_fpaths("download.yaml", "paramtable.yaml", "paramtable.py")

    # when asserting that we called open_in_browser, since the latter runs inside a thread
    # which starts with a delay of 1.5 seconds, we need to wait in our helper function here.
    # Quite hacky, but who cares
    def assert_opened_in_browser(url=None):  # `url` is currently unused
        time.sleep(2)  # to be safe (the thread starts with a 1.5s delay)
        mock_open_in_browser.assert_called_once()
        args = mock_open_in_browser.call_args_list[0][0]
        assert len(args) == 1
        assert args[0].startswith('http://127.0.0.1:')

    # test no dburl supplied
    mock_show.reset_mock()
    mock_open_in_browser.reset_mock()
    result = runner.invoke(cli, ['show', '-c', conffile, '-p', pyfile])
    assert "Missing option" in result.output
    assert result.exc_info
    assert not mock_open_in_browser.called

    # test dburl supplied
    mock_show.reset_mock()
    mock_open_in_browser.reset_mock()
    result = runner.invoke(cli,
                           ['show', '-d', 'd', '-c', conffile, '-p', pyfile])
    lst = list(mock_show.call_args_list[0][0])
    assert lst == ['d', pyfile, conffile]
    assert result.exit_code == 0
    assert_opened_in_browser('d')

    # test dburl supplied via config
    mock_show.reset_mock()
    mock_open_in_browser.reset_mock()
    result = runner.invoke(
        cli, ['show', '-d', d_conffile, '-c', conffile, '-p', pyfile])
    lst = list(mock_show.call_args_list[0][0])
    dburl = yaml_load(d_conffile)['dburl']
    assert lst == [dburl, pyfile, conffile]
    assert result.exit_code == 0
    assert_opened_in_browser(dburl)

    # test an error in params: -dburl instead of --dburl:
    mock_show.reset_mock()
    mock_open_in_browser.reset_mock()
    result = runner.invoke(
        cli, ['show', '-dburl', d_conffile, '-c', conffile, '-p', pyfile])
    assert not mock_show.called
    assert result.exit_code != 0
    assert not mock_open_in_browser.called

    # assert help works:
    mock_show.reset_mock()
    mock_open_in_browser.reset_mock()
    result = runner.invoke(cli, ['show', '--help'])
    assert not mock_show.called
    assert result.exit_code == 0
    assert not mock_open_in_browser.called
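The fixed 2-second sleep in `assert_opened_in_browser` above is timing-sensitive; a sketch of a polling alternative (timeout and interval values are arbitrary):

import time

def wait_for_call(mock_obj, timeout=5.0, interval=0.1):
    # poll the mock until it has been called, or give up after `timeout` seconds
    deadline = time.time() + timeout
    while time.time() < deadline:
        if mock_obj.called:
            return True
        time.sleep(interval)
    return False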
Example #3
def extract_dburl_if_yamlpath(value, param_name='dburl'):
    """
    Returns the database path from 'value':
    'value' can be a file (in that case is assumed to be a yaml file with the
    `param_name` key in it, which must denote a db path) or the database path otherwise
    """
    if not isinstance(value, string_types) or not value:
        raise TypeError('please specify a string denoting either a path to a yaml file with the '
                        '`dburl` parameter defined, or a valid db path')
    return yaml_load(value)[param_name] if (os.path.isfile(value)) else value
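A usage sketch of the function above (paths are illustrative):

# extract_dburl_if_yamlpath('sqlite:////tmp/db.sqlite')  # -> returned unchanged (not a file)
# extract_dburl_if_yamlpath('/path/to/download.yaml')    # -> yaml_load(...)['dburl'] (existing yaml file)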
def test_yaml_load():
    # NB: all dic keys must be strings
    dic1 = {'a': 7, '5': 'h'}
    dic2 = {'a': 7, '7': 'h'}
    d = yaml_load(dic1, **dic2)
    assert d['a'] == 7
    assert d['5'] == 'h'
    assert d['7'] == 'h'
    assert sorted(d.keys()) == sorted(['a', '5', '7'])

    dic1 = {'a': 7, '5': 'h', 'v': {1: 2, 3: 4}}
    dic2 = {'a': 7, '7': 'h', 'v': {1: 2, 3: 5}}
    d = yaml_load(dic1, **dic2)
    assert d['a'] == 7
    assert d['5'] == 'h'
    assert d['7'] == 'h'
    assert d['v'][1] == 2
    assert d['v'][3] == 5
    assert sorted(d.keys()) == sorted(['a', '5', '7', 'v'])
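From the assertions above, `yaml_load` apparently merges the keyword overrides into the loaded dict recursively: nested dicts are merged key by key rather than replaced. A minimal sketch of that merge semantics (an inference, not the real implementation):

def merge_overrides(dest, overrides):
    # recursively merge `overrides` into `dest` (inferred yaml_load semantics)
    for key, val in overrides.items():
        if isinstance(val, dict) and isinstance(dest.get(key), dict):
            merge_overrides(dest[key], val)  # nested dicts: merge key by key
        else:
            dest[key] = val  # anything else: the override wins
    return dest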
Example #5
def download_setup_func(filename, **params):
    yamldic = yaml_load(get_templates_fpath(filename))
    for key, val in params.items():
        if val is None:
            yamldic.pop(key, None)
        else:
            yamldic[key] = val
    path = os.path.join(basedir, filename)
    with open(path, 'w') as _opn:
        yaml.safe_dump(yamldic, _opn)
    return path, yamldic
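A usage sketch (key names and values are illustrative): start from the "download.yaml" template, drop one key and override another:

# path, cfg = download_setup_func('download.yaml', networks=None,
#                                 dburl='sqlite:///:memory:')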
Example #6
def yaml_get(yaml_content):
    '''Returns the arguments used for the eventws query stored in the yaml,
    or an empty dict in case of errors

    :param yaml_content: yaml formatted string representing a download config'''
    try:
        dic = yaml_load(StringIO(yaml_content))
        ret = {k: dic[k] for k in EVENTWS_SAFE_PARAMS if k in dic}
        additional_eventws_params = dic.get('eventws_query_args', None) or {}
        ret.update(additional_eventws_params)
        return ret
    except Exception as _:  # pylint: disable=broad-except
        return {}
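A usage sketch, assuming 'minmagnitude' is among the EVENTWS_SAFE_PARAMS defined elsewhere:

# yaml_get("minmagnitude: 4.0\neventws_query_args:\n  format: text\n")
# -> {'minmagnitude': 4.0, 'format': 'text'}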
Example #7
def load_config_for_process(dburl, pyfile, funcname=None, config=None, outfile=None,
                            **param_overrides):
    '''checks process arguments.
    Returns the tuple (session, pyfunc, funcname, config_dict),
    where session is the SQLAlchemy session from `dburl`,
    pyfunc is the python function loaded from `pyfile`, and config_dict is the dict loaded
    from `config`, which must denote a path to a yaml file or None (config_dict will be
    empty in this latter case)
    '''
    try:
        session = get_session(dburl, True)
    except Exception as exc:
        raise BadArgument('dburl', exc)

    try:
        funcname = get_funcname(funcname)
    except Exception as exc:
        raise BadArgument('funcname', exc)

    try:
        # yaml_load accepts a file name or a dict
        config = yaml_load({} if config is None else config, **param_overrides)
    except Exception as exc:
        raise BadArgument('config', exc)

    # NOTE: contrary to the download routine, we cannot check the types of the config because
    # no parameter is mandatory, and thus parameters might NOT be present in the config.

    try:
        pyfunc = load_pyfunc(pyfile, funcname)
    except Exception as exc:
        raise BadArgument('pyfile', exc)

    if outfile is not None:
        try:
            filewritable(outfile)
        except Exception as exc:
            raise BadArgument('outfile', exc)

    # nothing more to process
    return session, pyfunc, funcname, config
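A caller sketch for the function above (argument values are illustrative; that BadArgument has a meaningful string representation is an assumption):

import sys

try:
    session, pyfunc, funcname, config = load_config_for_process(
        'sqlite:////tmp/db.sqlite', 'paramtable.py', config='paramtable.yaml')
except BadArgument as exc:
    sys.exit(str(exc))  # assumed printable message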
    def init(self, request, db, data):
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)
        # setup a run_id:
        rdw = Download()
        db.session.add(rdw)
        db.session.commit()
        self.run = rdw

        # side effects:
        self._evt_urlread_sideeffect = """#EventID | Time | Latitude | Longitude | Depth/km | Author | Catalog | Contributor | ContributorID | MagType | Magnitude | MagAuthor | EventLocationName
20160508_0000129|2016-05-08 05:17:11.500000|1|1|60.0|AZER|EMSC-RTS|AZER|505483|ml|3|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN
20160508_0000004|2016-05-08 01:45:30.300000|90|90|2.0|EMSC|EMSC-RTS|EMSC|505183|ml|4|EMSC|CROATIA
"""
        self._dc_urlread_sideeffect = """http://geofon.gfz-potsdam.de/fdsnws/dataselect/1/query
ZZ * * * 2002-09-01T00:00:00 2005-10-20T00:00:00
UP ARJ * * 2013-08-01T00:00:00 2017-04-25

http://ws.resif.fr/fdsnws/dataselect/1/query
ZU * * HHZ 2015-01-01T00:00:00 2016-12-31T23:59:59.999999

"""

        # Note: by default we set sta_urlread_sideeffect to return channels which result in 12
        # segments (see lat and lon of channels vs lat and lon of events above)
        self._sta_urlread_sideeffect = [
            """#Network|Station|Location|Channel|Latitude|Longitude|Elevation|Depth|Azimuth|Dip|SensorDescription|Scale|ScaleFreq|ScaleUnits|SampleRate|StartTime|EndTime
GE|FLT1||HHE|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
GE|FLT1||HHN|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
GE|FLT1||HHZ|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
n1|s||c1|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n1|s||c2|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n1|s||c3|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
""", """#Network|Station|Location|Channel|Latitude|Longitude|Elevation|Depth|Azimuth|Dip|SensorDescription|Scale|ScaleFreq|ScaleUnits|SampleRate|StartTime|EndTime
IA|BAKI||BHE|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
IA|BAKI||BHN|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
IA|BAKI||BHZ|1|1|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2003-01-01T00:00:00|
n2|s||c1|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n2|s||c2|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
n2|s||c3|90|90|485.0|0.0|90.0|0.0|GFZ:HT1980:CMG-3ESP/90/g=2000|838860800.0|0.1|M/S|100.0|2009-01-01T00:00:00|
"""
        ]

        self._mintraveltime_sideeffect = cycle([1])
        self._seg_data = data.read("GE.FLT1..HH?.mseed")
        self._seg_data_gaps = data.read("IA.BAKI..BHZ.D.2016.004.head")
        self._seg_data_empty = b''
        self._seg_urlread_sideeffect = [
            self._seg_data, self._seg_data_gaps, 413, 500, self._seg_data[:2],
            self._seg_data_empty, 413,
            URLError("++urlerror++"),
            socket.timeout()
        ]
        self.service = ''  # so get_datacenters_df accepts any row by default
        self.db_buf_size = 1
        self.routing_service = yaml_load(get_templates_fpath("download.yaml"))\
            ['advanced_settings']['routing_service_url']

        # NON db stuff (logging, patchers, pandas...):
        self.logout = StringIO()
        handler = StreamHandler(stream=self.logout)
        self._logout_cache = ""
        # THIS IS A HACK:
        query_logger.setLevel(logging.INFO)  # necessary to forward to handlers
        # if closing is called (we are testing the whole chain) the level will be reset
        # (to logging.INFO), otherwise it stays at what we set two lines above. Problems
        # might arise if closing sets a different level, but for the moment who cares
        query_logger.addHandler(handler)

        # when debugging, I want the full dataframe with to_string(), not truncated
        # NOTE: this messes up right alignment of numbers in DownloadStats (see utils.py)
        # FIRST, remember current settings and restore them in cleanup:
        _pd_display_maxcolwidth = pd.get_option('display.max_colwidth')
        pd.set_option('display.max_colwidth', -1)

        # define class-level patchers (we do not use a yield as we need to do more stuff
        # in the finalizer, see below):
        patchers = []

        patchers.append(patch('stream2segment.utils.url.urlopen'))
        self.mock_urlopen = patchers[-1].start()

        # mock ThreadPool (tp) to run one instance at a time, so we get deterministic results:
        class MockThreadPool(object):
            def __init__(self, *a, **kw):
                pass

            def imap(self, func, iterable, *args):
                # make imap deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func, iterable)

            def imap_unordered(self, func_, iterable, *args):
                # make imap_unordered deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func_, iterable)

            def close(self, *a, **kw):
                pass

        # assign patches and mocks:
        patchers.append(patch('stream2segment.utils.url.ThreadPool'))
        self.mock_tpool = patchers[-1].start()
        self.mock_tpool.side_effect = MockThreadPool

        # add finalizer:
        def delete():
            pd.set_option('display.max_colwidth', _pd_display_maxcolwidth)

            for patcher in patchers:
                patcher.stop()

            hndls = query_logger.handlers[:]
            handler.close()
            for h in hndls:
                if h is handler:
                    query_logger.removeHandler(h)

        request.addfinalizer(delete)
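A sketch of how the canned payloads above could be fed to the patched urlopen; the response-object shape (read()/code) is an assumption of this sketch, and the real test suite presumably uses its own helper:

from unittest.mock import MagicMock

def as_response(item):
    # wrap one canned side-effect item into a urlopen-like response mock
    if isinstance(item, Exception):
        return item  # mock raises exception instances found in side_effect lists
    resp = MagicMock()
    resp.read.return_value = item if isinstance(item, bytes) else b''
    resp.code = item if isinstance(item, int) else 200  # e.g. the 413, 500 above
    return resp

# e.g.: self.mock_urlopen.side_effect = [as_response(i) for i in self._seg_urlread_sideeffect]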
def test_download_eventws_query_args(
        mock_isfile,
        mock_run_download,
        # fixtures:
        run_cli_download):  # pylint: disable=redefined-outer-name
    '''test different scenarios where we provide eventws query args from the command line'''

    d_yaml_file = get_templates_fpath("download.yaml")
    # FIRST SCENARIO: no eventws_params provided
    mock_run_download.reset_mock()
    def_yaml_dict = yaml_load(d_yaml_file)['eventws_params']
    assert not def_yaml_dict  # None or empty dict
    result = run_cli_download()  # no additional args
    assert result.exit_code == 0
    # assert the yaml (as passed to the download function) has the correct value:
    real_eventws_params = mock_run_download.call_args_list[0][1][
        'eventws_params']
    # just assert it has keys merged from the global event-related yaml keys
    assert 'maxmagnitude' not in real_eventws_params
    assert real_eventws_params

    # test by providing an eventws param which is not optional:
    mock_run_download.reset_mock()
    def_yaml_dict = yaml_load(d_yaml_file)['eventws_params']
    assert not def_yaml_dict  # None or empty dict
    result = run_cli_download('--minmagnitude', '15.5')
    assert result.exit_code == 0
    # assert the yaml (as passed to the download function) has the correct value:
    real_eventws_params = mock_run_download.call_args_list[0][1][
        'eventws_params']
    # just assert it has keys merged from the global event-related yaml keys
    assert real_eventws_params['minmagnitude'] == 15.5

    # test by providing an eventws param which is optional:
    mock_run_download.reset_mock()
    def_yaml_dict = yaml_load(d_yaml_file)['eventws_params']
    assert not def_yaml_dict  # None or empty dict
    result = run_cli_download('--minmagnitude',
                              '15.5',
                              eventws_params={'format': 'abc'})
    assert result.exit_code == 0
    # assert the yaml (as passed to the download function) has the correct value:
    real_eventws_params = mock_run_download.call_args_list[0][1][
        'eventws_params']
    # just assert it has keys merged from the global event-related yaml keys
    assert real_eventws_params['minmagnitude'] == 15.5
    assert real_eventws_params['format'] == 'abc'

    # conflicting args (supplying the same event-related param both globally and in eventws_params):
    for pars in [['--minlatitude', '-minlat'], ['--maxlatitude', '-maxlat'],
                 ['--minlongitude', '-minlon'], ['--maxlongitude', '-maxlon'],
                 ['--minmagnitude', '-minmag'], ['--maxmagnitude', '-maxmag'],
                 ['--mindepth'], ['--maxdepth']]:
        for par1, par2 in product(pars, pars):
            mock_run_download.reset_mock()
            result = run_cli_download(
                par1, '15.5', eventws_params={par2.replace('-', ''): 15.5})
            assert result.exit_code != 0  # a conflict must make the command fail
            assert 'conflict' in result.output
            assert 'Invalid value for "eventws_params"' in result.output

    # test an eventws supplied as a non-existing file and not a valid fdsnws URL:
    mock_isfile.reset_mock()
    assert not mock_isfile.called
    result = run_cli_download('--eventws', 'myfile')
    assert result.exit_code != 0
    assert 'eventws' in result.output
    assert mock_isfile.called
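A plausible sketch of the `run_cli_download` fixture used above (hypothetical: the real fixture is defined elsewhere and also merges keyword arguments such as `eventws_params` into the yaml config, a step omitted here):

import pytest
from click.testing import CliRunner

@pytest.fixture
def run_cli_download():
    def _run(*cli_args, **yaml_overrides):
        # yaml_overrides would be written into a copy of download.yaml
        # before invoking; omitted in this sketch
        runner = CliRunner()
        args = ['download', '-c', get_templates_fpath("download.yaml")]
        return runner.invoke(cli, args + list(cli_args))
    return _run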
Example #10
def load_config_for_download(config, parseargs, **param_overrides):
    '''loads download arguments from the given config (yaml file or dict) after parsing and
    checking some of the dict keys.

    :return: a dict loaded from the given `config` and with parsed arguments (dict keys)

    Raises BadArgument in case of parsing errors, missing arguments, conflicts, etc.
    '''
    try:
        config_dict = yaml_load(config, **param_overrides)
    except Exception as exc:
        raise BadArgument('config', exc)

    if parseargs:
        # few variables:
        configfile = config if (isinstance(config, string_types) and os.path.isfile(config))\
            else None

        # define first default event params in order to avoid typos
        def_evt_params = EVENTWS_SAFE_PARAMS

        # now, what we want to do here is basically convert config_dict keys
        # into suitable arguments for stream2segment functions: this includes
        # renaming params, parsing/converting their values, raising
        # BadArgument exceptions and so on

        # Let's configure a 'params' list: a list of dicts where each dict is a 'param checker'
        # with the following keys (at least one should be provided):
        # names: list of strings. Provide it in order to check for optional names,
        #        check that only one param is provided, and
        #        replace whatever is found with the first item in the list
        # newname: string. Provide it if you want to replace the names above with this
        #          value instead of the first item in 'names'
        # defvalue: if provided, then the parameter is optional and will be set to this value
        #           if missing. If not provided, then the parameter is mandatory
        #           (BadArgument is raised if missing)
        # newvalue: function accepting a value (the parameter value), raising whatever is
        #           needed if the parameter is invalid, and returning the correct parameter value
        # (a sketch of how these dicts are consumed is given after this example)
        params = [
            {
             'names': def_evt_params[:2],  # ['minlatitude', 'minlat'],
             'defvalue': None,
             'newvalue': between(-90.0, 90.0)
            },
            {
             'names': def_evt_params[2:4],  # ['maxlatitude', 'maxlat'],
             'defvalue': None,
             'newvalue': between(-90.0, 90.0)
            },
            {
             'names': def_evt_params[4:6],  # ['minlongitude', 'minlon'],
             'defvalue': None,
             'newvalue': between(-180.0, 180.0)
            },
            {
             'names': def_evt_params[6:8],  # ['maxlongitude', 'maxlon'],
             'defvalue': None,
             'newvalue': between(-180.0, 180.0)
            },
            {
             'names': def_evt_params[8:10],  # ['minmagnitude', 'minmag'],
             'defvalue': None
            },
            {
             'names': def_evt_params[10:12],  # ['maxmagnitude', 'maxmag'],
             'defvalue': None
            },
            {
             'names': def_evt_params[12:13],  # ['mindepth'],
             'defvalue': None
            },
            {
             'names': def_evt_params[13:14],  # ['maxdepth'],
             'defvalue': None
            },
            {
             'names': ['update_metadata'],
             'newvalue': parse_update_metadata
             },
            {
             'names': ['restricted_data'],
             'newname': 'authorizer',
             'newvalue': lambda val: create_auth(val, config_dict['dataws'], configfile)
            },
            {
             'names': ['dburl'],
             'newname': 'session',
             'newvalue': get_session
            },
            {
             'names': ['traveltimes_model'],
             'newname': 'tt_table',
             'newvalue': load_tt_table
            },
            {
             'names': ('starttime', 'start'),
             'newvalue': valid_date
            },
            {
             'names': ('endtime', 'end'),
             'newvalue': valid_date
            },
            {
             'names': ['eventws'],
             'newvalue': lambda url: valid_fdsn(url, is_eventws=True, configfile=configfile)
            },
            {
             'names': ['dataws'],
             'newvalue': lambda url: valid_fdsn(url, is_eventws=False)
            },
            {
             'names': ('network', 'net', 'networks'),
             'defvalue': [],
             'newvalue': nslc_param_value_aslist
            },
            {
             'names': ('station', 'sta', 'stations'),
             'defvalue': [],
             'newvalue': nslc_param_value_aslist
            },
            {
             'names': ('location', 'loc', 'locations'),
             'defvalue': [],
             'newvalue': nslc_param_value_aslist
            },
            {
             'names': ('channel', 'cha', 'channels'),
             'defvalue': [],
             'newvalue': nslc_param_value_aslist
            },
            {'names': ['eventws_params', 'eventws_query_args']},
            {
             'names': ['advanced_settings'],
             'newvalue': parse_download_advanced_settings
            },
            {
             'names': ['search_radius'],
             'newvalue': check_search_radius
            }
        ]

        # store all keys now because we might change them (see below):
        all_keys = set(config_dict)
        # do the check (this MODIFIES config_dict in place!):
        parse_arguments(config_dict, *params)

        # Now check for:
        # 1a. parameter supplied here NOT in the default config
        # 1b. parameter supplied here with a different type than in the default config
        # 2. Parameters in the default config not supplied here

        # First, create some sets of params names:
        # the parsed keys (all names defined above):
        parsed_keys = set(chain(*(_['names'] for _ in params)))
        # load original configuration (default in this package):
        orig_config = yaml_load(get_templates_fpath("download.yaml"))

        # Check 1a. and 1b.:
        for key in all_keys - parsed_keys:
            try:
                other_value = orig_config[key]
            except KeyError:
                raise BadArgument(key, '', 'No such option')
            try:
                typesmatch(config_dict[key], other_value)
            except Exception as exc:
                raise BadArgument(key, exc)

        # Check 2. :
        missing_keys = set(orig_config) - all_keys - parsed_keys
        if missing_keys:
            raise BadArgument(list(missing_keys), KeyError())

        # At last, put all event-related parameters (except starttime and endtime)
        # in the eventws_params dict (the latter is an OPTIONAL dict
        # which can be set in the config for ADDITIONAL eventws parameters)
        # and check for conflicts:
        eventsearchparams = config_dict['eventws_params']
        # eventsearchparams might be None:
        if not eventsearchparams:
            config_dict['eventws_params'] = eventsearchparams = {}
        for par in def_evt_params:
            if par in eventsearchparams:  # conflict:
                raise BadArgument('eventws_params',
                                  'conflicting parameter "%s"' % par)
            value = config_dict.pop(par, None)
            if value is not None:
                eventsearchparams[par] = value

    return config_dict
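A minimal sketch of what `parse_arguments` plausibly does with the 'param checker' dicts above (an inference; the real implementation lives elsewhere, and edge cases such as applying `newvalue` to default values are glossed over):

def parse_arguments(config_dict, *params):
    for par in params:
        names = [n for n in par['names'] if n in config_dict]
        if len(names) > 1:  # the same param given under two names:
            raise BadArgument(' / '.join(names), '', 'conflicting names')
        if not names:
            if 'defvalue' not in par:  # mandatory and missing:
                raise BadArgument(par['names'][0], KeyError())
            value = par['defvalue']
        else:
            value = config_dict.pop(names[0])
        newname = par.get('newname', par['names'][0])
        if 'newvalue' in par:
            try:
                value = par['newvalue'](value)  # parse/convert, may raise
            except Exception as exc:
                raise BadArgument(newname, exc)
        config_dict[newname] = value
    return config_dict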
Example #11
    def init(self, request, db, data):
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False, process=True)

        # init db:
        session = db.session

        dct = DataCenter(station_url="345fbgfnyhtgrefs",
                         dataselect_url='edfawrefdc')
        session.add(dct)

        utcnow = datetime.utcnow()

        dwl = Download(run_time=utcnow)
        session.add(dwl)

        ws = WebService(url='webserviceurl')
        session.add(ws)
        session.commit()

        # id = 'firstevent'
        ev1 = Event(event_id='event1',
                    webservice_id=ws.id,
                    time=utcnow,
                    latitude=89.5,
                    longitude=6,
                    depth_km=7.1,
                    magnitude=56)
        # note: ev2 is not used; we store it in the db here anyway...
        ev2 = Event(event_id='event2',
                    webservice_id=ws.id,
                    time=utcnow + timedelta(seconds=5),
                    latitude=89.5,
                    longitude=6,
                    depth_km=7.1,
                    magnitude=56)

        session.add_all([ev1, ev2])

        session.commit()  # refresh datacenter id (also flush works)

        d = datetime.utcnow()

        s = Station(network='network',
                    station='station',
                    datacenter_id=dct.id,
                    latitude=90,
                    longitude=-45,
                    start_time=d)
        session.add(s)

        channels = [
            Channel(location='01', channel='HHE', sample_rate=6),
            Channel(location='01', channel='HHN', sample_rate=6),
            Channel(location='01', channel='HHZ', sample_rate=6),
            Channel(location='01', channel='HHW', sample_rate=6),
            Channel(location='02', channel='HHE', sample_rate=6),
            Channel(location='02', channel='HHN', sample_rate=6),
            Channel(location='02', channel='HHZ', sample_rate=6),
            Channel(location='04', channel='HHZ', sample_rate=6),
            Channel(location='05', channel='HHE', sample_rate=6),
            Channel(location='05gap_merged', channel='HHN', sample_rate=6),
            Channel(location='05err', channel='HHZ', sample_rate=6),
            Channel(location='05gap_unmerged', channel='HHZ', sample_rate=6)
        ]

        s.channels.extend(channels)
        session.commit()

        fixed_args = dict(datacenter_id=dct.id, download_id=dwl.id)

        # Note: data_gaps_merged is a stream whose gaps can be merged via obspy.Stream.merge;
        # data_gaps_unmerged is a stream whose gaps cannot be merged (it is a stream of three
        # different channels of the same event)
        data_gaps_unmerged = data.read("GE.FLT1..HH?.mseed")
        data_gaps_merged = data.read("IA.BAKI..BHZ.D.2016.004.head")
        data_ok = data.read("GE.FLT1..HH?.mseed")

        # create an 'ok' and 'error' Stream, the first by taking the first trace of
        # "GE.FLT1..HH?.mseed", the second by manipulating it
        obspy_stream = data.read_stream(
            "GE.FLT1..HH?.mseed")  # read(BytesIO(data_ok))
        obspy_trace = obspy_stream[0]

        # data_ok is actually the bytes of 3 traces: write just the first one, because
        # as it is it would be considered a trace with gaps, and we already have
        # another trace with gaps
        b = BytesIO()
        obspy_trace.write(b, format='MSEED')
        data_ok = b.getvalue()
        data_err = data_ok[:5]  # whatever slice should be ok

        seedid_ok = seedid_err = obspy_trace.get_id()
        seedid_gaps_unmerged = None
        seedid_gaps_merged = read(BytesIO(data_gaps_merged))[0].get_id()

        for evt, cha in product([ev1], channels):
            val = int(cha.location[:2])
            mseed = data_gaps_merged if "gap_merged" in cha.location else \
                data_err if "err" in cha.location else \
                data_gaps_unmerged if 'gap_unmerged' in cha.location else data_ok
            seedid = seedid_gaps_merged if "gap_merged" in cha.location else \
                seedid_err if 'err' in cha.location else \
                seedid_gaps_unmerged if 'gap_unmerged' in cha.location else seedid_ok

            # set times. For everything except data_ok, we set an out-of-bounds time:
            start_time = evt.time - timedelta(seconds=5)
            arrival_time = evt.time - timedelta(seconds=4)
            end_time = evt.time - timedelta(seconds=1)

            if "gap_merged" not in cha.location and 'err' not in cha.location and \
                    'gap_unmerged' not in cha.location:
                start_time = obspy_trace.stats.starttime.datetime
                arrival_time = (
                    obspy_trace.stats.starttime +
                    (obspy_trace.stats.endtime - obspy_trace.stats.starttime) /
                    2).datetime
                end_time = obspy_trace.stats.endtime.datetime

            seg = Segment(request_start=start_time,
                          arrival_time=arrival_time,
                          request_end=end_time,
                          data=mseed,
                          data_seed_id=seedid,
                          event_distance_deg=val,
                          event_id=evt.id,
                          **fixed_args)
            cha.segments.append(seg)

        session.commit()

        self.inventory_bytes = data.read("GE.FLT1.xml")
        self.inventory = data.read_inv("GE.FLT1.xml")

        pfile, cfile = get_templates_fpaths('paramtable.py', 'paramtable.yaml')
        self.pymodule = load_source(pfile)
        self.config = yaml_load(cfile)

        # remove segment_select, we use all segments here:
        self.config.pop('segment_select', None)
Example #12
def create_plot_manager(pyfile, configfile):
    pymodule = None if pyfile is None else load_source(pyfile)
    configdict = {} if configfile is None else yaml_load(configfile)
    global PLOT_MANAGER  # pylint: disable=global-statement
    PLOT_MANAGER = PlotManager(pymodule, configdict)
    return PLOT_MANAGER
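A usage sketch (paths are illustrative). Passing None for either argument yields an empty module/config, which is what keeps both arguments optional:

# plot_manager = create_plot_manager('paramtable.py', 'paramtable.yaml')
# assert plot_manager is PLOT_MANAGER  # also stored in the module-level global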
Example #13
def test_click_template(mock_main_init, mock_input, pytestdir):
    runner = CliRunner()
    # assert help works:
    result = runner.invoke(cli, ['init', '--help'])
    assert not mock_main_init.called
    assert result.exit_code == 0

    expected_files = [
        'download.yaml', 'paramtable.py', 'paramtable.yaml', 'save2fs.py',
        'save2fs.yaml', 'jupyter.example.ipynb', 'jupyter.example.db'
    ]
    non_python_files = [
        _ for _ in expected_files
        if os.path.splitext(_)[1] not in ('.py', '.yaml')
    ]

    dir_ = pytestdir.makedir()
    path = os.path.join(dir_, 'abc')

    def max_mod_time():
        return max(
            os.path.getmtime(os.path.join(path, f)) for f in os.listdir(path))

    result = runner.invoke(cli, ['init', path])
    # FIXME: check how to mock os.path.isfile properly. This does not work:
    # assert mock_isfile.call_count == 5
    assert result.exit_code == 0
    assert mock_main_init.called
    files = os.listdir(path)
    assert sorted(files) == sorted(expected_files)
    assert not mock_input.called

    # assert we correctly wrote the files
    for fle in files:
        sourcepath = get_templates_fpath(fle)
        destpath = os.path.join(path, fle)
        if os.path.splitext(fle)[1] == '.yaml':
            # check loaded yaml, which also assures our templates are well formed:
            sourceconfig = yaml_load(sourcepath)
            destconfig = yaml_load(destpath)
            if os.path.basename(sourcepath) == 'download.yaml':
                assert sorted(sourceconfig.keys()) == sorted(destconfig.keys())
            else:
                # assert we have all keys. Note that 'advanced_settings' is not in
                # sourceconfig (it is added via the jinja2 templating system):
                assert sorted(['advanced_settings'] + list(sourceconfig.keys())) \
                    == sorted(destconfig.keys())
            for key in sourceconfig.keys():
                assert type(sourceconfig[key]) == type(destconfig[key])
        elif os.path.splitext(fle)[1] == '.py':
            # check loaded python modules, which also assures our templates are well formed:
            sourcepy = load_source(sourcepath)
            destpy = load_source(destpath)
            # avoid comparing "__blabla__" attributes, as they are intended to be python
            # 'private' attributes and there are differences between py2 and py3;
            # we want to test that OUR stuff is the same
            sourcekeys = [
                a for a in dir(sourcepy) if (a[:2] + a[-2:]) != "____"
            ]
            destkeys = [a for a in dir(destpy) if (a[:2] + a[-2:]) != "____"]
            assert sorted(sourcekeys) == sorted(destkeys)
            for key in sourcekeys:
                assert type(getattr(sourcepy,
                                    key)) == type(getattr(destpy, key))
        elif fle not in non_python_files:
            raise ValueError(
                'The file "%s" is not supposed to be copied by `init`' % fle)

    # try to write to the same dir (1)
    mock_input.reset_mock()
    mock_input.side_effect = lambda arg: '1'  # overwrite all files
    maxmodtime = max_mod_time()
    # we'll test that files are modified, but on Mac timestamps are rounded to seconds
    # so wait 1 second to be safe
    time.sleep(1)
    result = runner.invoke(cli, ['init', path])
    assert mock_input.called
    assert max_mod_time() > maxmodtime
    assert '%d file(s) copied in' % len(expected_files) in result.output

    # try to write to the same dir (2)
    for click_prompt_ret_val in ('', '2'):
        # '' => skip overwrite
        # '2' => overwrite only non-existing
        # in this case, both of the above return values produce the same result
        mock_input.reset_mock()
        mock_input.side_effect = lambda arg: click_prompt_ret_val
        maxmodtime = max_mod_time()
        time.sleep(1)  # see comment above
        result = runner.invoke(cli, ['init', path])
        assert mock_input.called
        assert max_mod_time() == maxmodtime
        assert 'No file copied' in result.output

    os.remove(os.path.join(path, expected_files[0]))
    # try to write to the same dir (3)
    mock_input.reset_mock()
    mock_input.side_effect = lambda arg: '2'  # overwrite the (1) non-existing file
    maxmodtime = max_mod_time()
    time.sleep(1)  # see comment above
    result = runner.invoke(cli, ['init', path])
    assert mock_input.called
    assert max_mod_time() > maxmodtime
    assert '1 file(s) copied in' in result.output
    def init(self, request, db, data):
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)
        # setup a run_id:
        rdw = Download()
        db.session.add(rdw)
        db.session.commit()
        self.run = rdw

        # side effects:
        self._dc_urlread_sideeffect = """http://geofon.gfz-potsdam.de/fdsnws/dataselect/1/query
ZZ * * * 2002-09-01T00:00:00 2005-10-20T00:00:00
UP ARJ * * 2013-08-01T00:00:00 2017-04-25

http://ws.resif.fr/fdsnws/dataselect/1/query
ZU * * HHZ 2015-01-01T00:00:00 2016-12-31T23:59:59.999999

"""
        self._mintraveltime_sideeffect = cycle([1])
        self._seg_data = data.read("GE.FLT1..HH?.mseed")
        self._seg_data_gaps = data.read("IA.BAKI..BHZ.D.2016.004.head")
        self._seg_data_empty = b''
        self._seg_urlread_sideeffect = [
            self._seg_data, self._seg_data_gaps, 413, 500, self._seg_data[:2],
            self._seg_data_empty, 413,
            URLError("++urlerror++"),
            socket.timeout()
        ]
        self.service = ''  # so get_datacenters_df accepts any row by default
        self.db_buf_size = 1
        self.routing_service = yaml_load(get_templates_fpath("download.yaml"))\
            ['advanced_settings']['routing_service_url']

        # NON db stuff (logging, patchers, pandas...):
        self.logout = StringIO()
        handler = StreamHandler(stream=self.logout)
        self._logout_cache = ""
        # THIS IS A HACK:
        query_logger.setLevel(logging.INFO)  # necessary to forward to handlers
        # if closing is called (we are testing the whole chain) the level will be reset
        # (to logging.INFO), otherwise it stays at what we set two lines above. Problems
        # might arise if closing sets a different level, but for the moment who cares
        query_logger.addHandler(handler)

        # when debugging, I want the full dataframe with to_string(), not truncated
        # NOTE: this messes up right alignment of numbers in DownloadStats (see utils.py)
        # FIRST, remember current settings and restore them in cleanup:
        _pd_display_maxcolwidth = pd.get_option('display.max_colwidth')
        pd.set_option('display.max_colwidth', -1)

        # define class-level patchers (we do not use a yield as we need to do more stuff
        # in the finalizer, see below):
        patchers = []

        patchers.append(patch('stream2segment.utils.url.urlopen'))
        self.mock_urlopen = patchers[-1].start()

        # mock ThreadPool (tp) to run one instance at a time, so we get deterministic results:
        class MockThreadPool(object):
            def __init__(self, *a, **kw):
                pass

            def imap(self, func, iterable, *args):
                # make imap deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func, iterable)

            def imap_unordered(self, func_, iterable, *args):
                # make imap_unordered deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func_, iterable)

            def close(self, *a, **kw):
                pass

        # assign patches and mocks:
        patchers.append(patch('stream2segment.utils.url.ThreadPool'))
        self.mock_tpool = patchers[-1].start()
        self.mock_tpool.side_effect = MockThreadPool

        # add finalizer:
        def delete():
            pd.set_option('display.max_colwidth', _pd_display_maxcolwidth)

            for patcher in patchers:
                patcher.stop()

            hndls = query_logger.handlers[:]
            handler.close()
            for h in hndls:
                if h is handler:
                    query_logger.removeHandler(h)

        request.addfinalizer(delete)
    def init(self, request, db, data):
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)
        # setup a run_id:
        rdw = Download()
        db.session.add(rdw)
        db.session.commit()
        self.run = rdw

        # side effects:
        self._evt_urlread_sideeffect = """#EventID | Time | Latitude | Longitude | Depth/km | Author | Catalog | Contributor | ContributorID | MagType | Magnitude | MagAuthor | EventLocationName
20160508_0000129|2016-05-08 05:17:11.500000|1|1|60.0|AZER|EMSC-RTS|AZER|505483|ml|3|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN
20160508_0000004|2016-05-08 01:45:30.300000|90|90|2.0|EMSC|EMSC-RTS|EMSC|505183|ml|4|EMSC|CROATIA
"""
        self._mintraveltime_sideeffect = cycle([1])
        self._seg_data = data.read("GE.FLT1..HH?.mseed")
        self._seg_data_gaps = data.read("IA.BAKI..BHZ.D.2016.004.head")
        self._seg_data_empty = b''
        self._seg_urlread_sideeffect = [
            self._seg_data, self._seg_data_gaps, 413, 500, self._seg_data[:2],
            self._seg_data_empty, 413,
            URLError("++urlerror++"),
            socket.timeout()
        ]
        self.service = ''  # so get_datacenters_df accepts any row by default
        self.db_buf_size = 1
        self.routing_service = yaml_load(get_templates_fpath("download.yaml"))\
            ['advanced_settings']['routing_service_url']

        # NON db stuff (logging, patchers, pandas...):
        self.loghandler = StreamHandler(stream=StringIO())

        # THIS IS A HACK:
        query_logger.setLevel(logging.INFO)  # necessary to forward to handlers
        # if closing is called (we are testing the whole chain) the level will be reset
        # (to logging.INFO), otherwise it stays at what we set two lines above. Problems
        # might arise if closing sets a different level, but for the moment who cares
        query_logger.addHandler(self.loghandler)

        # when debugging, I want the full dataframe with to_string(), not truncated
        # NOTE: this messes up right alignment of numbers in DownloadStats (see utils.py)
        # FIRST, remember current settings and restore them in cleanup:
        _pd_display_maxcolwidth = pd.get_option('display.max_colwidth')
        pd.set_option('display.max_colwidth', -1)

        # define class-level patchers (we do not use a yield as we need to do more stuff
        # in the finalizer, see below):
        patchers = []

        patchers.append(patch('stream2segment.utils.url.urlopen'))
        self.mock_urlopen = patchers[-1].start()

        # mock ThreadPool (tp) to run one instance at a time, so we get deterministic results:
        class MockThreadPool(object):
            def __init__(self, *a, **kw):
                pass

            def imap(self, func, iterable, *args):
                # make imap deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func, iterable)

            def imap_unordered(self, func_, iterable, *args):
                # make imap_unordered deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func_, iterable)

            def close(self, *a, **kw):
                pass

        # assign patches and mocks:
        patchers.append(patch('stream2segment.utils.url.ThreadPool'))
        self.mock_tpool = patchers[-1].start()
        self.mock_tpool.side_effect = MockThreadPool

        # add finalizer:
        def delete():
            pd.set_option('display.max_colwidth', _pd_display_maxcolwidth)

            for patcher in patchers:
                patcher.stop()

            hndls = query_logger.handlers[:]
            for h in hndls:
                if h is self.loghandler:
                    self.loghandler.close()
                    query_logger.removeHandler(h)

        request.addfinalizer(delete)