Example 1
def test_inv_cache(data):
    class Segment(object):
        def __init__(self, id_, staid):
            if staid is not None:
                self.station = Segment(staid, None)
            else:
                self.station = None
            self.id = id_

    def_size_limit = _default_size_limits()[1]

    inventory = data.read_inv("GE.FLT1.xml")

    @contextmanager
    def setup(inv_cache_instance):
        with patch.object(
                InventoryCache,
                '_size_limit_popped',
                wraps=inv_cache_instance._size_limit_popped) as _mock_popitem:
            yield inv_cache_instance, _mock_popitem

    # test InventoryCache with no size_limit arg (the default size limit applies):
    with setup(InventoryCache()) as (inv, mock_popitem):
        for i in range(def_size_limit):
            inv[Segment(i, 1)] = inventory
        assert not mock_popitem.called

    # test always supplying the same inventory
    with setup(InventoryCache()) as (inv, mock_popitem):
        # same station id (1) for all segments:
        # it does not matter: keys are removed if they map to the same value
        # (compared via 'is'), thus all keys will be removed
        for i in range(def_size_limit + 1):
            inv[Segment(i, 1)] = inventory
        assert mock_popitem.call_count == 1
        assert len(inv) == 0
    with setup(InventoryCache()) as (inv, mock_popitem):
        # different station ids (0,1,2,..) for all segments:
        # it does not matter: keys are removed if they map to the same
        # value (compared via 'is'), thus all keys will be removed
        mock_popitem.reset_mock()
        for i in range(def_size_limit + 1):
            inv[Segment(i, i)] = inventory
        assert mock_popitem.call_count == 1
        assert len(inv) == 0
    with setup(InventoryCache()) as (inv, mock_popitem):
        # now provide different objects, we remove only one element
        mock_popitem.reset_mock()
        for i in range(def_size_limit + 1):
            inv[Segment(i, i)] = ValueError('a')
        assert mock_popitem.call_count == 1
        assert len(inv) == def_size_limit
    with setup(InventoryCache()) as (inv, mock_popitem):
        # now provide the same object again: we end up with 0 items again
        v = ValueError('a')
        mock_popitem.reset_mock()
        for i in range(def_size_limit + 1):
            inv[Segment(i, i)] = v
        assert mock_popitem.call_count == 1
        assert len(inv) == 0
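The eviction semantics exercised above can be reproduced with a minimal sketch (a hypothetical stand-in, not the real InventoryCache): on overflow, the oldest item is popped, then every other key holding the same value object (compared via 'is') is evicted too.

from collections import OrderedDict


class IdentityEvictingCache(OrderedDict):
    """Minimal sketch of the eviction behavior tested above: on overflow,
    pop the oldest item, then evict every key whose value is the same
    object (identity comparison)."""

    def __init__(self, size_limit=30):
        self.size_limit = size_limit
        super().__init__()

    def __setitem__(self, key, value):
        super().__setitem__(key, value)
        if len(self) > self.size_limit:
            _, popped = self.popitem(last=False)  # pop the oldest item (FIFO)
            # evict all keys sharing the popped value (identity check):
            for k in [k for k, v in self.items() if v is popped]:
                del self[k]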
Example 2
    def test_query4gui(self, db):
        s = db.session.query(Station).first()
        e = db.session.query(Event).first()
        dc = db.session.query(DataCenter).first()
        run = db.session.query(Download).first()

        channels = [
            Channel(location='00', channel='HHE', sample_rate=6),
            Channel(location='00', channel='HHN', sample_rate=6),
            Channel(location='00', channel='HHZ', sample_rate=6),
            Channel(location='00', channel='HHW', sample_rate=6),

            Channel(location='10', channel='HHE', sample_rate=6),
            Channel(location='10', channel='HHN', sample_rate=6),
            Channel(location='10', channel='HHZ', sample_rate=6),

            Channel(location='', channel='HHE', sample_rate=6),
            Channel(location='', channel='HHN', sample_rate=6),

            Channel(location='30', channel='HHZ', sample_rate=6)]
        # expected lengths when querying for gui below. CHANGE THIS
        # IF YOU CHANGE THE PREVIOUS channels VARIABLE
        expected_lengths = [4, 4, 4, 4, 3, 3, 3, 2, 2, 1]

        s.channels.extend(channels)
        db.session.commit()

        args = dict(request_start=datetime.utcnow(),
                    request_end=datetime.utcnow(),
                    event_distance_deg=9,
                    arrival_time=datetime.utcnow(),
                    data=b'',
                    event_id=e.id,
                    datacenter_id=dc.id,
                    download_id=run.id)
        segments = []
        # now that the channels have ids (after commit), segments can reference them:
        for c in channels:
            segments.append(Segment(channel_id=c.id, **args))

        db.session.add_all(segments)
        db.session.commit()

        for leng, segment in zip(expected_lengths, segments):
            # assert the other segments have the expected length. Note that
            # leng INCLUDES the current segment whereas siblings() DOES NOT,
            # so compare to leng-1:
            assert segment.siblings().count() == leng-1
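A plausible sketch of the siblings() query exercised above, assuming the Segment and Channel models of this fixture (the real implementation may differ): segments of the same event recorded at the same station and location, excluding the segment itself.

def siblings_query(session, segment):
    # hypothetical sketch: the other components of the same recording
    chan = session.query(Channel).get(segment.channel_id)
    return session.query(Segment).join(
        Channel, Channel.id == Segment.channel_id).filter(
            (Segment.event_id == segment.event_id) &
            (Channel.station_id == chan.station_id) &
            (Channel.location == chan.location) &
            (Segment.id != segment.id))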
Example 3
    def test_prepare_for_download(self, db, tt_ak135_tts):
        # prepare:
        urlread_sideeffect = None  # use defaults from class
        events_df = self.get_events_df(urlread_sideeffect, db.session)
        net, sta, loc, cha = [], [], [], []
        datacenters_df, eidavalidator = \
            self.get_datacenters_df(urlread_sideeffect, db.session, None, self.routing_service,
                                    net, sta, loc, cha, db_bufsize=self.db_buf_size)
        channels_df = self.get_channels_df(urlread_sideeffect, db.session,
                                           datacenters_df, eidavalidator, net,
                                           sta, loc, cha, None, None, 100,
                                           False, None, None, -1,
                                           self.db_buf_size)
        assert len(
            channels_df
        ) == 12  # just to be sure. If failing, we might have changed the class default
        # events_df
        #    id  magnitude  latitude  longitude  depth_km                    time
        # 0  1   3.0        1.0       1.0        60.0     2016-05-08 05:17:11.500
        # 1  2   4.0        90.0      90.0       2.0      2016-05-08 01:45:30.300

        # channels_df:
        #    id  station_id  latitude  longitude  datacenter_id start_time end_time network station location channel
        # 0   1   1           1.0       1.0        1             2003-01-01 NaT       GE      FLT1             HHE
        # 1   2   1           1.0       1.0        1             2003-01-01 NaT       GE      FLT1             HHN
        # 2   3   1           1.0       1.0        1             2003-01-01 NaT       GE      FLT1             HHZ
        # 3   4   2           90.0      90.0       1             2009-01-01 NaT       n1      s                c1
        # 4   5   2           90.0      90.0       1             2009-01-01 NaT       n1      s                c2
        # 5   6   2           90.0      90.0       1             2009-01-01 NaT       n1      s                c3
        # 6   7   3           1.0       1.0        2             2003-01-01 NaT       IA      BAKI             BHE
        # 7   8   3           1.0       1.0        2             2003-01-01 NaT       IA      BAKI             BHN
        # 8   9   3           1.0       1.0        2             2003-01-01 NaT       IA      BAKI             BHZ
        # 9   10  4           90.0      90.0       2             2009-01-01 NaT       n2      s                c1
        # 10  11  4           90.0      90.0       2             2009-01-01 NaT       n2      s                c2
        # 11  12  4           90.0      90.0       2             2009-01-01 NaT       n2      s                c3

        # take all segments:
        segments_df = merge_events_stations(events_df,
                                            channels_df,
                                            dict(minmag=10,
                                                 maxmag=10,
                                                 minmag_radius=100,
                                                 maxmag_radius=200),
                                            tttable=tt_ak135_tts)

        # segments_df:
        #    channel_id  station_id  datacenter_id network station location channel  event_distance_deg  event_id  depth_km                    time               arrival_time
        # 0  1           1           1              GE      FLT1             HHE     500.555             1         60.0     2016-05-08 05:17:11.500 2017-05-10 12:39:13.463745
        # 1  2           1           1              GE      FLT1             HHN     500.555             1         60.0     2016-05-08 05:17:11.500 2017-05-10 12:39:13.463745
        # 2  3           1           1              GE      FLT1             HHZ     500.555             1         60.0     2016-05-08 05:17:11.500 2017-05-10 12:39:13.463745
        # 3  4           2           1              n1      s                c1      89.0                1         60.0     2016-05-08 05:17:11.500 NaT
        # 4  5           2           1              n1      s                c2      89.0                1         60.0     2016-05-08 05:17:11.500 NaT
        # 5  6           2           1              n1      s                c3      89.0                1         60.0     2016-05-08 05:17:11.500 NaT
        # 6  7           3           2              IA      BAKI             BHE     0.0                 1         60.0     2016-05-08 05:17:11.500 NaT
        # 7  8           3           2              IA      BAKI             BHN     0.0                 1         60.0     2016-05-08 05:17:11.500 NaT
        # 8  9           3           2              IA      BAKI             BHZ     0.0                 1         60.0     2016-05-08 05:17:11.500 NaT
        # 9  10          4           2              n2      s                c1      89.0                1         60.0     2016-05-08 05:17:11.500 NaT
        # 10  11          4           2              n2      s                c2      89.0                1         60.0     2016-05-08 05:17:11.500 NaT
        # 11  12          4           2              n2      s                c3      89.0                1         60.0     2016-05-08 05:17:11.500 NaT
        # 12  1           1           1              GE      FLT1             HHE     89.0                2         2.0      2016-05-08 01:45:30.300 NaT
        # 13  2           1           1              GE      FLT1             HHN     89.0                2         2.0      2016-05-08 01:45:30.300 NaT
        # 14  3           1           1              GE      FLT1             HHZ     89.0                2         2.0      2016-05-08 01:45:30.300 NaT
        # 15  4           2           1              n1      s                c1      0.0                 2         2.0      2016-05-08 01:45:30.300 NaT
        # 16  5           2           1              n1      s                c2      0.0                 2         2.0      2016-05-08 01:45:30.300 NaT
        # 17  6           2           1              n1      s                c3      0.0                 2         2.0      2016-05-08 01:45:30.300 NaT
        # 18  7           3           2              IA      BAKI             BHE     89.0                2         2.0      2016-05-08 01:45:30.300 NaT
        # 19  8           3           2              IA      BAKI             BHN     89.0                2         2.0      2016-05-08 01:45:30.300 NaT
        # 20  9           3           2              IA      BAKI             BHZ     89.0                2         2.0      2016-05-08 01:45:30.300 NaT
        # 21  10          4           2              n2      s                c1      0.0                 2         2.0      2016-05-08 01:45:30.300 NaT
        # 22  11          4           2              n2      s                c2      0.0                 2         2.0      2016-05-08 01:45:30.300 NaT
        # 23  12          4           2              n2      s                c3      0.0                 2         2.0      2016-05-08 01:45:30.300 NaT


        expected = len(
            segments_df
        )  # no segment on db, we should have all segments to download
        wtimespan = [1, 2]
        assert Segment.id.key not in segments_df.columns
        assert Segment.download_id.key not in segments_df.columns
        orig_seg_df = segments_df.copy()
        # define a dc_dataselect_manager for open data only:
        dc_dataselect_manager = DcDataselectManager(datacenters_df,
                                                    Authorizer(None), False)
        segments_df, request_timebounds_need_update = \
            prepare_for_download(db.session, orig_seg_df, dc_dataselect_manager, wtimespan,
                                 retry_seg_not_found=True,
                                 retry_url_err=True,
                                 retry_mseed_err=True,
                                 retry_client_err=True,
                                 retry_server_err=True,
                                 retry_timespan_err=True,
                                 retry_timespan_warn=True)
        assert request_timebounds_need_update is False

        # segments_df (not the actual dataframe: some columns were removed, but the relevant data is correct):
        #    channel_id  datacenter_id network station location channel  event_distance_deg  event_id            arrival_time          start_time            end_time
        # 0  1           1              GE      FLT1             HHE     0.0                 1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 1  2           1              GE      FLT1             HHN     0.0                 1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 2  3           1              GE      FLT1             HHZ     0.0                 1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 3  4           1              n1      s                c1      89.0                1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 4  5           1              n1      s                c2      89.0                1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 5  6           1              n1      s                c3      89.0                1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 6  7           2              IA      BAKI             BHE     0.0                 1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 7  8           2              IA      BAKI             BHN     0.0                 1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 8  9           2              IA      BAKI             BHZ     0.0                 1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 9  10          2              n2      s                c1      89.0                1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 10  11          2              n2      s                c2      89.0                1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 11  12          2              n2      s                c3      89.0                1        2016-05-08 05:17:12.500 2016-05-08 05:16:12 2016-05-08 05:19:12
        # 12  1           1              GE      FLT1             HHE     89.0                2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 13  2           1              GE      FLT1             HHN     89.0                2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 14  3           1              GE      FLT1             HHZ     89.0                2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 15  4           1              n1      s                c1      0.0                 2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 16  5           1              n1      s                c2      0.0                 2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 17  6           1              n1      s                c3      0.0                 2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 18  7           2              IA      BAKI             BHE     89.0                2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 19  8           2              IA      BAKI             BHN     89.0                2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 20  9           2              IA      BAKI             BHZ     89.0                2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 21  10          2              n2      s                c1      0.0                 2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 22  11          2              n2      s                c2      0.0                 2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31
        # 23  12          2              n2      s                c3      0.0                 2        2016-05-08 01:45:31.300 2016-05-08 01:44:31 2016-05-08 01:47:31

        assert Segment.id.key in segments_df.columns
        assert Segment.download_id.key not in segments_df.columns
        assert len(segments_df) == expected
        # assert len(db.session.query(Segment.id).all()) == len(segments_df)

        assert all(x[0] is None
                   for x in db.session.query(Segment.download_code).all())
        assert all(x[0] is None for x in db.session.query(Segment.data).all())

        # mock already downloaded segments:
        # set the first 7 to have a particular download status code each
        urlerr, mseederr, outtime_err, outtime_warn = \
            s2scodes.url_err, s2scodes.mseed_err, s2scodes.timespan_err, s2scodes.timespan_warn
        downloadstatuscodes = [
            None, urlerr, mseederr, 413, 505, outtime_err, outtime_warn
        ]
        for i, download_code in enumerate(downloadstatuscodes):
            dic = segments_df.iloc[i].to_dict()
            dic['download_code'] = download_code
            dic['download_id'] = self.run.id
            # hack for deleting unused columns:
            for col in [
                    Station.network.key, Station.station.key,
                    Channel.location.key, Channel.channel.key
            ]:
                if col in dic:
                    del dic[col]
            # convert numpy values to python scalars:
            # pandas 20+ seems to keep numpy types in to_dict
            # https://github.com/pandas-dev/pandas/issues/13258
            # (this was not the case in pandas 0.19.2)
            # and sqlalchemy does not like that
            # (curiously, our pd2sql methods still work fine; we should check
            # why). So, quick and dirty:
            for k in dic.keys():
                if hasattr(dic[k], "item"):
                    dic[k] = dic[k].item()
            # postgres complains about nan primary keys
            if math.isnan(dic.get(Segment.id.key, 0)):
                del dic[Segment.id.key]

            # now we can safely add it:
            db.session.add(Segment(**dic))

        db.session.commit()

        assert len(db.session.query(
            Segment.id).all()) == len(downloadstatuscodes)

        # Now the db has one segment per download code (7 in total, one per
        # retry flag), while the remaining segments are not on the db.
        # Check that all retry combinations work. The segments to be
        # downloaded regardless of the retry flags are the unsaved ones, i.e.:
        to_download_anyway = len(segments_df) - len(downloadstatuscodes)
        for c in product([True, False], [True, False], [True, False],
                         [True, False], [True, False], [True, False],
                         [True, False]):
            s_df, request_timebounds_need_update = \
                prepare_for_download(db.session, orig_seg_df, dc_dataselect_manager, wtimespan,
                                     retry_seg_not_found=c[0],
                                     retry_url_err=c[1],
                                     retry_mseed_err=c[2],
                                     retry_client_err=c[3],
                                     retry_server_err=c[4],
                                     retry_timespan_err=c[5],
                                     retry_timespan_warn=c[6])
            to_download_in_this_case = sum(c)  # count the Trues (sum of booleans)
            assert len(s_df) == to_download_anyway + to_download_in_this_case
            assert request_timebounds_need_update is False

        # now change the window time span and see that everything is to be downloaded again:
        # do it for any retry combinations, as it should ALWAYS return "everything has to be
        # re-downloaded"
        wtimespan[1] += 5
        for c in product([True, False], [True, False], [True, False],
                         [True, False], [True, False], [True, False],
                         [True, False]):
            s_df, request_timebounds_need_update = \
                prepare_for_download(db.session, orig_seg_df, dc_dataselect_manager, wtimespan,
                                     retry_seg_not_found=c[0],
                                     retry_url_err=c[1],
                                     retry_mseed_err=c[2],
                                     retry_client_err=c[3],
                                     retry_server_err=c[4],
                                     retry_timespan_err=c[5],
                                     retry_timespan_warn=c[6])
            assert len(s_df) == len(orig_seg_df)
            assert request_timebounds_need_update is True  # because we changed wtimespan

        # now test that we raise a NothingToDownload
        # first, write all remaining segments to db, with 204 code so they will not be
        # re-downloaded
        for i in range(len(segments_df)):
            download_code = 204
            dic = segments_df.iloc[i].to_dict()
            dic['download_code'] = download_code
            dic['download_id'] = self.run.id
            # hack for deleting unused columns:
            for col in [
                    Station.network.key, Station.station.key,
                    Channel.location.key, Channel.channel.key
            ]:
                if col in dic:
                    del dic[col]
            # convert numpy values to python scalars:
            # pandas 20+ seems to keep numpy types in to_dict
            # https://github.com/pandas-dev/pandas/issues/13258
            # (this was not the case in pandas 0.19.2)
            # and sqlalchemy does not like that
            # (curiously, our pd2sql methods still work fine; we should check
            # why). So, quick and dirty:
            for k in dic.keys():
                if hasattr(dic[k], "item"):
                    dic[k] = dic[k].item()
            # postgres complains about nan primary keys
            if math.isnan(dic.get(Segment.id.key, 0)):
                del dic[Segment.id.key]

            # now we can safely add it:
            # brutal approach: add and commit; on error, rollback.
            # An error means the segment was already written to the db and a
            # unique constraint violation was raised
            try:
                db.session.add(Segment(**dic))
                db.session.commit()
            except SQLAlchemyError as _err:
                db.session.rollback()
        # test that we have the correct number of segments saved:
        assert db.session.query(Segment.id).count() == len(orig_seg_df)
        # try to test a NothingToDownload:
        # reset the old wtimespan otherwise everything will be flagged to be redownloaded:
        wtimespan[1] -= 5
        with pytest.raises(NothingToDownload):
            segments_df, request_timebounds_need_update = \
                prepare_for_download(db.session, orig_seg_df, dc_dataselect_manager, wtimespan,
                                     retry_seg_not_found=False,
                                     retry_url_err=False,
                                     retry_mseed_err=False,
                                     retry_client_err=False,
                                     retry_server_err=False,
                                     retry_timespan_err=False,
                                     retry_timespan_warn=False)
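The numpy-to-Python conversion and NaN-id cleanup above appear twice and could be factored into a helper. A sketch under the same assumptions (pandas to_dict may return numpy scalars, which SQLAlchemy rejects; the helper name is hypothetical):

import math


def row_to_orm_kwargs(row_dict, drop_cols=(), pk_key='id'):
    """Hypothetical helper: drop unused columns, convert numpy scalars
    to Python scalars via .item(), and remove a NaN primary key
    (which e.g. postgres rejects)."""
    out = {}
    for key, value in row_dict.items():
        if key in drop_cols:
            continue
        if hasattr(value, 'item'):  # numpy scalar -> python scalar
            value = value.item()
        out[key] = value
    pk = out.get(pk_key, 0)
    if isinstance(pk, float) and math.isnan(pk):
        del out[pk_key]
    return out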
Example 4
    def test_segwrapper(
            self,
            mock_getstream,
            mock_getinv,
            # fixtures:
            db4process,
            data):
        session = db4process.session
        segids = query4process(session, {}).all()
        seg_with_inv = \
            db4process.segments(with_inventory=True, with_data=True, with_gap=False).one()
        sta_with_inv_id = seg_with_inv.station.id
        invcache = {}

        def read_stream(segment, reload=False):
            '''calls segment.stream(reload), asserting that it raises if the
            segment has no data. This function itself never raises'''
            if segment.data:
                segment.stream(reload)
            else:
                with pytest.raises(Exception):  # no data: stream() raises
                    segment.stream(reload)

        prev_staid = None
        for segid in [_[0] for _ in segids]:
            segment = session.query(Segment).filter(
                Segment.id == segid).first()
            sta = segment.station
            staid = sta.id
            assert prev_staid is None or staid >= prev_staid
            staequal = prev_staid is not None and staid == prev_staid
            prev_staid = staid
            segment.station._inventory = invcache.get(sta.id, None)

            mock_getinv.reset_mock()
            if sta.id != sta_with_inv_id:
                with pytest.raises(Exception):  # all inventories are None
                    segment.inventory()
                assert mock_getinv.called
                # re-call it and assert we raise the previous Exception:
                ccc = mock_getinv.call_count
                with pytest.raises(Exception):  # all inventories are None
                    segment.inventory()
                assert mock_getinv.call_count == ccc
                # re-call it with reload=True and assert we raise the previous
                # exception, and that we called get_inv:
                with pytest.raises(Exception):  # all inventories are None
                    segment.inventory(True)
                assert mock_getinv.call_count == ccc + 1
            else:
                invcache[sta.id] = segment.inventory()
                if staequal:
                    assert not mock_getinv.called
                else:
                    assert mock_getinv.called
                assert len(segment.station.inventory_xml) > 0
                # re-call it with reload=True and assert we raise the previous
                # exception, and that we called get_inv:
                ccc = mock_getinv.call_count
                segment.inventory(True)
                assert mock_getinv.call_count == ccc + 1

            # call segment.stream
            assert not mock_getstream.called
            read_stream(segment)
            assert mock_getstream.call_count == 1
            read_stream(segment)
            assert mock_getstream.call_count == 1
            # with reload flag:
            read_stream(segment, True)
            assert mock_getstream.call_count == 2
            mock_getstream.reset_mock()

            segs = segment.siblings().all()
            # as the channel code here is either 'ok' or 'err', there should
            # never be other components:
            assert len(segs) == 0

        # NOW TEST ORIENTATIONS PROPERLY. WE NEED TO ADD WELL-FORMED SEGMENTS
        # WITH CHANNELS WHOSE ORIENTATION CAN BE DERIVED:
        staid = session.query(Station.id).first()[0]
        dcid = session.query(DataCenter.id).first()[0]
        eid = session.query(Event.id).first()[0]
        dwid = session.query(Download.id).first()[0]
        # add channels
        c_1 = Channel(station_id=staid,
                      location='ok',
                      channel="AB1",
                      sample_rate=56.7)
        c_2 = Channel(station_id=staid,
                      location='ok',
                      channel="AB2",
                      sample_rate=56.7)
        c_3 = Channel(station_id=staid,
                      location='ok',
                      channel="AB3",
                      sample_rate=56.7)
        session.add_all([c_1, c_2, c_3])
        session.commit()
        # add segments. Create the attributes (having bytes data is not strictly necessary)
        atts = data.to_segment_dict('trace_GE.APE.mseed')
        # build three segments with data:
        # "normal" segment
        sg1 = Segment(channel_id=c_1.id,
                      datacenter_id=dcid,
                      event_id=eid,
                      download_id=dwid,
                      event_distance_deg=35,
                      **atts)
        sg2 = Segment(channel_id=c_2.id,
                      datacenter_id=dcid,
                      event_id=eid,
                      download_id=dwid,
                      event_distance_deg=35,
                      **atts)
        sg3 = Segment(channel_id=c_3.id,
                      datacenter_id=dcid,
                      event_id=eid,
                      download_id=dwid,
                      event_distance_deg=35,
                      **atts)
        session.add_all([sg1, sg2, sg3])
        session.commit()
        # start testing:
        segids = query4process(session, {}).all()

        for segid in [_[0] for _ in segids]:
            segment = session.query(Segment).filter(
                Segment.id == segid).first()
            segs = segment.siblings()
            if segs.all():
                assert segment.id in (sg1.id, sg2.id, sg3.id)
                assert len(segs.all()) == 2
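The invcache dict above is a simple per-station memoization of inventories; in isolation the pattern looks like this (a sketch, assuming segment.inventory() as used above):

inventory_cache = {}  # station id -> inventory object


def get_inventory_cached(segment):
    # fetch a station's inventory only once, reusing it for all segments
    # of that station (may raise, as exercised in the test above):
    sta_id = segment.station.id
    if sta_id not in inventory_cache:
        inventory_cache[sta_id] = segment.inventory()
    return inventory_cache[sta_id]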
Example 5
    def init(self, request, db, data):
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=True)

        # setup a run_id:
        r = Download()
        db.session.add(r)
        db.session.commit()
        self.run = r

        ws = WebService(id=1, url='eventws')
        db.session.add(ws)
        db.session.commit()
        self.ws = ws
        # setup an event:
        e1 = Event(id=1, webservice_id=ws.id, event_id='ev1', latitude=8, longitude=9, magnitude=5,
                   depth_km=4, time=datetime.utcnow())
        e2 = Event(id=2, webservice_id=ws.id, event_id='ev2', latitude=8, longitude=9, magnitude=5,
                   depth_km=4, time=datetime.utcnow())
        e3 = Event(id=3, webservice_id=ws.id, event_id='ev3', latitude=8, longitude=9, magnitude=5,
                   depth_km=4, time=datetime.utcnow())
        e4 = Event(id=4, webservice_id=ws.id, event_id='ev4', latitude=8, longitude=9, magnitude=5,
                   depth_km=4, time=datetime.utcnow())
        e5 = Event(id=5, webservice_id=ws.id, event_id='ev5', latitude=8, longitude=9, magnitude=5,
                   depth_km=4, time=datetime.utcnow())
        db.session.add_all([e1, e2, e3, e4, e5])
        db.session.commit()

        d1 = DataCenter(station_url='asd', dataselect_url='www.dc1/dataselect/query')
        d2 = DataCenter(station_url='asd', dataselect_url='www.dc2/dataselect/query')
        db.session.add_all([d1, d2])
        db.session.commit()

        # d1 has one station
        s_d1 = Station(datacenter_id=d1.id, latitude=11, longitude=11, network='N1', station='S1',
                       start_time=datetime.utcnow())
        s_d2 = Station(datacenter_id=d1.id, latitude=22.1, longitude=22.1, network='N1',
                       station='S2a', start_time=datetime.utcnow())
        s2_d2 = Station(datacenter_id=d1.id, latitude=22.2, longitude=22.2, network='N2',
                        station='S2b', start_time=datetime.utcnow())
        db.session.add_all([s_d1, s_d2, s2_d2])
        db.session.commit()

        # we are about to add 3 stations * 4 channels = 12 channels.
        # We also add 1 segment per channel.
        # The segment data is as follows (data, download_code, maxgap):
        seg_data = ([None, s2scodes.url_err, None],
                    [None, s2scodes.mseed_err, None],
                    [None, None, None],
                    [None, s2scodes.timespan_err, None],
                    # station s_d2:
                    [b'x', 200, 0.2],
                    [b'x', s2scodes.timespan_warn, 3.9],
                    [b'x', 200, 0.6],
                    [b'x', 200, 0.3],
                    # station s2_d2:
                    [b'x', 200, 0.1],
                    [b'x', s2scodes.timespan_warn, 3.9],
                    [b'x', 400, None],
                    [b'x', 500, None],
                    )

        i = 0
        for s in [s_d1, s_d2, s2_d2]:
            for cha in ['HHZ', 'HHE', 'HHN', 'ABC']:
                c = Channel(station_id=s.id, location='', channel=cha, sample_rate=56.7)
                db.session.add(c)
                db.session.commit()

                data, code, gap = seg_data[i]
                i += 1
                seg = Segment(channel_id=c.id, datacenter_id=s.datacenter_id,
                              event_id=e1.id, download_id=r.id,
                              event_distance_deg=35, request_start=datetime.utcnow(),
                              arrival_time=datetime.utcnow(),
                              request_end=datetime.utcnow() + timedelta(seconds=5), data=data,
                              download_code=code, maxgap_numsamples=gap)
                db.session.add(seg)
                db.session.commit()

        with patch('stream2segment.utils.inputargs.get_session',
                   return_value=db.session) as mock_session:
            yield
Example 6
def __init__(self, id_, staid):
    if staid is not None:
        self.station = Segment(staid, None)
    else:
        self.station = None
    self.id = id_
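Usage of the stub (the same class as in Example 1): a Segment built with a station id exposes a nested .station with its own .id, mimicking the ORM relationship one level deep.

seg = Segment(1, 42)
assert seg.id == 1
assert seg.station.id == 42
assert seg.station.station is None  # recursion stops after one level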
Example 7
    def init(self, request, db, data):
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False, process=True)

        # init db:
        session = db.session

        dct = DataCenter(station_url="345fbgfnyhtgrefs",
                         dataselect_url='edfawrefdc')
        session.add(dct)

        utcnow = datetime.utcnow()

        dwl = Download(run_time=utcnow)
        session.add(dwl)

        ws = WebService(url='webserviceurl')
        session.add(ws)
        session.commit()

        ev1 = Event(event_id='event1',
                    webservice_id=ws.id,
                    time=utcnow,
                    latitude=89.5,
                    longitude=6,
                    depth_km=7.1,
                    magnitude=56)
        # note: ev2 is not used; store it in the db anyway...
        ev2 = Event(event_id='event2',
                    webservice_id=ws.id,
                    time=utcnow + timedelta(seconds=5),
                    latitude=89.5,
                    longitude=6,
                    depth_km=7.1,
                    magnitude=56)

        session.add_all([ev1, ev2])

        session.commit()  # refresh datacenter id (also flush works)

        d = datetime.utcnow()

        s = Station(network='network',
                    station='station',
                    datacenter_id=dct.id,
                    latitude=90,
                    longitude=-45,
                    start_time=d)
        session.add(s)

        channels = [
            Channel(location='01', channel='HHE', sample_rate=6),
            Channel(location='01', channel='HHN', sample_rate=6),
            Channel(location='01', channel='HHZ', sample_rate=6),
            Channel(location='01', channel='HHW', sample_rate=6),
            Channel(location='02', channel='HHE', sample_rate=6),
            Channel(location='02', channel='HHN', sample_rate=6),
            Channel(location='02', channel='HHZ', sample_rate=6),
            Channel(location='04', channel='HHZ', sample_rate=6),
            Channel(location='05', channel='HHE', sample_rate=6),
            Channel(location='05gap_merged', channel='HHN', sample_rate=6),
            Channel(location='05err', channel='HHZ', sample_rate=6),
            Channel(location='05gap_unmerged', channel='HHZ', sample_rate=6)
        ]

        s.channels.extend(channels)
        session.commit()

        fixed_args = dict(datacenter_id=dct.id, download_id=dwl.id)

        # Note: data_gaps_merged is a stream where gaps can be merged via obspy.Stream.merge
        # data_gaps_unmerged is a stream whose gaps cannot be merged (it is a
        # stream of three different channels of the same event)
        data_gaps_unmerged = data.read("GE.FLT1..HH?.mseed")
        data_gaps_merged = data.read("IA.BAKI..BHZ.D.2016.004.head")
        data_ok = data.read("GE.FLT1..HH?.mseed")

        # create 'ok' and 'error' data, the first by taking the first trace of
        # "GE.FLT1..HH?.mseed", the second by manipulating it
        obspy_stream = data.read_stream(
            "GE.FLT1..HH?.mseed")  # read(BytesIO(data_ok))
        obspy_trace = obspy_stream[0]

        # data_ok is actually the bytes of 3 traces: write just the first one,
        # because as-is the whole stream would be considered a trace with gaps
        b = BytesIO()
        obspy_trace.write(b, format='MSEED')
        data_ok = b.getvalue()
        data_err = data_ok[:5]  # any truncating slice will do

        seedid_ok = seedid_err = obspy_trace.get_id()
        seedid_gaps_unmerged = None
        seedid_gaps_merged = read(BytesIO(data_gaps_merged))[0].get_id()

        for evt, cha in product([ev1], channels):
            val = int(cha.location[:2])
            mseed = data_gaps_merged if "gap_merged" in cha.location else \
                data_err if "err" in cha.location else \
                data_gaps_unmerged if 'gap_unmerged' in cha.location else data_ok
            seedid = seedid_gaps_merged if "gap_merged" in cha.location else \
                seedid_err if 'err' in cha.location else \
                seedid_gaps_unmerged if 'gap_unmerged' in cha.location else seedid_ok

            # set times. For everything except data_ok, we set an out-of-bounds time:
            start_time = evt.time - timedelta(seconds=5)
            arrival_time = evt.time - timedelta(seconds=4)
            end_time = evt.time - timedelta(seconds=1)

            if "gap_merged" not in cha.location and 'err' not in cha.location and \
                    'gap_unmerged' not in cha.location:
                start_time = obspy_trace.stats.starttime.datetime
                arrival_time = (
                    obspy_trace.stats.starttime +
                    (obspy_trace.stats.endtime - obspy_trace.stats.starttime) /
                    2).datetime
                end_time = obspy_trace.stats.endtime.datetime

            seg = Segment(request_start=start_time,
                          arrival_time=arrival_time,
                          request_end=end_time,
                          data=mseed,
                          data_seed_id=seedid,
                          event_distance_deg=val,
                          event_id=evt.id,
                          **fixed_args)
            cha.segments.append(seg)

        session.commit()

        self.inventory_bytes = data.read("GE.FLT1.xml")
        self.inventory = data.read_inv("GE.FLT1.xml")

        pfile, cfile = get_templates_fpaths('paramtable.py', 'paramtable.yaml')
        self.pymodule = load_source(pfile)
        self.config = yaml_load(cfile)

        # remove segment_select, we use all segments here:
        self.config.pop('segment_select', None)
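The merged/unmerged distinction in the comments above can be checked directly with obspy: Stream.merge collapses traces sharing the same SEED id, so gaps across different channels cannot be merged. A small sketch (the helper is hypothetical):

from io import BytesIO

from obspy import read


def is_mergeable(mseed_bytes):
    # True if all traces share a single SEED id, i.e. Stream.merge()
    # can collapse the gaps into one trace:
    stream = read(BytesIO(mseed_bytes))
    return len({trace.get_id() for trace in stream}) == 1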
Example 8
    def init(self, request, db, data):
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=True)

        with patch(
                'stream2segment.gui.webapp.mainapp.plots.core._default_size_limits',
                return_value=(4, 1)) as mock1:

            self.app = create_s2s_show_app(db.dburl, self.pyfile,
                                           self.configfile)

            with self.app.app_context():

                session = self.session

                dc = DataCenter(station_url="345fbgfnyhtgrefs",
                                dataselect_url='edfawrefdc')
                session.add(dc)

                utcnow = datetime.utcnow()

                run = Download(run_time=utcnow)
                session.add(run)

                ws = WebService(url='webserviceurl')
                session.add(ws)
                session.commit()

                e1 = Event(event_id='event1',
                           webservice_id=ws.id,
                           time=utcnow,
                           latitude=89.5,
                           longitude=6,
                           depth_km=7.1,
                           magnitude=56)
                e2 = Event(event_id='event2',
                           webservice_id=ws.id,
                           time=utcnow + timedelta(seconds=5),
                           latitude=89.5,
                           longitude=6,
                           depth_km=7.1,
                           magnitude=56)

                session.add_all([e1, e2])

                session.commit()  # refresh datacenter id (also flush works)

                d = datetime.utcnow()

                s = Station(network='network',
                            station='station',
                            datacenter_id=dc.id,
                            latitude=90,
                            longitude=-45,
                            start_time=d)
                session.add(s)

                channels = [
                    Channel(location='01', channel='HHE', sample_rate=6),
                    Channel(location='01', channel='HHN', sample_rate=6),
                    Channel(location='01', channel='HHZ', sample_rate=6),
                    Channel(location='01', channel='HHW', sample_rate=6),
                    Channel(location='02', channel='HHE', sample_rate=6),
                    Channel(location='02', channel='HHN', sample_rate=6),
                    Channel(location='02', channel='HHZ', sample_rate=6),
                    Channel(location='03', channel='HHE', sample_rate=6),
                    Channel(location='03', channel='HHN', sample_rate=6),
                    Channel(location='04', channel='HHZ', sample_rate=6),
                    Channel(location='05', channel='HHE', sample_rate=6),
                    Channel(location='05gap_merged',
                            channel='HHN',
                            sample_rate=6),
                    Channel(location='05err', channel='HHZ', sample_rate=6),
                    Channel(location='05gap_unmerged',
                            channel='HHZ',
                            sample_rate=6)
                ]

                s.channels.extend(channels)
                session.commit()

                fixed_args = dict(
                    datacenter_id=dc.id,
                    download_id=run.id,
                )

                data_gaps_unmerged = data.to_segment_dict("GE.FLT1..HH?.mseed")
                data_gaps_merged = data.to_segment_dict(
                    "IA.BAKI..BHZ.D.2016.004.head")
                obspy_trace = read(BytesIO(data_gaps_unmerged['data']))[0]
                # data_gaps_unmerged actually holds the bytes of 3 traces:
                # write just the first one, because as-is it would be
                # considered a trace with gaps
                b = BytesIO()
                obspy_trace.write(b, format='MSEED')
                start, end = obspy_trace.stats.starttime.datetime, obspy_trace.stats.endtime.datetime
                data_ok = dict(data_gaps_unmerged,
                               data=b.getvalue(),
                               start_time=start,
                               end_time=end,
                               arrival_time=start + (end - start) / 3)
                data_err = dict(data_ok, data=data_ok['data'][:5])

                for ev, c in product([e1, e2], channels):
                    val = int(c.location[:2])
                    data_atts = data_gaps_merged if "gap_merged" in c.location else \
                        data_err if "err" in c.location else data_gaps_unmerged \
                        if 'gap_unmerged' in c.location else data_ok
                    seg = Segment(event_distance_deg=val,
                                  event_id=ev.id,
                                  datacenter_id=dc.id,
                                  download_id=run.id,
                                  **data_atts)
                    c.segments.append(seg)

                session.commit()

                session.close()

                # set inventory
                self.inventory = data.read_inv("GE.FLT1.xml")

            yield
Example 9
    def init(self, request, db, data):
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)

        sess = db.session
        run = Download()
        sess.add(run)
        sess.commit()

        dcen = DataCenter(station_url="x/station/abc")  # invalid fdsn name
        with pytest.raises(IntegrityError):
            sess.add(dcen)
            sess.commit()
        sess.rollback()

        # https://service.iris.edu/fdsnws/station/1/

        dcen = DataCenter(
            station_url="x/station/fdsnws/station/1/")  # this is safe (valid fdsn)
        sess.add(dcen)
        sess.commit()

        # this is safe (both provided):
        dcen = DataCenter(station_url="x/station/abc",
                          dataselect_url="x/station/abc")
        sess.add(dcen)
        sess.commit()

        ws = WebService(url='abc')
        sess.add(ws)
        sess.commit()

        event1 = Event(id=1,
                       event_id='a',
                       webservice_id=ws.id,
                       time=datetime.utcnow(),
                       magnitude=5,
                       latitude=66,
                       longitude=67,
                       depth_km=6)
        event2 = Event(id=2,
                       event_id='b',
                       webservice_id=ws.id,
                       time=datetime.utcnow(),
                       magnitude=5,
                       latitude=66,
                       longitude=67,
                       depth_km=6)
        sess.add_all([event1, event2])
        sess.commit()

        sta1 = Station(id=1,
                       network='n1',
                       station='s1',
                       datacenter_id=dcen.id,
                       latitude=66,
                       longitude=67,
                       start_time=datetime.utcnow())
        sta2 = Station(id=2,
                       network='n2',
                       station='s1',
                       datacenter_id=dcen.id,
                       latitude=66,
                       longitude=67,
                       start_time=datetime.utcnow())
        sess.add_all([sta1, sta2])
        sess.commit()

        cha1 = Channel(id=1,
                       location='l1',
                       channel='c1',
                       station_id=sta1.id,
                       sample_rate=6)
        cha2 = Channel(id=2,
                       location='l2',
                       channel='c2',
                       station_id=sta1.id,
                       sample_rate=6)
        cha3 = Channel(id=3,
                       location='l3',
                       channel='c3',
                       station_id=sta1.id,
                       sample_rate=6)
        cha4 = Channel(id=4,
                       location='l4',
                       channel='c4',
                       station_id=sta2.id,
                       sample_rate=6)
        sess.add_all([cha1, cha2, cha3, cha4])
        sess.commit()

        # segment 1, with two class labels 'a' and 'b'
        seg1 = Segment(event_id=event1.id,
                       channel_id=cha3.id,
                       datacenter_id=dcen.id,
                       event_distance_deg=5,
                       download_id=run.id,
                       arrival_time=datetime.utcnow(),
                       request_start=datetime.utcnow(),
                       request_end=datetime.utcnow())
        sess.add(seg1)
        sess.commit()

        cls1 = Class(label='a')
        cls2 = Class(label='b')

        sess.add_all([cls1, cls2])
        sess.commit()

        clb1 = ClassLabelling(segment_id=seg1.id, class_id=cls1.id)
        clb2 = ClassLabelling(segment_id=seg1.id, class_id=cls2.id)

        sess.add_all([clb1, clb2])
        sess.commit()

        # segment 2, with one class label 'a'
        seg2 = Segment(event_id=event1.id,
                       channel_id=cha2.id,
                       datacenter_id=dcen.id,
                       event_distance_deg=6.6,
                       download_id=run.id,
                       arrival_time=datetime.utcnow(),
                       request_start=datetime.utcnow(),
                       request_end=datetime.utcnow())

        sess.add(seg2)
        sess.commit()

        clb1 = ClassLabelling(segment_id=seg2.id, class_id=cls1.id)

        sess.add_all([clb1])
        sess.commit()

        # segment 3, no class labels (and with data attr, useful later)
        seg3 = Segment(event_id=event1.id,
                       channel_id=cha1.id,
                       datacenter_id=dcen.id,
                       event_distance_deg=7,
                       download_id=run.id,
                       data=b'data',
                       arrival_time=datetime.utcnow(),
                       request_start=datetime.utcnow(),
                       request_end=datetime.utcnow())
        sess.add(seg3)
        sess.commit()
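With the labellings above, segments can be filtered by class label through a join; a minimal sketch using the same models (the helper is hypothetical, not part of the fixture):

def segments_with_label(session, label):
    # all segments tagged with the given class label:
    return session.query(Segment).\
        join(ClassLabelling, ClassLabelling.segment_id == Segment.id).\
        join(Class, Class.id == ClassLabelling.class_id).\
        filter(Class.label == label).all()

# with the fixture above: 'a' matches seg1 and seg2, 'b' only seg1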