def close(self):
    self.stream.close()
    # This can raise a KeyError if the handler has already been
    # removed, but a later error can be raised if
    # StreamHandler.close() isn't called. This seems the best
    # compromise. :-(
    try:
        StreamHandler.close(self)
    except KeyError:
        pass
    _remove_from_reopenable(self._wr)
def close(self):
    self.stream.close()
    # This can raise a KeyError if the handler has already been
    # removed, but a later error can be raised if
    # StreamHandler.close() isn't called. This seems the best
    # compromise. :-(
    try:
        StreamHandler.close(self)
    except KeyError:  # pragma: no cover
        pass
    _remove_from_reopenable(self._wr)
def set_stream(logger: logging.Logger, handler: logging.StreamHandler,
               stream: IO[str]) -> Generator[None, None, None]:
    # See `https://bugs.python.org/issue6333` for why this is necessary.
    old = handler.setStream(stream)
    logger.addHandler(handler)
    try:
        yield
    finally:
        logger.removeHandler(handler)
        handler.close()
        if old:
            handler.setStream(old)
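The generator above is presumably meant to be used as a context manager (e.g. decorated with @contextlib.contextmanager in its original module). A minimal usage sketch, assuming that wrapping, Python 3.7+ (for Handler.setStream), and an illustrative logger name and StringIO capture target:

import contextlib
import io
import logging

capture = io.StringIO()
logger = logging.getLogger("demo")
handler = logging.StreamHandler()

# Wrap the generator explicitly here; in its source module it is likely
# already decorated with @contextlib.contextmanager.
with contextlib.contextmanager(set_stream)(logger, handler, capture):
    logger.warning("redirected message")

print(capture.getvalue())  # the warning text is now in the StringIO buffer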
class MultiproccessingStreamHandler(logging.Handler):
    """This stream handler makes it possible for several processes to log to
    the same stdout by using a queue.
    """

    _handler = None
    _queue = None

    def __init__(self):
        logging.Handler.__init__(self)
        # create StreamHandler
        self._handler = StreamHandler()
        # setup queue and thread
        self._queue = multiprocessing.Queue(-1)
        thread = threading.Thread(target=self.receive)
        thread.daemon = True
        thread.start()

    def setFormatter(self, fmt):
        """Set the Formatter for this handler to fmt."""
        logging.Handler.setFormatter(self, fmt)
        self._handler.setFormatter(fmt)

    def receive(self):
        """Thread which is run in the background. This thread gets logging
        records from the queue and emits them."""
        while True:
            try:
                record = self._queue.get()
                self._handler.emit(record)
            except (KeyboardInterrupt, SystemExit):
                raise
            except EOFError:
                break
            except:
                break

    def emit(self, record):
        """Log the logging record by putting it into the queue."""
        try:
            self._queue.put_nowait(record)
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            self.handleError(record)

    def close(self):
        """Close handler."""
        self._handler.close()
        logging.Handler.close(self)
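A minimal usage sketch for the handler above, assuming a fork-based multiprocessing start method so that child processes inherit the configured handler and its queue (logger name and messages are illustrative only):

import logging
import multiprocessing

def worker(i):
    # runs in a child process; emit() puts the record on the shared queue
    logging.getLogger("mp_demo").info("hello from worker %d", i)

if __name__ == "__main__":
    logger = logging.getLogger("mp_demo")
    logger.setLevel(logging.INFO)
    handler = MultiproccessingStreamHandler()
    handler.setFormatter(logging.Formatter("%(processName)s %(message)s"))
    logger.addHandler(handler)

    procs = [multiprocessing.Process(target=worker, args=(i,)) for i in range(3)]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    handler.close()  # closes the internal StreamHandler in the parent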
def close(self):
    self.acquire()
    try:
        try:
            if self.producer:
                try:
                    self.flush()
                finally:
                    self.event_batch_data = None
                    self.producer.close()
        finally:
            StreamHandler.close(self)
    finally:
        self.release()
def close(self):
    self.acquire()
    try:
        try:
            if self.stream:
                try:
                    self.flush()
                finally:
                    stream = self.stream
                    self.stream = None
                    if hasattr(stream, "close"):
                        stream.close()
        finally:
            StreamHandler.close(self)
    finally:
        self.release()
def close(self):
    """Closes the stream."""
    self.acquire()
    try:
        try:
            if self.stream:
                try:
                    self.flush()
                finally:
                    stream = self.stream
                    self.stream = None
                    if hasattr(stream, 'close'):
                        stream.close()
        finally:
            # Issue #19523: call unconditionally to
            # prevent a handler leak when delay is set
            StreamHandler.close(self)
    finally:
        self.release()
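The Issue #19523 comment above matters mostly for file-backed handlers created with delay=True: no stream exists yet, but the base-class close() must still run so the handler is deregistered from logging's internal handler list. A small sketch of that scenario (the file name is illustrative; nothing is ever written to it):

import logging

h = logging.FileHandler("never_written.log", delay=True)
assert h.stream is None  # delay=True: nothing has been opened yet
h.close()                # still required, so logging can deregister the handler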
def close(self):
    """Close the log file, if it is open, and close the stream."""
    self.acquire()
    try:
        try:
            if self.stream:
                try:
                    self.flush()
                finally:
                    stream = self.stream
                    self.stream = None
                    if hasattr(stream, "close"):
                        stream.close()
        finally:
            StreamHandler.close(self)
    finally:
        self.release()
class MPLogHandler(logging.Handler):
    """multiprocessing log handler

    This handler makes it possible for several processes to log to the same
    file by using a queue.

    :param out_file: name of the output file; if None, output goes to
        stdout/stderr
    :type out_file: str
    """

    def __init__(self, out_file=None):
        logging.Handler.__init__(self)
        if out_file is not None:
            self._handler = FH(out_file)
        else:
            self._handler = SH()
        self.queue = multiprocessing.Queue(-1)
        atexit.register(logging.shutdown)
        self._thrd = None
        self._is_child = False
        # Children will automatically register themselves as children
        register_after_fork(self, MPLogHandler.set_is_child)

    def set_is_child(self):
        self._is_child = True

    def start_recv_thread(self):
        if self._thrd:
            return
        self._shutdown = False
        thrd = threading.Thread(target=self.receive)
        thrd.daemon = True
        thrd.start()
        self._thrd = thrd

    def setFormatter(self, fmt):
        logging.Handler.setFormatter(self, fmt)
        self._handler.setFormatter(fmt)

    def receive(self):
        while not self._shutdown:
            try:
                record = self.queue.get(True, 0.3)
                self._handler.emit(record)
            except (Queue.Empty, IOError):
                pass
            except (KeyboardInterrupt, SystemExit):
                raise
            except (EOFError, TypeError):
                break
            except:
                traceback.print_exc(file=sys.stderr)

    def shutdown_recv_thread(self):
        if self._thrd:
            self._shutdown = True
            self._thrd.join()
            self._thrd = None

    def send(self, s):
        self.queue.put_nowait(s)

    def _format_record(self, record):
        if record.args:
            record.msg = record.msg % record.args
            record.args = None
        if record.exc_info:
            dummy = self.format(record)
            record.exc_info = None
        return record

    def emit(self, record):
        try:
            s = self._format_record(record)
            # If we are a child, then send the record, otherwise simply emit it
            if self._is_child:
                self.send(s)
            else:
                self._handler.emit(s)
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            self.handleError(record)

    def close(self):
        self._handler.close()
        self.shutdown_recv_thread()
        logging.Handler.close(self)
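A minimal usage sketch for MPLogHandler, assuming a fork-based start method (the class relies on multiprocessing's register_after_fork, which is POSIX/fork only); the file and logger names are illustrative. Note that the parent has to call start_recv_thread() so the queue is drained into the file:

import logging
import multiprocessing

def job(n):
    # child process: emit() sees _is_child and sends the record to the queue
    logging.getLogger("mp_file_demo").info("record %d", n)

if __name__ == "__main__":
    logger = logging.getLogger("mp_file_demo")
    logger.setLevel(logging.INFO)
    handler = MPLogHandler("combined.log")
    handler.setFormatter(logging.Formatter("%(processName)s: %(message)s"))
    logger.addHandler(handler)
    handler.start_recv_thread()  # the parent drains the queue into the file

    workers = [multiprocessing.Process(target=job, args=(i,)) for i in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    handler.close()  # stops the receiver thread and closes the underlying handler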
def close(self):
    """Close the file."""
    self.flush()
    self.stream.close()
    StreamHandler.close(self)
def close(self):
    self.stream.close()
    StreamHandler.close(self)
def setup_logging(dict_config=None, level=None, stream=None, logfile=None):
    """Set up the logging.

    This will override the logging configurations in the config file
    if specified (e.g., by command line arguments).

    Parameters
    ----------
    dict_config : dict
        Dict of logging configurations specified in the config file.
        If this parameter is specified, the logging will be reconfigured.
    level : str
        Override the existing log level.
    stream : str; "stderr", "stdout", or ""
        This controls where the log messages go.  If not None, then
        override the old ``StreamHandler`` settings; if ``stream=""``,
        then disable the ``StreamHandler``.
    logfile : str
        Specify the file where the log messages go.  If ``logfile=""``,
        then disable the ``FileHandler``.

    NOTE
    ----
    If the logging already has a ``StreamHandler`` or ``FileHandler``
    configured, then the old handler will be **replaced** (i.e., remove
    the old one, then add the new one).
    """
    # Default file open mode for logging to file
    filemode = "a"
    root_logger = logging.getLogger()
    #
    if dict_config:
        # XXX/NOTE:
        # ``basicConfig()`` does NOT accept the parameter ``filemode`` if the
        # corresponding parameter ``filename`` is NOT specified.
        filemode = dict_config.pop("filemode", filemode)
        # Clear existing handlers, otherwise further "basicConfig" calls
        # will be ignored (iterate over a copy, since handlers get removed)
        for handler in root_logger.handlers[:]:
            handler.close()
            root_logger.removeHandler(handler)
        # Initialize/reconfigure the logging, which will automatically
        # create a ``Formatter`` for handlers if necessary, and add
        # the handlers to the "root" logger.
        logging.basicConfig(**dict_config)
    #
    if os.environ.get("DEBUG_FG21SIM"):
        print("DEBUG: Force 'DEBUG' logging level", file=sys.stderr)
        level = "DEBUG"
    if level is not None:
        level_int = getattr(logging, level.upper(), None)
        if not isinstance(level_int, int):
            raise ValueError("invalid log level: %s" % level)
        root_logger.setLevel(level_int)
    #
    # Configured logging has at least one handler with a configured formatter.
    # Store the existing formatter to preserve the configured format styles.
    formatter = root_logger.handlers[0].formatter
    #
    if stream is None:
        pass
    elif stream in ["", "stderr", "stdout"]:
        for handler in root_logger.handlers[:]:
            if isinstance(handler, StreamHandler):
                # remove old ``StreamHandler``
                handler.close()
                root_logger.removeHandler(handler)
        if stream == "":
            # ``StreamHandler`` already disabled
            pass
        else:
            # Add new ``StreamHandler``
            handler = StreamHandler(getattr(sys, stream))
            handler.setFormatter(formatter)
            root_logger.addHandler(handler)
    else:
        raise ValueError("invalid stream: %s" % stream)
    #
    if logfile is not None:
        for handler in root_logger.handlers[:]:
            if isinstance(handler, FileHandler):
                filemode = handler.mode
                # remove old ``FileHandler``
                handler.close()
                root_logger.removeHandler(handler)
        if logfile == "":
            # ``FileHandler`` already disabled
            pass
        else:
            # Add new ``FileHandler``
            handler = FileHandler(logfile, mode=filemode)
            handler.setFormatter(formatter)
            root_logger.addHandler(handler)

    logger.info("Set up logging.")
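A minimal usage sketch for setup_logging above; the configuration dict is illustrative, not the project's actual config file (note that "filemode" is popped before basicConfig is called):

log_config = {
    "filename": "run.log",
    "filemode": "w",
    "level": "INFO",
    "format": "%(asctime)s [%(levelname)s] %(message)s",
}
setup_logging(dict_config=log_config)        # initial configuration from the config file
setup_logging(level="DEBUG")                 # later: only raise the verbosity
setup_logging(stream="stdout", logfile="")   # switch to stdout, drop the FileHandler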
def close(self):
    StreamHandler.close(self)
    if self._fluentd:
        self._fluentd.flush()
def close(self):
    """Close the handler, not the handle, with a hash as a stamp to mark
    where log output ends."""
    self.rbwrite(self.name)
    StreamHandler.close(self)
class Logger:
    """Writes system state to log files."""

    def __init__(self, name=None):
        self.__name = name
        self.__loggers = {
            'file': self.__file_logger,
            'console': self.__console_logger
        }
        self.__log_level = {'info': INFO, 'debug': DEBUG}
        self.__modes = []
        self.__logs_path = ''
        self.__common_log_handler = None
        self.__console_log_handler = None
        self.__log_format = None
        self.__logger = getLogger(self.__name)
        self.info = self.__logger.info
        self.debug = self.__logger.debug
        self.warning = self.__logger.warning
        self.error = self.__logger.error
        self.critical = self.__logger.critical

    def set_logs(self, mode=None, message_level='info', logs_directory=None):
        """Set logger handlers."""
        if mode not in self.__loggers:
            raise ValueError('Mode "{}" is not supported'.format(mode))
        self.__modes.append(mode)
        if mode == 'file':
            if not logs_directory:
                raise ValueError('"logs_directory" should not be None')
            self.__logs_path = logs_directory
        self.__logger.setLevel(self.__log_level[message_level])
        message_format = ('%(levelname)-8s %(asctime)s '
                          '(%(filename)s:%(lineno)d) %(message)-40s')
        self.__log_format = Formatter(fmt=message_format,
                                      datefmt="%y-%m-%d %H:%M:%S")
        self.__loggers[mode]()

    def __file_logger(self):
        """Create and start the logger's file handler."""
        log_file = '{0}/{1}.log'.format(self.__logs_path, self.__name)
        # Rewrite an existing log
        if os.path.exists(log_file):
            os.remove(log_file)
        self.__common_log_handler = FileHandler(log_file, mode='w',
                                                encoding='utf-8')
        self.__common_log_handler.setFormatter(self.__log_format)
        self.__logger.addHandler(self.__common_log_handler)

    def __console_logger(self):
        """Create and start the logger's console handler."""
        self.__console_log_handler = StreamHandler()
        self.__console_log_handler.setFormatter(self.__log_format)
        self.__logger.addHandler(self.__console_log_handler)

    def close_logs(self, mode):
        """Close logger handlers."""
        if mode not in self.__modes:
            return
        if mode == 'file':
            self.__common_log_handler.close()
            self.__logger.removeHandler(self.__common_log_handler)
        elif mode == 'console':
            self.__console_log_handler.close()
            self.__logger.removeHandler(self.__console_log_handler)
        self.__modes.remove(mode)
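A minimal usage sketch for the Logger wrapper above (the logger name and logs directory are illustrative, and the directory is assumed to exist):

log = Logger(name="system_state")
log.set_logs(mode='file', message_level='debug', logs_directory='/tmp/logs')
log.set_logs(mode='console', message_level='debug')
log.info("monitoring started")
log.debug("diagnostic detail")
log.close_logs('console')
log.close_logs('file')  # close and detach each handler when done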
class Test(object):

    # execute this fixture always even if not provided as argument:
    # https://docs.pytest.org/en/documentation-restructure/how-to/fixture.html#autouse-fixtures-xunit-setup-on-steroids
    @pytest.fixture(autouse=True)
    def init(self, request, db, data):
        # re-init a sqlite database (no-op if the db is not sqlite):
        db.create(to_file=False)
        # setup a run_id:
        rdw = Download()
        db.session.add(rdw)
        db.session.commit()
        self.run = rdw

        # side effects:
        self._evt_urlread_sideeffect = """#EventID | Time | Latitude | Longitude | Depth/km | Author | Catalog | Contributor | ContributorID | MagType | Magnitude | MagAuthor | EventLocationName
20160508_0000129|2016-05-08 05:17:11.500000|1|1|60.0|AZER|EMSC-RTS|AZER|505483|ml|3|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN
20160508_0000004|2016-05-08 01:45:30.300000|90|90|2.0|EMSC|EMSC-RTS|EMSC|505183|ml|4|EMSC|CROATIA
"""
        self._mintraveltime_sideeffect = cycle([1])
        self._seg_data = data.read("GE.FLT1..HH?.mseed")
        self._seg_data_gaps = data.read("IA.BAKI..BHZ.D.2016.004.head")
        self._seg_data_empty = b''
        self._seg_urlread_sideeffect = [self._seg_data, self._seg_data_gaps, 413, 500,
                                        self._seg_data[:2], self._seg_data_empty, 413,
                                        URLError("++urlerror++"), socket.timeout()]
        self.service = ''  # so get_datacenters_df accepts any row by default
        self.db_buf_size = 1
        self.routing_service = yaml_load(get_templates_fpath("download.yaml"))\
            ['advanced_settings']['routing_service_url']

        # NON db stuff (logging, patchers, pandas...):
        self.loghandler = StreamHandler(stream=StringIO())
        # THIS IS A HACK:
        query_logger.setLevel(logging.INFO)  # necessary to forward to handlers
        # if we called closing (we are testing the whole chain) the level will be reset
        # (to level.INFO), otherwise it stays what we set two lines above. Problems might
        # arise if closing sets a different level, but for the moment who cares
        query_logger.addHandler(self.loghandler)

        # when debugging, I want the full dataframe with to_string(), not truncated
        # NOTE: this messes up right alignment of numbers in DownloadStats (see utils.py)
        # FIRST, remember current settings and restore them in cleanup:
        _pd_display_maxcolwidth = pd.get_option('display.max_colwidth')
        pd.set_option('display.max_colwidth', -1)

        # define class level patchers (we do not use a yield as we need to do more stuff
        # in the finalizer, see below):
        patchers = []

        patchers.append(patch('stream2segment.utils.url.urlopen'))
        self.mock_urlopen = patchers[-1].start()

        # mock ThreadPool (tp) to run one instance at a time, so we get deterministic results:
        class MockThreadPool(object):

            def __init__(self, *a, **kw):
                pass

            def imap(self, func, iterable, *args):
                # make imap deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func, iterable)

            def imap_unordered(self, func_, iterable, *args):
                # make imap_unordered deterministic: same as standard python map:
                # everything is executed in a single thread in the right input order
                return map(func_, iterable)

            def close(self, *a, **kw):
                pass

        # assign patches and mocks:
        patchers.append(patch('stream2segment.utils.url.ThreadPool'))
        self.mock_tpool = patchers[-1].start()
        self.mock_tpool.side_effect = MockThreadPool

        # add finalizer:
        def delete():
            pd.set_option('display.max_colwidth', _pd_display_maxcolwidth)
            for patcher in patchers:
                patcher.stop()
            hndls = query_logger.handlers[:]
            for h in hndls:
                if h is self.loghandler:
                    self.loghandler.close()
                    query_logger.removeHandler(h)
        request.addfinalizer(delete)

    def log_msg(self):
        ret = self.loghandler.stream.getvalue()
        self.loghandler.stream.seek(0)
        self.loghandler.stream.truncate(0)
        return ret

    def setup_urlopen(self, urlread_side_effect):
        """setup urlopen return value.

        :param urlread_side_effect: a LIST of strings or exceptions returned by urlopen.read,
            that will be converted to an itertools.cycle(side_effect). REMEMBER that any
            element of urlread_side_effect which is a nonempty string must be followed by an
            EMPTY STRING TO STOP reading, otherwise we fall into an infinite loop if the
            argument blocksize of url read is not negative!"""
        self.mock_urlopen.reset_mock()
        # convert returned values to the given urlread return value (tuple data, code, msg)
        # if k is an int, convert to an HTTPError
        retvals = []
        # Check if we have an iterable (where strings are considered not iterables):
        if not hasattr(urlread_side_effect, "__iter__") or \
                isinstance(urlread_side_effect, (bytes, str)):
            # it's not an iterable (where str/bytes/unicode are considered NOT iterable
            # in both py2 and 3)
            urlread_side_effect = [urlread_side_effect]

        for k in urlread_side_effect:
            mymock = Mock()
            if type(k) == int:
                mymock.read.side_effect = HTTPError('url', int(k), responses[k], None, None)
            elif type(k) in (bytes, str):
                def func(k):
                    bio = BytesIO(k.encode('utf8') if type(k) == str else k)  # py2to3 compatible

                    def rse(*mymock, **v):
                        rewind = not mymock and not v
                        if not rewind:
                            currpos = bio.tell()
                        ret = bio.read(*mymock, **v)
                        # hacky workaround to support cycle below: if reached the end,
                        # go back to start
                        if not rewind:
                            cp = bio.tell()
                            rewind = cp == currpos
                        if rewind:
                            bio.seek(0, 0)
                        return ret
                    return rse
                mymock.read.side_effect = func(k)
                mymock.code = 200
                mymock.msg = responses[mymock.code]
            else:
                mymock.read.side_effect = k
            retvals.append(mymock)

        self.mock_urlopen.side_effect = cycle(retvals)

    def get_events_df(self, url_read_side_effect, session, url, evt_query_args, start, end,
                      db_bufsize=30, timeout=15, show_progress=False):
        self.setup_urlopen(self._evt_urlread_sideeffect if url_read_side_effect is None
                           else url_read_side_effect)
        return get_events_df(session, url, evt_query_args, start, end, db_bufsize, timeout,
                             show_progress)

    @patch('stream2segment.download.modules.events.urljoin', side_effect=urljoin)
    def test_get_events(self, mock_urljoin, db):
        urlread_sideeffect = ["""#1|2|3|4|5|6|7|8|9|10|11|12|13
20160508_0000129|2016-05-08 05:17:11.500000|40.57|52.23|60.0|AZER|EMSC-RTS|AZER|505483|ml|3.1|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN
20160508_0000004|2016-05-08 01:45:30.300000|44.96|15.35|2.0|EMSC|EMSC-RTS|EMSC|505183|ml|3.6|EMSC|CROATIA
20160508_0000113|2016-05-08 22:37:20.100000|45.68|26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
20160508_0000113|2016-05-08 22:37:20.100000|45.68|26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
--- ERRROR --- THIS IS MALFORMED
20160508_abc0113|2016-05-08 22:37:20.100000| --- ERROR --- |26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
"""]
        data = self.get_events_df(urlread_sideeffect, db.session, "http://eventws", {},
                                  datetime.utcnow() - timedelta(seconds=1), datetime.utcnow(),
                                  db_bufsize=self.db_buf_size)
        # assert only the valid (non-duplicated) events were successfully saved
        assert len(db.session.query(Event).all()) == len(pd.unique(data['id'])) == \
            len(data) == 3
        # check that log has notified:
        log1 = self.log_msg()
        assert "20160508_0000113" in log1
        assert "1 database row(s) not inserted" in log1
        assert mock_urljoin.call_count == 1
        mock_urljoin.reset_mock()

        # now download again, with an url error:
        urlread_sideeffect = [504, """1|2|3|4|5|6|7|8|9|10|11|12|13
20160508_0000129|2016-05-08 05:17:11.500000|40.57|52.23|60.0|AZER|EMSC-RTS|AZER|505483|ml|3.1|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN
20160508_0000004|2016-05-08 01:45:30.300000|44.96|15.35|2.0|EMSC|EMSC-RTS|EMSC|505183|ml|3.6|EMSC|CROATIA
20160508_0000113|2016-05-08 22:37:20.100000|45.68|26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
20160508_0000113|2016-05-08 22:37:20.100000|45.68|26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
--- ERRROR --- THIS IS MALFORMED
20160508_abc0113|2016-05-08 22:37:20.100000| --- ERROR --- |26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
""", URLError('blabla23___')]
        with pytest.raises(FailedDownload) as fld:
            data = self.get_events_df(urlread_sideeffect, db.session, "http://eventws", {},
                                      datetime.utcnow() - timedelta(seconds=1),
                                      datetime.utcnow(), db_bufsize=self.db_buf_size)
        # assert we got the same result as above:
        assert len(db.session.query(Event).all()) == len(pd.unique(data['id'])) == \
            len(data) == 3
        log2 = self.log_msg()
        # log text has the message about the second (successful) download, with the
        # two rows discarded:
        assert "2 row(s) discarded" in log2
        # test that the exception has the expected message:
        assert "Unable to fetch events" in str(fld)
        # check that we split once, thus we called mock_urljoin 2 times
        # (plus the first call):
        assert mock_urljoin.call_count == 3
        mock_urljoin.reset_mock()

        # now download again, with a recursion error (max iterations reached):
        urlread_sideeffect = [413]
        with pytest.raises(FailedDownload) as fld:
            data = self.get_events_df(urlread_sideeffect, db.session, "http://eventws", {},
                                      datetime.utcnow() - timedelta(seconds=1),
                                      datetime.utcnow(), db_bufsize=self.db_buf_size)
        # assert we got the same result as above:
        assert len(db.session.query(Event).all()) == len(pd.unique(data['id'])) == \
            len(data) == 3
        log2 = self.log_msg()
        # nothing written to log:
        assert "Request seems to be too large" in log2
        # assertion on exception:
        assert "Unable to fetch events" in str(fld)
        assert "maximum recursion depth reached" in str(fld)

    def test_get_events_eventws_not_saved(self, db):
        '''test request split, but failing due to a http error'''
        urlread_sideeffect = [socket.timeout, 500]

        # we want the download to fail, and see that we raise a FailedDownload:
        with pytest.raises(FailedDownload) as fldl:
            data = self.get_events_df(urlread_sideeffect, db.session, "abcd", {},
                                      start=datetime(2010, 1, 1),
                                      end=datetime(2011, 1, 1),
                                      db_bufsize=self.db_buf_size)
        # test that we raised the proper message:
        assert 'Unable to fetch events' in str(fldl)
        # assert we wrote the url
        assert len(db.session.query(WebService.url)
                   .filter(WebService.url == 'abcd').all()) == 1
        # assert no event was saved to the db
        assert not db.session.query(Event).all()
        # we cannot assert anything has been written to the logger because the exceptions are
        # caught if we run from main. This should be checked in functional tests where we
        # test the whole chain
        # assert "request entity too large" in self.log_msg()

    def get_pbar_total_steps(self):
        return _get_freq_mag_distrib({})[2].sum()

    @patch('stream2segment.download.modules.events.get_progressbar')
    @patch('stream2segment.download.modules.events.urljoin', side_effect=urljoin)
    def test_pbar1(self, mock_urljoin, mock_pbar, db):
        '''test request split, but failing due to a http error'''

        class Pbar(object):

            def __init__(self, *a, **kw):
                self.updates = []

            def __enter__(self, *a, **kw):
                return self

            def __exit__(self, *a, **kw):
                pass

            def update(self, increment):
                self.updates.append(increment)

        mock_pbar.return_value = Pbar()

        urlread_sideeffect = [socket.timeout, 500]
        mock_pbar.reset_mock()
        mock_pbar.return_value.updates = []
        with pytest.raises(FailedDownload) as fldl:
            _ = self.get_events_df(urlread_sideeffect, db.session, "abcd", {},
                                   start=datetime(2010, 1, 1),
                                   end=datetime(2011, 1, 1),
                                   db_bufsize=self.db_buf_size)
        # test that we did not increment the pbar (exceptions)
        assert mock_pbar.call_args[1]['length'] == self.get_pbar_total_steps()
        assert mock_pbar.return_value.updates == []

        # Now let's supply a bad response, the progressbar should not be called
        urlread_sideeffect = ['']
        mock_pbar.reset_mock()
        mock_pbar.return_value.updates = []
        with pytest.raises(FailedDownload) as fldl:
            _ = self.get_events_df(urlread_sideeffect, db.session, "abcd", {},
                                   start=datetime(2010, 1, 1),
                                   end=datetime(2011, 1, 1),
                                   db_bufsize=self.db_buf_size)
        assert "Discarding response (Empty input data)" in self.log_msg()
        assert not mock_pbar.called

        # Now let's supply a successful response:
        urlread_sideeffect = [
            '''20160508_0000129|2016-05-08 05:17:11.500000|40.57|52.23|60.0|AZER|EMSC-RTS|AZER|505483|ml|3.1|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN'''
        ]
        mock_pbar.reset_mock()
        mock_pbar.return_value.updates = []
        _ = self.get_events_df(urlread_sideeffect, db.session, "abcd", {},
                               start=datetime(2010, 1, 1),
                               end=datetime(2011, 1, 1),
                               db_bufsize=self.db_buf_size)
        assert not mock_pbar.called
        assert "Discarding response (Empty input data)" not in self.log_msg()
        assert "Request seems to be too large, splitting into" not in self.log_msg()

        # Now let's supply a successful response after two 413 (request too large) errors:
        urlread_sideeffect = [
            413,
            '''20160508_0000129|2016-05-08 05:17:11.500000|40.57|52.23|60.0|AZER|EMSC-RTS|AZER|505483|ml|3.1|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN''',
            413,
            '''20160508_0000129|2016-05-08 05:17:11.500000|40.57|52.23|60.0|AZER|EMSC-RTS|AZER|505483|ml|3.1|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN''',
            '''20160508_0000129|2016-05-08 05:17:11.500000|40.57|52.23|60.0|AZER|EMSC-RTS|AZER|505483|ml|3.1|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN''',
        ]
        mock_pbar.reset_mock()
        mock_pbar.return_value.updates = []
        data = self.get_events_df(urlread_sideeffect, db.session, "abcd", {},
                                  start=datetime(2010, 1, 1),
                                  end=datetime(2011, 1, 1),
                                  db_bufsize=self.db_buf_size)
        logmsg = self.log_msg()
        assert 'Duplicated instances' in logmsg
        assert mock_pbar.call_args[1]['length'] == self.get_pbar_total_steps()
        # the first 413 produces a magnitude split 1part vs 9parts,
        # the second 413 produces a split 1vs9 on the 9 parts, thus 1*9 and 9*9:
        assert sum(mock_pbar.return_value.updates) == mock_pbar.call_args[1]['length']
        assert "Request seems to be too large, splitting into" in logmsg

        # =================================================================
        # The tests below check the same for different magnitude bound values
        # =================================================================

        mock_pbar.reset_mock()
        mock_urljoin.reset_mock()
        mock_pbar.return_value.updates = []
        data = self.get_events_df(urlread_sideeffect, db.session, "abcd", {'minmag': 2},
                                  start=datetime(2010, 1, 1),
                                  end=datetime(2011, 1, 1),
                                  db_bufsize=self.db_buf_size)
        assert 'Duplicated instances' in self.log_msg()
        assert mock_pbar.call_args[1]['length'] < self.get_pbar_total_steps()
        assert sum(mock_pbar.return_value.updates) == mock_pbar.call_args[1]['length']
        # assert that we do not have maxmagnitude in the first request,
        # but in the first sub-request (index 1) (do not test other sub requests)
        req_kwargs = [_[1] for _ in mock_urljoin.call_args_list]
        assert not any(['maxmagnitude' in req_kwargs[0], 'maxmag' in req_kwargs[0]])
        assert any(['maxmagnitude' in req_kwargs[1], 'maxmag' in req_kwargs[1]])

        mock_pbar.reset_mock()
        mock_urljoin.reset_mock()
        mock_pbar.return_value.updates = []
        data = self.get_events_df(urlread_sideeffect, db.session, "abcd", {'maxmag': 5},
                                  start=datetime(2010, 1, 1),
                                  end=datetime(2011, 1, 1),
                                  db_bufsize=self.db_buf_size)
        assert 'Duplicated instances' in self.log_msg()
        assert mock_pbar.call_args[1]['length'] < self.get_pbar_total_steps()
        assert sum(mock_pbar.return_value.updates) == mock_pbar.call_args[1]['length']
        # assert that we do not have minmagnitude in the first two sub-requests (from index 1),
        # but in the third (do not test other sub requests)
        req_kwargs = [_[1] for _ in mock_urljoin.call_args_list]
        assert not any(['minmagnitude' in req_kwargs[0], 'minmag' in req_kwargs[0]])
        assert not any(['minmagnitude' in req_kwargs[1], 'minmag' in req_kwargs[1]])
        assert any(['minmagnitude' in req_kwargs[2], 'minmag' in req_kwargs[2]])

    @pytest.mark.parametrize('args', [{'minmag': 2.1},
                                      {'minmag': 2.11},
                                      {'minmag': 0, 'maxmag': 1.9},
                                      {'minmag': 2, 'maxmag': 8}])
    @patch('stream2segment.download.modules.events.get_progressbar')
    def test_pbar2(self, mock_pbar, args, db):
        '''test request split, with different magnitude bounds'''
        urlread_sideeffect = [
            413,
            '''20160508_0000129|2016-05-08 05:17:11.500000|40.57|52.23|60.0|AZER|EMSC-RTS|AZER|505483|ml|3.1|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN''',
            413,
            '''20160508_0000129|2016-05-08 05:17:11.500000|40.57|52.23|60.0|AZER|EMSC-RTS|AZER|505483|ml|3.1|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN''',
            '''20160508_0000129|2016-05-08 05:17:11.500000|40.57|52.23|60.0|AZER|EMSC-RTS|AZER|505483|ml|3.1|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN''',
        ]

        class Pbar(object):

            def __init__(self, *a, **kw):
                self.updates = []

            def __enter__(self, *a, **kw):
                return self

            def __exit__(self, *a, **kw):
                pass

            def update(self, increment):
                self.updates.append(increment)

        mock_pbar.return_value = Pbar()
        data = self.get_events_df(urlread_sideeffect, db.session, "abcd", args,
                                  start=datetime(2010, 1, 1),
                                  end=datetime(2011, 1, 1),
                                  db_bufsize=self.db_buf_size)
        assert 'Duplicated instances' in self.log_msg()
        assert mock_pbar.call_args[1]['length'] < self.get_pbar_total_steps()
        assert sum(mock_pbar.return_value.updates) == mock_pbar.call_args[1]['length']

    def test_get_events_eventws_from_file(self,
                                          # fixtures:
                                          db, pytestdir):
        '''test request split, but reading from events file'''
        urlread_sideeffect = [socket.timeout, 500]
        filepath = pytestdir.newfile('.txt', create=True)
        with open(filepath, 'w') as _fpn:
            _fpn.write("""1|2|3|4|5|6|7|8|9|10|11|12|13
20160508_0000129|2016-05-08 05:17:11.500000|40.57|52.23|60.0|AZER|EMSC-RTS|AZER|505483|ml|3.1|AZER|CASPIAN SEA, OFFSHR TURKMENISTAN
20160508_0000004|2016-05-08 01:45:30.300000|44.96|15.35|2.0|EMSC|EMSC-RTS|EMSC|505183|ml|3.6|EMSC|CROATIA
20160508_0000113|2016-05-08 22:37:20.100000|45.68|26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
20160508_0000113|2016-05-08 22:37:20.100000|45.68|26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
--- ERRROR --- THIS IS MALFORMED
20160508_abc0113|2016-05-08 22:37:20.100000| --- ERROR --- |26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
""")
        log1 = self.log_msg()
        data = self.get_events_df(urlread_sideeffect, db.session, filepath, {},
                                  start=datetime(2010, 1, 1),
                                  end=datetime(2011, 1, 1),
                                  db_bufsize=self.db_buf_size)
        # assert we got the same result as above:
        assert len(db.session.query(Event).all()) == len(pd.unique(data['id'])) == \
            len(data) == 3
        log2 = self.log_msg()
        # since one row is discarded, the message is something like:
        # 1 row(s) discarded (malformed server response data, e.g. NaN's). url: file:////private/var/folders/l9/zpp7wn1n4r7bt4vs39gylk4w0000gn/T/pytest-of-riccardo/pytest-442/test_get_events_eventws_from_f0/368e6e99-171c-40e1-ad8e-3afc40ebeeab.txt
        # however, we test the bare minimum:
        assert 'url: file:///' in log2
        assert not self.mock_urlopen.called

    def test_get_events_errors(self,
                               # fixtures:
                               db, pytestdir):
        '''test request split, but reading from BAD events file'''
        urlread_sideeffect = [socket.timeout, 500]
        filepath = pytestdir.newfile('.txt', create=True)
        with open(filepath, 'w') as _fpn:
            _fpn.write("""000|45.68|26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
--- ERRROR --- THIS IS MALFORMED
20160508_abc0113|2016-05-08 22:37:20.100000| --- ERROR --- |26.64|163.0|BUC|EMSC-RTS|BUC|505351|ml|3.4|BUC|ROMANIA
""")
        # provide a valid file (that exists but is malformed) and test the err message:
        expected_err_msg = ('No event found. Check that the file is non empty '
                            'and its content is valid')
        with pytest.raises(FailedDownload) as fdl:
            _ = self.get_events_df(urlread_sideeffect, db.session, filepath, {},
                                   start=datetime(2010, 1, 1),
                                   end=datetime(2011, 1, 1),
                                   db_bufsize=self.db_buf_size)
        assert expected_err_msg in str(fdl)
        assert not self.mock_urlopen.called

        # Now provide a url and test the error message. Test that we get a 500 error
        # (Note that `urlread_sideeffect` above should raise a socket timeout and
        # an HTTP error 500, but the first socket.timeout forces by design the split of
        # the request into sub-requests, so the first exception caught is the HTTP error 500)
        with pytest.raises(FailedDownload) as fdl:
            _ = self.get_events_df(urlread_sideeffect, db.session, 'iris', {},
                                   start=datetime(2010, 1, 1),
                                   end=datetime(2011, 1, 1),
                                   db_bufsize=self.db_buf_size)
        expected_err_msg = 'Unable to fetch events (HTTP Error 500: Internal Server Error)'
        # the string above might change across python versions. Thus:
        # assert expected_err_msg in str(fdl)
        # might fail and thus cause annoying debugging sessions.
        # We then test that our message is there and a '500' is found in the error:
        assert 'Unable to fetch events' in str(fdl)
        assert '500' in str(fdl)
        assert self.mock_urlopen.called  # urlopen HAS been called!
        self.mock_urlopen.reset_mock()

        # Now mock empty or invalid data:
        for url_read_side_effect in [b'', b'!invalid!']:
            # Now provide a FDSN url:
            with pytest.raises(FailedDownload) as fdl:
                _ = self.get_events_df([url_read_side_effect], db.session, 'iris', {},
                                       start=datetime(2010, 1, 1),
                                       end=datetime(2011, 1, 1),
                                       db_bufsize=self.db_buf_size)
            assert 'No event found, try to change your search parameters' in str(fdl)
            assert self.mock_urlopen.called
            self.mock_urlopen.reset_mock()

            # Now provide a custom url (don't know if FDSN):
            with pytest.raises(FailedDownload) as fdl:
                _ = self.get_events_df([url_read_side_effect], db.session,
                                       'http://custom_service', {},
                                       start=datetime(2010, 1, 1),
                                       end=datetime(2011, 1, 1),
                                       db_bufsize=self.db_buf_size)
            assert ('No event found, try to change your search parameters. Check '
                    'also that the service returns parsable data (FDSN-compliant)') in str(fdl)
            assert self.mock_urlopen.called
            self.mock_urlopen.reset_mock()

            # Now provide a custom "string" (url? file? if url, don't know if FDSN):
            with pytest.raises(FailedDownload) as fdl:
                _ = self.get_events_df([url_read_side_effect], db.session, 'filepath', {},
                                       start=datetime(2010, 1, 1),
                                       end=datetime(2011, 1, 1),
                                       db_bufsize=self.db_buf_size)
            assert ('No event found. If you supplied a file, the file was not found: '
                    'check path and typos. Otherwise, try to change your search parameters: '
                    'check also that the service returns parsable data (FDSN-compliant)') \
                in str(fdl)
            assert self.mock_urlopen.called
            self.mock_urlopen.reset_mock()

    @patch('stream2segment.download.modules.events.isf2text_iter',
           side_effect=isf2text_iter)
    def test_get_events_eventws_from_isc(self, mock_isf_to_text,
                                         # fixtures:
                                         db, data):
        '''test requests to the isc service (ISF format)'''
        _ = self.get_events_df(None, db.session, 'emsc', {},
                               start=datetime(2010, 1, 1),
                               end=datetime(2011, 1, 1),
                               db_bufsize=self.db_buf_size)
        assert not mock_isf_to_text.called
        assert db.session.query(Event.id).count() == 2

        with pytest.raises(FailedDownload) as fld:
            _ = self.get_events_df(None, db.session, 'isc', {},
                                   start=datetime(2010, 1, 1),
                                   end=datetime(2011, 1, 1),
                                   db_bufsize=self.db_buf_size)
        assert "No event found, try to change your search parameters" in str(fld)
        assert mock_isf_to_text.called
        mock_isf_to_text.reset_mock()
        assert not mock_isf_to_text.called

        # now supply a valid isf file:
        _ = self.get_events_df([data.read('event_request_sample_isc.isf').decode('utf8')],
                               db.session, 'isc', {},
                               start=datetime(2010, 1, 1),
                               end=datetime(2011, 1, 1),
                               db_bufsize=self.db_buf_size)
        assert mock_isf_to_text.called
        assert db.session.query(Event.id).count() == 5
        # looking at the file, these three events should be written
        assert db.session.query(Event.id).\
            filter(Event.event_id.in_(['16868827', '600516599', '600516598'])).count() == 3
        assert db.session.query(Event.contributor_id).\
            filter(Event.event_id.in_(['16868827', '600516599', '600516598'])).count() == 3
        # and this not:
        assert db.session.query(Event.id).\
            filter(Event.event_id.in_(['15916121'])).count() == 0
        assert db.session.query(Event.contributor_id).\
            filter(Event.event_id.in_(['15916121'])).count() == 0

    @patch('stream2segment.download.modules.events.islocalfile',
           side_effect=o_islocalfile)
    def test_get_events_eventws_format_param(self, mock_islocalfile,
                                             # fixtures:
                                             db, data, pytestdir):
        '''test that format is inferred, unless explicitly set, and all
        combinations of these cases'''
        isf_file = pytestdir.newfile(create=True)
        shutil.copy(data.path('event_request_sample_isc.isf'), isf_file)
        txt_file = pytestdir.newfile(create=True)
        with open(txt_file, 'w') as _opn:
            _opn.write(self._evt_urlread_sideeffect)
        shutil.copy(data.path('event_request_sample_isc.isf'), isf_file)

        # valid isf file, no format => infer it
        for filepath, expected_events, evt_query_args in \
                [(txt_file, 2, ({}, {'format': 'txt'})),
                 (isf_file, 3, ({}, {'format': 'isf'}))]:
            for evt_query_arg in evt_query_args:
                db.session.query(Event).delete()
                _ = self.get_events_df([None], db.session, filepath, evt_query_arg,
                                       start=datetime(2010, 1, 1),
                                       end=datetime(2011, 1, 1),
                                       db_bufsize=self.db_buf_size)
                assert mock_islocalfile.call_args_list[-1][0][0] == filepath
                assert db.session.query(Event.id).count() == expected_events

        for filepath, expected_events, evt_query_arg in \
                [(txt_file, 0, {'format': 'isf'}),
                 (isf_file, 0, {'format': 'txt'})]:
            db.session.query(Event).delete()
            with pytest.raises(FailedDownload) as fdwl:
                _ = self.get_events_df([None], db.session, filepath, evt_query_arg,
                                       start=datetime(2010, 1, 1),
                                       end=datetime(2011, 1, 1),
                                       db_bufsize=self.db_buf_size)
            assert "No event found. Check that the file is non empty and its content is valid" \
                in str(fdwl)
            assert mock_islocalfile.call_args_list[-1][0][0] == filepath
            assert db.session.query(Event.id).count() == expected_events

    def test_isf2text(self, data):
        '''test isc format=isf with iris equivalent'''
        # this file is stored in the test data dir and represents the iris request:
        # https://service.iris.edu/fdsnws/event/1/query?starttime=2011-01-08T00:00:00&endtime=2011-01-08T00:05:00&format=text
        iris_req_file = 'event_request_sample_iris.txt'
        # this file is stored in the test data dir and represents the same request on isc:
        # http://www.isc.ac.uk/fdsnws/event/1/query?starttime=2011-01-08T00:00:00&endtime=2011-01-08T00:05:00&format=isf
        isc_req_file = 'event_request_sample_isc.isf'

        iris_df = response2normalizeddf('', data.read(iris_req_file).decode('utf8'), 'event')
        ret = []
        with open(data.path(isc_req_file)) as opn:
            for lst in isf2text_iter(opn, 'ISC', 'ISC'):
                ret.append('|'.join(lst))
        isc_df = response2normalizeddf('', '\n'.join(ret), 'event')

        # sort values
        iris_df.sort_values(by=[Event.contributor_id.key], inplace=True)
        isc_df.sort_values(by=[Event.event_id.key], inplace=True)
        # Now, the Event with event_location_name 'POLAND' has no magnitude
        # in isc_df, so first:
        iris_df = iris_df[iris_df[Event.event_location_name.key].str.lower() != 'poland']
        iris_df.reset_index(inplace=True, drop=True)
        isc_df.reset_index(inplace=True, drop=True)

        # 1. assert a value has correctly been parsed (by looking at the file content):
        assert isc_df[isc_df[Event.event_id.key] == '16868827'].loc[
            0, Event.magnitude.key] == 2.1
        # and set the value to the corresponding iris value, which IN THIS CASE
        # differs (maybe due to the 'Err' field =0.2 reported in the isc file?):
        isc_df.at[isc_df.loc[isc_df[Event.event_id.key] == '16868827'].index,
                  Event.magnitude.key] = 2.0
        # test we set the value:
        assert isc_df[isc_df[Event.event_id.key] == '16868827'].loc[
            0, Event.magnitude.key] == 2.0

        # 2. assert a value has correctly been parsed (by looking at the file content):
        assert isc_df[isc_df[Event.event_id.key] == '16868827'].loc[0, Event.mag_author.key] \
            == 'THE'
        # and set the value to the corresponding iris value, which IN THIS CASE
        # differs (why?):
        isc_df.at[isc_df.loc[isc_df[Event.event_id.key] == '16868827'].index,
                  Event.mag_author.key] = 'ATH'
        # test we set the value:
        assert isc_df[isc_df[Event.event_id.key] == '16868827'].loc[0, Event.mag_author.key] \
            == 'ATH'

        assert (isc_df[Event.event_id.key].values ==
                iris_df[Event.contributor_id.key].values).all()
        assert (isc_df[Event.event_id.key].values ==
                isc_df[Event.contributor_id.key].values).all()
        # assert the following columns are equal. We omit columns where the values
        # differ by spaces/upper cases/other minor stuff, like Event.event_location_name.key,
        # or because they MUST differ (Event.event_id):
        for col in iris_df.columns:
            if col not in (Event.event_id.key,
                           # Event.time.key,
                           Event.event_location_name.key,
                           ):
                assert (iris_df[col].values == isc_df[col].values).all()