def get_dreport_str_iter(session, download_ids=None, config=True, log=True):
    '''Returns an iterator yielding the download report (log and config) for the given
    download_ids

    :param session: an sql-alchemy session denoting a db session to a database
    :param download_ids: (list of ints or None) if None, collect statistics from all
        downloads run. Otherwise limit the output to the downloads whose ids are in
        the list
    :param config: boolean (default: True). Whether to show the download config
    :param log: boolean (default: True). Whether to show the download log messages
    '''
    data = infoquery(session, download_ids, config, log)
    for dwnl_id, dwnl_time, configtext, logtext in data:
        yield ''
        yield ascii_decorate('Download id: %d (%s)' % (dwnl_id, str(dwnl_time)))
        # header + 'N/A' marker are printed only when both flags are given
        # (matches the CLI test exercising '--config --log' together)
        if config and log:
            yield ''
            yield 'Configuration:%s' % (' N/A' if not configtext else '')
        if configtext:
            yield ''
            yield configtext
        if config and log:
            yield ''
            # BUGFIX: the ' N/A' marker was driven by `configtext` (copy-paste
            # error from the Configuration branch above); the availability of the
            # *log* text must depend on `logtext`:
            yield 'Log messages:%s' % (' N/A' if not logtext else '')
        if logtext:
            yield ''
            yield logtext
def test_dstats_no_segments(self, mock_gettempdir, mock_open_in_browser, db, pytestdir): '''test a case where save inventory is True, and that we saved inventories''' # mock a download with only inventories, i.e. with no segments downloaded dwnl = Download() dwnl.run_time = datetime(2018, 12, 2, 16, 46, 56, 472330) db.session.add(dwnl) db.session.commit() runner = CliRunner() # text output, to file outfile = pytestdir.newfile('.txt') result = runner.invoke(cli, ['utils', self.CMD_NAME, '--dburl', db.dburl, outfile]) assert not result.exception content = open(outfile).read() assert """ OK OK Time Segment Internal Gaps Partially Span MSeed Url Not Bad Server OK Overlaps Saved Error Error Error Found Request Error TOTAL ------------------------ -- -------- --------- ----- ----- ----- ------- ------- -------- ----- www.dc1/dataselect/query 3 1 2 1 1 1 1 1 1 12 TOTAL 3 1 2 1 1 1 1 1 1 12""" in content assert result.output.startswith("""Fetching data, please wait (this might take a while depending on the db size and connection) download statistics written to """) assert not mock_open_in_browser.called assert not mock_gettempdir.called expected_string = ascii_decorate("Download id: 2") # result.output below is uncicode in PY2, whereas expected_string is str # Thus if PY2: expected_string = expected_string.decode('utf8') assert expected_string in content expected_string2 = """ Executed: 2018-12-02T16:46:56.472330 Event query parameters: N/A No segments downloaded """ assert expected_string2 in content[content.index(expected_string):] # run with html, test just that everything works fine result = runner.invoke(cli, ['utils', self.CMD_NAME, '--html', '--dburl', db.dburl, outfile]) assert not result.exception
def get_dstats_str_iter(session, download_ids=None, maxgap_threshold=0.5):
    '''Returns an iterator yielding the download statistics and information matching
    the given parameters.
    The returned string can be joined and printed to screen or file and is made of
    tables showing the segment data on the db per data-center and download run, plus
    some download information.

    :param session: an sql-alchemy session denoting a db session to a database
    :param download_ids: (list of ints or None) if None, collect statistics from all
        downloads run. Otherwise limit the output to the downloads whose ids are in
        the list. In any case, in case of more download runs to be considered, this
        function will yield also the statistics aggregating all downloads in a table
        at the end
    :param maxgap_threshold: (float, default 0.5). Sets the threshold whereby a
        segment is to be considered with gaps or overlaps. By default is 0.5, meaning
        that a segment whose 'maxgap_numsamples' value is > 0.5 has gaps, and a
        segment whose 'maxgap_numsamples' value is < 0.5 has overlaps. Such segments
        will be marked with a special class 'OK Gaps Overlaps' in the table columns.
    '''
    # Benchmark: the bare minimum (with postgres on external server) request takes
    # around 12 sec and 14 seconds adding all necessary information. Therefore, we
    # choose the latter
    maxgap_bexpr = get_maxgap_sql_expr(maxgap_threshold)
    data = session.query(func.count(Segment.id),
                         Segment.download_code,
                         Segment.datacenter_id,
                         Segment.download_id,
                         maxgap_bexpr)
    data = filterquery(data, download_ids).group_by(Segment.download_id,
                                                    Segment.datacenter_id,
                                                    Segment.download_code,
                                                    maxgap_bexpr)
    dwlids = get_downloads(session, download_ids)
    # aggregate table is shown only when more than one download run is reported:
    show_aggregate_stats = len(dwlids) > 1
    dcurl = get_datacenters(session)

    if show_aggregate_stats:
        agg_statz = DownloadStats2()
    # IDIOM: pass the class itself as default factory (no lambda wrapper needed):
    stas = defaultdict(DownloadStats2)
    GAP_OVLAP_CODE = DownloadStats2.GAP_OVLAP_CODE  # pylint: disable=invalid-name
    for segcount, dwn_code, dc_id, dwn_id, has_go in data:
        statz = stas[dwn_id]
        # segments downloaded ok (code 200) but with gaps/overlaps get a special
        # code so they are counted in their own column:
        if dwn_code == 200 and has_go is True:
            dwn_code = GAP_OVLAP_CODE
        statz[dcurl[dc_id]][dwn_code] += segcount
        if show_aggregate_stats:
            agg_statz[dcurl[dc_id]][dwn_code] += segcount

    # used for alignment of strings (calculated lazily in loop below, from the
    # first download run having event parameters):
    evparamlen = None
    for did, (druntime, evtparams) in viewitems(dwlids):
        yield ''
        yield ''
        yield ascii_decorate('Download id: %d' % did)
        yield ''
        yield 'Executed: %s' % str(druntime)
        yield "Event query parameters:%s" % (' N/A' if not evtparams else '')
        if evparamlen is None and evtparams:
            # calculate eventparamlen for string alignment
            evparamlen = max(len(_) for _ in evtparams)
        for param in sorted(evtparams):
            yield (" %-{:d}s = %s".format(evparamlen)) % (param, str(evtparams[param]))
        yield ''
        statz = stas.get(did)
        if statz is None:
            yield "No segments downloaded"
        else:
            # BUGFIX: typo in user-facing output ('Downlaoaded' -> 'Downloaded'):
            yield "Downloaded segments per data center url (row) and response type (column):"
            yield ""
            yield str(statz)

    if show_aggregate_stats:
        yield ''
        yield ''
        yield ascii_decorate('Aggregated stats (all downloads)')
        yield ''
        yield str(agg_statz)
def test_simple_dstats(self, mock_gettempdir, mock_open_in_browser, db, pytestdir):
    '''Tests the dstats CLI command (text and html output) on a populated db:
    checks the per-datacenter stats table, the per-station json data embedded in
    the html, the effect of the max-gap option, and the temp-file/browser path.'''
    runner = CliRunner()
    # text output, to file
    outfile = pytestdir.newfile('.txt')
    result = runner.invoke(cli, ['utils', self.CMD_NAME, '--dburl', db.dburl, outfile])
    assert not result.exception
    content = open(outfile).read()
    # NOTE(review): the multi-line literals below appear whitespace-collapsed (their
    # original newlines/alignment were lost); verify against the real CLI output:
    assert """ OK OK Time Segment Internal Gaps Partially Span MSeed Url Not Bad Server OK Overlaps Saved Error Error Error Found Request Error TOTAL ------------------------ -- -------- --------- ----- ----- ----- ------- ------- -------- ----- www.dc1/dataselect/query 3 1 2 1 1 1 1 1 1 12 TOTAL 3 1 2 1 1 1 1 1 1 12""" in content
    assert result.output.startswith("""Fetching data, please wait (this might take a while depending on the db size and connection) download statistics written to """)
    assert not mock_open_in_browser.called
    assert not mock_gettempdir.called
    # text output, to stdout
    result = runner.invoke(cli, ['utils', self.CMD_NAME, '--dburl', db.dburl])
    assert not result.exception
    assert """ OK OK Time Segment Internal Gaps Partially Span MSeed Url Not Bad Server OK Overlaps Saved Error Error Error Found Request Error TOTAL ------------------------ -- -------- --------- ----- ----- ----- ------- ------- -------- ----- www.dc1/dataselect/query 3 1 2 1 1 1 1 1 1 12 TOTAL 3 1 2 1 1 1 1 1 1 12""" in result.output
    assert not mock_open_in_browser.called
    expected_string = ascii_decorate("Download id: 1")
    # result.output below is unicode in PY2, whereas expected_string is str
    # Thus
    if PY2:
        expected_string = expected_string.decode('utf8')
    assert expected_string in result.output
    assert not mock_gettempdir.called

    # Test html output.
    # First, implement function that parses the sta_data content into dict and
    # check stuff:
    def jsonloads(varname, content):
        # Extract the json array assigned to `varname` inside the html `content`
        # by scanning for the first '[' after 'varname:' and matching brackets:
        start = re.search('%s\\s*:\\s*' % varname, content).end()
        end = start
        while content[end] != '[':
            end += 1
        end += 1
        brakets = 1
        while brakets > 0:
            if content[end] == "[":
                brakets += 1
            elif content[end] == ']':
                brakets -= 1
            end += 1
        return json.loads(content[start:end])

    # html output, to file
    outfile = pytestdir.newfile('.html')
    result = runner.invoke(cli, ['utils', self.CMD_NAME, '--html', '--dburl',
                                 db.dburl, outfile])
    assert not result.exception
    content = open(outfile).read()
    sta_data = jsonloads('sta_data', content)
    networks = jsonloads('networks', content)
    # sta_data is a flat list alternating [name, data, name, data, ...]:
    assert len(sta_data) == db.session.query(Station.id).count() * 2
    for i in range(0, len(sta_data), 2):
        staname = sta_data[i]
        data = sta_data[i+1]
        # Each sta is: [sta_name, [staid, stalat, stalon, sta_dcid,
        #                          d_id1, [code1, num_seg1 , ..., codeN, num_seg],
        #                          d_id2, [code1, num_seg1 , ..., codeN, num_seg],
        #                         ]
        if staname == 'S1':
            assert data[0] == 1
            assert data[1] == 11  # lat
            assert data[2] == 11  # lon
            assert data[3] == 1  # dc id
            assert data[4] == networks.index('N1')
            assert data[5] == 1  # download id (only 1)
            # assert the correct segments categories
            # (list length is the number of categories,
            # each element is the num of segments for that category,
            # and the sum of all elements must be 4)
            assert sorted(data[6][1::2]) == [1, 1, 1, 1]
        elif staname == 'S2a':
            assert data[0] == 2
            assert data[1] == 22.1  # lat
            assert data[2] == 22.1  # lon
            assert data[3] == 1  # dc id
            assert data[4] == networks.index('N1')
            assert data[5] == 1  # download id (only 1)
            # assert the correct segments categories
            # (list length is the number of categories,
            # each element is the num of segments for that category,
            # and the sum of all elements must be 4)
            assert sorted(data[6][1::2]) == [1, 1, 2]
        elif staname == 'S2b':
            assert data[0] == 3
            assert data[1] == 22.2  # lat
            assert data[2] == 22.2  # lon
            assert data[3] == 1  # dc id
            assert data[4] == networks.index('N2')
            assert data[5] == 1  # download id (only 1)
            # assert the correct segments categories
            # (list length is the number of categories,
            # each element is the num of segments for that category,
            # and the sum of all elements must be 4)
            assert sorted(data[6][1::2]) == [1, 1, 1, 1]
        else:
            raise Exception('station should not be there: %s' % staname)
    assert result.output.startswith("""Fetching data, please wait (this might take a while depending on the db size and connection) download statistics written to """)
    assert not mock_open_in_browser.called
    assert not mock_gettempdir.called

    # html output, to file, setting maxgap to 0.2, so that S1a' has all three ok
    # segments with gaps
    # NOTE(review): comment says 0.2 but the option below passes 0.15 — confirm
    # which value is intended:
    result = runner.invoke(cli, ['utils', self.CMD_NAME, '-g', '0.15', '--html',
                                 '--dburl', db.dburl, outfile])
    assert not result.exception
    content = open(outfile).read()
    # parse the sta_data content into dict and check stuff:
    sta_data = jsonloads('sta_data', content)
    networks = jsonloads('networks', content)
    assert len(sta_data) == db.session.query(Station.id).count() * 2
    for i in range(0, len(sta_data), 2):
        staname = sta_data[i]
        data = sta_data[i+1]
        # Each sta is: [sta_name, [staid, stalat, stalon, sta_dcid,
        #                          d_id1, [code1, num_seg1 , ..., codeN, num_seg],
        #                          d_id2, [code1, num_seg1 , ..., codeN, num_seg],
        #                         ]
        if staname == 'S1':
            assert data[0] == 1
            assert data[1] == 11  # lat
            assert data[2] == 11  # lon
            assert data[3] == 1  # dc id
            assert data[4] == networks.index('N1')
            assert data[5] == 1  # download id (only 1)
            # assert the correct segments categories
            # (list length is the number of categories,
            # each element is the num of segments for that category,
            # and the sum of all elements must be 4)
            assert sorted(data[6][1::2]) == [1, 1, 1, 1]
        elif staname == 'S2a':
            assert data[0] == 2
            assert data[1] == 22.1  # lat
            assert data[2] == 22.1  # lon
            assert data[3] == 1  # dc id
            assert data[4] == networks.index('N1')
            assert data[5] == 1  # download id (only 1)
            # assert the correct segments categories
            # (list length is the number of categories,
            # each element is the num of segments for that category,
            # and the sum of all elements must be 4)
            assert sorted(data[6][1::2]) == [1, 3]
        elif staname == 'S2b':
            assert data[0] == 3
            assert data[1] == 22.2  # lat
            assert data[2] == 22.2  # lon
            assert data[3] == 1  # dc id
            assert data[4] == networks.index('N2')
            assert data[5] == 1  # download id (only 1)
            # assert the correct segments categories
            # (list length is the number of categories,
            # each element is the num of segments for that category,
            # and the sum of all elements must be 4)
            assert sorted(data[6][1::2]) == [1, 1, 1, 1]
        else:
            raise Exception('station should not be there: %s' % staname)
    assert result.output.startswith("""Fetching data, please wait (this might take a while depending on the db size and connection) download statistics written to """)
    assert not mock_open_in_browser.called
    assert not mock_gettempdir.called

    # html output, to temp file
    mytmpdir = pytestdir.makedir()
    assert not os.listdir(mytmpdir)
    # redirect the mocked tempfile.gettempdir to our fresh dir:
    mock_gettempdir.side_effect = lambda *a, **v: mytmpdir
    result = runner.invoke(cli, ['utils', self.CMD_NAME, '--html', '--dburl', db.dburl])
    assert not result.exception
    # with no output file, the html is written to the temp dir and opened in browser:
    assert mock_open_in_browser.called
    assert mock_gettempdir.called
    assert os.listdir(mytmpdir) == ['s2s_dstats.html']
def process(dburl, pyfile, funcname=None, config=None, outfile=None, log2file=False,
            verbose=False, append=False, **param_overrides):
    """
    Runs the processing function `funcname` (defined in the Python module `pyfile`)
    on every segment saved in the database at `dburl`, optionally writing the
    results to `outfile` in .csv format.

    If `outfile` is given, the function is expected to return lists/dicts to be
    written as csv rows; otherwise its return values are ignored (the function is
    supposed to produce its own side effects, e.g. save processed miniSeed to the
    file system).

    :param log2file: if True, all messages with level >= logging.INFO will be
        printed to a log file named <outfile>.<now>.log (where now is the current
        date and time in iso format) or <pyfile>.<now>.log, if <outfile> is None
    :param verbose: if True, all messages with level logging.INFO, logging.ERROR
        and logging.CRITICAL will be printed to the screen, as well as a
        progress-bar showing the eta (estimated time available)
    :param param_overrides: parameters overriding the yaml config. Nested dicts
        will be merged, not replaced

    :return: 0 on success; any failure is logged and re-raised (segment-level
        errors other than ImportError, NameError, AttributeError, SyntaxError,
        TypeError are logged as warnings during processing and do not stop the run)
    """
    # Validate input and build what we need here (may raise ValueError for bad
    # parameters, before any db session or logger is set up):
    session, pyfunc, funcname, config_dict = \
        load_config_for_process(dburl, pyfile, funcname, config, outfile,
                                **param_overrides)
    loghandlers = configlog4processing(logger,
                                       (outfile or pyfile) if log2file else None,
                                       verbose)
    try:
        to_abs = os.path.abspath
        # Log a decorated banner summarizing the run configuration:
        banner = "\n".join([
            "Input database: %s" % secure_dburl(dburl),
            "Processing function: %s:%s" % (to_abs(pyfile), funcname),
            "Config. file: %s" % (to_abs(config) if config else 'n/a'),
            "Log file: %s" % (to_abs(loghandlers[0].baseFilename)
                              if log2file else 'n/a'),
            "Output file: %s" % (to_abs(outfile) if outfile else 'n/a'),
        ])
        logger.info(ascii_decorate(banner))

        t_start = time.time()
        writer_options = config_dict.get('advanced_settings', {}) \
                                    .get('writer_options', {})
        run_process(session, pyfunc, get_writer(outfile, append, writer_options),
                    config_dict, verbose)
        logger.info("Completed in %s", str(totimedelta(t_start)))
        return 0
    except KeyboardInterrupt:
        # user abort: log (no stack trace needed) and re-raise
        logger.critical("Aborted by user")
        raise
    except:  # @IgnorePep8 pylint: disable=broad-except
        # contrarily to download, any other exception is logged with the full
        # stack trace (helps users discover bugs in their pyfile without waiting
        # for the whole process to finish) and then re-raised
        logger.critical("Process aborted", exc_info=True)
        raise
    finally:
        closesession(session)
def test_simple_dreport(self, mock_gettempdir, mock_open_in_browser, db, pytestdir):
    '''Tests the dreport CLI command: default invocation (no --config/--log flags),
    the flagged invocation, stdout output, and two malformed-flag invocations.'''
    # test "no flags" case:
    runner = CliRunner()
    # text output, to file
    outfile = pytestdir.newfile('.txt')
    result = runner.invoke(
        cli, ['utils', self.CMD_NAME, '--dburl', db.dburl, outfile])
    assert not result.exception
    expected_string = ascii_decorate(
        "Download id: 1 (%s)" % str(db.session.query(Download.run_time).first()[0]))
    # result.output below is unicode in PY2, whereas expected_string is str
    # Thus
    if PY2:
        expected_string = expected_string.decode('utf8')
    content = open(outfile).read()
    # without flags, the report is just the decorated header:
    assert expected_string.strip() == content.strip()

    # test "normal" case:
    runner = CliRunner()
    # text output, to file
    outfile = pytestdir.newfile('.txt')
    result = runner.invoke(cli, [
        'utils', self.CMD_NAME, '--config', '--log', '--dburl', db.dburl, outfile
    ])
    assert not result.exception
    expected_string = ascii_decorate(
        "Download id: 1 (%s)" % str(db.session.query(Download.run_time).first()[0]))
    # NOTE(review): this literal appears whitespace-collapsed (original newlines
    # lost); verify against the actual report layout:
    expected_string += """ Configuration: N/A Log messages: N/A"""
    # result.output below is unicode in PY2, whereas expected_string is str
    # Thus
    if PY2:
        expected_string = expected_string.decode('utf8')
    content = open(outfile).read()
    assert expected_string in content
    assert result.output.startswith(
        """Fetching data, please wait (this might take a while depending on the db size and connection) download report written to """)
    assert not mock_open_in_browser.called
    assert not mock_gettempdir.called

    # calling with no ouptut file (print to screen, i.e. result.output):
    result = runner.invoke(
        cli, ['utils', self.CMD_NAME, '--log', '--config', '--dburl', db.dburl])
    assert not result.exception
    content = result.output
    assert expected_string in content
    assert result.output.startswith(
        """Fetching data, please wait (this might take a while depending on the db size and connection) """)
    assert not mock_open_in_browser.called
    assert not mock_gettempdir.called

    expected = """Fetching data, please wait (this might take a while depending on the db size and connection) """
    # try with flags:
    # NOTE(review): '--log' sits between '--dburl' and its value, so '--log' is
    # consumed as the dburl value — presumably intentional (asserts that nothing
    # beyond the 'Fetching data' message is printed), but confirm:
    result = runner.invoke(
        cli, ['utils', self.CMD_NAME, '--dburl', '--log', db.dburl])
    assert expected in result.output
    assert not result.output[result.output.index(expected) + len(expected):]
    # try with flags:
    # NOTE(review): same argument-ordering concern as above, with '--config':
    result = runner.invoke(
        cli, ['utils', self.CMD_NAME, '--dburl', '--config', db.dburl])
    assert expected in result.output
    assert not result.output[result.output.index(expected) + len(expected):]