def test_invalid_log():
    for text in ["", ERROR_404]:
        data = parse(text)
        cst.json_dumps(data)
        if not text:
            assert not (data['head'] or data['tail'])
        else:
            assert '404 - No Such Resource' in data['head'] and '404 - No Such Resource' in data['tail']
        assert set(data.keys()) == set(cst.PARSE_KEYS)
        for k in ['first_log_time', 'latest_log_time', 'runtime', 'shutdown_reason', 'finish_reason']:
            assert data[k] == cst.NA
        for k in ['first_log_timestamp', 'latest_log_timestamp', 'latest_crawl_timestamp', 'latest_scrape_timestamp']:
            assert data[k] == 0
        for k in ['pages', 'items']:
            assert data[k] is None
        # assert data['last_update_timestamp'] > 0  # 1546272001
        # assert len(data['last_update_time']) == 19  # "2019-01-01 00:00:01"
        assert cst.string_to_timestamp(data['last_update_time']) == data['last_update_timestamp']
        assert data['datas'] == []
        for v in data['latest_matches'].values():
            assert v == ''
        assert set(data['latest_matches'].keys()) == set(cst.LATEST_MATCHES_RESULT_DICT.keys())
        for v in data['log_categories'].values():
            assert v == dict(count=0, details=[])
        assert set(data['log_categories'].keys()) == set(cst.LOG_CATEGORIES_RESULT_DICT.keys())
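
# A minimal sketch (not part of the test suite) of how a "%Y-%m-%d %H:%M:%S" string such as
# data['last_update_time'] can be converted to the epoch value it is compared against above.
# The name and exact behaviour of cst.string_to_timestamp are assumptions; this only
# illustrates the round trip checked by the assertion.
def _string_to_timestamp_sketch(string, fmt='%Y-%m-%d %H:%M:%S'):
    import time
    return int(time.mktime(time.strptime(string, fmt)))
    # e.g. _string_to_timestamp_sketch("2019-01-01 00:00:01") == 1546272001 on a UTC host
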
def test_chunk_size(psr):
    parser = psr(execute_main=False)
    os.remove(cst.TXT_PATH)
    assert not os.path.isdir(cst.TXT_PATH)
    parser.main()
    data = cst.read_data(cst.LOG_JSON_PATH)
    assert data['first_log_time'] == '2018-10-23 18:28:34'
    assert data['latest_log_time'] == '2018-10-23 18:29:42'
    cst.check_demo_data(data)
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == cst.SIZE

    parser = psr(execute_main=False, chunk_size=10000)  # 15862 = 9924 + 5938, 15683 = 9938 + 5745
    os.remove(cst.TXT_PATH)
    assert not os.path.isdir(cst.TXT_PATH)
    parser.main()
    data = cst.read_data(cst.LOG_JSON_PATH)
    cst.json_dumps(data)
    assert data['first_log_time'] == '2018-10-23 18:28:34'
    assert data['latest_log_time'] == '2018-10-23 18:29:42'
    cst.check_demo_data(data)
    # The appended log size depends on the platform line separator (CRLF vs LF)
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == (5938 if len(os.linesep) == 2 else 5745)
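
# A rough sketch (assumptions only) of the line-aligned chunking that the byte counts in the
# comment above suggest: each chunk is cut back to the last newline inside the chunk_size
# window and the remainder rolls into the next chunk (e.g. 15862 bytes -> 9924 + 5938 with
# chunk_size=10000, leaving only the final 5938-byte chunk as the "appended" log). The actual
# LogParser internals may differ; this helper is purely illustrative.
def _line_aligned_chunks_sketch(data, chunk_size=10000):
    chunks = []
    while data:
        if len(data) <= chunk_size:
            chunks.append(data)
            break
        cut = data.rfind(b'\n', 0, chunk_size) + 1  # cut just after the last complete line
        if cut == 0:                                # no newline found: fall back to a hard cut
            cut = chunk_size
        chunks.append(data[:cut])
        data = data[cut:]
    return chunks
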
def test_new_size_read_data(psr):
    appended_log = u'test'
    appended_log_length = len(appended_log)
    parser = psr()
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['logparser_version'] == cst.LOGPARSER_VERSION
    cst.check_demo_data(log_data)
    last_update_timestamp = log_data['last_update_timestamp']

    # Valid but short appended log
    cst.write_text(cst.LOG_PATH, appended_log, append=True)
    time.sleep(2)
    parser.main()
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == 0
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['last_update_timestamp'] > last_update_timestamp
    assert log_data['size'] == cst.SIZE + appended_log_length
    assert log_data['position'] == cst.SIZE
    cst.check_demo_data(log_data)  # previous parsed result is not affected by the short appended log

    # Mismatching version
    log_data['logparser_version'] = '0.0.0'
    cst.write_text(cst.LOG_JSON_PATH, cst.json_dumps(log_data))
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['logparser_version'] == '0.0.0'
    cst.write_text(cst.LOG_PATH, appended_log, append=True)
    now_size = cst.SIZE + appended_log_length * 2
    parser.main()
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == now_size
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['logparser_version'] == cst.LOGPARSER_VERSION
    assert log_data['size'] == now_size
    assert log_data['position'] == now_size
    cst.check_demo_data(log_data)

    # Broken json file
    cst.write_text(cst.LOG_JSON_PATH, appended_log, append=True)
    cst.write_text(cst.LOG_PATH, appended_log, append=True)
    now_size = cst.SIZE + appended_log_length * 3
    parser.main()
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == now_size
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['size'] == now_size
    assert log_data['position'] == now_size
    cst.check_demo_data(log_data)
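
# A rough sketch (assumptions only) of the bookkeeping the assertions above rely on: the stored
# JSON remembers how big the log was ('size') and how far it was parsed ('position'); a stale
# 'logparser_version' or a broken/truncated JSON file means the cached result cannot be trusted,
# so the whole log is re-parsed and 'size'/'position' are rewritten to the current file size.
# The helper name and its exact behaviour are illustrative, not the actual LogParser code.
def _load_cached_state_sketch(json_path, expected_version):
    import json
    try:
        with open(json_path) as f:
            state = json.load(f)
    except (OSError, ValueError):
        return None                      # missing or broken JSON: full re-parse
    if state.get('logparser_version') != expected_version:
        return None                      # version mismatch: full re-parse
    return state                         # otherwise resume from state['position']
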