예제 #1
0
def test_invalid_log():
    """Check that parse() yields an all-default result for unusable input.

    Two inputs are tried: an empty string and ``ERROR_404``. For both, the
    result must expose exactly ``cst.PARSE_KEYS`` and carry placeholder values
    (``cst.NA``, ``0``, ``None``, empty containers) in every parsed field.
    """
    not_found = '404 - No Such Resource'
    na_fields = ['first_log_time', 'latest_log_time', 'runtime', 'shutdown_reason', 'finish_reason']
    zero_fields = ['first_log_timestamp', 'latest_log_timestamp',
                   'latest_crawl_timestamp', 'latest_scrape_timestamp']
    none_fields = ['pages', 'items']

    for text in ["", ERROR_404]:
        data = parse(text)
        # NOTE(review): presumably confirms the result is JSON-serializable -- verify in cst.
        cst.json_dumps(data)

        if text:
            # The 404 page text must show up in both the head and the tail.
            assert not_found in data['head'] and not_found in data['tail']
        else:
            # Empty input: neither head nor tail may hold any content.
            assert not (data['head'] or data['tail'])

        assert set(data.keys()) == set(cst.PARSE_KEYS)
        for field in na_fields:
            assert data[field] == cst.NA
        for field in zero_fields:
            assert data[field] == 0
        for field in none_fields:
            assert data[field] is None

        # Only the mutual consistency of the two last_update fields is checked here.
        assert cst.string_to_timestamp(data['last_update_time']) == data['last_update_timestamp']
        assert data['datas'] == []

        # Every "latest match" slot is present and blank.
        latest_matches = data['latest_matches']
        for value in latest_matches.values():
            assert value == ''
        assert set(latest_matches.keys()) == set(cst.LATEST_MATCHES_RESULT_DICT.keys())

        # Every log category is present with a zero count and no details.
        log_categories = data['log_categories']
        for value in log_categories.values():
            assert value == {'count': 0, 'details': []}
        assert set(log_categories.keys()) == set(cst.LOG_CATEGORIES_RESULT_DICT.keys())
예제 #2
0
def test_chunk_size(psr):
    """Check that parsing gives the same result with the default and a small chunk size.

    The parser is run twice from a clean state (``cst.TXT_PATH`` removed before
    each run): once with the fixture's default settings and once with
    ``chunk_size=10000``. Both runs must produce the demo data; only the size
    of the file at ``cst.APPENDED_LOG_PATH`` is expected to differ.

    :param psr: fixture/factory that builds a parser; called here with
        ``execute_main=False`` so that ``main()`` is invoked explicitly.
    """
    parser = psr(execute_main=False)
    os.remove(cst.TXT_PATH)
    # os.remove() targets a file, so check existence rather than isdir(),
    # which is False for any regular file and would never catch a failure.
    assert not os.path.exists(cst.TXT_PATH)
    parser.main()
    data = cst.read_data(cst.LOG_JSON_PATH)
    assert data['first_log_time'] == '2018-10-23 18:28:34'
    assert data['latest_log_time'] == '2018-10-23 18:29:42'
    cst.check_demo_data(data)
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == cst.SIZE

    parser = psr(execute_main=False, chunk_size=10000)  # 15,862 = 9924 + 5938, 15683 = 9938 + 5745
    os.remove(cst.TXT_PATH)
    assert not os.path.exists(cst.TXT_PATH)
    parser.main()
    data = cst.read_data(cst.LOG_JSON_PATH)
    cst.json_dumps(data)
    assert data['first_log_time'] == '2018-10-23 18:28:34'
    assert data['latest_log_time'] == '2018-10-23 18:29:42'
    cst.check_demo_data(data)
    # Pick the expected size first: written inline, the conditional expression
    # binds looser than ``==`` and the assert would test the bare constant
    # (always truthy) whenever the line separator is a single character.
    expected_appended_size = 5938 if len(os.linesep) == 2 else 5745
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == expected_appended_size
예제 #3
0
def test_new_size_read_data(psr):
    """Check how the parser reuses the stored JSON result while the log grows.

    Three scenarios run in order, each appending the same short text to the
    log before calling ``parser.main()``:

    1. the stored result is valid and only a short text was appended;
    2. the stored result carries a mismatching ``logparser_version``;
    3. the stored JSON file has been corrupted.

    The scenarios share state on disk, so their order matters.

    :param psr: fixture/factory that builds a parser.
    """
    extra_text = u'test'
    extra_len = len(extra_text)
    size_after_one_append = cst.SIZE + extra_len
    size_after_two_appends = cst.SIZE + extra_len * 2
    size_after_three_appends = cst.SIZE + extra_len * 3

    # NOTE(review): presumably psr() already runs main() once by default -- confirm in the fixture.
    parser = psr()
    stored = cst.read_data(cst.LOG_JSON_PATH)
    assert stored['logparser_version'] == cst.LOGPARSER_VERSION
    cst.check_demo_data(stored)
    previous_update = stored['last_update_timestamp']

    # Scenario 1: valid but short appended log.
    cst.write_text(cst.LOG_PATH, extra_text, append=True)
    time.sleep(2)  # let the clock advance so the new update timestamp is strictly greater
    parser.main()
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == 0
    stored = cst.read_data(cst.LOG_JSON_PATH)
    assert stored['last_update_timestamp'] > previous_update
    assert stored['size'] == size_after_one_append
    assert stored['position'] == cst.SIZE
    # The previously parsed result is not affected by the short appended log.
    cst.check_demo_data(stored)

    # Scenario 2: stored result written by a mismatching version.
    stored['logparser_version'] = '0.0.0'
    cst.write_text(cst.LOG_JSON_PATH, cst.json_dumps(stored))
    stored = cst.read_data(cst.LOG_JSON_PATH)
    assert stored['logparser_version'] == '0.0.0'

    cst.write_text(cst.LOG_PATH, extra_text, append=True)
    parser.main()
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == size_after_two_appends
    stored = cst.read_data(cst.LOG_JSON_PATH)
    assert stored['logparser_version'] == cst.LOGPARSER_VERSION
    assert stored['size'] == size_after_two_appends
    assert stored['position'] == size_after_two_appends
    cst.check_demo_data(stored)

    # Scenario 3: broken JSON file (raw text appended to it).
    cst.write_text(cst.LOG_JSON_PATH, extra_text, append=True)
    cst.write_text(cst.LOG_PATH, extra_text, append=True)
    parser.main()
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == size_after_three_appends
    stored = cst.read_data(cst.LOG_JSON_PATH)
    assert stored['size'] == size_after_three_appends
    assert stored['position'] == size_after_three_appends
    cst.check_demo_data(stored)