Code Example #1
File: test_logparser.py  Project: sulthonzh/logparser
def test_demo_log_files(psr):
    psr()
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    txt_data = cst.read_data(cst.TXT_JSON_PATH)
    for k in cst.PARSE_KEYS:
        if k not in ['last_update_time', 'last_update_timestamp']:
            assert log_data[k] == txt_data[k]

    # 2019-01-01T00_00_01.log
    # 2019-01-01T00_00_02.txt
    for case, data in zip(['log', 'txt'], [log_data, txt_data]):
        cst.check_demo_data(data)

        if case == 'log':
            job = cst.JOB
            ext = 'log'
        else:
            job = cst.JOB_TXT
            ext = 'txt'
        assert data['log_path'].endswith('%s.%s' % (job, ext))
        assert data['json_path'].endswith('%s.json' % job)
        assert data['json_url'].endswith('%s.json' % job)
        assert data['json_url'].startswith('http://%s' % cst.SCRAPYD_SERVER)

        assert data['size'] == cst.SIZE
        assert data['position'] == cst.SIZE
        assert data['status'] == cst.STATUS
        assert data['_head'] == cst.LOG_HEAD_LINES
        assert data['logparser_version'] == cst.LOGPARSER_VERSION
Code Example #2
def test_chunk_size(psr):
    parser = psr(execute_main=False)
    os.remove(cst.TXT_PATH)
    assert not os.path.isdir(cst.TXT_PATH)
    parser.main()
    data = cst.read_data(cst.LOG_JSON_PATH)
    assert data['first_log_time'] == '2018-10-23 18:28:34'
    assert data['latest_log_time'] == '2018-10-23 18:29:42'
    cst.check_demo_data(data)
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == cst.SIZE

    parser = psr(execute_main=False, chunk_size=10000)  # 15,862 = 9924 + 5938, 15683 = 9938 + 5745
    os.remove(cst.TXT_PATH)
    assert not os.path.isdir(cst.TXT_PATH)
    parser.main()
    data = cst.read_data(cst.LOG_JSON_PATH)
    cst.json_dumps(data)
    assert data['first_log_time'] == '2018-10-23 18:28:34'
    assert data['latest_log_time'] == '2018-10-23 18:29:42'
    cst.check_demo_data(data)
    # Expected size depends on line endings: 5938 with CRLF, 5745 with LF (see the sizes noted above).
    # Parentheses are required; without them the conditional expression makes the assert vacuous on LF systems.
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == (5938 if len(os.linesep) == 2 else 5745)
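As a side note, the fields these tests read back through cst.read_data can also be inspected with the standard library alone. A minimal sketch, assuming the parser has already written its JSON result to a file; the path below is hypothetical, and the keys are the ones asserted in the snippets above:

import json

# Hypothetical path; the tests use cst.LOG_JSON_PATH instead.
with open('demo.json', encoding='utf-8') as f:
    data = json.load(f)

# Keys asserted in test_demo_log_files() and test_chunk_size()
for key in ('size', 'position', 'status', 'first_log_time',
            'latest_log_time', 'logparser_version'):
    print(key, data.get(key))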
Code Example #3
def test_new_size_read_data(psr):
    appended_log = u'test'
    appended_log_length = len(appended_log)
    parser = psr()
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['logparser_version'] == cst.LOGPARSER_VERSION
    cst.check_demo_data(log_data)
    last_update_timestamp = log_data['last_update_timestamp']

    # Valid but short appended log
    cst.write_text(cst.LOG_PATH, appended_log, append=True)
    time.sleep(2)
    parser.main()
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == 0
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['last_update_timestamp'] > last_update_timestamp
    assert log_data['size'] == cst.SIZE + appended_log_length
    assert log_data['position'] == cst.SIZE
    cst.check_demo_data(log_data)  # Previous parsed result is not affected by short appended log

    # Mismatching version
    log_data['logparser_version'] = '0.0.0'
    cst.write_text(cst.LOG_JSON_PATH, cst.json_dumps(log_data))
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['logparser_version'] == '0.0.0'

    cst.write_text(cst.LOG_PATH, appended_log, append=True)
    now_size = cst.SIZE + appended_log_length * 2
    parser.main()
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == now_size
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['logparser_version'] == cst.LOGPARSER_VERSION
    assert log_data['size'] == now_size
    assert log_data['position'] == now_size
    cst.check_demo_data(log_data)

    # Broken json file
    cst.write_text(cst.LOG_JSON_PATH, appended_log, append=True)
    cst.write_text(cst.LOG_PATH, appended_log, append=True)
    now_size = cst.SIZE + appended_log_length * 3
    parser.main()
    assert os.path.getsize(cst.APPENDED_LOG_PATH) == now_size
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['size'] == now_size
    assert log_data['position'] == now_size
    cst.check_demo_data(log_data)
Code Example #4
File: test_parse.py  Project: sulthonzh/logparser
def test_demo_log():
    modified_logstats = FRONT.replace("Crawled 3 pages (at 0 pages/min), scraped 2 items (at 0 items/min)",
                                      "Crawled 1 pages (at 2 pages/min), scraped 3 items (at 4 items/min)")
    for case, text in zip(['without_stats_dumped', 'whole_log', 'modified_logstats'],
                          [FRONT, FRONT + END, modified_logstats + END]):
        data = parse(text, headlines=50, taillines=100)  # 180 lines in total
        # cst.json_dumps(data)

        if case == 'without_stats_dumped':
            cst.check_demo_data(data, without_stats_dumped=True)
        elif case == 'modified_logstats':  # to test update_data_with_crawler_stats()
            cst.check_demo_data(data, without_stats_dumped=False, modified_logstats=True)
        else:
            cst.check_demo_data(data, without_stats_dumped=False)
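For context, test_demo_log() drives the library's parse() function directly. A minimal sketch of the same call outside the test suite, assuming parse is importable from the logparser package and that the log text comes from an ordinary Scrapy log file; the file name is hypothetical, and the printed keys are taken from the other snippets rather than a documented return schema:

from logparser import parse  # assumed import path for the function used above

# Hypothetical Scrapy log file; the tests build the text from FRONT and END instead.
with open('2019-01-01T00_00_01.log', encoding='utf-8') as f:
    text = f.read()

data = parse(text, headlines=50, taillines=100)  # same arguments as in test_demo_log()
# Keys shown in test_chunk_size(); assumed to be present in parse() output as well.
print(data.get('first_log_time'), data.get('latest_log_time'))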
Code Example #5
File: test_logparser.py  Project: sulthonzh/logparser
def test_new_file_read_data(psr):
    psr()
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    last_update_timestamp = log_data['last_update_timestamp']

    # Skip parsing since data with same size found
    # Old file with old size
    parser = psr(execute_main=False, reset_logs=False)
    for i in range(2):
        time.sleep(2)
        parser.main()
        log_data = cst.read_data(cst.LOG_JSON_PATH)
        assert log_data['last_update_timestamp'] == last_update_timestamp
        cst.check_demo_data(log_data)

    # Old logfile with smaller size
    cst.write_text(cst.LOG_PATH, FRONT + END.replace('memory', ''))
    parser.main()
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['last_update_timestamp'] == last_update_timestamp
    cst.check_demo_data(log_data)
    stats = cst.read_data(cst.STATS_JSON_PATH)
    assert cst.PROJECT not in stats['datas']
    # -> parse in next round
    parser.main()
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['last_update_timestamp'] > last_update_timestamp
    cst.check_demo_data(log_data)
    stats = cst.read_data(cst.STATS_JSON_PATH)
    assert cst.PROJECT in stats['datas']

    # Read data fail
    time.sleep(2)
    cst.write_text(cst.LOG_JSON_PATH, u'')
    psr(reset_logs=False)
    log_data = cst.read_data(cst.LOG_JSON_PATH)
    assert log_data['last_update_timestamp'] > last_update_timestamp
    cst.check_demo_data(log_data)
Code Example #6
def test_log_categories_limit(psr):
    log_categories_limit = 3
    psr(log_categories_limit=log_categories_limit)
    data = cst.read_data(cst.LOG_JSON_PATH)
    cst.check_demo_data(data, log_categories_limit=log_categories_limit)