def test_page(page: int, date: str, texts: list, tests_cnt: int = 5, verbose: bool = False):
    """Run `extract` on a single page several times and print each verdict.

    Nothing is extracted or printed when the page number is listed by
    `get_not_valid_cnts()` or when `validate_page` rejects the page text.

    Args:
        page: Index into `texts` of the page to check.
        date: Expected result as a string in "%d.%m.%Y %H:%M:%S" format,
            or the literal 'None' when no date is expected.
        texts: Page texts, indexed by page number.
        tests_cnt: Number of times the extraction is repeated
            (presumably to expose non-deterministic results -- TODO confirm).
        verbose: Forwarded to `extract`.
    """
    skipped_pages = get_not_valid_cnts()
    page_text = texts[page]

    # Guard clause: excluded or invalid pages are silently ignored.
    if page in skipped_pages or not validate_page(page_text):
        return

    for _ in range(tests_cnt):
        page_date = extract(page_text, verbose=verbose)
        # A missing date is rendered as the string 'None' so that it can be
        # compared directly with the expected value.
        if page_date is None:
            page_date_str = 'None'
        else:
            page_date_str = page_date.strftime("%d.%m.%Y %H:%M:%S")

        if date != page_date_str:
            print("Error: expected '%s', got '%s'" % (date, page_date_str))
            print("!!!!!!! Not equal")
        else:
            print("!!!!!!! Equal")
def main():
    """Check `extract` against the marked-up dates, stopping at the first mismatch.

    The markup table is read from ``markup.csv`` in ``RES_FOLDER`` when that
    file exists; otherwise it is obtained from `get_test_table()` and written
    to that file for later runs. Each tested row must have exactly three
    columns: the page number, an ignored value, and the expected date string
    ("%d.%m.%Y %H:%M:%S", or 'None').

    On the first page whose extracted date differs from the expected one, the
    page text is written to ``/tmp/test.html`` and the loop stops.
    """
    active_folder = ACTIVE_PAGES_DIR

    # Load the markup table from the CSV cache, creating the cache if needed.
    markup_csv_path = join(RES_FOLDER, 'markup.csv')
    if os.path.exists(markup_csv_path):
        table = read_csv(markup_csv_path)
    else:
        table = get_test_table().get_all_values()
        # The csv module requires files to be opened with newline='';
        # otherwise platforms that translate '\n' emit blank rows.
        with open(markup_csv_path, 'w', newline='') as fio:
            csv.writer(fio).writerows(table)

    cnt_tests = 200
    # Row 0 is skipped (presumably a header row -- TODO confirm).
    test_data = table[1:cnt_tests + 2]

    texts_pickle_path = join(LEARN_FOLDER, 'active_texts_raw.pkl')
    texts = get_texts_info(texts_pickle_path, active_folder)
    not_processing_list = get_not_valid_cnts()

    # Page numbers referenced only by the commented-out debugging filters
    # below (labelled "Random result" there).
    continue_list = [
        18, 9, 37, 42, 44, 47, 59, 50, 63, 60, 64, 68, 76, 81, 82, 89, 92,
        105, 106, 116, 117, 118, 111, 120, 129, 134, 137, 141, 163, 164,
        174, 175, 176, 178, 179, 185, 181, 188, 189, 198, 192, 195,
    ]
    start_cnt = 1
    verbose = True

    # Debugging aid: repeatedly test one specific page.
    # test_number = continue_list[1]
    # test_page(test_number, test_data[test_number][2], texts, verbose=verbose)

    # Drop the first `start_cnt` rows before iterating.
    test_data = test_data[start_cnt:]
    for page_str, _, date in test_data:
        page = int(page_str)
        print("Processing %s of %s" % (page_str, cnt_tests))

        # Debugging aids: skip, or restrict the run to, pages in continue_list.
        # if page in continue_list:
        #     print("Random result")
        #     continue
        # if page not in continue_list:
        #     # print("Random result")
        #     continue

        page_text = texts[page]
        if page in not_processing_list or not validate_page(page_text):
            continue

        page_date = extract(page_text, verbose=verbose)
        # A missing date is rendered as 'None' to match the markup convention
        # used in the comparison below.
        if page_date is None:
            page_date_str = 'None'
        else:
            page_date_str = page_date.strftime("%d.%m.%Y %H:%M:%S")

        if date != page_date_str:
            print("Error: expected '%s', got '%s'" % (date, page_date_str))
            # Dump the offending page for manual inspection, then stop.
            with open('/tmp/test.html', 'w') as fio:
                fio.write(page_text)
            break