def testSmallCSVFile(self):
    """Write a small CSV from live 4plebs data and sanity-check its rows.

    The input is one specific /x/ thread merged with the first /x/ index
    page. The test then verifies that every line of the written file
    contains a comma and hands the file to
    ``ensure_csv_has_no_empty_fields`` for a field-level check.
    """
    csv_path = 'out/testcase-output-small-example.csv'

    merged_json = {}
    # A specific thread: http://archive.4plebs.org/x/thread/23732801/
    thread_json = httpGET_json(gen_thread_api_url('x', 23732801))
    merged_json.update(**thread_json)
    # First index page of /x/
    index_json = httpGET_json(gen_index_api_url('x', 1))
    merged_json.update(**index_json)

    # Convert the merged JSON dict into Post objects and write them out.
    posts = FourPlebsAPI_Post.from_post_json(merged_json)
    CSVPostWriter.write_posts_to_csv(posts, csv_path, ALL_CONTENT_FLAGGERS)

    # Every line in this CSV should contain at least one comma.
    with open(csv_path, 'r') as csv_file:
        for line_number, line in enumerate(csv_file):
            print("line {}".format(line_number))
            self.assertIn(',', line)

    # Re-open so the field check reads the file from the beginning.
    with open(csv_path, 'r') as csv_file:
        self.ensure_csv_has_no_empty_fields(csv_file, count=4)
def gather_range_with_boards(start: int, end: int, boards: List[str]) -> List[FourPlebsAPI_Post]:
    """Collect posts from the index pages of several boards.

    Pages ``start`` up to (but not including) ``end`` are requested for
    every board in ``boards``. The JSON responses are merged into a single
    dict, so a key returned by a later request replaces an earlier one.
    Progress is printed after each request.

    Args:
        start: First index page to fetch (inclusive).
        end: Page number at which to stop (exclusive).
        boards: Board names to fetch on each page, e.g. ``['pol', 'x']``.

    Returns:
        The Post objects built from the merged JSON by
        ``FourPlebsAPI_Post.from_post_json``.
    """
    merged_json = {}
    for page in range(start, end):
        for board_name in boards:
            page_url = gen_index_api_url(board_name, page)
            page_json = httpGET_json(page_url)
            merged_json.update(**page_json)
            print("Page {} of /{}/".format(page, board_name))
    posts = FourPlebsAPI_Post.from_post_json(merged_json)
    return posts
def generate_large_example_csv(page_start=1, page_end=20, boards=('pol', 'x')):
    """Fetch index pages from several boards and write them to one large CSV.

    Args:
        page_start: First index page to fetch (inclusive).
        page_end: Page number at which to stop (exclusive, as with ``range``).
        boards: Iterable of board names to fetch on every page. The default
            is an immutable tuple so it cannot be shared and mutated across
            calls; any list passed by a caller keeps working because the
            value is only iterated.

    Side effects:
        Performs one HTTP GET per (page, board) pair, prints a progress line
        per page, and writes ``out/post-output-large.csv``.
    """
    results = {}
    for i in range(page_start, page_end):
        for board in boards:
            # Responses are merged into one dict; later keys replace earlier ones.
            results.update(**httpGET_json(gen_index_api_url(board, i)))
        print("{}th page...".format(i))

    # Turn the merged JSON dict into a list of Post objects.
    post_list = FourPlebsAPI_Post.from_post_json(results)
    CSVPostWriter.write_posts_to_csv(post_list, 'out/post-output-large.csv', ALL_CONTENT_FLAGGERS)
def generate_small_example_csv():
    """Build a small example CSV from a handful of live 4plebs pages.

    The data set is one specific /x/ thread, index pages 1 through 9 of
    /pol/, and the first index page of /x/. The JSON responses are merged
    into one dict, converted to Post objects and written to
    ``out/post-output-small-example.csv``.
    """
    merged_json = {}

    # A specific thread: http://archive.4plebs.org/x/thread/23732801/
    thread_json = httpGET_json(gen_thread_api_url('x', 23732801))
    merged_json.update(**thread_json)

    # Index pages 1 through 9 of /pol/ (the range end is exclusive).
    for page in range(1, 10):
        pol_json = httpGET_json(gen_index_api_url('pol', page))
        merged_json.update(**pol_json)

    # Add on the posts from page 1 of /x/.
    x_json = httpGET_json(gen_index_api_url('x', 1))
    merged_json.update(**x_json)

    # Turn the merged JSON dict into a list of Post objects and write it out.
    posts = FourPlebsAPI_Post.from_post_json(merged_json)
    CSVPostWriter.write_posts_to_csv(posts, 'out/post-output-small-example.csv', ALL_CONTENT_FLAGGERS)