def test_sort_articles(self):
    """Verify the order sort_articles leaves a mixed list of articles in.

    The list is handed to sort_articles and then inspected directly, so
    the test relies on the list being reordered in place.
    """

    def make_article(title, url, index_text=None, category=None):
        # Only assign the optional fields when given, so an article built
        # without them keeps whatever CMR_Article provides by default.
        article = CMR_Article()
        article.title = title
        article.url = url
        if index_text is not None:
            article.index_text = index_text
        if category is not None:
            article.category = category
        return article

    articles = [
        # title and url only: no index text, no category
        make_article(
            "ABC, Parker’s Piece, 7 July 2017",
            "http://example_url_0.com",
        ),
        make_article(
            "Lee Hull, Corner House, Cambridge, 4 June 2017",
            "http://example_url_1.com",
            "Lee Hull, 4th June 2017",
            CMR_Index_Categories.live,
        ),
        # two "extra" articles sharing a title but with different index text
        make_article(
            "z",
            "http://example_url_0.com",
            "z",
            CMR_Index_Categories.extra,
        ),
        make_article(
            "z",
            "http://example_url_0.com",
            "w",
            CMR_Index_Categories.extra,
        ),
    ]

    sort_articles(articles)

    # (position, attribute, expected value), checked in this exact order
    expectations = (
        (0, "title", 'z'),
        (0, "index_text", 'w'),
        (1, "title", 'z'),
        (1, "index_text", 'z'),
        (2, "title", "Lee Hull, Corner House, Cambridge, 4 June 2017"),
        (3, "title", "ABC, Parker’s Piece, 7 July 2017"),
    )
    for position, attribute, expected in expectations:
        self.assertEqual(getattr(articles[position], attribute), expected)
def example_save_index_html():
    """Demonstrate filling in missing article data and saving an index page.

    Runs the fill-in/save sequence twice: first on two hand-built sample
    articles, then on whatever get_all_cmr_articles() returns. Both runs
    write to "test.html".
    """

    def complete_and_save(items):
        # fill in whatever is missing interactively, then write the index
        fill_in_missing_data_interactive(items)
        save_index_html(items, "test.html")

    # an article with only a title and url
    bare_article = CMR_Article()
    bare_article.title = "ABC, Parker’s Piece, 7 July 2017"
    bare_article.url = "http://example_url_0.com"

    # an article with every field set
    full_article = CMR_Article()
    full_article.title = "Lee Hull, Corner House, Cambridge, 4 June 2017"
    full_article.url = "http://example_url_1.com"
    full_article.index_text = "Lee Hull, 4th June 2017"
    full_article.category = CMR_Index_Categories.live

    samples = [bare_article, full_article]

    print("before filling in missing details:")
    for sample in samples:
        print("----")
        sample.print_article_details()

    complete_and_save(samples)

    # repeat the same steps with the full set of articles
    complete_and_save(get_all_cmr_articles())
def example_CMR_Article():
    """Walk through creating a CMR_Article and changing its fields.

    After each step the article's details are printed so the effect of
    the assignments can be seen.
    """
    # start with an article that has nothing set and show it
    print("------- create an article with no data in it")
    demo = CMR_Article()
    demo.print_article_details()

    # each stage: the banner to print, then the fields to assign (in order)
    stages = (
        (
            "------- set a title for the article",
            (("title", "Ed Sheeran, Wembley Stadium, July 1st 2017"),),
        ),
        (
            "------- set other data for the article",
            (
                ("url", "http://made_up_address.html"),
                ("index_text", "Ed Sheeran"),
                ("category", CMR_Index_Categories.live),
            ),
        ),
        (
            # overwrite the title that was set in the first stage
            "------- set a title for the article",
            (("title", "Ed Sheeran, Wembley Stadium, August 11th 2017"),),
        ),
    )

    for banner, assignments in stages:
        print(banner)
        for field_name, value in assignments:
            setattr(demo, field_name, value)
        # print out the contents after this stage's changes
        demo.print_article_details()
def setup_articles():
    """Build three sample articles and run fill_in_missing_data over them.

    Two of the articles have only a title and url; one has every field
    set. The test callbacks are passed in place of interactive prompts.

    Returns:
        The list of three articles after fill_in_missing_data has run.
    """
    # title and url only
    incomplete = CMR_Article()
    incomplete.title = "ABC, Parker’s Piece, 7 month-year"
    incomplete.url = "http://example_url_0.com"

    # every field populated
    complete = CMR_Article()
    complete.title = "Lee Hull, Corner House, Cambridge, 4 June 2017"
    complete.url = "http://example_url_1.com"
    complete.index_text = "Lee Hull, 4th Junish 2017"
    complete.category = CMR_Index_Categories.live

    # title and url only
    unrecognised = CMR_Article()
    unrecognised.title = "no idea, what this is, 1 month-year"
    unrecognised.url = "http://example_url_2.com"

    prepared = [incomplete, complete, unrecognised]
    fill_in_missing_data(
        prepared,
        _get_missing_index_text_test,
        _get_missing_category_test,
        _confirm_test,
        _confirm_test,
        _confirm_test,
    )
    return prepared
def get_index_anchors(soup, tag, category):
    """Collect the links inside the first div of a given class as articles.

    Args:
        soup: parsed document object; must provide ``find`` and the
            elements it returns must provide ``findAll``.
        tag: value of the ``class`` attribute identifying the div to scan.
        category: value stored on every article built from that div.

    Returns:
        A list of CMR_Article objects, one per usable anchor, each with
        ``index_text``, ``url`` and ``category`` set. The list is empty
        when no div with the given class exists.
    """
    # set up an empty list to hold data for each link
    articles_found = []

    # the headings we're interested in all have class = given tag
    container = soup.find("div", {"class": tag})
    if container is None:
        # no such section in this document: nothing to collect, rather
        # than failing with an AttributeError on None
        return articles_found

    # iterate over the anchors in that div, building one article per link
    for anchor in container.findAll('a'):
        try:
            # the text the human sees and the link url
            link_text = str(anchor.contents[0])
            link_url = str(anchor["href"])
        except (IndexError, KeyError):
            # an empty anchor or one without an href is not an index
            # link; skip it instead of aborting the whole scan
            continue

        # build a CMR_Article
        article = CMR_Article()
        article.index_text = link_text
        article.url = link_url
        article.category = category
        articles_found.append(article)

    # pass the results back to the calling code
    return articles_found