# Log the failure detected by the preceding block (seriousness/message were
# set there); scraping then continues on a best-effort basis.
parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time,
                                pubshort, homepage, seriousness, message)
#continue

##### Get Most Viewed
# Initialize up front so the POP step below never sees an unbound name when
# the scrape fails (previously that NameError was silently caught).
mostviewed_linklist = []
try:
    # Anchor tags of the top-5 "most popular" entries; re.compile(pattern)
    # filters hrefs down to article links.
    mostviewed_1to5 = [
        item.find("a", href=re.compile(pattern))
        for item in document_soup.find("li", class_="most-popular").find_all("li")
    ]
    for anchor in mostviewed_1to5:
        try:
            href = anchor.get("href")
            mostviewed_linklist = parserfunctions.linklist_actions(
                href, mostviewed_linklist)
        # Narrowed from bare except; per-link failures (e.g. anchor is None)
        # stay best-effort, but SystemExit/KeyboardInterrupt now propagate.
        except Exception:
            pass
except Exception:  # narrowed from bare except
    message = "Failed at getting MV link list"
    seriousness = 2
    parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time,
                                    pubshort, homepage, seriousness, message)
    #continue

try:
    pop_top_5_links = parserfunctions.get_top_5_list_links(mostviewed_linklist)
except Exception:  # narrowed from bare except; empty result on failure
    pop_top_5_links = []
# Log the failure detected by the preceding block (seriousness/message were
# set there); scraping then continues on a best-effort basis.
parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time,
                                pubshort, homepage, seriousness, message)
#continue

##### Get Most Viewed Items
# Initialize up front so the POP step below never sees an unbound name when
# the scrape fails (previously that NameError was silently caught).
mostviewed_linklist = []
try:
    # Anchor tags of the top-5 entries inside the "river" container;
    # link_pattern (pre-built elsewhere) filters hrefs to article links.
    mostviewed_1to5 = [
        item.find("a", href=link_pattern)
        for item in document_soup.find("div", id="river-container").find_all("li")
    ]
    for anchor in mostviewed_1to5:
        try:
            href = anchor.get("href")
            mostviewed_linklist = parserfunctions.linklist_actions(
                href, mostviewed_linklist)
        # Narrowed from bare except; per-link failures (e.g. anchor is None)
        # stay best-effort, but SystemExit/KeyboardInterrupt now propagate.
        except Exception:
            pass
except Exception:  # narrowed from bare except
    message = "Failed at getting MV link list"
    seriousness = 2
    parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time,
                                    pubshort, homepage, seriousness, message)
    #continue

try:
    pop_top_5_links = parserfunctions.get_top_5_list_links(mostviewed_linklist)
except Exception:  # narrowed from bare except; empty result on failure
    pop_top_5_links = []
except: pass if layout is None: message = "Failed to detect the layout" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue ##### Get Most Viewed try: mostviewed_1to5 = [link.find("a", href=re.compile(pattern)) for link in document_soup.find("div", id="mostreademailed").find_all("li")] # ID is most e-mailed, but page states most read mostviewed_linklist = [] for link in mostviewed_1to5: try: link = link.get("href") mostviewed_linklist = parserfunctions.linklist_actions(link, mostviewed_linklist) except: pass except: message = "Failed at getting MV link list" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue try: pop_top_5_links = parserfunctions.get_top_5_list_links(mostviewed_linklist) except: pop_top_5_links = [] message = "Failed to get a POP link" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message)
# Log the failure detected by the preceding block (seriousness/message were
# set there); scraping then continues on a best-effort basis.
parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time,
                                pubshort, homepage, seriousness, message)
#continue

##### Get Most Viewed
# Initialize up front so the POP step below never sees an unbound name when
# the scrape fails (previously that NameError was silently caught).
mostviewed_linklist = []
try:
    # Anchor tags of the top-5 entries in the "post_most" widget;
    # re.compile(pattern) filters hrefs down to article links.
    mostviewed_1to5 = [
        item.find("a", href=re.compile(pattern))
        for item in document_soup.find("div", id="post_most").find_all("li")
    ]
    for anchor in mostviewed_1to5:
        try:
            href = anchor.get("href")
            mostviewed_linklist = parserfunctions.linklist_actions(
                href, mostviewed_linklist)
        # Narrowed from bare except; per-link failures (e.g. anchor is None)
        # stay best-effort, but SystemExit/KeyboardInterrupt now propagate.
        except Exception:
            pass
except Exception:  # narrowed from bare except
    message = "Failed at getting MV link list"
    seriousness = 2
    parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time,
                                    pubshort, homepage, seriousness, message)
    #continue

try:
    pop_top_5_links = parserfunctions.get_top_5_list_links(mostviewed_linklist)
except Exception:  # narrowed from bare except; empty result on failure
    pop_top_5_links = []
except: pass if layout is None: message = "Failed to detect the layout" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue ##### Get Most Viewed try: mostviewed_1to5 = [link.find("a", href=re.compile(pattern)) for link in document_soup.find("div", id="post_most").find_all("li")] mostviewed_linklist = [] for link in mostviewed_1to5: try: link = link.get("href") mostviewed_linklist = parserfunctions.linklist_actions(link, mostviewed_linklist) except: pass except: message = "Failed at getting MV link list" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue try: pop_top_5_links = parserfunctions.get_top_5_list_links(mostviewed_linklist) except: pop_top_5_links = [] message = "Failed to get a POP link" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message)
except: pass if layout is None: message = "Failed to detect the layout" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue ##### Get Most Viewed try: mostviewed_1to5 = [link.find("a", href=re.compile(pattern)) for link in document_soup.find("li", class_="most-popular").find_all("li")] mostviewed_linklist = [] for link in mostviewed_1to5: try: link = link.get("href") mostviewed_linklist = parserfunctions.linklist_actions(link, mostviewed_linklist) except: pass except: message = "Failed at getting MV link list" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue try: pop_top_5_links = parserfunctions.get_top_5_list_links(mostviewed_linklist) except: pop_top_5_links = [] message = "Failed to get a POP link" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message)
if layout is None: print("Failed to detect the layout for document %s" % (homepage)) continue ##### Get Most Viewed try: mostviewed_1to5 = [ link.find("a", href=re.compile(pattern)) for link in document_soup.find("div", id="most-popular-tabs"). find("div", class_="tab").find_all("li") ] mostviewed_linklist = [] for link in mostviewed_1to5: try: link = link.get("href") mostviewed_linklist = parserfunctions.linklist_actions( link, mostviewed_linklist) except: pass except: message = "Failed at getting MV link list" seriousness = 2 parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message) #continue try: pop_top_5_links = parserfunctions.get_top_5_list_links( mostviewed_linklist) except: pop_top_5_links = []