Ejemplo n.º 1
0
    ### Reset key variables
    # Every working variable for this document starts out as None. Chained
    # assignment is safe here because None is immutable, and it removes the
    # need to keep a name list and a hand-counted list of None in sync.
    curr_time = curr_time_utc = None
    document_data = document_soup = document_soup_on_page = None
    insert_statements = is_pop = link = message = None
    mostviewed_linklist = None
    on_page = on_page_link_list = on_page_link_list_tmp = None
    pop_rank = pop_top_5_links = seriousness = None

    ### Get data from file
    try:
        curr_time, curr_time_utc = parserfunctions.get_curr_time(
            homepage, pub_tz)
        document_data = parserfunctions.open_data_file(homepage)
    except Exception:
        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit still propagate and can stop the run.
        message = "Failed to open document"
        seriousness = 1
        # curr_time is still None here if get_curr_time() was the call that
        # failed.
        parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time,
                                        pubshort, homepage, seriousness,
                                        message)
        # NOTE(review): `continue` is disabled here, so execution falls
        # through to the soupify step with document_data still None.

    ### Create a souped object
    try:
        document_soup = parserfunctions.soupify(document_data)
    except Exception:
        message = "Failed to soupify document"
        seriousness = 1
        parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time,
                                        pubshort, homepage, seriousness,
                                        message)
        # NOTE(review): `continue` is disabled here as well, so the code
        # that follows runs with document_soup still None.

    ##### Get Most Viewed Items
Ejemplo n.º 2
0
    ### For each desktop homepage
    for homepage in file_list:
        print("Opening file %s (%s of %s for %s)" % (homepage, i, file_list_len, pubshort))
        i += 1
        
        ### Reset key variables
        # Start every iteration with all per-homepage variables set to None
        # so nothing carries over from the previous file. Chained assignment
        # is safe because None is immutable, and it removes the need to keep
        # a name list and a hand-counted list of None in sync.
        curr_time = curr_time_utc = None
        document_data = document_soup = document_soup_on_page = None
        insert_statements = is_pop = is_pro = layout = link = message = None
        mostviewed_linklist = prominence_linklist = None
        on_page = on_page_link_list = on_page_link_list_tmp = None
        pop_rank = pop_top_5_links = None
        pro_rank = pro_top_5_links = None
        seriousness = None

        ### Get data from file
        try:
            curr_time, curr_time_utc = parserfunctions.get_curr_time(homepage, pub_tz)
            document_data = parserfunctions.open_data_file(homepage)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate and can stop the batch.
            message = "Failed to open document"
            seriousness = 1
            # curr_time is still None here if get_curr_time() was the call
            # that failed.
            parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message)
            # NOTE(review): `continue` is disabled here, so execution falls
            # through to the soupify step with document_data still None.

        ### Create a souped object
        try:
            document_soup = parserfunctions.soupify(document_data)
        except Exception:
            message = "Failed to soupify document"
            seriousness = 1
            parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message)
            # NOTE(review): `continue` is disabled here as well, so the
            # layout check that follows runs with document_soup still None.
        
        ### Check layout
        layout = None
        try:
            if document_soup.find("div", class_=re.compile("co_3colvisual|co_2colvisual|co_2_5colvisual")) is not None: