예제 #1
0
### For each desktop homepage: print progress, reset the per-file state, then
### load the file and turn it into a soup object.
### Names such as `i`, `file_list`, `file_list_len`, `pubshort`, `pub_tz`,
### `cur`, `conn` and `mysql_log_name` must already be defined before this loop.
for homepage in file_list:
    print("Opening file %s (%s of %s for %s)" %
          (homepage, i, file_list_len, pubshort))
    i += 1

    ### Reset key variables so no value carries over from the previous file.
    ### Chained assignment avoids having to keep a hand-counted list of Nones
    ### in sync with the list of names (None is a singleton, so sharing is safe).
    curr_time = curr_time_utc = None
    document_data = document_soup = document_soup_on_page = None
    insert_statements = link = None
    message = seriousness = None
    is_pop = pop_rank = pop_top_5_links = None
    mostviewed_linklist = None
    on_page = on_page_link_list = on_page_link_list_tmp = None

    ### Get data from file
    try:
        curr_time, curr_time_utc = parserfunctions.get_curr_time(
            homepage, pub_tz)
        document_data = parserfunctions.open_data_file(homepage)
    except Exception:
        # `except Exception` (not a bare `except:`) so that KeyboardInterrupt
        # and SystemExit still stop the run instead of being logged as a
        # document failure.
        message = "Failed to open document"
        seriousness = 1
        # curr_time is still None here if get_curr_time() itself failed.
        parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time,
                                        pubshort, homepage, seriousness,
                                        message)
        # NOTE(review): there is intentionally no `continue` here (it was
        # disabled in the original), so processing falls through and
        # soupify() below is still called, with document_data left as None
        # when the open failed. Confirm whether skipping the file is wanted.

    ### Create a souped object
    try:
        document_soup = parserfunctions.soupify(document_data)
    except Exception:
        message = "Failed to soupify document"
        seriousness = 1
        # Log the failure the same way as the open-document handler above;
        # without this call the message and seriousness were set but never
        # recorded anywhere.
        parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time,
                                        pubshort, homepage, seriousness,
                                        message)
예제 #2
0
### Desktop pass: only runs when `process_desktop` is not None.
### `pubshort`, `homepages_dir`, `pub_tz`, `cur`, `conn` and `mysql_log_name`
### must already be defined before this block.
if process_desktop is not None:
    ### Get list of files to parse
    file_list, file_list_len = parserfunctions.get_file_list(pubshort, homepages_dir)
    # 1-based position of the current file, shown in the progress line below.
    i = 1

    ### For each desktop homepage
    for homepage in file_list:
        print("Opening file %s (%s of %s for %s)" % (homepage, i, file_list_len, pubshort))
        i += 1

        ### Reset key variables so no value carries over from the previous file.
        ### Chained assignment avoids having to keep a hand-counted list of
        ### Nones in sync with the list of names (None is a singleton, so
        ### sharing it between names is safe).
        curr_time = curr_time_utc = None
        document_data = document_soup = document_soup_on_page = None
        insert_statements = link = layout = None
        message = seriousness = None
        is_pop = pop_rank = pop_top_5_links = None
        is_pro = pro_rank = pro_top_5_links = None
        mostviewed_linklist = prominence_linklist = None
        on_page = on_page_link_list = on_page_link_list_tmp = None

        ### Get data from file
        try:
            curr_time, curr_time_utc = parserfunctions.get_curr_time(homepage, pub_tz)
            document_data = parserfunctions.open_data_file(homepage)
        except Exception:
            # `except Exception` (not a bare `except:`) so that
            # KeyboardInterrupt and SystemExit still stop the run instead of
            # being logged as a document failure.
            message = "Failed to open document"
            seriousness = 1
            # curr_time is still None here if get_curr_time() itself failed.
            parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message)
            # NOTE(review): there is intentionally no `continue` here (it was
            # disabled in the original), so processing falls through and
            # soupify() below is still called, with document_data left as
            # None when the open failed. Confirm whether skipping is wanted.

        ### Create a souped object
        try:
            document_soup = parserfunctions.soupify(document_data)
        except Exception:
            message = "Failed to soupify document"
            seriousness = 1
            parserfunctions.error_log_entry(cur, conn, mysql_log_name, curr_time, pubshort, homepage, seriousness, message)
            # NOTE(review): as above, `continue` stays disabled, so any code
            # following this handler runs with document_soup still None.