Exemple #1
0
def compare_site_thread_csv(file, progress_var=None, step=100.0):
    """Compare every old/new site pair listed in a CSV file.

    Each non-empty row supplies the old URL in column 0 and the new URL in
    column 1. Every pair is handed to ``compare_site_thread`` together with
    one thread pool shared by all rows.

    Args:
        file: Path of the CSV file to read.
        progress_var: Optional object with ``get()``/``set()`` used to report
            progress; passed through to ``compare_site_thread``.
        step: Total amount of progress this call may add; it is divided
            evenly between the rows.

    Returns:
        None. Returns early, without waiting for queued tasks, when the
        interface mode is on and checking has been switched off.
    """
    # Read the file once: the same parsed rows drive both the per-site step
    # and the loop, so the two can never disagree. Blank lines (which
    # csv.reader yields as empty lists) are skipped instead of crashing on
    # row[0].
    with open(file, 'r') as f:
        rows = [row for row in csv.reader(f) if row]

    if not rows:
        # Nothing to compare; also avoids dividing by zero below.
        if progress_var:
            progress_var.set(progress_var.get() + step)
        return

    # Create the pool only once we know there is work to do.
    if status["INTERFACE_MODE"]:
        thread_pool_csv = ThreadPool(settings["THREADPOOL_SIZE"])
    else:
        thread_pool_csv = ThreadPool(20)

    # calculate the step for each site
    site_step = step / len(rows)

    for row in rows:
        compare_site_thread(row[0],
                            row[1],
                            progress_var=progress_var,
                            step=site_step,
                            thread_pool_csv=thread_pool_csv)
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            # NOTE(review): the pool is deliberately not destroyed here, as
            # in the original code. compare_site_thread may already have
            # destroyed it when it noticed the stop request; confirm that
            # ThreadPool.destroy() is safe to call twice before adding
            # cleanup on this path.
            return

    thread_pool_csv.wait_completion()
    thread_pool_csv.destroy()
Exemple #2
0
def _normalize_site_url(url):
    """Return url stripped, minus one trailing "/", with "http://" added if needed."""
    url = url.strip()
    # endswith() is safe on an empty string, unlike indexing url[-1]
    if url.endswith('/'):
        url = url[:-1]
    if not url.startswith("http"):
        url = "http://" + url
    return url


def compare_site_thread(old_url,
                        new_url,
                        progress_var=None,
                        step=100.0,
                        thread_pool_csv=None):
    """Queue comparisons of an old site against its new counterpart.

    The homepage and every sitemap page (except those starting with "/home"
    or "/main") are submitted to a thread pool as ``compare_homepage`` /
    ``compare_page`` tasks. When both sites have a blog, the blogs are
    compared directly through ``compare_blog``.

    Args:
        old_url: URL of the old site; surrounding whitespace and one trailing
            "/" are removed and "http://" is prepended when the value does
            not start with "http".
        new_url: URL of the new site; normalised the same way.
        progress_var: Optional object with ``get()``/``set()`` used to report
            progress.
        step: Total amount of progress this site may add. 4% is spent on
            setup and the remaining 96% is split between the pages (and the
            blog, when present).
        thread_pool_csv: Pool shared by a multi-site run. When omitted, a
            pool is created here, waited on and destroyed before returning.

    Returns:
        None if checking was stopped or the site map could not be read,
        -1 if the new site shows a "Login" page, False if no sub-pages were
        found, True once all comparisons have been queued (and, in single
        site mode, completed).
    """
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return

    # checking multiple sites mode: reuse the shared pool, otherwise own one
    owns_pool = not thread_pool_csv
    if owns_pool:
        thread_pool = ThreadPool(settings["THREADPOOL_SIZE"])
    else:
        thread_pool = thread_pool_csv
    create_path()

    old_url = _normalize_site_url(old_url)
    new_url = _normalize_site_url(new_url)

    # print out the information for old and new sites
    entry_print("-----------------------------------------------------", True)
    entry_print("Old URL: " + old_url, True)
    entry_print("New URL: " + new_url, True)
    entry_print("-----------------------------------------------------", True)

    setup_step = step * 0.01
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)

    # check if the new site needs login
    new_test = get_soup(new_url)
    if new_test:
        title = new_test.find("title")
        if title and title.get_text().strip() == "Login":
            entry_print(
                "New site needs login. Please use login mode to check this site!\n",
                True)
            # release the pool created above; a shared pool stays alive for
            # the remaining sites
            if owns_pool:
                thread_pool.destroy()
            return -1

    setup_step = step * 0.01
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)

    # get the subpages of old and new sites
    try:
        sites = get_sites(old_url)
    except AttributeError:
        entry_print(
            "Can't find the site map from " + old_url +
            ". Please check if the url is valid!", True)
        # only tear down a pool this call owns: the multi-site caller keeps
        # submitting the remaining rows to the shared pool
        if owns_pool:
            thread_pool.destroy()
        return
    old_blog = get_blog_site(old_url)
    new_blog = get_blog_site(new_url)

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        # a stop request destroys the pool even when it is shared, as the
        # original code did
        thread_pool.destroy()
        return

    blog_exists = bool(old_blog and new_blog)

    # if urls for subpages are not found
    if sites is None:
        record_error(new_url, "sites")
        if progress_var:
            progress_var.set(progress_var.get() + step)
        if owns_pool:
            thread_pool.destroy()
        return False

    # if blog page is not found
    if old_blog is not None and new_blog is None:
        record_error(new_url, "blog")
    elif old_blog is None and new_blog is not None:
        record_error(old_url, "blog")

    setup_step = step * 0.02
    if progress_var:
        progress_var.set(progress_var.get() + setup_step)

    # print out site information
    entry_print("Site Information: ", True)

    # calculate the step for each page; 4% of the budget was used for setup
    step *= 0.96
    if blog_exists:
        # half of the remaining budget is reserved for the blog comparison
        page_step = step / 2 / (len(sites) + 1)
        entry_print("Old Blog: " + old_blog, True)
        entry_print("New Blog: " + new_blog, True)
    else:
        page_step = step / (len(sites) + 1)

    entry_print("Number of non-blog pages: " + str(len(sites)), True)

    # check the homepage
    thread_pool.add_task(compare_homepage,
                         old_url=old_url,
                         new_url=new_url,
                         progress_var=progress_var,
                         step=page_step)

    # check all the sites in sitemap
    for site in sites:
        # these paths are skipped; the homepage is already queued above
        if site.startswith(("/home", "/main")):
            continue

        thread_pool.add_task(compare_page,
                             old_url=old_url + site,
                             new_url=new_url + site,
                             progress_var=progress_var,
                             step=page_step)

    # check all the blog pages
    if blog_exists:
        old_blog_soup = get_soup(old_blog)
        new_blog_soup = get_soup(new_blog)
        compare_blog(old_blog_soup,
                     new_blog_soup,
                     old_blog,
                     new_blog,
                     progress_var=progress_var,
                     step=step / 2)

    # single site mode
    if owns_pool:
        thread_pool.wait_completion()
        thread_pool.destroy()

    entry_print("-----------------------------------------------------\n")

    return True