Example no. 1
0
def set_meta(old_url, new_url, browser):
    """Copy title/description/keywords metadata from the page at *old_url*
    into the CMS metadata editor of the matching page at *new_url*.

    Uses an already-open Selenium *browser* session (logging in when the
    site or the CMS gateway asks for it).  Relies on module-level helpers
    (get_soup, get_meta_soup, login, migration_print, ask_continue) and on
    the global ``status`` dict for cooperative cancellation.  Returns None.
    """
    # Bail out if the GUI cleared the run flag (interface mode only).
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return

    wait = WebDriverWait(browser, 20)
    old_soup = get_soup(old_url)
    old_meta = get_meta_soup(old_soup, old_url)  # keys used: title/description/keywords

    # Re-check the cancel flag after the slow network fetch above.
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return

    if new_url.endswith('/'):
        new_url = new_url[:-1]

    # Page names are truncated at 50 characters on the new site, so a
    # too-long final path segment must be truncated to hit the real URL.
    new_path = urlparse(new_url).path
    new_path_list = new_path.split('/')
    if len(new_path_list[-1]) > 50:
        new_path_list[-1] = new_path_list[-1][:50]
        new_path_dup = "/".join(new_path_list)
        new_url_dup = new_url.replace(new_path, new_path_dup)
        browser.get(new_url_dup)
    else:
        browser.get(new_url)

    # Site-level login page.
    if browser.title == "Login":
        login(browser, wait)

    # CMS gateway login: link text "Login" means we are not signed in yet.
    new_soup = BeautifulSoup(browser.page_source, "html.parser")
    login_status = new_soup.find('a', id="ctl00_lnkGateway").get_text()
    if login_status == "Login":
        login_button = browser.find_element_by_id("ctl00_lnkGateway")
        login_button.click()
        wait.until(
            EC.visibility_of_element_located(
                (By.ID, "ctl00_ContentPlaceHolder1_txtUsername")))
        login(browser, wait)

    # Open the admin bar's "Page Options" menu.
    page_options = browser.find_element_by_xpath(
        '//li[@class="optionPageOptions"]')
    page_options.click()

    # The metadata menu entry is the parent <a> of the metadata icon;
    # pull its relative "/cms/...Metadata" href and load it directly.
    metadata_option = browser.find_element_by_xpath(
        '//span[@class="AB_icn AB_icn-metadata"]').find_element_by_xpath('..')
    url = metadata_option.get_attribute('href')
    rel_url = re.search("/cms/.*Metadata", url).group(0)
    new_hostname = urlparse(new_url).hostname
    target_url = "http://" + new_hostname + rel_url

    browser.get(target_url)

    # Tick the "enable custom metadata" checkbox (first one on the form)
    # if it is not already selected.
    enable_custom_checkbox = browser.find_elements_by_xpath(
        '//input[@type="checkbox"]')[0]
    if not enable_custom_checkbox.is_selected():
        enable_custom_checkbox.click()

    # Migrate the title (7th text input on the metadata form — index
    # assumed stable for this CMS template; confirm if the form changes).
    # send_keys can raise UnicodeDecodeError on non-ASCII scraped text;
    # then print the values for manual entry and abort this page.
    title = old_meta["title"]
    title_entry = browser.find_elements_by_xpath('//input[@type="text"]')[6]
    title_entry.clear()
    try:
        title_entry.send_keys(title)
    except UnicodeDecodeError:
        migration_print("Unable to migrate title for " + new_url)
        migration_print("Title: " + old_meta["title"])
        migration_print("Description: " + old_meta["description"])
        migration_print("Keywords: " + old_meta["keywords"])
        migration_print(
            "-----------------------------------------------------------")
        ask_continue()
        return

    # Migrate the description (14th text input), skipping the scraper's
    # "none" placeholder and auto-generated "Learn more about..." text.
    description = old_meta["description"]
    if description != "none" and not description.startswith(
            "Learn more about"):
        description_entry = browser.find_elements_by_xpath(
            '//input[@type="text"]')[13]
        description_entry.clear()
        try:
            description_entry.send_keys(description)
        except UnicodeDecodeError:
            migration_print("Unable to migrate description for " + new_url)
            migration_print("Title: " + old_meta["title"])
            migration_print("Description: " + old_meta["description"])
            migration_print("Keywords: " + old_meta["keywords"])
            migration_print(
                "-----------------------------------------------------------")
            ask_continue()
            return

    # Migrate the keywords (15th text input) unless they are the "none"
    # placeholder.
    keywords = old_meta["keywords"]
    if keywords != "none":
        keywords_entry = browser.find_elements_by_xpath(
            '//input[@type="text"]')[14]
        keywords_entry.clear()
        try:
            keywords_entry.send_keys(keywords)
        except UnicodeDecodeError:
            migration_print("Unable to migrate keywords for " + new_url)
            migration_print("Title: " + old_meta["title"])
            migration_print("Description: " + old_meta["description"])
            migration_print("Keywords: " + old_meta["keywords"])
            migration_print(
                "-----------------------------------------------------------")
            ask_continue()
            return

    submit_button = browser.find_element_by_xpath('//input[@type="submit"]')
    submit_button.click()

    # Report the migrated page by its path ("/" for the homepage).
    new_path = urlparse(new_url).path
    if not new_path:
        new_path = "/"
    else:
        ind = new_url.find(new_path)
        new_path = new_url[ind:]
    migration_print(new_path + " metadata migrated!")
Example no. 2
0
def migrate_meta(old_url, new_url, progress_var=None, step=100.0):
    """Migrate page metadata for a whole site from *old_url* to *new_url*.

    Opens a fresh Chrome webdriver, migrates metadata for the homepage and
    every sitemap subpage, then — when both sites expose a blog — the blog
    index plus each post, pairing old and new posts by title.

    progress_var -- optional Tk variable advanced as work completes
    step         -- total progress units this call may consume

    The webdriver is quit on every exit path.  The global ``status`` dict
    is polled between slow operations for cooperative cancellation.
    """
    old_url = old_url.strip()
    new_url = new_url.strip()

    # remove the "/" at the end of the url
    if old_url[-1] == '/':
        old_url = old_url[:-1]
    if new_url[-1] == '/':
        new_url = new_url[:-1]

    # add "http://" before url
    if not old_url.startswith("http"):
        old_url = "http://" + old_url
    if not new_url.startswith("http"):
        new_url = "http://" + new_url

    # print out the information for old and new sites
    migration_print("-----------------------------------------------------")
    migration_print("Old URL: " + old_url)
    migration_print("New URL: " + new_url)
    migration_print("-----------------------------------------------------")

    browser = webdriver.Chrome(executable_path=settings["EXECUTABLE_PATH"])
    browser.maximize_window()

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return

    if progress_var:
        progress_var.set(progress_var.get() + step * 0.01)

    sites = get_sites(old_url)

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return

    if progress_var:
        progress_var.set(progress_var.get() + step * 0.02)

    if not sites:
        migration_print("Unable to fetch subpage URLs from site map of " +
                        old_url)
        # BUG FIX: get_sites may have returned None; normalize to an empty
        # list so the len()/iteration below cannot raise, and the homepage
        # is still migrated.
        sites = []

    # find blog pages; only migrate blogs when both sites have one
    old_blog_page = get_blog_site(old_url)
    new_blog_page = get_blog_site(new_url)
    blog_exists = bool(old_blog_page and new_blog_page)

    # calculate the step for each subpage (blogs get half of the budget)
    step *= 0.97
    if blog_exists:
        page_step = step / 2 / (len(sites) + 1)
    else:
        page_step = step / (len(sites) + 1)

    # migrate metadata for homepage
    set_meta(old_url, new_url, browser)

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return

    if progress_var:
        progress_var.set(progress_var.get() + page_step)

    # migrate all non-blog pages
    for site in sites:
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            browser.quit()
            migration_print(
                "-----------------------------------------------------\n")
            return

        old_link = old_url + site
        new_link = new_url + site
        try:
            set_meta(old_link, new_link, browser)
        except NoSuchElementException:
            migration_print("Missing Page: " + new_link)
        if progress_var:
            progress_var.set(progress_var.get() + page_step)

    if not blog_exists:
        browser.quit()
        migration_print(
            "-----------------------------------------------------------")
        return

    step /= 2

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return

    old_blog_soup = get_soup(old_blog_page)
    new_blog_soup = get_soup(new_blog_page, browser)

    # old posts are h5/h3 headings; new posts are <a class="title"> links
    old_blogs = old_blog_soup.find_all(['h5', 'h3'])
    new_blogs = new_blog_soup.find_all('a', class_="title")

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return

    if progress_var:
        progress_var.set(progress_var.get() + step * 0.02)

    step *= 0.98

    # record blog posts as (title, url) pairs; duplicate titles get a
    # numeric suffix so the dict keys stay unique
    old_list = []
    parsed_old_blogs = {}
    ind = 1
    for blog in old_blogs:
        title = blog.get_text()
        if title == "Categories":
            # BUG FIX: the old code also removed the element from
            # old_blogs while iterating it, which silently skipped the
            # next heading; skipping alone is enough.
            continue

        try:
            link = blog.a.get('href')
        except AttributeError:
            migration_print("Unable to find blog metadata for " + title)
            # BUG FIX: without a link there is nothing to record; the old
            # code fell through and reused the previous iteration's link
            # (or crashed with an unbound local on the first iteration).
            continue

        if title in parsed_old_blogs:
            parsed_old_blogs[title + str(ind)] = link
            old_list.append((title + str(ind), link))
            ind += 1
        else:
            parsed_old_blogs[title] = link
            old_list.append((title, link))

    new_list = []
    parsed_new_blogs = {}
    ind = 1
    for blog in new_blogs:
        title = blog.get_text()
        link = new_url + blog.get('href')
        if title in parsed_new_blogs:
            parsed_new_blogs[title + str(ind)] = link
            new_list.append((title + str(ind), link))
            ind += 1
        else:
            parsed_new_blogs[title] = link
            new_list.append((title, link))

    if not old_list or not new_list:
        browser.quit()
        return

    blog_step = step / (len(old_list) + 1)

    # migrate metadata for blog index page
    set_meta(old_blog_page, new_blog_page, browser)

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return

    if progress_var:
        progress_var.set(progress_var.get() + blog_step)

    # migrate metadata for blog posts: positional match first, then fall
    # back to a title lookup in the new site's dict
    for ind in range(len(old_list)):
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            browser.quit()
            migration_print(
                "-----------------------------------------------------\n")
            return

        if old_list[ind][0] == new_list[ind][0]:
            set_meta(old_list[ind][1], new_list[ind][1], browser)
        else:
            try:
                set_meta(parsed_old_blogs[old_list[ind][0]],
                         parsed_new_blogs[old_list[ind][0]], browser)
            except KeyError:
                migration_print("Cannot migrate metadata for blog page " +
                                new_list[ind][1])
                continue
        if progress_var:
            progress_var.set(progress_var.get() + blog_step)

    browser.quit()
    migration_print("-----------------------------------------------------\n")
Example no. 3
0
def migrate_post(old_post, new_blog, browser):
    """Recreate one scraped blog post on the new site's blog.

    old_post -- dict read here for 'title' (a pair: text plus an optional
                suffix value, presumably for de-duplication — confirm
                against get_blog_posts), 'summary', 'date' and 'url'
    new_blog -- URL of the new site's blog index page
    browser  -- Selenium webdriver owned by the caller (never quit here)

    The CMS renders the same controls under either a ctl06/ctl07 or a
    ctl01/ctl02 id prefix (two page templates), hence the repeated
    try/except NoSuchElementException fallbacks throughout.
    """
    # Bail out if the GUI cleared the run flag (interface mode only).
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return

    wait = WebDriverWait(browser, 20)
    browser.get(new_blog)

    # Site-level login page.
    if browser.title == "Login":
        login(browser, wait)

    # CMS gateway login: link text "Login" means we are not signed in yet.
    new_soup = BeautifulSoup(browser.page_source, "html.parser")
    login_status = new_soup.find('a', id="ctl00_lnkGateway").get_text()
    if login_status == "Login":
        login_button = browser.find_element_by_id("ctl00_lnkGateway")
        login_button.click()
        wait.until(EC.visibility_of_element_located((By.ID, "ctl00_ContentPlaceHolder1_txtUsername")))
        login(browser, wait)

    # Open the admin bar's "Add Page" menu and pick the blog page type.
    page_option = browser.find_element_by_xpath("//li[@class='optionAddPage']")
    page_option.click()

    try:
        content_space_page = browser.find_element_by_xpath(
            '//li[@class="optionAddPage"]//ul//li/a[text()="News/Blog Content Page"]')
        content_space_page.click()
    except NoSuchElementException:
        migration_print("Can't find + News/Blog Content Page button. Please make sure Our Blog is a News/Blog Page.")

    # Fill in the page title (plus the numeric suffix when present).
    try:
        title_entry = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl06_txtTitle']")
    except NoSuchElementException:
        title_entry = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl01_txtTitle']")
    title_entry.send_keys(old_post['title'][0])
    if old_post['title'][1]:
        title_entry.send_keys(str(old_post['title'][1]))

    # Let the CMS generate the canonical page name from the title.
    generate_name = browser.find_element_by_xpath("//img[@title='Generate Name']")
    generate_name.click()

    # Truncate the generated name to 50 chars, making sure the suffix
    # survives the truncation so duplicate titles stay distinct.
    try:
        name_entry = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl06_txtCanonicalName']")
    except NoSuchElementException:
        name_entry = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl01_txtCanonicalName']")
    name = refine_name(name_entry.get_attribute("value")[:50])
    if old_post['title'][1] and not name.endswith(str(old_post['title'][1])):
        name = refine_name(name_entry.get_attribute("value")[:49] + str(old_post['title'][1]))
    name_entry.clear()
    name_entry.send_keys(name)

    try:
        create_page = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl06_btnSubmit']")
    except NoSuchElementException:
        create_page = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl01_btnSubmit']")
    create_page.click()

    # Now on the post-editing form: fill the display title field.  If
    # neither template variant is present, give up on this post.
    try:
        title_entry = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_title']")
    except NoSuchElementException:
        try:
            title_entry = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_title']")
        except NoSuchElementException:
            return
    title_entry.send_keys(old_post['title'][0])

    # Optional summary field.
    if old_post['summary']:
        try:
            summary_entry = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_summary']")
        except NoSuchElementException:
            summary_entry = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_summary']")
        summary_entry.send_keys(old_post['summary'])

    # Original publish date.
    try:
        date_entry = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_published_date_dateInput']")
    except NoSuchElementException:
        date_entry = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_published_date_dateInput']")
    date_entry.send_keys(old_post['date'])

    # Switch the rich-text editor to raw-HTML mode so the scraped article
    # markup can be pasted verbatim.
    remode_html = browser.find_element_by_xpath("//a[@class='reMode_html']")
    remode_html.click()

    # The HTML view lives in the editor's second iframe; type the article
    # into its textarea, then switch back to the top-level document.
    article = get_article(old_post['url'])
    try:
        browser.switch_to.frame(browser.find_element_by_xpath("//td[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_body_ctl00Center']//iframe[2]"))
    except NoSuchElementException:
        browser.switch_to.frame(browser.find_element_by_xpath("//td[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_body_ctl00Center']//iframe[2]"))
    content_entry = browser.find_element_by_xpath("//textarea")
    content_entry.click()
    content_entry.send_keys(article)
    browser.switch_to.default_content()

    # Publish: bottom button, top button, then confirm with "Yes".
    try:
        publish = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl07_ibPublishBottom']")
    except NoSuchElementException:
        publish = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl02_ibPublishBottom']")
    publish.click()

    try:
        publish2 = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl07_ibPublishTop']")
    except NoSuchElementException:
        publish2 = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl02_ibPublishTop']")
    publish2.click()

    try:
        yes = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl07_btnYes']")
    except NoSuchElementException:
        yes = browser.find_element_by_xpath("//input[@id='ctl00_ContentPlaceHolder1_ctl02_btnYes']")
    yes.click()
Example no. 4
0
def migrate_blog(old_blog, new_blog, progress_var=None, step=100.0):
    """Migrate every blog post from *old_blog* to *new_blog*.

    Scrapes the old posts with get_blog_posts, recreates each one on the
    new site via migrate_post, then calls set_status on the new blog.

    progress_var -- optional Tk variable advanced as work completes
    step         -- total progress units this call may consume

    A fresh Chrome webdriver is created and quit on every exit path
    (except the two early returns before it exists).
    """
    old_url = old_blog.strip()
    new_url = new_blog.strip()

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return

    # remove the "/" at the end of the url
    if old_url[-1] == '/':
        old_url = old_url[:-1]
    if new_url[-1] == '/':
        new_url = new_url[:-1]

    # add "http://" before url
    if not old_url.startswith("http"):
        old_url = "http://" + old_url
    if not new_url.startswith("http"):
        new_url = "http://" + new_url

    # print out the information for old and new sites
    migration_print("-----------------------------------------------------")
    migration_print("Old URL: " + old_url)
    migration_print("New URL: " + new_url)
    migration_print("-----------------------------------------------------")

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return

    # create new webdriver
    browser = webdriver.Chrome(executable_path=settings["EXECUTABLE_PATH"])
    browser.maximize_window()

    if progress_var:
        progress_var.set(progress_var.get() + step * 0.01)

    blog_posts = get_blog_posts(old_url)

    if progress_var:
        progress_var.set(progress_var.get() + step * 0.02)

    step *= 0.95

    if not blog_posts:
        migration_print("Unable to get blog posts for " + old_url)
        browser.quit()
        return

    blog_step = step / len(blog_posts)

    for post in blog_posts:
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            # BUG FIX: quit the webdriver before aborting; the old code
            # returned here and leaked the chromedriver process.
            browser.quit()
            return

        migrate_post(post, new_url, browser)

        migration_print('\"' + post['title'][0] + "\" migrated!")
        if progress_var:
            progress_var.set(progress_var.get() + blog_step)

    set_status(new_blog, browser)
    if progress_var:
        progress_var.set(progress_var.get() + step * 0.02)

    # BUG FIX: quit() instead of close() so the driver session (and the
    # chromedriver process) actually terminates, matching every other
    # function in this module.
    browser.quit()
def create_subpages(root_url, subpages, browser, progress_var=None, step=20.0):
    """Create every (title, name) content-space page under *root_url*.

    root_url -- parent page URL on the new site
    subpages -- iterable of (page title, canonical page name) pairs
    browser  -- Selenium webdriver owned by the caller (never quit here)
    progress_var/step -- optional Tk progress variable and its budget

    The CMS renders the page-creation form under either a ctl06 or a
    ctl01 control prefix (two templates), hence the try/except fallbacks.
    """
    wait = WebDriverWait(browser, 20)
    browser.get(root_url)

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return

    # log into the page if the site needs login
    if browser.title == "Login":
        login(browser, wait)

    # BUG FIX: guard against an empty subpages list, which previously
    # raised ZeroDivisionError computing page_step (the sibling
    # migrate_subpages guards the same way).
    if not subpages:
        return

    page_step = step / len(subpages)

    # create content space page at root_url
    for page in subpages:
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            return

        # Each page is created from the parent page's admin bar.
        browser.get(root_url)
        try:
            page_option = browser.find_element_by_xpath(
                "//li[@class='optionAddPage']")
            page_option.click()
        except NoSuchElementException:
            # No "Add Page" menu here — count the page as processed so
            # the progress bar still completes, then move on.
            migration_print("Unable to create subpage for " + root_url)
            if progress_var:
                progress_var.set(progress_var.get() + page_step)
            continue

        content_space_page = browser.find_element_by_xpath(
            '//li[@class="optionAddPage"]//ul//li/a[text()="Content Space Page"]'
        )
        content_space_page.click()

        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            return

        # use different names for horizontal and vertical templates
        try:
            page_title_entry = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl06$txtTitle")
        except NoSuchElementException:
            page_title_entry = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl01$txtTitle")
        page_title_entry.send_keys(page[0])

        generate_title = browser.find_element_by_xpath(
            "//img[@title='Generate Name']")
        generate_title.click()

        # Replace the generated canonical name with the scraped one.
        try:
            page_name_entry = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl06$txtCanonicalName")
        except NoSuchElementException:
            page_name_entry = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl01$txtCanonicalName")
        page_name_entry.clear()
        page_name_entry.send_keys(page[1])

        try:
            create_page = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl06$btnSubmit")
        except NoSuchElementException:
            create_page = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl01$btnSubmit")
        create_page.click()

        migration_print(page[0] + " (" + page[1] + ") created!")

        if progress_var:
            progress_var.set(progress_var.get() + page_step)
def migrate_subpages(old_url, new_url, progress_var=None, step=100.0):
    """Recreate the old site's navigation-menu subpages on the new site.

    Fetches the old site's subpage tree with get_subpages, then calls
    create_subpages for each parent page on the new site.

    progress_var -- optional Tk variable advanced as work completes
    step         -- total progress units this call may consume

    A fresh Chrome webdriver is created and quit on every exit path.
    """
    old_url = old_url.strip()
    new_url = new_url.strip()

    # remove the "/" at the end of the url
    if old_url[-1] == '/':
        old_url = old_url[:-1]
    if new_url[-1] == '/':
        new_url = new_url[:-1]

    # add "http://" before url
    if not old_url.startswith("http"):
        old_url = "http://" + old_url
    if not new_url.startswith("http"):
        new_url = "http://" + new_url

    # print out the information for old and new sites
    migration_print("-----------------------------------------------------")
    migration_print("Old URL: " + old_url)
    migration_print("New URL: " + new_url)
    migration_print("-----------------------------------------------------")

    browser = webdriver.Chrome(executable_path=settings["EXECUTABLE_PATH"])
    wait = WebDriverWait(browser, 20)
    browser.maximize_window()

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return

    if progress_var:
        progress_var.set(progress_var.get() + step * 0.01)

    # (A second trailing-slash strip used to sit here; the URLs were
    # already normalized above, so it was dead code and has been removed.)

    parsed_subpages = get_subpages(old_url)
    browser.get(new_url)

    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return

    # log into the page if the site needs login
    if browser.title == "Login":
        login(browser, wait)

    if progress_var:
        progress_var.set(progress_var.get() + step * 0.02)

    step *= 0.97

    # avoid divided by zero error
    if not parsed_subpages:
        migration_print("Unable to fetch subpages from navigation menu of " +
                        old_url)
        # BUG FIX: quit the webdriver and print the closing separator
        # before returning; the old code leaked the driver on this path.
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return

    root_step = step / len(parsed_subpages)

    for page in parsed_subpages:
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            break

        create_subpages(new_url + page[0],
                        page[1],
                        browser,
                        progress_var=progress_var,
                        step=root_step)

    migration_print("-----------------------------------------------------\n")
    browser.quit()
    browser.quit()