def _print_unicode_failure(field, new_url, old_meta):
    # Report a metadata field that send_keys could not type into the CMS form
    # (UnicodeDecodeError), dump everything that was scraped so it can be
    # migrated by hand, then pause for user confirmation.
    migration_print("Unable to migrate " + field + " for " + new_url)
    migration_print("Title: " + old_meta["title"])
    migration_print("Description: " + old_meta["description"])
    migration_print("Keywords: " + old_meta["keywords"])
    migration_print(
        "-----------------------------------------------------------")
    ask_continue()


def set_meta(old_url, new_url, browser):
    """Copy the title/description/keywords metadata of the page at old_url
    into the CMS metadata form of the matching page at new_url.

    Returns early (None) when the GUI has asked the worker to stop, or when
    a field contains characters that cannot be typed into the form.
    """
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return
    wait = WebDriverWait(browser, 20)
    old_soup = get_soup(old_url)
    old_meta = get_meta_soup(old_soup, old_url)
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return
    if new_url.endswith('/'):
        new_url = new_url[:-1]
    # truncate url if name exceeds 50 characters (CMS page-name limit)
    new_path = urlparse(new_url).path
    new_path_list = new_path.split('/')
    if len(new_path_list[-1]) > 50:
        new_path_list[-1] = new_path_list[-1][:50]
        new_path_dup = "/".join(new_path_list)
        new_url_dup = new_url.replace(new_path, new_path_dup)
        browser.get(new_url_dup)
    else:
        browser.get(new_url)
    # log in if the site presents a login page or a "Login" gateway link
    if browser.title == "Login":
        login(browser, wait)
    new_soup = BeautifulSoup(browser.page_source, "html.parser")
    login_status = new_soup.find('a', id="ctl00_lnkGateway").get_text()
    if login_status == "Login":
        login_button = browser.find_element_by_id("ctl00_lnkGateway")
        login_button.click()
        wait.until(
            EC.visibility_of_element_located(
                (By.ID, "ctl00_ContentPlaceHolder1_txtUsername")))
        login(browser, wait)
    # open Page Options -> Metadata and follow its link on the new hostname
    page_options = browser.find_element_by_xpath(
        '//li[@class="optionPageOptions"]')
    page_options.click()
    metadata_option = browser.find_element_by_xpath(
        '//span[@class="AB_icn AB_icn-metadata"]').find_element_by_xpath('..')
    url = metadata_option.get_attribute('href')
    rel_url = re.search("/cms/.*Metadata", url).group(0)
    new_hostname = urlparse(new_url).hostname
    target_url = "http://" + new_hostname + rel_url
    browser.get(target_url)
    # turn on the "custom metadata" checkbox if it is not already selected
    enable_custom_checkbox = browser.find_elements_by_xpath(
        '//input[@type="checkbox"]')[0]
    if not enable_custom_checkbox.is_selected():
        enable_custom_checkbox.click()
    # migrate title (text input index 6 on the metadata form)
    title = old_meta["title"]
    title_entry = browser.find_elements_by_xpath('//input[@type="text"]')[6]
    title_entry.clear()
    try:
        title_entry.send_keys(title)
    except UnicodeDecodeError:
        _print_unicode_failure("title", new_url, old_meta)
        return
    # migrate description, skipping the scraper's "none" placeholder and
    # auto-generated "Learn more about ..." boilerplate
    description = old_meta["description"]
    if description != "none" and not description.startswith(
            "Learn more about"):
        description_entry = browser.find_elements_by_xpath(
            '//input[@type="text"]')[13]
        description_entry.clear()
        try:
            description_entry.send_keys(description)
        except UnicodeDecodeError:
            _print_unicode_failure("description", new_url, old_meta)
            return
    # migrate keywords ("none" is the scraper's placeholder for missing)
    keywords = old_meta["keywords"]
    if keywords != "none":
        keywords_entry = browser.find_elements_by_xpath(
            '//input[@type="text"]')[14]
        keywords_entry.clear()
        try:
            keywords_entry.send_keys(keywords)
        except UnicodeDecodeError:
            _print_unicode_failure("keywords", new_url, old_meta)
            return
    submit_button = browser.find_element_by_xpath('//input[@type="submit"]')
    submit_button.click()
    # report the migrated page by its path ("/" for the homepage)
    new_path = urlparse(new_url).path
    if not new_path:
        new_path = "/"
    else:
        ind = new_url.find(new_path)
        new_path = new_url[ind:]
    migration_print(new_path + " metadata migrated!")
def migrate_meta(old_url, new_url, progress_var=None, step=100.0):
    """Migrate metadata for a whole site: homepage, all subpages found in the
    old site map, and (when both sites have one) the blog index and posts.

    progress_var, when given, is a Tk variable advanced by fractions of
    `step` as work completes.  Opens its own Chrome driver and always quits
    it before returning.
    """
    old_url = old_url.strip()
    new_url = new_url.strip()
    # remove the "/" at the end of the url
    if old_url[-1] == '/':
        old_url = old_url[:-1]
    if new_url[-1] == '/':
        new_url = new_url[:-1]
    # add "http://" before url
    if not old_url.startswith("http"):
        old_url = "http://" + old_url
    if not new_url.startswith("http"):
        new_url = "http://" + new_url
    # print out the information for old and new sites
    migration_print("-----------------------------------------------------")
    migration_print("Old URL: " + old_url)
    migration_print("New URL: " + new_url)
    migration_print("-----------------------------------------------------")
    browser = webdriver.Chrome(executable_path=settings["EXECUTABLE_PATH"])
    browser.maximize_window()
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return
    if progress_var:
        progress_var.set(progress_var.get() + step * 0.01)
    sites = get_sites(old_url)
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return
    if progress_var:
        progress_var.set(progress_var.get() + step * 0.02)
    if not sites:
        # FIX: message previously read "form site map"
        migration_print("Unable to fetch subpage URLs from site map of " +
                        old_url)
    # find blog pages; blog migration only runs when both sites have one
    old_blog_page = get_blog_site(old_url)
    new_blog_page = get_blog_site(new_url)
    blog_exists = True
    if not old_blog_page or not new_blog_page:
        blog_exists = False
    # calculate the step for each subpage (+1 accounts for the homepage)
    step *= 0.97
    if blog_exists:
        page_step = step / 2 / (len(sites) + 1)
    else:
        page_step = step / (len(sites) + 1)
    # migrate metadata for homepage
    set_meta(old_url, new_url, browser)
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return
    if progress_var:
        progress_var.set(progress_var.get() + page_step)
    # migrate all non-blog pages
    for site in sites:
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            browser.quit()
            migration_print(
                "-----------------------------------------------------\n")
            return
        old_link = old_url + site
        new_link = new_url + site
        try:
            set_meta(old_link, new_link, browser)
        except NoSuchElementException:
            migration_print("Missing Page: " + new_link)
        if progress_var:
            progress_var.set(progress_var.get() + page_step)
    if not blog_exists:
        browser.quit()
        migration_print(
            "-----------------------------------------------------------")
        return
    # second half of the budget is for the blog
    step /= 2
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return
    old_blog_soup = get_soup(old_blog_page)
    new_blog_soup = get_soup(new_blog_page, browser)
    old_blogs = old_blog_soup.find_all(['h5', 'h3'])
    new_blogs = new_blog_soup.find_all('a', class_="title")
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return
    if progress_var:
        progress_var.set(progress_var.get() + step * 0.02)
    step *= 0.98
    # record blog posts as (title, url) pairs; duplicate titles get a
    # numeric suffix so they stay unique as dict keys
    old_list = []
    parsed_old_blogs = {}
    ind = 1
    for blog in old_blogs:
        title = blog.get_text()
        if title == "Categories":
            # FIX: previously removed the element from old_blogs while
            # iterating it, which silently skipped the next heading
            continue
        try:
            link = blog.a.get('href')
        except AttributeError:
            migration_print("Unable to find blog metadata for " + title)
            # FIX: without this continue, a stale/unbound `link` from the
            # previous iteration was recorded for this title
            continue
        if title in parsed_old_blogs:
            parsed_old_blogs[title + str(ind)] = link
            old_list.append((title + str(ind), link))
            ind += 1
        else:
            parsed_old_blogs[title] = link
            old_list.append((title, link))
    new_list = []
    parsed_new_blogs = {}
    ind = 1
    for blog in new_blogs:
        title = blog.get_text()
        link = new_url + blog.get('href')
        if title in parsed_new_blogs:
            parsed_new_blogs[title + str(ind)] = link
            new_list.append((title + str(ind), link))
            ind += 1
        else:
            parsed_new_blogs[title] = link
            new_list.append((title, link))
    if not old_list or not new_list:
        browser.quit()
        return
    blog_step = step / (len(old_list) + 1)
    # migrate metadata for blog index page
    set_meta(old_blog_page, new_blog_page, browser)
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return
    if progress_var:
        progress_var.set(progress_var.get() + blog_step)
    # migrate metadata for blog posts, pairing old/new by position when the
    # titles line up and by title lookup otherwise
    for ind in range(len(old_list)):
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            browser.quit()
            migration_print(
                "-----------------------------------------------------\n")
            return
        if old_list[ind][0] == new_list[ind][0]:
            set_meta(old_list[ind][1], new_list[ind][1], browser)
        else:
            try:
                set_meta(parsed_old_blogs[old_list[ind][0]],
                         parsed_new_blogs[old_list[ind][0]], browser)
            except KeyError:
                migration_print("Cannot migrate metadata for blog page " +
                                new_list[ind][1])
                continue
        if progress_var:
            progress_var.set(progress_var.get() + blog_step)
    browser.quit()
    migration_print("-----------------------------------------------------\n")
def _find_first_element(browser, *xpaths):
    # Return the first element matching any of the given XPaths, trying them
    # in order.  The CMS emits different control ids depending on template
    # (ctl06/ctl07 vs ctl01/ctl02), so most lookups need two candidates.
    # The final candidate's NoSuchElementException propagates, matching the
    # original try/except-chain behavior.
    for xpath in xpaths[:-1]:
        try:
            return browser.find_element_by_xpath(xpath)
        except NoSuchElementException:
            pass
    return browser.find_element_by_xpath(xpaths[-1])


def migrate_post(old_post, new_blog, browser):
    """Create one News/Blog content page under new_blog and fill it with the
    title, summary, date and article body of old_post.

    old_post is a dict with keys 'title' (a (text, dedup-suffix) pair),
    'summary', 'date' and 'url'.  Returns early when the GUI has asked the
    worker to stop, or when the editor's title field cannot be found after
    page creation.
    """
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return
    wait = WebDriverWait(browser, 20)
    browser.get(new_blog)
    # log in if the site presents a login page or a "Login" gateway link
    if browser.title == "Login":
        login(browser, wait)
    new_soup = BeautifulSoup(browser.page_source, "html.parser")
    login_status = new_soup.find('a', id="ctl00_lnkGateway").get_text()
    if login_status == "Login":
        login_button = browser.find_element_by_id("ctl00_lnkGateway")
        login_button.click()
        wait.until(EC.visibility_of_element_located(
            (By.ID, "ctl00_ContentPlaceHolder1_txtUsername")))
        login(browser, wait)
    # open the "+ Add Page" menu and pick News/Blog Content Page
    page_option = browser.find_element_by_xpath("//li[@class='optionAddPage']")
    page_option.click()
    try:
        content_space_page = browser.find_element_by_xpath(
            '//li[@class="optionAddPage"]//ul//li/a[text()="News/Blog Content Page"]')
        content_space_page.click()
    except NoSuchElementException:
        migration_print("Can't find + News/Blog Content Page button. Please make sure Our Blog is a News/Blog Page.")
    # fill in the page title; a truthy dedup suffix is appended to keep
    # same-titled posts distinct
    title_entry = _find_first_element(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl06_txtTitle']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl01_txtTitle']")
    title_entry.send_keys(old_post['title'][0])
    if old_post['title'][1]:
        title_entry.send_keys(str(old_post['title'][1]))
    # auto-generate the canonical name, then trim it to the 50-char limit
    # while preserving the dedup suffix
    generate_name = browser.find_element_by_xpath("//img[@title='Generate Name']")
    generate_name.click()
    name_entry = _find_first_element(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl06_txtCanonicalName']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl01_txtCanonicalName']")
    name = refine_name(name_entry.get_attribute("value")[:50])
    if old_post['title'][1] and not name.endswith(str(old_post['title'][1])):
        name = refine_name(name_entry.get_attribute("value")[:49] +
                           str(old_post['title'][1]))
    name_entry.clear()
    name_entry.send_keys(name)
    create_page = _find_first_element(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl06_btnSubmit']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl01_btnSubmit']")
    create_page.click()
    # we are now in the page editor; bail out silently if it did not open
    try:
        title_entry = _find_first_element(
            browser,
            "//input[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_title']",
            "//input[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_title']")
    except NoSuchElementException:
        return
    title_entry.send_keys(old_post['title'][0])
    if old_post['summary']:
        summary_entry = _find_first_element(
            browser,
            "//input[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_summary']",
            "//input[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_summary']")
        summary_entry.send_keys(old_post['summary'])
    date_entry = _find_first_element(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_published_date_dateInput']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_published_date_dateInput']")
    date_entry.send_keys(old_post['date'])
    # switch the rich-text editor to raw HTML mode and paste the article
    # into the editor's textarea (inside the second iframe)
    remode_html = browser.find_element_by_xpath("//a[@class='reMode_html']")
    remode_html.click()
    article = get_article(old_post['url'])
    browser.switch_to.frame(_find_first_element(
        browser,
        "//td[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_body_ctl00Center']//iframe[2]",
        "//td[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_body_ctl00Center']//iframe[2]"))
    content_entry = browser.find_element_by_xpath("//textarea")
    content_entry.click()
    content_entry.send_keys(article)
    browser.switch_to.default_content()
    # publish (bottom button, then top button, then confirm)
    publish = _find_first_element(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl07_ibPublishBottom']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl02_ibPublishBottom']")
    publish.click()
    publish2 = _find_first_element(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl07_ibPublishTop']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl02_ibPublishTop']")
    publish2.click()
    yes = _find_first_element(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl07_btnYes']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl02_btnYes']")
    yes.click()
def migrate_blog(old_blog, new_blog, progress_var=None, step=100.0): old_url = old_blog.strip() new_url = new_blog.strip() # check program status if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]: return # remove the "/" at the end of the url if old_url[-1] == '/': old_url = old_url[:-1] if new_url[-1] == '/': new_url = new_url[:-1] # add "http://" before url if not old_url.startswith("http"): old_url = "http://" + old_url if not new_url.startswith("http"): new_url = "http://" + new_url # print out the information for old and new sites migration_print("-----------------------------------------------------") migration_print("Old URL: " + old_url) migration_print("New URL: " + new_url) migration_print("-----------------------------------------------------") # check program status if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]: return # create new webdriver browser = webdriver.Chrome(executable_path=settings["EXECUTABLE_PATH"]) browser.maximize_window() if progress_var: progress_var.set(progress_var.get() + step * 0.01) blog_posts = get_blog_posts(old_url) if progress_var: progress_var.set(progress_var.get() + step * 0.02) step *= 0.95 if not blog_posts: migration_print("Unable to get blog posts for " + old_url) browser.quit() return blog_step = step / len(blog_posts) for post in blog_posts: # check program status if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]: return migrate_post(post, new_url, browser) migration_print('\"' + post['title'][0] + "\" migrated!") if progress_var: progress_var.set(progress_var.get() + blog_step) set_status(new_blog, browser) if progress_var: progress_var.set(progress_var.get() + step * 0.02) browser.close()
def create_subpages(root_url, subpages, browser, progress_var=None, step=20.0):
    """Create a batch of Content Space pages under root_url.

    subpages is a sequence of (title, canonical_name) pairs.  The caller
    owns `browser`; this function never quits it.  progress_var, when given,
    is advanced by an equal share of `step` per subpage.
    """
    wait = WebDriverWait(browser, 20)
    browser.get(root_url)
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return
    # log into the page if the site needs login
    if browser.title == "Login":
        login(browser, wait)
    # FIX: guard against an empty batch, which previously raised
    # ZeroDivisionError on the step calculation below
    if not subpages:
        return
    page_step = step / len(subpages)
    # create content space page at root_url
    for page in subpages:
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            return
        browser.get(root_url)
        try:
            page_option = browser.find_element_by_xpath(
                "//li[@class='optionAddPage']")
            page_option.click()
        except NoSuchElementException:
            migration_print("Unable to create subpage for " + root_url)
            if progress_var:
                progress_var.set(progress_var.get() + page_step)
            continue
        content_space_page = browser.find_element_by_xpath(
            '//li[@class="optionAddPage"]//ul//li/a[text()="Content Space Page"]'
        )
        content_space_page.click()
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            return
        # use different names for horizontal and vertical templates
        try:
            page_title_entry = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl06$txtTitle")
        except NoSuchElementException:
            page_title_entry = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl01$txtTitle")
        page_title_entry.send_keys(page[0])
        generate_title = browser.find_element_by_xpath(
            "//img[@title='Generate Name']")
        generate_title.click()
        try:
            page_name_entry = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl06$txtCanonicalName")
        except NoSuchElementException:
            page_name_entry = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl01$txtCanonicalName")
        # replace the auto-generated name with the caller-supplied one
        page_name_entry.clear()
        page_name_entry.send_keys(page[1])
        try:
            create_page = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl06$btnSubmit")
        except NoSuchElementException:
            create_page = browser.find_element_by_name(
                "ctl00$ContentPlaceHolder1$ctl01$btnSubmit")
        create_page.click()
        migration_print(page[0] + " (" + page[1] + ") created!")
        if progress_var:
            progress_var.set(progress_var.get() + page_step)
def migrate_subpages(old_url, new_url, progress_var=None, step=100.0):
    """Recreate the subpage structure of old_url's navigation menu under
    new_url by creating Content Space pages for each parsed subpage group.

    progress_var, when given, is advanced by fractions of `step`.  Opens its
    own Chrome driver and quits it before returning.
    """
    old_url = old_url.strip()
    new_url = new_url.strip()
    # remove the "/" at the end of the url
    if old_url[-1] == '/':
        old_url = old_url[:-1]
    if new_url[-1] == '/':
        new_url = new_url[:-1]
    # add "http://" before url
    if not old_url.startswith("http"):
        old_url = "http://" + old_url
    if not new_url.startswith("http"):
        new_url = "http://" + new_url
    # print out the information for old and new sites
    migration_print("-----------------------------------------------------")
    migration_print("Old URL: " + old_url)
    migration_print("New URL: " + new_url)
    migration_print("-----------------------------------------------------")
    browser = webdriver.Chrome(executable_path=settings["EXECUTABLE_PATH"])
    wait = WebDriverWait(browser, 20)
    browser.maximize_window()
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return
    if progress_var:
        progress_var.set(progress_var.get() + step * 0.01)
    # NOTE: the trailing "/" was already stripped above, so the original's
    # second endswith('/') check here was dead code and has been removed
    parsed_subpages = get_subpages(old_url)
    browser.get(new_url)
    # check program status
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return
    # log into the page if the site needs login
    if browser.title == "Login":
        login(browser, wait)
    if progress_var:
        progress_var.set(progress_var.get() + step * 0.02)
    step *= 0.97
    # avoid divided by zero error
    if not parsed_subpages:
        migration_print("Unable to fetch subpages from navigation menu of " +
                        old_url)
        # FIX: previously returned without quitting, leaking the driver
        browser.quit()
        return
    root_step = step / len(parsed_subpages)
    for page in parsed_subpages:
        # check program status
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            break
        create_subpages(new_url + page[0], page[1], browser,
                        progress_var=progress_var, step=root_step)
    migration_print("-----------------------------------------------------\n")
    browser.quit()