def set_status(new_blog, browser):
    """Unhide every hidden-only subpage of *new_blog* via the CMS Page Status screen.

    Opens the blog, logs in if the site redirects to its login page, follows
    the "Page Status" admin link, and clicks the toggle input inside every
    span marked status='hidden_only'.

    :param new_blog: URL of the blog page on the new (televox) site.
    :param browser: an active Selenium WebDriver instance.
    """
    wait = WebDriverWait(browser, 20)
    browser.get(new_blog)
    # Log into the CMS first if the site bounced us to its login page.
    if browser.title == "Login":
        login(browser, wait)
    option = browser.find_element_by_xpath("//li[@class='optionPageOptions']")
    option.click()
    # Build the absolute Page Status URL from the relative href of the menu
    # entry plus the site's hostname portion of new_blog.
    # (raw strings: the old patterns relied on the invalid escape "\.")
    page_status = browser.find_element_by_xpath(
        "//li[@class='optionPageOptions']//ul//li/a[text()='Page Status']")
    href = page_status.get_attribute('href')
    rel_url = re.search(r"/cms/.*PtlPageSubPages", href).group(0)
    main_url = re.match(r".*\.televox\.west\.com", new_blog).group(0)
    browser.get(main_url + rel_url)
    elements = browser.find_elements_by_xpath("//span[@status='hidden_only']")
    for ind in range(len(elements)):
        # Re-query every iteration: clicking mutates the page and stales
        # previously located elements.
        elements = browser.find_elements_by_xpath(
            "//span[@status='hidden_only']")
        target = elements[ind].find_element_by_tag_name("input")
        target.click()
def _report_meta_failure(field, new_url, old_meta):
    """Report a UnicodeDecodeError hit while migrating *field* for *new_url*.

    Prints the old page's full metadata so the operator can enter it by
    hand, then pauses via ask_continue().
    """
    migration_print("Unable to migrate " + field + " for " + new_url)
    migration_print("Title: " + old_meta["title"])
    migration_print("Description: " + old_meta["description"])
    migration_print("Keywords: " + old_meta["keywords"])
    migration_print(
        "-----------------------------------------------------------")
    ask_continue()


def set_meta(old_url, new_url, browser):
    """Copy title/description/keywords metadata from *old_url* to *new_url*.

    Scrapes the old page's meta tags, opens the CMS "Metadata" screen for
    the new page, enables custom metadata, and fills in each field.  On a
    UnicodeDecodeError the failure is reported and this page is skipped.

    :param old_url: URL of the page on the legacy site.
    :param new_url: URL of the corresponding page on the new site.
    :param browser: an active Selenium WebDriver instance.
    """
    # Bail out when the GUI has toggled the run off.
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return
    wait = WebDriverWait(browser, 20)
    old_soup = get_soup(old_url)
    old_meta = get_meta_soup(old_soup, old_url)
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return
    if new_url.endswith('/'):
        new_url = new_url[:-1]
    # The CMS truncates page names at 50 characters; mirror that here so we
    # navigate to the page that actually exists.
    new_path = urlparse(new_url).path
    new_path_list = new_path.split('/')
    if len(new_path_list[-1]) > 50:
        new_path_list[-1] = new_path_list[-1][:50]
        new_path_dup = "/".join(new_path_list)
        new_url_dup = new_url.replace(new_path, new_path_dup)
        browser.get(new_url_dup)
    else:
        browser.get(new_url)
    if browser.title == "Login":
        login(browser, wait)
    # Some pages render with a gateway "Login" link instead of redirecting;
    # click through and log in if so.
    new_soup = BeautifulSoup(browser.page_source, "html.parser")
    login_status = new_soup.find('a', id="ctl00_lnkGateway").get_text()
    if login_status == "Login":
        login_button = browser.find_element_by_id("ctl00_lnkGateway")
        login_button.click()
        wait.until(
            EC.visibility_of_element_located(
                (By.ID, "ctl00_ContentPlaceHolder1_txtUsername")))
        login(browser, wait)
    page_options = browser.find_element_by_xpath(
        '//li[@class="optionPageOptions"]')
    page_options.click()
    # The metadata menu entry is found via its icon span; the parent <a>
    # carries the href we need.
    metadata_option = browser.find_element_by_xpath(
        '//span[@class="AB_icn AB_icn-metadata"]').find_element_by_xpath('..')
    url = metadata_option.get_attribute('href')
    rel_url = re.search(r"/cms/.*Metadata", url).group(0)
    new_hostname = urlparse(new_url).hostname
    target_url = "http://" + new_hostname + rel_url
    browser.get(target_url)
    # The first checkbox on the form enables custom (per-page) metadata.
    enable_custom_checkbox = browser.find_elements_by_xpath(
        '//input[@type="checkbox"]')[0]
    if not enable_custom_checkbox.is_selected():
        enable_custom_checkbox.click()
    # Migrate title.  NOTE(review): the indices 6/13/14 are fixed positions
    # of the title/description/keywords inputs on the CMS metadata form --
    # they will break if the form layout changes.
    title = old_meta["title"]
    title_entry = browser.find_elements_by_xpath('//input[@type="text"]')[6]
    title_entry.clear()
    try:
        title_entry.send_keys(title)
    except UnicodeDecodeError:
        _report_meta_failure("title", new_url, old_meta)
        return
    # Migrate description, skipping the "none" sentinel and boilerplate
    # auto-generated descriptions.
    description = old_meta["description"]
    if description != "none" and not description.startswith(
            "Learn more about"):
        description_entry = browser.find_elements_by_xpath(
            '//input[@type="text"]')[13]
        description_entry.clear()
        try:
            description_entry.send_keys(description)
        except UnicodeDecodeError:
            _report_meta_failure("description", new_url, old_meta)
            return
    # Migrate keywords ("none" is the scraper's no-value sentinel).
    keywords = old_meta["keywords"]
    if keywords != "none":
        keywords_entry = browser.find_elements_by_xpath(
            '//input[@type="text"]')[14]
        keywords_entry.clear()
        try:
            keywords_entry.send_keys(keywords)
        except UnicodeDecodeError:
            _report_meta_failure("keywords", new_url, old_meta)
            return
    submit_button = browser.find_element_by_xpath('//input[@type="submit"]')
    submit_button.click()
    # Report which page path was migrated (path plus any query/fragment).
    new_path = urlparse(new_url).path
    if not new_path:
        new_path = "/"
    else:
        ind = new_url.find(new_path)
        new_path = new_url[ind:]
    migration_print(new_path + " metadata migrated!")
def _find_either(browser, primary_xpath, fallback_xpath):
    """Return the element at *primary_xpath*, else try *fallback_xpath*.

    The CMS generates different control ids per template (ctl06/ctl07 vs
    ctl01/ctl02); trying both covers the variants seen in practice.
    Raises NoSuchElementException if neither xpath matches.
    """
    try:
        return browser.find_element_by_xpath(primary_xpath)
    except NoSuchElementException:
        return browser.find_element_by_xpath(fallback_xpath)


def migrate_post(old_post, new_blog, browser):
    """Create one News/Blog content page on *new_blog* from *old_post*.

    :param old_post: dict with keys 'title' (a (text, numeric-suffix) pair),
        'summary', 'date' and 'url'; the body HTML is fetched from
        old_post['url'] via get_article().
    :param new_blog: URL of the blog root page on the new site.
    :param browser: an active Selenium WebDriver instance.
    """
    # Bail out when the GUI has toggled the run off.
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return
    wait = WebDriverWait(browser, 20)
    browser.get(new_blog)
    if browser.title == "Login":
        login(browser, wait)
    # Some pages render with a gateway "Login" link instead of redirecting.
    new_soup = BeautifulSoup(browser.page_source, "html.parser")
    login_status = new_soup.find('a', id="ctl00_lnkGateway").get_text()
    if login_status == "Login":
        login_button = browser.find_element_by_id("ctl00_lnkGateway")
        login_button.click()
        wait.until(EC.visibility_of_element_located(
            (By.ID, "ctl00_ContentPlaceHolder1_txtUsername")))
        login(browser, wait)
    page_option = browser.find_element_by_xpath("//li[@class='optionAddPage']")
    page_option.click()
    try:
        content_space_page = browser.find_element_by_xpath(
            '//li[@class="optionAddPage"]//ul//li/a[text()="News/Blog Content Page"]')
        content_space_page.click()
    except NoSuchElementException:
        # Best-effort: warn and fall through (historical behavior).
        migration_print("Can't find + News/Blog Content Page button. Please make sure Our Blog is a News/Blog Page.")
    title_entry = _find_either(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl06_txtTitle']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl01_txtTitle']")
    title_entry.send_keys(old_post['title'][0])
    # A numeric suffix disambiguates duplicate titles.
    if old_post['title'][1]:
        title_entry.send_keys(str(old_post['title'][1]))
    generate_name = browser.find_element_by_xpath(
        "//img[@title='Generate Name']")
    generate_name.click()
    name_entry = _find_either(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl06_txtCanonicalName']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl01_txtCanonicalName']")
    # Canonical names are capped at 50 characters; if truncation dropped the
    # disambiguating suffix, re-truncate to 49 and re-append it.
    name = refine_name(name_entry.get_attribute("value")[:50])
    if old_post['title'][1] and not name.endswith(str(old_post['title'][1])):
        name = refine_name(
            name_entry.get_attribute("value")[:49] + str(old_post['title'][1]))
    name_entry.clear()
    name_entry.send_keys(name)
    create_page = _find_either(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl06_btnSubmit']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl01_btnSubmit']")
    create_page.click()
    # After creation the edit form uses ctl07/ctl02 ids.  If the title field
    # is missing under both, page creation failed; give up silently
    # (historical behavior).
    try:
        title_entry = _find_either(
            browser,
            "//input[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_title']",
            "//input[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_title']")
    except NoSuchElementException:
        return
    title_entry.send_keys(old_post['title'][0])
    if old_post['summary']:
        summary_entry = _find_either(
            browser,
            "//input[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_summary']",
            "//input[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_summary']")
        summary_entry.send_keys(old_post['summary'])
    date_entry = _find_either(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_published_date_dateInput']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_published_date_dateInput']")
    date_entry.send_keys(old_post['date'])
    # Switch the rich-text editor to raw HTML mode and paste the article
    # into the textarea inside the editor's second iframe.
    remode_html = browser.find_element_by_xpath("//a[@class='reMode_html']")
    remode_html.click()
    article = get_article(old_post['url'])
    frame = _find_either(
        browser,
        "//td[@id='ctl00_ContentPlaceHolder1_ctl07_ctl48_field_body_ctl00Center']//iframe[2]",
        "//td[@id='ctl00_ContentPlaceHolder1_ctl02_ctl48_field_body_ctl00Center']//iframe[2]")
    browser.switch_to.frame(frame)
    content_entry = browser.find_element_by_xpath("//textarea")
    content_entry.click()
    content_entry.send_keys(article)
    browser.switch_to.default_content()
    # Publish via both buttons, then confirm.
    publish = _find_either(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl07_ibPublishBottom']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl02_ibPublishBottom']")
    publish.click()
    publish2 = _find_either(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl07_ibPublishTop']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl02_ibPublishTop']")
    publish2.click()
    yes = _find_either(
        browser,
        "//input[@id='ctl00_ContentPlaceHolder1_ctl07_btnYes']",
        "//input[@id='ctl00_ContentPlaceHolder1_ctl02_btnYes']")
    yes.click()
def _find_by_either_name(browser, primary_name, fallback_name):
    """Return the form element named *primary_name*, else try *fallback_name*.

    Horizontal and vertical CMS templates generate different control ids
    (ctl06 vs ctl01), so both are tried.  Raises NoSuchElementException if
    neither name matches.
    """
    try:
        return browser.find_element_by_name(primary_name)
    except NoSuchElementException:
        return browser.find_element_by_name(fallback_name)


def create_subpages(root_url, subpages, browser, progress_var=None, step=20.0):
    """Create a Content Space Page under *root_url* for each entry in *subpages*.

    :param root_url: URL of the parent page on the new site.
    :param subpages: iterable of (title, canonical_name) pairs.
    :param browser: an active Selenium WebDriver instance.
    :param progress_var: optional Tk-style variable advanced by
        step/len(subpages) after each page.
    :param step: total progress budget for this call.
    """
    wait = WebDriverWait(browser, 20)
    browser.get(root_url)
    # Bail out when the GUI has toggled the run off.
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        return
    # Log into the page if the site needs login.
    if browser.title == "Login":
        login(browser, wait)
    # Guard against division by zero on an empty subpage list.
    if not subpages:
        return
    page_step = step / len(subpages)
    for page in subpages:
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            return
        browser.get(root_url)
        try:
            page_option = browser.find_element_by_xpath(
                "//li[@class='optionAddPage']")
            page_option.click()
        except NoSuchElementException:
            # No "Add Page" menu here; log, keep progress moving, and skip.
            migration_print("Unable to create subpage for " + root_url)
            if progress_var:
                progress_var.set(progress_var.get() + page_step)
            continue
        content_space_page = browser.find_element_by_xpath(
            '//li[@class="optionAddPage"]//ul//li/a[text()="Content Space Page"]'
        )
        content_space_page.click()
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            return
        # Field names differ between horizontal and vertical templates.
        page_title_entry = _find_by_either_name(
            browser,
            "ctl00$ContentPlaceHolder1$ctl06$txtTitle",
            "ctl00$ContentPlaceHolder1$ctl01$txtTitle")
        page_title_entry.send_keys(page[0])
        generate_title = browser.find_element_by_xpath(
            "//img[@title='Generate Name']")
        generate_title.click()
        page_name_entry = _find_by_either_name(
            browser,
            "ctl00$ContentPlaceHolder1$ctl06$txtCanonicalName",
            "ctl00$ContentPlaceHolder1$ctl01$txtCanonicalName")
        # Replace the auto-generated name with the one scraped from the
        # old site so URLs line up.
        page_name_entry.clear()
        page_name_entry.send_keys(page[1])
        create_page = _find_by_either_name(
            browser,
            "ctl00$ContentPlaceHolder1$ctl06$btnSubmit",
            "ctl00$ContentPlaceHolder1$ctl01$btnSubmit")
        create_page.click()
        migration_print(page[0] + " (" + page[1] + ") created!")
        if progress_var:
            progress_var.set(progress_var.get() + page_step)
def migrate_subpages(old_url, new_url, progress_var=None, step=100.0):
    """Mirror the subpage structure of *old_url* onto *new_url*.

    Scrapes the old site's navigation menu via get_subpages() and creates
    the corresponding pages on the new site with create_subpages(),
    advancing *progress_var* toward *step* as it goes.  Opens a fresh
    Chrome session and closes it on every exit path.

    :param old_url: root URL of the legacy site.
    :param new_url: root URL of the new site.
    :param progress_var: optional Tk-style progress variable.
    :param step: total progress budget for this migration.
    """
    old_url = old_url.strip()
    new_url = new_url.strip()
    # Remove a single trailing "/" (endswith is safe on empty strings,
    # unlike the previous url[-1] check which raised IndexError).
    if old_url.endswith('/'):
        old_url = old_url[:-1]
    if new_url.endswith('/'):
        new_url = new_url[:-1]
    # Add "http://" before schemeless urls.
    if not old_url.startswith("http"):
        old_url = "http://" + old_url
    if not new_url.startswith("http"):
        new_url = "http://" + new_url
    # Print out the information for old and new sites.
    migration_print("-----------------------------------------------------")
    migration_print("Old URL: " + old_url)
    migration_print("New URL: " + new_url)
    migration_print("-----------------------------------------------------")
    browser = webdriver.Chrome(executable_path=settings["EXECUTABLE_PATH"])
    wait = WebDriverWait(browser, 20)
    browser.maximize_window()
    # Bail out when the GUI has toggled the run off.
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return
    if progress_var:
        progress_var.set(progress_var.get() + step * 0.01)
    parsed_subpages = get_subpages(old_url)
    browser.get(new_url)
    if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
        browser.quit()
        migration_print(
            "-----------------------------------------------------\n")
        return
    # Log into the page if the site needs login.
    if browser.title == "Login":
        login(browser, wait)
    if progress_var:
        progress_var.set(progress_var.get() + step * 0.02)
    # 1% + 2% consumed above; the remaining 97% is split across subpages.
    step *= 0.97
    # Avoid divide-by-zero on an empty navigation menu.
    if not parsed_subpages:
        migration_print("Unable to fetch subpages from navigation menu of " +
                        old_url)
        # Fix: this early exit previously leaked the Chrome session.
        browser.quit()
        return
    root_step = step / len(parsed_subpages)
    for page in parsed_subpages:
        if status["INTERFACE_MODE"] and not status["CHECKING_STATUS"]:
            break
        create_subpages(new_url + page[0], page[1], browser,
                        progress_var=progress_var, step=root_step)
    migration_print("-----------------------------------------------------\n")
    browser.quit()