def patent_parser(search_exp):
    """Search the SIPO patent site for *search_exp* and index unseen patents.

    Drives a PhantomJS browser through at most ten result pages.  For every
    result row whose id is not yet known to ``urlset`` a ``Patent`` record is
    built; the collected records are then handed to ``solr.add``.  The
    browser is always closed, even when crawling fails part-way.

    :param search_exp: search expression typed into the site's search box.
    """
    patent_list = []
    b = Browser("phantomjs")
    try:
        b.reload()
        b.visit(
            'http://www.pss-system.gov.cn/sipopublicsearch/search/'
            'searchHome-searchIndex.shtml'
        )
        b.fill('searchInfo', search_exp)
        b.click_link_by_text(u'检索')
        # Wait for the first result block to appear.  The "not present"
        # variant returns immediately while the results are still loading.
        b.is_element_present_by_css('.s_c_conter', wait_time=8)

        for _ in range(10):
            for item in b.find_by_css('.s_c_conter'):
                info_list = item.find_by_tag('td')
                try:
                    # The fixed offsets presumably strip the field labels
                    # preceding each value -- TODO confirm against the page.
                    patent_id = info_list[0].text[6:]
                except IndexError:
                    print('error patent')
                    continue
                if urlset.has_url('patent', patent_id):
                    continue
                try:
                    patent = Patent(
                        id=patent_id,
                        path='~',
                        title=info_list[4].text[6:],
                        abstract='~',
                        inventor=info_list[7].text[5:].split(';')[:-1],
                        applicant=info_list[6].text[10:].split(';')[:-1],
                        category=info_list[5].text[8:].split('; '),
                        update_time=time.strftime('%Y-%m-%dT%XZ',
                                                  time.gmtime()))
                    patent_list.append(patent)
                    print('%s new' % patent.id)  # @todo logs
                except Exception:
                    print('error patent')

            if not b.is_text_present(u'下一页'):
                break
            b.click_link_by_text(u'下一页')
            # Here "not present" is the intended check: wait for the old
            # result block to go away while the next page loads.
            b.is_element_not_present_by_css('.s_c_conter', wait_time=8)

        try:
            solr.add('patent', patent_list)
        except Exception:
            # Best effort: report the failure instead of silently dropping it.
            print('err adding patent')
    finally:
        b.quit()
def patent_parser(search_exp):
    """Search the SIPO patent site for *search_exp* and index unseen patents.

    Drives a PhantomJS browser through at most ten result pages.  For every
    result row whose id is not yet known to ``urlset`` a ``Patent`` record is
    built; the collected records are then handed to ``solr.add``.  The
    browser is always closed, even when crawling fails part-way.

    :param search_exp: search expression typed into the site's search box.
    """
    patent_list = []
    b = Browser("phantomjs")
    try:
        b.reload()
        b.visit(
            'http://www.pss-system.gov.cn/sipopublicsearch/search/'
            'searchHome-searchIndex.shtml'
        )
        b.fill('searchInfo', search_exp)
        b.click_link_by_text(u'检索')
        # Wait for the first result block to appear.  The "not present"
        # variant returns immediately while the results are still loading.
        b.is_element_present_by_css('.s_c_conter', wait_time=8)

        for _ in range(10):
            for item in b.find_by_css('.s_c_conter'):
                info_list = item.find_by_tag('td')
                try:
                    # The fixed offsets presumably strip the field labels
                    # preceding each value -- TODO confirm against the page.
                    patent_id = info_list[0].text[6:]
                except IndexError:
                    print('error patent')
                    continue
                if urlset.has_url('patent', patent_id):
                    continue
                try:
                    patent = Patent(
                        id=patent_id,
                        path='~',
                        title=info_list[4].text[6:],
                        abstract='~',
                        inventor=info_list[7].text[5:].split(';')[:-1],
                        applicant=info_list[6].text[10:].split(';')[:-1],
                        category=info_list[5].text[8:].split('; '),
                        update_time=time.strftime('%Y-%m-%dT%XZ',
                                                  time.gmtime()))
                    patent_list.append(patent)
                    print('%s new' % patent.id)  # @todo logs
                except Exception:
                    print('error patent')

            if not b.is_text_present(u'下一页'):
                break
            b.click_link_by_text(u'下一页')
            # Here "not present" is the intended check: wait for the old
            # result block to go away while the next page loads.
            b.is_element_not_present_by_css('.s_c_conter', wait_time=8)

        try:
            solr.add('patent', patent_list)
        except Exception:
            # Best effort: report the failure instead of silently dropping it.
            print('err adding patent')
    finally:
        b.quit()
def _write_sms_page(bsr, out_f):
    """Write every SMS shown on the current page of *bsr* to *out_f*."""
    date_lst = bsr.find_by_css("div.list-time")
    date_msgs_lst = bsr.find_by_css("div.sms-item")
    # HACK: right-clicking the last date header scrolls the list down, which
    # makes the AJAX-loaded messages render.
    date_lst[-1].right_click()

    msg_i = 0
    for date_elem in date_lst:
        header = date_elem.text
        date = header.strip().split()[0]
        # The header carries the per-day message count in parentheses.
        msg_num = int(re.findall(r"\(\s*(\d+).\s*\)", header)[0])
        out_f.write("%s\t%d\n" % (date, msg_num))
        for _ in range(msg_num):
            sms = date_msgs_lst[msg_i]
            name_obj = sms.find_by_css("span.js-cnt.name")[0]
            phone_obj = sms.find_by_css("span.js-cnt.fc2")[0]
            time_obj = sms.find_by_css("div.fr.w6.js-cnt.bm-hack-w6")[0]
            msg_obj = sms.find_by_css("div.w4")[0]
            type_obj = sms.find_by_css("div.fl.w3.thide.fc5")[0]
            out_f.write("%s\t%s\t%s\t%s\t%s\n" % (
                name_obj.html.encode("utf8"),
                phone_obj.html.strip("() ").encode("utf8"),
                time_obj.text.encode("utf8"),
                "0" if type_obj.visible else "1",
                msg_obj.text.encode("utf8")))
            msg_i += 1


def crawl(usr, pswd, out_path, driver="firefox"):
    """Log in to the webmail SMS archive and dump all messages to *out_path*.

    The output is tab-separated: one ``date<TAB>count`` line per day followed
    by one ``name, phone, time, type flag, text`` line per message.

    :param usr: account name typed into the ``phone1`` field.
    :param pswd: password typed into the ``pswd`` field.
    :param out_path: path of the text file to (over)write.
    :param driver: splinter driver name used to create the browser.

    Returns without crawling when the login does not succeed.  The browser
    is closed on every exit path.
    """
    bsr = Browser(driver)
    try:
        bsr.visit(URL_LOGIN)
        bsr.find_by_id("phone1").fill(usr)
        bsr.find_by_id("pswd").fill(pswd)
        bsr.find_by_id("login").click()

        if not bsr.is_element_present_by_css("span.fw1.fs0.acnt"):
            print("Login failed, bye!")
            # Nothing useful can be scraped without a session.
            return
        print("Successfully login!")

        bsr.visit("http://123.163.com/webmail/main/#mid=7")
        while bsr.is_element_not_present_by_css("div.list-time"):
            print("sleeping")
            time.sleep(1)
        bsr.find_by_css("span.iblock.icn-msg.list-icon.potr")[0].click()

        page_num = get_page_num(bsr)
        with open(out_path, "w") as out_f:
            for pi in range(page_num):
                print("Page %d/%d" % (pi + 1, page_num))
                _write_sms_page(bsr, out_f)
                # Only advance while there is a following page to read.
                if pi + 1 < page_num:
                    pager = bsr.find_by_css("div.fr.pager")[0]
                    pager.find_by_tag("a")[2].click()
    finally:
        bsr.quit()
def scrape():
    """Scrape Mars news, imagery, weather, facts and hemisphere data.

    Returns:
        dict: with the keys ``news_title``, ``news_p``,
        ``featured_image_url``, ``mars_weather``, ``table`` (the facts table
        rendered as an HTML string) and ``hemisphere`` (a list of
        ``{"title": ..., "img_url": ...}`` dicts).  The dict is meant to be
        inserted into MongoDB by the caller.

    The Chrome browser is closed on every exit path.
    """
    mars_data = {}

    # set the chromedriver path
    executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
    browser = Browser("chrome", **executable_path, headless=False)
    try:
        # Mars news
        browser.visit("https://mars.nasa.gov/news/")
        soup = BeautifulSoup(browser.html, 'html.parser')
        mars_data["news_title"] = soup.find(
            "div", class_="content_title").text
        mars_data["news_p"] = soup.find(
            "div", class_="article_teaser_body").text

        # JPL Mars Space Images - featured image
        browser.visit(
            "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars")
        browser.find_by_xpath('//*[@id="full_image"]').click()
        # Wait for the lightbox image to appear; the "not present" variant
        # returns immediately while the image is still loading.
        browser.is_element_present_by_css("img.fancybox-image", wait_time=1)
        soup = BeautifulSoup(browser.html, 'html.parser')
        featured_image_url = soup.find("img", class_="fancybox-image")["src"]
        # Only site-relative paths need the host; an absolute https:// URL
        # must be left untouched.
        if not featured_image_url.startswith(("http://", "https://")):
            featured_image_url = (
                "https://www.jpl.nasa.gov" + featured_image_url)
        mars_data["featured_image_url"] = featured_image_url

        # Mars weather
        browser.visit("https://twitter.com/marswxreport?lang=en")
        soup = BeautifulSoup(browser.html, 'html.parser')
        mars_data["mars_weather"] = soup.find(
            "p", class_="TweetTextSize", text=re.compile("Sol")).text

        # Mars facts
        df = pd.read_html("https://space-facts.com/mars/")[0]
        df.columns = ["description", "value"]
        df.set_index("description", inplace=True)
        mars_data["table"] = df.to_html()

        # Mars hemispheres: the four thumbnails differ only in their index.
        hemisphere_url = (
            "https://astrogeology.usgs.gov/search/results"
            "?q=hemisphere+enhanced&k1=target&v1=Mars")
        thumb_xpath = '//*[@id="product-section"]/div[2]/div[{}]/a/img'
        hemisphere_image_urls = []
        browser.visit(hemisphere_url)
        for position in range(1, 5):
            browser.find_by_xpath(thumb_xpath.format(position)).click()
            browser.is_element_present_by_css("img.jpg", wait_time=1)
            soup = BeautifulSoup(browser.html, 'html.parser')
            title = soup.find('h2', class_="title").text
            img_url = soup.find(
                'div', class_="wide-image-wrapper").ul.li.a['href']
            hemisphere_image_urls.append(
                {"title": title, "img_url": img_url})
            # Back to the result list for the next thumbnail.
            browser.visit(hemisphere_url)
        mars_data["hemisphere"] = hemisphere_image_urls
    finally:
        browser.quit()
    return mars_data
class Session:
    """Logged-in browser session on jizdenky.studentagency.cz.

    Wraps a splinter ``Browser`` and exposes the steps of a booking:
    searching connections, opening one, picking a seat and reserving it.
    """

    def __init__(self, browser, user):
        """Open the site with the *browser* driver and log in as *user*.

        :param browser: splinter driver name passed to ``Browser``.
        :param user: mapping with ``login`` and ``password``; ``first``,
            ``last``, ``email`` and ``phone`` are read by :meth:`order_seat`.
        """
        self.user = user
        self.log = logging.getLogger(__name__)
        self.browser = Browser(browser)
        self.browser.visit('http://jizdenky.studentagency.cz/')
        self.browser.fill_form({'passwordAccountCode': user['login'],
                                'password': user['password']})
        self.browser.execute_script('window.scrollTo(0, 100)')
        self.browser.find_by_value('Přihlásit').first.click()

    def go_search(self):
        """Navigate back to the search form on the home page."""
        self.browser.visit('http://jizdenky.studentagency.cz/')

    def search(self, task, date_return=None, is_open=False):
        """Fill in the search form and return the connections for the day.

        :param task: object providing ``from_city``, ``to_city`` and ``date``.
        :param date_return: return date; selects the return-ticket radio.
        :param is_open: request an open return ticket.
        :returns: list of ``Connection`` objects listed under ``task.date``.
        """
        radios = self.browser.find_by_id('hp_form_itinerar').first \
            .find_by_xpath('div/input[@type="radio"]')
        radios[1 if date_return or is_open else 0].check()

        for city, tab_index in [(task.from_city, 1), (task.to_city, 2)]:
            self.browser.find_by_css(
                'input[tabindex="{}"]'.format(tab_index)).first.fill(city)
            # Pick the autocomplete suggestion matching the city exactly.
            for item in self.browser.find_by_css('.ui-menu-item'):
                link = item.find_by_tag('a')
                if link.value.lower() == city.lower():
                    link.click()
                    break

        self.browser.fill('departure:dateField', task.date)
        if date_return:
            self.browser.fill('returnDeparture:dateField', date_return)
        if is_open:
            self.browser.check('returnTicketOpen')
        self.browser.find_option_by_text('ISIC').first.check()
        self.browser.find_by_value('Vyhledat').first.click()

        while self.browser.is_element_not_present_by_css('.left_column',
                                                         wait_time=1):
            pass

        items = self.browser.find_by_css('.left_column') \
            .find_by_xpath('div/div/*')
        connections = []
        # Set by the first <h2> date header; stays None if a route row
        # shows up before any header.
        date_local = None
        for item in items:
            if item.tag_name == 'h2':
                date_local = item.text.split(' ')[1]
            elif item.tag_name == 'div' and item.has_class('routeSummary'):
                assert date_local
                if date_local != task.date:
                    break
                connections.append(Connection(item))
        return connections

    def order_time(self, connection):
        """Open *connection* and return its selectable seats.

        :returns: dict mapping seat number (int) to the seat element.
        """
        while True:
            connection.click()
            # Dismiss a modal window if the click produced one.
            dialog = self.browser.find_by_css('[id^=_wicket_window]')
            if dialog:
                dialog.first.find_by_tag('button').click()
            if self.browser.is_element_present_by_id('sumary_lines',
                                                     wait_time=1):
                break

        self.browser.find_by_id('sumary_lines') \
            .first.find_by_tag('button') \
            .first.click()

        seats = {}
        bus = self.browser.find_by_css('.seatsContainer')
        if bus:
            for seat in bus.first.find_by_css(
                    '.seatContainer:not([style*=blocked])'):
                # The label's last character is dropped before parsing.
                number = int(seat.find_by_tag('div').first.html[:-1])
                seats[number] = seat
        else:
            bus = self.browser.find_by_css('.vehicle')
            for seat in bus.first.find_by_css('.free, .selected'):
                seats[int(seat.text[:-1])] = seat
        return seats

    def order_seat(self, seat):
        """Select *seat*, decline insurance, reserve, and mail the ticket.

        The ticket page is mailed to ``self.user['email']`` when
        ``conf.yaml`` contains an ``email`` section.
        """
        if not seat.has_class('selected'):
            seat.click()

        for fs in self.browser.find_by_css('fieldset.topRoute'):
            legend = fs.find_by_css('legend')
            if legend and 'Pojištění' in legend[0].text:
                for package in fs.find_by_css('.insurancePackageType'):
                    if 'nechci' in package.find_by_tag('label').text:
                        package.find_by_tag('input').click()
                        time.sleep(1)

        submit = self.browser.find_by_css('[name^=buttonContainer]').first
        if 'Rezervovat' not in submit.text:
            # Intermediate step: continue and fill in passenger details.
            submit.click()
            time.sleep(1)
            data = (self.user['first'], self.user['last'],
                    self.user['email'], self.user['phone'])
            fields = self.browser.find_by_id('passengerInfo') \
                .first.find_by_tag('input')
            for item, value in zip(fields, data):
                item.fill(value)
            submit = self.browser.find_by_css(
                '[name^=buttonContainer]').first
            assert 'Rezervovat' in submit.text

        agreement = self.browser.find_by_css(
            '[name="bottomComponent:termsAgreementCont:termsAgreementCB"]')
        if agreement:
            agreement[0].check()
        time.sleep(1)
        submit.click()

        with open('conf.yaml') as f:
            # safe_load: plain config data only; bare yaml.load() without a
            # Loader is rejected by current PyYAML releases.
            conf = yaml.safe_load(f)
        if 'email' in conf:
            self._mail_ticket(conf['email'])

    def _mail_ticket(self, email):
        """Send the rendered ticket page to the user through SMTP.

        :param email: mapping with ``from``, ``username``, ``password``
            (base64 encoded) and ``server``.
        """
        while self.browser.is_element_not_present_by_id('ticketPage',
                                                        wait_time=1):
            pass
        msg = MIMEText(self.browser.find_by_id('ticketPage').first.html,
                       'html')
        msg['Subject'] = 'SA reservation'
        msg['From'] = email['from']
        msg['To'] = self.user['email']

        server = smtplib.SMTP(email['server'])
        try:
            server.starttls()
            server.login(email['username'],
                         b64decode(email['password']).decode())
            server.sendmail(msg['From'], msg['To'], msg.as_string())
        finally:
            # Close the connection even when login or sending fails.
            server.quit()
class GPlusEventManager(object):
    """Create and update Google Plus events through a Firefox browser."""

    def __init__(self, email, passwd, otp):
        """Start the browser and log in.

        :param email: account e-mail address.
        :param passwd: account password.
        :param otp: one-time password kept for the login step.
        """
        self.email = email
        self.passwd = passwd
        self.br = Browser('firefox')
        # Make sure the browser does not outlive the interpreter.
        atexit.register(self.force_br_quit)
        # To dynamically load jQuery into the HTML head
        self.loadjq = (
            "var head = document.getElementsByTagName('head')[0]; "
            "var script = document.createElement('script'); "
            "script.type = 'text/javascript'; "
            "script.src = '//ajax.googleapis.com/ajax/libs/jquery/1.10.1/"
            "jquery.min.js'; "
            "head.appendChild(script);"
        )
        self.otp = otp
        self.logged_in = self.login()

    def force_br_quit(self):
        """Quit the browser, ignoring errors (it may already be closed)."""
        try:
            self.br.quit()
        except Exception:
            # Deliberate best effort: this runs at interpreter exit.
            pass

    def create(self, title, desc, date, time):
        """Create a new Google Plus event and return its URL."""
        if not self.logged_in:
            self.logged_in = self.login()
        create_btn = 'div[guidedhelpid="events_create_event_button"]'
        self.br.find_by_css(create_btn)[0].click()
        return self.complete_form(title, desc, date, time, update=False)

    def update(self, id, title=None, desc=None, date=None, time=None):
        """Update the Google Plus event at URL *id* and return its URL.

        Fields left as ``None`` are not modified.
        """
        if not self.logged_in:
            self.logged_in = self.login()
        self.br.visit(id)
        dropdown = 'div[class="A7kfHd q3sPdd"]'
        while self.br.is_element_not_present_by_css(dropdown):
            pass
        self.br.find_by_css(dropdown).click()
        self.br.find_by_xpath('//*[@id=":o"]/div').click()
        return self.complete_form(title, desc, date, time, update=True)

    def complete_form(self, title, desc, date, time, update):
        """Fill the event create/edit form and submit it.

        The CSS selectors are valid in both types of form.  Falsy field
        values are skipped.  The browser is quit once the event page has
        loaded.

        :param update: ``True`` when editing an existing event; new events
            are additionally shared with the ``Public`` group.
        :returns: URL of the resulting event page.
        """
        import json

        title_input = 'input[placeholder="Event title"]'
        while self.br.is_element_not_present_by_css(title_input):
            pass

        if title:
            self.br.find_by_css(title_input).fill(title)

        if date:
            date_field = 'input[class="g-A-G T4 lUa"]'
            self.br.find_by_css(date_field).click()
            rm_date = ('document.body.getElementsByClassName("g-A-G T4 lUa") '
                       '[0].value = ""')
            self.br.execute_script(rm_date)
            self.br.find_by_css(date_field).type('{}\t'.format(date))

        if time:
            self.br.execute_script(self.loadjq)
            rm_time = '$(".EKa")[0].value = ""'
            # Retry until the injected jQuery has finished loading.
            while True:
                try:
                    self.br.execute_script(rm_time)
                except Exception:
                    continue
                break
            time_field = 'input[class="g-A-G T4 EKa"]'
            self.br.find_by_css(time_field)[0].type('{}'.format(time))

        if desc:
            # json.dumps yields a quoted, escaped JavaScript string literal,
            # so quotes or newlines in the description cannot break the
            # script.
            set_desc = ('document.body.getElementsByClassName("yd editable") '
                        '[1].innerHTML = {}'.format(json.dumps(desc)))
            self.br.execute_script(set_desc)

        invite_btn = self.br.find_by_css('div[guidedhelpid="sharebutton"]')
        invite_inp = self.br.find_by_css('input[class="i-j-h-G-G"]')
        invite_btn.click()
        if not update:
            # If new entry, invite Public group by default
            invite_inp.click()
            invite_inp.type('Public\n')
            invite_btn.click()

        while not self.br.is_text_present('Going ('):
            pass  # wait on page load for new event
        url = self.br.url
        self.br.quit()
        return url  # return event url
myClick_txt(u'帐号登录') # data里填真实的username和password data ={'username':'******','password':'******'} browser.fill_form(data) browser.find_by_css('.W_btn_a').first.click() import time browser.visit('http://weibo.com/message/history?uid=5175429989#_0') question = browser.find_by_css('.bubble_r .page').last.text lastAnswer = '' answer = '' j = 0 while True: try: if j % 100 == 0: browser.reload() while browser.is_element_not_present_by_css('.bubble_l .page'): browser.reload() j += 1 i = 0 while answer == lastAnswer and i < 1000: answer = browser.find_by_css('.bubble_l .page').last.text i += 1 print '小冰:',answer try: cursor.execute(sql%(lastAnswer,question,answer)) db.commit() except: db.rollback() question = sim_chat(answer) print '小黄鸡:',question lastAnswer = answer
# Find and click the 'search' button btnEmail = browser.find_by_id('username') btnPasswd = browser.find_by_id('password') btnEmail.fill('') #用户名 btnPasswd.fill('') #密码 #print dir(browser) # Interact with elements btnSubmit = browser.find_by_value("登 录") btnSubmit.click() #登录 # button.click() #time.sleep(10) time.sleep(6) browser.visit("http://download.csdn.net/my/downloads") while browser.is_element_not_present_by_css(".pageliststy"): time.sleep(2) urls = [url['href'] for url in browser.find_link_by_text("立即评价,通过可返分")] #总页面个数 pages = int(browser.find_by_css(".pageliststy")[-1]['href'].split('/')[-1]) for index in range(2, pages + 1): browser.visit("http://download.csdn.net/my/downloads/%d" % index) time.sleep(5) urls = urls + [ url['href'] for url in browser.find_link_by_text("立即评价,通过可返分") ] for url in urls: print url try:
# %%
get_ipython().system(u'which chromedriver')

# %%
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}

# %%
browser = Browser('chrome', **executable_path)

# %%
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# %%
# Wait up to 5 seconds for the teaser text to be rendered.  The
# "not present" variant returns immediately while the page is still loading.
browser.is_element_present_by_css('.article_teaser_body', wait_time=5)

# %%
# Parse the rendered page with BeautifulSoup
html = browser.html
soup = BeautifulSoup(html, "html.parser")

# %%
print(soup)

# %%
news_title = soup.find("div", class_="content_title").text
news_paragraph = soup.find("div", class_="article_teaser_body").text
print(f"Title: {news_title}")
print(f"Para: {news_paragraph}")
# Import Splinter and Beautiful Soup
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd

# Set executable path and initialize Chrome browser
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path)

# Visit Mars Nasa Site
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Optional delay for loading the page: wait up to 3 seconds for the first
# news slide to appear.  The "not present" variant returns immediately while
# the page is still loading.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=3)

# Setup HTML parser
html = browser.html
news_soup = soup(html, 'html.parser')
slide_elem = news_soup.select_one('ul.item_list li.slide')

# Look inside slide_elem for the <div> with class "content_title"
# (expression result is only displayed when run in a notebook)
slide_elem.find("div", class_="content_title")

# Use the parent element to get the title text and save it as `news_title`
news_title = slide_elem.find("div", class_="content_title").get_text()

# Use the parent element to find the paragraph text
news_p = slide_elem.find("div", class_="article_teaser_body").get_text()

# ### Featured Images
class Session:
    """Logged-in browser session on jizdenky.studentagency.cz.

    Wraps a splinter ``Browser`` and exposes the steps of a booking:
    searching connections, opening one, picking a seat and reserving it.
    """

    def __init__(self, browser, user):
        """Open the site with the *browser* driver and log in as *user*.

        :param browser: splinter driver name passed to ``Browser``.
        :param user: mapping with ``login`` and ``password``; ``first``,
            ``last``, ``email`` and ``phone`` are read by :meth:`order_seat`.
        """
        self.user = user
        self.log = logging.getLogger(__name__)
        self.browser = Browser(browser)
        self.browser.visit('http://jizdenky.studentagency.cz/')
        self.browser.fill_form({
            'passwordAccountCode': user['login'],
            'password': user['password']
        })
        self.browser.execute_script('window.scrollTo(0, 100)')
        self.browser.find_by_value('Přihlásit').first.click()

    def go_search(self):
        """Navigate back to the search form on the home page."""
        self.browser.visit('http://jizdenky.studentagency.cz/')

    def search(self, task, date_return=None, is_open=False):
        """Fill in the search form and return the connections for the day.

        :param task: object providing ``from_city``, ``to_city`` and ``date``.
        :param date_return: return date; selects the return-ticket radio.
        :param is_open: request an open return ticket.
        :returns: list of ``Connection`` objects listed under ``task.date``.
        """
        radios = self.browser.find_by_id('hp_form_itinerar').first \
            .find_by_xpath('div/input[@type="radio"]')
        radios[1 if date_return or is_open else 0].check()

        for city, tab_index in [(task.from_city, 1), (task.to_city, 2)]:
            self.browser.find_by_css(
                'input[tabindex="{}"]'.format(tab_index)).first.fill(city)
            # Pick the autocomplete suggestion matching the city exactly.
            for item in self.browser.find_by_css('.ui-menu-item'):
                link = item.find_by_tag('a')
                if link.value.lower() == city.lower():
                    link.click()
                    break

        self.browser.fill('departure:dateField', task.date)
        if date_return:
            self.browser.fill('returnDeparture:dateField', date_return)
        if is_open:
            self.browser.check('returnTicketOpen')
        self.browser.find_option_by_text('ISIC').first.check()
        self.browser.find_by_value('Vyhledat').first.click()

        while self.browser.is_element_not_present_by_css('.left_column',
                                                         wait_time=1):
            pass

        items = self.browser.find_by_css('.left_column') \
            .find_by_xpath('div/div/*')
        connections = []
        # Set by the first <h2> date header; stays None if a route row
        # shows up before any header.
        date_local = None
        for item in items:
            if item.tag_name == 'h2':
                date_local = item.text.split(' ')[1]
            elif item.tag_name == 'div' and item.has_class('routeSummary'):
                assert date_local
                if date_local != task.date:
                    break
                connections.append(Connection(item))
        return connections

    def order_time(self, connection):
        """Open *connection* and return its selectable seats.

        :returns: dict mapping seat number (int) to the seat element.
        """
        while True:
            connection.click()
            # Dismiss a modal window if the click produced one.
            dialog = self.browser.find_by_css('[id^=_wicket_window]')
            if dialog:
                dialog.first.find_by_tag('button').click()
            if self.browser.is_element_present_by_id('sumary_lines',
                                                     wait_time=1):
                break

        self.browser.find_by_id('sumary_lines') \
            .first.find_by_tag('button') \
            .first.click()

        seats = {}
        bus = self.browser.find_by_css('.seatsContainer')
        if bus:
            for seat in bus.first.find_by_css(
                    '.seatContainer:not([style*=blocked])'):
                # The label's last character is dropped before parsing.
                number = int(seat.find_by_tag('div').first.html[:-1])
                seats[number] = seat
        else:
            bus = self.browser.find_by_css('.vehicle')
            for seat in bus.first.find_by_css('.free, .selected'):
                seats[int(seat.text[:-1])] = seat
        return seats

    def order_seat(self, seat):
        """Select *seat*, decline insurance, reserve, and mail the ticket.

        The ticket page is mailed to ``self.user['email']`` when
        ``conf.yaml`` contains an ``email`` section.
        """
        if not seat.has_class('selected'):
            seat.click()

        for fs in self.browser.find_by_css('fieldset.topRoute'):
            legend = fs.find_by_css('legend')
            if legend and 'Pojištění' in legend[0].text:
                for package in fs.find_by_css('.insurancePackageType'):
                    if 'nechci' in package.find_by_tag('label').text:
                        package.find_by_tag('input').click()
                        time.sleep(1)

        submit = self.browser.find_by_css('[name^=buttonContainer]').first
        if 'Rezervovat' not in submit.text:
            # Intermediate step: continue and fill in passenger details.
            submit.click()
            time.sleep(1)
            data = (self.user['first'], self.user['last'],
                    self.user['email'], self.user['phone'])
            fields = self.browser.find_by_id('passengerInfo') \
                .first.find_by_tag('input')
            for item, value in zip(fields, data):
                item.fill(value)
            submit = self.browser.find_by_css(
                '[name^=buttonContainer]').first
            assert 'Rezervovat' in submit.text

        agreement = self.browser.find_by_css(
            '[name="bottomComponent:termsAgreementCont:termsAgreementCB"]')
        if agreement:
            agreement[0].check()
        time.sleep(1)
        submit.click()

        with open('conf.yaml') as f:
            # safe_load: plain config data only; bare yaml.load() without a
            # Loader is rejected by current PyYAML releases.
            conf = yaml.safe_load(f)
        if 'email' in conf:
            self._mail_ticket(conf['email'])

    def _mail_ticket(self, email):
        """Send the rendered ticket page to the user through SMTP.

        :param email: mapping with ``from``, ``username``, ``password``
            (base64 encoded) and ``server``.
        """
        while self.browser.is_element_not_present_by_id('ticketPage',
                                                        wait_time=1):
            pass
        msg = MIMEText(
            self.browser.find_by_id('ticketPage').first.html, 'html')
        msg['Subject'] = 'SA reservation'
        msg['From'] = email['from']
        msg['To'] = self.user['email']

        server = smtplib.SMTP(email['server'])
        try:
            server.starttls()
            server.login(email['username'],
                         b64decode(email['password']).decode())
            server.sendmail(msg['From'], msg['To'], msg.as_string())
        finally:
            # Close the connection even when login or sending fails.
            server.quit()
classfile.write(classid), classfile.write('\n') #print classlist #打开微信 wechaturl = 'http://wechat.shwilling.com/auth/qrcode/login?redirect=http%3A%2F%2Fwechat.shwilling.com%2Fsjtu%2Fcourse' browser.visit(wechaturl) print u'你现在有20s的时间扫描二维码确认登陆' time.sleep(10) print u'请稍等,本程序稍微有点慢...但是等待还是值得的.' myfile = open(u'all_scorelist.txt', 'w') for classid in classlist: time = ['/2014-2015-1', '/2014-2015-2', '/2015-2016-1', '/2015-2016-2'] for i in range(4): class_str = 'http://wechat.shwilling.com/sjtu/course/detail/' + classid + time[ i] browser.visit(class_str) if (browser.is_element_not_present_by_css('.d-name')): pass else: name = browser.find_by_css('.d-name').text timea = browser.find_by_css('.c-code').text meanscore = browser.find_by_css('.c-aver').text highscore = browser.find_by_css('.c-max').text print name, time[i], meanscore, highscore myfile.write( name.encode('utf-8')), myfile.write('\t'), myfile.write( time[i].encode('utf-8')), myfile.write('\t'), myfile.write( meanscore.encode( 'utf-8')), myfile.write('\t'), myfile.write( highscore.encode('utf-8')), myfile.write('\n') classfile.close()
if (re.match(pattern,ele.text)): classlist.append(ele.text) #打开微信 wechaturl='http://wechat.shwilling.com/auth/qrcode/login?redirect=http%3A%2F%2Fwechat.shwilling.com%2Fsjtu%2Fcourse' browser.visit(wechaturl) print u'你现在有20s的时间扫描二维码确认登陆' sleep(10) print u'请稍等,本程序稍微有点慢...但是等待还是值得的.' myfile=open(u'scorelist.txt','w') for classid in classlist: time=['/2014-2015-1','/2014-2015-2','/2015-2016-1'] for i in range(3): class_str='http://wechat.shwilling.com/sjtu/course/detail/'+classid+time[i] browser.visit(class_str) if (browser.is_element_not_present_by_css('.d-name')): pass else: name=browser.find_by_css('.d-name').text timea=browser.find_by_css('.c-code').text meanscore=browser.find_by_css('.c-aver').text highscore=browser.find_by_css('.c-max').text print name,time[i],meanscore,highscore myfile.write(name.encode('utf-8')),myfile.write('\t'),myfile.write(time[i].encode('utf-8')),myfile.write('\t'),myfile.write(meanscore.encode('utf-8')),myfile.write('\t'), myfile.write(highscore.encode('utf-8')),myfile.write('\n') browser.quit() myfile.close()
def main(): #how many accounts we need ntimes = 1 for i in range(1,ntimes+1): print "starting browser" firstname = names.get_first_name() #print "firstname", firstname lastname = names.get_last_name() #print "lastname", lastname browser = Browser() #Browser(user_agent="Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en)") browser.visit('https://passport.yandex.com/registration/mail') browser.find_by_id('firstname').fill(firstname) browser.find_by_id('lastname').fill(lastname) testlogin = False count = 0 while (testlogin == False): count = count + 1 login = firstname+lastname+str(randint(10,1000)) print "login:"******"div.control__error__login_notavailable", wait_time=2) if browser.is_text_present("username available"): testlogin = True else: print "login is not available, generate new" if (count>3): #print "logins in this script is unavailable now, please make new login generator" browser.quit() sys.exit("logins in this script is unavailable now, please make new login generator") password = password_generator.generate() print "password:"******"hint_question_id").click() #wait browser.is_element_not_present_by_css("li[role=\"presentation\"]", wait_time=3) #check first question browser.find_by_css("li[role=\"presentation\"]")[1].click() browser.find_by_id("hint_answer").fill(firstname) gateimgcode = captcha(browser) browser.find_by_id('answer').fill(gateimgcode) browser.find_by_css("button[type=\"submit\"]").click() testcaptcha = False count = 0 while (testcaptcha == False): count = count + 1 browser.is_element_not_present_by_css("div.control__error__captcha_incorrect", wait_time=2) if browser.is_text_present("characters were entered incorrectly"): print "captcha code is bad, try again" browser.find_by_id('password').fill(password) browser.find_by_id('password_confirm').fill(password) gateimgcode = captcha(browser) browser.find_by_id('answer').fill(gateimgcode) browser.find_by_css("button[type=\"submit\"]").click() else: testcaptcha = True if (count>3): #print "something 
wrong with captcha" browser.quit() sys.exit("something wrong with captcha") browser.is_element_not_present_by_tag("html", wait_time=2) if browser.is_text_present("Personal information"): today = datetime.date.today() filename = 'yandex'+str(today)+'.txt' file = open(filename,'a') file.write(login+'@yandex.com'+':'+login+':'+password+'\n') file.close() print str(i)+" accounts saved to "+filename browser.quit() else: #print "something wrong, please start script again" browser.quit() sys.exit("something wrong, please start script again")