def show(self):
    """Log in to Codeforces with self.username / self.password and print
    the account's friends list (one handle per line).

    Prints an error and returns early when login fails.
    """
    browser = RoboBrowser(parser='html.parser')
    browser.open('http://codeforces.com/enter')
    enter_form = browser.get_form('enterForm')
    enter_form['handleOrEmail'] = self.username
    enter_form['password'] = self.password
    browser.submit_form(enter_form)
    try:
        # After a successful login the page header captions contain the handle.
        checks = list(
            map(lambda x: x.getText()[1:].strip(),
                browser.select('div.caption.titled')))
        if self.username not in checks:
            click.secho('Login Failed.. Wrong password.', fg='red')
            return
    except Exception:
        click.secho('Login Failed.. Maybe wrong id/password.', fg='red')
        return
    browser.open('http://codeforces.com/friends')
    # BUG fix: `parsed` is a property returning the BeautifulSoup document.
    # The original `browser.parsed()[0]` only worked by accident because
    # Tag.__call__ aliases find_all().
    soup = browser.parsed
    ftable = soup.findAll(
        'div', {'class': 'datatable'})[0].findAll('table')[0].findAll('tr')[1:]
    friends = [x.findAll('td')[1].getText().strip() for x in ftable]
    for f in friends:
        print(f)
def get_det(tid):
    """Log into the students-corner portal for the user mapped to *tid*
    (looked up in the module-level gid/rid/pid lists) and return a
    summary string built from the student-information table."""
    idx = gid.index(tid)
    rno = rid[idx]
    pas = pid[idx]
    print(rno, pas)
    br = RoboBrowser(history=True, parser="html.parser")
    # NOTE: this second constructor replaces the first — only the
    # user-agent browser is actually used (as in the original code).
    br = RoboBrowser(
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    form = br.get_form(action="")
    form["rollno"] = rno
    form["wak"] = pas
    br.submit_form(form)
    print(rno)
    br.open("http://studentscorner.vardhaman.org/student_information.php")
    bt = br.parsed()
    th = br.select("th")
    td = br.select("td")
    print("In details " + rno)
    try:
        # (header-index, cell-index) pairs of the four fields reported
        pairs = ((3, 8), (10, 17), (29, 33), (31, 35))
        return "\n".join(
            str(th[a].text.strip()) + ":" + str(td[b].text.strip())
            for a, b in pairs)
    except IndexError:
        return ("Something is wrong")
def kuai_dai_li(self):
    """快代理 — scrape free proxies from kuaidaili.com.

    Each scraped proxy is handed to self._format_ip; the RoboBrowser
    pass at the end is the headless fetch mentioned in the original note.
    """
    url = 'https://www.kuaidaili.com/free/intr/'
    headers = {
        'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_ ≥14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.132 Safari/537.36",
        'Connection': 'close'
    }
    result = self.request_proxy('GET', url, headers=headers)
    soup = BeautifulSoup(result.text, 'html.parser')
    # BUG fix: find('tr') returned only the first row and iterating it
    # walked that row's *children* (crashing on td[0]); use find_all so
    # every table row is processed.
    rows = soup.find('table', class_='layui-table').find('tbody').find_all('tr')
    for row in rows:
        td = row.find_all('td')
        host = td[0].get_text().lower()
        port = td[1].get_text().strip()
        self._format_ip({'http': f'http://{host}:{port}'})
    browser = RoboBrowser()
    browser.open(url)
    time.sleep(3)
    # BUG fix: multiple CSS classes must be dot-joined; the original
    # selector looked for nested <table-bordered> elements.
    table = browser.select('table.table.table-bordered.table-striped')
    print(table)
def submit(self):
    """Submit self.inputfile for problem self.prob_id on Codeforces,
    then print the verdict of the new submission for the logged-in user."""
    last_id, b, c, d, e = Submit.get_latest_verdict(self.username)
    browser = RoboBrowser(parser='html.parser')
    browser.open('http://codeforces.com/enter')
    login_form = browser.get_form('enterForm')
    login_form['handleOrEmail'] = self.username
    login_form['password'] = self.password
    browser.submit_form(login_form)
    try:
        # a successful login shows the handle in the page captions
        captions = [tag.getText()[1:].strip()
                    for tag in browser.select('div.caption.titled')]
        if self.username not in captions:
            click.secho('Login Failed.. Wrong password.', fg='red')
            return
    except Exception:
        click.secho('Login Failed.. Maybe wrong id/password.', fg='red')
        return
    browser.open('http://codeforces.com/problemset/submit')
    submit_form = browser.get_form(class_='submit-form')
    submit_form['submittedProblemCode'] = self.prob_id
    submit_form['sourceFile'] = self.inputfile
    browser.submit_form(submit_form)
    # a successful submission redirects to the .../status page
    if not browser.url.endswith('status'):
        click.secho(
            'Failed submission, probably you have submit the same file before',
            fg='red')
        return
    Submit.print_verdict(last_id, self.username, 100)
    click.secho('[{0}] submitted ...'.format(self.inputfile), fg='green')
def check_cred(rno, pas):
    """Validate students-corner credentials.

    Returns a JSON response: a site-down marker, or validity plus the
    student's name when the login succeeds.
    """
    status = check_url()
    if (status == "down"):
        return jsonify({'site': 'down'})
    br = RoboBrowser(history=True, parser="html.parser")
    br = RoboBrowser(
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    form = br.get_form(action="")
    form["rollno"] = rno
    form["wak"] = pas
    br.submit_form(form)
    # BUG fix: str(br.select) stringified the bound method object, so
    # the roll number could never be found and login always reported
    # invalid; inspect the parsed page markup instead.
    checkrno = str(br.parsed)
    if (rno in checkrno):
        br.open(
            "http://studentscorner.vardhaman.org/student_information.php")
        td = br.select("td")
        # td[8] holds the student name — position assumed fixed in the
        # portal's markup (as in the original code).
        name = str(td[8].text.strip())
        # BUG fix: the original printed an undefined name `finalurl`.
        print("In check_pas", rno)
        return jsonify({
            'valid': 'True',
            'rollno': rno,
            'pas': pas,
            'name': name
        })
    else:
        return jsonify({'valid': 'False'})
def date_wise_activity_diary(rno, pas):
    """Log in and return the 'Attendance Percentage' value from the
    student-wise attendance register page, or an explanatory message
    when the table cannot be read."""
    br = RoboBrowser(history=True, parser="html.parser")
    # The second constructor (with a desktop user-agent) replaces the
    # first, exactly as in the original code.
    br = RoboBrowser(
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    login = br.get_form(action="")
    login["rollno"] = rno
    login["wak"] = pas
    br.submit_form(login)
    br.open(
        "http://studentscorner.vardhaman.org/Studentwise_AttendanceRegister.php"
    )
    bt = br.parsed()
    headers = br.select("th")
    br.select("td")
    try:
        # scan the header cells for the percentage label; the value is
        # in the header cell immediately after it
        for idx in range(10, 99):
            if str(headers[idx].text.strip()) == "Attendance Percentage":
                value = str(headers[idx + 1].text.strip())
                print(value)
                return value
    except IndexError:
        return (
            "Attendance is Freesed.If attendance is not freesed you can see it in the website send the mail to the developer stating the issue."
        )
def captura(url):  # visit a url and capture its information
    """Visit *url*, merge its RDFa into the global graph and queue its
    outgoing links; the crawl is bounded by QUANTIDADE_PAGINAS."""
    global vetor_links
    global visitados_links
    global grafo
    print('-----' + url + '-------')
    visitados_links.append(url)  # mark the current url as visited
    vetor_links.remove(url)      # remove it from the to-visit queue
    browser = RoboBrowser()
    try:
        browser.open(url_navegavel(url))  # open the url
        print("++++" + url_navegavel(url) + "+++")
        if (browser.response.status_code != 200):  # server must answer OK
            return
        g = extrair_rdfa(url)
        grafo = merge_graphs(grafo, g)  # merge the page graph into the global one
        links = browser.select('a')  # every <a> tag on the page
        for link in links:
            # validate, fix and enqueue each link not yet seen
            if (link.has_attr('href') and valida_url(link['href'])):
                # hoist the repeated corrige_url call (was computed 3x)
                fixed = corrige_url(link['href'], url)
                if (fixed not in vetor_links
                        and fixed not in visitados_links
                        and (len(visitados_links) + len(vetor_links)
                             < QUANTIDADE_PAGINAS)):
                    vetor_links.append(fixed)
    except Exception:
        # was a bare `except:`; narrowed so SystemExit etc. propagate
        print("erro")
        e = sys.exc_info()[0]
        traceback.print_exc(file=sys.stdout)
        print(e)
def cgpa(rno, pas):
    """Log in and return roll number + CGPA scraped from the credit
    register page, or an error message on failure."""
    br = RoboBrowser(history=True, parser="html.parser")
    br = RoboBrowser(
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    form = br.get_form(action="")
    form["rollno"] = rno
    form["wak"] = pas
    br.submit_form(form)
    br.open(
        "http://studentscorner.vardhaman.org/src_programs/students_corner/CreditRegister/credit_register.php"
    )
    th = br.select("th")
    td = br.select("td")
    # BUG fix: str(br.select) stringified the bound method, so the CGPA
    # label could never be located; search the page markup instead.
    c = str(br.parsed)
    t = "Cumulative Grade Point Average"
    try:
        # ValueError when the label is absent — handled below (the
        # original let it escape uncaught).
        i = c.index(t)
        # offsets i+33..i+36 assume fixed markup between the label and
        # its 4-character value — carried over from the original code.
        return (str(th[1].text.strip()) + ":" + str(td[7].text.strip()) +
                "\n" + "Cumulative Grade Point Average:" + c[i + 33] +
                c[i + 34] + c[i + 35] + c[i + 36])
    except (IndexError, ValueError):
        return (
            "Something went wrong send the report to [email protected] stating the issue, with your rollno"
        )
def scrape(card_no):
    """Search Google for the card's name and create one parent card plus
    one child card per search-result link, then redirect to '/'."""
    serch_card = Card(card_no)
    # -- scraping (スクレイピング) --
    browser = RoboBrowser(parser='html.parser')
    browser.open('https://www.google.co.jp/')
    search_form = browser.get_form(action='/search')
    search_form['q'] = serch_card.name
    browser.submit_form(search_form,
                        list(search_form.submit_fields.values())[0])

    def pick_image():
        # images are assigned at random from the local pool, re-globbed
        # per call exactly as the original did
        return random.choice(
            [os.path.basename(file)
             for file in glob.glob('./selectImages/*.*')])

    new_card = Card()
    new_card.name = serch_card.name
    new_card.image = pick_image()
    new_card.description = "検索結果:" + serch_card.name
    new_card.create()
    for a in browser.select('h3 > a'):
        child = Card()
        child.name = a.text[0:32].encode("utf-8")
        child.image = pick_image()
        child.description = a.get('href')
        child.parent_no = new_card.no
        child.create()
    redirect('/')
def login(handle):
    """Prompt for the password of *handle*, sign in to Codeforces and
    return the logged-in RoboBrowser, or None on failure."""
    password = getpass('[Secured] Password of {}: '.format(handle))
    print('> Signing in...')
    try:
        browser = RoboBrowser(parser='lxml')
        browser.open('http://codeforces.com/enter')
        enter_form = browser.get_form('enterForm')
        enter_form['handleOrEmail'] = handle
        enter_form['password'] = password
        browser.submit_form(enter_form)
        # the handle shows up in the page captions after a good login
        captions = [tag.getText()[1:].strip()
                    for tag in browser.select('div.caption.titled')]
        if handle.lower() in str(captions).lower():
            print('> Success!')
            return browser
        print('> !!! Login Failed. Please enter valid credentials')
        return None
    except Exception as e:
        print('>', e)
        return None
def fetch(url):
    """Follow every moderator-slider link on *url* once and accumulate
    (total score, vote count) per title across the linked pages."""
    browser = RoboBrowser(history=True, parser="html.parser")
    browser.open(url)
    vote_links = browser.select('.moderatorenSlider a.beitrag')
    seen = set()
    totals = {}
    for link in vote_links:
        href = link["href"]
        if href in seen:
            continue  # each page only once
        seen.add(href)
        print(href)
        browser.follow_link(link)
        try:
            scores = extractVotes(browser)
            print(scores)
            for title, score in scores.items():
                prev_sum, prev_n = totals.get(title, (0, 0))
                totals[title] = (prev_sum + score, prev_n + 1)
        except Exception as e:
            print(e)
        browser.back()
    return totals
def pypi_search(term):
    """Search pypi.org for *term* and return up to five results, each as
    a dict with keys: index, href, name, description.

    Raises ValueError when the site or the search form cannot be reached.
    """
    from robobrowser import RoboBrowser
    br = RoboBrowser(parser="lxml")
    br.open("https://pypi.org/")
    if not br.response.ok:
        raise ValueError("Failed at https://pypi.org/")
    form = br.get_form()
    form["q"] = term
    br.submit_form(form)
    if not br.response.ok:
        raise ValueError("Failed at form submit")
    return [
        {
            "index": i,
            "href": link["href"],
            "name": link.span.text,
            "description": link.p.text,
        }
        for i, link in enumerate(br.select("a.package-snippet")[:5])
    ]
def login_to_cf(username, password):
    """login_to_cf creates a codeforces logged in session using RoboBrowser"""
    logger(
        'info',
        'Trying to login into codeforces for the handle : {0}'.format(
            username))
    try:
        browser = RoboBrowser(parser='html.parser')
        browser.open('http://codeforces.com/enter')
        form = browser.get_form('enterForm')
        form['handleOrEmail'] = username
        form['password'] = password
        browser.submit_form(form)
        # count handle occurrences in the page captions after login
        occurrences = str(browser.select('div.caption.titled')).count(username)
        if not occurrences or username == "":
            logger('error', 'Login Failed.. Wrong password.')
            return (False, browser)
    except Exception:
        logger('error', 'Login Failed.. Maybe wrong id/password.')
        return (False, browser)
    global sessionUser
    sessionUser = username
    logger('success', 'Login successful, Welcome {0}!'.format(sessionUser))
    return (True, browser)
def soov_fetch(OTSING):
    """Scrape soov.ee (Postimees classifieds) listings for the search
    term *OTSING* and return a list of
    ['#counter', URL, NAME, IMG_URL, PRICE, 'SOOV'] rows.

    Only 'Müüa' (for-sale) items are counted; 'Soovin' (wanted) items
    are skipped.  All extraction is done by slicing the raw HTML
    strings, so it is tightly coupled to the site's markup.
    """
    import re
    from robobrowser import RoboBrowser
    browser = RoboBrowser()
    browser.open('https://soov-ee.postimees.ee/keyword-' + OTSING +
                 '/listings.html')
    try:
        # The second-to-last <li> of the pagination bar holds the last
        # page number; it is dug out of the raw HTML string.
        page_numbers = int(
            str(
                browser.select('.pagination')[0].select('li')
                [len(browser.select('.pagination')[0].select('li')) -
                 2]).split('.html">')[1].split('</a')[0])
    except:
        # No pagination found -> treat as "no results".
        all_items = []
        return all_items
    all_items = []
    for page_number in range(1, page_numbers + 1):
        browser.open("https://soov-ee.postimees.ee/keyword-" + OTSING + "/" +
                     str(page_number) + "/listings.html")
        items = browser.select('.item-list')
        counter = 0
        # The first two .item-list entries are above the listings (the
        # save-search box and the "0 ads from abroad" disclaimer) and
        # share the same class name, so they are skipped.
        for item in items[2:]:
            if 'Soovin' in str(item.select('span.thin')):
                pass
            if 'Müüa' in str(item.select('span.thin')):
                counter += 1
                # item name comes from the image's alt attribute
                NAME = str(item.select('img')).replace('[<img alt="',
                                                       '').split('" class')[0]
                if 'alt="' in NAME:
                    NAME = NAME.split('alt="')[1]
                IMG_URL = str(item.select('img')).replace(
                    '"/>]', '').split('src="')[1].split('"/>')[0]
                try:
                    PRICE = str(item.select('.item-price')).split(
                        'margin">')[1].split('€<')[0]
                    PRICE = ['-', PRICE]
                except:
                    # no price on the listing
                    PRICE = '-'
                    PRICE = ['-', PRICE]
                URL = str(item.select('.add-title')[0].select('a')).split(
                    'ref="')[1].split('">')[0]
                all_items.append(
                    ['#' + str(counter), URL, NAME, IMG_URL, PRICE, 'SOOV'])
    return all_items
def main():
    """Command-line entry point: log in to Codeforces with the password
    stored in judgerConfig, submit the given source file and poll for
    the verdict."""
    parser = argparse.ArgumentParser(
        description='Submit codeforces in command line')
    parser.add_argument('user', type=str, help='Your codeforces ID')
    parser.add_argument('prob', type=str,
                        help='Codeforces problem ID (Ex: 33C)')
    parser.add_argument('file', type=str, help='path to the source code')
    args = parser.parse_args()
    user_name = args.user
    last_id, _ = get_submission_data(user_name)
    try:
        passwd = judgerConfig['codeforces'][user_name]
    except Exception:
        print("Configuration Failure.")
        return
    browser = RoboBrowser(parser='lxml')
    browser.open('http://codeforces.com/enter')
    login_form = browser.get_form('enterForm')
    login_form['handleOrEmail'] = user_name
    login_form['password'] = passwd
    browser.submit_form(login_form)
    try:
        # a successful login shows the handle in the page captions
        handles = [tag.getText()[1:].strip()
                   for tag in browser.select('div.caption.titled')]
        if user_name not in handles:
            print("Login Failed.. probably because you've typed"
                  "a wrong password.")
            return
    except Exception:
        print("Login Failed.. probably because you've typed"
              "a wrong password.")
        return
    browser.open('http://codeforces.com/problemset/submit')
    submit_form = browser.get_form(class_='submit-form')
    submit_form['submittedProblemCode'] = args.prob
    submit_form['sourceFile'] = args.file
    browser.submit_form(submit_form)
    # a successful submission redirects to the .../status page
    if not browser.url.endswith('status'):
        print('Your submission has failed, probably '
              'because you have submit the same file before.')
        return
    print('Submitted, wait for result...')
    while True:
        id_, verdict = get_submission_data(user_name)
        if id_ != last_id and verdict != 'TESTING':
            print('Verdict = {}'.format(verdict))
            break
        time.sleep(5)
def DownloadPronunciations(words):
    """Log in to Forvo and download the Swedish pronunciation mp3 for
    each word in *words*; returns the list of saved file paths."""
    print("Aiming to download " + str(words))
    browser = RoboBrowser(history=True, parser="html5lib")
    print("Connecting to Forvo...")
    browser.open('http://www.forvo.com/login/')
    form = browser.get_form(action=re.compile(r'login'))
    form["login"].value = forvoUsername
    form["password"].value = forvoPassword
    browser.submit_form(form)
    filepaths = []
    for word in words:
        try:
            print("Trying to download; " + word)
            # The #sv fragment asks Forvo for the Swedish page.
            wordUrl = "http://www.forvo.com/word/" + word + "/#sv"
            browser.open(wordUrl)
            # Percent-encode the Swedish characters for the download link.
            ConvertedDownloadWord = word
            ConvertedDownloadWord = ConvertedDownloadWord.replace("ö", "%C3%B6")
            ConvertedDownloadWord = ConvertedDownloadWord.replace("å", "%C3%A5")
            ConvertedDownloadWord = ConvertedDownloadWord.replace("ä", "%C3%A4")
            searchString = '"/download/mp3/' + ConvertedDownloadWord + '/sv/"'
            downloads = browser.select('a[href*=' + searchString + ']')
            if downloads:
                try:
                    fullDownloadUrl = downloads[0].attrs["href"]
                    print('Attempt to download mp3 from ' + fullDownloadUrl)
                    browser.open(fullDownloadUrl)
                    mp3Response = browser.response
                    filepath = saveDirectory + word + ".mp3"
                    # FIX: close the file promptly via a context manager
                    # (the original leaked the handle on error).
                    with open(filepath, 'wb') as file:
                        file.write(mp3Response.content)
                    filepaths.append(filepath)
                except IndexError:
                    print("Could not load the webpage ", fullDownloadUrl)
                except NameError:
                    print("I'm not sure what this was. Maybe an incorrectly encoded string")
                except:
                    print("Unexpected error while downloading:",
                          sys.exc_info()[0])
            else:
                print("Couldn't find a download link :(.")
        except NameError:
            # BUG fix: the original printed the undefined `searchWord`
            # here, which itself raised NameError; report the word.
            print("Couldn't find ", word)
        except:
            print("Unexpected error while searching:", sys.exc_info()[0])
    return filepaths
def period_attendance(rno, pas):
    """Return a JSON string mapping each period's date cell to
    "<PRESENT/ABSENT>_-_<period>_-_<topic>", plus a "status" key
    ("True" when records were found, "None" otherwise, "down" when the
    portal is unreachable)."""
    stat = check_url(
        rno, pas, "http://studentscorner.vardhaman.org/student_attendance.php")
    if (stat == "down"):
        return ("down")
    d = {}
    br = RoboBrowser(history=True, parser="html.parser")
    br = RoboBrowser(
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    form = br.get_form(action="")
    form["rollno"] = rno
    form["wak"] = pas
    br.submit_form(form)
    br.open("http://studentscorner.vardhaman.org/student_attendance.php")
    td = br.select("td")
    try:
        # each attendance record spans 4 consecutive <td> cells:
        # date, period, topic, present/absent
        for i in range(1, 49, 4):
            present = td[i + 3].text.strip().upper()
            period = td[i + 1].text.strip()
            topic = td[i + 2].text.strip()
            topic = topic[0].upper() + topic[1:].lower()
            # NOTE: the original's PRESENT/else branches were identical,
            # so the record is stored unconditionally.
            d[str(td[i].text.strip())] = (present + "_-_" + period + "_-_" +
                                          topic)
    except IndexError:
        # fewer than 12 records on the page — keep what we have
        pass
    if (not d):
        d = {"status": "None"}
    else:
        d["status"] = "True"
    return json.dumps(d)
def get_stock_id_pages(base_url):
    """Return the data-href attribute of every table row on *base_url*."""
    browser = RoboBrowser(parser='html.parser')
    browser.open(base_url)
    return [row.get('data-href') for row in browser.select('tbody > tr')]
def attendance(tid, rno, pas):
    """Log in to students corner, then send the attendance percentage
    and the per-period attendance rows to telegram chat *tid* via the
    module-level bot.  Always returns 1."""
    br = RoboBrowser(history=True, parser="html.parser")
    # second constructor (with desktop user-agent) replaces the first,
    # as in the original code
    br = RoboBrowser(
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    login = br.get_form(action="")
    login["rollno"] = rno
    login["wak"] = pas
    br.submit_form(login)
    br.open("http://studentscorner.vardhaman.org/student_attendance.php")
    bt = br.parsed()
    th = br.select("th")
    td = br.select("td")
    try:
        # the percentage value sits in the header cell right after its label
        for i in range(40, 60):
            if str(th[i].text.strip()) == "Attendance Percentage":
                bot.send_message(
                    tid,
                    str(th[i].text.strip()) + " : *" +
                    str(th[i + 1].text.strip()) + "*",
                    parse_mode='Markdown')
    except IndexError:
        bot.send_message(
            tid,
            "*Attendance is Freesed*.\nIf attendance is not freesed you can see it in the website send the mail to \n *[email protected]*\nstating the issue.",
            parse_mode='Markdown')
    try:
        # each record spans 4 consecutive <td> cells
        for i in range(9, 37, 4):
            status = td[i + 3].text.strip().upper()
            period = td[i + 1].text.strip()
            day = td[i + 2].text.strip()
            day = day[0].upper() + day[1:].lower()
            if status == "PRESENT":
                bot.send_message(
                    tid,
                    str(td[i].text.strip()) + " " +
                    str(td[i + 1].text.strip()) + " " + day + " - <b>" +
                    status + "</b>",
                    parse_mode='Html')
            else:
                bot.send_message(
                    tid,
                    str(td[i].text.strip()) + " " + period + " " + day +
                    " - ~<i>" + status + "</i>~",
                    parse_mode='Html')
    except IndexError:
        pass
    return 1
def get_list_pages(base_url):
    """Resolve the href of the first anchor inside each pager <li> into
    an absolute URL and return them as a list."""
    browser = RoboBrowser(parser='html.parser')
    browser.open(base_url)
    return [
        urljoin(base_url, item.select('a')[0].get('href'))
        for item in browser.select('ul.pager > li')
    ]
def make_booking(url, num_people, lead_name, hotel_name, email, country, tel):
    """Open the booking page and submit the reservation form, then the
    confirmation form.  Returns a status code:
    0 success, 1-4 specific landing-page errors, 5 capacity over,
    6 unknown error."""
    logging.info('[%s] requesting %s' %
                 (datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), url))
    browser = RoboBrowser(history=False, user_agent=random.choice(user_agents))
    browser.open(url)
    # map the known error messages on the landing page to return codes
    err = browser.select('div.error-message')
    if len(err) > 0:
        text = str(err[0])
        if '指定日時の予約受け付けは終了しました。' in text:
            return 1  # 'Reservation ended'
        if '指定時間は予約を受け付けておりません' in text:
            return 2  # Reservation not accepted
        if '予約受付期間外です' in text:
            return 3  # Outside reservation period
        if 'ただ今予約は受け付けておりません' in text:
            return 4  # We do not accept reservations now.
        return 6  # unknown error
    # fill in the client details
    form = browser.get_form()
    form['booking[client][adult]'].value = num_people
    form['booking[client][name]'].value = lead_name
    form['booking[client][company]'].value = hotel_name
    form['booking[client][email]'].value = email
    form['booking[client][email2]'].value = email
    form['booking[client][country]'].value = country
    form['booking[client][tel]'].value = tel
    browser.submit_form(form)
    print(browser.parsed)
    err = browser.select('div.error-message')
    if len(err) > 0 and '定員オーバーのため予約不可です' in str(err[0]):
        return 5  # capacity over
    # click through the confirmation page
    browser.submit_form(browser.get_form())
    return 0  # success!
def draw_leaderboard(screen, width, height):
    """ Retrieve data from investopedia and draw leaderboard on screen.

        Args:
            screen: the display object
            width: display width
            height: display height
    """
    clear_leaderboard(screen, width, height)
    email, password = get_credentials()
    # log in so the simulator ranking page is accessible
    login_url = 'http://www.investopedia.com/accounts/login.aspx?'
    browser = RoboBrowser(parser='lxml', history=True)
    browser.open(login_url)
    form = browser.get_form(id='account-api-form')
    form['email'] = email
    form['password'] = password
    browser.submit_form(form)
    browser.open('http://www.investopedia.com/simulator/ranking/')
    rows = browser.select('tr')
    x = width / 15
    y = 9 * height / 16
    for rank in range(1, 11):
        data = rows[rank].text.replace('\n', '').replace('. ', '')
        # strip the rank prefix (two digits for rank 10, one otherwise)
        data = data[2:] if rank == 10 else data[1:]
        # username runs until the first '(' and is capped at 30 chars
        username = data.partition('(')[0][:30]
        # render this leaderboard entry
        info = '{}. {}'.format(rank, username)
        text = pygame.font.SysFont('Century Schoolbook', 40)
        label = text.render(info, 1, pitt_gold)
        screen.blit(label, (x, y))
        pygame.display.update()
        if rank == 5:
            # move to the second column
            x = 11 * width / 21
            y = 9 * height / 16
        else:
            y = y + 50
def main():
    """Command-line entry point: prompt for the password, submit the
    given source file to Codeforces and poll for the verdict."""
    parser = argparse.ArgumentParser(
        description='Submit codeforces in command line')
    parser.add_argument('user', type=str, help='Your codeforces ID')
    parser.add_argument('prob', type=str,
                        help='Codeforces problem ID (Ex: 33C)')
    parser.add_argument('file', type=str, help='path to the source code')
    args = parser.parse_args()
    user_name = args.user
    last_id, _ = get_submission_data(user_name)
    passwd = getpass()
    browser = RoboBrowser()
    browser.open('http://codeforces.com/enter')
    enter_form = browser.get_form('enterForm')
    # FIX: the Codeforces login form's field is 'handleOrEmail'; the
    # old 'handle' name used here no longer exists on the enter form.
    enter_form['handleOrEmail'] = user_name
    enter_form['password'] = passwd
    browser.submit_form(enter_form)
    try:
        checks = list(map(lambda x: x.getText()[1:].strip(),
                          browser.select('div.caption.titled')))
        if user_name not in checks:
            print("Login Failed.. probably because you've typed"
                  "a wrong password.")
            return
    except Exception:
        print("Login Failed.. probably because you've typed"
              "a wrong password.")
        return
    browser.open('http://codeforces.com/problemset/submit')
    submit_form = browser.get_form(class_='submit-form')
    submit_form['submittedProblemCode'] = args.prob
    submit_form['sourceFile'] = args.file
    browser.submit_form(submit_form)
    # a successful submission redirects to the .../status page
    if browser.url[-6:] != 'status':
        print('Your submission has failed, probably '
              'because you have submit the same file before.')
        return
    print('Submitted, wait for result...')
    while True:
        id_, verdict = get_submission_data(user_name)
        if id_ != last_id and verdict != 'TESTING':
            print('Verdict = {}'.format(verdict))
            break
        time.sleep(5)
def get_locality_fips(self, user):
    """Look up the Virginia locality FIPS id for *user*'s address via
    the tax.virginia.gov FIPS lookup form elsewhere on virginia.gov."""
    fips_browser = RoboBrowser(parser='html.parser',
                               user_agent='HelloVote.org',
                               history=True)
    fips_browser.open('http://www.tax.virginia.gov/fips')
    form = fips_browser.get_form(id="build-fips-form")
    form['street1'] = user['address']
    form['city'] = user['city']
    form['zipcode'] = user['zip']
    form['zipcodeext'] = ''
    # The form has two 'op' buttons (submit & reset); thankfully submit
    # comes first.
    fips_browser.submit_form(form, submit=form['op'])
    result = fips_browser.select('dl.dl-horizontal dd')[1]
    return result.text.strip().upper()
def cli(prob_id, filename):
    """Submit *filename* for Codeforces problem *prob_id* using the
    credentials in config, then poll and pretty-print the verdict."""
    # get latest submission id, so when submitting should have not equal id
    last_id, b, c, d, e = get_latest_verdict(config.username)
    # Browse to Codeforces
    browser = RoboBrowser(parser='html.parser')
    browser.open('http://codeforces.com/enter')
    enter_form = browser.get_form('enterForm')
    enter_form['handleOrEmail'] = config.username
    enter_form['password'] = config.password
    browser.submit_form(enter_form)
    try:
        checks = list(
            map(lambda x: x.getText()[1:].strip(),
                browser.select('div.caption.titled')))
        if config.username not in checks:
            click.secho('Login Failed.. Wrong password.', fg='red')
            return
    except Exception:
        click.secho('Login Failed.. Maybe wrong id/password.', fg='red')
        return
    click.secho('[{0}] login successful! '.format(config.username),
                fg='green')
    click.secho('Submitting [{1}] for problem [{0}]'.format(
        prob_id, filename), fg='green')
    # problem code like "33C" splits into contest "33" + index "C"
    browser.open('https://codeforces.com/contest/' + prob_id[:-1] +
                 '/problem/' + prob_id[-1])
    submit_form = browser.get_form(class_='submitForm')
    try:
        submit_form['sourceFile'] = filename
    except Exception:
        click.secho('File {0} not found in current directory'.format(filename))
        return
    browser.submit_form(submit_form)
    # a successful submission redirects to the contest's /my page
    if browser.url[-3:] != '/my':
        click.secho(
            'Failed submission, probably you have submit the same file before',
            fg='red')
        return
    click.secho('[{0}] submitted ...'.format(filename), fg='green')
    hasStarted = False
    while True:
        (id_, verdict_, time_, memory_,
         passedTestCount_) = get_latest_verdict(config.username)
        # idiom fix: compare against None with `is not None`
        if id_ != last_id and verdict_ != 'TESTING' and verdict_ is not None:
            if verdict_ == 'OK':
                click.secho('OK - Passed {} tests'.format(passedTestCount_),
                            fg='green')
            else:
                click.secho("{} on test {}".format(verdict_,
                                                   passedTestCount_ + 1),
                            fg='red')
            click.secho('{} MS | {} KB'.format(time_, memory_),
                        fg=('green' if verdict_ == 'OK' else 'red'))
            break
        elif verdict_ == 'TESTING' and (not hasStarted):
            click.secho("Judgment has begun", fg='green')
            hasStarted = True
        time.sleep(0.5)
def get_available_countries():
    """
    :return: list of countries as presented in url GEONAMES_COUNTRIES.
    """
    browser = RoboBrowser(parser='html.parser')
    browser.open(GEONAMES_COUNTRIES)
    countries = []
    # anchors without text (images etc.) are skipped
    for anchor in browser.select('.restable ')[0].find_all('a'):
        if anchor.string is not None:
            countries.append(anchor.string.lower())
    return countries
def scores():
    """Flask view: GET renders the login form; POST logs in to
    collegeboard.org with the submitted credentials and renders the
    scraped AP exam names and scores."""
    if request.method == "GET":
        # return login form
        return render_template("scores.html")
    if request.method == "POST":
        # connect to collegeboard.org
        browser = RoboBrowser()
        login_url = 'https://account.collegeboard.org/login/login?appId=287&DURL=https://apscore.collegeboard.org/scores/view-your-scores'
        browser.open(login_url)
        # log in with the user's credentials
        form = browser.get_form(id='loginForm')
        form['person.userName'].value = request.form.get("username")
        form['person.password'].value = request.form.get("password")
        form.serialize()
        browser.submit_form(form)
        # go to the AP scores page
        browser.open(
            'https://apscore.collegeboard.org/scores/view-your-scores')
        # slice off the surrounding <h4>/<em> tags from each scraped cell
        exams_final = [str(tag)[4:-5]
                       for tag in browser.select(".span5 > h4")]
        scores_final = [str(tag)[4:-5]
                        for tag in browser.select(".span5 > span > em")]
        # return scores page
        return render_template("scored.html",
                               exams=exams_final,
                               scores=scores_final)
def applyf(rno, pas, out, indat, rea):
    """Log in to students corner and submit the out-pass permission form
    (out time *out*, in time *indat*, reason *rea*); returns a
    human-readable status string."""
    br = RoboBrowser(parser="html.parser")
    br = RoboBrowser(
        user_agent=
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
    )
    br.open('http://studentscorner.vardhaman.org')
    form = br.get_form(action="")
    form["rollno"] = rno
    form["wak"] = pas
    br.submit_form(form)
    # The permission form lives on its own page; fetch it after login.
    # (The original also called get_form before opening the page — a
    # redundant call whose result was immediately overwritten.)
    br.open("http://studentscorner.vardhaman.org/students_permission_form.php")
    form = br.get_form(action="insert_permission.php")
    form['out_time'] = out
    form['in_time'] = indat
    form['reason'] = rea
    br.submit_form(form)
    # BUG fix: str(br.select) stringified the bound method, so the
    # success message could never be detected; inspect the markup.
    tt = str(br.parsed)
    print(tt)
    br.open("http://studentscorner.vardhaman.org/students_permission_form.php")
    td = br.select("td")
    if ("Permission Form Submitted Successfully" in tt):
        # the "Student Name" label's position varies between layouts
        if (str(td[8].text.strip()) == "Student Name"):
            return (str(td[9].text.strip()).title() +
                    "'s Permission Form Submitted Successfully")
        elif (str(td[29].text.strip()) == "Student Name"):
            return (str(td[30].text.strip()).title() +
                    "'s Permission Form Submitted Successfully")
        else:
            return (
                "Permission Form Submitted Successfully.\n\nYour name has not found in the permission,please send a mail to \[email protected]\nstating the issue."
            )
    else:
        return (
            "Something went wrong.Apply the form manually and mail to [email protected] stating the issue."
        )
def scrape_td(parsed_url):
    """Scrape the per-thread write-count <td> cells (3 cells per thread,
    33 threads) from *parsed_url* and return the summed write totals."""
    br2 = RoboBrowser(history=True, parser="html.parser")
    try:
        br2.open(parsed_url)  # attempt to open up parsed URL
    except Exception:
        print(f"Unable to open{parsed_url}")
    # td indices of the 3 counters for each of the 33 threads: columns
    # start at 28 and advance by 7 per thread (replaces the hand-written
    # 99-entry literal, producing the identical index list).
    col_list = [base + off
                for base in range(28, 253, 7)
                for off in (0, 1, 2)]
    cells = br2.select('td')  # hoisted: was re-selected every iteration
    temp_list = []  # empty list to insert writes for each consumer
    for nums in col_list:
        try:
            # strip the <td> wrapper and thousands separators so the
            # value can be turned into an int
            strp_val = str(cells[nums]).replace("<td>", "").replace(
                "</td>", "").replace(",", "")
            temp_list.append(int(strp_val))
        except (IndexError, ValueError):
            # BUG fix: the original message referenced an undefined
            # `server_num`, raising NameError whenever this fired.
            print("Less than 33 threads for this cluster")
    writes_ = temp_list[0::3]
    writes_totals = sum(writes_)  # total of writes located in temp list
    return writes_totals
def parse_url(server_num):
    """Build the URL of the most recent job page for cluster
    *server_num* by combining the base URL with the 10th anchor scraped
    from the listing page."""
    # NOTE(review): the original URL templates were redacted
    # placeholders whose f-string fields contained a space
    # ("{server num}") — a SyntaxError that prevented the module from
    # importing.  Fixed to reference the parameter; restore the real
    # URL templates before use.
    if server_num >= 6:
        url = f"<url f string with {server_num} injected>"
        first_url = f"<url f string with {server_num} injected>"
    else:
        url = f"<url f string with {server_num} injected>"
        first_url = f"<url f string with {server_num} injected>"
    br = RoboBrowser(history=True, parser="html.parser")
    try:
        br.open(url)  # attempt to open url with RoboBrowser
    except Exception:
        print(f"Unable to open {url}")
    # the anchor slice differs for cluster 7's page layout
    if server_num == 7:
        end_scrape = str(br.select('a')[9])[9:46]
    else:
        end_scrape = str(br.select('a')[9])[9:48]
    combo_url = first_url + end_scrape  # URL for the most recent job
    return combo_url
def scrape_cache(query, total):
    """Scrape Bing search results for `query`, collecting cached-copy links.

    Args:
        query: search query string (already URL-safe).
        total: number of listings to walk; Bing pages step by 14.

    Returns:
        List of dicts with keys 'orig_url', 'desc' and 'cache_url'
        ('cache_url' is None when Bing exposes no cached copy).
    """
    browser = RoboBrowser()
    listings = []
    # `offset` is which listing to start at per page; increment by 14.
    for offset in range(1, total, 14):
        browser.open('http://www.bing.com/search?q=%s&first=%d' % (query, offset))
        # Each search hit is wrapped in a '.b_caption' element.
        for capt in browser.select('.b_caption'):
            listing = {}
            # Original URL of the site: the <cite> text with markup stripped.
            listing['orig_url'] = re.sub('<[^>]*>', '', str(capt.select('cite')[0]))
            # Quick description of the site.
            listing['desc'] = capt.p.string
            # The 'u' attribute is a '|' delimited list containing the ids
            # needed to build the cache URL.
            id_string = capt.select('div.b_attribution')[0].get('u')
            print(id_string)
            if id_string is not None:  # idiom fix: compare to None with 'is'
                ids = id_string.split('|')
                listing['cache_url'] = (
                    "http://cc.bingj.com/cache.aspx?q=%s&d=%s&mkt=en-US&setlang=en-US&w=%s"
                    % (query, ids[2], ids[3]))
            else:
                listing['cache_url'] = None
            print(listing)
            listings.append(listing)
        print(":: End of dump %d" % offset)
        # delay between page grabs
        time.sleep(1)
    # listings is given as an output object
    return listings
def getSongLinks(soundcloudURL):
    """Rip a SoundCloud track via soundflush.com.

    Returns a dict with the download 'url' and suggested file 'name'
    scraped from the result page's save button.
    """
    browser = RoboBrowser(history=False)

    # Ask SoundFlush to rip the given track.
    browser.open('http://soundflush.com/')
    rip_form = browser.get_form(id='form_download')
    rip_form['track_url'].value = soundcloudURL
    browser.submit_form(rip_form)

    # The result page exposes a single save button that carries both the
    # download link and the suggested filename.
    save_button = browser.select('a#btn_save')[0]
    return {
        'url': save_button['href'],
        'name': save_button['download'],
    }
def scrape_cache(query, total):
    """Scrape Bing search results for `query`, collecting cached-copy links.

    Args:
        query: search query string (already URL-safe).
        total: number of listings to walk; Bing pages step by 14.

    Returns:
        List of dicts with keys 'orig_url', 'desc' and 'cache_url'
        ('cache_url' is None when Bing exposes no cached copy).
    """
    browser = RoboBrowser()
    listings = []
    # `offset` is which listing to start at per page; increment by 14.
    for offset in range(1, total, 14):
        browser.open('http://www.bing.com/search?q=%s&first=%d' % (query, offset))
        # grab all search attribute strings
        for capt in browser.select('.b_caption'):
            listing = {}
            # display original url
            listing['orig_url'] = re.sub('<[^>]*>', '', str(capt.select('cite')[0]))
            # display description
            listing['desc'] = capt.p.string
            # '|' delimited list, containing the ids needed to cache
            id_string = capt.select('div.b_attribution')[0].get('u')
            print(id_string)
            if id_string is not None:  # idiom fix: compare to None with 'is'
                ids = id_string.split('|')
                listing['cache_url'] = (
                    "http://cc.bingj.com/cache.aspx?q=%s&d=%s&mkt=en-US&setlang=en-US&w=%s"
                    % (query, ids[2], ids[3]))
            else:
                listing['cache_url'] = None
            print(listing)
            listings.append(listing)
        print(":: End of dump %d" % offset)
        # delay between page grabs
        time.sleep(1)
    # listings is given as an output object
    return listings
def getKeyFromMetaData(url):
    """
    @brief Auxiliar function. Returns a list of normalized keywords.
    @param url String.
    @return listof(uStrings), or None when the site is unreachable.

    Uses normalize(value) to fix unicode strings.
    The function selects values in the content property of the tags tagged
    as metadata. This content contain the website keywords for
    classification.
    """
    crawler = RoboBrowser(history=True)

    # Try plain HTTP first, then HTTPS; give up (None) if both fail.
    # (Replaces the original connFlag bookkeeping and bare excepts.)
    try:
        crawler.open("http://" + url, verify=False)
    except Exception:
        try:
            crawler.open("https://" + url, verify=False)
        except Exception:
            return None

    keywords = []
    # select metadata with keywords: <meta name="keywords" content="a, b">
    for d in crawler.select("meta"):
        if d.attrs.get('name') == 'keywords':
            # Keywords may be separated by ", " or bare ",".
            for k in re.split(', |,', d.attrs['content']):
                keywords.append(normalize(k))
    return keywords
def submit(self, entry, message=None):
    """Log in to the competition site and upload a submission file.

    Args:
        entry: path of the file to upload.
        message: optional submission description.
    """
    browser = RoboBrowser(history=True)
    self.read_account()

    # [login]
    browser.open(self.login_url)
    login_form = browser.get_form(action='/account/login')
    login_form['UserName'].value = self.username
    login_form['Password'].value = self.password
    browser.submit_form(login_form)
    myname = browser.select('#header-account')[0].text
    print("[SUBMIT] login as \"%s\" @ %s" % (myname, datetime.now()))

    # [submit]
    browser.open(self.submit_url)
    submit_form = browser.get_form(action='/competitions/submissions/accept')
    # BUG FIX: the original leaked the open file handle; 'with' closes it
    # deterministically once the form has been submitted.
    with open(entry, 'r') as upload:
        submit_form['SubmissionUpload'].value = upload
        if message:
            submit_form['SubmissionDescription'] = str(message)
        browser.submit_form(submit_form)
    print("[SUBMIT] submitted @ %s" % datetime.now())
def take_action(self, parsed_args):
    """Log in to kddcup2015.com and download every submission data file.

    Credentials come from the --username/--password flags, falling back to
    ~/.kddcup2015-cli/config ([user] section) when a flag is absent.
    """
    config_dir = '~/.kddcup2015-cli'
    config_dir = os.path.expanduser(config_dir)
    if os.path.isdir(config_dir):
        config = ConfigParser.ConfigParser(allow_no_value=True)
        # NOTE(review): the opened config file handle is never closed.
        config.readfp(open(config_dir + '/config'))
        # NOTE(review): indentation reconstructed — the flag/config
        # fallback appears to live inside this isdir() guard, since
        # `config` is only defined here; confirm against the original.
        if parsed_args.username:
            username = parsed_args.username
        else:
            username = config.get('user', 'username')
        if parsed_args.password:
            password = parsed_args.password
        else:
            password = config.get('user', 'password')
    base = 'https://www.kddcup2015.com'
    login_url = '/'.join([base, 'user-ajaxlogin.html'])
    data_url = '/'.join([base, 'submission-data.html'])
    browser = RoboBrowser()
    # Login goes through an AJAX endpoint that answers JSON
    # ({'rs': ..., 'msg': ...}).
    response = browser.session.post(
        login_url, dict(email=username, pwd=password)).json()
    if response['rs'] == 'error':
        # NOTE(review): execution continues even after a login error.
        self.app.stdout.write(response['msg'])
    browser.open(data_url)
    # Every 'tr .blue' anchor links to one downloadable data file.
    src_urls = list(map(lambda x: x['href'], browser.select('tr .blue')))
    for url in src_urls:
        self.app.stdout.write('downloading %s\n' % url)
        request = browser.session.get(url, stream=True)
        # url[59:] strips the fixed URL prefix, leaving the file name.
        with open(url[59:], "wb") as data_files:
            data_files.write(request.content)
def get_webdav_urls(username, password):
    """Log in to CTools and return WebDAV URLs for the user's sites.

    Personal ('~') workspaces and attribute-less anchors are skipped.
    """
    # log in
    browser = RoboBrowser(history=True)
    browser.open('http://ctools.umich.edu')
    browser.follow_link(browser.find(id='ctoolsLogin'))
    login_form = browser.get_form()
    login_form['login'].value = username
    login_form['password'].value = password
    browser.submit_form(login_form)

    # get the results: enter the worksite setup tool, then its iframe
    browser.follow_link(browser.find(
        class_='toolMenuLink ',
        title='For creating, revising, and deleting course and project sites'
    ))
    browser.open(browser.find(class_='portletMainIframe').attrs['src'])

    results = []
    course_links = browser.select('#sitesForm td h4 a[target="_top"]')
    for course_link in course_links:
        if not course_link.attrs:
            continue
        href = course_link.attrs['href']
        if '~' in href:  # personal workspace, not a course site
            continue
        # FIX: raw string replaces the invalid '\/' escape sequences
        # (deprecated; an error in future Pythons) — the pattern itself is
        # unchanged: the trailing path segment becomes the DAV site id.
        results.append(
            'https://ctools.umich.edu/dav' + findall(r'/[^/]+$', href)[0]
        )
    return results
def songPick(url):
    """Pick a random (artist, song) pair from a chart page of <td> cells.

    The page lays rows out as triples of <td>: rank, artist, song.

    Returns:
        (search_string, artist, song) where search_string is
        "artist song".
    """
    browser = RoboBrowser(history=True)
    browser.open(url)
    song = browser.select('td')

    # Walk the cells in triples: index 1 is the artist, index 2 the song
    # (index 0, the rank, is skipped).
    songName = []
    artistName = []
    count = 0
    for p in song:
        p = p.text
        if count == 1:
            artistName.append(p)
        elif count == 2:
            songName.append(p)
        count += 1
        if count == 3:
            count = 0

    # BUG FIX: the original retry loop re-ran the scrape (duplicating the
    # lists) and always drew indexes 0..99 even when the chart holds fewer
    # entries, risking IndexError.  Scrape once, then redraw until the
    # combined string is non-trivial.
    limit = min(len(artistName), len(songName)) - 1
    while True:
        x = randint(0, min(99, limit))
        search = artistName[x] + " " + songName[x]
        if len(search) > 1:
            return (search, artistName[x], songName[x])
# NOTE(review): this Python 2 snippet is corrupted — a credential-masking
# pass replaced spans of the original source with "******", fusing several
# statements together, so it is NOT valid Python as-is.  Tokens below are
# preserved verbatim; only comments were added.
username = raw_input ("Username: "******"Repository: " + link.select('td.repo')[0].text.encode("utf-8").strip()
print "User: "******"utf-8").strip()
print "Title: " + link.select('td.title')[0].select('a.execute')[0].text.encode("utf-8").strip()
print "Updated " + link.select('td.date')[0].text.encode("utf-8").strip()
print "\n----------------------"
#obtain links with beautifulSoup
links = browser.find_all('a')
for link in links:
    try:
        #print(link.get('href'))
        # Relative links are made absolute against bitbucket.org.
        if not link['href'].startswith("https"):
            link['href']='https://bitbucket.org'+link['href'].encode("utf-8").strip()
            #link['href']='/odigeoteam/frontend-html5'
        print link['href']
class Robot(object):
    """This robot have two functionality, which is to grab matakuliah data
    and to grab KRS of each mahasiswa.

    This robot also need username and password for authorization.

    :param str username: username for login
    :param str password: password for login
    """

    def __init__(self, username, password):
        self.browser = RoboBrowser()
        self.username = username
        self.password = password
        self.matakuliah = []

    def update_matakuliah(self):
        """Scrape the course list, enrich each entry with its schedule,
        then persist everything."""
        self.matakuliah = self._get_matakuliah()
        for obj in self.matakuliah:
            detail = self._get_matakuliah_detail(obj['link_detail'])
            obj['jadwal_kuliah'] = detail['jadwal_kuliah']
            # obj['jadwal_uts'] = detail['jadwal_uts']
            # obj['jadwal_uas'] = detail['jadwal_uas']
        self._persist_matakuliah()

    def _persist_matakuliah(self):
        """Upsert each scraped course into the Kelas table (keyed by
        class name)."""
        for obj in self.matakuliah:
            try:
                kelas = (Kelas.select()
                         .where(Kelas.nama == obj['nama_kelas']).get())
            except Kelas.DoesNotExist:
                kelas = Kelas()
            kelas.kode_mk = obj['kode_mk']
            kelas.nama = obj['nama_kelas']
            kelas.matakuliah = obj['matakuliah']
            kelas.dosen = obj['dosen']
            kelas.sks = obj['sks']
            kelas.tipe = obj['tipe']
            kelas.jadwal_kuliah = obj['jadwal_kuliah']
            kelas.save()

    def __login(self):
        """Authenticate against akademika.ugm.ac.id."""
        self.browser.open('http://akademika.ugm.ac.id')
        login_form = self.browser.get_form(id='form-login')
        login_form['username'].value = self.username
        login_form['password'].value = self.password
        self.browser.submit_form(login_form)

    def _get_matakuliah(self):
        """Scrape the course information table into a list of dicts."""
        self.__login()
        # go to 'informasi matakuliah' page
        link_matakuliah = self.browser.select('#navigation li a')[3]
        self.browser.follow_link(link_matakuliah)
        marshal = []
        # BUG FIX: the original referenced the undefined global `browser`;
        # the instance's browser was clearly intended.
        matakuliah_raw = self.browser.select('.table-common > tr')[1:]
        for raw in matakuliah_raw:
            data = raw.select('td')
            obj = {}
            obj['kode_mk'] = data[1].contents[0]
            obj['matakuliah'] = data[2].contents[0]
            obj['dosen'] = data[3].contents[0]
            obj['link_detail'] = data[4].contents[0]
            obj['nama_kelas'] = data[4].contents[0].get_text()
            obj['tipe'] = data[5].contents[0]
            obj['sks'] = data[6].contents[0]
            marshal.append(obj)
        return marshal
def _get_matakuliah_detail(self, link):
    """Follow a course's detail link and scrape its schedule table.

    Returns a dict whose 'jadwal_kuliah' holds all schedule rows, each
    row's cells joined with '$' and rows joined with '|'
    ('jadwal_uts'/'jadwal_uas' stay empty until 'tanggal' scraping is
    worked out).
    """
    self.browser.follow_link(link)
    jadwal_row = self.browser.select('table > tr')  # for brevity

    obj = {}
    obj['jadwal_kuliah'] = ""
    obj['jadwal_uts'] = ""
    obj['jadwal_uas'] = ""

    # BUG FIX: the original overwrote obj['jadwal_kuliah'] on every pass
    # ("|".join of a one-element list), keeping only the LAST row.
    # Collect all rows first, then join once.
    row_strings = []
    jadwal_kuliah_row = jadwal_row[0].select('table tr')[1:]  # skip header
    for row in jadwal_kuliah_row:
        contents = [x.contents[0] for x in row.select('td')]
        row_strings.append("$".join(contents))
    obj['jadwal_kuliah'] = "|".join(row_strings)

    # TODO: find a way to get 'tanggal'
    # jadwal_uts_row = jadwal_row[1].select('table tr')[1:]
    # jadwal_uas_row = jadwal_row[2].select('table tr')[1:]
    return obj
# Scrape qq.com's "today's top" story: follow the headline link, then print
# the article's title and body. (Python 2 print statements.)
import re
from robobrowser import RoboBrowser

url = 'http://www.qq.com/'
b = RoboBrowser(history=True)
b.open(url)

# The anchor inside the element with id 'todaytop' is the headline link.
today_top = b.find(id='todaytop').a
print today_top['href']
b.follow_link(today_top)

# First '.hd h1' on the article page is its title.
title = b.select('.hd h1')[0]
print '*****************************'
print title.text
print '*****************************'
print b.find(id='articleContent').text
def getWebsiteOneData():
    """Scrape a ski-resort listing site into a per-mountain info dict.

    Walks every 'rowB' table row on the index page (module-level
    `working_web`), then for each mountain scrapes its description, snow
    report, lift-ticket prices, trail-map image and driving address from
    the detail sub-pages.  Returns {mountain_name: {...}}.
    """
    data_og=[]
    page = urlopen(working_web).read()
    soup = BeautifulSoup(page)
    soup.prettify()
    #print(soup)
    #http://stackoverflow.com/questions/9253684/selecting-specific-tr-tags-with-beautifulsoup
    rows=soup.findAll('tr', {'class': 'rowB'})
    for r in rows:
        # Each row's anchor carries (mountain name, relative detail URL).
        tag_a = r.find('a')
        data_og.append([tag_a.text, tag_a['href']])
    #print(data_og)
    mountain_dict={}
    print("scraping: ", str(len(data_og)), " mountains")
    for x in data_og:
        #we are now opening a specifc mountains detail page
        ###scrape resort name
        init_detail_url=prefix+x[1]
        # Index just past the final '/' — used to swap the last path
        # segment for the various sub-pages below.
        proper_prefix_index=init_detail_url.rindex('/')+1
        #scrape resort description
        #print(init_detail_url)
        browser_6 = RoboBrowser(history=True)
        browser_6.open(init_detail_url)
        r_d = browser_6.select('.resort_description')
        soup_7 = BeautifulSoup(str(r_d))
        p_s= soup_7.findAll('p')
        description=""
        for p in p_s:
            description+=p.getText()+"\n"
        snow_report_url=init_detail_url[:proper_prefix_index]+"skireport.html"
        browser_2 = RoboBrowser(history=True)
        browser_2.open(snow_report_url)
        #print(init_detail_url)
        mountain_name=x[0]
        print(mountain_name)
        #print(init_detail_url)
        ###scrape resort url
        website_of_resort=browser_2.select('.contact_wrap')
        soup_4 = BeautifulSoup(str(website_of_resort))
        try:
            website_of_resort_link=soup_4.find('a').getText()
        except:
            print("no webpage")
            website_of_resort_link="sorry, no link found, try google search"
        #print(website_of_resort_link)
        ###scrape lift tickets:
        lift_url=init_detail_url[:proper_prefix_index]+"lift-tickets.html"
        #print(lift_url)
        browser_4 = RoboBrowser(history=True)
        browser_4.open(lift_url)
        ticket_box=browser_4.select('.resort_ticket_price')
        soup_5 = BeautifulSoup(str(ticket_box))
        data=[]
        #source: http://stackoverflow.com/questions/23377533/python-beautifulsoup-parsing-table
        for tr in soup_5.find_all('tr'):
            cols = tr.find_all('td')
            cols = [ele.text.strip() for ele in cols]
            data.append([ele for ele in cols if ele])
        #child, junior, adult, senior
        #print(data)
        weekday_prices=data[1][1:-1]
        weekend_prices=data[2][1:]
        #print(weekday_prices)
        #print(weekend_prices)
        ###scrape trail map picture:
        image_url=init_detail_url[:proper_prefix_index]+"trailmap.html"
        browser_5 = RoboBrowser(history=True)
        browser_5.open(image_url)
        map_html=browser_5.select('.trailMap')
        soup_6=BeautifulSoup(str(map_html))
        try:
            img_tag=soup_6.find("img")
        except:
            # NOTE(review): find() returns None rather than raising, so
            # this handler likely never fires; 'img_rc' also looks like a
            # typo for 'img_src' (the next try/except papers over it).
            print("image not found")
            img_rc="NULL"
        try:
            img_src=img_tag['src']
        except:
            img_src="NULL"
        ###scrape address:
        driving_url=init_detail_url[:proper_prefix_index]+"driving-directions.html"
        #print(driving_url)
        browser_3 = RoboBrowser(history=True)
        browser_3.open(driving_url)
        direction_text=browser_3.select('.directions')
        soup_4 = BeautifulSoup(str(direction_text))
        # The directions form pre-fills the resort address in input#end.
        destination_text = soup_4.find("input", {"id": "end"})
        destination_text = destination_text['value']
        #soup_3 = BeautifulSoup(str(mountain_name))
        #mountain_name=soup_3.find('span').getText()
        #print(mountain_name)
        #trail info scraper
        runs_open=browser_2.select('.pie_chart_item')
        try:
            # The pie chart's <p> reads like "12 of 45"; split on 'of'.
            run_info=str(runs_open[0])
            soup_2 = BeautifulSoup(run_info)
            numbers=soup_2.find('p').getText()
            of_index=numbers.index('of')
            num_open=int(numbers[0:of_index])
            try:
                total_num=int(numbers[of_index+2:])
                #print(num_open)
                #print(total_num)
                #print(runs_open)
                #exit()
            except:
                print("no trail number info")
                total_num=-1
        except:
            # NOTE(review): if this outer handler fires before total_num
            # was assigned, the percent computation below raises NameError
            # — confirm intended behavior.
            print("no trail info")
            num_open=-1
        if mountain_name not in mountain_dict:
            #send %instead of open and total
            percent_trails_open=float(num_open)/float(total_num)
            mountain_dict[mountain_name]={'resort_description':description, 'percent_trails_open':percent_trails_open, 'resort_website': website_of_resort_link, 'resort_location':destination_text, 'weekday_prices': weekday_prices, 'weekend_prices': weekend_prices, 'trail_map_url': img_src}
        # Progress indicator: dict size so far (placement reconstructed —
        # indentation was lost in this excerpt).
        print(len(mountain_dict))
    return mountain_dict
class CF:
    """Thin client for codeforces.com: log in, submit a solution, and poll
    the status page for the verdict."""

    # Basic endpoints
    URL_HOME = 'http://codeforces.com/'
    URL_LOGIN = URL_HOME + 'enter'
    URL_SUBMIT = URL_HOME + 'problemset/submit'
    URL_STATUS = URL_HOME + 'submissions/'

    # Result column labels: one per <td> of a status-table row
    INFO = ['RunID', 'Submit Time', 'Author', 'Pro.ID', 'Language',
            'Judge Status', 'Time', 'Memory']

    # Language name -> Codeforces programTypeId
    LANGUAGE = {
        'G++': '42',
        'C': '42',
        'G++11': '42',
        'G++14': '50',
        'GCC': '10',
        'GCC11': '1',
        'JAVA': '36',
        'PYTHON2': '7',
        'PYTHON3': '31',
    }

    # Request headers used when polling the status page via urllib
    header = {
        'Accept': 'text / html, application / xhtml + xml, '
                  'application / xml;q = 0.9, image / webp, * / *;q = 0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Ubuntu Chromium/52.0.2743.116 Chrome/52.0.2743.116 Safari/537.36',
        'Origin': "http://codeforces.com",
        'Host': "codeforces.com",
        'Content-Type': 'application/x-www-form-urlencoded',
        'Connection': 'keep-alive',
    }

    def __init__(self, user_id, password):
        self.user_id = user_id       # Codeforces handle
        self.password = password
        self.browser = RoboBrowser()
        self.run_id = ''             # id of OUR newest submission, once seen
        self.pre_id = ''             # newest submission id BEFORE we submit
        self.res = {}                # verdict fields, keyed by CF.INFO

    def login(self):
        """Log in via the enter form; returns True on success."""
        try:
            self.browser.open(CF.URL_LOGIN)
        except:
            ots = "Server Error"
            logger.error(ots)
            print(ots)
            return False
        enter_form = self.browser.get_form('enterForm')
        enter_form['handle'] = self.user_id
        enter_form['password'] = self.password
        try:
            self.browser.submit_form(enter_form)
        except:
            ots = "Server Error"
            logger.error(ots)
            print(ots)
            return False
        try:
            # The logged-in handle appears in the 'div.caption.titled'
            # captions; its absence means the login was rejected.
            checks = list(map(lambda x: x.getText()[1:].strip(),
                              self.browser.select('div.caption.titled')))
            if self.user_id not in checks:
                ots = "Login Failed.. "
                logger.info(ots)
                print(ots)
                return False
        except:
            ots = "Server Error"
            logger.error(ots)
            return False
        ots = 'Login Successful!'
        logger.info(ots)
        return True

    def submit(self, pro_id, language, src):
        """Submit source `src` for problem `pro_id`; returns True on success.

        `language` is a key of CF.LANGUAGE (case-insensitive).
        """
        pro_id = str(pro_id).upper()
        try:
            language = CF.LANGUAGE[str(language).upper()]
        except:
            ots = 'language unrecognizable!'
            logger.error(ots)
            print(ots)
            return False
        try:
            self.browser.open(CF.URL_SUBMIT)
        except:
            ots = "Server Error"
            logger.error(ots)
            print(ots)
            return False
        submit_form = self.browser.get_form(class_='submit-form')
        submit_form['submittedProblemCode'] = pro_id
        submit_form['source'] = src
        submit_form['programTypeId'] = language
        self.browser.submit_form(submit_form)
        # A successful submit redirects to a .../status URL; anything else
        # (e.g. resubmitting an identical file) stays on the form page.
        if self.browser.url[-6:] != 'status':
            ots = 'Submit Failed..(probably because you have submit the same file before.)'
            logger.info(ots)
            print(ots)
            return False
        ots = 'Submit Successful'
        # NOTE(review): uses the root `logging` module here, unlike the
        # `logger` used elsewhere in this class — confirm intended.
        logging.info(ots)
        print(ots)
        return True

    def init_id(self):
        """Record the newest pre-existing submission id (pre_id), so result()
        can tell our new submission apart.  Returns True on success."""
        if self.pre_id != '':
            return True
        url = CF.URL_STATUS + str(self.user_id)
        try:
            req = urllib.request.Request(url=url, headers=CF.header)
            page = urllib.request.urlopen(req, timeout=5)
        except:
            ots = "Server Error"
            logger.error(ots)
            print(ots)
            return False
        soup = BeautifulSoup(page, 'html5lib')
        tables = soup.find('table', {'class': 'status-frame-datatable'})
        tmp = []
        # The first row with exactly 8 non-empty cells is the newest
        # submission.
        for row in tables.findAll('tr'):
            cols = row.findAll('td')
            cols = [ele.text.strip() for ele in cols]
            tmp = [ele.replace(u'\xa0', u' ') for ele in cols if ele]
            if len(tmp) == 8:
                break
        self.pre_id = tmp[0]
        return True

    def result(self):
        """Poll the status page once.

        Returns True when a final verdict (or an error) was obtained and
        False while the submission is still 'Running'/'In queue'.
        On success self.res maps CF.INFO labels to the row's fields.
        """
        url = CF.URL_STATUS + str(self.user_id)
        try:
            page = urllib.request.urlopen(url, timeout=5)
        except:
            ots = "Server Error"
            logger.error(ots)
            print(ots)
            return False
        soup = BeautifulSoup(page, 'html5lib')
        tables = soup.find('table', {'class': 'status-frame-datatable'})
        tmp = []
        find = False
        # Scan rows newest-first; stop when we either hit the submission
        # that existed before ours (pre_id) or find/confirm our run_id.
        # (Nesting reconstructed — indentation was lost in this excerpt.)
        for row in tables.findAll('tr'):
            cols = row.findAll('td')
            cols = [ele.text.strip() for ele in cols]
            tmp = [ele.replace(u'\xa0', u' ') for ele in cols if ele]
            if len(tmp) == 8:
                if tmp[0] == self.pre_id:
                    break
                if not find:
                    if self.run_id == '' or self.run_id == tmp[0]:
                        find = True
                        self.run_id = tmp[0]
                if find:
                    break
        if not find:
            ots = "Can not find submissions!"
            logging.info(ots)
            print(ots)
            return True
        wait = ['Running', 'In queue']
        # Still being judged — report not-done so the caller keeps polling.
        if tmp[5].find(wait[0]) != -1 or tmp[5].find(wait[1]) != -1:
            logging.info(tmp[5])
            return False
        for i in range(8):
            self.res[CF.INFO[i]] = tmp[i]
            print(CF.INFO[i], ':', tmp[i])
        return True
session.verify = False # Skip SSL verification session.proxies = {'http': 'http://localhost:8081/'} browser = RoboBrowser(session=session, parser='lxml') # Browse to Genius # browser = RoboBrowser(history=True) browser.open('http://www.petitesannonces.ch/') # Search for Porcupine Tree form = browser.get_form(action='/recherche/') form # <RoboForm q=> form['q'].value = 'jardinage' browser.submit_form(form) # Look up the first song songs = browser.select('.ele a.title') for p in songs: print(p.text) # browser.follow_link(songs[0]) # lyrics = browser.select('.lyrics') # lyrics[0].text # \nHear the sound of music ... # # # Back to results page # browser.back() # # # Look up my favorite song # song_link = browser.get_link('trains') # browser.follow_link(song_link) #
# Google News scrape: for each company in com_list, fetch up to `num` news
# results and write (title, url) rows to company/<name>.csv.  The previous
# output folder is archived by renaming it with a month+day suffix.
num = 1000
month = str(date.today()).split('-')[1]
day = str(date.today()).split('-')[2]
rename('company', 'company' + month + day)
if not os.path.exists('company'):
    os.makedirs('company')
for com in com_list:
    com = com.replace(" ", "+")
    url = "https://www.google.com/search?q="+com+"&tbm=nws&num="+str(num)+"&filter=0&cr=tw&ie=utf-8&oe=utf-8&hl=zh-TW"
    browser = RoboBrowser(user_agent='a python robot')
    browser.open(url)
    news_list = list()
    # FIX: 'with' closes the CSV even when a row fails to encode — the
    # original left the handle open across exceptions.  (A no-op
    # `browser.session.headers['User-Agent']` expression was also removed.)
    with open('company\\' + com + '.csv', 'w') as f:
        w = csv.writer(f)
        for body in browser.select('div#search'):
            for div in body.select('div.g'):
                for a in div.select('h3 a'):
                    # Google wraps result URLs in /url?q=...&sa=U...; undo
                    # the percent-escapes and strip the wrapper.
                    hreff = str(a.attrs.get('href')).replace('%3F','?').replace('%3D','=').replace('%26','&').replace('/url?q=','')
                    href = hreff.split('&sa=U')[0]
                    print(a.text)
                    try:
                        w.writerow([a.text, href])
                    except Exception:  # narrowed from a bare except
                        print('error', a.text)
                        w.writerow(['error', href])
#print(form) if browser.find(text=re.compile("Login")): print('Compiled login fields form...') browser.submit_form(form) if browser.find(text=re.compile("The password you entered was incorrect")): print("Wrong password or username. Attempting to download anyway.") exit(); elif browser.find(text=re.compile("\"loggedIn\":true")): print("Logged in!") else: print("Login unsuccessful. Attempting to download anyway.") exit(); browser.open('https://www.deviantart.com/messages/#view=deviantwatch') page=browser.select('body') script=browser.select('script') for s in script: st=s.text json_start= st.find('{"api":"scmc","preload":') if json_start!=-1: json_end= st.find('}}}}) }',json_start) print('Analising the json') #print(str(json_start)+" "+str(json_end)) js=st[json_start:json_end+4] out_file = open("source.txt","w") out_file.write(js) out_file.close() #REPLACE ALL THE SINGLE QUOTE #REPLACE ALL THE \ WITH \\
#form.new_control('text','code',{'value':''}) #form.fixup() form['localid'].value=str(curProgram) form['language'].value='2' form['code'].value='import java.util.*;class Main{public static void main(String[]args) throws Exception{Scanner in = new Scanner(System.in);StringBuilder sb = new StringBuilder();while(in.hasNextLine()){sb.append(in.nextLine());}byte b=(byte)sb.charAt('+str(curByte)+');if((b>>'+str(shift)+'&0x01)==0){throw new Exception("Error");}}}' br.submit_form(form) #f3 = open('f3.html','w') #f3.write(str(tp)) #print(tp) a=br.find_all('a', href=True, text = re.compile('My Submissions')) for link in a: #print(a) br.follow_link(link) tr = br.select('.sectiontableentry1') stra = str(tr[8]) #print(stra[295:307]) if 'Wrong answer' in stra: binaryString += '1' else: binaryString += '0' print('Submitted',curProgram,curByte, binaryString[::-1]) if(curByte>1): intval = int(binaryString[::-1],2) if(intval==0): break input += chr(intval) binaryString = ''
class Login(object):
    """Drives a PhantomJS browser through a login and booking flow.

    RoboBrowser is used only to discover the dynamically generated input
    element ids on the login page; all interaction happens via
    Selenium/PhantomJS.
    """

    def __init__(self):
        self.dr = webdriver.PhantomJS('phantomjs')
        # self.dr = webdriver.Firefox()
        self.dr.maximize_window()
        self.source = RoboBrowser(history=True)
        print "xxxx"

    def readdata(self, user):
        """Read login URL, credentials and banner-image path from ini.yaml."""
        res = yaml.load(file('ini.yaml'))
        login_url = res['url']['login']
        username = res[user]['username']
        password = res[user]['password']
        xuanchuantu = res['road']['xuanchuantu']
        print username, password
        return login_url, username, password, xuanchuantu

    def login(self, url, username, password):
        """Open `url` and sign in with the given credentials."""
        # self.dr = webdriver.PhantomJS('phantomjs')
        # # self.dr = webdriver.Firefox()
        # self.dr.maximize_window()
        # self.source = RoboBrowser(history=True)
        self.dr.get(url)
        # Dynamically identify the element ids (they are generated per page)
        self.source.open(url)
        ids = self.source.select('input')
        username_id = str(ids[0].attrs['id'])
        password_id = str(ids[1].attrs['id'])
        button_id = str(ids[2].attrs['id'])
        # Fill in the page
        self.dr.find_element_by_id(username_id).click()
        self.dr.find_element_by_id(username_id).send_keys(username)
        self.dr.find_element_by_id(password_id).click()
        self.dr.find_element_by_id(password_id).send_keys(password)
        self.dr.save_screenshot('./login.png')
        self.dr.find_element_by_name(button_id).click()
        time.sleep(2)

    # This method could later be generalised by iterating over the elements
    def add_jj(self, img):
        """Create a new booking: upload image `img` and fill the form."""
        print "简介"
        self.dr.find_elements_by_class_name('btn-search')[1].click()
        # Enter the "new booking" page
        self.dr.find_element_by_id('fileToUpload1').send_keys(img)
        self.dr.find_element_by_class_name('btn-confirm2').click()
        self.dr.find_element_by_name('title').click()
        self.dr.find_element_by_name('title').send_keys('whtest1')
        self.dr.find_element_by_name('leader').click()
        self.dr.find_element_by_name('leader').send_keys('whtest1')
        self.dr.find_element_by_name('tel').click()
        self.dr.find_element_by_name('tel').send_keys('13421111111')
        self.dr.find_element_by_name('location').click()
        self.dr.find_element_by_name('location').send_keys(u'朝阳区')
        self.dr.find_elements_by_tag_name('img')[2].click()
        # The confirmation dialog lives in the first iframe.
        frame = self.dr.find_elements_by_tag_name('iframe')[0].get_attribute('name')
        self.dr.switch_to_frame(frame)
        self.dr.find_element_by_id('ok').click()
        self.dr.switch_to_default_content()
        # Pick the second option of both dropdowns.
        select = self.dr.find_elements_by_tag_name('select')
        select[0].find_elements_by_tag_name('option')[1].click()
        time.sleep(2)
        select[1].find_elements_by_tag_name('option')[1].click()
        # self.dr.find_elements_by_tag_name('select')[1].find_elements_by_tag_name('option')[1].click()
        self.dr.save_screenshot('./save.png')
        self.dr.find_element_by_link_text(u'保存').click()

    def add_yyxx(self):
        """Open the booking-info list and run a date-range query."""
        self.dr.find_element_by_link_text(u'预约信息').click()
        self.dr.find_element_by_name('s_start').send_keys('2015-01-08')
        self.dr.find_element_by_name('s_end').send_keys('2015-01-28')
        self.dr.find_element_by_xpath('/html/body/div[4]/div/div[2]/div/div[2]/div[1]/div[2]/span[5]/input').click()

    def quit(self):
        """Give pending actions time to settle, then shut down the driver."""
        time.sleep(5)
        self.dr.quit()
# Scrape itest.info course #2: print title, description, schedule, teacher
# and the QQ contact text nodes. (Python 2 print statements.)
import re
from robobrowser import RoboBrowser

url = 'http://itest.info/courses/2'
b = RoboBrowser(history=True)
b.open(url)

class_name = b.select('.headline h2')
print class_name[0].text

class_desc = b.select('.tag-box')
print class_desc[0].text

class_time = b.select('h4')
print class_time[0].text

teacher = b.select('.thumbnail-style h3')
print teacher[0].text

# Text nodes are matched by regex rather than CSS selector.
qq = b.find(text=re.compile('QQ'))
print qq

qq_group = b.find(text=re.compile('\+selenium'))
print qq_group
__author__ = 'hanz' #*--coding=utf-8--* import re from robobrowser import RoboBrowser from BeautifulSoup import BeautifulSoup dr = RoboBrowser(history=True) dr.open('http://yuyueweijian.test.zae.zhongsou.com/user/login') titles = dr.select('input') for title in titles: print title.attrs['id'],title.attrs['name']
# Bowdoin directory scrape: log in, then walk the student directory one
# last-name initial at a time. (Python 2 print statements; USERNAME and
# PASSWORD come from earlier in the file.)
browser = RoboBrowser()
letters = [letter for letter in string.ascii_lowercase]

# Login
browser.open('https://www.bowdoin.edu/BowdoinDirectory/rmSignon.jsp')
form = browser.get_forms()[1]
form["uname"] = USERNAME
form["pword"] = PASSWORD
print "Logging in...",
browser.submit_form(form)

# Only do external access
# browser.open("http://www.bowdoin.edu/BowdoinDirectory/lookup.jsp")

# Make sure we actually logged in — the search form (#sch) only appears
# when authenticated.
if browser.select("#sch"):
    print "done!"
else:
    print "FAILED."
    quit()

for letter in letters:
    # Get all students with last name beginning with letter
    form = browser.get_form(id='sch')
    form["ln"].value = letter
    form["so"].value = "stu"
    browser.submit_form(form)
    students = browser.select('.person')
    for student in students:
        # NOTE(review): loop body truncated in this excerpt — it continues
        # in the original file.
# coding: utf-8 import re from robobrowser import RoboBrowser url = "http://www.qq.com/" b = RoboBrowser(history=True) b.open(url) # 获取今日话题这个link today_top = b.find(id="todaytop").a print today_top["href"] b.follow_link(today_top) # 这个时候已经跳转到了今日话题的具体页面了 # 打印标题 title = b.select(".hd h1")[0] print "*************************************" print title.text print "*************************************" # 打印正文内容 print b.find(id="articleContent").text
#coding: utf-8 import re from robobrowser import RoboBrowser url = 'http://itest.info/courses/2' b = RoboBrowser(history=True) b.open(url) #页面上所有的a all_links = b.select('a') for link in all_links: print link.text # 页面上所有class是container的div divs = b.select('.container') print len(divs)
# Print course title, session times and the full body text of an
# itest.info page (Python 2 print statements).
import re
from robobrowser import RoboBrowser

b = RoboBrowser(history=True)
b.open('http://itest.info/courses/2')

'''
form = b.get_form(action='/s')
print form
form['wd'].value = 'selenium'
b.submit_form(form)
'''

title = b.select('.headline h2')
print title[0].text

infos = b.select('h4')
for info in infos:
    print info.text

body = b.select('body')
print body[0].text
class techMTimeSheetCls():
    """Automates filling and submitting the weekly TechM PACE timesheet.

    `parent` must expose `settings` (techmID, techmPass, nabProxy) and
    `customTools` (isInternetAvailable).
    """

    def __init__(self, parent):
        self.parent = parent
        self.settings = self.parent.settings
        self.tools = self.parent.customTools
        # Portal landing page and the direct timesheet component URL.
        self.mainUrl = 'https://pacehr.techmahindra.com/psp/PACEHR/EMPLOYEE/HRMS/c/ROLE_EMPLOYEE.TL_MSS_EE_SRCH_PRD.GBL?FolderPath=PORTAL_ROOT_OBJECT.CO_EMPLOYEE_SELF_SERVICE.HC_TIME_REPORTING.HC_RECORD_TIME.HC_TL_SS_JOB_SRCH_EE_GBL&IsFolder=false&IgnoreParamTempl=FolderPath%2cIsFolder'
        self.timeSheetUrl = 'https://pacehr.techmahindra.com/psc/PACEHR/EMPLOYEE/HRMS/c/ROLE_EMPLOYEE.TL_MSS_EE_SRCH_PRD.GBL'
        self.sslVerification = True
        self.loginId = self.settings.techmID
        self.loginPass = self.settings.techmPass
        print("techMTimeSheetCls is ready!")

    def useProxy(self):
        """Route traffic through the corporate proxy; its intercepting
        certificate breaks SSL verification, so turn verification off."""
        os.environ["HTTP_PROXY"] = os.environ["HTTPS_PROXY"] = self.settings.nabProxy
        self.sslVerification = False

    def _failed(self):
        # Single failure message used at every stage (matches the original
        # output exactly).
        print("----Error! Unable to update timesheet-----")

    def submitTimeSheet(self):
        """Log in, open the timesheet and submit a 5x8h 'PA' week.

        Behavior matches the original eight-level if/else pyramid: the same
        progress message is printed before each stage and the same error
        message on the first stage whose HTTP response is not OK — guard
        clauses just flatten the nesting.
        """
        if (not self.tools.isInternetAvailable):
            # NOTE(review): reads the attribute itself, not a call —
            # confirm isInternetAvailable is a property, not a method.
            print("No internet available, Cannot submitTimeSheet")
            return
        self.browser = RoboBrowser(history=True, user_agent='Mozilla/5.0')
        print("----Loading techm site----")
        self.browser.open(self.mainUrl, verify=self.sslVerification)
        if not self.browser.response.ok:
            return self._failed()
        print("----Techm site loaded!, Checking for login form----")
        self.form = self.browser.get_form(id='login')
        if not self.browser.response.ok:
            return self._failed()
        print("----Login form loaded!, Filling login details----")
        self.form['userid'] = self.loginId
        self.form['pwd'] = self.loginPass
        self.browser.submit_form(self.form)
        if not self.browser.response.ok:
            return self._failed()
        print("----Login successful!, Loading timesheet url----")
        self.browser.open(self.timeSheetUrl, verify=self.sslVerification, method='post')
        if not self.browser.response.ok:
            return self._failed()
        print("----Timesheet url loaded, Checking timesheet form----")
        self.form2 = self.browser.get_form(id='TL_MSS_EE_SRCH_PRD')
        if not self.browser.response.ok:
            return self._failed()
        print("----Timesheet form loaded, Filling timesheet data----")
        # Hidden PeopleSoft action fields, then the project row: work type
        # 'PA', task id, project code, and 8 hours for each of 5 days.
        self.form2['ICAction'] = 'TL_SAVE_PB'
        self.form2['ICStateNum'] = '1'
        self.form2['TL_TR_WEEK_WRK_USER_FIELD_1$0'] = 'PA'
        self.form2['USER_FIELD_3$0'] = '000000000000004'
        self.form2['PROJECT$0'] = 'C01000000019679'
        self.form2['QTY_DAY1$0'] = '8.00'
        self.form2['QTY_DAY2$0'] = '8.00'
        self.form2['QTY_DAY3$0'] = '8.00'
        self.form2['QTY_DAY4$0'] = '8.00'
        self.form2['QTY_DAY5$0'] = '8.00'
        # The save button itself must not be serialized as a form field.
        self.form2.fields.pop('TL_SAVE_PB')
        self.browser.submit_form(self.form2)
        if not self.browser.response.ok:
            return self._failed()
        print("----Timesheet submitted!----")
        expectedMsg = self.browser.select('span.PSEDITBOX_DISPONLY')
        if (expectedMsg):
            print(str(expectedMsg[0].text))
        else:
            self._failed()
# Auto-repost a Destiny LFG group every 30 seconds
# (Python 2 print statements).
import re
from robobrowser import RoboBrowser
from datetime import datetime, timedelta

browser = RoboBrowser(history=True)
browser.open("http://destinylfg.net")
addMyGroup = browser.select('#new')  # NOTE(review): unused

# Fill out the "add group" form once; it is re-submitted on each repost.
form = browser.get_form(class_='form-horizontal panel-body')
form['region'].value = 'northamerica'
form['platform'].value = 'ps4'
form['gamertag'].value = 'YOUR USERNAME'
form['level'].value = 31
form['event'].value = 'strikes-nightfall-weekly'
form['notes'].value ='#lfm 30+ add USERNAME will not reply on here.'

startTime = datetime.now()
endTime = datetime.now()
browser.submit_form(form)
print 'post'

# NOTE(review): likely bug — endTime is only refreshed inside the if, so
# `endTime - startTime` never grows and the 30-second condition can never
# become true; an `endTime = datetime.now()` at the top of each pass was
# presumably intended. The loop also busy-waits at 100% CPU (no sleep),
# and `wait` is never read.
while(1):
    if(endTime - startTime > timedelta(seconds=30)):
        print 'post'
        startTime = datetime.now()
        endTime = datetime.now()
        browser.submit_form(form)
        wait = False
def get_source(app, version, username, password, base_unpack_dir=None, clean=True, keep=True, archive_type=None):
    """Download and unpack the source archive for one Atlassian product version.

    Logs in to the MyAtlassian portal, scrapes the source-download table for
    `app`, downloads the archive matching `version`, and extracts it under
    `base_unpack_dir` (defaults to ./versions).

    :param app: product identifier used in the download URL and folder layout
    :param version: version string to locate in the download table
    :param username: Atlassian ID login
    :param password: Atlassian ID password
    :param base_unpack_dir: root directory for archives and unpacked trees
    :param clean: delete any previously downloaded archive first
    :param keep: keep the downloaded archive after unpacking
    :param archive_type: preferred archive format; resolved per-app if None
    :returns: path of the unpacked source directory
    :raises IOError: when the portal login fails
    :raises AtlassianSourceDownloadError: version not found / bad archive contents
    """
    browser = RoboBrowser()
    # log in to the MyAtlassian portal
    browser.open(LOGIN_PAGE)
    login_form = browser.get_form(id='form-login')
    login_form['username'].value = username
    login_form['password'].value = password
    browser.submit_form(login_form)
    if browser.response.status_code != 200:
        raise IOError("Login failed to Atlassian ID service.")
    # get the list of versions for the application we're wanting to download source for
    browser.open("%s/%s" % (SOURCE_DOWNLOAD_BASE, app))
    versions = browser.select('table#source-download-table tr.smallish')
    row_number = 0
    # maps version string -> relative download path, filled from the table rows
    version_download_map = {}
    archive_type = select_archive_type(app, archive_type)
    archive_extension = get_archive_extension(archive_type)
    for version_row in versions:
        row_number += 1
        try:
            columns = version_row.find_all('td')
            version_field = columns[0]
            # NOTE(review): len() on a bs4 Tag counts its children, not matches —
            # presumably this guards against unexpected cell structure; confirm.
            if len(version_field) != 1:
                raise AtlassianSourceDownloadError("Version field not found.")
            version_text = version_field.text.strip()
            if len(version_text) == 0:
                raise AtlassianSourceDownloadError("Version field contained no text.")
            version_name_match = VERSION_EXTRACT_REGEX.match(version_text)
            if version_name_match is None:
                raise AtlassianSourceDownloadError("Couldn't match version number in field.")
            # rows for other archive formats (zip vs tar.gz) are skipped here
            version_archive_type = version_name_match.group('type').lower()
            if version_archive_type != archive_extension:
                raise AtlassianSourceDownloadError("Archive type didn't match required type.")
            download_link_field = columns[-1].find('a')
            if len(download_link_field) != 1:
                raise AtlassianSourceDownloadError("Download link field not found or has multiple.")
            download_path = download_link_field.get('href').strip()
            if len(download_path) == 0:
                raise AtlassianSourceDownloadError("Download link URL contained no value.")
            version_download_map[version_name_match.group('version')] = download_path
        except AtlassianSourceDownloadError as ex:
            # rows that don't parse are skipped silently (best-effort scrape)
            # print("Skipped row number %d. Reason: %s" % (row_number, ex))
            pass
    # blow up if the version requested isn't in the list
    if version not in version_download_map:
        raise AtlassianSourceDownloadError("Unable to find version '%s' on Atlassian source site." % version)
    # set the default unpack dir if it wasn't provided and create it if it doesn't exist
    if base_unpack_dir is None:
        base_unpack_dir = '%s/versions' % os.getcwd()
    else:
        base_unpack_dir = base_unpack_dir.rstrip(os.path.sep)
    os.makedirs(base_unpack_dir, exist_ok=True)
    # find the specified version in the list and download it
    version_dir_path = '%s/%s/%s' % (base_unpack_dir, app, version)
    source_archive_name = '%s/%s_%s.%s' % (base_unpack_dir, app, version, archive_extension)
    if clean and os.path.isfile(source_archive_name):
        os.unlink(source_archive_name)
    if not os.path.isfile(source_archive_name):
        source_download_url = MY_ATLASSIAN + version_download_map[version]
        browser.open(source_download_url)
        with open(source_archive_name, 'wb') as source_archive_file:
            source_archive_file.write(browser.response.content)
    with get_archive_object(archive_type, source_archive_name) as src:
        os.makedirs(version_dir_path, exist_ok=True)
        # the archive is expected to contain exactly one top-level directory
        top_dirs = list(set(d.split(os.path.sep)[0] for d in src.namelist()))
        if len(top_dirs) != 1:
            raise AtlassianSourceDownloadError("Couldn't unpack archive - unexpected contents.")
        # extract the archive to a temporary location that we can move from later if all goes well
        top_level_dir_name = top_dirs[0]
        archive_extraction_dir = mkdtemp()
        try:
            # unpack the archive to the temporary folder
            src.extractall(archive_extraction_dir)
            # remove the target directory if it already exists
            if os.path.isdir(version_dir_path):
                shutil.rmtree(version_dir_path)
            shutil.move(os.path.join(archive_extraction_dir, top_level_dir_name), version_dir_path)
        finally:
            # always discard the temp extraction dir, even on failure mid-move
            shutil.rmtree(archive_extraction_dir)
    if not keep:
        os.unlink(source_archive_name)
    # now that we've got the source, return the path it lives at
    return version_dir_path
def getAcademicCalendarEvents(url):
    """Scrape an academic-calendar page and group its events by semester.

    :param url: calendar page whose "#content-main tr" rows hold the schedule
    :returns: dict keyed "Season_Year" (e.g. "Spring_2015") mapping to lists of
              (event_name, start_date, end_date) tuples; dates are Google
              Calendar style "YYYY-MM-DD" strings for all-day events.
    """
    # Initialize ParseDateTime (natural-language date parsing)
    cal = parsedatetime.Calendar()
    # Initialize robobrowser
    browser = RoboBrowser(history=False)
    browser.open(url)
    event_tags = browser.select("#content-main tr")
    # Dictionary of semesters
    # in the form: {Spring_2015: [<events>], Fall_2014: [<events>]}
    semesters = {}
    # cycle through the event rows, adding them to the event array
    year = None
    season_year = ""  # i.e. Spring_2015, to be used as a key for semesters Dict
    for event in event_tags:
        ##### Scraping the web-pages #####
        # Headers in the Schedules are within <th></th>
        if len(event.find_all("th")) > 0:
            # grab the first word of the header (Usually the Season) and the year
            match = re.search(r'(\w+).*(\d\d\d\d)', event.find("th").text)
            season = match.group(1)
            year = match.group(2)
            season_year = season + "_" + year
            # Create a new entry in the semester dict
            semesters[season_year] = []
        else:
            # Event data is stored in 3 <td> tags:
            # The first tag is the name of the event
            # The second is the days of the week of the event (unused)
            # The third is the Month and date(s) of the event
            event_info = event.find_all("td")
            event_name = event_info[0].text
            messy_event_date = event_info[2].text
            ##### Extracting event info #####
            # These dates are pretty messy, so get the important stuff with regex
            match = re.search(r'(\w+)\s(\d+)-*(\d*)(\w*)\s*(\d*)', messy_event_date)
            start_date = match.group(1) + " " + match.group(2)  # i.e. December 1
            start_date_and_year = match.group(1) + " " + match.group(2) + " " + str(year)  # i.e. December 1 2014
            source_date = match.group(1) + " " + str(year)  # i.e. December 2014
            # Determine End Date based on single or multi-day event
            if len(match.group(3)) > 0:
                # Multi-Day Event within one month, i.e. December 13-15
                end_date = match.group(1) + " " + match.group(3)
            elif len(match.group(4)) > 0:
                # Range spanning two months, i.e. December 13 - January 12
                end_date = match.group(4) + " " + match.group(5)
            else:
                # Single Day Event
                end_date = start_date = start_date_and_year
            date_range_pair = None
            # BUG FIX: the original used `start_date is end_date` — an identity
            # test on strings that only succeeded because the single-day branch
            # aliases both names to the same object.  `==` expresses the intent
            # and is robust against interning differences.
            if start_date == end_date:
                # Single Day Event
                event_start_date = event_end_date = cal.parseDateText(start_date_and_year)
            else:
                date_range_string = start_date + "-" + end_date
                date_range_pair = cal.evalRanges(date_range_string, cal.parseDateText(source_date))
                event_start_date = date_range_pair[0]
                event_end_date = date_range_pair[1]
            # Grab Year, Month, Day from dates
            event_start_date = [event_start_date[0], event_start_date[1], event_start_date[2]]
            event_end_date = [event_end_date[0], event_end_date[1], event_end_date[2]]
            ##### Convert dates to datetime.date objects #####
            # This allows easy addition of days with timedelta (to make sure it works across months)
            start_date = datetime.date(event_start_date[0], event_start_date[1], event_start_date[2])
            end_date = datetime.date(event_end_date[0], event_end_date[1], event_end_date[2])
            # Fix off-by-one with multi-day event creation by adding a day to multi-day events
            # i.e. Google Calendar API interprets the end of an event to be the beginning of the
            # provided end date (12:00a.m.). We want that end date to be included, so add 1 day
            if not start_date == end_date:
                end_date += datetime.timedelta(days=1)
            # Format date in Google Calendar API style: YYYY-MM-DD
            # (since these are all all-day events)
            start_date = str(start_date.year) + "-" + str(start_date.month).zfill(2) + "-" + str(start_date.day).zfill(2)
            end_date = str(end_date.year) + "-" + str(end_date.month).zfill(2) + "-" + str(end_date.day).zfill(2)
            # Add event to the appropriate semester
            semesters[season_year].append((event_name, start_date, end_date))
    return semesters
#coding: utf-8 import re from robobrowser import RoboBrowser url = 'http://testerhome.com/account/sign_in/' b = RoboBrowser(history=True) b.open(url) # 获取登陆表单 login_form = b.get_form(action='/account/sign_in') print login_form # 输入用户名和密码 login_form['user[login]'].value = 'your account' login_form['user[password]'].value = 'your password' # 提交表单 b.submit_form(login_form) # 打印登陆成功的信息 print b.select('.alert.alert-success')[0].text
from os import path import sys import yaml import json with open(path.join(path.dirname(sys.argv[0]), ".hallon-credentials.yaml")) as f: CREDENTIALS = yaml.safe_load(f) URL = "https://www.hallon.se/mina-sidor" br = RoboBrowser(parser="lxml") br.open(URL) form = br.get_form(action="/logga-in") form["UserName"].value = CREDENTIALS["username"] form["Password"].value = CREDENTIALS["password"] br.submit_form(form) usage = br.select("p.usage")[0].text.replace(",", ".").split() remaining = round(float(usage[0]), 2) total = int(usage[2]) used = round(float(total-remaining), 2) used_pct = round(used*100/total, 1) days_remaining = int(br.select("p.usage-daysleft")[0].text.split()[0]) print(json.dumps({"total": total, "remaining": remaining, "used": used, "used_pct": used_pct, "days_remaining": days_remaining}))