def download_linke(coords, proxy, port, saveFile, saveMode):
    """Download monthly Linke turbidity values from soda-is.com for each
    (lon, lat) pair in *coords* and append them as CSV lines to *saveFile*.

    Python 2 module (uses print statements). On any failure mid-run, the
    not-yet-downloaded coordinates are dumped to ``<saveFile>_notdownloaded.txt``
    so the run can be resumed.

    :param coords: iterable of (lon, lat) pairs
    :param proxy: proxy host key (empty string = no proxy)
    :param port: proxy port/address value
    :param saveFile: output CSV path
    :param saveMode: mode passed to open(), e.g. "w" or "a"
    """
    print proxy, port
    print proxy != ""
    url = "http://www.soda-is.com/eng/services/service_invoke/gui.php?" + "xml_descript=soda_tl.xml&Submit2=Month"
    session = Session()
    # SSL verification disabled for the soda-is endpoint.
    session.verify = False
    if proxy != "":
        # NOTE(review): requests expects proxies keyed by URL scheme, e.g.
        # {'http': 'host:port'}. Using the proxy host itself as the key looks
        # wrong unless callers pass the scheme in `proxy` -- verify.
        proxies = {proxy: port}
        session.proxies = proxies
    br = RoboBrowser(session=session, parser="lxml")
    br.open(url)
    # The Linke request form is the second form on the page.
    linke_form = br.get_forms()[1]
    num = len(coords)
    index = 0
    with open(saveFile, saveMode) as f:
        try:
            for coord in coords:
                inlon, inlat = coord
                linke_form["lat"].value = inlat
                linke_form["lon"].value = inlon
                # The form has several submit buttons; "execute" is the one
                # that runs the query.
                sf = linke_form.submit_fields.getlist("execute")
                br.submit_form(linke_form, submit=sf[0])
                linke_table = br.find("table", {"cellspacing": "0", "cellpadding": "2"})
                linkes = get_monthly_linke_str(get_linke_values(linke_table))
                s = "%s,%s,%s\n" % (format(inlon, "0.5f"), format(inlat, "0.5f"), linkes)
                # Only write rows long enough to contain real monthly values
                # (a short row means the result table was empty/garbled).
                if len(s) > 48:
                    f.write(s)
                print "Done with point %i of %i: (%s, %s)" % (
                    index + 1,
                    num,
                    format(inlon, "0.5f"),
                    format(inlat, "0.5f"),
                )
                index += 1
                # Go back to the query form for the next coordinate.
                br.back()
            print "DONE!"
        except Exception as e:
            # Persist the remaining coordinates so the caller can retry them.
            not_dl = list(coords[index:])
            with open(saveFile + "_notdownloaded.txt", "w") as nd:
                for c in not_dl:
                    nd.write("%s,%s\n" % (str(c[0]), str(c[1])))
            print e
def robobrowser_edit():
    """Use robobrowser to increment population"""
    bot = RoboBrowser(history=True, parser='lxml', user_agent='a python robot')

    # --- authenticate ---
    bot.open(login_form.LOGIN_URL)
    signin = bot.get_form(action='#')
    print('form before {}'.format(signin))
    signin['email'].value = login_form.LOGIN_EMAIL
    signin['password'].value = login_form.LOGIN_PASSWORD
    print('form after {}'.format(signin))
    bot.submit_form(signin)

    # --- bump the population counter on the country page ---
    bot.open(COUNTRY_URL)
    country = bot.get_forms()[0]
    print('Population before:', country['population'].value)
    country['population'].value = str(int(country['population'].value) + 1)
    bot.submit_form(country)

    # --- re-fetch to confirm the increment stuck ---
    bot.open(COUNTRY_URL)
    country = bot.get_forms()[0]
    print('Population after:', country['population'].value)

    # --- dump some session diagnostics ---
    print('User-Agent')
    print(bot.session.headers['User-Agent'])
    print('Cookies')
    print(bot.session.cookies.items())
class vk_session:
    """RoboBrowser-backed session for the mobile vk.com site.

    Handles optional HTTP/HTTPS proxying, cookie reuse, sign-in (with
    optional captcha solving via ``vk_captcha``) and group creation.
    """

    def __init__(self, root_path, proxy="", cookies=""):
        """
        :param root_path: base URL of the site (used as Referer and landing page)
        :param proxy: "host:port" string; empty string disables proxying
        :param cookies: pre-existing cookie jar to reuse (optional)
        """
        self.is_signed = False
        self.proxy = proxy
        self.root_path = root_path
        session = requests.session()
        if proxy:
            # NOTE(review): requests only honours scheme keys such as 'http'
            # and 'https'; the 'ssl' entry is ignored by requests -- kept for
            # backward compatibility, but probably dead weight.
            session.proxies.update({'http': 'http://' + proxy, 'ssl': proxy, 'https': 'https://' + proxy})
        # Browser-like headers so vk.com serves the normal mobile pages.
        headers = {
            "ACCEPT": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "ACCEPT_ENCODING": "gzip, deflate, sdch",
            "ACCEPT_LANGUAGE": "ru-RU,ru;",
            "CONNECTION": "keep-alive",
            "REFERER": root_path,
            "UPGRADE_INSECURE_REQUESTS": "1",
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
        }
        session.headers = headers
        if cookies:
            session.cookies = cookies
        self.browser = RoboBrowser(session=session, timeout=4, history=False)

    def connect(self):
        """Open the landing page."""
        self.browser.open(self.root_path)
        print("connected")

    def sign_in(self, username, password, captcha):
        """Submit the login form; solves a captcha first when *captcha* is truthy.

        Re-raises any failure after logging the username for diagnostics.
        """
        try:
            form = self.browser.get_forms()[0]
            form["email"] = username
            form["pass"] = password
            if captcha:
                form["captcha_key"] = vk_captcha.decode(page=self.browser.parsed, root_path=self.root_path)
            self.browser.submit_form(form)
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit are
        # not intercepted; behavior otherwise unchanged (log + re-raise).
        except Exception:
            print(username)
            raise

    def create_new_group(self, name, group_type, public_type):
        """Create a new vk group/page with the given title and type."""
        self.browser.open("https://m.vk.com/groups?act=new")
        form = self.browser.get_forms()[0]
        form["title"] = name
        form["type"] = group_type
        form["public_type"] = public_type
        self.browser.submit_form(form)
        # Give the server a moment before the caller issues the next request.
        time.sleep(1)

    def enter_captcha(self):
        """Solve and submit the captcha form on the current page."""
        form = self.browser.get_forms()[0]
        form["captcha_key"] = vk_captcha.decode(page=self.browser.parsed, root_path=self.root_path)
        self.browser.submit_form(form)
class BKBrowser(object):
    """Automates the Burger King survey site: skips the intro page, fills the
    date/receipt form from ``config``, answers the questionnaire randomly and
    extracts the validation code shown at the end."""

    def __init__(self):
        # Browse url :
        self.result = None
        self.browser = RoboBrowser(parser="html.parser")
        self.browser.session.headers = config.headers
        # Mount with custom SSL Adapter
        self.browser.session.mount('https://', HTTPSAdapter())

    def _connect(self):
        # Get to website
        print("- Connecting to url ...")
        self.browser.open(config.url)

    def _skip_first_page(self):
        # The intro page has a single form whose submit advances the survey.
        button = self.browser.get_forms()[0]
        self.browser.submit_form(button)

    # Let's fill in the proper form !
    def _fill_form(self):
        # Keep answering pages until the validation-code <p> appears.
        while not self.browser.find('p', {'class': 'ValCode'}):
            # max_radio_map presumably maps each input name to its candidate
            # answers -- TODO confirm against its definition.
            inputs_map = max_radio_map(self.browser)
            f = self.browser.get_forms()[0]
            for i in f.keys():
                if f[i].value == '':
                    # Default to '1' when no candidate answers are known.
                    answers_list = inputs_map.get(i, ['1'])
                    f[i].value = random.choice(answers_list)
            f.serialize()
            self.browser.submit_form(f)

    def _fill_date_form(self):
        # Fill in Date/Time form and start the Questionnaire
        print("- Filling Forms Randomly ...")
        form = self.browser.get_forms()[0]
        form['JavaScriptEnabled'].value = '1'
        form['SurveyCode'].value = config.ID
        form['InputMonth'].value = config.date[0]
        form['InputDay'].value = config.date[1]
        form['InputHour'].value = config.time[0]
        form['InputMinute'].value = config.time[1]
        form.serialize()
        self.browser.submit_form(form)

    def get_validation_code(self):
        """Run the full survey flow and return the validation code text."""
        self._connect()
        self._skip_first_page()
        self._fill_date_form()
        self._fill_form()
        self.result = self.browser.find('p', {'class': 'ValCode'}).text
        return self.result

    def return_result(self):
        # Last code obtained by get_validation_code(), or None.
        return self.result
def new_token():
    """Log in to GitHub and create a new OAuth personal access token with a
    random 16-character description, returning the token string.

    Scrapes the token off the settings page via regex.
    Fix: the original had an unreachable ``print(result)`` after ``return``.
    """
    random_string = ''.join(
        random.choice(string.ascii_uppercase + string.digits) for _ in range(16))
    from robobrowser import RoboBrowser
    browser = RoboBrowser()
    login_url = 'my_url'
    browser.open('https://github.com/login')
    form = browser.get_form()
    # NOTE(review): credentials are hard-coded / blank here -- they should be
    # supplied by the caller or environment, not committed.
    form["login"].value = "thirstycode"
    form["password"].value = ""
    browser.submit_form(form)
    browser.open('https://github.com/settings/tokens/new')
    # The token-creation form is the fourth form on the page.
    form = browser.get_forms()
    form[3]["oauth_access[description]"].value = random_string
    form[3]["oauth_access[scopes][]"].value = [
        'repo', 'admin:org', 'admin:public_key', 'admin:repo_hook',
        'admin:org_hook', 'gist', 'notifications', 'user', 'delete_repo',
        'write:discussion', 'admin:gpg_key'
    ]
    browser.submit_form(form[3])
    # NOTE(review): .parsed is a property; calling it invokes the soup
    # (BeautifulSoup __call__ == find_all) -- kept as-is since the regex
    # search below evidently worked on its stringified output. Verify.
    src = str(browser.parsed())
    start = '<code class="token" id="new-oauth-token">'
    end = '</code>'
    result = re.search('%s(.*)%s' % (start, end), src).group(1)
    return (result)
def get_video_url(url):
    """Open a video page, optionally switch the site to simplified Chinese,
    and return (direct_mp4_url, sanitized_title).

    :param url: the video page URL
    :return: tuple of the constructed direct video link and the title with
             punctuation/CJK symbols collapsed to spaces
    """
    br = RoboBrowser(history=True, parser='lxml')
    br.open(url)
    # Interactive prompt (Chinese): "Convert to Chinese? (y/n)"; empty
    # answer defaults to yes.
    cn = input('请问是否要转换为中文?(y/n)')
    if not cn:
        cn = 'y'
    if cn == 'y':
        # shift to simplified chinese
        lang = br.get_forms()[0]
        lang['session_language'].options = ['cn_CN']
        lang['session_language'].value = 'cn_CN'
        br.submit_form(lang)
    # get video title
    vid_title = br.find('div', {'id': 'viewvideo-title'}).text.strip()
    print('the video you want to download is: {0}'.format(vid_title))
    print('-----------------------------------------------------------')
    # get video id: the 6-digit id is embedded in the onclick handler of
    # the "#featureVideo" anchor.
    vid_id = re.findall(
        r'\d{6}',
        br.find('a', {
            'href': '#featureVideo'
        }).attrs['onclick'])[0]
    # get real video link (hard-coded CDN host)
    vid_real_url = 'http://192.240.120.34//mp43/{}.mp4'.format(vid_id)
    # Strip ASCII and full-width punctuation from the title for use as a
    # file name.
    return vid_real_url, re.sub(
        """[\s+\.\!\/_,$%^*(+\"\']+|[+——!,。|?、~@#¥%……&*():]+""",
        " ", vid_title).strip()
def ExtractONEPAGE(page):
    """Run *page* through an online image-extractor tool and append the first
    matching wp.com-hosted image URL to HenRUniqueComic.txt.

    Retries opening the tool until its form is actually served.
    """
    final_res = ''
    browser = RoboBrowser(history=True,
                          parser='html.parser',
                          user_agent='Chrome/41.0.2228.0')
    # The tool sometimes serves a page without the form; loop until it
    # appears.
    while True:
        print('loop')
        browser.open(
            'http://tools.prowebguru.com/free-online-image-extractor/free_online_image_extractor_tool.php'
        )
        form = browser.get_forms({'class': 'form-horizontal'})
        if len(form) != 0:
            print('broke')
            break
    this_form = form[0]
    this_form["website"] = page
    browser.submit_form(this_form)
    # Scan the extractor's result page for image links; skip thumbnails
    # ('/tbn/') and keep only wp.com-hosted images. The LAST match wins.
    img_links = browser.find_all('img', src=True)
    for line in img_links:
        if '/tbn/' not in line['src'] and '.wp.com' in line['src']:
            final_res = line['src']
    print(final_res)
    if final_res != '':
        with open('HenRUniqueComic.txt', 'a') as f:
            f.write(final_res + '\n')
class Scraper:
    """Logs in to the MSRIT parents portal and exposes the resulting page."""

    def randString(self, n):
        """Return a filler run of n 'l' characters."""
        return "l" * n

    def check(self, usn):
        """Obfuscate *usn* the way the portal expects: interleave two filler
        characters after every input character, then base64-encode."""
        interleaved = "".join(ch + self.randString(2) for ch in usn)
        return base64.b64encode(interleaved.encode())

    def scrape(self, usn, dob):
        """Submit the login form with the USN and the obfuscated DOB."""
        self.browser = RoboBrowser(history=False, parser='html.parser')
        self.browser.open('http://parents.msrit.edu/index.php')
        login = self.browser.get_forms()[0]
        login['username'].value = usn
        # The site reads the obfuscated DOB from both password fields.
        login['password'].value = self.check(dob)
        login['passwd'].value = self.check(dob)
        self.browser.submit_form(login)

    def getHTML(self):
        """ returns parsed HTML """
        return str(self.browser.parsed)
def getBrowser(contest):
    """Return a RoboBrowser already authenticated against beta.atcoder.jp."""
    session = RoboBrowser(parser="html.parser")
    session.open('https://beta.atcoder.jp/login')
    login = session.get_forms()[0]
    login['username'] = Auth.atcoder()['login']
    login['password'] = Auth.atcoder()['password']
    session.submit_form(login)
    return session
def getBrowser(contest):
    """Open the *contest* page and log in with Yandex.Contest credentials.

    :param contest: contest URL whose first form is the login form
    :return: the authenticated RoboBrowser
    """
    browser = RoboBrowser(parser="html.parser")
    browser.open(contest)
    # (removed unused `soup = browser.parsed` local)
    form = browser.get_forms()[0]
    form['login'] = Auth.yandexcontest()['login']
    form['passwd'] = Auth.yandexcontest()['password']
    browser.submit_form(form)
    return browser
def login(): browser = RoboBrowser(parser="html.parser") browser.open("https://www.codechef.com") login_form = browser.get_forms()[0] if login_form == None: print "Some Error Occurred" exit(0) login_form['name'] = User['username'] login_form['pass'] = User['password'] browser.submit_form(login_form) #authentication yet to be implemented return browser
def getBrowser(contest):
    """Follow the contest page's login link and authenticate on
    official.contest.yandex.ru; returns the logged-in browser."""
    browser = RoboBrowser(parser="html.parser", user_agent='Mozilla/5.0')
    browser.open(contest)
    # The login entry point is linked from the contest page itself.
    login_links = browser.parsed.findAll('a', 'link link_access_login')
    browser.open('https://official.contest.yandex.ru%s' % login_links[0]['href'])
    login = browser.get_forms()[0]
    login['login'] = Auth.opencup()['login']
    login['password'] = Auth.opencup()['password']
    browser.submit_form(login)
    return browser
def reload_pyanywhr_app(username=None, password=None):
    """Log in to PythonAnywhere and press the web app's "reload" button.

    Prompts for missing credentials, asserts each step returned HTTP 200,
    and returns the final response body.
    """
    if username is None:
        # NOTE(review): the original source was scrubbed by a credential
        # sanitizer here ("******" spans). The statements that prompted for
        # the password, constructed the browser, opened the login URL and
        # set the Referer header were destroyed and cannot be recovered --
        # the next line is preserved byte-for-byte as found, and this
        # function cannot run until it is reconstructed.
        username = raw_input("Username: "******"Password: "******"Referer"] = url_login
    form = browser.get_forms()[0]
    form["username"].value = username
    form["password"].value = password
    browser.submit_form(form)
    assert browser.response.status_code == 200
    # Navigate to the web-app dashboard and confirm we were not redirected
    # (a redirect would indicate the login failed).
    browser.open(url_web_app)
    assert browser.response.status_code == 200
    assert browser.url == url_web_app
    # The reload button is a small dedicated form.
    form = browser.get_forms(class_="reload_web_app")[0]
    browser.submit_form(form)
    assert browser.response.status_code == 200
    print("service reloaded")
    return browser.response.text
def __init__(self, v_url, v_user, v_password):
    """Open a RoboBrowser session against *v_url* and authenticate through
    its /user/login page; exits the process if anything fails.

    Stores the base URL as ``self.v_url`` and the browser as ``self.br``.
    """
    try:
        session = RoboBrowser(parser="html.parser")
        session.open("{}{}".format(v_url.rstrip('/'), "/user/login"))
        # The login form is the second form on the page.
        login = session.get_forms()[1]
        login['username'].value = v_user
        login['password'].value = v_password
        session.submit_form(login)
    except Exception as error:
        print("Could not create browser: {}".format(error))
        sys.exit(1)
    self.v_url = v_url
    self.br = session
class XSSFinder:
    """Crawls a site's internal links, injects a test payload into every form
    field, and records fields whose form submission succeeds as potential
    XSS entry points. Results are rendered as text by get_xss_flaws()."""

    def __init__(self, url):
        # Accumulated XSSFlaw instances (deduplicated).
        self.list_xss = []
        # self.url = url
        self.browser = RoboBrowser(parser=PARSER, history=True)
        self.browser.open(url)
        self.links_finder = LinksFinder(self.browser.url)

    def find(self):
        """Visit every valid link and probe each field of each form."""
        links = self.links_finder.get_valid_links()
        for link in links:
            self.browser.open(link)
            forms = self.browser.get_forms()
            for form in forms:
                fields = form.fields
                for field in fields:
                    # Inject the probe string and try to submit.
                    form[field].value = VULNERABILITY_TESTING_STRING
                    self.validate_xss_weakness(form, field)

    def validate_xss_weakness(self, form, field):
        """Submit *form*; a successful submit flags *field* as a candidate.
        Forms robobrowser refuses to submit are simply skipped."""
        try:
            self.browser.submit_form(form)
            self.add_threat_to_list(field, form.method)
        except InvalidSubmitError:
            pass

    def add_threat_to_list(self, parameter, xss_type):
        # Record the flaw at the browser's current URL, skipping duplicates.
        threat = XSSFlaw(self.browser.url, parameter, xss_type)
        if threat not in self.list_xss:
            self.list_xss.append(threat)

    def get_xss_flaws(self):
        """Run the scan and return a human-readable report string, or the
        NO_RESULT_FOUND constant when nothing was flagged."""
        self.find()
        if len(self.list_xss) == 0:
            return NO_RESULT_FOUND
        else:
            result = EMPTY_STRING
            for xss_threat in self.list_xss:
                result += URL
                result += xss_threat.get_url()
                result += NEW_LINE
                result += PARAMETER
                result += xss_threat.get_parameter()
                result += NEW_LINE
                result += TYPE
                result += xss_threat.get_xss_type()
                result += TWO_NEW_LINES
            return result
class infs_brsr:
    """This browser will have functions useful to someone browsing the
    Infusionsoft front end programatically.
    """

    def __init__(self, appname, username, password, *args, **kwargs):
        self.loggedin = False
        self.browser = RoboBrowser(history=True)
        self.appname = appname
        self.username = username
        self.password = password
        # Note: no trailing slash -- paths appended below must supply one.
        self.baseurl = 'https://' + self.appname + '.infusionsoft.com'

    def openbase(self):
        """Open the app's landing page."""
        self.browser.open(self.baseurl)

    def login(self):
        """Fill and submit the login form, then follow the post-login link."""
        self.openbase()
        loginform = self.browser.get_form()
        loginform.fields['username'].value = self.username
        loginform.fields['password'].value = self.password
        self.browser.submit_form(loginform)
        # This next step is probably a bad idea. It needs
        # some form of control
        self.browser.follow_link(self.browser.get_links()[1])
        self.loggedin = True

    def getapikey(self):
        """Fetch and cache the app's encrypted API key from the settings page."""
        if not self.loggedin:
            self.login()
        # BUG FIX: baseurl has no trailing slash, so the original concatenation
        # produced "...infusionsoft.comapp/..." -- a '/' separator is required.
        self.browser.open(self.baseurl + '/app/miscSetting/itemWrapper?systemId=nav.admin&settingModuleName=Application&settingTabName=Application')
        pageSoup = BeautifulSoup(self.browser.response.content, 'html.parser')
        self.apikey = pageSoup.findAll(id='Application_Encrypted_Key:_data')[0].text
        return self.apikey

    def importContactCSV(self, pathToCSV='/home/jlmarks/importme.csv'):
        """Push a CSV through the contact-import wizard."""
        if not self.loggedin:
            self.login()
        importURL = "https://" + self.appname + ".infusionsoft.com/Import/jumpToWizard.jsp?update=false&profileClass=com.infusion.crm.db.importer.profiles.ContactProfile"
        self.browser.open(importURL)
        frms = self.browser.get_forms()
        # NOTE(review): if no form carries an 'id' field, `correctform` below
        # is unbound and this raises NameError -- preserved behavior, but
        # worth a guard upstream.
        for eachform in frms:
            if 'id' in eachform.fields.keys():
                self.thisimportid = eachform['id'].value
                correctform = eachform
        correctform.fields.pop('Back')
        correctform.fields['importFile'].value = open(pathToCSV, 'rb')
        self.browser.submit_form(correctform)
def arglogin(username, password):
    """Log in to aaaaarg.fail and return the authenticated browser, or None
    when the login form (second form on the page) is absent.

    Fix: the bare ``except:`` is narrowed to ``except IndexError`` -- the
    only expected failure of indexing ``get_forms()[1]`` -- so unrelated
    errors are no longer silently converted into a None return.
    """
    url = "http://aaaaarg.fail/auth/login"
    session = Session()
    br = RoboBrowser(session=session, history=True, parser="lxml")
    br.open(url)
    try:
        form = br.get_forms()[1]
    except IndexError:
        return None
    form['email'].value = username
    form['password'].value = password
    br.submit_form(form)
    return br
def get_email_by_cin(cin):
    """Look up a company's registered e-mail on the MCA portal by its CIN.

    Returns the lower-cased address, or None when no result table or no
    'Email Id' row is present.
    """
    browser = RoboBrowser()
    # Rotate the UA to avoid trivial bot blocking.
    browser.session.headers['User-Agent'] = random.choice(user_agents)
    browser.open('http://www.mca.gov.in/mcafoportal/viewCompanyMasterData.do')

    search = browser.get_forms()[-1]
    search['companyID'].value = cin
    browser.submit_form(search)

    results = browser.find('table', attrs={'class': 'result-forms'})
    if not results:
        return None
    header_cell = results.find('td', text='Email Id')
    if not header_cell:
        return None
    value_cell = header_cell.findNext('td')
    return value_cell.text.strip().lower()
def newfunc():
    """Fetch screener.in, mirror its response headers into the session, and
    locate the company-search input on the page.

    Fix: the local previously named ``open`` shadowed the builtin; renamed.
    Dead commented-out experiments removed.
    """
    url = 'https://www.screener.in/'
    start = requests.session()
    landing = start.get(url)
    # Reuse the headers the site returned (cookies etc. ride on the session).
    start.headers = landing.headers
    rb = RoboBrowser(session=start, history=True, parser="html.parser")
    rb.open(url)
    ff = rb.get_forms()[0]
    print(ff)
    yInputControl = rb.find(placeholder="Company search...")
    # NOTE(review): assigning .value on a BeautifulSoup tag only mutates the
    # local parse tree -- it does not type into the live site. Verify intent.
    yInputControl.value = 'PCPL'
def _login(user="", pw=""):
    """ Will login using given credentials to Rock. """
    browser = RoboBrowser(history=True, parser="lxml")
    browser.open(LOGIN_URL)
    login = browser.get_forms()[0]
    # TODO: dynamically find user field ID and pw ID
    login[USER_FIELD_ID].value = user
    login[PW_FIELD_ID].value = pw
    browser.submit_form(login, submit=login[LOGIN_BTN_ID])
    return browser
def scrape_this_page(page_name):
    """Scrape a DeviantArt gallery page: collect candidate image URLs, log in
    with a RoboBrowser, download each image and return how many were grabbed.

    NOTE(review): credentials below were scrubbed to '******' by a sanitizer
    and must be supplied before this can log in.
    """
    page = requests.get(page_name)
    if page.status_code == requests.codes.ok:
        # If a folder on the desktop does not already exist for this given artist, create one, then set it as the
        # directory to save images to
        # Anchors inside the comments body (div.gr-body) are excluded.
        pictures = get_image_urls(
            html.fromstring(page.content).xpath('//a[not(ancestor::div[@class="gr-body"])]/@href'))
        downloaded_images = []
        s = requests.session()
        s.headers.update({'Referer': 'http://www.deviantart.com/'})
        # Pool of UA strings; one is picked at random per run.
        USERAGENTS = (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
            'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
            'Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9',
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5',
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/6.0',
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; pl; rv:1.9.1) Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)'
        )
        browser = RoboBrowser(history=False, session=s, tries=3,
                              user_agent=random.choice(USERAGENTS))
        # Log in; the login form is the second form on the page.
        browser.open(
            'https://www.deviantart.com/users/login?ref=http%3A%2F%2Fwww.deviantart.com%2F&remember_me=1')
        form = browser.get_forms()[1]
        form['username'] = '******'
        form['password'] = '******'
        browser.submit_form(form)
        for picture in pictures:
            # Make sure image has not already been downloaded, and that it is not simply a duplicate url with
            # the comments section open
            if picture not in downloaded_images and '#comments' not in picture:
                browser.open(picture)
                deviation_page = str(browser.parsed)
                if download_img(deviation_page, get_folder(page_name)):
                    print("Grabbed " + picture)
                    downloaded_images.append(picture)
                else:
                    print('Bad Url')
        return len(downloaded_images)
def GeneOntology(name):
    """Submit the gene-list file *name* to geneontology.org's enrichment form
    (species IXOSC) and save the comparison CSV under GOoutput/<name>.

    Fixes: the input file handle was never closed and its contents were built
    with quadratic string concatenation -- replaced with a ``with``/``read()``;
    the output file now also uses ``with``; unused locals and a no-op
    ``os.listdir(".")`` were removed.
    """
    print("Starting GeneOntology for " + name)
    br = RoboBrowser(parser="html.parser")
    br.open("http://geneontology.org/")
    # The enrichment form is the second form on the landing page.
    form = br.get_forms()[1]
    form["species"].value = "IXOSC"
    os.chdir("/home/david/Documents/blast/Blastfiles/outputfiles/Genelists")
    with open(name, "r") as gene_file:
        # Whole-file read is byte-identical to the old line-by-line concat.
        form["input"] = gene_file.read()
    br.submit_form(form)
    # Follow the link to the downloadable comparison table.
    table_link = br.find("a", href=re.compile("/tools/compareToRefListTxt.jsp"))
    br.follow_link(table_link)
    csv_content = br.response.content.decode("utf-8")
    with open("GOoutput/" + name, "w") as savefile:
        savefile.write(csv_content)
    print("finished")
def weblogic_checkout(URL_1, URL_2, username, password):
    """Sign in to the WebLogic console at URL_1, then print every cell of the
    server-state table found at URL_2."""
    # Open the sign-on screen; it carries exactly one form.
    browser = RoboBrowser(history=True)
    browser.open(URL_1)
    # The form's inputs are named j_username / j_password (found via the
    # browser inspector).
    signon = browser.get_forms()[0]
    signon['j_username'] = username
    signon['j_password'] = password
    browser.submit_form(signon)

    # The current-server-state table lives on a second page.
    browser.open(URL_2)
    data = browser.find('table', id="genericTableFormtable")

    # Print each cell followed by a blank-ish separator line.
    for info in data.findAll('td'):
        print(str(info))
        print(' ')
def fetch_highscores(first_name):
    """Look up a player's OSRS highscores page and return their total level
    (as the raw string extracted from the page)."""
    # First, a RoboBrowser opens the highscores list.
    br = RoboBrowser()
    br.open('https://secure.runescape.com/m=hiscore_oldschool/hiscorepersonal')
    # To access a player's highscores, we must have RoboBrowser enter their username in the
    # 'search player' form. There are multiple forms on the page, but the 'search player'
    # form is the first one.
    form = br.get_forms()[0]
    # 'user1' is the name attribute of the form. We get the username from the first name using
    # the file 'usernames'. The purpose of a separate file for usernames is so I can present
    # this code but keep my friends and my usernames a secret.
    form.fields['user1'].value = get_username(first_name)
    # Submit the form. We are now in first_name's highscores page.
    br.submit_form(form)
    # src is a long HTML string which we will look through for the total level.
    # NOTE(review): .parsed is a property; calling it invokes the soup
    # (find_all), so this stringifies a tag list rather than the page --
    # evidently good enough for the search below, but verify.
    src = str(br.parsed())
    # Here is use BeautifulSoup. It's probably not necessary to switch from RoboBrowser, but
    # it was easier for me to get the code working.
    soup = BeautifulSoup(src, features='html.parser')
    # At the time of writing this code, 'class_ = centerDiv' is the closest location for the
    # total level.
    posts = soup.find_all(class_='centerDiv')
    player_string = posts[0].get_text()
    # 'player_string' has a lot of new line characters, and I only want one piece of the string.
    # So, I turn the string into a list using newline characters as the break points.
    player_list = player_string.splitlines()
    # The total level is at index number 47, so extract it.
    # NOTE(review): the magic index 47 is layout-dependent and will break
    # silently if the site's markup changes.
    total_level = player_list[47]
    return total_level
def ExtractFirstImg(url):
    """Resolve a comic's first reading page from its landing page, run it
    through an online image extractor, and append "<img>|<url>" to HenR.txt."""
    browser = RoboBrowser(history=True,
                          parser='html.parser',
                          user_agent='Chrome/41.0.2228.0')

    # Find the link behind the comic's "read now" button.
    browser.open(url)
    read_button = browser.find('div', {'class': 'read-now'})
    first_page = read_button.find('a', href=True)['href']

    # Feed that page to the online image-extractor tool.
    browser.open(
        'http://tools.prowebguru.com/free-online-image-extractor/free_online_image_extractor_tool.php'
    )
    extract_form = browser.get_forms({'class': 'form-horizontal'})[0]
    extract_form["website"] = first_page
    browser.submit_form(extract_form)

    # Keep the LAST non-thumbnail image found on the result page.
    final_res = ''
    for img in browser.find_all('img', src=True):
        if '/tbn/' not in img['src']:
            final_res = img['src'] + '|' + url
    print(final_res)
    with open('HenR.txt', 'a') as f:
        f.write(final_res + '\n')
def get_portal_auth() -> str:
    """
    Attempts login to the Club1909 page and retrieves the cookie FortressPortalAuth
    :return:
    """
    browser = RoboBrowser(session, history=True)
    browser.open(LOGIN_FORM_URL)

    # TODO: check get_forms returns one value
    # TODO: check login errors / exceptions
    login = browser.get_forms()[0]
    login['email'] = os.environ['club1909_username']
    login['password'] = os.environ['club1909_password']
    logging.debug(
        f"Attempt to login with {os.environ['club1909_username']} and {os.environ['club1909_password']} "
    )
    browser.submit_form(login)

    logging.info(
        f"Found portal Auth code {browser.session.cookies['.FortressPortalAuth']}"
    )
    return browser.session.cookies['.FortressPortalAuth']
pages = (following_users_num // 48) + 1 else: pages = following_users_num // 48 #タグ除去用 p = re.compile(r"<[^>]*?>") # [jump:1]形式除去用 jump = re.compile(r"\[jump:.+\]") #ファイルエンコード設定用 character_encoding = 'utf_8' # Webスクレイパーのログイン処理 pixiv_url = 'https://www.pixiv.net' browser = RoboBrowser(parser='lxml', history=True) browser.open('https://accounts.pixiv.net/login') form = browser.get_forms('form', class_='')[0] form['pixiv_id'] = client_info["pixiv_id"] form['password'] = client_info["password"] browser.submit_form(form) # フォローユーザー一覧ページのURLを設定 target_url = 'https://www.pixiv.net/bookmark.php?type=user&rest=show&p=' # 全てのフォローユーザーのユーザIDを取得 following_users_id = [] for i in range(1, pages + 1): print(target_url + str(i)) browser.open(target_url + str(i)) following_users = browser.find(class_='members') for user in following_users.find_all("input"): following_users_id.append(user.get("value"))
def main():
    """This loops through every account in accounts.csv. Appending all their
    orders into 1 local html. That html file uses css pulled from amazon.com
    so it looks the exact same, and all of the links work, except the ones
    that require login.
    """
    # Bootstrap the two data files on first run.
    if not os.path.isfile("history.html"):
        makeHistoryFile()
    if not os.path.isfile("accounts.csv"):
        makeAccountFile()
        print "accounts.csv file made. Fill in email/passwords and run again."
        return 1
    with open("accounts.csv", "rU") as csvFile:
        reader = csv.reader(csvFile)
        # Each row: email, password, update-flag ("true" enables the account).
        for row in reader:
            email = str(row[0])
            password = str(row[1])
            update = str(row[2])
            if update.lower() == "true":
                # html5lib parser required for broken html on gameSplits
                s = requests.Session()
                s.headers[
                    "User-Agent"
                ] = "Mozilla (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.7 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.7"
                browser = RoboBrowser(history=True, parser="html5lib", session=s)
                # Amazon sign-in page (long OpenID redirect URL).
                browser.open(
                    "https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3Fref_%3Dnav_ya_signin"
                )
                form_signIn = browser.get_forms()[0]
                form_signIn["email"] = email
                form_signIn["password"] = password
                browser.submit_form(form_signIn)
                # NOTE(review): this URL contains a literal "%s" that is never
                # filled in (no % formatting applied) -- the year filter is
                # therefore sent verbatim. Verify against the original repo.
                browser.open(
                    "https://www.amazon.com/gp/css/history/orders/view.html?orderFilter=year-%s&startAtIndex=1000"
                )
                orders = browser.find_all(class_="a-box-group a-spacing-base order")
                # Open history.html in append+read mode, rewind, and collect
                # the order ids already stored between the marker comments.
                with open(r"./history.html", "a+") as historyFile:
                    historyFile.seek(0)
                    storedOrderIds = []
                    tempOrder = ""
                    storeLine = False
                    print "Collected orders from history.html"
                    for line in historyFile:
                        if line == "<!-- Start Order -->\n":
                            storeLine = True
                            continue
                        if line == "<!-- End Order -->\n":
                            storedOrderIds.append(getOrderId(cStringIO.StringIO(tempOrder)))
                            tempOrder = ""
                            storeLine = False
                        if storeLine:
                            tempOrder += line
                    print "Orders stored", len(storedOrderIds)
                    print "Find/Adding new orders for", email
                    # Append only orders not already present in the file.
                    for order in orders:
                        orderId = getOrderId(cStringIO.StringIO(order.__str__()))
                        if not orderId in storedOrderIds:
                            print "adding order", orderId
                            historyFile.write("\n<!-- Start Order -->\n")
                            historyFile.write(getAccountHtml(email))
                            historyFile.write(order.__str__())
                            historyFile.write("\n<!-- End Order -->\n")
    print "Done"
class Interaction(object):
    """Drives scripted interactions (form fills, link clicks, JS redirects)
    against web pages during an authentication/test flow, using a RoboBrowser
    for parsing and an external HTTP client (*httpc*) for raw requests."""

    def __init__(self, httpc, interactions=None, verify_ssl=True):
        # httpc: object with a .send(url, method, ...) API used for raw GETs.
        self.httpc = httpc
        self.browser = RoboBrowser()
        # interactions: list of dicts, each with a "matches" spec and a "type".
        self.interactions = interactions
        self.verify_ssl = verify_ssl

    def pick_interaction(self, response, base):
        """Return the first configured interaction whose every "matches"
        criterion (url / title / content) matches *response*; raise
        InteractionNeeded when none does, None when none are configured."""
        if self.interactions is None:
            return None
        self.browser._update_state(response)
        _bs = self.browser.parsed
        # NOTE(review): `unic` is never assigned anything but "" here, so the
        # "content" criterion can never match -- looks vestigial; verify.
        unic = ""
        for interaction in self.interactions:
            _match = 0
            for attr, val in list(interaction["matches"].items()):
                if attr == "url":
                    if val == base:
                        _match += 1
                elif attr == "title":
                    if _bs is None:
                        break
                    if _bs.title is None:
                        break
                    if val in _bs.title.contents:
                        _match += 1
                    else:
                        # Fall back to substring search over the title parts.
                        _c = _bs.title.contents
                        if isinstance(_c, list) and not isinstance(_c, str):
                            for _line in _c:
                                if val in _line:
                                    _match += 1
                                    continue
                elif attr == "content":
                    if unic and val in unic:
                        _match += 1
            # All criteria must have matched.
            if _match == len(interaction["matches"]):
                return interaction
        raise InteractionNeeded("No interaction matched")

    def pick_form(self, forms, **kwargs):
        """
        Picks which form in a web-page that should be used

        :param forms: A list of robobrowser.Forms instances
        :return: The picked form or None if no form matched the criteria.
        """
        _form = None
        if len(forms) == 1:
            _form = forms[0]
        else:
            if "pick" in kwargs:
                # "pick" criteria: match on form attributes, on a control's
                # value, or on the form method; all given keys must hold.
                _dict = kwargs["pick"]
                for form in forms:
                    if _form:
                        break
                    for key, _ava in list(_dict.items()):
                        if key == "form":
                            _keys = list(form.attrs.keys())
                            for attr, val in list(_ava.items()):
                                if attr in _keys and val == form.attrs[attr]:
                                    _form = form
                        elif key == "control":
                            prop = _ava["id"]
                            _default = _ava["value"]
                            try:
                                orig_val = form[prop]
                                if isinstance(orig_val, str):
                                    if orig_val == _default:
                                        _form = form
                                elif _default in orig_val:
                                    _form = form
                            except KeyError:
                                pass
                            except Exception as err:
                                pass
                        elif key == "method":
                            if form.method == _ava:
                                _form = form
                        else:
                            _form = None
                        if not _form:
                            break
            elif "index" in kwargs:
                _form = forms[int(kwargs["index"])]
        return _form

    def select_form(self, response, **kwargs):
        """
        Pick a form on a web page, possibly enter some information and
        submit the form.

        :param orig_response: The original response (as returned by requests)
        :return: The response do_click() returns
        """
        self.browser._update_state(response)
        forms = self.browser.get_forms()
        form = self.pick_form(forms, **kwargs)
        if not forms:
            raise Exception("Can't pick a form !!")
        if "set" in kwargs:
            # Fill form controls from the "set" mapping; keys starting with
            # "_" and the designated "click" key are skipped.
            for key, val in list(kwargs["set"].items()):
                if key.startswith("_"):
                    continue
                if "click" in kwargs and kwargs["click"] == key:
                    continue
                try:
                    form[key].value = val
                except (ValueError):
                    pass
                except Exception as err:
                    raise
        # cntrl = form.find_control(key)
        # if isinstance(cntrl, ListControl):
        #     form[key] = [val]
        # else:
        #     raise
        # If the form posts back to one of the tester's own endpoints, don't
        # actually submit -- return the would-be payload instead.
        if form.action in kwargs["tester"].my_endpoints():
            _res = {}
            for name, cnt in form.fields.items():
                _res[name] = cnt.value
            return _res
        try:
            requests_args = kwargs["requests_args"]
        except KeyError:
            requests_args = {}
        self.browser.submit_form(form, **requests_args)
        return self.browser.state.response

    #noinspection PyUnusedLocal
    def chose(self, orig_response, path, **kwargs):
        """
        Sends a HTTP GET to a url given by the present url and the
        given relative path.

        :param orig_response: The original response
        :param content: The content of the response
        :param path: The relative path to add to the base URL
        :return: The response do_click() returns
        """
        try:
            _trace = kwargs["trace"]
        except KeyError:
            _trace = False
        if not path.startswith("http"):
            try:
                _url = orig_response.url
            except KeyError:
                _url = kwargs["location"]
            # Re-root the relative path on the original response's scheme+host.
            part = urlparse(_url)
            url = "%s://%s%s" % (part[0], part[1], path)
        else:
            url = path
        return self.httpc.send(url, "GET", trace=_trace)
        #return resp, ""

    def redirect(self, orig_response, url_regex, **kwargs):
        """
        Simulates a JavaScript redirect by extracting the target of the
        redirection from the page content using the given regex

        :param orig_response: The original response
        :param url_regex: The regex that defines how the target of the
            redirect can be extracted from the content
        """
        matches = re.findall(url_regex, orig_response.content)
        no_of_matches = len(matches)
        if not no_of_matches == 1:
            raise InteractionNeeded("Expected single match but found %d",
                                    no_of_matches)
        url = matches[0]
        return self.httpc.send(url, "GET")

    def post_form(self, response, **kwargs):
        """
        The same as select_form but with no possibility of changing the
        content of the form.

        :param response: The original response (as returned by requests)
        :return: The response submit_form() returns
        """
        # NOTE(review): pick_form expects a list of forms but receives the
        # raw response here (select_form extracts forms first) -- looks like
        # a latent bug; verify before relying on this path.
        form = self.pick_form(response, **kwargs)
        return self.browser.submit_form(form)

    def response(self, response, **kwargs):
        # Trivial handler: expose the response body.
        return {"text": response.text}

    #noinspection PyUnusedLocal
    def interaction(self, args):
        """Dispatch an interaction spec to its handler method by "type"."""
        _type = args["type"]
        if _type == "form":
            return self.select_form
        elif _type == "link":
            return self.chose
        elif _type == "response":
            return self.response
        elif _type == "redirect":
            return self.redirect
        elif _type == "javascript_redirect":
            return self.redirect
        else:
            # Fallback handler defined elsewhere in the module.
            return no_func
class NetmagisClient(object):
    """Thin client that drives the Netmagis web UI through RoboBrowser.

    All methods work by opening a Netmagis page, filling the relevant HTML
    form and submitting it, then scanning the response body for Netmagis's
    error banner.  Methods return shell-style status codes: 0 on success,
    1 on failure (except ``looklarge``, which returns an address string).
    """

    url = None      # Netmagis base URL (must end with '/', pages are appended)
    casurl = None   # CAS single-sign-on login URL
    br = None       # the shared RoboBrowser instance
    c = None        # NOTE(review): never used in the visible code — confirm before removing
    s = None        # NOTE(review): never used in the visible code — confirm before removing

    def __init__(self, url, casurl):
        # :param url: Netmagis application base URL
        # :param casurl: CAS login endpoint used for authentication
        self.url = url
        self.casurl = casurl
        self.br = RoboBrowser(history=True, parser='lxml')

    # call the loginURL to authenticate
    def caslogin(self, login, passwd):
        """Authenticate against CAS for the Netmagis 'start' service.

        Returns 0 when the post-login page contains 'Logged as', 1 otherwise.
        """
        uri = self.casurl+"?service="+self.url+"start"
        self.br.open(uri)
        form = self.br.get_form()
        form['username'].value = login
        form['password'].value = passwd
        self.br.submit_form(form)
        returnvalue = self.br.response.content.decode('utf8')
        if 'Logged as' in returnvalue:
            return 0
        else:
            return 1

    def addvhost(self, data):
        """Add a virtual-host alias (CNAME-like entry) via the 'add' page.

        ``data`` must contain 'name', 'domain', 'nameref', 'domainref'.
        Returns 0 on success, 1 if Netmagis reports an error.
        """
        uri = self.url+"add"
        self.br.open(uri)
        # form index 2 is the alias form on the 'add' page — TODO confirm on UI change
        f = self.br.get_forms()[2]
        f["name"] = data["name"]
        f["domain"] = data["domain"]
        f["nameref"] = data["nameref"]
        f["domainref"] = data["domainref"]
        self.br.submit_form(f)
        returnaddvhost = self.br.response.content.decode('utf8')
        if 'An error occurred in Netmagis application' in returnaddvhost:
            returnvalue = 1
        else:
            returnvalue = 0
        return returnvalue

    def add(self, data):
        """Add a host record; confirms the extra dialog when the name exists.

        ``data`` must contain 'name', 'domain', 'addr', 'mac', 'iddhcpprof',
        'hinfo', 'comment', 'respname' and 'respmail'.
        Returns 0 on success, 1 if Netmagis reports an error.
        """
        uri = self.url+"add"
        self.br.open(uri)
        f = self.br.get_forms()[0]
        f["name"] = data["name"]
        f["domain"] = data["domain"]
        f["addr"] = data["addr"]
        f["mac"] = data["mac"]
        f["iddhcpprof"] = data["iddhcpprof"]
        f["hinfo"] = data["hinfo"]
        f["comment"] = data["comment"]
        f["respname"] = data["respname"]
        f["respmail"] = data["respmail"]
        self.br.submit_form(f)
        returnadd = self.br.response.content.decode('utf8')
        catchable_errors = ['An error occurred in Netmagis application']
        if any(x in returnadd for x in catchable_errors):
            returnvalue = 1
        else:
            if 'There is already a host named' in returnadd:
                # Netmagis asks for confirmation — resubmit the shown form
                f2 = self.br.get_forms()[0]
                self.br.submit_form(f2)
                self.br.response.content.decode('utf8')
                returnvalue = 0
            else:
                returnvalue = 0
        return returnvalue

    def deletename(self, data):
        """Delete a host by name+domain; submits the confirmation form too.

        Returns 0 on success, 1 if either submit reports a Netmagis error.
        """
        uri = self.url+"del"
        self.br.open(uri)
        f = self.br.get_forms()[0]
        f["name"] = data["name"]
        f["domain"] = data["domain"]
        self.br.submit_form(f)
        firstsubmit = self.br.response.content.decode('utf8')
        if 'An error occurred in Netmagis application' in firstsubmit:
            returnvalue = 1
        else:
            # deletion requires a second, confirmation submit
            f2 = self.br.get_form()
            self.br.submit_form(f2)
            secondsubmit = self.br.response.content.decode('utf8')
            if 'An error occurred in Netmagis application' in secondsubmit:
                returnvalue = 1
            else:
                returnvalue = 0
        return returnvalue

    def deleteip(self, data):
        """Delete a host by IP address; submits the confirmation form too.

        Returns 0 on success, 1 if either submit reports a Netmagis error.
        """
        uri = self.url+"del"
        self.br.open(uri)
        # form index 1 is the delete-by-address form on the 'del' page
        form = self.br.get_forms()[1]
        form["addr"] = data["addr"]
        self.br.submit_form(form)
        firstsubmit = self.br.response.content.decode('utf8')
        if 'An error occurred in Netmagis application' in firstsubmit:
            returnvalue = 1
        else:
            f2 = self.br.get_form()
            self.br.submit_form(f2)
            secondsubmit = self.br.response.content.decode('utf8')
            if 'An error occurred in Netmagis application' in secondsubmit:
                returnvalue = 1
            else:
                returnvalue = 0
        return returnvalue

    def exportcsv(self, data):
        """Print the CSV export of the address range ``data['plage']``."""
        uri = self.url+"net"
        self.br.open(uri)
        form = self.br.get_form(action='net')
        form['plages'].value = data['plage']
        # 'docsv' is the CSV-export submit button of the 'net' form
        self.br.submit_form(form, submit=form['docsv'])
        print(self.br.response.content.decode('utf8'))

    def looklarge(self, data):
        """Search a free block of ``data['naddr']`` addresses in ``data['plage']``.

        Returns the first proposed address, or 0 when no block is available
        (the page then contains the French message 'Aucun bloc').
        """
        uri = self.url+"add"
        self.br.open(uri)
        form = self.br.get_forms()[1]
        form['naddr'] = data['naddr']
        form['plage'] = data['plage']
        self.br.submit_form(form, submit=form['dosearch'])
        returnlooklarge = self.br.response.content.decode('utf8')
        if 'Aucun bloc' in returnlooklarge:
            return 0
        # the result page pre-fills an 'addr' field with the found address
        f2 = self.br.get_forms()[0]
        returnvalue = f2.fields['addr'].value
        return returnvalue
print('Posted online') if r.text=="403": print('Unautorised 403') USERAGENTS ='Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:37.0) Gecko/20100101 Firefox/37.0' session = req_session() session.headers.update({'Referer': 'https://www.deviantart.com'}) browser = RoboBrowser(history=False, session=session, tries=2, user_agent=USERAGENTS) print("Attempting to log in to deviantArt...") browser.open('https://www.deviantart.com/users/login?ref=https%3A%2F%2Fwww.deviantart.com%2F&remember_me=1') form = browser.get_forms()[1] form['username'] = USERNAME form['password'] = PSWD #print(form) if browser.find(text=re.compile("Login")): print('Compiled login fields form...') browser.submit_form(form) if browser.find(text=re.compile("The password you entered was incorrect")): print("Wrong password or username. Attempting to download anyway.") exit(); elif browser.find(text=re.compile("\"loggedIn\":true")): print("Logged in!") else: print("Login unsuccessful. Attempting to download anyway.")
class Client:
    """Moodle client: logs in via CAS, downloads new submissions of a
    course's assignments, and pushes grades/comments back through the
    'quick grading' form.

    Fixes over the previous revision:
    - ``__download_file`` logged ``resp.status.code`` (AttributeError on any
      failed download); now uses ``resp.status_code``.
    - ``__download_submission``'s failure warning had four ``{}`` placeholders
      but only three ``format`` arguments (IndexError); the file name is now
      passed as the first argument.  The success log's 'timerstamp' typo is
      also corrected.
    """

    def __init__(self, data_path, logger, timeout=None, max_retries=None):
        """
        :param str data_path: Where to store submissions
        :param logging.logger logger: Object to produce logs with
        :param int timeout: Default timeout, in seconds
        :param int max_retries: Number of retries
        """
        self.__data_path = data_path
        self.__logger = logger
        self.__browser = RoboBrowser(parser='html.parser', timeout=timeout,
                                     tries=max_retries, multiplier=1,
                                     history=False)

    def login(self, username, password):
        """Authenticate against CAS; return True iff the Moodle main page
        is reachable afterwards (i.e. the session is logged in).

        :param str username: ZIMT username
        :param str password: ZIMT password
        """
        main_page = moodleutils.get_main_page(MOODLE_DOMAIN)
        self.__browser.open(main_page)
        if self.__browser.url == main_page:
            # already authenticated (no redirect to CAS happened)
            return True
        self.__browser.open(CAS_URL)
        login_form = self.__browser.get_forms()[0]
        login_form['username'].value = username
        login_form['password'].value = password
        self.__browser.submit_form(login_form)
        self.__browser.open(main_page)
        # free the parse tree — pages here can be large
        self.__browser.parsed.decompose()
        return self.__browser.url == main_page

    def download_new_course_data(self, course_id, allowed_assignments):
        """Collect new (ungraded, settled) submissions of allowed assignments.

        :param int course_id: Id of the course (can be found in the course url)
        :param set allowed_assignments: Ids or/and names of allowed assignments
        :return: a ``Course`` object populated with assignment data
        """
        main_page = moodleutils.get_course_main_page(MOODLE_DOMAIN, course_id)
        self.__browser.open(main_page)
        course_name = self.__browser.select(
            '.page-header-headings')[0].h1.string[:]
        course_data = Course(course_id, course_name)
        classes = ['section main clearfix', 'section main clearfix current']
        for class_ in classes:
            for section in self.__browser.find_all(class_=class_):
                for assign in section.find_all(
                        class_='activity assign modtype_assign'):
                    # element id looks like 'module-12345' -> assignment id
                    assign_id = assign['id'].split('-')[1]
                    assign_name = assign.find(
                        class_='instancename').contents[0]
                    if assign_id not in allowed_assignments and \
                            assign_name not in allowed_assignments:
                        self.__logger.warning('Assignment is not allowed, skip ' \
                            '[id={}, name=`{}\']'.format(
                                assign_id, assign_name))
                        continue
                    assign_data = self.__download_new_assignment_data(
                        assign_id, self.__data_path)
                    course_data.add_assignment(assign_data)
                    self.__logger.info('Got assignment data ' \
                        '[id={}, name=`{}\']'.format(
                            assign_data.id, assign_data.name))
        self.__browser.parsed.decompose()
        return course_data

    def send_feedback(self, course_data):
        """Push grades and comments back via Moodle's quick-grading form.

        :param moodle.objects.Course course_data: course data with grades
            and comments
        """
        for assign_data in course_data.assignments():
            submissions_page = moodleutils.get_view_submissions_page(
                MOODLE_DOMAIN, assign_data.id)
            self.__browser.open(submissions_page)
            # first, enable quick grading through the table options form
            options_form = None
            for form in self.__browser.get_forms():
                if self.__is_options_form(form):
                    options_form = form
                    break
            if options_form is None:
                self.__logger.error('No options form for assignment, ' \
                    'skip [id={},name=`{}\']'.format(
                        assign_data.id, assign_data.name))
                continue
            if not self.__fill_options_form(options_form):
                self.__logger.error(
                    'Can not fill options form for assignment, skip ' \
                    '[id={},name=`{}\']'.format(
                        assign_data.id, assign_data.name))
                continue
            self.__browser.submit_form(options_form)
            grading_form = None
            for form in self.__browser.get_forms():
                if self.__is_grading_form(form):
                    grading_form = form
                    break
            if grading_form is None:
                self.__logger.error('No grading form for assignment, ' \
                    'skip [id={}, name=`{}\']'.format(
                        assign_data.id, assign_data.name))
                continue
            self.__logger.info('Process assignment submissions ' \
                '[id={}, name=`{}\']'.format(
                    assign_data.id, assign_data.name))
            for subm_data in assign_data.submissions():
                if subm_data.grade is None:
                    continue
                subm = self.__browser.find(
                    class_='user{}'.format(subm_data.user_id))
                if subm is None:
                    continue
                submitted = subm.find(class_='submissionstatussubmitted')
                if submitted is None:
                    continue
                # skip if the submission changed since it was downloaded
                subm_ts = self.__parse_timestamp(
                    subm.find(class_='cell c7').contents[0])
                if subm_data.timestamp != subm_ts:
                    self.__logger.warning(
                        'Outdated submission, skip ' \
                        '[user_id={}, username=`{}\', timestamp={}]'.format(
                            subm_data.user_id, subm_data.username,
                            subm_data.timestamp))
                    continue
                if not self.__fill_grading_form(grading_form, subm_data):
                    self.__logger.error(
                        'Can not fill grading form for submission, skip ' \
                        '[user_id={}, username=`{}\', timestamp={}]'.format(
                            subm_data.user_id, subm_data.username,
                            subm_data.timestamp))
                    continue
                self.__logger.info('Grading form was filled successfully ' \
                    '[user_id={}, username=`{}\', timestamp={}]'.format(
                        subm_data.user_id, subm_data.username,
                        subm_data.timestamp))
            # one submit grades the whole assignment (quick grading)
            self.__browser.submit_form(grading_form)
            self.__logger.info('Grading form was submitted for assignment ' \
                '[id={}, name=`{}\']'.format(
                    assign_data.id, assign_data.name))
        self.__browser.parsed.decompose()

    def __parse_timestamp(self, date_str, date_locale='de_DE.utf8'):
        """Parse Moodle's localized date cell into a Unix timestamp.

        '-' (no date) maps to 0.  Temporarily switches LC_ALL because the
        page renders German month/day names.
        """
        if date_str == '-':
            return 0
        cur_locale = locale.getlocale()
        locale.setlocale(locale.LC_ALL, date_locale)  # XXX install locale
        timestamp = datetime.strptime(date_str,
                                      '%A, %d. %B %Y, %H:%M').timestamp()
        locale.setlocale(locale.LC_ALL, cur_locale)
        return timestamp

    def __download_file(self, link, path):
        """Download ``link`` to ``path``; return True on HTTP 200.

        Tries to store the payload as UTF-8 text first, falls back to raw
        bytes for binary files.
        """
        resp = self.__browser.session.get(link)  # XXX ugly
        if resp.status_code != 200:
            # FIX: was resp.status.code (AttributeError on every error path)
            self.__logger.error('Bad response code: {} ' \
                '[link=`{}\']'.format(resp.status_code, link))
            return False
        try:
            with open(path, 'w') as f:
                f.write(resp.content.decode('utf-8'))
        except:
            # not valid UTF-8 -> binary file
            with open(path, 'wb') as f:
                f.write(resp.content)
        return True

    def __download_submission(self, subm, path):
        """Download all files of one submission row into its own directory.

        :return: a ``Submission`` object, or None when any file failed.
        """
        user_id = subm['class'][0][4:]  # row class is 'userNNNN'
        username = subm.find(class_='cell c2').a.contents[0]
        timestamp = self.__parse_timestamp(
            subm.find(class_='cell c7').contents[0])
        subm_path = os.path.join(path, 'user_' + user_id)
        subm_data = Submission(user_id, username, timestamp, subm_path)
        utils.remove_dir(subm_path)
        utils.make_dir(subm_path)
        for f in subm.find_all(class_='fileuploadsubmission'):
            name = f.a.contents[0]
            link = f.a['href']
            if not self.__download_file(link,
                                        os.path.join(subm_path, name)):
                # FIX: 'name' was missing from the arguments, which made
                # this warning raise IndexError instead of logging
                self.__logger.warning('Can not download file `{}\', ' \
                    'skip submission ' \
                    '[user_id={}, username=`{}\', timestamp={}]'.format(
                        name, subm_data.user_id, subm_data.username,
                        subm_data.timestamp))
                return None
            else:
                self.__logger.info('Got file `{}\' ' \
                    '[user_id={}, username=`{}\', timestamp={}]'.format(
                        name, subm_data.user_id, subm_data.username,
                        subm_data.timestamp))
        return subm_data

    def __download_new_assignment_data(self, assign_id, path):
        """Scan the submissions table and download everything gradable.

        :return: an ``Assignment`` populated with new submissions.
        """
        submissions_page = moodleutils.get_view_submissions_page(
            MOODLE_DOMAIN, assign_id)
        self.__browser.open(submissions_page)
        table = self.__browser.find(
            class_='flexible generaltable generalbox')
        assign_path = os.path.join(path, 'assignment_' + assign_id)
        assign_name = self.__browser.find(role='main').h2.string[:]
        assign_data = Assignment(assign_id, assign_name)
        for subm in table.tbody.find_all('tr'):
            submitted = subm.find(class_='submissionstatussubmitted')
            if submitted is None:
                continue
            graded = subm.find(class_='submissiongraded')
            user_id = subm['class'][0][4:]
            username = subm.find(class_='cell c2').a.contents[0]
            subm_ts = self.__parse_timestamp(
                subm.find(class_='cell c7').contents[0])
            grade_ts = self.__parse_timestamp(
                subm.find(class_='cell c10').contents[0])
            if graded and subm_ts + 60 < grade_ts:
                # XXX +1 minute - to retest in case of delays
                self.__logger.debug('Submission is already graded ' \
                    '[user_id={}, username=`{}\', subm_ts={}, grade_ts={}]'.format(
                        user_id, username, subm_ts, grade_ts))
                continue
            if time.time() < subm_ts + 120:
                self.__logger.info('Submission is too new and will be evaluated next time ' \
                    '[user_id={}, username=`{}\', subm_ts={}, grade_ts={}]'.format(
                        user_id, username, subm_ts, grade_ts))
                continue
            subm_data = self.__download_submission(subm, assign_path)
            if subm_data is not None:
                self.__logger.info('Got submission data ' \
                    '[user_id={}, username=`{}\', timestamp={}]'.format(
                        subm_data.user_id, subm_data.username,
                        subm_data.timestamp))
                assign_data.add_submission(subm_data)
            else:
                self.__logger.warning('Submission data is not downloaded, skip ' \
                    '[user_id={}, username=`{}\', timestamp={}]'.format(
                        user_id, username, subm_ts))
        return assign_data

    def __is_options_form(self, form):
        # the table-options form is the one carrying 'quickgrading'
        for field in form.keys():
            if field == 'quickgrading':
                return True
        return False

    def __fill_options_form(self, form):
        # show all submissions on one page and enable quick grading
        try:
            form['filter'] = ''
            form['perpage'] = '-1'
            form['quickgrading'] = ['1']
            return True
        except:
            return False

    def __is_grading_form(self, form):
        # the grading form carries one 'quickgrade_<user_id>' field per row
        for field in form.keys():
            if field.startswith('quickgrade_'):
                return True
        return False

    def __fill_grading_form(self, form, subm):
        """Write one submission's grade and comment into the grading form."""
        try:
            # it is impossible to fill the form with 0
            form['quickgrade_' + subm.user_id] = \
                (subm.grade if subm.grade > 0 else 1e-20)
            # it is necessary to update form even if the data is the same
            old_comment = form['quickgrade_comments_' + subm.user_id].value
            new_comment = subm.comment
            if new_comment is None:
                new_comment = ''
            if new_comment == old_comment:
                new_comment += ' '
            form['quickgrade_comments_' + subm.user_id] = new_comment
            return True
        except:
            return False
class ScheduleScrapper:
    """Scrape the university schedule site (endpoints come from ``stng``).

    Submits the schedule search form for a group or a teacher and formats
    the result; also queries the autocomplete endpoints for name checking.
    """

    def __init__(self):
        # history is not needed: every request opens a fresh page
        self.browser = RoboBrowser(history=False, parser='lxml')

    def _get_page_with_schedule(self, group=None, sdate=None,
                                edate=None, teacher=None) -> list:
        """
        This function find schedule for group

        :param group: A group for which search will be done
        :param sdate: Start date to search
        :param edate: End date to search
        :param teacher: A teacher for which search will be done
        :return: List of days with schedule
        """
        self.browser.open(stng.SCHEDULE_URL + stng.GET_SCHEDULE_URL)
        form = self.browser.get_forms()[0]
        # the site expects form values in its legacy encoding
        if group:
            form['group'].value = group.encode(
                stng.DEFAULT_ENCODING_FOR_REQUEST)
        if teacher:
            form['teacher'].value = teacher.encode(
                stng.DEFAULT_ENCODING_FOR_REQUEST)
        if sdate:
            form['sdate'].value = sdate.encode(
                stng.DEFAULT_ENCODING_FOR_REQUEST)
        if edate:
            form['edate'].value = edate.encode(
                stng.DEFAULT_ENCODING_FOR_REQUEST)
        self.browser.submit_form(form)
        soup = self.browser.parsed
        # the first 'col-md-6' div is page chrome; the rest are day blocks
        list_of_couples = soup.find_all('div', class_='col-md-6')[1:]
        return list_of_couples

    def _parse_schedule(self, list_of_couples: list, group=None,
                        teacher=None) -> list:
        """
        This function parse list of html tags to normal text list

        :param list_of_couples: list with html tags
        :return: list with formatting text
        """
        if group:
            appeal = f'для групи {group}'
        else:
            appeal = f'для {teacher}'
        result = list()
        for elem in list_of_couples:
            date = elem.find('h4').text
            result_str = f'<strong>Розклад на {date}\n{appeal}</strong>\n'
            for i in elem.find_all('tr'):
                pair = i.find_all('td')
                if pair[2].text != '':
                    # collapse runs of spaces left over from the markup
                    pair_info = re.sub(" +", " ", pair[2].text)
                    pair_time = f'\n{pair[0].text} пара ({pair[1].text[:5]} - {pair[1].text[5:]})'
                    result_str += f'<i>{pair_time}</i>\n{pair_info}\n'
            result.append(result_str)
        return result

    def _get(self, link: str, query: str) -> list:
        """Query an autocomplete endpoint; return its 'suggestions' list.

        Returns [] on malformed JSON or an unexpected payload shape.
        """
        self.browser.open(link + query)
        response = self.browser.response.content.decode(
            stng.DEFAULT_ENCODING_FOR_REQUEST)
        try:
            # json.loads replaces the former json.load(io.StringIO(...))
            return json.loads(response)["suggestions"]
        except (ValueError, KeyError, TypeError):
            return []

    def groups_get(self, group: str) -> list:
        """Return group-name suggestions matching *group*."""
        link = (stng.SCHEDULE_URL + stng.GROUP_EXISTS)
        return self._get(link, group)

    def teachers_get(self, teacher: str) -> list:
        """Return teacher-name suggestions matching *teacher*."""
        link = (stng.SCHEDULE_URL + stng.TEACHER_EXISTS)
        return self._get(link, teacher)

    def get_schedule(self, group=None, sdate=None, edate=None,
                     teacher=None) -> list:
        """Fetch and format the schedule for a group or a teacher."""
        list_of_couple = self._get_page_with_schedule(group, sdate,
                                                      edate, teacher)
        response = self._parse_schedule(list_of_couple, group, teacher)
        return response
block=block, ssl_version=ssl.PROTOCOL_TLSv1) # Browse url : browser = RoboBrowser(parser="lxml") browser.session.headers = config.headers # Mount with custom SSL Adapter browser.session.mount('https://', MyAdapter()) # Get to website print "- Connecting to url ..." browser.open(config.url) # Click on first button to go to second page: button = browser.get_forms()[0] browser.submit_form(button) # Fill in Date/Time form and start the Questionnaire form = browser.get_forms()[0] form['JavaScriptEnabled'].value = '1' form['SurveyCode'].value = config.ID form['InputMonth'].value = config.date[0] form['InputDay'].value = config.date[1] form['InputHour'].value = config.time[0] form['InputMinute'].value = config.time[1] form.serialize() browser.submit_form(form) print "- Filling Forms Randomly ..."
class Interaction(object): def __init__(self, httpc, interactions=None, verify_ssl=True): self.httpc = httpc self.browser = RoboBrowser() self.interactions = interactions self.verify_ssl = verify_ssl def pick_interaction(self, response, base): if self.interactions is None: return None self.browser._update_state(response) _bs = self.browser.parsed unic = "" for interaction in self.interactions: _match = 0 for attr, val in list(interaction["matches"].items()): if attr == "url": if val == base: _match += 1 elif attr == "title": if _bs is None: break if _bs.title is None: break if val in _bs.title.contents: _match += 1 else: _c = _bs.title.contents if isinstance(_c, list) and not isinstance(_c, str): for _line in _c: if val in _line: _match += 1 continue elif attr == "content": if unic and val in unic: _match += 1 if _match == len(interaction["matches"]): return interaction raise InteractionNeeded("No interaction matched") def pick_form(self, forms, **kwargs): """ Picks which form in a web-page that should be used :param forms: A list of robobrowser.Forms instances :return: The picked form or None if no form matched the criteria. 
""" _form = None if len(forms) == 1: _form = forms[0] else: if "pick" in kwargs: _dict = kwargs["pick"] for form in forms: if _form: break for key, _ava in list(_dict.items()): if key == "form": _keys = list(form.attrs.keys()) for attr, val in list(_ava.items()): if attr in _keys and val == form.attrs[attr]: _form = form elif key == "control": prop = _ava["id"] _default = _ava["value"] try: orig_val = form[prop] if isinstance(orig_val, str): if orig_val == _default: _form = form elif _default in orig_val: _form = form except KeyError: pass except Exception as err: pass elif key == "method": if form.method == _ava: _form = form else: _form = None if not _form: break elif "index" in kwargs: _form = forms[int(kwargs["index"])] return _form def select_form(self, response, **kwargs): """ Pick a form on a web page, possibly enter some information and submit the form. :param orig_response: The original response (as returned by requests) :return: The response do_click() returns """ self.browser._update_state(response) forms = self.browser.get_forms() form = self.pick_form(forms, **kwargs) if not forms: raise Exception("Can't pick a form !!") if "set" in kwargs: for key, val in list(kwargs["set"].items()): if key.startswith("_"): continue if "click" in kwargs and kwargs["click"] == key: continue try: form[key].value = val except (ValueError): pass except Exception as err: raise # cntrl = form.find_control(key) # if isinstance(cntrl, ListControl): # form[key] = [val] # else: # raise if form.action in kwargs["tester"].my_endpoints(): _res = {} for name, cnt in form.fields.items(): _res[name] = cnt.value return _res try: requests_args = kwargs["requests_args"] except KeyError: requests_args = {} self.browser.submit_form(form, **requests_args) return self.browser.state.response # noinspection PyUnusedLocal def chose(self, orig_response, path, **kwargs): """ Sends a HTTP GET to a url given by the present url and the given relative path. 
:param orig_response: The original response :param content: The content of the response :param path: The relative path to add to the base URL :return: The response do_click() returns """ if not path.startswith("http"): try: _url = orig_response.url except KeyError: _url = kwargs["location"] part = urlparse(_url) url = "%s://%s%s" % (part[0], part[1], path) else: url = path return self.httpc.send(url, "GET") # return resp, "" def redirect(self, orig_response, url_regex, **kwargs): """ Simulates a JavaScript redirect by extracting the target of the redirection from the page content using the given regex :param orig_response: The original response :param url_regex: The regex that defines how the target of the redirect can be extracted from the content """ matches = re.findall(url_regex, orig_response.content) no_of_matches = len(matches) if not no_of_matches == 1: raise InteractionNeeded("Expected single match but found %d", no_of_matches) url = matches[0] return self.httpc.send(url, "GET") def post_form(self, response, **kwargs): """ The same as select_form but with no possibility of changing the content of the form. :param response: The original response (as returned by requests) :return: The response submit_form() returns """ form = self.pick_form(response, **kwargs) return self.browser.submit_form(form) def response(self, response, **kwargs): return {"text": response.text} # noinspection PyUnusedLocal def interaction(self, args): _type = args["type"] if _type == "form": return self.select_form elif _type == "link": return self.chose elif _type == "response": return self.response elif _type == "redirect": return self.redirect elif _type == "javascript_redirect": return self.redirect else: return no_func
from robobrowser import RoboBrowser; import BeautifulSoup; """ Take HTML file as input return well indented HTML file using freeformatter.com """ input_html = open('rb2.html','r+'); browser = RoboBrowser(history=True); main_url = 'http://www.freeformatter.com/html-formatter.html'; browser.open(main_url); #print browser.find(); #print browser.response.text; forms = browser.get_forms(); form_counter = 1; #for f in forms: # print f,'\n',++form_counter; """ <RoboForm action=, inputstring=, inputurl=, indent=3spaces, forcenewwindow=false> """ form = browser.get_form(action='/html-formatter.html'); #print form; form['inputstring'] = input_html; form['forcenewwindow'] = 'false'; browser.submit_form(form); #print browser; resp = browser.response.content; output_file = open('rb3.html','w+'); output_file.write(resp);
document='<table border="1" style="border-collapse:collapse">' Users={ #Put users here } """ pwd=Users.items()[0][1] user=Users.items()[0][0] """ for user,pwd in Users.iteritems(): browser=RoboBrowser(history=True) browser.open('https://www.aqmetrix.com', verify=False) form=browser.get_forms().pop() form['nombre'].value=user form['passwd'].value=pwd browser.submit_form(form, verify=False) table=BeautifulSoup(browser.session.get('https://www.aqmetrix.com/aqx/diary/infodisp/get_infodisp_incidencias.php').content) j=0 for row in table.select('table tbody tr'): document=document+'<tr>' for data in row.select('td'): if data.select('a') == []: document=document+data.prettify() else: for i in data.select('a'): if 'PNG' in i.attrs['href']:
import re import socket import random # ----- Start crawling crid = 1 while(True): username = "******" % crid # print username; browser = RoboBrowser() browser.open('https://click2win.settrade.com/LoginRepOnRole.jsp?txtLogin='******'&txtPassword='******'&txtSecureKey=NONE&txtDefaultPage=%2FSETClick2WIN%2FSelectUserLeague.jsp&txtLoginPage=SETClick2WIN/index.jsp&txtBrokerId=089&txtSystem=ITP&txtRole=INTERNET&tmpUsername=&tmpPassword=') form = browser.get_forms()[0] browser.submit_form(form) form = browser.get_forms()[0] browser.submit_form(form) body = str(browser.parsed) # print type(body) # print body if ( "openStreaming" in body): print "OK -- " + username; else: print "FA ------------ " + username; crid+=1; # break;
def fanduel_salaries_scraper(credentials, bucket_name, obj_path,
                             years=default_years, weeks=default_weeks):
    """Scrape weekly FanDuel salary/projection tables from fantasydata.com
    and upload one CSV per (year, week) to an S3 bucket.

    :param credentials: dict with 'email' and 'password' for fantasydata.com
    :param bucket_name: destination S3 bucket
    :param obj_path: key prefix for uploaded objects
    :param years: list of one-entry dicts mapping display year -> site
        season number ('sn' query parameter) — module-level default
    :param weeks: iterable of 0-based week indices — module-level default
    :raises RuntimeError: re-raised when the S3 upload fails
    """
    client = boto3.client('s3')
    browser = RoboBrowser()
    browser.open(login_url)
    login_form = browser.get_forms()[0]
    # Set login credentials
    login_form['ctl00$Body$EmailTextbox'].value = credentials['email']
    login_form['ctl00$Body$PasswordTextbox'].value = credentials['password']
    login_form.serialize()
    # Submit login form
    browser.submit_form(login_form)
    # Open the previously hidden page
    for yearIdx, year in enumerate(years):
        year_dict = years[yearIdx]
        year_key = list(year_dict.keys())[0]
        sn = year_dict[year_key]
        for week in weeks:
            w = week
            ew = week
            # Initialize the data to be written to the file
            formatted_data = ''
            for pos_idx, pos in enumerate(default_pos):
                pos_dict = default_pos[pos_idx]
                pos_key = list(pos_dict.keys())[0]
                p = pos_dict[pos_key]
                salary_data_url = 'https://fantasydata.com/nfl-stats/daily-fantasy-football-salary-and-projection-tool.aspx?fs={}&stype=0&sn={}&scope=0&w={}&ew={}&s=&t=0&p={}&st=FantasyPointsFanDuel&d=1&ls=&live=false&pid=true&minsnaps=4'.format(
                    fs, sn, w, ew, p)
                # Delay before retrieving next set of data
                time.sleep(0.25)
                browser.open(salary_data_url)
                content = browser.find_all('tr')
                for idx, line in enumerate(content):
                    # Only add the header once per year
                    # (p == 2 is presumably the first position's site code — TODO confirm)
                    if idx == 0 and week == 0 and p == 2:
                        formatted_data = headers + '\n'
                    elif idx != 0:
                        # Remove the comma from each salary 5,200 >> 5200
                        line_values = line.find_all(text=True)
                        line_values[10] = line_values[10].replace(',', '')
                        parsed_data = ','.join(line_values)
                        stripped_line = parsed_data.strip('\n').strip(',')
                        extra_fields = ',' + year_key
                        next_line = stripped_line + extra_fields + '\n'
                        formatted_data = formatted_data + next_line
            # one object per (year, week); weeks are stored 1-based
            file_path = '{}/{}/{}.csv'.format(obj_path, year_key, week + 1)
            try:
                # Upload object to the S3 bucket
                client.put_object(Bucket=bucket_name, Body=formatted_data,
                                  Key=file_path)
            except RuntimeError as err:
                print('Failed to write to file: ', err)
                raise err
            print('Success! Uploaded data: {}'.format(file_path))
class Session(): # self.username =''; # self.password =''; # self.driver = None; def __init__(self,username='',password=''): self.username = username self.password = password self.browser = RoboBrowser() def login(self): self.browser.open('https://click2win.settrade.com/LoginRepOnRole.jsp?txtLogin='******'&txtPassword='******'&txtSecureKey=NONE&txtDefaultPage=%2FSETClick2WIN%2FSelectUserLeague.jsp&txtLoginPage=SETClick2WIN/index.jsp&txtBrokerId=089&txtSystem=ITP&txtRole=INTERNET&tmpUsername=&tmpPassword='******'fvSyncTimeURL']) servTime = self.browser.select('p')[0].get_text().split("|") servTime = int(servTime[1]) #+ config.hourshift*60*60*1000 servTime = servTime/1000 self.difftime = time.time() - servTime # print self.difftime def getStreamingVar(self): # ------ Generate flash variables self.browser.open(config.url+'/realtime/streaming5/flash/StreamingPage.jsp') src = self.browser.select('html')[0] src = src.get_text().encode('utf-8').split("\n") for i in src: if(i.find('flashVar')== -1 ): continue; i = i[i.find('flashVar'):] i = i[i.find('{')+1:] i = i[0:i.find('}')] src = i; break; flashVar = {} print src src = src.split(",") for line in src: line = line.split(":") flashVar[urllib.unquote(line[0])] = urllib.unquote(line[1][1:-1]) self.flashVar = flashVar self.syncTime() return flashVar def getInstrumentList(self): url = config.url + self.flashVar['fvDataProviderStrURL'] key = datetime.datetime.fromtimestamp(time.time()+self.difftime).strftime('%d/%m/%Y') + "_" + self.flashVar['fvBrokerId']+"_"+self.flashVar['fvUserref'] m = hashlib.md5() m.update(key) hs = m.hexdigest() # print "hash("+key +") = " + hs params = "boardType=equity&" params += "APIVersion="+config.APIVersion+"&" params += "subListName=&" params += "mainListName=.A&" params += "boardSubType=&" params += "service=12&" params += "q="+hs self.browser.open(url +"?"+params); text = self.browser.select('p')[0].get_text().split("|") if text[0] == 'T': category = text[6].split("^") # print category 
futures = []; options = []; equity = []; indexes = []; for cate in category: if cate == 'futuresAndUnderlying': insts = text[7].split("^") for inst in insts: inst = inst.split("~")[0] futures.append(inst); if cate == 'options': insts = text[8].split("^") for inst in insts: inst = inst.split("~")[0] options.append(inst); if cate == 'equity': insts = text[9].split("^") for inst in insts: inst = inst.split("~")[0] equity.append(inst); new_equity =[] reNW = re.compile('^.*-W$') reW = re.compile('^.*-W\d(\d*)$') reDW = re.compile('^(.|..|...|....)\d\d(C|P)\d\d\d\d.$') reF = re.compile('^.*-F$') reP = re.compile('^.*-P$') reQ = re.compile('^.*-Q$') for eq in equity: if(reNW.match(eq) or reW.match(eq) or reDW.match(eq) or reF.match(eq) or reP.match(eq) or reQ.match(eq)): #not simple pass; else: new_equity.append(eq); equity = new_equity; # print len(futures) # print len(options) # print len(equity) # total = len(futures)+ len(options) + len(equity) # print total # equity = equity[1:100] # futures = futures[1:100] # options = [] return (equity,futures,options) else: return None; def genKey(self): url = config.url + self.flashVar['fvGenerateKeyURL'] key = datetime.datetime.fromtimestamp(time.time()+self.difftime).strftime('%d/%m/%Y') + "_" + self.flashVar['fvBrokerId']+"_"+self.flashVar['fvUserref'] m = hashlib.md5() m.update(key) hs = m.hexdigest() # print "hash("+key +") = " + hs params = "time="+str(int(time.time()+self.difftime))+"&" params += "clientType="+self.flashVar['fvRealtimeClientType']+"&" params += "txtSETNET3="+self.flashVar['fvSETNET3']+"&" params += "APIVersion="+config.APIVersion+"&" params += "q="+hs self.browser.open(url +"?"+params); src = self.browser.select('p')[0].get_text().split("|") self.key=src; return src def genSocket(self): key = self.genKey() s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) print key; dns = socket.gethostbyname_ex(key[2]) host = dns[2][0] port = int(key[3]) conn = s.connect((host, port)) # 
s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) ss = self.flashVar['fvUserref']+"|"+self.flashVar['fvBrokerId']+"|"+key[1]+"|" + str(int(time.time()+self.difftime)) + "|"+self.flashVar['fvRealtimeClientType']+"|"+config.APIVersion+"\n" s.send(ss) # print ss return s def marketSummarySocket(self): s = self.genSocket() s.send("REG|5\n") # print "REG|5\n" return s def tickerSocket(self): s = self.genSocket() s.send("REG|4^N~N^N~N^E~D^E~D\n") # print "REG|4^N~N^N~N^E~D^E~D\n" return s def bidofferSocket(self,insts): s = self.genSocket() ss = "REG|1^" for i in range(len(insts)): if(i == len(insts) - 1): ss+= insts[i] else: ss+= insts[i]+"~" ss += "^^" for i in range(len(insts)): if(i == len(insts) - 1): ss+= "M" else: ss+= "M~" ss += "\n" s.send(ss); print ss # print self.key return s
CHALLENGE_API = SITE_URL + 'competitions/ultimate-tic-tac-toe/game/challenge/<bot_name>/' print("TheAiGames Bot Auto-Challenger") print("") username = raw_input("Username: "******"[INFO] CBTeamName is ranked " + str(our_rank)) random.seed(os.urandom(8))
#!/usr/bin/python from robobrowser import RoboBrowser browser = RoboBrowser() browser.open("https://www.noip.com/members/dns/host.php?host_id=48545720") browser.get_form(id="clogs") form = browser.get_form(id="clogs") form["username"] = "******" form["password"] = "******" browser.submit_form(form) browser.get_forms() browser.submit_form(browser.get_forms()[0])
b = RoboBrowser(parser="lxml") b.open("http://www.chandrashekar.info") b.get_links() b.get_links() b.forms dir(b) b.get_links() b.get_links()[-3] l = b.get_links()[-3] b.follow_link(l) b.url b.back() b.url b.forward() b.url b.get_forms() b.get_forms()[0] f = b.get_forms()[0] f f["name"] = "smith" f["subject"] = "dslfj lsdjf lsdjf lksdj flsdjf" f b.submit_form(f) import requests r = requests.get("http://pypi.python.org/pypi", params={":action" : "search", "term" : term, "submit" : "search"}) r = requests.get("http://pypi.python.org/pypi", params={":action" : "search", "term" : "rest", "submit" : "search"}) r
def def_vs_scraper(credentials, bucket_name, obj_path, years=default_years, weeks=default_weeks): client = boto3.client('s3') browser = RoboBrowser() browser.open(login_url) login_form = browser.get_forms()[0] # Set login credentials login_form['ctl00$Body$EmailTextbox'].value = credentials['email'] login_form['ctl00$Body$PasswordTextbox'].value = credentials['password'] login_form.serialize() # Submit login form browser.submit_form(login_form) # Open the previously hidden page for yearIdx, year in enumerate(years): year_dict = years[yearIdx] year_key = list(year_dict.keys())[0] sn = year_dict[year_key] for week in weeks: for position_ranking in default_position_rankings: w = week ew = week pts_vs_url = 'https://fantasydata.com/nfl-stats/nfl-fantasy-football-points-allowed-defense-by-position.aspx?fs={}&stype=0&sn={}&scope={}&w={}&ew={}&s=&t=0&p=0&st={}&d=1&ls={}&live=false&pid=true&minsnaps=4'.format( fs, sn, scope, w, ew, position_ranking['url'], position_ranking['url']) # Delay before retrieving next set of data time.sleep(0.5) browser.open(pts_vs_url) content = browser.find_all('tr') # Initialize the data to be written to the file formatted_data = '' for idx, line in enumerate(content): # Only add the header once per year if idx == 0 and week == 0: formatted_data = headers + '\n' elif idx != 0: parsed_data = ','.join(line.find_all(text=True)) stripped_line = parsed_data.strip('\n').strip(',') year_value = str(list(year.keys())[0]) next_line = stripped_line + ',' + year_value + '\n' formatted_data = formatted_data + next_line # Make the directory for each year of CSV Data file_path = '{}/{}/{}/{}.csv'.format(obj_path, year_key, week + 1, position_ranking['file']) try: # Upload object to the S3 bucket client.put_object(Bucket=bucket_name, Body=formatted_data, Key=file_path) except RuntimeError as err: print('Failed to write to file: ', err) raise err print('Success! Uploaded data: {}'.format(file_path))
class Dagr:
    """deviantArt gallery ripper class"""

    NAME = basename(__file__)
    __version__ = "0.60"
    MAX_DEVIATIONS = 1000000  # max deviations

    def __init__(self):
        # Internals
        self.browser = None
        self.errors_count = dict()  # error message -> occurrence count

        # Configuration
        self.username = ""
        self.password = ""
        self.overwrite = False  # re-download files that already exist on disk
        self.reverse = False    # rip oldest-first instead of newest-first
        self.testOnly = False   # print file links instead of downloading
        self.verbose = False

        # Current status
        self.deviant = ""  # deviantArt user currently being ripped

    def start(self):
        """Prepare the browser (if needed) and attempt to log in."""
        if not self.browser:
            # Set up fake browser
            self.set_browser()
        # Always run login
        self.login()

    def set_browser(self):
        """Create the RoboBrowser session with a randomized User-Agent."""
        USERAGENTS = (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.202 Safari/535.1',
            'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101',
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
            'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)',
            'Opera/9.99 (Windows NT 5.1; U; pl) Presto/9.9.9',
            'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/530.5 (KHTML, like Gecko) Chrome/ Safari/530.5',
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.2 (KHTML, like Gecko) Chrome/6.0',
            'Mozilla/5.0 (Windows; U; Windows NT 6.1; pl; rv:1.9.1) Gecko/20090624 Firefox/3.5 (.NET CLR 3.5.30729)'
        )
        session = req_session()
        session.headers.update({'Referer': 'http://www.deviantart.com/'})
        self.browser = RoboBrowser(history=False, session=session, tries=3,
                                   user_agent=random.choice(USERAGENTS))

    def login(self):
        """Log in to deviantArt with the configured credentials (best effort:
        on failure we only warn and continue unauthenticated)."""
        if not (self.username and self.password):
            return
        print("Attempting to log in to deviantArt...")
        self.browser.open('https://www.deviantart.com/users/login?ref=http%3A%2F%2Fwww.deviantart.com%2F&remember_me=1')
        form = self.browser.get_forms()[1]
        form['username'] = self.username
        form['password'] = self.password
        self.browser.submit_form(form)
        if self.browser.find(text=re.compile("The password you entered was incorrect")):
            # BUG FIX: this message was a string literal broken across two
            # physical lines (a syntax error); reconstructed as one line.
            print("Wrong password or username. Attempting to download anyway.")
        elif self.browser.find(text=re.compile("\"loggedIn\":true")):
            print("Logged in!")
        else:
            print("Login unsuccessful. Attempting to download anyway.")

    def get(self, url, file_name=None):
        """Fetch *url*.  With file_name=None return the parsed HTML as a
        string; otherwise save the raw response body to that file (skipped
        when the file exists and overwrite is off)."""
        if file_name is not None and (self.overwrite == False) and (path_exists(file_name)):
            print(file_name + " exists - skipping")
            return
        #TODO Test robobrowser retries and exceptions
        self.browser.open(url)
        if file_name is None:
            return str(self.browser.parsed)
        else:
            # BUG FIX: use a context manager so the file handle is closed
            # even if the write raises (was open/write/close).
            with open(file_name, "wb") as local_file:
                local_file.write(self.browser.response.content)

    def find_link(self, link):
        """Resolve a deviation page URL to (filename, file URL).

        Tries, in order: the official download link, the og:image meta tag
        (detecting mature-content placeholders), then collect_rid <img>
        tags.  Raises DagrException when nothing usable is found.
        """
        filelink = None
        mature_error = False
        self.browser.open(link)
        # Full image link (via download link)
        img_link = self.browser.get_link(text=re.compile("Download( (Image|File))?"))
        if img_link and img_link.get("href"):
            self.browser.follow_link(img_link)
            filelink = self.browser.url
        else:
            if self.verbose:
                print("Download link not found, falling back to direct image")
            # Fallback 1: try meta (filtering blocked meta)
            filesearch = self.browser.find("meta", {"name": "og:image"})
            if filesearch:
                filelink = filesearch['content']
                if basename(filelink).startswith("noentrythumb-"):
                    # Placeholder thumb: the real image is behind the
                    # mature-content gate.
                    filelink = None
                    mature_error = True
            if not filelink:
                # Fallback 2: try collect_rid, full
                filesearch = self.browser.find("img", {"collect_rid": True,
                                                       "class": re.compile(".*full")})
                if not filesearch:
                    # Fallback 3: try collect_rid, normal
                    filesearch = self.browser.find("img", {"collect_rid": True,
                                                           "class": re.compile(".*normal")})
                if filesearch:
                    filelink = filesearch['src']
        if not filelink:
            if mature_error:
                raise DagrException("probably a mature deviation")
            else:
                raise DagrException("all attemps to find a link failed")
        filename = basename(filelink)
        return (filename, filelink)

    def handle_download_error(self, link, e):
        """Log a download failure and tally it by error message."""
        error_string = str(e)
        print("Download error (" + link + ") : " + error_string)
        if error_string in self.errors_count:
            self.errors_count[error_string] += 1
        else:
            self.errors_count[error_string] = 1

    def deviant_get(self, mode):
        """Rip one listing of self.deviant (favs/collection/scraps/gallery/
        album/query).  A ':'-suffixed mode carries an argument, e.g.
        'album:12345'."""
        print("Ripping " + self.deviant + "'s " + mode + "...")
        pat = "http://[a-zA-Z0-9_-]*\.deviantart\.com/art/[a-zA-Z0-9_-]*"
        modeArg = '_'
        if mode.find(':') != -1:
            mode = mode.split(':', 1)
            modeArg = mode[1]
            mode = mode[0]

        # DEPTH 1: crawl listing pages, 24 deviations per page, collecting
        # unique deviation URLs until a page adds nothing new.
        pages = []
        for i in range(0, int(Dagr.MAX_DEVIATIONS / 24), 24):
            html = ""
            url = ""
            if mode == "favs":
                url = "http://" + self.deviant.lower() + ".deviantart.com/favourites/?catpath=/&offset=" + str(i)
            elif mode == "collection":
                url = "http://" + self.deviant.lower() + ".deviantart.com/favourites/" + modeArg + "?offset=" + str(i)
            elif mode == "scraps":
                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/?catpath=scraps&offset=" + str(i)
            elif mode == "gallery":
                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/?catpath=/&offset=" + str(i)
            elif mode == "album":
                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/" + modeArg + "?offset=" + str(i)
            elif mode == "query":
                url = "http://" + self.deviant.lower() + ".deviantart.com/gallery/?q=" + modeArg + "&offset=" + str(i)
            else:
                continue
            html = self.get(url)
            prelim = re.findall(pat, html, re.IGNORECASE | re.DOTALL)
            # c counts how many *new* deviations this page contributed.
            c = len(prelim)
            for match in prelim:
                if match in pages:
                    c -= 1
                else:
                    pages.append(match)
            done = re.findall("(This section has no deviations yet!|This collection has no items yet!)",
                              html, re.IGNORECASE | re.S)
            if len(done) >= 1 or c <= 0:
                break
            print(self.deviant + "'s " + mode + " page " + str(int((i / 24) + 1)) + " crawled...")
        if not self.reverse:
            pages.reverse()
        if len(pages) == 0:
            print(self.deviant + "'s " + mode + " had no deviations.")
            return 0
        else:
            try:
                da_make_dirs(self.deviant + "/" + mode)
                if (mode == "query") or (mode == "album") or (mode == "collection"):
                    da_make_dirs(self.deviant + "/" + mode + "/" + modeArg)
            except Exception as e:
                print(str(e))
        print("Total deviations in " + self.deviant + "'s gallery found: " + str(len(pages)))

        ##DEPTH 2: resolve and download each deviation.
        counter2 = 0
        for link in pages:
            counter2 += 1
            if self.verbose:
                print("Downloading " + str(counter2) + " of " + str(len(pages)) + " ( " + link + " )")
            filename = ""
            filelink = ""
            try:
                filename, filelink = self.find_link(link)
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception as e:
                self.handle_download_error(link, e)
                continue
            if self.testOnly == False:
                if (mode == "query") or (mode == "album") or (mode == "collection"):
                    self.get(filelink, self.deviant + "/" + mode + "/" + modeArg + "/" + filename)
                else:
                    self.get(filelink, self.deviant + "/" + mode + "/" + filename)
            else:
                print(filelink)
        print(self.deviant + "'s gallery successfully ripped.")

    def group_get(self, mode):
        """Rip a group's gallery or favourites, folder by folder."""
        if mode == "favs":
            strmode = "favby"
            strmode2 = "favourites"
            strmode3 = "favs gallery"
        elif mode == "gallery":
            strmode = "gallery"
            strmode2 = "gallery"
            strmode3 = "gallery"
        else:
            print("?")
            sys.exit()
        print("Ripping " + self.deviant + "'s " + strmode2 + "...")
        folders = []

        insideFolder = False  # are we inside a gallery folder?
        html = self.get('http://' + self.deviant + '.deviantart.com/' + strmode2 + '/')
        if re.search(strmode2 + "/\?set=.+&offset=", html, re.IGNORECASE | re.S):
            insideFolder = True
            folders = re.findall(strmode + ":.+ label=\"[^\"]*\"", html, re.IGNORECASE)

        #no repeats
        folders = list(set(folders))

        # Crawl folder listing pages (10 per page) until a page repeats.
        i = 0
        while not insideFolder:
            html = self.get('http://' + self.deviant + '.deviantart.com/' + strmode2 + '/?offset=' + str(i))
            k = re.findall(strmode + ":" + self.deviant + "/\d+\"\ +label=\"[^\"]*\"", html, re.IGNORECASE)
            if k == []:
                break
            flag = False
            for match in k:
                if match in folders:
                    flag = True
                else:
                    # NOTE(review): this extends by the whole page `k` for
                    # every new match (duplicates are removed below).  Looks
                    # like it was meant to be folders.append(match), but
                    # changing it would alter the duplicate/break detection
                    # above — left as-is.
                    folders += k
            if self.verbose:
                print("Gallery page " + str(int((i / 10) + 1)) + " crawled...")
            if flag:
                break
            i += 10

        #no repeats
        folders = list(set(folders))

        if len(folders) == 0:
            print(self.deviant + "'s " + strmode3 + " is empty.")
            return 0
        else:
            print("Total folders in " + self.deviant + "'s " + strmode3 + " found: " + str(len(folders)))
        if self.reverse:
            folders.reverse()
        pat = "http:\\/\\/[a-zA-Z0-9_-]*\.deviantart\.com\\/art\\/[a-zA-Z0-9_-]*"
        pages = []
        for folder in folders:
            try:
                folderid = re.search("[0-9]+", folder, re.IGNORECASE).group(0)
                label = re.search("label=\"([^\"]*)", folder, re.IGNORECASE).group(1)
            except:
                continue
            for i in range(0, int(Dagr.MAX_DEVIATIONS / 24), 24):
                html = self.get("http://" + self.deviant.lower() + ".deviantart.com/" + strmode2 +
                                "/?set=" + folderid + "&offset=" + str(i - 24))
                prelim = re.findall(pat, html, re.IGNORECASE)
                if not prelim:
                    break
                for x in prelim:
                    # The listing escapes slashes (http:\/\/...); unescape.
                    p = str(re.sub(r'\\/', '/', x))
                    if p not in pages:
                        pages.append(p)
                if self.verbose:
                    print("Page " + str(int((i / 24) + 1)) + " in folder " + label + " crawled...")

            if not self.reverse:
                pages.reverse()
            try:
                if mode == "favs":
                    da_make_dirs(self.deviant + "/favs/" + label)
                elif mode == "gallery":
                    da_make_dirs(self.deviant + "/" + label)
            except Exception as err:
                print(err)
            counter = 0
            for link in pages:
                counter += 1
                if self.verbose:
                    print("Downloading " + str(counter) + " of " + str(len(pages)) + " ( " + link + " )")
                filename = ""
                filelink = ""
                try:
                    filename, filelink = self.find_link(link)
                except (KeyboardInterrupt, SystemExit):
                    raise
                except Exception as e:
                    self.handle_download_error(link, e)
                    continue
                if self.testOnly == False:
                    if mode == "favs":
                        # BUG FIX: was `self.devianti` (AttributeError on
                        # every favs download).
                        self.get(filelink, self.deviant + "/favs/" + label + "/" + filename)
                    elif mode == "gallery":
                        self.get(filelink, self.deviant + "/" + label + "/" + filename)
                else:
                    print(filelink)
        print(self.deviant + "'s " + strmode3 + " successfully ripped.")

    def print_errors(self):
        """Print the per-message download-error tally, if any."""
        if len(self.errors_count):
            print("Download errors count:")
            # BUG FIX: dict.iteritems() is Python 2 only; the class otherwise
            # uses Python 3 print() calls, so use items().
            for error, count in self.errors_count.items():
                print("* " + error + " : " + str(count))