def pdbfixretrieve(joblink, poll_interval=5):
    """Wait for a submitted job to finish and download its two outputs.

    The job page at ``joblink`` is re-opened every ``poll_interval`` seconds
    until it contains at least one link matching ``'stdout'``.

    Args:
        joblink: URL of the job status page.
        poll_interval: Seconds to sleep between polls (default 5, the value
            that used to be hard-coded).

    Returns:
        A ``(stdcontent, pdbcontent)`` tuple holding the raw response bodies
        of the first ``'stdout'`` link and the first ``'outpdb'`` link.

    Raises:
        IndexError: If the finished page has no ``'outpdb'`` link.
    """
    browser = RoboBrowser(history=True)
    browser.open(joblink)
    stdout = browser.get_links('stdout')
    while not stdout:
        time.sleep(poll_interval)
        browser.open(joblink)
        stdout = browser.get_links('stdout')
    # Collect the pdb link before navigating away from the job page.
    pdbout = browser.get_links('outpdb')
    browser.follow_link(stdout[0])
    stdcontent = browser.response.content
    browser.follow_link(pdbout[0])
    pdbcontent = browser.response.content
    # The leftover pdb.set_trace() breakpoint that used to sit here blocked
    # every non-interactive caller and has been removed.
    return stdcontent, pdbcontent
def _download_rib(dir, date):
    """Download the first RIB dump listed for ``date`` and decompress it.

    Args:
        dir: Destination directory prefix; the file name is appended to it
            with plain string concatenation, so it must end with a separator.
        date: Object with ``strftime`` (e.g. ``datetime.datetime``) selecting
            the month folder and the ``rib.YYYYMMDD`` file prefix.

    Returns:
        Whatever ``_decompress_rib`` returns for the downloaded file.

    Raises:
        IndexError: If the listing has no link matching the requested day.
        requests.HTTPError: If the download answers with an error status.
    """
    url = "http://archive.routeviews.org/route-views.wide/bgpdata/"
    month_dir = date.strftime("%Y") + "." + date.strftime("%m") + "/"
    print("Looking for RIB file...")
    br = RoboBrowser()
    br.open(url)
    br.follow_link(br.get_link(month_dir))
    br.follow_link(br.get_link("RIBS/"))
    elem = "rib." + date.strftime("%Y") + date.strftime("%m") + date.strftime("%d")
    one_link = br.get_links(elem)[0]
    # The file name is taken as the first double-quoted value in the
    # rendered anchor tag.
    rib_name = (str(one_link).split('"'))[1]
    url_dw = url + month_dir + "RIBS/" + rib_name
    filename = dir + rib_name
    r = requests.get(url_dw)
    # Without this check an HTTP error page would be written to disk and
    # handed to the decompressor as if it were a RIB archive.
    r.raise_for_status()
    with open(filename, "wb") as code:
        code.write(r.content)
    return _decompress_rib(filename)
def pushedbutton(self, b):
    """Log in with the credentials from the line edits and submit each survey.

    ``b`` is accepted but not used. Status text is written to
    ``self.lineEdit_2``.
    """
    login_page = 'http://web1.cmu.edu.tw/stdinfo/login.asp'
    error_page = "http://web1.cmu.edu.tw/stdinfo/loginerr.asp"
    user_id = self.lineEdit.text()
    secret = self.lineEdit_3.text()

    # Drive the site with RoboBrowser.
    browser = RoboBrowser(history=True)
    browser.open(login_page)
    login_form = browser.get_form(id='form1')
    login_form['f_id'].value = user_id
    login_form['f_pwd'].value = secret
    browser.submit_form(login_form)

    # Guard clause: landing on the error page means the login failed.
    if browser.state.url == error_page:
        self.lineEdit_2.setText('帳號密碼錯了?')
        return

    browser.follow_link(browser.get_link(text=re.compile('.意見調查')))
    pending = [anchor for anchor in browser.get_links(text=re.compile('.填寫.'))]
    # The first matching link is dropped before the surveys are visited.
    pending.pop(0)
    for anchor in pending:
        browser.follow_link(anchor)
        survey = browser.get_form(id='thisform')
        survey['Cos_Q1'].value = '1'
        browser.submit_form(survey)
    self.lineEdit_2.setText('Done!')
def pushedbutton(self,b):
    """Log in with the credentials from the line edits and submit each survey.

    ``b`` is accepted but not used. Status text is written to
    ``self.lineEdit_2``.
    """
    login_page = 'http://web1.cmu.edu.tw/stdinfo/login.asp'
    error_page = "http://web1.cmu.edu.tw/stdinfo/loginerr.asp"
    user_id = self.lineEdit.text()
    secret = self.lineEdit_3.text()

    # Drive the site with RoboBrowser.
    browser = RoboBrowser(history=True)
    browser.open(login_page)
    login_form = browser.get_form(id='form1')
    login_form['f_id'].value = user_id
    login_form['f_pwd'].value = secret
    browser.submit_form(login_form)

    # Guard clause: landing on the error page means the login failed.
    if browser.state.url == error_page:
        self.lineEdit_2.setText('帳號密碼錯了?')
        return

    browser.follow_link(browser.get_link(text=re.compile('.意見調查')))
    pending = [anchor for anchor in browser.get_links(text=re.compile('.填寫.'))]
    # The first matching link is dropped before the surveys are visited.
    pending.pop(0)
    for anchor in pending:
        browser.follow_link(anchor)
        survey = browser.get_form(id='thisform')
        survey['Cos_Q1'].value = '1'
        browser.submit_form(survey)
    self.lineEdit_2.setText('Done!')
def pdbfixsubmit(pdbfile):
    """Submit structure to pdb_fix online tool.

    Args:
        pdbfile: Path of the file to upload. The text before the first
            ``/`` is used as the job label.

    Returns:
        The ``href`` of the first link matching ``'jobs'`` on the page shown
        after submission.

    Raises:
        IndexError: If the response page has no ``'jobs'`` link.
    """
    url = 'http://lorentz.dynstr.pasteur.fr/pdb/frozen_submission.php'
    browser = RoboBrowser(history=True)
    browser.open(url)
    upload_form = browser.get_form()
    label = pdbfile.split("/")[0]
    upload_form['job_name'].value = label + '_atomfix'
    # Keep the file open only for the duration of the upload; it was
    # previously opened and never closed.
    with open(pdbfile, 'r') as structure:
        upload_form['inpdb'].value = structure
        browser.submit_form(upload_form)
    # Single-argument print runs on both Python 2 and 3 and reproduces the
    # original output, including its double space.
    print('submitted  ' + label)
    joblink = browser.get_links('jobs')
    return joblink[0]['href']
class infs_brsr:
    """Browser helper for driving the Infusionsoft front end programmatically.

    Wraps a ``RoboBrowser`` session for one application and logs in lazily
    the first time a method needs an authenticated session.
    """

    def __init__(self, appname, username, password, *args, **kwargs):
        """Store credentials and build the application base URL.

        Extra positional and keyword arguments are accepted and ignored.
        """
        self.loggedin = False
        self.browser = RoboBrowser(history=True)
        self.appname = appname
        self.username = username
        self.password = password
        # Note: no trailing slash; use _url() to append paths.
        self.baseurl = 'https://' + self.appname + '.infusionsoft.com'

    def _url(self, path):
        """Join ``path`` onto ``baseurl`` with exactly one ``/`` between them."""
        return self.baseurl.rstrip('/') + '/' + path.lstrip('/')

    def openbase(self):
        """Open the application base URL."""
        self.browser.open(self.baseurl)

    def login(self):
        """Fill in and submit the first form on the base page, then mark logged in."""
        self.openbase()
        loginform = self.browser.get_form()
        loginform.fields['username'].value = self.username
        loginform.fields['password'].value = self.password
        self.browser.submit_form(loginform)
        # This next step is probably a bad idea.  It needs
        # some form of control: it blindly follows the second link on the
        # page that comes back after submitting the login form.
        self.browser.follow_link(self.browser.get_links()[1])
        self.loggedin = True

    def getapikey(self):
        """Read the encrypted API key from the application settings page.

        Returns:
            Text of the first element whose id is
            ``Application_Encrypted_Key:_data``; also stored on
            ``self.apikey``.
        """
        if not self.loggedin:
            self.login()
        # baseurl has no trailing slash, so plain concatenation used to
        # produce "...infusionsoft.comapp/miscSetting/...".
        self.browser.open(self._url(
            'app/miscSetting/itemWrapper?systemId=nav.admin&settingModuleName=Application&settingTabName=Application'))
        pageSoup = BeautifulSoup(self.browser.response.content, 'html.parser')
        self.apikey = pageSoup.findAll(id='Application_Encrypted_Key:_data')[0].text
        return self.apikey

    def importContactCSV(self, pathToCSV='/home/jlmarks/importme.csv'):
        """Upload a contact CSV through the import wizard form.

        The last form on the wizard page that has an ``id`` field is used;
        that field's value is stored on ``self.thisimportid``.

        Raises:
            ValueError: If no form on the wizard page has an ``id`` field.
        """
        if not self.loggedin:
            self.login()
        importURL = "https://" + self.appname + ".infusionsoft.com/Import/jumpToWizard.jsp?update=false&profileClass=com.infusion.crm.db.importer.profiles.ContactProfile"
        self.browser.open(importURL)
        correctform = None
        for eachform in self.browser.get_forms():
            if 'id' in eachform.fields.keys():
                self.thisimportid = eachform['id'].value
                correctform = eachform
        if correctform is None:
            # Previously this surfaced as an unbound-variable error.
            raise ValueError("no import form with an 'id' field found at " + importURL)
        correctform.fields.pop('Back')
        # Close the CSV once the upload request has been sent.
        with open(pathToCSV, 'rb') as csvfile:
            correctform.fields['importFile'].value = csvfile
            self.browser.submit_form(correctform)
def gather(self):
    """Crawl up to ``self.max_pages`` pages, accumulating links in ``self.list``.

    Every link on the first page is kept; on later pages a link is kept only
    when ``self.isInTheList(link)`` is truthy. After each page the crawler
    follows the link stored at that page's index in ``self.list``.
    """
    crawler = RoboBrowser()
    crawler.open(self.url)
    visited = 0
    while visited < self.max_pages:
        on_first_page = visited == 0
        for anchor in crawler.get_links():
            # Short-circuit keeps isInTheList from being called on page 0.
            if on_first_page or self.isInTheList(anchor):
                self.list.append(anchor)
        crawler.follow_link(self.list[visited])
        visited += 1
def gather(self):
    """Crawl up to ``self.max_pages`` pages, accumulating links in ``self.list``.

    Every link on the first page is kept; on later pages a link is kept only
    when ``self.isInTheList(link)`` is truthy. After each page the crawler
    follows the link stored at that page's index in ``self.list``.
    """
    crawler = RoboBrowser()
    crawler.open(self.url)
    visited = 0
    while visited < self.max_pages:
        on_first_page = visited == 0
        for anchor in crawler.get_links():
            # Short-circuit keeps isInTheList from being called on page 0.
            if on_first_page or self.isInTheList(anchor):
                self.list.append(anchor)
        crawler.follow_link(self.list[visited])
        visited += 1
def _login():
    """Interactively log in and navigate to the chosen account's export page.

    Prompts for user id, password, memorable information and account name.

    Returns:
        The browser, positioned on the page reached by following the
        account's ``Export`` link.

    Raises:
        AssertionError: If the page after login is not the account overview.
        KeyError: If the typed account name is not among those listed.
    """
    username = input('User ID: ')
    # NOTE(review): the source text between the password prompt and
    # 'html.parser' was destroyed by credential redaction. The two statements
    # below are a reconstruction; confirm the RoboBrowser arguments.
    password = getpass('Password: ')
    browser = RoboBrowser(parser='html.parser')
    browser.open('http://online.lloydsbank.co.uk/personal/logon.login.jsp')
    form = browser.get_form('frmLogin')
    form['frmLogin:strCustomerLogin_userID'] = username
    form['frmLogin:strCustomerLogin_pwd'] = password
    browser.submit_form(form)

    mem_info = getpass('Memorable information: ').lower()
    form_name = 'frmentermemorableinformation1'
    option_name = ':strEnterMemorableInformation_memInfo{}'
    form = browser.get_form(form_name)
    # The form text names the 1-based character positions it wants.
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    indices = [int(x) for x in re.findall(r'Character (\d+) :', form.parsed.text)]
    for i, idx in enumerate(indices):
        # Each field value is the requested character prefixed by a space.
        form[form_name + option_name.format(i + 1)] = ' ' + mem_info[idx - 1]
    browser.submit_form(form)
    assert 'Lloyds Bank - Personal Account Overview' in browser.parsed.title

    # Account links are the anchors whose id contains 'lnkAccName'.
    accounts = {}
    for link in browser.get_links():
        if 'lnkAccName' in link.attrs.get('id', ''):
            accounts[link.text] = link
    print('Accounts:', list(accounts))
    account = input('Account: ')
    browser.follow_link(accounts[account])
    export_link = browser.get_link(title='Export')
    browser.follow_link(export_link)
    return browser
def download_internal(user_id, from_date, to_date):
    """Download the csv files for the transaction between the given dates.

    This is a generator: for every "View statement" link on the page shown
    after login it yields ``(acc_name, result)`` where ``result`` is what
    ``download_account_internal(browser, from_date, to_date)`` returns.

    Args:
        user_id: Login id; converted with ``str`` before being submitted.
        from_date: Passed through to ``download_account_internal``.
        to_date: Passed through to ``download_account_internal``.
    """
    # Create the browser and open the lloyds login page
    browser = RoboBrowser(parser='html5lib')
    browser.open(
        'https://online.lloydsbank.co.uk/personal/logon/login.jsp?WT.ac=hpIBlogon'
    )
    # Keep asking for the password until the memorable-information page shows.
    while 'Enter Memorable Information' not in browser.parsed.title.text:
        print(browser.parsed.title.text)
        form = browser.get_form(id='frmLogin')
        form['frmLogin:strCustomerLogin_userID'] = str(user_id)
        # NOTE(review): the source text after this prompt was destroyed by
        # credential redaction. Closing the call and submitting the form is a
        # reconstruction; confirm the prompt() arguments.
        form['frmLogin:strCustomerLogin_pwd'] = prompt('Enter password: ')
        browser.submit_form(form)

    # We're logged in, now enter memorable information
    print(browser.parsed.title.text)
    form = browser.get_form(id='frmentermemorableinformation1')
    field = 'frmentermemorableinformation1:strEnterMemorableInformation_memInfo{0}'
    for i in range(1, 4):
        # The label text for each field is shown to the user as the prompt.
        label = browser.find("label", {"for": field.format(i)})
        form[field.format(i)] = ' ' + prompt(label.text.strip())
    browser.submit_form(form)

    # hopefully now we're logged in...
    print(browser.parsed.title.text)
    # get_links may match loosely, so keep only exact text matches.
    links = [
        link for link in browser.get_links("View statement")
        if link.text == "View statement"
    ]

    # loop through all accounts
    for link in links:
        acc_name = link['data-wt-ac'].split(" resource")[0]
        print(acc_name)
        print(browser.parsed.title)
        browser.follow_link(link)
        yield acc_name, download_account_internal(browser, from_date, to_date)
        # Return to the previous page before following the next account link.
        browser.back()
def main(competition, username, password):
    """Log in to Kaggle and download every ``.zip`` link of a competition.

    Args:
        competition: Competition slug used in ``/c/<competition>/data``.
        username: Value for the login form's ``UserName`` field.
        password: Value for the login form's ``Password`` field.

    Each file is written to the current directory under the link's ``name``
    attribute.
    """
    browser = RoboBrowser(history=True, parser="html.parser")
    base = 'https://www.kaggle.com'
    browser.open('/'.join([base, 'account/login']))
    login_form = browser.get_form(action='/account/login')
    login_form['UserName'] = username
    login_form['Password'] = password
    browser.submit_form(login_form)

    browser.open('/'.join([base, 'c', competition, 'data']))
    files = [a_href for a_href in browser.get_links() if '.zip' in a_href.text]
    print('...downloading {0} files...'.format(len(files)))
    for f in files:
        # Reuse the browser session for the download request.
        request = browser.session.get(base + f.attrs['href'], stream=True)
        with open(f.attrs['name'], "wb") as zip_file:
            # stream=True only helps if the body is consumed in chunks;
            # reading .content loaded the whole archive into memory.
            for chunk in request.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    zip_file.write(chunk)
def download(competition, username, password):
    """Log in to Kaggle and download every ``.zip`` link of a competition.

    Args:
        competition: Competition slug used in ``/c/<competition>/data``.
        username: Value for the login form's ``UserName`` field.
        password: Value for the login form's ``Password`` field.

    Each file is written to the current directory under the link's ``name``
    attribute.
    """
    browser = RoboBrowser(history=True, parser="html.parser")
    base = 'https://www.kaggle.com'
    browser.open('/'.join([base, 'account/login']))
    login_form = browser.get_form(action='/account/login')
    login_form['UserName'] = username
    login_form['Password'] = password
    browser.submit_form(login_form)

    browser.open('/'.join([base, 'c', competition, 'data']))
    files = [a_href for a_href in browser.get_links() if '.zip' in a_href.text]
    print('...downloading {0} files...'.format(len(files)))
    for f in files:
        # Reuse the browser session for the download request.
        request = browser.session.get(base + f.attrs['href'], stream=True)
        with open(f.attrs['name'], "wb") as zip_file:
            # stream=True only helps if the body is consumed in chunks;
            # reading .content loaded the whole archive into memory.
            for chunk in request.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    zip_file.write(chunk)
def pushedbutton(self, b):
    """Log in and answer '3' to CH_1..CH_10 on every listed survey form.

    ``b`` is accepted but not used. Progress text is written to
    ``self.lineEdit_2``.
    """
    user_id = self.lineEdit.text()
    secret = self.lineEdit_3.text()

    # Drive the site with RoboBrowser.
    browser = RoboBrowser(history=True)
    browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
    login_form = browser.get_form(id='form1')
    login_form['f_id'].value = user_id
    login_form['f_pwd'].value = secret
    browser.submit_form(login_form)

    # Guard clause: landing on the error page means the login failed.
    if browser.state.url == "http://web1.cmu.edu.tw/stdinfo/loginerr.asp":
        self.lineEdit_2.setText('帳號密碼錯了?')
        return

    self.lineEdit_2.setText('成功登入,填寫中....')
    browser.follow_link(browser.get_link(text='教師教學意見調查'))
    pending = [anchor for anchor in browser.get_links(text='填寫')]
    # The first matching link is dropped before the surveys are visited.
    pending.pop(0)
    for anchor in pending:
        browser.follow_link(anchor)
        survey = browser.get_form(id='thisform')
        # Fields CH_1 through CH_10, set in ascending order.
        for question in range(1, 11):
            survey['CH_%d' % question].value = '3'
        browser.submit_form(survey)
    self.lineEdit_2.setText('Done!')
def pushedbutton(self,b):
    """Log in and answer '3' to CH_1..CH_10 on every listed survey form.

    ``b`` is accepted but not used. Progress text is written to
    ``self.lineEdit_2``.
    """
    user_id = self.lineEdit.text()
    secret = self.lineEdit_3.text()

    # Drive the site with RoboBrowser.
    browser = RoboBrowser(history=True)
    browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
    login_form = browser.get_form(id='form1')
    login_form['f_id'].value = user_id
    login_form['f_pwd'].value = secret
    browser.submit_form(login_form)

    # Guard clause: landing on the error page means the login failed.
    if browser.state.url == "http://web1.cmu.edu.tw/stdinfo/loginerr.asp":
        self.lineEdit_2.setText('帳號密碼錯了?')
        return

    self.lineEdit_2.setText('成功登入,填寫中....')
    browser.follow_link(browser.get_link(text='教師教學意見調查'))
    pending = [anchor for anchor in browser.get_links(text='填寫')]
    # The first matching link is dropped before the surveys are visited.
    pending.pop(0)
    for anchor in pending:
        browser.follow_link(anchor)
        survey = browser.get_form(id='thisform')
        # Fields CH_1 through CH_10, set in ascending order.
        for question in range(1, 11):
            survey['CH_%d' % question].value = '3'
        browser.submit_form(survey)
    self.lineEdit_2.setText('Done!')
if __name__ == "__main__":
    # Usage: <script> SESSIONS_PAGE_URL [DOWNLOAD_FOLDER]
    sessions_page = sys.argv[1]
    # argv[2] exists only when there are at least three entries; the old
    # "> 1" test raised IndexError when the folder argument was omitted.
    if len(sys.argv) > 2:
        download_folder = sys.argv[2]
        if not os.path.exists(download_folder):
            os.mkdir(download_folder)
    else:
        download_folder = None

    browser = RoboBrowser()
    browser.open(sessions_page)
    lecture_links = browser.get_links(href=re.compile("lecture-"))
    for link in lecture_links:
        # ensure absolute url:
        lecture_url = urljoin(sessions_page, link["href"])
        mp3_url = get_mp3_url(lecture_url)
        if not mp3_url:
            print("No .mp3 at {}".format(lecture_url))
            continue
        try:
            filename = download_file(mp3_url, output_dir=download_folder)
            print("Downloaded {}".format(filename))
        except Exception as exc:
            # NOTE(review): the original snippet is cut off right after the
            # try body, so its handler is unknown. This one reports the
            # failure and moves on to the next lecture; confirm intent.
            print("Failed to download {}: {}".format(mp3_url, exc))
def add_valid_links(self, url):
    """Open ``url`` and pass each link on the page to ``self.add_valid_link``."""
    page = RoboBrowser(parser=PARSER, history=True)
    page.open(url)
    for anchor in page.get_links():
        self.add_valid_link(page, anchor)
def _download_traces(dates, period):
    """Download BGP update traces around each date in ``dates``.

    For every date, the files whose links match ``updates.YYYYMMDD`` are
    downloaded for that day and for ``period`` days before and after it.

    Args:
        dates: Iterable of ``"%Y-%m-%d"`` date strings.
        period: Number of days on each side of the date to include; it also
            selects the output folder (0, 1, or anything else).
    """
    url = "http://archive.routeviews.org/route-views.wide/bgpdata/"
    MRAI_BIN_DIR = "/srv/agarcia/TFM/"  # path to save files
    if period == 0:
        out_dir = MRAI_BIN_DIR + "ONE_DAY_BGP_TRACES_BZ2/"
    elif period == 1:
        out_dir = MRAI_BIN_DIR + "THREE_DAY_BGP_TRACES_BZ2/"
    else:
        out_dir = MRAI_BIN_DIR + "BGP_TRACES_BZ2/"
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    for _dt in dates:
        dt = datetime.datetime.strptime(_dt, "%Y-%m-%d")
        dt_web = dt.strftime("%Y") + "." + dt.strftime("%m") + "/"
        print("Opening browser...")
        br = RoboBrowser()
        br.open(url)
        # Find the link for the wanted month and follow it.
        br.follow_link(br.get_link(dt_web))
        # Find the UPDATES link and follow it.
        br.follow_link(br.get_link("UPDATES/"))

        # Collect `period` days before and after the target date,
        # in chronological order.
        days = [dt]
        for day_p in range(1, period + 1):
            days.insert(0, dt - datetime.timedelta(days=day_p))
            days.append(dt + datetime.timedelta(days=day_p))
        print(days)

        # For each day, download all of its BGP update traces.
        for day in days:
            print("Downloading files of day " + day.strftime("%Y-%m-%d") + "\n")
            elem = "updates." + day.strftime("%Y") + day.strftime("%m") + day.strftime("%d")
            _dt_web = day.strftime("%Y") + "." + day.strftime("%m") + "/"
            # Step back from <month>/UPDATES/ to the archive root.
            br.back()
            br.back()
            # Navigate to the month of *this* day. The old code re-entered
            # the target date's month (dt_web), so days falling in a
            # neighbouring month were listed from the wrong folder while the
            # download URL below already used _dt_web.
            br.follow_link(br.get_link(_dt_web))
            br.follow_link(br.get_link("UPDATES/"))
            for link in br.get_links(elem):
                # File name = first double-quoted value in the anchor tag.
                trace_name = (str(link).split('"'))[1]
                url_dw = url + _dt_web + "UPDATES/" + trace_name
                filename = out_dir + trace_name
                r = requests.get(url_dw)
                with open(filename, "wb") as code:
                    code.write(r.content)
# Interactive (IPython) session history exploring requests and RoboBrowser.
# Most lines are bare expressions whose value was displayed by the REPL;
# run as a plain script they have no visible effect.
# NOTE(review): `r` and `requests` are defined before this excerpt starts.
r.url
r.status_code
r.headers["location"]
# Trailing "?" is IPython help syntax, not valid plain Python.
requests.request?
import robobrowser
from robobrowser import RoboBrowser
# First attempt, with parser name "lxml.html".
b = RoboBrowser(parser="lxml.html")
b
b.open("http://www.chandrashekar.info")
b.url
b.contents
b.response
b.response.status_code
b.links
dir(b)
b.get_links()
# Second attempt: a fresh browser using the "lxml" parser instead.
b = RoboBrowser(parser="lxml")
b.open("http://www.chandrashekar.info")
b.get_links()
b.get_links()
b.forms
dir(b)
b.get_links()
b.get_links()[-3]
# Follow the third-from-last link, then move back and forward through
# the browser history, checking the URL after each step.
l = b.get_links()[-3]
b.follow_link(l)
b.url
b.back()
b.url
b.forward()
b.url
# -*- coding: utf-8 -*-
# Log in to the CMU student information site and submit every listed
# survey form with Cos_Q1 set to '1'.
import re
import sys
from robobrowser import RoboBrowser
from getpass import getpass

account = input('account:')
# NOTE(review): the source text between the password prompt and the login
# URL was destroyed by credential redaction. The browser construction and
# open() call are a reconstruction; confirm the RoboBrowser arguments.
password = getpass('password:')
browser = RoboBrowser(history=True)
browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
form1 = browser.get_form(id='form1')
form1['f_id'].value = account
form1['f_pwd'].value = password
browser.submit_form(form1)

link_one = browser.get_link(text='期中網路教學意見調查')
browser.follow_link(link_one)

# Named survey_links rather than `list` so the builtin is not shadowed.
survey_links = [l for l in browser.get_links(text=re.compile('填寫'))]
# The first matching link is dropped before the surveys are visited.
survey_links.pop(0)
for li in survey_links:
    browser.follow_link(li)
    form2 = browser.get_form(id='thisform')
    form2['Cos_Q1'].value = '1'
    browser.submit_form(form2)
class MarketPoster:
    """Posts and removes price-guide offers on forums.zybez.net.

    The query strings previously contained ``§ion=`` where ``&section=`` was
    meant: ``&sect`` had been decoded as the HTML entity for the section
    sign, corrupting both the login URL and the trade-add form action.
    """

    BASE_URL = 'http://forums.zybez.net'
    LOGIN_URL = BASE_URL + '/index.php?' \
        'app=curseauth&module=global&section=login'
    TRADE_ADD_ACTION = BASE_URL + '/index.php?app=priceguide&module=' \
        'public&section=action&do=trade-add'

    def __init__(self):
        self.login_url = self.LOGIN_URL
        self.browser = RoboBrowser(history=False, parser='html.parser')
        self.logged_in = False

    def login(self, login_name, login_password):
        """Submit the sign-in form.

        Returns:
            True (and sets ``self.logged_in``) when the browser ends up on
            the forum index URL, otherwise False.
        """
        self.browser.open(self.login_url)
        sign_in_form = self.browser.get_form(class_='authentication-box')
        sign_in_form['ips_username'].value = login_name
        sign_in_form['ips_password'].value = login_password
        self.browser.submit_form(sign_in_form)
        correct_url = 'http://forums.zybez.net/index.php'
        if self.browser.url == correct_url:
            self.logged_in = True
            return True
        return False

    def deleteItemPosts(self, post):
        """Follow every trade-delete link on the page for ``post.item_name``."""
        item_url = self.getItemURL(post.item_name)
        self.browser.open(item_url)
        items_to_delete = self.browser.get_links(href=re.compile(
            "do=trade-delete"))
        for i in items_to_delete:
            self.browser.follow_link(i)
        # Finish on the item page again so callers can use its forms.
        self.browser.open(item_url)

    def deleteAllPosts(self):
        """Follow the 'Remove all active offers' link if the page has one."""
        self.browser.open('http://forums.zybez.net/runescape-2007-prices')
        delete_button = self.browser.get_link('Remove all active offers')
        if delete_button is not None:
            self.browser.follow_link(delete_button)

    def getItemURL(self, item_name):
        """Return the price-guide page URL for ``item_name``.

        The item id comes from the ``id`` key of the JSON returned by the
        item API endpoint.
        """
        # Whitespace-separated words are joined with '+'.
        item_name = '+'.join(item_name.split())
        item_data_url = 'http://forums.zybez.net' \
            '/runescape-2007-prices/api/item/' + item_name
        item_data_dict = self.browser.session.get(item_data_url).json()
        item_id = item_data_dict['id']
        item_url = 'http://forums.zybez.net/runescape-2007-prices/' \
            + str(item_id) + '-' + item_name
        return item_url

    def postItem(self, post):
        """Replace any existing offers for the item with the one in ``post``."""
        price = post.price
        quantity = post.quantity
        note = post.note
        offer_type = post.offer_type
        contact_method = post.contact_method

        # Existing offers for the item are removed first.
        self.deleteItemPosts(post)
        post_item_form = self.browser.find(action=self.TRADE_ADD_ACTION)
        post_item_form = self.browser.get_form(post_item_form)

        # Fill out and submit form
        post_item_form['type'].value = str(int(offer_type))
        post_item_form['qty'].value = quantity
        post_item_form['price'].value = price
        post_item_form['notes'].value = note
        post_item_form['contact'].value = str(int(contact_method))
        self.browser.submit_form(post_item_form)
# Search DuckDuckGo for "python" and print every link on the result page.
import re
from robobrowser import RoboBrowser

browser = RoboBrowser()
browser.open("https://duckduckgo.com")

# Must find the proper id in the html
form = browser.get_form(id="search_form_homepage")
# A bare `form` expression (REPL leftover with no effect in a script)
# used to sit here and has been removed.
form["q"].value = "python"
browser.submit_form(form)

links = browser.get_links()
for link in links:
    print(link)
# -*- coding: utf-8 -*-
# Log in to the CMU student information site and submit every listed
# survey form with Cos_Q1 set to '1'.
import re
import sys
from robobrowser import RoboBrowser
from getpass import getpass

account = input('account:')
# NOTE(review): the source text between the password prompt and the login
# URL was destroyed by credential redaction. The browser construction and
# open() call are a reconstruction; confirm the RoboBrowser arguments.
password = getpass('password:')
browser = RoboBrowser(history=True)
browser.open('http://web1.cmu.edu.tw/stdinfo/login.asp')
form1 = browser.get_form(id='form1')
form1['f_id'].value = account
form1['f_pwd'].value = password
browser.submit_form(form1)

link_one = browser.get_link(text='期中網路教學意見調查')
browser.follow_link(link_one)

# Named survey_links rather than `list` so the builtin is not shadowed.
survey_links = [l for l in browser.get_links(text=re.compile('填寫'))]
# The first matching link is dropped before the surveys are visited.
survey_links.pop(0)
for li in survey_links:
    browser.follow_link(li)
    form2 = browser.get_form(id='thisform')
    form2['Cos_Q1'].value = '1'
    browser.submit_form(form2)
class KadenzeClient(object):
    """Scripted kadenze.com client that logs in and downloads course videos."""

    def __init__(self):
        self.conf = Settings()
        self.base_url = "https://www.kadenze.com"
        self.session = Session()
        self.browser = RoboBrowser(
            history=True,
            session=self.session,
            parser="lxml",
            allow_redirects=False,
        )

    def execute_login(self):
        """Submit the ``login_user`` form with the configured credentials."""
        print("Signing in www.kadenze.com ...")
        self.browser.open(self.base_url)
        login_form = self.browser.get_form(id="login_user")
        login_form['user[email]'].value = self.conf.login
        login_form['user[password]'].value = self.conf.password
        # The Referer header is set on the session before submitting.
        self.browser.session.headers["Referer"] = self.base_url
        self.browser.submit_form(login_form)

    def list_courses(self):
        """Return the courses parsed from the base page by the helpers module."""
        self.browser.open(self.base_url)
        payload = self.browser.parsed()[0].text
        return helpers.get_courses_from_json(payload)

    def list_sessions(self, course):
        """Return the sessions the helpers module extracts for ``course``."""
        self.browser.open("/".join((self.base_url, "courses", course, "sessions")))
        anchors = self.browser.get_links()
        return helpers.get_sessions_from_links(course, anchors)

    def list_videos(self, url):
        """Return the videos described by the ``#video_json`` element at ``url``."""
        self.browser.open(self.base_url + url)
        payload = self.browser.select("#video_json")[0]["value"]
        return helpers.get_videos_from_json(payload, self.conf.video_format)

    def download_videos_per_session(self, course, session, session_videos):
        """Write every video of one session into its own directory."""
        print("Parsing session: {0}".format(session))
        for position, video_url in enumerate(session_videos):
            filename = helpers.extract_filename(video_url)
            # The directory prefix is derived from the first video only.
            if position == 0:
                session_prefix = helpers.extract_session_prefix(filename)
            target_dir = "/".join(
                (self.conf.path, course, session_prefix + "-" + session)
            )
            os.makedirs(target_dir, exist_ok=True)
            helpers.write_video(video_url, target_dir, filename)

    def download_course_videos(self, course):
        """Download the videos of every session of ``course``."""
        sessions = self.list_sessions(course)
        # All video lists are fetched before any download starts.
        videos = []
        for session_url in sessions:
            videos.append(self.list_videos(session_url))
        for session_data, session_videos in zip(sessions, videos):
            trimmed = session_data.replace("courses/", "")
            trimmed = trimmed.replace("sessions/", "")
            pieces = trimmed.split("/")
            # Course and session names are the last two path segments.
            course, session = pieces[-2], pieces[-1]
            self.download_videos_per_session(course, session, session_videos)

    def download_all_courses_videos(self):
        """Log in and download each configured course the account lists."""
        self.execute_login()
        enrolled_courses = []
        for listed in self.list_courses():
            enrolled_courses.append(helpers.format_course(listed))
        wanted = set(self.conf.courses).intersection(enrolled_courses)
        for course in wanted:
            print("Parsing course: {0}".format(course))
            self.download_course_videos(course)
# Interactive (IPython) session history: a weather API call via requests,
# then exploring pypi.org with RoboBrowser. Most lines are bare expressions
# whose value was displayed by the REPL.
# NOTE(review): forecast_url, weather_url, api_key and requests are defined
# before this excerpt starts.
forecast_url
weather_url
weather_url.format(CITY="Bengaluru", APIKEY=api_key)
q = weather_url.format(CITY="Bengaluru", APIKEY=api_key)
r = requests.get(q)
r
r.ok
r.json()
from robobrowser import RoboBrowser
br = RoboBrowser(parser="lxml")
br
br.open("http://pypi.org/")
br.response
br.response.ok
br.url
br.get_links()
# The next line is a typo from the session (unterminated string literal);
# it is a syntax error and was retried correctly on the following line.
br.get_link("Register)
br.get_link("Register")
br.get_link(text="Register")
br.get_link()
# IPython magic call recorded in the history; it requires an IPython shell.
get_ipython().run_line_magic('pinfo', 'br.get_link')
br.get_link(text="Register")
br.get_link(text_re="Register")
br.get_forms()
br.get_form()
# Fill the form's "q" field with "xml" and submit it.
f = br.get_form()
f["q"]
f["q"] = "xml"
br.submit_form(f)
br.url
br.response.ok
# Search DuckDuckGo for "python" and print every link on the result page.
import re
from robobrowser import RoboBrowser

browser = RoboBrowser()
browser.open("https://duckduckgo.com")

# Must find the proper id in the html
form = browser.get_form(id = "search_form_homepage")
# A bare `form` expression (REPL leftover with no effect in a script)
# used to sit here and has been removed.
form["q"].value = "python"
browser.submit_form(form)

links = browser.get_links()
for link in links:
    print(link)
# Script that opens the Ingress intel page, attempts a Google login, and then
# prepares a second session carrying the CSRF token.
# NOTE(review): the commented-out lines suggest this was being ported from
# mechanize to RoboBrowser and the port looks unfinished; statement nesting
# below was reconstructed from a whitespace-collapsed source, so confirm it.
from robobrowser import RoboBrowser
import os

# Credentials are read from environment variables (None when unset).
GOOGLE_USER = os.getenv('INGRESS_GOOGLE_USER')
GOOGLE_PASS = os.getenv('INGRESS_GOOGLE_PASS')

# NOTE(review): `requests` and `cookielib` are used here but are not imported
# in the visible code.
s = requests.Session()
s.cookies = cookielib.LWPCookieJar()
# cookiejar = cookielib.LWPCookieJar()
# browser = mechanize.Browser()
browser = RoboBrowser(user_agent='TestBot', history=True, session=s)
# browser.set_cookiejar(cookiejar)
browser.open('http://ingress.com/intel')
# NOTE(review): `url_regex=` is the mechanize keyword; verify that
# RoboBrowser.get_links accepts it.
for link in browser.get_links(url_regex='ServiceLogin'):
    browser.follow_link(link)
# NOTE(review): select_form / .form / submit() look like mechanize API;
# confirm RoboBrowser provides them.
browser.select_form(nr=0)
browser.form['Email'] = GOOGLE_USER
browser.form['Passwd'] = GOOGLE_PASS
browser.submit()
# req = mechanize.Request('http://www.ingress.com/rpc/dashboard.getGameScore', '{"method": "dashboard.getGameScore"}')
# Second session: 'method' is sent as a header, and the csrftoken cookie
# value from the first session is copied into X-CSRFToken.
s2 = requests.Session()
s2.headers['method'] = 'dashboard.getGameScore'
for cookie in s.cookies:
    if cookie.name == 'csrftoken':
        # req.add_header('X-CSRFToken', cookie.value)
        s2.headers['X-CSRFToken'] = cookie.value
# NOTE(review): add_cookie_header is being handed a Session here; confirm
# this is the intended argument.
s.cookies.add_cookie_header(s2)
browser = RoboBrowser(session=s2)
class ISServer:
    """Infusionsoft XML-RPC client that scrapes its own API key at start-up.

    Written for Python 2 (``raw_input``, ``xmlrpclib``, CSV files opened in
    ``'wb'`` mode). The syntax below also parses on Python 3.
    """

    # Rows requested per DataService.query call; a shorter page ends paging.
    _PAGE_SIZE = 1000

    def __init__(self):
        # Credentials come from the module-level ``pw`` mapping.
        self.pw = pw
        self.startingpath = os.path.abspath(os.curdir)
        self.infusionsoftapp = self.getappname()
        self.baseurl = 'https://' + self.infusionsoftapp + '.infusionsoft.com/'
        self.infusionsoftAPIKey = self.getapikey()
        self.appurl = "https://" + self.infusionsoftapp + ".infusionsoft.com:443/api/xmlrpc"
        self.connection = xmlrpclib.ServerProxy(self.appurl)

    def getappname(self):
        """Prompt for the application name and strip surrounding whitespace."""
        return raw_input("Please enter appname:").strip('\n \t')

    def getapikey(self):
        """Log in through the web front end and read the encrypted API key.

        Basically: add username and password to your global variables.
        """
        username = self.pw['username']
        password = self.pw['password']
        self.browser = RoboBrowser(history=True)
        self.browser.open(self.baseurl)
        logform = self.browser.get_form()
        logform.fields['username'].value = username
        logform.fields['password'].value = password
        self.browser.submit_form(logform)
        # Follows the second link on the page returned after login.
        self.browser.follow_link(self.browser.get_links()[1])
        self.browser.open(self.baseurl + 'app/miscSetting/itemWrapper?systemId=nav.admin&settingModuleName=Application&settingTabName=Application')
        pageSoup = BeautifulSoup(self.browser.response.content, 'html.parser')
        return pageSoup.findAll(id='Application_Encrypted_Key:_data')[0].text

    ########################################################
    ## Methods to get records from various tables
    ##

    def getMatchingRecords(self, tableName, criteria, desiredFields=None, orderedBy=None):
        """Search a table by criteria."""
        return self.getAllRecords(tableName, searchCriteria=criteria, interestingData=desiredFields, orderedBy=orderedBy)

    def getTagCats(self):
        """Return all ``ContactGroupCategory`` records."""
        return self.getAllRecords("ContactGroupCategory")

    def getAllTags(self):
        """Return all ``ContactGroup`` records."""
        return self.getAllRecords("ContactGroup")

    def getAllProductCats(self):
        """Return all ``ProductCategory`` records."""
        return self.getAllRecords("ProductCategory")

    def getAllProducts(self):
        """Return all ``Product`` records."""
        return self.getAllRecords("Product")

    def _defaults(self, tableName, interestingData, searchCriteria, orderedBy):
        """Fill in omitted query arguments: table fields, no filter, first field."""
        if interestingData is None:
            interestingData = tables[tableName]
        if searchCriteria is None:
            searchCriteria = {}
        if orderedBy is None:
            orderedBy = interestingData[0]
        return interestingData, searchCriteria, orderedBy

    def _query_page(self, tableName, page, searchCriteria, interestingData, orderedBy):
        """Fetch one page of rows through DataService.query."""
        return self.connection.DataService.query(
            self.infusionsoftAPIKey, tableName, self._PAGE_SIZE, page,
            searchCriteria, interestingData, orderedBy, True)

    @staticmethod
    def _project(row, fields):
        """Return ``row`` restricted to ``fields``; absent fields become None."""
        return dict((field, row.get(field)) for field in fields)

    def _dump_chunk(self, tableName, page, fields, records):
        """Write ``records`` to ``<tableName><page as 10 digits>.csv``."""
        fname = tableName + "%010d" % (page) + ".csv"
        print("writing %s %s" % (page, fname))
        # 'wb' is the Python 2 convention for csv files.
        with open(fname, 'wb') as outfile:
            # Using the requested field list gives a stable column order and
            # works for an empty batch, where records[0] raised IndexError.
            thisWriter = csv.DictWriter(outfile, fields)
            thisWriter.writeheader()
            thisWriter.writerows(records)

    def getAllRecords(self, tableName, interestingData=None, searchCriteria=None, orderedBy=None):
        """Return every matching row of ``tableName`` as a list of dicts.

        Args:
            tableName: Table to query.
            interestingData: Field names to return; defaults to
                ``tables[tableName]``.
            searchCriteria: Filter mapping; defaults to no filter.
            orderedBy: Sort field; defaults to the first requested field.

        Returns:
            One dict per row containing exactly the requested fields, with
            ``None`` for fields the server did not send.
        """
        interestingData, searchCriteria, orderedBy = self._defaults(
            tableName, interestingData, searchCriteria, orderedBy)
        records = []
        p = 0
        while True:
            listOfDicts = self._query_page(tableName, p, searchCriteria, interestingData, orderedBy)
            records.extend(self._project(each, interestingData) for each in listOfDicts)
            # A page that is not full is the last one.
            if len(listOfDicts) != self._PAGE_SIZE:
                break
            p += 1
        return records

    def incrementlyGetRecords(self, tableName, interestingData=None, searchCriteria=None, orderedBy=None):
        """Page through ``tableName`` and write the rows to CSV files.

        A file is written after every tenth full page and once more at the
        end. Arguments are as for ``getAllRecords``. Returns ``None``.
        """
        interestingData, searchCriteria, orderedBy = self._defaults(
            tableName, interestingData, searchCriteria, orderedBy)
        records = []
        p = 0
        while True:
            print("%s %s" % (tableName, p))
            print("trying!")
            try:
                listOfDicts = self._query_page(tableName, p, searchCriteria, interestingData, orderedBy)
            except Exception as e:
                print("%s %s" % (e, p))
                # Stop paging and flush what was collected. The old code
                # carried on with the previous page's rows (duplicating
                # them) or an undefined name when the first query failed.
                break
            records.extend(self._project(each, interestingData) for each in listOfDicts)
            if len(listOfDicts) != self._PAGE_SIZE:
                break
            p += 1
            if p % 10 == 0:
                self._dump_chunk(tableName, p, interestingData, records)
                records = []
        self._dump_chunk(tableName, p, interestingData, records)
class tat:
    """Exporter for one Infusionsoft application.

    On construction the object creates/enters a working directory named after
    the application, logs in through a RoboBrowser session to read the API
    key, builds an ``ISServer`` client and then runs :meth:`menu`.

    NOTE: this class targets Python 2 (``raw_input``, ``tkFileDialog``, CSV
    files opened in ``"wb"`` mode).  ``print`` is written in the
    single-argument parenthesised form, which behaves identically there.
    """

    # Credentials are read from a module-level ``pw`` mapping (see getapikey).
    global pw

    # Report classes requested by downloadAllReports().
    REPORT_NAMES = (
        "AffiliateActivitySummary",
        "AffiliateLedger",
        "AffiliateRedirectActivity",
        "AffiliateReferral",
        "AffPayout",
        "AllOrders",
        "AllSales",
        "AllSalesItemized",
        "ARAgingReport",
        "CampaigneeBasic",
        "CampaigneeByDay",
        "CampaignProductConversion",
        "ClickThroughPercentage",
        "ClickThroughPercentageByEmail",
        "ContactDistributed",
        "CProgramRevenueSummary",
        "CreditCard",
        "CreditsIssued",
        "CustomerLifetimeValue",
        "DailyPayments",
        "DailyReceivables",
        "DailySalesTotals",
        "DashboardCampaign",
        "DashboardEmail",
        "DashboardLeads",
        "DashboardOrders",
        "DashboardUsers",
        "DigitalProductKey",
        "EmailBatchSearch",
        "EmailBroadcastConversionReport",
        "EmailConversion",
        "EmailSentSearch",
        "FailedCharge",
        "FaxBatchSearch",
        "FollowUpSequenceConversionReport",
        "FunnelFlowRecipient",
        "FunnelFlowRecipientWaiting",
        "FunnelGoalAchieved",
        "FunnelQueuedFlowItem",
        "FunnelUniqueContacts",
        "GroupAdds",
        "HabeasDetail",
        "InvoiceNetIncome",
        "LeadSourceConversion",
        "LeadSourceIncome",
        "LeadSourceROI",
        "LeadSourceROIByCategory",
        "MonthlyPayments",
        "MonthlyReceivables",
        "MonthlySalesTotals",
        "MonthlySalesTotalsByProduct",
        "OptOutSearch",
        "PaymentsReport",
        "PieceResponse",
        "ProductNetIncome",
        "Receivables",
        "RevenueForecastReport",
        "TaskSearch",
        "VoiceBatchSearch",
        "VoiceOptOutSearch",
        "WebformActivitySummary",
        "WebFormTracking",
    )

    # Tables that inchandleAPItables() skips and reports as already completed.
    _COMPLETED_TABLES = (
        "LeadSourceExpense",
        "DataFormTab",
        "GroupAssign",
        "AffResource",
        "InvoiceItem",
        "UserGroup",
        "CProgram",
        "ActionSequence",
        "Template",
        "LeadSource",
        "Status",
        "Campaignee",
        "DataFormField",
        "OrderItem",
        "DataFormGroup",
        "ProductOptValue",
        "ContactGroup",
        "Company",
        "TicketStage",
        "ProductCategoryAssign",
        "ContactGroupAssign",
    )

    def __init__(self, appname=None):
        """Prepare the working directory for ``appname`` and run the menu.

        When ``appname`` is empty or None it is asked for interactively.
        """
        self.startingpath = os.path.abspath(os.curdir)
        self.appname = appname if appname else self.getappname()
        self.apppath = os.path.join(self.startingpath, self.appname)
        if not os.path.exists(self.apppath):
            os.mkdir(self.apppath)
        os.chdir(self.apppath)
        # Table name -> FormId value used to pick that table's custom fields
        # out of the DataFormField records.
        self.mapping = {
            "Contact": -1,
            "Affiliate": -3,
            "ContactAction": -5,
            "Company": -6,
            "OrderItem": -9,
        }
        self.menu()

    def menu(self, context="initial"):
        """Set up the session (for the initial context) and run one action."""
        # Compare by value: identity comparison of strings only works by
        # accident of interning.
        if context == "initial":
            self.baseurl = "https://" + self.appname + ".infusionsoft.com/"
            self.apikey = self.getapikey()
            self.svr = ISServer.ISServer(self.appname, self.apikey)
            if not os.path.exists(self.apppath):
                os.mkdir(self.apppath)
            os.chdir(self.apppath)
            if not os.path.exists("files"):
                os.mkdir("files")
            os.chdir("files")
            self.usermenu = {
                "downloadAPITables": "apit",
                "play": "play",
                "reports": "rpts",
            }
        # The interactive prompt (print each usermenu entry, then raw_input)
        # is disabled; the choice is currently fixed to "play".
        thisChoice = "play"
        if thisChoice == "apit":
            self.handleAPItables()
        elif thisChoice == "play":
            self.play()
        elif thisChoice == "rpts":
            self.downloadAllReports()
        else:
            self.inchandlefiles()

    def _customfieldnames(self, tablename):
        """Return the "_"-prefixed custom field names belonging to ``tablename``.

        Tables without an entry in ``self.mapping`` are registered with the
        placeholder FormId 99.  Requires ``self.customfields`` to be loaded.
        """
        formid = self.mapping.setdefault(tablename, 99)
        # ``==`` rather than ``is``: FormId values come from the server and
        # are not guaranteed to be the same int objects as the mapping's.
        return ["_" + fld["Name"] for fld in self.customfields if fld["FormId"] == formid]

    def handlefiles(self):
        """Download every FileBox record to ``files/<ContactId>/<FileName>``."""
        os.chdir(self.startingpath)
        if not os.path.exists("files"):
            os.mkdir("files")
        os.chdir("files")
        for eachfile in self.svr.getAllRecords("FileBox"):
            downloadurl = self.baseurl + "Download?Id=" + str(eachfile["Id"])
            self.browser.open(downloadurl)
            # str(): ContactId may be numeric, which os.path.join rejects.
            fileoutpath = os.path.join(
                self.startingpath, "files", str(eachfile["ContactId"]), eachfile["FileName"]
            )
            # Create the parent directory, not a directory at the file path
            # itself (which would make the open() below fail).
            outdir = os.path.dirname(fileoutpath)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            with open(fileoutpath, "wb") as fout:
                fout.write(self.browser.response.content)

    def inchandleAPItables(self):
        """Incrementally fetch every ISServer table not listed as completed."""
        apidata = {}
        self.customfields = self.svr.getAllRecords("DataFormField")
        for eachtable in ISServer.tables.keys():
            if eachtable in self._COMPLETED_TABLES:
                print("already completed " + eachtable)
                continue
            print("starting " + eachtable)
            fields = ISServer.tables[eachtable] + self._customfieldnames(eachtable)
            self.svr.incrementlyGetRecords(eachtable, interestingData=fields)
            print("done writing " + eachtable)
        # Nothing is collected in memory on this path; apidata stays empty.
        self.apidata = apidata

    def inchandleAPItable(self, tablename):
        """Incrementally fetch a single table, including its custom fields."""
        self.customfields = self.svr.getAllRecords("DataFormField")
        fields = ISServer.tables[tablename] + self._customfieldnames(tablename)
        self.svr.incrementlyGetRecords(tablename, interestingData=fields)
        print("done writing " + tablename)

    def inchandlefiles(self):
        """Return to the starting directory and delegate to svr.incgetfiles."""
        os.chdir(self.startingpath)
        self.svr.incgetfiles(self.browser)

    def downloadContact0files(self, numberofmostrecentfilestodownload):
        """Fetch the last N FileBox records whose ContactId is 0.

        A non-positive N downloads nothing (a plain ``[-0:]`` slice would
        select every record).
        """
        count = int(numberofmostrecentfilestodownload)
        if count <= 0:
            return
        thesefiles = self.svr.getAllRecords("FileBox", searchCriteria={"ContactId": 0})
        for eachfile in thesefiles[-count:]:
            print("doing " + str(eachfile))
            self.svr.getfile(self.browser, eachfile)

    def play(self):
        """Placeholder action: only prints a message."""
        print("she's all yours captain!")

    def downloadAReport(self, reportname):
        """Export one report to ``<reportname>.csv`` in the current directory."""
        self.browser.open(self.baseurl + "Reports/exportResults.jsp?reportClass=" + reportname)
        reportForm = [
            eachform for eachform in self.browser.get_forms() if eachform.action == "qbExport.jsp"
        ]
        if not reportForm:
            print("no " + reportname)
            return
        exportform = reportForm[0]
        self.browser.submit_form(exportform, submit=exportform.submit_fields["process"])
        with open(reportname + ".csv", "wb") as outfile:
            outfile.write(self.browser.response.content)

    def downloadAllReports(self):
        """Run downloadAReport for every name in REPORT_NAMES."""
        for reportname in self.REPORT_NAMES:
            self.downloadAReport(reportname)

    def getFilePath(self):
        """Ask for a file through the tkFileDialog open dialog."""
        return tkFileDialog.askopenfilename()

    def getFolderPath(self):
        """Ask for a directory through the tkFileDialog directory dialog."""
        return tkFileDialog.askdirectory()

    def getappname(self):
        """Prompt for the application name and strip surrounding whitespace."""
        return raw_input("Please enter appname:").strip("\n \t")

    def getapikey(self):
        """Log in with the credentials in the global ``pw`` and return the API key.

        ``pw`` must be a module-level mapping with "username" and "password"
        entries.  The browser session is kept on ``self.browser`` for later
        downloads.
        """
        username = pw["username"]
        password = pw["password"]
        self.browser = RoboBrowser(history=True)
        self.browser.open(self.baseurl)
        logform = self.browser.get_form()
        logform.fields["username"].value = username
        logform.fields["password"].value = password
        self.browser.submit_form(logform)
        # NOTE(review): follows the second link of the post-login page; the
        # purpose of that hop is not visible here - confirm it is still needed.
        self.browser.follow_link(self.browser.get_links()[1])
        self.browser.open(
            self.baseurl
            + "app/miscSetting/itemWrapper?systemId=nav.admin&settingModuleName=Application&settingTabName=Application"
        )
        pageSoup = BeautifulSoup(self.browser.response.content, "html.parser")
        return pageSoup.findAll(id="Application_Encrypted_Key:_data")[0].text

    def handleAPItables(self):
        """Fetch every ISServer table in full and write each to ``<table>.csv``."""
        apidata = {}
        self.customfields = self.svr.getAllRecords("DataFormField")
        for eachtable in ISServer.tables.keys():
            print("starting " + eachtable)
            fields = ISServer.tables[eachtable] + self._customfieldnames(eachtable)
            apidata[eachtable] = self.svr.getAllRecords(eachtable, interestingData=fields)
            with open(eachtable + ".csv", "wb") as outfile:
                writer = csv.DictWriter(outfile, fields)
                writer.writeheader()
                writer.writerows(apidata[eachtable])
            print("done writing " + eachtable)
        self.apidata = apidata

    def handlewebforms(self):
        """Not implemented.

        Idea noted by the author: iterate over ids and open
        "https://<appname>.infusionsoft.com/app/webformSubmission/contactTabDetails?customFormWebResultId=<id>".
        """
        pass

    def creditCardsToCSV(self):
        """Write all CreditCard records to ``pyDatas/ccs.csv`` under the starting path."""
        ccs = self.svr.getAllRecords(
            "CreditCard",
            interestingData=[
                "Id",
                "ContactId",
                "CardType",
                "Last4",
                "ExpirationMonth",
                "ExpirationYear",
                "Email",
                "StartDateMonth",
                "StartDateYear",
                "Status",
            ],
        )
        if not ccs:
            # The header is derived from the first record, so there is
            # nothing meaningful to write for an empty result.
            print("no credit cards to write")
            return
        os.chdir(self.startingpath)
        if not os.path.exists("pyDatas"):
            os.mkdir("pyDatas")
        os.chdir("pyDatas")
        with open("ccs.csv", "wb") as outfile:
            thiswriter = csv.DictWriter(outfile, ccs[0].keys())
            thiswriter.writeheader()
            thiswriter.writerows(ccs)
        print("File written to " + str(os.path.abspath(os.curdir)))
        os.chdir(self.startingpath)

    def contactsToCSV(self):
        """Write all Contact records (with custom fields) to ``pyDatas/contacts.csv``."""
        os.chdir(self.startingpath)
        self.customfields = self.svr.getAllRecords("DataFormField")
        fields = ISServer.tables["Contact"] + [
            "_" + fld["Name"] for fld in self.customfields if fld["FormId"] == -1
        ]
        cons = self.svr.getAllRecords("Contact", interestingData=fields)
        if not cons:
            # Same reason as creditCardsToCSV: the header needs a first record.
            print("no contacts to write")
            return
        if not os.path.exists("pyDatas"):
            os.mkdir("pyDatas")
        os.chdir("pyDatas")
        with open("contacts.csv", "wb") as outfile:
            thiswriter = csv.DictWriter(outfile, cons[0].keys())
            thiswriter.writeheader()
            thiswriter.writerows(cons)
# pip3 install robobrowser
#
# Crawl outward from the start page, collecting tags into ``font`` until
# either 50000 have been gathered or there are no unvisited links left.
from collections import deque
from urllib.parse import urljoin

from robobrowser import RoboBrowser

START_URL = 'https://cdn.hackerrank.com/hackerrank/static/contests/capture-the-flag/infinite/qds.html'

browser = RoboBrowser()
browser.open(START_URL)

history = [START_URL]   # pages opened, in visiting order
seen = {START_URL}      # every URL ever queued, so each page is fetched once
links = deque()         # absolute URLs still waiting to be visited


def _queue_new_links():
    """Queue the current page's links that have not been seen before."""
    for link in browser.get_links():
        href = link.get('href')
        if not href:
            continue
        # Resolve against the page the link was found on, so a relative href
        # still points at the right place when it is opened later.
        target = urljoin(browser.url, href)
        if target not in seen:
            seen.add(target)
            links.append(target)


font = browser.find_all('font')
_queue_new_links()

# A worklist replaces the original "remove from the list while iterating it"
# loop, which skipped entries and could spin forever once only already
# visited links remained.
while len(font) < 50000 and links:
    target = links.popleft()
    browser.open(target)
    history.append(target)
    # NOTE(review): find_all() without a tag name collects every tag on the
    # page, unlike the initial find_all('font') - confirm this is intended.
    font.extend(browser.find_all())
    _queue_new_links()

print(font)
class fullexporter():
    """Interactive exporter for one Infusionsoft application.

    Construction asks for the application name and immediately runs
    :meth:`menu`, which logs in, builds an ``ISServer`` client and dispatches
    the action chosen at the prompt.

    NOTE: this class targets Python 2 (``raw_input``, ``tkFileDialog``, CSV
    files opened in ``'wb'`` mode).  ``print`` is written in the
    single-argument parenthesised form, which behaves identically there.
    """

    # Credentials are read from a module-level ``pw`` mapping (see getapikey).
    global pw

    # Report classes requested by downloadAllReports().
    REPORT_NAMES = (
        "AffiliateActivitySummary",
        "AffiliateLedger",
        "AffiliateRedirectActivity",
        "AffiliateReferral",
        "AffPayout",
        "AllOrders",
        "AllSales",
        "AllSalesItemized",
        "ARAgingReport",
        "CampaigneeBasic",
        "CampaigneeByDay",
        "CampaignProductConversion",
        "ClickThroughPercentage",
        "ClickThroughPercentageByEmail",
        "ContactDistributed",
        "CProgramRevenueSummary",
        "CreditCard",
        "CreditsIssued",
        "CustomerLifetimeValue",
        "DailyPayments",
        "DailyReceivables",
        "DailySalesTotals",
        "DashboardCampaign",
        "DashboardEmail",
        "DashboardLeads",
        "DashboardOrders",
        "DashboardUsers",
        "DigitalProductKey",
        "EmailBatchSearch",
        "EmailBroadcastConversionReport",
        "EmailConversion",
        "EmailSentSearch",
        "FailedCharge",
        "FaxBatchSearch",
        "FollowUpSequenceConversionReport",
        "FunnelFlowRecipient",
        "FunnelFlowRecipientWaiting",
        "FunnelGoalAchieved",
        "FunnelQueuedFlowItem",
        "FunnelUniqueContacts",
        "GroupAdds",
        "HabeasDetail",
        "InvoiceNetIncome",
        "LeadSourceConversion",
        "LeadSourceIncome",
        "LeadSourceROI",
        "LeadSourceROIByCategory",
        "MonthlyPayments",
        "MonthlyReceivables",
        "MonthlySalesTotals",
        "MonthlySalesTotalsByProduct",
        "OptOutSearch",
        "PaymentsReport",
        "PieceResponse",
        "ProductNetIncome",
        "Receivables",
        "RevenueForecastReport",
        "TaskSearch",
        "VoiceBatchSearch",
        "VoiceOptOutSearch",
        "WebformActivitySummary",
        "WebFormTracking",
    )

    def __init__(self):
        """Ask for the application name, set up the FormId mapping, run the menu."""
        self.startingpath = os.path.abspath(os.curdir)
        self.appname = self.getappname()
        # Table name -> FormId value used to pick that table's custom fields
        # out of the DataFormField records.
        self.mapping = {
            'Contact': -1,
            'Affiliate': -3,
            'ContactAction': -5,
            'Company': -6,
            'OrderItem': -9,
        }
        self.menu()

    def menu(self, context="initial"):
        """Set up the session (for the initial context), prompt, and dispatch."""
        # Compare by value: identity comparison of strings only works by
        # accident of interning.
        if context == "initial":
            self.baseurl = 'https://' + self.appname + '.infusionsoft.com/'
            self.apikey = self.getapikey()
            self.svr = ISServer.ISServer(self.appname, self.apikey)
            self.apppath = os.path.join(self.startingpath, self.appname)
            if not os.path.exists(self.apppath):
                os.mkdir(self.apppath)
            os.chdir(self.apppath)
            if not os.path.exists('files'):
                os.mkdir('files')
            os.chdir('files')
            self.usermenu = {
                'downloadAPITables': 'apit',
                'play': 'play',
                'reports': 'rpts',
            }
        for eachitem in self.usermenu:
            print(eachitem + ":\t" + self.usermenu[eachitem])
        thisChoice = raw_input('please make a choice: ').strip(' \n\t')
        if thisChoice == 'apit':
            self.handleAPItables()
        elif thisChoice == 'play':
            self.play()
        elif thisChoice == 'rpts':
            self.downloadAllReports()
        else:
            # Unknown choice: start the menu over.
            self.menu()

    def handlefiles(self):
        """Download every FileBox record to ``files/<ContactId>/<FileName>``."""
        os.chdir(self.startingpath)
        if not os.path.exists('files'):
            os.mkdir('files')
        os.chdir('files')
        for eachfile in self.svr.getAllRecords('FileBox'):
            downloadurl = self.baseurl + "Download?Id=" + str(eachfile['Id'])
            self.browser.open(downloadurl)
            # str(): ContactId may be numeric, which os.path.join rejects.
            fileoutpath = os.path.join(
                self.startingpath, 'files', str(eachfile['ContactId']), eachfile['FileName']
            )
            # Create the parent directory, not a directory at the file path
            # itself (which would make the open() below fail).
            outdir = os.path.dirname(fileoutpath)
            if not os.path.exists(outdir):
                os.makedirs(outdir)
            with open(fileoutpath, 'wb') as fout:
                fout.write(self.browser.response.content)

    def play(self):
        """Placeholder action: only prints a message."""
        print("she's all yours captain!")

    def downloadAReport(self, reportname):
        """Export one report to ``<reportname>.csv`` in the current directory."""
        self.browser.open(self.baseurl + "Reports/exportResults.jsp?reportClass=" + reportname)
        reportForm = [
            eachform for eachform in self.browser.get_forms() if eachform.action == 'qbExport.jsp'
        ]
        if not reportForm:
            print("no " + reportname)
            return
        exportform = reportForm[0]
        self.browser.submit_form(exportform, submit=exportform.submit_fields['process'])
        with open(reportname + ".csv", 'wb') as outfile:
            outfile.write(self.browser.response.content)

    def downloadAllReports(self):
        """Run downloadAReport for every name in REPORT_NAMES."""
        for reportname in self.REPORT_NAMES:
            self.downloadAReport(reportname)

    def getFilePath(self):
        """Ask for a file through the tkFileDialog open dialog."""
        return tkFileDialog.askopenfilename()

    def getFolderPath(self):
        """Ask for a directory through the tkFileDialog directory dialog."""
        return tkFileDialog.askdirectory()

    def getappname(self):
        """Prompt for the application name and strip surrounding whitespace."""
        return raw_input("Please enter appname:").strip('\n \t')

    def getapikey(self):
        """Log in with the credentials in the global ``pw`` and return the API key.

        ``pw`` must be a module-level mapping with 'username' and 'password'
        entries.  The browser session is kept on ``self.browser`` for later
        downloads.
        """
        username = pw['username']
        password = pw['password']
        self.browser = RoboBrowser(history=True)
        self.browser.open(self.baseurl)
        logform = self.browser.get_form()
        logform.fields['username'].value = username
        logform.fields['password'].value = password
        self.browser.submit_form(logform)
        # NOTE(review): follows the second link of the post-login page; the
        # purpose of that hop is not visible here - confirm it is still needed.
        self.browser.follow_link(self.browser.get_links()[1])
        self.browser.open(
            self.baseurl
            + 'app/miscSetting/itemWrapper?systemId=nav.admin&settingModuleName=Application&settingTabName=Application'
        )
        pageSoup = BeautifulSoup(self.browser.response.content, 'html.parser')
        return pageSoup.findAll(id='Application_Encrypted_Key:_data')[0].text

    def handleAPItables(self):
        """Fetch every ISServer table in full and write each to ``<table>.csv``."""
        apidata = {}
        self.customfields = self.svr.getAllRecords('DataFormField')
        for eachtable in ISServer.tables.keys():
            print("starting " + eachtable)
            # Tables without a known FormId get the placeholder 99.
            formid = self.mapping.setdefault(eachtable, 99)
            # ``==`` rather than ``is``: FormId values come from the server
            # and are not guaranteed to be the same int objects.
            fields = ISServer.tables[eachtable] + [
                '_' + fld['Name'] for fld in self.customfields if fld['FormId'] == formid
            ]
            apidata[eachtable] = self.svr.getAllRecords(eachtable, interestingData=fields)
            with open(eachtable + ".csv", 'wb') as outfile:
                writer = csv.DictWriter(outfile, fields)
                writer.writeheader()
                writer.writerows(apidata[eachtable])
            print("done writing " + eachtable)
        self.apidata = apidata

    def handlewebforms(self):
        """Not implemented.

        Idea noted by the author: iterate over ids and open
        "https://<appname>.infusionsoft.com/app/webformSubmission/contactTabDetails?customFormWebResultId=<id>".
        """
        pass
def scrap_process(self, storage):
    """Log in, download the linked supply spreadsheets and pickle them.

    ``storage`` is used as the URL of the page to open.  After logging in
    through the page's ``aspnetForm``, every link matching
    'Поставки по жд за' is downloaded into the current directory, the
    spreadsheets are concatenated into one DataFrame and saved to
    ``data.pkl``.  Returns None.
    """
    # Template notes: you can iterate over ids, get a list of objects from
    # any API, or iterate through the pages of any site.  Do not forget to
    # skip already gathered data.
    # Example request headers (unused):
    #   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
    #   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    #   'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4',
    #   'Accept-Encoding': 'gzip, deflate, sdch'

    def auth():
        """Fill in and submit the ASP.NET login form on the current page."""
        LoginForm = q.get_form(id="aspnetForm")
        # NOTE(review): credentials are hard-coded in source; they should be
        # moved to configuration or the environment.
        LoginForm['ctl00$ctl00$ModalLogin1$LoginTextBox'].value = 'avtostrada'
        LoginForm['ctl00$ctl00$ModalLogin1$PasswordTextBox'].value = '5ed8f5'
        # Raw strings: "\<" and "\/" are not valid escape sequences, so the
        # backslashes were already kept literally; the raw prefix yields the
        # same text without the invalid-escape warning.
        LoginForm.add_field(
            robobrowser.forms.fields.Input(
                r'\<input name="__EVENTARGUMENT" value="" \/\>'))
        LoginForm.add_field(
            robobrowser.forms.fields.Input(
                r'\<input name="__EVENTTARGET" value="ctl00$ctl00$ModalLogin1$ButtonFilter" \/\>'
            ))
        q.submit_form(
            LoginForm,
            submit=LoginForm['ctl00$ctl00$ModalLogin1$ButtonFilter'])

    def get_download_url(link):
        """Build the absolute download URL for a link's href."""
        return 'http://brokenstone.ru/' + link.get('href')

    def save_download(durl):
        """Fetch ``durl`` and save it under the name from Content-Disposition."""
        file1 = q.session.get(durl)
        # A Content-Type check ('application/octet-stream') used to guard
        # this block; it is currently disabled.
        nameFile = (file1.headers['Content-Disposition']).split('=')
        xls_file = './{0}'.format(nameFile[1])
        logger.info(xls_file)
        with open(xls_file, "wb") as output:
            output.write(file1.content)
        return nameFile[1]

    url = storage
    logger.info("Начинаем загрузку")
    q = RoboBrowser()
    q.open(url)
    state = q.state
    if not state.response.ok:
        logger.error(state.response.text)
        # then continue process, or retry, or fix your code
        return

    auth()
    supply_links = q.get_links('Поставки по жд за')
    durls = [get_download_url(link) for link in supply_links]
    logger.info(durls)
    downloadFales = [save_download(durl) for durl in durls]
    logger.info(downloadFales)
    if not downloadFales:
        # pd.concat raises ValueError when given nothing to concatenate.
        logger.error("No supply files were downloaded")
        return
    dataframe = pd.concat([pd.read_excel(name) for name in downloadFales])
    dataframe.to_pickle('data.pkl')
    logger.info(u'Данные сохранены')

    # Note: here json can be used as response.json
    # data = q.response
    # save scrapped objects here
    # you can save url to identify already scrapped objects
    # storage.write_data([url + '\t' + data.replace('\n', '')])

#%%
#Scrapper.scrap_process(Scrapper,'http://brokenstone.ru/supplyfileexport.aspx')
#procompiledRegex = re.compile(regex, re.MULTILINE | re.DOTALL) # log in to FIT browser.open('https://cas.fit.vutbr.cz') form = browser.get_form(action='/cosign.cgi') form['login'].value = config["login"] form['password'].value = config["password"] browser.submit_form(form) # open video server browser.open('https://video1.fit.vutbr.cz/av/records-categ.php?id=1') # get links to semesters links = browser.get_links(class_='link') del links[:1] # regex - get link names # ^<a class="link" .+id=\d{1,4}">(.+)<\/a> # but use of BeautifullSoup is advised instead # BeatifulSoup(html_doc) # titleName = soup.title.name print("\nSemesters") i = 0 for link in links: print("{} - {}".format(i, link.text)) i += 1 # select semester