def get_linkedin_viewer_count(username=None, password=None):
    """Log in to LinkedIn with twill and return the profile viewer count.

    Resets the twill browser, sets a desktop Chrome User-Agent header,
    submits the form named "login" on the LinkedIn home page, then reads the
    JSON payloads found under the ``viewers_list-content`` element of the
    profile-viewers page.

    Args:
        username: value entered into the ``session_key`` login field.
        password: value entered into the ``session_password`` login field.

    Returns:
        ``['content']['wvmx_profile_viewers']['viewersCount']`` of the last
        JSON payload found on the page.

    Raises:
        LinkedInFailure: if the page cannot be parsed, contains no payload,
            or the payload does not have the expected structure.
    """
    from twill import get_browser
    from twill.commands import add_extra_header, go, fv, submit, reset_browser

    reset_browser()
    add_extra_header(
        'User-Agent',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/43.0.2357.132 Safari/537.36')
    go("https://www.linkedin.com/nhome/")
    fv("login", 'session_key', username)
    fv("login", 'session_password', password)
    submit()
    go('http://www.linkedin.com/wvmx/profile?trk=nav_responsive_sub_nav_wvmp')

    # Stays None when the container has no children, so that case can be
    # reported as LinkedInFailure instead of surfacing as a NameError below.
    user_listing = None
    try:
        container = get_browser().result.lxml.get_element_by_id(
            'viewers_list-content')
        for child in container.iterchildren():
            # The literal text \u002d is replaced by '-' before parsing;
            # only the last child's payload is kept.
            user_listing = simplejson.loads(
                child.text.replace('\\u002d', '-'))
    except Exception as e:
        log.err('Failed to extract user_listing from page: {error}'.format(
            error=e))
        raise LinkedInFailure()

    if user_listing is None:
        log.err('Failed to extract user_listing from page: {error}'.format(
            error='no entries under viewers_list-content'))
        raise LinkedInFailure()

    try:
        return user_listing['content']['wvmx_profile_viewers']['viewersCount']
    except (KeyError, TypeError):
        # TypeError covers payloads that are valid JSON but not nested dicts.
        log.err('Profile view struct in unknown format: {user_listing}'.format(
            user_listing=user_listing))
        raise LinkedInFailure()
def main():
    """Optionally log in to the Django site, then fetch and show each data URL.

    Credentials come from get_credentials(); the login form is only submitted
    when both values are truthy. DATA_URL may be a single string or an
    iterable of URLs. A URL that fails a twill assertion is reported on
    stdout with its HTTP status code, and the remaining URLs are still tried.
    The twill browser is reset once all URLs have been processed.
    """
    login, password = get_credentials()

    # log-in to Django site
    if login and password:
        tw.go(LOGIN_URL)
        for field, value in (('username', login), ('password', password)):
            tw.formvalue('1', field, value)
        tw.submit()

    # normalise DATA_URL to a list of URLs
    urls = [DATA_URL] if isinstance(DATA_URL, basestring) else list(DATA_URL)

    # retrieve URIs
    for url in urls:
        try:
            tw.go(url)
            tw.code('200')
            tw.show()
        except TwillAssertionError:
            status = get_browser().get_code()
            message = (u"Unable to access %(url)s. "
                       u"Received HTTP #%(code)s.") % {'url': url,
                                                       'code': status}
            print(message)

    tw.reset_browser()
def tearDown(self):
    """Undo the global state changed for the twill-driven test.

    Restores settings.DEBUG_PROPAGATE_EXCEPTIONS from self.old_dbe, removes
    the WSGI intercept on 127.0.0.1:8080, resets the twill browser and puts
    self.real_get back as the cache's ``get``.
    """
    # An error on any of these lines probably means
    # base.TwillTests.setUp was never run.
    setattr(settings, 'DEBUG_PROPAGATE_EXCEPTIONS', self.old_dbe)

    intercept_host, intercept_port = '127.0.0.1', 8080
    twill.remove_wsgi_intercept(intercept_host, intercept_port)
    tc.reset_browser()

    restored_get = self.real_get
    django.core.cache.cache.get = restored_get
def main():
    """Optionally log in to the Django site, then fetch and show each data URL.

    Credentials come from get_credentials(); the login form is only submitted
    when both values are truthy. DATA_URL may be a single string or an
    iterable of URLs. A URL that fails a twill assertion is reported on
    stdout with its HTTP status code, and the remaining URLs are still tried.
    The twill browser is reset once all URLs have been processed.
    """
    login, password = get_credentials()

    # log-in to Django site
    have_credentials = bool(login and password)
    if have_credentials:
        tw.go(LOGIN_URL)
        tw.formvalue('1', 'username', login)
        tw.formvalue('1', 'password', password)
        tw.submit()

    # a single URL string is wrapped; anything else is copied into a list
    if isinstance(DATA_URL, basestring):
        targets = [DATA_URL]
    else:
        targets = list(DATA_URL)

    # retrieve URIs
    for target in targets:
        try:
            tw.go(target)
            tw.code('200')
            tw.show()
        except TwillAssertionError:
            details = {'url': target, 'code': get_browser().get_code()}
            print(u"Unable to access %(url)s. "
                  u"Received HTTP #%(code)s." % details)

    tw.reset_browser()
def get_cert(cert):
    """Fetch the FDIC confirmation page for ``cert`` and process it once.

    Certificates already present in ``completed_certs`` are skipped. For any
    other certificate the twill browser is reset, the confirmation page is
    loaded and the certificate number is written to ``log``. If
    ``bhc_cert_href`` matches anything in the page HTML the certificate is
    handled by ``get_bhc`` (logged as " holding"), otherwise by ``get_bank``
    (logged as " bank").

    Any exception is printed rather than raised, so one failing certificate
    does not stop the caller (best-effort behaviour of the original).
    """
    try:
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if cert in completed_certs:
            return

        reset_browser()
        # One-element tuple so a tuple-valued cert cannot break formatting.
        go('http://www2.fdic.gov/idasp/ExternalConfirmation.asp?inCert1=%s'
           % (cert,))
        log.write(cert)

        html = get_browser().get_html()
        if bhc_cert_href.findall(html):
            get_bhc(cert)
            log.write(' holding\n')
        else:
            get_bank(cert)
            log.write(' bank\n')
        log.flush()
    except Exception as e:
        print(e)
def authAndRedirect(username, password):
    """Submit credentials through form 1 and return the resulting HTML.

    The twill browser is reset, SYS_REDIRECT_URL is loaded, the "username"
    and "password" fields of form '1' are filled in, the form's action is
    pointed at AUTH_URL and the form is submitted. The HTML of the page the
    browser ends up on is returned.
    """
    tw.reset_browser()
    tw.go(SYS_REDIRECT_URL)

    credentials = (("username", username), ("password", password))
    for field_name, field_value in credentials:
        tw.fv('1', field_name, field_value)

    tw.formaction('1', AUTH_URL)
    tw.submit()

    browser = tw.get_browser()
    return browser.get_html()
def download_with_login(url, login_url, login=None, password=None, ext='',
                        username_field='username',
                        password_field='password', form_id=1):
    ''' Download a URI from a website using Django by logging in first

    1. Logs in using supplied login & password (if provided)
    2. Create a temp file on disk using extension if provided
    3. Write content of URI into file

    url: address of the resource to download.
    login_url: address of the page holding the login form.
    login, password: credentials; the login step is skipped unless both
        are truthy.
    ext: optional extension (without dot) for the temporary file.
    username_field, password_field: names of the login form fields.
    form_id: twill identifier of the login form.

    Returns the path of the temporary file holding the page content.
    Raises DownloadFailed when the URL does not answer with HTTP 200. '''

    try:
        # log-in to Django site
        if login and password:
            tw.go(login_url)
            tw.formvalue('%s' % form_id, username_field, login)
            tw.formvalue('%s' % form_id, password_field, password)
            tw.submit()

        # retrieve URI
        try:
            tw.go(url)
            tw.code('200')
        except TwillAssertionError:
            code = get_browser().get_code()
            raise DownloadFailed(u"Unable to download %(url)s. "
                                 u"Received HTTP #%(code)s."
                                 % {'url': url, 'code': code})

        # capture what tw.show() prints instead of sending it to stdout
        buff = StringIO.StringIO()
        twill.set_output(buff)
        try:
            tw.show()
        finally:
            twill.set_output(None)
    finally:
        # ensure we don't keep credentials, whatever went wrong above
        tw.reset_browser()

    content = buff.getvalue()
    buff.close()

    # write file on disk
    suffix = '.%s' % ext if ext else ''
    fileh, filename = tempfile.mkstemp(suffix=suffix)
    try:
        os.write(fileh, content)
    finally:
        # always release the descriptor, even if the write fails
        os.close(fileh)
    return filename
def download_with_login(url, login_url, login=None, password=None, ext='',
                        username_field='username',
                        password_field='password', form_id=1):
    ''' Download a URI from a website using Django by logging in first

    1. Logs in using supplied login & password (if provided)
    2. Create a temp file on disk using extension if provided
    3. Write content of URI into file

    url: address of the resource to download.
    login_url: address of the page holding the login form.
    login, password: credentials; the login step is skipped unless both
        are truthy.
    ext: optional extension (without dot) for the temporary file.
    username_field, password_field: names of the login form fields.
    form_id: twill identifier of the login form.

    Returns the path of the temporary file holding the page content.
    Raises DownloadFailed when the URL does not answer with HTTP 200. '''

    try:
        # log-in to Django site
        if login and password:
            tw.go(login_url)
            tw.formvalue('%s' % form_id, username_field, login)
            tw.formvalue('%s' % form_id, password_field, password)
            tw.submit()

        # retrieve URI
        try:
            tw.go(url)
            tw.code('200')
        except TwillAssertionError:
            code = get_browser().get_code()
            raise DownloadFailed(u"Unable to download %(url)s. "
                                 u"Received HTTP #%(code)s."
                                 % {'url': url, 'code': code})

        # capture what tw.show() prints instead of sending it to stdout
        buff = StringIO.StringIO()
        twill.set_output(buff)
        try:
            tw.show()
        finally:
            twill.set_output(None)
    finally:
        # ensure we don't keep credentials, whatever went wrong above
        tw.reset_browser()

    content = buff.getvalue()
    buff.close()

    # write file on disk
    suffix = '.%s' % ext if ext else ''
    fileh, filename = tempfile.mkstemp(suffix=suffix)
    try:
        os.write(fileh, content)
    finally:
        # always release the descriptor, even if the write fails
        os.close(fileh)
    return filename
def get_linkedin_viewer_count(username=None, password=None):
    """Log in to LinkedIn with twill and return the profile viewer count.

    Resets the twill browser, sets a desktop Chrome User-Agent header, finds
    the login form on the LinkedIn home page by looking for the
    ``session_key`` and ``session_password`` fields, submits it, then reads
    the JSON payloads found under the ``viewers_list-content`` element of the
    profile-viewers page.

    Args:
        username: value entered into the ``session_key`` login field.
        password: value entered into the ``session_password`` login field.

    Returns:
        ``['content']['wvmx_profile_viewers']['viewersCount']`` of the last
        JSON payload found on the page.

    Raises:
        LinkedInFailure: if no login form is found, the page cannot be
            parsed, it contains no payload, or the payload does not have the
            expected structure.
    """
    from twill import get_browser
    from twill.commands import add_extra_header, go, fv, submit, reset_browser
    from twill.errors import TwillException

    reset_browser()
    add_extra_header(
        'User-Agent',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/43.0.2357.132 Safari/537.36')
    go("https://www.linkedin.com/nhome/")

    # Added because LinkedIn login page no longer names the login form.
    b = get_browser()
    form_num = ''
    for n, f in enumerate(b.get_all_forms()):
        try:
            b.get_form_field(f, "session_key")
            b.get_form_field(f, "session_password")
            # twill form numbers are passed as 1-based strings; when several
            # forms match, the last one wins.
            form_num = str(n + 1)
        except TwillException:
            # This form lacks one of the fields; try the next one.
            pass
    if form_num == '':
        log.err('Failed to parse LinkedIn login page - page format may have changed.')
        raise LinkedInFailure()

    fv(form_num, 'session_key', username)
    fv(form_num, 'session_password', password)
    submit()
    go('http://www.linkedin.com/wvmx/profile?trk=nav_responsive_sub_nav_wvmp')

    # Stays None when the container has no children, so that case can be
    # reported as LinkedInFailure instead of surfacing as a NameError below.
    user_listing = None
    try:
        container = get_browser().result.lxml.get_element_by_id(
            'viewers_list-content')
        for child in container.iterchildren():
            # The literal text \u002d is replaced by '-' before parsing;
            # only the last child's payload is kept.
            user_listing = simplejson.loads(
                child.text.replace('\\u002d', '-'))
    except Exception as e:
        log.err('Failed to extract user_listing from page: {error}'.format(error=e))
        raise LinkedInFailure()

    if user_listing is None:
        log.err('Failed to extract user_listing from page: {error}'.format(
            error='no entries under viewers_list-content'))
        raise LinkedInFailure()

    try:
        return user_listing['content']['wvmx_profile_viewers']['viewersCount']
    except (KeyError, TypeError):
        # TypeError covers payloads that are valid JSON but not nested dicts.
        log.err('Profile view struct in unknown format: {user_listing}'.format(user_listing=user_listing))
        raise LinkedInFailure()
def _execute_script(inp, **kw):
    """
    Execute lines taken from a file-like iterator.

    Keyword options read from ``kw``:

      no_reset    -- if true, the browser is not reset before running.
      initial_url -- if set, this URL is visited before the first command.
      never_fail  -- if true, command errors are reported on commands.ERR
                     and execution continues instead of re-raising.
      source      -- label used in position and error messages
                     (default "<input>").

    A SystemExit raised by a command stops the script without an error.
    """
    # initialize new local dictionary & get global + current local
    namespaces.new_local_dict()
    try:
        globals_dict, locals_dict = namespaces.get_twill_glocals()

        locals_dict['__url__'] = commands.browser.get_url()

        # reset browser
        if not kw.get('no_reset'):
            commands.reset_browser()

        # go to a specific URL?
        init_url = kw.get('initial_url')
        if init_url:
            commands.go(init_url)
            locals_dict['__url__'] = commands.browser.get_url()

        # should we catch exceptions on failure?
        catch_errors = bool(kw.get('never_fail'))

        # sourceinfo stuff
        sourceinfo = kw.get('source', "<input>")

        # Count from 1 so reported positions match the line numbers an
        # editor shows for the script (blank lines are counted too).
        for n, line in enumerate(inp, 1):
            if not line.strip():            # skip empty lines
                continue

            cmdinfo = "%s:%d" % (sourceinfo, n)
            print('AT LINE: %s' % cmdinfo)

            cmd, args = parse_command(line, globals_dict, locals_dict)
            if cmd is None:
                continue

            try:
                execute_command(cmd, args, globals_dict, locals_dict, cmdinfo)
            except SystemExit:
                # abort script execution, if a SystemExit is raised.
                return
            except TwillAssertionError as e:
                # NOTE(review): the line breaks inside this message were
                # reconstructed -- confirm against the intended layout.
                message = (
                    "Oops! Twill assertion error on line %d of '%s' "
                    "while executing\n"
                    "\n"
                    "  >> %s\n"
                    "\n"
                    "%s\n"
                ) % (n, sourceinfo, line.strip(), e)
                commands.ERR.write(message + '\n')
                if not catch_errors:
                    raise
            except Exception as e:
                # NOTE(review): line breaks reconstructed here as well.
                message = (
                    "EXCEPTION raised at line %d of '%s'\n"
                    "\n"
                    "      %s\n"
                    "\n"
                    "Error message: '%s'\n"
                    "\n"
                ) % (n, sourceinfo, line.strip(), str(e).strip())
                commands.ERR.write(message + '\n')
                if not catch_errors:
                    raise
    finally:
        # Drop the local dictionary pushed by new_local_dict() above, on
        # every exit path (normal end, SystemExit return, or error).
        namespaces.pop_local_dict()
def stopTest(self, test):
    """Reset the twill browser after a test whose instance has ``_twill`` set.

    The test case instance is looked up with get_test_case_instance(). When
    its ``_twill`` attribute is missing or falsy nothing happens; otherwise
    the twill browser is reset and ``_twill`` is cleared to None.
    """
    instance = get_test_case_instance(test)
    if not getattr(instance, "_twill", None):
        return

    # twill is imported only on this path, i.e. when _twill was set.
    from twill.commands import reset_browser
    reset_browser()
    instance._twill = None
def pobierzPlan(user, password):
    """Log in to the Edukacja site and collect the class listings as HTML.

    After logging in, the "zapisy.do" link is followed. If form "1" there has
    an 'ineSluId' select control, every value returned by
    pobierzElementySelect() is submitted in turn and the result of
    pobierzZajecia() is collected for each; otherwise pobierzZajecia() is
    called once. Results equal to "skreslony z kierunku" are left out.
    Finally the session is logged out and the browser state is cleared.

    Args:
        user: value for the 'login' form field.
        password: value for the 'password' form field.

    Returns:
        A list of the collected HTML results, or the string
        "Bledne dane logowania" / "zalogowany w innej sesji" when the
        corresponding login check reports a problem.
    """
    tablicaHTMLow = []
    commands.add_extra_header(
        'User-agent',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6')
    commands.clear_cookies()    # clear cookies
    commands.reset_browser()    # restart the browser
    commands.reset_output()

    commands.go("https://edukacja.pwr.wroc.pl/EdukacjaWeb/studia.do")
    commands.showlinks()        # debug output, marked for removal
    commands.formclear('1')     # clear the login form
    commands.formvalue('1', 'login', user)
    commands.formvalue('1', 'password', password)
    commands.submit('0')        # press the log-in button

    print("Linki po submit")    # debug output, marked for removal
    commands.showlinks()        # debug output, marked for removal

    # Check the page for a login error / a session open elsewhere.
    czyBledneLogowanie = sprawdzCzyBledneLogowanie(commands.show())
    czyLogowanieWInnejSesji = sprawdzCzyLogowanieWInnejSesji(commands.show())
    # Compared against True explicitly: the helpers' return types are not
    # visible here, so plain truthiness could change behaviour.
    if czyBledneLogowanie == True:
        return "Bledne dane logowania"
    if czyLogowanieWInnejSesji == True:
        return "zalogowany w innej sesji"

    commands.follow("zapisy.do")    # go to the enrolment page

    # Debug output, marked for removal: page state after following the link.
    print("Co po kliknieciu Zapisy")
    commands.showlinks()
    print("Forms:")
    commands.showforms()
    commands.showlinks()

    control = None
    try:
        control = commands.browser.get_form("1").find_control(
            'ineSluId', type="select")
    except Exception:
        # No such select on this page (or no form "1"); handled below.
        print("Nie ma selecta.")

    if control is not None:
        # Parse the select's values and build replacement options for it.
        values = pobierzElementySelect(commands.show())
        select_options = utworzTabeleSelect(values)
        select_attrs = {'id': 'some_id'}

        for v in values:
            form = commands.get_browser().get_form("1")
            ct = commands.browser.get_form("1").find_control(
                'ineSluId', type="select")
            # Replace the select with the rebuilt one, then let the form
            # pick up the change.
            add_select_to_form(form, 'ineSluId', select_attrs,
                               select_options)
            form.fixup()
            commands.showforms()
            commands.formvalue("1", ct.name, v.strip())
            commands.submit('0')

            html = pobierzZajecia(commands)     # fetch the classes
            commands.follow("zapisy.do")        # back to the enrolment page
            # Skip results reporting removal from the course.
            if html != "skreslony z kierunku":
                tablicaHTMLow.append(html)
    else:
        # No select: fetch the classes of the single course directly.
        html = pobierzZajecia(commands)
        if html != "skreslony z kierunku":
            tablicaHTMLow.append(html)

    wyloguj(commands)
    commands.browser.clear_cookies()    # remove cookies
    commands.reset_browser()            # restart the browser
    commands.reset_output()
    return tablicaHTMLow
def twill_teardown():
    """Remove the WSGI intercept on 127.0.0.1:8080 and reset the twill browser."""
    intercept_host, intercept_port = '127.0.0.1', 8080
    twill.remove_wsgi_intercept(intercept_host, intercept_port)
    tc.reset_browser()
def pobierzPlan(user, password):
    """Log in to the Edukacja site and collect the class listings as HTML.

    After logging in, the "zapisy.do" link is followed. If form "1" there has
    an 'ineSluId' select control, every value returned by
    pobierzElementySelect() is submitted in turn and the result of
    pobierzZajecia() is collected for each; otherwise pobierzZajecia() is
    called once. Results equal to "skreslony z kierunku" are left out.
    Finally the session is logged out and the browser state is cleared.

    Args:
        user: value for the 'login' form field.
        password: value for the 'password' form field.

    Returns:
        A list of the collected HTML results, or the string
        "Bledne dane logowania" / "zalogowany w innej sesji" when the
        corresponding login check reports a problem.
    """
    tablicaHTMLow = []
    commands.add_extra_header(
        'User-agent',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.0.6')
    commands.clear_cookies()    # clear cookies
    commands.reset_browser()    # restart the browser
    commands.reset_output()

    commands.go("https://edukacja.pwr.wroc.pl/EdukacjaWeb/studia.do")
    commands.showlinks()        # debug output, marked for removal
    commands.formclear('1')     # clear the login form
    commands.formvalue('1', 'login', user)
    commands.formvalue('1', 'password', password)
    commands.submit('0')        # press the log-in button

    print("Linki po submit")    # debug output, marked for removal
    commands.showlinks()        # debug output, marked for removal

    # Check the page for a login error / a session open elsewhere.
    czyBledneLogowanie = sprawdzCzyBledneLogowanie(commands.show())
    czyLogowanieWInnejSesji = sprawdzCzyLogowanieWInnejSesji(commands.show())
    # Compared against True explicitly: the helpers' return types are not
    # visible here, so plain truthiness could change behaviour.
    if czyBledneLogowanie == True:
        return "Bledne dane logowania"
    if czyLogowanieWInnejSesji == True:
        return "zalogowany w innej sesji"

    commands.follow("zapisy.do")    # go to the enrolment page

    # Debug output, marked for removal: page state after following the link.
    print("Co po kliknieciu Zapisy")
    commands.showlinks()
    print("Forms:")
    commands.showforms()
    commands.showlinks()

    control = None
    try:
        control = commands.browser.get_form("1").find_control(
            'ineSluId', type="select")
    except Exception:
        # No such select on this page (or no form "1"); handled below.
        print("Nie ma selecta.")

    if control is not None:
        # Parse the select's values and build replacement options for it.
        values = pobierzElementySelect(commands.show())
        select_options = utworzTabeleSelect(values)
        select_attrs = {'id': 'some_id'}

        for v in values:
            form = commands.get_browser().get_form("1")
            ct = commands.browser.get_form("1").find_control(
                'ineSluId', type="select")
            # Replace the select with the rebuilt one, then let the form
            # pick up the change.
            add_select_to_form(form, 'ineSluId', select_attrs,
                               select_options)
            form.fixup()
            commands.showforms()
            commands.formvalue("1", ct.name, v.strip())
            commands.submit('0')

            html = pobierzZajecia(commands)     # fetch the classes
            commands.follow("zapisy.do")        # back to the enrolment page
            # Skip results reporting removal from the course.
            if html != "skreslony z kierunku":
                tablicaHTMLow.append(html)
    else:
        # No select: fetch the classes of the single course directly.
        html = pobierzZajecia(commands)
        if html != "skreslony z kierunku":
            tablicaHTMLow.append(html)

    wyloguj(commands)
    commands.browser.clear_cookies()    # remove cookies
    commands.reset_browser()            # restart the browser
    commands.reset_output()
    return tablicaHTMLow
def _execute_script(inp, **kw):
    """
    Execute lines taken from a file-like iterator.

    Keyword options read from ``kw``:

      no_reset    -- if true, the browser is not reset before running.
      initial_url -- if set, this URL is visited before the first command.
      never_fail  -- if true, command errors are reported on commands.ERR
                     and execution continues instead of re-raising.
      source      -- label used in position and error messages
                     (default "<input>").

    A SystemExit raised by a command stops the script without an error.
    """
    # initialize new local dictionary & get global + current local
    namespaces.new_local_dict()
    try:
        globals_dict, locals_dict = namespaces.get_twill_glocals()

        locals_dict['__url__'] = commands.browser.get_url()

        # reset browser
        if not kw.get('no_reset'):
            commands.reset_browser()

        # go to a specific URL?
        init_url = kw.get('initial_url')
        if init_url:
            commands.go(init_url)
            locals_dict['__url__'] = commands.browser.get_url()

        # should we catch exceptions on failure?
        catch_errors = bool(kw.get('never_fail'))

        # sourceinfo stuff
        sourceinfo = kw.get('source', "<input>")

        # Count from 1 so reported positions match the line numbers an
        # editor shows for the script (blank lines are counted too).
        for n, line in enumerate(inp, 1):
            if not line.strip():            # skip empty lines
                continue

            cmdinfo = "%s:%d" % (sourceinfo, n)
            print('AT LINE: %s' % cmdinfo)

            cmd, args = parse_command(line, globals_dict, locals_dict)
            if cmd is None:
                continue

            try:
                execute_command(cmd, args, globals_dict, locals_dict, cmdinfo)
            except SystemExit:
                # abort script execution, if a SystemExit is raised.
                return
            except TwillAssertionError as e:
                # NOTE(review): the line breaks inside this message were
                # reconstructed -- confirm against the intended layout.
                message = (
                    "Oops! Twill assertion error on line %d of '%s' "
                    "while executing\n"
                    "\n"
                    "  >> %s\n"
                    "\n"
                    "%s\n"
                ) % (n, sourceinfo, line.strip(), e)
                commands.ERR.write(message + '\n')
                if not catch_errors:
                    raise
            except Exception as e:
                # NOTE(review): line breaks reconstructed here as well.
                message = (
                    "EXCEPTION raised at line %d of '%s'\n"
                    "\n"
                    "      %s\n"
                    "\n"
                    "Error message: '%s'\n"
                    "\n"
                ) % (n, sourceinfo, line.strip(), str(e).strip())
                commands.ERR.write(message + '\n')
                if not catch_errors:
                    raise
    finally:
        # Drop the local dictionary pushed by new_local_dict() above, on
        # every exit path (normal end, SystemExit return, or error).
        namespaces.pop_local_dict()
def vote_story(hn_id):
    """Vote for a story on behalf of the authenticated caller.

    The direction is read from the POST field 'vote' ("up" or "down") and the
    credentials from the request's HTTP auth. The function logs in through
    the site's login link, opens the story page, follows the matching vote
    link and logs out again if a logout link is present.

    Args:
        hn_id: identifier of the story, used in the '/item?id=' URL and in
            the vote link prefix.

    Returns:
        A dict with 'status' and 'message' keys on success. Invalid input is
        rejected through abort(): 400 for a bad direction or a missing vote
        link, 401 when no credentials were sent.
    """
    vote = request.POST.get('vote', '').strip()

    # we need a voting direction "up" or "down"
    if vote not in ('up', 'down'):
        return abort(code=400, text="Invalid voting direction, " +
                                    "needs to be 'up' or 'down'?")

    # check for the http auth
    if not request.auth:
        return abort(code=401, text="We need your username and " +
                                    "password for this operation.")

    username, password = request.auth

    # The browser is reset on every exit path below, so a failed vote does
    # not leave this user's logged-in session behind for a later call.
    try:
        # we start web scraping
        go(HN_URL)

        # lets find the login url
        login_url = ''
        for link in showlinks():
            if link.text == "login":
                login_url = link.url
        go(login_url)

        # we login: #1 is the login form (#2 is register)
        formvalue('1', 'u', username)
        formvalue('1', 'p', password)
        # 4 is the position of the submit button
        submit('4')

        # now we go to the story and look for its vote link
        go('/item?id=%s' % hn_id)
        vote_prefix = 'vote?for=%s&dir=%s&by=%s' % (hn_id, vote, username)
        voting_url = ''
        for link in showlinks():
            if link.url.startswith(vote_prefix):
                voting_url = '/' + link.url

        if voting_url == '':
            return abort(code=400, text="Something's wrong at voting, " +
                                        "Could not find the voting url. " +
                                        "Most likely you already voted or " +
                                        "the username or password are wrong.")
        go(voting_url)

        # lets find the logout url; it may be absent, in which case the
        # reset below is the only clean-up
        logout_url = None
        for link in showlinks():
            if link.text == "logout":
                logout_url = link.url

        go(HN_URL)
        if logout_url is not None:
            go(logout_url)
    finally:
        # and we're done!
        reset_browser()

    # success! response is always in JSON
    return {'status': "ok",
            'message': "voted successfully for %s" % hn_id}