def identify(codarea, numerolocal):
    browser = get_browser()
    form = browser.get_forms(MAINURL)[0]
    form["txt_indicativo"] = codarea
    form["txt_num_loc"] = numerolocal
    form.submit()
    for match in re.finditer(REGEX, browser.get_html()):
        print("%s: %s" % (match.group(1), match.group(2)))
def fetch_html(for_task, queries):
    """
    Main caller for html fetching.

    :param for_task: process for jobs/resumes
    :param queries: all the queries for which html have to be extracted
    :return: None
    """
    if for_task == settings.TASKS[0]:
        columns = settings.JOBS_COLUMNS
    else:
        columns = settings.RESUMES_COLUMNS
    filtered_queries = list(map(lambda query: {
        'url': query[columns['link']],
        'key': query[settings.UPDATE_KEY]
    }, queries))
    if settings.MULTIPROCESS_REQUIRED:
        partition = int(len(filtered_queries) / settings.NO_OF_PROCESSES)
        if partition == 0:
            partition += 1
        queries_chunks = [filtered_queries[i:i + partition]
                          for i in xrange(0, len(filtered_queries), partition)]
        if settings.WEBDRIVER_REQUIRED:
            driver = browser.get_browser()
            processes = [Process(target=fetch_pool_results,
                                 args=(for_task, each_chunk, driver,))
                         for each_chunk in queries_chunks]
        else:
            processes = [Process(target=fetch_pool_results,
                                 args=(for_task, each_chunk,))
                         for each_chunk in queries_chunks]
        for process in processes:
            process.start()
        for process in processes:
            process.join()
        if settings.WEBDRIVER_REQUIRED:
            browser.close_browser(driver)
    else:
        if settings.WEBDRIVER_REQUIRED:
            driver = browser.get_browser()
            fetch_pool_results(for_task, filtered_queries, driver)
            browser.close_browser(driver)
        else:
            fetch_pool_results(for_task, filtered_queries)
def tencent_get_url(self, url):
    br = get_browser()
    br.open("http://weibo.yunyun.com")
    response = br.open(url)
    result = response.read().decode('utf-8')
    key = hashlib.md5(url.encode('utf-8')).hexdigest()
    self._cache.put(cache=CACHE_NAME, key=key, value=result)
    print >> sys.stdout, key
def update_passwords(self):
    changed_entries = []
    for rule, login in self.logins:
        new_password = rule.generate_password()
        self.log_file.write(
            "Updating password for %s on %s to %s ... " %
            (login['username'], login['domain'], new_password))
        self.log_file.flush()
        driver = get_browser()

        # set up screenshot thread
        stop_screenshots = threading.Event()
        screenshot_thread = threading.Thread(
            target=self.update_screenshot,
            args=[driver, stop_screenshots])
        screenshot_thread.start()

        try:
            for i, step in enumerate(rule.steps):
                print "Running", step
                while len(step) < 3:
                    step += [None]
                step_type, opts = step[0], step[1:]

                # handle templating
                if step_type == 'type' and opts[1]:
                    for from_str, to_str in (('username', login['username']),
                                             ('old_password', login['password']),
                                             ('new_password', new_password)):
                        opts[1] = opts[1].replace("{{ %s }}" % from_str, to_str)

                # run step
                run_step(driver, step_type, opts)
        except (UnexpectedElementError, NoSuchElementException,
                InvalidElementStateException, AssertionError) as e:
            #import ipdb; ipdb.set_trace()
            if type(e) == NoSuchElementException:
                message = json.loads(e.msg)['errorMessage']
            else:
                message = str(e)
            self.log_file.write("Probably failed:\n %s" % message)
            self.log_file.flush()
            show_error("Update process failed for %s on step %s: %s" %
                       (login['domain'], i + 1, message))
            continue
        finally:
            stop_screenshots.set()

        # success
        self.log_file.write("Success.\n")
        self.log_file.flush()
        login['new_password'] = new_password
        changed_entries.append(login)

    pub.sendMessage('update_complete', changed_entries=changed_entries)
def aggregate(event, context):
    today = datetime.datetime.today()
    endpoint = 'https://www-shibuya.jp/schedule/#wwwxwww/'
    driver = browser.get_browser()
    driver.get(endpoint)
    time.sleep(3)

    ls_events = []
    for i in range(3):  # three months' worth of schedule pages
        # relativedelta(months=i) shifts the date forward by i months;
        # the singular month=i would overwrite the month field absolutely.
        current_date = today + relativedelta(months=i)
        year = current_date.year
        month = current_date.month
        html = HTML(html=driver.page_source, url='')
        articles = html.find('#eventList > div > article')
        for article in articles:
            day = int(article.find('.date .day', first=True).text)
            datetime_ = datetime.date(year, month, day)
            url = article.find('.pageLink', first=True).attrs.get('href')
            place = article.find('.inner .placeLabel span', first=True).text
            artist = article.find('.inner .info .title', first=True).text
            title = article.find('.inner .info .exp span', first=True).text
            openstart = article.find('.inner .info .openstart', first=True).text
            oss = openstart.replace('OPEN / START\u3000', '').split('/')
            if len(oss) == 2:
                open_, start = oss
            dic_event = {
                'datetime': datetime_,
                'url': url,
                'place': place,
                'artist': artist,
                'title': title,
                'open': open_,
                'start': start,
                'open_start': oss
            }
            ls_events.append(dic_event)
        next_button = driver.find_element_by_css_selector(
            '#schedule > * > ul > li.next > a')
        next_button.click()
    driver.close()
    driver.quit()

    items = process_to_items(ls_events)
    dynamo.put_items(items)

    response = {
        "statusCode": 200,
        "body": f"{len(ls_events)} items added or updated"
    }
    return response
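# Minimal illustration of the relativedelta fix applied above (a sketch, assuming
# python-dateutil is installed; the dates here are hypothetical examples).
# `months=i` is a relative shift by i months, while `month=i` sets the month
# field absolutely, which is not what a "next three months" loop wants.
from datetime import date
from dateutil.relativedelta import relativedelta

base = date(2021, 11, 15)
print(base + relativedelta(months=2))  # 2022-01-15: shifted forward two months
print(base + relativedelta(month=2))   # 2021-02-15: month overwritten to February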
def _get_cookies():
    global COOKIES
    if COOKIES is not None:
        return COOKIES
    br = get_browser()
    cj = mechanize.CookieJar()
    br.set_cookiejar(cj)
    br.open('http://www.opentable.com/new-york-city-restaurants')
    COOKIES = cj
    return COOKIES
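# Hedged usage sketch (not part of the original snippet): reuse the cached cookie
# jar in a fresh mechanize-style browser so later requests carry the OpenTable
# session cookies without re-fetching them. The helper name is illustrative.
def _browser_with_cookies():
    br = get_browser()               # assumed to return a mechanize-style browser
    br.set_cookiejar(_get_cookies()) # attach the memoized CookieJar
    return br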
async def async_process_new_appeal(self, appeal: dict) -> bool:
    logger.info(f'New appeal: {pformat(appeal)}')
    self.current_appeal = appeal

    with brwsr.get_browser() as browser:
        if not await self.solve_captcha(browser):
            return False
        proceed, url = await self.get_appeal_url()
        if not proceed:
            return False
        return await self.send_appeal(url, browser)
def convert(originalurl, log=True):
    browser = get_browser()
    form = browser.get_forms("http://tinyogg.com")[0]
    form["url"] = originalurl
    form.submit()
    if RESULT in browser.get_url():
        return browser.get_url()
    match = re.search(RESULTREGEX, browser.get_html())
    if match:
        return RESULT + match.group(1)
    for match in re.finditer(URLREGEX, browser.get_html()):
        if RESULT in match.group():
            return match.group()
def execute(self, panel, login):
    driver = get_browser(self.javascript_enabled)

    # set up screenshot thread
    stop_screenshots = threading.Event()
    screenshot_thread = threading.Thread(target=panel.update_screenshot,
                                         args=[driver, stop_screenshots])
    screenshot_thread.start()

    # set up replacement dictionary
    replacements = {
        'username': login['username'],
        'old_password': login['password'],
        'new_password': login['new_password']
    }

    # for regex-based matches, we include the match groups in the
    # replacements dict as 'url_group_N', counting from 1
    if type(login['match_result']) == tuple:
        for i, match_group in enumerate(login['match_result']):
            replacements['url_group_%s' % (i + 1)] = match_group

    # make sure a step is marked as the one that actually updates the password
    for step in self.steps:
        if type(step) == list and type(step[-1]) == dict and step[-1].get('updates_password'):
            break
    else:
        # by default, we assume it's the second to last step
        update_step = self.steps[-2]
        if type(update_step[-1]) != dict:
            update_step.append({})
        update_step[-1]['updates_password'] = True

    try:
        self.run_steps(login, driver, self.steps, replacements)
    except BrowserException as e:
        login['update_error'] = e.message
        return False  # failure
    finally:
        stop_screenshots.set()
        screenshot_thread.join()

    login['update_success'] = True
    return True  # success
def get_elevations(coordinates):
    r"""
    >>> get_elevations([(' 1° 9\'53.06"S ', ' 32°46\'14.33"E ')])
    [1134]
    >>> get_elevations([(' 27°26\'27.24"S ', ' 58°59\'44.88"O ')])
    [54]
    """
    b = browser.get_browser()
    b._twillbrowser._browser.addheaders.append(("referer", REFERER))
    if type(coordinates) is not str:
        coordinates = encode_coordinates(coordinates)
    url = URLALTITUDE % urllib.urlencode({"l": coordinates, "key": KEY, "id": ID})
    output = b.get_html(url, cache=60 * 60 * 24 * 30)
    regex = r"""[,\[](\d+)"""
    altitudes = [int(alt) for alt in re.findall(regex, output) if alt]
    return altitudes
def __init__(self, site, user, password):
    self.browser = get_browser()
    self.site = site
    self.user = user
    self.password = password
def __init__(self):
    self._br = get_browser()
def fetch_links(for_task, queries):
    """
    Main caller for the links extraction process.

    :param for_task: process for jobs/resumes
    :param queries: list of all the queries
    :return: None
    """
    filtered_queries = list(map(lambda query: {
        'queries': query[settings.QUERIES_COLUMNS['queries']],
        'key': query[settings.UPDATE_KEY]
    }, queries))
    for each_query in filtered_queries:
        try:
            print
            print "# P: [Query] {query}".format(query=each_query['queries'])
            if for_task == settings.TASKS[0]:
                url = query_builder.fetch_job_link_url(each_query['queries'], 0)
            else:
                url = query_builder.fetch_resume_link_url(each_query['queries'], 0)
            print "# P: [Url] {url}".format(url=url)
            total_results = 0
            result_present = False
            if settings.WEBDRIVER_REQUIRED:
                driver = browser.get_browser()
                browser.open_link(driver, url)
                soup = get_soup(url, driver=driver, for_task=for_task)
                browser.close_browser(driver)
            else:
                soup = get_soup(url)
            if for_task == settings.TASKS[0]:
                result_divs = soup.find_all(name="div", attrs={"id": "searchCount"})
            else:
                result_divs = soup.find_all(name="div", attrs={"id": "result_count"})
            for res_div in result_divs:
                result_present = True
                if for_task == settings.TASKS[0]:
                    total_results = int(
                        res_div.text.strip().split('of')[1].strip().replace(',', ''))
                else:
                    total_results = int(
                        res_div.text.strip().split(' ')[0].strip().replace(',', ''))
            if result_present:
                print "# O: [Result Present] <True>"
                print "# I: [Total Results] <{total}>".format(total=total_results)
                starts = pagination.make_start_list(for_task, total_results)
                if settings.MULTIPROCESS_REQUIRED:
                    partition = int(len(starts) / settings.NO_OF_PROCESSES)
                    if partition == 0:
                        partition += 1
                    start_chunks = [starts[i:i + partition]
                                    for i in xrange(0, len(starts), partition)]
                    if settings.WEBDRIVER_REQUIRED:
                        driver = browser.get_browser()
                        processes = [Process(target=fetch_pool_results,
                                             args=(for_task, each_query['queries'],
                                                   each_chunk, driver,))
                                     for each_chunk in start_chunks]
                    else:
                        processes = [Process(target=fetch_pool_results,
                                             args=(for_task, each_query['queries'],
                                                   each_chunk,))
                                     for each_chunk in start_chunks]
                    for process in processes:
                        process.start()
                    for process in processes:
                        process.join()
                    if settings.WEBDRIVER_REQUIRED:
                        browser.close_browser(driver)
                else:
                    if settings.WEBDRIVER_REQUIRED:
                        driver = browser.get_browser()
                        fetch_pool_results(for_task, each_query['queries'], starts, driver)
                        browser.close_browser(driver)
                    else:
                        fetch_pool_results(for_task, each_query['queries'], starts)
            else:
                print "# O: [Result Present] <False>"
            db.update_queries(for_task, each_query['key'],
                              {'status': settings.LINK_EXTRACTION_DONE})
        except Exception as e:
            db.update_queries(for_task, each_query['key'],
                              {'status': settings.LINK_EXTRACTION_ERROR})
            print "# E: [Link Extraction] <{error}>".format(error=str(e))
def __init__(self, dts, restaurant_ids, num_people):
    self.dts = dts
    self.restaurant_ids = restaurant_ids
    self.num_people = num_people
    self.br = get_browser()
def get_browser(*args, **kwargs):
    browser = b.get_browser(*args, **kwargs)
    browser.get(BTSurl)
    return browser
def __init__(self, master):
    self.main_frame = tk.Frame(master, bg='#c8c8c8')
    self.main_frame.grid(ipadx=2, ipady=2, padx=2, pady=2)
    self.remitente = tk.StringVar()
    self.number = tk.StringVar()
    self.captcha = tk.StringVar()
    self.browser = get_browser()
    self.personal = Personal()
    self.show_captcha()

    # Label for the destination number
    self.cod_label = tk.Label(self.main_frame, text="Número de destino",
                              bg='#c8c8c8')
    self.cod_label.grid(row=1, column=1, sticky=tk.W)

    # Entry box for the number
    self.ent_number = tk.Entry(self.main_frame, width=10,
                               textvariable=self.number, bd=2, relief=tk.GROOVE)
    self.ent_number.grid(row=1, column=2, sticky=tk.W + tk.E)
    self.ent_number.focus_set()

    # Example labels for the number format
    self.ejemplo_label = tk.Label(self.main_frame,
                                  text="código de area sin el 0\n y el número sin el 15",
                                  bg='#c8c8c8')
    self.ejemplo_label.grid(row=2, column=1, sticky=tk.N)
    self.ejemplo_label = tk.Label(self.main_frame,
                                  text="por ejemplo\n 3874567890",
                                  bg='#c8c8c8')
    self.ejemplo_label.grid(row=2, column=2, sticky=tk.N)

    # Label for the sender
    self.remitente_label = tk.Label(self.main_frame, text="Tu Nombre",
                                    bg='#c8c8c8')
    self.remitente_label.grid(row=1, column=3, sticky=tk.E)

    # Entry box for the sender
    self.ent_remitente = tk.Entry(self.main_frame, width=10,
                                  textvariable=self.remitente, bd=2,
                                  relief=tk.GROOVE)
    self.ent_remitente.grid(row=1, column=4, sticky=tk.W + tk.E)

    # Label for the message
    self.msje_label = tk.Label(self.main_frame, text="Mensaje", bg='#c8c8c8')
    self.msje_label.grid(row=2, column=3, sticky=tk.W)

    # Text box for the message
    self.ent_msje = tk.Text(self.main_frame, width=25, height=4, wrap="word",
                            bd=2, relief=tk.GROOVE)
    self.ent_msje.grid(row=2, column=4)
    self.ent_msje.bind("<KP_Enter>", self.keypress_return)

    # Label for the captcha
    self.cap_label = tk.Label(self.main_frame, text="Palabra de verificación",
                              bg='#c8c8c8')
    self.cap_label.grid(row=3, column=1, sticky=tk.E)

    # Entry box for the captcha
    self.ent_captcha = tk.Entry(self.main_frame, width=4,
                                textvariable=self.captcha, bd=2,
                                relief=tk.GROOVE)
    self.ent_captcha.grid(row=3, column=3, sticky=tk.W)
    self.ent_captcha.bind("<Return>", self.keypress_return)
    self.ent_captcha.bind("<KP_Enter>", self.keypress_return)

    # Send button
    self.hi_there = tk.Button(self.main_frame, text="Enviar", command=self.send,
                              relief=tk.FLAT, bg='#c8c8c8', bd=0)
    self.hi_there.grid(row=3, column=4)
def extract_naver_map():
    TITLE = []
    ADDRESS = []
    PHONE = []
    URL = []
    query = loc.get() + " " + keyword.get()
    browser = open_browser(query)
    wait = WebDriverWait(browser, 30)
    by_xpath = By.XPATH, "//object[@id='searchIframe']"
    wait.until(EC.presence_of_element_located(by_xpath))
    time.sleep(3)
    search_frame = browser.find_element_by_xpath("//object[@id='searchIframe']")
    browser.switch_to.frame(search_frame)
    last_page = int(get_pages(browser))
    get_browser(browser, query)
    wait.until(EC.presence_of_element_located(by_xpath))
    search_frame = browser.find_element_by_xpath("//object[@id='searchIframe']")
    browser.switch_to.frame(search_frame)

    for p in range(last_page):
        print(f"----------------------------------------------------\n\nextracting page{p+1}/{last_page}\n\n----------------------------------------------------\n\n")
        time.sleep(1)
        # scroll the result list until no new items are loaded
        while True:
            atags_1 = browser.find_elements_by_class_name('_2aE-_')
            if len(atags_1) == 0:
                atags_1 = browser.find_elements_by_class_name('Tx7az')
            browser.execute_script("document.querySelector('._1Az1K').scrollTo(document.querySelector('._1Az1K').scrollTop, document.querySelector('._1Az1K').scrollHeight);")
            atags = browser.find_elements_by_class_name('_2aE-_')
            if len(atags) == 0:
                atags = browser.find_elements_by_class_name('Tx7az')
            if len(atags_1) == len(atags):
                break
        print(f"Total items on this page: {len(atags)}\n\n")

        # extract
        by_xpath = By.XPATH, '//object[@id="entryIframe"]'
        for a in atags:
            a.click()
            time.sleep(1)
            browser.switch_to_default_content()
            wait.until(EC.presence_of_element_located(by_xpath))
            url = browser.find_elements_by_tag_name('object')[1].get_attribute('data')
            browser.execute_script("window.open('');")
            browser.switch_to_window(browser.window_handles[-1])
            browser.get(url)
            try:
                html = browser.execute_script('return document.body.outerHTML')
                soup = BeautifulSoup(html, 'html.parser')
            except WebDriverException:
                browser.close()
                browser.switch_to_window(browser.window_handles[0])
                entry_frame = browser.find_element_by_xpath('//object[@id="entryIframe"]')
                browser.switch_to_frame(entry_frame)
                html = browser.execute_script('return document.body.outerHTML')
                soup = BeautifulSoup(html, 'html.parser')
                browser.switch_to_default_content()
            title = soup.find('span', {'class': '_3XamX'}).text
            address = soup.find('span', {'class': '_2yqUQ'}).text
            phone = soup.find('li', {'class': '_3xPmJ'})
            if phone:
                phone = phone.text.split('안내')[0]
            else:
                phone = None
            if len(browser.window_handles) > 1:
                browser.close()
                browser.switch_to_window(browser.window_handles[0])
            else:
                pass
            browser.switch_to.frame(search_frame)
            TITLE.append(title)
            ADDRESS.append(address)
            PHONE.append(phone)
            URL.append(url)

        df = pd.DataFrame({'상호명': TITLE, '주소': ADDRESS, '전화번호': PHONE, '링크': URL})
        df.to_csv(f'{query}.csv', encoding='utf-8')

        # click next page
        next_btn = browser.find_elements_by_class_name('_3pA6R')[1]
        next_btn.click()

    print('finished!')
    clean_data(query)
    messagebox.showinfo('info', '완료')
def __init__(self):
    self.browser = browser.get_browser()
    self._login_form = None
def __init__(self):
    self.browser = get_browser()
    self.path = "http://sms2.personal.com.ar/Mensajes/sms.php"
def __init__(self):
    self._url = self._get_url()
    self._br = get_browser()
def get():
    if len(pool) > 0:
        return pool.pop()
    else:
        return browser.get_browser()
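# Hedged sketch (not part of the original snippet): a complementary helper that
# returns a browser instance to the same module-level `pool` list consulted by
# get(), so instances are reused instead of being created on every call.
# The function name is illustrative.
def put(browser_instance):
    pool.append(browser_instance)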
def __init__(self):
    self._params = self._form_params()
    self._browser = get_browser()
import datetime
import urllib

from browser import get_browser, open_page

FORMAT = '%m/%d/%Y %I:%M:%S %p'
TODAY_URL = 'http://www.opentable.com/opentables.aspx?m=8&p=%d&d=%s&rid=%d&t=single&scpref=0'
URL = 'http://www.opentable.com/nextavailabletable.aspx?hpu=1040606153&shpu=1&cop=1&m=8&p=%d&d=%s&rid=%d&mode=singlerest'

br = get_browser()


def get_reservations_for_list(dt, rids, people, want_today=False):
    results = {}
    for rid in rids:
        results[rid] = get_reservations(dt, rid, people)
        if want_today:
            results[rid] += get_today_reservations(dt, rid, people)
        if len(results) % 5 == 0:
            print 'processed %d / %d' % (len(results), len(rids))
    return results


def get_today_reservations(dt, rid, people=2):
    time = urllib.quote_plus(dt.strftime(FORMAT))
    url = TODAY_URL % (people, time, rid)
    s = open_page(br, url)
    return _get_today_results(s)


def get_reservations(dt, restaurant_id, people=2):
def __init__(self):
    self._br = get_browser()
    self._db = DBStore()
    self._init_yelp_keys_stuff()