def scrap(url, fail_time=0): timeout = config.TIMEOUT print u'正在请求', url, u', 请稍后...' try: driver = config.DRIVER driver.get(url) WebDriverWait(driver, timeout).until( EC.presence_of_element_located((By.ID, "J_TabRecommends")) ) result = get_recommends(driver, config.MAX_TRY) if result: print u'查找成功' html = driver.page_source parse_content(html) else: print u'请求超时, 获取失败, 此页面不存在相应内容' except TimeoutException: if fail_time >=2 : print u'请求超时, 正在切换代理, 继续重试' update_proxy_pool() new_proxy_driver() else: print u'请求超时,正在切换会话重试' new_driver() fail_time = fail_time + 1 if config.CONSOLE_OUTPUT: print u'当前页面请求失败数', fail_time if fail_time == config.MAX_FAIL: update_proxy_pool() if config.CONSOLE_OUTPUT: print u'失败次数过多, 跳过此请求' return False scrap(url, fail_time) except (socket.error, urllib2.URLError): print u'请求页面过于频繁, 请求被中断, 正在切换会话重试' new_driver() fail_time = fail_time + 1 if config.CONSOLE_OUTPUT: print u'当前页面请求失败数', fail_time if fail_time == config.MAX_FAIL: if config.CONSOLE_OUTPUT: print u'失败次数过多, 跳过此请求' return False scrap(url, fail_time) except (WindowsError, OSError, Exception): print u'未知错误, 跳过继续运行'
def scrap(url, fail_time=0): timeout = config.TIMEOUT print u'正在请求', url, u', 请稍后...' try: driver = config.DRIVER driver.get(url) WebDriverWait(driver, timeout).until( EC.presence_of_element_located((By.ID, "J_TabRecommends"))) result = get_recommends(driver, config.MAX_TRY) if result: print u'查找成功' html = driver.page_source parse_content(html) else: print u'请求超时, 获取失败, 此页面不存在相应内容' except TimeoutException: if fail_time >= 2: print u'请求超时, 正在切换代理, 继续重试' update_proxy_pool() new_proxy_driver() else: print u'请求超时,正在切换会话重试' new_driver() fail_time = fail_time + 1 if config.CONSOLE_OUTPUT: print u'当前页面请求失败数', fail_time if fail_time == config.MAX_FAIL: update_proxy_pool() if config.CONSOLE_OUTPUT: print u'失败次数过多, 跳过此请求' return False scrap(url, fail_time) except (socket.error, urllib2.URLError): print u'请求页面过于频繁, 请求被中断, 正在切换会话重试' new_driver() fail_time = fail_time + 1 if config.CONSOLE_OUTPUT: print u'当前页面请求失败数', fail_time if fail_time == config.MAX_FAIL: if config.CONSOLE_OUTPUT: print u'失败次数过多, 跳过此请求' return False scrap(url, fail_time) except (WindowsError, OSError, Exception): print u'未知错误, 跳过继续运行'
def index():
    """Serve the index page and accept an uploaded header file.

    GET (or any non-POST) renders ``index.html`` with whatever is stored
    under ``'data'`` in the session. POST expects a ``header_file`` upload,
    parses its contents with ``parse_content``, stores the result in the
    session and redirects back to this view.
    """
    if request.method != 'POST':
        return render_template('index.html', data=session.get('data'))

    # The request must contain a 'header_file' part at all.
    if 'header_file' not in request.files:
        flash('No file')
        return redirect(request.url)

    uploaded = request.files['header_file']
    # An empty filename means the form was submitted without choosing a file.
    if uploaded.filename == '':
        flash('No selected file')
        return redirect(request.url)

    session['data'] = parse_content(uploaded.read())
    # Post/Redirect/Get: redirect so a page refresh does not resubmit the form.
    return redirect(url_for('index'))
def get_entries(service, calendar_id, time_min):
    """Collect the events of a calendar starting from ``time_min``.

    Pages through ``service.events().list(...)``, fetches every listed event
    individually, and keys it by ``parse_content(event["description"])``.
    Per the original documentation the key is a 2-tuple of integers (LMD
    calendar and event ID); that depends on ``parse_content`` and is not
    verifiable from here.

    :param service: API client exposing ``events()``.
    :param calendar_id: identifier of the calendar to read.
    :param time_min: object with ``strftime``; formatted with ``TIME_FORMAT``
        and sent as ``timeMin``.
    :return: dict mapping parsed description keys to event resources.
    """
    events_api = service.events()
    entries = {}
    next_token = ""
    while True:
        query = dict(
            calendarId=calendar_id,
            timeMin=time_min.strftime(TIME_FORMAT),
        )
        # Only send a page token once the previous response supplied one.
        if next_token:
            query["pageToken"] = next_token
        page = events_api.list(**query).execute()

        for listed in page.get("items", ()):
            full_event = events_api.get(
                calendarId=calendar_id, eventId=listed["id"]
            ).execute()
            entry_key = parse_content(full_event["description"])
            entries[entry_key] = full_event

        next_token = page.get("nextPageToken")
        if not next_token:
            return entries
def test(self):
    """Check that ``parse_content(CONTENT)`` yields a result of length 2."""
    parsed = parse_content(CONTENT)
    item_count = len(parsed)
    self.assertEqual(item_count, 2)
import parse as parser import disk trellis_url = 'https://rippleneuro.com/support/software-downloads-updates/' if __name__ == '__main__': current_version = disk.read_version() # Current version of trellis try: # Get web html html = network.get_content(trellis_url) # Parse web html to get new version new_version = parser.parse_content(html) if current_version != new_version: # New version available disk.write_version(new_version) ctypes.windll.user32.MessageBoxW( 0, f'Trellis Version {new_version} is available', 'New Version', 1) else: # No new version ctypes.windll.user32.MessageBoxW( 0, f'Trellis Version {current_version} is up to date', 'No New Version', 1) except (network.NetworkException, parser.ParseException) as e: print(e) ctypes.windll.user32.MessageBoxW(
def save(url):
    """Download ``url``, parse the page, and save the parsed text.

    The page is fetched via ``html_download`` with ``'utf-8'``, split into a
    title and contents by ``parse_content``, and both are passed to
    ``save_to_txt``.
    """
    page = html_download(url, 'utf-8')
    parsed = parse_content(page)
    # parse_content must return exactly two values: title and contents.
    title, contents = parsed
    save_to_txt(title, contents)