def requestUrl(url, restart=0):
    """Fetch *url* through a fixed HTTP proxy, optionally going through the DB cache.

    NOTE(review): a later definition of ``requestUrl`` in this file rebinds
    the name, so this proxy variant is not the one callers get unless that
    definition is removed. The meaning of ``restart`` here is also the
    opposite of that later definition -- confirm which one is intended.

    Args:
        url: Address to download. The cache key is ``url[FILE_OFFSET:]``.
        restart: When truthy, a truthy cached value is returned if present;
            otherwise the page is downloaded and stored in the cache. When
            falsy (the default), the page is always downloaded and the cache
            is neither read nor written.

    Returns:
        The response body, or ``None`` if any exception was raised (the
        exception is printed, not propagated).
    """
    # install_opener() changes the process-wide default opener, so every
    # subsequent urllib2.urlopen() call in the process goes through the proxy.
    proxy = {'http': 'http://39.134.93.13:80'}
    proxy_support = urllib2.ProxyHandler(proxy)
    opener = urllib2.build_opener(proxy_support)
    urllib2.install_opener(opener)

    filename = url[FILE_OFFSET:]
    try:
        if restart:
            # Single lookup: the value that was tested is the value returned.
            cached = DB.get(filename)
            if cached:
                return cached

        print(url)
        time.sleep(TIME_SLEEP)
        req = urllib2.Request(url, headers=M_Headers)
        response = urllib2.urlopen(req)
        try:
            data = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()

        if restart:
            DB.set(filename, data)
        return data
    except Exception as e:
        # Best-effort download: report the problem and signal failure with None.
        print(e)
        return None
def requestUrl(url, restart=0):
    """Return the body of *url*, served from the DB cache unless *restart* is set.

    Args:
        url: Address to download. The cache key is ``url[FILE_OFFSET:]``.
        restart: When falsy (the default), a truthy cached value is returned
            if present; otherwise the page is downloaded and stored in the
            cache. When truthy, the page is always downloaded and the cache
            is neither read nor written.

    Returns:
        The response body, or ``None`` if any exception was raised (the
        exception is printed, not propagated).
    """
    filename = url[FILE_OFFSET:]
    use_cache = not restart
    try:
        if use_cache:
            # Single lookup: the value that was tested is the value returned.
            cached = DB.get(filename)
            if cached:
                return cached

        print(url)
        time.sleep(TIME_SLEEP)
        req = urllib2.Request(url, headers=M_Headers)
        response = urllib2.urlopen(req)
        try:
            data = response.read()
        finally:
            # Release the connection even when read() fails.
            response.close()

        # A forced re-download is deliberately not written back to the cache.
        if use_cache:
            DB.set(filename, data)
        return data
    except Exception as e:
        # Best-effort download: report the problem and signal failure with None.
        print(e)
        return None
def postUrl(url, keywords, restart=0):
    """POST a keyword query to *url*, or return the cached response text.

    Args:
        url: Endpoint to post to. The cache key is ``url[FILE_OFFSET:]``.
        keywords: Value sent as the ``'s'`` form field.
        restart: When falsy (the default), no request is made and the result
            of ``DB.get`` for the cache key is returned as-is. When truthy,
            the form is posted, the response text is stored under the cache
            key, and that text is returned.

    Returns:
        The cached value or the freshly fetched response text.
    """
    cache_key = url[FILE_OFFSET:]

    if restart:
        form_fields = {
            's': keywords,
            'offset': 0,
            'limit': 30,
            'type': 1000,
        }
        response_text = requests.post(url, form_fields).text
        DB.set(cache_key, response_text)
        return response_text

    # Cache-only path: whatever DB.get yields is handed back unchanged.
    return DB.get(cache_key)
def requestByFixfox(url, restart=0):
    """Return the page source of *url* as rendered by a browser, or the cached copy.

    Despite the function name, the browser that is launched is Chrome
    (``chromedriver.exe``).

    Args:
        url: Address to load. The cache key is ``url[FILE_OFFSET:]``.
        restart: When falsy (the default), no browser is started and the
            result of ``DB.get`` for the cache key is returned as-is. When
            truthy, the page is loaded in a new browser session; the result
            is not written to the cache.

    Returns:
        The cached value, the browser's page source, or ``None`` when the
        document's ``readyState`` is not ``"complete"`` after loading.
    """
    filename = url[FILE_OFFSET:]
    if not restart:
        return DB.get(filename)

    driver = Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        if driver.execute_script("return document.readyState") != "complete":
            # Page did not finish loading: report that with None.
            return None
        return driver.page_source
    finally:
        # Always shut the browser down, including on errors and early return,
        # so no browser/driver process is left running.
        driver.quit()