Esempio n. 1
0
def requestUrl(url, restart=0):
    """Fetch ``url`` through a fixed HTTP proxy, optionally using the ``DB`` cache.

    The cache key is ``url[FILE_OFFSET:]``.

    Args:
        url: Address to download.
        restart: When truthy, a truthy cached value is returned if present;
            otherwise the page is downloaded and stored in ``DB``. When
            falsy, the page is always downloaded and ``DB`` is neither read
            nor written.

    Returns:
        The response body, or ``None`` if any exception was raised (the
        exception is printed, not propagated).
    """
    # NOTE(review): the meaning of ``restart`` here is the opposite of the
    # other helpers in this file (they read the cache when ``not restart``).
    # Behaviour is preserved as-is; confirm which polarity callers expect.

    # install_opener() replaces urllib2's process-wide default opener, so
    # every later urllib2.urlopen() call also goes through this proxy.
    proxy = {'http': 'http://39.134.93.13:80'}
    opener = urllib2.build_opener(urllib2.ProxyHandler(proxy))
    urllib2.install_opener(opener)

    filename = url[FILE_OFFSET:]
    try:
        if restart:
            # Single lookup: reuse the value instead of querying DB twice.
            cached = DB.get(filename)
            if cached:
                return cached

        print(url)
        time.sleep(TIME_SLEEP)  # throttle before each network request
        req = urllib2.Request(url, headers=M_Headers)
        data = urllib2.urlopen(req).read()
        if restart:
            # Only the cache-enabled path writes the result back.
            DB.set(filename, data)
        return data
    except Exception as e:
        # Best-effort fetch: report the failure and signal it with None.
        print(e)
        return None
Esempio n. 2
0
def requestUrl(url, restart=0):
    """Fetch ``url``, serving it from the ``DB`` cache unless ``restart`` is set.

    The cache key is ``url[FILE_OFFSET:]``.

    Args:
        url: Address to download.
        restart: When falsy, a truthy cached value is returned if present;
            otherwise the page is downloaded and stored in ``DB``. When
            truthy, the page is always downloaded and ``DB`` is neither read
            nor written.

    Returns:
        The response body, or ``None`` if any exception was raised (the
        exception is printed, not propagated).
    """
    filename = url[FILE_OFFSET:]
    use_cache = not restart
    try:
        if use_cache:
            # Single lookup: reuse the value instead of querying DB twice.
            cached = DB.get(filename)
            if cached:
                return cached

        print(url)
        time.sleep(TIME_SLEEP)  # throttle before each network request
        req = urllib2.Request(url, headers=M_Headers)
        data = urllib2.urlopen(req).read()
        if use_cache:
            # A forced re-download (restart) does not overwrite the cache.
            DB.set(filename, data)
        return data
    except Exception as e:
        # Best-effort fetch: report the failure and signal it with None.
        print(e)
        return None
Esempio n. 3
0
def postUrl(url, keywords, restart=0):
    """POST a search form to ``url`` and cache the response text in ``DB``.

    Args:
        url: Endpoint to post to; ``url[FILE_OFFSET:]`` is the cache key.
        keywords: Value sent as the ``'s'`` form field.
        restart: When falsy, no request is made and whatever ``DB`` holds
            for the key is returned. When truthy, the form is posted and the
            response text is stored under the key.

    Returns:
        The cached value (``restart`` falsy) or the fresh response text.
    """
    cache_key = url[FILE_OFFSET:]

    # Guard clause: without a restart only the cache is consulted.
    if not restart:
        return DB.get(cache_key)

    form = {
        's': keywords,
        'offset': 0,
        'limit': 30,
        'type': 1000,
    }
    response = requests.post(url, form)
    body = response.text
    DB.set(cache_key, body)
    return body
Esempio n. 4
0
def requestByFixfox(url, restart=0):
    """Return the page source of ``url`` as rendered by a Chrome WebDriver.

    Args:
        url: Page to load; ``url[FILE_OFFSET:]`` is the cache key.
        restart: When falsy, no browser is started and whatever ``DB`` holds
            for the key is returned. When truthy, the page is loaded in a
            new Chrome session.

    Returns:
        The cached value (``restart`` falsy) or ``driver.page_source``.

    Raises:
        Whatever the WebDriver raises while loading the page, including the
        timeout raised by ``WebDriverWait.until`` if the document does not
        reach ``readyState == "complete"`` within 5 seconds.
    """
    # NOTE(review): the freshly rendered source is not written back to DB
    # here, unlike postUrl -- confirm whether that is intentional.
    filename = url[FILE_OFFSET:]
    if not restart:
        return DB.get(filename)

    driver = Chrome(executable_path='chromedriver.exe')
    try:
        driver.get(url)
        # Poll until the document has finished loading. Previously a page
        # that was not yet "complete" left the result unbound and the
        # browser process running.
        WebDriverWait(driver, timeout=5).until(
            lambda d: d.execute_script("return document.readyState") == "complete"
        )
        return driver.page_source
    finally:
        # Always shut the browser down, even if loading or waiting failed.
        driver.quit()