def urllib3_test():
    from urllib3 import PoolManager
    import time, random

    url = ['http://sist.swjtu.edu.cn:8080/MicroElec/feng/index.asp',
           'http://sist.swjtu.edu.cn:8080/MicroElec/feng/contact/contact.asp',
           'http://sist.swjtu.edu.cn:8080/MicroElec/feng/Templates/%E5%A4%8D%E4%BB%B6%20CSS/',
           'http://sist.swjtu.edu.cn:8080/MicroElec/index.asp',
           'http://sist.swjtu.edu.cn:8080/My%20Documents/Documents%20and%20Settingsjiangtigang%E6%A1%8C%E9%9D%A2ImcGroupHomepageNewImcWebYuan%20Ding"%20l']

    errors = 0
    oks = 0
    p = PoolManager(20)
    for a in url:
        try:
            p.request(method="GET", url=a)
            s = random.randint(0, 10)
            time.sleep(s)
            print(s)
            oks += 1
        except Exception:
            errors += 1
            continue

        print(".", end="")
    print()
    print("Errors: %s | Oks: %s." % (errors, oks))
Example #2
class APISent(object):
    """Sentiment features using API tools.

    Interacts with the web and therefore needs urllib3. Might be _very_ slow;
    use with caution and preferably store features.

    Parameters
    ----------
    mode : string, optional, default 'deep'
        Can be either 'deep' for Twitter-based neural sentiment (py2, boots
        local server instance), or 'nltk' for the text-processing.com API.

    Examples
    --------
    >>> sent = APISent()
    >>> sent.transform("you're gonna have a bad time")
    ... 0.030120761495050809
    >>> sent = APISent(mode='nltk')
    >>> sent.transform("you're gonna have a bad time")
    ...

    Notes
    -----
    Implemented by: Chris Emmery
    Deep sentiment: https://github.com/xiaohan2012/twitter-sent-dnn
    NLTK API: http://text-processing.com
    """

    def __init__(self, mode='deep'):
        """Load poolmanager and set API location."""
        from urllib3 import PoolManager
        self.name = 'apisent'
        self.mode = mode
        self.pool = PoolManager()

    def __str__(self):
        """String representation for APISent."""
        return '''
        feature:    {0}
        mode:       {1}
        '''.format(self.name, self.mode)

    def transform(self, raw, _):
        """Return a dictionary of feature values."""
        import json
        from urllib.parse import urlencode
        if self.mode == 'deep':
            jsf = json.dumps({'text': raw})
            header = {'content-type': 'application/json'}
            request = "http://localhost:6667/api"
            r = self.pool.request('POST', request, headers=header, body=jsf)
            out = {'deepsent': float(r.data.decode('utf-8'))}
        elif self.mode == 'nltk':
            qf = urlencode({'text': raw})
            request = "http://text-processing.com/api/sentiment/"
            r = self.pool.request('POST', request, body=qf)
            try:
                out = json.loads(r.data.decode('utf-8'))["probability"]
            except ValueError:
                exit("SentAPI threw unexpected response, " +
                     "probably reached rate limit.")
        return out
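
A standalone sketch of the 'nltk' branch above, assuming the
text-processing.com endpoint still accepts form-encoded POST bodies:

import json
from urllib.parse import urlencode
from urllib3 import PoolManager

def nltk_sentiment(text):
    # POST the form-encoded text; the API answers with a JSON dict
    # containing a 'probability' entry.
    pool = PoolManager()
    r = pool.request('POST', 'http://text-processing.com/api/sentiment/',
                     body=urlencode({'text': text}))
    return json.loads(r.data.decode('utf-8'))['probability']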
Example #3
class gamefetcher:
    def __init__(self):
        from urllib3 import PoolManager
        self.pool = PoolManager()

    def getfreegames(self):
        response = self.pool.request('GET',
                                     config.fetch_games_url,
                                     preload_content=False)
        # Read the body before releasing the connection back to the pool
        data = response.data
        response.release_conn()
        return data

    def filtergames(self, text, games_owned):
        from bs4 import BeautifulSoup
        from re import compile

        apps = []

        soup = BeautifulSoup(text, 'html.parser')
        links = soup.find_all("a", href=True)
        regexraw = r"^https:\/\/store.steampowered.com\/app\/[0-9]{1,}"
        regex = compile(regexraw)
        for link in links:
            result = regex.match(link["href"])
            if result is not None:
                app_id = result.group(0).replace(
                    "https://store.steampowered.com/app/", "")
                if app_id not in games_owned:
                    apps.append(app_id)

        return apps
Example #4
def download_sig(opts, sig, version=None):
    """Download signature from hostname"""
    code = None
    downloaded = False
    useagent = 'ClamAV/0.101.1 (OS: linux-gnu, ARCH: x86_64, CPU: x86_64)'
    manager = PoolManager(headers=make_headers(user_agent=useagent),
                          cert_reqs='CERT_REQUIRED',
                          ca_certs=certifi.where(),
                          timeout=Timeout(connect=10.0, read=60.0))
    if version:
        path = '/%s.cvd' % sig
        filename = os.path.join(opts.workdir, '%s.cvd' % sig)
    else:
        path = '/%s.cdiff' % sig
        filename = os.path.join(opts.workdir, '%s.cdiff' % sig)
    try:
        req = manager.request('GET', 'http://%s%s' % (opts.hostname, path))
    except BaseException as msg:
        error("Request error: %s" % msg)
        return downloaded, code
    data = req.data
    code = req.status
    if req.status == 200:
        with open(filename, 'wb') as handle:
            handle.write(data)
        downloaded = os.path.exists(filename)
    return downloaded, code
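
download_sig buffers the whole body in memory via req.data. A minimal sketch
of a streaming variant for large files, assuming urllib3 1.x semantics for
preload_content and stream():

from urllib3 import PoolManager

def stream_to_file(url, filename, manager=None):
    manager = manager or PoolManager()
    r = manager.request('GET', url, preload_content=False)
    try:
        with open(filename, 'wb') as handle:
            # Write the body to disk in 64 KiB chunks instead of buffering it
            for chunk in r.stream(64 * 1024):
                handle.write(chunk)
    finally:
        r.release_conn()
    return r.status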
Example #5
    def __downloadPackage(self, url: str, filew: str,
                          http: urllib3.PoolManager) -> None:
        """
            Download package installation file

            Parameters
            ----------
                url : str
                    Url of installation file

                filew : str
                    Name of installation file

                http : urllib3.PoolManager
                    Urllib PoolManager web connection

            >>> __downloadPackage("https://domain.com/package.whl", "package.whl", urllib3.PoolManager())
            None
        """
        if os.path.exists(filew):
            return

        files = http.request("GET", url)

        with open(filew, "wb") as f:
            f.write(files.data)
Example #6
def get_resultados(name='', price_max=''):
    http = PoolManager()
    url_api = 'http://es.wallapop.com/rest/items?minPrice=&maxPrice={price_max}&dist=&order=creationDate-des&lat=41.398077&lng=2.170432&kws={kws}'.format(
        kws=urllib.parse.quote(name, safe=''), price_max=price_max)
    results = http.request('GET', url_api)
    results = json.loads(results.data.decode('utf-8'))
    return results['items']
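
For GET requests, urllib3 can build the query string itself via fields=,
which avoids the manual quoting above; a sketch that assumes the API accepts
the same parameter names:

import json
from urllib3 import PoolManager

def get_resultados_fields(name='', price_max=''):
    http = PoolManager()
    # fields= is url-encoded into the query string for GET requests
    results = http.request('GET', 'http://es.wallapop.com/rest/items',
                           fields={'kws': name, 'maxPrice': price_max,
                                   'order': 'creationDate-des',
                                   'lat': '41.398077', 'lng': '2.170432'})
    return json.loads(results.data.decode('utf-8'))['items']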
Example #7
def filerecv(servloca, attrdata):
    """
    Receives file from the provided Veritas server
    :param servloca:
    :param attrdata:
    :return:
    """
    try:
        httpobjc = PoolManager()
        rgetfild = {"tokniden": attrdata}
        rqstobjc = httpobjc.request("GET",
                                    servloca + "filerecv",
                                    fields=rgetfild)
        respdata = json.loads(rqstobjc.data.decode())
        if respdata["retnmesg"] == "FAIL":
            click.echo(click.style("Transfer failed!", fg="red"))
        elif respdata["retnmesg"] == "DONE":
            filename = respdata["filename"]
            b64etext = respdata["contents"]
            contents = b64decode(b64etext.encode()).decode()
            with open(filename, "w") as fileobjc:
                fileobjc.write(contents)
            click.echo(click.style("Transfer successful!", fg="green"))
    except Exception as expt:
        click.echo(" * " +
                   click.style("Error occurred    : " + str(expt), fg="red"))
Example #8
    def get_html(self, url):
        """
        Takes a page url, fetches its html code, and returns it to get_all_links

        :param url:
        :return: html code
        """
        http = PoolManager(1)
        # pick one random proxy
        proxy = self.get_proxy()

        # If a page with this url has already been parsed, don't do it again
        if url in self.parsed_pages:
            return ''

        try:
            # request the page, getting an HTTPResponse object
            resp = http.request('GET', url, proxy[0], proxy[1])
        # handle the error raised after too many redirects
        except urllib3.exceptions.MaxRetryError:
            print(f'Redirect limit reached for page: {url}')
            return MAX_RETRY

        page = resp.data.decode('utf-8', 'ignore')
        # parse the response into a readable form
        soup = BeautifulSoup(page, "html.parser")
        # Record that this page has been visited
        self.parsed_pages.add(url)
        # Return the page's html
        return soup
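
Note that PoolManager.request has no proxy parameters, so the positional
proxy[0] and proxy[1] above are actually interpreted as fields and headers. A
hedged sketch of routing a request through a proxy with ProxyManager instead,
assuming the proxy is given as an 'http://host:port' URL:

import urllib3

def fetch_via_proxy(url, proxy_url):
    # One ProxyManager per proxy; request() has the same signature
    # as PoolManager.request
    proxied = urllib3.ProxyManager(proxy_url)
    return proxied.request('GET', url)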
Example #9
    def _load_internet(self):
        """
        Description
        -----------
            boot sequence 3.
            check internet connection

        Notes
        -----
            use urllib3 to set connection timeout manually and reduce check time

        Return
        ------
        result    0 success
                 -1 fail

        """

        self.net_connected = False
        try:
            from urllib3 import PoolManager, Timeout, Retry
            http = PoolManager(
                timeout=Timeout(connect=1.0, read=2.0), retries=Retry(0, redirect=0)
            )
            response = http.request("HEAD", "https://status.cloud.google.com/")
            if response.status == 200:  # if internet ok.
                self.log.info("pino_boot_loader.py: internet connected!")
                self.net_connected = True
        except Exception as E:
            self.log.error("pino_boot_loader.py: _load_internet(), " + repr(E))
            return -1
        else:
            return 0
Example #10
def show_incomes():
    """
    docstring here
    """
    access_token = session.get('access_token')
    if access_token is None:
        return redirect(url_for('login'))
    access_token = access_token[0]
    _http = PoolManager(cert_reqs='CERT_REQUIRED', ca_certs=certifi.where())
    headers = {'Authorization': 'OAuth '+access_token}
    _request = _http.request(
            'GET',
            'https://www.googleapis.com/oauth2/v1/userinfo', None, headers)
    if _request.status == 401:
        session.pop('access_token', None)
        return redirect(url_for('login'))
    userinfo = json.loads(_request.data.decode('utf-8', 'strict'))
    if not userinfo['email'] in ['*****@*****.**', '*****@*****.**']:
        session.pop('access_token', None)
        return redirect(url_for('login'))
    else:
        session['email'] = userinfo['email']
        session['logged_in'] = True

    return render_template(
            'show_entries.html',
            entries=list(
                map(lambda x: x.to_dict(), incomes_get(id=0))),
            currencies=currency_get(id=0),
            periods=intervals_get(id=0),
            transactions=list(
                map(lambda x: x.to_dict(), transactions_get(id=0))))
Example #11
def export_template(t_id, output='string', file_path=None):
    """
    Exports a template as a string of xml
    :param t_id: ID of the template to export
    :param output: string of return type of template, 'string' or 'file'
    :param file_path: if file output type, the path and filename to write to
    :return: basestring of the xml template or file path written
    """
    # TemplatesAPI.export template is broken in swagger definition of NiFi1.2
    # return TemplateDTO is replaced by return string in a later version
    valid_output_types = ['file', 'string']
    if output not in valid_output_types:
        raise ValueError("Output type {0} not valid for ({1})".format(
            output, valid_output_types))
    con = PoolManager()
    url = nifi_config.host + '/templates/' + t_id + '/download'
    response = con.request('GET', url, preload_content=False)
    template_xml = etree.fromstring(response.data)
    if output == 'string':
        return etree.tostring(template_xml, encoding='utf8', method='xml')
    if output == 'file':
        assert access(dirname(file_path), W_OK), \
            "File_path {0} is inaccessible or not writable".format(file_path)
        xml_tree = etree.ElementTree(template_xml)
        xml_tree.write(file_path)
        return file_path
Example #12
class Do:
    def __init__(self, url, **kwargs):
        self.url = url
        self.http = PoolManager()
        self.connect_timeout = kwargs.get('connect_timeout') or 2.0
        self.read_timeout = kwargs.get('read_timeout') or 2.0
        self.available_status = kwargs.get('available_status') or '200,'
        self.alert_mail = kwargs['alert_mail']

        self._get()

    def _get(self):
        r = None
        try:
            start = time.perf_counter()
            r = self.http.request('GET', self.url, timeout=Timeout(connect=self.connect_timeout, read=self.read_timeout))
            end = time.perf_counter()
        except Exception as ex:
            send_mail(self.alert_mail, 'Request {} failed'.format(self.url), str(ex))

        available_status = self.available_status.split(',')

        if not r:
            return None
        if not hasattr(r, 'status'):
            return None

        if str(r.status) in available_status:
            logger.info(self.url, end-start)
        else:
            send_mail(self.alert_mail, '{} :{}'.format(self.url, r.status), '{}\r\n{}\r\n{}'.format(r.status, r.data, end-start))
Example #13
def download_url(urlpath, output_dir=".", binary=False):

    http = PoolManager()
    req = http.request("GET", urlpath)
    if req.status != 200:
        raise Exception("Could not get file from " + urlpath)

    parsed = urlparse(urlpath)
    filename = os.path.basename(parsed.path)
    writemod = "wb" if binary else "w"

    contents = req.data
    if output_dir != ".":
        if not os.path.exists(output_dir):
            log.error("{0} does not exist".format(output_dir))
            log.error("Writing file to {0}".format(os.getcwd()))
        else:
            filename = "/".join([output_dir, filename])
    with open(filename, writemod) as downloaded:
        try:
            downloaded.write(contents)
        except TypeError:
            with open(filename, "wb") as downloaded:
                downloaded.write(contents)
    if not os.path.exists(filename):
        raise Exception("Could not write to {}".format(filename))
    return filename
Example #14
async def parseqr(qr_e):
    """ .decode komutu cevap verilen fotoğraftan QR kodu / Barkod içeriğini alır """
    downloaded_file_name = await qr_e.client.download_media(
        await qr_e.get_reply_message())

    # Parse the official ZXing web page to decode the QR code
    files = {'f': open(downloaded_file_name, 'rb').read()}
    t_response = None

    try:
        http = PoolManager()
        t_response = http.request('POST',
                                  "https://zxing.org/w/decode",
                                  fields=files)
        t_response = t_response.data
        http.clear()
    except:
        pass

    os.remove(downloaded_file_name)
    if not t_response:
        await qr_e.edit("decode failed.")
        return
    soup = BeautifulSoup(t_response, "html.parser")
    qr_contents = soup.find_all("pre")[0].text
    await qr_e.edit(qr_contents)
Example #15
 def pool_request(self, pool: urllib3.PoolManager, data: dict = None):
     """
     Issue a remote call through a pooled HTTP request
     :param pool: urllib3 connection pool instance
     :param data: request parameters
     :return:
     """
     try:
         # Access instances round-robin
         instance = self.__instances[self.__instance_index]
         # Check the service registration info (avoid calling ourselves)
         if self.__interface_app.service_name() == self.__service_name:
             print('Warning: (risky) remote-call unit is invoking its own service (', self.__service_name, ',', self.__interface, ')')
             try:
                 if instance['host'] == socket.gethostbyname(socket.getfqdn(socket.gethostname())) \
                         and \
                         instance['port'] == self.__interface_app.port():
                     # Check the request path
                     self.__instances.pop()
                 else:
                     self.__instance_index = self.__instance_index + 1
                 instance = self.__instances[self.__instance_index]
             except Exception as e:
                 print(e)
         url = str('http://' + instance['host'] + ':' + instance['port'] + self.__interface)
         res = pool.request(method=self.__req_obj.get_method(), url=url, fields=data)
         self.__id = instance['id']
         self.__instance_index = (self.__instance_index + 1) if self.__instance_index < len(self.__instances) - 2 else 0
         url = str('http://' + instance['host'] + ':' + instance['port'] + self.__interface)
         setattr(self.__req_obj, 'full_url', url)
         return eval(res.data.decode('utf8')) if res.status == 200 else {'event': res.status, 'msg': 'service call error'}
     except Exception:
         return {'event': 500, 'msg': 'service call error'}
Example #16
 class __ProviderManager(QtCore.QObject):
     
     sendError = QtCore.pyqtSignal(str)
     
     def __init__(self):
         QtCore.QObject.__init__(self)
         self.providers = {}
         self.poolManager = PoolManager(timeout=Timeout(10),
                                        headers={'Accept-Encoding': 'gzip,deflate'})
         self.engine = Engine()
     
     def loadProviderFromFile(self,path):
         try:
             providerFile = open(path,mode="r")
             provider = json.loads(providerFile.read())
             providerFile.close()
             self.providers[provider["name"]] = provider
         except Exception as e:
             self.sendError.emit("cannot load provider at '"+path+"' <br/><b>Reason:</b> "+str(e))
             
     def loadProviderFromUrl(self,url):
         try:
             req = self.poolManager.request("GET", url)
             provider = json.loads(req.data.decode('utf-8'))
             self.providers[provider["name"]] = provider
             del req
         except Exception as e:
             self.sendError.emit("cannot load provider at '"+url+"' <br/><b>Reason:</b> "+str(e))
             
     def queryProvider(self,text,category,pages,providerName,perPageCallback=None,whenDoneCallback=None):#Exceptions here should be managed by the caller
         return self.engine.makeQuery(self.providers[providerName], text, category, pages,perPageCallback,whenDoneCallback)
     
     def reset(self):
         if self.providers:
             self.providers.clear()
Example #17
    def __request_api(self, options={}) -> dict:
        """Send a request to a given API endpoint

        Args:
            options (dict, optional): options for the request. Defaults to {}.

        Returns:
            dict: response of the api
        """
        url = options.get('url')
        headers = options.get('headers')
        method = options.get('method')

        # Set default timeout to 5 seconds and retries to 3 using urllib3 PoolManager
        retries = Retry(connect=3, read=3, redirect=3)
        timeout = Timeout(connect=5.0, read=5.0)
        http = PoolManager(retries=retries, timeout=timeout)

        # Send the request to the specific API
        try:
            response = http.request(url=url, headers=headers, method=method)
        except Exception as err:
            return {'Error': err}

        return json.loads(response.data.decode('utf-8'))
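
The Retry above only caps attempt counts. urllib3's Retry also supports
exponential backoff and retrying on specific status codes; a sketch, with the
status list below chosen purely as an illustration:

from urllib3 import PoolManager, Retry, Timeout

# Retry up to 3 times with exponential backoff, and also retry
# responses that come back 429/502/503/504.
retries = Retry(total=3, backoff_factor=0.5,
                status_forcelist=[429, 502, 503, 504])
http = PoolManager(retries=retries, timeout=Timeout(connect=5.0, read=5.0))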
Example #18
def getfreegames(s):
    url = s
    from urllib3 import PoolManager, exceptions
    from bs4 import BeautifulSoup
    import certifi
    try:
        https = PoolManager(ca_certs=certifi.where())
        response = https.request('GET', url,
                                 headers=config.headers).data.decode('utf-8')
        logwrite('Got url: {}'.format(url))
    except exceptions.ConnectionError:
        pp.pprint('Cannot connect to {}'.format(url))
        exit()

    soup = BeautifulSoup(response, 'html.parser')
    filterapps = soup.findAll('td')
    text = '{}'.format(filterapps)
    soup = BeautifulSoup(text, 'html.parser')
    from re import compile
    appidfinder = compile('^[0-9]{2,}$')
    link = compile('^/')
    for _ in soup.findAll('a', attrs={'href': link}):
        appid = returnappid(_.get('href'))
        appid = appidfinder.match(appid)
        if appid:
            appids.append(appid.string)
        else:
            break
Example #19
async def covid(event):
    try:
        url = 'https://quiec.tech/corona.php'
        http = PoolManager()
        request = http.request('GET', url)
        result = jsloads(request.data.decode('utf-8'))
        http.clear()
    except:
        await event.edit("`An error occurred.`")
        return

    sonuclar = ("** Coronavirus Data **\n" +
                "\n**Worldwide**\n" +
                f"**🌎 Cases:** `{result['tum']}`\n" +
                f"**🌎 Deaths:** `{result['tumolum']}`\n" +
                f"**🌎 Recovered:** `{result['tumk']}`\n" +
                "\n**Turkey**\n" +
                f"**🇹🇷 Cases (total):** `{result['trtum']}`\n" +
                f"**🇹🇷 Cases (today):** `{result['trbtum']}`\n" +
                f"**🇹🇷 Cases (active):** `{result['tra']}`\n" +
                f"**🇹🇷 Deaths (total):** `{result['trolum']}`\n" +
                f"**🇹🇷 Deaths (today):** `{result['trbolum']}`\n" +
                f"**🇹🇷 Recovered:** `{result['trk']}`")

    await event.edit(sonuclar)
Example #20
def filesend(servloca, attrdata):
    """
    Sends file to the provided Veritas server
    :param servloca:
    :param attrdata:
    :return:
    """
    try:
        with open(attrdata, "r") as fileobjc:
            contents = fileobjc.read()
        b64etext = b64encode(contents.encode()).decode()
        rgetfild = {"filename": attrdata, "contents": b64etext}
        httpobjc = PoolManager()
        rqstobjc = httpobjc.request("GET",
                                    servloca + "filesend",
                                    fields=rgetfild)
        respdata = json.loads(rqstobjc.data.decode())
        if respdata["retnmesg"] == "FAIL":
            click.echo(click.style("Transfer failed!", fg="red"))
        elif respdata["retnmesg"] == "DONE":
            click.echo(
                click.style("Store this token safely -> " +
                            respdata["tokniden"],
                            fg="green"))
    except Exception as expt:
        click.echo(" * " +
                   click.style("Error occurred    : " + str(expt), fg="red"))
Example #21
def getPharmaciesData():
    pharmaciesUrl = r'https://raw.githubusercontent.com/kiang/pharmacies/master/json/points.json'
    http = PoolManager()

    r = http.request('GET', pharmaciesUrl)
    pharmaciesData = json.loads(r.data)['features']

    allData = []
    for phd in pharmaciesData:
        data = {}
        geo = phd['geometry']['coordinates']
        name = phd['properties']['name']
        address = phd['properties']['address']
        mask_adult = phd['properties']['mask_adult']
        mask_child = phd['properties']['mask_child']
        available = phd['properties']['available']
        note = phd['properties']['note']
        data = {
            'name': name,
            'address': address,
            'mask_adult': mask_adult,
            'mask_child': mask_child,
            'available': available,
            'note': note,
            'geometry': geo
        }
        allData.append(data)
    return allData
Example #22
def get_entries_for_menu(menu):
    url = menu.url
    parent_element = menu.menu_element
    meal_element = menu.meal_element
    price_element = menu.price_element

    meal_selector = f"{parent_element} > * {meal_element}"
    price_selector = f"{parent_element} > * {price_element}"

    conn_pool = PoolManager()
    downloaded_page = conn_pool.request('GET', menu.url)

    # .data is the public accessor for the response body
    file_html = downloaded_page.data

    soup = BeautifulSoup(file_html, 'html.parser')

    meals = soup.select(meal_selector)
    prices = soup.select(price_selector)

    menu_entries = []
    for meal, price in zip(meals, prices):
        menu_entries.append((meal, price))

    return menu_entries
Example #23
async def parseqr(qr_e):
    """ .decode """
    downloaded_file_name = await qr_e.client.download_media(
        await qr_e.get_reply_message())

    # QR
    files = {'f': open(downloaded_file_name, 'rb').read()}
    t_response = None

    try:
        http = PoolManager()
        t_response = http.request(
            'POST', "https://zxing.org/w/decode", fields=files)
        t_response = t_response.data
        http.clear()
    except:
        pass

    os.remove(downloaded_file_name)
    if not t_response:
        await qr_e.edit(LANG['ERROR'])
        return
    soup = BeautifulSoup(t_response, "html.parser")
    qr_contents = soup.find_all("pre")[0].text
    await qr_e.edit(qr_contents)
Example #24
def getAmqpStats(helper):
    from urllib3 import PoolManager, util
    try: 
        username = "******"
        password = "******"

        headers = util.make_headers(basic_auth = username + ":" + password)

        http = PoolManager()
        r = http.request("GET", "upsilon:15672/api/channels", headers = headers)

        channels = json.loads(r.data)


        tbl = PrettyTable(["Connection", "username", "Unacked", "Publish"])

        if helper.args.debug:
            print(json.dumps(channels, indent=4))

        for conn in channels:
            tbl.add_row([conn['name'], conn['user'], conn['messages_unacknowledged'], 0])

        print(tbl)

    except Exception as e:
        print(str(e))
Example #25
def check_stock_proxy_manager(url, proxy=None, count=0):
    if proxy is None:
        manager = PoolManager(timeout=5,
                              cert_reqs='CERT_REQUIRED',
                              ca_certs=certifi.where())
    else:
        proxy_url = "%s://%s:%s" % (proxy[0], proxy[1], proxy[2])
        manager = ProxyManager(proxy_url,
                               timeout=5,
                               cert_reqs='CERT_REQUIRED',
                               ca_certs=certifi.where())
    headers = util.make_headers(accept_encoding='gzip, deflate',
                                keep_alive=True,
                                user_agent="Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:47.0) Gecko/20100101 Firefox/47.0")
    headers['Accept-Language'] = "en-US,en;q=0.5"
    headers['Connection'] = 'keep-alive'
    headers['Accept'] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    try:
        response = manager.request('GET',
                                   url,
                                   preload_content=False,
                                   headers=headers)
        content = json.loads(response.data)
        print("%s - Connect Success!" % count)
        return content['hasStock']
    except Exception as ex:
        print("%s - Connect Error!" % count)
        return False
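
If the proxy above required credentials, make_headers(proxy_basic_auth=...)
paired with ProxyManager's proxy_headers argument would cover it; a sketch
with a placeholder proxy URL and credentials:

import certifi
from urllib3 import ProxyManager, util

# 'user:password' and the proxy URL are hypothetical placeholders
proxy_headers = util.make_headers(proxy_basic_auth='user:password')
manager = ProxyManager('http://proxy.example:3128',
                       proxy_headers=proxy_headers,
                       timeout=5,
                       cert_reqs='CERT_REQUIRED',
                       ca_certs=certifi.where())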
Example #26
    def downloadPDF(self, filename='tdnet.zip', limit=3):
        try:
            df = self.df.head(limit)
        except:
            df = self.df
        df = df[df['datetime'] > self.date_min]

        with ZipFile(filename, 'w', compression=ZIP_DEFLATED) as new_zip:
            for pdf, code, date, title in zip(df['pdf'], df['code'],
                                              df['date'], df['title']):
                url = pdf
                filename = str(code) + '_' + str(date)[:10].replace(
                    '-', '') + '_' + title + '.pdf'

                # download
                request_methods = PoolManager()
                response = request_methods.request('GET', url)
                f = open(filename, 'wb')
                f.write(response.data)
                f.close()
                new_zip.write(filename)
                os.remove(filename)

                # timing adjustment (pause between downloads)
                sleep(3)
Example #27
def thingspeakgetfloat(chan, field, trace):
    """Retrieve a floating point value from a ThingSpeak field"""
    try:
        if chan == TS_BASEMENT_CHAN:
            key = TS_BASEMENT_API_READKEY
            nresults = "2"
            chanstr = "basement"
        else:
            wg_error_print("thingspeakgetfloat",
                           "Invalid channel arg: " + str(chan))
            return THINGSPEAK_FLOAT_ERROR
        pman = PoolManager()
        # Get the requested field from the specified ThingSpeak channel
        retstruct = pman.request(
            'GET', 'http://api.thingspeak.com/channels/' + chan + '/fields/' +
            field + '.json?api_key=' + key + '&results=' + nresults)
        decodestruct = json.loads(retstruct.data.decode('utf-8'))
        retval = decodestruct['feeds'][0]['field' + field]
        wg_trace_print(
            "field " + field + " of " + chanstr + " channel = " + retval,
            trace)
    except:
        wg_error_print(
            "thingspeakgetfloat", "Exception getting field " + field +
            " from " + chanstr + " channel")
        return THINGSPEAK_FLOAT_ERROR
    return float(retval)
Example #28
class Downloader:
    def __init__(self):
        self.pool = PoolManager()
        self.htot = html2text.HTML2Text()
        self.htot.body_width = 0
        self.htot.ignore_links = True

    def _get_page(self, url):
        return self.pool.request('GET', url).data.decode()

    def _tag_visible(self, elem):
        if elem.parent.name in [
                'style', 'script', 'head', 'title', 'meta', '[document]',
                'article', 'img', 'aside', 'iframe', 'footer'
        ]:
            return 0
        if isinstance(elem, element.Comment):
            return 0
        if elem == "\n" or elem.strip() == "":
            return 0
        return 1

    def _apply_formatting(self, elem):
        # formatting
        elem = re.sub(" +", " ", elem)
        return elem

    def _tag_to_text(self, tag):
        return self.htot.handle(str(tag))

    def _text_to_html(self, body):
        soup = BeautifulSoup(body, 'html.parser')
        texts = soup.findAll(text=True)
        visible_texts = list(filter(self._tag_visible, texts))
        return visible_texts
Example #29
    def _url2dictlist(self, mined_url: str) -> List[Dict]:
        """
        Generates a json file with all the metadata.

        Parameters
        ----------
        mined_url: str
            URL of the GH Archive json.gz file.

        Returns
        -------
        List(Dict):
            Returns a list of the JSON dictionaries corresponding to the mined data.
        """
        https = PoolManager(cert_reqs='CERT_REQUIRED',
                            ca_certs=certifi.where())
        dict_list = []
        try:
            response = https.request('GET', mined_url)
            compressed_json = BytesIO(response.data)
            with gzip.GzipFile(fileobj=compressed_json) as json_bytes:
                json_str = json_bytes.read().decode('utf-8')
                for json_value in json_str.split('\n'):
                    if self._is_valid_json(json_value):
                        data = json.loads(json_value)
                        if self.filter_by_event(data, self.event_set):
                            dict_list.append(data)
        except OSError:
            pass

        return dict_list
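
urllib3 only auto-decodes transport-level Content-Encoding; a .json.gz file
is gzipped content, so the manual decompression above is required. A shorter
sketch of the same decode step using gzip.decompress:

import gzip
import json
from urllib3 import PoolManager

def gz_json_lines(url):
    # Fetch a gzipped JSON-lines file and yield one dict per line
    r = PoolManager().request('GET', url)
    for line in gzip.decompress(r.data).decode('utf-8').split('\n'):
        if line.strip():
            yield json.loads(line)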
Example #30
def get_url(url, encoding=''):
    if encoding == '':
        encoding = GBK
    pool = PoolManager()
    res = pool.request('GET', url)
    res_dict = res.data.decode(encoding)
    return res_dict
Example #31
def check_vul(url):
    """
    Test if a GET to a URL is successful
    :param url: The URL to test
    :return: A dict with the exploit type as the keys, and the HTTP status code as the value
    """
    if gl_args.mode == 'auto-scan' or gl_args.mode == 'file-scan':
        timeout = Timeout(connect=1.0, read=3.0)
        pool = PoolManager(timeout=timeout, retries=1, cert_reqs='CERT_NONE')
    else:
        timeout = Timeout(connect=3.0, read=6.0)
        pool = PoolManager(timeout=timeout, cert_reqs='CERT_NONE')

    url_check = parse_url(url)
    if '443' in str(url_check.port) and url_check.scheme != 'https':
        url = "https://"+str(url_check.host)+":"+str(url_check.port)

    print(GREEN + "\n ** Checking Host: %s **\n" % url)

    headers = {"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
               "Connection": "keep-alive",
               "User-Agent": user_agents[randint(0, len(user_agents) - 1)]}

    paths = {"jmx-console": "/jmx-console/HtmlAdaptor?action=inspectMBean&name=jboss.system:type=ServerInfo",
             "web-console": "/web-console/ServerInfo.jsp",
             "JMXInvokerServlet": "/invoker/JMXInvokerServlet",
             "admin-console": "/admin-console/"}

    for i in paths.keys():
        if gl_interrupted: break
        try:
            print(GREEN + " * Checking %s: \t" % i + ENDC),
            r = pool.request('HEAD', url +str(paths[i]), redirect=False, headers=headers)
            paths[i] = r.status

            # check if it's false positive
            if len(r.getheaders()) == 0:
                print(RED + "[ ERROR ]\n * The server %s is not an HTTP server.\n" % url + ENDC)
                paths = {"jmx-console": 505,
                         "web-console": 505,
                         "JMXInvokerServlet": 505,
                         "admin-console": 505}
                break


            if paths[i] in (301, 302, 303, 307, 308):
                url_redirect = r.get_redirect_location()
                print(GREEN + "[ REDIRECT ]\n * The server sent a redirect to: %s\n" % url_redirect)
            elif paths[i] == 200 or paths[i] == 500:
                if i == "admin-console":
                    print(RED + "[ EXPOSED ]" + ENDC)
                else:
                    print(RED + "[ VULNERABLE ]" + ENDC)
            else:
                print(GREEN + "[ OK ]")
        except:
            print(RED + "\n * An error occurred while connecting to the host %s\n" % url + ENDC)
            paths[i] = 505

    return paths
Example #32
 def getwylie(tibtxt):
     http = PoolManager()
     url = WYCONVURL + urlparse.quote(tibtxt)
     req = http.request('GET', url)
     if req.status == 200:
         return req.data.decode('utf-8')
     return False
Example #33
 def gettib(wytxt):
     http = PoolManager()
     url = TIBCONVURL + wytxt.replace(' ', '%20')
     req = http.request('GET', url)
     if req.status == 200:
         return req.data.decode('utf-8')
     return False
Example #34
def fetch_images_with_urls(word_net_id_list):
    '''
    Download the images with url and store them on the local folder.

    :param word_net_id_list: list of wordnet ids
    :return: none
    '''
    image_db_dir = os.path.dirname(os.path.realpath(__file__))
    db_conn = sqlite3.connect(os.path.join(str(image_db_dir), "ImageNet.db"))
    c = db_conn.cursor()
    http_conn = PoolManager()
    for word_net_id in word_net_id_list:

        # Create a list of image ids that will be fetched
        image_id_list = []
        image_file_dir = os.path.dirname(os.path.realpath(__file__))
        image_id_file_path = os.path.join(str(image_file_dir), 'ImageID.txt')
        with open(image_id_file_path, 'r', encoding='utf8') as out_file:
            for line in out_file:
                if line.startswith(word_net_id):
                    image_id_list = line.split("\t")[1].strip().split(',')
                    break

        # Check how many images are downloaded for the target noun object
        file_path = os.path.dirname(os.path.realpath(__file__))
        image_folder_path = os.path.join(file_path, 'Images', word_net_id)
        if not os.path.exists(image_folder_path):
            os.makedirs(image_folder_path)
        url_count = len(os.listdir(image_folder_path))

        # Randomly download the images for the target noun object
        random.Random(time.time()).shuffle(image_id_list)
        start_time = time.time()
        for image_id in image_id_list:
            if url_count < 10 and image_id.startswith(word_net_id):

                # First, find the url of the image from the database
                url = c.execute(
                    'SELECT URL from ImageNetURLs WHERE ImageID=(?) ORDER BY RANDOM() LIMIT 10',
                    (image_id, )).fetchone()
                if url is not None:
                    if check_url(url[0]) is not False:

                        # Second, fetch the image
                        timeout = Timeout(connect=5.0, read=10.0)
                        response = http_conn.request('GET',
                                                     url[0],
                                                     preload_content=False,
                                                     timeout=timeout)
                        file_name = os.path.normpath(image_folder_path + "/" +
                                                     image_id + ".png")

                        # Last, save the image on the local directory
                        with open(file_name, 'wb') as out_file:
                            shutil.copyfileobj(response, out_file)
                        url_count = url_count + 1

        # print("Image directory: " + image_folder_path)
        # print("--- %s seconds (Get Images) ---" % (time.time() - start_time))
    c.close()
Example #35
def check_url(url):
    '''
    Verify the url before fetching the image.

    :param url: url string
    :return: TRUE or FALSE
    '''
    try:
        timeout = Timeout(connect=1.0, read=1.0)
        conn = PoolManager()
        response = conn.request('HEAD', url, timeout=timeout)
        status = int(response.status)
        is_url_valid = False

        # Check the HTTP response status code and whether the url redirects to another page
        if status in range(200,
                           209) and response.get_redirect_location() is False:

            # Check the content length of the response(if present) to verify whether the url
            # contains an image which can be used in scene creation
            content_length = int(response.headers.get('Content-Length', 0))
            if content_length != 0 and content_length > 2100:
                is_url_valid = True

        conn.clear()
        return is_url_valid

    except Exception:
        return False
Example #36
def radtherm_get_todays_highest_setting(trace):
    """Figure out the highest temp setting in today's program."""
    days = ["mon", "tue", "wed", "thu", "fri", "sat", "sun"]
    num_tries = 1
    retval = {}
    prog = RADTHERM_FLOAT_ERROR
    try:
        pman = PoolManager()
        wkdy = datetime.datetime.today().weekday()
        url = 'http://' + TSTAT_IP +'/tstat/program/heat/' + days[wkdy]
        while num_tries < 6 and retval.get(str(wkdy), 'error') == 'error':
            ret = pman.request('GET', url)
            retval = json.loads(ret.data.decode('utf-8'))
            if trace:
                pprt = pprint.PrettyPrinter(indent=4)
                pprt.pprint(retval)
            if retval.get(str(wkdy), 'error') != 'error':
                prog = max((retval[str(wkdy)])[1::2]) # 1,3,5, etc. elements are the temps
            num_tries += 1
        return prog
    except Exception as err: #pylint: disable=W0703
        wg_error_print("radtherm_get_todays_highest_setting",
                       str(err))
        if hasattr(err, 'response'):
            wg_error_print(err.response.text)
        return prog
Example #37
def urllib3_test():
    from urllib3 import PoolManager

    errors = 0
    oks = 0
    p = PoolManager(20)
    for a in url:
        try:
            p.request(method="GET", url=a)
            oks += 1
        except Exception:
            errors += 1
            continue

        print(".", end="")
    print()
    print("Errors: %s | Oks: %s." % (errors, oks))
Example #38
 def run(self):
     try:
         response = self.pool.request("GET", self.url, headers=self.headers)
     except HostChangedError as e:
         # Redirect, give up on managing resources ourselves, just get the
         # file
         managed_pool = PoolManager(1)
         response = managed_pool.request("GET", e.url, headers=self.headers)
Example #39
def getUrl(url):
    """Возвращает содержимое страницы по переданном url"""
    try:
        http = PoolManager()
        r = http.request('GET', url,
                         timeout=Timeout(connect=2.0, read=5.0),
                         retries=Retry(5, redirect=False)
                         )
        return html.fromstring(r.data)
    except urllib3.exceptions.MaxRetryError:
        print('Maximum number of attempts (5) exceeded:', url)
        return None
Example #40
    def getSequential(self, urls_headers):
        conn_pool = connection_from_url(urls_headers[0][0], maxsize=CONNECTIONS_PER_HOST)
        responses = []

        for url, headers in urls_headers:
            try:
                response = conn_pool.request("GET", url, headers=headers)
            except HostChangedError as e:
                # Redirect, give up on managing resources ourselves, just get the
                # file
                managed_pool = PoolManager(1)
                response = managed_pool.request("GET", e.url, headers=headers)
            responses.append((url, response))
Example #41
class PlainUtility():
	def __init__(self):
		user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.7) Gecko/2009021910 Firefox/3.0.7'
		self.headers={'User-Agent':user_agent}
		self.ip_url = 'http://icanhazip.com/'
		self.logger = logging.getLogger('gkp')
		retries = Retry(connect=5, read=5, redirect=5)
		self.agent = PoolManager(10,retries=retries, timeout=Timeout(total=30.0))

	def current_ip(self):
		return self.request(self.ip_url)

	def request(self,url):
		r = self.agent.request('GET',url)
		if r.status == 200: return r.data
		else: self.logger.error('status %s'%r.status)
Example #42
    def download(self):
        if self.url is None or self.url == '':
            return DOWNLOAD_RESULT['FAIL']

        ##################### Start Download Web Page #####################
        printState(hint="Connecting", msg=self.url)
        parse_url = parseURL(self.url)
        scheme = parse_url.scheme
        (filename, filetype) = getFileInURL(parse_url.path)

        timeout = Timeout(connect=2., read=7.)
        if scheme.lower() == 'https':
            http = PoolManager(
                cert_reqs='CERT_REQUIRED', 
                ca_certs=certifi.where(),
                timeout=timeout
            )
        else:
            http = PoolManager(timeout=timeout)

        try:
            r = http.request('GET', self.url)
            printState(hint='Establish', msg=self.url)

        except SSLError as e:
            printFail(hint="SSL Error", msg=self.url)
            return DOWNLOAD_RESULT['FAIL']
        except MaxRetryError as e:
            printFail(hint="Resolve Error", msg=self.url)
            return DOWNLOAD_RESULT['FAIL']
        ##################### End #####################

        ##################### Start Save Web Page #####################
        if isNormalConn(r.status):
            try:
                file_name = save(data=r.data,filename=filename, dir=DOWNLOAD_DIR)
            except AttributeError as e:
                printFail(hint="Save file fail in", msg=self.url)
                return DOWNLOAD_RESULT['FAIL']
            URL_VISITED_FILE_LIST.put(file_name)

        URL_VISITED_LIST.append(self.url)
        printSuccess(hint="Finish", msg=self.url)
        self.url = None
        self.fail_time = 0
        return DOWNLOAD_RESULT['SUCCESS']
Example #43
    def _get_report_file(self, url):
        # Ensure file url matches the hostname in settings,
        # workaround for Canvas bug help.instructure.com/tickets/362386
        url = re.sub(r'^https://[^/]+', settings.RESTCLIENTS_CANVAS_HOST, url)

        timeout = getattr(settings, "RESTCLIENTS_TIMEOUT", 15.0)
        cafile = getattr(settings, "RESTCLIENTS_CA_BUNDLE",
                         "/etc/ssl/certs/ca-bundle.crt")
        pool_manager = PoolManager(cert_reqs="CERT_REQUIRED",
                                   ca_certs=cafile,
                                   socket_timeout=timeout,
                                   retries=5)

        response = pool_manager.request("GET", url)

        if response.status != 200:
            raise DataFailureException(url, response.status, response.data)

        return response.data
Example #44
 def test_urllib3_basic(self):
     ''' Basic ''' # {{{
     http = PoolManager()
     for n in self.numbers:
         r = http.request('GET', self.prefix_url+n)
         l = len(r.data)
Example #45
using = 0
notusing = 0
broke = 0
http = urllib3.PoolManager(
    cert_reqs='CERT_REQUIRED',  # Force certificate check.
    ca_certs=certifi.where(),   # Path to the Certifi bundle.
)


while i < len(domains):
    domain = domains[i].rstrip('\n')
    url = urlstart + domains[i].rstrip('\r\n')
    http = PoolManager(timeout=Timeout(read=2.0))
    try:
        check = http.request('GET', url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1'}, timeout=2)
        response = check.headers
        if 'strict-transport-security' in response:
            print("[+] " + domain + ': is using HSTS!!!')
            if 'preload' in str(response):
                print("  [+] Preload enabled")
            else:
                print("  [Warning!] Preload is not configured")
            if 'includeSubDomains' in str(response):
                print("  [+] includeSubdomains is present")
            else:
                print("  [Warning!] includeSubDomains is not configured")
            if 'max-age=31536000' in str(response):
                print("  [+] max-age is set to two years - well done")
            else:
                print("  [Warning!] max-age should really be set to two years (31536000)")
Example #46
class Client(object):
    """Interface to KISSmetrics tracking service"""

    def __init__(self, key, trk_host=KISSmetrics.TRACKING_HOSTNAME,
                 trk_scheme=KISSmetrics.TRACKING_SCHEME):
        """Initialize client for use with KISSmetrics API key.

        :param key: API key for product, found on the
                    "KISSmetrics Settings".
        :type key: str
        :param trk_host: tracking host for requests; defaults to the
                         production tracking service.
        :param trk_scheme: the scheme for requests; either 'http'
                           or 'https'.

        """
        self.key = key
        if trk_scheme not in ('http', 'https'):
            raise ValueError('trk_scheme must be one of (http, https)')
        self.http = PoolManager()
        self.trk_host = trk_host
        self.trk_scheme = trk_scheme

    def record(self, person, event, properties=None, timestamp=None,
               path=KISSmetrics.RECORD_PATH):
        """Record `event` for `person` with any `properties`.

        :param person: the individual performing the `event`
        :param event: the `event` name that was performed
        :param properties: any additional data to include
        :type properties: dict
        :param timestamp: when the `event` was performed; optional for
                          back-dating
        :param path: HTTP endpoint to use; defaults to
                    ``KISSmetrics.RECORD_PATH``

        :returns: an HTTP response for the request
        :rtype: `urllib3.response.HTTPResponse`

        """
        this_request = request.record(self.key, person, event,
                                      timestamp=timestamp,
                                      properties=properties,
                                      scheme=self.trk_scheme,
                                      host=self.trk_host, path=path)
        return self._request(this_request)

    def set(self, person, properties=None, timestamp=None,
            path=KISSmetrics.SET_PATH):
        """Set a property (or properties) for a `person`.

        :param person: individual to associate properties with
        :param properties: key-value pairs to associate with `person`
        :type properties: dict
        :param timestamp: when the `event` was performed; optional for
                          back-dating
        :param path: HTTP endpoint to use; defaults to
                    ``KISSmetrics.SET_PATH``

        :returns: an HTTP response for the request
        :rtype: `urllib3.response.HTTPResponse`

        """
        this_request = request.set(self.key, person, timestamp=timestamp,
                                   properties=properties,
                                   scheme=self.trk_scheme, host=self.trk_host,
                                   path=path)
        return self._request(this_request)

    def alias(self, person, identity, path=KISSmetrics.ALIAS_PATH):
        """Map `person` to `identity`; actions done by one resolve to other.

        :param person: consider as same individual ``identity``; the
                       source of the alias operation
        :type person: str or unicode
        :param identity: consider as an alias of ``person``; the target
                         of the alias operation
        :type identity: str or unicode
        :param path: HTTP endpoint to use; defaults to
                    ``KISSmetrics.ALIAS_PATH``

        :returns: an HTTP response for the request
        :rtype: `urllib3.response.HTTPResponse`

        Note the direction of the mapping is ``person`` to ``identity``
        (so "``person`` is also known as ``identity``" or "``person`` =>
        ``identity``" when looking at it as "``<source>`` => ``<target>``")

        When consulting the Aliasing documentation, `person` corresponds
        to ``query_string.PERSON_PARAM`` and `identity` corresponds to
        ``query_string.ALIAS_PARAM``.

        Aliasing is not a reversible operation.  When aliasing to an
        identity, take care not to use a session identifier or any other
        value that is not relatively stable (a value that will not
        change per request or per session).

        For more information see the API Specifications on `Aliasing
        <http://support.kissmetrics.com/apis/specifications.html#aliasing-users>`_.

        """
        this_request = request.alias(self.key, person, identity,
                                     scheme=self.trk_scheme,
                                     host=self.trk_host, path=path)
        return self._request(this_request)

    def _request(self, uri, method='GET'):
        return self.http.request(method, uri)
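
A usage sketch for the Client above; the API key, person, event, and identity
values are made-up placeholders:

client = Client(key='<your-api-key>')  # hypothetical key
resp = client.record('user@example.com', 'signed up',
                     properties={'plan': 'free'})  # hypothetical person/event
if resp.status == 200:
    # map the e-mail address to a CRM identifier (hypothetical)
    client.alias('user@example.com', 'crm-id-42')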
Example #47
class ProxyService(object):
    def __init__(self, error_dict):
        self.redis_service = RedisService()
        self.error_proxy_dict = error_dict
        self.connection_pool = PoolManager()

    def get_proxy(self, protocol):
        proxy = self.redis_service.read_set(PROXY_URL_KEY.format(protocol=protocol))
        if not proxy:
            return None
        return {protocol: proxy}

    def get_valid_size(self, protocol):
        return self.redis_service.get_set_size(PROXY_URL_KEY.format(protocol=protocol))

    def process(self):
        logger.info('Start load proxy.')
        #content = self._scrape_http_proxy()
        #parser_proxy_url_set = self._parser_http_proxy(content)
        #self._save('http', self._check('http', parser_proxy_url_set))

        content = self._scrape_https_proxy()
        parser_proxy_url_set = self._parser_https_proxy(content)
        self._save('https', self._check('https', parser_proxy_url_set))

    def manage(self, proxy, error):
        if not proxy:
            return
        protocol, proxy_url = list(proxy.items())[0]
        if error:
            if proxy_url in self.error_proxy_dict:
                self.error_proxy_dict[proxy_url] += 1
                if self.error_proxy_dict[proxy_url] > DEFAULT_ERROR_TIMES:
                    self.redis_service.remove_set(PROXY_URL_KEY.format(protocol=protocol), proxy_url)
                    self.error_proxy_dict.pop(proxy_url)
                    logger.info('Invalid proxy: {}'.format(proxy_url))
                    print('Invalid proxy')
            else:
                self.error_proxy_dict[proxy_url] = 1
        else:
            if proxy_url in self.error_proxy_dict:
                self.error_proxy_dict[proxy_url] -= 1
                if self.error_proxy_dict[proxy_url] < 1:
                    self.error_proxy_dict.pop(proxy_url)
        logger.info(self.error_proxy_dict)

    @retry(2)
    def _scrape_http_proxy(self):
        scrape_url = 'http://www.xicidaili.com/nn'
        header = {'content-type': 'text/html',
                  'User-Agent': user_agents[random.randint(0, len(user_agents)-1)]}
        try:
            response = self.request.get(scrape_url, headers=header, proxies=None)
            return response.content
        except:
            raise Exception('Failed scrape proxies.')

    @retry(2)
    def _scrape_https_proxy(self):
        #scrape_url = 'http://www.nianshao.me/?stype=2'
        scrape_url = 'http://proxy.moo.jp/zh/?c=&pt=&pr=HTTPS&a%5B%5D=0&a%5B%5D=1&a%5B%5D=2&u=60'
        header = {'content-type': 'text/html',
                  'Accept-Language': 'zh-CN,zh;q=0.8',
                  'User-Agent': user_agents[random.randint(0, len(user_agents)-1)]}
        response = self.connection_pool.request('GET', scrape_url, timeout=60, headers=header)
        return response.data

    def _parser_http_proxy(self, content):
        soup = BeautifulSoup(content, 'html.parser')
        proxy_tag = soup.find(id='ip_list').select('tr')
        parser_proxy_url_set = set()
        for i in range(1, 21):
            proxy_url = PROXY_URL.format(protocol='http',
                                         ip=proxy_tag[i].find_all('td')[1].string,
                                         port=proxy_tag[i].find_all('td')[2].string)
            parser_proxy_url_set.add(proxy_url)
        return parser_proxy_url_set

    def _parser_https_proxy(self, content):
        soup = BeautifulSoup(content, 'html.parser')
        proxy_tag = soup.find_all('tr', {'class': 'Odd'})
        res = re.compile(r'%(%|\w)+')
        parser_proxy_url_set = set()
        for i in range(0, 25):
            tds = proxy_tag[i].find_all('td')
            if not tds[0].string:
                continue
            ip_res = res.search(tds[0].string)
            if ip_res:
                ip = urllib.parse.unquote(ip_res.group(0))
                port = tds[1].string
                proxy_url = PROXY_URL.format(protocol='https', ip=ip, port=port)
                parser_proxy_url_set.add(proxy_url)
        return parser_proxy_url_set

    def _check(self, protocol, proxy_url_set):
        valid_proxy_url_set = set()
        for url in proxy_url_set:
            header = {'content-type': 'text/html',
                      'User-Agent': user_agents[random.randint(0, len(user_agents)-1)]}
            proxy = {protocol: url}
            conection_pool = ProxyManager(url)
            try:
                response = conection_pool.request('GET', CHECK_URL[protocol], timeout=60, headers=header)
                if response.status == 200:
                    valid_proxy_url_set.add(url)
                    print('Valid proxy url', url)
                else:
                    print('Invalid', url)
            except Exception as ex:
                print(ex)
                print('Invalid', url)

        return valid_proxy_url_set

    def _save(self, protocol, parser_proxy_url_set):
        for url in parser_proxy_url_set:
            self.redis_service.add_set(PROXY_URL_KEY.format(protocol=protocol), url)