コード例 #1
0
async def api_v1_stats(request: Request):
    """Return aggregate proxy statistics as a JSON response.

    The payload holds the median and mean latency of the valid proxies, the
    number of valid proxies, and the total number of stored proxies.
    """
    # Median: order the valid rows by latency and take the single row that
    # sits at offset COUNT/2.
    median = ProxyIP.raw("""SELECT latency
                                FROM proxy_ips
                                WHERE is_valid = 1
                                ORDER BY latency
                                LIMIT 1
                                OFFSET (
                                  SELECT COUNT(*) FROM proxy_ips WHERE is_valid = 1
                                ) / 2""").get().latency

    # Mean: unlike the median query, rows with latency >= 9999 are left out.
    mean = ProxyIP.raw("""SELECT AVG(latency) as latency
                                    FROM proxy_ips
                                    WHERE is_valid = 1 AND latency < 9999""").get().latency

    valid_count = _get_valid_proxies_query().count()
    total_count = ProxyIP.select().count()

    stats = {
        'median': median,
        'valid_count': valid_count,
        'total_count': total_count,
        'mean': mean,
    }
    return json(stats)
コード例 #2
0
def create_test_ip() -> str:
    """Store a valid test proxy with a random ip and return that ip string."""
    random_ip = _gen_random_ip()
    fixture_fields = dict(
        ip=random_ip,
        port=3306,
        latency=200.00,
        stability=100.0,
        is_valid=True,
    )
    ProxyIP(**fixture_fields).save()
    return random_ip
コード例 #3
0
def validate_proxy_ip(p: ProxyIP):
    """Validate one proxy and hand the outcome to ``save_ip``.

    Returns without doing anything when the validation policy built for
    ``p`` reports that it should not be validated.
    """
    policy = ValidationPolicy(proxy_ip=p)
    if not policy.should_validate():
        return

    validator = Validator(
        host=p.ip,
        port=int(p.port),
        using_https=policy.should_try_https(),
    )

    try:
        validator.validate()
    except (KeyboardInterrupt, SystemExit):
        # The interruption is only logged; the code below still records
        # whatever state the validator holds.
        logger.info('KeyboardInterrupt terminates validate_proxy_ip: ' + p.ip)

    extra_fields = validator.meta or {}
    result = ProxyIP(ip=p.ip, port=p.port, **extra_fields)

    # Copy the measurements taken by the validator onto the record.
    result.latency = validator.latency
    result.stability = validator.success_rate
    result.is_valid = validator.valid
    result.is_anonymous = validator.anonymous

    # Count this attempt, and separately count it as an https attempt.
    result.attempts += 1
    if validator.using_https:
        result.https_attempts += 1

    # is_https is only written when the validation succeeded.
    if validator.valid:
        result.is_https = validator.using_https

    save_ip(result)
コード例 #4
0
def test_save_ip():
    """Saving two records that share an ip must leave exactly one row."""
    shared_ip = '192.168.0.1'

    # Two separate but field-identical instances are saved one after another.
    for _ in range(2):
        save_ip(ProxyIP(ip=shared_ip, port=443, latency=200, stability=0.5))

    rows_with_ip = ProxyIP.select().where(ProxyIP.ip == shared_ip).count()
    assert rows_with_ip == 1

    ProxyIP.delete().execute()
コード例 #5
0
def test_create_ip_floor_latency():
    """A latency of 100.66 must read back as 100.0 once the record is saved."""
    random_ip = gen_random_ip()
    record = ProxyIP(
        ip=random_ip,
        port=3306,
        latency=100.66,
        stability=100.0,
        is_valid=True,
    )
    record.save()

    assert record.latency == 100.0

    delete_test_ip(random_ip)
コード例 #6
0
ファイル: jobs.py プロジェクト: mmg1/scylla-1
def save_ip(p: ProxyIP):
    """Insert ``p`` as a new row, or merge it into the row sharing its ip.

    When no stored row has ``p.ip`` the instance itself is saved. Otherwise
    the stored row is loaded, updated through ``assign_from(p)`` and saved.
    """
    stored_rows = ProxyIP.select().where(ProxyIP.ip == p.ip).count()
    if stored_rows == 0:
        p.save()
        return

    stored: ProxyIP = ProxyIP.get(ProxyIP.ip == p.ip)
    stored.assign_from(p)
    stored.save()
コード例 #7
0
def test_create_ip():
    """Creating a test proxy must leave at least one row in the table."""
    created_ip = create_test_ip()

    assert ProxyIP.select().count() > 0

    delete_test_ip(created_ip)
コード例 #8
0
def test_validate_proxy_ip(mocker):
    """``validate_proxy_ip`` must call the validator and ``save_ip`` once each."""
    validate_mock = mocker.patch('scylla.validator.Validator.validate')
    save_ip_mock = mocker.patch('scylla.jobs.save_ip')

    validate_proxy_ip(ProxyIP(ip='127.0.0.1', port=80))

    validate_mock.assert_called_once()
    save_ip_mock.assert_called_once()
コード例 #9
0
    def feed_from_db():
        """Queue proxies updated within the last 14 days for re-validation."""
        # TODO: better query (order by attempts)
        cutoff = datetime.now() - timedelta(days=14)
        recent_proxies = ProxyIP.select().where(ProxyIP.updated_at > cutoff)

        for proxy in recent_proxies:
            scheduler.validator_queue.put(proxy)

        logger.debug('Feed {} proxies from the database for a second time validation'.format(len(recent_proxies)))
コード例 #10
0
ファイル: server.py プロジェクト: zvrr/scylla
def get_proxy(https=False) -> ProxyIP:
    """Pick a random proxy among the most recently updated reliable ones.

    Candidates are valid proxies with stability of at least 0.9, limited to
    the 63 most recently updated rows.

    Args:
        https: When true, only proxies flagged ``is_https`` are considered.

    Returns:
        One randomly chosen ``ProxyIP`` from the candidate rows.
    """
    # ``== True`` is intentional: it builds a query expression, so it must
    # not be rewritten as ``is True`` or a bare truthiness test.
    query = ProxyIP.select().where(ProxyIP.is_valid == True).where(ProxyIP.stability >= 0.9)

    if https:
        query = query.where(ProxyIP.is_https == True)

    candidates = query.order_by(ProxyIP.updated_at.desc()).limit(63)
    return random.choice(candidates)
コード例 #11
0
    def parse(self, html: HTML) -> [ProxyIP]:
        """Build a ``ProxyIP`` from the first cell of every ``.proxylist`` row.

        The first cell's text is split on ``:`` into an ip and a port.
        """
        proxies: [ProxyIP] = []

        for row in html.find('.proxylist tbody tr'):
            address_text = row.find('td:nth-child(1)', first=True).text
            host, port_number = address_text.split(":")
            proxies.append(ProxyIP(ip=host, port=port_number))

        return proxies
コード例 #12
0
    def parse(self, html: HTML) -> [ProxyIP]:
        """Extract proxies from the rows of the nested ``table table`` element.

        Args:
            html: Parsed page to search.

        Returns:
            One ``ProxyIP`` per row whose first cell contains an IPv4-looking
            string and whose second cell contains a 2-5 digit port. Rows that
            lack either are skipped.
        """
        ip_list: [ProxyIP] = []

        for tr in html.find('table table tr'):
            ip_element = tr.find('td:nth-of-type(1)', first=True)
            port_element = tr.find('td:nth-of-type(2)', first=True)
            if not (ip_element and port_element):
                continue

            ip_match = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ip_element.text)
            port_match = re.search(r'\d{2,5}', port_element.text)
            # re.search returns None when nothing matches; skip such rows
            # instead of letting one bad row abort the whole parse.
            if ip_match is None or port_match is None:
                continue

            ip_list.append(ProxyIP(ip=ip_match.group(0), port=port_match.group(0)))

        return ip_list
コード例 #13
0
    def parse(self, html: HTML) -> [ProxyIP]:
        """Decode the response body as UTF-8 JSON and map each entry to a ``ProxyIP``.

        Every entry is read through its ``ip``, ``port`` and ``anonymous`` keys.
        """
        entries = json.loads(html.raw_html.decode('utf-8'))
        return [
            ProxyIP(ip=entry['ip'],
                    port=entry['port'],
                    is_anonymous=entry['anonymous'])
            for entry in entries
        ]
コード例 #14
0
    def parse(self, document: PyQuery) -> [ProxyIP]:
        """Extract proxies from the rows of ``#list table``.

        Args:
            document: Parsed page to search.

        Returns:
            One ``ProxyIP`` per row that has both an ``IP`` and a ``PORT``
            cell (matched by their ``data-title`` attribute).
        """
        ip_list: [ProxyIP] = []

        for row_element in document.find('#list table tr'):
            # Iterating a PyQuery selection yields bare lxml elements, so the
            # row is wrapped again to get CSS-selector find() and text().
            ip_row = PyQuery(row_element)
            ip_element = ip_row.find('td[data-title="IP"]')
            port_element = ip_row.find('td[data-title="PORT"]')

            # An empty selection is falsy, which skips header/filler rows.
            if ip_element and port_element:
                p = ProxyIP(ip=ip_element.text(), port=port_element.text())
                ip_list.append(p)

        return ip_list
コード例 #15
0
ファイル: xici_provider.py プロジェクト: zmqAlbert/scylla
    def parse(self, document: PyQuery) -> [ProxyIP]:
        """Extract proxies from the rows of ``#ip_list``.

        Args:
            document: Parsed page to search.

        Returns:
            One ``ProxyIP`` per row that has a second cell (ip) and a third
            cell (port).
        """
        ip_list: [ProxyIP] = []

        for row_element in document.find('#ip_list tr'):
            # Iterating a PyQuery selection yields bare lxml elements, so the
            # row is wrapped again to get CSS-selector find() and text().
            ip_row = PyQuery(row_element)
            ip_element = ip_row.find('td:nth-child(2)')
            port_element = ip_row.find('td:nth-child(3)')

            # An empty selection is falsy, which skips rows without cells.
            if ip_element and port_element:
                p = ProxyIP(ip=ip_element.text(), port=port_element.text())
                ip_list.append(p)

        return ip_list
コード例 #16
0
ファイル: data5u_provider.py プロジェクト: zvrr/scylla
    def parse(self, html: HTML) -> [ProxyIP]:
        """Extract proxies from the ``.l2`` rows of the ``.wlist`` listing.

        A row contributes a ``ProxyIP`` only when both its first span (ip)
        and second span (port) are found.
        """
        proxies: [ProxyIP] = []

        for row in html.find('.wlist > ul > li:nth-child(2) .l2'):
            ip_span = row.find('span:nth-child(1)', first=True)
            port_span = row.find('span:nth-child(2)', first=True)

            if not (ip_span and port_span):
                continue

            proxies.append(ProxyIP(ip=ip_span.text, port=port_span.text))

        return proxies
コード例 #17
0
    def parse(self, document: PyQuery) -> [ProxyIP]:
        """Build a ``ProxyIP`` from the first cell of every ``.proxylist`` row.

        Each row is wrapped in ``PyQuery`` and the text of its first cell is
        split on ``:`` into an ip and a port.
        """
        proxies: [ProxyIP] = []

        for row_element in document.find('.proxylist tbody tr'):
            row: PyQuery = PyQuery(row_element)
            address_text: str = row.find('td:nth-child(1)').text()
            host, port_number = address_text.split(":")
            proxies.append(ProxyIP(ip=host, port=port_number))

        return proxies
コード例 #18
0
    def parse(self, document: PyQuery) -> [ProxyIP]:
        """Parse the document's content as JSON and read its ``usproxy`` list.

        Args:
            document: Parsed response whose content is expected to be JSON.

        Returns:
            One ``ProxyIP`` per entry of the ``usproxy`` list; an empty list
            when the content is empty, is not a JSON object, or has no
            ``usproxy`` list.
        """
        ip_list: [ProxyIP] = []

        # NOTE(review): assumes document.html() hands back the raw JSON text
        # unchanged — confirm against how the document is constructed.
        text = document.html()
        if not text:
            return ip_list

        # json.loads parses a string; json.load expects a file-like object
        # and fails on the str returned by html().
        json_object = json.loads(text)
        if not isinstance(json_object, dict):
            return ip_list

        entries = json_object.get('usproxy')
        if not isinstance(entries, list):
            return ip_list

        for ip_port in entries:
            p = ProxyIP(ip=ip_port['ip'], port=ip_port['port'])
            ip_list.append(p)

        return ip_list
コード例 #19
0
    def parse(self, document: PyQuery) -> [ProxyIP]:
        """Extract proxies from the body rows of ``#proxylisttable``.

        Args:
            document: Parsed page to search.

        Returns:
            One ``ProxyIP`` per row whose first cell (ip) and second cell
            (port) both contain text.
        """
        ip_list: [ProxyIP] = []

        for row_element in document.find('#proxylisttable tbody tr'):
            # Iterating a PyQuery selection yields bare lxml elements, so the
            # row is wrapped again to get CSS-selector find() and text().
            ip_row = PyQuery(row_element)
            ip_address: str = ip_row.find('td:nth-child(1)').text()
            port: str = ip_row.find('td:nth-child(2)').text()

            # text() of an empty selection is empty; such rows carry no proxy.
            if not ip_address or not port:
                continue

            p = ProxyIP(ip=ip_address, port=port)

            ip_list.append(p)

        return ip_list
コード例 #20
0
ファイル: proxy_scraper_provider.py プロジェクト: zvrr/scylla
    def parse(self, html: HTML) -> [ProxyIP]:
        """Decode the response body as UTF-8 JSON and read its ``usproxy`` list.

        Returns an empty list when the decoded object is falsy or when its
        ``usproxy`` value is not a list.
        """
        payload = json.loads(html.raw_html.decode('utf-8'))

        if not payload or type(payload['usproxy']) != list:
            return []

        return [
            ProxyIP(ip=entry['ip'], port=entry['port'])
            for entry in payload['usproxy']
        ]
コード例 #21
0
    def parse(self, document: PyQuery) -> [ProxyIP]:
        """Extract proxies from the ``.l2`` rows of the ``.wlist`` listing.

        Args:
            document: Parsed page to search.

        Returns:
            One ``ProxyIP`` per row that has both a first span (ip) and a
            second span (port).
        """
        ip_list: [ProxyIP] = []

        for row_element in document.find('.wlist > ul > li:nth-child(2) .l2'):
            # Iterating a PyQuery selection yields bare lxml elements, so the
            # row is wrapped again to get CSS-selector find() and text().
            ip_row = PyQuery(row_element)
            ip_element = ip_row.find('span:nth-child(1)')
            port_element = ip_row.find('span:nth-child(2)')

            # An empty selection is falsy, which skips incomplete rows.
            if ip_element and port_element:
                p = ProxyIP(ip=ip_element.text(), port=port_element.text())
                ip_list.append(p)

        return ip_list
コード例 #22
0
ファイル: kuaidaili_provider.py プロジェクト: mmg1/scylla-1
    def parse(self, html: HTML) -> [ProxyIP]:
        """Extract proxies from the rows of ``#list table``.

        A row contributes a ``ProxyIP`` only when both its ``IP`` and ``PORT``
        cells (matched by ``data-title``) are found.
        """
        proxies: [ProxyIP] = []

        for row in html.find('#list table tr'):
            ip_cell = row.find('td[data-title="IP"]', first=True)
            port_cell = row.find('td[data-title="PORT"]', first=True)

            if not (ip_cell and port_cell):
                continue

            proxies.append(ProxyIP(ip=ip_cell.text, port=port_cell.text))

        return proxies
コード例 #23
0
    def parse(self, document: PyQuery) -> [ProxyIP]:
        """Extract proxies from the rows of the nested ``table table`` element.

        Args:
            document: Parsed page to search.

        Returns:
            One ``ProxyIP`` per row whose first cell contains an IPv4-looking
            string and whose second cell contains a 2-5 digit port. Rows that
            lack either are skipped.
        """
        ip_list: [ProxyIP] = []

        for row_element in document.find('table table tr'):
            # Iterating a PyQuery selection yields bare lxml elements, so the
            # row is wrapped again to get CSS-selector find() and text().
            tr = PyQuery(row_element)
            ip_element = tr.find('td:nth-of-type(1)')
            port_element = tr.find('td:nth-of-type(2)')
            if not (ip_element and port_element):
                continue

            # On PyQuery objects text is a method, so it has to be called.
            ip_match = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}',
                                 ip_element.text())
            port_match = re.search(r'\d{2,5}', port_element.text())
            # re.search returns None when nothing matches; skip such rows
            # instead of letting one bad row abort the whole parse.
            if ip_match is None or port_match is None:
                continue

            ip_list.append(ProxyIP(ip=ip_match.group(0), port=port_match.group(0)))

        return ip_list
コード例 #24
0
    def parse(self, html: HTML) -> [ProxyIP]:
        """Extract proxies from the body rows of ``#proxylisttable``.

        The first cell supplies the ip, the second the port, and every record
        is tagged with this provider's class name.
        """
        proxies: [ProxyIP] = []

        for row in html.find('#proxylisttable tbody tr'):
            host = row.find('td:nth-child(1)', first=True).text
            port_number = row.find('td:nth-child(2)', first=True).text

            proxies.append(
                ProxyIP(ip=host, port=port_number, provider=self.__class__.__name__)
            )

        return proxies
コード例 #25
0
    def parse(self, document: PyQuery) -> [ProxyIP]:
        """Extract proxies from every ``table tr`` row of the document.

        Args:
            document: Parsed page to search.

        Returns:
            One ``ProxyIP`` per row that has both a first cell (ip) and a
            second cell (port). Any ``document.write(...)`` fragment is
            stripped from the ip cell's text.
        """
        ip_list: [ProxyIP] = []

        for row_element in document.find('table tr'):
            # Iterating a PyQuery selection yields bare lxml elements, so the
            # row is wrapped again to get CSS-selector find() and text().
            ip_row = PyQuery(row_element)
            ip_element: PyQuery = ip_row.find('td:nth-child(1)')
            port_element: PyQuery = ip_row.find('td:nth-child(2)')

            # An empty selection is falsy, which skips rows without cells.
            if ip_element and port_element:
                p = ProxyIP(ip=re.sub(r'document\.write\(.+\)', '', ip_element.text()), port=port_element.text())

                ip_list.append(p)

        return ip_list
コード例 #26
0
    def parse(self, html: HTML) -> [ProxyIP]:
        """Decode the base64 ``Proxy('...')`` payload found in each table row.

        For every ``ul`` of the proxy table, the script text of its
        ``li.proxy`` item is searched for a ``Proxy('...')`` call. The first
        captured argument is base64-decoded, and the ip and port are pulled
        out of the decoded text.
        """
        proxies: [ProxyIP] = []

        for row in html.find('#proxy-table > div.table-wrap ul'):
            script_text = row.find('li.proxy script', first=True).text
            payloads = re.findall(r"Proxy\('(.+)'\)", script_text)
            if not payloads:
                continue

            address = base64.b64decode(payloads[0]).decode("utf-8")
            host = re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', address)[0]
            port_number = re.findall(r':(\d{2,5})', address)[0]
            proxies.append(ProxyIP(ip=host, port=port_number))

        return proxies
コード例 #27
0
def save_ip(p: ProxyIP):
    """Insert ``p`` as a new row, or refresh the row that shares its ip.

    When a row with ``p.ip`` already exists, only latency, stability,
    is_valid, is_anonymous and updated_at are rewritten.
    """
    stored_rows = ProxyIP.select().where(ProxyIP.ip == p.ip).count()

    if stored_rows == 0:
        logger.debug('Creating new ip record: ' + str(p))
        p.save()
        return

    logger.debug('Update an existing ip record: ' + str(p))

    refreshed_fields = dict(
        latency=p.latency,
        stability=p.stability,
        is_valid=p.is_valid,
        is_anonymous=p.is_anonymous,
        updated_at=datetime.datetime.now(),
    )
    ProxyIP.update(**refreshed_fields).where(ProxyIP.ip == p.ip).execute()

    logger.debug('Saved: ' + str(p))
コード例 #28
0
ファイル: cool_proxy_provider.py プロジェクト: mmg1/scylla-1
    def parse(self, html: HTML) -> [ProxyIP]:
        """Extract proxies from every ``table tr`` row of the page.

        A row contributes a ``ProxyIP`` only when both its first cell (ip) and
        second cell (port) are found. Any ``document.write(...)`` fragment is
        stripped from the ip cell's text.
        """
        proxies: [ProxyIP] = []

        for row in html.find('table tr'):
            ip_cell = row.find('td:nth-child(1)', first=True)
            port_cell = row.find('td:nth-child(2)', first=True)

            if not (ip_cell and port_cell):
                continue

            cleaned_ip = re.sub(r'document\.write\(.+\)', '', ip_cell.text)
            proxies.append(ProxyIP(ip=cleaned_ip, port=port_cell.text))

        return proxies
コード例 #29
0
ファイル: spys_me_provider.py プロジェクト: wvengen/scylla
    def parse(self, html: HTML) -> [ProxyIP]:
        """Collect every ``ip:port`` pair found in the UTF-8 decoded response body.

        Each record is tagged with this provider's class name.
        """
        proxies: [ProxyIP] = []

        body = html.raw_html.decode('utf-8')
        address_strings = re.findall(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{2,5}', body)

        for address in address_strings:
            host = re.search(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', address).group(0)
            port_number = re.search(r':(\d{2,5})', address).group(1)

            if not (host and port_number):
                continue

            proxies.append(ProxyIP(ip=host, port=port_number, provider=self.__class__.__name__))

        return proxies
コード例 #30
0
ファイル: spys_me_provider.py プロジェクト: zmqAlbert/scylla
    def parse(self, document: PyQuery) -> [ProxyIP]:
        """Collect every ``ip:port`` pair found in the document's markup.

        Args:
            document: Parsed page whose serialized content is scanned.

        Returns:
            One ``ProxyIP`` per ``ip:port`` occurrence; an empty list when the
            document has no content.
        """
        ip_list: [ProxyIP] = []

        text = document.html()
        if not text:
            return ip_list
        # html() returns str, which has no decode(); only decode if a
        # bytes value is ever handed back.
        if isinstance(text, bytes):
            text = text.decode('utf-8')

        # Capturing both halves in one pass avoids re-searching each match.
        pairs = re.findall(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{2,5})', text)

        for ip, port in pairs:
            ip_list.append(ProxyIP(ip=ip, port=port))

        return ip_list