Example 1
    def save(self):
        ## rebuild the user's proxy delegations from the submitted form data
        Proxy.objects.filter(delegating=self.user).delete()
        newproxy = Proxy(delegating=self.user, isdefault=True)
        newproxy.save()
        if 'main_proxy' in self.data.keys():
            for user in self.data.getlist('main_proxy'):
                user_object = CustomUser.objects.get(pk=user)
                newproxy.delegates.add(user_object)
        newproxy.save()

        for count in xrange(int(self.data["tagfieldcount"])):
            if "side_proxy%d" % count in self.data.keys(
            ) and "side_proxy_tags%d" % count in self.data.keys():
                newproxy = Proxy(delegating=self.user)
                newproxy.save()
                for user in self.data.getlist("side_proxy%d" % count):
                    user_object = CustomUser.objects.get(pk=user)
                    newproxy.delegates.add(user_object)

                for tag in self.data.getlist("side_proxy_tags%d" % count):
                    tag_object = Tag.objects.get(pk=tag)
                    newproxy.tags.add(tag_object)

                newproxy.save()
        return
Example 2
    def save(self):
        ### first disable all existing proxies (suboptimal, but easiest) ###
        for proxy in Proxy.objects.filter(delegating=self.user):
            proxy.disable()

        ### add new proxy for each form element ###
        newproxy = Proxy(delegating=self.user, isdefault=True)
        newproxy.save()
        if 'main_proxy' in self.data.keys():
            for user in self.data.getlist('main_proxy'):
                user_object = CustomUser.objects.get(pk=user)
                newproxy.delegates.add(user_object)
        newproxy.save()

        for count in xrange(int(self.data["tagfieldcount"])):
            if "side_proxy%d" % count in self.data.keys(
            ) and "side_proxy_tags%d" % count in self.data.keys():
                newproxy = Proxy(delegating=self.user)
                newproxy.save()
                for user in self.data.getlist("side_proxy%d" % count):
                    user_object = CustomUser.objects.get(pk=user)
                    newproxy.delegates.add(user_object)

                for tag in self.data.getlist("side_proxy_tags%d" % count):
                    tag_object = Tag.objects.get(pk=tag)
                    newproxy.tags.add(tag_object)

                newproxy.save()
        return
Example 3
def filte(content):
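    # Each <tr> row on the page becomes one Proxy; ip, port, location,
    # anonymity and type are read from fixed <td> positions (2 to 6).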
    soup = BeautifulSoup(content)
    proxy_list_info = soup.findAll('tr')
    proxy_list = []
    for row in proxy_list_info:
        td_index = 0
        proxy_tds = row.findAll('td')
        has_get = False
        proxy = Proxy()
        for proxy_td in proxy_tds:
            td_index += 1
            if td_index == 2:
                has_get = True
                proxy.ip = proxy_td.text
            elif td_index == 3:
                proxy.port = proxy_td.text
            elif td_index == 4:
                if proxy_td.a is not None:
                    proxy.location = proxy_td.a.text
            elif td_index == 5:
                proxy.anonymous_type = proxy_td.text
            elif td_index == 6:
                proxy.proxy_type = proxy_td.text.lower()

        if has_get:
            proxy_list.append(proxy)
    return proxy_list
Example 4
    def load(self) -> list:
        ls = []
        if self._num is None:
            return ls

        if self._context and self._context.logger:
            self._context.logger.info('SixSixIPProxySpider: loading proxy list.')

        url = SixSixIPProxySpider._POOL_URL.format(self._num)
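        # proxies are listed in the response body as "ip:port<br />"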
        reg = re.compile(r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d+)(?=<br />)')
        try:
            res = requests.get(url, proxies=self._sys_proxy, timeout=self._timeout)
            for match in reg.finditer(res.text):
                try:
                    for protocol in ('http', 'https'):
                        proxy = Proxy()
                        proxy.ip = match.group(1)
                        proxy.port = match.group(2)
                        proxy.protocol = protocol
                        proxy.proxy_url = self.proxy_url(proxy.ip, proxy.port, proxy.protocol)
                        proxy.collect_time = Datetime.now()
                        proxy.local = Config.local
                        ls.append(proxy)
                except:
                    pass
            return ls
        except:
            if self._context and self._context.logger:
                self._context.logger.exception('SixSixIPProxySpider: Failed to load proxy list.')
            raise
Example 5
def addProxies(s):
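    # each input line is an "ip:port" entry; save errors (e.g. duplicates) are ignored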
    s = s.splitlines()
    for i in s:
        try:
            Proxy(ip_and_port=i, proxy_type='http').save()
        except:
            pass
Example 6
    def load(self) -> list:
        ls = []

        if self._context and self._context.logger:
            self._context.logger.info('FatezeroProxySpider: loading proxy list.')
        try:
            res = requests.get(FatezeroProxySpider._POOL_URL, proxies=self._sys_proxy, timeout=self._timeout)
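            # the endpoint returns one JSON object per line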
            for text in res.text.split('\n'):
                try:
                    p = json.loads(text)
                    proxy = Proxy()
                    proxy.ip = p['host']
                    proxy.port = p['port']
                    proxy.protocol = p['type']
                    proxy.proxy_url = self.proxy_url(proxy.ip, proxy.port, proxy.protocol)
                    proxy.collect_time = Datetime.now()
                    proxy.local = Config.local
                    ls.append(proxy)
                except:
                    pass
            if self._num is None:
                return ls
            else:
                return ls[:self._num]
        except:
            if self._context and self._context.logger:
                self._context.logger.exception('FatezeroProxySpider: Failed to load proxy list.')
            raise
Example 7
def save_proxies(url):
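    # store every address matching PROXY_REGEX, skipping ones already saved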
    try:
        r = fetch(url)
    except requests.exceptions.RequestException:
        return False
    addresses = re.findall(PROXY_REGEX, r.text)
    for address in addresses:
        proxy = Proxy(address=address)
        try:
            proxy.save()
        except NotUniqueError:
            pass
Example 8
    def create_or_update_proxy(raw_protocol: Proxy.PROTOCOLS, auth_data,
                               domain, port, start_checking_time,
                               end_checking_time):
        if raw_protocol is None or domain is None or port is None or auth_data is None or start_checking_time is None\
                or end_checking_time is None:
            raise Exception("Bad arguments")

        if raw_protocol < 0 or raw_protocol >= len(Proxy.PROTOCOLS):
            raise Exception("Bad protocol")

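        # response time is stored in microseconds (checking times are in seconds)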
        response_time = int(
            round((end_checking_time - start_checking_time) * 1000000))

        proxy = session.query(Proxy).filter(
            sqlalchemy.and_(Proxy.raw_protocol == raw_protocol,
                            Proxy.auth_data == auth_data,
                            Proxy.domain == domain,
                            Proxy.port == port)).first()

        if proxy:
            # exists, so update
            pass
        else:
            # doesn't exist, so create
            proxy = Proxy(number_of_bad_checks=0,
                          raw_protocol=raw_protocol,
                          auth_data=auth_data,
                          domain=domain,
                          port=port)
            session.add(proxy)

        if proxy.bad_proxy or proxy.uptime is None or proxy.uptime == 0:
            proxy.uptime = int(time.time())

        if proxy.bad_uptime is None or proxy.bad_uptime == 0 or \
                proxy.number_of_bad_checks > settings.DEAD_PROXY_THRESHOLD:
            proxy.bad_uptime = int(time.time())

        proxy.response_time = response_time
        proxy.number_of_bad_checks = 0
        proxy.last_check_time = int(time.time())

        checking_time = int(end_checking_time - start_checking_time)
        if checking_time > settings.PROXY_CHECKING_TIMEOUT:
            checking_time = settings.PROXY_CHECKING_TIMEOUT

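        # scale the next checking period linearly between the configured MIN and
        # MAX according to how long this check took relative to the timeout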
        proxy.checking_period = \
            settings.MIN_PROXY_CHECKING_PERIOD \
            + (checking_time / settings.PROXY_CHECKING_TIMEOUT) \
            * (settings.MAX_PROXY_CHECKING_PERIOD - settings.MIN_PROXY_CHECKING_PERIOD)

        session.commit()
Example 9
def add_proxy(request):
    urls = request.body
    print urls
    urlList = urls.split(',')
    rst = []
    for url in urlList:
        try:
            p = Proxy(url=url, rate=0)
            p.save()
        except Exception as e:
            rst.append(str(e))
            continue
    return HttpResponse(','.join(rst))
Example 10
    def __execute_one_spider_task(self, spider):
        """
        一次执行具体爬虫的任务
        :param spider:
        :return:
        """
        try:
            for proxy in spider.get_proxies():
                proxy = check_proxy(proxy)
                if proxy.speed != -1:
                    session = Session()
                    exist = session.query(Proxy) \
                        .filter(Proxy.ip == str(proxy.ip), Proxy.port == str(proxy.port)) \
                        .first()

                    if not exist:
                        obj = Proxy(
                            ip=str(proxy.ip),
                            port=str(proxy.port),
                            protocol=proxy.protocol,
                            nick_type=proxy.nick_type,
                            speed=proxy.speed,
                            area=str(proxy.area),
                            score=proxy.score,
                            disable_domain=proxy.disable_domain,
                            origin=str(proxy.origin),
                            create_time=datetime.now()
                        )
                        session.add(obj)
                        session.commit()
                        session.close()
                        logger.info(f'insert: {proxy.ip}:{proxy.port} from {proxy.origin}!')
                    else:
                        exist.score['score'] = settings.MAX_SCORE
                        exist.score['power'] = 0
                        exist.port = proxy.port
                        exist.protocol = proxy.protocol
                        exist.nick_type = proxy.nick_type
                        exist.speed = proxy.speed
                        exist.area = proxy.area
                        exist.disable_domain = proxy.disable_domain
                        exist.origin = proxy.origin
                        session.commit()
                        session.close()
                        logger.info(f'update: {proxy.ip}:{proxy.port}, to max score successfully!')
                else:
                    logger.info(f'invalid: {proxy.ip}:{proxy.port} from {proxy.origin}!')

        except Exception as e:
            logger.error(f'spider error: {e}')
Example 11
    async def process_raw_proxy(self, proxy, collector_id):
        self.logger.debug("processing raw proxy \"{}\"".format(proxy))

        try:
            _, auth_data, domain, port = proxy_validator.retrieve(proxy)
        except proxy_validator.ValidationError as ex:
            self.collectors_logger.error(
                "Collector with id \"{}\" returned bad raw proxy \"{}\". "
                "Message: {}".format(collector_id, proxy, ex)
            )
            return


        # don't care about protocol
        try:
            proxy = await db.get(
                Proxy.select().where(
                    Proxy.auth_data == auth_data,
                    Proxy.domain == domain,
                    Proxy.port == port,
                )
            )

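            # a proxy that was checked recently is skipped instead of being re-queued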
            if proxy.last_check_time + settings.PROXY_NOT_CHECKING_PERIOD >= time.time():
                proxy_short_address = ""
                if auth_data:
                    proxy_short_address += auth_data + "@"

                proxy_short_address += "{}:{}".format(domain, port)

                self.logger.debug(
                    "skipping proxy \"{}\" from collector \"{}\"".format(
                        proxy_short_address, collector_id)
                )
                return
        except Proxy.DoesNotExist:
            pass

        for raw_protocol in range(len(Proxy.PROTOCOLS)):
            while not self.good_proxies_are_processed:
                # TODO: find a better way
                await asyncio.sleep(0.1)

            new_proxy = Proxy()
            new_proxy.raw_protocol = raw_protocol
            new_proxy.auth_data = auth_data
            new_proxy.domain = domain
            new_proxy.port = port

            await self.add_proxy_to_queue(new_proxy, collector_id)
Example 12
def add_proxy(request):
    response = []  # default when the request is not a POST
    if request.method == 'POST':
        port = request.POST.get('port', '0')
        destination = request.POST.get('destination', 'https://empty')
        delay = request.POST.get('delay', '0')
        p = Proxy(delay=delay,
                  port=port,
                  destination=destination,
                  started=False,
                  pid=0)
        p.save()
        new_id = p.id
        proxy = Proxy.objects.filter(id=new_id).values()
        response = [{"message": "proxy was started", "proxy": list(proxy)[0]}]
    return JsonResponse(response, safe=False)
Example 13
def use_thread_with_queue2():
    cleanup()
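    # NOTE: the queue/thread pipeline below and the API fetch are commented out,
    # so `addresses` stays empty and only the final re-check of stored proxies runs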
    # in_queue = Queue.Queue()
    # out_queue = Queue.Queue()
    #
    # for i in range(5):
    #     t = threading.Thread(target=save_proxies_with_queue2,
    #                          args=(in_queue, out_queue))
    #     t.setDaemon(True)
    #     t.start()
    #
    # for url in PROXY_SITES:
    #     in_queue.put(url)
    #
    # result = []
    #
    # for i in range(5):
    #     t = threading.Thread(target=append_result,
    #                          args=(out_queue, result))
    #     t.setDaemon(True)
    #     t.start()
    #
    # in_queue.join()
    # out_queue.join()

    addresses = []

    # mogu_key = ""
    # res = requests.get(mogu_key)
    # addresses = res.json()['msg']

    for address in addresses:
        proxy = Proxy(address=address['ip'] + ':' + address['port'])
        try:
            proxy.save()
        except NotUniqueError:
            pass

    pool = Pool(10)
    pool.map(check_proxy, Proxy.objects.all())
    print(len(addresses))
    print(Proxy.objects.count())
Example 14
def save_proxies(url):
    proxies = []
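    # returns the list of newly saved addresses, or False if the page fetch fails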
    try:
        # if url == 'http://www.kuaidaili.com/free':
        #     import pdb;
        #     pdb.set_trace()
        res = requests.get(url)

    except requests.exceptions.RequestException:
        return False
    addresses = re.findall(PROXY_REGEX, res.text)
    for address in addresses:
        proxy = Proxy(address=address)
        try:
            proxy.save()
        except NotUniqueError:
            pass
        else:
            proxies.append(address)
    return proxies
Example 15
 def get_ips(self):
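     # for each search pattern, crawl result pages through a random proxy from
     # the DB and collect the proxies listed on each page for re-checking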
     for pat in pats:
         objs = []
         headers = {
             'content-type': 'application/json',
             'User-Agent': random.choice(agents)
         }
         for i in range(100):
             ip_obj = self.session.query(Proxy).order_by(
                 func.random()).first()
             proxies = {
                 '{type}'.format(type=ip_obj.type):
                 '{type}://{ip}:{port}'.format(type=ip_obj.type,
                                               ip=ip_obj.ip,
                                               port=ip_obj.port)
             }
             url = '{base_url}{pat}{page}'.format(base_url=base_url,
                                                  pat=pat,
                                                  page=i)
             logger.info('Scrapy {url}'.format(url=url))
             try:
                 response = requests.get(url,
                                         headers=headers,
                                         proxies=proxies)
                 if response.status_code == 200:
                     selector = etree.HTML(response.text)
                     for line in selector.xpath(
                             '//table[@id="ip_list"]//tr[@class="odd"]'):
                         proxy_obj = Proxy()
                         proxy_obj.id = str(uuid.uuid1())
                         proxy_obj.ip = line.xpath('td')[1].xpath(
                             'text()')[0]
                         proxy_obj.port = line.xpath('td')[2].xpath(
                             'text()')[0]
                         proxy_obj.type = str(
                             line.xpath('td')[5].xpath('text()')
                             [0]).lower().replace('https', 'http')
                         objs.append(proxy_obj)
             except:
                 pass
         self._threads_check(objs)
Example 16
def filte(content):
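    # parse the third <table> on the page; the Chinese anonymity labels are
    # normalised to '高匿' (elite) or '透明' (transparent)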
    soup = BeautifulSoup(content)
    proxy_list_tables = soup.findAll('table')
    table_index = 0
    pattern = re.compile(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}')
    proxy_list = []
    for table in proxy_list_tables:
        table_index += 1
        if table_index == 3:
            proxy_list_info = table.findAll('tr')
            for row in proxy_list_info:
                td_index = 0
                proxy_tds = row.findAll('td')
                proxy = Proxy()
                is_proxy = False
                for proxy_td in proxy_tds:
                    td_index += 1
                    if td_index == 2:
                        rel_ip_info = re.search(pattern, proxy_td.text)
                        if rel_ip_info:
                            proxy.ip = rel_ip_info.group(0)
                            is_proxy = True
                    elif td_index == 3:
                        if is_proxy:
                            proxy.port = int(proxy_td.text)
                    elif td_index == 4:
                        if is_proxy:
                            if '匿名代理' == proxy_td.text or '高度匿名' == proxy_td.text:
                                proxy.anonymous_type = '高匿'
                            else:
                                proxy.anonymous_type = '透明'
                    elif td_index == 5:
                        if is_proxy:
                            proxy.location = proxy_td.text
                            proxy.proxy_type = 'http'
                if is_proxy:
                    proxy_list.append(proxy)
    return proxy_list
Example 17
def proxy(operation=None, proxy_id=None):
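    # a 'delete' operation removes the given proxy, a POST adds a new one;
    # either way the full proxy table is rendered afterwards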
    if operation and proxy_id and operation == 'delete':
        try:
            rm_proxy = Proxy.query.get(proxy_id)
            db.session.delete(rm_proxy)
            db.session.commit()

            flash(u'Proxy removed', 'success')
        except Exception:
            flash(u'Proxy does not exist', 'error')

    if request.method == 'POST':
        proxy = Proxy(ptype=request.values.get('type', None),
                      ip=request.values.get('ip', None),
                      port=request.values.get('port', None))
        db.session.add(proxy)
        db.session.commit()

        flash(u'Proxy added', 'success')

    proxy_list = []
    results = Proxy.query.all()
    if results:
        for result in results:
            proxy_list.append({
                'id': result.id,
                'ptype': result.ptype,
                'ip': result.ip,
                'port': result.port,
                'status': result.status,
            })

    return render_template('proxy.html',
                           data=proxy_list,
                           base_url=BASE_SITE_URL,
                           scrapyd_url=scrapyd_url)
Example 18
def set_proxy(bot, update, args):
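    # expects a single "host:port:username:password" argument and replaces any
    # previously stored Proxy row with the new settings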
    if len(args) == 1:
        proxy_data = args[0].split(':')
        if len(proxy_data) != 4:
            update.message.reply_text(
                "Please, include the proxy data to the "
                "command, like in the example:\n"
                "<code>/set_proxy mydomain.com:8080:usErnAme:s3cret</code>",
                parse_mode=ParseMode.HTML)
            return
        proxy_ip, proxy_port, proxy_username, proxy_password = proxy_data
        current_proxy = session.query(Proxy).first()
        if current_proxy:
            session.delete(current_proxy)
        new_proxy = Proxy(proxy_ip, proxy_port, proxy_username, proxy_password)
        session.add(new_proxy)
        session.commit()
        update.message.reply_text("Proxy settings updated.")
    else:
        update.message.reply_text(
            "Please, include the proxy data to the "
            "command, like in the example:\n"
            "<code>/set_proxy mydomain.com:8080:usErnAme:s3cret</code>",
            parse_mode=ParseMode.HTML)
Example 19
    def __init__(self, proxy_test_filter, context=None):
        super().__init__(context=context)
        self._proxy_test_filter = proxy_test_filter
        self._entity = Proxy()
Example 20
				return (True, (time.time() - time1) * 1000)
		return (False, 0)
	except:
		return (False, 0)

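# Fetch GOOGLE_CHECK_URL through the given proxy; a page titled "Google" counts
# as success and the latency in milliseconds is returned.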
def check_google(proxy_info):
	proxy_content = proxy_info.ip + ':' + str(proxy_info.port)
	proxy = urllib2.ProxyHandler({proxy_info.proxy_type : proxy_content})
	opener = urllib2.build_opener(proxy)
	urllib2.install_opener(opener)
	try:
		time1 = time.time()
		response = urllib2.urlopen(GOOGLE_CHECK_URL, timeout=3)
		title = BeautifulSoup(response.read()).title.text
		if 'Google' == str(title):
			proxy_info.check_time = str(datetime.now()).split('.')[0]
			return (True, (time.time() - time1) * 1000)
		else:
			return (False, 0)
	except:
		return (False, 0)

if __name__ == '__main__':
	proxy = Proxy()
	proxy.ip = '222.74.6.48'
	proxy.port = '8000'
	proxy.proxy_type = 'http'
	default_ip = get_default_ip()
	print check_anonymous(proxy, default_ip)
	
	
Example 21
from models import Proxy
from main_proxy import text_proxy_generator

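# persist every address yielded by text_proxy_generator() as a Proxy row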
for proxy in text_proxy_generator():
    proxy = Proxy(host_port=proxy)
    proxy.save()