Ejemplo n.º 1
0
    def _check(self, proxy):
        """Check ``proxy`` through this mirror and store the outcome.

        Sends a request via ``_make_request``, parses the response body into
        a new ``ProxyCheckResult``, lets the proxy update itself from that
        result and saves the result.

        :param proxy: proxy to check; its ``pk`` is part of the cache key
            that is deleted when the check finishes.
        :returns: the saved ``ProxyCheckResult``.
        :raises Exception: when ``output_type`` is not ``'plm_v1'``. Any
            failure is re-raised after ``proxy.update_from_error()`` ran.
        """

        check_key = "proxy.%s.check" % proxy.pk

        try:
            res = ProxyCheckResult()
            res.proxy = proxy
            res.mirror = self
            res.check_start = now()
            response = self._make_request(proxy)
            raw_data = response.body
            try:
                elapsed_time = '%1.2f' % self._get_elapsed_time(proxy)
            except Exception:
                # Measuring ourselves failed: fall back to the timing that
                # the response object carries. Only ``Exception`` is caught
                # so KeyboardInterrupt/SystemExit are not swallowed here.
                elapsed_time = response.time
            res.check_end = now()
            res.raw_response = raw_data

            if self.output_type == 'plm_v1':
                self._parse_plm_v1(res, raw_data)
            else:
                raise Exception('Output type not found!')

            proxy.update_from_check(res, elapsed_time)

            res.save()

            return res
        except:
            # Deliberately bare: record the failure on the proxy for any
            # kind of error, then let it propagate unchanged.
            proxy.update_from_error()
            raise
        finally:
            # Task unlock: drop the per-proxy cache key in every case.
            cache.delete(check_key)
Ejemplo n.º 2
0
    def _check(self, proxy):
        """Check ``proxy`` through this mirror and store the outcome.

        Sends a request via ``_make_request``, parses the response body into
        a new ``ProxyCheckResult``, lets the proxy update itself from that
        result and saves the result.

        :param proxy: proxy to check; its ``pk`` is part of the cache key
            that is deleted when the check finishes.
        :returns: the saved ``ProxyCheckResult``.
        :raises Exception: when ``output_type`` is not ``'plm_v1'``. Any
            failure is re-raised after ``proxy.update_from_error()`` ran.
        """

        check_key = "proxy.%s.check" % proxy.pk

        try:
            res = ProxyCheckResult()
            res.proxy = proxy
            res.mirror = self
            res.check_start = now()
            response = self._make_request(proxy)
            raw_data = response.body
            try:
                elapsed_time = '%1.2f' % self._get_elapsed_time(proxy)
            except Exception:
                # Measuring ourselves failed: fall back to the total time
                # reported by the response object. Only ``Exception`` is
                # caught so KeyboardInterrupt/SystemExit are not swallowed.
                elapsed_time = response.total_time
            res.check_end = now()
            res.raw_response = raw_data

            if self.output_type == 'plm_v1':
                self._parse_plm_v1(res, raw_data)
            else:
                raise Exception('Output type not found!')

            proxy.update_from_check(res, elapsed_time)

            res.save()

            return res
        except:
            # Deliberately bare: record the failure on the proxy for any
            # kind of error, then let it propagate unchanged.
            proxy.update_from_error()
            raise
        finally:
            # Task unlock: drop the per-proxy cache key in every case.
            cache.delete(check_key)
Ejemplo n.º 3
0
def mirror(request):
    """
    Echo the caller's address and HTTP headers back as a JSON document.

    The JSON object holds ``REMOTE_ADDR`` and ``REMOTE_HOST`` (empty string
    when missing from ``request.META``), an ``http_headers`` mapping with
    every ``HTTP_*`` entry of ``request.META`` whose value type is exactly
    str, unicode, bool, int or float (keys lose the ``HTTP_`` prefix), and
    ``response_start`` / ``response_end`` timestamps rendered with ``str()``.

    Example of reading such a timestamp back:

        from dateutil.parser import parse
        from dateutil.tz import tzoffset
        from datetime import datetime

        parse("2012-12-28 22:22:12.868342+04:00")
        datetime(2012, 12, 28, 22, 22, 12, 868342,
                 tzinfo=tzoffset(None, 14400))
    """

    started_at = now()

    plain_types = (str, unicode, bool, int, float)
    meta = request.META

    payload = {
        'REMOTE_ADDR': meta.get('REMOTE_ADDR', ''),
        'REMOTE_HOST': meta.get('REMOTE_HOST', ''),
    }

    # HTTP headers: keep only plain values and strip the "HTTP_" prefix.
    payload['http_headers'] = dict(
        (key[5:], value)
        for key, value in meta.items()
        if key.startswith('HTTP_') and type(value) in plain_types
    )

    # Timing: the end stamp is taken after the headers were collected.
    payload['response_start'] = str(started_at)
    payload['response_end'] = str(now())

    return HttpResponse(json.dumps(payload))
Ejemplo n.º 4
0
 def handle(self, *args, **options):
     """Re-parse every proxy list whose ``next_check`` is due.

     Each due ``ProxyList`` is passed to ``parse_proxies``; an error is
     printed and does not stop the remaining lists. Afterwards the list's
     ``next_check`` is rewritten and the list is saved.
     """
     for proxy in ProxyList.objects.filter(next_check__lte=now()):
         try:
             self.parse_proxies(proxy)
         except Exception as msg:
             # Single pre-formatted argument so the output is the same under
             # the Python 2 print statement and the Python 3 print function.
             print('>> %s' % msg.__str__())
         # NOTE(review): this places next_check ``update_period`` seconds in
         # the PAST, so the list is due again on the very next run; confirm
         # whether ``now() + timedelta(...)`` was intended.
         proxy.next_check = (now() - timedelta(seconds=proxy.update_period))
         proxy.save()
Ejemplo n.º 5
0
    def update_from_error(self):
        """Record a failed check.

        Stamps ``last_check`` with the current time, raises the ``errors``
        counter by one, recomputes the next check time through
        ``_update_next_check`` and saves the instance.
        """

        self.last_check = now()
        self.errors = self.errors + 1
        # The schedule depends on both fields set above.
        self._update_next_check()
        self.save()
Ejemplo n.º 6
0
    def update_from_error(self):
        """Record a failed check.

        Stamps ``last_check`` with the current time, raises the ``errors``
        counter by one, recomputes the next check time through
        ``_update_next_check`` and saves the instance.
        """

        self.last_check = now()
        self.errors = self.errors + 1
        # The schedule depends on both fields set above.
        self._update_next_check()
        self.save()
Ejemplo n.º 7
0
    def parse_proxies(proxy):
        """Download the list at ``proxy.url`` and store its entries.

        The body is read line by line. A line is used when it has the form
        ``host:port`` or ``host:port@user:password``; anything else is
        skipped. For each usable line a ``Proxy`` is fetched or created by
        hostname and port. Credentials, when both are non-empty, are written
        to the object and saved. A newly created object gets a
        ``next_check`` 60 seconds in the past and is saved.

        :param proxy: object exposing the ``url`` of the list to download.
        """
        fetcher = grabber.Grab(use_db_proxy=False)
        fetcher.go(proxy.url)
        body = fetcher.response.body

        for raw_line in body.split('\n'):
            line = raw_line.strip()

            # An empty line has no colon either, so one test covers both.
            if ':' not in line:
                continue

            sections = line.split('@')
            endpoint = sections[0].split(':')
            if len(endpoint) != 2:
                continue

            host, port = endpoint
            if not (host and port):
                continue

            obj, created = Proxy.objects.get_or_create(
                hostname=host, port=port)

            if len(sections) == 2:
                credentials = sections[1].split(':')
                if len(credentials) == 2:
                    user, password = credentials
                    if user and password:
                        obj.user = user
                        obj.password = password
                        obj.save()

            if created:
                one_minute_ago = now() - timedelta(seconds=60)
                obj.next_check = one_minute_ago
                obj.save()
Ejemplo n.º 8
0
 def _add_proxies(self, port=None, count=0):
     for data in self.proxies:
         if port and data['port'] != port:
             continue
         data['last_check'] = now()
         self.proxy.create(**data)
         count += 1
     self.assertEqual(self.proxy.all().count(), count)
Ejemplo n.º 9
0
 def _add_proxies(self, port=None, count=0):
     for data in self.proxies:
         if port and data['port'] != port:
             continue
         data['last_check'] = now()
         self.proxy.create(**data)
         count += 1
     self.assertEqual(self.proxy.all().count(), count)
Ejemplo n.º 10
0
 def task_find_proxy(self, grab, task):
     """Store every ``host:port`` match found in the response body.

     ``self.proxy_re`` is applied to ``grab.response.body``. Each match that
     splits into exactly two colon-separated parts is created as a ``Proxy``
     whose ``next_check`` lies ``max_check`` seconds in the past. Matches of
     any other shape are skipped, and a failure to store one proxy does not
     stop the remaining ones.

     :param grab: object exposing ``response.body``.
     :param task: not used by this method.
     """
     import logging

     for match in re.findall(self.proxy_re, grab.response.body):
         if ':' not in match:
             continue
         parts = match.strip().split(':')
         if len(parts) != 2:
             # Not a plain host:port pair; skip it instead of letting the
             # unpacking error abort the whole page.
             continue
         hostname, port = parts
         try:
             obj = Proxy.objects.create(hostname=hostname, port=port)
             obj.next_check = (now() - timedelta(seconds=max_check))
             obj.save()
         except Exception:
             # Best effort: one proxy that cannot be stored must not stop
             # the others. Only ``Exception`` is caught so that
             # KeyboardInterrupt/SystemExit still propagate.
             logging.getLogger(__name__).debug(
                 'Could not store proxy %s:%s', hostname, port,
                 exc_info=True)
Ejemplo n.º 11
0
 def task_find_proxy(self, grab, task):
     """Store every ``host:port`` match found in the response body.

     ``self.proxy_re`` is applied to ``grab.response.body``. Each match that
     splits into exactly two colon-separated parts is created as a ``Proxy``
     whose ``next_check`` lies ``max_check`` seconds in the past. Matches of
     any other shape are skipped, and a failure to store one proxy does not
     stop the remaining ones.

     :param grab: object exposing ``response.body``.
     :param task: not used by this method.
     """
     import logging

     for match in re.findall(self.proxy_re, grab.response.body):
         if ':' not in match:
             continue
         parts = match.strip().split(':')
         if len(parts) != 2:
             # Not a plain host:port pair; skip it instead of letting the
             # unpacking error abort the whole page.
             continue
         hostname, port = parts
         try:
             obj = Proxy.objects.create(hostname=hostname, port=port)
             obj.next_check = (now() - timedelta(seconds=max_check))
             obj.save()
         except Exception:
             # Best effort: one proxy that cannot be stored must not stop
             # the others. Only ``Exception`` is caught so that
             # KeyboardInterrupt/SystemExit still propagate.
             logging.getLogger(__name__).debug(
                 'Could not store proxy %s:%s', hostname, port,
                 exc_info=True)
Ejemplo n.º 12
0
    def _update_next_check(self):
        """Set ``next_check`` from a random interval plus an error penalty.

        The delay in seconds is a random integer between
        ``PROXY_LIST_MIN_CHECK_INTERVAL`` and
        ``PROXY_LIST_MAX_CHECK_INTERVAL`` plus ``PROXY_LIST_ERROR_DELAY``
        for every recorded error. It is counted from ``last_check`` when
        that is set, otherwise from the current time. Nothing is saved here.
        """

        wait_seconds = randint(
            defaults.PROXY_LIST_MIN_CHECK_INTERVAL,
            defaults.PROXY_LIST_MAX_CHECK_INTERVAL,
        ) + defaults.PROXY_LIST_ERROR_DELAY * self.errors

        reference = self.last_check or now()
        self.next_check = reference + timedelta(seconds=wait_seconds)
Ejemplo n.º 13
0
def check_proxies():
    """Check every proxy whose ``next_check`` is due, fewest errors first.

    For each due proxy one mirror is picked with ``choice``. Unless that
    mirror reports it is already checking the proxy, ``check_proxy`` is
    called. An exception from one check is swallowed (and printed when
    ``settings.DEBUG`` is set) so the remaining proxies are still processed.
    """
    mirrors = Mirror.objects.all()
    proxies = Proxy.objects.filter(next_check__lte=now()).order_by("errors")

    for p in proxies:
        m = choice(mirrors)
        if m.is_checking(p):
            continue
        try:
            m.check_proxy(p)
        except Exception as msg:
            # ``as`` is valid on Python 2.6+ and 3; the comma form is a
            # syntax error on Python 3.
            if settings.DEBUG:
                print("%s - %s" % (str(p), msg))
Ejemplo n.º 14
0
def check_proxies():
    """Check every proxy whose ``next_check`` is due, fewest errors first.

    For each due proxy one mirror is picked with ``choice``. Unless that
    mirror reports it is already checking the proxy, ``check_proxy`` is
    called. An exception from one check is swallowed (and printed when
    ``settings.DEBUG`` is set) so the remaining proxies are still processed.
    """
    mirrors = Mirror.objects.all()
    proxies = Proxy.objects.filter(next_check__lte=now()).order_by('errors')

    for p in proxies:
        m = choice(mirrors)
        if m.is_checking(p):
            continue
        try:
            m.check_proxy(p)
        except Exception as msg:
            # ``as`` is valid on Python 2.6+ and 3; the comma form is a
            # syntax error on Python 3.
            if settings.DEBUG:
                print('%s - %s' % (str(p), msg))
Ejemplo n.º 15
0
def mirror(request):
    """
    Echo the caller's address and HTTP headers back as a JSON document.

    The JSON object holds ``REMOTE_ADDR`` and ``REMOTE_HOST`` (empty string
    when missing from ``request.META``), an ``http_headers`` mapping with
    every ``HTTP_*`` entry of ``request.META`` whose value type is exactly
    str, unicode, bool, int or float (keys lose the ``HTTP_`` prefix), and
    ``response_start`` / ``response_end`` timestamps rendered with ``str()``.
    """
    started_at = now()

    plain_types = (str, unicode, bool, int, float)
    meta = request.META

    payload = {
        'REMOTE_ADDR': meta.get('REMOTE_ADDR', ''),
        'REMOTE_HOST': meta.get('REMOTE_HOST', ''),
    }

    # HTTP headers: keep only plain values and strip the "HTTP_" prefix.
    payload['http_headers'] = dict(
        (key[5:], value)
        for key, value in meta.items()
        if key.startswith('HTTP_') and type(value) in plain_types
    )

    # Timing: the end stamp is taken after the headers were collected.
    payload['response_start'] = str(started_at)
    payload['response_end'] = str(now())

    return HttpResponse(json.dumps(payload))
Ejemplo n.º 16
0
    def update_from_check(self, check, elapsed_time):
        """Apply the outcome of a successful check and save.

        :param check: a ``ProxyCheckResult``; its ``check_start`` becomes
            ``last_check`` (the current time is used when it is empty) and
            its ``anonymity()`` becomes ``anonymity_level``.
        :param elapsed_time: value stored in ``elapsed_time`` as given.

        The error counter is reset to zero before the next check time is
        recomputed through ``_update_next_check``.
        """

        started = check.check_start
        if not started:
            started = now()
        self.last_check = started

        self.errors = 0
        self.anonymity_level = check.anonymity()
        self._update_next_check()
        self.elapsed_time = elapsed_time
        self.save()
Ejemplo n.º 17
0
    def update_from_check(self, check, elapsed_time):
        """Apply the outcome of a successful check and save.

        :param check: a ``ProxyCheckResult``; its ``check_start`` becomes
            ``last_check`` (the current time is used when it is empty) and
            its ``anonymity()`` becomes ``anonymity_level``.
        :param elapsed_time: value stored in ``elapsed_time`` as given.

        The error counter is reset to zero before the next check time is
        recomputed through ``_update_next_check``.
        """

        started = check.check_start
        if not started:
            started = now()
        self.last_check = started

        self.errors = 0
        self.anonymity_level = check.anonymity()
        self._update_next_check()
        self.elapsed_time = elapsed_time
        self.save()
Ejemplo n.º 18
0
    def _update_next_check(self):
        """Set ``next_check`` from a random interval plus an error penalty.

        The delay in seconds is a random integer between
        ``PROXY_LIST_MIN_CHECK_INTERVAL`` and
        ``PROXY_LIST_MAX_CHECK_INTERVAL`` plus ``PROXY_LIST_ERROR_DELAY``
        for every recorded error. It is counted from ``last_check`` when
        that is set, otherwise from the current time. Nothing is saved here.
        """

        wait_seconds = randint(
            defaults.PROXY_LIST_MIN_CHECK_INTERVAL,
            defaults.PROXY_LIST_MAX_CHECK_INTERVAL,
        ) + defaults.PROXY_LIST_ERROR_DELAY * self.errors

        reference = self.last_check or now()
        self.next_check = reference + timedelta(seconds=wait_seconds)
Ejemplo n.º 19
0
    def save(self, *args, **kwargs):
        """Fill in ``country`` and ``next_check`` when unset, then save.

        ``country`` comes from the GeoIP helper: looked up by address when
        the hostname contains exactly three dots, by name otherwise. A
        missing ``next_check`` is set ``max_check`` seconds in the past.
        All arguments are forwarded to the parent ``save``.
        """
        if not self.country:
            # NOTE(review): a DNS name with exactly three dots takes the
            # by-address branch as well; confirm that is acceptable.
            if self.hostname.count('.') == 3:
                lookup = self._geoip.country_code_by_addr
            else:
                lookup = self._geoip.country_code_by_name
            self.country = lookup(str(self.hostname))

        if not self.next_check:
            current = now()
            self.next_check = current - timedelta(seconds=max_check)

        super(Proxy, self).save(*args, **kwargs)
Ejemplo n.º 20
0
    def save(self, *args, **kwargs):
        """Fill in ``country`` and ``next_check`` when unset, then save.

        ``country`` comes from the GeoIP helper: looked up by address when
        the hostname contains exactly three dots, by name otherwise. A
        missing ``next_check`` is set ``PROXY_LIST_MAX_CHECK_INTERVAL``
        seconds in the past. All arguments are forwarded to the parent
        ``save``.
        """
        if not self.country:
            # NOTE(review): a DNS name with exactly three dots takes the
            # by-address branch as well; confirm that is acceptable.
            if self.hostname.count('.') == 3:
                lookup = self._geoip.country_code_by_addr
            else:
                lookup = self._geoip.country_code_by_name
            self.country = lookup(str(self.hostname))

        if not self.next_check:
            current = now()
            self.next_check = current - timedelta(
                seconds=defaults.PROXY_LIST_MAX_CHECK_INTERVAL)

        super(Proxy, self).save(*args, **kwargs)
Ejemplo n.º 21
0
    def save(self, *args, **kwargs):
        """Fill in ``country`` and ``next_check`` when unset, then save.

        ``country`` comes from the GeoIP helper: looked up by address when
        the hostname contains exactly three dots, by name otherwise. A
        missing ``next_check`` is set ``max_check`` seconds in the past.
        All arguments are forwarded to the parent ``save``.
        """
        if not self.country:
            # NOTE(review): a DNS name with exactly three dots takes the
            # by-address branch as well; confirm that is acceptable.
            if self.hostname.count('.') == 3:
                lookup = self._geoip.country_code_by_addr
            else:
                lookup = self._geoip.country_code_by_name
            self.country = lookup(str(self.hostname))

        if not self.next_check:
            current = now()
            self.next_check = current - timedelta(seconds=max_check)

        super(Proxy, self).save(*args, **kwargs)
Ejemplo n.º 22
0
    def save(self, *args, **kwargs):
        """Fill in ``country`` and ``next_check`` when unset, then save.

        ``country`` comes from the GeoIP helper: looked up by address when
        the hostname contains exactly three dots, by name otherwise. A
        missing ``next_check`` is set ``PROXY_LIST_MAX_CHECK_INTERVAL``
        seconds in the past. All arguments are forwarded to the parent
        ``save``.
        """
        if not self.country:
            # NOTE(review): a DNS name with exactly three dots takes the
            # by-address branch as well; confirm that is acceptable.
            if self.hostname.count('.') == 3:
                lookup = self._geoip.country_code_by_addr
            else:
                lookup = self._geoip.country_code_by_name
            self.country = lookup(str(self.hostname))

        if not self.next_check:
            current = now()
            self.next_check = current - timedelta(
                seconds=defaults.PROXY_LIST_MAX_CHECK_INTERVAL)

        super(Proxy, self).save(*args, **kwargs)
Ejemplo n.º 23
0
 def handle(self, *args, **options):
     """Set ``next_check`` of every proxy to 30 days ago and save each one.

     The timestamp is computed once, so all proxies get the same value.
     """
     thirty_days_ago = now() - timedelta(days=30)
     for proxy in Proxy.objects.all():
         proxy.next_check = thirty_days_ago
         proxy.save()