def _check(self, proxy):
    """Run one check of ``proxy`` and store the outcome.

    A new ``ProxyCheckResult`` is filled from the response returned by
    ``self._make_request(proxy)``, parsed according to ``self.output_type``
    and saved; the proxy itself is updated via ``proxy.update_from_check``.

    Returns the saved ``ProxyCheckResult``.

    Raises ``Exception`` when ``self.output_type`` is not ``'plm_v1'``.
    Any error raised during the check is re-raised after
    ``proxy.update_from_error()`` has been called. The ``proxy.<pk>.check``
    cache key is always deleted on the way out.
    """
    check_key = "proxy.%s.check" % proxy.pk
    try:
        res = ProxyCheckResult()
        res.proxy = proxy
        res.mirror = self
        res.check_start = now()

        response = self._make_request(proxy)
        raw_data = response.body

        try:
            elapsed_time = '%1.2f' % self._get_elapsed_time(proxy)
        except Exception:
            # Best-effort fallback to the time carried by the response.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed here.
            elapsed_time = response.time

        res.check_end = now()
        res.raw_response = raw_data

        if self.output_type == 'plm_v1':
            self._parse_plm_v1(res, raw_data)
        else:
            raise Exception('Output type not found!')

        proxy.update_from_check(res, elapsed_time)
        res.save()
        return res
    except:
        # Deliberately bare: the error is always re-raised, the failure is
        # only recorded on the proxy first.
        proxy.update_from_error()
        raise
    finally:
        # Task unlock
        cache.delete(check_key)
def _check(self, proxy):
    """Run one check of ``proxy`` and store the outcome.

    A new ``ProxyCheckResult`` is filled from the response returned by
    ``self._make_request(proxy)``, parsed according to ``self.output_type``
    and saved; the proxy itself is updated via ``proxy.update_from_check``.

    Returns the saved ``ProxyCheckResult``.

    Raises ``Exception`` when ``self.output_type`` is not ``'plm_v1'``.
    Any error raised during the check is re-raised after
    ``proxy.update_from_error()`` has been called. The ``proxy.<pk>.check``
    cache key is always deleted on the way out.
    """
    check_key = "proxy.%s.check" % proxy.pk
    try:
        res = ProxyCheckResult()
        res.proxy = proxy
        res.mirror = self
        res.check_start = now()

        response = self._make_request(proxy)
        raw_data = response.body

        try:
            elapsed_time = '%1.2f' % self._get_elapsed_time(proxy)
        except Exception:
            # Best-effort fallback to the total time carried by the
            # response. ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed here.
            elapsed_time = response.total_time

        res.check_end = now()
        res.raw_response = raw_data

        if self.output_type == 'plm_v1':
            self._parse_plm_v1(res, raw_data)
        else:
            raise Exception('Output type not found!')

        proxy.update_from_check(res, elapsed_time)
        res.save()
        return res
    except:
        # Deliberately bare: the error is always re-raised, the failure is
        # only recorded on the proxy first.
        proxy.update_from_error()
        raise
    finally:
        # Task unlock
        cache.delete(check_key)
def mirror(request):
    """Echo details of ``request`` back to the client as a JSON object.

    The object holds ``REMOTE_ADDR`` and ``REMOTE_HOST`` taken from
    ``request.META`` (empty string when absent), the ``HTTP_*`` entries of
    ``request.META`` under ``http_headers`` (with the ``HTTP_`` prefix
    removed), and ``response_start`` / ``response_end`` timestamps.

    The timestamps are ``str()`` of the ``now()`` values. Such a string can
    be parsed back with dateutil, e.g.::

        from dateutil.parser import parse
        from dateutil.tz import tzoffset
        from datetime import datetime

        parse("2012-12-28 22:22:12.868342+04:00")
        datetime(2012, 12, 28, 22, 22, 12, 868342,
                 tzinfo=tzoffset(None, 14400))
    """
    start = now()
    SERIALIZABLE = (str, unicode, bool, int, float)

    output = dict()
    output['REMOTE_ADDR'] = request.META.get('REMOTE_ADDR', '')
    output['REMOTE_HOST'] = request.META.get('REMOTE_HOST', '')

    # HTTP Headers
    output['http_headers'] = dict()
    for k, v in request.META.items():
        # isinstance() also accepts subclasses of the serializable types,
        # which json.dumps handles just as well as the base types.
        if k.startswith('HTTP_') and isinstance(v, SERIALIZABLE):
            output['http_headers'][k[5:]] = v

    # Timing
    output['response_start'] = str(start)
    output['response_end'] = str(now())

    return HttpResponse(json.dumps(output))
def handle(self, *args, **options):
    """Process every ``ProxyList`` whose ``next_check`` is not in the future.

    Each list is handed to ``self.parse_proxies``; an error is printed and
    does not stop the remaining lists. Afterwards the list's ``next_check``
    is rewritten and the list saved.
    """
    due_lists = ProxyList.objects.filter(next_check__lte=now())
    for proxy in due_lists:
        try:
            self.parse_proxies(proxy)
        except Exception as error:
            print('>> %s' % error.__str__())

        # NOTE(review): this stores a time ``update_period`` seconds in the
        # past, so the list matches the filter above again on the next run.
        # Confirm whether ``now() + timedelta(...)`` was intended.
        period = timedelta(seconds=proxy.update_period)
        proxy.next_check = now() - period
        proxy.save()
def update_from_error(self):
    """Record a failed check.

    Stamps ``last_check`` with the current time, increases the error
    counter by one, recalculates ``next_check`` and saves the object.
    """
    self.last_check = now()
    self.errors = self.errors + 1

    # Must run after the two updates above: the next check time is derived
    # from both ``last_check`` and ``errors``.
    self._update_next_check()

    self.save()
def parse_proxies(proxy):
    """Download ``proxy.url`` and store the proxies listed in it.

    The body is read line by line. A usable line looks like ``host:port``,
    optionally followed by ``@user:password``. For every such line a
    ``Proxy`` is fetched or created; credentials, when both are present,
    are stored on it, and a newly created proxy gets a ``next_check`` of
    one minute ago.
    """
    fetcher = grabber.Grab(use_db_proxy=False)
    fetcher.go(proxy.url)

    for raw_line in fetcher.response.body.split('\n'):
        line = raw_line.strip()
        if not line or ':' not in line:
            continue

        pieces = line.split('@')
        address = pieces[0].split(':')
        if len(address) != 2:
            continue

        hostname, port = address
        if not (hostname and port):
            continue

        obj, created = Proxy.objects.get_or_create(
            hostname=hostname, port=port)

        if len(pieces) == 2:
            credentials = pieces[1].split(':')
            if len(credentials) == 2:
                user, password = credentials
                if user and password:
                    obj.user = user
                    obj.password = password
                    obj.save()

        if created is True:
            obj.next_check = now() - timedelta(seconds=60)
            obj.save()
def _add_proxies(self, port=None, count=0):
    """Create proxies from ``self.proxies`` and assert the resulting total.

    When ``port`` is truthy only entries whose ``'port'`` equals it are
    created. Each created entry gets a ``'last_check'`` of the current
    time. ``count`` is the starting value to which the number of created
    proxies is added before it is compared with
    ``self.proxy.all().count()``.
    """
    selected = [
        entry for entry in self.proxies
        if not port or entry['port'] == port
    ]

    for entry in selected:
        entry['last_check'] = now()
        self.proxy.create(**entry)

    expected_total = count + len(selected)
    self.assertEqual(self.proxy.all().count(), expected_total)
def task_find_proxy(self, grab, task):
    """Store every ``host:port`` match found in the response body.

    ``self.proxy_re`` is applied to ``grab.response.body``; each match that
    splits into exactly two ``:``-separated parts is created as a ``Proxy``
    whose ``next_check`` lies ``max_check`` seconds in the past.

    Storing is best effort: a match that cannot be created or saved is
    skipped and the remaining matches are still processed.
    """
    for match in re.findall(self.proxy_re, grab.response.body):
        if ':' not in match:
            continue

        parts = match.strip().split(':')
        if len(parts) != 2:
            # Not a plain host:port pair; skip it instead of failing on the
            # two-way unpack below.
            continue
        hostname, port = parts

        try:
            obj = Proxy.objects.create(hostname=hostname, port=port)
            obj.next_check = (now() - timedelta(seconds=max_check))
            obj.save()
        except Exception:
            # Deliberately ignored so one bad entry does not abort the task.
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            continue
def _update_next_check(self):
    """Work out when this object should be checked next.

    The delay is a random number of seconds between
    ``PROXY_LIST_MIN_CHECK_INTERVAL`` and ``PROXY_LIST_MAX_CHECK_INTERVAL``
    plus ``PROXY_LIST_ERROR_DELAY`` for every recorded error. It is counted
    from ``last_check`` when that is set, otherwise from the current time.
    """
    base_delay = randint(defaults.PROXY_LIST_MIN_CHECK_INTERVAL,
                         defaults.PROXY_LIST_MAX_CHECK_INTERVAL)
    error_penalty = defaults.PROXY_LIST_ERROR_DELAY * self.errors

    reference = self.last_check or now()
    self.next_check = reference + timedelta(
        seconds=base_delay + error_penalty)
def check_proxies():
    """Check every due proxy through a randomly chosen mirror.

    Proxies whose ``next_check`` is not in the future are processed in
    ascending ``errors`` order. A proxy the chosen mirror reports as already
    being checked is skipped. A failing check does not stop the run; the
    error is printed only when ``settings.DEBUG`` is on.
    """
    due = Proxy.objects.filter(next_check__lte=now()).order_by("errors")
    mirrors = Mirror.objects.all()

    for proxy in due:
        picked = choice(mirrors)
        if picked.is_checking(proxy):
            continue

        try:
            picked.check_proxy(proxy)
        except Exception as error:
            if settings.DEBUG:
                print("%s - %s" % (str(proxy), error))
def check_proxies():
    """Check every due proxy through a randomly chosen mirror.

    Proxies whose ``next_check`` is not in the future are processed in
    ascending ``errors`` order. A proxy the chosen mirror reports as already
    being checked is skipped. A failing check does not stop the run; the
    error is printed only when ``settings.DEBUG`` is on.
    """
    due = Proxy.objects.filter(next_check__lte=now()).order_by('errors')
    mirrors = Mirror.objects.all()

    for proxy in due:
        picked = choice(mirrors)
        if picked.is_checking(proxy):
            continue

        try:
            picked.check_proxy(proxy)
        except Exception as error:
            if settings.DEBUG:
                print('%s - %s' % (str(proxy), error))
def mirror(request):
    """Echo details of ``request`` back to the client as a JSON object.

    The object holds ``REMOTE_ADDR`` and ``REMOTE_HOST`` taken from
    ``request.META`` (empty string when absent), the ``HTTP_*`` entries of
    ``request.META`` under ``http_headers`` (with the ``HTTP_`` prefix
    removed), and ``response_start`` / ``response_end`` timestamps given as
    ``str()`` of the ``now()`` values.
    """
    start = now()
    serializable = (str, unicode, bool, int, float)

    output = dict()
    output['REMOTE_ADDR'] = request.META.get('REMOTE_ADDR', '')
    output['REMOTE_HOST'] = request.META.get('REMOTE_HOST', '')

    # HTTP Headers
    output['http_headers'] = dict()
    for k, v in request.META.items():
        # isinstance() also accepts subclasses of the serializable types,
        # which json.dumps handles just as well as the base types.
        if k.startswith('HTTP_') and isinstance(v, serializable):
            output['http_headers'][k[5:]] = v

    # Timing
    output['response_start'] = str(start)
    output['response_end'] = str(now())

    return HttpResponse(json.dumps(output))
def update_from_check(self, check, elapsed_time):
    """Apply the outcome of a successful check and save.

    ``check`` supplies ``check_start`` (used as ``last_check``; the current
    time is used when it is empty) and ``anonymity()``. The error counter is
    reset to zero, ``next_check`` is recalculated and ``elapsed_time`` is
    stored as given.
    """
    self.last_check = check.check_start or now()
    self.errors = 0
    self.anonymity_level = check.anonymity()

    # Runs after last_check/errors are set, since the next check time is
    # calculated from them.
    self._update_next_check()

    self.elapsed_time = elapsed_time
    self.save()
def save(self, *args, **kwargs):
    """Fill in missing ``country`` and ``next_check`` before saving.

    ``country`` is looked up through ``self._geoip``: by address when the
    hostname is a dotted quad of digits, by name otherwise. A missing
    ``next_check`` is set ``max_check`` seconds into the past. All
    arguments are passed on to the parent ``save``.
    """
    if not self.country:
        labels = self.hostname.split('.')
        # Counting dots alone would also treat a four-label hostname such
        # as "a.b.c.d" as an address; require every part to be numeric.
        is_ipv4 = len(labels) == 4 and all(
            label.isdigit() for label in labels)
        if is_ipv4:
            self.country = self._geoip.country_code_by_addr(
                str(self.hostname))
        else:
            self.country = self._geoip.country_code_by_name(
                str(self.hostname))

    if not self.next_check:
        self.next_check = (now() - timedelta(seconds=max_check))

    super(Proxy, self).save(*args, **kwargs)
def save(self, *args, **kwargs):
    """Fill in missing ``country`` and ``next_check`` before saving.

    ``country`` is looked up through ``self._geoip``: by address when the
    hostname is a dotted quad of digits, by name otherwise. A missing
    ``next_check`` is set ``defaults.PROXY_LIST_MAX_CHECK_INTERVAL`` seconds
    into the past. All arguments are passed on to the parent ``save``.
    """
    if not self.country:
        labels = self.hostname.split('.')
        # Counting dots alone would also treat a four-label hostname such
        # as "a.b.c.d" as an address; require every part to be numeric.
        is_ipv4 = len(labels) == 4 and all(
            label.isdigit() for label in labels)
        if is_ipv4:
            self.country = self._geoip.country_code_by_addr(
                str(self.hostname))
        else:
            self.country = self._geoip.country_code_by_name(
                str(self.hostname))

    if not self.next_check:
        self.next_check = (
            now() - timedelta(seconds=defaults.PROXY_LIST_MAX_CHECK_INTERVAL))

    super(Proxy, self).save(*args, **kwargs)
def save(self, *args, **kwargs):
    """Fill in missing ``country`` and ``next_check`` before saving.

    ``country`` is looked up through ``self._geoip``: by address when the
    hostname is a dotted quad of digits, by name otherwise. A missing
    ``next_check`` is set ``max_check`` seconds into the past. All
    arguments are passed on to the parent ``save``.
    """
    if not self.country:
        labels = self.hostname.split('.')
        # Counting dots alone would also treat a four-label hostname such
        # as "a.b.c.d" as an address; require every part to be numeric.
        is_ipv4 = len(labels) == 4 and all(
            label.isdigit() for label in labels)
        if is_ipv4:
            self.country = self._geoip.country_code_by_addr(str(
                self.hostname
            ))
        else:
            self.country = self._geoip.country_code_by_name(str(
                self.hostname
            ))

    if not self.next_check:
        self.next_check = (now() - timedelta(seconds=max_check))

    super(Proxy, self).save(*args, **kwargs)
def save(self, *args, **kwargs):
    """Fill in missing ``country`` and ``next_check`` before saving.

    ``country`` is looked up through ``self._geoip``: by address when the
    hostname is a dotted quad of digits, by name otherwise. A missing
    ``next_check`` is set ``defaults.PROXY_LIST_MAX_CHECK_INTERVAL`` seconds
    into the past. All arguments are passed on to the parent ``save``.
    """
    if not self.country:
        labels = self.hostname.split('.')
        # Counting dots alone would also treat a four-label hostname such
        # as "a.b.c.d" as an address; require every part to be numeric.
        is_ipv4 = len(labels) == 4 and all(
            label.isdigit() for label in labels)
        if is_ipv4:
            self.country = self._geoip.country_code_by_addr(str(
                self.hostname
            ))
        else:
            self.country = self._geoip.country_code_by_name(str(
                self.hostname
            ))

    if not self.next_check:
        self.next_check = (now() - timedelta(
            seconds=defaults.PROXY_LIST_MAX_CHECK_INTERVAL))

    super(Proxy, self).save(*args, **kwargs)
def handle(self, *args, **options):
    """Set ``next_check`` of every ``Proxy`` to thirty days ago.

    The timestamp is computed once and each proxy is saved individually
    through its own ``save()``.
    """
    thirty_days_ago = now() - timedelta(days=30)

    for proxy in Proxy.objects.all():
        proxy.next_check = thirty_days_ago
        proxy.save()