def wieistmeineip(self):
    """Look up the current external IPv4 address and location via wieistmeineip.de.

    Routes the request through the TOR SOCKS proxy (installed by
    ``commonutils.setTorProxy()`` as a global monkey-patch of
    ``socket.socket``), scrapes the page for the IPv4 address and the
    location text, and always restores the original socket class in a
    ``finally`` block so the proxy does not leak to later requests.

    Returns:
        dict with keys:
            "ipaddress" -- dotted-quad IPv4 string extracted from the page
            "country"   -- location text as reported by the site

    Raises:
        AttributeError/IndexError if the page layout changed and the
        expected elements or IP pattern are not found.
    """
    result = {}
    # Save original socket so the TOR monkey-patch can be undone below.
    originalSocket = socket.socket
    # Set TOR SOCKS proxy (globally replaces socket.socket).
    commonutils.setTorProxy()

    try:
        # self.parse fetches the URL and returns a BeautifulSoup tree.
        soup = self.parse("http://www.wieistmeineip.de")
        location = soup.findAll("div", {"class": "location"})[0]
        location = bs(location.text, convertEntities=bs.HTML_ENTITIES)

        ip = soup.findAll('div', id='ipv4')[0]
        raw_ip = bs(ip.text, convertEntities=bs.HTML_ENTITIES)
        # Raw string: '\.' in a plain string is an invalid escape sequence
        # (DeprecationWarning since 3.6, SyntaxError in 3.12).
        pattern = re.compile(r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')
        ip = re.search(pattern, raw_ip.text)

        result["ipaddress"] = ip.group(0)
        result["country"] = str(location)
    finally:
        # Always restore the original socket, even if scraping failed.
        socket.socket = originalSocket

    return result
# Example #2 (score: 0)
    def wieistmeineip(self):
        """Look up the current external IPv4 address and location via wieistmeineip.de.

        Routes the request through the TOR SOCKS proxy (installed by
        ``commonutils.setTorProxy()`` as a global monkey-patch of
        ``socket.socket``), scrapes the page for the IPv4 address and the
        location text, and always restores the original socket class in a
        ``finally`` block so the proxy does not leak to later requests.

        Returns:
            dict with keys:
                "ipaddress" -- dotted-quad IPv4 string extracted from the page
                "country"   -- location text as reported by the site

        Raises:
            AttributeError/IndexError if the page layout changed and the
            expected elements or IP pattern are not found.
        """
        result = {}
        # Save original socket so the TOR monkey-patch can be undone below.
        originalSocket = socket.socket
        # Set TOR SOCKS proxy (globally replaces socket.socket).
        commonutils.setTorProxy()

        try:
            # self.parse fetches the URL and returns a BeautifulSoup tree.
            soup = self.parse("http://www.wieistmeineip.de")
            location = soup.findAll("div", {"class": "location"})[0]
            location = bs(location.text, convertEntities=bs.HTML_ENTITIES)

            ip = soup.findAll('div', id='ipv4')[0]
            raw_ip = bs(ip.text, convertEntities=bs.HTML_ENTITIES)
            # Raw string: '\.' in a plain string is an invalid escape sequence
            # (DeprecationWarning since 3.6, SyntaxError in 3.12).
            pattern = re.compile(
                r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}')
            ip = re.search(pattern, raw_ip.text)

            result["ipaddress"] = ip.group(0)
            result["country"] = str(location)
        finally:
            # Always restore the original socket, even if scraping failed.
            socket.socket = originalSocket

        return result
# Example #3 (score: 0)
    def __process_url(self, url):
        """Download *url* through the TOR SOCKS proxy using urllib2.

        NOTE(review): the response body is bound to ``url_dl`` but the
        visible code ends without returning or restoring ``originalSocket``
        — presumably the original function continues past this excerpt;
        confirm against the full source before relying on this excerpt.

        Raises:
            urllib2.HTTPError / urllib2.URLError: re-raised unchanged.
            IOError: wraps any other error, tagged with the thread name.
        """
        # Crawler config load (browser identity settings for the request).
        cfgCrawler = Config(
            os.path.join(RAGPICKER_ROOT, 'config',
                         'crawler.conf')).get("clientConfig")

        # No POST body — this is a plain GET request.
        data = None
        headers = {
            'User-Agent':
            cfgCrawler.get("browser_user_agent",
                           "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"),
            'Accept-Language':
            cfgCrawler.get("browser_accept_language", "en-US"),
        }

        # Save original socket
        # NOTE(review): saved but never restored in the visible code —
        # verify the TOR proxy is undone later in the full function.
        originalSocket = socket.socket
        # Set TOR Socks proxy (globally monkey-patches socket.socket).
        commonutils.setTorProxy()

        request = urllib2.Request(url, data, headers)

        try:
            url_dl = urllib2.urlopen(request, timeout=30).read()
        # NOTE(review): these two handlers catch and re-raise the same
        # exception unchanged — they only exist to exempt HTTP/URL errors
        # from the IOError wrapping below.
        except urllib2.HTTPError as e:
            raise e
        except urllib2.URLError as e:
            raise e
        except Exception, e:  # Python 2 syntax: any other failure is wrapped.
            raise IOError("Thread(" + self.processName +
                          ") - %s - Error parsing %s" % (e, url))
# Example #4 (score: 0)
 def __process_url(self, url):
     """Download *url* through the TOR SOCKS proxy using urllib2.

     NOTE(review): the response body is bound to ``url_dl`` but the
     visible code ends without returning or restoring ``originalSocket``
     — presumably the original function continues past this excerpt;
     confirm against the full source before relying on this excerpt.

     Raises:
         urllib2.HTTPError / urllib2.URLError: re-raised unchanged.
         IOError: wraps any other error, tagged with the thread name.
     """
     # Crawler config load (browser identity settings for the request).
     cfgCrawler = Config(os.path.join(RAGPICKER_ROOT, 'config', 'crawler.conf')).get("clientConfig")
     
     # No POST body — this is a plain GET request.
     data = None
     headers = {   
         'User-Agent': cfgCrawler.get("browser_user_agent", "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"),
         'Accept-Language': cfgCrawler.get("browser_accept_language", "en-US"),
     }
     
     # Save original socket
     # NOTE(review): saved but never restored in the visible code —
     # verify the TOR proxy is undone later in the full function.
     originalSocket = socket.socket
     # Set TOR Socks proxy (globally monkey-patches socket.socket)
     commonutils.setTorProxy() 
            
     request = urllib2.Request(url, data, headers)
 
     try:
         url_dl = urllib2.urlopen(request, timeout=30).read()
     # NOTE(review): these two handlers catch and re-raise the same
     # exception unchanged — they only exist to exempt HTTP/URL errors
     # from the IOError wrapping below.
     except urllib2.HTTPError as e:
         raise e
     except urllib2.URLError as e:    
         raise e
     except Exception, e:  # Python 2 syntax: any other failure is wrapped.
         raise IOError("Thread(" + self.processName + ") - %s - Error parsing %s" % (e, url))