Example #1
 def __init__(self, url, proxy=None, trace=False):
     if not url.endswith('/'):
         url = url + '/'
     self.url = url
     if 'APPDATA' in os.environ:
         homepath = os.environ["APPDATA"]
     elif 'USERPROFILE' in os.environ:
         homepath = os.path.join(os.environ["USERPROFILE"],
                                 "Local Settings", "Application Data")
     elif 'HOME' in os.environ:
         homepath = os.environ["HOME"]
     else:
         homepath = ''
     self.cookie_file = os.path.join(homepath, ".post-review-cookies.txt")
     self._cj = cookielib.MozillaCookieJar(self.cookie_file)
     self._password_mgr = ReviewBoardHTTPPasswordMgr(self.url)
     self._opener = opener = urllib2.build_opener(
         urllib2.ProxyHandler(proxy), urllib2.UnknownHandler(),
         urllib2.HTTPHandler(), HttpErrorHandler(),
         urllib2.HTTPErrorProcessor(),
         urllib2.HTTPCookieProcessor(self._cj),
         urllib2.HTTPBasicAuthHandler(self._password_mgr),
         urllib2.HTTPDigestAuthHandler(self._password_mgr))
     urllib2.install_opener(self._opener)
     self._trace = trace
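Once urllib2.install_opener() has been called, the module-level urllib2.urlopen() routes every request through this opener, so the cookie jar and the basic/digest auth handlers above apply to all subsequent calls. A minimal usage sketch (the URL is hypothetical):

    import urllib2

    # urlopen() now goes through the installed opener, so cookies and
    # HTTP basic/digest credentials are attached automatically.
    response = urllib2.urlopen('http://reviews.example.com/api/info/')
    body = response.read()
    response.close()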
Example #2
def main(output_dir):
  # Create opener.
  opener = urllib2.OpenerDirector()
  opener.add_handler(urllib2.ProxyHandler())
  opener.add_handler(urllib2.UnknownHandler())
  opener.add_handler(urllib2.HTTPHandler())
  opener.add_handler(urllib2.HTTPDefaultErrorHandler())
  opener.add_handler(urllib2.HTTPSHandler())
  opener.add_handler(urllib2.HTTPErrorProcessor())

  # Iterate over the files in the docs directory and copy them, as appropriate.
  for root, dirs, files in os.walk('.'):
    for file_name in files:
      if file_name.endswith('.soy') and not file_name.startswith('__'):
        # Strip the './' prefix, if appropriate.
        if root.startswith('./'):
          root = root[2:]

        # Construct the URL where the .soy file is being served.
        soy_file = file_name
        html_file = root + '/' + soy_file[:-len('.soy')] + '.html'
        url = 'http://localhost:9811/' + html_file

        # Fetch url and copy its contents to output_dir.
        req = urllib2.Request(url)
        res = opener.open(req)
        html = res.read()
        copy_to_output_dir(html_file, output_dir, html)
      elif file_name.endswith('.css') or file_name.endswith('.js'):
        #  Copy the static resource to output_dir.
        relative_path = os.path.join(root, file_name)
        with open(relative_path) as resource_file:
          resource = resource_file.read()
          copy_to_output_dir(relative_path, output_dir, resource)
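copy_to_output_dir() is not defined in this snippet; a plausible sketch, purely as an assumption about its behavior (mirror a relative path under output_dir), might look like:

    import os

    def copy_to_output_dir(relative_path, output_dir, contents):
        # Hypothetical helper: write `contents` to output_dir/relative_path,
        # creating intermediate directories as needed.
        destination = os.path.join(output_dir, relative_path)
        dest_dir = os.path.dirname(destination)
        if dest_dir and not os.path.isdir(dest_dir):
            os.makedirs(dest_dir)
        with open(destination, 'w') as f:
            f.write(contents)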
Example #3
    def request_url(self, url, retry_times=20):
        pageContent = None
        msg = ''

        while not pageContent and retry_times > 0:
            retry_times -= 1
            header = random.choice(self.headers)
            proxy = {}
            if len(self.proxies) > 1: proxy = random.choice(self.proxies)
            proxyAdded = urllib2.ProxyHandler(proxy)
            cj = cookielib.CookieJar()
            ck_handler = urllib2.HTTPCookieProcessor(cj)
            if not self.enable_cookie:
                ck_handler = urllib2.UnknownHandler()

            proxyOpener = urllib2.build_opener(proxyAdded, ck_handler)

            proxyOpener.addheaders = header
            try:
                pageContent = self.decode_page(
                    proxyOpener.open(url, timeout=20.0))
                #print pageContent
            except Exception, e:
                #print "error in request url :", trace_info(),proxy,url
                msg = "request url error: " + trace_info()
                pageContent = None
Example #4
def get_opener(cookiejar=None):
    opener = urllib2.OpenerDirector()
    opener.add_handler(urllib2.ProxyHandler())
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
    opener.add_handler(urllib2.HTTPErrorProcessor())
    opener.add_handler(urllib2.HTTPSHandler())
    if cookiejar:
        opener.add_handler(urllib2.HTTPCookieProcessor(cookiejar))
    return opener
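A short usage sketch: pass in a cookielib.CookieJar and the opener will capture cookies from responses and replay them on later requests (the URL is hypothetical):

    import cookielib

    jar = cookielib.CookieJar()
    opener = get_opener(jar)
    response = opener.open('http://example.com/')  # hypothetical URL
    response.close()
    print '%d cookie(s) captured' % len(jar)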
Example #5
 def _GetOpener(self):
     # Authentication code needs to know about 302 response.
     # So make OpenerDirector without HTTPRedirectHandler.
     opener = urllib2.OpenerDirector()
     opener.add_handler(urllib2.ProxyHandler())
     opener.add_handler(urllib2.UnknownHandler())
     opener.add_handler(urllib2.HTTPHandler())
     opener.add_handler(urllib2.HTTPDefaultErrorHandler())
     opener.add_handler(urllib2.HTTPSHandler())
     opener.add_handler(urllib2.HTTPErrorProcessor())
     opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar))
     return opener
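With no HTTPRedirectHandler registered, a 302 is not followed: HTTPErrorProcessor routes any non-2xx response to HTTPDefaultErrorHandler, which raises it as an HTTPError the authentication code can inspect. A sketch of what the caller sees (hypothetical URL):

    try:
        response = opener.open('http://example.com/login')  # hypothetical URL
    except urllib2.HTTPError, e:
        if e.code == 302:
            # Read the redirect target instead of following it.
            location = e.info().getheader('Location')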
Example #6
    def __init__(self, user=None, password=None):
        super(CrawlSuite, self).__init__()

        self.opener = urllib2.OpenerDirector()
        self.opener.add_handler(urllib2.UnknownHandler())
        self.opener.add_handler(urllib2.HTTPHandler())
        self.opener.add_handler(urllib2.HTTPSHandler())
        self.opener.add_handler(urllib2.HTTPCookieProcessor())
        self.opener.add_handler(RedirectHandler())

        self._authenticate(user, password)
        self.user = user
Example #7
 def __init__(self, proxy, verbose=0):
     self.proxy = proxy
     self.verbose = verbose
     self.opener = opener = urllib2.OpenerDirector()
     if proxy:
         opener.add_handler(urllib2.ProxyHandler({'http':self.proxy}))
     else:
         opener.add_handler(urllib2.ProxyHandler())
     opener.add_handler(urllib2.UnknownHandler())
     opener.add_handler(urllib2.HTTPHandler())
     opener.add_handler(urllib2.HTTPDefaultErrorHandler())
     opener.add_handler(urllib2.HTTPSHandler())
     opener.add_handler(urllib2.HTTPErrorProcessor())
Example #8
    def _GetOpener(self):
        """Returns an OpenerDirector that supports cookies and ignores redirects.

    Returns:
      A urllib2.OpenerDirector object.
    """
        opener = urllib2.OpenerDirector()
        opener.add_handler(fancy_urllib.FancyProxyHandler())
        opener.add_handler(urllib2.UnknownHandler())
        opener.add_handler(urllib2.HTTPHandler())
        opener.add_handler(urllib2.HTTPDefaultErrorHandler())
        opener.add_handler(urllib2.HTTPSHandler())
        opener.add_handler(urllib2.HTTPErrorProcessor())
        opener.add_handler(ContentEncodingHandler())

        auth_domain = ''
        if 'AUTH_DOMAIN' in os.environ:
            auth_domain = os.environ['AUTH_DOMAIN'].lower()

        if self.save_cookies:
            if auth_domain == 'appscale':
                cookies_dir = os.path.expanduser(
                    HttpRpcServer.APPSCALE_COOKIE_DIR)
                if not os.path.exists(cookies_dir):
                    os.mkdir(cookies_dir)
            else:
                self.cookie_jar.filename = os.path.expanduser(
                    HttpRpcServer.DEFAULT_COOKIE_FILE_PATH)

                if os.path.exists(self.cookie_jar.filename):
                    try:
                        self.cookie_jar.load()
                        self.authenticated = True
                        logger.debug("Loaded authentication cookies from %s",
                                     self.cookie_jar.filename)
                    except (OSError, IOError, cookielib.LoadError), e:
                        logger.debug(
                            "Could not load authentication cookies; %s: %s",
                            e.__class__.__name__, e)
                        self.cookie_jar.filename = None
                else:
                    # Create an empty cookie file with the permissions set
                    # upfront so it is secure.
                    try:
                        fd = os.open(self.cookie_jar.filename, os.O_CREAT,
                                     0600)
                        os.close(fd)
                    except (OSError, IOError), e:
                        logger.debug(
                            "Could not create authentication cookies file; %s: %s",
                            e.__class__.__name__, e)
                        self.cookie_jar.filename = None
Example #9
def _GetHTTPOpener():
    """Create an http opener used to interact with Google's ClientLogin.

  Returns:
    An http opener capable of handling anything needed to interact with
    Google's ClientLogin.
  """
    # Create an http opener capable of handling proxies, http and https.
    opener = urllib2.OpenerDirector()
    opener.add_handler(urllib2.ProxyHandler())
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
    opener.add_handler(urllib2.HTTPErrorProcessor())
    opener.add_handler(urllib2.HTTPSHandler())
    return opener
Example #10
 def setUp(self):
     super(HttpCase, self).setUp()
     self.registry.enter_test_mode()
     # set up a magic session_id that will be rolled back
     self.session = openerp.http.root.session_store.new()
     self.session_id = self.session.sid
     self.session.db = get_db_name()
     openerp.http.root.session_store.save(self.session)
     # set up a URL opener helper
     self.opener = urllib2.OpenerDirector()
     self.opener.add_handler(urllib2.UnknownHandler())
     self.opener.add_handler(urllib2.HTTPHandler())
     self.opener.add_handler(urllib2.HTTPSHandler())
     self.opener.add_handler(urllib2.HTTPCookieProcessor())
     self.opener.add_handler(RedirectHandler())
     self.opener.addheaders.append(('Cookie', 'session_id=%s' % self.session_id))
Example #11
    def __init__(self, *args, **kargs):
        urllib2.OpenerDirector.__init__(self, *args, **kargs)
        # adding basic handler support
        self.add_handler(urllib2.ProxyHandler())
        self.add_handler(urllib2.UnknownHandler())
        self.add_handler(urllib2.HTTPHandler())
        self.add_handler(urllib2.HTTPDefaultErrorHandler())
        self.add_handler(urllib2.HTTPRedirectHandler())
        self.add_handler(urllib2.FTPHandler())
        self.add_handler(urllib2.FileHandler())
        self.add_handler(urllib2.HTTPErrorProcessor())

        # Add cookie support. (Not needed at the moment, but you never
        # know when it might be.)
        self.cj = cookielib.CookieJar()
        self.add_handler(urllib2.HTTPCookieProcessor(self.cj))
Example #12
    def _GetOpener(self):
        """Returns an OpenerDirector that supports cookies and ignores redirects.

    Returns:
      A urllib2.OpenerDirector object.
    """
        opener = urllib2.OpenerDirector()
        opener.add_handler(urllib2.ProxyHandler())
        opener.add_handler(urllib2.UnknownHandler())
        opener.add_handler(urllib2.HTTPHandler())
        opener.add_handler(urllib2.HTTPDefaultErrorHandler())
        opener.add_handler(urllib2.HTTPSHandler())
        opener.add_handler(urllib2.HTTPErrorProcessor())

        opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar))
        return opener
Example #13
File: http.py Project: sguzwf/lumia
 def _build_opener(self):
     # Create an opener for configuring requests.
     opener = urllib2.OpenerDirector()
     # ProxyHandler() with no arguments picks up proxy settings
     # from the environment.
     opener.add_handler(urllib2.ProxyHandler())
     # Raise URLError for URLs whose scheme has no registered handler.
     opener.add_handler(urllib2.UnknownHandler())
     # Send HTTP requests.
     opener.add_handler(urllib2.HTTPHandler())
     # Raise error responses as HTTPError.
     opener.add_handler(urllib2.HTTPDefaultErrorHandler())
     # Send HTTPS requests.
     opener.add_handler(urllib2.HTTPSHandler())
     # Convert HTTP responses whose status code falls outside
     # [200, 300) into errors.
     opener.add_handler(urllib2.HTTPErrorProcessor())
     self.opener = opener
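In all of these openers, UnknownHandler is what turns a request for a scheme with no registered handler into a clean URLError instead of letting OpenerDirector.open() return None. A minimal demonstration:

    import urllib2

    opener = urllib2.OpenerDirector()
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    try:
        opener.open('ftp://example.com/file.txt')  # no FTPHandler registered
    except urllib2.URLError, e:
        print e.reason  # unknown url type: ftp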
Example #14
 def resolve(self,item,captcha_cb=None,select_cb=None):
     item = item.copy()        
     util.init_urllib()
     url = self._url(item['url'])
     page = ''
     try:
         opener = urllib2.OpenerDirector()
         opener.add_handler(urllib2.HTTPHandler())
         opener.add_handler(urllib2.UnknownHandler())
         urllib2.install_opener(opener)
         request = urllib2.Request(url)
         request.add_header('User-Agent',util.UA)
         response= urllib2.urlopen(request)
         page = response.read()
         response.close()
     except urllib2.HTTPError, e:
         traceback.print_exc()
         return
Example #15
    def _GetOpener(self):
        """Returns an OpenerDirector that supports cookies and ignores redirects.

    Returns:
      A urllib2.OpenerDirector object.
    """
        opener = urllib2.OpenerDirector()
        opener.add_handler(fancy_urllib.FancyProxyHandler())
        opener.add_handler(urllib2.UnknownHandler())
        opener.add_handler(urllib2.HTTPHandler())
        opener.add_handler(urllib2.HTTPDefaultErrorHandler())
        opener.add_handler(fancy_urllib.FancyHTTPSHandler())
        opener.add_handler(urllib2.HTTPErrorProcessor())
        opener.add_handler(ContentEncodingHandler())

        if self.save_cookies:
            self.cookie_jar.filename = os.path.expanduser(
                HttpRpcServer.DEFAULT_COOKIE_FILE_PATH)

            if os.path.exists(self.cookie_jar.filename):
                try:
                    self.cookie_jar.load()
                    self.authenticated = True
                    logger.debug("Loaded authentication cookies from %s",
                                 self.cookie_jar.filename)
                except (OSError, IOError, cookielib.LoadError), e:
                    # Failed to load cookies. The target file path is bad.
                    logger.debug(
                        "Could not load authentication cookies; %s: %s",
                        e.__class__.__name__, e)
                    self.cookie_jar.filename = None
            else:
                # Create an empty cookie file. This must be created with the file
                # permissions set upfront in order to be secure.
                try:
                    fd = os.open(self.cookie_jar.filename, os.O_CREAT, 0600)
                    os.close(fd)
                except (OSError, IOError), e:
                    # Failed to create cookie file. Don't try to save cookies.
                    logger.debug(
                        "Could not create authentication cookies file; %s: %s",
                        e.__class__.__name__, e)
                    self.cookie_jar.filename = None
Example #16
    def test_badly_named_methods(self):
        # test work-around for three methods that accidentally follow the
        # naming conventions for handler methods
        # (*_open() / *_request() / *_response())

        # These used to call the accidentally-named methods, causing a
        # TypeError in real code; here, returning self from these mock
        # methods would either cause no exception, or AttributeError.

        from urllib2 import URLError

        o = OpenerDirector()
        meth_spec = [
            [("do_open", "return self"), ("proxy_open", "return self")],
            [("redirect_request", "return self")],
        ]
        handlers = add_ordered_mock_handlers(o, meth_spec)
        o.add_handler(urllib2.UnknownHandler())
        for scheme in "do", "proxy", "redirect":
            self.assertRaises(URLError, o.open, scheme + "://example.com/")
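For context, add_handler() discovers handler methods purely by name (<scheme>_open, <scheme>_request, <scheme>_response, <proto>_error_<code>), which is why methods that merely look like handler methods need the work-around tested above. A minimal sketch of the convention:

    import urllib2

    class EchoHandler(urllib2.BaseHandler):
        # Registered automatically because the name matches <scheme>_open.
        def echo_open(self, req):
            return req.get_full_url()

    opener = urllib2.OpenerDirector()
    opener.add_handler(EchoHandler())
    print opener.open('echo://hello')  # -> echo://hello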
Example #17
    def __init__(self, user=None, password=None):
        super(CrawlSuite, self).__init__()

        registry = openerp.registry(tools.config['db_name'])
        try:
            # switch registry to test mode, so that requests can be made
            registry.enter_test_mode()

            self.opener = urllib2.OpenerDirector()
            self.opener.add_handler(urllib2.UnknownHandler())
            self.opener.add_handler(urllib2.HTTPHandler())
            self.opener.add_handler(urllib2.HTTPSHandler())
            self.opener.add_handler(urllib2.HTTPCookieProcessor())
            self.opener.add_handler(RedirectHandler())

            self._authenticate(user, password)
            self.user = user

        finally:
            registry.leave_test_mode()
Example #18
    def _GetOpener(self):
        """Returns an OpenerDirector that supports cookies and ignores redirects.

    Returns:
      A urllib2.OpenerDirector object.
    """
        opener = urllib2.OpenerDirector()
        opener.add_handler(fancy_urllib.FancyProxyHandler())
        opener.add_handler(urllib2.UnknownHandler())
        opener.add_handler(urllib2.HTTPHandler())
        opener.add_handler(urllib2.HTTPDefaultErrorHandler())
        opener.add_handler(fancy_urllib.FancyHTTPSHandler())
        opener.add_handler(urllib2.HTTPErrorProcessor())

        if self.save_cookies:
            self.cookie_jar.filename = os.path.expanduser(
                HttpRpcServer.DEFAULT_COOKIE_FILE_PATH)

            if os.path.exists(self.cookie_jar.filename):
                try:
                    self.cookie_jar.load()
                    self.authenticated = True
                    logger.info("Loaded authentication cookies from %s",
                                self.cookie_jar.filename)
                except (OSError, IOError, cookielib.LoadError), e:

                    logger.debug(
                        "Could not load authentication cookies; %s: %s",
                        e.__class__.__name__, e)
                    self.cookie_jar.filename = None
            else:

                try:
                    fd = os.open(self.cookie_jar.filename, os.O_CREAT, 0600)
                    os.close(fd)
                except (OSError, IOError), e:

                    logger.debug(
                        "Could not create authentication cookies file; %s: %s",
                        e.__class__.__name__, e)
                    self.cookie_jar.filename = None
Example #19
  def _GetOpener(self):
    """Returns an OpenerDirector that supports cookies and ignores redirects.

    Returns:
      A urllib2.OpenerDirector object.
    """
    opener = urllib2.OpenerDirector()
    opener.add_handler(urllib2.ProxyHandler())
    opener.add_handler(urllib2.UnknownHandler())
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
    opener.add_handler(urllib2.HTTPSHandler())
    opener.add_handler(urllib2.HTTPErrorProcessor())
    if self.save_cookies:
      self.cookie_file = os.path.expanduser("~/.codereview_upload_cookies")
      self.cookie_jar = cookielib.MozillaCookieJar(self.cookie_file)
      if os.path.exists(self.cookie_file):
        try:
          self.cookie_jar.load()
          self.authenticated = True
          StatusUpdate("Loaded authentication cookies from %s" %
                       self.cookie_file)
        except (cookielib.LoadError, IOError):
          # Failed to load cookies - just ignore them.
          pass
      else:
        # Create an empty cookie file with mode 600
        fd = os.open(self.cookie_file, os.O_CREAT, 0600)
        os.close(fd)
      # Always chmod the cookie file
      os.chmod(self.cookie_file, 0600)
    else:
      # Don't save cookies across runs of update.py.
      self.cookie_jar = cookielib.CookieJar()
    opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar))
    return opener
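The os.open(..., os.O_CREAT, 0600) dance creates the cookie file with owner-only permissions before any cookie data is written, closing the window in which another local user could read it; the unconditional chmod covers files that already existed. The same pattern in isolation (hypothetical path):

    import os

    cookie_path = os.path.expanduser('~/.example_cookies')  # hypothetical path
    # The mode passed to os.open only applies when the file is created,
    # so chmod afterwards to cover a pre-existing file.
    fd = os.open(cookie_path, os.O_CREAT, 0600)
    os.close(fd)
    os.chmod(cookie_path, 0600)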
Example #20
    def url_inventory(self, request):
        '''
        Creates a CSV file compiling the HTTP status of each ArtBase
        artwork's URL and location.
        '''
        context_instance = RequestContext(request)
        opts = self.model._meta
        admin_site = self.admin_site
        completed_message = None

        writer = csv.writer(open(
            os.path.join(
                settings.MEDIA_ROOT,
                "artbase/data/artbase_broken_urls_inventory_%s.csv"
                % datetime.date.today()),
            "wb"))

        writer.writerow([
            'ARTWORK ID', 'ARTWORK TITLE', 'ARTWORK URL', 'URL STATUS',
            'LOCATION', 'LOCATION STATUS', 'UPDATE NOTICE'
        ])
        all_works = ArtworkStub.objects.filter(status="approved")

        handler = urllib2.UnknownHandler()
        opener = urllib2.build_opener(handler)
        urllib2.install_opener(opener)

        # timeout in seconds
        timeout = 15
        socket.setdefaulttimeout(timeout)

        if request.method == "POST":
            if request.POST.get("run_inventory"):
                for work in all_works:
                    url_response = None
                    url_error_msg = None
                    url_status = None
                    location_response = None
                    location_status = None
                    location_error_msg = None
                    updating_notice = None

                    #a few hacks to make sure the url is formatted correctly
                    if "/artbase/" in work.url:
                        if "http://archive.rhizome.org" not in work.url:
                            if "http://" not in work.url:
                                work.url = "http://archive.rhizome.org%s" % work.url
                                updating_notice = "Make sure has full rhizome archives url (http://archive.rhizome.org/....)"

                    if "/artbase/" in work.location:
                        if "http://archive.rhizome.org" not in work.location:
                            if "http://" not in work.location:
                                work.location = "http://archive.rhizome.org%s" % work.location
                                updating_notice = "Make sure has full rhizome archives url (http://archive.rhizome.org/....)"
                    if "http://" not in work.url:
                        work.url = "http://%s" % work.url
                        updating_notice = "Make sure has url including 'http://'"

                    if "http://" not in work.location:
                        work.location = "http://%s" % work.location
                        updating_notice = "Make sure has url including 'http://'"

                    try:
                        url_response = urllib2.urlopen(work.url, timeout=15)
                    except (urllib2.URLError, httplib.BadStatusLine,
                            httplib.InvalidURL, httplib.HTTPException,
                            httplib.UnknownProtocol), e:
                        if hasattr(e, 'reason'):
                            if isinstance(e.reason, socket.timeout):
                                url_error_msg = 'Failed to reach server. TIMED OUT '
                            else:
                                url_error_msg = 'Failed to reach server. Reason: %s ' % e.reason
                        elif hasattr(e, 'code'):
                            url_error_msg = "The server couldn't fulfill the request. Error code: %s" % e.code
                        else:
                            url_error_msg = "Failed to reach server!"
                    except:
                        url_error_msg = "Failed to reach server!"
Example #21
	"""
    def find_user_password(self, realm, authuri):
        creds = codetricks.stealVar("_temp_credentials")
        if creds is not None:
            return creds


_restrictedURLOpener = urllib2.OpenerDirector()
_restrictedURLOpener.add_handler(urllib2.HTTPRedirectHandler())
_restrictedURLOpener.add_handler(urllib2.HTTPHandler())
_restrictedURLOpener.add_handler(urllib2.HTTPSHandler())
_restrictedURLOpener.add_handler(urllib2.HTTPErrorProcessor())
_restrictedURLOpener.add_handler(
    urllib2.HTTPBasicAuthHandler(_UrlopenRemotePasswordMgr()))
_restrictedURLOpener.add_handler(urllib2.FTPHandler())
_restrictedURLOpener.add_handler(urllib2.UnknownHandler())
_restrictedURLOpener.addheaders = [("user-agent", "GAVO DaCHS HTTP client")]


def urlopenRemote(url, data=None, creds=(None, None)):
    """works like urllib2.urlopen, except only http, https, and ftp URLs
	are handled.

	The function also massages the error messages of urllib2 a bit.  urllib2
	errors always become IOErrors (which is more convenient within the DC).

	creds may be a pair of username and password.  Those credentials
	will be presented in http basic authentication to any server
	that cares to ask.  For both reasons, don't use any valuable credentials
	here.
	"""
Example #22
    def __call__(self, value):
        try:
            super(RelativeURLValidator, self).__call__(value)
        except ValidationError as e:
            # Trivial case failed. Try for possible IDN domain
            if value:
                value = smart_text(value)
                scheme, netloc, path, query, fragment = urlparse.urlsplit(
                    value)
                try:
                    netloc = netloc.encode('idna')  # IDN -> ACE
                except UnicodeError:  # invalid domain part
                    raise e
                url = urlparse.urlunsplit(
                    (scheme, netloc, path, query, fragment))
                super(RelativeURLValidator, self).__call__(url)
            else:
                raise
        else:
            url = value

        if self.verify_exists:
            broken_error = ValidationError(
                _(u'This URL appears to be a broken link.'),
                code='invalid_link')

            if url.startswith('http://') or url.startswith('ftp://'):
                headers = {
                    "Accept":
                    "text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
                    "Accept-Language": "en-us,en;q=0.5",
                    "Accept-Charset": "ISO-8859-1,utf-8;q=0.7,*;q=0.7",
                    "Connection": "close",
                    "User-Agent": self.user_agent,
                }
                url = url.encode('utf-8')
                try:
                    req = urllib2.Request(url, None, headers)
                    req.get_method = lambda: 'HEAD'
                    #Create an opener that does not support local file access
                    opener = urllib2.OpenerDirector()

                    #Don't follow redirects, but don't treat them as errors either
                    error_nop = lambda *args, **kwargs: True
                    http_error_processor = urllib2.HTTPErrorProcessor()
                    http_error_processor.http_error_301 = error_nop
                    http_error_processor.http_error_302 = error_nop
                    http_error_processor.http_error_307 = error_nop

                    handlers = [
                        urllib2.UnknownHandler(),
                        urllib2.HTTPHandler(),
                        urllib2.HTTPDefaultErrorHandler(),
                        urllib2.FTPHandler(), http_error_processor
                    ]
                    try:
                        import ssl
                    except ImportError:
                        # Python isn't compiled with SSL support
                        pass
                    else:
                        handlers.append(urllib2.HTTPSHandler())
                    map(opener.add_handler, handlers)
                    # python_version_tuple() returns strings, so convert to
                    # ints before comparing; timeout= is new in Python 2.6.
                    if tuple(map(int, platform.python_version_tuple()[:2])) >= (2, 6):
                        opener.open(req, timeout=10)
                    else:
                        opener.open(req)
                except ValueError:
                    raise ValidationError(_(u'Enter a valid URL.'),
                                          code='invalid')
                except:  # urllib2.URLError, httplib.InvalidURL, etc.
                    raise broken_error

            else:
                # Resolve the relative URL
                try:
                    resolve(url)
                except Http404:
                    raise broken_error
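The error_nop trick works because OpenerDirector.error() dispatches to methods named http_error_<code> found on registered handlers; returning a true value marks the redirect as handled, so open() returns the handler's result instead of raising HTTPError. The same pattern in isolation:

    import urllib2

    processor = urllib2.HTTPErrorProcessor()
    nop = lambda *args, **kwargs: True
    for attr in ('http_error_301', 'http_error_302', 'http_error_307'):
        # Instance attributes are found by add_handler(), so these shadow
        # the default raise-on-redirect behavior.
        setattr(processor, attr, nop)

    opener = urllib2.OpenerDirector()
    opener.add_handler(urllib2.HTTPHandler())
    opener.add_handler(processor)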
Example #23
            broken_error = ValidationError(
                _(u'This URL appears to be a broken link.'), code='invalid_link')
            try:
                req = urllib2.Request(url, None, headers)
                req.get_method = lambda: 'HEAD'
                # Create an opener that does not support local file access
                opener = urllib2.OpenerDirector()

                # Don't follow redirects, but don't treat them as errors either
                error_nop = lambda *args, **kwargs: True
                http_error_processor = urllib2.HTTPErrorProcessor()
                http_error_processor.http_error_301 = error_nop
                http_error_processor.http_error_302 = error_nop
                http_error_processor.http_error_307 = error_nop

                handlers = [urllib2.UnknownHandler(),
                            urllib2.HTTPHandler(),
                            urllib2.HTTPDefaultErrorHandler(),
                            urllib2.FTPHandler(),
                            http_error_processor]
                try:
                    import ssl
                except ImportError:
                    # Python isn't compiled with SSL support
                    pass
                else:
                    handlers.append(urllib2.HTTPSHandler())
                map(opener.add_handler, handlers)
                if tuple(map(int, platform.python_version_tuple()[:2])) >= (2, 6):
                    opener.open(req, timeout=10)
                else:
Example #25
    def run_report_and_create_csv(self, archived):
        '''
        Runs the URL inventory and writes the report CSV.
        '''
        if archived:
            writer = csv.writer(
                open(
                    os.path.join(
                        settings.MEDIA_ROOT,
                        "artbase/data/archived_broken_urls_inventory.csv"),
                    "wb"))
        else:
            writer = csv.writer(
                open(
                    os.path.join(
                        settings.MEDIA_ROOT,
                        "artbase/data/artbase_broken_urls_inventory.csv"),
                    "wb"))
        writer.writerow([
            'ARTWORK ID', 'ARTWORK TITLE', 'ARTWORK URL', 'URL STATUS',
            'LOCATION', 'LOCATION STATUS', 'UPDATE NOTICE'
        ])
        if archived:
            all_works = ArtworkStub.objects.filter(status="approved").filter(
                location_type="cloned")
        else:
            all_works = ArtworkStub.objects.filter(status="approved")

        handler = urllib2.UnknownHandler()
        opener = urllib2.build_opener(handler)
        urllib2.install_opener(opener)

        # timeout in seconds
        timeout = 15
        socket.setdefaulttimeout(timeout)

        for work in all_works:
            url_response = None
            url_error_msg = None
            url_status = None
            location_response = None
            location_status = None
            location_error_msg = None
            updating_notice = None

            #a few hacks to make sure the url is formatted correctly
            if "/artbase/" in work.url:
                if "http://archive.rhizome.org" not in work.url:
                    if "http://" not in work.url:
                        work.url = "http://archive.rhizome.org%s" % work.url
                        updating_notice = "Make sure has full rhizome archives url (http://archive.rhizome.org/....)"

            if "/artbase/" in work.location:
                if "http://archive.rhizome.org" not in work.location:
                    if "http://" not in work.location:
                        work.location = "http://archive.rhizome.org%s" % work.location
                        updating_notice = "Make sure has full rhizome archives url (http://archive.rhizome.org/....)"
            if "http://" not in work.url:
                work.url = "http://%s" % work.url
                updating_notice = "Make sure has url including 'http://'"

            if "http://" not in work.location:
                work.location = "http://%s" % work.location
                updating_notice = "Make sure has url including 'http://'"

            try:
                url_response = urllib2.urlopen(work.url, timeout=15)
            except (urllib2.URLError, httplib.BadStatusLine,
                    httplib.InvalidURL, httplib.HTTPException,
                    httplib.UnknownProtocol), e:
                if hasattr(e, 'reason'):
                    if isinstance(e.reason, socket.timeout):
                        url_error_msg = 'Failed to reach server. TIMED OUT '
                    else:
                        url_error_msg = 'Failed to reach server. Reason: %s ' % e.reason
                elif hasattr(e, 'code'):
                    url_error_msg = "The server couldn't fulfill the request. Error code: %s" % e.code
                else:
                    url_error_msg = "Failed to reach server!"
            except: