def _get_qvalue(accept_types, content_type_string):
    """
    Return the quality value that a parsed Accept list assigns to a content type.

    The entries of C{accept_types} are examined in order. An entry equal to
    the requested content type wins outright. Otherwise the qvalue of a
    matching C{major/*} range is used, and only if there is none, the qvalue
    of the universal wildcard. The most specific applicable media range has
    precedence, so a C{*/*} entry never overrides a C{major/*} entry.

    @param accept_types: iterable of 3-tuples C{(content_type, qvalue, extra)};
        the third element is ignored here
    @param content_type_string: the content type to look up, as a string; it is
        parsed with C{httpheader.content_type}
    @return: the applicable qvalue, or 0 if no entry applies
    """
    content_type = httpheader.content_type(content_type_string)
    class_qvalue = None
    universal_wildcard_qvalue = None

    for accept_content_type, qvalue, _ in accept_types:
        if accept_content_type == content_type:
            # An exact match is the most specific rule possible; nothing seen
            # earlier or later can override it.
            return qvalue
        if (accept_content_type.is_wildcard()
                and accept_content_type.major == content_type.major):
            class_qvalue = qvalue
        elif accept_content_type.is_universal_wildcard():
            universal_wildcard_qvalue = qvalue

    # No exact match: fall back from the more specific wildcard to the less
    # specific one. "is not None" matters because a qvalue of 0 is a valid
    # (excluding) value that must still take precedence.
    if class_qvalue is not None:
        return class_qvalue
    if universal_wildcard_qvalue is not None:
        return universal_wildcard_qvalue
    return 0
def __init__(self, name, additional_headers=None):
    """
    Open the resource at C{name} and record response metadata on the instance.

    The request carries every header from C{additional_headers}; if the caller
    supplied no C{'Accept'} key, C{'text/html, application/xhtml+xml'} is sent.
    After a successful call the following attributes are set:

      - C{data}: the object returned by C{urllib2.urlopen}
      - C{headers}: the result of C{data.info()}
      - C{content_type}: media type taken from the Content-Type header; when the
        header is absent, the value registered in C{preferred_suffixes} for a
        suffix of C{name}, or C{""} if no suffix matches
      - C{charset}: the C{charset} parameter of the Content-Type header, or None
      - C{location}: the Content-Location header resolved against
        C{data.geturl()}, or C{name} when the header is absent
      - C{expiration_date}: the parsed Expires header; defaults to one day from
        the current UTC time when the header is absent or cannot be parsed
      - C{last_modified_date}: the parsed Last-Modified header, or None when the
        header is absent or cannot be parsed

    @param name: URL to be opened
    @keyword additional_headers: additional HTTP request headers to be added to the call
    @raise pyRdfa.HTTPError: if C{urllib2} reports an HTTP error; carries the
        error text and the HTTP status code
    """
    # Avoid a shared mutable default; None stands for "no extra headers".
    if additional_headers is None:
        additional_headers = {}

    try:
        req = urllib2.Request(url=name)
        for key in additional_headers:
            req.add_header(key, additional_headers[key])
        if 'Accept' not in additional_headers:
            req.add_header('Accept', 'text/html, application/xhtml+xml')

        self.data = urllib2.urlopen(req)
        self.headers = self.data.info()

        if URIOpener.CONTENT_TYPE in self.headers:
            # Parsing separates the bare media type from its parameters
            # (such as the charset setting).
            ct = httpheader.content_type(
                self.headers[URIOpener.CONTENT_TYPE])
            self.content_type = ct.media_type
            if 'charset' in ct.parmdict:
                self.charset = ct.parmdict['charset']
            else:
                self.charset = None
        else:
            # Check if the suffix can be used for the content type; this may
            # be important for file:// type URIs or if the server is not
            # properly set up to return the right mime type.
            self.charset = None
            self.content_type = ""
            for suffix in preferred_suffixes:
                if name.endswith(suffix):
                    self.content_type = preferred_suffixes[suffix]
                    break

        if URIOpener.CONTENT_LOCATION in self.headers:
            self.location = urlparse.urljoin(
                self.data.geturl(),
                self.headers[URIOpener.CONTENT_LOCATION])
        else:
            self.location = name

        # Default expiration: one day from now, unless a usable Expires
        # header says otherwise.
        self.expiration_date = (
            datetime.datetime.utcnow() + datetime.timedelta(days=1))
        if URIOpener.EXPIRES in self.headers:
            try:
                # Thanks to Deron Meranda for the HTTP date conversion method...
                self.expiration_date = httpheader.parse_http_datetime(
                    self.headers[URIOpener.EXPIRES])
            except Exception:
                # The Expires date format was wrong; keep the default.
                # (Not a bare except, so KeyboardInterrupt/SystemExit still
                # propagate.)
                pass

        self.last_modified_date = None
        if URIOpener.LAST_MODIFIED in self.headers:
            try:
                # Thanks to Deron Meranda for the HTTP date conversion method...
                self.last_modified_date = httpheader.parse_http_datetime(
                    self.headers[URIOpener.LAST_MODIFIED])
            except Exception:
                # The Last-Modified date format was wrong; leave it as None.
                pass
    except urllib2.HTTPError as e:
        # Imported here rather than at module level, as in the original code.
        from pyRdfa import HTTPError
        raise HTTPError('%s' % e, e.code)
def __init__(self, name, additional_headers=None):
    """
    Open the resource at C{name} and record response metadata on the instance.

    The request carries every header from C{additional_headers}; if the caller
    supplied no C{'Accept'} key, C{'text/html, application/xhtml+xml'} is sent.
    After a successful call the following attributes are set:

      - C{data}: the object returned by C{urllib2.urlopen}
      - C{headers}: the result of C{data.info()}
      - C{content_type}: media type taken from the Content-Type header; when the
        header is absent, the value registered in C{preferred_suffixes} for a
        suffix of C{name}, or C{""} if no suffix matches
      - C{charset}: the C{charset} parameter of the Content-Type header, or None
      - C{location}: the Content-Location header resolved against
        C{data.geturl()}, or C{name} when the header is absent
      - C{expiration_date}: the parsed Expires header; defaults to one day from
        the current UTC time when the header is absent or cannot be parsed
      - C{last_modified_date}: the parsed Last-Modified header, or None when the
        header is absent or cannot be parsed

    @param name: URL to be opened
    @keyword additional_headers: additional HTTP request headers to be added to the call
    @raise pyRdfa.HTTPError: if C{urllib2} reports an HTTP error; carries the
        error text and the HTTP status code
    """
    # Avoid a shared mutable default; None stands for "no extra headers".
    if additional_headers is None:
        additional_headers = {}

    try:
        req = urllib2.Request(url=name)
        for key in additional_headers:
            req.add_header(key, additional_headers[key])
        if 'Accept' not in additional_headers:
            req.add_header('Accept', 'text/html, application/xhtml+xml')

        self.data = urllib2.urlopen(req)
        self.headers = self.data.info()

        if URIOpener.CONTENT_TYPE in self.headers:
            # Parsing separates the bare media type from its parameters
            # (such as the charset setting).
            ct = httpheader.content_type(self.headers[URIOpener.CONTENT_TYPE])
            self.content_type = ct.media_type
            if 'charset' in ct.parmdict:
                self.charset = ct.parmdict['charset']
            else:
                self.charset = None
        else:
            # Check if the suffix can be used for the content type; this may
            # be important for file:// type URIs or if the server is not
            # properly set up to return the right mime type.
            self.charset = None
            self.content_type = ""
            for suffix in preferred_suffixes:
                if name.endswith(suffix):
                    self.content_type = preferred_suffixes[suffix]
                    break

        if URIOpener.CONTENT_LOCATION in self.headers:
            self.location = urlparse.urljoin(
                self.data.geturl(),
                self.headers[URIOpener.CONTENT_LOCATION])
        else:
            self.location = name

        # Default expiration: one day from now, unless a usable Expires
        # header says otherwise.
        self.expiration_date = (
            datetime.datetime.utcnow() + datetime.timedelta(days=1))
        if URIOpener.EXPIRES in self.headers:
            try:
                # Thanks to Deron Meranda for the HTTP date conversion method...
                self.expiration_date = httpheader.parse_http_datetime(
                    self.headers[URIOpener.EXPIRES])
            except Exception:
                # The Expires date format was wrong; keep the default.
                # (Not a bare except, so KeyboardInterrupt/SystemExit still
                # propagate.)
                pass

        self.last_modified_date = None
        if URIOpener.LAST_MODIFIED in self.headers:
            try:
                # Thanks to Deron Meranda for the HTTP date conversion method...
                self.last_modified_date = httpheader.parse_http_datetime(
                    self.headers[URIOpener.LAST_MODIFIED])
            except Exception:
                # The Last-Modified date format was wrong; leave it as None.
                pass
    except urllib2.HTTPError as e:
        # Imported here rather than at module level, as in the original code.
        from pyRdfa import HTTPError
        raise HTTPError('%s' % e, e.code)