def _find_headers(self):
    """Return the HTTP headers of the location as an 'httplib.HTTPMessage'.

    The headers are looked up in the local cache file first; on a cache
    miss they are retrieved from the network and written back to the
    cache.  When retrieval fails an empty (header-less) message is
    returned instead of raising.
    """
    # Cached?
    cache_file = self._headers_file_name()
    headers = None
    try:
        f = open(cache_file)
        try:
            # HTTPMessage parses all headers in its constructor, so the
            # file can be closed right away.
            headers = httplib.HTTPMessage(f)
        finally:
            f.close()
    except Exception:
        # Best effort only -- a missing or unreadable cache entry simply
        # forces a fresh retrieval below.
        headers = None
    # Retrieve
    if not headers:
        url = self.url()
        host = str(urlparse.urlparse(url)[1])
        def block():
            try:
                connection = urllib2.urlopen(url)
                headers = connection.info()
                connection.close()
            except urllib2.HTTPError:
                return 'URL could not be fetched', None
            return None, headers
        headers = logger.with_action_log('Connecting to %s' % (host, ), block)
        # Save
        if headers:
            try:
                f = open(cache_file, 'w')
                try:
                    f.write(str(headers))
                finally:
                    f.close()
            except (IOError, OSError):
                # Caching is best effort; ignore write failures.
                pass
        else:
            # Empty message so callers can still query headers uniformly.
            headers = httplib.HTTPMessage(StringIO.StringIO(''), seekable=0)
    return headers
def parse_stream (stream, location=None, use_parser_cache=True, grammar=standard_grammar):
    """Lex and parse the text read from 'stream' with 'grammar'.

    'location' is passed to the parser in its context dictionary under
    the key 'location'.  'use_parser_cache' toggles use of the grammar's
    LALR table cache file.  On a parser error the stringified exception
    is stored in 'result' instead of the parse product.

    NOTE(review): 'result' is never returned in the visible code -- the
    tail of this function appears to be missing from this view; confirm
    that a 'return result' (or equivalent) follows.
    """
    try:
        #yappy.parser._DEBUG = 1
        lexer = yappy.parser.Lexer (tokens)
        # Build (or load from cache) the LALR parser for the grammar.
        parser = yappy.parser.LRparser (grammar.grammar (), grammar.cache_file_name (), util.if_ (use_parser_cache, 1, 0), yappy.parser.LALRtable)
        text = str (util.read_stream (stream))
        token_list = lexer.scan (text)
        # NOTE(review): 'first_error' is assigned but never used in the
        # visible code.
        first_error = True
        result = parser.parsing (token_list, context={'location': location})
    except yappy.parser.LRParserError, e:
        # Parser failure: keep the error description as the result.
        result = str (e)
def __init__(self, parent, name, attrs, input_position):
    """Initialize a node called 'name' attached to 'parent'.

    Arguments:
      parent -- the parent node
      name -- node name; coerced to a string
      attrs -- sequence of (name, value) pairs of node attributes;
        attribute names are coerced to strings
      input_position -- starting position of the node in the input
        stream as a (LINE, COLUMN) pair of integers, or None
    """
    self._parent = parent
    self._name = str(name)
    self._attrs = [(str(pair[0]), pair[1],) for pair in attrs]
    self._input_position = input_position
    # Fresh node: no children, no text, no style yet.
    self._children = []
    self._text = ''
    self._style = {}
def add_text(self, text):
    """Append 'text' to the node.

    On an anonymous (text) node the text is appended directly; on a
    named node it is first wrapped in a fresh anonymous child node.
    """
    if not self.name():
        self._text += str(text)
        return
    wrapper = Node(self, '', (), self.input_position())
    wrapper.add_text(text)
    self.append_child(wrapper)
def __init__(self, node, description, data=util.undefined_argument):
    """Create an issue about 'node' described by 'description'.

    Arguments:
      node -- a 'document.Node' instance the issue relates to, or a
        false value when no node applies
      description -- natural language description of the problem
      data -- arbitrary data complementing the description; stored in
        stringified form, or as None when not given
    """
    self._description = str(description)
    # A false 'node' leaves the position unset.
    self._input_position = node and node.input_position()
    if data is not util.undefined_argument:
        self._data = util.str_(data)
    else:
        self._data = None
def _parse_mime_type(self, mime_type):
    """Return 'mime_type' normalized to a (TYPE, SUBTYPE) pair of strings.

    'mime_type' may be a 'TYPE/SUBTYPE' string (or charseq) or an
    already split two-element sequence.  Anything else -- including a
    string that does not split into exactly two parts -- falls back to
    the default pair; note that the final str() coercion turns its None
    members into the string 'None'.
    """
    default_mime_type = (None, None,)
    s_mime_type = S.make(mime_type)
    if s_mime_type:
        # 'string.split(s, sep)' just delegates to 's.split(sep)' and is
        # deprecated; call the method directly.
        mime_type = s_mime_type.split('/')
    elif not util.is_sequence(mime_type):
        mime_type = default_mime_type
    if len(mime_type) != 2:
        # Malformed input -- use the default pair.
        mime_type = default_mime_type
    return tuple([str(s) for s in mime_type])
def _find_mime_type(self):
    """Return the location's MIME type as a (TYPE, SUBTYPE) pair.

    The type is determined, in order, from the local cache file, from
    the URL itself (via 'mimetypes'), and finally from the retrieved
    HTTP headers.  A freshly determined type is written back to the
    cache.  When nothing can be determined, an empty string is returned
    instead of a pair, to avoid repeated retrieval attempts.
    """
    # Cached?
    cache_file = self._mime_type_file_name()
    mime_type_string = None
    try:
        f = open(cache_file)
        try:
            mime_type_string = str(f.read())
        finally:
            f.close()
    except (IOError, OSError):
        mime_type_string = None
    # Guess
    if not mime_type_string:
        url = self.url()
        guessed = mimetypes.guess_type(url)[0]
        # BUG FIX: 'guess_type' returns None on failure; the former
        # unconditional str() turned that into the truthy string 'None',
        # which skipped header retrieval and cached 'None/None'.
        mime_type_string = guessed and str(guessed)
    # Retrieve
    if not mime_type_string:
        mime_type_string = str(self._find_headers().gettype())
    # Save
    if mime_type_string:
        mime_type = self._parse_mime_type(mime_type_string)
        try:
            f = open(cache_file, 'w')
            try:
                f.write('%s/%s' % mime_type)
            finally:
                f.close()
        except (IOError, OSError):
            # Caching is best effort; ignore write failures.
            pass
    else:
        mime_type = ''  # not None -- to avoid future repeated retrievals
    return mime_type
def str_(object):
    """Same as 'charseq.str' except it applies 'str_' to sequence
    elements too.

    Lists render as '[a, b]', tuples as '(a, b,)' (each element
    followed by a comma, matching the repr-like trailing comma form);
    anything else is handed to 'charseq.str'.
    """
    if isinstance(object, list):
        result = '[' + ', '.join([str_(item) for item in object]) + ']'
    elif isinstance(object, tuple):
        # Every element carries its own trailing comma; elements are
        # separated by a single space.
        result = '(' + ' '.join([str_(item) + ',' for item in object]) + ')'
    else:
        result = charseq.str(object)
    return result
def __init__(self, url, mime_type=None, refresh_cache=util.undefined_argument):
    """Create a location for the string 'url'.

    'mime_type', when given, explicitly fixes the MIME type of the
    location; it takes either the pair form returned by the 'mime_type'
    method or a common 'TYPE/SUBTYPE' string.  When 'refresh_cache' is
    true the page cache is refreshed on the first page access; left
    unspecified, the configuration default ('config.refresh_cache')
    decides.
    """
    self._url = str(url)
    self._headers = None
    self._local_copy_name_ = None
    self._mime_type = mime_type and self._parse_mime_type(mime_type)
    self._refresh_cache = refresh_cache
    if refresh_cache is not util.undefined_argument:
        self._refresh_cache_needed = refresh_cache
    else:
        # No explicit request -- follow the configuration default.
        self._refresh_cache_needed = config.refresh_cache
class Location(object):
    """Represents location identified by URL.

    Pages, HTTP headers, charset and MIME type are cached on disk under
    'config.cache_directory', keyed by an MD5 hash of the URL.

    NOTE(review): this class appears truncated in the visible source --
    '_fetch' defines a local 'block' function but the code invoking it
    (and any later methods) is not visible here; confirm against the
    complete file.
    """
    def __init__(self, url, mime_type=None, refresh_cache=util.undefined_argument):
        """Create location identified by 'url' given as a string.

        If 'mime_type' is given, it explicitly specifies the MIME type
        of the location.  It must be either of the form returned by the
        'mime_type' method or a common MIME type string.

        If 'refresh_cache' is true, refresh page cache on the first
        page access.
        """
        self._url = str(url)
        self._local_copy_name_ = None
        self._refresh_cache = refresh_cache
        if refresh_cache is util.undefined_argument:
            # No explicit request -- follow the configuration default.
            self._refresh_cache_needed = config.refresh_cache
        else:
            self._refresh_cache_needed = refresh_cache
        self._mime_type = mime_type and self._parse_mime_type(mime_type)
        self._headers = None
    def _parse_mime_type(self, mime_type):
        # Normalize 'mime_type' (a 'TYPE/SUBTYPE' string/charseq or an
        # already split sequence) to a (TYPE, SUBTYPE,) pair of strings.
        default_mime_type = (None, None,)
        s_mime_type = S.make(mime_type)
        if s_mime_type:
            mime_type = string.split(s_mime_type, '/')
        elif not util.is_sequence(mime_type):
            mime_type = default_mime_type
        if len(mime_type) != 2:
            # Malformed input -- fall back to the default pair.
            mime_type = default_mime_type
        return tuple([str(s) for s in mime_type])
    def _find_headers(self):
        # Return HTTP headers of the location as 'httplib.HTTPMessage',
        # preferring the header cache file and caching a fresh
        # retrieval; failures yield an empty message.
        # Cached?
        cache_file = self._headers_file_name()
        try:
            headers = httplib.HTTPMessage(open(cache_file))
        except:
            # Cache miss or unreadable cache entry -- retrieve below.
            headers = None
        # Retrieve
        if not headers:
            url = self.url()
            host = str(urlparse.urlparse(url)[1])
            def block():
                try:
                    connection = urllib2.urlopen(url)
                    headers = connection.info()
                    connection.close()
                except urllib2.HTTPError:
                    return 'URL could not be fetched', None
                return None, headers
            headers = logger.with_action_log('Connecting to %s' % (host, ), block)
            # Save
            if headers:
                try:
                    open(cache_file, 'w').write(str(headers))
                except:
                    # Caching is best effort only.
                    pass
            else:
                # Empty message so callers can query headers uniformly.
                headers = httplib.HTTPMessage(StringIO.StringIO(''), seekable=0)
        return headers
    def _find_mime_type(self):
        # Determine the MIME type from cache, URL guess, or retrieved
        # headers (in that order); cache a fresh result.
        # Cached?
        cache_file = self._mime_type_file_name()
        try:
            mime_type_string = str(open(cache_file).read())
        except:
            mime_type_string = None
        # Guess
        if not mime_type_string:
            url = self.url()
            # NOTE(review): 'guess_type' may return None, which str()
            # turns into the truthy string 'None' -- looks like a bug
            # (it would skip header retrieval below); confirm.
            mime_type_string = str(mimetypes.guess_type(url)[0])
        # Retrieve
        if not mime_type_string:
            mime_type_string = str(self._find_headers().gettype())
        # Save
        if mime_type_string:
            mime_type = self._parse_mime_type(mime_type_string)
            try:
                open(cache_file, 'w').write('%s/%s' % mime_type)
            except:
                # Caching is best effort only.
                pass
        else:
            mime_type = ''  # not None -- to avoid future repeated retrievals
        return mime_type
    def _local_copy_name(self):
        # Path of the local page copy: MD5 hash of the URL inside the
        # cache directory; computed once and memoized on the instance.
        if not self._local_copy_name_:
            hash_ = md5.new(self.url()).hexdigest()
            self._local_copy_name_ = os.path.join(config.cache_directory, hash_)
        return self._local_copy_name_
    def _charset_file_name(self):
        # Cache file holding the charset of the local copy.
        return self._local_copy_name() + '.charset'
    def _mime_type_file_name(self):
        # Cache file holding the MIME type of the location.
        return self._local_copy_name() + '.mimetype'
    def _headers_file_name(self):
        # Cache file holding the retrieved HTTP headers.
        return self._local_copy_name() + '.headers'
    def _local_copy_charset(self):
        # Return the charset recorded for the local copy, fetching the
        # copy first if necessary.
        self._ensure_local_copy()
        f = open(self._charset_file_name())
        return str(f.read())
    def _fetch(self):
        # Retrieve the URL into the local cache, recording the charset
        # and headers alongside the page copy.
        if not os.path.exists(config.cache_directory):
            try:
                os.mkdir(config.cache_directory)
            except OSError, e:
                raise exception.System_Error("Write to local disk failed", e)
        copy_name = self._local_copy_name()
        def block():
            try:
                _file_name, headers = urllib.urlretrieve(self.url(), copy_name)
            except IOError, e:
                raise exception.System_Error("URL could not be retrieved", e)
            charset = str(headers.getparam('charset') or '')
            try:
                f = open(self._charset_file_name(), 'w')
                f.write(charset)
                f.close()
                f = open(self._headers_file_name(), 'w')
                f.write(str(headers))
                f.close()
            except Exception, e:
                raise exception.System_Error("Write to local disk failed", e)
        # NOTE(review): the code invoking 'block' (presumably via
        # 'logger.with_action_log') is not visible in this view.
def header(self, header):
    """Return the value of the HTTP header 'header', as a string.

    Headers are retrieved lazily on first use and memoized on the
    instance.
    """
    headers = self._headers
    if headers is None:
        headers = self._find_headers()
        self._headers = headers
    return str(headers.getparam(header))
def string_token_func (x):
    """Return a (Tokens.STRING, VALUE) lexer token for the matched text 'x'.

    The surrounding quote characters (first and last character of 'x')
    are stripped from the value.
    """
    value = str (x[1:-1])
    return (Tokens.STRING, value,)
def url(self):
    """Return the location's URL as a string."""
    return str(self._url)
def _local_copy_charset(self):
    """Return the charset recorded for the local copy, as a string.

    Ensures the local copy (and with it the charset cache file) exists
    first.
    """
    self._ensure_local_copy()
    f = open(self._charset_file_name())
    try:
        return str(f.read())
    finally:
        # Close explicitly instead of leaking the handle until garbage
        # collection.
        f.close()
def url_token_func (x):
    """Return a (Tokens.URI, URL) lexer token for the matched text 'x'.

    The URL is the substring of 'x' between its first and its last
    double-quote character.
    """
    start = x.find('"') + 1
    end = x.rfind('"')
    return (Tokens.URI, str (x[start:end]),)
def protocol(self):
    """Return the protocol (scheme) part of the location's URL, as a
    string.
    """
    parsed = urlparse.urlparse(self.url())
    return str(parsed[0])
def __str__ (self):
    """Return a human-readable dump of the stylesheet: a header line
    followed by one line per media rule.
    """
    lines = ['Stylesheet:'] + [str (rule) for rule in self._media_rules]
    return '\n'.join(lines) + '\n'