Esempio n. 1
0
    def __init__(self, mem):
        """Set up the lexer on top of the memory object MEM.

        Builds the string factory and zscii translator for MEM, then
        reads the address of the game's 'standard' dictionary from the
        word at 0x08 and caches both its header fields and its contents.
        """
        self._memory = mem
        self._stringfactory = ZStringFactory(mem)
        self._zsciitranslator = ZsciiTranslator(mem)

        # The word stored at 0x08 holds the address of the 'standard'
        # dictionary in static memory.
        standard_dict = mem.read_word(0x08)
        header = self._parse_dict_header(standard_dict)
        # The fourth header field (address of the first entry) is not kept.
        (self._num_entries, self._entry_length,
         self._separators, _first_entry) = header
        self._dict = self.get_dictionary(standard_dict)
Esempio n. 2
0
  def __init__(self, mem):
    """Set up the lexer on top of the memory object MEM.

    Builds the string factory and zscii translator for MEM, then
    reads the address of the game's 'standard' dictionary from the
    word at 0x08 and caches both its header fields and its contents.
    """
    self._memory = mem
    self._stringfactory = ZStringFactory(mem)
    self._zsciitranslator = ZsciiTranslator(mem)

    # The word stored at 0x08 holds the address of the 'standard'
    # dictionary in static memory.
    standard_dict = mem.read_word(0x08)
    header = self._parse_dict_header(standard_dict)
    # The fourth header field (address of the first entry) is not kept.
    (self._num_entries, self._entry_length,
     self._separators, _first_entry) = header
    self._dict = self.get_dictionary(standard_dict)
Esempio n. 3
0
class ZLexer(object):
    """Split player input into words and look them up in a dictionary.

    The lexer reads z-machine-format dictionaries out of the memory
    object it is given and caches the game's 'standard' dictionary,
    whose address is stored in the word at 0x08.
    """

    def __init__(self, mem):
        """Create a lexer for the memory object MEM and load the game's
        'standard' dictionary from it."""

        self._memory = mem
        self._stringfactory = ZStringFactory(self._memory)
        self._zsciitranslator = ZsciiTranslator(self._memory)

        # Load and parse game's 'standard' dictionary from static memory.
        dict_addr = self._memory.read_word(0x08)
        # The address of the first entry is not needed here;
        # get_dictionary() re-derives it from the header.
        (self._num_entries, self._entry_length,
         self._separators, _entries_addr) = self._parse_dict_header(dict_addr)
        self._dict = self.get_dictionary(dict_addr)

    def _parse_dict_header(self, address):
        """Parse the header of the dictionary at ADDRESS.

        The header is read as: one byte holding the number of word
        separators, that many separator bytes, one byte holding the
        length of each entry, and one word holding the number of
        entries.

        Return a tuple (num_entries, entry_length, separators,
        entries_addr), where SEPARATORS is the run of zscii separator
        codes and ENTRIES_ADDR is the address of the beginning of the
        entries.
        """
        num_separators = self._memory[address]
        first_sep = address + 1
        # Slice ends are exclusive, so the end must be first_sep +
        # num_separators; stopping at address + num_separators would
        # silently drop the last separator.
        separators = self._memory[first_sep:first_sep + num_separators]

        addr = first_sep + num_separators
        entry_length = self._memory[addr]
        addr += 1
        num_entries = self._memory.read_word(addr)
        addr += 2

        return num_entries, entry_length, separators, addr

    def _tokenise_string(self, string, separators):
        """Split unicode STRING into a list of words, and return the list.

        Whitespace always counts as a word separator, but so do any
        unicode characters provided in the list of SEPARATORS.  Note,
        however, that instances of these separators count as words
        themselves.
        """
        # Separators are placed inside a regex character class, so they
        # must be escaped: a raw ']', '^', '-' or '\' would otherwise
        # break or change the meaning of the pattern.
        sep_class = "".join(re.escape(sep) for sep in separators)

        # e.g. re.findall(r'[,.;]|\w+', 'abc, def')
        # NOTE(review): characters that are neither \w nor a separator
        # (apostrophes, hyphens, ...) are discarded by this pattern.
        if sep_class:
            regex = r"[%s]|\w+" % sep_class
        else:
            regex = r"\w+"

        return re.findall(regex, string)

    #--------- Public APIs -----------

    def get_dictionary(self, address):
        """Load the z-machine-format dictionary at ADDRESS into a
        python dictionary.

        Each entry's text, as decoded by the string factory, is mapped
        to the address of that entry in the original dictionary.
        Return the new dictionary.
        """
        num_entries, entry_length, _separators, addr = \
            self._parse_dict_header(address)

        words = {}
        for _ in range(num_entries):
            words[self._stringfactory.get(addr)] = addr
            # Entries are fixed-size, so the next one starts
            # entry_length bytes further on.
            addr += entry_length

        return words

    def parse_input(self, string, dict_addr=None):
        """Given a unicode string, parse it into words based on a dictionary.

        If DICT_ADDR is provided, use the custom dictionary at that
        address to do the analysis, otherwise default to using the
        game's 'standard' dictionary.

        The dictionary plays two roles: first, it specifies separator
        characters beyond the usual space character.  Second, we need
        to look up each word in the dictionary and return the address.

        Return a list of lists, each list being of the form

           [word, byte_address_of_word_in_dictionary (or 0 if not in dictionary)]
        """
        if dict_addr is None:
            zseparators = self._separators
            words = self._dict
        else:
            _, _, zseparators, _ = self._parse_dict_header(dict_addr)
            words = self.get_dictionary(dict_addr)

        # Our list of word separators are actually zscii codes that must
        # be converted to unicode before we can use them.
        separators = [self._zsciitranslator.ztou(code) for code in zseparators]

        # dict.get() replaces dict.has_key(), which no longer exists in
        # Python 3; unknown words map to address 0.
        return [[word, words.get(word, 0)]
                for word in self._tokenise_string(string, separators)]
Esempio n. 4
0
class ZLexer(object):
  """Split player input into words and look them up in a dictionary.

  The lexer reads z-machine-format dictionaries out of the memory
  object it is given and caches the game's 'standard' dictionary,
  whose address is stored in the word at 0x08.
  """

  def __init__(self, mem):
    """Create a lexer for the memory object MEM and load the game's
    'standard' dictionary from it."""

    self._memory = mem
    self._stringfactory = ZStringFactory(self._memory)
    self._zsciitranslator = ZsciiTranslator(self._memory)

    # Load and parse game's 'standard' dictionary from static memory.
    dict_addr = self._memory.read_word(0x08)
    # The address of the first entry is not needed here;
    # get_dictionary() re-derives it from the header.
    (self._num_entries, self._entry_length,
     self._separators, _entries_addr) = self._parse_dict_header(dict_addr)
    self._dict = self.get_dictionary(dict_addr)


  def _parse_dict_header(self, address):
    """Parse the header of the dictionary at ADDRESS.

    The header is read as: one byte holding the number of word
    separators, that many separator bytes, one byte holding the
    length of each entry, and one word holding the number of
    entries.

    Return a tuple (num_entries, entry_length, separators,
    entries_addr), where SEPARATORS is the run of zscii separator
    codes and ENTRIES_ADDR is the address of the beginning of the
    entries.
    """
    num_separators = self._memory[address]
    first_sep = address + 1
    # Slice ends are exclusive, so the end must be first_sep +
    # num_separators; stopping at address + num_separators would
    # silently drop the last separator.
    separators = self._memory[first_sep:first_sep + num_separators]

    addr = first_sep + num_separators
    entry_length = self._memory[addr]
    addr += 1
    num_entries = self._memory.read_word(addr)
    addr += 2

    return num_entries, entry_length, separators, addr


  def _tokenise_string(self, string, separators):
    """Split unicode STRING into a list of words, and return the list.

    Whitespace always counts as a word separator, but so do any
    unicode characters provided in the list of SEPARATORS.  Note,
    however, that instances of these separators count as words
    themselves.
    """
    # Separators are placed inside a regex character class, so they
    # must be escaped: a raw ']', '^', '-' or '\' would otherwise
    # break or change the meaning of the pattern.
    sep_class = "".join(re.escape(sep) for sep in separators)

    # e.g. re.findall(r'[,.;]|\w+', 'abc, def')
    # NOTE(review): characters that are neither \w nor a separator
    # (apostrophes, hyphens, ...) are discarded by this pattern.
    if sep_class:
      regex = r"[%s]|\w+" % sep_class
    else:
      regex = r"\w+"

    return re.findall(regex, string)


  #--------- Public APIs -----------


  def get_dictionary(self, address):
    """Load the z-machine-format dictionary at ADDRESS into a
    python dictionary.

    Each entry's text, as decoded by the string factory, is mapped
    to the address of that entry in the original dictionary.
    Return the new dictionary.
    """
    num_entries, entry_length, _separators, addr = \
        self._parse_dict_header(address)

    words = {}
    for _ in range(num_entries):
      words[self._stringfactory.get(addr)] = addr
      # Entries are fixed-size, so the next one starts
      # entry_length bytes further on.
      addr += entry_length

    return words


  def parse_input(self, string, dict_addr=None):
    """Given a unicode string, parse it into words based on a dictionary.

    If DICT_ADDR is provided, use the custom dictionary at that
    address to do the analysis, otherwise default to using the
    game's 'standard' dictionary.

    The dictionary plays two roles: first, it specifies separator
    characters beyond the usual space character.  Second, we need
    to look up each word in the dictionary and return the address.

    Return a list of lists, each list being of the form

       [word, byte_address_of_word_in_dictionary (or 0 if not in dictionary)]
    """
    if dict_addr is None:
      zseparators = self._separators
      words = self._dict
    else:
      _, _, zseparators, _ = self._parse_dict_header(dict_addr)
      words = self.get_dictionary(dict_addr)

    # Our list of word separators are actually zscii codes that must
    # be converted to unicode before we can use them.
    separators = [self._zsciitranslator.ztou(code) for code in zseparators]

    # dict.get() replaces dict.has_key(), which no longer exists in
    # Python 3; unknown words map to address 0.
    return [[word, words.get(word, 0)]
            for word in self._tokenise_string(string, separators)]