コード例 #1
0
    def _initialize(self):
        """Load and index the thesaurus data file if that is still needed.

        Does nothing when ``self._basedir`` already equals the value
        returned by ``get_basedir()``.  Otherwise the data file
        ``self._data_file`` is read from the corpus directory, the license
        and description are extracted with ``LICENSE_RE`` and
        ``DESCRIPTION_RE``, ``<-- ... -->`` markers are stripped, and the
        remaining text is split into items.

        On success the following attributes are set together:
        ``_basedir``, ``_rootdir``, ``_license``, ``_description``,
        ``_itemlist`` (tuple of normalized keys, in file order) and
        ``_items`` (dict mapping each key to its stripped contents).
        If any step fails, none of them is modified, so a later call
        retries the load instead of treating the object as initialized.

        Raises:
            IOError: if the corpus directory does not exist below the
                base directory, or the data file cannot be opened.
            AttributeError: if the license or description pattern does
                not match the data (assumes ``LICENSE_RE`` and
                ``DESCRIPTION_RE`` are compiled patterns -- TODO confirm).
            ValueError: if an item has no ``--`` separating key from
                contents.
        """
        basedir = get_basedir()

        # If we're already initialized for this base directory, do nothing.
        if self._basedir == basedir:
            return

        # Make sure the corpus is installed.
        rootdir = os.path.join(basedir, self._original_rootdir)
        if not os.path.isdir(rootdir):
            raise IOError('%s is not installed' % self._name)

        # Read in the data file; the context manager closes the handle
        # even if reading fails.
        datapath = os.path.join(rootdir, self._data_file)
        with open(datapath) as datafile:
            data = datafile.read()

        # Extract the license and the description before any text is
        # removed from the data.
        license_text = self.LICENSE_RE.search(data).group(1)
        description = self.DESCRIPTION_RE.search(data).group(1)

        # Remove line number markings and other comments.
        data = re.sub(r'<--\s+.*?-->', '', data)

        # Divide the thesaurus into items.  The text before the first
        # item marker is a preamble, so it is skipped.
        itemlist = []
        items = {}
        for item in re.split('\n     #', data)[1:]:
            key, contents = item.split('--', 1)
            key = ' '.join(key.split())  # Normalize whitespace in the key.
            itemlist.append(key)
            items[key] = contents.strip()

        # Commit the new state only after everything succeeded.
        # ``_basedir`` doubles as the "already initialized" marker checked
        # at the top, so it is assigned last.
        self._rootdir = rootdir
        self._license = license_text
        self._description = description
        self._itemlist = tuple(itemlist)
        self._items = items
        self._basedir = basedir
コード例 #2
0
    def _initialize(self):
        """Make sure that we're initialized.

        Does nothing when ``self._initialized`` is already true.
        Otherwise:

        1. Resolves the corpus root directory.  A relative
           ``_original_rootdir`` is joined onto ``get_basedir()``; an
           absolute one is used as is, with ``_basedir`` set to ``''``.
        2. Rebuilds ``_group_items`` by calling ``_find_items`` for every
           group in ``_groups`` (presumably each call fills in that
           group's entry -- verify against ``_find_items``).  If no
           ``'combined'`` entry results, ``_virtual_merged`` is set to 1
           and ``_find_virtual_merged_items`` is called.
        3. Stores the concatenation of all group item lists in ``_items``.
        4. Reads the description, license and copyright texts from their
           files, for each one that is still ``None`` and has a file name
           configured.

        Raises:
            IOError: if the corpus root directory does not exist, or a
                metadata file cannot be opened.
        """
        # If we're already initialized, then do nothing.
        if self._initialized:
            return

        # Make sure the corpus is installed.
        basedir = get_basedir()
        if os.path.isabs(self._original_rootdir):
            # An absolute root does not depend on the base directory.
            basedir = ''
            rootdir = self._original_rootdir
        else:
            rootdir = os.path.join(basedir, self._original_rootdir)
        if not os.path.isdir(rootdir):
            raise IOError('%s is not installed' % self._name)
        self._basedir = basedir
        self._rootdir = rootdir

        # NOTE: a previous version remapped the 'merged' group to a
        # 'combined' directory at this point; that remapping is disabled.

        # Get the list of items in each group.
        self._group_items = {}
        for group in self._groups:
            self._find_items(group)
        # ``in`` replaces dict.has_key(), which no longer exists in
        # Python 3; the two are equivalent for a dict.
        if 'combined' not in self._group_items:
            self._virtual_merged = 1
            self._find_virtual_merged_items()

        # Get the overall list of items.
        self._items = []
        for items in self._group_items.values():
            self._items += items

        def read_text(filename):
            # Return the contents of a file under the corpus root,
            # closing the handle afterwards.
            path = os.path.join(self._rootdir, filename)
            with open(path) as stream:
                return stream.read()

        # Read metadata from files, without overriding values that were
        # already supplied.
        if self._description is None and self._description_file is not None:
            self._description = read_text(self._description_file)
        if self._license is None and self._license_file is not None:
            self._license = read_text(self._license_file)
        if self._copyright is None and self._copyright_file is not None:
            self._copyright = read_text(self._copyright_file)

        # Set last, so a failure above leaves the object uninitialized
        # and the next call retries.
        self._initialized = True