Ejemplo n.º 1
0
    def rebuild_index(self):
        """Create the search index from scratch.

        Existing index files in ``self.directory`` are deleted (the directory
        is created if it does not exist yet), a fresh ``MAIN`` index is
        created and stored on ``self.index``, and one document is added for
        every non-empty name of every row of every class in
        ``self.indexed_tables``.  Names whose language identifier is ``'ja'``
        are additionally indexed under their romanized form.

        If anything fails while documents are being added, the writer is
        cancelled before the exception is re-raised, so the half-built
        changes are discarded and the writer is not left open.
        """

        schema = whoosh.fields.Schema(
            name=whoosh.fields.ID(sortable=True, stored=True, spelling=True),
            table=whoosh.fields.ID(sortable=True, stored=True),
            row_id=whoosh.fields.ID(sortable=True, stored=True),
            language=whoosh.fields.STORED,
            iso639=whoosh.fields.ID(sortable=True, stored=True),
            iso3166=whoosh.fields.ID(sortable=True, stored=True),
            display_name=whoosh.fields.STORED,  # non-lowercased name
        )

        if os.path.exists(self.directory):
            # create_in() isn't totally reliable, so just nuke whatever's there
            # manually.  Only files that look like they belong to our indexes
            # are removed; anything else in the directory is left alone.
            for f in os.listdir(self.directory):
                if re.match('^_?(MAIN|SPELL)_', f):
                    os.remove(os.path.join(self.directory, f))
        else:
            os.mkdir(self.directory)

        self.index = whoosh.index.create_in(self.directory,
                                            schema=schema,
                                            indexname='MAIN')
        writer = self.index.writer()

        def add(row_key, name, language):
            """Add one document indexing `name` for the row in `row_key`."""
            writer.add_document(name=self.normalize_name(name),
                                display_name=name,
                                language=language.identifier,
                                iso639=language.iso639,
                                iso3166=language.iso3166,
                                **row_key)

        try:
            # Index every name in all our tables of interest
            for cls in self.indexed_tables.values():
                # Both of these depend only on the table, not on the row
                if cls == tables.PokemonForm:
                    name_map = 'pokemon_name_map'
                else:
                    name_map = 'name_map'
                table_name = unicode(cls.__tablename__)

                for row in self.session.query(cls).order_by(cls.id):
                    row_key = dict(table=table_name,
                                   row_id=unicode(row.id))

                    # Rows without the name-map attribute contribute nothing
                    for language, name in getattr(row, name_map, {}).items():
                        if not name:
                            continue

                        add(row_key, name, language)

                        # Add generated Roomaji too
                        # XXX this should be a first-class concept, not
                        # piggybacking on Japanese
                        if language.identifier == 'ja':
                            add(row_key, romanize(name), language)
        except BaseException:
            # Don't leave the writer open with a half-built index; discard
            # the pending changes and let the caller see the original error.
            writer.cancel()
            raise
        else:
            writer.commit()
Ejemplo n.º 2
0
    def rebuild_index(self):
        """Create the search index from scratch.

        Existing index files in ``self.directory`` are deleted (the directory
        is created if it does not exist yet), a fresh ``MAIN`` index is
        created and stored on ``self.index``, and one document is added for
        every non-empty name of every row of every class in
        ``self.indexed_tables``.  Names whose language identifier is
        ``'ja-Hrkt'`` are additionally indexed under their romanized form.

        If anything fails while documents are being added, the writer is
        cancelled before the exception is re-raised, so the half-built
        changes are discarded and the writer is not left open.
        """

        schema = whoosh.fields.Schema(
            name=whoosh.fields.ID(sortable=True, stored=True, spelling=True),
            table=whoosh.fields.ID(sortable=True, stored=True),
            row_id=whoosh.fields.ID(sortable=True, stored=True),
            language=whoosh.fields.STORED,
            iso639=whoosh.fields.ID(sortable=True, stored=True),
            iso3166=whoosh.fields.ID(sortable=True, stored=True),
            display_name=whoosh.fields.STORED,  # non-lowercased name
        )

        if os.path.exists(self.directory):
            # create_in() isn't totally reliable, so just nuke whatever's there
            # manually.  Only files that look like they belong to our indexes
            # are removed; anything else in the directory is left alone.
            for f in os.listdir(self.directory):
                if re.match('^_?(MAIN|SPELL)_', f):
                    os.remove(os.path.join(self.directory, f))
        else:
            os.mkdir(self.directory)

        self.index = whoosh.index.create_in(
            self.directory, schema=schema, indexname='MAIN'
        )
        writer = self.index.writer()

        def add(row_key, name, language):
            """Add one document indexing `name` for the row in `row_key`."""
            writer.add_document(
                name=self.normalize_name(name), display_name=name,
                language=language.identifier, iso639=language.iso639,
                iso3166=language.iso3166,
                **row_key
            )

        try:
            # Index every name in all our tables of interest
            for cls in self.indexed_tables.values():
                # Both of these depend only on the table, not on the row
                if cls == tables.PokemonForm:
                    name_map = 'pokemon_name_map'
                else:
                    name_map = 'name_map'
                table_name = text_type(cls.__tablename__)

                for row in self.session.query(cls).order_by(cls.id):
                    row_key = dict(table=table_name,
                                   row_id=text_type(row.id))

                    # Rows without the name-map attribute contribute nothing
                    for language, name in getattr(row, name_map, {}).items():
                        if not name:
                            continue

                        add(row_key, name, language)

                        # Add generated Roomaji too
                        # XXX this should be a first-class concept, not
                        # piggybacking on Japanese
                        if language.identifier == 'ja-Hrkt':
                            add(row_key, romanize(name), language)
        except BaseException:
            # Don't leave the writer open with a half-built index; discard
            # the pending changes and let the caller see the original error.
            writer.cancel()
            raise
        else:
            writer.commit()