def rebuild_index(self):
    """Create the index from scratch.

    Existing index files in ``self.directory`` (those whose names start
    with an optional underscore followed by ``MAIN_`` or ``SPELL_``) are
    deleted; the directory is created if it does not exist.  A new Whoosh
    index named ``MAIN`` is then created and stored on ``self.index``.

    For every class in ``self.indexed_tables`` the rows are queried in
    ``id`` order, and one document is added for each non-empty name found
    in the row's name map (``pokemon_name_map`` for ``tables.PokemonForm``,
    ``name_map`` otherwise).  Names whose language identifier is ``'ja'``
    are additionally indexed under their ``romanize()``d form.

    The writer is used as a context manager so that it is committed when
    indexing succeeds and cancelled if indexing raises, instead of being
    left open.
    """
    schema = whoosh.fields.Schema(
        name=whoosh.fields.ID(sortable=True, stored=True, spelling=True),
        table=whoosh.fields.ID(sortable=True, stored=True),
        row_id=whoosh.fields.ID(sortable=True, stored=True),
        language=whoosh.fields.STORED,
        iso639=whoosh.fields.ID(sortable=True, stored=True),
        iso3166=whoosh.fields.ID(sortable=True, stored=True),
        display_name=whoosh.fields.STORED,  # non-lowercased name
    )

    if os.path.exists(self.directory):
        # create_in() isn't totally reliable, so just nuke whatever's there
        # manually.  Be careful: only remove files that look like ours.
        for filename in os.listdir(self.directory):
            if re.match(r'^_?(MAIN|SPELL)_', filename):
                os.remove(os.path.join(self.directory, filename))
    else:
        os.mkdir(self.directory)

    self.index = whoosh.index.create_in(
        self.directory, schema=schema, indexname='MAIN')

    # Leaving the ``with`` block commits the writer; an exception inside it
    # cancels the writer instead.
    with self.index.writer() as writer:
        def add(row_key, name, language):
            # ``name`` is stored as given in display_name; the searchable
            # ``name`` field holds whatever normalize_name() returns.
            writer.add_document(
                name=self.normalize_name(name),
                display_name=name,
                language=language.identifier,
                iso639=language.iso639,
                iso3166=language.iso3166,
                **row_key
            )

        # Index every name in all our tables of interest
        for cls in self.indexed_tables.values():
            # The attribute holding the names depends only on the table,
            # so pick it once per table rather than once per row.
            if cls == tables.PokemonForm:
                name_map = 'pokemon_name_map'
            else:
                name_map = 'name_map'

            # NOTE(review): ``unicode`` exists only on Python 2.
            table_name = unicode(cls.__tablename__)

            for row in self.session.query(cls).order_by(cls.id):
                row_key = dict(table=table_name, row_id=unicode(row.id))

                # Rows without the name-map attribute contribute nothing.
                for language, name in getattr(row, name_map, {}).items():
                    if not name:
                        continue

                    add(row_key, name, language)

                    # Add generated Roomaji too
                    # XXX this should be a first-class concept, not
                    # piggybacking on Japanese
                    if language.identifier == 'ja':
                        add(row_key, romanize(name), language)
def rebuild_index(self):
    """Create the index from scratch.

    Existing index files in ``self.directory`` (those whose names start
    with an optional underscore followed by ``MAIN_`` or ``SPELL_``) are
    deleted; the directory is created if it does not exist.  A new Whoosh
    index named ``MAIN`` is then created and stored on ``self.index``.

    For every class in ``self.indexed_tables`` the rows are queried in
    ``id`` order, and one document is added for each non-empty name found
    in the row's name map (``pokemon_name_map`` for ``tables.PokemonForm``,
    ``name_map`` otherwise).  Names whose language identifier is
    ``'ja-Hrkt'`` are additionally indexed under their ``romanize()``d
    form.

    The writer is used as a context manager so that it is committed when
    indexing succeeds and cancelled if indexing raises, instead of being
    left open.
    """
    schema = whoosh.fields.Schema(
        name=whoosh.fields.ID(sortable=True, stored=True, spelling=True),
        table=whoosh.fields.ID(sortable=True, stored=True),
        row_id=whoosh.fields.ID(sortable=True, stored=True),
        language=whoosh.fields.STORED,
        iso639=whoosh.fields.ID(sortable=True, stored=True),
        iso3166=whoosh.fields.ID(sortable=True, stored=True),
        display_name=whoosh.fields.STORED,  # non-lowercased name
    )

    if os.path.exists(self.directory):
        # create_in() isn't totally reliable, so just nuke whatever's there
        # manually.  Be careful: only remove files that look like ours.
        for filename in os.listdir(self.directory):
            if re.match(r'^_?(MAIN|SPELL)_', filename):
                os.remove(os.path.join(self.directory, filename))
    else:
        os.mkdir(self.directory)

    self.index = whoosh.index.create_in(
        self.directory, schema=schema, indexname='MAIN')

    # Leaving the ``with`` block commits the writer; an exception inside it
    # cancels the writer instead.
    with self.index.writer() as writer:
        def add(row_key, name, language):
            # ``name`` is stored as given in display_name; the searchable
            # ``name`` field holds whatever normalize_name() returns.
            writer.add_document(
                name=self.normalize_name(name),
                display_name=name,
                language=language.identifier,
                iso639=language.iso639,
                iso3166=language.iso3166,
                **row_key
            )

        # Index every name in all our tables of interest
        for cls in self.indexed_tables.values():
            # The attribute holding the names depends only on the table,
            # so pick it once per table rather than once per row.
            if cls == tables.PokemonForm:
                name_map = 'pokemon_name_map'
            else:
                name_map = 'name_map'

            table_name = text_type(cls.__tablename__)

            for row in self.session.query(cls).order_by(cls.id):
                row_key = dict(table=table_name, row_id=text_type(row.id))

                # Rows without the name-map attribute contribute nothing.
                for language, name in getattr(row, name_map, {}).items():
                    if not name:
                        continue

                    add(row_key, name, language)

                    # Add generated Roomaji too
                    # XXX this should be a first-class concept, not
                    # piggybacking on Japanese
                    if language.identifier == 'ja-Hrkt':
                        add(row_key, romanize(name), language)