def class_prepared_callback(self, sender, **kwargs):
    """Build the Whoosh schema from the indexed model fields, open or
    create the on-disk index, and wire up real-time signal handlers."""
    model_fields = [
        self.model._meta.get_field_by_name(name)[0] for name in self.fields
    ]
    self.schema = Schema(**dict(
        (f.name, field_mapping[f.__class__]) for f in model_fields
    ))
    if self.index is None:
        # Nothing usable was found on disk at __init__ time; create it now
        # that the schema is known.
        self.index = Index(self.storage, schema=self.schema, create=True)
    self.searcher = self.index.searcher()
    if self.real_time:
        post_save.connect(self.post_save_callback, sender=self.model)
        post_delete.connect(self.post_delete_callback, sender=self.model)
class WhooshManager(models.Manager):
    # Django model manager that mirrors rows of its model into a Whoosh
    # full-text index stored under STORAGE_DIR, and exposes query() for
    # prefix searches that resolve back to a regular queryset.

    def __init__(self, *args, **kwargs):
        """Consume manager-specific options before Manager.__init__ sees kwargs.

        Options:
            default   -- schema field name that query() parses against.
            fields    -- model field names to index ('id' is always appended
                         so hits can be mapped back to database rows).
            real_time -- when True, keep the index in sync via signals.
        """
        self.default = kwargs.pop("default",None)
        self.parser = None
        self.fields = kwargs.pop('fields', []) + ['id']
        self.real_time = kwargs.pop('real_time', True)
        if not os.path.lexists(STORAGE_DIR):
            os.makedirs(STORAGE_DIR)
        self.storage = store.FileStorage(STORAGE_DIR)
        try:
            self.index = Index(self.storage)
        except (IndexError, EmptyIndexError):
            # No readable index on disk yet; it is created in
            # class_prepared_callback once the schema is known.
            self.index = None
        super(WhooshManager, self).__init__(*args, **kwargs)

    def contribute_to_class(self, model, name):
        # Defer schema construction until the model class is fully prepared.
        super(WhooshManager, self).contribute_to_class(model, name)
        class_prepared.connect(self.class_prepared_callback, sender=self.model)

    def class_prepared_callback(self, sender, **kwargs):
        """Build the Whoosh schema from the model's fields, open/create the
        index, and (optionally) connect real-time save/delete handlers."""
        schema_dict = {}
        for field_name in self.fields:
            field = self.model._meta.get_field_by_name(field_name)[0]
            # field_mapping translates Django field classes to Whoosh field types.
            schema_dict[field.name] = field_mapping[field.__class__]
        self.schema = Schema(**schema_dict)
        if self.index is None:
            self.index = Index(self.storage, schema=self.schema, create=True)
        self.searcher = self.index.searcher()
        if self.real_time:
            post_save.connect(self.post_save_callback, sender=self.model)
            post_delete.connect(self.post_delete_callback, sender=self.model)

    def post_save_callback(self, sender, instance, created, **kwargs):
        """Index a newly created instance, or re-index an updated one."""
        # All indexed values are stored as unicode strings (Python 2 code).
        dct = dict([(f, unicode(getattr(instance, f))) for f in self.fields])
        self.index = self.index.refresh()
        writer = self.index.writer()
        if created:
            writer.add_document(**dct)
        else:
            # NOTE(review): update_document presumably matches on a unique
            # field (likely 'id') in the schema -- confirm against field_mapping.
            writer.update_document(**dct)
        writer.commit()
        # Re-open the index/searcher so query() sees the just-written document.
        self.index = self.index.refresh()
        self.searcher = self.index.searcher()

    def post_delete_callback(self, sender, instance, **kwargs):
        # TODO(review): no-op -- deleted rows are never purged from the index,
        # so query() can return ids that no longer exist in the database.
        pass

    def query(self, q):
        """Prefix-search the index and return a queryset of matching rows."""
        if self.parser is None:
            # Lazily built: self.schema only exists after class_prepared_callback.
            self.parser = QueryParser(self.default, schema=self.schema)
        # "*" is appended so the search behaves as a prefix match.
        results = self.searcher.search(self.parser.parse(q+"*"))
        return self.filter(id__in=[r['id'] for r in results])
def __init__(self, *args, **kwargs):
    """Pop manager-specific options, ensure the storage directory exists,
    and try to open an existing Whoosh index before delegating to
    Manager.__init__ with the remaining arguments."""
    # Manager-specific options are consumed before Manager.__init__ sees kwargs.
    self.default = kwargs.pop("default", None)
    self.real_time = kwargs.pop('real_time', True)
    indexed = kwargs.pop('fields', [])
    # 'id' is always indexed so search hits can be mapped back to rows.
    self.fields = indexed + ['id']
    self.parser = None
    if not os.path.lexists(STORAGE_DIR):
        os.makedirs(STORAGE_DIR)
    self.storage = store.FileStorage(STORAGE_DIR)
    try:
        existing = Index(self.storage)
    except (IndexError, EmptyIndexError):
        # No readable index on disk yet; created later once the schema is known.
        existing = None
    self.index = existing
    super(WhooshManager, self).__init__(*args, **kwargs)
class WhooshManager(models.Manager):
    """Django model manager that mirrors its model's rows into a Whoosh
    full-text index under STORAGE_DIR and exposes query() to resolve
    search hits back into a regular queryset.

    Keyword options (popped before Manager.__init__):
        default   -- schema field name that query() parses against.
        fields    -- model field names to index ('id' is always appended).
        real_time -- when True, keep the index in sync via signals.
    """

    def __init__(self, *args, **kwargs):
        self.default = kwargs.pop("default", None)
        self.parser = None
        # 'id' is always indexed so hits can be mapped back to rows.
        self.fields = kwargs.pop('fields', []) + ['id']
        self.real_time = kwargs.pop('real_time', True)
        if not os.path.lexists(STORAGE_DIR):
            os.makedirs(STORAGE_DIR)
        self.storage = store.FileStorage(STORAGE_DIR)
        try:
            self.index = Index(self.storage)
        except (IndexError, EmptyIndexError):
            # No readable index on disk yet; created in
            # class_prepared_callback once the schema is known.
            self.index = None
        super(WhooshManager, self).__init__(*args, **kwargs)

    def contribute_to_class(self, model, name):
        # Defer schema construction until the model class is fully prepared.
        super(WhooshManager, self).contribute_to_class(model, name)
        class_prepared.connect(self.class_prepared_callback, sender=self.model)

    def class_prepared_callback(self, sender, **kwargs):
        """Build the Whoosh schema from the model's fields, open/create the
        index, and (optionally) connect real-time save/delete handlers."""
        schema_dict = {}
        for field_name in self.fields:
            field = self.model._meta.get_field_by_name(field_name)[0]
            # field_mapping translates Django field classes to Whoosh types.
            schema_dict[field.name] = field_mapping[field.__class__]
        self.schema = Schema(**schema_dict)
        if self.index is None:
            self.index = Index(self.storage, schema=self.schema, create=True)
        self.searcher = self.index.searcher()
        if self.real_time:
            post_save.connect(self.post_save_callback, sender=self.model)
            post_delete.connect(self.post_delete_callback, sender=self.model)

    def post_save_callback(self, sender, instance, created, **kwargs):
        """Index a newly created instance, or re-index an updated one."""
        dct = dict([(f, unicode(getattr(instance, f))) for f in self.fields])
        self.index = self.index.refresh()
        writer = self.index.writer()
        if created:
            writer.add_document(**dct)
        else:
            writer.update_document(**dct)
        writer.commit()
        # BUG FIX: re-open the index and searcher after committing;
        # otherwise query() keeps searching the stale pre-commit reader
        # and never sees documents written here.
        self.index = self.index.refresh()
        self.searcher = self.index.searcher()

    def post_delete_callback(self, sender, instance, **kwargs):
        # TODO(review): no-op -- deleted rows are never purged from the
        # index, so query() can return ids that no longer exist.
        pass

    def query(self, q):
        """Search the index for q and return a queryset of matching rows."""
        if self.parser is None:
            # Lazily built: self.schema only exists after class_prepared_callback.
            self.parser = QueryParser(self.default, schema=self.schema)
        results = self.searcher.search(self.parser.parse(q))
        return self.filter(id__in=[r['id'] for r in results])
def __query(self, index: Index, text: str, domains: Set[str]) -> pd.DataFrame:
    """Search *index* for *text* and rank hits by name similarity.

    Returns a DataFrame sorted by descending 'score' (empty when nothing
    matched). Hits sharing at least one domain with *domains* receive the
    configured matching-domains boost; others are normalized down by it.
    """
    parsed = self.name_parser.parse(text)
    rows = []
    with index.searcher() as searcher:
        for hit in searcher.search(parsed, limit=6):
            hit_domains = set((hit.get('domains') or '').split(','))
            boosted = bool(hit_domains & domains)
            rows.append({
                'raw_score': hit.score,
                'id': hit['id'],
                'name': hit['name'],
                'domains_boost': self.matching_domains_boost if boosted else 1,
            })
    if not rows:
        return pd.DataFrame()
    frame = pd.DataFrame.from_records(rows, index='id')
    lowered = text.lower()
    # Accurate score from lowercased string similarity; the ** 1.5
    # "sharpens" the similarity to make it more intuitive.
    frame['score'] = frame['name'].apply(
        lambda name: jellyfish.jaro_winkler_similarity(name.lower(), lowered) ** 1.5)
    frame['score'] = frame['score'] * frame['domains_boost'] / self.matching_domains_boost
    frame = frame.sort_values(by='score', ascending=False)
    return frame.reset_index(drop=True)
def update_index(index: Index, cards: List[Card]) -> None:
    """(Re)index the searchable name fields of every card.

    Tokens and Vanguard cards are excluded before indexing.
    """
    writer = index.writer()
    cards = [c for c in cards if c.layout != 'token' and c.type != 'Vanguard']
    for card in cards:
        # Fix: use attribute access consistently -- the filter above already
        # relies on it (c.layout, c.type); the old card['id'] / card['name']
        # subscripting mixed two access styles on the same objects.
        document = {
            'id': card.id,
            'name': card.name,
            'name_tokenized': card.name,
            'name_stemmed': card.name,
            'name_normalized': card.name,
        }
        writer.update_document(**document)
    writer.commit()
def __add_documents(index: Index, docs: Iterable[dict]):
    """Add every document in *docs* to *index* in one writer session.

    Commits on success. On any failure -- including KeyboardInterrupt,
    hence BaseException -- the writer is cancelled (releasing its lock)
    before the exception is re-raised.
    """
    writer = index.writer()
    try:
        for doc in docs:
            writer.add_document(**doc)
        writer.commit()
    except BaseException:
        # Cancel so the writer lock is released before propagating.
        # (Removed the dead `written` counter: it was incremented but
        # never read anywhere.)
        writer.cancel()
        print('Failed to index documents')
        raise
def __init__(self, *args, **kwargs):
    """Consume search options, make sure the index directory exists, and
    open any pre-existing Whoosh index (None when one must be created
    later), then hand the remaining arguments to Manager.__init__."""
    self.parser = None
    self.default = kwargs.pop("default", None)
    self.real_time = kwargs.pop('real_time', True)
    # 'id' is always part of the indexed fields so hits map back to rows.
    self.fields = kwargs.pop('fields', []) + ['id']
    if not os.path.lexists(STORAGE_DIR):
        os.makedirs(STORAGE_DIR)
    self.storage = store.FileStorage(STORAGE_DIR)
    try:
        self.index = Index(self.storage)
    except (IndexError, EmptyIndexError):
        # Directory exists but holds no readable index yet.
        self.index = None
    super(WhooshManager, self).__init__(*args, **kwargs)
def update_index(index: Index, cards: List[Card]) -> None:
    """Index every searchable name of each card.

    Tokens and Vanguard cards are skipped. For meld cards, only the front
    part (position 1) is indexed under all of its names; other meld parts
    are indexed under their own name alone.
    """
    writer = index.writer()
    cards = [c for c in cards if c.layout != 'token' and c.type_line != 'Vanguard']
    for card in cards:
        # Equivalent to the old `not card.layout == 'meld' or card.position == 1`
        # (precedence: `not` binds tighter than `or`), spelled out for clarity.
        if card.layout != 'meld' or card.position == 1:
            # BUG FIX: copy the list -- the append below used to mutate
            # card.names itself as a side effect.
            names = list(card.names)
        else:
            names = [card.name]
        if card.name not in names:
            names.append(card.name)  # Split and aftermath cards
        for name in names:
            document = {
                'id': card.id,
                'name': name,
                'canonical_name': card.name,
                'name_tokenized': name,
                'name_stemmed': name,
                'name_normalized': name,
            }
            writer.update_document(**document)
    writer.commit()
def update_index(index: Index, cards: List[Card]) -> None:
    """Index every searchable name of each playable card.

    We exclude tokens here because they can have the exact same name as
    cards, and exclude emblems (and other non-playable layouts) so they
    do not show up in card search results.
    """
    writer = index.writer()
    # Hoisted out of the filter: the old comprehension called
    # multiverse.playable_layouts() once per card.
    playable = multiverse.playable_layouts()
    cards = [c for c in cards if c.layout in playable]
    for card in cards:
        # BUG FIX: copy the list -- the append below used to mutate
        # card.names itself as a side effect.
        names = list(card.names)
        if card.name not in names:
            names.append(card.name)  # Split and aftermath cards
        for name in names:
            document = {
                'id': card.id,
                'name': name,
                'canonical_name': card.name,
                'name_tokenized': name,
                'name_stemmed': name,
                'name_normalized': name,
            }
            writer.update_document(**document)
    writer.commit()