Example #1
0
 def class_prepared_callback(self, sender, **kwargs):
     """Prepare the schema, index and searcher for ``self.model``.

     A ``Schema`` is built from the model fields named in ``self.fields``,
     each translated through ``field_mapping`` by its field class.  An index
     is created only when ``self.index`` is still ``None``.  When
     ``self.real_time`` is true the save/delete callbacks are connected for
     ``self.model``.

     ``sender`` and ``kwargs`` are accepted but not used.
     """
     # The field object is the first element of what get_field_by_name()
     # returns; the generator keeps the lookups lazy and in field order.
     model_fields = (
         self.model._meta.get_field_by_name(name)[0] for name in self.fields
     )
     self.schema = Schema(**dict(
         (model_field.name, field_mapping[model_field.__class__])
         for model_field in model_fields
     ))

     if self.index is None:
         self.index = Index(self.storage, schema=self.schema, create=True)
     self.searcher = self.index.searcher()

     if not self.real_time:
         return
     post_save.connect(self.post_save_callback, sender=self.model)
     post_delete.connect(self.post_delete_callback, sender=self.model)
Example #2
0
class WhooshManager(models.Manager):
    """Model manager that mirrors selected model fields into a search index.

    Keyword arguments consumed by the constructor (everything else is passed
    on to ``models.Manager``):

    * ``default`` -- first argument given to ``QueryParser`` (``None`` when
      omitted).
    * ``fields`` -- names of the model fields to index; ``'id'`` is always
      appended.
    * ``real_time`` -- when true (the default) the save/delete callbacks are
      connected once the model class is prepared.
    """

    def __init__(self, *args, **kwargs):
        """Pop the search options and open the storage under STORAGE_DIR."""
        self.default = kwargs.pop("default", None)
        self.parser = None  # created on first use by query()
        self.fields = kwargs.pop('fields', []) + ['id']
        self.real_time = kwargs.pop('real_time', True)

        if not os.path.lexists(STORAGE_DIR):
            os.makedirs(STORAGE_DIR)
        self.storage = store.FileStorage(STORAGE_DIR)

        try:
            existing_index = Index(self.storage)
        except (IndexError, EmptyIndexError):
            # Left as None so class_prepared_callback() creates the index.
            existing_index = None
        self.index = existing_index

        super(WhooshManager, self).__init__(*args, **kwargs)

    def contribute_to_class(self, model, name):
        """Register on the model, then wait for its ``class_prepared`` signal."""
        super(WhooshManager, self).contribute_to_class(model, name)
        class_prepared.connect(self.class_prepared_callback, sender=self.model)

    def class_prepared_callback(self, sender, **kwargs):
        """Build the schema, make sure an index exists and open a searcher.

        ``sender`` and ``kwargs`` are accepted but not used.
        """
        columns = {}
        for name in self.fields:
            # The field object is the first element of the lookup result.
            model_field = self.model._meta.get_field_by_name(name)[0]
            columns[model_field.name] = field_mapping[model_field.__class__]
        self.schema = Schema(**columns)

        if self.index is None:
            self.index = Index(self.storage, schema=self.schema, create=True)
        self.searcher = self.index.searcher()

        if not self.real_time:
            return
        post_save.connect(self.post_save_callback, sender=self.model)
        post_delete.connect(self.post_delete_callback, sender=self.model)

    def post_save_callback(self, sender, instance, created, **kwargs):
        """Write the saved ``instance`` to the index and reopen the searcher.

        Every indexed field value is converted with ``unicode``.  New rows
        are added, existing rows are updated.
        """
        values = dict(
            (name, unicode(getattr(instance, name))) for name in self.fields
        )
        self.index = self.index.refresh()
        writer = self.index.writer()
        store_document = writer.add_document if created else writer.update_document
        store_document(**values)
        writer.commit()
        # Reopen index and searcher after the commit, as the original does.
        self.index = self.index.refresh()
        self.searcher = self.index.searcher()

    def post_delete_callback(self, sender, instance, **kwargs):
        """Do nothing: deleted instances are not removed from the index here."""

    def query(self, q):
        """Return the queryset of rows whose indexed fields match ``q``.

        A ``*`` is appended to ``q`` before parsing.  The parser is created
        on the first call and reused afterwards.
        """
        if self.parser is None:
            self.parser = QueryParser(self.default, schema=self.schema)
        hits = self.searcher.search(self.parser.parse(q + "*"))
        matching_ids = [hit['id'] for hit in hits]
        return self.filter(id__in=matching_ids)
Example #3
0
 def __init__(self, *args, **kwargs):
     """Pop the search options, open the storage and try to load the index.

     Consumes the ``default``, ``fields`` and ``real_time`` keyword
     arguments; whatever remains is forwarded to the parent constructor.
     ``'id'`` is always appended to the indexed field names.
     """
     self.default = kwargs.pop("default", None)
     self.parser = None
     self.fields = kwargs.pop('fields', []) + ['id']
     self.real_time = kwargs.pop('real_time', True)

     # Make sure the storage directory exists before opening it.
     if not os.path.lexists(STORAGE_DIR):
         os.makedirs(STORAGE_DIR)
     self.storage = store.FileStorage(STORAGE_DIR)

     try:
         opened_index = Index(self.storage)
     except (IndexError, EmptyIndexError):
         # The index could not be opened; record that with None.
         opened_index = None
     self.index = opened_index

     super(WhooshManager, self).__init__(*args, **kwargs)
Example #4
0
class WhooshManager(models.Manager):
    """Model manager that mirrors selected model fields into a search index.

    Keyword arguments consumed by the constructor (everything else is passed
    on to ``models.Manager``):

    * ``default`` -- first argument given to ``QueryParser`` (``None`` when
      omitted).
    * ``fields`` -- names of the model fields to index; ``'id'`` is always
      appended.
    * ``real_time`` -- when true (the default) the save/delete callbacks are
      connected once the model class is prepared.
    """

    def __init__(self, *args, **kwargs):
        """Pop the search options and open the storage under STORAGE_DIR."""
        self.default = kwargs.pop("default", None)
        self.parser = None  # created on first use by query()
        self.fields = kwargs.pop('fields', []) + ['id']
        self.real_time = kwargs.pop('real_time', True)
        if not os.path.lexists(STORAGE_DIR):
            os.makedirs(STORAGE_DIR)
        self.storage = store.FileStorage(STORAGE_DIR)
        try:
            self.index = Index(self.storage)
        except (IndexError, EmptyIndexError):
            # Left as None so class_prepared_callback() creates the index.
            self.index = None
        super(WhooshManager, self).__init__(*args, **kwargs)

    def contribute_to_class(self, model, name):
        """Register on the model, then wait for its ``class_prepared`` signal."""
        super(WhooshManager, self).contribute_to_class(model, name)
        class_prepared.connect(self.class_prepared_callback, sender=self.model)

    def class_prepared_callback(self, sender, **kwargs):
        """Build the schema, make sure an index exists and open a searcher.

        ``sender`` and ``kwargs`` are accepted but not used.
        """
        schema_dict = {}
        for field_name in self.fields:
            # The field object is the first element of the lookup result.
            field = self.model._meta.get_field_by_name(field_name)[0]
            schema_dict[field.name] = field_mapping[field.__class__]
        self.schema = Schema(**schema_dict)
        if self.index is None:
            self.index = Index(self.storage, schema=self.schema, create=True)
        self.searcher = self.index.searcher()
        if self.real_time:
            post_save.connect(self.post_save_callback, sender=self.model)
            post_delete.connect(self.post_delete_callback, sender=self.model)

    def post_save_callback(self, sender, instance, created, **kwargs):
        """Write the saved ``instance`` to the index and reopen the searcher.

        Every indexed field value is converted with ``unicode``.  New rows
        are added, existing rows are updated.
        """
        dct = dict((f, unicode(getattr(instance, f))) for f in self.fields)
        self.index = self.index.refresh()
        writer = self.index.writer()
        if created:
            writer.add_document(**dct)
        else:
            writer.update_document(**dct)
        writer.commit()
        # Reopen the index and the searcher after the commit.  Without this,
        # query() keeps using the searcher opened in class_prepared_callback()
        # and never sees documents written afterwards.
        self.index = self.index.refresh()
        self.searcher = self.index.searcher()

    def post_delete_callback(self, sender, instance, **kwargs):
        """Do nothing: deleted instances are not removed from the index here."""
        pass

    def query(self, q):
        """Return the queryset of rows whose indexed fields match ``q``.

        The parser is created on the first call and reused afterwards.
        """
        if self.parser is None:
            self.parser = QueryParser(self.default, schema=self.schema)
        results = self.searcher.search(self.parser.parse(q))
        return self.filter(id__in=[r['id'] for r in results])
Example #5
0
    def __query(self, index: Index, text: str,
                domains: Set[str]) -> pd.DataFrame:
        """Search ``index`` for ``text`` and rank the hits by name similarity.

        At most six hits are requested.  Each hit's ``domains`` value is
        split on commas; a hit sharing at least one entry with ``domains``
        gets ``self.matching_domains_boost`` as its boost, any other hit
        gets ``1``.

        The final ``score`` is the Jaro-Winkler similarity between the
        lowercased hit name and the lowercased ``text``, raised to the power
        1.5, multiplied by the boost and divided by
        ``self.matching_domains_boost``.

        Returns an empty ``DataFrame`` when nothing matches.  Otherwise the
        rows are sorted by descending ``score`` with a fresh integer index;
        the ``id`` values are used as the index while scoring and are
        discarded by the final ``reset_index(drop=True)``.
        """
        parsed = self.name_parser.parse(text)

        rows = []
        with index.searcher() as searcher:
            for hit in searcher.search(parsed, limit=6):
                hit_domains = set((hit.get('domains') or '').split(','))
                row = {
                    'raw_score': hit.score,
                    'id': hit['id'],
                    'name': hit['name'],
                }
                if len(hit_domains & domains) > 0:
                    row['domains_boost'] = self.matching_domains_boost
                else:
                    row['domains_boost'] = 1
                rows.append(row)

        if not rows:
            return pd.DataFrame()

        frame = pd.DataFrame.from_records(rows, index='id')
        needle = text.lower()

        def sharpened_similarity(name):
            # Raising to 1.5 "sharpens" the similarity to make it more
            # intuitive (wording kept from the original comment).
            return jellyfish.jaro_winkler_similarity(name.lower(), needle)**1.5

        # Score on lowercased string similarity, then apply the domain boost.
        similarity = frame['name'].apply(sharpened_similarity)
        frame['score'] = (
            similarity * frame['domains_boost'] / self.matching_domains_boost)

        ranked = frame.sort_values(by='score', ascending=False)
        return ranked.reset_index(drop=True)
def update_index(index: Index, cards: List[Card]) -> None:
    """Write one search document per eligible card into ``index``.

    Cards whose ``layout`` is ``'token'`` or whose ``type`` is ``'Vanguard'``
    are skipped.  Every remaining card is stored with ``update_document``,
    using its name for the ``name``, ``name_tokenized``, ``name_stemmed``
    and ``name_normalized`` fields.

    If filtering or indexing raises, the writer is cancelled and the
    exception is re-raised, so a failed run does not leave the writer open
    and uncommitted.
    """
    writer = index.writer()
    try:
        eligible = [
            c for c in cards if c.layout != 'token' and c.type != 'Vanguard'
        ]
        for card in eligible:
            card_id, name = card['id'], card['name']
            writer.update_document(
                id=card_id,
                name=name,
                name_tokenized=name,
                name_stemmed=name,
                name_normalized=name,
            )
    except BaseException:
        # Clean up and re-raise; BaseException so interrupts are covered too.
        writer.cancel()
        raise
    writer.commit()
Example #7
0
 def class_prepared_callback(self, sender, **kwargs):
     """Build the schema, make sure an index exists and open a searcher.

     Each name in ``self.fields`` is resolved to a model field, and the
     field's class is looked up in ``field_mapping`` to get the schema
     entry stored under the field's name.  An index is created only when
     ``self.index`` is ``None``.  With ``self.real_time`` set, the
     save/delete callbacks are connected for ``self.model``.

     ``sender`` and ``kwargs`` are accepted but not used.
     """
     columns = {}
     for name in self.fields:
         # The field object is the first element of the lookup result.
         model_field = self.model._meta.get_field_by_name(name)[0]
         columns[model_field.name] = field_mapping[model_field.__class__]
     self.schema = Schema(**columns)

     if self.index is None:
         self.index = Index(self.storage, schema=self.schema, create=True)
     self.searcher = self.index.searcher()

     if not self.real_time:
         return
     post_save.connect(self.post_save_callback, sender=self.model)
     post_delete.connect(self.post_delete_callback, sender=self.model)
Example #8
0
    def __add_documents(index: Index, docs: Iterable[dict]):
        """Add every mapping in ``docs`` to ``index`` and commit.

        Each item of ``docs`` is passed to ``writer.add_document`` as keyword
        arguments.  If adding or committing raises, the writer is cancelled,
        a failure message is printed and the exception is re-raised.
        """
        writer = index.writer()

        try:
            for doc in docs:
                writer.add_document(**doc)
            writer.commit()
        except BaseException:
            # Clean up and re-raise; BaseException so interrupts are covered too.
            writer.cancel()
            print('Failed to index documents')
            raise
Example #9
0
 def __init__(self, *args, **kwargs):
     """Set up search options and storage, then load the index if possible.

     The ``default``, ``fields`` and ``real_time`` keyword arguments are
     removed from ``kwargs`` before the parent constructor is called with
     the remaining arguments.  ``'id'`` is always added to the indexed
     field names.
     """
     self.default = kwargs.pop("default", None)
     self.parser = None
     self.fields = kwargs.pop('fields', []) + ['id']
     self.real_time = kwargs.pop('real_time', True)

     storage_missing = not os.path.lexists(STORAGE_DIR)
     if storage_missing:
         os.makedirs(STORAGE_DIR)
     self.storage = store.FileStorage(STORAGE_DIR)

     try:
         loaded = Index(self.storage)
     except (IndexError, EmptyIndexError):
         # Opening failed; keep None as the "no index" marker.
         loaded = None
     self.index = loaded

     super(WhooshManager, self).__init__(*args, **kwargs)
def update_index(index: Index, cards: List[Card]) -> None:
    """Write one search document per name of each eligible card.

    Cards whose ``layout`` is ``'token'`` or whose ``type_line`` is
    ``'Vanguard'`` are skipped.  For a card whose layout is ``'meld'`` and
    whose ``position`` is not 1, only the card's own name is indexed.  For
    every other card all of ``card.names`` are indexed, plus ``card.name``
    when it is not already among them.  Each document records the indexed
    name and the card's ``name`` as ``canonical_name``.

    The name list is copied before ``card.name`` is appended, so the
    caller's ``card.names`` is never modified.  If filtering or indexing
    raises, the writer is cancelled and the exception is re-raised.
    """
    writer = index.writer()
    try:
        eligible = [
            c for c in cards
            if c.layout != 'token' and c.type_line != 'Vanguard'
        ]
        for card in eligible:
            if card.layout != 'meld' or card.position == 1:
                # Copy: appending below must not alter the card itself.
                names = list(card.names)
            else:
                names = [card.name]
            if card.name not in names:
                names.append(card.name)  # Split and aftermath cards
            for name in names:
                writer.update_document(
                    id=card.id,
                    name=name,
                    canonical_name=card.name,
                    name_tokenized=name,
                    name_stemmed=name,
                    name_normalized=name,
                )
    except BaseException:
        # Clean up and re-raise; BaseException so interrupts are covered too.
        writer.cancel()
        raise
    writer.commit()
def update_index(index: Index, cards: List[Card]) -> None:
    """Write one search document per name of each playable card.

    Only cards whose ``layout`` is in ``multiverse.playable_layouts()`` are
    indexed.  All of ``card.names`` are indexed, plus ``card.name`` when it
    is not already among them.  Each document records the indexed name and
    the card's ``name`` as ``canonical_name``.

    The name list is copied before ``card.name`` is appended, so the
    caller's ``card.names`` is never modified.  If filtering or indexing
    raises, the writer is cancelled and the exception is re-raised.
    """
    writer = index.writer()
    try:
        # Per the original note, the layout filter keeps out tokens (which
        # can have the exact same name as a card) and emblems.
        playable = [
            c for c in cards if c.layout in multiverse.playable_layouts()
        ]
        for card in playable:
            # Copy: appending below must not alter the card itself.
            names = list(card.names)
            if card.name not in names:
                names.append(card.name)  # Split and aftermath cards
            for name in names:
                writer.update_document(
                    id=card.id,
                    name=name,
                    canonical_name=card.name,
                    name_tokenized=name,
                    name_stemmed=name,
                    name_normalized=name,
                )
    except BaseException:
        # Clean up and re-raise; BaseException so interrupts are covered too.
        writer.cancel()
        raise
    writer.commit()