コード例 #1
0
ファイル: __init__.py プロジェクト: johnfelipe/server
    def get_search_phrases(self, indexing_func=None):
        """Return search phrases generated from this instance's properties.

        Args:
            indexing_func: Optional callable that takes a single string value
                and returns an iterable of keywords or phrases.  Pass one in
                to customize search phrase generation.  When omitted, the
                class's get_search_phraseset is used if INDEX_MULTI_WORD is
                truthy, otherwise get_simple_search_phraseset.

        Class variables that influence the output of this method:
            INDEX_ONLY: If falsy, all indexable properties are indexed.
                If a list of property names, only those properties are
                indexed.
            INDEX_MULTI_WORD: Selects the default indexing function, allowing
                multi-word search phrases like "statue of liberty."
            INDEX_STEMMING: If truthy, the generated phrases are run through
                Stemmer.Stemmer('english') before being collected.

        A property is indexed only when its datastore value (or, for a list
        value, its first element) is a string that is not a Blob.

        Returns:
            A list of the unique phrases found, in no particular order.
        """
        if not indexing_func:
            klass = self.__class__
            if klass.INDEX_MULTI_WORD:
                indexing_func = klass.get_search_phraseset
            else:
                indexing_func = klass.get_simple_search_phraseset
        # Build the stemmer once; None means "leave phrases unstemmed".
        stemmer = Stemmer.Stemmer('english') if self.INDEX_STEMMING else None
        phrases = set()
        for prop_name, prop_value in self.properties().iteritems():
            if self.INDEX_ONLY and prop_name not in self.INDEX_ONLY:
                continue
            values = prop_value.get_value_for_datastore(self)
            if not isinstance(values, list):
                values = [values]
            # An empty list property has nothing to index; without this guard
            # the values[0] type probe below raises IndexError.
            if not values:
                continue
            first = values[0]
            # Blob is excluded explicitly even when it passes the string check.
            if (not isinstance(first, basestring)
                    or isinstance(first, datastore_types.Blob)):
                continue
            for value in values:
                words = indexing_func(value)
                if stemmer is not None:
                    words = stemmer.stemWords(words)
                phrases.update(words)
        return list(phrases)
コード例 #2
0
ファイル: __init__.py プロジェクト: johnfelipe/server
    def full_text_search(phrase,
                         limit=10,
                         kind=None,
                         stemming=INDEX_STEMMING,
                         multi_word_literal=INDEX_MULTI_WORD,
                         searched_phrases_out=None):
        """Queries search indices for phrases using a merge-join.

        Multi-word literal phrases are tried first.  If they yield fewer than
        `limit` index keys, the remainder is filled with entries that match
        every individual keyword.

        Args:
            phrase: String.  Search phrase.
            limit: Maximum number of results to return.
            kind: String.  Returned keys/entities are restricted to this kind.
            stemming: If truthy, StemmedIndex is queried and search terms are
                stemmed with Stemmer.Stemmer('english'); otherwise
                LiteralIndex is queried with the terms as-is.
            multi_word_literal: If truthy and the phrase has more than one
                keyword, multi-word phrases are matched before single words.
            searched_phrases_out: Optional list.  Every term actually used as
                a query filter is appended to it, in order.

        Returns:
            A list of (key, title) tuples corresponding to the indexed
            entities.  Multi-word literal matches are returned first.  When
            no usable search term remains (e.g. an empty phrase), no query is
            issued for that stage, so an empty search yields an empty list.

        TODO -- Should provide feedback if input search phrase has stop words, etc.
        """
        if searched_phrases_out is None:
            searched_phrases_out = []
        index_keys = []
        keywords = PUNCTUATION_REGEX.sub(' ', phrase).lower().split()
        if stemming:
            stemmer = Stemmer.Stemmer('english')
            klass = StemmedIndex
        else:
            stemmer = None
            klass = LiteralIndex

        if len(keywords) > 1 and multi_word_literal:
            # Try to match literal multi-word phrases first
            if len(keywords) == 2:
                search_phrases = [' '.join(keywords)]
            else:
                # Every three-word window whose first and last words are not
                # stop words becomes a candidate phrase.
                search_phrases = [
                    ' '.join(keywords[pos:pos + 3])
                    for pos in range(len(keywords) - 2)
                    if keywords[pos] not in STOP_WORDS
                    and keywords[pos + 2] not in STOP_WORDS]
            # With no candidate phrase the query would carry no 'phrases'
            # filter and return arbitrary index entries, so skip it.
            if search_phrases:
                query = klass.all(keys_only=True)
                # A distinct loop name keeps the `phrase` argument intact.
                for search_phrase in search_phrases:
                    if stemming:
                        search_phrase = stemmer.stemWord(search_phrase)
                    searched_phrases_out.append(search_phrase)
                    query = query.filter('phrases =', search_phrase)
                if kind:
                    query = query.filter('parent_kind =', kind)
                index_keys = query.fetch(limit=limit)

        if len(index_keys) < limit:
            new_limit = limit - len(index_keys)
            keywords = [word for word in keywords
                        if len(word) >= SEARCH_PHRASE_MIN_LENGTH]
            if stemming:
                keywords = stemmer.stemWords(keywords)
            # Same reasoning as above: never run an unfiltered query.
            if keywords:
                query = klass.all(keys_only=True)
                for keyword in keywords:
                    # Record the keyword that is actually being filtered on.
                    searched_phrases_out.append(keyword)
                    query = query.filter('phrases =', keyword)
                if kind:
                    query = query.filter('parent_kind =', kind)
                # Drop keys already found by the multi-word stage.
                single_word_matches = [
                    key for key in query.fetch(limit=new_limit)
                    if key not in index_keys]
                index_keys.extend(single_word_matches)

        return [(key.parent(), SearchIndex.get_title(key.name()))
                for key in index_keys]