import pytest

from lunr.builder import Builder
from lunr.index import Index
from lunr.token_set import TokenSet
from lunr.vector import Vector


class TestBuilderBuild:
    def setup_method(self, method):
        self.builder = Builder()
        doc = {"id": "id", "title": "test", "body": "missing"}

        self.builder.ref("id")
        self.builder.field("title")
        self.builder.add(doc)
        self.index = self.builder.build()

    def test_adds_tokens_to_inverted_index(self):
        _assert_deep_keys(self.builder.inverted_index, "test.title.id")

    def test_builds_vector_space_of_the_document_fields(self):
        assert "title/id" in self.builder.field_vectors
        assert isinstance(self.builder.field_vectors["title/id"], Vector)

    def test_skips_fields_not_defined_for_indexing(self):
        assert "missing" not in self.builder.inverted_index

    def test_builds_a_token_set_for_the_corpus(self):
        needle = TokenSet.from_string("test")
        assert "test" in self.builder.token_set.intersect(needle).to_list()

    def test_calculates_average_field_length(self):
        assert self.builder.average_field_length["title"] == 1

    def test_index_is_returned(self):
        assert isinstance(self.index, Index)
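# The tests above and below call an _assert_deep_keys helper that is not part
# of this excerpt. A minimal sketch, assuming it checks that a dot-separated
# key path exists in nested dictionaries:
def _assert_deep_keys(dictionary, key_path):
    node = dictionary
    for key in key_path.split("."):
        assert key in node, "{} not in {}".format(key, list(node))
        node = node[key]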
def test_define_fields_to_index():
    builder = Builder()
    builder.field("foo")

    assert len(builder._fields) == 1
    assert builder._fields["foo"].name == "foo"
    assert builder._fields["foo"].boost == 1
    assert builder._fields["foo"].extractor is None
    assert repr(builder._fields["foo"]) == '<Field "foo" boost="1">'
    assert hash(builder._fields["foo"]) == hash("foo")
class TestBuilderAdd:
    def test_builder_casts_docrefs_to_strings(self):
        self.builder = Builder()
        self.builder.ref("id")
        self.builder.field("title")

        self.builder.add(dict(id=123, title="test", body="missing"))

        _assert_deep_keys(self.builder.inverted_index, "test.title.123")

    def test_builder_metadata_whitelist_includes_metadata_in_index(self):
        self.builder = Builder()
        self.builder.ref("id")
        self.builder.field("title")
        self.builder.metadata_whitelist = ["position"]

        self.builder.add(dict(id="a", title="test", body="missing"))
        self.builder.add(dict(id="b", title="another test", body="missing"))

        assert self.builder.inverted_index["test"]["title"]["a"] == {
            "position": [[0, 4]]
        }
        assert self.builder.inverted_index["test"]["title"]["b"] == {
            "position": [[8, 4]]
        }

    def test_builder_field_raises_if_contains_slash(self):
        self.builder = Builder()
        with pytest.raises(ValueError):
            self.builder.field("foo/bar")

    def test_builder_extracts_nested_properties_from_document(self):
        self.builder = Builder()
        self.builder.field("name", extractor=lambda d: d["person"]["name"])

        self.builder.add({"id": "id", "person": {"name": "bob"}})

        assert self.builder.inverted_index["bob"]["name"]["id"] == {}

    def test_builder_field_term_frequency_and_length(self):
        self.builder = Builder()
        self.builder.ref("id")
        self.builder.field("title")

        self.builder.add(
            dict(id="a", title="test a testing test", body="missing"))

        assert self.builder.field_term_frequencies == {
            "title/a": {"test": 2, "a": 1, "testing": 1}
        }
        assert self.builder.field_lengths == {"title/a": 4}
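# For context: the whitelisted "position" metadata tested above surfaces at
# query time through each result's match data, which maps
# term -> field -> {"position": [[start, length]]}. A sketch, assuming the
# result shape shown in lunr.py's README ('ref', 'score', 'match_data'):
builder = Builder()
builder.ref("id")
builder.field("title")
builder.metadata_whitelist = ["position"]
builder.add({"id": "a", "title": "test"})
idx = builder.build()

for result in idx.search("test"):
    positions = result["match_data"].metadata["test"]["title"]["position"]
    print(result["ref"], positions)  # a [[0, 4]]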
from lunr.builder import Builder
from lunr.stemmer import stemmer
from lunr.stop_word_filter import stop_word_filter
from lunr.trimmer import trimmer


def lunr_builder(ref, fields):
    """A convenience function to configure a lunr.Builder.

    Args:
        ref (str): The key in the documents to be used as the reference.
        fields (list): A list of strings defining fields in the documents
            to index.

    Returns:
        Builder: The configured Builder, ready to add documents and build.
    """
    builder = Builder()
    builder.pipeline.add(trimmer, stop_word_filter, stemmer)
    builder.search_pipeline.add(stemmer)
    builder.ref(ref)
    for field in fields:
        builder.field(field)
    return builder
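# A usage sketch for lunr_builder: feed documents to the returned builder,
# then build the index and search it.
builder = lunr_builder("id", ("title", "body"))
builder.add({"id": "1", "title": "Search", "body": "Full text search for Python"})
index = builder.build()
index.search("search")  # -> [{'ref': '1', 'score': ..., 'match_data': ...}]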
from lunr import languages as lang


def lunr(ref, fields, documents, languages=None):
    """A convenience function to configure and construct a lunr.Index.

    Args:
        ref (str): The key in the documents to be used as the reference.
        fields (list): A list of strings defining fields in the documents to
            index. Optionally a list of dictionaries with three keys:
            `field_name` defining the document's field, `boost` an integer
            defining a boost to be applied to the field, and `extractor`
            a callable taking the document as a single argument and returning
            a string located in the document in a particular way.
        documents (list): The list of dictionaries representing the documents
            to index. Optionally a 2-tuple of dicts, the first one being
            the document and the second the attributes associated with it.
        languages (str or list, optional): The languages to use if using
            NLTK language support, ignored if NLTK is not available.

    Returns:
        Index: The populated Index ready to search against.
    """
    if languages is not None and lang.LANGUAGE_SUPPORT:
        if isinstance(languages, str):
            languages = [languages]
        unsupported_languages = set(languages) - set(lang.SUPPORTED_LANGUAGES)
        if unsupported_languages:
            raise RuntimeError(
                "The specified languages {} are not supported, "
                "please choose one of {}".format(
                    ", ".join(unsupported_languages),
                    ", ".join(lang.SUPPORTED_LANGUAGES.keys()),
                ))
        builder = lang.get_nltk_builder(languages)
    else:
        builder = Builder()
        builder.pipeline.add(trimmer, stop_word_filter, stemmer)
        builder.search_pipeline.add(stemmer)

    builder.ref(ref)
    for field in fields:
        if isinstance(field, dict):
            builder.field(**field)
        else:
            builder.field(field)

    for document in documents:
        if isinstance(document, (tuple, list)):
            builder.add(document[0], attributes=document[1])
        else:
            builder.add(document)

    return builder.build()
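# Typical usage of the lunr convenience function with the default English
# pipeline configured above.
documents = [
    {"id": "a", "title": "Mr. Green kills Colonel Mustard", "body": "in the study"},
    {"id": "b", "title": "Plumb waters plant", "body": "in the kitchen"},
]
idx = lunr(ref="id", fields=("title", "body"), documents=documents)
idx.search("plant")  # -> [{'ref': 'b', 'score': ..., 'match_data': ...}]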
def generate_output(self, writer):
    pages = [self.create_node(x) for x in self.context['articles']]
    path = os.path.join(self.output_path, 'search_index.json')

    pages_to_index = [{
        'id': x['id'],
        'title': x['title'],
        'text': x['text'],
    } for x in pages]
    additional_data = {
        x['id']: {
            'url': x['url'],
            'title': x['title'],
            'summary': x['summary'],
        } for x in pages
    }

    # Register the custom pipeline function so the serialized index can
    # reference it by name.
    Pipeline.register_function(special_chars_remover, 'specialCharsRemover')

    bldr = Builder()
    bldr.pipeline.add(trimmer, stop_word_filter, stemmer,
                      special_chars_remover)
    bldr.search_pipeline.add(stemmer)
    bldr.ref('id')
    bldr.field('title', 10)  # boost matches in the title field
    bldr.field('text')

    for page in pages_to_index:
        bldr.add(page)

    idx = bldr.build().serialize()
    with open(path, 'w') as idxfile:
        json.dump({
            'index': idx,
            'data': additional_data,
        }, idxfile)
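# The serialized index written above can be restored later without
# re-indexing. A sketch, assuming lunr.py's Index.load; the custom pipeline
# function must be registered under the same name before loading, and the
# output path shown here is hypothetical.
import json

from lunr.index import Index
from lunr.pipeline import Pipeline

Pipeline.register_function(special_chars_remover, 'specialCharsRemover')

with open('output/search_index.json') as f:
    data = json.load(f)

idx = Index.load(data['index'])
results = idx.search('pelican')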