Exemple #1
0
def test_highlight_ngrams():
    """Check the highlighted snippet produced for an NGRAMWORDS field.

    A single sentence is indexed in a stored NGRAMWORDS field, the word
    "multiplication" is searched for, and the hit is highlighted with a
    SentenceFragmenter and an UppercaseFormatter. The test pins the exact
    snippet string that comes back.
    """
    ngram_schema = fields.Schema(text=fields.NGRAMWORDS(stored=True))
    index = RamStorage().create_index(ngram_schema)

    # The writer context manager commits on exit.
    with index.writer() as writer:
        writer.add_document(text=u("Multiplication and subtraction are good"))

    with index.searcher() as searcher:
        parser = qparser.QueryParser("text", index.schema)
        parsed = parser.parse(u("multiplication"))
        results = searcher.search(parsed)
        assert results.scored_length() == 1

        # Highlighting is configured on the results object, then requested
        # per hit.
        results.fragmenter = highlight.SentenceFragmenter()
        results.formatter = highlight.UppercaseFormatter()
        first_hit = results[0]
        highlighted = first_hit.highlights("text")
        assert highlighted == "MULTIPLICATIon and subtracTION are good"
Exemple #2
0
def test_workflow_manual():
    """Highlight search hits by calling ``highlight.highlight`` directly.

    Five titles are indexed, the query "man" is run against the ``title``
    field, and each hit's stored title is highlighted by hand using the
    query's own terms, the field's analyzer, a WholeFragmenter and an
    UppercaseFormatter.
    """
    schema = fields.Schema(id=fields.ID(stored=True),
                           title=fields.TEXT(stored=True))
    index = RamStorage().create_index(schema)

    documents = (
        ("1", "The man who wasn't there"),
        ("2", "The dog who barked at midnight"),
        ("3", "The invisible man"),
        ("4", "The girl with the dragon tattoo"),
        ("5", "The woman who disappeared"),
    )
    writer = index.writer()
    for doc_id, doc_title in documents:
        writer.add_document(id=u(doc_id), title=u(doc_title))
    writer.commit()

    with index.searcher() as searcher:
        # Turn the user's query string into a query object.
        query_parser = qparser.QueryParser("title", schema=index.schema)
        parsed = query_parser.parse(u("man"))

        # Keep only the query terms that belong to the field being
        # highlighted.
        title_terms = []
        for fieldname, text in parsed.all_terms():
            if fieldname == "title":
                title_terms.append(text)

        # Run the search.
        hits = searcher.search(parsed)
        assert_equal(len(hits), 2)

        # Highlight with the same analyzer the field itself uses
        # (schema[fieldname].analyzer). Avoid doing this for non-text
        # field types such as DATETIME.
        field_analyzer = schema["title"].analyzer

        # The whole title should be highlighted rather than excerpted, so
        # WholeFragmenter is used.
        whole = highlight.WholeFragmenter()

        # Matched terms are simply upper-cased in this example.
        upper = highlight.UppercaseFormatter()

        highlighted = [
            highlight.highlight(hit["title"], title_terms, field_analyzer,
                                whole, upper)
            for hit in hits
        ]

        assert_equal(highlighted,
                     ["The invisible MAN", "The MAN who wasn't there"])
def test_paged_highlights():
    """Check that hits taken from a results page can be highlighted.

    Six documents are indexed, the term "alfa" is searched with
    ``search_page`` (page 1, three hits per page), and the first hit on the
    page is highlighted through the fragmenter/formatter set on
    ``page.results``.
    """
    schema = fields.Schema(text=fields.TEXT(stored=True))
    index = RamStorage().create_index(schema)

    lines = (
        "alfa bravo charlie delta echo foxtrot",
        "bravo charlie delta echo foxtrot golf",
        "charlie delta echo foxtrot golf hotel",
        "delta echo foxtrot golf hotel india",
        "echo foxtrot golf hotel india juliet",
        "foxtrot golf hotel india juliet kilo",
    )
    with index.writer() as writer:
        for line in lines:
            writer.add_document(text=u(line))

    with index.searcher() as searcher:
        alfa_query = query.Term("text", u("alfa"))
        page = searcher.search_page(alfa_query, 1, pagelen=3)

        # Highlight settings live on the underlying results object, not on
        # the page wrapper.
        underlying = page.results
        underlying.fragmenter = highlight.WholeFragmenter()
        underlying.formatter = highlight.UppercaseFormatter()

        first_hit = page[0]
        highlighted = first_hit.highlights("text")
        assert highlighted == u("ALFA bravo charlie delta echo foxtrot")
Exemple #4
0
def test_correct_correct():
    """A correctly spelled query must survive ``correct_query`` unchanged.

    Two words are indexed in a TEXT field inside a temporary index. The
    query text "dworska" already matches an indexed word, so both the
    corrected string and its formatted form are expected to equal the
    original text (nothing is upper-cased by the formatter).
    """
    from whoosh import qparser

    schema = fields.Schema(a=fields.TEXT())
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(a=u'dworska')
            w.add_document(a=u'swojska')

        # Use only the context-managed searcher. Rebinding ``s`` to a second
        # ``ix.searcher()`` inside this block would leave that extra searcher
        # open after the ``with`` exits.
        with ix.searcher() as s:
            qtext = u'dworska'

            qp = qparser.QueryParser('a', ix.schema)
            # The parser already holds the schema; parse() only needs the
            # query text (its second positional parameter is the
            # ``normalize`` flag, not a schema).
            q = qp.parse(qtext)
            c = s.correct_query(q, qtext)

            assert c.string == "dworska"
            string = c.format_string(highlight.UppercaseFormatter())
            assert string == "dworska"
Exemple #5
0
def test_correct_correct():
    """A correctly spelled query must survive ``correct_query`` unchanged.

    Two words are indexed in a TEXT field created with ``spelling=True`` in
    an in-memory index. The query text "dworska" already matches an indexed
    word, so both the corrected string and its formatted form are expected
    to equal the original text.
    """
    from whoosh import qparser

    schema = fields.Schema(a=fields.TEXT(spelling=True))
    ix = RamStorage().create_index(schema)

    # The writer context manager commits on exit.
    with ix.writer() as ix_writer:
        ix_writer.add_document(a=u('dworska'))
        ix_writer.add_document(a=u('swojska'))

    # Open the searcher in a ``with`` block so it is closed even when an
    # assertion fails, instead of being left open for the rest of the run.
    with ix.searcher() as s:
        qtext = u('dworska')

        qp = qparser.QueryParser('a', ix.schema)
        # The parser already holds the schema; parse() only needs the query
        # text (its second positional parameter is the ``normalize`` flag,
        # not a schema).
        q = qp.parse(qtext)
        c = s.correct_query(q, qtext)

        assert c.string == "dworska"
        assert c.format_string(highlight.UppercaseFormatter()) == "dworska"
def test_snippets():
    """Check the top sentence snippet returned for each hit of "key".

    Five paragraphs are indexed in a stored TEXT field that uses a
    StemmingAnalyzer. The query "key" is run with ``terms=True``, and each
    hit's best fragment (``top=1``) is produced with a SentenceFragmenter
    and an UppercaseFormatter. The snippets are compared to the expected
    list after sorting both sides, so hit order does not matter.
    """
    stemming = analysis.StemmingAnalyzer()
    schema = fields.Schema(text=fields.TEXT(stored=True, analyzer=stemming))
    index = RamStorage().create_index(schema)

    paragraphs = (
        "Lay out the rough animation by creating the important poses where they occur on the timeline.",
        "Set key frames on everything that's key-able. This is for control and predictability: you don't want to accidentally leave something un-keyed. This is also much faster than selecting the parameters to key.",
        "Use constant (straight) or sometimes linear transitions between keyframes in the channel editor. This makes the character jump between poses.",
        "Keying everything gives quick, immediate results. But it can become difficult to tweak the animation later, especially for complex characters.",
        "Copy the current pose to create the next one: pose the character, key everything, then copy the keyframe in the playbar to another frame, and key everything at that frame.",
    )
    writer = index.writer()
    for paragraph in paragraphs:
        writer.add_document(text=u(paragraph))
    writer.commit()

    expected = [
        "Set KEY frames on everything that's KEY-able",
        "Copy the current pose to create the next one: pose the character, KEY everything, then copy the keyframe in the playbar to another frame, and KEY everything at that frame",
        "KEYING everything gives quick, immediate results"
    ]

    with index.searcher() as searcher:
        parser = qparser.QueryParser("text", index.schema)
        parsed = parser.parse(u("key"))
        hits = searcher.search(parsed, terms=True)
        hits.fragmenter = highlight.SentenceFragmenter()
        hits.formatter = highlight.UppercaseFormatter()

        # One best-scoring fragment per hit.
        snippets = [hit.highlights("text", top=1) for hit in hits]
        assert sorted(snippets) == sorted(expected)
Exemple #7
0
def test_workflow_easy():
    """Highlight search hits through the results object's own settings.

    Five titles are indexed, the query "man" is run against the ``title``
    field with ``terms=True``, and every hit is highlighted via
    ``hit.highlights`` after a WholeFragmenter and an UppercaseFormatter
    have been set on the results.
    """
    schema = fields.Schema(id=fields.ID(stored=True),
                           title=fields.TEXT(stored=True))
    index = RamStorage().create_index(schema)

    documents = (
        ("1", "The man who wasn't there"),
        ("2", "The dog who barked at midnight"),
        ("3", "The invisible man"),
        ("4", "The girl with the dragon tattoo"),
        ("5", "The woman who disappeared"),
    )
    writer = index.writer()
    for doc_id, doc_title in documents:
        writer.add_document(id=u(doc_id), title=u(doc_title))
    writer.commit()

    with index.searcher() as searcher:
        # Turn the user's query string into a query object.
        query_parser = qparser.QueryParser("title", schema=index.schema)
        parsed = query_parser.parse(u("man"))
        hits = searcher.search(parsed, terms=True)
        assert len(hits) == 2

        # Whole titles are returned, with matched terms upper-cased.
        hits.fragmenter = highlight.WholeFragmenter()
        hits.formatter = highlight.UppercaseFormatter()

        highlighted = []
        for hit in hits:
            highlighted.append(hit.highlights("title"))
        assert highlighted == ["The invisible MAN", "The MAN who wasn't there"]
Exemple #8
0
            myquery = parser.parse( query.search_terms  )
            results = []
            reponse = []
            rr={}
            with self.queryIndex.searcher() as searcher:
                results = searcher.search( myquery )
                
                
                for result in results:
		  temp = result['content']
		  temp = temp.split("\t")
		  sugg= temp[0]
		  print result
		  rr['content']= sugg
                results.fragmenter = highlight.ContextFragmenter(surround=40)
                results.formatter = highlight.UppercaseFormatter()
                
                response = parse_whoosh_trec('WhooshQueryEngine', query.search_terms, results)
            return response

        # -----  The Following are Whoosh errors -----

        # There's a problem with the Whoosh query created from the users query
        except QueryError, e:
          raise SearchEngineError("Whoosh Query Suggest Engine", e, errorType="Whoosh", query=query)

        # Our Whoosh Index is empty
        except EmptyIndexError, e: 		
          raise SearchEngineError("Whoosh Query Suggest Engine", e, errorType="Whoosh")

        # Our Whoosh Index does not match our version of Whoosh