Ejemplo n.º 1
0
def test_within_delete():
    """Check ``fst.within`` on queries that differ from a stored word by one
    character in length ("df" vs. "def", "01" vs. "0")."""
    word_reader = greader(gwrite(enlist("abc def ghi")))
    found_words = set(fst.within(word_reader, "df"))
    assert found_words == {"def"}

    # Single-key graph: the result is compared as a list, not a set.
    digit_reader = greader(gwrite(enlist("0")))
    found_digits = list(fst.within(digit_reader, "01"))
    assert found_digits == ["0"]
Ejemplo n.º 2
0
def test_within_delete():
    """Look up "df" and "01" with ``fst.within`` and expect the stored keys
    "def" and "0" respectively."""
    storage = gwrite(enlist("abc def ghi"))
    matches = set(fst.within(greader(storage), "df"))
    assert matches == {"def"}

    # Second scenario uses a graph holding only the key "0".
    storage = gwrite(enlist("0"))
    matches = list(fst.within(greader(storage), "01"))
    assert matches == ["0"]
Ejemplo n.º 3
0
def test_within_replace():
    """Check ``fst.within`` on queries that differ from stored words by one
    substituted character."""
    word_reader = greader(gwrite(enlist("abc def ghi")))
    found_words = set(fst.within(word_reader, "dez"))
    assert found_words == {"def"}

    # "11" differs from "00" in both positions and is expected to be absent.
    bit_reader = greader(gwrite(enlist("00 01 10 11")))
    found_bits = set(fst.within(bit_reader, "00"))
    assert found_bits == {"00", "10", "01"}
Ejemplo n.º 4
0
def test_within_replace():
    """Look up "dez" and "00" with ``fst.within`` and compare the matched
    keys against the expected sets."""
    storage = gwrite(enlist("abc def ghi"))
    matches = set(fst.within(greader(storage), "dez"))
    assert matches == {"def"}

    # Of the four two-digit keys, only "11" is expected to be missing.
    storage = gwrite(enlist("00 01 10 11"))
    matches = set(fst.within(greader(storage), "00"))
    assert matches == {"00", "10", "01"}
Ejemplo n.º 5
0
 def _suggestions(self, text, maxdist, prefix):
     if self.dict.has_key(text):
         yield (len(text) * 10 + self.dict.get(text, 0) * 5, text)
     for sug in fst.within(self.graph, text, k=maxdist, prefix=prefix):
         # Higher scores are better, so negate the edit distance
         yield ((0 - maxdist) * 100 + len(sug) * 10 + self.dict.get(sug, 0),
                sug)
Ejemplo n.º 6
0
def test_within():
    """Look up "01" with ``k=1`` in a graph of binary strings up to three
    characters long and compare the matches with the expected set."""
    with TempStorage() as st:
        gwrite(enlist("0 00 000 001 01 010 011 1 10 100 101 11 110 111"), st)
        gr = greader(st)
        try:
            s = set(fst.within(gr, "01", k=1))
        finally:
            # Close the reader even when the lookup raises, so it is not
            # left open while the temporary storage is being torn down.
            gr.close()
    assert s == {"0", "00", "01", "011", "010", "001", "10", "101", "1", "11"}
Ejemplo n.º 7
0
 def terms_within(self, fieldname, text, maxdist, prefix=0):
     """Return the terms in ``fieldname`` that ``fst.within`` finds for
     ``text`` with ``k=maxdist``.

     When ``has_word_graph(fieldname)`` is false, the call is delegated to
     ``IndexReader.terms_within`` with the same arguments.
     """
     if self.has_word_graph(fieldname):
         graph_reader = self._get_graph()
         # NOTE(review): the field's root address is read from self._graph,
         # not from the reader returned by _get_graph() -- confirm both
         # refer to the same graph.
         field_root = self._graph.root(fieldname)
         return fst.within(graph_reader, text, k=maxdist, prefix=prefix,
                           address=field_root)

     # This reader doesn't have a graph stored, use the slow method
     return IndexReader.terms_within(self, fieldname, text, maxdist,
                                     prefix=prefix)
Ejemplo n.º 8
0
def test_within():
    """Look up "01" with ``k=1`` in a graph of binary strings up to three
    characters long and compare the matches with the expected set."""
    with TempStorage() as st:
        gwrite(enlist("0 00 000 001 01 010 011 1 10 100 101 11 110 111"), st)
        gr = greader(st)
        try:
            s = set(fst.within(gr, "01", k=1))
        finally:
            # Close the reader even when the lookup raises, so it is not
            # left open while the temporary storage is being torn down.
            gr.close()
    assert s == {
        "0", "00", "01", "011", "010", "001", "10", "101", "1", "11",
    }
Ejemplo n.º 9
0
 def terms_within(self, fieldname, text, maxdist, prefix=0):
     """Return the terms in ``fieldname`` that ``fst.within`` finds for
     ``text`` with ``k=maxdist``.

     When ``has_word_graph(fieldname)`` is false, the call is delegated to
     ``IndexReader.terms_within`` with the same arguments.
     """
     if self.has_word_graph(fieldname):
         graph_reader = self._get_graph()
         # NOTE(review): the field's root address is read from self._graph,
         # not from the reader returned by _get_graph() -- confirm both
         # refer to the same graph.
         field_root = self._graph.root(fieldname)
         return fst.within(
             graph_reader, text, k=maxdist, prefix=prefix, address=field_root
         )

     # This reader doesn't have a graph stored, use the slow method
     return IndexReader.terms_within(
         self, fieldname, text, maxdist, prefix=prefix
     )
Ejemplo n.º 10
0
def test_within_unicode():
    """Write three non-ASCII keys to an in-memory graph, then check that a
    query differing from the last key in its middle character finds it."""
    korean_key = u("\uc774\uc124\ud76c")
    keys = (
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        korean_key,
    )

    storage = RamStorage()
    writer = fst.GraphWriter(storage.create_file("test"))
    writer.start_field("test")
    for word in keys:
        writer.insert(word)
    writer.close()

    reader = fst.GraphReader(storage.open_file("test"))
    found = list(fst.within(reader, u("\uc774.\ud76c")))
    assert found == [korean_key]
Ejemplo n.º 11
0
def test_within_unicode():
    """Store three non-ASCII keys in a ``RamStorage`` graph and expect
    ``fst.within`` to return only the key closest to the query."""
    expected = u("\uc774\uc124\ud76c")
    keys = (
        u("\u280b\u2817\u2801\u281d\u2809\u2811"),
        u("\u65e5\u672c"),
        expected,
    )

    ram = RamStorage()
    graph_writer = fst.GraphWriter(ram.create_file("test"))
    graph_writer.start_field("test")
    for entry in keys:
        graph_writer.insert(entry)
    graph_writer.close()

    graph_reader = fst.GraphReader(ram.open_file("test"))
    # The query has "." where the stored key has its middle character.
    results = list(fst.within(graph_reader, u("\uc774.\ud76c")))
    assert results == [expected]
Ejemplo n.º 12
0
def test_within_prefix():
    """With ``prefix=2``, looking up "aaxc" is expected to return only the
    keys that start with "aa"."""
    reader = greader(gwrite(enlist("aabc aadc babc badc")))
    matches = set(fst.within(reader, "aaxc", prefix=2))
    assert matches == {"aabc", "aadc"}
Ejemplo n.º 13
0
def test_within_k2():
    """With ``k=2``, looking up "cb" is expected to match "abc" and "cba"
    but not "bac"."""
    reader = greader(gwrite(enlist("abc bac cba")))
    matches = set(fst.within(reader, "cb", k=2))
    assert matches == {"abc", "cba"}
Ejemplo n.º 14
0
def test_within_transpose():
    """Looking up "dfe" (the last two letters of "def" swapped) is expected
    to find "def"."""
    reader = greader(gwrite(enlist("abc def ghi")))
    matches = set(fst.within(reader, "dfe"))
    assert matches == {"def"}
Ejemplo n.º 15
0
def test_within_k2():
    """Query "cb" with ``k=2`` against three permutations of "abc"; only
    "abc" and "cba" are expected back."""
    storage = gwrite(enlist("abc bac cba"))
    found = set(fst.within(greader(storage), "cb", k=2))
    assert found == {"abc", "cba"}
Ejemplo n.º 16
0
def test_within_transpose():
    """Query "dfe" against the keys "abc", "def" and "ghi"; only "def" is
    expected back."""
    storage = gwrite(enlist("abc def ghi"))
    found = set(fst.within(greader(storage), "dfe"))
    assert found == {"def"}
Ejemplo n.º 17
0
def test_within_insert():
    """Looking up the one-character query "0" is expected to return every
    two-digit key that contains a "0"."""
    reader = greader(gwrite(enlist("00 01 10 11")))
    matches = set(fst.within(reader, "0"))
    assert matches == {"00", "01", "10"}
Ejemplo n.º 18
0
def test_within_match():
    """Looking up a key that is stored verbatim is expected to return just
    that key."""
    reader = greader(gwrite(enlist("abc def ghi")))
    matches = set(fst.within(reader, "def"))
    assert matches == {"def"}
Ejemplo n.º 19
0
def test_within_insert():
    """Query "0" against the four two-digit binary keys; "11" is the only
    key expected to be missing from the result."""
    storage = gwrite(enlist("00 01 10 11"))
    found = set(fst.within(greader(storage), "0"))
    assert found == {"00", "01", "10"}
Ejemplo n.º 20
0
def test_within_prefix():
    """Query "aaxc" with ``prefix=2``; the keys beginning with "ba" are
    expected to be excluded from the result."""
    storage = gwrite(enlist("aabc aadc babc badc"))
    found = set(fst.within(greader(storage), "aaxc", prefix=2))
    assert found == {"aabc", "aadc"}
Ejemplo n.º 21
0
 def _suggestions(self, text, maxdist, prefix):
     if self.dict.has_key(text):
         yield (len(text)*10 + self.dict.get(text,0)*5, text)
     for sug in fst.within(self.graph, text, k=maxdist, prefix=prefix):
         # Higher scores are better, so negate the edit distance
         yield ((0-maxdist)*100 + len(sug)*10 + self.dict.get(sug,0), sug)
Ejemplo n.º 22
0
 def _suggestions(self, text, maxdist, prefix):
     """Yield a ``(score, word)`` tuple for every word that ``fst.within``
     produces for ``text`` on ``self.graph``.

     ``maxdist`` is passed to ``fst.within`` as ``k`` and ``prefix`` as
     ``prefix``; the score of every tuple is ``0 - maxdist``.
     """
     candidates = fst.within(self.graph, text, k=maxdist, prefix=prefix)
     for candidate in candidates:
         # Higher scores are better, so the distance bound is negated.
         score = 0 - maxdist
         yield (score, candidate)
Ejemplo n.º 23
0
def test_within_match():
    """Query "def" against the keys "abc", "def" and "ghi"; only the exact
    match is expected back."""
    storage = gwrite(enlist("abc def ghi"))
    found = set(fst.within(greader(storage), "def"))
    assert found == {"def"}
 def _suggestions(self, text, maxdist, prefix):
     """Yield a ``(score, word)`` tuple for every word that ``fst.within``
     produces for ``text`` on ``self.graph``.

     ``maxdist`` is passed to ``fst.within`` as ``k`` and ``prefix`` as
     ``prefix``; the score of every tuple is ``0 - maxdist``.
     """
     candidates = fst.within(self.graph, text, k=maxdist, prefix=prefix)
     for candidate in candidates:
         # Higher scores are better, so the distance bound is negated.
         score = 0 - maxdist
         yield (score, candidate)