Example No. 1
def testTagGC(env):
    if env.isCluster():
        raise unittest.SkipTest()
    NumberOfDocs = 101
    env.expect('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0).equal('OK')
    env.assertOk(
        env.cmd('ft.create', 'idx', 'ON', 'HASH', 'schema', 't', 'tag'))
    waitForIndex(env, 'idx')

    for i in range(NumberOfDocs):
        env.assertOk(
            env.cmd('ft.add', 'idx', 'doc%d' % i, 1.0, 'fields', 't', '1'))

    for i in range(0, NumberOfDocs, 2):
        env.assertEqual(env.cmd('ft.del', 'idx', 'doc%d' % i), 1)

    for i in range(100):
        # GC is random, so invoke it enough times for it to take effect
        env.cmd('ft.debug', 'GC_FORCEINVOKE', 'idx')

    res = env.cmd('ft.debug', 'DUMP_TAGIDX', 'idx', 't')
    for r1 in res:
        for r2 in r1[1]:
            # if r2 is greater than 100, it is in the last block, and fork GC does not clean the last block
            env.assertTrue(r2 % 2 == 0 or r2 > 100)
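
Note: the examples in this listing call a waitForIndex helper that is defined elsewhere in the test suite. A minimal sketch of what it is assumed to do (poll FT.INFO until the index reports that background indexing has finished) could look like this:

import time

def waitForIndex(env, idx):
    # Assumed behavior: block until FT.INFO reports 'indexing' == 0,
    # i.e. background indexing of existing hashes has completed.
    while True:
        res = env.cmd('ft.info', idx)
        if int(res[res.index('indexing') + 1]) == 0:
            break
        time.sleep(0.1)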
Example No. 2
def testTradSimp(env):
    # Ensure that traditional Chinese characters get converted to their simplified variants
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'LANGUAGE_FIELD', '__language',
            'schema', 'txt', 'text')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'genS', 1.0, 'language', 'chinese', 'fields',
            'txt', GEN_CN_S)
    env.cmd('ft.add', 'idx', 'genT', 1.0, 'language', 'chinese', 'fields',
            'txt', GEN_CN_T)

    res = env.cmd('ft.search', 'idx', '那时', 'language', 'chinese', 'highlight',
                  'summarize')
    env.assertContains('<b>\xe9\x82\xa3\xe6\x99\x82</b>\xef... ', res[2])
    env.assertContains('<b>\xe9\x82\xa3\xe6\x97\xb6</b>\xef... ', res[4])

    # The variants should still show up as different, so as not to modify the stored documents
    res1 = {res[2][i]: res[2][i + 1] for i in range(0, len(res[2]), 2)}
    res2 = {res[4][i]: res[4][i + 1] for i in range(0, len(res[4]), 2)}
    env.assertTrue('那時' in res1['txt'])
    env.assertTrue('那时' in res2['txt'])

    # Ensure that searching in traditional still gives us the proper results:
    res = env.cmd('ft.search', 'idx', '那時', 'language', 'chinese', 'highlight')
    res1 = {res[2][i]: res[2][i + 1] for i in range(0, len(res[2]), 2)}
    res2 = {res[4][i]: res[4][i + 1] for i in range(0, len(res[4]), 2)}
    env.assertTrue('那時' in res1['txt'])
    env.assertTrue('那时' in res2['txt'])
Example No. 3
def testIssue364(env):
    # FT.CREATE testset "SCHEMA" "permit_timestamp" "NUMERIC" "SORTABLE" "job_category" "TEXT" "NOSTEM" "address" "TEXT" "NOSTEM"  "neighbourhood" "TAG" "SORTABLE" "description" "TEXT"  "building_type" "TEXT" "WEIGHT" "20" "NOSTEM" "SORTABLE"     "work_type" "TEXT" "NOSTEM" "SORTABLE"     "floor_area" "NUMERIC" "SORTABLE"     "construction_value" "NUMERIC" "SORTABLE"     "zoning" "TAG"     "units_added" "NUMERIC" "SORTABLE"     "location" "GEO"
    # ft.add testset 109056573-002 1 fields building_type "Retail and Shops" description "To change the use from a Restaurant to a Personal Service Shop (Great Clips)"
    # FT.SEARCH testset retail RETURN 1 description SUMMARIZE LIMIT 0 1
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'SCHEMA', 'building_type',
            'TEXT', 'description', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd(
        'ft.add', 'idx', 'doc1', '1.0', 'FIELDS', 'building_type',
        'Retail and Shops', 'description',
        'To change the use from a Restaurant to a Personal Service Shop (Great Clips)'
    )

    env.cmd(
        'ft.add', 'idx', 'doc2', '1.0', 'FIELDS', 'building_type',
        'Retail and Shops', 'description',
        'To change the use from a Restaurant to a Personal Service Shop (Great Clips) at the end'
    )

    ret = env.cmd('FT.SEARCH', 'idx', 'retail', 'RETURN', 1, 'description',
                  'SUMMARIZE')
    expected = [
        2L, 'doc2',
        [
            'description',
            'To change the use from a Restaurant to a Personal Service Shop (Great Clips) at the'
        ], 'doc1',
        [
            'description',
            'To change the use from a Restaurant to a Personal Service Shop (Great Clips)'
        ]
    ]
    env.assertEqual(toSortedFlatList(expected), toSortedFlatList(ret))
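
toSortedFlatList is another helper from the shared test utilities, used above so the comparison does not depend on the order in which documents are returned. A plausible sketch, assuming it simply flattens nested lists and sorts the stringified items:

def toSortedFlatList(reply):
    # Flatten arbitrarily nested lists into one list of strings, then sort,
    # so two replies can be compared regardless of element order.
    flat = []
    def _flatten(item):
        if isinstance(item, list):
            for sub in item:
                _flatten(sub)
        else:
            flat.append(str(item))
    _flatten(reply)
    return sorted(flat)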
Example No. 4
def testBasicGC(env):
    if env.isCluster():
        raise unittest.SkipTest()
    env.expect('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0).equal('OK')
    env.assertOk(
        env.cmd('ft.create', 'idx', 'ON', 'HASH', 'schema', 'title', 'text',
                'id', 'numeric', 't', 'tag'))
    waitForIndex(env, 'idx')
    for i in range(101):
        env.assertOk(
            env.cmd('ft.add', 'idx', 'doc%d' % i, 1.0, 'fields', 'title',
                    'hello world', 'id', '5', 't', 'tag1'))

    env.assertEqual(env.cmd('ft.debug', 'DUMP_INVIDX', 'idx', 'world'),
                    [long(i) for i in range(1, 102)])
    env.assertEqual(env.cmd('ft.debug', 'DUMP_NUMIDX', 'idx', 'id'),
                    [[long(i) for i in range(1, 102)]])
    env.assertEqual(env.cmd('ft.debug', 'DUMP_TAGIDX', 'idx', 't'),
                    [['tag1', [long(i) for i in range(1, 102)]]])

    env.assertEqual(env.cmd('ft.del', 'idx', 'doc0'), 1)

    for i in range(100):
        # GC is random, so invoke it enough times for it to take effect
        env.cmd('ft.debug', 'GC_FORCEINVOKE', 'idx')

    # check that the gc collected the deleted docs
    env.assertEqual(env.cmd('ft.debug', 'DUMP_INVIDX', 'idx', 'world'),
                    [long(i) for i in range(2, 102)])
    env.assertEqual(env.cmd('ft.debug', 'DUMP_NUMIDX', 'idx', 'id'),
                    [[long(i) for i in range(2, 102)]])
    env.assertEqual(env.cmd('ft.debug', 'DUMP_TAGIDX', 'idx', 't'),
                    [['tag1', [long(i) for i in range(2, 102)]]])
Example No. 5
def testGeoGCIntensive(env):
    if env.isCluster():
        raise unittest.SkipTest()
    NumberOfDocs = 1000
    env.expect('ft.config', 'set', 'FORK_GC_CLEAN_THRESHOLD', 0).equal('OK')
    env.assertOk(
        env.cmd('ft.create', 'idx', 'ON', 'HASH', 'schema', 'g', 'geo'))
    waitForIndex(env, 'idx')

    for i in range(NumberOfDocs):
        env.assertOk(
            env.cmd('ft.add', 'idx', 'doc%d' % i, 1.0, 'fields', 'g',
                    '12.34,56.78'))

    for i in range(0, NumberOfDocs, 2):
        env.assertEqual(env.cmd('ft.del', 'idx', 'doc%d' % i), 1)

    for i in range(100):
        env.cmd('ft.debug', 'GC_FORCEINVOKE', 'idx')

    res = env.cmd('ft.debug', 'DUMP_NUMIDX', 'idx', 'g')
    for r1 in res:
        for r2 in r1:
            # if r2 is greater than 900, it is in the last block, and fork GC does not clean the last block
            env.assertTrue(r2 % 2 == 0 or r2 > 900)
Example No. 6
def testTagVals(env):
    r = env
    r.execute_command('ft.create', 'idx', 'ON', 'HASH', 'schema', 'title',
                      'text', 'tags', 'tag', 'othertags', 'tag')

    N = 100
    alltags = set()
    for n in range(N):
        tags = ('foo %d' % n, 'bar %d' % n, 'x')
        alltags.add(tags[0])
        alltags.add(tags[1])
        alltags.add(tags[2])

        env.assertOk(
            r.execute_command('ft.add', 'idx', 'doc%d' % n, 1.0, 'fields',
                              'tags', ','.join(tags), 'othertags',
                              'baz %d' % int(n // 2)))
    for _ in r.retry_with_rdb_reload():
        waitForIndex(r, 'idx')
        res = r.execute_command('ft.tagvals', 'idx', 'tags')
        env.assertEqual(N * 2 + 1, len(res))

        env.assertEqual(alltags, set(res))

        res = r.execute_command('ft.tagvals', 'idx', 'othertags')
        env.assertEqual(N / 2, len(res))

        env.expect('ft.tagvals', 'idx').raiseError()
        env.expect('ft.tagvals', 'idx', 'idx', 'idx').raiseError()
        env.expect('ft.tagvals', 'fake_idx', 'tags').raiseError()
        env.expect('ft.tagvals', 'idx', 'fake_tags').raiseError()
        env.expect('ft.tagvals', 'idx', 'title').raiseError()
Example No. 7
def testBenchmarkNumeric(env):
  random.seed()
  num_docs = 1000000
  copies = 10
  num_queries = 1
  pipe_batch = 1000

  pl = env.getConnection().pipeline()
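  # Load num_docs hashes through a single pipeline, flushing every pipe_batch commands to keep memory bounded.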
  for i in range(num_docs):
    pl.execute_command('HSET','doc%d' % i, 'n', (i % copies) * 0.99)
    if i % pipe_batch == 0:
      pl.execute()
  pl.execute()
  print 'create index'
  env.expect('FT.CREATE idx SCHEMA n NUMERIC').ok()
  waitForIndex(env, 'idx')

  for i in range(num_queries):
    pl.execute_command('FT.SEARCH','idx', '@n:[0 %d]' % num_docs, 'LIMIT', 0, 0)
    
  start_time = time()
  pl.execute()
  print time() - start_time

  print env.cmd('ft.info idx')
Example No. 8
def aofTestCommon(env, reloadfn):
    # TODO: Change this attribute in rmtest
    conn = getConnectionByEnv(env)
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'schema', 'field1', 'text',
            'field2', 'numeric')
    for x in range(1, 10):
        conn.execute_command('hset', 'doc{}'.format(x), 'field1',
                             'myText{}'.format(x), 'field2', 20 * x)

    reloadfn()
    waitForIndex(env, 'idx')
    exp = [
        9L, 'doc1', ['field1', 'myText1', 'field2', '20'], 'doc2',
        ['field1', 'myText2', 'field2', '40'], 'doc3',
        ['field1', 'myText3', 'field2', '60'], 'doc4',
        ['field1', 'myText4', 'field2', '80'], 'doc5',
        ['field1', 'myText5', 'field2', '100'], 'doc6',
        ['field1', 'myText6', 'field2', '120'], 'doc7',
        ['field1', 'myText7', 'field2', '140'], 'doc8',
        ['field1', 'myText8', 'field2', '160'], 'doc9',
        ['field1', 'myText9', 'field2', '180']
    ]

    reloadfn()
    waitForIndex(env, 'idx')
    ret = env.cmd('ft.search', 'idx', 'myt*')
    env.assertEqual(toSortedFlatList(ret), toSortedFlatList(exp))
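
getConnectionByEnv is also provided by the shared utilities. The sketch below shows the assumed intent: return a connection on which plain Redis commands such as HSET work whether the environment is a single instance or a cluster (the runner accessor used here is an assumption, not a confirmed API):

def getConnectionByEnv(env):
    # Assumption: single-instance setups can use env.getConnection() directly,
    # while cluster setups need a cluster-aware connection from the test runner.
    if env.isCluster():
        return env.envRunner.getClusterConnection()  # assumed RLTest runner accessor
    return env.getConnection()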
Example No. 9
def testBM25ScorerExplanation(env):
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'SCORE_FIELD', '__score',
               'schema', 'title', 'text', 'weight', 10, 'body', 'text').ok()
    waitForIndex(env, 'idx')
    env.expect('ft.add', 'idx', 'doc1', 0.5, 'fields', 'title', 'hello world',' body', 'lorem ist ipsum').ok()
    env.expect('ft.add', 'idx', 'doc2', 1, 'fields', 'title', 'hello another world',' body', 'lorem ist ipsum lorem lorem').ok()
    env.expect('ft.add', 'idx', 'doc3', 0.1, 'fields', 'title', 'hello yet another world',' body', 'lorem ist ipsum lorem lorem').ok()
    res = env.cmd('ft.search', 'idx', 'hello world', 'withscores', 'EXPLAINSCORE', 'scorer', 'BM25')
    env.assertEqual(res[0], 3L)
    if env.isCluster():
        env.assertContains('Final BM25', res[2][1][0])
        env.assertContains('Final BM25', res[5][1][0])
        env.assertContains('Final BM25', res[8][1][0])
    else:
        env.assertEqual(res[2][1], ['Final BM25 : words BM25 1.56 * document score 0.50 / slop 1',
                            [['(Weight 1.00 * children BM25 1.56)',
                            ['(0.78 = IDF 1.00 * F 10 / (F 10 + k1 1.2 * (1 - b 0.5 + b 0.5 * Average Len 3.67)))',
                            '(0.78 = IDF 1.00 * F 10 / (F 10 + k1 1.2 * (1 - b 0.5 + b 0.5 * Average Len 3.67)))']]]])
        env.assertEqual(res[5][1], ['Final BM25 : words BM25 1.56 * document score 1.00 / slop 2',
                            [['(Weight 1.00 * children BM25 1.56)',
                            ['(0.78 = IDF 1.00 * F 10 / (F 10 + k1 1.2 * (1 - b 0.5 + b 0.5 * Average Len 3.67)))',
                            '(0.78 = IDF 1.00 * F 10 / (F 10 + k1 1.2 * (1 - b 0.5 + b 0.5 * Average Len 3.67)))']]]])
        env.assertEqual(res[8][1], ['Final BM25 : words BM25 1.56 * document score 0.10 / slop 3',
                            [['(Weight 1.00 * children BM25 1.56)',
                            ['(0.78 = IDF 1.00 * F 10 / (F 10 + k1 1.2 * (1 - b 0.5 + b 0.5 * Average Len 3.67)))',
                            '(0.78 = IDF 1.00 * F 10 / (F 10 + k1 1.2 * (1 - b 0.5 + b 0.5 * Average Len 3.67)))']]]])
Example No. 10
def testSummarizationMultiField(env):
    p1 = "Redis is an open-source in-memory database project implementing a networked, in-memory key-value store with optional durability. Redis supports different kinds of abstract data structures, such as strings, lists, maps, sets, sorted sets, hyperloglogs, bitmaps and spatial indexes. The project is mainly developed by Salvatore Sanfilippo and is currently sponsored by Redis Labs.[4] Redis Labs creates and maintains the official Redis Enterprise Pack."
    p2 = "Redis typically holds the whole dataset in memory. Versions up to 2.4 could be configured to use what they refer to as virtual memory[19] in which some of the dataset is stored on disk, but this feature is deprecated. Persistence is now achieved in two different ways: one is called snapshotting, and is a semi-persistent durability mode where the dataset is asynchronously transferred from memory to disk from time to time, written in RDB dump format. Since version 1.1 the safer alternative is AOF, an append-only file (a journal) that is written as operations modifying the dataset in memory are processed. Redis is able to rewrite the append-only file in the background in order to avoid an indefinite growth of the journal."

    env.cmd('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 'txt1', 'TEXT', 'txt2',
            'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('FT.ADD', 'idx', 'redis', 1.0, 'FIELDS', 'txt1', p1, 'txt2', p2)

    # Now perform the multi-field search
    env.cmd('FT.SEARCH', 'idx', 'memory persistence salvatore', 'HIGHLIGHT',
            'TAGS', '<b>', '</b>', 'SUMMARIZE', 'LEN', 5, 'RETURN', 2, 'txt1',
            'txt2')

    # Now perform the multi-field search
    res = env.cmd('FT.SEARCH', 'idx', 'memory persistence salvatore',
                  'SUMMARIZE', 'FIELDS', 2, 'txt1', 'txt2', 'LEN', 5)

    env.assertEqual(1L, res[0])
    env.assertEqual('redis', res[1])
    for term in [
            'txt1',
            'memory database project implementing a networked, in-memory ... by Salvatore Sanfilippo... ',
            'txt2',
            'dataset in memory. Versions... as virtual memory[19] in... persistent durability mode where the dataset is asynchronously transferred from memory... '
    ]:
        env.assertIn(term, res[2])
Example No. 11
def testScoreError(env):
    env.skipOnCluster()
    env.expect('ft.create idx ON HASH schema title text').ok()
    waitForIndex(env, 'idx')
    env.expect('ft.add idx doc1 0.01 fields title hello').ok()
    env.expect('ft.search idx hello EXPLAINSCORE').error().contains(
        'EXPLAINSCORE must be accompanied with WITHSCORES')
Example No. 12
def testSynonymsIntensiveLoad(env):
    iterations = 1000
    r = env
    env.assertOk(
        r.execute_command('ft.create', 'idx', 'ON', 'HASH', 'schema', 'title',
                          'text', 'body', 'text'))
    for i in range(iterations):
        env.assertEqual(
            r.execute_command('ft.synupdate', 'idx', 'id%d' % i, 'boy%d' % i,
                              'child%d' % i, 'offspring%d' % i), 'OK')
    for i in range(iterations):
        env.assertOk(
            r.execute_command('ft.add', 'idx', 'doc%d' % i, 1.0, 'fields',
                              'title', 'he is a boy%d' % i, 'body',
                              'this is a test'))
    for _ in env.reloading_iterator():
        waitForIndex(r, 'idx')
        for i in range(iterations):
            res = r.execute_command('ft.search', 'idx', 'child%d' % i,
                                    'EXPANDER', 'SYNONYM')
            env.assertEqual(res[0:2], [1L, 'doc%d' % i])
            env.assertEqual(
                set(res[2]),
                set(['title',
                     'he is a boy%d' % i, 'body', 'this is a test']))
Example No. 13
def testCn(env):
    text = open(SRCTEXT).read()
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'LANGUAGE', 'CHINESE', 'schema',
            'txt', 'text')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'FIELDS', 'txt', text)
    res = env.cmd('ft.search', 'idx', '之旅', 'SUMMARIZE', 'HIGHLIGHT',
                  'LANGUAGE', 'chinese')
    cn = '2009\xe5\xb9\xb4\xef\xbc\x98\xe6\x9c\x88\xef\xbc\x96\xe6\x97\xa5\xe5\xbc\x80\xe5\xa7\x8b\xe5\xa4\xa7\xe5\xad\xa6<b>\xe4\xb9\x8b\xe6\x97\x85</b>\xef\xbc\x8c\xe5\xb2\xb3\xe9\x98\xb3\xe4\xbb\x8a\xe5\xa4\xa9\xe7\x9a\x84\xe6\xb0\x94\xe6\xb8\xa9\xe4\xb8\xba38.6\xe2\x84\x83, \xe4\xb9\x9f\xe5\xb0\xb1\xe6\x98\xaf101.48\xe2\x84\x89... \xef\xbc\x8c \xe5\x8d\x95\xe4\xbd\x8d \xe5\x92\x8c \xe5\x85\xa8\xe8\xa7\x92 : 2009\xe5\xb9\xb4 8\xe6\x9c\x88 6\xe6\x97\xa5 \xe5\xbc\x80\xe5\xa7\x8b \xe5\xa4\xa7\xe5\xad\xa6 <b>\xe4\xb9\x8b\xe6\x97\x85</b> \xef\xbc\x8c \xe5\xb2\xb3\xe9\x98\xb3 \xe4\xbb\x8a\xe5\xa4\xa9 \xe7\x9a\x84 \xe6\xb0\x94\xe6\xb8\xa9 \xe4\xb8\xba 38.6\xe2\x84\x83 , \xe4\xb9\x9f\xe5\xb0\xb1\xe6\x98\xaf 101... '
    env.assertContains(cn, res[2])

    res = env.cmd('ft.search', 'idx', 'hacker', 'summarize', 'highlight')
    cn = ' visit http://code.google.com/p/jcseg, we all admire the <b>hacker</b> spirit!\xe7\x89\xb9\xe6\xae\x8a\xe6\x95\xb0\xe5\xad\x97: \xe2\x91\xa0 \xe2\x91\xa9 \xe2\x91\xbd \xe3\x88\xa9. ... p / jcseg , we all admire appreciate like love enjoy the <b>hacker</b> spirit mind ! \xe7\x89\xb9\xe6\xae\x8a \xe6\x95\xb0\xe5\xad\x97 : \xe2\x91\xa0 \xe2\x91\xa9 \xe2\x91\xbd \xe3\x88\xa9 . ~~~ ... '
    env.assertContains(cn, res[2])

    # Check that we can tokenize English with friso (sub-optimal, but we don't want gibberish)
    gentxt = open(GENTXT).read()
    env.cmd('ft.add', 'idx', 'doc2', 1.0, 'FIELDS', 'txt', gentxt)
    res = env.cmd('ft.search', 'idx', 'abraham', 'summarize', 'highlight')
    cn = 'thy name any more be called Abram, but thy name shall be <b>Abraham</b>; for a father of many nations have I made thee. {17:6} And... and I will be their God. {17:9} And God said unto <b>Abraham</b>, Thou shalt keep my covenant therefore, thou, and thy seed... hath broken my covenant. {17:15} And God said unto <b>Abraham</b>, As for Sarai thy wife, thou shalt not call her name Sarai... '
    env.assertContains(cn, res[2])

    # Add an empty document. Hope we don't crash!
    env.cmd('ft.add', 'idx', 'doc3', 1.0, 'fields', 'txt1', '')

    # Check splitting. TODO - see how to actually test for matches
    env.cmd('ft.search', 'idx', 'redis客户端', 'language', 'chinese')
    env.cmd('ft.search', 'idx', '简介Redisson 是一个高级的分布式协调Redis客户端', 'language',
            'chinese')
Example No. 14
def testReplace(env):
    conn = getConnectionByEnv(env)
    r = env

    r.expect('ft.create idx schema f text').ok()

    res = conn.execute_command('HSET', 'doc1', 'f', 'hello world')
    env.assertEqual(res, 1)
    res = conn.execute_command('HSET', 'doc2', 'f', 'hello world')
    env.assertEqual(res, 1)
    res = r.execute_command('ft.search', 'idx', 'hello world')
    r.assertEqual(2, res[0])

    # now replace doc1 with a different content
    res = conn.execute_command('HSET', 'doc1', 'f', 'goodbye universe')
    env.assertEqual(res, 0)

    for _ in r.retry_with_rdb_reload():
        waitForIndex(env, 'idx')
        # make sure the query for hello world does not return the replaced document
        r.expect('ft.search', 'idx', 'hello world',
                 'nocontent').equal([1, 'doc2'])

        # search for the doc's new content
        r.expect('ft.search', 'idx', 'goodbye universe',
                 'nocontent').equal([1, 'doc1'])
Example No. 15
def testSpellCheckResultsOrder():
    env = Env()
    env.cmd('ft.dictadd', 'dict', 'name')
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'SCHEMA', 'name', 'TEXT', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'FIELDS', 'name', 'Elior', 'body', 'body1')
    env.cmd('ft.add', 'idx', 'doc2', 1.0, 'FIELDS', 'name', 'Hila', 'body', 'body2')
    env.expect('ft.spellcheck', 'idx', 'Elioh Hilh').equal([['TERM', 'elioh', [['0.5', 'elior']]], ['TERM', 'hilh', [['0.5', 'hila']]]])
Example No. 16
def testSpellCheckOnExistingTerm():
    env = Env()
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'SCHEMA', 'name', 'TEXT', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'FIELDS', 'name', 'name', 'body', 'body1')
    env.cmd('ft.add', 'idx', 'doc2', 1.0, 'FIELDS', 'name', 'name2', 'body', 'body2')
    env.cmd('ft.add', 'idx', 'doc3', 1.0, 'FIELDS', 'name', 'name2', 'body', 'name2')
    env.expect('ft.spellcheck', 'idx', 'name').equal([])
Example No. 17
def testBasicSpellCheckWithNoResult():
    env = Env()
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'SCHEMA', 'name', 'TEXT', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'FIELDS', 'name', 'name1', 'body', 'body1')
    env.cmd('ft.add', 'idx', 'doc2', 1.0, 'FIELDS', 'name', 'name2', 'body', 'body2')
    env.cmd('ft.add', 'idx', 'doc3', 1.0, 'FIELDS', 'name', 'name2', 'body', 'name2')
    env.expect('ft.spellcheck', 'idx', 'somenotexiststext').equal([['TERM', 'somenotexiststext', []]])
Example No. 18
def createIndex(env, r):
    r.expect('ft.create', 'idx', 'ON', 'HASH', 'schema', 'txt', 'text').ok()
    waitForIndex(r, 'idx')

    for i in xrange(1000):
        did, tokens = generate_random_doc(env)

        r.execute_command('ft.add', 'idx', did, 1.0, 'fields', 'txt',
                          ' '.join(tokens))
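
createIndex relies on a generate_random_doc helper that is not part of this listing. A hypothetical stand-in that returns a document id plus a list of random tokens is enough to make the example self-contained (the name, signature, and token scheme below are illustrative assumptions):

import random
import string

def generate_random_doc(env, num_tokens=20):
    # Hypothetical helper: build a pseudo-unique doc id and a list of random
    # five-letter lowercase tokens to index.
    did = 'doc-%06d' % random.randint(0, 999999)
    tokens = [''.join(random.choice(string.ascii_lowercase) for _ in range(5))
              for _ in range(num_tokens)]
    return did, tokens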
Example No. 19
def testMixedHighlight(env):
    txt = r"""
Redis支持主从同步。数据可以从主服务器向任意数量的从服务器上同步,从服务器可以是关联其他从服务器的主服务器。这使得Redis可执行单层树复制。从盘可以有意无意的对数据进行写操作。由于完全实现了发布/订阅机制,使得从数据库在任何地方同步树时,可订阅一个频道并接收主服务器完整的消息发布记录。同步对读取操作的可扩展性和数据冗余很有帮助。[8]
"""
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'LANGUAGE_FIELD', 'chinese', 'schema', 'txt', 'text')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'language', 'chinese', 'fields', 'txt', txt)
    # Should not crash!
    env.cmd('ft.search', 'idx', 'redis', 'highlight')
Example No. 20
def testSpellCheckWrongArity():
    env = Env()
    env.cmd('ft.dictadd', 'dict', 'name')
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'SCHEMA', 'name', 'TEXT', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'FIELDS', 'name', 'name1', 'body', 'body1')
    env.cmd('ft.add', 'idx', 'doc2', 1.0, 'FIELDS', 'name', 'name2', 'body', 'body2')
    env.cmd('ft.add', 'idx', 'doc3', 1.0, 'FIELDS', 'name', 'name2', 'body', 'name2')
    env.expect('ft.spellcheck', 'idx').raiseError()
Example No. 21
def __init__(self):
    self.env = Env(testName="testing debug commands")
    self.env.skipOnCluster()
    self.env.expect('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 'name',
                    'TEXT', 'SORTABLE', 'age', 'NUMERIC', 'SORTABLE', 't',
                    'TAG', 'SORTABLE').ok()
    waitForIndex(self.env, 'idx')
    self.env.expect('FT.ADD', 'idx', 'doc1', '1.0', 'FIELDS', 'name',
                    'meir', 'age', '29', 't', 'test').ok()
    self.env.cmd('SET', 'foo', 'bar')
Example No. 22
def testSummarizationNoSave(env):
    env.skip()
    env.cmd('FT.CREATE', 'idx', 'ON', 'HASH', 'SCHEMA', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('FT.ADD', 'idx', 'doc', 1.0, 'NOSAVE', 'fields', 'body',
            'hello world')
    res = env.cmd('FT.SEARCH', 'idx', 'hello', 'SUMMARIZE', 'RETURN', 1,
                  'body')
    # print res
    env.assertEqual([1L, 'doc', []], res)
Example No. 23
def testSpellCheckExcludeDict():
    env = Env()
    env.cmd('ft.dictadd', 'dict', 'name')
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'SCHEMA', 'name', 'TEXT', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'FIELDS', 'name', 'name1', 'body', 'body1')
    env.cmd('ft.add', 'idx', 'doc2', 1.0, 'FIELDS', 'name', 'name2', 'body', 'body2')
    env.cmd('ft.add', 'idx', 'doc3', 1.0, 'FIELDS', 'name', 'name2', 'body', 'name2')
    env.expect('ft.spellcheck', 'idx', 'name', 'TERMS', 'EXCLUDE', 'dict').equal([])
    env.expect('ft.spellcheck', 'idx', 'name', 'TERMS', 'exclude', 'dict').equal([])
Example No. 24
def testTFIDFScorerExplanation(env):
    conn = getConnectionByEnv(env)
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'SCORE_FIELD', '__score',
               'schema', 'title', 'text', 'weight', 10, 'body', 'text').ok()
    waitForIndex(env, 'idx')

    env.execute_command('ft.add', 'idx', 'doc1', 0.5, 'fields', 'title', 'hello world',' body', 'lorem ist ipsum')
    env.execute_command('ft.add', 'idx', 'doc2', 1, 'fields', 'title', 'hello another world',' body', 'lorem ist ipsum lorem lorem')
    env.execute_command('ft.add', 'idx', 'doc3', 0.1, 'fields', 'title', 'hello yet another world',' body', 'lorem ist ipsum lorem lorem')

    res = env.cmd('ft.search', 'idx', 'hello world', 'withscores', 'EXPLAINSCORE')
    env.assertEqual(res[0], 3)
    env.assertEqual(res[2][1], ['Final TFIDF : words TFIDF 20.00 * document score 0.50 / norm 10 / slop 1',
                                [['(Weight 1.00 * total children TFIDF 20.00)',
                                ['(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)',
                                '(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)']]]])
    env.assertEqual(res[5][1],['Final TFIDF : words TFIDF 20.00 * document score 1.00 / norm 10 / slop 2',
                                [['(Weight 1.00 * total children TFIDF 20.00)',
                                ['(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)',
                                '(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)']]]])
    env.assertEqual(res[8][1], ['Final TFIDF : words TFIDF 20.00 * document score 0.10 / norm 10 / slop 3',
                                [['(Weight 1.00 * total children TFIDF 20.00)',
                                ['(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)',
                                '(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)']]]])

    # test depth limit

    res = env.cmd('ft.search', 'idx', 'hello(world(world))', 'withscores', 'EXPLAINSCORE', 'limit', 0, 1)
    env.assertEqual(res[2][1], ['Final TFIDF : words TFIDF 30.00 * document score 0.50 / norm 10 / slop 1',
                                [['(Weight 1.00 * total children TFIDF 30.00)',
                                    ['(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)',
                                        ['(Weight 1.00 * total children TFIDF 20.00)',
                                            ['(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)',
                                            '(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)']]]]]])

    res1 = ['Final TFIDF : words TFIDF 40.00 * document score 1.00 / norm 10 / slop 1',
                [['(Weight 1.00 * total children TFIDF 40.00)',
                    ['(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)',
                        ['(Weight 1.00 * total children TFIDF 30.00)',
                            ['(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)',
                                ['(Weight 1.00 * total children TFIDF 20.00)',
                                    ['(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)',
                                     '(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)']]]]]]]]
    res2 = ['Final TFIDF : words TFIDF 40.00 * document score 1.00 / norm 10 / slop 1',
                [['(Weight 1.00 * total children TFIDF 40.00)',
                    ['(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)',
                        ['(Weight 1.00 * total children TFIDF 30.00)',
                            ['(TFIDF 10.00 = Weight 1.00 * TF 10 * IDF 1.00)',
                             '(Weight 1.00 * total children TFIDF 20.00)']]]]]]


    actual_res = env.cmd('ft.search', 'idx', 'hello(world(world(hello)))', 'withscores', 'EXPLAINSCORE', 'limit', 0, 1)
    # On older versions the reply is trimmed to remain under the 7-layer nesting limitation.
    res = res1 if server_version_at_least(env, "6.2.0") else res2
    env.assertEqual(actual_res[2][1], res)
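
The version gate above uses server_version_at_least, another suite helper. A rough sketch, assuming it compares the redis_version string reported by INFO against the requested minimum:

def server_version_at_least(env, min_version):
    # Assumption: INFO's 'redis_version' is "major.minor.patch"; compare numerically.
    info = env.getConnection().info('server')
    current = tuple(int(p) for p in info['redis_version'].split('.')[:3])
    required = tuple(int(p) for p in str(min_version).split('.')[:3])
    return current >= required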
Example No. 25
def testBasicSpellCheck():
    env = Env()
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'SCHEMA', 'name', 'TEXT', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'FIELDS', 'name', 'name1', 'body', 'body1')
    env.cmd('ft.add', 'idx', 'doc2', 1.0, 'FIELDS', 'name', 'name2', 'body', 'body2')
    env.cmd('ft.add', 'idx', 'doc3', 1.0, 'FIELDS', 'name', 'name2', 'body', 'name2')
    env.expect('ft.spellcheck', 'idx', 'name').equal([['TERM', 'name',
                                                     [['0.66666666666666663', 'name2'], ['0.33333333333333331', 'name1']]]])
    if not env.isCluster():
        env.expect('ft.spellcheck', 'idx', '@body:name').equal([['TERM', 'name', [['0.66666666666666663', 'name2']]]])
Example No. 26
def testSynonymsRdb(env):
    r = env
    env.assertOk(
        r.execute_command('ft.create', 'idx', 'ON', 'HASH', 'schema', 'title',
                          'text', 'body', 'text'))
    env.assertEqual(
        r.execute_command('ft.synadd', 'idx', 'boy', 'child', 'offspring'), 0)
    for _ in env.reloading_iterator():
        waitForIndex(env, 'idx')
        env.assertEqual(r.execute_command('ft.syndump', 'idx'),
                        ['offspring', [0L], 'child', [0L], 'boy', [0L]])
Example No. 27
def loadDocs(env, count=100, idx='idx', text='hello world'):
    env.expect('FT.CREATE', idx, 'ON', 'HASH', 'prefix', 1, idx, 'SCHEMA', 'f1', 'TEXT').ok()
    waitForIndex(env, idx)
    for x in range(count):
        cmd = ['FT.ADD', idx, '{}_doc{}'.format(idx, x), 1.0, 'FIELDS', 'f1', text]
        env.cmd(*cmd)
    r1 = env.cmd('ft.search', idx, text)
    r2 = list(set(map(lambda x: x[1], filter(lambda x: isinstance(x, list), r1))))
    env.assertEqual([text], r2)
    r3 = env.cmd('ft.info', idx)
    env.assertEqual(count, int(r3[r3.index('num_docs') + 1]))
Example No. 28
def testSpellCheckWithDuplications():
    env = Env()
    env.cmd('ft.dictadd', 'dict', 'name1', 'name4', 'name5')
    env.cmd('ft.create', 'idx', 'ON', 'HASH', 'SCHEMA', 'name', 'TEXT', 'body', 'TEXT')
    waitForIndex(env, 'idx')
    env.cmd('ft.add', 'idx', 'doc1', 1.0, 'FIELDS', 'name', 'name1', 'body', 'body1')
    env.cmd('ft.add', 'idx', 'doc2', 1.0, 'FIELDS', 'name', 'name2', 'body', 'body2')
    env.cmd('ft.add', 'idx', 'doc3', 1.0, 'FIELDS', 'name', 'name2', 'body', 'name2')
    env.expect('ft.spellcheck', 'idx', 'name', 'TERMS', 'INCLUDE', 'dict').equal([['TERM', 'name',
                                                                                   [['0.66666666666666663', 'name2'],
                                                                                    ['0.33333333333333331', 'name1'],
                                                                                    ['0', 'name4'], ['0', 'name5']]]])
Example No. 29
def testDocscoreScorerExplanation(env):
    env.expect('ft.create', 'idx', 'ON', 'HASH', 'SCORE_FIELD', '__score',
               'schema', 'title', 'text', 'weight', 10, 'body', 'text').ok()
    waitForIndex(env, 'idx')
    env.expect('ft.add', 'idx', 'doc1', 0.5, 'fields', 'title', 'hello world',' body', 'lorem ist ipsum').ok()
    env.expect('ft.add', 'idx', 'doc2', 1, 'fields', 'title', 'hello another world',' body', 'lorem ist ipsum lorem lorem').ok()
    env.expect('ft.add', 'idx', 'doc3', 0.1, 'fields', 'title', 'hello yet another world',' body', 'lorem ist ipsum lorem lorem').ok()
    res = env.cmd('ft.search', 'idx', 'hello world', 'withscores', 'EXPLAINSCORE', 'scorer', 'DOCSCORE')
    env.assertEqual(res[0], 3L)
    env.assertEqual(res[2][1], "Document's score is 1.00")
    env.assertEqual(res[5][1], "Document's score is 0.50")
    env.assertEqual(res[8][1], "Document's score is 0.10")
Example No. 30
def testScoreReplace(env):
    conn = getConnectionByEnv(env)
    env.expect('ft.create idx ON HASH schema f text').ok()
    waitForIndex(env, 'idx')
    conn.execute_command('HSET', 'doc1', 'f', 'redisearch')
    conn.execute_command('HSET', 'doc1', 'f', 'redisearch')
    env.expect('FT.SEARCH idx redisearch withscores nocontent').equal([1L, 'doc1', '1'])
    conn.execute_command('HSET', 'doc1', 'f', 'redisearch')
    env.expect('FT.SEARCH idx redisearch withscores nocontent').equal([1L, 'doc1', '0'])
    if not env.isCluster():
        env.expect('ft.config set FORK_GC_CLEAN_THRESHOLD 0').ok()
        env.expect('ft.debug GC_FORCEINVOKE idx').equal('DONE')
        env.expect('FT.SEARCH idx redisearch withscores nocontent').equal([1L, 'doc1', '1'])