Exemple #1
0
 def test_single_element(self):
     """Check the index built from a one-document corpus.

     The document text is 'Simple text'; the expected index maps the
     lower-cased words 'simple' and 'text' to a list holding the
     document's URL.
     """
     url = 'http://test.net'

     # Expected mapping: each (lower-cased) word -> list of source URLs.
     expected = dd(list)
     for word in ('simple', 'text'):
         expected[word] = [url]

     index = II([(url, 'Simple text')])
     index.create_index()

     assert expected == index.index
Exemple #2
0
 def test_multiple_elements(self):
     """Check the index built from a three-document corpus.

     Words appear with mixed case across the documents; the expected
     index uses lower-cased keys and lists the document names in the
     order the documents appear in the corpus.
     """
     corpus = [
         ('One', 'one Two three'),
         ('two', 'three'),
         ('three', 'two Three'),
     ]

     # Expected postings, keyed by lower-cased word.
     expected = dd(list)
     expected.update({
         'one': ['One'],
         'two': ['One', 'three'],
         'three': ['One', 'two', 'three'],
     })

     index = II(corpus)
     index.create_index()

     assert expected == index.index
Exemple #3
0
def _build_and_save_index(db_path="msuspider.db", pickle_path='index.pickle'):
    """Build the inverted index from the database and pickle it to disk.

    Args:
        db_path: Path passed to ``DBService``.
        pickle_path: File the built index is pickled into (overwritten).

    Returns:
        The ``index`` attribute of the freshly built ``InvertedIndex``.
    """
    db_service = DBService(db_path)
    inverted_index = InvertedIndex(db_service.get_texts())
    inverted_index.create_index()

    with open(pickle_path, 'wb') as f:
        pickle.dump(inverted_index.index, f)

    return inverted_index.index


def main():
    """Ensure the index artifacts exist, then print a sample lookup.

    Steps, each performed only when the corresponding path is missing:

    1. ``./uncompressed_index`` -- build and pickle the index, create
       posting/word IDs and run ``IndexToByteConverter.build()``.
    2. ``./index.pickle`` -- build and pickle the index; otherwise load
       the in-memory index from the existing pickle.
    3. ``./compressed_index`` -- run ``IndexCompressor.build_delta()``.

    Finally the postings for one sample word are printed from both the
    byte-converted index and the in-memory index.
    """
    # If the uncompressed index does not exist.
    if not os.path.exists('./uncompressed_index'):
        fresh_index = _build_and_save_index()

        id_service = IDService(fresh_index)
        id_service.create_postings_ids()
        id_service.create_word_ids()

        converter = IndexToByteConverter(fresh_index)
        # Presumably this writes ./uncompressed_index -- confirm in the
        # converter implementation.
        converter.build()

    # If the in-memory index (pickle) does not exist. When the branch above
    # ran, index.pickle was just written, so the index is re-read from disk
    # here rather than reusing the object handed to IDService.
    if not os.path.exists('./index.pickle'):
        inv_index = _build_and_save_index()
    else:
        # NOTE: pickle.load can execute arbitrary code; this is only
        # acceptable because index.pickle is a cache produced by this
        # script. Do not point it at files from untrusted sources.
        with open('index.pickle', 'rb') as f:
            inv_index = pickle.load(f)

    # If the compressed index does not exist.
    if not os.path.exists('./compressed_index'):
        index_compressor = IndexCompressor(inv_index)
        # Presumably this writes ./compressed_index -- confirm in the
        # compressor implementation.
        index_compressor.build_delta()

    # Print the second posting (element [1]) for a sample word from both
    # index representations.
    converter = IndexToByteConverter()
    print(converter.get_word_postings('ректор')[1])
    print(inv_index['ректор'][1])