Example #1
0
    def test_Smoke(self):
        """End-to-end smoke test: write a log, replay it, hash it, read it back."""
        writer = sparkey.LogWriter(self.logfile)
        for i in range(10):
            writer.put('key%d' % i, 'value%d' % i)
        writer.close()

        # The log replays entries in insertion order as (key, value, type) tuples.
        # 'entry_type' instead of 'type' to avoid shadowing the builtin.
        reader = sparkey.LogReader(self.logfile)
        for i, (key, value, entry_type) in enumerate(reader):
            self.assertEqual('key%d' % i, key)
            self.assertEqual('value%d' % i, value)
            self.assertEqual(sparkey.IterType.PUT, entry_type)

        self.assertEqual(9, i)  # exactly 10 entries were replayed
        reader.close()

        sparkey.writehash(self.hashfile, self.logfile)

        # Random access through the hash index: length, membership,
        # ordered iteration, and point lookups.
        hashreader = sparkey.HashReader(self.hashfile, self.logfile)
        self.assertEqual(10, len(hashreader))
        for i in range(10):
            self.assertIn('key%d' % i, hashreader)

        self.assertNotIn('key_miss', hashreader)

        for i, (key, value) in enumerate(hashreader):
            self.assertEqual('key%d' % i, key)
            self.assertEqual('value%d' % i, value)
        self.assertEqual(9, i)

        self.assertEqual('value0', hashreader.get('key0'))
        self.assertEqual('value9', hashreader.get('key9'))
        self.assertIsNone(hashreader.get('key10'))  # misses yield None

        hashreader.close()
Example #2
0
 def _create(self, compression_type, num_entries):
     """Populate the log file with num_entries pairs and build its hash index.

     The log is written with the requested compression type and a
     1024-byte compression block size.
     """
     log = sparkey.LogWriter(
         self.logfile,
         compression_type=compression_type,
         compression_block_size=1024)
     n = 0
     while n < num_entries:
         log.put("key_" + str(n), "value_" + str(n))
         n += 1
     log.close()
     sparkey.writehash(self.hashfile, self.logfile)
Example #3
0
    def close(self):
        """Flush every database, build its on-disk hash index, and record stats.

        Ordering matters: each sparkey log writer is closed before
        writehash reads the finished log file.
        """
        # Document store was written incrementally; just close and index it.
        self.dbs["doc"].close()
        sparkey.writehash(get_db_file(self.prefix, "doc", "hash"),
                          get_db_file(self.prefix, "doc", "log"))
        self.timer.status("Collection")

        # Class map is inverted on write: numeric id -> class name.
        for class_buf in self.classes:
            self.dbs["classes"].put(str(self.classes[class_buf]),
                                    str(class_buf))
        self.dbs["classes"].close()
        sparkey.writehash(get_db_file(self.prefix, "classes", "hash"),
                          get_db_file(self.prefix, "classes", "log"))
        self.timer.status("Wrote " + str(len(self.classes)) + " classes")

        # Per-field value statistics are serialized as JSON.
        for field in self.values:
            self.dbs["stats"].put(str(field), json.dumps(self.values[field]))
        self.dbs["stats"].close()
        sparkey.writehash(get_db_file(self.prefix, "stats", "hash"),
                          get_db_file(self.prefix, "stats", "log"))
        self.timer.status("Values write")

        for field in self.docvals:
            # Hoisted: getvalue() was previously called twice per field.
            buf = self.docvals[field].getvalue()
            if buf is not None:
                self.dbs["docvals"].put(str(field), buf)
        self.dbs["docvals"].close()
        sparkey.writehash(get_db_file(self.prefix, "docvals", "hash"),
                          get_db_file(self.prefix, "docvals", "log"))
        self.timer.status("Docvals write")

        self.stats["total_fields"] = self.field_count
        self.stats["total_docs"] = self.id
        self.stats["total_classes"] = len(self.classes)
        self.stats["keywords"] = len(self.keywords)
        self.timer.stat_end("collect", self.id)
        self.log.debug("keywords: %d" % self.stats["keywords"])
        self.log.debug("docs: %d" % self.stats["total_docs"])

        # Keyword index entries are marshal-serialized posting data.
        self.timer.stat_start("idx_write")
        for k in self.keywords:
            self.dbs["idx"].put(str(k), marshal.dumps(self.keywords[k]))
        self.dbs["idx"].close()
        sparkey.writehash(get_db_file(self.prefix, "idx", "hash"),
                          get_db_file(self.prefix, "idx", "log"))
        self.timer.status("Wrote index")
        self.timer.stat_end("idx_write", self.stats["keywords"])

        self.timer.stat_end("index", self.id)
Example #4
0
 def close(self):
     """Flush every database, build its on-disk hash index, and record stats.

     Ordering matters: each sparkey log writer is closed before
     writehash reads the finished log file.
     """
     # Document store was written incrementally; just close and index it.
     self.dbs["doc"].close()
     sparkey.writehash(get_db_file(self.prefix, "doc", "hash"),
         get_db_file(self.prefix, "doc", "log"))
     self.timer.status("Collection")

     # Class map is inverted on write: numeric id -> class name.
     for class_buf in self.classes:
         self.dbs["classes"].put(str(self.classes[class_buf]), str(class_buf))
     self.dbs["classes"].close()
     sparkey.writehash(get_db_file(self.prefix, "classes", "hash"),
         get_db_file(self.prefix, "classes", "log"))
     self.timer.status("Wrote " + str(len(self.classes)) + " classes")

     # Per-field value statistics are serialized as JSON.
     for field in self.values:
         self.dbs["stats"].put(str(field), json.dumps(self.values[field]))
     self.dbs["stats"].close()
     sparkey.writehash(get_db_file(self.prefix, "stats", "hash"),
         get_db_file(self.prefix, "stats", "log"))
     self.timer.status("Values write")

     for field in self.docvals:
         # Hoisted: getvalue() was previously called twice per field.
         buf = self.docvals[field].getvalue()
         if buf is not None:
             self.dbs["docvals"].put(str(field), buf)
     self.dbs["docvals"].close()
     sparkey.writehash(get_db_file(self.prefix, "docvals", "hash"),
         get_db_file(self.prefix, "docvals", "log"))
     self.timer.status("Docvals write")

     self.stats["total_fields"] = self.field_count
     self.stats["total_docs"] = self.id
     self.stats["total_classes"] = len(self.classes)
     self.stats["keywords"] = len(self.keywords)
     self.timer.stat_end("collect", self.id)
     self.log.debug("keywords: %d" % self.stats["keywords"])
     self.log.debug("docs: %d" % self.stats["total_docs"])

     # Keyword index entries are marshal-serialized posting data.
     self.timer.stat_start("idx_write")
     for k in self.keywords:
         self.dbs["idx"].put(str(k), marshal.dumps(self.keywords[k]))
     self.dbs["idx"].close()
     sparkey.writehash(get_db_file(self.prefix, "idx", "hash"),
         get_db_file(self.prefix, "idx", "log"))
     self.timer.status("Wrote index")
     self.timer.stat_end("idx_write", self.stats["keywords"])

     self.timer.stat_end("index", self.id)
Example #5
0
 def _create(self, compression_type, num_entries):
     """Write num_entries key/value pairs to the log and build its hash index.

     Uses the given compression type with a 1024-byte compression block.
     """
     writer = sparkey.LogWriter(self.logfile,
                                compression_type=compression_type,
                                compression_block_size=1024)
     # range, not xrange: xrange was removed in Python 3 (NameError there),
     # and behavior for this loop is otherwise identical.
     for i in range(num_entries):
         writer.put("key_" + str(i), "value_" + str(i))
     writer.close()
     sparkey.writehash(self.hashfile, self.logfile)