def test_recluster(self):
    """Build the index in "recluster" mode with two medoids.

    Asserts that the resulting index reads from the configured clusters
    directory and reports exactly two documents.
    """
    # Switch the shared config so that reindexing and clustering are forced.
    self.cfg.index_mode = "recluster"
    self.cfg.k_medoids = 2

    index = PopconXapianIndex(self.cfg)

    self.assertEqual(index.source_dir, self.cfg.clusters_dir)
    self.assertEqual(index.get_doccount(), 2)
def test_clustering(self):
    """Build the index in "cluster" mode and check its document count.

    Asserts that the index reads from the configured clusters directory
    and that its document count equals the number of files found under
    that directory tree.
    """
    # Switch the shared config so that a reindex with clustering is forced.
    self.cfg.index_mode = "cluster"

    index = PopconXapianIndex(self.cfg)
    self.assertEqual(index.source_dir, self.cfg.clusters_dir)

    # Count every file in every directory below the index source dir.
    expected_count = 0
    for _root, _dirs, filenames in os.walk(index.source_dir):
        expected_count += len(filenames)

    self.assertEqual(index.get_doccount(), expected_count)
You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. """ import os import sys sys.path.insert(0,'../') import logging import datetime from config import Config from data import PopconXapianIndex if __name__ == '__main__': cfg = Config() begin_time = datetime.datetime.now() logging.info("Popcon indexing started at %s" % begin_time) # use config file or command line options popindex = PopconXapianIndex(cfg) end_time = datetime.datetime.now() logging.info("Popcon indexing completed at %s" % end_time) logging.info("Number of documents (submissions): %d" % popindex.get_doccount()) delta = end_time - begin_time logging.info("Time elapsed: %d seconds." % delta.seconds) if cfg.index_mode=="cluster" or cfg.index_mode=="recluster": logging.info("Medoids: %d\tDispersion:%f" % (cfg.k_medoids,popindex.cluster_dispersion))