Example #1
0
 def test_recluster(self):
     """Check an index built in "recluster" mode with two medoids.

     The index must take its input from the configured clusters directory
     and report exactly two documents.
     """
     # Request a forced reindex together with a fresh clustering.
     self.cfg.index_mode = "recluster"
     self.cfg.k_medoids = 2
     index = PopconXapianIndex(self.cfg)
     expected_source = self.cfg.clusters_dir
     self.assertEqual(index.source_dir, expected_source)
     self.assertEqual(index.get_doccount(), 2)
Example #2
0
 def test_submissions(self):
     """Check that get_submissions() returns one entry per file on disk.

     The length of the list returned for the index source directory must
     equal the number of files found by walking that directory tree.
     """
     pxi = PopconXapianIndex(self.cfg)
     submissions = pxi.get_submissions(pxi.source_dir)
     # The loop variable must not be called `submissions`: Python 2 list
     # comprehensions leak their loop variable into the enclosing scope,
     # which would overwrite the result under test with the file list of
     # the last directory walked. A generator expression with distinct
     # names keeps the two values separate on every Python version.
     file_count = sum(len(files)
                      for (_root, _dirs, files) in os.walk(pxi.source_dir))
     self.assertEqual(len(submissions), file_count)
Example #3
0
 def test_clustering(self):
     """Check an index built in "cluster" mode.

     The index must read from the configured clusters directory, and its
     document count must equal the number of files found under it.
     """
     # Request a forced reindex with clustering enabled.
     self.cfg.index_mode = "cluster"
     index = PopconXapianIndex(self.cfg)
     self.assertEqual(index.source_dir, self.cfg.clusters_dir)
     # Count every file in the source tree, directory by directory.
     file_total = 0
     for _root, _dirs, filenames in os.walk(index.source_dir):
         file_total += len(filenames)
     self.assertEqual(index.get_doccount(), file_total)
Example #4
0
 def setUp(self):
     """Configure test paths and build a fresh index marked as "old".

     Any index left at the configured location is removed first; the new
     index is then created and its "old" metadata key is set to "true".
     """
     cfg = self.cfg = Config()
     cfg.popcon_index = "test_data/.sample_pxi"
     cfg.popcon_dir = "test_data/popcon_dir"
     cfg.clusters_dir = "test_data/clusters_dir"
     # Remove a previously built index; removal errors are ignored.
     shutil.rmtree(cfg.popcon_index, ignore_errors=True)
     index_exists = os.path.exists(cfg.popcon_index)
     self.assertFalse(index_exists)
     # Held in a local only (original author's note: the index will be
     # closed before the test itself runs).
     old_index = PopconXapianIndex(cfg)
     self.assertEqual(old_index.get_metadata("old"), "")
     old_index.set_metadata("old", "true")
Example #5
0
 def iterate(self, params, rep, n):
     """Run iteration ``n`` of an experiment and return its result dict.

     Only experiments whose ``params['name']`` is "clustering" do any work:
     the n-th entry of ``params['medoids']`` is stored in
     ``self.cfg.k_medoids``, an index is built from that configuration and
     its dispersion is reported. Any other experiment name yields an empty
     dict. ``rep`` is accepted but not used here.
     """
     if params['name'] != "clustering":
         return {}
     medoid_count = params['medoids'][n]
     logging.info("Running iteration %d" % medoid_count)
     self.cfg.k_medoids = medoid_count
     index = PopconXapianIndex(self.cfg)
     return {'k_medoids': medoid_count,
             'dispersion': index.cluster_dispersion}
Example #6
0
 def test_reindex(self):
     """Check that an index built in "reindex" mode has empty "old" metadata."""
     # Request a forced reindex without clustering.
     self.cfg.index_mode = "reindex"
     rebuilt = PopconXapianIndex(self.cfg)
     old_marker = rebuilt.get_metadata("old")
     self.assertEqual(old_marker, "")
Example #7
0
 def test_load(self):
     """Check that opening the index without changing index_mode keeps
     the "old" metadata value "true".
     """
     # Open the index that was built earlier instead of rebuilding it.
     loaded = PopconXapianIndex(self.cfg)
     old_marker = loaded.get_metadata("old")
     self.assertEqual(old_marker, "true")
Example #8
0
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
import os
import sys
sys.path.insert(0,'../')
import logging
import datetime

from config import Config
from data import PopconXapianIndex

if __name__ == '__main__':
    # Script entry point: build the popcon index described by the
    # configuration, then log when it started and finished, how many
    # documents it holds and how long the run took.
    cfg = Config()
    begin_time = datetime.datetime.now()
    logging.info("Popcon indexing started at %s" % begin_time)

    # use config file or command line options
    popindex = PopconXapianIndex(cfg)

    end_time = datetime.datetime.now()
    logging.info("Popcon indexing completed at %s" % end_time)
    logging.info("Number of documents (submissions): %d" %
                 popindex.get_doccount())

    delta = end_time - begin_time
    # timedelta.seconds holds only the sub-day part of the duration;
    # total_seconds() also accounts for whole days, so runs lasting a day
    # or more are reported correctly.
    logging.info("Time elapsed: %d seconds." % delta.total_seconds())
    # Dispersion is only reported for the clustering modes.
    if cfg.index_mode in ("cluster", "recluster"):
        logging.info("Medoids: %d\tDispersion:%f" %
                     (cfg.k_medoids, popindex.cluster_dispersion))
Example #9
0
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
import sys
sys.path.insert(0, '../')
import logging
import datetime

from config import Config
from data import PopconXapianIndex
from error import Error

if __name__ == '__main__':
    # Script entry point: build the popcon index described by the
    # configuration and log start/end times and elapsed time. A project
    # Error raised along the way is reported as a critical log message
    # instead of propagating.
    try:
        cfg = Config()
        begin_time = datetime.datetime.now()
        logging.info("Popcon indexing started at %s" % begin_time)

        pxi = PopconXapianIndex(cfg)

        end_time = datetime.datetime.now()
        logging.info("Popcon indexing completed at %s" % end_time)
        delta = end_time - begin_time
        # timedelta.seconds holds only the sub-day part of the duration;
        # total_seconds() also accounts for whole days, so runs lasting a
        # day or more are reported correctly.
        logging.info("Time elapsed: %d seconds." % delta.total_seconds())
        # Dispersion is only reported for the clustering modes.
        if cfg.index_mode in ("cluster", "recluster"):
            logging.info("Medoids: %d\tDispersion:%f" %
                         (cfg.k_medoids, pxi.cluster_dispersion))

    except Error:
        logging.critical("Aborting proccess. Use '--debug' for more details.")