Beispiel #1
0
 def test_recluster(self):
     """Build the index in "recluster" mode with k_medoids set to 2.

     Asserts that the resulting index reports the configured clusters
     directory as its source directory and a document count of 2.
     """
     config = self.cfg
     # Configure the run before the index is constructed.
     config.index_mode = "recluster"
     config.k_medoids = 2

     index = PopconXapianIndex(config)

     expected_doccount = 2
     self.assertEqual(index.source_dir, config.clusters_dir)
     self.assertEqual(index.get_doccount(), expected_doccount)
Beispiel #2
0
 def test_clustering(self):
     """Build the index in "cluster" mode and compare it to the disk.

     Asserts that the index reports the configured clusters directory as
     its source directory, and that its document count equals the total
     number of files found by walking that directory recursively.
     """
     config = self.cfg
     config.index_mode = "cluster"

     index = PopconXapianIndex(config)
     self.assertEqual(index.source_dir, config.clusters_dir)

     # Count every file (submission) at any depth below the source dir.
     file_total = 0
     for _root, _dirs, filenames in os.walk(index.source_dir):
         file_total += len(filenames)

     self.assertEqual(index.get_doccount(), file_total)
Beispiel #3
0
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""
import os
import sys
sys.path.insert(0,'../')
import logging
import datetime

from config import Config
from data import PopconXapianIndex

def elapsed_seconds(begin_time, end_time):
    """Return the whole number of seconds between two datetimes.

    Uses ``timedelta.total_seconds()`` so that runs longer than one day
    are reported in full. ``timedelta.seconds`` only holds the sub-day
    remainder (0..86399) and would silently drop whole days.
    """
    return int((end_time - begin_time).total_seconds())


if __name__ == '__main__':
    # Script entry point: build the popcon index described by the
    # configuration and log timing and size statistics about the run.
    cfg = Config()
    begin_time = datetime.datetime.now()
    logging.info("Popcon indexing started at %s", begin_time)

    # Constructing the index object is the step being timed; it is driven
    # by cfg (config file or command line options).
    popindex = PopconXapianIndex(cfg)

    end_time = datetime.datetime.now()
    logging.info("Popcon indexing completed at %s", end_time)
    logging.info("Number of documents (submissions): %d",
                 popindex.get_doccount())

    logging.info("Time elapsed: %d seconds.",
                 elapsed_seconds(begin_time, end_time))
    # Clustering statistics are only reported for the clustering modes.
    if cfg.index_mode in ("cluster", "recluster"):
        logging.info("Medoids: %d\tDispersion:%f",
                     cfg.k_medoids, popindex.cluster_dispersion)