Ejemplo n.º 1
0
 def setupExperimentDir(self):
     """
         Compute and register the experiment's output directory.

         Joins ``cp.Corpus.paths.experiments`` with ``self.exp["name"]``,
         normalizes the result, appends a trailing ``os.sep``, stores it in
         ``self.exp["exp_dir"]`` and passes it to ``ensureDirExists``.
     """
     # This method must not invoke itself: an unconditional
     # self.setupExperimentDir() call here would recurse until the
     # interpreter aborts, and the directory would never be set up.
     exp_dir = os.path.normpath(
         os.path.join(cp.Corpus.paths.experiments, self.exp["name"])
     ) + os.sep
     self.exp["exp_dir"] = exp_dir
     ensureDirExists(self.exp["exp_dir"])
Ejemplo n.º 2
0
    def initializeIndexer(self):
        """
            Start the Lucene Java VM (max heap "768m") and make sure the
            base index directory is passed to ``ensureDirExists``.
        """
        print("Initializing VM...")
        lucene.initVM(maxheap="768m")

        # Base directory is the configured Lucene index path plus a
        # trailing path separator.
        index_root = cp.Corpus.paths.fileLuceneIndex + os.sep
        ensureDirExists(index_root)
Ejemplo n.º 3
0
 def __init__(self, result_storer, cache_dir, res_ids=None, max_results=sys.maxint):
     """
         Initialize the parent class, then record and create the cache
         directories.

         ``self.cache_dir`` is ``cache_dir``; ``self.own_dir`` is a
         subdirectory of it named after ``self.result_storer.table_name``.
         Both are passed to ``ensureDirExists``, parent directory first.
     """
     # NOTE(review): super(self.__class__, self) recurses infinitely if this
     # class is ever subclassed. The enclosing class name is not visible
     # from here, so the call is kept exactly as written.
     super(self.__class__, self).__init__(
         result_storer,
         res_ids=res_ids,
         max_results=max_results,
     )
     self.cache_dir = cache_dir
     self.own_dir = os.path.join(cache_dir, self.result_storer.table_name)
     for directory in (cache_dir, self.own_dir):
         ensureDirExists(directory)
Ejemplo n.º 4
0
 def saveCachedJson(self, path, data):
     """
         Serialize ``data`` as indented JSON and write it to ``path`` as UTF-8.

         The parent directory of ``path`` is passed to ``ensureDirExists``
         first. Errors raised while opening or writing the file are reported
         with ``print`` instead of being raised; errors raised by
         ``json.dumps`` itself propagate to the caller.
     """
     ensureDirExists(os.path.dirname(path))
     # Serialize the ``data`` argument (not an unrelated outer name).
     lines = json.dumps(data, indent=3)
     try:
         # The context manager closes the file even if the write fails.
         with codecs.open(path, "w", "utf-8") as f:
             f.write(lines)
     except Exception:
         # Best-effort save: report and continue, but let KeyboardInterrupt
         # and SystemExit through.
         print("Error saving JSON", path, "Exception in saveCachedJson():",sys.exc_info()[:2])
Ejemplo n.º 5
0
    def loadListOrListAllFiles(self, inputdir, file_mask):
        """
            Return the list of input files.

            Tries ``loadFileList`` on "all_input_files.txt" under
            ``cp.Corpus.paths.fileDB`` first. If that yields nothing, the
            list is built with ``self.listAllFiles(inputdir, file_mask)``,
            saved to that same file with ``saveFileList`` and returned.
        """
        list_path = os.path.join(cp.Corpus.paths.fileDB, "all_input_files.txt")

        cached_files = loadFileList(list_path)
        if cached_files:
            return cached_files

        # Nothing usable was loaded: list the input directory and save the
        # result to the list file.
        print("Listing all files...")
        listed_files = self.listAllFiles(inputdir, file_mask)
        ensureDirExists(cp.Corpus.paths.fileDB)
        saveFileList(listed_files, list_path)
        return listed_files
Ejemplo n.º 6
0
    def createIndexWriter(self, actual_dir, max_field_length=20000000):
        """
            Build and return an IndexWriter for ``actual_dir``.

            The directory is passed to ``ensureDirExists`` first. The writer
            is configured with a StandardAnalyzer, a FieldAgnosticSimilarity
            and the CREATE open mode, and ``deleteAll()`` is called on it
            before it is returned.

            ``max_field_length`` is not used by the current body; it was only
            referenced by an older IndexWriter constructor call.
        """
        ensureDirExists(actual_dir)
        directory = SimpleFSDirectory(File(actual_dir))

        config = IndexWriterConfig(
            LuceneVersion.LUCENE_CURRENT,
            StandardAnalyzer(LuceneVersion.LUCENE_CURRENT),
        )
        config.setSimilarity(FieldAgnosticSimilarity())
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

        writer = IndexWriter(directory, config)
        writer.deleteAll()
        return writer
Ejemplo n.º 7
0
 def createDefaultDirs(self):
     """
         Pass every value stored in ``self.paths`` to ``ensureDirExists``.
     """
     for key in self.paths:
         directory = self.paths[key]
         ensureDirExists(directory)