Esempio n. 1
0
    def doInBackground(self):
        """Download the tools list and every tool jar, then unpack the data.

        Progress is published through ``self.super__setProgress``.  The
        values written here are: 0 start, 2 downloading the tools list,
        3 tools list download failed, 5 downloading the jars, 6 a jar
        download failed, 7 extracting data from the jars, 8 finished.  When
        a download fails the method returns early, so the failure code stays
        the last published value.
        """
        #Initialize progress property.
        progress = 0
        self.super__setProgress(progress)

        # Download the tools list into a temporary file
        progress = 2
        self.super__setProgress(progress)
        self.delete_file(self.tmpToolsListFile)

        if not self.download_file(self.app.toolsListUrl, self.tmpToolsListFile):
            # The tools list could not be downloaded
            progress = 3
            self.super__setProgress(progress)
            return

        toolsRefs = read_tools_list(self.tmpToolsListFile)

        #Download tools data as jar files
        progress = 5
        self.super__setProgress(progress)
        self.jarDir = File.separator.join([self.app.SCRIPTDIR, "tools", "jar"])
        jarDirFile = File(self.jarDir)
        if not jarDirFile.exists():
            # mkdirs() also creates the missing parent "tools" directory;
            # plain mkdir() would return False without creating anything.
            jarDirFile.mkdirs()
        else:
            #delete old files
            for jarFileName in jarDirFile.list():
                File(File.separator.join([self.jarDir, jarFileName])).delete()
        #download new files
        for toolRef in toolsRefs:
            jarFileName = "%s.jar" % toolRef
            jarUrl = "%s/%s" % (self.app.jarBaseUrl, jarFileName)
            jarFilePath = File.separator.join([self.jarDir, jarFileName])
            if not self.download_file(jarUrl, jarFilePath):
                # A tool jar could not be downloaded
                progress = 6
                self.super__setProgress(progress)
                return

        #Extract tools data from jar files
        self.toolsDir = File.separator.join([self.app.SCRIPTDIR, "tools", "data"])
        progress = 7
        self.super__setProgress(progress)
        self.extract_tools_data_from_jar_files()

        # Replace the stored tools list with the freshly downloaded one,
        # then remove the temporary file
        self.delete_file(self.toolsListFile)
        Files.copy(Paths.get(self.tmpToolsListFile), Paths.get(self.toolsListFile))
        self.delete_file(self.tmpToolsListFile)

        progress = 8
        self.super__setProgress(progress)
Esempio n. 2
0
 def __init__(self, directory):
     """Open the search and taxonomy indexes and configure the facets.

     :param directory: base directory containing the ``INDEX_DIR`` and
         ``TAXONOMY_DIR`` sub-directories.
     """
     self.directory = directory

     # Both indexes are kept on disk below the base directory.
     search_index_path = os.path.join(self.directory, INDEX_DIR)
     self.indexDir = FSDirectory.open(Paths.get(search_index_path))
     taxonomy_index_path = os.path.join(self.directory, TAXONOMY_DIR)
     self.taxoDir = FSDirectory.open(Paths.get(taxonomy_index_path))

     # "Categories" is declared hierarchical and multi-valued.
     facets = self.facets_config = FacetsConfig()
     facets.setHierarchical("Categories", True)
     facets.setMultiValued("Categories", True)
Esempio n. 3
0
def search_loop(index_dir, field="contents", explain=False):
    """Run an interactive query prompt against a Lucene index.

    Reads queries from standard input until an empty line is entered and
    prints up to 50 hits for each one.

    :param index_dir: path of the index directory.
    :param field: default field the query parser searches.  When it is
        ``'web'`` the stored ``web`` and ``raw`` values and the score are
        printed; otherwise the stored ``path`` and ``name`` values are.
    :param explain: when true, also print the score explanation of each hit.
    """
    reader = DirectoryReader.open(SimpleFSDirectory(Paths.get(index_dir)))
    try:
        searcher = IndexSearcher(reader)
        # The parser depends only on the field and the analyzer, so it is
        # built once instead of on every loop iteration.
        parser = QueryParser(field, StandardAnalyzer())
        print("Hit enter with no input to quit.")
        while True:
            command = input("Query:")
            if command == '':
                return
            print("Searching for: %s" % command)
            query = parser.parse(command)
            scoreDocs = searcher.search(query, 50).scoreDocs
            print("%s total matching documents." % len(scoreDocs))

            for scoreDoc in scoreDocs:
                doc = searcher.doc(scoreDoc.doc)
                if field == 'web':
                    print(
                        f'{doc.get("web")} | {doc.get("raw")} | {scoreDoc.score}')
                else:
                    print('path:', doc.get("path"), 'name:', doc.get("name"))
                if explain:
                    explanation = searcher.explain(query, scoreDoc.doc)
                    print(explanation)
                    print('------------')
    finally:
        # Release the index files when the prompt ends or an error escapes.
        reader.close()
Esempio n. 4
0
    def retrieve_sents(self):
        """Return the Lucene document ids of the hits for ``self.query``.

        The query is parsed against the ``contents`` field with OR as the
        default operator, and at most 50 hits are requested from the index
        at ``self.indexDir``.  When ``self.stats`` is true a summary line is
        written to standard error.
        """
        index_reader = DirectoryReader.open(
            SimpleFSDirectory(Paths.get(self.indexDir)))
        searcher = IndexSearcher(index_reader)

        parser = QueryParser("contents", StandardAnalyzer())
        parser.setDefaultOperator(QueryParser.Operator.OR)
        parsed_query = parser.parse(self.query)

        # Time only the search call itself.
        started = datetime.now()
        hits = searcher.search(parsed_query, 50).scoreDocs
        elapsed = datetime.now() - started

        if self.stats:
            print("Found %d sentences (in %s) that matched query '%s':" % (len(hits), elapsed, parsed_query),
                  file=sys.stderr)

        return [hit.doc for hit in hits]
Esempio n. 5
0
def l_searcher(query_string, directory, number_documents):
    """Fuzzy-search the ``question`` field of a Lucene index.

    :param query_string: term matched against ``question`` with a
        ``FuzzyQuery`` constructed with the value 2.
    :param directory: path of the index directory.
    :param number_documents: maximum number of hits to fetch.
    :return: ``(questions, answers)`` -- two parallel lists with the stored
        ``question`` and ``answer`` values of every hit, or ``None`` when an
        ``IndexError`` is raised while searching.
    """
    lucene.initVM()

    reader = DirectoryReader.open(FSDirectory.open(Paths.get(directory)))
    try:
        searcher = IndexSearcher(reader)
        query = FuzzyQuery(Term("question", query_string), 2)
        print("The query was: {}".format(query))

        hits = searcher.search(query, number_documents)

        print("The hits were: ")

        options = []
        options_answers = []
        for hit in hits.scoreDocs:
            print(hit.doc)
            doc = searcher.doc(hit.doc)
            options_answers.append(doc.get("answer"))
            options.append(doc.get("question"))

        return options, options_answers
    except IndexError:
        return None
    finally:
        # The stored values were already copied into Python lists, so the
        # reader can be released on every exit path.
        reader.close()
Esempio n. 6
0
    def __init__(self, root, storeDir, analyzer, type="html"):
        """Build a fresh index of ``root`` inside ``storeDir``.

        :param root: passed to the indexing routine selected by ``type``.
        :param storeDir: index directory; created when it does not exist.
        :param analyzer: base analyzer, wrapped so that at most 1048576
            tokens per field are indexed.
        :param type: ``"html"`` or ``"image"``; selects ``index_html`` or
            ``index_image``.
        :raises KeyError: if ``type`` is not one of the supported values.
        """
        if not os.path.exists(storeDir):
            os.mkdir(storeDir)

        store = SimpleFSDirectory(Paths.get(storeDir))
        analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
        config = IndexWriterConfig(analyzer)
        # CREATE replaces whatever index already exists in storeDir.
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer = IndexWriter(store, config)
        self.load_stop_words([
            "CNstopwords.txt",
            "ENstopwords.txt",
        ])
        self.html2text = HTML2Text()
        self.html2text.ignore_links = True
        self.html2text.ignore_images = True
        type_to_index = {
            "html": self.index_html,
            "image": self.index_image,
        }
        type_to_index[type](root, writer)
        ticker = Ticker()
        print('commit index')
        threading.Thread(target=ticker.run).start()
        try:
            writer.commit()
            writer.close()
        finally:
            # Clear the flag even when commit/close raises.
            # NOTE(review): presumably Ticker.run loops while ``tick`` is
            # true, so leaving it set would keep the thread alive -- confirm.
            ticker.tick = False
        print('done')
Esempio n. 7
0
 def buscar():
     """Handle a search request and render the results page.

     Reads the ``id_entrada`` form field, removes the characters
     ``" : . , ; '`` from it, searches the index at ``INDEX_PATH`` and
     renders ``search.html`` with the names that were found.
     """
     global folder_path, folder_index
     logging.info("Ingresando en la peticion para busqueda")
     logging.info("palabra buscada: "+request.form['id_entrada'])

     # Strip punctuation from the search word.
     palabra = str(request.form['id_entrada'])
     for caracter in ('"', ":", ".", ",", ";", "'"):
         palabra = palabra.replace(caracter, "")

     # Attach the current thread to the JVM before using Lucene.
     logging.info("Obteniendo ambiente de lucene en busqueda")
     jvm = lucene.getVMEnv()
     logging.info("Creando hilo en el ambiente en busqueda")
     jvm.attachCurrentThread()

     logging.info("Llamando a SimpleFSDirectory")
     index_store = SimpleFSDirectory(Paths.get(INDEX_PATH))
     logging.info("Llamando a IndexSearcher")
     searcher = IndexSearcher(DirectoryReader.open(index_store))
     logging.info("Llamando a StandardAnalyzer")
     analyzer = StandardAnalyzer()

     logging.info("Buscando palabra: "+palabra)
     SearchFiles().buscar(searcher, analyzer, palabra)
     listanombres = SearchFiles().getlistanombres()
     logging.info("Obteniendo la lista de nombres: "+str(listanombres))

     logging.info("Renderizando template de busqueda con resultado")
     resumen = str("Se encontraron "+str(len(listanombres))+" documentos!.")
     return render_template('search.html', texto=palabra, nombres=listanombres, resultado=resumen)
Esempio n. 8
0
def searchResults(command):
    """Run *command* against the ``./index_2`` index and return the results.

    The search itself is delegated to ``run`` with a ``SmartChineseAnalyzer``;
    only the second element of its return value is handed back.
    """
    index_reader = DirectoryReader.open(
        SimpleFSDirectory(Paths.get("./index_2")))
    searcher = IndexSearcher(index_reader)
    _count, results = run(searcher, SmartChineseAnalyzer(), command)
    del searcher
    return results
Esempio n. 9
0
 def __init__(self):
     """Start the JVM and open a BM25 searcher on ``./IndexFiles.index``.

     Also prepares a token-limited ``StandardAnalyzer`` (1048576 tokens)
     and a WordNet lemmatizer.
     """
     lucene.initVM(vmargs=['-Djava.awt.headless=true'])

     index_store = SimpleFSDirectory(Paths.get('./IndexFiles.index'))
     bm25_searcher = self.searcher = IndexSearcher(
         DirectoryReader.open(index_store))
     bm25_searcher.setSimilarity(BM25Similarity())

     self.analyzer = LimitTokenCountAnalyzer(StandardAnalyzer(), 1048576)
     self.lemmatizer = nltk.stem.WordNetLemmatizer()
Esempio n. 10
0
 def __init__(self, store_dir):
     """Open (creating it if needed) the index directory *store_dir*.

     :param store_dir: path of the index directory.

     The searcher is left unset (``None``); the analyzer is a
     ``StandardAnalyzer`` limited to 1048576 tokens.
     """
     self.store_dir = store_dir
     if not os.path.exists(store_dir):
         # ``0o777`` is accepted by Python 2.6+ and Python 3, whereas the
         # bare ``0777`` spelling is a syntax error on Python 3.
         os.mkdir(store_dir, 0o777)
     self.store = SimpleFSDirectory(Paths.get(store_dir))
     self.searcher = None
     self.analyzer = LimitTokenCountAnalyzer(StandardAnalyzer(), 1048576)
Esempio n. 11
0
def createIndexWriter(indexDir):
    """Return an ``IndexWriter`` that starts a new index in *indexDir*.

    The directory is created when missing.  The writer uses a
    ``WhitespaceAnalyzer`` and the ``CREATE`` open mode.
    """
    if not os.path.exists(indexDir):
        os.mkdir(indexDir)

    index_store = FSDirectory.open(Paths.get(indexDir))
    writer_config = IndexWriterConfig(WhitespaceAnalyzer())
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    return IndexWriter(index_store, writer_config)
Esempio n. 12
0
 def __init__(self, index_dir):
     """Create a writer that starts a new index in *index_dir*.

     Prints the Lucene version, then opens the writer in ``CREATE`` mode
     with a ``StandardAnalyzer`` limited to 1048576 tokens.
     """
     print("lucene:", lucene.VERSION)
     self.index_dir = index_dir

     index_store = SimpleFSDirectory(Paths.get(self.index_dir))
     writer_config = IndexWriterConfig(
         LimitTokenCountAnalyzer(StandardAnalyzer(), 1048576))
     writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
     self.writer = IndexWriter(index_store, writer_config)
Esempio n. 13
0
 def addLang(self, lang, dataset, analyzer, index_path=None):
     """Register *lang* and open its searcher and query parser.

     :param lang: language key used in ``self.searcher`` / ``self.parser``.
     :param dataset: forwarded to ``self.get_index``.
     :param analyzer: key into the ``analyzers`` mapping; the selected
         entry is called to build the analyzer for the ``context`` field.
     :param index_path: forwarded to ``self.get_index``.

     ``self.lang`` is set to *lang* afterwards.
     """
     self.languages.append(lang)

     index_location = self.get_index(lang, dataset, index_path)
     index_store = SimpleFSDirectory(Paths.get(index_location))
     lang_searcher = IndexSearcher(DirectoryReader.open(index_store))
     self.searcher[lang] = lang_searcher
     self.parser[lang] = QueryParser("context", analyzers[analyzer]())
     lang_searcher.setSimilarity(self.similarity)

     self.lang = lang
 def __init__(self):
     """Create an in-memory index writer using a ``StopAnalyzer``.

     The analyzer is built from the path ``Config.stop_words_address``.
     """
     stop_words_path = Paths.get(Config.stop_words_address)
     self.analyzer = StopAnalyzer(stop_words_path)
     self.config = IndexWriterConfig(self.analyzer)
     # The index lives in a RAMDirectory rather than on disk.
     self.index = RAMDirectory()
     self.w = IndexWriter(self.index, self.config)
Esempio n. 15
0
 def __init__(self, indexDir):
     """Open a searcher on *indexDir* plus parsers for ``name`` and ``id``.

     Both query parsers use a ``StandardAnalyzer`` and AND as the default
     operator.
     """
     self.directory = SimpleFSDirectory(Paths.get(indexDir))
     self.reader = DirectoryReader.open(self.directory)
     self.searcher = IndexSearcher(self.reader)

     name_parser = QueryParser('name', StandardAnalyzer())
     self.nameQueryParser = name_parser
     name_parser.setDefaultOperator(QueryParser.Operator.AND)

     id_parser = QueryParser('id', StandardAnalyzer())
     self.idQueryParser = id_parser
     id_parser.setDefaultOperator(QueryParser.Operator.AND)
 def __init__(self, input_path):
     """Collect the input files and open the three on-disk indexes.

     :param input_path: directory prefix; files matching
         ``input_path + "en*.txt"`` are collected, and the indexes
         ``INDEX_DIR``, ``INDEX_DIR_ENG`` and ``INDEX_DIR_EL`` are opened
         below it.

     A spaCy ``en_core_web_sm`` pipeline is loaded with the ``ner`` and
     ``parser`` components disabled.
     """
     self.txt_files = glob.glob(input_path+"en*.txt")
     self.TITLE = "title"
     self.TEXT = "text"
     self.directory = input_path

     def open_index(index_name):
         # Open the on-disk index stored below the input directory.
         return SimpleFSDirectory.open(
             Paths.get(os.path.join(self.directory, index_name)))

     self.in_directory = open_index(INDEX_DIR)
     self.in_directory_English = open_index(INDEX_DIR_ENG)
     self.in_directory_English_lemma = open_index(INDEX_DIR_EL)

     self.queries = []
     self.sp = spacy.load("en_core_web_sm", disable=["ner", "parser"])
     self.sp_bodies_lemma = []
     self.sp_bodies_pos = []
     self.query_lemma = ""
     self.query_pos = ""
     self.prec_at_1 = 0
Esempio n. 17
0
 def __init__(self, dir, data_file):
     """Create a writer that starts a new index in *dir*.

     :param dir: path of the index directory.
     :param data_file: stored on the instance as ``self.data_file``.
     """
     self.dir = dir
     self.data_file = data_file

     index_store = FSDirectory.open(Paths.get(self.dir))
     config = IndexWriterConfig(StandardAnalyzer())
     config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
     self.writer = IndexWriter(index_store, config)
Esempio n. 18
0
 def __init__(self, file_name):
     """Parse *file_name* with a ``DocumentBuilder`` and normalize the result.

     The absolute path, the ``File``, the factory, the builder and the
     parsed document are all kept on the instance.
     """
     self.file_name = Paths.get(file_name).toAbsolutePath().toString()
     self.file = File(self.file_name)

     self.db_factory = DocumentBuilderFactory.newInstance()
     self.db_builder = self.db_factory.newDocumentBuilder()

     document = self.doc = self.db_builder.parse(self.file)
     document.getDocumentElement().normalize()
Esempio n. 19
0
    def __init__(self, tfidf_path, strict=True):
        """Start the JVM and open a searcher and parser on *tfidf_path*.

        :param tfidf_path: path of the Lucene index directory.
        :param strict: accepted but not used in this constructor.

        The parser targets the ``text`` field with OR as default operator.
        """
        lucene.initVM()
        text_analyzer = StandardAnalyzer()

        index_store = SimpleFSDirectory(Paths.get(tfidf_path))
        self.searcher = IndexSearcher(DirectoryReader.open(index_store))

        text_parser = self.parser = QueryParser("text", text_analyzer)
        text_parser.setDefaultOperator(QueryParser.Operator.OR)
Esempio n. 20
0
 def create_index_dir(self):
     """
     Open the Lucene directory for the relative path ``index``
     :return: the opened ``FSDirectory``
     """
     return FSDirectory.open(Paths.get('index'))
Esempio n. 21
0
    def __init__(self, baseDir, indexDirectory="IR.Index"):
        """
        Open a reader on the index below the base directory.

        :param baseDir: The directory where this querier is run
        :param indexDirectory: Directory of indices, default value = 'IR.Index'
        """
        index_location = os.path.join(baseDir, indexDirectory)
        self.reader = DirectoryReader.open(
            FSDirectory.open(Paths.get(index_location)))
Esempio n. 22
0
def convert(input_svg_path, rotation_x, rotation_y, ns_registry_lock):
    """Convert an SVG file to a PNG file and return the PNG path.

    The PNG is written next to the input as
    ``<input without extension>_rotX_<rotation_x>_rotY_<rotation_y>.png``.
    If that file already exists it is returned without converting again.
    Otherwise the SVG is rewritten to a temporary ``*_modified.svg`` file,
    converted with the external ``svg2png`` command, post-processed by
    ``_translation`` and written back with ``ImageIO``.

    :param input_svg_path: path of the source file; must end in ``.svg``.
    :param rotation_x: used in the output name and passed to ``_translation``.
    :param rotation_y: used in the output name and passed to ``_translation``.
    :param ns_registry_lock: forwarded to ``_parse_and_rewrite_svg_file``.
    :return: path of the PNG file.
    :raises common.ScratchtobatError: when any step of the conversion fails.
    """
    assert isinstance(input_svg_path, (str, unicode))
    assert os.path.splitext(input_svg_path)[1] == ".svg"

    input_file_name = os.path.splitext(input_svg_path)[0]
    output_png_path = "{}_rotX_{}_rotY_{}.png".format(input_file_name, rotation_x, rotation_y)
    _log.info("      converting '%s' to Pocket Code compatible png '%s'", input_svg_path, output_png_path)


    # Temporary rewritten copy of the SVG; removed again before returning.
    output_svg_path = input_svg_path.replace(".svg", "_modified.svg")
    # NOTE(review): output_svg_URI is not used anywhere else in this function.
    output_svg_URI = Paths.get(output_svg_path).toUri().toURL().toString()

    if os.path.exists(output_png_path):
        _log.error("      '%s' already exists", output_png_path)
        #assert False # "Still a Duplicate?"
        # remove temporary files
        if os.path.exists(output_svg_path):
            os.remove(output_svg_path)
        return output_png_path # avoid duplicate conversions!

    # NOTE(review): png_ostream is never reassigned, so the flush/close in
    # the finally clause below never runs.
    png_ostream = None
    error = None
    try:
        _parse_and_rewrite_svg_file(input_svg_path, output_svg_path, ns_registry_lock)
        command = "svg2png"
        # check_output raises if the command exits with a non-zero status;
        # the captured output itself is not used.
        out = subprocess.check_output([command, output_svg_path, "-o", output_png_path])
        _log.info("      converting '%s' to Pocket Code compatible png '%s'",
                  input_svg_path, output_png_path)
        assert os.path.exists(output_png_path)

        final_image = _translation(output_png_path, rotation_x, rotation_y)

        if final_image is None:
            raise RuntimeError("...")

        from javax.imageio import ImageIO
        from java.io import File
        # Overwrite the converted PNG with the post-processed image.
        ImageIO.write(final_image, "PNG", File(output_png_path))
        return output_png_path
    except BaseException as err:
        # Log the failure and remember a domain error; it is raised after
        # the finally clause has cleaned up.
        import traceback
        import sys
        exc_info = sys.exc_info()
        _log.error(err)
        _log.error(traceback.format_exc())
        _log.error(exc_info)
        error = common.ScratchtobatError("SVG to PNG conversion call failed for: %s" % input_svg_path)
    finally:
        # free resources
        if png_ostream != None:
            png_ostream.flush()
            png_ostream.close()
        # remove temporary files
        if os.path.exists(output_svg_path):
            os.remove(output_svg_path)

    if error != None:
        raise error
Esempio n. 23
0
def getLucene(path):
    """Open a writer on *path* together with a reader and a searcher.

    The writer uses a ``WhitespaceAnalyzer`` and an index sort on
    ``NUMERIC_STAMP_FIELD`` (long).

    :return: ``(writer, reader, searcher)``.
    """
    index_store = FSDirectory.open(Paths.get(path))
    writer_config = IndexWriterConfig(WhitespaceAnalyzer())
    stamp_sort = Sort(SortField(NUMERIC_STAMP_FIELD, SortField.Type.LONG))
    writer_config.setIndexSort(stamp_sort)

    index_writer = IndexWriter(index_store, writer_config)
    index_reader = index_writer.getReader()
    return index_writer, index_reader, IndexSearcher(index_reader)
Esempio n. 24
0
 def _getLucene(self, path):
     """Open a writer on *path* together with a reader and a searcher.

     The writer is configured without an analyzer, with a 256 MB RAM
     buffer and with compound files disabled.

     :return: ``(writer, reader, searcher)``.
     """
     index_store = FSDirectory.open(Paths.get(path))

     writer_config = IndexWriterConfig(None)
     # Both settings were marked "faster" by the original author.
     writer_config.setRAMBufferSizeMB(256.0)
     writer_config.setUseCompoundFile(False)

     index_writer = IndexWriter(index_store, writer_config)
     index_reader = index_writer.getReader()
     return index_writer, index_reader, IndexSearcher(index_reader)
 def __init__(self, file_name):
     """Start the JVM and open the index *file_name* next to the script.

     *file_name* is resolved against the directory of ``sys.argv[0]``.
     ``error_count`` starts at 0.
     """
     lucene.initVM(vmargs=['-Djava.awt.headless=true'])

     script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
     index_location = os.path.join(script_dir, file_name)
     index_store = SimpleFSDirectory(Paths.get(index_location))

     self.reader = DirectoryReader.open(index_store)
     self.searcher = IndexSearcher(self.reader)
     self.analyzer = StandardAnalyzer()
     self.error_count = 0
def extract_file_from_jar(config_file):
    """Copy a classpath resource into a temporary file.

    :param config_file: resource name handed to
        ``LoaderUtil.getResourceBySelfClassLoader``.
    :return: absolute path of the temporary copy, or ``None`` when the
        resource lookup returns nothing.
    """
    file_url = LoaderUtil.getResourceBySelfClassLoader(config_file)
    if not file_url:
        return None

    tmp_file, tmp_abs_path = tempfile.mkstemp()
    # NOTE(review): this relies on mkstemp() returning a handle that has a
    # close() method (presumably Jython); on CPython it is an int -- confirm.
    tmp_file.close()

    resource_stream = file_url.openStream()
    try:
        Files.copy(resource_stream, Paths.get(tmp_abs_path), StandardCopyOption.REPLACE_EXISTING)
    finally:
        # Files.copy leaves the source stream open, so close it here.
        resource_stream.close()
    return tmp_abs_path
Esempio n. 27
0
 def __init__(self, store_dir):
     """Open a writer on the index directory *store_dir*.

     :param store_dir: path of the index directory; created when missing.

     The analyzer is a ``StandardAnalyzer`` limited to 1048576 tokens.
     """
     self.store_dir = store_dir
     if not os.path.exists(store_dir):
         # ``0o777`` is accepted by Python 2.6+ and Python 3, whereas the
         # bare ``0777`` spelling is a syntax error on Python 3.
         os.mkdir(store_dir, 0o777)
     self.store = SimpleFSDirectory(Paths.get(store_dir))
     self.analyzer = LimitTokenCountAnalyzer(StandardAnalyzer(), 1048576)
     self.config = IndexWriterConfig(self.analyzer)
     self.writer = IndexWriter(self.store, self.config)
Esempio n. 28
0
 def _getLucene(self, path):
     """Open a writer on *path* together with a reader and a searcher.

     The writer is configured without an analyzer, with a 256 MB RAM
     buffer and with compound files disabled.

     :return: ``(writer, reader, searcher)``.
     """
     index_store = FSDirectory.open(Paths.get(path))

     writer_config = IndexWriterConfig(None)
     # Both settings were marked "faster" by the original author.
     writer_config.setRAMBufferSizeMB(256.0)
     writer_config.setUseCompoundFile(False)

     index_writer = IndexWriter(index_store, writer_config)
     index_reader = index_writer.getReader()
     return index_writer, index_reader, IndexSearcher(index_reader)
Esempio n. 29
0
 def __init__(self, index_path, update=False):
     """Open an index writer on *index_path*.

     :param index_path: path of the index directory.
     :param update: when true the index is opened with
         ``CREATE_OR_APPEND``; otherwise with ``CREATE``.
     """
     index_store = FSDirectory.open(Paths.get(index_path))
     writer_config = IndexWriterConfig(StandardAnalyzer())

     modes = IndexWriterConfig.OpenMode
     writer_config.setOpenMode(modes.CREATE_OR_APPEND if update else modes.CREATE)

     self.writer = IndexWriter(index_store, writer_config)
Esempio n. 30
0
def searchIndex():
    """Start the JVM and run the search routine on the index next to the script.

    The index is looked up as ``INDEX_DIR`` inside the directory of
    ``sys.argv[0]``; the searching is delegated to ``run`` with a
    ``StandardAnalyzer``.
    """
    # The original header lacked the parameter list ("def searchIndex:"),
    # which is a syntax error.
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    print('lucene', lucene.VERSION)
    base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    directory = SimpleFSDirectory(Paths.get(os.path.join(base_dir, INDEX_DIR)))
    searcher = IndexSearcher(DirectoryReader.open(directory))
    analyzer = StandardAnalyzer()
    run(searcher, analyzer)
    del searcher
Esempio n. 31
0
def get_ancient_content(id, dir):
    """Return the last document whose ``id`` matches ``<id>.<digits>.<digits>``.

    :param id: prefix of the ``id`` field; it is inserted into the regular
        expression as-is.
    :param dir: path of the Lucene index directory.
    :return: the last of up to 9999 matching documents, or ``None`` when
        nothing matches (the original raised ``UnboundLocalError`` there).
    """
    # NOTE(review): the original also read each hit's ``text`` field into
    # unused locals; presumably the texts were meant to be concatenated and
    # returned -- confirm against the callers before changing the return.
    index_dir = SimpleFSDirectory(Paths.get(dir))
    searcher = IndexSearcher(DirectoryReader.open(index_dir))
    # Raw string so that the backslashes reach Lucene unchanged instead of
    # relying on invalid escape sequences in a normal literal.
    query = RegexpQuery(Term('id', id + r'\.[0-9]+\.[0-9]+'))
    hits = searcher.search(query, 9999)

    last_hit = None
    for hit in hits.scoreDocs:
        last_hit = hit
    if last_hit is None:
        return None
    return searcher.doc(last_hit.doc)
Esempio n. 32
0
def extract_file_from_jar(config_file):
    """Copy a classpath resource into a temporary file.

    :param config_file: resource name handed to
        ``LoaderUtil.getResourceBySelfClassLoader``.
    :return: absolute path of the temporary copy, or ``None`` when the
        resource lookup returns nothing.
    """
    file_url = LoaderUtil.getResourceBySelfClassLoader(config_file)
    if not file_url:
        return None

    tmp_file, tmp_abs_path = tempfile.mkstemp()
    # NOTE(review): this relies on mkstemp() returning a handle that has a
    # close() method (presumably Jython); on CPython it is an int -- confirm.
    tmp_file.close()

    resource_stream = file_url.openStream()
    try:
        Files.copy(resource_stream, Paths.get(tmp_abs_path),
                   StandardCopyOption.REPLACE_EXISTING)
    finally:
        # Files.copy leaves the source stream open, so close it here.
        resource_stream.close()
    return tmp_abs_path
Esempio n. 33
0
def getLucene(path):
    """Open a writer on *path* together with a reader and a searcher.

    The writer uses a ``WhitespaceAnalyzer`` and an index sort on
    ``NUMERIC_STAMP_FIELD`` (long).

    :return: ``(writer, reader, searcher)``.
    """
    index_store = FSDirectory.open(Paths.get(path))
    writer_config = IndexWriterConfig(WhitespaceAnalyzer())
    stamp_sort = Sort(SortField(NUMERIC_STAMP_FIELD, SortField.Type.LONG))
    writer_config.setIndexSort(stamp_sort)

    index_writer = IndexWriter(index_store, writer_config)
    index_reader = index_writer.getReader()
    return index_writer, index_reader, IndexSearcher(index_reader)
Esempio n. 34
0
    def __init__(self, lang):
        """Start the JVM and open the searcher configured for *lang*.

        :param lang: ``'zh'`` (``config.IDX_COS_ZH`` with a
            ``SmartChineseAnalyzer``) or ``'en'`` (``config.IDX_COS_EN``
            with an ``EnglishAnalyzer``).
        :raises ValueError: for any other value of *lang*.
        """
        lucene.initVM()

        # Pick the index location and analyzer class for the language.
        if lang == 'zh':
            index_location, analyzer_class = config.IDX_COS_ZH, SmartChineseAnalyzer
        elif lang == 'en':
            index_location, analyzer_class = config.IDX_COS_EN, EnglishAnalyzer
        else:
            raise ValueError(
                'lang should be "zh" or "en", {} is invalid!'.format(lang))

        index_store = SimpleFSDirectory(Paths.get(str(index_location)))
        lang_analyzer = analyzer_class()

        self.reader = DirectoryReader.open(index_store)
        self.searcher = IndexSearcher(self.reader)
        self.searcher.setSimilarity(mySimilarity())
        self.analyzer = lang_analyzer
        logger.debug('search similarity func: {}'.format(
            self.searcher.getSimilarity()))
Esempio n. 35
0
    def get_searcher(self, original_dir):
        '''
        Build an `IndexSearcher` over an existing on-disk index.

        Input: `original_dir`: directory storing the original Lucene index
        Output: `IndexSearcher` reading from that directory
        '''
        index_path = Paths.get(original_dir)
        return IndexSearcher(DirectoryReader.open(SimpleFSDirectory(index_path)))
Esempio n. 36
0
    def __init__(self, index_path=None):
        """Start the JVM and open a searcher on the Lucene index.

        :param index_path: index directory; a falsy value selects
            ``DEFAULTS['lucene_index']``.
        """
        self.question = None
        self.index_path = index_path if index_path else DEFAULTS['lucene_index']
        self.tokenizer = tokenizers.get_class('simple')()
        self.env = lucene.initVM(vmargs=['-Djava.awt.headless=true'])

        index_store = SimpleFSDirectory(Paths.get(self.index_path))
        self.analyzer = StandardAnalyzer()
        self.searcher = IndexSearcher(DirectoryReader.open(index_store))
Esempio n. 37
0
    def __init__(self):
        """Start the JVM and create a writer for a brand-new index.

        The index is stored at ``config.LUCENE_INDEXED`` and analyzed with
        a ``PorterStemmerAnalyzer``.
        """
        lucene.initVM()

        index_store = SimpleFSDirectory(Paths.get(str(config.LUCENE_INDEXED)))
        writer_config = IndexWriterConfig(PorterStemmerAnalyzer())
        # CREATE starts a new index, removing previously indexed documents.
        writer_config.setOpenMode(OpenMode.CREATE)
        self.index_writer = IndexWriter(index_store, writer_config)
Esempio n. 38
0
def load_index():
    """Start the JVM and open the ``./full_index1`` index.

    The JVM handle, the reader and the searcher are published as the module
    globals ``vm``, ``reader`` and ``searcher``.
    """
    global vm, searcher, reader
    vm = lucene.initVM()

    index_store = SimpleFSDirectory(Paths.get("./full_index1"))
    reader = DirectoryReader.open(index_store)
    searcher = IndexSearcher(reader)
Esempio n. 39
0
 def readFileToStructure(self, path, structure):
     """Fill *structure* with the model read from *path* and return it.

     The header title is the file name of *path*.  The chains come from
     ``ReadFile.getModelFromFile``.  The crystallographic info uses a
     ``"P 1"`` space group and the cell returned by ``ReadFile.getBox``
     for that group's example unit cell.
     """
     header = PdbHeader()
     file_title = Paths.get(path).getFileName().toString()
     header.setTitle(file_title)
     structure.setPDBHeader(header)

     chains = ReadFile.getModelFromFile(self, path)
     structure.setChains(Lists.newArrayList(chains))

     cryst = PdbCryst()
     cryst.setSpaceGroup(SpaceGroup(0, 1, 1, "P 1", "P 1", BravaisL.CUBIC))
     example_cell = cryst.getSpaceGroup().getBravLattice().getExampleUnitCell()
     cryst.setCrystalCell(ReadFile.getBox(self, example_cell))
     header.setCrystallographicInfo(cryst)

     return structure
Esempio n. 40
0
 def download_file(self, url, filePath):
     """Downloads a file form url and save it as filePath
     """
     try:
         print "\ndownloading"
         print url
         print filePath
         inputStream = URI.create(url).toURL().openStream()
         Files.copy(inputStream, Paths.get(filePath))
         return True
     except (UnknownHostException, SocketException), e:
         print e
         print "I cannot download:\n%s" % url
         return False
Esempio n. 41
0
 def __init__(self, dbName, dropDB=False):
     """Attach this thread to the JVM and open the index of *dbName*.

     :param dbName: ``<user>_<db>``; split on the first underscore to
         build the index path ``./files/<user>/<db>/LuceneIndex``.
     :param dropDB: when true, an existing index directory is deleted
         before it is opened.
     :raises ValueError: if *dbName* contains no underscore.
     """
     import os

     # A thread that was not created by the Java VM must be attached
     # before it can use the VM.
     luceneVM.attachCurrentThread('LuceneDB')
     self.analyzer = StandardAnalyzer()
     self.indexDir = None
     self.searcher = None
     (user,db) = dbName.split('_', 1)
     directory = "./files/"+user+'/'+db+'/LuceneIndex'
     # Dropping a database that has no index yet is a no-op; the original
     # unconditional rmtree raised when the directory did not exist.
     if dropDB and os.path.isdir(directory):
         shutil.rmtree(directory)
     # NOTE(review): the original comment says this call creates the
     # directory if it does not exist -- confirm for the Lucene version used.
     self.indexDir = SimpleFSDirectory(Paths.get(directory))
Esempio n. 42
0
def realPathName(path):
    """Return *path* resolved against ``sys.netshell_root`` as a Java path."""
    root = sys.netshell_root.toString()
    return Paths.get(root, path)
Esempio n. 43
0
 def readJsonFile (self, filePath = None):
     """Read all bytes of *filePath* and pass them to ``self.deserFromJson``."""
     raw_bytes = Files.readAllBytes(Paths.get(filePath))
     return self.deserFromJson(raw_bytes)
Esempio n. 44
0
# Collect the settings from the (option, value) pairs in ``options``.
# ``format`` and ``indexDir`` are only bound when their option is present.
stats = False
for o, a in options:
    if o == "--format":
        # Output template text, used to build the CustomTemplate below.
        format = a
    elif o == "--index":
        # Path of the index directory.
        indexDir = a
    elif o == "--stats":
        # Print a summary line to standard error after searching.
        stats = True


class CustomTemplate(Template):
    """``Template`` variant whose placeholders are introduced by ``#``."""

    delimiter = "#"

# Output template built from the --format option.
template = CustomTemplate(format)

# Open the index given by the --index option.
fsDir = SimpleFSDirectory(Paths.get(indexDir))
searcher = IndexSearcher(DirectoryReader.open(fsDir))

# All remaining command-line words form one query against "keywords";
# with AND as the default operator every word has to match.
analyzer = StandardAnalyzer()
parser = QueryParser("keywords", analyzer)
parser.setDefaultOperator(QueryParser.Operator.AND)
query = parser.parse(' '.join(args))
# Time the search call; at most 50 hits are requested.
start = datetime.now()
scoreDocs = searcher.search(query, 50).scoreDocs
duration = datetime.now() - start
if stats:
    print >>sys.stderr, "Found %d document(s) (in %s) that matched query '%s':" %(len(scoreDocs), duration, query)

for scoreDoc in scoreDocs:
    doc = searcher.doc(scoreDoc.doc)
    table = dict((field.name(), field.stringValue())
Esempio n. 45
0
    doc.add(Field("synopsis", synopsis.strip(), TextField.TYPE_STORED))
    doc.add(Field("keywords", ' '.join((command, name, synopsis, description)),
                  TextField.TYPE_NOT_STORED))
    doc.add(Field("filename", os.path.abspath(path), StringField.TYPE_STORED))

    writer.addDocument(doc)


if __name__ == '__main__':

    if len(sys.argv) != 2:
        print "Usage: python manindex.py <index dir>"

    else:
        lucene.initVM(vmargs=['-Djava.awt.headless=true'])
        directory = SimpleFSDirectory(Paths.get(sys.argv[1]))
        analyzer = StandardAnalyzer()
        analyzer = LimitTokenCountAnalyzer(analyzer, 10000)
        config = IndexWriterConfig(analyzer)
        writer = IndexWriter(directory, config)

        manpath = os.environ.get('MANPATH', '/usr/share/man').split(os.pathsep)
        for dir in manpath:
            print "Crawling", dir
            for name in os.listdir(dir):
                path = os.path.join(dir, name)
                if os.path.isdir(path):
                    indexDirectory(path)
        writer.commit()
        writer.close()
Esempio n. 46
0
def getReader(path):
    """Return a ``DirectoryReader`` opened on the index stored at *path*."""
    index_store = FSDirectory.open(Paths.get(path))
    return DirectoryReader.open(index_store)
Esempio n. 47
0
    def openStore(self):
        """Return an ``MMapDirectory`` for ``self.STORE_DIR``."""
        store_path = Paths.get(self.STORE_DIR)
        return MMapDirectory(store_path)
Esempio n. 48
0
    def openStore(self):
        """Return a ``SimpleFSDirectory`` for ``self.STORE_DIR``."""
        store_path = Paths.get(self.STORE_DIR)
        return SimpleFSDirectory(store_path)
Esempio n. 49
0
    def create(
            name = "Launcher",
            bundle = [],
            platforms=["mac", "win"], 
            outdir="dist.platforms", 
            ignorelibs=["*video*"]
        ):
        """Creates a launcher for the given platforms.

        Does nothing unless the running sketch is a standalone sketch, and
        returns immediately when ``--internal`` is on the command line.
        Otherwise the output directory below the sketch root is wiped and
        recreated, and for every platform the bundled
        ``launcher.<platform>.zip`` resource is unpacked into
        ``<outdir>/<platform>``.

        :param name: launcher name (not used in the visible body).
        :param bundle: glob patterns, relative to the sketch root, used by
            the ``copydata`` helper.
        :param platforms: platform names to build launchers for.
        :param outdir: output directory, relative to the sketch root.
        :param ignorelibs: ignore patterns used by the ``copyjars`` helper.
        """
        # NOTE(review): the list defaults are shared between calls; the
        # visible code only reads them.

        import jycessing.Runner as Runner
        import jycessing.launcher.StandaloneSketch as StandaloneSketch
        import sys
        # Check if we should bail out - we're not running from a standalone sketch
        if not isinstance(Runner.sketch, StandaloneSketch):
            print >>sys.stderr, "Don't use launcher.create() from processing - use the export button instead!"
            return

        # Check if we are already deployed. In that case, 
        # don't do anything
        if "--internal" in sys.argv: return

        # Our own imports 
        import jycessing.launcher.LaunchHelper as LaunchHelper
        
        import java.lang.System as System
        import java.nio.file.Paths as Paths
        import os, shutil, zipfile, inspect, stat, glob, errno

        # Path of the main script and of the directory it lives in, as
        # provided through system properties.
        main = System.getProperty("python.main")
        mainroot = System.getProperty("python.main.root")

        outdir = mainroot + "/" + outdir

        # Clean the outdir ...
        # (best effort: any error, e.g. a missing directory, is ignored)
        try: shutil.rmtree(outdir) 
        except: pass


        def copyeverything(src, dst):
            """The Machine That Copies EVERYTHING.
            https://www.youtube.com/watch?v=ibEdgQJEdTA

            Copies a directory tree, falling back to a plain file copy
            when ``src`` is not a directory.
            """
            import shutil, errno
        
            try:
                shutil.copytree(src, dst)
            except OSError as exc:
                if exc.errno == errno.ENOTDIR:
                    shutil.copy(src, dst)
                else: raise

        # NOTE(review): copyjars and copydata are defined but never called
        # in the visible part of this function.
        def copyjars(root):
            """Copy jars & co"""
            sketch = Runner.sketch
            _mainjar = sketch.getMainJarFile()
            mainjar, mainjarname = _mainjar.getAbsolutePath(), _mainjar.getName()
            shutil.copyfile(mainjar, root + "/" + mainjarname)
            
            libraries = sketch.getLibraryDirectories()
            for lib in libraries:
                # NOTE(review): every library is copied to the same
                # destination; copytree fails when it already exists -- confirm.
                shutil.copytree(lib.getPath(), root + "/libraries", ignore=shutil.ignore_patterns(*ignorelibs))


        def copydata(runtimedir):
            """Copy the main script and the given data"""
            # Create runtime directory 

            try: os.mkdir(runtimedir)
            except: pass

            # Copy bundled files
            for data in bundle:
                for f in list(glob.iglob(mainroot + "/" + data)):
                    copyeverything(f, runtimedir + "/" + f.replace(mainroot, ""))


            # Eventually copy the main file
            shutil.copyfile(main, runtimedir + "/sketch.py")


        # ... and recreate it
        os.mkdir(outdir)
        for platform in platforms: 

            pdir = outdir + "/" + platform
            tmpfile = pdir + ".zip"

            os.mkdir(pdir)

            # Copy archive
            LaunchHelper.copyResourceTo("launcher." + platform + ".zip", Paths.get(tmpfile))
            
            # Unzip
            z = zipfile.ZipFile(tmpfile, "r")
            z.extractall(pdir)
            z.close()

            # Try to remove the platform file we created
            try:
                os.remove(tmpfile)
            except Exception, e:
                print("Could not remove %s we used for creating the launcher. Please report." % tmpfile, e)
Esempio n. 50
0
    def loadResource(self, u):
        """Open resource *u* through the system class loader.

        :return: the result of ``getResourceAsStream(u)``.
        """
        class_loader_type = self.java.lang.ClassLoader
        return class_loader_type.getSystemClassLoader().getResourceAsStream(u)

import java.nio.file.Files as Files
import java.nio.file.Paths as Paths
import java.lang.System as System
import java.util.List
from java.awt import *
import ucar.unidata.idv.DefaultIdv as DefaultIdv
import ucar.unidata.idv.ui.ImageGenerator as ImageGenerator

# Create the IDV instance and the ISL interpreter that wraps it.
idv = DefaultIdv([])
islInterpreter = ImageGenerator(idv)

# need to load a few resources from the classpath

my_files = ["ucar/unidata/idv/resources/python/shell.py",
           "ucar/unidata/idv/resources/python/isl.py"]

cpl = resourceLoader()
# Scratch file inside the JVM's temporary directory; reused for each resource.
tmpfile = System.getProperty("java.io.tmpdir") + "/idv.py"

# Copy each resource to the scratch file, execute it in this namespace and
# delete the file again.
# NOTE(review): the stream returned by loadResource is never closed, and
# Files.copy is called without REPLACE_EXISTING, so a leftover idv.py from
# an earlier failed run would make the copy fail -- confirm.
for f in my_files:
    inpstr = cpl.loadResource(f)
    path = Paths.get(tmpfile)
    Files.copy(inpstr, path)
    execfile(tmpfile)
    Files.delete(path)
Esempio n. 51
0
def convert(input_svg_path, rotation_x, rotation_y):
    """Convert an SVG file to a PNG file and return the PNG path.

    The PNG is written next to the input as
    ``<input without extension>_rotX_<rotation_x>_rotY_<rotation_y>.png``.
    If that file already exists it is returned without converting again.
    Otherwise the SVG is rewritten to a temporary ``*_modified.svg`` file,
    transcoded with ``PNGTranscoder``, post-processed by ``_translation``
    and written back with ``ImageIO``.

    :param input_svg_path: path of the source file; must end in ``.svg``.
    :param rotation_x: used in the output name and passed to ``_translation``.
    :param rotation_y: used in the output name and passed to ``_translation``.
    :return: path of the PNG file.
    :raises common.ScratchtobatError: when any step of the conversion fails.
    """
    assert isinstance(input_svg_path, (str, unicode))
    assert os.path.splitext(input_svg_path)[1] == ".svg"

    input_file_name = os.path.splitext(input_svg_path)[0]
    output_png_path = "{}_rotX_{}_rotY_{}.png".format(input_file_name, rotation_x, rotation_y)
    _log.info("      converting '%s' to Pocket Code compatible png '%s'", input_svg_path, output_png_path)

    # Temporary rewritten copy of the SVG and its URL form for the transcoder.
    output_svg_path = input_svg_path.replace(".svg", "_modified.svg")
    output_svg_URI = Paths.get(output_svg_path).toUri().toURL().toString()

    if os.path.exists(output_png_path):
        _log.error("      '%s' already exists", output_png_path)
        #assert False # "Still a Duplicate?"
        # remove temporary files
        if os.path.exists(output_svg_path):
            os.remove(output_svg_path)
        return output_png_path # avoid duplicate conversions!

    # NOTE(review): png_ostream is never reassigned, so the flush/close in
    # the finally clause below never runs.
    png_ostream = None
    error = None
    try:
        _parse_and_rewrite_svg_file(input_svg_path, output_svg_path)

        input_svg_image = TranscoderInput(output_svg_URI)

        # NOTE(review): this FileOutputStream is not stored in png_ostream,
        # so nothing in this function closes it.
        output_png_image = TranscoderOutput(FileOutputStream(output_png_path))

        _log.info("      converting '%s' to Pocket Code compatible png '%s'",
                  input_svg_path, output_png_path)
        png_converter = PNGTranscoder()
        png_converter.transcode(input_svg_image, output_png_image)
        assert os.path.exists(output_png_path)

        final_image = _translation(output_png_path, rotation_x, rotation_y)

        if final_image is None:
            raise RuntimeError("...")

        from javax.imageio import ImageIO
        from java.io import File
        # Overwrite the transcoded PNG with the post-processed image.
        ImageIO.write(final_image, "PNG", File(output_png_path))
        return output_png_path
    except BaseException as err:
        # Log the failure and remember a domain error; it is raised after
        # the finally clause has cleaned up.
        import traceback
        import sys
        exc_info = sys.exc_info()
        _log.error(err)
        _log.error(traceback.format_exc())
        _log.error(exc_info)
        error = common.ScratchtobatError("SVG to PNG conversion call failed for: %s" % input_svg_path)
    finally:
        # free resources
        if png_ostream != None:
            png_ostream.flush()
            png_ostream.close()
        # remove temporary files
        if os.path.exists(output_svg_path):
            os.remove(output_svg_path)

    if error != None:
        raise error