コード例 #1
0
def process_wget_output(lines):
    """Record and index the downloads described by a batch of wget output.

    Every line is passed to ``parse``; lines for which it returns ``None``
    are dropped.  The remaining (remote URL, local path) pairs are written
    to the ``SqliteDict`` at ``RETRIEVE_CACHE_PATH`` (scheme-stripped URL ->
    local path with ``WGET_DOWNLOADS + "/"`` removed).  Afterwards each pair
    whose local path satisfies ``is_html_file`` is handed to
    ``Index.index_html``.
    """
    html_indexer = index.Index()

    logger.info('Worker processing lines')
    downloads = []

    for raw_line in lines:
        parsed = parse(raw_line)

        logger.info('Worker input {}'.format(raw_line))
        logger.info('Parsed {}'.format(parsed))

        if parsed is None:
            continue
        downloads.append(parsed)

    # Persist the URL -> relative path mapping in one committed batch.
    with SqliteDict(RETRIEVE_CACHE_PATH) as url_map:
        for url, path in downloads:
            relative_path = path.replace(WGET_DOWNLOADS + "/", "")
            url_map[path_utils.strip_scheme(url)] = relative_path

        url_map.commit()

    for url, path in downloads:
        # The HTML check is made on the full path, before it is shortened.
        if not is_html_file(path):
            continue
        relative_path = path.replace(WGET_DOWNLOADS + "/", "")
        logger.info('Found non-none path in wget output {} {}'.format(
            url, relative_path))
        html_indexer.index_html(url, relative_path)

    logger.info('Worker EOF reached')
コード例 #2
0
ファイル: soph.py プロジェクト: AXAz0r/Soph
    def reloadIndex(self):
        """Rebuild ``self.index`` when the ``index`` module was reloaded.

        ``reload(index, "index.py")`` is called first; a new
        ``index.Index("index")`` is created when that call returns a truthy
        value or when ``self.index`` is falsy.

        Returns the ``index`` module object (not ``self.index``).
        """
        module_reloaded = reload(index, "index.py")
        if module_reloaded or not self.index:
            self.index = index.Index("index")
        return index
コード例 #3
0
    def __init__(self):
        """Build the class-wide signature index if it does not exist yet.

        On the first call every class in ``Registry.MAGIC_HANDLERS.classes``
        is instantiated and recorded in ``MagicResolver.magic_handlers``.
        Its regex rules and then its literal rules are added to the shared
        indexer under consecutive ids taken from ``MagicResolver.count``.
        Later calls find the indexer already set and do nothing.
        """
        if MagicResolver.indexer:
            return

        MagicResolver.indexer = index.Index()

        def register(handler, rule, word_type):
            # Index the rule's pattern under the next free id and remember
            # which handler and rule that id belongs to.
            MagicResolver.indexer.add_word(rule[0], MagicResolver.count,
                                           word_type)
            MagicResolver.index_map[MagicResolver.count] = handler
            MagicResolver.rule_map[MagicResolver.count] = rule
            MagicResolver.count += 1

        for handler_cls in Registry.MAGIC_HANDLERS.classes:
            handler = handler_cls()
            MagicResolver.magic_handlers.append(handler)

            for rule in handler.regex_rules:
                register(handler, rule, index.WORD_EXTENDED)

            for rule in handler.literal_rules:
                register(handler, rule, index.WORD_ENGLISH)

        pyflaglog.log(
            pyflaglog.DEBUG,
            "Loaded %s signatures into Magic engine" % MagicResolver.count)
コード例 #4
0
ファイル: othello.py プロジェクト: miudit/othello
 def __init__(self, screen):
     """Create the board, place the four starting discs and load the images.

     Args:
         screen: stored as ``self.screen``; it is not used further here.
     """
     # Floor division keeps the row coordinate an integer on Python 3 as
     # well as on Python 2 (a plain "/" yields a float on Python 3).
     self.board = [
         Cell(i % Config.CELL_NUM, i // Config.CELL_NUM)
         for i in range(Config.CELL_NUM**2)
     ]
     self.at(3, 3).state = Cell.WHITE
     self.at(3, 4).state = Cell.BLACK
     self.at(4, 3).state = Cell.BLACK
     self.at(4, 4).state = Cell.WHITE
     self.empty_img = pygame.image.load('empty.png').convert()
     self.black_img = pygame.image.load('black.png').convert()
     self.white_img = pygame.image.load('white.png').convert()
     # Each rect is taken from its own image (the black and white rects
     # used to be copied from the empty image).
     self.empty_rect = self.empty_img.get_rect()
     self.black_rect = self.black_img.get_rect()
     self.white_rect = self.white_img.get_rect()
     self.screen = screen
     self.__index = index.Index()
     # Placeholder cell used by at() for references outside the board.
     self.__dummyCell = Cell(-1, -1)
     self.__prevStates = None
     # Original note: the container has 64 entries.
     self.__referenceContainer = self.__initReferenceContainer()
     # List of empty cells; used by placeableCells().
     self.__emptyCells = list(self.board)
     self.modifyEmptyCells(3, 3)
     self.modifyEmptyCells(3, 4)
     self.modifyEmptyCells(4, 3)
     self.modifyEmptyCells(4, 4)
コード例 #5
0
ファイル: main.py プロジェクト: setr/cs429
def main():
    """ Do not modify.
    Run and evaluate all methods.

    Reads the data set, indexes the documents, passes every scorer and
    evaluator to ``run_all`` with NHITS = 10 and writes the outcome to
    'Results.md'.
    """
    queries, relevances, docs = read_data()
    NHITS = 10
    indexer = index.Index(docs)

    # Cosine and RSV first, then BM25 for k in (1, 2) crossed with b in (.5, 1).
    scorers = [score.Cosine(), score.RSV()]
    scorers.extend(score.BM25(k=k, b=b) for k in (1, 2) for b in (.5, 1))

    evaluators = [
        evaluate.Precision(),
        evaluate.Recall(),
        evaluate.F1(),
        evaluate.MAP(),
    ]

    write_results(
        run_all(queries, relevances, docs, indexer, scorers, evaluators,
                NHITS),
        'Results.md')
コード例 #6
0
	def parse_folder(path):
		"""
		Parses all .bib files in given folder.

		Each file is submitted to a process pool; the collected items are
		sorted by const.DEFAULT_ORDER_BY, indexed, finalized against that
		index, and the index is then updated with the finalized items.

		Returns a tuple (parsed_items, search_index) containing all items found.
		Raises Exception when path is not a directory.
		"""
		if not os.path.isdir(path):
			raise Exception("Path to folder expected")

		parsed_items = []
		files = utils.search_in_folder(path, lambda path: path.endswith(".bib"))
		# The context manager shuts the pool down even when a worker raises,
		# so a failing parse no longer leaves the executor open.
		with concurrent.futures.ProcessPoolExecutor(
			max_workers=multiprocessing.cpu_count()
		) as executor:
			futures = [
				executor.submit(
					BibParser()._parse_file,
					os.path.join(path, filename)
				)
				for filename in files
			]
			for future in futures:
				parsed_items += future.result()

		# sorted() already returns a new list.
		parsed_items = sorted(
			parsed_items,
			key=BibItem.key_to_key_func(const.DEFAULT_ORDER_BY)
		)
		item_index = search_index.Index(parsed_items)
		fin_ctx = FinalizingContext(item_index)
		for item in parsed_items:
			item.finalize_item_set(fin_ctx)
		item_index.update(parsed_items)
		return (parsed_items, item_index)
コード例 #7
0
ファイル: LogicalIndex.py プロジェクト: johnmccabe/pyflag
    def test01timing_tests(self):
        """ Tests timing of indexing """
        for count in [10, 100, 1000, 10000, 100000]:
            words = [
                line.strip() for line in open(self.word_file) if len(line) >= 3
            ]
            idx = index.Index()
            t = time.time()
            print "Loading %s words: " % count,

            ## We want to load words from the dictionary in random
            ## order so we dont have a bias due to the fact the
            ## dictionary is sorted.
            print "Randomizing dictionary words"
            for line_count in range(0, count):
                if len(words) == 0: break
                i = random.randint(0, len(words) - 1)
                idx.add_word(words.pop(i), 1, index.WORD_LITERAL)

            new_t = time.time()
            print "Done - %s seconds (%s lines)" % (new_t - t, line_count)
            fd = open(self.test_file)
            count = 0
            while 1:
                data = fd.read(1024 * 1024)
                if len(data) == 0: break

                for offset, matches in idx.index_buffer(data, unique=False):
                    for id, length in matches:
                        count += 1

            print "Indexed file in %s seconds (%s hits)" % (time.time() -
                                                            new_t, count)
コード例 #8
0
def build_index():
    """Return an index mapping each word of a Unicode name to its character.

    Walks every code point from 32 up to ``sys.maxunicode``; characters
    without a name contribute nothing because their name defaults to "".
    """
    idx = index.Index()
    for code_point in range(32, sys.maxunicode + 1):
        char = chr(code_point)
        name_words = unicodedata.name(char, "").split()
        for word in name_words:
            idx.add(word, char)
    return idx
コード例 #9
0
def run(driver_path=None):
    """Show the menu repeatedly and dispatch on the number the user types.

    '1' prints the index, '2' runs the stock graph, '3' prints the rate,
    '4' calculates the rate, '5' runs the real-time price view and leaves
    the loop, '6' exits the interpreter.  Anything else prints a hint and
    asks again.  Ctrl-C prints a message and leaves the loop.

    Args:
        driver_path: forwarded to ``index.Index`` and ``rate.Rate``.
    """
    while True:
        try:
            print_menu()
            choice = input('Which information do you want to know : ')
            if choice == '1':
                index.Index(driver_path).print_index()
            elif choice == '2':
                stockgraph.run()
            elif choice == '3':
                rate.Rate(driver_path).print_rate()
            elif choice == '4':
                rate.Rate(driver_path).calculate_rate()
            elif choice == '5':
                realtimePrice.run_mystock()
                break
            elif choice == '6':
                import sys
                sys.exit()
            else:
                print('invalid number!! write valid number')
        except KeyboardInterrupt:
            print('시스템이 강제로 종류 되었습니다.\n')
            break
コード例 #10
0
 def test_OverlapSearchEngine(self):
     """An overlap search for three terms returns only document 0."""

     def resource(relative_path):
         # Absolute path of a file below the test directory.
         return os.path.abspath(os.path.join(testdir, relative_path))

     index = Index.Index(resource("resources/test_cacm.all"),
                         resource("resources/common_words"))
     searchEngine = SearchEngine.OverlapSearchEngine(index)
     self.assertEqual(
         searchEngine.search("department matrix programming"), [0])
コード例 #11
0
def home():
    """Handle the home page: store a posted task, then list all tasks.

    On POST the 'task', 'time1' and 'time2' form fields are passed to
    ``index.Index().home``.  In every case the tasks are queried ordered by
    ``time1`` and rendered with 'index.html'.
    """
    if request.method == 'POST':
        form = request.form
        submitted = {
            'new_task': form.get('task'),
            'time1': form.get('time1'),
            'time2': form.get('time2'),
        }
        index.Index().home(**submitted)
    ordered_tasks = database.Task.query.order_by(database.Task.time1).all()
    return render_template('index.html', tasks=ordered_tasks)
コード例 #12
0
 def __init__(self, opts):
     """Configure the engine from *opts* and create its index.

     Reads "dir" (default "index"), "startIndexing" (default False) and
     "maxResults" (default 150, converted with int) from *opts*.
     """
     self.log = SophLogger("textEngine.log")
     self.options = opts

     index_dir = opts.get("dir", "index")
     start_indexing = opts.get("startIndexing", False)
     max_results = int(opts.get("maxResults", 150))

     self.dir = index_dir
     self.start = start_indexing
     self.maxResults = max_results
     self.index = index.Index(index_dir, start=start_indexing)
     self.qp = question.DumbQuestionParser()
コード例 #13
0
ファイル: LogicalIndex.py プロジェクト: johnmccabe/pyflag
    def build_idx(self, dictionary):
        """Return a new indexer holding every value of *dictionary*.

        Each value is added as a WORD_ENGLISH entry whose id is its key.
        """
        indexer = index.Index()
        for word_id, word in dictionary.items():
            indexer.add_word(word, word_id, index.WORD_ENGLISH)
        return indexer
コード例 #14
0
 def test_BooleanSearchEngine(self):
     """A parsed boolean request returns only document 0."""

     def resource(relative_path):
         # Absolute path of a file below the test directory.
         return os.path.abspath(os.path.join(testdir, relative_path))

     booleanRequest = SearchEngine.BooleanRequestParser().parse(
         "AND(department,OR(NOT(program),matrix))")
     index = Index.Index(resource("resources/test_cacm.all"),
                         resource("resources/common_words"))
     searchEngine = SearchEngine.BooleanSearchEngine(index)
     self.assertEqual(searchEngine.search(booleanRequest), [0])
コード例 #15
0
ファイル: test_index.py プロジェクト: gagnadref/search-engine
 def test_splitCACMFile(self):
     """The first document split from the CACM file equals the fixture."""
     index = Index.Index()
     cacm_path = os.path.abspath(
         os.path.join(testdir, "resources/test_cacm.all"))
     with open(cacm_path, "r") as cacmFile:
         listOfDocuments = index.splitCACMFile(cacmFile)
     self.assertEqual(listOfDocuments[0], self.test_document)
コード例 #16
0
ファイル: spelling.py プロジェクト: ra2003/whoosh-2
 def index(self):
     """Return the backend index, creating it on first use.

     The index is created in ``self.storage`` under ``self.indexname``;
     ``create`` is set when ``index.exists`` reports that it is missing.
     """
     import index

     if self._index:
         return self._index

     must_create = not index.exists(self.storage, indexname = self.indexname)
     self._index = index.Index(self.storage, create = must_create,
                               schema = self._schema(),
                               indexname = self.indexname)
     return self._index
コード例 #17
0
ファイル: query.py プロジェクト: igorgomes96/ModeloBooleano
    def executeQuery(self):
        """Evaluate the boolean query stored in ``self.nomeArqQuery``.

        The query text is lower-cased and tokenized.  Every token that is
        not in ``self.OPERATORS`` is replaced by the list of document ids
        found for it in the loaded index.  The operators are then reduced
        in three passes: "!" first, then "&", then "|".

        Returns:
            The ``self.base`` entries for the ids left in ``query[0]``,
            in ascending id order.
        """
        # Read through a context manager so the query file is always closed.
        with open(self.nomeArqQuery) as query_file:
            query = query_file.read().lower()
        query = util.tokenize(query)
        indObj = index.Index(self.nomeArqBase, self.nomeArqIndice)
        ind = indObj.loadIndex()
        indArqs = self.base.keys()

        # Replace the tokens with their lists of document ids.
        for i, v in enumerate(query):
            if v not in self.OPERATORS:
                query[i] = [int(oc.doc) for oc in ind[v]]

        # Each pass below edits the list while enumerate() walks it, so the
        # pass is repeated until a full scan finds no operator of that kind.

        # NOT
        while True:
            flag = False
            for i, v in enumerate(query):
                if v == "!":
                    flag = True
                    del query[i]  # remove operator
                    query[i] = conditions.Condition.notCondition(query[i], indArqs)

            if not flag:
                break

        # AND
        while True:
            flag = False
            for i, v in enumerate(query):
                if v == "&":
                    flag = True
                    del query[i]  # remove operator
                    query[i - 1] = conditions.Condition.andCondition(query[i-1], query[i])  # execute intersection
                    del query[i]  # remove one of the lists

            if not flag:
                break

        # OR
        while True:
            flag = False
            for i, v in enumerate(query):
                if v == "|":
                    flag = True
                    del query[i]  # remove operator
                    query[i - 1] = conditions.Condition.orCondition(query[i-1], query[i])  # execute union
                    del query[i]  # remove one of the lists

            if not flag:
                break

        query[0].sort()
        return [self.base[q] for q in query[0]]

        
コード例 #18
0
ファイル: test_index.py プロジェクト: gagnadref/search-engine
 def test_createIndexFromCACMFile(self):
     """Document 0 of the test CACM file is indexed with the expected terms."""

     def resource(relative_path):
         # Absolute path of a file below the test directory.
         return os.path.abspath(os.path.join(testdir, relative_path))

     index = Index.Index(resource("resources/test_cacm.all"),
                         resource("resources/common_words"))
     expected_terms = {
         'depart': 1,
         'program': 1,
         'scheme': 1,
         'matrix': 1,
         'techniqu': 1,
     }
     self.assertEqual(index.getIndexWithDocid(0), expected_terms)
コード例 #19
0
    def test_search_items(self):
        """
        Tests if parsed items can be searched by a bunch of parameters
        """
        items = bib_parser.BibParser().parse_string(TEST_ITEMS)
        item_index = index.Index(items)
        for item in items:
            item.process_crossrefs(item_index)
        item_index.update(items)

        def assert_single(found):
            # Every search in this test is expected to yield one item.
            self.assertEqual(len(list(found)), 1)

        def year_range(year_from, year_to):
            # Combined predicate on both year bounds.
            return search.and_([
                search.search_for("year_from", year_from),
                search.search_for("year_to", year_to)
            ])

        assert_single(
            filter(search.search_for_iterable("author", "Петров"), items))

        # exact match
        assert_single(filter(year_range(1825, 1825), items))

        # partial intersection
        assert_single(filter(year_range(1500, 1600), items))

        # inner containment
        assert_single(filter(year_range(1499, 1501), items))

        # outer containment
        assert_single(filter(year_range(1400, 1600), items))

        assert_single(item_index["keywords"]["grumbling"])

        assert_single(
            item_index["keywords"]["cinquecento"]
            & item_index["keywords"]["historical dance"])
コード例 #20
0
ファイル: test_index.py プロジェクト: gagnadref/search-engine
 def test_persistIndex(self):
     """Persisting the index writes the expected JSON text to disk."""
     cacm_path = os.path.abspath(
         os.path.join(testdir, "resources/test_cacm.all"))
     common_words_path = os.path.abspath(
         os.path.join(testdir, "resources/common_words"))
     index = Index.Index(cacm_path, common_words_path)

     target = "test/resources/index.txt"
     index.persistIndex(target)
     with open(target, "r") as persistedIndex:
         persisted_text = persistedIndex.read()

     expected_text = """[{"program": 1, "scheme": 1, "depart": 1, "techniqu": 1, "matrix": 1}, {"program": 1, "engin": 1, "glossari": 1, "of": 1, "comput": 2, "terminolog": 1}]"""
     self.assertEqual(persisted_text, expected_text)
コード例 #21
0
    def test_inverted_index_search(self):
        """The keyword subindex holds both the direct and the inverted key."""
        items = bib_parser.BibParser()._parse_string(TEST_ITEMS)
        item_index = index.Index(items)

        direct_key = "cinquecento"
        inverted_key = const.INVERTED_INDEX_KEY_PREFIX + direct_key

        keyword_index = item_index["keywords"]
        for key in (direct_key, inverted_key):
            self.assertIn(key, keyword_index)

        inverted_hits = item_index["keywords"][inverted_key]
        self.assertEqual(len(inverted_hits), 1)
        self.assertEqual(utils.first(inverted_hits).id(), "id_2")
コード例 #22
0
ファイル: LogicalIndex.py プロジェクト: johnmccabe/pyflag
    def test04uniqueIndexing(self):
        """ Test unique indexing mode """
        idx = index.Index(unique=True)
        idx.add_word("\d{2,5}", 1, index.WORD_EXTENDED)

        data = "1234567890" * 3
        results = []
        for offset, matches in idx.index_buffer(data, unique=True):
            for id, length in matches:
                print "Found hit %s" % data[offset:offset + length]
                results.append(offset)

        ## We should only find a single hit since we are in unique
        ## mode
        self.assertEqual(len(results), 1)
コード例 #23
0
    def on_botonIniciar_clicked(self, widget):
        """Check the entered credentials and open the application on success.

        On a successful lookup the user's id and username are stored in
        ``globalDef``, a welcome notice is shown, the login window is hidden
        and ``index.Index()`` is created.  Otherwise an error is shown.

        Args:
            widget: the widget passed to this handler; not used here.
        """
        # Look up the user name and password to see whether the login is valid.
        usuario = users.buscarLogin(self.textoUsuario.get_text(),
                                    self.textoClave.get_text())

        # Identity comparison is the correct test for None and cannot be
        # affected by a custom __ne__ on the user object.
        if usuario is not None:
            globalDef.glb_usuario = usuario.getId()
            globalDef.glb_usrNombre = usuario.getUsername()
            mostrar = mensajes.aviso(
                self.winLogin, mensajes.LOGIN_TRUE + '\n' + usuario.getName())
            # The login succeeded, so hide the login window.
            self.winLogin.hide()
            # Main window of the application.
            inicio = index.Index()
        else:
            mostrar = mensajes.error(self.winLogin, mensajes.LOGIN_FALSE)
コード例 #24
0
    def test_parse_string(self):
        """
        Tests if string can be succesfully parsed by BibParser
        """
        items = bib_parser.BibParser()._parse_string(TEST_ITEMS)
        item_index = index.Index(items)

        # Language ids starting with "!" are left out of the comparison.
        languages = {
            langid
            for langid in item_index["langid"].keys()
            if not langid.startswith("!")
        }
        keywords = set(item_index["keywords"].keys())

        self.assertEqual(len(items), 2)
        self.assertEqual(languages, EXPECTED_LANGUAGES)
        self.assertEqual(keywords, EXPECTED_KEYWORDS)

        item1 = next(iter(item_index["id"]["id_1"]))
        for brace in ('{', '}'):
            self.assertTrue(brace not in item1.title())
        self.assertTrue('}' not in item1.title())
コード例 #25
0
ファイル: base.py プロジェクト: pombredanne/pakfire
    def __init__(self, pakfire, name, description):
        """Initialise the repository and attach it to the solver.

        Stores the three arguments as attributes, creates the matching
        ``satsolver.Repo`` and sets its priority, then creates the
        repository's index.  The repository starts without a cache and
        marked as not opened.

        NOTE(review): ``self.pool`` and ``self.priority`` are read here but
        not assigned in this method; presumably they are provided by the
        class (e.g. as properties) -- confirm.
        """
        self.pakfire = pakfire
        self.name = name
        self.description = description

        # Reference to corresponding Repo object in the solver.
        self.solver_repo = satsolver.Repo(self.pool, self.name)
        self.solver_repo.set_priority(self.priority)

        # Some repositories may have a cache.
        self.cache = None

        # Formatting with %s converts this object to a string at this point.
        log.debug("Initialized new repository: %s" % self)

        # Create an index (in memory).
        self.index = index.Index(self.pakfire, self)

        # Marks if this repository has been opened.
        self.opened = False
コード例 #26
0
    def test02UCS16Indexing(self):
        """ Test unicode indexing - simple words """
        dictionary = { 5: u"hello", 10:u"world" }
        ## These are the encodings which will be tested:
        encodings = ["utf-16_le", "utf-16_be", "rot-13", "ms-pst"]
        line = u"Hello cruel world, hello..."

        print
        for encoding in encodings:
            print "Testing encoding %s" % encoding
            idx = index.Index()
            for k,v in dictionary.items():
                idx.add_word(v.encode(encoding), k, index.WORD_LITERAL)

            data = line.encode(encoding)
            for offset, matches in idx.index_buffer(data):
                for id , length in matches:
                    word = dictionary[id]
                    matched = data[offset:offset+length]
                    print "word: %s" % word, "matched: %r" % matched
                    self.assertEqual(word.lower(), matched.decode(encoding).lower())
コード例 #27
0
ファイル: LogicalIndex.py プロジェクト: johnmccabe/pyflag
def reindex():
    global INDEX, INDEX_VERSION
    pyflaglog.log(pyflaglog.DEBUG, "Index manager: Building index trie")
    start_time = time.time()

    dbh = DB.DBO()
    INDEX_VERSION = Indexing.get_dict_version()
    dbh.execute("select word,id,type,class from dictionary")
    INDEX = index.Index()
    for row in dbh:
        ## Classes starting with _ are private classes and want to
        ## return all hits.
        if row['class'].startswith("_"):
            id = row['id'] + 2**30
        else:
            id = row['id']

        t = row['type']
        ## Literal and extended are encoded using latin
        if t == 'literal':
            INDEX.add_word(row['word'].decode("latin").encode("latin"), id,
                           index.WORD_LITERAL)
        elif t == 'regex':
            if type(row['word']) == str:
                word = row['word'].decode('latin')
            else:
                word = row['word']

            INDEX.add_word(word.encode("latin"), id, index.WORD_EXTENDED)
        elif t == 'word':
            try:
                word = row['word'].decode("UTF-8").lower()
                for e in config.INDEX_ENCODINGS.split(","):
                    w = word.encode(e)
                    if len(w) >= 3:
                        INDEX.add_word(w, id, index.WORD_ENGLISH)
            except UnicodeDecodeError, error:
                pyflaglog.log(
                    pyflaglog.ERROR,
                    "Unable to encode in encoding %e: %s" % (e, error))
コード例 #28
0
    def test_parse_string(self):
        """
        Tests if string can be succesfully parsed by BibParser
        """
        items = bib_parser.BibParser().parse_string(TEST_ITEMS)
        item_index = index.Index(items)
        for item in items:
            item.process_crossrefs(item_index)
        item_index.update(items)

        self.assertEqual(len(items), 2)
        self.assertEqual(set(item_index["langid"].keys()), EXPECTED_LANGUAGES)
        self.assertEqual(set(item_index["keywords"].keys()), EXPECTED_KEYWORDS)

        item1 = next(iter(item_index["id"]["id_1"]))
        for brace in ('{', '}'):
            self.assertTrue(brace not in item1.title())

        expected_annotation = '<a href="http://example.com/description">http://example.com/description</a>'
        self.assertEqual(item1.annotation(), expected_annotation)
コード例 #29
0
 def mainprogram(self):
     """Create an Index for the combo box's current text and switch to it.

     The new object is kept in ``self.index``; this object is closed before
     the new one is shown.
     """
     selected_class = self.comboBox.currentText()
     self.index = index.Index(selected_class)
     self.close()
     self.index.show()
コード例 #30
0
def main():
    """Create the index and the Tk root, build the GUI and run the loop."""
    search_index = index.Index()
    root = tk.Tk()
    # Keep a reference to the GUI object for as long as the loop runs.
    app = gui(root, search_index)
    root.mainloop()