Code example #1
def test_sub_blocks():
	start = [8]
	quant = [[64,8,8]]
	color = ['hsv']
	for s in start:
		for q in quant:
			print('--- # -- ')
			db_desc = SubBlockDescriptor(db)
			q1_desc = SubBlockDescriptor(qs1_w1)
			q2_desc = SubBlockDescriptor(qs2_w1,masks=True,mask_path=mask_root)
			db_desc.compute_descriptors(grid_blocks=[s,s],quantify=q,color_space=color[0])
			q1_desc.compute_descriptors(grid_blocks=[s,s],quantify=q,color_space=color[0])
			q2_desc.compute_descriptors(grid_blocks=[s,s],quantify=q,color_space=color[0])
			# -- SEARCH -- #
			q1_search = Searcher(db_desc.result,q1_desc.result)
			q2_search = Searcher(db_desc.result,q2_desc.result)
			q1_desc.clear_memory()
			q2_desc.clear_memory()
			db_desc.clear_memory()
			q1_search.search(limit=3)
			q2_search.search(limit=3)
			# -- EVALUATION -- #
			q1_eval = EvaluateDescriptors(q1_search.result,res_root+os.sep+'gt_corresps1.pkl')
			q2_eval = EvaluateDescriptors(q2_search.result,res_root+os.sep+'gt_corresps2.pkl')
			q1_search.clear_memory()
			q2_search.clear_memory()
			q1_eval.compute_mapatk(limit=1)
			q2_eval.compute_mapatk(limit=1)
			filename = res_root+os.sep+'tests'+os.sep+'sub_res_'+str(s)+'_'+str(q[0])+'.pkl'
			with open(filename,'wb') as f:
				pickle.dump(q1_eval.score,f)
				pickle.dump(q2_eval.score,f)
			print('--- # -- ')
Code example #2
    def do_searching(self):
        '''Run search.
        '''
        # start searching
        if not self.max_page_limit:
            wiki_searcher = Searcher(self.start_lemma_name,
                                     self.end_lemma_name)
        else:
            try:
                limit_int = int(self.max_page_limit)
            except ValueError:
                self.display_text('Invalid maximum limit.')
                return
            wiki_searcher = Searcher(self.start_lemma_name,
                                     self.end_lemma_name,
                                     limit_int)

        self.display_text('Busy searching...')
        log('Busy searching...')
        wiki_searcher.run_search()
        # got it, display result
        if wiki_searcher.found_target:
            self.show_result(wiki_searcher.get_result())
        else:
            self.clear_text()
            self.display_text('Search reached the maximum page limit.\n')
        # clean Entry for next search
        self.start_entry.delete(0, tk.END)
        self.end_entry.delete(0, tk.END)
Code example #3
    def main(args):
        try:
            if len(args) != 3:
                Main.print_with_help("Invalid number of parameters.\n")
                return 1

            dirs = args[:-1]
            for directory in dirs:
                if not os.path.isdir(directory):
                    Main.print_with_help("'{}' is not directory.\n".format(directory))
                    return 1
                elif not len(os.listdir(directory)):
                    print("'{}' directory is empty.\n".format(directory))
                    return 1

            search_type = args[-1]

            # use the strategy pattern
            if search_type == '-n':
                searcher = Searcher(SearchByName())
            elif search_type == '-c':
                searcher = Searcher(SearchByContent())
            else:
                Main.print_with_help("Invalid parameter.\n")
                return 1

            searcher.duplicate_search(dirs)
            searcher.print_results()

        except Exception as exp:
            print("Exception: " + str(exp))
            return -1
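The example above relies on the strategy pattern: Searcher is constructed with either a SearchByName or a SearchByContent strategy and then runs duplicate_search over the directories. The strategy classes themselves are not shown, so the sketch below is only an assumed shape for them; apart from duplicate_search and print_results, every class body, method name, and signature here is a guess for illustration, not code from the original project.

import hashlib
import os


# Minimal sketch with assumed interfaces, not the original project's code.
class SearchByName:
    def key_for(self, path):
        # Files sharing a base name are treated as potential duplicates.
        return os.path.basename(path)


class SearchByContent:
    def key_for(self, path):
        # Files with identical content hash to the same key.
        with open(path, 'rb') as f:
            return hashlib.sha256(f.read()).hexdigest()


class Searcher:
    def __init__(self, strategy):
        self._strategy = strategy
        self._groups = {}

    def duplicate_search(self, dirs):
        # Bucket every file under the given directories by the strategy's key.
        for directory in dirs:
            for root, _, files in os.walk(directory):
                for name in files:
                    path = os.path.join(root, name)
                    key = self._strategy.key_for(path)
                    self._groups.setdefault(key, []).append(path)

    def print_results(self):
        # Report only keys shared by more than one file.
        for key, paths in self._groups.items():
            if len(paths) > 1:
                print('{}: {}'.format(key, ', '.join(paths)))

Swapping the strategy object changes only how files are keyed; the traversal and reporting stay the same, which is the point of the pattern.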
Code example #4
File: __main__.py Project: dbvirus/dbvirus
def main(
    email,
    api_key=None,
    query=None,
    mongo_url=None,
    download_all=False,
    limit=None,
    **kwargs,
):
    """
    Given an e-mail and API key, searches for Human RNA and outputs the result
    """

    searcher = Searcher(email, api_key=api_key, mongo_url=mongo_url)

    if query and query != "human":
        searcher.search(query, limit=limit, **kwargs)
    else:
        searcher.search_human_rna(limit=limit)

    if download_all:
        assert searcher.cached, "Searcher is not cached."

        print(f"Found {len(searcher)} results")
        all(tqdm(searcher))
Code example #5
 def __init__(self):
     # All three searchers share the same parsed corpus, index and bookkeeping file.
     parsed_dir = 'C:\\Users\\Jun-Wei\\Desktop\\webpages_parsed_2'
     index_dir = 'C:\\Users\\Jun-Wei\\Desktop\\webpages_parsed_2\\index'
     bookkeeping = 'C:\\Users\\Jun-Wei\\Desktop\\webpages_raw\\bookkeeping.json'
     self.searcher_body = Searcher(parsed_dir, index_dir, bookkeeping, 'body', 'lsi')
     self.searcher_title = Searcher(parsed_dir, index_dir, bookkeeping, 'title', 'tfidf')
     self.searcher_anchor = Searcher(parsed_dir, index_dir, bookkeeping, 'anchor', 'tfidf')
Code example #6
def main(args):
    graph = Graph(args.graph, idify=args.idify, verbose=args.verbosity > 0)
    if args.verbosity > 0:
        print('Graph loaded.')
    searcher = Searcher(graph)
    if args.verbosity > 0:
        print('Creating model.')

    model = searcher.build_model(verbosity=args.verbosity,
                                 passes=2,
                                 label_qualify=True,
                                 order=['mdl_err', 'coverage', 'lex'])
    if args.verbosity > 0:
        print('***** Initial model *****')
        model.print_stats()
        model.save('{}{}_model'.format(args.output_path, args.graph))

    if args.rule_merging:
        model = model.merge_rules(verbosity=args.verbosity)
        if args.verbosity > 0:
            print('***** Model refined with Rm *****')
            model.print_stats()
            model.save('{}{}_model_Rm'.format(args.output_path, args.graph))

    if args.rule_nesting:
        model = model.nest_rules(verbosity=args.verbosity)
        if args.verbosity:
            print('***** Model refined with Rn *****')
            model.print_stats()
            model.save('{}{}_model_Rm_Rn'.format(args.output_path, args.graph))
Code example #7
File: __main__.py Project: dbvirus/searcher
def main(
    email,
    api_key=None,
    query=None,
    mongo_url=None,
    download_all=False,
    limit=None,
    **kwargs,
):
    """
    Given an e-mail and API key, searches for Human RNA and outputs the result
    """
    logging.basicConfig(level=logging.INFO)
    searcher = Searcher(email, api_key=api_key, mongo_url=mongo_url)

    if query and query != "human":
        searcher.search(query, limit=limit, **kwargs)
    else:
        logging.info("No query supplied, searching for Human RNA")
        searcher.search_human_rna(limit=limit)

    if download_all:
        assert searcher.cached, "Searcher is not cached."

        logging.info(f"Found {len(searcher)} results")
        logging.info("Downloading all results")
        searcher.download_all()
    else:
        for result in searcher:
            print(json.dumps(result, indent=4, sort_keys=True))
Code example #8
File: search_app.py Project: chi0321/image_retrieval
	def searchimg(self, index1='index1.csv', index2='index2.csv', result_path='jpg'):
		cd = ColorDescriptor((8,12,3))
		vd = Vgg16Descriptor((224,224,3))
		query = cv2.imread("./query/" + self.filename)
		feature1 = cd.describe(query)
		feature2 = vd.describe(self.querypath)

		searcher = Searcher(index1,index2)
		results = searcher.search(feature1,feature2)

		result0 = cv2.resize(query,(128,128),interpolation=cv2.INTER_CUBIC)	

		name = locals()
		i = 1

		for (score, resultID) in results:
			name['result%d'%i] = cv2.imread(result_path + "/" + resultID)
			name['result%d'%i] = cv2.resize(name['result%d'%i],(128,128),interpolation=cv2.INTER_CUBIC)	
			i = i + 1
			
		# np.hstack expects a sequence, so pass list comprehensions, not bare generators
		result_0 = np.hstack([name['result%d'%i] for i in range(1,6)])
		result_1 = np.hstack([name['result%d'%i] for i in range(6,11)])
		result = np.vstack((result_0,result_1))
		cv2.imwrite("./result/%s" % self.filename, result)
		return self.filename
Code example #9
def search_and_rank_query(query, inverted_index, k, config):
    p = Parse(config.toStem)
    query_as_list = p.parse_sentence(query)
    searcher = Searcher(inverted_index, config)
    relevant_docs, documents_dict = searcher.relevant_docs_from_posting(query_as_list)
    ranked_docs = searcher.ranker.rank_relevant_doc(relevant_docs, documents_dict, query_as_list)
    return searcher.ranker.retrieve_top_k(ranked_docs, k)
Code example #10
 def test_search_keywords(self, mock_PostgresImageDBController, mock_redis):
     dbConnection = PostgresImageDBController("imagedb", "test", "test",
                                              "localhost")
     redisConn = redis.StrictRedis()
     searcher = Searcher(dbConnection, redisConn)
     searcher.search_keywords(('person', 'cup', 'spoon'))
     dbConnection.search_tags.assert_called_with(('person', 'cup', 'spoon'))
Code example #11
    def test_top_BoundedMinHeap_freq(self):
        graph = Graph('test', verbose=False)
        searcher = Searcher(graph)

        # Run the same ordering check for several heap bounds.
        for bound in (10, 5, 15):
            heap = searcher.BoundedMinHeap(
                bound=bound,
                key=lambda rule: len(graph.candidates[rule]['ca_to_size']))
            for rule in searcher.candidates:
                heap.push(rule)
            rules = heap.get_reversed()
            # get_reversed() should yield rules in non-increasing frequency order.
            for i in range(len(rules) - 1):
                assert (len(graph.candidates[rules[i]]['ca_to_size']) >=
                        len(graph.candidates[rules[i + 1]]['ca_to_size']))
Code example #12
def search_and_rank_query(query, inverted_index,num_docs_to_retrieve):
    p = Parse()
    dictFromQuery = {}
    p.tokenSplit(query, dictFromQuery)
    query_as_list = [*dictFromQuery]
    searcher = Searcher(inverted_index)
    #posting = utils.load_obj("posting")
    print('-------------------------------------')
    print('Start import mapReduce')
    map_reduce = MapReduce.import_map_reduce('MapReduceData/')
    print('Done importing mapReduce')
    posting = {}
    print('-------------------------------------')
    print('Start build posting file')
    for term in query_as_list:
        posting[term] = map_reduce.read_from(term)
    print('Done building posting file')
    print('-------------------------------------')
    print('Get relevant Doc')
    relevant_docs = searcher.relevant_docs_from_posting(query_as_list,posting)
    print('Done getting relevant Doc')
    print('-------------------------------------')
    print('Start ranking docs')
    ranked_docs = searcher.ranker.rank_relevant_doc(relevant_docs,dictFromQuery,posting,num_docs_to_retrieve)
    print('Done ranking docs')
    return searcher.ranker.retrieve_top_k(ranked_docs,num_docs_to_retrieve)
Code example #13
    def search(self, query):
        """
        Executes a query over an existing index and returns the number of
        relevant docs and an ordered list of search results.
        Input:
            query - string.
        Output:
            A tuple containing the number of relevant search results, and
            a list of tweet_ids where the first element is the most relevant
            and the last is the least relevant result.
        """
        searcher = Searcher(self._parser, self._indexer, model=self._model)
        query_as_list = self._parser.parse_sentence(query)
        add_to_query = {}
        for q in query_as_list:
            for syn in wordnet.synsets(q):
                for lemma in syn.lemmas():
                    if lemma.name() == q.lower():
                        continue
                    score = wordnet.synsets(q)[0].wup_similarity(syn)
                    if score is not None and score > 0.8:
                        add_to_query[lemma.name()] = score

        if len(add_to_query) > 3:
            add_to_query = sorted(add_to_query.items(), key=lambda item: item[1], reverse=True)
            query_as_list.extend([add_to_query[0][0], add_to_query[1][0], add_to_query[2][0]])
        else:
            query_as_list.extend(add_to_query)

        new_query = ' '.join(query_as_list)
        relevant_docs = searcher.search(new_query)

        return relevant_docs
Code example #14
File: views.py Project: patje31/django-spotnet
def search(request, search=None, cats=None, scats=None):
    page_number = request.GET.get('page', 1)
    searcher = Searcher(search, cats, scats)
    snps = Post.objects.order_by('-posted')
    snps = searcher.filter_queryset(snps)
    searcher.unfilter_categories()

    selector = QuerySelector(snps, dict(
        download=DownloadNzbAction(),
    ))

    if request.method == 'POST':
        action_response = selector.apply_action(request)
        if isinstance(action_response, HttpResponse):
            return action_response

    paginator = Paginator(
        selector,
        settings.POST_PER_PAGE,
        allow_empty_first_page=True,
        orphans=0,
    )

    if page_number == "last":
        page = paginator.page(paginator.num_pages)
    else:
        try:
            page = paginator.page(page_number)
        except (InvalidPage, EmptyPage):
            raise Http404
Code example #15
def search_and_rank_query(query, inverted_index, k):
    p = Parse()
    query_as_list = p.parse_sentence(query)
    searcher = Searcher(inverted_index)
    relevant_docs = searcher.relevant_docs_from_posting(query_as_list)
    ranked_docs = searcher.ranker.rank_relevant_doc(relevant_docs)
    return searcher.ranker.retrieve_top_k(ranked_docs, k)
Code example #16
def run_search(dict_file, postings_file, queries_file, results_file, expand, feedback, score):
    """
    using the given dictionary file and postings file,
    perform searching on the given queries file and output the results to a file
    """
    print('running search on the queries...')
    # This is an empty method
    # Pls implement your code in below

    searcher = Searcher(dict_file, postings_file, expand=expand, feedback=feedback, score=score)

    first_line = True
    with open(queries_file, 'r') as fin, \
         open(results_file, 'w') as fout:

        query = None
        relevant_docs = []
        for line in fin:
            if first_line:
                query = line
                first_line = False
            elif line not in ['\r', '\n', '\r\n']:
                relevant_docs.append(int(line))

        result, score = searcher.search(query, relevant_docs)
        result = map(str, result)

        result = '\n'.join(result)
        fout.write(result)

        if score:
            scores = '\n' + ' '.join(map(str, score))
            fout.write(scores)
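Judging from the parsing loop above, the queries file is expected to hold the free-text query on its first line and one relevant document id per following non-blank line. A hypothetical file of that shape could be generated like this (the query text and ids are invented for illustration):

# Hypothetical input for run_search(); the query and doc ids are made up.
with open('queries.txt', 'w') as f:
    f.write('tropical storm warning\n')   # line 1: the query string
    for doc_id in (1042, 2311, 877):      # remaining lines: relevant doc ids
        f.write('{}\n'.format(doc_id))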
Code example #17
    def recognize(self):
        if not self.filename:
            QtWidgets.QMessageBox.critical(self.centralwidget,
                                           'Selection error',
                                           'You have to load a query image !')
            return None
        queryImage = cv2.imread(self.filename)
        desc = RGBHistogram([8, 8, 8], 5)
        queryFeatures = desc.describe(queryImage)

        # load the index perform the search
        if not os.path.exists("index.pkl"):
            QtWidgets.QMessageBox.critical(
                self.centralwidget, 'Indexing error',
                'You have to Index the images features from the dataset !')
            self.open_indexer()
            return None
        with open("index.pkl", 'rb') as handle:
            index = pickle.load(handle)
        searcher = Searcher(index)
        results = searcher.search(queryFeatures)

        # loop over the top ten results
        images = [x[1] for x in results][:10]
        self.thumbnail.update(images)
Code example #18
    def search(self, query):
        """
        Executes a query over an existing index and returns the number of
        relevant docs and an ordered list of search results.
        Input:
            query - string.
        Output:
            A tuple containing the number of relevant search results, and
            a list of tweet_ids where the first element is the most relevant
            and the last is the least relevant result.
        """
        searcher = Searcher(self._parser, self._indexer, model=self._model)
        return searcher.search(query)


# def main():
#     config = ConfigClass()
#     se = SearchEngine(config=config)
#     r = ReadFile(corpus_path=config.get__corpusPath())
#     # parquet_file_path =r.get_all_path_of_parquet()[0][0]+r.get_all_path_of_parquet()[0][1]
#     # se.build_index_from_parquet(parquet_file_path)
#     se.load_index('idx_bench')
#     query = "trump want to change the world"
#     num,list = se.search(query)
#     # for key in dictionary.keys():
#     #     print('tweet id: {}, score (unique common words with query): {}'.format(key[0], dictionary[key]))
Code example #19
 def search(self, query):
     """
     Executes a query over an existing index and returns the number of
     relevant docs and an ordered list of search results.
     Input:
         query - string.
     Output:
         A tuple containing the number of relevant search results, and
         a list of tweet_ids where the first element is the most relevant
         and the last is the least relevant result.
     """
     self._indexer.inverted_idx, self._indexer.document_dict = self.load_index(
         'idx_engine2.pkl')
     searcher = Searcher(self._parser, self._indexer, model=self.model)
     # TODO check about K
     query_as_list = self._parser.parse_sentence(query)
     list_copy = list(query_as_list[0])
     tagged_words = pos_tag(list_copy)
     for word in tagged_words:
         wn_tag = Wordnet.get_wordnet_pos(word[1])
         synonym = Wordnet.get_closest_term(word[0], wn_tag)
         if synonym is not None:
             list_copy.append(synonym)
     l_res = searcher.search(list_copy)
     t_ids = [tup[1] for tup in l_res]
     return len(l_res), t_ids
Code example #20
 def search(self, query, k=None):
     """
     Executes a query over an existing index and returns the number of
     relevant docs and an ordered list of search results.
     Input:
         query - string.
     Output:
         A tuple containing the number of relevant search results, and
         a list of tweet_ids where the first element is the most relevant
         and the last is the least relevant result.
     """
     terms, entities = self._parser.parse_sentence(query)
     query_as_list = terms + entities
     self.spell = SpellChecker()
     self.spell._distance = 2
     corrected_words = []
     for word in query_as_list:
         in_dictionary = (word.upper() in self._indexer.inverted_idx
                          or word.lower() in self._indexer.inverted_idx)
         if not in_dictionary and not word.isupper():
             corrected_words.append(self.spell.correction(word))
         else:
             corrected_words.append(word)
     searcher = Searcher(self._parser, self._indexer, model=self._model)
     return searcher.search(corrected_words, k)
Code example #21
def search_and_rank_query(query, inverted_index, k, output_path, vectorDict,
                          stemming):
    p = Parse(stemming)
    # parse query.
    query_as_dict = p.parse_sentence(query, term_dict={})
    if len(query_as_dict.keys()) == 0:
        return []
    searcher = Searcher(inverted_index, output_path)
    # search for relevant docs given the query. min threshold is 100 docs.
    relevant_docs = searcher.relevant_docs_from_posting(
        query_as_dict, 100, output_path)
    # rank those docs and get the top 100 of them.
    ranked_docs, sorted_keys = searcher.ranker.rank_relevant_doc(
        relevant_docs, query_as_dict, inverted_index, output_path,
        vectorDict)  # { doc: 4, doc: 10}
    top_100_keys = searcher.ranker.retrieve_top_k(sorted_keys, 100)
    # build association matrix and expand the query.
    expanded_query = local_method.build_association_matrix(
        inverted_index, query_as_dict, top_100_keys, vectorDict)
    # search again, with the expanded query.
    relevant_docs = searcher.relevant_docs_from_posting(
        expanded_query, k, output_path)
    # rank again and return the top K (given input) ranked.
    ranked_docs, sorted_keys = searcher.ranker.rank_relevant_doc(
        relevant_docs, expanded_query, inverted_index, output_path,
        vectorDict)  # { doc: 4, doc: 10}
    top_k_keys = searcher.ranker.retrieve_top_k(sorted_keys, k)
    top_K = []
    for doc_id in top_k_keys:
        top_K.append(ranked_docs[doc_id])
    return top_K
Code example #22
def search_sample():
    query_image = config.queryImage
    category = predict(query_image)
    indexFile = config.indexDir + '/' + category + '.csv'
    catFile = config.dataFile + '/' + category

    cd = Region_Based(
        (8, 12, 3)
    )  #initialize the image descriptor. Here we specify the number of bins for hue, saturation and value.

    # load the query image and describe it
    query = cv2.imread(query_image)
    features = cd.describe(query)

    searcher = Searcher(indexFile)
    # perform the search
    results = searcher.search(features)

    i = 0
    print("Got results")
    rmtree(config.resultDir)
    mkdir(config.resultDir)
    for (score, resultID) in results:
        print(catFile + "/" + resultID)  # load the result image and display it
        copy2(catFile + "/" + resultID, config.resultDir)
        i = i + 1
Code example #23
def server(config):
    """
    Function to initialize the server using the config file
    @param config: Configuration file
    """
    app.searcher = Searcher(config)
    return app
Code example #24
    def minimum_effort_to_travel(self, terrain):
        last_index = len(terrain) - 1
        start_point = (0, 0)
        end_point = (last_index, last_index)

        g = SquareTerrain(terrain)
        optimal = Searcher(g, start_point, end_point)
        return optimal.find_optimal_cost()
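For illustration, the method above could be exercised on a small grid as sketched below; the enclosing class name (PathPlanner here) is an assumption, and the value returned by find_optimal_cost() depends on how SquareTerrain and Searcher define the cost of a path.

# Hypothetical usage; 'PathPlanner' stands in for the snippet's real class name.
planner = PathPlanner()
terrain = [[1, 2, 2],
           [3, 8, 2],
           [5, 3, 5]]
print(planner.minimum_effort_to_travel(terrain))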
Code example #25
def search_and_rank_query(config, query, inverted_index, inverted_docs, k, avg_doc_len):
    p = Parse(config)
    query_as_list = p.parse_sentence(query)[0]
    searcher = Searcher(config, inverted_index, inverted_docs)
    query_dict = searcher.get_query_dict(query_as_list)
    relevant_docs, query_vector = searcher.relevant_docs_from_posting(query_dict)
    ranked_docs = searcher.ranker.rank_relevant_docs(relevant_docs, query_vector, avg_doc_len)
    return searcher.ranker.retrieve_top_k(ranked_docs, k)
Code example #26
File: banana.py Project: wrestrtdr/banana
 def search(self, query):
     """
     Search in the index for answer to query and return a list of relevant
     urls.
     """
     self._logger.info('query: ' + query)
     searcher = Searcher()
     return searcher.query(query)
Code example #27
 def __init__(self):
     self.client = docker.from_env(timeout=86400)
     self.preparer = Preparer()
     self.searcher = Searcher()
     self.trainer = Trainer()
     self.interactor = Interactor()
     self.generate_save_tag = lambda tag, save_id: hashlib.sha256(
         (tag + save_id).encode()).hexdigest()
Code example #28
File: search_engine_1.py Project: GalAgas/SEPartC
 def __init__(self, config=None):
     self._config = config
     # self._parser = Parse()
     self._parser = Parse(self._config)
     self._indexer = Indexer(self._config)
     self._ranker = Ranker()
     self._model = None
     self._searcher = Searcher(self._parser, self._indexer)
Code example #29
File: test_model.py Project: ouyangbo1988/KGist
 def test_label_qualify_2(self):
     '''
     Test that merge does not work when it doesn't save cost.
     '''
     graph = Graph('test', verbose=False)
     searcher = Searcher(graph)
     searcher.label_qualify(verbosity=0)
     assert((('7241965',), (('6293378', 'in', (('1927286',), ())),)) in graph.candidates)

 def __init__(self):
     # Mongodb
     client = MongoClient("mongodb://localhost:27017/")
     db = client.invertedindex
     # mongodb collections
     self._char_index = db.char_index
     self._doc_index = db.doc_index
     self._searcher = Searcher()