def test_sub_blocks():
    start = [8]
    quant = [[64, 8, 8]]
    color = ['hsv']
    for s in start:
        for q in quant:
            print('--- # -- ')
            db_desc = SubBlockDescriptor(db)
            q1_desc = SubBlockDescriptor(qs1_w1)
            q2_desc = SubBlockDescriptor(qs2_w1, masks=True, mask_path=mask_root)
            db_desc.compute_descriptors(grid_blocks=[s, s], quantify=q, color_space=color[0])
            q1_desc.compute_descriptors(grid_blocks=[s, s], quantify=q, color_space=color[0])
            q2_desc.compute_descriptors(grid_blocks=[s, s], quantify=q, color_space=color[0])
            # -- SEARCH -- #
            q1_search = Searcher(db_desc.result, q1_desc.result)
            q2_search = Searcher(db_desc.result, q2_desc.result)
            q1_desc.clear_memory()
            q2_desc.clear_memory()
            db_desc.clear_memory()
            q1_search.search(limit=3)
            q2_search.search(limit=3)
            # -- EVALUATION -- #
            q1_eval = EvaluateDescriptors(q1_search.result, res_root + os.sep + 'gt_corresps1.pkl')
            q2_eval = EvaluateDescriptors(q2_search.result, res_root + os.sep + 'gt_corresps2.pkl')
            q1_search.clear_memory()
            q2_search.clear_memory()
            q1_eval.compute_mapatk(limit=1)
            q2_eval.compute_mapatk(limit=1)
            filename = res_root + os.sep + 'tests' + os.sep + 'sub_res_' + str(s) + '_' + str(q[0]) + '.pkl'
            with open(filename, 'wb') as f:
                pickle.dump(q1_eval.score, f)
                pickle.dump(q2_eval.score, f)
            print('--- # -- ')
def do_searching(self):
    '''Run search.'''
    # start searching
    if not self.max_page_limit:
        wiki_searcher = Searcher(self.start_lemma_name, self.end_lemma_name)
    else:
        try:
            limit_int = int(self.max_page_limit)
        except ValueError:
            self.display_text('Invalid maximum limit.')
            return
        wiki_searcher = Searcher(self.start_lemma_name, self.end_lemma_name, limit_int)
    self.display_text('Busy searching...')
    log('Busy searching...')
    wiki_searcher.run_search()
    # got it, display result
    if wiki_searcher.found_target:
        self.show_result(wiki_searcher.get_result())
    else:
        self.clear_text()
        self.display_text('Search reached the maximum page limit.\n')
    # clear the Entry widgets for the next search
    self.start_entry.delete(0, tk.END)
    self.end_entry.delete(0, tk.END)
def main(args):
    try:
        if len(args) != 3:
            Main.print_with_help("Invalid number of parameters.\n")
            return 1
        dirs = args[:-1]
        for directory in dirs:
            if not os.path.isdir(directory):
                Main.print_with_help("'{}' is not a directory.\n".format(directory))
                return 1
            elif not len(os.listdir(directory)):
                print("'{}' directory is empty.\n".format(directory))
                return 1
        search_type = args[-1]
        # use of the strategy pattern
        if search_type == '-n':
            searcher = Searcher(SearchByName())
        elif search_type == '-c':
            searcher = Searcher(SearchByContent())
        else:
            Main.print_with_help("Invalid parameter.\n")
            return 1
        searcher.duplicate_search(dirs)
        searcher.print_results()
    except Exception as exp:
        print("Exception: " + str(exp))
        return -1
def main(
    email,
    api_key=None,
    query=None,
    mongo_url=None,
    download_all=False,
    limit=None,
    **kwargs,
):
    """
    Given an e-mail and API key, searches for Human RNA and outputs the result
    """
    searcher = Searcher(email, api_key=api_key, mongo_url=mongo_url)
    if query and query != "human":
        searcher.search(query, limit=limit, **kwargs)
    else:
        searcher.search_human_rna(limit=limit)
    if download_all:
        assert searcher.cached, "Searcher is not cached."
        print(f"Found {len(searcher)} results")
        all(tqdm(searcher))
def __init__(self):
    self.searcher_body = Searcher(
        'C:\\Users\\Jun-Wei\\Desktop\\webpages_parsed_2',
        'C:\\Users\\Jun-Wei\\Desktop\\webpages_parsed_2\\index',
        'C:\\Users\\Jun-Wei\\Desktop\\webpages_raw\\bookkeeping.json',
        'body', 'lsi')
    self.searcher_title = Searcher(
        'C:\\Users\\Jun-Wei\\Desktop\\webpages_parsed_2',
        'C:\\Users\\Jun-Wei\\Desktop\\webpages_parsed_2\\index',
        'C:\\Users\\Jun-Wei\\Desktop\\webpages_raw\\bookkeeping.json',
        'title', 'tfidf')
    self.searcher_anchor = Searcher(
        'C:\\Users\\Jun-Wei\\Desktop\\webpages_parsed_2',
        'C:\\Users\\Jun-Wei\\Desktop\\webpages_parsed_2\\index',
        'C:\\Users\\Jun-Wei\\Desktop\\webpages_raw\\bookkeeping.json',
        'anchor', 'tfidf')
def main(args):
    graph = Graph(args.graph, idify=args.idify, verbose=args.verbosity > 0)
    if args.verbosity > 0:
        print('Graph loaded.')
    searcher = Searcher(graph)
    if args.verbosity > 0:
        print('Creating model.')
    model = searcher.build_model(verbosity=args.verbosity,
                                 passes=2,
                                 label_qualify=True,
                                 order=['mdl_err', 'coverage', 'lex'])
    if args.verbosity > 0:
        print('***** Initial model *****')
        model.print_stats()
    model.save('{}{}_model'.format(args.output_path, args.graph))
    if args.rule_merging:
        model = model.merge_rules(verbosity=args.verbosity)
        if args.verbosity > 0:
            print('***** Model refined with Rm *****')
            model.print_stats()
        model.save('{}{}_model_Rm'.format(args.output_path, args.graph))
    if args.rule_nesting:
        model = model.nest_rules(verbosity=args.verbosity)
        if args.verbosity:
            print('***** Model refined with Rn *****')
            model.print_stats()
        model.save('{}{}_model_Rm_Rn'.format(args.output_path, args.graph))
def main(
    email,
    api_key=None,
    query=None,
    mongo_url=None,
    download_all=False,
    limit=None,
    **kwargs,
):
    """
    Given an e-mail and API key, searches for Human RNA and outputs the result
    """
    logging.basicConfig(level=logging.INFO)
    searcher = Searcher(email, api_key=api_key, mongo_url=mongo_url)
    if query and query != "human":
        searcher.search(query, limit=limit, **kwargs)
    else:
        logging.info("No query supplied, searching for Human RNA")
        searcher.search_human_rna(limit=limit)
    if download_all:
        assert searcher.cached, "Searcher is not cached."
        logging.info(f"Found {len(searcher)} results")
        logging.info("Downloading all results")
        searcher.download_all()
    else:
        for result in searcher:
            print(json.dumps(result, indent=4, sort_keys=True))
def searchimg(self, index1='index1.csv', index2='index2.csv', result_path='jpg'):
    cd = ColorDescriptor((8, 12, 3))
    vd = Vgg16Descriptor((224, 224, 3))
    query = cv2.imread("./query/" + self.filename)
    feature1 = cd.describe(query)
    feature2 = vd.describe(self.querypath)
    searcher = Searcher(index1, index2)
    results = searcher.search(feature1, feature2)
    result0 = cv2.resize(query, (128, 128), interpolation=cv2.INTER_CUBIC)
    name = locals()
    i = 1
    for (score, resultID) in results:
        name['result%d' % i] = cv2.imread(result_path + "/" + resultID)
        name['result%d' % i] = cv2.resize(name['result%d' % i], (128, 128),
                                          interpolation=cv2.INTER_CUBIC)
        i = i + 1
    # stack the top ten results into two rows of five images each
    # (np.hstack needs a sequence, so build lists rather than generators)
    result_0 = np.hstack([name['result%d' % i] for i in range(1, 6)])
    result_1 = np.hstack([name['result%d' % i] for i in range(6, 11)])
    result = np.vstack((result_0, result_1))
    cv2.imwrite("./result/%s" % self.filename, result)
    return self.filename
def search_and_rank_query(query, inverted_index, k, config):
    p = Parse(config.toStem)
    query_as_list = p.parse_sentence(query)
    searcher = Searcher(inverted_index, config)
    relevant_docs, documents_dict = searcher.relevant_docs_from_posting(query_as_list)
    ranked_docs = searcher.ranker.rank_relevant_doc(relevant_docs, documents_dict, query_as_list)
    return searcher.ranker.retrieve_top_k(ranked_docs, k)
def test_search_keywords(self, mock_PostgresImageDBController, mock_redis):
    dbConnection = PostgresImageDBController("imagedb", "test", "test", "localhost")
    redisConn = redis.StrictRedis()
    searcher = Searcher(dbConnection, redisConn)
    searcher.search_keywords(('person', 'cup', 'spoon'))
    dbConnection.search_tags.assert_called_with(('person', 'cup', 'spoon'))
def test_top_BoundedMinHeap_freq(self):
    graph = Graph('test', verbose=False)
    searcher = Searcher(graph)

    heap = searcher.BoundedMinHeap(
        bound=10,
        key=lambda rule: len(graph.candidates[rule]['ca_to_size']))
    for rule in searcher.candidates:
        heap.push(rule)
    rules = heap.get_reversed()
    for i in range(len(rules) - 1):
        assert (len(graph.candidates[rules[i]]['ca_to_size']) >=
                len(graph.candidates[rules[i + 1]]['ca_to_size']))

    heap = searcher.BoundedMinHeap(
        bound=5,
        key=lambda rule: len(graph.candidates[rule]['ca_to_size']))
    for rule in searcher.candidates:
        heap.push(rule)
    rules = heap.get_reversed()
    for i in range(len(rules) - 1):
        assert (len(graph.candidates[rules[i]]['ca_to_size']) >=
                len(graph.candidates[rules[i + 1]]['ca_to_size']))

    heap = searcher.BoundedMinHeap(
        bound=15,
        key=lambda rule: len(graph.candidates[rule]['ca_to_size']))
    for rule in searcher.candidates:
        heap.push(rule)
    rules = heap.get_reversed()
    for i in range(len(rules) - 1):
        assert (len(graph.candidates[rules[i]]['ca_to_size']) >=
                len(graph.candidates[rules[i + 1]]['ca_to_size']))
def search_and_rank_query(query, inverted_index, num_docs_to_retrieve):
    p = Parse()
    dictFromQuery = {}
    p.tokenSplit(query, dictFromQuery)
    query_as_list = [*dictFromQuery]
    searcher = Searcher(inverted_index)
    # posting = utils.load_obj("posting")
    print('-------------------------------------')
    print('Start import mapReduce')
    map_reduce = MapReduce.import_map_reduce('MapReduceData/')
    print('Done importing mapReduce')
    posting = {}
    print('-------------------------------------')
    print('Start build posting file')
    for term in query_as_list:
        posting[term] = map_reduce.read_from(term)
    print('Done building posting file')
    print('-------------------------------------')
    print('Get relevant Doc')
    relevant_docs = searcher.relevant_docs_from_posting(query_as_list, posting)
    print('Done getting relevant Doc')
    print('-------------------------------------')
    print('Start ranking docs')
    ranked_docs = searcher.ranker.rank_relevant_doc(relevant_docs, dictFromQuery,
                                                    posting, num_docs_to_retrieve)
    print('Done ranking docs')
    return searcher.ranker.retrieve_top_k(ranked_docs, num_docs_to_retrieve)
def search(self, query):
    """
    Executes a query over an existing index and returns the number of
    relevant docs and an ordered list of search results.
    Input:
        query - string.
    Output:
        A tuple containing the number of relevant search results, and
        a list of tweet_ids where the first element is the most relevant
        and the last is the least relevant result.
    """
    searcher = Searcher(self._parser, self._indexer, model=self._model)
    query_as_list = self._parser.parse_sentence(query)
    add_to_query = {}
    for q in query_as_list:
        for syn in wordnet.synsets(q):
            for lemma in syn.lemmas():
                if lemma.name() == q.lower():
                    continue
                score = wordnet.synsets(q)[0].wup_similarity(syn)
                if score is not None and score > 0.8:
                    add_to_query[lemma.name()] = score
    if len(add_to_query) > 3:
        add_to_query = sorted(add_to_query.items(), key=lambda item: item[1], reverse=True)
        query_as_list.extend([add_to_query[0][0], add_to_query[1][0], add_to_query[2][0]])
    else:
        query_as_list.extend(add_to_query)
    new_query = ' '.join(query_as_list)
    relevant_docs = searcher.search(new_query)
    return relevant_docs
def search(request, search=None, cats=None, scats=None):
    page_number = request.GET.get('page', 1)
    searcher = Searcher(search, cats, scats)
    snps = Post.objects.order_by('-posted')
    snps = searcher.filter_queryset(snps)
    searcher.unfilter_categories()
    selector = QuerySelector(snps, dict(
        download=DownloadNzbAction(),
    ))
    if request.method == 'POST':
        action_response = selector.apply_action(request)
        if isinstance(action_response, HttpResponse):
            return action_response
    paginator = Paginator(
        selector,
        settings.POST_PER_PAGE,
        allow_empty_first_page=True,
        orphans=0,
    )
    if page_number == "last":
        page = paginator.page(paginator.num_pages)
    else:
        try:
            page = paginator.page(page_number)
        except (InvalidPage, EmptyPage):
            raise Http404
def search_and_rank_query(query, inverted_index, k):
    p = Parse()
    query_as_list = p.parse_sentence(query)
    searcher = Searcher(inverted_index)
    relevant_docs = searcher.relevant_docs_from_posting(query_as_list)
    ranked_docs = searcher.ranker.rank_relevant_doc(relevant_docs)
    return searcher.ranker.retrieve_top_k(ranked_docs, k)
def run_search(dict_file, postings_file, queries_file, results_file, expand, feedback, score):
    """
    using the given dictionary file and postings file,
    perform searching on the given queries file and output the results to a file
    """
    print('running search on the queries...')
    searcher = Searcher(dict_file, postings_file, expand=expand, feedback=feedback, score=score)
    first_line = True
    with open(queries_file, 'r') as fin, \
            open(results_file, 'w') as fout:
        query = None
        relevant_docs = []
        for line in fin:
            if first_line:
                query = line
                first_line = False
            elif line not in ['\r', '\n', '\r\n']:
                relevant_docs.append(int(line))
        result, score = searcher.search(query, relevant_docs)
        result = map(str, result)
        result = '\n'.join(result)
        fout.write(result)
        if score:
            # write the returned scores on a new line after the result ids
            scores = '\n' + ' '.join(map(str, score))
            fout.write(scores)
def recognize(self):
    if not self.filename:
        QtWidgets.QMessageBox.critical(self.centralwidget, 'Selection error',
                                       'You have to load a query image!')
        return None
    queryImage = cv2.imread(self.filename)
    desc = RGBHistogram([8, 8, 8], 5)
    queryFeatures = desc.describe(queryImage)
    # load the index and perform the search
    if not os.path.exists("index.pkl"):
        QtWidgets.QMessageBox.critical(
            self.centralwidget, 'Indexing error',
            'You have to index the image features from the dataset!')
        self.open_indexer()
        return None
    with open("index.pkl", 'rb') as handle:
        index = pickle.load(handle)
    searcher = Searcher(index)
    results = searcher.search(queryFeatures)
    # keep the top ten results
    images = [x[1] for x in results][:10]
    self.thumbnail.update(images)
def search(self, query):
    """
    Executes a query over an existing index and returns the number of
    relevant docs and an ordered list of search results.
    Input:
        query - string.
    Output:
        A tuple containing the number of relevant search results, and
        a list of tweet_ids where the first element is the most relevant
        and the last is the least relevant result.
    """
    searcher = Searcher(self._parser, self._indexer, model=self._model)
    return searcher.search(query)

# def main():
#     config = ConfigClass()
#     se = SearchEngine(config=config)
#     r = ReadFile(corpus_path=config.get__corpusPath())
#     # parquet_file_path = r.get_all_path_of_parquet()[0][0] + r.get_all_path_of_parquet()[0][1]
#     # se.build_index_from_parquet(parquet_file_path)
#     se.load_index('idx_bench')
#     query = "trump want to change the world"
#     num, list = se.search(query)
#     # for key in dictionary.keys():
#     #     print('tweet id: {}, score (unique common words with query): {}'.format(key[0], dictionary[key]))
def search(self, query):
    """
    Executes a query over an existing index and returns the number of
    relevant docs and an ordered list of search results.
    Input:
        query - string.
    Output:
        A tuple containing the number of relevant search results, and
        a list of tweet_ids where the first element is the most relevant
        and the last is the least relevant result.
    """
    self._indexer.inverted_idx, self._indexer.document_dict = self.load_index('idx_engine2.pkl')
    searcher = Searcher(self._parser, self._indexer, model=self.model)
    # TODO check about K
    query_as_list = self._parser.parse_sentence(query)
    list_copy = list(query_as_list[0])
    tagged_words = pos_tag(list_copy)
    for word in tagged_words:
        wn_tag = Wordnet.get_wordnet_pos(word[1])
        synonym = Wordnet.get_closest_term(word[0], wn_tag)
        if synonym is not None:
            list_copy.append(synonym)
    l_res = searcher.search(list_copy)
    t_ids = [tup[1] for tup in l_res]
    return len(l_res), t_ids
def search(self, query, k=None):
    """
    Executes a query over an existing index and returns the number of
    relevant docs and an ordered list of search results.
    Input:
        query - string.
    Output:
        A tuple containing the number of relevant search results, and
        a list of tweet_ids where the first element is the most relevant
        and the last is the least relevant result.
    """
    terms, entities = self._parser.parse_sentence(query)
    query_as_list = terms + entities
    self.spell = SpellChecker()
    self.spell._distance = 2
    corrected_words = []
    for word in query_as_list:
        in_dictionary = (word.upper() in self._indexer.inverted_idx
                         or word.lower() in self._indexer.inverted_idx)
        if not in_dictionary and not word.isupper():
            corrected_words.append(self.spell.correction(word))
        else:
            corrected_words.append(word)
    searcher = Searcher(self._parser, self._indexer, model=self._model)
    return searcher.search(corrected_words, k)
def search_and_rank_query(query, inverted_index, k, output_path, vectorDict, stemming):
    p = Parse(stemming)
    # parse query.
    query_as_dict = p.parse_sentence(query, term_dict={})
    if len(query_as_dict.keys()) == 0:
        return []
    searcher = Searcher(inverted_index, output_path)
    # search for relevant docs given the query. min threshold is 100 docs.
    relevant_docs = searcher.relevant_docs_from_posting(query_as_dict, 100, output_path)
    # rank those docs and get the top 100 of them.
    ranked_docs, sorted_keys = searcher.ranker.rank_relevant_doc(
        relevant_docs, query_as_dict, inverted_index, output_path, vectorDict)  # { doc: 4, doc: 10}
    top_100_keys = searcher.ranker.retrieve_top_k(sorted_keys, 100)
    # build association matrix and expand the query.
    expanded_query = local_method.build_association_matrix(
        inverted_index, query_as_dict, top_100_keys, vectorDict)
    # search again, with the expanded query.
    relevant_docs = searcher.relevant_docs_from_posting(expanded_query, k, output_path)
    # rank again and return the top K (given input) ranked.
    ranked_docs, sorted_keys = searcher.ranker.rank_relevant_doc(
        relevant_docs, expanded_query, inverted_index, output_path, vectorDict)  # { doc: 4, doc: 10}
    top_k_keys = searcher.ranker.retrieve_top_k(sorted_keys, k)
    top_K = []
    for doc_id in top_k_keys:
        top_K.append(ranked_docs[doc_id])
    return top_K
def search_sample():
    query_image = config.queryImage
    category = predict(query_image)
    indexFile = config.indexDir + '/' + category + '.csv'
    catFile = config.dataFile + '/' + category
    # initialize the image descriptor; specify the number of bins for hue, saturation and value
    cd = Region_Based((8, 12, 3))
    # load the query image and describe it
    query = cv2.imread(query_image)
    features = cd.describe(query)
    searcher = Searcher(indexFile)
    # perform the search
    results = searcher.search(features)
    i = 0
    print("Got results")
    rmtree(config.resultDir)
    mkdir(config.resultDir)
    for (score, resultID) in results:
        print(catFile + "/" + resultID)
        # load the result image and display it
        copy2(catFile + "/" + resultID, config.resultDir)
        i = i + 1
def server(config):
    """
    Function to initialize the server using the config file
    @param config: Configuration file
    """
    app.searcher = Searcher(config)
    return app
def minimum_effort_to_travel(self, terrain):
    last_index = len(terrain) - 1
    start_point = (0, 0)
    end_point = (last_index, last_index)
    g = SquareTerrain(terrain)
    optimal = Searcher(g, start_point, end_point)
    return optimal.find_optimal_cost()
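# The snippet above delegates the path search to the project's Searcher class, whose
# internals are not shown here. As a point of reference only, the following is a minimal,
# self-contained sketch of the kind of search it likely performs (Dijkstra over the grid,
# where an edge's cost is the absolute height difference and a path's effort is its
# maximum edge cost). The helper name `min_effort` and all of its details are assumptions,
# not the project's actual implementation.
import heapq

def min_effort(terrain):
    # Hypothetical stand-in for Searcher.find_optimal_cost on a square terrain grid.
    n = len(terrain)
    best = [[float('inf')] * n for _ in range(n)]
    best[0][0] = 0
    heap = [(0, 0, 0)]  # (effort so far, row, col)
    while heap:
        effort, r, c = heapq.heappop(heap)
        if (r, c) == (n - 1, n - 1):
            return effort
        if effort > best[r][c]:
            continue  # stale heap entry
        for dr, dc in ((1, 0), (-1, 0), (0, 1), (0, -1)):
            nr, nc = r + dr, c + dc
            if 0 <= nr < n and 0 <= nc < n:
                new_effort = max(effort, abs(terrain[nr][nc] - terrain[r][c]))
                if new_effort < best[nr][nc]:
                    best[nr][nc] = new_effort
                    heapq.heappush(heap, (new_effort, nr, nc))
    return best[n - 1][n - 1]

# e.g. min_effort([[1, 2, 2], [3, 8, 2], [5, 3, 5]]) == 2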
def search_and_rank_query(config, query, inverted_index, inverted_docs, k, avg_doc_len):
    p = Parse(config)
    query_as_list = p.parse_sentence(query)[0]
    searcher = Searcher(config, inverted_index, inverted_docs)
    query_dict = searcher.get_query_dict(query_as_list)
    relevant_docs, query_vector = searcher.relevant_docs_from_posting(query_dict)
    ranked_docs = searcher.ranker.rank_relevant_docs(relevant_docs, query_vector, avg_doc_len)
    return searcher.ranker.retrieve_top_k(ranked_docs, k)
def search(self, query):
    """
    Search the index for answers to the query and return a list of relevant urls.
    """
    self._logger.info('query: ' + query)
    searcher = Searcher()
    return searcher.query(query)
def __init__(self):
    self.client = docker.from_env(timeout=86400)
    self.preparer = Preparer()
    self.searcher = Searcher()
    self.trainer = Trainer()
    self.interactor = Interactor()
    self.generate_save_tag = lambda tag, save_id: hashlib.sha256(
        (tag + save_id).encode()).hexdigest()
def __init__(self, config=None):
    self._config = config
    # self._parser = Parse()
    self._parser = Parse(self._config)
    self._indexer = Indexer(self._config)
    self._ranker = Ranker()
    self._model = None
    self._searcher = Searcher(self._parser, self._indexer)
def test_label_qualify_2(self):
    '''
    Test that merge does not work when it doesn't save cost.
    '''
    graph = Graph('test', verbose=False)
    searcher = Searcher(graph)
    searcher.label_qualify(verbosity=0)
    assert ((('7241965',), (('6293378', 'in', (('1927286',), ())),))
            in graph.candidates)
def __init__(self):
    # Mongodb
    client = MongoClient("mongodb://localhost:27017/")
    db = client.invertedindex
    # mongodb collections
    self._char_index = db.char_index
    self._doc_index = db.doc_index
    self._searcher = Searcher()