def search():
    """
    Handles a search query entered by the user and computes the top documents
    using the Searcher class.
    :return: A template populated with the input query, the top 20 results,
             the cosine scores of the query words, and words whose document
             frequency is zero.
    """
    query = request.form.get('searchBar')
    query = unicodedata.normalize('NFKD', query).encode('ascii', 'ignore')
    now = time.clock()
    searcher = Searcher(query)
    results = searcher.cosine_score()
    scores = searcher.query_score
    print time.clock() - now
    zero_scores = searcher.top_corrections
    boolean_results = searcher.boolean_results
    if len(boolean_results) == 0:
        boolean_error = True
    else:
        boolean_error = False
    title_results = searcher.title_results
    if len(title_results) > 10:
        title_results = []
    return render_template("displayResults.html", input_query=query,
                           results=results, scores=scores,
                           zero_scores=zero_scores,
                           title_results=title_results,
                           error=boolean_error,
                           boolean_results=boolean_results)
def visualize_specific_ranker(self, query):
    viewer = GraphViewer()
    specific_documents = Searcher(query).get_topic_documents()
    m = self.basic_matrix
    specific_doc_ids = list()
    with closing(shelve.open("ids.db")) as db:
        for doc in specific_documents:
            specific_doc_ids.append(db[doc[16:]])
    specific_vector = np.zeros(m.shape[0])
    for doc_id in specific_doc_ids:
        specific_vector[doc_id] = (1 - self.taxation_factor) / \
            len(specific_doc_ids)
    rank_vector = np.full(m.shape[0], 1 / m.shape[0])
    print len(specific_doc_ids)
    viewer.view_graph(node_list=list(specific_doc_ids))
    count = 0
    while True:
        count += 1
        rank_vector1 = m * rank_vector + specific_vector
        diff = rank_vector1 - rank_vector
        diff = sum(diff * diff)
        if diff < 1e-50:
            break
        else:
            rank_vector = rank_vector1
        if count % 25 == 0:
            try:
                viewer.view_graph(node_list=list(specific_doc_ids),
                                  ranks=list(rank_vector),
                                  mult_factor=40000, concat=150)
            except networkx.exception.NetworkXError as e:
                print e
def main():
    # get options from console.
    options = args()
    # get configuration from file.
    config = get_conf(options['config_file'])
    # create ES connection to hosts.
    connections.create_connection(hosts=config['elasticsearch']['hosts'],
                                  timeout=30)
    # create the searcher instance to find alarms, given the options from
    # console.
    searcher = Searcher(options['from'], options['query'],
                        ttime=options['to'], per_page=500,
                        min_priority=options['min_priority'])
    buckets = [
        PathClassBucket(
            utils.build_url(config['kibana']['host'],
                            config['kibana']['secure']))
    ]
    # manually fetch all alarms from the searcher and pass it to every bucket.
    for alarm in searcher.pages():
        for bucket in buckets:
            bucket.cherry_pick(alarm)
    # dump all buckets, this will print out all buckets.
    for bucket in buckets:
        bucket.dump()
def search():
    query = request.form['query']
    field = request.form['field']
    searcher = Searcher()
    result = searcher.search(query, field)
    return render_template("results.html", query=query,
                           videos=result["videos"])
def colorSearch(self):
    '''
    Searches the query image against the index and returns the specified
    number of matches. Results are in the format
    (chi-squared distance, image name).
    '''
    searcher = Searcher(self.colorIndex)
    queryFeatures = self.createHistogram(self.image)
    results = searcher.search(queryFeatures)
    return results
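# --- Illustration (not part of the original sources) ---
# The Searcher used by colorSearch is not shown here; its scoring is only
# described as a chi-squared distance between colour histograms. A minimal
# sketch of that distance, assuming flattened histograms and a small epsilon
# to avoid division by zero:
import numpy as np

def chi2_distance(hist_a, hist_b, eps=1e-10):
    # d = 0.5 * sum((a - b)^2 / (a + b + eps)); smaller means more similar
    hist_a = np.asarray(hist_a, dtype=float)
    hist_b = np.asarray(hist_b, dtype=float)
    return 0.5 * np.sum((hist_a - hist_b) ** 2 / (hist_a + hist_b + eps))

# Example: identical histograms give distance 0.0
print(chi2_distance([0.2, 0.3, 0.5], [0.2, 0.3, 0.5]))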
def aftermath(self, results):
    try:
        keywords = self.analyse_keywords(results)
        sentiment = self.analyse_sentiment(results)
    except Exception as e:
        print(e)
        # without keywords and sentiment there is nothing to search for
        return
    searcher = Searcher()
    for keyword in keywords:
        imageResults = searcher.searchImages(keyword)
        imageResults = searcher.validateLinks(imageResults)
        self.completionFunction(self.name, keyword, sentiment, imageResults)
def mainapp():
    result = ""
    input = request.args.get('searchstr')
    sorttype = request.args.get('sortselect')
    results = []
    if (input is not None and sorttype is not None) and input != "":
        searcher = Searcher(input, sorttype)
        results = searcher.search()
    if input is None:
        input = ""
    widgets = [r.widget for r in results]
    # return render_template('index.html', input=input, result="".join(widgets))
    return render_template('index2.html', input=input, results=results)
def topic_specific_search(self, query, scheme="topic"):
    if scheme == "trust":
        rank_vector = np.load("trustRank.npy")
    else:
        rank_vector = self.topic_specific_ranker(query)
    results = Searcher(query).cosine_score(ranker=True)
    result_ids = []
    with closing(shelve.open("ids.db")) as db:
        for doc, score in results:
            doc_id = db[doc[16:]]
            doc_rank = rank_vector[doc_id]
            result_ids.append((doc_id, doc_rank))
    sorted_scores = heapq.nlargest(20, result_ids,
                                   key=operator.itemgetter(1))
    return sorted_scores
def search():
    query = request.args.get("q")
    if query is None:
        return render_template("index.html")
    try:
        page = int(request.args.get("p", 1))
    except (TypeError, ValueError):
        page = 1
    searcher = Searcher()
    results = searcher.search_page(query, page)
    paginator = Paginator(results)
    return render_template("index.html", results=results,
                           paginator=paginator, q=query)
def main(args):
    searcher = Searcher(args["limit"])
    workers = []
    if "domains" in args:
        # load domains from file
        domains = load_domains(args["domains"])
        for domain in domains:
            if domain == "":
                continue
            # lookup in search engine
            result = searcher.google_search(domain)
            # start the worker
            worker = Worker(domain, result.urls, result.page_source)
            workers.append(worker)
        print "\nNow waiting for workers to finish"
    else:
        # lookup in search engine
        result = searcher.google_search(args["domain"])
        # start the worker
        worker = Worker(args["domain"], result.urls, result.page_source)
        workers.append(worker)
    searcher.close()
    # wait for all workers to finish
    for worker in workers:
        worker.wait()
    # write emails to a file
    if "output" in args:
        write_excel_file(args["output"], workers)
    print "\nFinished scraping!\n"
    # output all emails
    for worker in workers:
        for email in worker.emails:
            print "> " + email
def weighted_search():
    """
    When a user enters weights for the different query words, they are
    obtained by this method.
    :return: A template populated with the input query, the results of the
             weighted search, the per-word scores given by the user, and
             words whose document frequency is zero.
    """
    weights = {}
    query = request.form.get("query")
    for key in request.form:
        if key == "query":
            query = request.form[key]
            query = unicodedata.normalize('NFKD', query).encode('ascii',
                                                                'ignore')
        else:
            weights[key] = request.form[key]
            weights[key] = unicodedata.normalize('NFKD', weights[key]).encode(
                'ascii', 'ignore')
            weights[key] = float(weights[key]) / 100
    searcher = Searcher(query, query_score=weights)
    results = searcher.cosine_score()
    scores = searcher.query_score
    zero_scores = searcher.top_corrections
    boolean_results = searcher.boolean_results
    if len(boolean_results) == 0:
        boolean_error = True
    else:
        boolean_error = False
    title_results = searcher.title_results
    return render_template("displayResults.html", input_query=query,
                           results=results, scores=scores,
                           zero_scores=zero_scores,
                           title_results=title_results, error=boolean_error)
def topic_specific_ranker(self, query):
    specific_documents = Searcher(query).get_topic_documents()
    m = self.basic_matrix
    specific_doc_ids = list()
    with closing(shelve.open("ids.db")) as db:
        for doc in specific_documents:
            specific_doc_ids.append(db[doc[16:]])
    specific_vector = np.zeros(m.shape[0])
    for doc_id in specific_doc_ids:
        specific_vector[doc_id] = (1 - self.taxation_factor) / \
            len(specific_doc_ids)
    rank_vector = np.full(m.shape[0], 1 / m.shape[0])
    while True:
        rank_vector1 = m * rank_vector + specific_vector
        diff = rank_vector1 - rank_vector
        diff = sum(diff * diff)
        if diff < 1e-50:
            break
        else:
            rank_vector = rank_vector1
    return rank_vector
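# --- Illustration (not part of the original sources) ---
# A minimal, self-contained sketch of the update that topic_specific_ranker
# (and visualize_specific_ranker) iterates: topic-sensitive PageRank,
# r' = M.r + v, where v spreads (1 - taxation_factor) evenly over the topic
# documents and M is the taxed transition matrix. The toy matrix, taxation
# factor, and topic set below are made up for the example; the original code
# applies the same update with `m * rank_vector` on its own matrix type.
import numpy as np

def toy_topic_rank(m, topic_ids, taxation_factor=0.85,
                   tol=1e-12, max_iter=1000):
    n = m.shape[0]
    teleport = np.zeros(n)
    teleport[topic_ids] = (1 - taxation_factor) / len(topic_ids)
    rank = np.full(n, 1.0 / n)
    for _ in range(max_iter):
        new_rank = m @ rank + teleport
        if np.sum((new_rank - rank) ** 2) < tol:  # squared-change test
            break
        rank = new_rank
    return rank

# 3-node toy web graph: column-stochastic links scaled by the taxation factor
links = np.array([[0.0, 0.5, 1.0],
                  [0.5, 0.0, 0.0],
                  [0.5, 0.5, 0.0]])
print(toy_topic_rank(0.85 * links, topic_ids=[0]))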
parser.add_argument('-hn', '--house_number', type=str, action='store')
parser.add_argument('-p', '--path', type=str, action='store',
                    help="path to save downloaded data")
parser.add_argument('-r', '--regime', type=str, action='store',
                    choices=["region", "russia"])
args = parser.parse_args().__dict__

street = args['street']
house_number = args['house_number']
city = args['city']
regime = args["regime"]
path = args["path"]
address = city + ", " + street + ", " + house_number

if regime == "russia":
    links_to_download = russia
else:
    links_to_download = region

if not os.path.isdir(os.path.join(path, 'geodata')):
    Downloader.download_bz2(links_to_download, 'geodata')

if not os.path.isdir(os.path.join(path, 'prepared')):
    for file in os.listdir(os.path.join(path, 'geodata')):
        if file.endswith(".txt"):
            print(f"preparing file: {os.path.join(path, 'geodata', file)}")
            db = DataBase(os.path.join(path, 'geodata', file),
                          os.path.join(path, 'prepared', file))
            s = Searcher(os.path.join(path, 'prepared', file))

for file in os.listdir(os.path.join(path, 'prepared')):
    print("Created Searcher")
    s = Searcher(os.path.join(path, 'prepared', file))
    print(DataBase.return_json_address(
        args['city'], args['street'], args['house_number'],
        Searcher.get_median(s.search_nodes(address))))
from generator import Generator
from search import Searcher
from connectionist import Connectionist
from vertex import Vertex

n_vertices = 10  # number of elements/nodes
g = Generator(n_vertices)
searcher = Searcher()
connector = Connectionist()

n = 20  # number of runs
for i in range(n):
    g.generate()
    belief_network = g.get_belief_network()
    neural_network = g.get_neural_network()

    coherence, (true, false) = searcher.run(belief_network)
    print 'coherence search:', coherence
    print 'accepted propositions:', sorted(true, key=lambda v: v.n)
    print 'rejected propositions:', sorted(false, key=lambda v: v.n)
    print '-----------------------------------------------'

    activations, harmony = connector.run(neural_network)
    print 'harmony', harmony
    true = []
    false = []
    for i, a in enumerate(activations):
        if a == 1:
            true.append(Vertex(i))
        else:
            false.append(Vertex(i))
def __init__(self, index_dir):
    self.searcher = Searcher(index_dir)
    },
    {
        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
    },
    {
        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
    },
]

SEARCHOBJECT = Searcher(
    "/Users/younesagabi/Desktop/YouTaQA/IR/index_wiki_v7.0")
THETOKENIZER = BertTokenizer.from_pretrained('bert-base-uncased',
                                             do_lower_case=True)
MODELCLASSIFIER = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,
    output_attentions=False,
    output_hidden_states=False)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MODELCLASSIFIER.to(DEVICE)
MODELCLASSIFIER.load_state_dict(torch.load(
    '/Users/younesagabi/Desktop/YouTaQA/DeepLearning/Classifier/Models/BERT_ft_epoch10.model',
    map_location=torch.device(DEVICE)),
    strict=False)
import urllib
import json

app = Flask(__name__, static_url_path='')


@app.route("/")
def main():
    return send_from_directory('static', 'search.html')


@app.route("/q/<input>")
@crossdomain(origin='*')
def search(input):
    print urllib.unquote_plus(input).encode('utf-8')
    doclist = searcher.search(input)
    result = []
    for doc in doclist:
        result.append(str(doc.id) + ", " + str(doc.name) + "<br>" + doc.text)
    return json.dumps(result)


index = Indexer("docs.txt")
searcher = Searcher(index)

if __name__ == "__main__":
    app.run(host='127.0.0.1', port=8282, debug=True)
def main_1(var):
    num_groups = int(var[0])
    num_clusters = int(var[1])
    if var[2] >= 50:
        dist_function_name = 'euclidean'
    else:
        dist_function_name = 'cosine'
    threshold = var[3]
    server_url = 'localhost:9200'
    num_queries = 200
    with open('evaluation_set.json') as f:
        evaluation_set = json.load(f)
    f.close()
    training_embedding_vectors = np.load("PCA_2048_to_512_new.npy")
    query_vector_indices = random.sample(range(len(evaluation_set.keys())),
                                         num_queries)
    train_labels, image_names = get_image_data(
        'vn_celeb_face_recognition/train.csv')
    # print("working on {} groups, {} clusters, {} threshold".format(num_groups, num_clusters, threshold))
    search_times = []
    mean_average_accuracy = 0
    mean_recall = 0
    for query_vector_index in query_vector_indices:
        query_vector = training_embedding_vectors[
            evaluation_set[str(query_vector_index)][0]]
        # print(query_vector)
        actual_query_label = train_labels[
            evaluation_set[str(query_vector_index)][0]]
        num_actual_results = len(evaluation_set[str(actual_query_label)])
        # print(actual_query_label)
        # print("------------")
        es = Elasticsearch(server_url)
        index_name = 'face_off_' + str(num_groups) + 'groups_' + \
            str(num_clusters) + 'clusters_vgg'
        if not es.indices.exists(index_name):
            # if data is not indexed, create index and take data to ES,
            # then query
            data_encoder = DataEncoder(num_groups, num_clusters, 1000,
                                       training_embedding_vectors,
                                       'encode_results_vgg')
            data_encoder.run_encode_data()
            json_string_tokens_generator = JsonStringTokenGenerator(
                'encode_results_vgg', 'PCA_2048_to_512_new.npy',
                'vn_celeb_face_recognition/train.csv', num_groups,
                num_clusters)
            encoded_string_tokens_list = \
                json_string_tokens_generator.get_string_tokens_list()
            train_embs = json_string_tokens_generator.get_image_fetures()
            train_labels, image_names = \
                json_string_tokens_generator.get_image_metadata()
            json_string_tokens_list = \
                json_string_tokens_generator.generate_json_string_tokens_list(
                    encoded_string_tokens_list, train_labels, image_names,
                    train_embs)
            json_string_tokens_generator.save_json_string_tokens(
                json_string_tokens_list)
            print('saving completed....')
            print('******************************')
            indexer = ESIndexer('encode_results_vgg', num_groups,
                                num_clusters, server_url, 'vgg')
            indexer.index()
            start_time = datetime.now()
            searcher = Searcher(threshold, num_groups, num_clusters,
                                query_vector, server_url, index_name,
                                dist_function_name, 'vgg')
            results = searcher.search()
            # print(len(results))
            if len(results) == 0:
                continue
            search_time = datetime.now() - start_time
            search_time_in_ms = (search_time.days * 24 * 60 * 60 +
                                 search_time.seconds) * 1000 + \
                search_time.microseconds / 1000.0
            search_times.append(search_time_in_ms)
        else:
            # if not, commit query
            start_time = datetime.now()
            searcher = Searcher(threshold, num_groups, num_clusters,
                                query_vector, server_url, index_name,
                                dist_function_name, 'vgg')
            results = searcher.search()
            # print(len(results))
            if len(results) == 0:
                continue
            search_time = datetime.now() - start_time
            search_time_in_ms = (search_time.days * 24 * 60 * 60 +
                                 search_time.seconds) * 1000 + \
                search_time.microseconds / 1000.0
            search_times.append(search_time_in_ms)
        results_labels = list()
        for result in results:
            results_labels.append(result['id'])
        accuracy_i = 0
        for i in range(len(results)):
            step_list = results_labels[:(i + 1)]
            num_corrects = len([
                i for i, x in enumerate(step_list)
                if x == actual_query_label
            ])
            accuracy_i += num_corrects / len(step_list)
        # print(accuracy_i/num_returns)
        mean_average_accuracy += accuracy_i / len(results)
        recall_i = num_corrects / num_actual_results
        # print(num_corrects)
        mean_recall += recall_i
        # print("*************************************")
    mean_average_accuracy = mean_average_accuracy / num_queries
    mean_recall = mean_recall / num_queries
    print(mean_average_accuracy, mean_recall)
    # print("precision: {} and recall: {}".format(mean_average_accuracy, mean_recall))
    return 3 - mean_average_accuracy - mean_recall - (
        2 * mean_average_accuracy * mean_recall /
        (mean_average_accuracy + mean_recall))
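# --- Illustration (not part of the original sources) ---
# The value main_1 returns looks like a loss for a hyper-parameter optimiser:
# with P = mean_average_accuracy and R = mean_recall it equals
# 3 - P - R - F1, where F1 = 2*P*R / (P + R), so minimising it pushes the
# precision-like accuracy, the recall, and their harmonic mean up together.
# A tiny, self-contained check of that identity:
def combined_loss(p, r):
    f1 = 2 * p * r / (p + r)
    return 3 - p - r - f1

print(combined_loss(1.0, 1.0))  # perfect retrieval -> 0.0
print(combined_loss(0.5, 0.5))  # -> 1.5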
def main():
    server_url = 'localhost:9200'
    num_queries = 1000
    with open('hyper_params_set.json', 'r') as fh:
        hyper_params = json.load(fh)
    nums_groups = hyper_params['nums_groups']
    nums_clusters = hyper_params['nums_clusters']
    thresholds = hyper_params['thresholds']
    fh.close()
    with open('evaluation_set.json') as f:
        evaluation_set = json.load(f)
    f.close()
    final_results = []
    training_embedding_vectors = np.load("train_embs_VGGFace.npy")
    query_vector_indices = random.sample(range(len(evaluation_set.keys())),
                                         num_queries)
    train_labels, image_names = get_image_data(
        'vn_celeb_face_recognition/train.csv')
    for threshold in thresholds:
        for num_groups in nums_groups:
            for num_clusters in nums_clusters:
                print("working on {} groups, {} clusters, {} threshold".format(
                    num_groups, num_clusters, threshold))
                search_times = []
                mean_average_accuracy = 0
                mean_recall = 0
                for query_vector_index in query_vector_indices:
                    query_vector = training_embedding_vectors[
                        evaluation_set[str(query_vector_index)][0]]
                    actual_query_label = train_labels[
                        evaluation_set[str(query_vector_index)][0]]
                    num_actual_results = len(
                        evaluation_set[str(actual_query_label)])
                    # print(actual_query_label)
                    # print("------------")
                    es = Elasticsearch(server_url)
                    index_name = 'face_off_' + str(num_groups) + 'groups_' + \
                        str(num_clusters) + 'clusters_vgg'
                    if not es.indices.exists(index_name):
                        # if data is not indexed, create index and take data
                        # to ES, then query
                        indexer = ESIndexer('encode_results_vgg', num_groups,
                                            num_clusters, server_url, 'vgg')
                        indexer.index()
                        start_time = datetime.now()
                        searcher = Searcher(threshold, num_groups,
                                            num_clusters, query_vector,
                                            server_url, index_name,
                                            'cosine', 'vgg')
                        results = searcher.search()
                        # print(len(results))
                        if len(results) == 0:
                            continue
                        search_time = datetime.now() - start_time
                        search_time_in_ms = (search_time.days * 24 * 60 * 60 +
                                             search_time.seconds) * 1000 + \
                            search_time.microseconds / 1000.0
                        search_times.append(search_time_in_ms)
                    else:
                        # if not, commit query
                        start_time = datetime.now()
                        searcher = Searcher(threshold, num_groups,
                                            num_clusters, query_vector,
                                            server_url, index_name,
                                            'cosine', 'vgg')
                        results = searcher.search()
                        # print(len(results))
                        if len(results) == 0:
                            continue
                        search_time = datetime.now() - start_time
                        search_time_in_ms = (search_time.days * 24 * 60 * 60 +
                                             search_time.seconds) * 1000 + \
                            search_time.microseconds / 1000.0
                        search_times.append(search_time_in_ms)
                    results_labels = list()
                    for result in results:
                        # print(result['id'])
                        results_labels.append(result['id'])
                    accuracy_i = 0
                    for i in range(len(results)):
                        step_list = results_labels[:(i + 1)]
                        num_corrects = len([
                            i for i, x in enumerate(step_list)
                            if x == actual_query_label
                        ])
                        accuracy_i += num_corrects / len(step_list)
                    mean_average_accuracy += accuracy_i / len(results)
                    recall_i = num_corrects / num_actual_results
                    # print(num_corrects)
                    mean_recall += recall_i
                    # print("*************************************")
                average_search_time = round(
                    np.mean(np.asarray(search_times)) / 1000, 3)
                mean_average_accuracy = mean_average_accuracy / num_queries
                mean_recall = mean_recall / num_queries
                # print(average_search_time)
                # print(accuracy)
                final_results.append([
                    num_groups, num_clusters, threshold, num_queries,
                    'euclidean', average_search_time,
                    round(mean_average_accuracy, 4), round(mean_recall, 4)
                ])
                print([
                    num_groups, num_clusters, threshold, num_queries,
                    'euclidean', average_search_time,
                    round(mean_average_accuracy, 4), round(mean_recall, 4)
                ])
                print("finish")
                print("-----------------------------------------------")
    output_attentions=False,
    output_hidden_states=False)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
modelClassifier.to(device)
modelClassifier.load_state_dict(torch.load(
    '/Users/younesagabi/Desktop/YouTaQA/DeepLearning/Classifier/Models/BERT_ft_epoch10.model',
    map_location=torch.device(device)),
    strict=False)
modelExtractor = BertForQuestionAnswering.from_pretrained(
    'bert-large-uncased-whole-word-masking-finetuned-squad')
txt_file = open(r"txt_file.txt", "w+")
tsv_file = open("test.tsv")
read_tsv = csv.reader(tsv_file, delimiter="\t")
searchObject = Searcher(
    "/Users/younesagabi/Desktop/YouTaQA/IR/index_wiki_v7.0")
Similarity = Similarity()
for row in read_tsv:
    inputQuery = row[0]
    result = searchObject.multiFieldsSearch(inputQuery, BM25Similarity())
    # print(result)
    # print("#" * 100)
    # print("#" * 100)
    content = ""
    list = ['']
    list.append(inputQuery)
    list.pop(0)
    j = 0
    for i in range(len(result)):
        hitDoc = searchObject.searcher.doc(result[i].doc)
        score = result[i].score
from cspProblemDefine import CSP, Constraint, ne_, is_
from operator import lt, ne, eq, gt
from search import Search_from_CSP, Searcher


def meet_at(p1, p2):
    """returns a function that is true when the words meet at the
    positions p1, p2"""
    def meets(w1, w2):
        return w1[p1] == w2[p2]
    meets.__name__ = "meet_at(" + str(p1) + ',' + str(p2) + ')'
    return meets


crossword1 = CSP({'one_across': {'ant', 'bus', 'car', 'has'},
                  'one_down': {'buys', 'hold', 'lane', 'year'},
                  'three_across': {'buys', 'hold', 'lane', 'year'},
                  'two_down': {'search', 'syntax'},
                  'four_across': {'ant', 'bus', 'car', 'has'}},
                 [Constraint(('one_across', 'one_down'), meet_at(0, 0)),
                  Constraint(('one_down', 'three_across'), meet_at(2, 0)),
                  Constraint(('one_across', 'two_down'), meet_at(2, 0)),
                  Constraint(('three_across', 'two_down'), meet_at(2, 2)),
                  Constraint(('four_across', 'two_down'), meet_at(0, 4))])

searcher3 = Searcher(Search_from_CSP(crossword1))
print('The first solution searched is:')
print(searcher3.search())
ap = argparse.ArgumentParser()
ap.add_argument("-q", "--query", required=True, help="Path to input image")
arg = vars(ap.parse_args())

f = open('dictionary.txt', 'r')
dataset = cPickle.loads(f.read())

queryImage = cv2.imread(arg["query"])
cv2.imshow("QueryImage", queryImage)
print "Query :: %s" % (arg["query"][arg["query"].rfind('/') + 1:])

rgbHist = RGBHist([8, 8, 8])
queryHist = rgbHist.getHist(queryImage)

searcher = Searcher(dataset)
results = searcher.search(queryHist)

set1 = np.zeros((150 * 5, 400, 3), dtype='uint8')
set2 = np.zeros((150 * 5, 400, 3), dtype='uint8')
for i in xrange(0, 10):
    (fileName, dist) = results[i]
    print "Result %d :: %s, Score :: %f" % (i, fileName, dist)
    path = './dataset/' + fileName
    image = cv2.imread(path)
    if i < 5:
        set1[150 * i:150 * (i + 1), :, :] = image
    else:
        set2[150 * (i - 5):150 * (i - 4), :, :] = image
def home():
    searcher = Searcher()
    movies, tvs = searcher.default_display()
    return render_template("index.html", movie_videos=movies["videos"],
                           tv_videos=tvs["videos"])
def api_search():
    query = request.args['query']
    field = request.args['field']
    searcher = Searcher()
    result = searcher.search(query, field)
    return jsonify(result)