def test_run():
    global grade, test_number, num_test_failed
    num_test_failed = 0
    orig_stdout = sys.stdout
    run_file = open('run.txt', 'w')
    sys.stdout = run_file  # capture everything the engine prints
    try:
        test_number += 1
        search_engine.main(corpus_path, output_path, stemming, queries,
                           num_doc_to_retrieve)
        run_file.flush()  # ensure buffered output reaches disk before re-reading it
        student_answers_all = [line.rstrip('\n') for line in open('run.txt')]
        # Count the lines printed per query; each query should have produced
        # exactly num_doc_to_retrieve lines.
        student_answers = [
            len(student_answers_all[i:i + num_doc_to_retrieve])
            for i in range(0, len(student_answers_all), num_doc_to_retrieve)
        ]
        correct_answers = [num_doc_to_retrieve for _ in range(len(queries))]
        test_part(correct_answers, student_answers, error_str="run")
        if num_test_failed == 0:
            results_summary.append('Running Passed')
        else:
            results_summary.append('You are printing in your project')
    except Exception as e:
        results_summary.append(
            f'Test Number: {test_number} Running Main Program Failed with the following error: {e}'
        )
    run_file.close()
    sys.stdout = orig_stdout
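# `test_part` is not shown in this snippet. A minimal sketch of what its call
# site implies (an assumption, not the original helper): compare the two lists
# and bump the global failure counter on any mismatch. It writes to stderr
# because stdout is redirected to run.txt at call time.
def test_part(correct_answers, student_answers, error_str=""):
    global num_test_failed
    if correct_answers != student_answers:
        num_test_failed += 1
        sys.stderr.write(f'Mismatch in {error_str}: expected {correct_answers}, '
                         f'got {student_answers}\n')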
def test_input_one(self, stdout):
    main()
    self.assertEqual(stdout.getvalue(), "\n".join([
        '1 2',
        '3',
        '2 1',
    ]) + '\n')
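# The (self, stdout) signature and the stdout.getvalue() call above imply the
# method is decorated with a sys.stdout patch; the decorator itself is not
# visible in the snippet. A self-contained sketch of that assumed setup:
import io
import unittest
from unittest import mock

class MainOutputTest(unittest.TestCase):
    @mock.patch('sys.stdout', new_callable=io.StringIO)
    def test_prints_expected_lines(self, stdout):
        print('1 2')  # stands in for main(), which is defined elsewhere
        self.assertEqual(stdout.getvalue(), '1 2\n')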
def main(stemmer_obj):
    LOGGER.info("Starting run script")
    generate_configs.main(stemmer_obj)
    queries_parser.main(stemmer_obj)
    make_inverse_index.main(stemmer_obj)
    create_model.main()
    search_engine.main()
    evaluate_results.main(stemmer_obj)
    LOGGER.info("Finishing run script")
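# Example invocation of the pipeline above, assuming stemmer_obj is an
# NLTK-style stemmer object (an assumption; the expected type is not shown
# in this snippet):
if __name__ == '__main__':
    from nltk.stem import PorterStemmer  # the choice of stemmer is illustrative
    main(PorterStemmer())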
def result():
    if request.method == 'POST':
        result = request.form
        query_list = create_tokens.tokenizer(result.values())
        main_urls = search_engine.main(query_list)
        if len(main_urls) > 0:
            return render_template("result.html", result=main_urls)
        else:
            return render_template("no_result.html")
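# Assumed wiring for the handler above (not shown in the snippet): a standard
# Flask app with the view registered for GET and POST. The route path is an
# illustrative guess.
from flask import Flask, render_template, request

app = Flask(__name__)
app.add_url_rule('/result', view_func=result, methods=['GET', 'POST'])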
def search(event):
    output = se.main(self.entry.get())
    self.button['state'] = DISABLED
    self.button.unbind("<Button-1>")
    if isinstance(output, str):
        # the engine returned an error message rather than a result list
        self.nothing['text'] = output
        self.message_output['text'] = ""
        self.message_output.unbind("<Button-1>")
    elif len(output) != 0:
        text = ""
        for doc_id, score in output[:20]:
            text += "Document #" + str(doc_id) + " with score " + str(round(score, 5)) + "\n"
        self.message_output['text'] = text
        self.message_output.bind(
            "<Button-1>", functools.partial(callback, index=output[0][0]))
        self.nothing['text'] = "Results: " + str(len(output))
    else:
        self.nothing['text'] = "No matches"
        self.message_output['text'] = ""
        self.message_output.unbind("<Button-1>")
import search_engine

if __name__ == '__main__':
    search_engine.main('Data', 'posting', False,
                       'C:/Users/maorb/OneDrive/Desktop/arnona/queries.txt', 2000)
import search_engine
# import search_engine_best

if __name__ == '__main__':
    corpus_path = 'C:\\Users\\amitv\\University\\Information retrieval\\corpus'
    output_path = 'C:\\Users\\amitv\\University\\Information retrieval\\output'
    stemming = False
    queries = 'C:\\Users\\amitv\\University\\Information retrieval\\Search_Engine\\data\\queries_train.tsv'
    num_docs_to_retrieve = 100
    search_engine.main(corpus_path, output_path, stemming, queries, num_docs_to_retrieve)
    # search_engine_best.main()
import search_engine

if __name__ == '__main__':
    # Raw strings keep the Windows backslashes literal instead of relying on
    # unrecognized escape sequences passing through unchanged.
    search_engine.main(
        r'C:\Users\elitm\PycharmProjects\Search_Engine-maste\Data',
        r'C:\Users\elitm\PycharmProjects\Search_Engine-maste\output_path',
        False,
        r'C:\Users\elitm\PycharmProjects\queries.txt',
        2000)
import search_engine

if __name__ == '__main__':
    search_engine.main('corpus', 'posting', True, 'queries.txt', 100)
import search_engine

if __name__ == '__main__':
    search_engine.main(stemming=True, index_corpus=True, query_engine=True)
import search_engine

if __name__ == '__main__':
    search_engine.main('C:\\Users\\guykl\\PycharmProjects\\Data', 'posting',
                       False, "queries.txt", 250)
def setUp(self):
    from search_engine import main
    from webtest import TestApp

    app = main({})
    self.testapp = TestApp(app)
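# A sketch of how self.testapp might be exercised, assuming the WSGI app
# returned by main({}) serves a '/' route (an assumption; no routes are shown
# in this snippet). TestApp.get is the standard WebTest request helper.
def test_root(self):
    res = self.testapp.get('/')
    self.assertEqual(res.status_int, 200)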
import os

import search_engine
import parser_module

if __name__ == '__main__':
    search_engine.main(f"{os.getcwd()}\\Data", f"{os.getcwd()}\\Postings",
                       False, "queries.txt", 2000)
    # parser = parser_module.Parse()
    # parser.parse_sentence('https://www.instagram.com/p/CD7fAPWs3WM/?igshid=o9kf0ugp1l8x https://twitter.com/i/web/status/1290533420381085697')
import nltk

import search_engine

nltk.download('stopwords')
nltk.download('punkt')

search_engine.main()
import search_engine
import utils
from MapReduce import MapReduce

if __name__ == '__main__':
    # lstQuery = ['Dr. Anthony Fauci wrote in a 2005 paper published in Virology Journal that hydroxychloroquine was effective in treating SARS.',
    #             'The seasonal flu kills more people every year in the U.S. than COVID-19 has to date.',
    #             'Coronavirus is less dangerous than the flu']
    # x = list()
    # x.append()
    lstQuery = ['schools we need', 'Donald Trump']
    search_engine.main("C:\\Code\\Python\\Data", "C:\\Code\\Answers", False, lstQuery, 20)

    # inv = search_engine.load_index()
    # search_engine.search_and_rank_query('schools we need', inv, 20)
    # term_lst = ['Abx', 'Trg', 'TRG', 'Bcd', 'bcc', 'erg', 'png']
    # term_lst.sort(key=lambda x: x.lower())
    # x.clear()
    # print(x[0])
    # map_reduce = MapReduce.import_map_reduce('MapReduceData/')
    # x = map_reduce.read_from_func('@brettboyter24')
    # print(len(x))
    # inverted_index = utils.load_obj("inverted_idx")
    # print(inverted_index)

    # scratch experiments, left disabled inside a string literal
    """
    path = 'MapReduceData/'
    map_reduce = MapReduce(path=path)
    doc_0 = 'doc0fgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfghjjjjjjjjjjjj'
    doc_1 = 'doc1'
    doc_2 = 'doc2'
    """
import search_engine

if __name__ == '__main__':
    search_engine.main('C:\\Users\\Ben Rozilio\\Desktop\\SemA\\engine\\Data\\Data2',
                       'output_path',
                       True,
                       'C:\\Users\\Ben Rozilio\\Desktop\\SemA\\engine\\queries.txt',
                       5)
import search_engine

if __name__ == '__main__':
    ### guy's main ###
    q = "queries.txt"
    c_path = r'C:\Users\Guyza\OneDrive\Desktop\Information_Systems\University\Third_year\Semester_E' \
             r'\Information_Retrieval\Search_Engine_Project\Data\Data'
    search_engine.main(stemming=False, num_docs_to_retrieve=20, queries=q,
                       corpus_path=c_path, output_path='')

    ### yoni's main ###
    q = r"C:\Users\yonym\Desktop\ThirdYear\IR\engineV1\full_run_data\queries.txt"
    search_engine.main(stemming=False, num_docs_to_retrieve=20, queries=q,
                       corpus_path='C:\\Users\\yonym\\Desktop\\ThirdYear\\IR\\engineV1\\Data\\',
                       output_path='')
import search_engine

if __name__ == '__main__':
    corpus_path = "C:/Users/gal/Desktop/Data"
    output_path = "C:/Users/gal/Desktop"
    queries = []
    numOfDocs = 20
    stemming = False
    search_engine.main(corpus_path, output_path, stemming, queries, numOfDocs)
import search_engine
import configuration

if __name__ == '__main__':
    search_engine.main(**configuration.config)
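# A sketch of what configuration.config plausibly holds, assuming the same
# keyword parameters the other drivers in this collection pass explicitly
# (corpus_path, output_path, stemming, queries, num_docs_to_retrieve). The
# real configuration module is not shown here and these values are illustrative.
config = {
    'corpus_path': 'Data',
    'output_path': 'posting',
    'stemming': False,
    'queries': 'queries.txt',
    'num_docs_to_retrieve': 100,
}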
def test_input_two(self, stdout):
    main()
    self.assertEqual(stdout.getvalue(), "\n".join([
        '4 5 1 2 3',
    ]) + '\n')
"The COVID-19 pandemic was planned by the Rockefeller Foundation in Operation Lockstep.", "COVID-19 could lose its epidemic status in the United States because of declining coronavirus death rates according to CDC data.", "healthy people should NOT wear masks", "coronavirus is a bioweapon created in a lab in Wuhan", "The outbreak began because people ate bat soup", "Outbreak people ate bat", "Coronavirus eat bat soup", "Wearing a mask to prevent the spread of COVID-19 is unnecessary because the disease can also be spread via farts.", "For younger people, seasonal flu is “in many cases” a deadlier virus than COVID-19.", "The coronavirus disease (COVID-19) is caused by a virus", "Covid-19 is not caused by bacteria", "The prolonged use of medical masks when properly worn, DOES NOT cause CO2 intoxication nor oxygen deficiency", "Masks don't cause CO2 intoxication.", "The COVID-19 coronavirus pandemic caused a nationwide shortage of U.S. coins in circulation during the summer of 2020." "Coins shortage due to coronavirus" ] search_engine.main("C:\\Users\\ayman\\Downloads\\Data", "", False, lstQuery, 20) print('Finish all Time ' + str(time.time() - start_time)) # inv = search_engine.load_index() # search_engine.search_and_rank_query('schools we need', inv, 20) # term_lst =['Abx','Trg','TRG','Bcd','bcc','erg','png'] # term_lst.sort(key=lambda x: x.lower()) # x.clear() # print(x[0]) # map_reduce = MapRe duce.import_map_reduce('MapReduceData/') # x = map_reduce.read_from_func('@brettboyter24') # print(len(x)) # inverted_index = utils.load_obj("inverted_idx") # print(inverted_index) """