Exemple #1
0
 def test_run():
     """Run ``search_engine.main`` with stdout captured in ``run.txt`` and grade it.

     Everything printed during the run is redirected into ``run.txt``.  The
     captured lines are cut into groups of ``num_doc_to_retrieve`` and the
     group sizes are compared, through ``test_part``, against one full group
     per entry in ``queries``.  A one-line verdict is appended to
     ``results_summary``.

     Module-level state touched: ``test_number`` is incremented and
     ``num_test_failed`` is reset to 0 (presumably ``test_part`` raises it on
     a mismatch -- confirm against its definition).
     """
     global grade, test_number, num_test_failed
     num_test_failed = 0
     orig_stdout = sys.stdout
     run_file = open('run.txt', 'w')
     sys.stdout = run_file
     try:
         test_number += 1
         search_engine.main(corpus_path, output_path, stemming, queries,
                            num_doc_to_retrieve)
         # The capture file is block-buffered: flush it so the independent
         # read below sees everything that was printed.
         run_file.flush()
         with open('run.txt') as captured:
             student_answers_all = [line.rstrip('\n') for line in captured]
         student_answers = [
             len(student_answers_all[i:i + num_doc_to_retrieve]) for i
             in range(0, len(student_answers_all), num_doc_to_retrieve)
         ]
         correct_answers = [
             num_doc_to_retrieve for _ in range(len(queries))
         ]
         test_part(correct_answers, student_answers, error_str="run")
         if num_test_failed == 0:
             results_summary.append('Running Passed')
         else:
             results_summary.append('You are printing in your project')
     except Exception as e:
         results_summary.append(
             f'Test Number: {test_number} Running Main Program Failed with the following error: {e}'
         )
     finally:
         # Runs even for SystemExit/KeyboardInterrupt, so stdout is never
         # left pointing at the capture file.
         sys.stdout = orig_stdout
         run_file.close()
 def test_input_one(self, stdout):
     """Run ``main()`` and check that exactly three expected lines were printed.

     ``stdout`` must offer ``getvalue()``; presumably it is a patched
     ``sys.stdout`` injected by a decorator -- confirm at the call site.
     """
     main()
     expected_lines = [
         '1 2',
         '3',
         '2 1',
     ]
     expected_output = "\n".join(expected_lines) + '\n'
     self.assertEqual(stdout.getvalue(), expected_output)
Exemple #3
0
def main(stemmer_obj):
    """Run the whole pipeline by calling each stage's ``main`` in a fixed order.

    ``stemmer_obj`` is passed unchanged to the config generation, query
    parsing, inverse-index and evaluation stages; the model creation and
    search stages are called without arguments.  Start and finish are
    logged through ``LOGGER``.
    """
    LOGGER.info("Starting run script")
    generate_configs.main(stemmer_obj)
    queries_parser.main(stemmer_obj)
    make_inverse_index.main(stemmer_obj)
    # These two stages take no stemmer argument.
    create_model.main()
    search_engine.main()
    evaluate_results.main(stemmer_obj)
    LOGGER.info("Finishing run script")
Exemple #4
0
def result():
    """Handle a submitted search form and render the matching result page.

    For a POST request the form values are tokenized, handed to
    ``search_engine.main`` and the returned URLs are rendered with
    ``result.html`` (or ``no_result.html`` when nothing came back).
    Any other request method yields ``None``.
    """
    if request.method != 'POST':
        # Non-POST requests produce no response value.
        return None

    form_data = request.form
    query_list = create_tokens.tokenizer(form_data.values())
    main_urls = search_engine.main(query_list)

    if len(main_urls) > 0:
        return render_template("result.html", result=main_urls)
    return render_template("no_result.html")
Exemple #5
0
 def search(event):
     """Run the query typed in the entry widget and show the top hits.

     The search button is disabled and its click binding removed once the
     query has run.  With hits, up to 20 are listed as
     ``Document #<id> with score <score>`` (score rounded to 5 places),
     clicking the list invokes ``callback`` with the first hit's id, and the
     total hit count is shown.  Without hits, the list is cleared and a
     "no match" notice is shown.

     ``event`` is the Tk event object; it is not used.
     Each hit is indexed as ``hit[0]`` (id) and ``hit[1]`` (score).
     """
     output = se.main(self.entry.get())
     self.button['state'] = DISABLED
     self.button.unbind("<Button-1>")
     if len(output) != 0:
         # Build the listing in one pass instead of repeated concatenation.
         self.message_output['text'] = "".join(
             "Document #" + str(hit[0]) + " with score "
             + str(round(hit[1], 5)) + "\n"
             for hit in output[:20])
         self.message_output.bind(
             "<Button-1>",
             functools.partial(callback, index=output[0][0]))
         self.nothing['text'] = "Results: " + str(len(output))
     else:
         # len(output) == 0.  The original third branch could never run,
         # because the two length checks already covered every case.
         self.nothing['text'] = "No coincidence"
         self.message_output['text'] = ""
         self.message_output.unbind("<Button-1>")
Exemple #6
0
import search_engine

if __name__ == '__main__':
    # Positional arguments in call order; presumably (corpus path, output
    # path, stemming flag, queries file, number of docs) -- confirm against
    # search_engine.main.
    run_args = (
        'Data',
        'posting',
        False,
        'C:/Users/maorb/OneDrive/Desktop/arnona/queries.txt',
        2000,
    )
    search_engine.main(*run_args)
Exemple #7
0
import search_engine
# import search_engine_best

if __name__ == '__main__':
    # Run settings for one developer's local Windows checkout.
    stemming = False
    num_docs_to_retrieve = 100
    corpus_path = 'C:\\Users\\amitv\\University\\Information retrieval\\corpus'
    output_path = 'C:\\Users\\amitv\\University\\Information retrieval\\output'
    queries = 'C:\\Users\\amitv\\University\\Information retrieval\\Search_Engine\\data\\queries_train.tsv'

    search_engine.main(
        corpus_path,
        output_path,
        stemming,
        queries,
        num_docs_to_retrieve,
    )
    # Alternative entry point, currently disabled: search_engine_best.main()
import search_engine

if __name__ == '__main__':
    # Raw strings keep the Windows paths byte-for-byte identical while
    # avoiding invalid escape sequences such as "\e", "\P" and "\S", which
    # newer Python versions warn about.
    search_engine.main(
        r'C:\Users\elitm\PycharmProjects\Search_Engine-maste\Data',
        r'C:\Users\elitm\PycharmProjects\Search_Engine-maste\output_path',
        False, r'C:\Users\elitm\PycharmProjects\queries.txt', 2000)
Exemple #9
0
import search_engine

if __name__ == '__main__':
    # Positional arguments in call order; their meaning is defined by
    # search_engine.main.
    run_args = ('corpus', 'posting', True, 'queries.txt', 100)
    search_engine.main(*run_args)
Exemple #10
0
import search_engine

if __name__ == '__main__':
    # All three keyword flags are switched on for this run.
    run_flags = {
        'stemming': True,
        'index_corpus': True,
        'query_engine': True,
    }
    search_engine.main(**run_flags)
Exemple #11
0
import search_engine

if __name__ == '__main__':
    # Positional arguments in call order; their meaning is defined by
    # search_engine.main.
    run_args = (
        'C:\\Users\\guykl\\PycharmProjects\\Data',
        'posting',
        False,
        "queries.txt",
        250,
    )
    search_engine.main(*run_args)
Exemple #12
0
 def setUp(self):
     """Build the application with an empty settings dict and wrap it for testing.

     The result of ``search_engine.main({})`` is wrapped in a
     ``webtest.TestApp`` and stored on ``self.testapp``.
     """
     # Imports stay local and in their original order relative to the
     # application being built.
     from search_engine import main
     wsgi_app = main({})

     from webtest import TestApp
     self.testapp = TestApp(wsgi_app)
Exemple #13
0
import search_engine
import parser_module
import os
if __name__ == '__main__':
    # Resolve the data and postings folders relative to the working
    # directory.  os.path.join uses the platform's separator instead of a
    # hard-coded "\\", so the script also builds valid paths off Windows.
    base_dir = os.getcwd()
    search_engine.main(os.path.join(base_dir, "Data"),
                       os.path.join(base_dir, "Postings"),
                       False, "queries.txt", 2000)
    #parser = parser_module.Parse()
    #parser.parse_sentence('https://www.instagram.com/p/CD7fAPWs3WM/?igshid=o9kf0ugp1l8x https://twitter.com/i/web/status/1290533420381085697')
Exemple #14
0
import nltk
import search_engine

# Fetch the NLTK resources before starting the engine.
for _resource in ('stopwords', 'punkt'):
    nltk.download(_resource)

search_engine.main()
Exemple #15
0
import search_engine
import utils
from MapReduce import MapReduce

if __name__ == '__main__':
    # lstQuery=['Dr. Anthony Fauci wrote in a 2005 paper published in Virology Journal that hydroxychloroquine was effective in treating SARS.',
    #           'The seasonal flu kills more people every year in the U.S. than COVID-19 has to date.',
    #           'Coronavirus is less dangerous than the flu']
    # x = list()
    # x.append()
    lstQuery = ['schools we need', 'Donald Trump']
    search_engine.main("C:\\Code\\Python\\Data", "C:\\Code\\Answers", False,
                       lstQuery, 20)
    # inv = search_engine.load_index()
    # search_engine.search_and_rank_query('schools we need', inv, 20)
    # term_lst =['Abx','Trg','TRG','Bcd','bcc','erg','png']
    # term_lst.sort(key=lambda x: x.lower())
    # x.clear()
    # print(x[0])
    # map_reduce = MapRe    duce.import_map_reduce('MapReduceData/')
    # x = map_reduce.read_from_func('@brettboyter24')
    # print(len(x))

    # inverted_index = utils.load_obj("inverted_idx")
    # print(inverted_index)
    """
    path= 'MapReduceData/'
    map_reduce = MapReduce(path=path)
    doc_0 ='doc0fgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfgfghjjjjjjjjjjjj'
    doc_1 ='doc1'
    doc_2 ='doc2'
Exemple #16
0
import search_engine

if __name__ == '__main__':
    # Positional arguments in call order; their meaning is defined by
    # search_engine.main.
    run_args = (
        'C:\\Users\\Ben Rozilio\\Desktop\\SemA\\engine\\Data\\Data2',
        'output_path',
        True,
        'C:\\Users\\Ben Rozilio\\Desktop\\SemA\\engine\\queries.txt',
        5,
    )
    search_engine.main(*run_args)
Exemple #17
0
import search_engine

if __name__ == '__main__':

    # --- Guy's local run ---
    c_path = (
        r'C:\Users\Guyza\OneDrive\Desktop\Information_Systems\University\Third_year\Semester_E'
        r'\Information_Retrieval\Search_Engine_Project\Data\Data'
    )
    q = "queries.txt"
    search_engine.main(
        corpus_path=c_path,
        output_path='',
        stemming=False,
        queries=q,
        num_docs_to_retrieve=20,
    )

    # --- Yoni's local run ---
    q = r"C:\Users\yonym\Desktop\ThirdYear\IR\engineV1\full_run_data\queries.txt"
    search_engine.main(
        corpus_path='C:\\Users\\yonym\\Desktop\\ThirdYear\\IR\\engineV1\\Data\\',
        output_path='',
        stemming=False,
        queries=q,
        num_docs_to_retrieve=20,
    )
Exemple #18
0
import search_engine

if __name__ == '__main__':
    # Local run settings; the query list is left empty for this run.
    stemming = False
    numOfDocs = 20
    queries = []
    output_path = "C:/Users/gal/Desktop"
    corpus_path = "C:/Users/gal/Desktop/Data"

    search_engine.main(
        corpus_path,
        output_path,
        stemming,
        queries,
        numOfDocs,
    )
Exemple #19
0
import search_engine
import configuration
if __name__ == '__main__':
    # Every entry of configuration.config is forwarded as a keyword argument.
    run_settings = configuration.config
    search_engine.main(**run_settings)
 def test_input_two(self, stdout):
     """Run ``main()`` and check that the single expected line was printed.

     ``stdout`` must offer ``getvalue()``; presumably it is a patched
     ``sys.stdout`` injected by a decorator -- confirm at the call site.
     """
     main()
     expected_lines = [
         '4 5 1 2 3',
     ]
     expected_output = "\n".join(expected_lines) + '\n'
     self.assertEqual(stdout.getvalue(), expected_output)
Exemple #21
0
        "The COVID-19 pandemic was planned by the Rockefeller Foundation in Operation Lockstep.",
        "COVID-19 could lose its epidemic status in the United States because of declining coronavirus death rates according to CDC data.",
        "healthy people should NOT wear masks",
        "coronavirus is a bioweapon created in a lab in Wuhan",
        "The outbreak began because people ate bat soup",
        "Outbreak people ate bat", "Coronavirus eat bat soup",
        "Wearing a mask to prevent the spread of COVID-19 is unnecessary because the disease can also be spread via farts.",
        "For younger people, seasonal flu is “in many cases” a deadlier virus than COVID-19.",
        "The coronavirus disease (COVID-19) is caused by a virus",
        "Covid-19 is not caused by bacteria",
        "The prolonged use of medical masks when properly worn, DOES NOT cause CO2 intoxication nor oxygen deficiency",
        "Masks don't cause CO2 intoxication.",
        "The COVID-19 coronavirus pandemic caused a nationwide shortage of U.S. coins in circulation during the summer of 2020."
        "Coins shortage due to coronavirus"
    ]
    search_engine.main("C:\\Users\\ayman\\Downloads\\Data", "", False,
                       lstQuery, 20)
    print('Finish all Time ' + str(time.time() - start_time))

    # inv = search_engine.load_index()
    # search_engine.search_and_rank_query('schools we need', inv, 20)
    # term_lst =['Abx','Trg','TRG','Bcd','bcc','erg','png']
    # term_lst.sort(key=lambda x: x.lower())
    # x.clear()
    # print(x[0])
    # map_reduce = MapRe    duce.import_map_reduce('MapReduceData/')
    # x = map_reduce.read_from_func('@brettboyter24')
    # print(len(x))

    # inverted_index = utils.load_obj("inverted_idx")
    # print(inverted_index)
    """