Beispiel #1
0
def viewSearchbyTitle():
    """Handle a title-search form submission and render the results page.

    On a POST request the query is read from the ``namesearchbytitle`` form
    field, passed through ``clean_query`` and handed to the object returned
    by ``search_by_BM25`` (built from the title corpus), which is asked for
    the top five matches. The matches are rendered with
    ``searchbyText.html`` together with their titles, summaries, a few
    auto-generated tags and the extension of each source document.

    Returns:
        The rendered ``searchbyText.html`` template for POST requests.
        For any other request method the function returns ``None``.
    """
    if request.method != 'POST':
        # Same result as the original fall-through, just made explicit.
        return None

    def _load(name):
        # Use a context manager so every pickle file is closed right after
        # loading instead of leaking the handle until garbage collection.
        # Forward slashes work on Windows and POSIX alike; the previous
        # back-slash paths only resolved on Windows.
        # NOTE(review): pickle executes arbitrary code on load -- these files
        # are assumed to be trusted, locally generated artefacts.
        with open("DataBase/" + name, "rb") as handle:
            return pickle.load(handle)

    mystring = "Title"
    query = request.form['namesearchbytitle']

    data = _load("data_file.pkl")
    titles = _load("title_file.pkl")
    auto_tag = _load("svos_file.pkl")
    summary = _load("summary_file.pkl")
    document_file = _load("document_file.pkl")
    bm25 = search_by_BM25(_load("title_corpus.pkl"))

    tokenized_query, old_query, new_query = clean_query(query.lower())
    indexes, results = bm25.get_top_n(tokenized_query, data, n=5)

    title = []
    summaries = []
    tags = []
    extension_list = []
    for i in indexes:
        title.append(titles[i])
        summaries.append(summary[i])
        extension_list.append(document_file[i]["extension"])
        if auto_tag[i] != []:
            # random.choices samples with replacement, so after the set()
            # de-duplication between one and three tags remain.
            tags.append(list(set(random.choices(auto_tag[i], k=3))))
        else:
            tags.append(['No Auto tags'])

    # Show only the first two sentences of each hit; if sentence splitting
    # yields nothing, fall back to the raw text.
    text = []
    for document in results:
        text_to_show = " ".join(sent_tokenize(document)[:2])
        if text_to_show != '':
            text.append(text_to_show + '....')
        else:
            text.append(document)

    return render_template('searchbyText.html',
                           text=text,
                           tag=query,
                           title=title,
                           summaries=summaries,
                           tags=tags,
                           type=mystring,
                           title_len=len(title),
                           old_query=old_query,
                           new_query=new_query,
                           extension_list=extension_list)
Beispiel #2
0
def viewSearchbyTitle(the_text):
    """Run a title search for ``the_text`` and render the results page.

    The search object is built from the module-level ``data_for_title``
    corpus and asked for the five best matches in ``data``. For every match
    the title, summary, up to three auto tags and the document extension are
    looked up in the module-level ``titles``, ``summary``, ``auto_tag`` and
    ``document_file`` collections.

    Args:
        the_text: The raw query string; it is lower-cased before cleaning.

    Returns:
        The rendered ``searchbyText.html`` template.
    """
    ranker = search_by_BM25(data_for_title)
    tokenized_query, old_query, new_query = clean_query(the_text.lower())
    indexes, results = ranker.get_top_n(tokenized_query, data, n=5)

    hit_titles, hit_summaries, hit_tags = [], [], []
    for idx in indexes:
        hit_titles.append(titles[idx])
        hit_summaries.append(summary[idx])
        # Draw three tags (with replacement) and drop duplicates, or use a
        # placeholder when the document has no auto tags at all.
        hit_tags.append(
            list(set(random.choices(auto_tag[idx], k=3)))
            if auto_tag[idx] != []
            else ['No Auto tags']
        )

    # Preview = first two sentences followed by an ellipsis; documents that
    # produce an empty preview are shown unchanged.
    previews = []
    for body in results:
        lead = " ".join(sent_tokenize(body)[:2])
        previews.append((lead + '....') if lead != '' else body)

    context = {
        'text': previews,
        'tag': the_text,
        'title': hit_titles,
        'summaries': hit_summaries,
        'tags': hit_tags,
        'type': "Title",
        'title_len': len(hit_titles),
        'old_query': old_query,
        'new_query': new_query,
        'extension_list': [document_file[idx]["extension"] for idx in indexes],
    }
    return render_template('searchbyText.html', **context)
Beispiel #3
0
def viewSearchbyText():
    """Handle a full-text search form submission and render the results page.

    On a POST request the query is read from the ``namesearchbytext`` form
    field, passed through ``clean_query`` and handed to the object returned
    by ``search_by_BM25`` (built from the wiki corpus), which is asked for
    the top five matches. The matches are rendered with
    ``searchbyText.html`` together with their titles and a source label.

    Returns:
        The rendered ``searchbyText.html`` template for POST requests.
        For any other request method the function returns ``None``.
    """
    if request.method != 'POST':
        # Same result as the original fall-through, just made explicit.
        return None

    def _load(name):
        # Use a context manager so every pickle file is closed right after
        # loading instead of leaking the handle until garbage collection.
        # Forward slashes work on Windows and POSIX alike; the previous
        # back-slash paths only resolved on Windows.
        # NOTE(review): pickle executes arbitrary code on load -- these files
        # are assumed to be trusted, locally generated artefacts.
        with open("DataBase/" + name, "rb") as handle:
            return pickle.load(handle)

    # Result indexes below this value are labelled 'wikipedia'; higher ones
    # are looked up in document_file after subtracting it.
    # NOTE(review): presumably the number of Wikipedia entries placed at the
    # front of the corpus -- confirm and derive it from the data if possible.
    wiki_index_limit = 10909

    mystring = "Text"
    query = request.form['namesearchbytext']

    data = _load("data_file.pkl")
    titles = _load("title_file.pkl")
    document_file = _load("document_file.pkl")
    bm25 = search_by_BM25(_load("wiki_corpus_file.pkl"))

    tokenized_query, old_query, new_query = clean_query(query.lower())
    indexes, results = bm25.get_top_n(tokenized_query, data, n=5)

    title = []
    extension_list = []
    for i in indexes:
        title.append(titles[i])
        if i < wiki_index_limit:
            extension_list.append('wikipedia')
        else:
            extension_list.append(
                document_file[i - wiki_index_limit]["extension"])

    # Show only the first two sentences of each hit; if sentence splitting
    # yields nothing, fall back to the raw text.
    text = []
    for document in results:
        text_to_show = " ".join(sent_tokenize(document)[:2])
        if text_to_show != '':
            text.append(text_to_show + '....')
        else:
            text.append(document)

    return render_template('searchbyText.html',
                           text=text,
                           tag=query,
                           title=title,
                           type=mystring,
                           title_len=len(title),
                           old_query=old_query,
                           new_query=new_query,
                           extension_list=extension_list)

if __name__ == '__main__':
    # Small command-line driver: pick a corpus, run one query against it and
    # print the titles and texts of the best matches.

    def _load_pickle(name):
        # Close each pickle file as soon as it has been read.
        with open("DataBase/" + name, "rb") as handle:
            return pickle.load(handle)

    data = _load_pickle("data_file.pkl")
    titles = _load_pickle("title_file.pkl")

    # Maps the option typed by the user to the corpus file to search in.
    corpus_files = {
        'default search': "corpus_file.pkl",
        'tag search': "tags_pickle.pkl",
        'title search': "title_corpus.pkl",
    }

    option = input("Enter option of search")
    if option not in corpus_files:
        print('Not valid option')
        # Stop here: continuing without a corpus previously crashed with a
        # NameError on the next statement.
        raise SystemExit(1)
    corpus = _load_pickle(corpus_files[option])

    bm25 = search_by_BM25(corpus)
    query = input("Enter query")
    # NOTE(review): the view functions unpack clean_query() into three
    # values, while this script uses the result as-is -- confirm which
    # return shape applies here.
    tokenized_query = clean_query(query.lower())

    indexes, results = bm25.get_top_n(tokenized_query, data, n=20)
    results_titles = [titles[i] for i in indexes]

    # Print at most the first ten hits; zip() stops early when fewer results
    # came back, which avoids the IndexError of a fixed range(10).
    for i, (hit_title, hit_text) in enumerate(zip(results_titles[:10],
                                                  results)):
        print(f"Title_{i}: {hit_title}")
        print(f"\nText_{i}: {hit_text}")
        print('\n\n')