def main():
    """Drive the full retrieval pipeline: parse the corpus, build the unigram
    index, run BM25 / tf-idf / query-likelihood ranking, pseudo-relevance
    feedback, stemmed and stopped variants, and finally snippet generation."""
    # File_path is a class that stores the file paths for required documents.
    f = File_path()
    f.declare_paths()

    # Parse the raw corpus into cleaned per-document files.
    corpus = Create_corpus(f.raw_files_folder, True, True)
    corpus.parse_files(f.raw_files_folder, f.parsed_file_folder, True, True)

    # Build the unigram inverted index and collect document statistics.
    indexer = Indexer()
    indexer.create_unigram_index(f.parsed_file_folder, f.index_file_path)
    c = Context()
    index = c.read_inverted_index(f.index_file_path)
    DL = c.calculate_document_length(f.parsed_file_folder)
    AvDL = c.calculate_avg_doc_length(f.parsed_file_folder)

    # Parse the raw queries, then load them as {query_id: query_text}.
    qp = Query_Parser()
    qp.parse_queries(f.query_file_path, f.parsed_query_file_path)
    query = {}
    # Fixed: the file handle was opened and never closed; `with` guarantees it.
    with open(f.parsed_query_file_path, "r") as query_file:
        for line in query_file:
            # Fixed: split(":") truncated query text containing a colon;
            # partition keeps everything after the first ":".
            qid, _, text = line.partition(":")
            query[qid] = text.strip()

    # Task 1 - baseline runs: BM25 (with relevance), tf-idf, query likelihood.
    bm = BM25WithRelevance("BM25WithRelevance")
    bm.retrieve_bm25_scores(query, f.parsed_file_folder, AvDL, DL, index,
                            f.relevance_file_path, f.output_folder_path)
    tf = Tf_idf("TfIdfRanking")
    tf.retrieve_tfidf_scores(DL, query, index, f.output_folder_path)
    # Renamed from `q` so the QL model no longer clobbers the parser binding.
    ql = QueryLikelihood("QLModel")
    ql.retrieve_QL_scores(DL, query, index, f.output_folder_path)

    # Task 2 - pseudo relevance feedback.
    pr = PseudoRelFeedback()
    pr.PRmain(f.parsed_file_folder, f.index_file_path, f.parsed_query_file_path,
              f.relevance_file_path, f.stop_file_path, f.output_folder_path)

    # Task 3 - stemmed queries and stopped-corpus runs.
    t = Task3()
    t.driver_stemmed(f)
    t.ranking_with_stopwords(f)

    # Phase 2 - snippet generation from the BM25 ranked list.
    sg = SnippetGeneration(f.raw_files_folder)
    sg.get_queries(f.parsed_query_file_path)
    output_file_path = f.output_folder_path + "/" + "BM25WithRelevance" + ".txt"
    sg.get_ranklist(output_file_path)
    sg.generate_snippet(f.snippet_file)
def driver_stemmed(self, f):
    """Run BM25 / tf-idf / query-likelihood ranking over the pre-stemmed
    corpus and stemmed queries referenced by the File_path object *f*.

    Results are written under f.output_folder_path, tagged by model name.
    """
    # Materialize the stemmed corpus as individual document files.
    s = Stemmed_parser(f.stemmed_file, f.stemmed_query_file)
    # Fixed: this local was named `dict`, shadowing the builtin.
    stem_docs = s.get_stem_documents()
    s.create_files_from_dictionary(stem_docs, f.stemmed_folder_path)
    query = s.create_queryList_stemmed_query(f.stemmed_query_file)

    # Index the stemmed corpus and gather document statistics.
    indexer = Indexer()
    indexer.create_unigram_index(f.stemmed_folder_path, f.stemmed_index_file_path)
    c = Context()
    index = c.read_inverted_index(f.stemmed_index_file_path)
    DL = c.calculate_document_length(f.stemmed_folder_path)
    AvDL = c.calculate_avg_doc_length(f.stemmed_folder_path)

    # Rank with the three models; the constructor string tags the output file.
    bm = BM25WithRelevance("BM25WithStemming")
    # NOTE(review): the original bound calculate_K's result to an unused local.
    # The call is kept in case it has side effects on `bm` — confirm and drop.
    bm.calculate_K(f.stemmed_folder_path, AvDL, DL)
    bm.retrieve_bm25_scores(query, f.stemmed_folder_path, AvDL, DL, index,
                            f.relevance_file_path, f.output_folder_path)
    tf = Tf_idf("TfidfWithStemming")
    tf.retrieve_tfidf_scores(DL, query, index, f.output_folder_path)
    ql = QueryLikelihood("QLWithStemming")
    ql.retrieve_QL_scores(DL, query, index, f.output_folder_path)
def ranking_with_stopwords(self, f):
    """Build a stopword-filtered corpus and run BM25 / tf-idf /
    query-likelihood ranking on it, using paths from the File_path *f*.

    Results are written under f.output_folder_path, tagged by model name.
    """
    # Produce the stopped corpus (and stopped queries) on disk.
    s = StoppedCorpus()
    s.stop_corpus(f.stop_corpus_folder_path, f.parsed_file_folder,
                  f.parsed_query_file_path, f.stop_query_path, f.stop_file_path)

    # Index the stopped corpus and collect document statistics.
    indexer = Indexer()
    indexer.create_unigram_index(f.stop_corpus_folder_path, f.stop_index_file_path)
    c = Context()
    index = c.read_inverted_index(f.stop_index_file_path)
    DL = c.calculate_document_length(f.stop_corpus_folder_path)
    AvDL = c.calculate_avg_doc_length(f.stop_corpus_folder_path)

    # Load queries as {query_id: query_text}.
    # NOTE(review): this reads the *unstopped* parsed queries rather than
    # f.stop_query_path — confirm that is intended.
    query_stopped = {}
    # Fixed: the file handle was opened and never closed; `with` guarantees it.
    with open(f.parsed_query_file_path, "r") as query_file:
        for line in query_file:
            # Fixed: split(":") truncated query text containing a colon.
            qid, _, text = line.partition(":")
            query_stopped[qid] = text.strip()

    bm1 = BM25WithRelevance("BM25WithStopping")
    bm1.retrieve_bm25_scores(query_stopped, f.stop_corpus_folder_path, AvDL, DL,
                             index, f.relevance_file_path, f.output_folder_path)
    tf1 = Tf_idf("TfIdfWithStopping")
    tf1.retrieve_tfidf_scores(DL, query_stopped, index, f.output_folder_path)
    ql1 = QueryLikelihood("QLModelWithStopping")
    ql1.retrieve_QL_scores(DL, query_stopped, index, f.output_folder_path)