def search_tm(query_text, limit, lda_dictionary, lda_mdl, lda_index, lda_file_path_index):
    """Search an already-loaded LDA topic model and rescale the scores.

    The query is delegated to ``search_lda_model``; every returned score is
    then rescaled with ``(score + 1) / 2``.

    Args:
        query_text: The query, passed through to ``search_lda_model``.
        limit: Passed through to ``search_lda_model`` (presumably the
            maximum number of results -- confirm against that helper).
        lda_dictionary: Passed through unchanged to ``search_lda_model``.
        lda_mdl: Passed through unchanged to ``search_lda_model``.
        lda_index: Passed through unchanged to ``search_lda_model``.
        lda_file_path_index: Passed through unchanged to
            ``search_lda_model``.

    Returns:
        A list of ``[doc_name, rescaled_score]`` pairs, or ``None`` (after
        printing ``'No documents found.'``) when the search yields no rows.
    """
    ts_results = search_lda_model(query_text, lda_dictionary, lda_mdl,
                                  lda_index, lda_file_path_index, limit)
    # Each row of ts_results has the layout
    # [doc_id, doc_dir_path, doc_name, score].

    # len() is used instead of a truthiness test on purpose: it behaves the
    # same for any sized container (NOTE(review): the result may be an
    # array-like rather than a list -- confirm in search_lda_model).
    if len(ts_results) == 0:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print('No documents found.')
        return None

    # The score is retrieved as a string, hence the float() conversion.
    # (s + 1) / 2 maps -1 -> 0 and 1 -> 1; presumably the raw scores are
    # similarities in [-1, 1] -- TODO confirm.
    return [[row[2], (float(row[3]) + 1.0) / 2.0] for row in ts_results]
def search_tm(query_text, limit, mdl_cfg):
    """Load the LDA topic model named by ``mdl_cfg``, search it, and rescale.

    Steps, in order:
      1. ``load_tm(mdl_cfg)`` supplies the dictionary, model, index and
         file-path index.
      2. ``search_lda_model`` runs the query against them.
      3. ``get_indexed_file_details`` replaces the raw rows with file
         details read from the index directory configured under
         ``mdl_cfg['LUCENE']['lucene_index_dir']``.
      4. Each remaining row's score is rescaled with ``(score + 1) / 2``.

    Args:
        query_text: The query, passed through to ``search_lda_model``.
        limit: Passed through to ``search_lda_model`` (presumably the
            maximum number of results -- confirm against that helper).
        mdl_cfg: Nested mapping; must provide
            ``mdl_cfg['LUCENE']['lucene_index_dir']`` and whatever
            ``load_tm`` reads from it.

    Returns:
        A list of ``[row[0], rescaled_score]`` pairs built from the detailed
        rows, or ``None`` (after printing ``'No documents found.'``) when no
        rows remain after the details lookup.
    """
    lda_dictionary, lda_mdl, lda_index, lda_file_path_index = load_tm(mdl_cfg)

    ts_results = search_lda_model(query_text, lda_dictionary, lda_mdl,
                                  lda_index, lda_file_path_index, limit)
    # At this point each row has the layout
    # [doc_id, doc_dir_path, doc_name, score].

    # Grab the file details from the index; this replaces the row layout
    # above with whatever get_indexed_file_details returns.
    index_dir = mdl_cfg['LUCENE']['lucene_index_dir']
    ts_results = get_indexed_file_details(ts_results, index_dir)

    # The emptiness check intentionally comes after the details lookup, so
    # it also covers the case where that lookup returns nothing. len() is
    # kept (rather than truthiness) so any sized container works.
    if len(ts_results) == 0:
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print('No documents found.')
        return None

    # Normalize the similarity scores: (s + 1) / 2 maps -1 -> 0 and 1 -> 1.
    # NOTE(review): column 0 is presumably the document identifier and
    # column 10 the similarity score in the detailed row layout -- confirm
    # against get_indexed_file_details.
    return [[row[0], (float(row[10]) + 1.0) / 2.0] for row in ts_results]