def run_svm_rank_model(test_file, model_file, predictions_folder):
    """
    Classify ``test_file`` with a trained SVM-rank model and return the predictions path.

    :param test_file: path of the feature file passed to ``svm_rank_classify``
    :param model_file: path of the trained model; its base name is reused as the
        name of the predictions file
    :param predictions_folder: output folder. It is concatenated directly with the
        file name, so it must end with a path separator.
    :return: path of the predictions file
    """
    # exist_ok avoids the check-then-create race when several workers target
    # the same predictions folder at the same time.
    os.makedirs(predictions_folder, exist_ok=True)
    predictions_file = predictions_folder + os.path.basename(model_file)
    # The binary is resolved relative to the current working directory.
    command = "./svm_rank_classify " + test_file + " " + model_file + " " + predictions_file
    run_and_print(command, command_name='pair ranking')
    return predictions_file
def run_model(features_file, jar_path, score_file, model_path):
    """
    Score ``features_file`` with the model at ``model_path`` using the jar at ``jar_path``.

    The score file is created with ``touch`` before the jar is invoked with the
    ``-load``, ``-rank`` and ``-score`` options.

    :return: ``score_file``
    """
    ensure_dirs(score_file)

    # Make sure the score file exists before the ranker is launched.
    touch_command = ' '.join(['touch', score_file])
    run_and_print(touch_command)

    rank_command = ' '.join([
        'java', '-jar', jar_path,
        '-load', model_path,
        '-rank', features_file,
        '-score', score_file,
    ])
    run_and_print(rank_command, 'ranking')
    return score_file
def create_features_file_diff(features_dir, base_index_path, new_index_path, new_features_file, working_set_file,
                              scripts_path, swig_path, stopwords_file, queries_text_file):
    """
    Creates a feature file via a given index and a given working set file.

    Steps:
      1. Recreate ``features_dir`` as an empty directory.
      2. Run the ``LTRFeatures`` Java class (from ``seo_indri_utils.jar``) over the
         two indexes, the stopwords file, the queries file and the working set,
         with ``features_dir`` as its last argument.
      3. Under ``constants.lock``, run ``generate.pl`` and move the resulting
         ``features`` file to ``new_features_file`` and ``featureID`` to the
         directory of ``new_features_file``.

    :param scripts_path: directory holding ``generate.pl``. It is concatenated
        directly with the script name, so it must end with a path separator.
    :return: ``new_features_file``
    """
    # Remove any previous content so the directory is recreated empty below.
    if os.path.exists(features_dir):
        run_and_print("rm -r " + features_dir)
    os.makedirs(features_dir)
    ensure_dirs(new_features_file)

    command = f'java -Djava.library.path={swig_path} -cp seo_indri_utils.jar LTRFeatures {base_index_path} ' \
              f'{new_index_path} {stopwords_file} {queries_text_file} {working_set_file} {features_dir}'
    run_and_print(command, command_name='LTRFeatures')

    # `features` and `featureID` are moved from fixed relative paths, so this
    # section is serialized. NOTE(review): presumably generate.pl writes them
    # into the current working directory - confirm.
    constants.lock.acquire()
    try:
        command = f"perl {scripts_path}generate.pl {features_dir} {working_set_file}"
        run_and_print(command, 'generate.pl')
        command = f"mv features {new_features_file}"
        run_and_print(command, 'move')
        command = "mv featureID " + os.path.dirname(new_features_file)
        run_and_print(command, 'move')
    finally:
        # Always release, otherwise a failing command would block every other
        # worker waiting on the lock.
        constants.lock.release()
    return new_features_file
def create_sentence_vector_files(output_dir, raw_ds_file, base_index, new_index, swig_path, documents_ws):
    """
    Run the ``PrepareTFIDFVectorsSentences`` Java class over the two given indexes.

    :raises ValueError: if ``base_index`` or ``new_index`` does not exist on disk
        (``base_index`` is checked first)
    """
    # Fail fast on the first index path that is not present.
    missing_index = next(
        (path for path in (base_index, new_index) if not os.path.exists(path)),
        None,
    )
    if missing_index is not None:
        raise ValueError(f'The index {missing_index} does not exist')

    java_invocation = f'java -Djava.library.path={swig_path} -cp seo_indri_utils.jar PrepareTFIDFVectorsSentences'
    java_arguments = f'{base_index} {new_index} {raw_ds_file} {output_dir} {documents_ws}'
    run_and_print(f'{java_invocation} {java_arguments}', command_name='PrepareTFIDFVectorsSentences')
def feature_creation(qrid, ranked_lists, doc_texts, ref_index, copy_docs, doc_tfidf_vectors_dir,
                     sentence_tfidf_vectors_dir, raw_dataset_file, query_text, output_feature_files_dir,
                     output_final_features_file, workingset_file, word_embed_model):
    """
    Build the final features file for the given raw dataset.

    Steps:
      1. Read the raw dataset with ``read_raw_ds`` and write the working set
         with ``create_ws``.
      2. Produce the per-feature files in ``output_feature_files_dir`` with
         ``create_features``.
      3. Under ``constants.lock``, run ``scripts/generateSentences.pl`` and move
         the resulting ``features`` file to ``output_final_features_file``.

    The function returns nothing; its result is the file at
    ``output_final_features_file``.
    """
    ensure_dirs(output_feature_files_dir, output_final_features_file)
    raw_ds = read_raw_ds(raw_dataset_file)
    create_ws(raw_ds, workingset_file, ref_index)
    create_features(qrid, ranked_lists, doc_texts, ref_index, copy_docs, doc_tfidf_vectors_dir,
                    sentence_tfidf_vectors_dir, query_text, output_feature_files_dir, raw_ds, word_embed_model)

    # `features` is moved from a fixed relative path, so this section is
    # serialized. NOTE(review): presumably generateSentences.pl writes it into
    # the current working directory - confirm.
    constants.lock.acquire()
    try:
        command = f"perl scripts/generateSentences.pl {output_feature_files_dir} {workingset_file}"
        run_and_print(command, 'generateSentences.pl')
        command = "mv features " + output_final_features_file
        run_and_print(command, 'move')
    finally:
        # Always release, otherwise a failing command would block every other
        # worker waiting on the lock.
        constants.lock.release()
def create_index(trectext_file, index_name, indri_path=constants.indri_path):
    """
    Parse the given trectext file and build an index from it.

    Any existing content at ``index_name`` is removed first. The index is built
    by ``IndriBuildIndex`` with corpus class ``trectext``, ``1G`` of memory and
    the ``krovetz`` stemmer.

    :param indri_path: installation prefix. It is concatenated directly with
        ``bin/IndriBuildIndex``, so it must end with a path separator.
    :return: ``index_name``
    """
    # Always rebuild from scratch.
    if os.path.exists(index_name):
        shutil.rmtree(index_name)
    ensure_dirs(index_name)

    build_options = [
        ('corpus.path', trectext_file),
        ('corpus.class', 'trectext'),
        ('index', index_name),
        ('memory', '1G'),
        ('stemmer.name', 'krovetz'),
    ]
    option_string = ' '.join(f'-{option}={value}' for option, value in build_options)
    run_and_print(f'{indri_path}bin/IndriBuildIndex {option_string}', command_name='IndriBuildIndex')
    return index_name
def generate_document_tfidf_files(workingset_file, output_dir, new_index, base_index=clueweb_index):
    """
    Run the ``PrepareTFIDFVectorsWSDiff`` Java class for the given working set.

    The Java library path and class path come from the module-level
    ``swig_path`` and ``indri_utils_path``. The program receives the base index,
    the new index, the working set file and ``output_dir``, in that order.
    """
    ensure_dirs(output_dir)
    java_invocation = f'java -Djava.library.path={swig_path} -cp {indri_utils_path} PrepareTFIDFVectorsWSDiff'
    java_arguments = f'{base_index} {new_index} {workingset_file} {output_dir}'
    run_and_print(f'{java_invocation} {java_arguments}', command_name='Document tfidf Creation')
def generate_predictions(model_path, svm_rank_scripts_dir, predictions_dir, feature_file):
    """
    Classify ``feature_file`` with ``svm_rank_classify`` and return the predictions path.

    The predictions file is named after the feature file, with ``_predictions``
    inserted before its extension, and is prefixed with ``predictions_dir``.
    Both ``predictions_dir`` and ``svm_rank_scripts_dir`` are concatenated
    directly, so each must end with a path separator.

    :return: path of the predictions file
    """
    stem, extension = splitext(basename(feature_file))
    predictions_file = predictions_dir + stem + '_predictions' + extension
    ensure_dirs(predictions_file)

    classifier = f'{svm_rank_scripts_dir}svm_rank_classify'
    run_and_print(f'{classifier} {feature_file} {model_path} {predictions_file}', 'pair classify')
    return predictions_file
def create_model(svm_rank_scripts_dir, model_path, learning_data, svm_rank_c=0.01):
    """
    Train a model with ``svm_rank_learn`` and write it to ``model_path``.

    :param svm_rank_scripts_dir: directory holding ``svm_rank_learn``. It is
        concatenated directly with the binary name, so it must end with a path
        separator.
    :param learning_data: training file passed to the learner
    :param svm_rank_c: value passed to the learner's ``-c`` option
    """
    ensure_dirs(model_path)
    learner = f'{svm_rank_scripts_dir}svm_rank_learn'
    learn_command = f'{learner} -c {svm_rank_c} {learning_data} {model_path}'
    run_and_print(learn_command, 'pair ranker learn')