コード例 #1
0
def run_svm_rank_model(test_file, model_file, predictions_folder):
    """
    Run ``./svm_rank_classify`` on ``test_file`` with ``model_file``.

    The predictions file is written inside ``predictions_folder`` (created if
    missing) and is named after the base name of ``model_file``.

    :param test_file: path of the feature file handed to svm_rank_classify
    :param model_file: path of the model handed to svm_rank_classify
    :param predictions_folder: directory that receives the predictions file
    :return: path of the predictions file
    """
    # exist_ok avoids the check-then-create race when several workers start
    # with the same output folder.
    os.makedirs(predictions_folder, exist_ok=True)
    # os.path.join keeps the file inside the folder whether or not the caller
    # passed a trailing separator; plain concatenation silently produced
    # "<folder><model name>" next to the folder instead of inside it.
    predictions_file = os.path.join(predictions_folder, os.path.basename(model_file))
    command = "./svm_rank_classify " + test_file + " " + model_file + " " + predictions_file
    run_and_print(command, command_name='pair ranking')
    return predictions_file
コード例 #2
0
def run_model(features_file, jar_path, score_file, model_path):
    """
    Run the ranking jar on a features file and return the score file path.

    The parent directories of ``score_file`` are prepared via ``ensure_dirs``
    and the file is touched before the java command is issued.

    :param features_file: value passed after ``-rank``
    :param jar_path: jar handed to ``java -jar``
    :param score_file: value passed after ``-score``; also the return value
    :param model_path: value passed after ``-load``
    :return: ``score_file``
    """
    ensure_dirs(score_file)
    run_and_print(" ".join(("touch", score_file)))

    java_parts = (
        "java -jar", jar_path,
        "-load", model_path,
        "-rank", features_file,
        "-score", score_file,
    )
    run_and_print(" ".join(java_parts), 'ranking')
    return score_file
コード例 #3
0
def create_features_file_diff(features_dir, base_index_path, new_index_path,
                              new_features_file, working_set_file,
                              scripts_path, swig_path, stopwords_file,
                              queries_text_file):
    """
    Creates a feature file via a given index and a given working set file

    Steps, in order:
      1. ``features_dir`` is removed if present and recreated empty.
      2. The ``LTRFeatures`` java class is run with ``features_dir`` as its
         last argument.
      3. ``generate.pl`` (looked up under ``scripts_path``) is run, then the
         ``features`` and ``featureID`` files in the current working
         directory are moved next to / onto ``new_features_file``.

    Step 3 runs while holding ``constants.lock`` because the intermediate
    file names (``features``, ``featureID``) are fixed and relative.

    :param scripts_path: prefix concatenated directly with ``generate.pl``
        (the code adds no separator, so it must already end with one)
    :return: ``new_features_file``
    """
    # Start from an empty feature directory so stale files from a previous
    # run are not picked up.
    if os.path.exists(features_dir):
        run_and_print("rm -r " + features_dir)
    os.makedirs(features_dir)
    ensure_dirs(new_features_file)

    command = f'java -Djava.library.path={swig_path} -cp seo_indri_utils.jar LTRFeatures {base_index_path} ' \
              f'{new_index_path} {stopwords_file} {queries_text_file} {working_set_file} {features_dir}'
    run_and_print(command, command_name='LTRFeatures')

    constants.lock.acquire()
    try:
        command = f"perl {scripts_path}generate.pl {features_dir} {working_set_file}"
        run_and_print(command, 'generate.pl')

        command = f"mv features {new_features_file}"
        run_and_print(command, 'move')

        command = "mv featureID " + os.path.dirname(new_features_file)
        run_and_print(command, 'move')
    finally:
        # Always release, otherwise one failing command would leave every
        # other user of the lock blocked forever.
        constants.lock.release()

    return new_features_file
コード例 #4
0
def create_sentence_vector_files(output_dir, raw_ds_file, base_index, new_index, swig_path, documents_ws):
    """
    Run the ``PrepareTFIDFVectorsSentences`` java class over two indexes.

    Both index paths must exist on disk; the first one that does not
    (``base_index`` is checked before ``new_index``) is reported.

    :raises ValueError: if ``base_index`` or ``new_index`` does not exist
    """
    first_missing = next(
        (path for path in (base_index, new_index) if not os.path.exists(path)),
        None,
    )
    if first_missing is not None:
        raise ValueError(f'The index {first_missing} does not exist')

    java_call = f'java -Djava.library.path={swig_path} -cp seo_indri_utils.jar PrepareTFIDFVectorsSentences'
    java_args = f'{base_index} {new_index} {raw_ds_file} {output_dir} {documents_ws}'
    run_and_print(f'{java_call} {java_args}', command_name='PrepareTFIDFVectorsSentences')
コード例 #5
0
def feature_creation(qrid, ranked_lists, doc_texts, ref_index, copy_docs, doc_tfidf_vectors_dir,
                     sentence_tfidf_vectors_dir, raw_dataset_file, query_text, output_feature_files_dir,
                     output_final_features_file, workingset_file, word_embed_model):
    """
    Build the sentence features and assemble them into one features file.

    Steps, in order:
      1. Output locations are prepared with ``ensure_dirs``.
      2. The raw dataset is read, a working set is written via ``create_ws``
         and per-feature files are produced by ``create_features``.
      3. ``scripts/generateSentences.pl`` is run and the resulting
         ``features`` file in the current working directory is moved to
         ``output_final_features_file``.

    Step 3 runs while holding ``constants.lock`` because the intermediate
    file name (``features``) is fixed and relative.
    """
    ensure_dirs(output_feature_files_dir, output_final_features_file)
    raw_ds = read_raw_ds(raw_dataset_file)
    create_ws(raw_ds, workingset_file, ref_index)
    create_features(qrid, ranked_lists, doc_texts, ref_index, copy_docs, doc_tfidf_vectors_dir,
                    sentence_tfidf_vectors_dir, query_text, output_feature_files_dir, raw_ds, word_embed_model)

    constants.lock.acquire()
    try:
        command = f"perl scripts/generateSentences.pl {output_feature_files_dir} {workingset_file}"
        run_and_print(command, 'generateSentences.pl')
        command = "mv features " + output_final_features_file
        run_and_print(command, 'move')
    finally:
        # Always release, otherwise one failing command would leave every
        # other user of the lock blocked forever.
        constants.lock.release()
コード例 #6
0
def create_index(trectext_file, index_name, indri_path=constants.indri_path):
    """
    Build a fresh index at ``index_name`` from a trectext file.

    Any existing index at ``index_name`` is deleted first. The build is done
    by ``IndriBuildIndex`` (looked up under ``indri_path`` + ``bin/``) with
    the trectext corpus class, 1G of memory and the krovetz stemmer.

    :return: ``index_name``
    """
    # Drop a previous index so the build starts from scratch.
    if os.path.exists(index_name):
        shutil.rmtree(index_name)
    ensure_dirs(index_name)

    build_options = (
        ('corpus.path', trectext_file),
        ('corpus.class', 'trectext'),
        ('index', index_name),
        ('memory', '1G'),
        ('stemmer.name', 'krovetz'),
    )
    option_string = ' '.join(f'-{key}={value}' for key, value in build_options)
    run_and_print(f'{indri_path}bin/IndriBuildIndex {option_string}', command_name='IndriBuildIndex')
    return index_name
コード例 #7
0
def generate_document_tfidf_files(workingset_file, output_dir, new_index, base_index=clueweb_index):
    """
    Run the ``PrepareTFIDFVectorsWSDiff`` java class for a working set.

    ``output_dir`` is prepared with ``ensure_dirs`` first. The java library
    path and class path come from the module-level ``swig_path`` and
    ``indri_utils_path``.
    """
    ensure_dirs(output_dir)
    java_call = f'java -Djava.library.path={swig_path} -cp {indri_utils_path} PrepareTFIDFVectorsWSDiff'
    java_args = f'{base_index} {new_index} {workingset_file} {output_dir}'
    run_and_print(f'{java_call} {java_args}', command_name='Document tfidf Creation')
コード例 #8
0
def generate_predictions(model_path, svm_rank_scripts_dir, predictions_dir, feature_file):
    """
    Run ``svm_rank_classify`` on a feature file and return the output path.

    The predictions file name is the feature file's base name with
    ``_predictions`` inserted before its extension, appended directly to
    ``predictions_dir`` (no separator is added by this function).

    :return: path of the predictions file
    """
    stem, extension = splitext(basename(feature_file))
    predictions_file = predictions_dir + stem + '_predictions' + extension
    ensure_dirs(predictions_file)

    classifier = f'{svm_rank_scripts_dir}svm_rank_classify'
    run_and_print(f'{classifier} {feature_file} {model_path} {predictions_file}', 'pair classify')
    return predictions_file
コード例 #9
0
def create_model(svm_rank_scripts_dir, model_path, learning_data, svm_rank_c=0.01):
    """
    Run ``svm_rank_learn`` on ``learning_data``, writing to ``model_path``.

    :param svm_rank_scripts_dir: prefix concatenated directly with
        ``svm_rank_learn`` (no separator is added by this function)
    :param svm_rank_c: value passed to the ``-c`` option
    """
    ensure_dirs(model_path)
    learner = f'{svm_rank_scripts_dir}svm_rank_learn'
    run_and_print(f'{learner} -c {svm_rank_c} {learning_data} {model_path}', 'pair ranker learn')