Python MS2Libraryの例、ms2query.ms2library.MS2Library Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_ms2library.py プロジェクト: iomega/ms2query

def test_get_chemical_neighbourhood_scores(file_names):
    """Check _calculate_average_multiple_library_structures for one InChIKey.

    A library is built from the ``file_names`` fixture and queried with a
    set holding a single InChIKey plus a dictionary of ten per-InChIKey
    scores. The result must be a dictionary with exactly one entry, keyed by
    the queried InChIKey, whose value is a tuple of two numbers.
    """
    (sqlite_path, s2v_model_path, s2v_embeddings_path,
     ms2ds_model_path, ms2ds_embeddings_path,
     id_column, ms2q_model_path) = file_names

    library = MS2Library(
        sqlite_path,
        s2v_model_path,
        ms2ds_model_path,
        s2v_embeddings_path,
        ms2ds_embeddings_path,
        ms2q_model_path,
        spectrum_id_column_name=id_column)

    queried_inchikey = "BKUKTJSDOUXYFL"
    scores_per_inchikey = {
        'BKUKTJSDOUXYFL': 0.8,
        'UZMVEOVJASEKLP': 0.8,
        'QWSYKJZSJYRUSS': 0.8,
        'GRVRRAOIXXYICO': 0.8,
        'WXDBUBIFYCCNLE': 0.8,
        'ORRFIXSGNXBETO': 0.7,
        'LLWMPGSQZXZZAE': 0.7,
        'CTBBEXWJRAPJIZ': 0.6,
        'YQLQWGVOWKPLFR': 0.6,
        'BTVYFIMKUHNOBZ': 0.6,
    }

    outcome = library._calculate_average_multiple_library_structures(
        {queried_inchikey}, scores_per_inchikey)

    # Shape of the returned mapping.
    assert isinstance(outcome, dict), "expected a dictionary"
    assert len(outcome) == 1, \
        "Expected different number of results in dictionary"
    assert queried_inchikey in outcome

    # Shape and values of the single entry.
    score_pair = outcome[queried_inchikey]
    assert isinstance(score_pair, tuple)
    assert len(score_pair) == 2, "Expected two scores for each InChiKey"
    first_score, second_score = score_pair
    assert math.isclose(first_score, 0.72)
    assert math.isclose(second_score, 0.4607757103045708)

コード例 #2

0

ファイルを表示

ファイル: test_ms2library.py プロジェクト: iomega/ms2query

def test_calculate_scores_for_metadata(file_names, test_spectra):
    """Test _calculate_features_for_random_forest_model of MS2Library.

    A ResultsTable is seeded with the first column of the pickled MS2Deepscore
    scores and the first test spectrum, passed through the feature
    calculation, and compared with a pickled expected results table.
    """
    (sqlite_path, s2v_model_path, s2v_embeddings_path,
     ms2ds_model_path, ms2ds_embeddings_path,
     id_column, ms2q_model_path) = file_names

    library = MS2Library(
        sqlite_path,
        s2v_model_path,
        ms2ds_model_path,
        s2v_embeddings_path,
        ms2ds_embeddings_path,
        ms2q_model_path,
        spectrum_id_column_name=id_column)

    # Parent of the directory that holds this test module; both pickles are
    # addressed relative to it.
    base_dir = os.path.split(os.path.dirname(__file__))[0]

    stored_ms2ds_scores: pd.DataFrame = load_pickled_file(os.path.join(
        base_dir,
        'tests/test_files/test_files_ms2library/expected_ms2ds_scores.pickle'))
    first_query_scores = stored_ms2ds_scores.iloc[:, 0]

    table = ResultsTable(
        preselection_cut_off=20,
        ms2deepscores=first_query_scores,
        query_spectrum=test_spectra[0],
        sqlite_file_name=sqlite_path)
    table = library._calculate_features_for_random_forest_model(table)

    expected_table = load_pickled_file(os.path.join(
        base_dir,
        "tests/test_files/test_files_ms2library/expected_results_table_with_scores.pickle"))

    table.assert_results_table_equal(expected_table)

コード例 #3

0

ファイルを表示

ファイル: test_ms2library.py プロジェクト: iomega/ms2query

def test_analog_search(file_names, test_spectra):
    """Test analog search against pickled expected results tables.

    Runs analog_search_return_results_tables on the test spectra with a
    preselection cut-off of 20 and compares every returned results table,
    in order, with the corresponding pickled expected table.
    """
    sqlite_file_loc, spec2vec_model_file_loc, s2v_pickled_embeddings_file, \
        ms2ds_model_file_name, ms2ds_embeddings_file_name, \
        spectrum_id_column_name, ms2q_model_file_name = file_names

    test_library = MS2Library(sqlite_file_loc,
                              spec2vec_model_file_loc,
                              ms2ds_model_file_name,
                              s2v_pickled_embeddings_file,
                              ms2ds_embeddings_file_name,
                              ms2q_model_file_name,
                              spectrum_id_column_name=spectrum_id_column_name)

    cutoff = 20
    results = test_library.analog_search_return_results_tables(
        test_spectra, cutoff)

    expected_result = load_pickled_file(
        os.path.join(
            os.path.split(os.path.dirname(__file__))[0],
            "tests/test_files/test_files_ms2library/expected_analog_search_results.pickle"
        ))

    # Compare lengths first: pairing with zip alone would silently ignore
    # surplus (or missing) results tables.
    assert len(results) == len(expected_result), \
        "Expected a different number of results tables"
    for result_table, expected_table in zip(results, expected_result):
        result_table.assert_results_table_equal(expected_table)

コード例 #4

0

ファイルを表示

ファイル: test_ms2library.py プロジェクト: iomega/ms2query

def test_get_s2v_scores(file_names, test_spectra):
    """Test the _get_s2v_scores method of MS2Library.

    Scores the first test spectrum against two library spectrum ids and
    compares the returned values with reference scores using np.allclose.
    """
    (sqlite_path, s2v_model_path, s2v_embeddings_path,
     ms2ds_model_path, ms2ds_embeddings_path,
     id_column, ms2q_model_path) = file_names

    library = MS2Library(
        sqlite_path,
        s2v_model_path,
        ms2ds_model_path,
        s2v_embeddings_path,
        ms2ds_embeddings_path,
        ms2q_model_path,
        spectrum_id_column_name=id_column)

    library_spectrum_ids = ["CCMSLIB00000001572", "CCMSLIB00000001648"]
    reference_scores = np.array([0.97565603, 0.97848464])

    s2v_scores = library._get_s2v_scores(test_spectra[0],
                                         library_spectrum_ids)

    assert np.allclose(s2v_scores, reference_scores), \
        "Expected different Spec2Vec scores"

コード例 #5

0

ファイルを表示

ファイル: test_ms2library.py プロジェクト: iomega/ms2query

def test_ms2library_set_settings(file_names):
    """Test the settings stored on a newly created MS2Library object.

    After construction, ``settings["spectrum_id_column_name"]`` must equal
    "spectrumid" and ``settings["progress_bars"]`` must equal True.
    """
    (sqlite_path, s2v_model_path, s2v_embeddings_path,
     ms2ds_model_path, ms2ds_embeddings_path,
     id_column, ms2q_model_path) = file_names

    library = MS2Library(
        sqlite_path,
        s2v_model_path,
        ms2ds_model_path,
        s2v_embeddings_path,
        ms2ds_embeddings_path,
        ms2q_model_path,
        spectrum_id_column_name=id_column)

    # Checked in this order, each with an equality comparison.
    expected_settings = {
        "spectrum_id_column_name": "spectrumid",
        "progress_bars": True,
    }
    for setting_name, expected_value in expected_settings.items():
        assert library.settings[setting_name] == expected_value, \
            "Different value for attribute was expected"

コード例 #6

0

ファイルを表示

ファイル: test_ms2library.py プロジェクト: iomega/ms2query

def test_analog_search_store_in_csv(file_names, test_spectra, tmp_path):
    """Test that analog_search_store_in_csv writes the expected csv file.

    The search results for the test spectra are written to a file inside
    ``tmp_path``; the file must exist and its lines must match the expected
    header and the two expected result rows exactly.
    """
    (sqlite_path, s2v_model_path, s2v_embeddings_path,
     ms2ds_model_path, ms2ds_embeddings_path,
     id_column, ms2q_model_path) = file_names

    library = MS2Library(
        sqlite_path,
        s2v_model_path,
        ms2ds_model_path,
        s2v_embeddings_path,
        ms2ds_embeddings_path,
        ms2q_model_path,
        spectrum_id_column_name=id_column)

    expected_lines = [
        'query_spectrum_nr,ms2query_model_prediction,precursor_mz_difference,precursor_mz_query_spectrum,precursor_mz_analog,inchikey,spectrum_ids,analog_compound_name\n',
        '0,0.5645,33.2500,907.0000,940.2500,KNGPFNUOXXLKCN,CCMSLIB00000001548,Hoiamide B\n',
        '1,0.4090,61.3670,928.0000,866.6330,GRJSOZDXIUZXEW,CCMSLIB00000001570,Halovir A\n',
    ]

    results_csv_file = os.path.join(tmp_path, "test_csv_analog_search")
    library.analog_search_store_in_csv(test_spectra, results_csv_file)

    assert os.path.exists(results_csv_file)
    with open(results_csv_file, "r") as csv_handle:
        stored_lines = csv_handle.readlines()
        assert stored_lines == expected_lines, \
            "Expected different results to be stored in csv file"

コード例 #7

0

ファイルを表示

ファイル: test_ms2library.py プロジェクト: iomega/ms2query

def test_get_average_ms2ds_for_inchikey14(file_names):
    """Test the _get_average_ms2ds_for_inchikey14 method of MS2Library.

    Three MS2Deepscore values indexed by spectrum id are averaged for two
    InChIKeys. The result must contain exactly those two InChIKeys, with
    averages of 0.1 and 0.55.
    """
    sqlite_file_loc, spec2vec_model_file_loc, s2v_pickled_embeddings_file, \
        ms2ds_model_file_name, ms2ds_embeddings_file_name, \
        spectrum_id_column_name, ms2q_model_file_name = file_names

    test_library = MS2Library(sqlite_file_loc,
                              spec2vec_model_file_loc,
                              ms2ds_model_file_name,
                              s2v_pickled_embeddings_file,
                              ms2ds_embeddings_file_name,
                              ms2q_model_file_name,
                              spectrum_id_column_name=spectrum_id_column_name)
    inchickey14s = {"BKUKTJSDOUXYFL", "BTVYFIMKUHNOBZ"}
    ms2ds_scores = pd.Series([0.1, 0.8, 0.3],
                             index=[
                                 'CCMSLIB00000001678', 'CCMSLIB00000001651',
                                 'CCMSLIB00000001653'
                             ])
    results = test_library._get_average_ms2ds_for_inchikey14(
        ms2ds_scores, inchickey14s)

    expected_averages = {'BKUKTJSDOUXYFL': 0.1, 'BTVYFIMKUHNOBZ': 0.55}
    assert results.keys() == expected_averages.keys(), \
        "Expected different results"
    # The values are computed averages of floats, so compare with a tolerance
    # (as the other score tests in this file do) instead of exact equality.
    for inchikey14, expected_average in expected_averages.items():
        assert math.isclose(results[inchikey14], expected_average), \
            "Expected different results"

コード例 #8

0

ファイルを表示

ファイル: test_ms2library.py プロジェクト: iomega/ms2query

def test_get_all_ms2ds_scores(file_names, test_spectra):
    """Test the _get_all_ms2ds_scores method of MS2Library.

    The scores computed for the test spectra must be a pandas DataFrame
    equal to the pickled expected scores.
    """
    sqlite_file_loc, spec2vec_model_file_loc, s2v_pickled_embeddings_file, \
        ms2ds_model_file_name, ms2ds_embeddings_file_name, \
        spectrum_id_column_name, ms2q_model_file_name = file_names

    test_library = MS2Library(sqlite_file_loc,
                              spec2vec_model_file_loc,
                              ms2ds_model_file_name,
                              s2v_pickled_embeddings_file,
                              ms2ds_embeddings_file_name,
                              ms2q_model_file_name,
                              spectrum_id_column_name=spectrum_id_column_name)

    result = test_library._get_all_ms2ds_scores(test_spectra)

    expected_result: pd.DataFrame = load_pickled_file(
        os.path.join(
            os.path.split(os.path.dirname(__file__))[0],
            'tests/test_files/test_files_ms2library/expected_ms2ds_scores.pickle'
        ))
    # The failure message now names the type that is actually checked.
    assert isinstance(result, pd.DataFrame), "Expected a pandas DataFrame"
    assert_frame_equal(result, expected_result)

コード例 #9

0

ファイルを表示

def run_complete_folder(
        ms2library: MS2Library,
        folder_with_spectra: str,
        results_folder: Union[str, None] = None,
        nr_of_analogs_to_store: int = 1,
        minimal_ms2query_score: Union[int, float] = 0.0,
        additional_metadata_columns: Tuple[str, ...] = (
            "retention_time",
            "retention_index",
        ),
        additional_ms2query_score_columns: Union[List[str], None] = None) -> None:
    """Stores analog and library search results for all spectra files in folder

    For every regular file directly inside folder_with_spectra that can be
    converted to spectra, one csv file with the same base name is written to
    results_folder. Subfolders are skipped.

    Args:
    ------
    ms2library:
        MS2Library object
    folder_with_spectra:
        Path to folder containing spectra on which analog search should be run.
    results_folder:
        Path to folder in which the results are stored, folder does not have to exist yet
        (missing parent folders are created as well).
        In this folder the csv files with results are stored. When None results_folder is set to
        folder_with_spectra/results.
    nr_of_analogs_to_store:
        The number of returned analogs that are stored.
    minimal_ms2query_score:
        The minimal ms2query metascore needed to be stored in the csv file.
        Spectra for which no analog with this minimal metascore was found,
        will not be stored in the csv file.
    additional_metadata_columns:
        Additional columns with query spectrum metadata that should be added. For instance "retention_time".
    additional_ms2query_score_columns:
        Additional columns with scores used for calculating the ms2query metascore
        Options are: "mass_similarity", "s2v_score", "ms2ds_score", "average_ms2ds_score_for_inchikey14",
        "nr_of_spectra_with_same_inchikey14*0.01", "chemical_neighbourhood_score",
        "average_tanimoto_score_for_chemical_neighbourhood_score",
        "nr_of_spectra_for_chemical_neighbourhood_score*0.01"
    """
    # pylint: disable=too-many-arguments

    if results_folder is None:
        results_folder = os.path.join(folder_with_spectra, "results")
    # makedirs (unlike mkdir) also creates missing parent folders, and
    # exist_ok avoids a separate existence check.
    os.makedirs(results_folder, exist_ok=True)

    # Go through spectra files in directory
    for file_name in os.listdir(folder_with_spectra):
        file_path = os.path.join(folder_with_spectra, file_name)
        # skip folders (this includes a results folder located inside
        # folder_with_spectra)
        if not os.path.isfile(file_path):
            continue

        spectra = convert_files_to_matchms_spectrum_objects(file_path)
        # None means there is nothing to search for this file; presumably
        # returned for unsupported file types - confirm against the helper.
        if spectra is None:
            continue

        analogs_results_file_name = os.path.join(
            results_folder,
            os.path.splitext(file_name)[0] + ".csv")
        ms2library.analog_search_store_in_csv(
            spectra,
            analogs_results_file_name,
            nr_of_top_analogs_to_save=nr_of_analogs_to_store,
            minimal_ms2query_metascore=minimal_ms2query_score,
            additional_metadata_columns=additional_metadata_columns,
            additional_ms2query_score_columns=additional_ms2query_score_columns)
        print(f"Results stored in {analogs_results_file_name}")