コード例 #1
0
def test_is_valid_inchikey():
    """Check that is_valid_inchikey accepts a well-formed key and rejects malformed ones."""
    accepted = ("XYLJNLCSTIOKRM-UHFFFAOYSA-N",)
    # Each rejected string differs from the accepted key in one visible way.
    rejected = (
        "XYLJNLCSTIOKRM-UHFFFAOYSA",     # final "-N" block missing
        "XYLJNLCSTIOKRMRUHFFFAOYSASN",   # hyphens replaced by letters
        "XYLJNLCSTIOKR-MUHFFFAOYSA-N",   # first hyphen shifted by one position
        "XYLJNLCSTIOKRM-UHFFFAOYSA-NN",  # one extra trailing character
        "Brcc(NC2=NCN2)-ccc3nccnc1-3",   # same length, but not an InChIKey-like string
        "2YLJNLCSTIOKRM-UHFFFAOYSA-N",   # starts with a digit
        "XYLJNLCSTIOKRM-aaaaaaaaaa-a",   # lowercase letters in the last two blocks
    )

    # Valid keys are checked first, then the invalid ones, each with its own message.
    expectations = [(key, True, "Expected inchikey is True.") for key in accepted]
    expectations += [(key, False, "Expected inchikey is False.") for key in rejected]

    for candidate, should_be_valid, message in expectations:
        assert bool(is_valid_inchikey(candidate)) is should_be_valid, message
コード例 #2
0
def test_is_valid_inchikey_none_input():
    """Check that a None entry is not classified as a valid inchikey."""
    outcome = is_valid_inchikey(None)
    assert not outcome, "Expected None entry to give False."
コード例 #3
0
def _match_pubchem_results(results_pubchem, inchi, parent_mass, verbose):
    """Select a PubChem hit by InChI match or, failing that, by parent mass.

    Every call starts from fresh ``None`` values, so the outcome of an earlier
    search stage can never leak into a later one.

    Returns
    -------
    tuple
        ``(inchi, inchikey, smiles)`` as returned by the match helper that
        ran last.
    """
    inchi_pubchem = inchikey_pubchem = smiles_pubchem = None

    # a) Prefer a match on the InChI when the spectrum seems to carry one.
    if likely_has_inchi(inchi):
        inchi_pubchem, inchikey_pubchem, smiles_pubchem = find_pubchem_inchi_match(
            results_pubchem, inchi, verbose=verbose)

    # b) Fall back to a match on parent mass (no InChI, or InChI match failed).
    if inchikey_pubchem is None:
        inchi_pubchem, inchikey_pubchem, smiles_pubchem = find_pubchem_mass_match(
            results_pubchem, parent_mass, verbose=verbose)

    return inchi_pubchem, inchikey_pubchem, smiles_pubchem


def pubchem_metadata_lookup(spectrum_in,
                            name_search_depth=10,
                            formula_search=False,
                            min_formula_length=6,
                            formula_search_depth=25,
                            verbose=1):
    """Fill in missing inchikey/inchi/smiles metadata using PubChem searches.

    The input is cloned and the clone is returned. The clone is returned
    unchanged when it already has a valid inchikey, or when its compound name
    is not a string longer than 4 characters. Otherwise PubChem is searched by
    compound name and, optionally, by formula. Within each search the hits are
    matched by InChI first and by parent mass second.

    Parameters
    ----------
    spectrum_in
        Matchms type spectrum as input.
    name_search_depth: int
        How many of the most relevant name matches to explore deeper. Default = 10.
    formula_search: bool
        If True, also run a formula search when the name search produced no
        match. Default = False.
    min_formula_length: int
        Minimum length of the formula string required to run the formula
        search. Default = 6.
    formula_search_depth: int
        Passed on to ``pubchem_formula_search``. Default = 25.
    verbose: int
        Verbosity level, also passed on to the search and match helpers.
        At >= 1 a found match is printed, at >= 2 a failed match is printed
        as well. Default = 1.

    Returns
    -------
    The cloned spectrum (with "inchikey", "inchi" and "smiles" set when a
    match was found), or None if ``spectrum_in`` is None.
    """
    if spectrum_in is None:
        return None

    spectrum = spectrum_in.clone()
    if is_valid_inchikey(spectrum.get("inchikey")):
        return spectrum

    def _plausible_name(compound_name):
        return (isinstance(compound_name, str) and len(compound_name) > 4)

    compound_name = spectrum.get("compound_name")
    if not _plausible_name(compound_name):
        return spectrum

    # Start pubchem search
    inchi = spectrum.get("inchi")
    parent_mass = spectrum.get("parent_mass")
    if isinstance(parent_mass, np.ndarray):
        parent_mass = parent_mass[0]
    formula = spectrum.get("formula")

    # 1) Search for matching compound name
    results_pubchem = pubchem_name_search(compound_name,
                                          name_search_depth=name_search_depth,
                                          verbose=verbose)

    if len(results_pubchem) > 0:
        inchi_pubchem, inchikey_pubchem, smiles_pubchem = _match_pubchem_results(
            results_pubchem, inchi, parent_mass, verbose)

        if inchikey_pubchem is not None and inchi_pubchem is not None:
            logging.info("Matching compound name: %s", compound_name)
            if verbose >= 1:
                print(f"Matching compound name: {compound_name}")
            spectrum.set("inchikey", inchikey_pubchem)
            spectrum.set("inchi", inchi_pubchem)
            spectrum.set("smiles", smiles_pubchem)
            return spectrum

        if verbose >= 2:
            print(f"No matches found for compound name: {compound_name}")

    # 2) Search for matching formula
    if formula_search and formula and len(formula) >= min_formula_length:
        results_pubchem = pubchem_formula_search(
            formula,
            formula_search_depth=formula_search_depth,
            verbose=verbose)

        if len(results_pubchem) > 0:
            # The match state is rebuilt from scratch here: the name stage may
            # not have run at all, and its results must not suppress the mass
            # match on the formula hits.
            inchi_pubchem, inchikey_pubchem, smiles_pubchem = _match_pubchem_results(
                results_pubchem, inchi, parent_mass, verbose)

            if inchikey_pubchem is not None and inchi_pubchem is not None:
                logging.info("Matching formula: %s", formula)
                if verbose >= 1:
                    print(f"Matching formula: {formula}")
                spectrum.set("inchikey", inchikey_pubchem)
                spectrum.set("inchi", inchi_pubchem)
                spectrum.set("smiles", smiles_pubchem)
                return spectrum

            if verbose >= 2:
                print(f"No matches found for formula: {formula}")

    return spectrum