def test_create_index(
    elastic_repository: ElasticRepository,
    resource_loader: Callable[[str], str],
):
    """Smoke-test indexing of the small SDF sample in chunks of 10.

    The test has no assertions: it passes as long as reading the file and
    ``index_records`` complete without raising.
    """
    sdf_path = Path(resource_loader("resources/rand_queries_small.sdf"))
    records = iterate_file(sdf_path)
    elastic_repository.index_records(records, chunk_size=10)
def test_create_index(
    elastic_repository_molecule: ElasticRepository,
    resource_loader,
):
    """Smoke-test indexing of the small molecule SDF sample in chunks of 10.

    The test has no assertions: it passes as long as reading the file and
    ``index_records`` complete without raising.
    """
    sdf_path = Path(resource_loader("molecules/rand_queries_small.sdf"))
    records = iterate_file(sdf_path)
    elastic_repository_molecule.index_records(records, chunk_size=10)
def test_filter_by_name(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
    resource_loader,
):
    """Check filtering by name, by similarity, and by both combined.

    ``loaded_sdf`` is not used in the body; presumably it is requested so
    the sample SDF data is indexed before this test runs -- confirm.
    """
    composition = indigo_fixture.loadMoleculeFromFile(
        resource_loader("molecules/composition1.mol")
    )
    elastic_repository_molecule.index_record(
        IndigoRecordMolecule(indigo_object=composition)
    )
    # Pause before querying; presumably gives the new record time to
    # become searchable -- confirm.
    time.sleep(1)

    # 1. Name only: every hit must carry the requested name.
    by_name = elastic_repository_molecule.filter(name="Composition1")
    for hit in by_name:
        assert hit.name == "Composition1"

    # 2. Similarity only: exactly 10 hits are expected.
    by_similarity = elastic_repository_molecule.filter(
        similarity=TanimotoSimilarityMatch(
            IndigoRecordMolecule(indigo_object=composition), 0.1
        )
    )
    assert sum(1 for _ in by_similarity) == 10

    # 3. Similarity plus name: every hit must still carry the name.
    by_both = elastic_repository_molecule.filter(
        similarity=TanimotoSimilarityMatch(
            IndigoRecordMolecule(indigo_object=composition), 0.1
        ),
        name="Composition1",
    )
    for hit in by_both:
        assert hit.name == "Composition1"
def test_similaririty_matches_reactions(
    elastic_repository_reaction: ElasticRepository,
    loaded_rxns,
    resource_loader,
    indigo_fixture,
) -> None:
    """Check reactant counts of reactions found by similarity and exact search.

    The same reference reaction is searched with Tanimoto, Euclid and
    Tversky similarity (threshold 0.99) and with an exact query; every hit
    must have as many reactants as the reference reaction.

    ``loaded_rxns`` is not used in the body; presumably it is requested so
    the reaction samples are indexed first -- confirm.
    """
    reaction = indigo_fixture.loadReactionFromFile(
        resource_loader("reactions/rheadb/50353.rxn")
    )
    # NOTE(review): the reaction is wrapped in IndigoRecordMolecule here;
    # confirm this is intended rather than a reaction-specific record type.
    reaction_rec = IndigoRecordMolecule(indigo_object=reaction)

    def check_reactant_counts(hits) -> None:
        # Every hit must report the same reactant count as the reference.
        for hit in hits:
            assert (
                as_iob(hit, indigo_fixture).countReactants()
                == reaction.countReactants()
            )

    check_reactant_counts(
        elastic_repository_reaction.filter(
            similarity=TanimotoSimilarityMatch(reaction_rec, 0.99)
        )
    )
    check_reactant_counts(
        elastic_repository_reaction.filter(
            similarity=EuclidSimilarityMatch(reaction_rec, 0.99)
        )
    )
    check_reactant_counts(
        elastic_repository_reaction.filter(
            similarity=TverskySimilarityMatch(reaction_rec, 0.99)
        )
    )
    check_reactant_counts(elastic_repository_reaction.filter(exact=reaction_rec))
def loaded_sdf(
    elastic_repository_molecule: ElasticRepository,
    resource_loader,
) -> IndigoRecordMolecule:
    """Index the small molecule SDF sample and return its first record.

    Presumably registered as a pytest fixture by a decorator outside this
    view -- confirm.

    Returns:
        The first item yielded by ``iterate_file`` for the sample file.
    """
    sdf_path = Path(resource_loader("molecules/rand_queries_small.sdf"))
    elastic_repository_molecule.index_records(
        iterate_file(sdf_path), chunk_size=10
    )
    # Pause after indexing; presumably gives the records time to become
    # searchable -- confirm.
    time.sleep(5)
    # A fresh iterator is opened so the returned record is the file's first.
    fresh_records = iterate_file(
        Path(resource_loader("molecules/rand_queries_small.sdf"))
    )
    return next(fresh_records)
def loaded_rxns(
    elastic_repository_reaction: ElasticRepository,
    resource_loader: Callable[[str], str],
    indigo_fixture,
):
    """Index every ``.rxn`` file found directly in ``reactions/rheadb``.

    Presumably registered as a pytest fixture by a decorator outside this
    view -- confirm.
    """
    rxn_dir = Path(resource_loader("reactions/rheadb"))
    for entry in rxn_dir.iterdir():
        if entry.suffix != ".rxn":
            continue
        record = load_reaction(entry, indigo_fixture)
        elastic_repository_reaction.index_record(record)
    # Pause after indexing; presumably gives the records time to become
    # searchable -- confirm.
    time.sleep(5)
def loaded_sdf(
    elastic_repository: ElasticRepository,
    resource_loader: Callable[[str], str],
) -> IndigoRecord:
    """Index the small SDF sample and return its first record.

    Presumably registered as a pytest fixture by a decorator outside this
    view -- confirm.

    Returns:
        The first item yielded by ``iterate_file`` for the sample file.
    """
    sdf_path = Path(resource_loader("resources/rand_queries_small.sdf"))
    elastic_repository.index_records(iterate_file(sdf_path), chunk_size=10)
    # Pause after indexing; presumably gives the records time to become
    # searchable -- confirm.
    time.sleep(5)
    # A fresh iterator is opened so the returned record is the file's first.
    fresh_records = iterate_file(
        Path(resource_loader("resources/rand_queries_small.sdf"))
    )
    return next(fresh_records)
def test_wildcard_search(
    elastic_repository: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecord,
    resource_loader: Callable[[str], str],
):
    """Check that the wildcard name query ``Comp*`` only yields "Composition1".

    ``loaded_sdf`` is not used in the body; presumably it is requested so
    the sample SDF data is indexed first -- confirm.
    """
    mol_path = resource_loader("resources/composition1.mol")
    molecule = indigo_fixture.loadMoleculeFromFile(mol_path)
    elastic_repository.index_record(IndigoRecord(indigo_object=molecule))
    # Pause before querying; presumably gives the new record time to
    # become searchable -- confirm.
    time.sleep(1)

    pattern = WildcardQuery("Comp*")
    for hit in elastic_repository.filter(name=pattern):
        assert hit.name == "Composition1"
def test_range_search(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    resource_loader,
):
    """Check that a numeric range query yields the expected number of hits.

    Each record of the small molecule SDF sample is tagged with its
    position in the file as ``ind_number`` and indexed individually.
    Querying ``ind_number`` with ``RangeQuery(1, 10)`` must then yield
    exactly 10 records.
    """
    # Function-scope import so the added wait does not depend on the
    # module's import block.
    import time

    sdf_path = Path(resource_loader("molecules/rand_queries_small.sdf"))
    for position, record in enumerate(iterate_file(sdf_path)):
        record.ind_number = position
        elastic_repository_molecule.index_record(record)

    # Sibling tests pause between indexing and querying. Without a pause the
    # query can run before the last writes are searchable and see too few
    # hits, making the count assertion flaky.
    time.sleep(1)

    result = elastic_repository_molecule.filter(ind_number=RangeQuery(1, 10))
    # Count hits with a dedicated expression rather than reusing the loop
    # index above as the counter.
    assert sum(1 for _ in result) == 10
def test_custom_fields(
    elastic_repository: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecord,
    resource_loader: Callable[[str], str],
):
    """Check that a record can be filtered by a custom keyword field.

    A record is indexed with an extra ``PUBCHEM_IUPAC_INCHIKEY`` attribute;
    filtering on that attribute must only yield records carrying the same
    value.

    ``loaded_sdf`` is not used in the body; presumably it is requested so
    the sample SDF data is indexed first -- confirm.
    """
    inchikey = "RDHQFKQIGNGIED-UHFFFAOYSA-N"
    molecule = indigo_fixture.loadMoleculeFromFile(
        resource_loader("resources/composition1.mol")
    )
    record = IndigoRecord(indigo_object=molecule, PUBCHEM_IUPAC_INCHIKEY=inchikey)
    elastic_repository.index_record(record)
    # Pause before querying; presumably gives the new record time to
    # become searchable -- confirm.
    time.sleep(1)

    for hit in elastic_repository.filter(PUBCHEM_IUPAC_INCHIKEY=inchikey):
        assert hit.PUBCHEM_IUPAC_INCHIKEY == inchikey
def test_exact_match(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
):
    """Check that an exact search for ``loaded_sdf`` finds the same molecule.

    The canonical SMILES of the first hit must equal the canonical SMILES
    of the query record. ``next`` raises ``StopIteration`` (failing the
    test) when the search yields nothing.
    """
    hits = elastic_repository_molecule.filter(exact=loaded_sdf)
    query_smiles = loaded_sdf.as_indigo_object(indigo_fixture).canonicalSmiles()
    first_hit_smiles = (
        next(hits).as_indigo_object(indigo_fixture).canonicalSmiles()
    )
    assert query_smiles == first_hit_smiles
def test_search_empty_fingerprint(
    elastic_repository: ElasticRepository,
    indigo_fixture: Indigo,
    resource_loader: Callable[[str], str],
):
    """Check exact search for ``[H][H]`` among two tiny indexed molecules.

    ``[H][H]`` and ``[H][F]`` are indexed with ``skip_errors=True``; going
    by the test name they presumably produce empty fingerprints -- confirm.
    The exact search for ``[H][H]`` must yield that molecule first and then
    be exhausted.
    """
    for smiles in ("[H][H]", "[H][F]"):
        molecule = indigo_fixture.loadMolecule(smiles)
        elastic_repository.index_record(
            IndigoRecord(indigo_object=molecule, skip_errors=True)
        )
    # Pause before querying; presumably gives the records time to become
    # searchable -- confirm.
    time.sleep(5)

    query = IndigoRecord(
        indigo_object=indigo_fixture.loadMolecule("[H][H]"), skip_errors=True
    )
    result = elastic_repository.filter(exact=query)

    first_hit_smiles = (
        next(result).as_indigo_object(indigo_fixture).canonicalSmiles()
    )
    assert "[H][H]" == first_hit_smiles

    # A second hit would mean the other molecule matched as well.
    with pytest.raises(StopIteration):
        next(result).as_indigo_object(indigo_fixture).canonicalSmiles()
def test_substructure_search(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
):
    """Check that every substructure hit for ``loaded_sdf`` is that molecule.

    Each hit's canonical SMILES must equal the query's canonical SMILES.
    The test passes without checking anything if the search yields nothing.
    """
    for hit in elastic_repository_molecule.filter(substructure=loaded_sdf):
        hit_smiles = hit.as_indigo_object(indigo_fixture).canonicalSmiles()
        query_smiles = loaded_sdf.as_indigo_object(
            indigo_fixture
        ).canonicalSmiles()
        assert hit_smiles == query_smiles
def test_similarity_matches(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
):
    """Check that each similarity algorithm returns ``loaded_sdf`` first.

    For Tanimoto, Euclid and Tversky matching (threshold 0.9), the first
    hit's canonical SMILES must equal the query's canonical SMILES.
    """
    matchers = (
        TanimotoSimilarityMatch(loaded_sdf, 0.9),
        EuclidSimilarityMatch(loaded_sdf, 0.9),
        TverskySimilarityMatch(loaded_sdf, 0.9, 0.5, 0.5),
    )
    for matcher in matchers:
        hits = elastic_repository_molecule.filter(similarity=matcher)
        query_smiles = loaded_sdf.as_indigo_object(
            indigo_fixture
        ).canonicalSmiles()
        first_hit_smiles = (
            next(hits).as_indigo_object(indigo_fixture).canonicalSmiles()
        )
        assert query_smiles == first_hit_smiles
def test_create_reaction(
    elastic_repository_reaction: ElasticRepository,
    indigo_fixture,
    resource_loader,
) -> None:
    """Index one reaction and check that similarity search returns it intact.

    Every hit found with ``SimilarityMatch`` (threshold 0.9) must have the
    same product and reactant counts as the indexed reaction, and each of
    its reactants must have a canonical SMILES present in the original.
    """
    reaction = indigo_fixture.loadReactionFromFile(
        resource_loader("reactions/rheadb/58029.rxn")
    )
    indigo_reaction = IndigoRecordReaction(indigo_object=reaction)

    # Reference data taken from the reaction before it is indexed.
    expected_smiles = {
        reactant.canonicalSmiles() for reactant in reaction.iterateReactants()
    }
    expected_reactants = reaction.countReactants()
    expected_products = reaction.countProducts()

    assert isinstance(indigo_reaction, IndigoRecordReaction)
    elastic_repository_reaction.index_record(indigo_reaction)
    # Pause before querying; presumably gives the new record time to
    # become searchable -- confirm.
    sleep(1)

    hits = elastic_repository_reaction.filter(
        similarity=SimilarityMatch(indigo_reaction, 0.9)
    )
    for hit in hits:
        found = as_iob(hit, indigo_fixture)
        assert expected_products == found.countProducts()
        assert expected_reactants == found.countReactants()
        for reactant in found.iterateReactants():
            assert reactant.canonicalSmiles() in expected_smiles
def clear_index(elastic_repository: ElasticRepository):
    """Delete all records through the given repository.

    Presumably registered as a pytest fixture by a decorator outside this
    view -- confirm.
    """
    elastic_repository.delete_all_records()
def elastic_repository() -> ElasticRepository:
    """Build an ``ElasticRepository`` configured for ``127.0.0.1:9200``.

    Presumably registered as a pytest fixture by a decorator outside this
    view -- confirm.
    """
    connection = {"host": "127.0.0.1", "port": 9200}
    return ElasticRepository(**connection)
import time
from pathlib import Path

from bingo_elastic.model.record import IndigoRecord
from bingo_elastic.queries import SimilarityMatch
from bingo_elastic.elastic import ElasticRepository
from bingo_elastic.model import helpers
from indigo import Indigo

if __name__ == '__main__':
    # Script entry point: builds a repository for a local Elasticsearch
    # instance, reports elapsed time, and prepares a query record for a
    # sample molecule.
    indigo = Indigo()
    start_time = time.time()
    repository = ElasticRepository(host="127.0.0.1", port=9200)

    # Indexing step, currently disabled. While it stays commented out, the
    # time reported below covers only the repository construction above.
    # try:
    #     sdf = helpers.iterate_sdf("../data/pubchem/Compound_000000001_000500000.sdf")
    #     repository.index_records(sdf)
    # except ValueError as e:
    #     print(e)
    #
    # time.sleep(300)

    elapsed = time.time() - start_time
    print("--- {} seconds for reading and indexing data ---".format(elapsed))

    # Sample molecule wrapped as a record to use as a search target.
    mol = indigo.loadMolecule(
        "Cc1ccc2nc(-c3ccc(NC(C4N(C(c5cccs5)=O)CCC4)=O)cc3)sc2c1"
    )
    target = IndigoRecord(indigo_object=mol)
def elastic_repository_molecule() -> ElasticRepository:
    """Build an ``ElasticRepository`` for ``IndexName.BINGO_MOLECULE``.

    The repository is configured for ``127.0.0.1:9200``. Presumably
    registered as a pytest fixture by a decorator outside this view --
    confirm.
    """
    connection = {"host": "127.0.0.1", "port": 9200}
    return ElasticRepository(IndexName.BINGO_MOLECULE, **connection)
def elastic_repository_reaction() -> ElasticRepository:
    """Build an ``ElasticRepository`` for ``IndexName.BINGO_REACTION``.

    The repository is configured for ``127.0.0.1:9200``. Presumably
    registered as a pytest fixture by a decorator outside this view --
    confirm.
    """
    connection = {"host": "127.0.0.1", "port": 9200}
    return ElasticRepository(IndexName.BINGO_REACTION, **connection)
def clear_index(
    elastic_repository_molecule: ElasticRepository,
    elastic_repository_reaction: ElasticRepository,
):
    """Delete all records through the molecule and reaction repositories.

    The molecule repository is cleared first, then the reaction one.
    Presumably registered as a pytest fixture by a decorator outside this
    view -- confirm.
    """
    for repository in (elastic_repository_molecule, elastic_repository_reaction):
        repository.delete_all_records()