def test_filter_by_name(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
    resource_loader,
):
    """Check filtering by name, by similarity, and by both together.

    Indexes ``molecules/composition1.mol`` and then runs three queries:
    name only, Tanimoto similarity only (exactly 10 hits expected) and
    similarity combined with the name.

    ``loaded_sdf`` is not referenced in the body; presumably the fixture is
    requested so that the index is populated beforehand -- TODO confirm.
    """
    molecule = indigo_fixture.loadMoleculeFromFile(
        resource_loader("molecules/composition1.mol")
    )
    elastic_repository_molecule.index_record(
        IndigoRecordMolecule(indigo_object=molecule)
    )
    # Pause before querying; presumably lets the index make the new
    # document searchable -- TODO confirm.
    time.sleep(1)

    # 1. Name only: every hit must carry the requested name.
    named_hits = elastic_repository_molecule.filter(name="Composition1")
    for hit in named_hits:
        assert hit.name == "Composition1"

    # 2. Similarity only: count the hits the query yields.
    similar_hits = elastic_repository_molecule.filter(
        similarity=TanimotoSimilarityMatch(
            IndigoRecordMolecule(indigo_object=molecule), 0.1
        )
    )
    hit_count = sum(1 for _ in similar_hits)
    assert hit_count == 10

    # 3. Similarity and name together.
    combined_hits = elastic_repository_molecule.filter(
        similarity=TanimotoSimilarityMatch(
            IndigoRecordMolecule(indigo_object=molecule), 0.1
        ),
        name="Composition1",
    )
    for hit in combined_hits:
        assert hit.name == "Composition1"
def test_empty_create(indigo_fixture):
    """Exercise record construction from an empty molecule.

    The same empty molecule is wrapped three ways: with default settings
    (an exception is expected), with an ``error_handler`` callback, and
    with ``skip_errors=True``.
    """
    empty_molecule = indigo_fixture.createMolecule()

    def check_reported_error(instance: object, err_: BaseException) -> None:
        # The callback asserts on what it is handed: an object exposing a
        # string ``record_id`` and a ``ValueError``.
        assert isinstance(instance.record_id, str)
        assert isinstance(err_, ValueError)

    # Default construction must fail.
    with pytest.raises(Exception):
        IndigoRecordMolecule(indigo_object=empty_molecule)

    # With a handler or with skip_errors, construction must not raise.
    IndigoRecordMolecule(
        indigo_object=empty_molecule, error_handler=check_reported_error
    )
    IndigoRecordMolecule(indigo_object=empty_molecule, skip_errors=True)
def search(data):
    """
    Run an exact-match query for the molecule described by ``data``.

    :param data: molecule representation handed to ``Indigo.loadMolecule``
    :return: result of ``filter(exact=...)`` on the repository returned by
        ``elastic_repository_molecule()``
    """
    query_record = IndigoRecordMolecule(
        indigo_object=Indigo().loadMolecule(data)
    )
    return elastic_repository_molecule().filter(exact=query_record)
def iterate_file(
    file: Path,
    iterator: Optional[str] = None,
    error_handler: Optional[Callable[[object, BaseException], None]] = None,
) -> Generator[IndigoRecordMolecule, None, None]:
    """
    Lazily read a multi-record file, yielding one record per entry.

    :param file: path of the file to read
    :type file: Path
    :param iterator: supported iterators sdf, smiles, smi, cml (matched
        case-insensitively). If iterator is not set, the iterator is
        determined from the file extension
    :type iterator: Optional[str]
    :param error_handler: lambda for catching exceptions; forwarded
        unchanged to every ``IndigoRecordMolecule`` that is created
    :type error_handler: Optional[Callable[[object, BaseException], None]]
    :return: generator of ``IndigoRecordMolecule`` objects
    :raises AttributeError: if the iterator (or file extension) is not
        supported. Because this function is a generator, the error
        surfaces on the first iteration rather than at call time.
    """
    iterators = {
        "sdf": "iterateSDFile",
        "smiles": "iterateSmilesFile",
        "smi": "iterateSmilesFile",
        "cml": "iterateCMLFile",
    }
    if not iterator:
        # Drop the leading dot of the extension (".sdf" -> "sdf").
        iterator = file.suffix[1:]
    # Compare in lower case so that "mols.SDF" or iterator="SMILES" resolve
    # as well; the error message keeps the value exactly as requested.
    iterator_fn = iterators.get(iterator.lower())
    if not iterator_fn:
        raise AttributeError(f"Unsupported iterator {iterator}")

    indigo_object: IndigoObject
    # The Indigo iterator is looked up by method name and given the path
    # as a plain string.
    for indigo_object in getattr(Indigo(), iterator_fn)(str(file)):
        yield IndigoRecordMolecule(
            indigo_object=indigo_object, error_handler=error_handler
        )
def push(data):
    """
    Index the molecule described by ``data``.

    :param data: molecule representation handed to ``Indigo.loadMolecule``
    :return: None; the record is passed to ``index_record`` on the
        repository returned by ``elastic_repository_molecule()``
    """
    new_record = IndigoRecordMolecule(
        indigo_object=Indigo().loadMolecule(data)
    )
    elastic_repository_molecule().index_record(record=new_record)
def test_similaririty_matches_reactions(
    elastic_repository_reaction: ElasticRepository,
    loaded_rxns,
    resource_loader,
    indigo_fixture,
) -> None:
    """Check that reaction queries return hits with matching reactant count.

    Loads ``reactions/rheadb/50353.rxn`` and queries the reaction
    repository with Tanimoto, Euclid and Tversky similarity (threshold
    0.99) and with an exact match. Every hit must have the same number of
    reactants as the query reaction.

    ``loaded_rxns`` is not referenced in the body; presumably the fixture
    populates the index -- TODO confirm.

    NOTE(review): the reaction is wrapped in ``IndigoRecordMolecule`` rather
    than a reaction-specific record type -- confirm this is intended.
    """
    reaction = indigo_fixture.loadReactionFromFile(
        resource_loader("reactions/rheadb/50353.rxn")
    )
    query_record = IndigoRecordMolecule(indigo_object=reaction)

    def assert_reactant_counts_match(**query) -> None:
        # Run one query and compare each hit against the query reaction.
        for hit in elastic_repository_reaction.filter(**query):
            hit_reactants = as_iob(hit, indigo_fixture).countReactants()
            assert hit_reactants == reaction.countReactants()

    assert_reactant_counts_match(
        similarity=TanimotoSimilarityMatch(query_record, 0.99)
    )
    assert_reactant_counts_match(
        similarity=EuclidSimilarityMatch(query_record, 0.99)
    )
    assert_reactant_counts_match(
        similarity=TverskySimilarityMatch(query_record, 0.99)
    )
    assert_reactant_counts_match(exact=query_record)
def get_record_by_index(
    response: Dict, index: str
) -> Union[IndigoRecordMolecule, IndigoRecordReaction]:
    """
    Build the record type that corresponds to an index name.

    :param response: passed as ``elastic_response`` to the record
        constructor
    :param index: index name, compared against the ``IndexName`` values
    :return: ``IndigoRecordMolecule`` for ``IndexName.BINGO_MOLECULE``,
        ``IndigoRecordReaction`` for ``IndexName.BINGO_REACTION``
    :raises AttributeError: if ``index`` matches neither value
    """
    record_types = (
        (IndexName.BINGO_MOLECULE, IndigoRecordMolecule),
        (IndexName.BINGO_REACTION, IndigoRecordReaction),
    )
    for known_index, record_cls in record_types:
        if index == known_index.value:
            return record_cls(elastic_response=response)
    raise AttributeError(f"Unknown index {index}")
def load_molecule(
    file_: Union[str, Path], session: Indigo
) -> IndigoRecordMolecule:
    """
    Helper for loading molecules from file into IndigoRecordMolecule object

    :param file_: location of the molecule file, as ``str`` or ``Path``
    :param session: Indigo session used to load the file
    :return: record wrapping the loaded molecule
    """
    # Hand Indigo a plain string, as iterate_file does; a Path passed
    # straight through is presumably not accepted by the loader -- TODO
    # confirm. str() leaves an existing string unchanged.
    molecule = session.loadMoleculeFromFile(str(file_))
    return IndigoRecordMolecule(indigo_object=molecule)
def test_exact_match(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
):
    """Check that an exact query's first hit has the query's SMILES.

    Only the first hit is inspected; its canonical SMILES must equal that
    of ``loaded_sdf``.
    """
    hits = elastic_repository_molecule.filter(exact=loaded_sdf)
    expected_smiles = loaded_sdf.as_indigo_object(
        indigo_fixture
    ).canonicalSmiles()
    first_hit_smiles = (
        next(hits).as_indigo_object(indigo_fixture).canonicalSmiles()
    )
    assert expected_smiles == first_hit_smiles
def test_substructure_search(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
):
    """Check that every substructure hit has the query's canonical SMILES.

    Each item returned by ``filter(substructure=loaded_sdf)`` is compared
    with ``loaded_sdf`` itself.
    """
    hits = elastic_repository_molecule.filter(substructure=loaded_sdf)
    for hit in hits:
        hit_smiles = hit.as_indigo_object(indigo_fixture).canonicalSmiles()
        query_smiles = loaded_sdf.as_indigo_object(
            indigo_fixture
        ).canonicalSmiles()
        assert hit_smiles == query_smiles
def test_search_empty_fingerprint(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    resource_loader,
):
    """Check exact search for records created with ``skip_errors=True``.

    Indexes ``[H][H]`` and ``[H][F]``, then runs an exact query for
    ``[H][H]``. The first hit must have canonical SMILES ``[H][H]`` and
    there must be no second hit.
    """
    for smiles in ["[H][H]", "[H][F]"]:
        record = IndigoRecordMolecule(
            indigo_object=indigo_fixture.loadMolecule(smiles),
            skip_errors=True,
        )
        elastic_repository_molecule.index_record(record)
    # Pause before querying; presumably lets the index make the new
    # documents searchable -- TODO confirm.
    time.sleep(5)

    query_record = IndigoRecordMolecule(
        indigo_object=indigo_fixture.loadMolecule("[H][H]"),
        skip_errors=True,
    )
    hits = elastic_repository_molecule.filter(exact=query_record)

    assert (
        "[H][H]"
        == next(hits).as_indigo_object(indigo_fixture).canonicalSmiles()
    )
    # Asking for a second hit must exhaust the result.
    with pytest.raises(StopIteration):
        next(hits).as_indigo_object(indigo_fixture).canonicalSmiles()
def test_similarity_matches(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
):
    """Check the first hit of each similarity algorithm.

    For Tanimoto, Euclid and Tversky matches built from ``loaded_sdf``,
    the first hit's canonical SMILES must equal that of ``loaded_sdf``.
    """
    similarity_queries = [
        TanimotoSimilarityMatch(loaded_sdf, 0.9),
        EuclidSimilarityMatch(loaded_sdf, 0.9),
        TverskySimilarityMatch(loaded_sdf, 0.9, 0.5, 0.5),
    ]
    for query in similarity_queries:
        hits = elastic_repository_molecule.filter(similarity=query)
        expected_smiles = loaded_sdf.as_indigo_object(
            indigo_fixture
        ).canonicalSmiles()
        first_hit_smiles = (
            next(hits).as_indigo_object(indigo_fixture).canonicalSmiles()
        )
        assert expected_smiles == first_hit_smiles
def test_wildcard_search(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
    resource_loader,
):
    """Check name filtering with a ``WildcardQuery``.

    Indexes ``molecules/composition1.mol`` and queries names with
    ``Comp*``; every hit must be named ``Composition1``.

    ``loaded_sdf`` is not referenced in the body; presumably the fixture
    populates the index -- TODO confirm.
    """
    molecule = indigo_fixture.loadMoleculeFromFile(
        resource_loader("molecules/composition1.mol")
    )
    elastic_repository_molecule.index_record(
        IndigoRecordMolecule(indigo_object=molecule)
    )
    # Pause before querying; presumably lets the index make the new
    # document searchable -- TODO confirm.
    time.sleep(1)

    name_pattern = WildcardQuery("Comp*")
    for hit in elastic_repository_molecule.filter(name=name_pattern):
        assert hit.name == "Composition1"
def test_custom_fields(
    elastic_repository_molecule: ElasticRepository,
    indigo_fixture: Indigo,
    loaded_sdf: IndigoRecordMolecule,
    resource_loader,
):
    """Check that an extra keyword field can be stored and filtered on.

    A record is created with ``PUBCHEM_IUPAC_INCHIKEY`` as an additional
    keyword argument, indexed, and then queried by that field. Every hit
    must expose the same value as an attribute.

    ``loaded_sdf`` is not referenced in the body; presumably the fixture
    populates the index -- TODO confirm.
    """
    inchi_key = "RDHQFKQIGNGIED-UHFFFAOYSA-N"

    molecule = indigo_fixture.loadMoleculeFromFile(
        resource_loader("molecules/composition1.mol")
    )
    record = IndigoRecordMolecule(
        indigo_object=molecule, PUBCHEM_IUPAC_INCHIKEY=inchi_key
    )
    elastic_repository_molecule.index_record(record)
    # Pause before querying; presumably lets the index make the new
    # document searchable -- TODO confirm.
    time.sleep(1)

    hits = elastic_repository_molecule.filter(PUBCHEM_IUPAC_INCHIKEY=inchi_key)
    for hit in hits:
        assert hit.PUBCHEM_IUPAC_INCHIKEY == inchi_key
def test_create_with_name(indigo_fixture, resource_loader):
    """Check the ``name`` of a record built from ``composition1.mol``.

    The record created from ``molecules/composition1.mol`` must report
    ``Composition1`` as its name.
    """
    source_path = resource_loader("molecules/composition1.mol")
    molecule = indigo_fixture.loadMoleculeFromFile(source_path)
    record = IndigoRecordMolecule(indigo_object=molecule)
    assert record.name == "Composition1"
def test_create_without_fingerprint(indigo_fixture):
    """Check that ``[H][H]`` with ``skip_errors=True`` has empty fingerprints.

    Both ``sim_fingerprint`` and ``sub_fingerprint`` of the resulting
    record must have length zero.
    """
    hydrogen = indigo_fixture.loadMolecule("[H][H]")
    record = IndigoRecordMolecule(indigo_object=hydrogen, skip_errors=True)

    sim_length = len(record.sim_fingerprint)
    assert sim_length == 0
    sub_length = len(record.sub_fingerprint)
    assert sub_length == 0
def test_create(indigo_fixture):
    """Check field lengths of a record built from a fixed SMILES string.

    For ``N1(CC)C2=C(C(=NC=N2)N)N=C1`` the record must have a
    ``sim_fingerprint`` of length 70, a ``sub_fingerprint`` of length 644
    and a ``cmf`` of length 140.
    """
    molecule = indigo_fixture.loadMolecule("N1(CC)C2=C(C(=NC=N2)N)N=C1")
    record = IndigoRecordMolecule(indigo_object=molecule)

    sim_length = len(record.sim_fingerprint)
    assert sim_length == 70
    sub_length = len(record.sub_fingerprint)
    assert sub_length == 644
    cmf_length = len(record.cmf)
    assert cmf_length == 140