def test_sdf_file_parser_target_index(sdf_file, mols):
    """Parse only the molecules selected by ``target_index``.

    Requests entries 0 and 2 of ``sdf_file`` and checks that the returned
    dataset, SMILES array and ``is_successful`` mask each contain exactly
    those two entries, in that order.
    """
    target_index = [0, 2]
    preprocessor = NFPPreprocessor()
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file, return_smiles=True,
                          target_index=target_index,
                          return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2

    is_successful = result['is_successful']
    # `numpy.alltrue` was removed in NumPy 2.0; `numpy.all` is the
    # drop-in replacement.
    assert numpy.all(is_successful)
    assert len(is_successful) == 2

    # As we want to test SDFFileParser, we assume
    # NFPPreprocessor works as documented.
    for row, mol_index in enumerate(target_index):
        expect = preprocessor.get_input_features(mols[mol_index])
        check_input_features(dataset[row], expect)

    # check smiles array
    assert type(smiles) is numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
def test_sdf_file_parser(sdf_file, mols):
    """Parse ``sdf_file`` with default options and compare the features.

    Each of the two dataset entries must match what ``NFPPreprocessor``
    produces for the corresponding molecule in ``mols``.
    """
    # NOTE(review): other tests in this file read the dataset from
    # ``parse(...)['dataset']``; confirm which return shape applies here.
    featurizer = NFPPreprocessor()
    dataset = SDFFileParser(featurizer).parse(sdf_file)
    assert len(dataset) == 2

    # SDFFileParser is the unit under test, so NFPPreprocessor is
    # trusted to work as documented.
    for position in (0, 1):
        expected = featurizer.get_input_features(mols[position])
        check_input_features(dataset[position], expected)
def test_nfp_preprocessor_with_tox21():
    """Check rank and dtype of NFP features on a random Tox21 sample."""
    # NOTE(review): other tests in this file read the dataset from
    # ``parse(...)['dataset']``; confirm which return shape applies here.
    parser = SDFFileParser(NFPPreprocessor(), postprocess_label=None)
    dataset = parser.parse(get_tox21_filepath('train'))

    # size=None makes `choice` return a single index rather than an array.
    sample = numpy.random.choice(len(dataset), size=None)
    atoms, adjs = dataset[sample]

    # atoms: (atom, )
    assert atoms.ndim == 1
    assert atoms.dtype == numpy.int32
    # adjs: (atom from, atom to)
    assert adjs.ndim == 2
    assert adjs.dtype == numpy.float32
def test_sdf_file_parser_not_return_smiles(sdf_file, mols):
    """With ``return_smiles=False`` the result's ``'smiles'`` entry is None.

    The dataset itself must still contain all three molecules with the
    features ``NFPPreprocessor`` computes for them.
    """
    featurizer = NFPPreprocessor()
    parsed = SDFFileParser(featurizer).parse(sdf_file, return_smiles=False)
    dataset = parsed['dataset']
    smiles = parsed['smiles']

    assert len(dataset) == 3
    assert smiles is None

    # SDFFileParser is the unit under test, so NFPPreprocessor is
    # trusted to work as documented.
    for position in range(3):
        expected = featurizer.get_input_features(mols[position])
        check_input_features(dataset[position], expected)
def test_gat_preprocessor():
    """Check rank and dtype of RelGAT features on a random Tox21 sample."""

    def _fill_missing_labels(label_list):
        # A missing label becomes -1, the value for which
        # `sigmoid_cross_entropy` does not calculate the loss.
        return [label if label is not None else -1 for label in label_list]

    parser = SDFFileParser(RelGATPreprocessor(),
                           postprocess_label=_fill_missing_labels)
    parsed = parser.parse(get_tox21_filepath('train'))
    dataset = parsed["dataset"]

    # size=None makes `choice` return a single index rather than an array.
    sample = numpy.random.choice(len(dataset), size=None)
    atoms, adjs = dataset[sample]

    # atoms: (atom, )
    assert atoms.ndim == 1
    assert atoms.dtype == numpy.int32
    # adjs: (edge_type, atom from, atom to)
    assert adjs.ndim == 3
    assert adjs.dtype == numpy.float32
def test_sdf_file_parser_return_is_successful(sdf_file_long, mols):
    """Test the ``return_is_successful=True`` option of ``parse``.

    ``sdf_file_long`` holds five entries; the test expects entries 1, 3
    and 4 to be flagged as successful and entries 0 and 2 as failed, so
    the resulting dataset has three rows.
    """
    # NOTE(review): presumably entries 0 and 2 are rejected because they
    # exceed `max_atoms=10` -- confirm against the `sdf_file_long` fixture.
    preprocessor = NFPPreprocessor(max_atoms=10)
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file_long, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3

    is_successful = result['is_successful']
    assert len(is_successful) == 5
    # `numpy.alltrue` was removed in NumPy 2.0; `numpy.all` is the
    # drop-in replacement.
    assert numpy.all(is_successful[[1, 3, 4]])
    assert numpy.all(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_sdf_file_parser_retain_smiles(sdf_file, mols):
    """Parse with ``retain_smiles=True`` and read SMILES from the parser.

    Checks the two dataset entries against ``NFPPreprocessor`` output and
    the array returned by ``parser.get_smiles()`` against the expected
    SMILES strings.
    """
    # NOTE(review): other tests in this file pass ``return_smiles=`` and
    # read ``result['smiles']``; confirm this older-looking API still applies.
    featurizer = NFPPreprocessor()
    parser = SDFFileParser(featurizer)
    dataset = parser.parse(sdf_file, retain_smiles=True)
    smiles = parser.get_smiles()
    assert len(dataset) == 2

    # SDFFileParser is the unit under test, so NFPPreprocessor is
    # trusted to work as documented.
    for position in (0, 1):
        expected = featurizer.get_input_features(mols[position])
        check_input_features(dataset[position], expected)

    # check smiles array
    assert type(smiles) is numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    for position, expected_smiles in enumerate(('CN=C=O', 'Cc1ccccc1')):
        assert smiles[position] == expected_smiles
def test_sdf_file_parser_return_smiles(sdf_file, mols):
    """With ``return_smiles=True`` the result carries a 1-D SMILES array.

    The dataset must hold the features of all three molecules, and the
    SMILES array must have one matching entry per dataset row.
    """
    featurizer = NFPPreprocessor()
    parsed = SDFFileParser(featurizer).parse(sdf_file, return_smiles=True)
    dataset = parsed['dataset']
    smiles = parsed['smiles']
    assert len(dataset) == 3

    # SDFFileParser is the unit under test, so NFPPreprocessor is
    # trusted to work as documented.
    for position in range(3):
        expected = featurizer.get_input_features(mols[position])
        check_input_features(dataset[position], expected)

    # check smiles array
    assert type(smiles) is numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    expected_smiles = ('CN=C=O', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2')
    for position, wanted in enumerate(expected_smiles):
        assert smiles[position] == wanted
def test_sdf_file_parser_extract_total_num(sdf_file):
    """``extract_total_num`` reports three entries for ``sdf_file``."""
    parser = SDFFileParser(NFPPreprocessor())
    total = parser.extract_total_num(sdf_file)
    assert total == 3