def test_data_frame_parser_target_index(data_frame, mols, label_a):
    """Check `DataFrameParser.parse` with `target_index` and `return_smiles`.

    The parse call is restricted to ``target_index=[0, 2]``, so the dataset,
    the SMILES array and the ``is_successful`` flags are each expected to
    hold exactly two entries, matching ``mols[0]`` and ``mols[2]``.
    """
    preprocessor = NFPPreprocessor()
    parser = DataFrameParser(preprocessor, labels='labelA',
                             smiles_col='smiles')
    result = parser.parse(data_frame, return_smiles=True,
                          target_index=[0, 2], return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2

    is_successful = result['is_successful']
    assert numpy.all(is_successful)
    assert len(is_successful) == 2

    # We assume NFPPreprocessor works as documented.
    expect = preprocessor.get_input_features(mols[0])
    check_features(dataset[0], expect, label_a[0])
    expect = preprocessor.get_input_features(mols[2])
    check_features(dataset[1], expect, label_a[2])

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
def test_csv_file_parser_target_index(csv_file_invalid, mols, label_a):
    """Check `CSVFileParser.parse` with `target_index` on a partly invalid CSV.

    Rows 1, 2 and 4 are requested.  The middle one is expected to fail, so
    ``is_successful`` should be ``[True, False, True]`` and the dataset
    should contain the two remaining entries, matching ``mols[0]`` and
    ``mols[2]``.
    """
    preprocessor = NFPPreprocessor()
    parser = CSVFileParser(preprocessor, labels='labelA',
                           smiles_col='smiles')
    result = parser.parse(csv_file_invalid, return_smiles=True,
                          target_index=[1, 2, 4],
                          return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2

    is_successful = result['is_successful']
    assert numpy.array_equal(is_successful,
                             numpy.array([True, False, True]))
    assert len(is_successful) == 3

    # As we want to test CSVFileParser, we assume
    # NFPPreprocessor works as documented.
    expect = preprocessor.get_input_features(mols[0])
    check_features(dataset[0], expect, label_a[0])
    expect = preprocessor.get_input_features(mols[2])
    check_features(dataset[1], expect, label_a[2])

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
def test_smiles_parser_target_index(mol_smiles, mols):
    """Check `SmilesParser.parse` with `target_index` and `return_smiles`.

    The parse call is restricted to ``target_index=[0, 2]``, so the dataset,
    the SMILES array and the ``is_successful`` flags are each expected to
    hold exactly two entries, matching ``mols[0]`` and ``mols[2]``.
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles, return_smiles=True,
                          target_index=[0, 2], return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2

    is_successful = result['is_successful']
    assert numpy.all(is_successful)
    assert len(is_successful) == 2

    # As we want to test SmilesParser, we assume
    # NFPPreprocessor works as documented.
    expect = preprocessor.get_input_features(mols[0])
    check_input_features(dataset[0], expect)
    expect = preprocessor.get_input_features(mols[2])
    check_input_features(dataset[1], expect)

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
def test_sdf_file_parser_not_return_smiles(sdf_file, mols):
    """`SDFFileParser.parse` with ``return_smiles=False`` gives no SMILES.

    The result's ``'smiles'`` entry must be ``None`` while the dataset still
    holds the three molecules' input features.
    """
    feature_extractor = NFPPreprocessor()
    sdf_parser = SDFFileParser(feature_extractor)
    parsed = sdf_parser.parse(sdf_file, return_smiles=False)
    parsed_dataset = parsed['dataset']
    parsed_smiles = parsed['smiles']

    assert len(parsed_dataset) == 3
    assert parsed_smiles is None

    # SDFFileParser is the unit under test; NFPPreprocessor is taken to
    # behave as documented and provides the reference features.
    for index in range(3):
        reference = feature_extractor.get_input_features(mols[index])
        check_input_features(parsed_dataset[index], reference)
def test_csv_file_parser_not_return_smiles(csv_file, mols):
    """`CSVFileParser.parse` with ``return_smiles=False`` gives no SMILES.

    The result's ``'smiles'`` entry must be ``None`` while the dataset still
    holds the three molecules' input features.
    """
    # NOTE(review): a later test in this module reuses this function name,
    # so this definition is shadowed and not collected by pytest.
    feature_extractor = NFPPreprocessor()
    csv_parser = CSVFileParser(feature_extractor, smiles_col='smiles')
    parsed = csv_parser.parse(csv_file, return_smiles=False)
    parsed_dataset = parsed['dataset']
    parsed_smiles = parsed['smiles']

    assert len(parsed_dataset) == 3
    assert parsed_smiles is None

    # CSVFileParser is the unit under test; NFPPreprocessor is taken to
    # behave as documented and provides the reference features.
    for index in range(3):
        reference = feature_extractor.get_input_features(mols[index])
        check_input_features(parsed_dataset[index], reference)
def test_smiles_parser_not_return_smiles(mol_smiles, mols):
    """`SmilesParser.parse` with ``return_smiles=False`` gives no SMILES.

    Both the ``'smiles'`` and ``'is_successful'`` entries of the result must
    be ``None`` while the dataset still holds the three molecules' input
    features.
    """
    feature_extractor = NFPPreprocessor()
    smiles_parser = SmilesParser(feature_extractor)
    parsed = smiles_parser.parse(mol_smiles, return_smiles=False)
    parsed_dataset = parsed['dataset']
    parsed_smiles = parsed['smiles']
    success_flags = parsed['is_successful']

    assert len(parsed_dataset) == 3
    assert parsed_smiles is None
    assert success_flags is None

    # SmilesParser is the unit under test; NFPPreprocessor is taken to
    # behave as documented and provides the reference features.
    for index in range(3):
        reference = feature_extractor.get_input_features(mols[index])
        check_input_features(parsed_dataset[index], reference)
def test_csv_file_parser_not_return_smiles(csv_file, mols):
    """`CSVFileParser.parse` with ``return_smiles=False`` gives no SMILES.

    Both the ``'smiles'`` and ``'is_successful'`` entries of the result must
    be ``None`` while the dataset still holds the three molecules' input
    features.
    """
    # NOTE(review): an earlier test in this module has the same name and is
    # shadowed by this definition.
    feature_extractor = NFPPreprocessor()
    csv_parser = CSVFileParser(feature_extractor, smiles_col='smiles')
    parsed = csv_parser.parse(csv_file, return_smiles=False)
    parsed_dataset = parsed['dataset']
    parsed_smiles = parsed['smiles']
    success_flags = parsed['is_successful']

    assert len(parsed_dataset) == 3
    assert parsed_smiles is None
    assert success_flags is None

    # CSVFileParser is the unit under test; NFPPreprocessor is taken to
    # behave as documented and provides the reference features.
    for index in range(3):
        reference = feature_extractor.get_input_features(mols[index])
        check_input_features(parsed_dataset[index], reference)
def test_data_frame_parser_not_return_smiles(data_frame, mols):
    """`DataFrameParser.parse` with ``return_smiles=False`` gives no SMILES.

    Both the ``'smiles'`` and ``'is_successful'`` entries of the result must
    be ``None`` while the dataset still holds the three molecules' input
    features.
    """
    feature_extractor = NFPPreprocessor()
    frame_parser = DataFrameParser(feature_extractor, smiles_col='smiles')
    parsed = frame_parser.parse(data_frame, return_smiles=False)
    parsed_dataset = parsed['dataset']
    parsed_smiles = parsed['smiles']
    success_flags = parsed['is_successful']

    assert len(parsed_dataset) == 3
    assert parsed_smiles is None
    assert success_flags is None

    # DataFrameParser is the unit under test; NFPPreprocessor is taken to
    # behave as documented and provides the reference features.
    for index in range(3):
        reference = feature_extractor.get_input_features(mols[index])
        check_input_features(parsed_dataset[index], reference)
def test_smiles_parser_return_is_successful(mols):
    """Check the `is_successful` flags returned by `SmilesParser.parse`.

    Five SMILES strings are parsed, two of which (``'var'`` and ``'hoge'``)
    are not valid molecules.  The flags are expected to be ``False`` at
    those positions and ``True`` elsewhere, and the dataset should contain
    only the three valid molecules.
    """
    # NOTE(review): a later test in this module reuses this function name,
    # so this definition is shadowed and not collected by pytest.
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    mol_smiles_with_invalid = [
        'var', 'CN=C=O', 'hoge', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2',
    ]
    result = parser.parse(mol_smiles_with_invalid, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3

    is_successful = result['is_successful']
    assert len(is_successful) == 5
    assert numpy.all(is_successful[[1, 3, 4]])
    assert numpy.all(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_sdf_file_parser_return_is_successful(sdf_file_long, mols):
    """Check the `is_successful` flags returned by `SDFFileParser.parse`.

    The preprocessor is created with ``max_atoms=10``.  Of the five entries
    in `sdf_file_long`, entries 0 and 2 are expected to fail (presumably
    because they exceed the atom limit -- confirm against the fixture), so
    the dataset should contain only the three remaining molecules.
    """
    preprocessor = NFPPreprocessor(max_atoms=10)
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file_long, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3

    is_successful = result['is_successful']
    assert len(is_successful) == 5
    assert numpy.all(is_successful[[1, 3, 4]])
    assert numpy.all(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_sdf_file_parser_return_smiles(sdf_file, mols):
    """Check that `SDFFileParser.parse` returns SMILES when requested.

    With ``return_smiles=True`` the result is expected to carry a 1-d numpy
    array of SMILES strings, one per dataset entry, alongside the three
    molecules' input features.
    """
    preprocessor = NFPPreprocessor()
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # As we want to test SDFFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
def test_csv_parser_return_is_successful(csv_file_invalid, mols, label_a):
    """Check the `is_successful` flags returned by `CSVFileParser.parse`.

    Of the five rows in `csv_file_invalid`, rows 0 and 2 are expected to
    fail.  The dataset should therefore contain only the three remaining
    molecules, with their ``labelA`` values.
    """
    preprocessor = NFPPreprocessor()
    parser = CSVFileParser(preprocessor, labels='labelA',
                           smiles_col='smiles')
    result = parser.parse(csv_file_invalid, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3

    is_successful = result['is_successful']
    assert len(is_successful) == 5
    assert numpy.all(is_successful[[1, 3, 4]])
    assert numpy.all(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_features(dataset[i], expect, label_a[i])
def test_csv_file_parser_return_smiles(csv_file, mols, label_a):
    """Check `CSVFileParser.parse` with `labels` and ``return_smiles=True``.

    The dataset is expected to hold the three molecules' input features
    together with their ``labelA`` values, and the result should carry a
    1-d numpy array of SMILES strings, one per dataset entry.
    """
    preprocessor = NFPPreprocessor()
    parser = CSVFileParser(preprocessor, labels='labelA',
                           smiles_col='smiles')
    result = parser.parse(csv_file, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # As we want to test CSVFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_features(dataset[i], expect, label_a[i])

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
def test_smiles_parser_return_smiles(mol_smiles, mols):
    """Check that `SmilesParser.parse` returns SMILES when requested.

    With ``return_smiles=True`` the result is expected to carry a 1-d numpy
    array of SMILES strings, one per dataset entry, alongside the three
    molecules' input features.
    """
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # As we want to test SmilesParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
def test_smiles_parser_return_is_successful(mols):
    """Check the `is_successful` flags returned by `SmilesParser.parse`.

    Five SMILES strings are parsed, two of which (``'var'`` and ``'hoge'``)
    are not valid molecules.  The flags are expected to be ``False`` at
    those positions and ``True`` elsewhere, and the dataset should contain
    only the three valid molecules.
    """
    # NOTE(review): an earlier test in this module has the same name and is
    # shadowed by this definition.
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    mol_smiles_with_invalid = [
        'var', 'CN=C=O', 'hoge', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2',
    ]
    result = parser.parse(mol_smiles_with_invalid, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3

    is_successful = result['is_successful']
    assert len(is_successful) == 5
    assert numpy.all(is_successful[[1, 3, 4]])
    assert numpy.all(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_data_frame_parser_return_smiles(data_frame, mols, label_a):
    """Check `DataFrameParser.parse` with `labels` and ``return_smiles=True``.

    The dataset is expected to hold the three molecules' input features
    together with their ``labelA`` values, and the result should carry a
    1-d numpy array of SMILES strings, one per dataset entry.
    """
    preprocessor = NFPPreprocessor()
    parser = DataFrameParser(preprocessor, labels='labelA',
                             smiles_col='smiles')
    result = parser.parse(data_frame, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_features(dataset[i], expect, label_a[i])

    # check smiles array
    assert isinstance(smiles, numpy.ndarray)
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
def test_data_frame_parser_return_is_successful(mols, label_a):
    """Check the `is_successful` flags returned by `DataFrameParser.parse`.

    A five-row frame is built inline; rows 0 and 2 hold strings that are
    not valid SMILES (``'var'`` and ``'hoge'``).  The flags are expected to
    be ``False`` for those rows and ``True`` for the others, and the dataset
    should contain only the three valid molecules with their ``labelA``
    values.
    """
    preprocessor = NFPPreprocessor()
    parser = DataFrameParser(preprocessor, labels='labelA',
                             smiles_col='smiles')
    df = pandas.DataFrame({
        'smiles': ['var', 'CN=C=O', 'hoge', 'Cc1ccccc1',
                   'CC1=CC2CC(CC1)O2'],
        'labelA': [0., 2.1, 0., 5.3, -1.2],
    })
    result = parser.parse(df, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3

    is_successful = result['is_successful']
    assert len(is_successful) == 5
    assert numpy.all(is_successful[[1, 3, 4]])
    assert numpy.all(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_features(dataset[i], expect, label_a[i])