def test_data_frame_parser_target_index(data_frame, mols, label_a):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = DataFrameParser(preprocessor, labels='labelA',
                             smiles_col='smiles')
    result = parser.parse(data_frame, return_smiles=True, target_index=[0, 2],
                          return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2
    is_successful = result['is_successful']
    assert numpy.alltrue(is_successful)
    assert len(is_successful) == 2

    # We assume NFPPreprocessor works as documented.
    expect = preprocessor.get_input_features(mols[0])
    check_features(dataset[0], expect, label_a[0])

    expect = preprocessor.get_input_features(mols[2])
    check_features(dataset[1], expect, label_a[2])

    # check smiles array
    assert type(smiles) == numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
def test_csv_file_parser_target_index(csv_file_invalid, mols, label_a):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = CSVFileParser(preprocessor, labels='labelA', smiles_col='smiles')
    result = parser.parse(csv_file_invalid, return_smiles=True,
                          target_index=[1, 2, 4], return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2
    is_successful = result['is_successful']
    assert numpy.array_equal(is_successful, numpy.array([True, False, True]))
    assert len(is_successful) == 3

    # As we want test CSVFileParser, we assume
    # NFPPreprocessor works as documented.
    expect = preprocessor.get_input_features(mols[0])
    check_features(dataset[0], expect, label_a[0])

    expect = preprocessor.get_input_features(mols[2])
    check_features(dataset[1], expect, label_a[2])

    # check smiles array
    assert type(smiles) == numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
def test_smiles_parser_target_index(mol_smiles, mols):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles, return_smiles=True, target_index=[0, 2],
                          return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2
    is_successful = result['is_successful']
    assert numpy.alltrue(is_successful)
    assert len(is_successful) == 2

    # As we want test CSVFileParser, we assume
    # NFPPreprocessor works as documented.
    expect = preprocessor.get_input_features(mols[0])
    check_input_features(dataset[0], expect)

    expect = preprocessor.get_input_features(mols[2])
    check_input_features(dataset[1], expect)

    # check smiles array
    assert type(smiles) == numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
Exemple #4
0
def test_sdf_file_parser_not_return_smiles(sdf_file, mols):
    preprocessor = NFPPreprocessor()
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file, return_smiles=False)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3
    assert smiles is None

    # As we want test SDFFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_csv_file_parser_not_return_smiles(csv_file, mols):
    preprocessor = NFPPreprocessor()
    parser = CSVFileParser(preprocessor, smiles_col='smiles')
    # Actually, `dataset, smiles = parser.parse(..)` is enough.
    result = parser.parse(csv_file, return_smiles=False)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3
    assert smiles is None

    # As we want test CSVFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_smiles_parser_not_return_smiles(mol_smiles, mols):
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles, return_smiles=False)
    dataset = result['dataset']
    smiles = result['smiles']
    is_successful = result['is_successful']
    assert len(dataset) == 3
    assert smiles is None
    assert is_successful is None

    # As we want test CSVFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_csv_file_parser_not_return_smiles(csv_file, mols):
    preprocessor = NFPPreprocessor()
    parser = CSVFileParser(preprocessor, smiles_col='smiles')
    # Actually, `dataset, smiles = parser.parse(..)` is enough.
    result = parser.parse(csv_file, return_smiles=False)
    dataset = result['dataset']
    smiles = result['smiles']
    is_successful = result['is_successful']
    assert len(dataset) == 3
    assert smiles is None
    assert is_successful is None

    # As we want test CSVFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_data_frame_parser_not_return_smiles(data_frame, mols):
    """Test default behavior"""
    preprocessor = NFPPreprocessor()
    parser = DataFrameParser(preprocessor, smiles_col='smiles')
    # Actually, `dataset, smiles = parser.parse(..)` is enough.
    result = parser.parse(data_frame, return_smiles=False)
    dataset = result['dataset']
    smiles = result['smiles']
    is_successful = result['is_successful']
    assert len(dataset) == 3
    assert smiles is None
    assert is_successful is None

    # As we want test DataFrameParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_smiles_parser_return_is_successful(mols):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    mol_smiles_with_invalid = [
        'var', 'CN=C=O', 'hoge', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2']
    result = parser.parse(mol_smiles_with_invalid, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3
    is_successful = result['is_successful']
    assert len(is_successful) == 5
    assert numpy.alltrue(is_successful[[1, 3, 4]])
    assert numpy.alltrue(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_sdf_file_parser_return_is_successful(sdf_file_long, mols):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor(max_atoms=10)
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file_long,
                          return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    # smiles = result['smiles']
    assert len(dataset) == 3
    is_successful = result['is_successful']
    assert len(is_successful) == 5
    assert numpy.alltrue(is_successful[[1, 3, 4]])
    assert numpy.alltrue(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
Exemple #11
0
def test_sdf_file_parser_return_smiles(sdf_file, mols):
    preprocessor = NFPPreprocessor()
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # As we want test SDFFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)

    # check smiles array
    assert type(smiles) == numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
def test_csv_parser_return_is_successful(csv_file_invalid, mols, label_a):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = CSVFileParser(preprocessor, labels='labelA',
                           smiles_col='smiles')
    result = parser.parse(csv_file_invalid, return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    # smiles = result['smiles']
    assert len(dataset) == 3
    is_successful = result['is_successful']
    assert len(is_successful) == 5
    # print('is_successful', is_successful)
    assert numpy.alltrue(is_successful[[1, 3, 4]])
    assert numpy.alltrue(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_features(dataset[i], expect, label_a[i])
def test_csv_parser_return_is_successful(csv_file_invalid, mols, label_a):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = CSVFileParser(preprocessor, labels='labelA', smiles_col='smiles')
    result = parser.parse(csv_file_invalid,
                          return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    # smiles = result['smiles']
    assert len(dataset) == 3
    is_successful = result['is_successful']
    assert len(is_successful) == 5
    # print('is_successful', is_successful)
    assert numpy.alltrue(is_successful[[1, 3, 4]])
    assert numpy.alltrue(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_features(dataset[i], expect, label_a[i])
def test_csv_file_parser_return_smiles(csv_file, mols, label_a):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = CSVFileParser(preprocessor, labels='labelA', smiles_col='smiles')
    result = parser.parse(csv_file, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # As we want test CSVFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_features(dataset[i], expect, label_a[i])

    # check smiles array
    assert type(smiles) == numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
def test_smiles_parser_return_smiles(mol_smiles, mols):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    result = parser.parse(mol_smiles, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # As we want test CSVFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)

    # check smiles array
    assert type(smiles) == numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
def test_smiles_parser_return_is_successful(mols):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = SmilesParser(preprocessor)
    mol_smiles_with_invalid = [
        'var', 'CN=C=O', 'hoge', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2'
    ]
    result = parser.parse(mol_smiles_with_invalid,
                          return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    assert len(dataset) == 3
    is_successful = result['is_successful']
    assert len(is_successful) == 5
    assert numpy.alltrue(is_successful[[1, 3, 4]])
    assert numpy.alltrue(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_data_frame_parser_return_smiles(data_frame, mols, label_a):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = DataFrameParser(preprocessor, labels='labelA',
                             smiles_col='smiles')
    result = parser.parse(data_frame, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_features(dataset[i], expect, label_a[i])

    # check smiles array
    assert type(smiles) == numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
def test_data_frame_parser_return_is_successful(mols, label_a):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor()
    parser = DataFrameParser(preprocessor, labels='labelA',
                             smiles_col='smiles')
    df = pandas.DataFrame({
        'smiles': ['var', 'CN=C=O', 'hoge', 'Cc1ccccc1', 'CC1=CC2CC(CC1)O2'],
        'labelA': [0., 2.1, 0., 5.3, -1.2],
    })
    result = parser.parse(df, return_smiles=True, return_is_successful=True)

    dataset = result['dataset']
    # smiles = result['smiles']
    assert len(dataset) == 3
    is_successful = result['is_successful']
    assert len(is_successful) == 5
    # print('is_successful', is_successful)
    assert numpy.alltrue(is_successful[[1, 3, 4]])
    assert numpy.alltrue(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_features(dataset[i], expect, label_a[i])