def test_sdf_file_parser_target_index(sdf_file, mols):
    preprocessor = NFPPreprocessor()
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file,
                          return_smiles=True,
                          target_index=[0, 2],
                          return_is_successful=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 2
    is_successful = result['is_successful']
    assert numpy.alltrue(is_successful)
    assert len(is_successful) == 2

    # As we want test SDFFileParser, we assume
    # NFPPreprocessor works as documented.
    expect = preprocessor.get_input_features(mols[0])
    check_input_features(dataset[0], expect)

    expect = preprocessor.get_input_features(mols[2])
    check_input_features(dataset[1], expect)

    # check smiles array
    assert type(smiles) == numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'CC1=CC2CC(CC1)O2'
def test_sdf_file_parser(sdf_file, mols):
    preprocessor = NFPPreprocessor()
    parser = SDFFileParser(preprocessor)
    dataset = parser.parse(sdf_file)
    assert len(dataset) == 2

    # As we want test SDFFileParser, we assume
    # NFPPreprocessor works as documented.
    expect = preprocessor.get_input_features(mols[0])
    check_input_features(dataset[0], expect)

    expect = preprocessor.get_input_features(mols[1])
    check_input_features(dataset[1], expect)
예제 #3
0
def test_sdf_file_parser_not_return_smiles(sdf_file, mols):
    preprocessor = NFPPreprocessor()
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file, return_smiles=False)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3
    assert smiles is None

    # As we want test SDFFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_sdf_file_parser_return_is_successful(sdf_file_long, mols):
    """test `labels` option and retain_smiles=True."""
    preprocessor = NFPPreprocessor(max_atoms=10)
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file_long,
                          return_smiles=True,
                          return_is_successful=True)

    dataset = result['dataset']
    # smiles = result['smiles']
    assert len(dataset) == 3
    is_successful = result['is_successful']
    assert len(is_successful) == 5
    assert numpy.alltrue(is_successful[[1, 3, 4]])
    assert numpy.alltrue(~is_successful[[0, 2]])

    # We assume NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)
def test_sdf_file_parser_retain_smiles(sdf_file, mols):
    preprocessor = NFPPreprocessor()
    parser = SDFFileParser(preprocessor)
    dataset = parser.parse(sdf_file, retain_smiles=True)
    smiles = parser.get_smiles()
    assert len(dataset) == 2

    # As we want test SDFFileParser, we assume
    # NFPPreprocessor works as documented.
    expect = preprocessor.get_input_features(mols[0])
    check_input_features(dataset[0], expect)

    expect = preprocessor.get_input_features(mols[1])
    check_input_features(dataset[1], expect)

    # check smiles array
    assert type(smiles) == numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
예제 #6
0
def test_sdf_file_parser_return_smiles(sdf_file, mols):
    preprocessor = NFPPreprocessor()
    parser = SDFFileParser(preprocessor)
    result = parser.parse(sdf_file, return_smiles=True)
    dataset = result['dataset']
    smiles = result['smiles']
    assert len(dataset) == 3

    # As we want test SDFFileParser, we assume
    # NFPPreprocessor works as documented.
    for i in range(3):
        expect = preprocessor.get_input_features(mols[i])
        check_input_features(dataset[i], expect)

    # check smiles array
    assert type(smiles) == numpy.ndarray
    assert smiles.ndim == 1
    assert len(smiles) == len(dataset)
    assert smiles[0] == 'CN=C=O'
    assert smiles[1] == 'Cc1ccccc1'
    assert smiles[2] == 'CC1=CC2CC(CC1)O2'
def test_nfp_preprocessor_with_tox21():
    preprocessor = NFPPreprocessor()

    dataset = SDFFileParser(preprocessor, postprocess_label=None).parse(
        get_tox21_filepath('train'))

    index = numpy.random.choice(len(dataset), None)
    atoms, adjs = dataset[index]

    assert atoms.ndim == 1  # (atom, )
    assert atoms.dtype == numpy.int32
    # (atom from, atom to)
    assert adjs.ndim == 2
    assert adjs.dtype == numpy.float32
예제 #8
0
def test_gat_preprocessor():
    preprocessor = RelGATPreprocessor()

    def postprocess_label(label_list):
        # Set -1 to the place where the label is not found,
        # this corresponds to not calculate loss with `sigmoid_cross_entropy`
        return [-1 if label is None else label for label in label_list]

    dataset = SDFFileParser(preprocessor, postprocess_label=postprocess_label
                            ).parse(get_tox21_filepath('train'))["dataset"]

    index = numpy.random.choice(len(dataset), None)
    atoms, adjs = dataset[index]

    assert atoms.ndim == 1  # (atom, )
    assert atoms.dtype == numpy.int32
    # (edge_type, atom from, atom to)
    assert adjs.ndim == 3
    assert adjs.dtype == numpy.float32
예제 #9
0
def test_sdf_file_parser_extract_total_num(sdf_file):
    preprocessor = NFPPreprocessor()
    parser = SDFFileParser(preprocessor)
    num = parser.extract_total_num(sdf_file)
    assert num == 3