def test_get_labels(self):
    # Fit on METHANE alone at depth=2, then check that get_labels() yields
    # one label per feature column and that the labels are exactly ('C-H',).
    conn = Connectivity(depth=2)
    features = conn.fit_transform([METHANE])
    labels = conn.get_labels()
    self.assertEqual(features.shape[1], len(labels))
    self.assertEqual(labels, ('C-H', ))
def test_fit_atom_coordination(self):
    # depth=1 with use_coordination=True: after fitting ALL_DATA the stored
    # _base_chains must be exactly this set of single-element tuples.
    conn = Connectivity(depth=1, use_coordination=True)
    conn.fit(ALL_DATA)
    expected_chains = {
        ('C1', ), ('C2', ), ('C3', ), ('C4', ),
        ('H0', ), ('H1', ),
        ('N1', ), ('N2', ), ('N3', ),
        ('O0', ), ('O1', ), ('O2', ),
    }
    self.assertEqual(conn._base_chains, expected_chains)
def test_get_labels_bond_order(self):
    # depth=3 with use_bond_order=True on METHANE: one label per feature
    # column, and the label tuple is exactly ('H-C-1_C-H-1',).
    conn = Connectivity(depth=3, use_bond_order=True)
    features = conn.fit_transform([METHANE])
    labels = conn.get_labels()
    self.assertEqual(features.shape[1], len(labels))
    self.assertEqual(labels, ('H-C-1_C-H-1', ))
def test_unknown(self):
    # Fit on METHANE only with add_unknown=True, then transform ALL_DATA and
    # compare element-wise against the expected matrix.
    conn = Connectivity(add_unknown=True)
    expected_results = numpy.array([[1, 4, 0],
                                    [2, 3, 4],
                                    [25, 15, 9]])
    conn.fit([METHANE])
    matches = conn.transform(ALL_DATA) == expected_results
    self.assertTrue(matches.all())
def test_get_labels_coordination(self):
    # depth=1 with use_coordination=True on METHANE: one label per feature
    # column, and the label tuple is exactly ('C4', 'H1').
    conn = Connectivity(depth=1, use_coordination=True)
    features = conn.fit_transform([METHANE])
    labels = conn.get_labels()
    self.assertEqual(features.shape[1], len(labels))
    self.assertEqual(labels, ('C4', 'H1'))
def test_fit_bond(self):
    # depth=2 fit over ALL_DATA: _base_chains must equal this set of
    # two-element tuples.
    conn = Connectivity(depth=2)
    conn.fit(ALL_DATA)
    expected_chains = {
        ('C', 'C'), ('C', 'H'), ('C', 'N'), ('C', 'O'),
        ('H', 'H'), ('H', 'N'), ('H', 'O'),
        ('O', 'O'),
    }
    self.assertEqual(conn._base_chains, expected_chains)
def test_fit_atom_bond(self):
    # This should be the exact same thing as doing it with
    # use_bond_order=False
    conn = Connectivity(depth=1, use_bond_order=True)
    conn.fit(ALL_DATA)
    expected_chains = {('C', ), ('H', ), ('N', ), ('O', )}
    self.assertEqual(conn._base_chains, expected_chains)
def test_get_labels_unknown(self):
    # depth=2 with add_unknown=True on METHANE: one label per feature column,
    # and the labels are 'C-H' followed by the UNKNOWN marker.
    conn = Connectivity(depth=2, add_unknown=True)
    features = conn.fit_transform([METHANE])
    labels = conn.get_labels()
    self.assertEqual(features.shape[1], len(labels))
    self.assertEqual(labels, ('C-H', UNKNOWN))
def test_fit_angle(self):
    # depth=3 fit over ALL_DATA: _base_groups must equal this ordered tuple
    # of three-element tuples.
    conn = Connectivity(depth=3)
    conn.fit(ALL_DATA)
    # Each string expands to a tuple of its single-character elements,
    # e.g. 'CCH' -> ('C', 'C', 'H').
    triples = [
        'CCC', 'CCH', 'CCN', 'CCO',
        'CNC', 'CNH',
        'COC', 'COH',
        'HCH', 'HCN', 'HCO',
        'HNH',
        'NCN', 'NCO',
    ]
    expected_groups = tuple(tuple(triple) for triple in triples)
    self.assertEqual(conn._base_groups, expected_groups)
def test_fit_bond_bond(self):
    # depth=2 with use_bond_order=True over ALL_DATA: every entry of
    # _base_groups is a 1-tuple wrapping a three-element tuple.
    conn = Connectivity(depth=2, use_bond_order=True)
    conn.fit(ALL_DATA)
    bonds = [
        ('C', 'C', '1'), ('C', 'C', '2'), ('C', 'C', '3'), ('C', 'C', 'Ar'),
        ('C', 'H', '1'),
        ('C', 'N', '2'), ('C', 'N', '3'), ('C', 'N', 'Ar'),
        ('C', 'O', '1'), ('C', 'O', 'Ar'),
        ('H', 'H', '1'), ('H', 'N', '1'), ('H', 'O', '1'),
        ('O', 'O', '1'),
    ]
    expected_groups = tuple((bond, ) for bond in bonds)
    self.assertEqual(conn._base_groups, expected_groups)
def test_tfidf(self):
    # do_tfidf=True: fit and transform ALL_DATA, then compare to the expected
    # matrix with numpy's almost-equal check.
    conn = Connectivity(do_tfidf=True)
    expected = numpy.array([
        [0., 0., 0., 0.],
        [0., 0., 0., 1.62186043],
        [0., 0., 5.49306144, 1.62186043],
    ])
    conn.fit(ALL_DATA)
    try:
        weighted = conn.transform(ALL_DATA)
        numpy.testing.assert_array_almost_equal(weighted, expected)
    except AssertionError as err:
        # Re-raise through unittest's own failure mechanism.
        self.fail(err)
def test_fit_dihedral(self):
    # This is to test the double order flipping (CCCH vs HCCC)
    conn = Connectivity(depth=4)
    conn.fit(ALL_DATA)
    # Each string expands to a tuple of its single-character elements,
    # e.g. 'CCCH' -> ('C', 'C', 'C', 'H').
    quads = [
        'CCCC', 'CCCH', 'CCCN', 'CCCO',
        'CCNC', 'CCNH',
        'CCOC', 'CCOH',
        'HCCH', 'HCCN', 'HCCO',
        'HCNC', 'HCOC',
        'NCCO', 'NCNC', 'NCNH', 'NCOH',
        'OCNC',
    ]
    expected_groups = tuple(tuple(quad) for quad in quads)
    self.assertEqual(conn._base_groups, expected_groups)
def test_fit(self):
    # Fit a FragmentMap wrapping a filename-based Connectivity on [ALL] and
    # compare the stored _x_fragments (converted to plain lists) by key.
    connectivity = Connectivity(input_type="filename")
    frag_map = FragmentMap(transformer=connectivity)
    frag_map.fit([ALL])
    expected = {'mid': [2, 3, 4], 'methane': [1, 4, 0]}
    simplified = {
        label: vector.tolist()
        for label, vector in frag_map._x_fragments.items()
    }
    self.assertEqual(expected, simplified)
def test_callable_filename_to_label(self):
    # A callable filename_to_label should be what _get_filename_to_label()
    # hands back: applying it to METHANE_PATH gives the last five characters.
    connectivity = Connectivity(input_type="filename")
    frag_map = FragmentMap(
        transformer=connectivity,
        filename_to_label=lambda x: x[-5:],
        label_to_filename=(DATA_PATH, ),
    )
    to_label = frag_map._get_filename_to_label()
    self.assertEqual(to_label(METHANE_PATH), METHANE_PATH[-5:])
def test_get_labels(self):
    # fit_transform on [ALL]: the size of the result's third axis must match
    # the number of labels, and the labels are exactly ('C', 'H', 'O').
    connectivity = Connectivity(input_type="filename")
    frag_map = FragmentMap(transformer=connectivity)
    result = frag_map.fit_transform([ALL])
    labels = frag_map.get_labels()
    self.assertEqual(result.shape[2], len(labels))
    self.assertEqual(labels, ('C', 'H', 'O'))
def test_get_labels_no_labels(self):
    # Same check as with a bare Connectivity, but with the transformer wrapped
    # in a Pipeline; here the expected labels are the strings '0', '1', '2'.
    pipeline = Pipeline([('Con', Connectivity(input_type="filename"))])
    frag_map = FragmentMap(transformer=pipeline)
    result = frag_map.fit_transform([ALL])
    labels = frag_map.get_labels()
    self.assertEqual(result.shape[2], len(labels))
    self.assertEqual(labels, ('0', '1', '2'))
def test_fit_transform(self):
    # fit_transform on [ALL] should produce the expected 3-D array, compared
    # with numpy's almost-equal check.
    connectivity = Connectivity(input_type="filename")
    frag_map = FragmentMap(transformer=connectivity)
    result = frag_map.fit_transform([ALL])
    expected = numpy.array([[[1, 4, 0],
                             [2, 3, 4]]])
    try:
        numpy.testing.assert_array_almost_equal(expected, result)
    except AssertionError as err:
        # Re-raise through unittest's own failure mechanism.
        self.fail(err)
def test_callable_label_to_filename(self):
    # A callable label_to_filename should be what _get_label_to_filename()
    # hands back: calling it with 'test' joins DATA_PATH and 'test'.
    connectivity = Connectivity(input_type="filename")

    def to_path(x):
        return os.path.join(DATA_PATH, x)

    frag_map = FragmentMap(transformer=connectivity, label_to_filename=to_path)
    resolver = frag_map._get_label_to_filename()
    self.assertEqual(resolver('test'), os.path.join(DATA_PATH, 'test'))
def test_label_to_filename(self):
    # With label_to_filename given as a tuple of directories, the label
    # 'methane' should resolve to METHANE_PATH with '.out' swapped for '.cry'.
    connectivity = Connectivity(input_type="filename")
    frag_map = FragmentMap(transformer=connectivity,
                           label_to_filename=(DATA_PATH, ))
    # The paths are sorted when searching
    expected_path = METHANE_PATH.replace('.out', '.cry')
    resolver = frag_map._get_label_to_filename()
    self.assertEqual(resolver('methane'), expected_path)
def test_label_to_filename_not_found(self):
    # Resolving the label 'not real' against DATA_PATH must raise ValueError.
    connectivity = Connectivity(input_type="filename")
    frag_map = FragmentMap(transformer=connectivity,
                           label_to_filename=(DATA_PATH, ))
    with self.assertRaises(ValueError):
        # Both the getter and the lookup stay inside the context manager, so
        # a ValueError from either one satisfies the assertion.
        frag_map._get_label_to_filename()('not real')
def test_fit_atom_separated(self):
    # depth=1 on METHANE2: _base_chains is {('C',), ('H',)} and transforming
    # the same input gives the single row [1, 4].
    conn = Connectivity(depth=1)
    conn.fit([METHANE2])
    self.assertEqual(conn._base_chains, {('C', ), ('H', )})
    matches = conn.transform([METHANE2]) == numpy.array([[1, 4]])
    self.assertTrue(matches.all())
def test_fit_atom(self):
    # depth=1 fit over ALL_DATA: _base_chains must be the four
    # single-element tuples for C, H, N and O.
    conn = Connectivity(depth=1)
    conn.fit(ALL_DATA)
    expected_chains = {('C', ), ('H', ), ('N', ), ('O', )}
    self.assertEqual(conn._base_chains, expected_chains)
def test_fit_transform(self):
    # Default Connectivity: fit_transform(ALL_DATA) must match ALL_ATOM
    # element-wise.
    conn = Connectivity()
    matches = conn.fit_transform(ALL_DATA) == ALL_ATOM
    self.assertTrue(matches.all())
def test_large_to_small_transform(self):
    # Fit on BIG only, then transform ALL_DATA; the result must match
    # ALL_ATOM element-wise.
    conn = Connectivity()
    conn.fit([BIG])
    matches = conn.transform(ALL_DATA) == ALL_ATOM
    self.assertTrue(matches.all())
def test_transform_before_fit(self):
    # Calling transform on an unfitted Connectivity must raise ValueError.
    unfitted = Connectivity()
    with self.assertRaises(ValueError):
        unfitted.transform(ALL_DATA)
def test_small_to_large_transform(self):
    # Fit on METHANE only, then transform ALL_DATA; the result must match
    # the first two columns of ALL_ATOM element-wise.
    conn = Connectivity()
    conn.fit([METHANE])
    first_two_columns = ALL_ATOM[:, :2]
    matches = conn.transform(ALL_DATA) == first_two_columns
    self.assertTrue(matches.all())
def test_fit_atom(self):
    # depth=1 fit over ALL_DATA: _base_groups must be this ordered tuple of
    # single-element tuples.
    conn = Connectivity(depth=1)
    conn.fit(ALL_DATA)
    expected_groups = (('C',), ('H',), ('N',), ('O',))
    self.assertEqual(conn._base_groups, expected_groups)
def test_invalid_label_to_filename(self):
    # label_to_filename here returns the integer 1 for every label;
    # fit_transform(ALL) is expected to raise KeyError.
    connectivity = Connectivity(input_type="filename")
    frag_map = FragmentMap(transformer=connectivity,
                           label_to_filename=lambda x: 1)
    with self.assertRaises(KeyError):
        frag_map.fit_transform(ALL)
def test_bad_input_type(self):
    # A FragmentMap built with input_type='bad' must raise ValueError on fit.
    connectivity = Connectivity(input_type="filename")
    frag_map = FragmentMap(input_type='bad', transformer=connectivity)
    with self.assertRaises(ValueError):
        frag_map.fit([ALL])
def test_label_input_type(self):
    # input_type='label' with DATA_PATH as the search directory. There is no
    # explicit assertion: the test passes as long as fit() does not raise for
    # the label list below.
    connectivity = Connectivity(input_type="filename")
    frag_map = FragmentMap(
        input_type='label',
        transformer=connectivity,
        label_to_filename=(DATA_PATH, ),
    )
    frag_map.fit([['methane', 'mid', 'bad']])