def setUp(self):
    """Creates `self.dists` with fixed-window entries for two bond types.

    Single bonds get the window (1.2, 1.8) and double bonds the window
    (1.0, 1.4); neither has a right tail.
    """
    super().setUp()
    # (bond type, window minimum, window maximum), in insertion order.
    window_specs = (
        (dataset_pb2.BondTopology.BOND_SINGLE, 1.2, 1.8),
        (dataset_pb2.BondTopology.BOND_DOUBLE, 1.0, 1.4),
    )
    self.dists = bond_length_distribution.AtomPairLengthDistributions()
    for bond_type, lower, upper in window_specs:
        window = bond_length_distribution.FixedWindowLengthDistribution(
            lower, upper, None)
        self.dists.add(bond_type, window)
def test_probability_bond_types(self):
    """Checks bond type probabilities for an N-O pair at length 1.5."""
    nitrogen = dataset_pb2.BondTopology.ATOM_N
    oxygen = dataset_pb2.BondTopology.ATOM_O
    single = dataset_pb2.BondTopology.BOND_SINGLE
    double = dataset_pb2.BondTopology.BOND_DOUBLE

    all_dists = bond_length_distribution.AllAtomPairLengthDistributions()
    # Both windows start at 1; single bonds extend to 4, double bonds to 2.
    for bond_type, upper in ((single, 4), (double, 2)):
        window = bond_length_distribution.FixedWindowLengthDistribution(
            1, upper, None)
        all_dists.add(nitrogen, oxygen, bond_type, window)

    got = all_dists.probability_of_bond_types(nitrogen, oxygen, 1.5)

    # The expected values are consistent with normalizing the uniform
    # densities at 1.5 (1/3 for the width-3 window, 1 for the width-1
    # window) so that they sum to 1.
    self.assertLen(got, 2)
    self.assertAlmostEqual(got[single], 0.25)
    self.assertAlmostEqual(got[double], 0.75)
def _parse_bond_lengths_arg(self, bond_lengths_arg):
    """Parses the bond length argument into `self.bond_lengths`.

    The argument is a comma separated list of terms. Each term is read as
    three single characters (atom spec, bond spec, atom spec), a ':' and
    then '<min>-<max>', where either bound may be left empty. The atom and
    bond characters are looked up in `self._ATOM_SPECIFICATION_MAP` and
    `self._BOND_SPECIFICATION_MAP`; every combination of the resulting
    atoms and bonds gets a fixed-window distribution added.

    Args:
      bond_lengths_arg: the specification string; a falsy value is a no-op.

    Raises:
      BondLengthParseError: if a term cannot be parsed. Any KeyError,
        IndexError or ValueError raised while handling the term is
        reported this way, with the original exception as the cause.
    """
    if not bond_lengths_arg:
        return

    for raw_term in bond_lengths_arg.split(','):
        term = raw_term.strip()
        try:
            first_atoms = self._ATOM_SPECIFICATION_MAP[term[0]]
            bond_types = self._BOND_SPECIFICATION_MAP[term[1]]
            second_atoms = self._ATOM_SPECIFICATION_MAP[term[2]]
            if term[3] != ':':
                raise BondLengthParseError(term)

            # Anything other than exactly one '-' fails the unpacking with
            # a ValueError, which is reported as a parse error below.
            lower_text, upper_text = term[4:].split('-')
            lower = float(lower_text) if lower_text else 0
            if upper_text:
                upper = float(upper_text)
                tail_mass = None
            else:
                # No maximum given: use a narrow window with most of the
                # mass in the right tail. These numbers are pretty
                # arbitrary.
                upper = lower + 0.1
                tail_mass = 0.9

            combinations = itertools.product(
                first_atoms, second_atoms, bond_types)
            for atom_a, atom_b, bond in combinations:
                window = (
                    bond_length_distribution.FixedWindowLengthDistribution(
                        lower, upper, tail_mass))
                self.bond_lengths.add(atom_a, atom_b, bond, window)
        except (KeyError, IndexError, ValueError) as parse_failure:
            raise BondLengthParseError(term) from parse_failure
def test_right_tail(self):
    """Checks pdf values in the window and just past it with a right tail."""
    dist = bond_length_distribution.FixedWindowLengthDistribution(
        3, 5, right_tail_mass=0.8)

    # (length, expected pdf) pairs, checked in order.
    expectations = (
        # 0.2 of the mass is in the window, divided by 2 (the size of the
        # window) gives 0.1.
        (3.456, 0.1),
        (5, 0.1),
        # Slightly above the maximum, to make sure the left side of the
        # right tail is correct.
        (5.00000001, 0.1),
        (6, 0.08824969),
    )
    for length, expected_pdf in expectations:
        self.assertAlmostEqual(dist.pdf(length), expected_pdf)
def test_atom_ordering(self):
    """Checks that lookups give the same pdf for either atom order."""
    nitrogen = dataset_pb2.BondTopology.ATOM_N
    oxygen = dataset_pb2.BondTopology.ATOM_O
    single = dataset_pb2.BondTopology.BOND_SINGLE
    double = dataset_pb2.BondTopology.BOND_DOUBLE
    # Distributions are always added as (N, O) but queried both ways.
    both_orders = ((nitrogen, oxygen), (oxygen, nitrogen))

    all_dists = bond_length_distribution.AllAtomPairLengthDistributions()
    all_dists.add(
        nitrogen, oxygen, single,
        bond_length_distribution.FixedWindowLengthDistribution(1, 2, None))

    # 1.5 is inside the (1, 2) window; 999 is far outside it.
    for length, expected_pdf in ((1.5, 1), (999, 0)):
        for atom_a, atom_b in both_orders:
            self.assertEqual(
                all_dists.pdf_length_given_type(
                    atom_a, atom_b, single, length),
                expected_pdf)

    # Make sure subsequent additions work as well.
    all_dists.add(
        nitrogen, oxygen, double,
        bond_length_distribution.FixedWindowLengthDistribution(2, 3, None))
    for atom_a, atom_b in both_orders:
        self.assertEqual(
            all_dists.pdf_length_given_type(atom_a, atom_b, double, 2.5),
            1)
def test_missing_types(self):
    """Checks that querying an atom pair that was never added raises KeyError."""
    carbon = dataset_pb2.BondTopology.ATOM_C
    single = dataset_pb2.BondTopology.BOND_SINGLE

    all_dists = bond_length_distribution.AllAtomPairLengthDistributions()
    # Only N-O is registered, so every C-C query below is unknown.
    all_dists.add(
        dataset_pb2.BondTopology.ATOM_N,
        dataset_pb2.BondTopology.ATOM_O,
        single,
        bond_length_distribution.FixedWindowLengthDistribution(1, 2, None))

    with self.assertRaises(KeyError):
        all_dists.probability_of_bond_types(carbon, carbon, 1.0)

    with self.assertRaises(KeyError):
        all_dists.pdf_length_given_type(carbon, carbon, single, 1.0)
def test_simple(self):
    """Checks the pdf of a (3, 5) window with no right tail."""
    dist = bond_length_distribution.FixedWindowLengthDistribution(
        3, 5, None)

    # (length, expected pdf): just below the window, just above it, and
    # inside it.
    expectations = (
        (2.9, 0.0),
        (5.1, 0.0),
        (3.456, 0.5),
    )
    for length, expected_pdf in expectations:
        self.assertAlmostEqual(dist.pdf(length), expected_pdf)
def test_multi_topology_detection(self):
    """Tests that we can find multiple versions of the same topology."""
    single = dataset_pb2.BondTopology.BondType.BOND_SINGLE
    double = dataset_pb2.BondTopology.BondType.BOND_DOUBLE
    nitrogen = dataset_pb2.BondTopology.ATOM_N
    # Atom index pairs around the four-membered ring, in bond order.
    ring_pairs = ((0, 1), (1, 2), (2, 3), (3, 0))

    all_dist = bond_length_distribution.AllAtomPairLengthDistributions()
    for bond_type in (single, double):
        all_dist.add(
            nitrogen, nitrogen, bond_type,
            bond_length_distribution.FixedWindowLengthDistribution(
                1.0, 2.0, None))

    # This conformer is a flat aromatic square of nitrogens. The single
    # and double bonds can be rotated such that it is the same topology
    # but the individual bonds have switched single/double.
    conformer = dataset_pb2.Conformer()
    conformer.bond_topologies.add(bond_topology_id=123, smiles="N1=NN=N1")
    conformer.bond_topologies[0].atoms.extend([nitrogen] * 4)
    conformer.bond_topologies[0].bonds.extend([
        dataset_pb2.BondTopology.Bond(
            atom_a=atom_a, atom_b=atom_b, bond_type=bond_type)
        for (atom_a, atom_b), bond_type in zip(
            ring_pairs, (single, double, single, double))
    ])

    # Side length of the square: 1.5 divided by BOHR_TO_ANGSTROMS.
    dist15a = 1.5 / smu_utils_lib.BOHR_TO_ANGSTROMS
    corners = ((0, 0), (0, dist15a), (dist15a, dist15a), (dist15a, 0))
    conformer.optimized_geometry.atom_positions.extend([
        dataset_pb2.Geometry.AtomPos(x=x, y=y, z=0) for x, y in corners
    ])

    matching_parameters = smu_molecule.MatchingParameters()
    result = topology_from_geom.bond_topologies_from_geom(
        bond_lengths=all_dist,
        conformer_id=123,
        fate=dataset_pb2.Conformer.FATE_SUCCESS,
        bond_topology=conformer.bond_topologies[0],
        geometry=conformer.optimized_geometry,
        matching_parameters=matching_parameters)

    self.assertLen(result.bond_topology, 2)

    # The returned order is arbitrary, so we figure out which one is
    # marked as the starting topology.
    starting_idx = min(
        idx for idx, topology in enumerate(result.bond_topology)
        if topology.is_starting_topology)
    # With exactly two results, the other index is the complement.
    other_idx = 1 - starting_idx

    starting = result.bond_topology[starting_idx]
    self.assertTrue(starting.is_starting_topology)
    for (atom_a, atom_b), expected_type in zip(
            ring_pairs, (single, double, single, double)):
        self.assertEqual(
            smu_utils_lib.get_bond_type(starting, atom_a, atom_b),
            expected_type)

    other = result.bond_topology[other_idx]
    self.assertFalse(other.is_starting_topology)
    for (atom_a, atom_b), expected_type in zip(
            ring_pairs, (double, single, double, single)):
        self.assertEqual(
            smu_utils_lib.get_bond_type(other, atom_a, atom_b),
            expected_type)