def test_fully_saturated(self):
    """Check stoichiometry strings for single heavy atoms with hydrogens.

    Builds one-atom topologies for C, N, O and F with 4, 3, 2 and 1
    hydrogens respectively (empty connectivity string) and compares the
    canonical stoichiometry-with-hydrogens string for each.
    """
    cases = (
        ('C', '4', '(ch4)'),
        ('N', '3', '(nh3)'),
        ('O', '2', '(oh2)'),
        ('F', '1', '(fh)'),
    )
    # Plain loop (no subTest): stops at the first failing case, in the
    # order listed above.
    for atom, hydrogens, expected in cases:
        topology = smu_utils_lib.create_bond_topology(atom, '', hydrogens)
        self.assertEqual(
            smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
                topology),
            expected)
def test_one_heavy(self):
    """A lone carbon with four hydrogens yields four single bonds.

    The expected topology lists the carbon first, then the four
    hydrogens, with one BOND_SINGLE to each hydrogen index (1 through 4).
    """
    expected_text = (
        ' atoms: ATOM_C'
        ' atoms: ATOM_H'
        ' atoms: ATOM_H'
        ' atoms: ATOM_H'
        ' atoms: ATOM_H'
        ' bonds { atom_b: 1 bond_type: BOND_SINGLE }'
        ' bonds { atom_b: 2 bond_type: BOND_SINGLE }'
        ' bonds { atom_b: 3 bond_type: BOND_SINGLE }'
        ' bonds { atom_b: 4 bond_type: BOND_SINGLE }'
        ' '
    )
    actual = smu_utils_lib.create_bond_topology('C', '', '4')
    wanted = str_to_bond_topology(expected_text)
    # Compare string renderings of the two topologies.
    self.assertEqual(str(wanted), str(actual))
def main(argv):
    """Write a CSV describing every bond topology found in the input files.

    A header row is written to FLAGS.output_csv, followed by one row per
    line of each file matching FLAGS.input_glob (files are visited in
    sorted name order). Ids start at 1 and increase by one per line across
    all files. Finally, one row per entry of
    smu_utils_lib.SPECIAL_ID_CASES is appended using the id stored in that
    entry.

    Args:
      argv: command-line arguments; anything beyond the program name is
        rejected.

    Raises:
      app.UsageError: if more than one element is present in argv.
    """
    if len(argv) > 1:
        raise app.UsageError('Too many command-line arguments.')

    header = [
        'id', 'num_atoms', 'atoms_str', 'connectivity_matrix', 'hydrogens',
        'smiles'
    ]

    with gfile.GFile(FLAGS.output_csv, 'w') as csv_file:
        rows = csv.writer(csv_file, dialect='unix', quoting=csv.QUOTE_MINIMAL)
        rows.writerow(header)

        next_id = 1
        for path in sorted(gfile.Glob(FLAGS.input_glob)):
            logging.info('Opening %s at id %d', path, next_id)
            with gfile.GFile(path) as topology_file:
                for raw_line in topology_file:
                    parsed = smu_utils_lib.parse_bond_topology_line(raw_line)
                    num_atoms, atoms, connectivity, hydrogens = parsed
                    # The atoms string looks like 'C N N+O O-': every atom
                    # is followed by a space, + or -. create_bond_topology
                    # wants only the element letters (e.g. 'CNNOO'), so
                    # take every second character.
                    elements_only = atoms[::2]
                    topology = smu_utils_lib.create_bond_topology(
                        elements_only, connectivity, hydrogens)
                    smiles = smu_utils_lib.compute_smiles_for_bond_topology(
                        topology, include_hs=False)
                    rows.writerow([
                        next_id, num_atoms, atoms, connectivity, hydrogens,
                        smiles
                    ])
                    next_id += 1

        # Add the special cases for SMU 1. Each carries its own id, and its
        # SMILES is conveniently just the atom symbol.
        for _, special_id, atom, valence in smu_utils_lib.SPECIAL_ID_CASES:
            rows.writerow([special_id, 1, atom, '', valence, atom])
def add_bond_topology_to_conformer(self, conformer, btid):
    """Append a synthetic all-carbon bond topology to `conformer`.

    A simple rule is used: the SMILES is just `btid` copies of 'C'. For
    btid == 1 the topology is a single carbon with four hydrogens;
    otherwise it has `btid` carbons with hydrogen counts 3, 2, ..., 2, 3.

    Args:
      conformer: object whose `bond_topologies` receives the new topology.
      btid: bond topology id; also the number of carbon atoms.
    """

    def chain_connectivity(carbons):
        # First atom's row: '1' for its neighbour, '0' for each remaining
        # atom; then the rows for the rest of the atoms follow.
        # NOTE(review): presumably this encodes a linear chain in the
        # format create_bond_topology expects -- confirm against that
        # function.
        if carbons == 2:
            return '1'
        first_row = '1' + ('0' * (carbons - 2))
        return first_row + chain_connectivity(carbons - 1)

    if btid == 1:
        atoms, connectivity, hydrogens = 'C', '', '4'
    else:
        atoms = 'C' * btid
        connectivity = chain_connectivity(btid)
        hydrogens = '3' + ('2' * (btid - 2)) + '3'

    topology = smu_utils_lib.create_bond_topology(
        atoms, connectivity, hydrogens)
    topology.bond_topology_id = btid
    topology.smiles = 'C' * btid
    conformer.bond_topologies.append(topology)
def test_charged(self):
    """Atoms given as 'CNNO' come back with charged N and O atom types.

    The expected topology contains ATOM_NPOS and ATOM_ONEG for the third
    and fourth heavy atoms even though the input string carries no charge
    markers.
    """
    # This is actually C N N+O-
    expected_text = (
        ' atoms: ATOM_C'
        ' atoms: ATOM_N'
        ' atoms: ATOM_NPOS'
        ' atoms: ATOM_ONEG'
        ' atoms: ATOM_H'
        ' atoms: ATOM_H'
        ' atoms: ATOM_H'
        ' atoms: ATOM_H'
        ' bonds { atom_b: 1 bond_type: BOND_DOUBLE }'
        ' bonds { atom_a: 1 atom_b: 2 bond_type: BOND_SINGLE }'
        ' bonds { atom_a: 2 atom_b: 3 bond_type: BOND_SINGLE }'
        ' bonds { atom_b: 4 bond_type: BOND_SINGLE }'
        ' bonds { atom_b: 5 bond_type: BOND_SINGLE }'
        ' bonds { atom_a: 2 atom_b: 6 bond_type: BOND_SINGLE }'
        ' bonds { atom_a: 2 atom_b: 7 bond_type: BOND_SINGLE }'
        ' '
    )
    actual = smu_utils_lib.create_bond_topology('CNNO', '200101', '2020')
    wanted = str_to_bond_topology(expected_text)
    # Compare string renderings of the two topologies.
    self.assertEqual(str(wanted), str(actual))
def parse_bond_topology(self):
    """Parse region with adjacency matrix, hydrogen counts, smiles, and id.

    Consumes four raw lines, in this order: the adjacency code, the
    per-atom hydrogen counts (one digit per atom), the SMILES string, and
    an entry id line such as 'x02_c2h2'. A new bond topology built from
    these is appended to the conformer and its `smiles` field is set.

    Raises:
      AssertionError: if the entry id line does not start with 'x'.
    """

    def next_raw_line():
        # Each call consumes exactly one line from the parser.
        return self.parse(ParseModes.RAW, num_lines=1)[0]

    adjacency_code = str(next_raw_line()).strip()
    hydrogen_digits = str(next_raw_line()).strip()
    hydrogen_counts = [int(digit) for digit in hydrogen_digits]
    raw_smiles = next_raw_line()
    entry_id = str(next_raw_line()).strip()
    assert entry_id.startswith('x'), 'Expected line like x02_c2h2'

    # Drop the first four characters (e.g. 'x02_') to leave the atom types.
    atom_types = entry_id[4:].lower()
    expanded_atom_types = self.expand_atom_types(atom_types)

    # Add the (empty) entry first, then fill it in from the built topology.
    new_topology = self._conformer.bond_topologies.add()
    new_topology.CopyFrom(
        smu_utils_lib.create_bond_topology(expanded_atom_types,
                                           adjacency_code, hydrogen_counts))
    # Strip any single quotes and surrounding whitespace from the SMILES.
    new_topology.smiles = str(raw_smiles).replace('\'', '').strip()
def test_no_charged(self):
    """Atoms 'CNFF' produce only uncharged atom types.

    The expected topology has carbon single-bonded to N and both F atoms,
    one hydrogen on the carbon and two hydrogens on the nitrogen.
    """
    expected_text = (
        ' atoms: ATOM_C'
        ' atoms: ATOM_N'
        ' atoms: ATOM_F'
        ' atoms: ATOM_F'
        ' atoms: ATOM_H'
        ' atoms: ATOM_H'
        ' atoms: ATOM_H'
        ' bonds { atom_b: 1 bond_type: BOND_SINGLE }'
        ' bonds { atom_b: 2 bond_type: BOND_SINGLE }'
        ' bonds { atom_b: 3 bond_type: BOND_SINGLE }'
        ' bonds { atom_b: 4 bond_type: BOND_SINGLE }'
        ' bonds { atom_a: 1 atom_b: 5 bond_type: BOND_SINGLE }'
        ' bonds { atom_a: 1 atom_b: 6 bond_type: BOND_SINGLE }'
        ' '
    )
    actual = smu_utils_lib.create_bond_topology('CNFF', '111000', '1200')
    wanted = str_to_bond_topology(expected_text)
    # Compare string renderings of the two topologies.
    self.assertEqual(str(wanted), str(actual))
def test_ethylene(self):
    """Two carbons with two hydrogens each give '(ch2)2'."""
    topology = smu_utils_lib.create_bond_topology('CC', '2', '22')
    stoichiometry = smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
        topology)
    self.assertEqual(stoichiometry, '(ch2)2')
def test_cyclobutane(self):
    """Four carbons with two hydrogens each give '(ch2)4'."""
    topology = smu_utils_lib.create_bond_topology('CCCC', '110011', '2222')
    stoichiometry = smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
        topology)
    self.assertEqual(stoichiometry, '(ch2)4')
def test_nplus_oneg(self):
    """Nitrogen with three hydrogens bonded to a bare oxygen: '(nh3)(o)'."""
    topology = smu_utils_lib.create_bond_topology('NO', '1', '30')
    stoichiometry = smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
        topology)
    self.assertEqual(stoichiometry, '(nh3)(o)')
def test_fluorine(self):
    """An oxygen and two fluorines with no hydrogens give '(o)(f)2'."""
    topology = smu_utils_lib.create_bond_topology('OFF', '110', '000')
    stoichiometry = smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
        topology)
    self.assertEqual(stoichiometry, '(o)(f)2')
def test_acrylic_acid(self):
    """Heavy atoms with differing hydrogen counts are grouped separately.

    Atoms 'CCCOO' with hydrogen counts '21001' are expected to produce
    '(c)(ch)(ch2)(o)(oh)'.
    """
    topology = smu_utils_lib.create_bond_topology(
        'CCCOO', '2000100210', '21001')
    stoichiometry = smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
        topology)
    self.assertEqual(stoichiometry, '(c)(ch)(ch2)(o)(oh)')