Beispiel #1
0
 def test_fully_saturated(self):
   """Checks the stoichiometry of single heavy atoms with all their hydrogens."""
   # (heavy atom, hydrogen count string, expected canonical stoichiometry)
   cases = (
       ('C', '4', '(ch4)'),
       ('N', '3', '(nh3)'),
       ('O', '2', '(oh2)'),
       ('F', '1', '(fh)'),
   )
   for atom, hydrogens, expected in cases:
     topology = smu_utils_lib.create_bond_topology(atom, '', hydrogens)
     self.assertEqual(
         smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(topology),
         expected)
Beispiel #2
0
  def test_one_heavy(self):
    got = smu_utils_lib.create_bond_topology('C', '', '4')
    expected_str = '''
atoms: ATOM_C
atoms: ATOM_H
atoms: ATOM_H
atoms: ATOM_H
atoms: ATOM_H
bonds {
  atom_b: 1
  bond_type: BOND_SINGLE
}
bonds {
  atom_b: 2
  bond_type: BOND_SINGLE
}
bonds {
  atom_b: 3
  bond_type: BOND_SINGLE
}
bonds {
  atom_b: 4
  bond_type: BOND_SINGLE
}
'''
    expected = str_to_bond_topology(expected_str)
    self.assertEqual(str(expected), str(got))
Beispiel #3
0
def main(argv):
  """Writes one CSV row per bond topology line found in the input files.

  Input files matching FLAGS.input_glob are processed in sorted order and ids
  are assigned sequentially starting at 1 across all files. After the regular
  rows, one row is written for each entry of smu_utils_lib.SPECIAL_ID_CASES.

  Args:
    argv: command-line arguments; anything beyond the program name is an error.

  Raises:
    app.UsageError: if extra command-line arguments are given.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')

  with gfile.GFile(FLAGS.output_csv, 'w') as outfile:
    writer = csv.writer(outfile, dialect='unix', quoting=csv.QUOTE_MINIMAL)
    header = [
        'id', 'num_atoms', 'atoms_str', 'connectivity_matrix', 'hydrogens',
        'smiles'
    ]
    writer.writerow(header)

    next_id = 1
    for path in sorted(gfile.Glob(FLAGS.input_glob)):
      logging.info('Opening %s at id %d', path, next_id)
      with gfile.GFile(path) as infile:
        for line in infile:
          parsed = smu_utils_lib.parse_bond_topology_line(line)
          num_atoms, atoms, connectivity, hydrogens = parsed
          # The atoms string looks like 'C N N+O O-': every atom symbol is
          # followed by a space, + or -. create_bond_topology wants only the
          # symbols (e.g. 'CNNOO'), so take every second character.
          bare_atoms = atoms[::2]
          bond_topology = smu_utils_lib.create_bond_topology(
              bare_atoms, connectivity, hydrogens)
          smiles = smu_utils_lib.compute_smiles_for_bond_topology(
              bond_topology, include_hs=False)
          row = [next_id, num_atoms, atoms, connectivity, hydrogens, smiles]
          writer.writerow(row)
          next_id += 1

    # Add the special cases for SMU 1. Conveniently, the SMILES for each of
    # these is just the atom itself.
    for _, special_id, atom, valence in smu_utils_lib.SPECIAL_ID_CASES:
      writer.writerow([special_id, 1, atom, '', valence, atom])
Beispiel #4
0
    def add_bond_topology_to_conformer(self, conformer, btid):
        """Appends a synthetic chain-of-carbons bond topology to `conformer`.

        A simple rule ties the topology to its id: the SMILES is just `btid`
        copies of 'C', and the topology itself is built from `btid` carbons.

        Args:
          conformer: object whose `bond_topologies` supports `append`; it is
            modified in place.
          btid: positive integer used both as the bond topology id and as the
            number of carbons.

        Raises:
          ValueError: if `btid` is less than 1. (The previous recursive helper
            never terminated for such values.)
        """
        if btid < 1:
            raise ValueError(
                'btid must be a positive integer, got {!r}'.format(btid))

        def make_connectivity_matrix(num_c):
            # Each chunk is a '1' followed by (remaining - 2) zeros, for
            # remaining = num_c down to 2. Presumably each chunk is one row of
            # the upper-triangular connectivity matrix with only the bond to
            # the next atom set -- confirm against create_bond_topology.
            return ''.join('1' + '0' * (remaining - 2)
                           for remaining in range(num_c, 1, -1))

        if btid == 1:
            bt = smu_utils_lib.create_bond_topology('C', '', '4')
        else:
            # Three hydrogens on each end carbon, two on every interior one.
            hydrogens = '3' + ('2' * (btid - 2)) + '3'
            bt = smu_utils_lib.create_bond_topology(
                'C' * btid, make_connectivity_matrix(btid), hydrogens)
        bt.bond_topology_id = btid
        bt.smiles = 'C' * btid
        conformer.bond_topologies.append(bt)
Beispiel #5
0
  def test_charged(self):
    # This is actually C N N+O-
    got = smu_utils_lib.create_bond_topology('CNNO', '200101', '2020')
    expected_str = '''
atoms: ATOM_C
atoms: ATOM_N
atoms: ATOM_NPOS
atoms: ATOM_ONEG
atoms: ATOM_H
atoms: ATOM_H
atoms: ATOM_H
atoms: ATOM_H
bonds {
  atom_b: 1
  bond_type: BOND_DOUBLE
}
bonds {
  atom_a: 1
  atom_b: 2
  bond_type: BOND_SINGLE
}
bonds {
  atom_a: 2
  atom_b: 3
  bond_type: BOND_SINGLE
}
bonds {
  atom_b: 4
  bond_type: BOND_SINGLE
}
bonds {
  atom_b: 5
  bond_type: BOND_SINGLE
}
bonds {
  atom_a: 2
  atom_b: 6
  bond_type: BOND_SINGLE
}
bonds {
  atom_a: 2
  atom_b: 7
  bond_type: BOND_SINGLE
}
'''
    expected = str_to_bond_topology(expected_str)
    self.assertEqual(str(expected), str(got))
  def parse_bond_topology(self):
    """Parse region with adjacency matrix, hydrogen count, smiles, and atom types.

    Reads four raw lines, in order: the adjacency code, the per-atom hydrogen
    count digits, the SMILES string and the entry id. A new bond topology
    built from them is then added to self._conformer.bond_topologies.
    """

    def next_raw_line():
      # Consumes exactly one raw line from the input.
      return self.parse(ParseModes.RAW, num_lines=1)[0]

    adjacency_code = str(next_raw_line()).strip()
    # One digit per atom.
    hydrogen_counts = list(map(int, str(next_raw_line()).strip()))
    smiles = next_raw_line()
    entry_id = str(next_raw_line()).strip()
    assert entry_id.startswith('x'), 'Expected line like x02_c2h2'

    # Drop the first four characters (e.g. 'x02_'); the rest is the
    # stoichiometry-like atom type string.
    expanded_atom_types = self.expand_atom_types(entry_id[4:].lower())

    topologies = self._conformer.bond_topologies
    topologies.add()
    new_topology = topologies[-1]
    new_topology.CopyFrom(
        smu_utils_lib.create_bond_topology(expanded_atom_types, adjacency_code,
                                           hydrogen_counts))
    # Remove any single quotes and surrounding whitespace from the SMILES.
    new_topology.smiles = str(smiles).replace('\'', '').strip()
Beispiel #7
0
  def test_no_charged(self):
    got = smu_utils_lib.create_bond_topology('CNFF', '111000', '1200')
    expected_str = '''
atoms: ATOM_C
atoms: ATOM_N
atoms: ATOM_F
atoms: ATOM_F
atoms: ATOM_H
atoms: ATOM_H
atoms: ATOM_H
bonds {
  atom_b: 1
  bond_type: BOND_SINGLE
}
bonds {
  atom_b: 2
  bond_type: BOND_SINGLE
}
bonds {
  atom_b: 3
  bond_type: BOND_SINGLE
}
bonds {
  atom_b: 4
  bond_type: BOND_SINGLE
}
bonds {
  atom_a: 1
  atom_b: 5
  bond_type: BOND_SINGLE
}
bonds {
  atom_a: 1
  atom_b: 6
  bond_type: BOND_SINGLE
}
'''
    expected = str_to_bond_topology(expected_str)
    self.assertEqual(str(expected), str(got))
Beispiel #8
0
 def test_ethylene(self):
   """Two doubly bonded carbons with two hydrogens each give '(ch2)2'."""
   stoichiometry = smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
       smu_utils_lib.create_bond_topology('CC', '2', '22'))
   self.assertEqual(stoichiometry, '(ch2)2')
Beispiel #9
0
 def test_cyclobutane(self):
   """Four carbons with two hydrogens each give '(ch2)4'."""
   stoichiometry = smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
       smu_utils_lib.create_bond_topology('CCCC', '110011', '2222'))
   self.assertEqual(stoichiometry, '(ch2)4')
Beispiel #10
0
 def test_nplus_oneg(self):
   """A nitrogen with three hydrogens bonded to a bare oxygen gives '(nh3)(o)'."""
   stoichiometry = smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
       smu_utils_lib.create_bond_topology('NO', '1', '30'))
   self.assertEqual(stoichiometry, '(nh3)(o)')
Beispiel #11
0
 def test_fluorine(self):
   """An oxygen and two fluorines, none with hydrogens, give '(o)(f)2'."""
   stoichiometry = smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
       smu_utils_lib.create_bond_topology('OFF', '110', '000'))
   self.assertEqual(stoichiometry, '(o)(f)2')
Beispiel #12
0
 def test_acrylic_acid(self):
   """Checks that atoms with differing hydrogen counts are grouped separately."""
   stoichiometry = smu_utils_lib.get_canonical_stoichiometry_with_hydrogens(
       smu_utils_lib.create_bond_topology('CCCOO', '2000100210', '21001'))
   self.assertEqual(stoichiometry, '(c)(ch)(ch2)(o)(oh)')