Esempio n. 1
0
def parse_equivalent_file(filename):
    """Parses the .dat file of equivalent structures.

    Each non-blank line holds a whitespace-separated pair of long identifiers
    where the first conformer was kept over the second. One entry is yielded
    per pair, keyed by the discarded conformer id.
    See merge_duplicate_information for how information is transferred to the
    kept conformer.

    Args:
      filename: string, path of the file to read (opened with gfile.GFile).

    Yields:
      dataset_pb2.Conformer with conformer_id set to the discarded conformer id
      and duplicated_by set to the kept conformer id.

    Raises:
      ValueError: if a non-blank line does not contain exactly two
        whitespace-separated fields. Errors raised by
        smu_parser_lib.parse_long_identifier propagate unchanged.
    """
    with gfile.GFile(filename) as f:
        for line_num, line in enumerate(f, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing empty line) carry no pair; the
                # bare tuple-unpack used to crash on them.
                continue
            if len(fields) != 2:
                raise ValueError(
                    'Expected 2 fields on line {} of {}, got {}: {!r}'.format(
                        line_num, filename, len(fields), line))
            kept_str, discard_str = fields

            _, _, kept_btid, kept_cid = smu_parser_lib.parse_long_identifier(
                kept_str)
            _, _, discard_btid, discard_cid = smu_parser_lib.parse_long_identifier(
                discard_str)
            # Convert to our conformer ids which include the btid
            kept_cid = kept_btid * 1000 + kept_cid
            discard_cid = discard_btid * 1000 + discard_cid

            yield dataset_pb2.Conformer(conformer_id=discard_cid,
                                        duplicated_by=kept_cid)
 def test_success_smu7(self):
     # A 7-heavy-atom long identifier splits into heavy atom count,
     # stoichiometry, bond topology id and conformer id.
     identifier = 'x07_c4o2fh7.618451.001'
     heavy_atoms, stoichiometry, topology_id, conformer_id = (
         smu_parser_lib.parse_long_identifier(identifier))
     self.assertEqual(
         (7, 'c4o2fh7', 618451, 1),
         (heavy_atoms, stoichiometry, topology_id, conformer_id))
 def test_success_smu2(self):
     # A 2-heavy-atom long identifier splits into heavy atom count,
     # stoichiometry, bond topology id and conformer id.
     identifier = 'x02_c2h2.123.456'
     heavy_atoms, stoichiometry, topology_id, conformer_id = (
         smu_parser_lib.parse_long_identifier(identifier))
     self.assertEqual(
         (2, 'c2h2', 123, 456),
         (heavy_atoms, stoichiometry, topology_id, conformer_id))
 def test_failure(self):
     # Text that is not a long identifier must be rejected with ValueError.
     not_an_identifier = 'Im a little teapot, short and stout'
     self.assertRaises(ValueError, smu_parser_lib.parse_long_identifier,
                       not_an_identifier)