def parse_equivalent_file(filename):
  """Parses the .dat of equivalent structure.

  The file is just pairs of entries where the first was kept over the second.
  Yields one entry per non-blank line, keyed by the discarded conformer id.
  See merge_duplicate_information for how information is transferred to the
  kept conformer.

  Args:
    filename: string

  Yields:
    dataset_pb2.Conformer with conformer_id set to the id of the discarded
    conformer and duplicated_by set to the id of the kept conformer.

  Raises:
    ValueError: if a non-blank line does not hold exactly two
      whitespace-separated identifiers. Errors raised by
      smu_parser_lib.parse_long_identifier propagate unchanged.
  """
  with gfile.GFile(filename) as f:
    for line_number, line in enumerate(f, start=1):
      fields = line.split()
      if not fields:
        # A blank line (e.g. a trailing newline at the end of the file)
        # carries no pair; skip it instead of failing in the unpacking below.
        continue
      if len(fields) != 2:
        raise ValueError(
            'Expected 2 identifiers on line {} of {}, got {}: {!r}'.format(
                line_number, filename, len(fields), line))
      kept_str, discard_str = fields

      _, _, kept_btid, kept_cid = smu_parser_lib.parse_long_identifier(
          kept_str)
      _, _, discard_btid, discard_cid = smu_parser_lib.parse_long_identifier(
          discard_str)

      # Convert to our conformer ids which include the btid.
      # NOTE(review): this packing presumably relies on the parsed cid always
      # being below 1000 -- confirm against the identifier format.
      kept_cid = kept_btid * 1000 + kept_cid
      discard_cid = discard_btid * 1000 + discard_cid

      yield dataset_pb2.Conformer(conformer_id=discard_cid,
                                  duplicated_by=kept_cid)
def test_success_smu7(self):
  """Checks the four fields parsed from 'x07_c4o2fh7.618451.001'."""
  parsed = smu_parser_lib.parse_long_identifier('x07_c4o2fh7.618451.001')
  # Unpacking also verifies that exactly four fields came back.
  heavy_atom_count, stoichiometry, bt_id, conf_id = parsed

  self.assertEqual(7, heavy_atom_count)
  self.assertEqual('c4o2fh7', stoichiometry)
  self.assertEqual(618451, bt_id)
  self.assertEqual(1, conf_id)
def test_success_smu2(self):
  """Checks the four fields parsed from 'x02_c2h2.123.456'."""
  parsed = smu_parser_lib.parse_long_identifier('x02_c2h2.123.456')
  # Unpacking also verifies that exactly four fields came back.
  heavy_atom_count, stoichiometry, bt_id, conf_id = parsed

  self.assertEqual(2, heavy_atom_count)
  self.assertEqual('c2h2', stoichiometry)
  self.assertEqual(123, bt_id)
  self.assertEqual(456, conf_id)
def test_failure(self):
  """Checks that a string that is not a long identifier raises ValueError."""
  self.assertRaises(
      ValueError,
      smu_parser_lib.parse_long_identifier,
      'Im a little teapot, short and stout')