def get_stage1_header(self, conformer, identifier):
    """Builds the separator and first record line for the stage1 format.

    The stage1 format only holds the results of geometry optimization, so
    this line carries the four error codes from `properties.errors` next to
    the atom count and the dotted ID.

    Args:
      conformer: dataset_pb2.Conformer.
      identifier: string like x07_c3n3oh7

    Returns:
      A multiline string: the separator line, an optional annotation
      comment (when `self.annotate` is set), and the formatted header line.
    """
    # The atom count is read from the first bond topology.
    atom_count = len(conformer.bond_topologies[0].atoms)

    pieces = [smu_parser_lib.SEPARATOR_LINE + '\n']
    if self.annotate:
        pieces.append(
            '# From original_conformer_index, topology, bond_topology_id, '
            'error_{nstat1, nstatc, nstatt, frequences} conformer_id\n')

    errs = conformer.properties.errors

    # The ID is read from the last bond topology. Ids that the special-case
    # lookup recognizes are written out as 0 in this stage.
    bt_id = conformer.bond_topologies[-1].bond_topology_id
    if smu_utils_lib.special_case_dat_id_from_bt_id(bt_id):
        bt_id = 0

    index_field = self._conformer_index_string(conformer)
    # Only the last three decimal digits of the conformer id are written.
    short_conformer_id = conformer.conformer_id % 1000

    pieces.append(
        f'{index_field:5s}'
        f'{errs.error_nstat1:5d}{errs.error_nstatc:5d}'
        f'{errs.error_nstatt:5d}{errs.error_frequencies:5d}'
        f'{atom_count:5d} '
        f'{identifier:s}.{bt_id:06d}.{short_conformer_id:03d}\n')
    return ''.join(pieces)
def get_stage2_header(self, conformer, identifier):
    """Builds the separator and first record line for the stage2 format.

    Stage2 is the format at the end of the pipeline; its header line holds
    the conformer index string, the atom count and the dotted ID.

    Args:
      conformer: dataset_pb2.Conformer.
      identifier: string like x07_c3n3oh7

    Returns:
      A multiline string: the separator line, an optional annotation
      comment (when `self.annotate` is set), and the formatted header line.
    """
    # The atom count is read from the first bond topology.
    atom_count = len(conformer.bond_topologies[0].atoms)

    pieces = [smu_parser_lib.SEPARATOR_LINE + '\n']
    if self.annotate:
        pieces.append('# From original_conformer_index, topology, '
                      'bond_topology_id, conformer_id\n')

    # The ID is read from the last bond topology. Ids that the special-case
    # lookup recognizes are written out as 0 in the header.
    bt_id = conformer.bond_topologies[-1].bond_topology_id
    if smu_utils_lib.special_case_dat_id_from_bt_id(bt_id):
        bt_id = 0

    index_field = self._conformer_index_string(conformer)
    # Only the last three decimal digits of the conformer id are written.
    short_conformer_id = conformer.conformer_id % 1000

    # `!s` stringifies first, so the fill/align specs pad the text exactly
    # like str(...).rjust(width, fill) would.
    pieces.append(
        f'{index_field!s}{atom_count!s:>5} '
        f'{identifier!s}.{bt_id!s:0>6}.{short_conformer_id!s:0>3}\n')
    return ''.join(pieces)
def get_ids(self, molecule, stage, bt_idx):
    """Returns the identifier lines for one molecule.

    This includes the SMILES string, the composition line produced by
    `smu_utils_lib.get_composition`, and the ID line. We need to know the
    stage because the SMU1 special cases are handled differently in the two
    stages.

    Args:
      molecule: dataset_pb2.Molecule
      stage: 'stage1' or 'stage2'
      bt_idx: bond topology index

    Returns:
      A multiline string representation of id lines.

    Raises:
      ValueError: if the bond topology id is a special case and `stage` is
        neither 'stage1' nor 'stage2'.
    """
    topology = molecule.bond_topologies[bt_idx]
    annotate = self.annotate
    pieces = []

    if annotate:
        pieces.append('# From smiles or properties.smiles_openbabel\n')
    # Prefer the OpenBabel SMILES when that field is set on the properties.
    if molecule.properties.HasField('smiles_openbabel'):
        pieces.append(molecule.properties.smiles_openbabel + '\n')
    else:
        pieces.append(topology.smiles + '\n')

    if annotate:
        pieces.append('# From topology\n')
    pieces.append(smu_utils_lib.get_composition(topology) + '\n')

    if annotate:
        pieces.append('# From bond_topology_id, molecule_id\n')

    bt_id = topology.bond_topology_id
    # SMU1 special cases: stage1 writes 0, stage2 writes the mapped dat id.
    # The stage is only validated when the id is such a special case.
    if smu_utils_lib.special_case_dat_id_from_bt_id(bt_id):
        if stage not in ('stage1', 'stage2'):
            raise ValueError(f'Unknown stage {stage}')
        if stage == 'stage1':
            bt_id = 0
        else:
            bt_id = smu_utils_lib.special_case_dat_id_from_bt_id(bt_id)

    # Only the last three decimal digits of the molecule id are written.
    short_molecule_id = molecule.molecule_id % 1000
    pieces.append(f'ID{bt_id:8d}{short_molecule_id:8d}\n')
    return ''.join(pieces)
def get_ids(self, conformer, identifier, stage):
    """Returns the identifier lines for one conformer.

    This includes the SMILES string of the first bond topology, the
    identifier line, and the ID line. We need to know the stage because the
    SMU1 special cases are handled differently in the two stages.

    Args:
      conformer: dataset_pb2.Conformer
      identifier: string for the file/stoichiometry
      stage: 'stage1' or 'stage2'

    Returns:
      A multiline string representation of id lines.

    Raises:
      ValueError: if the bond topology id is a special case and `stage` is
        neither 'stage1' nor 'stage2'.
    """
    annotate = self.annotate
    pieces = []

    if annotate:
        pieces.append('# From smiles\n')
    pieces.append(conformer.bond_topologies[0].smiles + '\n')

    if annotate:
        pieces.append('# From topology\n')
    pieces.append(identifier + '\n')

    if annotate:
        pieces.append('# From bond_topology_id, conformer_id\n')

    # The ID is read from the last bond topology, not the first.
    bt_id = conformer.bond_topologies[-1].bond_topology_id
    # SMU1 special cases: stage1 writes 0, stage2 writes the mapped dat id.
    # The stage is only validated when the id is such a special case.
    if smu_utils_lib.special_case_dat_id_from_bt_id(bt_id):
        if stage not in ('stage1', 'stage2'):
            raise ValueError(f'Unknown stage {stage}')
        if stage == 'stage1':
            bt_id = 0
        else:
            bt_id = smu_utils_lib.special_case_dat_id_from_bt_id(bt_id)

    # Only the last three decimal digits of the conformer id are written.
    short_conformer_id = conformer.conformer_id % 1000
    pieces.append(f'ID{bt_id:8d}{short_conformer_id:8d}\n')
    return ''.join(pieces)
def test_from_bt_id(self):
    lookup = smu_utils_lib.special_case_dat_id_from_bt_id
    # An id that is not a special case has no dat id.
    self.assertIsNone(lookup(123456))
    # This bond topology id is expected to map to dat id 999997.
    self.assertEqual(lookup(899651), 999997)