def get_stage1_header(self, conformer, identifier):
        """Builds the separator plus the first record line for stage1 output.

        The stage1 format only carries the results of geometry optimization,
        so its header line also includes the error fields read from
        conformer.properties.errors.

        Args:
          conformer: dataset_pb2.Conformer.
          identifier: string like x07_c3n3oh7

        Returns:
          A multiline string: the separator line, an annotation comment when
          self.annotate is set, and the formatted header line.
        """
        # The atom count is read from the first bond topology, while the id
        # below is read from the last one.
        atom_count = len(conformer.bond_topologies[0].atoms)

        pieces = [smu_parser_lib.SEPARATOR_LINE + '\n']
        if self.annotate:
            pieces.append(
                '# From original_conformer_index, topology, bond_topology_id, '
                'error_{nstat1, nstatc, nstatt, frequences} conformer_id\n')

        errs = conformer.properties.errors
        bt_id = conformer.bond_topologies[-1].bond_topology_id
        # Ids that the special-case lookup recognises are written as 0 here.
        if smu_utils_lib.special_case_dat_id_from_bt_id(bt_id):
            bt_id = 0

        index_field = self._conformer_index_string(conformer)
        short_conformer_id = conformer.conformer_id % 1000
        pieces.append(
            f'{index_field:5s}{errs.error_nstat1:5d}{errs.error_nstatc:5d}'
            f'{errs.error_nstatt:5d}{errs.error_frequencies:5d}'
            f'{atom_count:5d}     '
            f'{identifier:s}.{bt_id:06d}.{short_conformer_id:03d}\n')
        return ''.join(pieces)
    def get_stage2_header(self, conformer, identifier):
        """Builds the separator plus the first record line for stage2 output.

        The stage2 format is the one produced at the end of the pipeline.

        Args:
          conformer: dataset_pb2.Conformer.
          identifier: string like x07_c3n3oh7

        Returns:
          A multiline string: the separator line, an annotation comment when
          self.annotate is set, and the formatted header line.
        """
        # The atom count is read from the first bond topology, while the id
        # below is read from the last one.
        atom_count = len(conformer.bond_topologies[0].atoms)

        pieces = [smu_parser_lib.SEPARATOR_LINE + '\n']
        if self.annotate:
            pieces.append('# From original_conformer_index, topology, '
                          'bond_topology_id, conformer_id\n')

        bt_id = conformer.bond_topologies[-1].bond_topology_id
        # Ids that the special-case lookup recognises are written as 0 here.
        if smu_utils_lib.special_case_dat_id_from_bt_id(bt_id):
            bt_id = 0

        index_field = self._conformer_index_string(conformer)
        short_conformer_id = conformer.conformer_id % 1000
        # `!s` with a fill/align spec pads the str() of each value, which is
        # what str(...).rjust(...) does.
        pieces.append(
            f'{index_field!s}{atom_count!s:>5}     '
            f'{identifier!s}.{bt_id!s:0>6}.{short_conformer_id!s:0>3}\n')
        return ''.join(pieces)
    def get_ids(self, molecule, stage, bt_idx):
        """Builds the identifier lines for one bond topology of a molecule.

        Three lines are produced: a smiles line, the composition line from
        smu_utils_lib.get_composition, and the "ID" line. The stage is needed
        because the SMU1 special cases are written differently in the two
        stages.

        Args:
          molecule: dataset_pb2.Molecule
          stage: 'stage1' or 'stage2'
          bt_idx: bond topology index

        Returns:
          A multiline string representation of id lines.

        Raises:
          ValueError: if the bond topology id is a special case and stage is
            neither 'stage1' nor 'stage2'.
        """
        # NOTE(review): a second get_ids(conformer, identifier, stage) appears
        # later in this listing; if both belong to the same class, the later
        # definition replaces this one -- confirm which is intended.
        topology = molecule.bond_topologies[bt_idx]
        props = molecule.properties
        lines = []

        if self.annotate:
            lines.append('# From smiles or properties.smiles_openbabel\n')
        # Use the openbabel smiles when that field is set, else the topology's.
        smiles = (props.smiles_openbabel
                  if props.HasField('smiles_openbabel') else topology.smiles)
        lines.append(smiles + '\n')

        if self.annotate:
            lines.append('# From topology\n')
        lines.append(smu_utils_lib.get_composition(topology) + '\n')

        if self.annotate:
            lines.append('# From bond_topology_id, molecule_id\n')
        bt_id = topology.bond_topology_id
        # Special case SMU1: stage1 writes 0, stage2 writes the value returned
        # by the special-case lookup.
        special_dat_id = smu_utils_lib.special_case_dat_id_from_bt_id(bt_id)
        if special_dat_id:
            if stage not in ('stage1', 'stage2'):
                raise ValueError(f'Unknown stage {stage}')
            bt_id = 0 if stage == 'stage1' else special_dat_id

        short_molecule_id = molecule.molecule_id % 1000
        lines.append(f'ID{bt_id:8d}{short_molecule_id:8d}\n')
        return ''.join(lines)
    def get_ids(self, conformer, identifier, stage):
        """Builds the identifier lines for a conformer.

        Three lines are produced: the smiles of the first bond topology, the
        identifier passed in, and the "ID" line. The stage is needed because
        the SMU1 special cases are written differently in the two stages.

        Args:
          conformer: dataset_pb2.Conformer
          identifier: string for the file/stoichiometry
          stage: 'stage1' or 'stage2'

        Returns:
          A multiline string representation of id lines.

        Raises:
          ValueError: if the bond topology id is a special case and stage is
            neither 'stage1' nor 'stage2'.
        """
        # NOTE(review): an earlier get_ids(molecule, stage, bt_idx) appears in
        # this listing; if both belong to the same class, this definition
        # replaces that one -- confirm which is intended.
        lines = []

        if self.annotate:
            lines.append('# From smiles\n')
        lines.append(conformer.bond_topologies[0].smiles + '\n')

        if self.annotate:
            lines.append('# From topology\n')
        lines.append(identifier + '\n')

        if self.annotate:
            lines.append('# From bond_topology_id, conformer_id\n')
        # The id comes from the last bond topology, unlike the smiles above.
        bt_id = conformer.bond_topologies[-1].bond_topology_id
        # Special case SMU1: stage1 writes 0, stage2 writes the value returned
        # by the special-case lookup.
        special_dat_id = smu_utils_lib.special_case_dat_id_from_bt_id(bt_id)
        if special_dat_id:
            if stage not in ('stage1', 'stage2'):
                raise ValueError(f'Unknown stage {stage}')
            bt_id = 0 if stage == 'stage1' else special_dat_id

        short_conformer_id = conformer.conformer_id % 1000
        lines.append(f'ID{bt_id:8d}{short_conformer_id:8d}\n')
        return ''.join(lines)
Example #5
0
 def test_from_bt_id(self):
   """Checks special_case_dat_id_from_bt_id on a plain id and a special id."""
   lookup = smu_utils_lib.special_case_dat_id_from_bt_id
   # This id is expected to have no special-case mapping.
   self.assertIsNone(lookup(123456))
   # This id is expected to map to a special-case value.
   self.assertEqual(lookup(899651), 999997)