예제 #1
0
 def test_duplicate_different_topology(self):
     """A duplicate pointing at another bond topology gets the matching fate."""
     conformer = get_stage1_conformer()
     # Shifting the id by 1000 places the duplicate under a different bond
     # topology (bond topology id is conformer_id // 1000).
     conformer.duplicated_by = conformer.conformer_id + 1000
     fate = smu_utils_lib.determine_fate(conformer)
     self.assertEqual(
         dataset_pb2.Conformer.FATE_DUPLICATE_DIFFERENT_TOPOLOGY, fate)
예제 #2
0
 def test_duplicate_same_topology(self):
   """A duplicate within the same bond topology gets the matching fate."""
   conformer = get_stage1_conformer()
   # Adding 1 keeps the duplicate inside the same bond topology
   # (bond topology id is conformer_id // 1000).
   conformer.duplicated_by = conformer.conformer_id + 1
   smu_utils_lib.clean_up_error_codes(conformer)
   fate = smu_utils_lib.determine_fate(conformer)
   self.assertEqual(dataset_pb2.Conformer.FATE_DUPLICATE_SAME_TOPOLOGY, fate)
예제 #3
0
    def process(self, conformer):
        """Sets the fate on a copy of the conformer and yields results.

        Args:
          conformer: dataset_pb2.Conformer

        Yields:
          Outputs of _compare_smiles, then the updated Conformer.
        """
        # Work on a copy so the (possibly shared) input is never mutated.
        updated = copy.deepcopy(conformer)
        updated.fate = smu_utils_lib.determine_fate(updated)
        yield from self._compare_smiles(updated)
        yield updated
예제 #4
0
  def process(self, conformer, bond_length_records, smiles_id_dict):
    """Per conformer updates.

    Args:
      conformer: dataset_pb2.Conformer
      bond_length_records: tuples to go to
        bond_length_distribution.AllAtomPairLengthDistributions
      smiles_id_dict: dict from SMILES to bond topology id

    Yields:
      Conformer.
    """
    # There is probably a better way to do this.
    # We get the side input with each call to process. We'll assume that it's
    # always the same input, so we set our cache value and never update it.
    # We only do this with bond_length_records because there is a reasonable
    # amount of processing in creating AllAtomPairLengthDistributions.
    # The smiles_id_dict is used directly.
    if not self._cached_bond_lengths:
      self._cached_bond_lengths = (
          bond_length_distribution.AllAtomPairLengthDistributions())
      try:
        self._cached_bond_lengths.add_from_sparse_dataframe(
            bond_length_distribution.sparse_dataframe_from_records(
                bond_length_records), _BOND_LENGTHS_UNBONDED_RIGHT_TAIL_MASS,
            _BOND_LENGTHS_SIG_DIGITS)
      except ValueError as err:
        # Chain the original exception so the underlying cause is preserved
        # (and to silence the implicit "during handling of the above
        # exception" traceback noise).
        raise ValueError(
            'Invalid sparse dataframe for conformer {0} org. ValueError: {1}'
            .format(str(conformer.conformer_id), err)) from err

    # Work on a copy so the (possibly shared) input is never mutated.
    conformer = copy.deepcopy(conformer)

    conformer.fate = smu_utils_lib.determine_fate(conformer)

    yield from self._compare_smiles(conformer)

    if (conformer.duplicated_by == 0 and
        conformer.properties.errors.status < 512):
      self._add_alternative_bond_topologies(conformer, smiles_id_dict)
    else:
      # Duplicate records do not need topology extraction, and anything with
      # this high an error is pretty messed up, so we won't bother trying to
      # match the topology.
      beam.metrics.Metrics.counter(_METRICS_NAMESPACE,
                                   'skipped_topology_matches').inc()

    yield conformer
예제 #5
0
  def process(self, molecule, bond_length_records, smiles_id_dict):
    """Per molecule updates.

    Args:
      molecule: dataset_pb2.Molecule
      bond_length_records: tuples to go to
        bond_length_distribution.AllAtomPairLengthDistributions
      smiles_id_dict: dict from SMILES to bond topology id

    Yields:
      Molecule.
    """
    # The side inputs arrive with every call to process. We assume they are
    # identical across calls, so the AllAtomPairLengthDistributions (which
    # takes a reasonable amount of processing to build) is constructed once
    # and cached, never updated. smiles_id_dict is cheap enough to use
    # directly. There is probably a better way to do this.
    if not self._cached_bond_lengths:
      distributions = bond_length_distribution.AllAtomPairLengthDistributions()
      self._cached_bond_lengths = distributions
      try:
        distributions.add_from_sparse_dataframe(
            bond_length_distribution.sparse_dataframe_from_records(
                bond_length_records),
            bond_length_distribution.STANDARD_UNBONDED_RIGHT_TAIL_MASS,
            bond_length_distribution.STANDARD_SIG_DIGITS)
      except ValueError as err:
        raise ValueError(
            'Invalid sparse dataframe for molecule {0} org. ValueError: {1}'
            .format(str(molecule.molecule_id), err)) from err

    # Work on a copy so the (possibly shared) input is never mutated.
    updated = copy.deepcopy(molecule)
    updated.properties.errors.fate = smu_utils_lib.determine_fate(updated)

    yield from self._compare_smiles(updated)

    if smu_utils_lib.molecule_eligible_for_topology_detection(updated):
      self._add_alternative_bond_topologies(updated, smiles_id_dict)
    else:
      updated.bond_topologies[
          0].source = dataset_pb2.BondTopology.SOURCE_STARTING
      beam.metrics.Metrics.counter(_METRICS_NAMESPACE,
                                   'skipped_topology_matches').inc()

    yield updated
예제 #6
0
 def test_success(self):
   """An error-free stage-2 conformer resolves to FATE_SUCCESS."""
   fate = smu_utils_lib.determine_fate(get_stage2_conformer())
   self.assertEqual(dataset_pb2.Conformer.FATE_SUCCESS, fate)
예제 #7
0
 def test_calculation_errors(self):
   """Setting a calculation error field yields FATE_CALCULATION_WITH_ERROR."""
   conformer = get_stage2_conformer()
   # An arbitrary choice of error field/value; any error would do here.
   conformer.properties.errors.error_atomic_analysis = 999
   fate = smu_utils_lib.determine_fate(conformer)
   self.assertEqual(dataset_pb2.Conformer.FATE_CALCULATION_WITH_ERROR, fate)
예제 #8
0
 def test_no_result(self):
   """A stage-1 conformer resolves to FATE_NO_CALCULATION_RESULTS."""
   fate = smu_utils_lib.determine_fate(get_stage1_conformer())
   self.assertEqual(dataset_pb2.Conformer.FATE_NO_CALCULATION_RESULTS, fate)
예제 #9
0
 def test_geometry_failures(self, nstat1, expected_fate):
   """error_nstat1 determines the fate (parameterized pairs)."""
   conformer = get_stage1_conformer()
   conformer.properties.errors.error_nstat1 = nstat1
   actual_fate = smu_utils_lib.determine_fate(conformer)
   self.assertEqual(expected_fate, actual_fate)
예제 #10
0
 def test_calculation_warnings_vibrational(self):
   """A vibrational warning maps to the vibrational-warning fate."""
   conformer = get_stage2_conformer()
   conformer.properties.errors.warn_vib_linearity = 1234
   fate = smu_utils_lib.determine_fate(conformer)
   self.assertEqual(
       dataset_pb2.Conformer.FATE_CALCULATION_WITH_WARNING_VIBRATIONAL, fate)
예제 #11
0
 def test_calculation_warnings_serious(self):
   """A T1-excess warning maps to the serious-warning fate."""
   conformer = get_stage2_conformer()
   conformer.properties.errors.warn_t1_excess = 1234
   fate = smu_utils_lib.determine_fate(conformer)
   self.assertEqual(
       dataset_pb2.Conformer.FATE_CALCULATION_WITH_WARNING_SERIOUS, fate)
예제 #12
0
 def test_calculation_errors(self, status, expected):
   """The status code determines the fate (parameterized pairs)."""
   conformer = get_stage2_conformer()
   conformer.properties.errors.status = status
   actual = smu_utils_lib.determine_fate(conformer)
   self.assertEqual(expected, actual)
예제 #13
0
 def test_discarded_other(self, status):
   """Parameterized status codes resolve to FATE_DISCARDED_OTHER."""
   conformer = get_stage1_conformer()
   conformer.properties.errors.status = status
   smu_utils_lib.clean_up_error_codes(conformer)
   fate = smu_utils_lib.determine_fate(conformer)
   self.assertEqual(dataset_pb2.Conformer.FATE_DISCARDED_OTHER, fate)