def conformer_to_bond_topology_summaries(conformer):
    """Yields BondTopologySummary protos describing a single Conformer.

    A "primary" summary is built for the first bond topology of the
    conformer. It always has count_attempted_conformers set to 1, plus the
    counters that correspond to conformer.fate. When the fate is
    FATE_CALCULATION_WITH_ERROR or FATE_SUCCESS, one additional summary is
    yielded for every remaining bond topology (before the primary summary),
    carrying only the matching count_detected_match_* counter.

    Because this is a generator, the errors below surface when it is
    iterated, not when it is called.

    Args:
      conformer: dataset_pb2.Conformer

    Yields:
      dataset_pb2.BondTopologySummary

    Raises:
      ValueError: if the id of the first bond topology differs from
        conformer_id // 1000, if the fate is FATE_UNDEFINED, or if the fate
        is not one of the values handled here.
    """
    fates = dataset_pb2.Conformer

    primary = dataset_pb2.BondTopologySummary()
    expected_topology_id = conformer.conformer_id // 1000
    if expected_topology_id != conformer.bond_topologies[0].bond_topology_id:
        # NOTE: the message spells the function name with an extra "s"
        # ("conformers_..."); the text is kept verbatim.
        raise ValueError('conformers_to_bond_topology_summaries assumes the '
                         'first bond topology is the one that generated this.')
    primary.bond_topology.CopyFrom(conformer.bond_topologies[0])
    primary.count_attempted_conformers = 1

    outcome = conformer.fate
    # Name of the counter to set on the summaries of the non-first bond
    # topologies; stays None for fates that produce no extra summaries.
    matched_counter = None

    if outcome == fates.FATE_UNDEFINED:
        raise ValueError(
            f'Conformer {conformer.conformer_id} has undefined fate')

    if outcome == fates.FATE_DUPLICATE_SAME_TOPOLOGY:
        primary.count_duplicates_same_topology = 1
    elif outcome == fates.FATE_DUPLICATE_DIFFERENT_TOPOLOGY:
        primary.count_duplicates_different_topology = 1
    elif outcome in (fates.FATE_GEOMETRY_OPTIMIZATION_PROBLEM,
                     fates.FATE_DISASSOCIATED,
                     fates.FATE_FORCE_CONSTANT_FAILURE,
                     fates.FATE_DISCARDED_OTHER):
        # All of these fates are counted under the same failure counter.
        primary.count_failed_geometry_optimization = 1
    elif outcome == fates.FATE_NO_CALCULATION_RESULTS:
        primary.count_kept_geometry = 1
        primary.count_missing_calculation = 1
    elif outcome == fates.FATE_CALCULATION_WITH_ERROR:
        primary.count_kept_geometry = 1
        primary.count_calculation_with_error = 1
        matched_counter = 'count_detected_match_with_error'
    elif outcome == fates.FATE_SUCCESS:
        primary.count_kept_geometry = 1
        primary.count_calculation_success = 1
        matched_counter = 'count_detected_match_success'
    else:
        raise ValueError(f'Did not understand {outcome}')

    if matched_counter is not None:
        # Summaries for the other bond topologies are emitted before the
        # primary one.
        for topology in conformer.bond_topologies[1:]:
            extra = dataset_pb2.BondTopologySummary()
            extra.bond_topology.CopyFrom(topology)
            setattr(extra, matched_counter, 1)
            yield extra

    yield primary
# Example #2
# 0
def bond_topology_summaries_from_csv(filename):
  """Generates bare BondTopologySummary protos from a csv file.

  Each bond topology produced by
  smu_utils_lib.generate_bond_topologies_from_csv is copied into a fresh
  summary whose counters are left untouched.

  Args:
    filename: csv file of bond topologies to read

  Yields:
    (bond_topology_id, dataset_pb2.BondTopologySummary) tuples
  """
  topologies = smu_utils_lib.generate_bond_topologies_from_csv(filename)
  for topology in topologies:
    bare_summary = dataset_pb2.BondTopologySummary()
    # Only the topology is filled in; no count field is assigned here.
    bare_summary.bond_topology.CopyFrom(topology)
    yield topology.bond_topology_id, bare_summary