コード例 #1
0
def bond_topology_summaries_from_csv(filename):
  """Generates a bare BondTopologySummary for every topology in a csv file.

  Each topology produced by
  smu_utils_lib.generate_bond_topologies_from_csv is copied into a fresh
  dataset_pb2.BondTopologySummary. No other field of the summary is set.

  Args:
    filename: csv file of bond topologies to read

  Yields:
    (bond_topology_id, dataset_pb2.BondTopologySummary) tuples
  """
  topologies = smu_utils_lib.generate_bond_topologies_from_csv(filename)
  for topology in topologies:
    bare_summary = dataset_pb2.BondTopologySummary()
    # Only the topology itself is filled in; every count field is
    # deliberately left at its default of 0.
    bare_summary.bond_topology.CopyFrom(topology)
    yield topology.bond_topology_id, bare_summary
コード例 #2
0
def pipeline(root):
  """Builds the Beam pipeline.

  The bond topologies read from FLAGS.input_bond_topology_csv are passed
  through check_smiles_permutation_invariance, each resulting record is
  joined into one comma separated line, and the lines are written as a
  single shard to FLAGS.output_csv under the header 'bt_id,smiles0,smiles1'.

  Args:
    root: the root of the pipeline.
  """
  topologies = root | 'CreateTopologies' >> beam.Create(
      smu_utils_lib.generate_bond_topologies_from_csv(
          FLAGS.input_bond_topology_csv))

  # A reshuffle precedes and follows the check stage.
  checked = (
      topologies
      | 'Reshuffle1' >> beam.Reshuffle()
      | 'CheckInvariance' >> beam.FlatMap(check_smiles_permutation_invariance))

  csv_lines = (
      checked
      | 'Reshuffle2' >> beam.Reshuffle()
      | 'CSVFormat' >> beam.Map(lambda fields: ','.join(map(str, fields))))

  # The result of the write step is intentionally discarded.
  _ = csv_lines | 'WriteOutput' >> beam.io.WriteToText(
      FLAGS.output_csv, header='bt_id,smiles0,smiles1', num_shards=1)
コード例 #3
0
  def test_basic(self):
    """Checks parsing of a two row bond topology csv file.

    Writes a small csv file to disk, reads it back through
    smu_utils_lib.generate_bond_topologies_from_csv and verifies the id,
    the number of atoms and the smiles string of both topologies.
    """
    import os  # Function-scope import: only needed for temp file cleanup.

    # delete=False keeps the file on disk after it is closed so that it can
    # be re-opened by name below; the cleanup registered right after creation
    # removes it once the test finishes, whether it passes or fails.
    with tempfile.NamedTemporaryFile(mode='w', delete=False) as infile:
      self.addCleanup(os.remove, infile.name)
      infile.write(
          'id,num_atoms,atoms_str,connectivity_matrix,hydrogens,smiles\n')
      infile.write('68,3,C N+O-,310,010,[NH+]#C[O-]\n')
      infile.write('134,4,N+O-F F ,111000,1000,[O-][NH+](F)F\n')

    out = smu_utils_lib.generate_bond_topologies_from_csv(infile.name)

    # The expected atom counts are one larger than the num_atoms column
    # (3 and 4) of the corresponding csv row.
    bt = next(out)
    self.assertEqual(68, bt.bond_topology_id)
    self.assertLen(bt.atoms, 4)
    self.assertEqual(bt.smiles, '[NH+]#C[O-]')

    bt = next(out)
    self.assertEqual(134, bt.bond_topology_id)
    self.assertLen(bt.atoms, 5)
    self.assertEqual(bt.smiles, '[O-][NH+](F)F')