コード例 #1
0
ファイル: util.py プロジェクト: raznem/beam
def Distinct(pcoll):  # pylint: disable=invalid-name
  """Returns a PCollection holding each distinct element of `pcoll`.

  Every element is turned into an `(element, None)` pair so that the element
  itself acts as the key. The values of each key are then combined down to
  `None`, and finally only the keys are kept.

  Args:
    pcoll: the input PCollection whose elements should be de-duplicated.

  Returns:
    The result of applying the 'ToPairs', 'Group' and 'Distinct' steps to
    `pcoll`, in that order.
  """
  # Use the element as the key; the value is a throwaway placeholder.
  keyed = pcoll | 'ToPairs' >> Map(lambda element: (element, None))
  # The combiner ignores the grouped values, leaving one entry per key.
  one_per_key = keyed | 'Group' >> CombinePerKey(lambda _values: None)
  # Drop the placeholder value and keep just the original elements.
  return one_per_key | 'Distinct' >> Keys()
コード例 #2
0
def RemoveDuplicates(pcoll):  # pylint: disable=invalid-name
    """Returns a PCollection holding the unique elements of `pcoll`.

    Each element becomes the key of an `(element, None)` pair, the values
    under every key are combined into `None`, and the keys are then
    extracted as the output.

    Args:
        pcoll: the input PCollection to remove duplicate elements from.

    Returns:
        The result of applying the 'ToPairs', 'Group' and
        'RemoveDuplicates' steps to `pcoll`, in that order.
    """
    # Key every element by itself with a placeholder value.
    paired = pcoll | 'ToPairs' >> Map(lambda item: (item, None))
    # Combine per key while discarding the values entirely.
    deduped = paired | 'Group' >> CombinePerKey(lambda _items: None)
    # Only the keys (the original elements) are of interest.
    return deduped | 'RemoveDuplicates' >> Keys()