Ejemplo n.º 1
0
    def PerKey(label, pcoll, n, compare, *args, **kwargs):
        """Selects, for every key, the n elements ranking highest under compare.

        The result is a PCollection that maps each distinct key of the input
        PCollection to the n greatest elements associated with it, where
        "greatest" is decided by the comparator passed as compare.

        compare must implement "a < b" and accept at least two arguments
        (a and b). Extra arguments and side inputs supplied in the apply call
        are forwarded and become additional arguments to the comparator.

        Args:
          label: display label for transform processes.
          pcoll: PCollection to process.
          n: number of elements to keep for each key.
          compare: comparator implementing "a < b", as described above.
          *args: extra positional arguments, as described above.
          **kwargs: extra keyword arguments, as described above.

        Returns:
          The result of applying the per-key combine transform to pcoll.

        Raises:
          TypeCheckError: If the output type of the input PCollection is not
            compatible with KV[A, B].
        """
        # Build the combiner first, then wrap it in the per-key transform so
        # each stage of the construction is visible on its own line.
        top_n_fn = TopCombineFn(n, compare)
        per_key_top = core.CombinePerKey(label, top_n_fn, *args, **kwargs)
        return pcoll | per_key_top
Ejemplo n.º 2
0
 def apply(self, pcoll):
     """Pairs every element with None, then combines per key.

     Each input element x becomes the pair (x, None), so the element itself
     acts as the key for the CombinePerKey(CountCombineFn()) step that
     follows. Presumably this yields a per-element occurrence count; confirm
     against CountCombineFn, which is defined elsewhere.

     Args:
       pcoll: input PCollection; its element_type is used as the key type
         of the intermediate key/value type hint.

     Returns:
       The result of applying the pairing Map and then the per-key combine
       to pcoll.
     """
     # The value slot only ever holds None, so it is hinted as Any.
     void_pair_hint = KV[pcoll.element_type, Any]
     pair_with_void = core.Map(
         '%s:PairWithVoid' % self.label, lambda x: (x, None))
     keyed = pcoll | pair_with_void.with_output_types(void_pair_hint)
     return keyed | core.CombinePerKey(CountCombineFn())
Ejemplo n.º 3
0
 def apply(self, pcoll):
     """Applies CombinePerKey(CountCombineFn()) to the input PCollection.

     Args:
       pcoll: input PCollection. It is assumed to hold key/value pairs, as
         a per-key combine would require; verify against callers.

     Returns:
       The result of piping pcoll through the per-key combine transform.
     """
     count_per_key = core.CombinePerKey(CountCombineFn())
     return pcoll | count_per_key
Ejemplo n.º 4
0
 def FixedSizePerKey(label, pcoll, n):
     """Applies a labelled per-key combine built from SampleCombineFn(n).

     Presumably this produces a sample of at most n elements for each key;
     confirm against SampleCombineFn, which is defined elsewhere.

     Args:
       label: label passed to the CombinePerKey transform.
       pcoll: input PCollection to process.
       n: size argument handed to SampleCombineFn.

     Returns:
       The result of piping pcoll through the per-key combine transform.
     """
     sample_fn = SampleCombineFn(n)
     sample_per_key = core.CombinePerKey(label, sample_fn)
     return pcoll | sample_per_key