Esempio n. 1
0
    def Of(label, pcoll, n, compare, *args, **kwargs):
        """Return the top ``n`` elements of a PCollection under a custom order.

        The input PCollection is combined globally with a ``TopCombineFn``
        built from ``n`` and ``compare``; "greatest" is therefore whatever the
        supplied comparator says it is.

        ``compare`` must implement "a < b" and accept at least two arguments
        (``a`` and ``b``). Any extra positional arguments, keyword arguments
        and side inputs given here are forwarded to the combine step and end
        up as additional arguments to the comparator.

        Args:
          label: display label for transform processes.
          pcoll: PCollection to process.
          n: number of elements to extract from pcoll.
          compare: "a < b" style comparator, as described above.
          *args: extra positional arguments forwarded to the combine step.
          **kwargs: extra keyword arguments forwarded to the combine step.

        Returns:
          The result of piping ``pcoll`` into the global combine transform.
        """
        # Build the transform first, then apply it to the input collection.
        top_n_transform = core.CombineGlobally(
            label, TopCombineFn(n, compare), *args, **kwargs)
        return pcoll | top_n_transform
Esempio n. 2
0
 def apply(self, pcoll):
     """Pipe ``pcoll`` into a global combine driven by ``CountCombineFn``.

     Args:
       pcoll: the input collection this transform is applied to.

     Returns:
       The result of ``pcoll | core.CombineGlobally(CountCombineFn())``.
     """
     # No label is passed to the combine step here.
     count_transform = core.CombineGlobally(CountCombineFn())
     return pcoll | count_transform
Esempio n. 3
0
 def apply(self, pcoll):
     """Pipe ``pcoll`` into a global combine driven by ``ToDictCombineFn``.

     The combine step is created with this object's ``label``.

     Args:
       pcoll: the input collection this transform is applied to.

     Returns:
       The result of applying the labelled global combine to ``pcoll``.
     """
     to_dict_transform = core.CombineGlobally(self.label, ToDictCombineFn())
     return pcoll | to_dict_transform
Esempio n. 4
0
 def FixedSizeGlobally(label, pcoll, n):
     """Pipe ``pcoll`` into a global combine driven by ``SampleCombineFn(n)``.

     Args:
       label: label passed as the first argument to ``core.CombineGlobally``.
       pcoll: the input collection to process.
       n: forwarded to ``SampleCombineFn``; presumably the sample size --
         confirm against ``SampleCombineFn``.

     Returns:
       The result of applying the labelled global combine to ``pcoll``.
     """
     sample_transform = core.CombineGlobally(label, SampleCombineFn(n))
     return pcoll | sample_transform