def process(self, element):
        """Split one input line into words, emitting tagged side outputs too.

        For a single input element this generator may produce several values:

          - one ``SideOutputValue`` tagged with
            ``self.SIDE_OUTPUT_TAG_CHARACTER_COUNT`` carrying ``len(element)``;
          - for every word of at most three characters, a ``SideOutputValue``
            tagged with ``self.SIDE_OUTPUT_TAG_SHORT_WORDS`` carrying the word
            (same type as the main output);
          - every longer word, yielded untagged as main output.

        A word is a maximal run of ASCII letters and apostrophes.

        Args:
          element: the element being processed (a line of text).

        Yields:
          The character count first, then one value per word in order of
          appearance.
        """
        # The integer character count goes to its own tagged collection.
        yield pvalue.SideOutputValue(
            self.SIDE_OUTPUT_TAG_CHARACTER_COUNT, len(element))

        for token in re.findall(r'[A-Za-z\']+', element):
            if len(token) > 3:
                # Long enough: this word belongs to the main collection.
                yield token
                continue
            # Short words are routed to the short-words tagged collection.
            yield pvalue.SideOutputValue(
                self.SIDE_OUTPUT_TAG_SHORT_WORDS, token)
Beispiel #2
0
 def process(self, element, cutoff_length, marker):
     """Route an element by its length and by whether it starts with a marker.

     Args:
       element: the value being processed; must support ``len()`` and
         ``startswith()``.
       cutoff_length: largest length still yielded on the main output.
       marker: prefix that selects elements for the 'marked strings' output.

     Yields:
       The element itself (main output) when its length does not exceed
       ``cutoff_length``; otherwise its length tagged
       'above_cutoff_lengths'. In addition, the element tagged
       'marked strings' whenever it starts with ``marker``.
     """
     size = len(element)
     within_cutoff = size <= cutoff_length
     if not within_cutoff:
         # Too long: report only the length, on a side output.
         yield pvalue.SideOutputValue('above_cutoff_lengths', size)
     else:
         # Short enough: the element itself goes to the main output.
         yield element

     # Independently of its length, a marked element gets its own side output.
     if element.startswith(marker):
         yield pvalue.SideOutputValue('marked strings', element)
Beispiel #3
0
 def process(self, element):
     """Separate negative elements from the rest.

     Args:
       element: a value comparable with ``0``.

     Yields:
       The element wrapped in a ``SideOutputValue`` tagged 'tag_negative'
       when it is below zero; otherwise the element itself, untagged.
     """
     is_negative = element < 0
     if not is_negative:
         # Zero and positive values pass through untagged.
         yield element
         return
     yield pvalue.SideOutputValue('tag_negative', element)
Beispiel #4
0
 def even_odd(x):
     """Tag a number by parity and also pass multiples of ten through.

     Args:
       x: a number supporting the ``%`` operator.

     Yields:
       ``x`` wrapped in a ``SideOutputValue`` tagged 'odd' when ``x % 2`` is
       truthy and 'even' otherwise; then ``x`` itself, untagged, when
       ``x % 10 == 0``.
     """
     if x % 2:
         parity_tag = 'odd'
     else:
         parity_tag = 'even'
     yield pvalue.SideOutputValue(parity_tag, x)

     # Multiples of ten are additionally emitted untagged.
     if x % 10 == 0:
         yield x
 def process(self, context):
     """Separate negative elements from the rest, reading from a context.

     Args:
       context: an object whose ``element`` attribute holds the value being
         processed.

     Yields:
       ``context.element`` wrapped in a ``SideOutputValue`` tagged
       'tag_negative' when it is below zero; otherwise the element itself,
       untagged.
     """
     value = context.element
     if not value < 0:
         # Zero and positive values pass through untagged.
         yield value
         return
     yield pvalue.SideOutputValue('tag_negative', value)