Ejemplo n.º 1
0
  def test_multiple_output_pardo(self):
    """Check the three outputs written by ``multiple_output_pardo.run``.

    SAMPLE_TEXT is written to a temp file, the example is invoked with that
    file as a glob input and ``<temp file>.result`` as the output prefix, and
    the results are read back from files matching ``<prefix>-<name>-*-of-*``:

    * ``chars``: must equal the number of non-newline characters in
      SAMPLE_TEXT.
    * ``short-words`` / ``words``: compared, order-insensitively, with
      EXPECTED_SHORT_WORDS and EXPECTED_WORDS.
    """
    input_file = self.create_temp_file(self.SAMPLE_TEXT)
    output_prefix = input_file + '.result'
    pipeline_args = [
        '--input=%s*' % input_file,
        '--output=%s' % output_prefix,
    ]
    multiple_output_pardo.run(pipeline_args)

    # Newlines are not counted in the expected total.
    char_total = len(self.SAMPLE_TEXT.replace('\n', ''))
    with open_shards(output_prefix + '-chars-*-of-*') as shards:
      reported_total = int(shards.read())
    self.assertEqual(char_total, reported_total)

    # Both word outputs are compared after sorting, so ordering within the
    # output files does not matter.
    short_word_pattern = output_prefix + '-short-words-*-of-*'
    actual_short = sorted(self.get_wordcount_results(short_word_pattern))
    self.assertEqual(actual_short, sorted(self.EXPECTED_SHORT_WORDS))

    word_pattern = output_prefix + '-words-*-of-*'
    actual_words = sorted(self.get_wordcount_results(word_pattern))
    self.assertEqual(actual_words, sorted(self.EXPECTED_WORDS))
  def test_multiple_output_pardo(self):
    """Run ``multiple_output_pardo`` to completion and verify its outputs.

    SAMPLE_TEXT is written to a temp file, the example is invoked with that
    file as a glob input and ``<temp file>.result`` as the output prefix, and
    the returned result is waited on before any output is read. Each output
    is then read from its ``-00000-of-00001`` file and checked:

    * ``chars``: must equal the number of non-newline characters in
      SAMPLE_TEXT.
    * ``short-words`` / ``words``: compared, order-insensitively, with
      EXPECTED_SHORT_WORDS and EXPECTED_WORDS.
    """
    input_file = self.create_temp_file(self.SAMPLE_TEXT)
    output_prefix = input_file + '.result'

    pipeline_result = multiple_output_pardo.run([
        '--input=%s*' % input_file,
        '--output=%s' % output_prefix,
    ])
    pipeline_result.wait_until_finish()

    # NOTE(review): the literal '-00000-of-00001' names assume every output
    # is written as exactly one shard -- confirm against the runner used.
    chars_file = output_prefix + '-chars-00000-of-00001'
    short_words_file = output_prefix + '-short-words-00000-of-00001'
    words_file = output_prefix + '-words-00000-of-00001'

    # Newlines are not counted in the expected total.
    char_total = sum(len(line) for line in self.SAMPLE_TEXT.split('\n'))
    with open(chars_file) as handle:
      reported_total = int(handle.read())
    self.assertEqual(char_total, reported_total)

    # Both word outputs are compared after sorting, so ordering within the
    # output files does not matter.
    actual_short = sorted(self.get_wordcount_results(short_words_file))
    self.assertEqual(actual_short, sorted(self.EXPECTED_SHORT_WORDS))

    actual_words = sorted(self.get_wordcount_results(words_file))
    self.assertEqual(actual_words, sorted(self.EXPECTED_WORDS))