Ejemplo n.º 1
0
 def test_basics(self):
   """Run the TF-IDF pipeline on three small files and check its output.

   Writes three text files into a fresh temporary directory, invokes
   ``tfidf.run`` with that directory as ``--uris`` and a ``result`` prefix
   as ``--output``, then reads every output shard matching
   ``result-*-of-*``. Each line that matches ``EXPECTED_LINE_RE`` is turned
   into a ``(group 1, group 2, float(group 3))`` tuple, and the set of
   those tuples must equal ``EXPECTED_RESULTS``.
   """
   # Local import: only the scratch-directory cleanup below needs it.
   import shutil

   # Set up the input files with known content.
   temp_folder = tempfile.mkdtemp()
   # mkdtemp() never deletes what it creates; remove the directory when the
   # test finishes, whether it passes or fails. Best effort only, so a
   # cleanup problem cannot mask the real test outcome.
   self.addCleanup(shutil.rmtree, temp_folder, ignore_errors=True)
   for file_name, content in (('1.txt', 'abc def ghi'),
                              ('2.txt', 'abc def'),
                              ('3.txt', 'abc')):
     self.create_file(os.path.join(temp_folder, file_name), content)

   tfidf.run([
       '--uris=%s/*' % temp_folder,
       '--output', os.path.join(temp_folder, 'result')])

   # Parse the result shards and compare against the expectation.
   results = []
   with open_shards(os.path.join(
       temp_folder, 'result-*-of-*')) as result_file:
     for line in result_file:
       match = re.search(EXPECTED_LINE_RE, line)
       logging.info('Result line: %s', line)
       # Lines that do not match the expected format are skipped.
       if match is not None:
         results.append(
             (match.group(1), match.group(2), float(match.group(3))))
   logging.info('Computed results: %s', set(results))
   self.assertEqual(set(results), EXPECTED_RESULTS)
Ejemplo n.º 2
0
 def test_basics(self):
   """Run the TF-IDF pipeline on three small files and check its output.

   Writes three text files into a fresh temporary directory, invokes
   ``tfidf.run`` with that directory as ``--uris`` and a ``result`` prefix
   as ``--output``, then reads every output file matching
   ``result-*-of-*``. Each line that matches ``EXPECTED_LINE_RE`` is turned
   into a ``(group 1, group 2, float(group 3))`` tuple, and the set of
   those tuples must equal ``EXPECTED_RESULTS``.
   """
   # Local imports: only shard discovery and cleanup below need them.
   import glob
   import shutil

   # Set up the input files with known content.
   temp_folder = tempfile.mkdtemp()
   # mkdtemp() never deletes what it creates; remove the directory when the
   # test finishes, whether it passes or fails. Best effort only, so a
   # cleanup problem cannot mask the real test outcome.
   self.addCleanup(shutil.rmtree, temp_folder, ignore_errors=True)
   for file_name, content in (('1.txt', 'abc def ghi'),
                              ('2.txt', 'abc def'),
                              ('3.txt', 'abc')):
     self.create_file(os.path.join(temp_folder, file_name), content)

   tfidf.run([
       '--uris=%s/*' % temp_folder,
       '--output', os.path.join(temp_folder, 'result')])

   # Discover the output shards instead of hard-coding
   # 'result-00000-of-00001', so the test does not depend on the pipeline
   # writing exactly one shard.
   shard_pattern = os.path.join(temp_folder, 'result-*-of-*')
   shard_paths = sorted(glob.glob(shard_pattern))
   self.assertTrue(
       shard_paths, 'No output shards matched %s' % shard_pattern)

   # Parse the result files and compare against the expectation.
   results = []
   for shard_path in shard_paths:
     with open(shard_path) as result_file:
       for line in result_file:
         match = re.search(EXPECTED_LINE_RE, line)
         logging.info('Result line: %s', line)
         # Lines that do not match the expected format are skipped.
         if match is not None:
           results.append(
               (match.group(1), match.group(2), float(match.group(3))))
   logging.info('Computed results: %s', set(results))
   self.assertEqual(set(results), EXPECTED_RESULTS)