def test_basics(self):
    """Run ``tfidf.run`` on three small text files and verify its output.

    Three files ('abc def ghi', 'abc def', 'abc') are written to a fresh
    temporary folder, ``tfidf.run`` is invoked with that folder as input and
    a 'result' prefix inside it as output, and every output line matching
    ``EXPECTED_LINE_RE`` is parsed into a ``(group1, group2, float(group3))``
    tuple. The set of parsed tuples must equal ``EXPECTED_RESULTS``.
    """
    import shutil  # Local import: only needed for temp-folder cleanup.

    # Setup the files with expected content.
    temp_folder = tempfile.mkdtemp()
    # mkdtemp() never deletes what it creates; register removal up front so
    # the scratch folder is cleaned whether the test passes, fails or errors.
    self.addCleanup(shutil.rmtree, temp_folder, ignore_errors=True)
    self.create_file(os.path.join(temp_folder, '1.txt'), 'abc def ghi')
    self.create_file(os.path.join(temp_folder, '2.txt'), 'abc def')
    self.create_file(os.path.join(temp_folder, '3.txt'), 'abc')
    tfidf.run([
        '--uris=%s/*' % temp_folder,
        '--output', os.path.join(temp_folder, 'result')])

    # Parse result file(s) and compare.
    results = []
    shard_pattern = os.path.join(temp_folder, 'result-*-of-*')
    with open_shards(shard_pattern) as result_file:
        for line in result_file:
            match = re.search(EXPECTED_LINE_RE, line)
            logging.info('Result line: %s', line)
            # Lines that do not match the expected format are skipped.
            if match is not None:
                results.append(
                    (match.group(1), match.group(2), float(match.group(3))))
    logging.info('Computed results: %s', set(results))
    self.assertEqual(set(results), EXPECTED_RESULTS)
def test_basics(self):
    """Run ``tfidf.run`` on three small text files and verify its output.

    Three files ('abc def ghi', 'abc def', 'abc') are written to a fresh
    temporary folder, ``tfidf.run`` is invoked with that folder as input and
    a 'result' prefix inside it as output, and every output line matching
    ``EXPECTED_LINE_RE`` is parsed into a ``(group1, group2, float(group3))``
    tuple. The set of parsed tuples must equal ``EXPECTED_RESULTS``.
    """
    # Local imports: only needed for shard discovery and temp-folder cleanup.
    import glob
    import shutil

    # Setup the files with expected content.
    temp_folder = tempfile.mkdtemp()
    # mkdtemp() never deletes what it creates; register removal up front so
    # the scratch folder is cleaned whether the test passes, fails or errors.
    self.addCleanup(shutil.rmtree, temp_folder, ignore_errors=True)
    self.create_file(os.path.join(temp_folder, '1.txt'), 'abc def ghi')
    self.create_file(os.path.join(temp_folder, '2.txt'), 'abc def')
    self.create_file(os.path.join(temp_folder, '3.txt'), 'abc')
    tfidf.run([
        '--uris=%s/*' % temp_folder,
        '--output', os.path.join(temp_folder, 'result')])

    # Parse result file(s) and compare. Read every 'result-*-of-*' shard
    # instead of assuming the output lands in exactly one file named
    # 'result-00000-of-00001'; the results are compared as a set, so the
    # order in which shards are read does not matter.
    results = []
    shard_paths = sorted(
        glob.glob(os.path.join(temp_folder, 'result-*-of-*')))
    for shard_path in shard_paths:
        with open(shard_path) as result_file:
            for line in result_file:
                match = re.search(EXPECTED_LINE_RE, line)
                logging.info('Result line: %s', line)
                # Lines that do not match the expected format are skipped.
                if match is not None:
                    results.append(
                        (match.group(1), match.group(2),
                         float(match.group(3))))
    logging.info('Computed results: %s', set(results))
    self.assertEqual(set(results), EXPECTED_RESULTS)