def setUp(self):
    self.test_raw_data = ElasticSearchOutput(source='localhost', index=INDEX,
                                             content_field='abstract')
    self.test_raw_data.import_from_iterable(
        read_input('{}/test_data_json_stream.json'.format(test_data_path)),
        field_to_hash=CONTENT_FIELD)
def test_read_input():
    # json stream
    documents = read_input(
        '{}/test_data_json_stream.json'.format(test_data_path))
    nt.assert_true(next(documents)['abstract'] == solution_json_stream)

    # large json
    documents = read_input(
        '{}/test_data_large_json.json'.format(test_data_path),
        json_prefix='item._source.isAuthorOf')
    nt.assert_true(next(documents)['text'] == solution_large_json)

    # document folder
    documents = read_input(
        '{}/test_data_folder_files'.format(test_data_path),
        folder_content_field="abstract")
    nt.assert_true(next(documents)['abstract'] == solution_document_folder)

    # document folder gz
    documents = read_input(
        '{}/test_data_folder_files_gz'.format(test_data_path),
        folder_content_field="abstract")
    nt.assert_true(next(documents)['abstract'] == solution_document_folder_gz)
def setUp(self):
    self.test_raw_data = ElasticSearchOutput(source='localhost', index=INDEX,
                                             content_field='abstract')
    # Skip rather than fail when no local Elasticsearch instance is reachable.
    try:
        self.test_raw_data.import_from_iterable(
            read_input('{}/test_data_json_stream.json'.format(test_data_path)),
            field_to_hash=CONTENT_FIELD)
    except ConnectionError:
        raise SkipTest("Skipping Elasticsearch test - elasticsearch not running")
def setUp(self):
    self.test_raw_data = InMemoryOutput()
    self.test_raw_data.import_from_iterable(
        read_input('{}/test_data_json_stream.json'.format(test_data_path)),
        field_to_hash=CONTENT_FIELD)