Example #1
    def setUp(self):
        # Index the JSON-stream test fixture into Elasticsearch before each test.
        self.test_raw_data = ElasticSearchOutput(source='localhost',
                                                 index=INDEX,
                                                 content_field='abstract')
        self.test_raw_data.import_from_iterable(
            read_input('{}/test_data_json_stream.json'.format(test_data_path)),
            field_to_hash=CONTENT_FIELD)
Example #2
    def setUp(self):
        self.test_raw_data = ElasticSearchOutput(
            source='localhost',
            index=INDEX,
            content_field='abstract'
        )
        self.test_raw_data.import_from_iterable(read_input(
            '{}/test_data_json_stream.json'.format(test_data_path)),
            field_to_hash=CONTENT_FIELD)
Example #3
def test_read_input():
    # json stream
    documents = read_input('{}/test_data_json_stream.json'.format(test_data_path))
    nt.assert_true(next(documents)['abstract'] == solution_json_stream)

    # large json
    documents = read_input('{}/test_data_large_json.json'.format(test_data_path),
                           json_prefix='item._source.isAuthorOf')
    nt.assert_true(next(documents)['text'] == solution_large_json)

    # document folder
    documents = read_input(
        '{}/test_data_folder_files'.format(test_data_path),
        folder_content_field="abstract")
    nt.assert_true(next(documents)['abstract'] == solution_document_folder)

    # document folder gz
    documents = read_input(
        '{}/test_data_folder_files_gz'.format(test_data_path),
        folder_content_field="abstract")
    nt.assert_true(next(documents)['abstract'] == solution_document_folder_gz)
Example #4
def test_read_input():
    # json stream
    documents = read_input(
        '{}/test_data_json_stream.json'.format(test_data_path))
    nt.assert_true(next(documents)['abstract'] == solution_json_stream)

    # large json
    documents = read_input(
        '{}/test_data_large_json.json'.format(test_data_path),
        json_prefix='item._source.isAuthorOf')
    nt.assert_true(next(documents)['text'] == solution_large_json)

    # document folder
    documents = read_input('{}/test_data_folder_files'.format(test_data_path),
                           folder_content_field="abstract")
    nt.assert_true(next(documents)['abstract'] == solution_document_folder)

    # document folder gz
    documents = read_input(
        '{}/test_data_folder_files_gz'.format(test_data_path),
        folder_content_field="abstract")
    nt.assert_true(next(documents)['abstract'] == solution_document_folder_gz)
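
Examples #3 and #4 are the same test with different line wrapping: read_input is pointed at four fixtures (a JSON stream, a large JSON file read with a json_prefix, a folder of documents, and a gzipped folder), and the first yielded document is compared against a known solution. Below is a minimal sketch of consuming the same iterator outside a test, assuming read_input and test_data_path are imported from the test module shown here (the imports themselves are not part of these excerpts):

documents = read_input('{}/test_data_json_stream.json'.format(test_data_path))
for doc in documents:
    # Each yielded document behaves like a dict keyed by field name,
    # e.g. doc['abstract'] for the JSON-stream fixture.
    print(doc['abstract'])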
Example #5
    def setUp(self):
        self.test_raw_data = ElasticSearchOutput(
            source='localhost',
            index=INDEX,
            content_field='abstract'
        )
        try:
            self.test_raw_data.import_from_iterable(read_input(
                '{}/test_data_json_stream.json'.format(test_data_path)),
                field_to_hash=CONTENT_FIELD)

        except ConnectionError:
            raise SkipTest("Skipping Elasticsearch test - elasticsearch not running")
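
Unlike Examples #1 and #2, this version wraps the Elasticsearch import in try/except ConnectionError and raises SkipTest, so the test is skipped rather than failed when no Elasticsearch instance is reachable on localhost.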
Example #6
    def setUp(self):
        # Load the JSON-stream test fixture into an in-memory output before each test.
        self.test_raw_data = InMemoryOutput()
        self.test_raw_data.import_from_iterable(
            read_input('{}/test_data_json_stream.json'.format(test_data_path)),
            field_to_hash=CONTENT_FIELD)
Example #7
    def setUp(self):
        self.test_raw_data = InMemoryOutput()
        self.test_raw_data.import_from_iterable(read_input(
            '{}/test_data_json_stream.json'.format(test_data_path)),
            field_to_hash=CONTENT_FIELD)