예제 #1
0
    def setUp(self):
        """Build the fixtures shared by the transformation tests.

        Creates a valid metadata entry (``self.test_entry``), an independent
        dict with the same content that serves as the expected result
        (``self.test_entry_index``), and the transformer under test
        (``self.parser``).
        """
        super(MetadataEntryTransformationTests, self).setUp()
        self.org_uuid = 'org01'

        target_uri = 'hdfs://6.6.6.6:8200/borker/long-long-hash/9213-154b-a0b9/000000_1'
        creation_time = '2015-02-13T13:00:00'
        valid_entry = {
            self.CATEGORY_FIELD: 'health',
            'dataSample': 'some sample',
            'format': 'csv',
            'recordCount': 13,
            'size': 99999,
            'sourceUri': 'some uri',
            self.TARGET_URI_FIELD: target_uri,
            'title': 'a great title',
            'isPublic': True,
            self.CREATION_TIME_FIELD: creation_time,
            self.ORG_UUID_FIELD: self.org_uuid,
        }

        self.test_entry = valid_entry
        # A separate dict object: the entry above may be modified in place by
        # the tests, while the expected value must stay untouched.
        self.test_entry_index = dict(valid_entry)
        self.parser = MetadataIndexingTransformer()
class ElasticSearchAdminResource(DataCatalogResource):
    """
    Contains REST endpoint for managing elastic search data
    """

    def __init__(self):
        """
        Create the ElasticSearch client and the metadata transformer.

        The client address is built as ``hostname:port`` from the
        ``elastic`` section of the resource configuration.
        """
        super(ElasticSearchAdminResource, self).__init__()
        self._elastic_search = Elasticsearch(
            '{}:{}'.format(self._config.elastic.elastic_hostname,
                           self._config.elastic.elastic_port))
        self._parser = MetadataIndexingTransformer()

    def delete(self):
        """
        Delete elastic search index

        Returns ``(None, 403)`` when the caller is not an admin. Otherwise
        the delete request is issued and nothing is returned explicitly.
        """
        self._log.info('Deleting the ElasticSearch index.')
        if not flask.g.is_admin:
            self._log.warning('Deleting index aborted, not enough privileges (admin required)')
            return None, 403
        # The delete call is made with ignore=404 (see the ElasticSearch
        # client documentation for the ``ignore`` parameter).
        # pylint: disable=unexpected-keyword-arg
        self._elastic_search.indices.delete(
            self._config.elastic.elastic_index,
            ignore=404)

    def put(self):
        """
        Add all data into elastic search. Data that are corrupted are omitted

        The request body is parsed as JSON and iterated entry by entry. Each
        entry is transformed in place and indexed under its ``"id"``.
        Entries rejected by the transformer, or lacking an ``"id"`` field,
        are logged and skipped.

        Returns ``(None, 403)`` for non-admin callers, ``(None, 400)`` on a
        ``RequestError``, ``(None, 503)`` on a ``ConnectionError`` and
        ``(None, 200)`` otherwise.
        """
        self._log.info("Adding data to elastic search")
        if not flask.g.is_admin:
            self._log.warning('Inserting data aborted, not enough privileges (admin required)')
            return None, 403
        data = flask.request.get_json(force=True)

        try:
            for entry in data:
                try:
                    self._parser.transform(entry)
                    try:
                        entry_id = entry["id"]
                    except KeyError:
                        # An entry can pass the transformer and still have no
                        # id; treat it as corrupted instead of failing the
                        # whole request with an unhandled KeyError.
                        self._log.warning("Entry without 'id' field omitted")
                        continue
                    self._elastic_search.index(
                        index=self._config.elastic.elastic_index,
                        doc_type=self._config.elastic.elastic_metadata_type,
                        id=entry_id,
                        body=entry
                    )
                except InvalidEntryError as ex:
                    self._log.exception(ex)
        except RequestError:
            self._log.exception("Malformed data")
            return None, 400
        except ConnectionError:
            self._log.exception("Failed connection to ElasticSearch")
            return None, 503
        self._log.info("Data added")
        return None, 200
예제 #3
0
class MetadataEntryTransformationTests(DataCatalogTestCase):
    """Tests of ``MetadataIndexingTransformer.transform`` on metadata entries."""

    TEST_DATA_SET_ID = 'whatever-id'
    EXAMPLE_CATEGORIES = {'health', 'finance'}
    CREATION_TIME_FIELD = 'creationTime'
    CATEGORY_FIELD = 'category'
    TARGET_URI_FIELD = 'targetUri'
    ORG_UUID_FIELD = 'orgUUID'

    def setUp(self):
        """Create a valid entry, its expected transformed form and the transformer."""
        super(MetadataEntryTransformationTests, self).setUp()
        self.org_uuid = 'org01'
        self.test_entry = {
            self.CATEGORY_FIELD: 'health',
            'dataSample': 'some sample',
            'format': 'csv',
            'recordCount': 13,
            'size': 99999,
            'sourceUri': 'some uri',
            self.TARGET_URI_FIELD: 'hdfs://6.6.6.6:8200/borker/long-long-hash/9213-154b-a0b9/000000_1',
            'title': 'a great title',
            'isPublic': True,
            self.CREATION_TIME_FIELD: '2015-02-13T13:00:00',
            self.ORG_UUID_FIELD: self.org_uuid
        }

        # The expected result has the same content as the input entry. It is
        # kept as an independent copy because the tests mutate the input.
        self.test_entry_index = dict(self.test_entry)
        self.parser = MetadataIndexingTransformer()

    def test_entryTransformation_validEntry_entryTransformed(self):
        """A valid entry equals the expected indexed form after transformation."""
        self.parser.transform(self.test_entry)

        self.assertDictEqual(
            self.test_entry_index,
            self.test_entry)

    def test_entryTransformation_invalidEntryURIs_raisesInvalidEntryError(self):
        """Each of the listed target URIs makes the transformer raise InvalidEntryError."""
        invalid_urls = (
            '//onet.pl/',
            'hdfs://onet.pl/',
            'http://',
            'some_path',
        )
        for url in invalid_urls:
            self.test_entry[self.TARGET_URI_FIELD] = url
            self.assertRaises(InvalidEntryError, self.parser.transform, self.test_entry)

    def test_entryTransformation_invalidEntryMissingField_raisesInvalidEntryError(self):
        """An entry without the 'dataSample' field is rejected."""
        del self.test_entry['dataSample']
        self.assertRaises(InvalidEntryError, self.parser.transform, self.test_entry)

    def test_entryTransformation_missingDate_dateCreated(self):
        """Transforming an entry without a creation time adds that field."""
        del self.test_entry[self.CREATION_TIME_FIELD]
        self.parser.transform(self.test_entry)
        # assertIn reports the missing key and the dict on failure, unlike
        # assertTrue on a direct __contains__ call.
        self.assertIn(self.CREATION_TIME_FIELD, self.test_entry)
 def __init__(self):
     """Create the ElasticSearch client and the metadata transformer."""
     super(ElasticSearchAdminResource, self).__init__()
     # The client address is "hostname:port" taken from the elastic config.
     hostname = self._config.elastic.elastic_hostname
     port = self._config.elastic.elastic_port
     address = '{}:{}'.format(hostname, port)
     self._elastic_search = Elasticsearch(address)
     self._parser = MetadataIndexingTransformer()