Ejemplo n.º 1
0
    def test_manifest_for_copy_data(self):
        """Load 'complete.manifest.cdm.json', serialize it with to_data, and check the result."""
        test_name = 'test_manifest_for_copy_data'
        corpus = self.get_corpus()
        raw_content = TestHelper.get_input_file_content(
            self.tests_subpath, test_name, 'complete.manifest.cdm.json')
        cdm_manifest = ManifestPersistence.from_object(
            corpus.ctx, 'docName', 'someNamespace', '/',
            ManifestContent().decode(raw_content))

        data = ManifestPersistence.to_data(cdm_manifest, None, None)

        # Scalar header fields, checked in the order they are listed.
        scalar_expectations = (
            ('schema', 'CdmManifestDefinition.cdm.json'),
            ('jsonSchemaSemanticVersion', '0.9.0'),
            ('manifestName', 'cdmTest'),
            ('explanation', 'test cdm folder for cdm version 0.9+'),
        )
        for attribute, expected in scalar_expectations:
            self.assertEqual(getattr(data, attribute), expected)

        # Collections: verify the size first, then the first element.
        imports = data.imports
        self.assertEqual(1, len(imports))
        self.assertEqual(imports[0].corpusPath, '/primitives.cdm.json')

        self.assertEqual(1, len(data.exhibitsTraits))

        entities = data.entities
        self.assertEqual(2, len(entities))
        self.assertEqual(entities[0]['entityName'], 'testEntity')

        sub_manifests = data.subManifests
        self.assertEqual(1, len(sub_manifests))
        self.assertEqual(sub_manifests[0].definition, 'test definition')

        self.assertEqual(data.lastFileModifiedTime, None)
Ejemplo n.º 2
0
    async def test_programmatically_create_partitions(self):
        """Check that partition locations assigned in code come out of to_data unchanged."""
        corpus = CdmCorpusDefinition()
        corpus.ctx.report_at_level = CdmStatusLevel.WARNING
        corpus.storage.mount('local', LocalAdapter())
        manifest = corpus.make_object(CdmObjectType.MANIFEST_DEF, 'manifest')
        entity = manifest.entities.append('entity')

        # Build both partitions first; they are attached to the entity afterwards.
        partition_specs = (
            ('relative partition', 'relative/path'),
            ('absolute partition', 'local:/absolute/path'),
        )
        created = []
        for partition_name, location in partition_specs:
            partition = corpus.make_object(
                CdmObjectType.DATA_PARTITION_DEF, partition_name)
            partition.location = location
            created.append(partition)

        for partition in created:
            entity.data_partitions.append(partition)

        manifest_data = ManifestPersistence.to_data(manifest, None, None)
        self.assertEqual(len(manifest_data.entities), 1)
        serialized = manifest_data.entities[0].dataPartitions
        self.assertEqual(len(serialized), 2)

        # The relative partition was added first, the absolute one last.
        first_data, last_data = serialized[0], serialized[-1]
        self.assertEqual(first_data.location, created[0].location)
        self.assertEqual(last_data.location, created[1].location)
Ejemplo n.º 3
0
    def test_load_local_entity_with_data_partition_pattern(self):
        """Load a manifest with data partition patterns and verify both the object model and its to_data form."""
        content = TestHelper.get_input_file_content(
            self.test_subpath,
            'test_load_local_entity_with_data_partition_pattern',
            'entities.manifest.cdm.json')
        manifest_content = ManifestContent()
        manifest_content.decode(content)

        ctx = CdmCorpusContext(CdmCorpusDefinition(), None)
        cdm_manifest = ManifestPersistence.from_object(
            ctx, 'entities', 'testNamespace', '/', manifest_content)
        expected_parameters = ('testParam1', 'testParam2')

        # ---- object model: first entity (regular-expression pattern) ----
        self.assertEqual(len(cdm_manifest.entities), 2)
        first_entity = cdm_manifest.entities[0]
        self.assertEqual(first_entity.object_type,
                         CdmObjectType.LOCAL_ENTITY_DECLARATION_DEF)
        self.assertEqual(len(first_entity.data_partition_patterns), 1)
        first_pattern = first_entity.data_partition_patterns[0]
        for attribute, expected in (
                ('name', 'testPattern'),
                ('explanation', 'test explanation'),
                ('root_location', 'test location'),
                ('regular_expression', '\\s*')):
            self.assertEqual(getattr(first_pattern, attribute), expected)
        self.assertEqual(len(first_pattern.parameters), 2)
        for index, expected in enumerate(expected_parameters):
            self.assertEqual(first_pattern.parameters[index], expected)
        self.assertEqual(first_pattern.specialized_schema,
                         'test special schema')
        self.assertEqual(len(first_pattern.exhibits_traits), 1)

        # ---- object model: second entity (glob pattern) ----
        second_entity = cdm_manifest.entities[1]
        self.assertEqual(second_entity.object_type,
                         CdmObjectType.LOCAL_ENTITY_DECLARATION_DEF)
        self.assertEqual(len(second_entity.data_partition_patterns), 1)
        second_pattern = second_entity.data_partition_patterns[0]
        for attribute, expected in (
                ('name', 'testPattern2'),
                ('root_location', 'test location2'),
                ('glob_pattern', '/*.csv')):
            self.assertEqual(getattr(second_pattern, attribute), expected)

        # ---- serialized form produced by to_data ----
        manifest_data = ManifestPersistence.to_data(cdm_manifest, None, None)
        self.assertEqual(len(manifest_data.entities), 2)
        first_entity_data = manifest_data.entities[0]
        self.assertEqual(len(first_entity_data.dataPartitionPatterns), 1)
        first_pattern_data = first_entity_data.dataPartitionPatterns[0]
        for attribute, expected in (
                ('name', 'testPattern'),
                ('explanation', 'test explanation'),
                ('rootLocation', 'test location'),
                ('regularExpression', '\\s*')):
            self.assertEqual(getattr(first_pattern_data, attribute), expected)
        self.assertEqual(len(first_pattern_data.parameters), 2)
        for index, expected in enumerate(expected_parameters):
            self.assertEqual(first_pattern_data.parameters[index], expected)
        self.assertEqual(first_pattern_data.specializedSchema,
                         'test special schema')
        self.assertEqual(len(first_pattern_data.exhibitsTraits), 1)

        second_pattern_data = manifest_data.entities[1].dataPartitionPatterns[0]
        for attribute, expected in (
                ('name', 'testPattern2'),
                ('rootLocation', 'test location2'),
                ('globPattern', '/*.csv')):
            self.assertEqual(getattr(second_pattern_data, attribute), expected)
Ejemplo n.º 4
0
    async def test_loading_model_json_result_and_cdm_folder_to_data(self):
        """Fetch the model.json manifest and compare its CDM-folder to_data output with the expected file."""
        test_name = 'test_loading_model_json_result_and_cdm_folder_to_data'
        cdm_corpus = TestHelper.get_local_corpus(self.tests_subpath, test_name)
        local_root = cdm_corpus.storage.fetch_root_folder('local')
        manifest = await cdm_corpus.fetch_object_async(
            PersistenceLayer.MODEL_JSON_EXTENSION, local_root)

        expected_file = 'cdmFolder{}'.format(PersistenceLayer.CDM_EXTENSION)
        expected_data = TestHelper.get_expected_output_data(
            self.tests_subpath, test_name, expected_file)

        # Round-trip through the encoded form so plain JSON structures are compared.
        serialized = CdmManifestPersistence.to_data(manifest, None, None)
        actual_data = json.loads(serialized.encode())

        # An empty message means no difference was reported.
        error_msg = TestHelper.compare_same_object(expected_data, actual_data)
        self.assertEqual('', error_msg, error_msg)
Ejemplo n.º 5
0
    async def test_programmatically_create_partitions(self):
        """Verify that partitions created in code serialize their location and arguments.

        Two partitions are attached to one entity: a relative one carrying
        three argument values spread over two names, and an absolute one whose
        only argument list is empty. After to_data the relative partition must
        expose one entry per argument value, and the absolute partition must
        have no arguments at all.
        """
        corpus = TestHelper.get_local_corpus(
            self.test_subpath,
            'test_programmatically_create_partitions',
            no_input_and_output_folder=True)
        manifest = corpus.make_object(CdmObjectType.MANIFEST_DEF, 'manifest')
        entity = manifest.entities.append('entity')

        relative_partition = corpus.make_object(
            CdmObjectType.DATA_PARTITION_DEF, 'relative partition')
        relative_partition.location = 'relative/path'
        relative_partition.arguments['test1'] = ['argument1']
        relative_partition.arguments['test2'] = ['argument2', 'argument3']

        absolute_partition = corpus.make_object(
            CdmObjectType.DATA_PARTITION_DEF, 'absolute partition')
        absolute_partition.location = 'local:/absolute/path'
        # An empty arguments list should not be emitted in the to_data output.
        absolute_partition.arguments['test'] = []

        entity.data_partitions.append(relative_partition)
        entity.data_partitions.append(absolute_partition)

        manifest_data = ManifestPersistence.to_data(manifest, None, None)
        self.assertEqual(len(manifest_data.entities), 1)
        entity_data = manifest_data.entities[0]
        partitions_list = entity_data.dataPartitions
        self.assertEqual(len(partitions_list), 2)
        relative_partition_data = partitions_list[0]
        absolute_partition_data = partitions_list[-1]

        self.assertEqual(relative_partition_data.location,
                         relative_partition.location)
        arguments_list = relative_partition_data.arguments
        self.assertEqual(3, len(arguments_list))

        # Maps every expected argument value to the name it was registered under.
        expected_names = {
            'argument1': 'test1',
            'argument2': 'test2',
            'argument3': 'test2',
        }
        checked_arguments = []
        for argument in arguments_list:
            self.assertEqual(3, len(argument))
            checked_arguments.append(argument.value)
            if argument.value not in expected_names:
                # Report as a test failure rather than an unexpected error.
                self.fail('unexpected argument in data partitions')
            self.assertEqual(expected_names[argument.value], argument.name)
        self.assertIn('argument1', checked_arguments)
        self.assertIn('argument2', checked_arguments)
        self.assertIn('argument3', checked_arguments)

        self.assertEqual(absolute_partition_data.location,
                         absolute_partition.location)
        # The empty argument list must be serialized as None.
        self.assertIsNone(absolute_partition_data.arguments)
Ejemplo n.º 6
0
    async def test_to_incremental_partition_without_trait(self):
        """
        Save a manifest whose local entity declaration holds an incremental partition
        that lacks the incremental trait, and check that only the valid partition is
        serialized and that the expected message is reported.
        """
        test_name = 'test_to_incremental_partition_without_trait'
        corpus = TestHelper.get_local_corpus(self.test_subpath, test_name)
        error_message_verified = False

        # The CdmLogCode is deliberately not checked: the point is that the message text
        # is built correctly for a partition (partition patterns share the same code).
        expected_fragment = (
            "Failed to persist object 'DeletePartition'. "
            "This object does not contain the trait 'is.partition.incremental', "
            "so it should not be in the collection 'incremental_partitions'. | to_data")

        def callback(level, message):
            nonlocal error_message_verified
            if expected_fragment not in message:
                self.fail('Some unexpected failure - {}!'.format(message))
            error_message_verified = True

        corpus.set_event_callback(callback, CdmStatusLevel.WARNING)

        manifest = CdmManifestDefinition(corpus.ctx, 'manifest')
        corpus.storage.fetch_root_folder('local').documents.append(manifest)
        entity = CdmEntityDefinition(corpus.ctx, 'entityName', None)
        create_document_for_entity(corpus, entity)
        entity_declaration = manifest.entities.append(entity)

        # Partition that carries the incremental trait.
        upsert_partition = corpus.make_object(
            CdmObjectType.DATA_PARTITION_DEF, 'UpsertPartition', False)
        upsert_partition.location = '/IncrementalData'
        upsert_partition.specialized_schema = 'csv'
        upsert_partition.exhibits_traits.append(
            Constants._INCREMENTAL_TRAIT_NAME,
            [['type', CdmIncrementalPartitionType.UPSERT.value]])

        # Partition without the incremental trait.
        delete_partition = corpus.make_object(
            CdmObjectType.DATA_PARTITION_DEF, 'DeletePartition', False)
        delete_partition.location = '/IncrementalData'
        delete_partition.specialized_schema = 'csv'

        for partition in (upsert_partition, delete_partition):
            entity_declaration.incremental_partitions.append(partition)

        with logger._enter_scope(DataPartitionTest.__name__, corpus.ctx,
                                 test_name):
            manifest_data = ManifestPersistence.to_data(manifest, None, None)

            self.assertEqual(1, len(manifest_data.entities))
            serialized_partitions = manifest_data.entities[0].incrementalPartitions
            self.assertEqual(1, len(serialized_partitions))
            kept_partition = serialized_partitions[0]
            self.assertEqual('UpsertPartition', kept_partition.name)
            self.assertEqual(1, len(kept_partition.exhibitsTraits))
            self.assertEqual(Constants._INCREMENTAL_TRAIT_NAME,
                             kept_partition.exhibitsTraits[0].traitReference)

        self.assertTrue(error_message_verified)
Ejemplo n.º 7
0
    async def test_loading_model_json_and_cdm_folder_to_data(self):
        """Fetch the model.json manifest and validate its CDM-folder to_data output."""
        test_name = 'test_loading_model_json_and_cdm_folder_to_data'
        cdm_corpus = TestHelper.get_local_corpus(self.tests_subpath, test_name)
        local_root = cdm_corpus.storage.fetch_root_folder('local')
        manifest = await cdm_corpus.fetch_object_async(
            PersistenceLayer.MODEL_JSON_EXTENSION, local_root)

        # Decode the encoded output back into plain JSON structures for validation.
        serialized = CdmManifestPersistence.to_data(manifest, None, None)
        actual_data = json.loads(serialized.encode())

        output_file = 'cdmFolder{}'.format(PersistenceLayer.CDM_EXTENSION)
        self._validate_output(test_name, output_file, actual_data)
Ejemplo n.º 8
0
    async def test_loading_model_json_and_cdm_folder_to_data(self):
        """Fetch 'model.json', save it as 'cdm.json' under 'output', and compare its to_data output with the expected file."""
        test_name = 'test_loading_model_json_and_cdm_folder_to_data'
        cdm_corpus = TestHelper.get_local_corpus(self.tests_subpath, test_name)
        local_root = cdm_corpus.storage.fetch_root_folder('local')
        manifest = await cdm_corpus.fetch_object_async('model.json', local_root)

        # Attach the manifest to the output folder before saving it and its references.
        output_root = cdm_corpus.storage.fetch_root_folder('output')
        output_root.documents.append(manifest)
        await manifest.save_as_async('cdm.json', save_referenced=True)

        expected_data = TestHelper.get_expected_output_data(
            self.tests_subpath, test_name, 'cdmFolder.cdm.json')
        serialized = CdmManifestPersistence.to_data(manifest, None, None)
        actual_data = json.loads(serialized.encode())

        # An empty message means no difference was reported.
        error_msg = TestHelper.compare_same_object(expected_data, actual_data)
        self.assertEqual('', error_msg, error_msg)
Ejemplo n.º 9
0
    async def _save_document_as_async(self, doc: 'CdmDocumentDefinition',
                                      options: 'CopyOptions', new_name: str,
                                      save_referenced: bool) -> bool:
        """Persist a manifest or document under a new or existing name.

        The corpus does the actual work because it knows about persistence
        types and storage adapters. If saved with the same name, the document
        is considered 'clean' from changes; if saved with a back-compat model
        or to a different name, the source object is still 'dirty'.

        Args:
            doc: The document to persist.
            options: Copy options forwarded to the persistence classes; its
                `_is_top_level_document` flag is reset after the adapter
                config has been written.
            new_name: File name to save under. Its extension selects the
                persistence format.
            save_referenced: When True and the CDM folder format was written,
                linked documents are saved as well.

        Returns:
            True when the document (and, if requested, its linked documents)
            was written; False on any failure. Every failure is logged.
        """
        method_name = self._save_document_as_async.__name__

        # find out if the storage adapter is able to write.
        namespace = doc.namespace
        if namespace is None:
            namespace = self._corpus.storage.default_namespace

        adapter = self._corpus.storage.fetch_adapter(namespace)
        if adapter is None:
            logger.error(
                self._ctx, self._TAG, method_name, doc.at_corpus_path,
                CdmLogCode.ERR_PERSIST_ADAPTER_NOT_FOUND_FOR_NAMESPACE,
                namespace)
            return False
        if not adapter.can_write():
            logger.error(self._ctx, self._TAG, method_name,
                         doc.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_ADAPTER_WRITE_FAILURE,
                         namespace)
            return False

        if not new_name:
            logger.error(self._ctx, self._TAG, method_name,
                         doc.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_NULL_DOC_NAME)
            # Return False (not None) so that every failure path honours the
            # declared bool return type.
            return False

        # what kind of document is requested?
        # check file extensions using a case-insensitive ordinal string comparison.
        lower_name = new_name.lower()
        persistence_type = self.MODEL_JSON if lower_name.endswith(
            self.MODEL_JSON_EXTENSION) else self.CDM_FOLDER

        # The model.json format may only be written to a file named exactly
        # like the extension (ignoring case).
        if persistence_type == self.MODEL_JSON and lower_name != self.MODEL_JSON_EXTENSION:
            logger.error(self._ctx, self._TAG, method_name,
                         doc.at_corpus_path, CdmLogCode.ERR_PERSIST_FAILURE,
                         new_name, self.MODEL_JSON_EXTENSION)
            return False

        # save the object into a json blob
        res_opt = {
            'wrt_doc': doc,
            'directives': AttributeResolutionDirectiveSet()
        }
        persisted_doc = None

        try:
            manifest_extensions = (PersistenceLayer.MODEL_JSON_EXTENSION,
                                   PersistenceLayer.MANIFEST_EXTENSION,
                                   PersistenceLayer.FOLIO_EXTENSION)
            if lower_name.endswith(manifest_extensions):
                if persistence_type == self.CDM_FOLDER:
                    from cdm.persistence.cdmfolder import ManifestPersistence
                    persisted_doc = ManifestPersistence.to_data(
                        doc, res_opt, options)
                else:
                    # NOTE(review): this comparison is case-sensitive, unlike
                    # the case-insensitive check above - confirm it is intended.
                    if new_name != self.MODEL_JSON_EXTENSION:
                        logger.error(self._ctx, self._TAG, method_name,
                                     doc.at_corpus_path,
                                     CdmLogCode.ERR_PERSIST_FAILURE, new_name)
                        return False
                    from cdm.persistence.modeljson import ManifestPersistence
                    # The model.json to_data is a coroutine, unlike the CDM folder one.
                    persisted_doc = await ManifestPersistence.to_data(
                        doc, res_opt, options)
            elif lower_name.endswith(PersistenceLayer.CDM_EXTENSION):
                from cdm.persistence.cdmfolder import DocumentPersistence
                persisted_doc = DocumentPersistence.to_data(
                    doc, res_opt, options)
            else:
                # Could not find a registered persistence class to handle this document type.
                logger.error(self._ctx, self._TAG, method_name,
                             doc.at_corpus_path,
                             CdmLogCode.ERR_PERSIST_CLASS_MISSING, new_name)
                return False
        except Exception as e:
            logger.error(self._ctx, self._TAG, method_name,
                         doc.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_FILE_PERSIST_ERROR, new_name,
                         e)
            return False

        if not persisted_doc:
            logger.error(self._ctx, self._TAG, method_name,
                         doc.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_FILE_PERSIST_FAILED, new_name)
            return False

        # turn the name into a path
        new_path = '{}{}'.format(doc.folder_path, new_name)
        new_path = self._ctx.corpus.storage.create_absolute_corpus_path(
            new_path, doc)
        # strip the leading '<namespace>:' prefix before handing the path to the adapter
        if new_path.startswith(namespace + ':'):
            new_path = new_path[len(namespace) + 1:]

        # ask the adapter to make it happen
        try:
            content = persisted_doc.encode()
            await adapter.write_async(new_path, content)

            doc._file_system_modified_time = await adapter.compute_last_modified_time_async(
                new_path)

            # Write the adapter's config.
            if options._is_top_level_document:
                await self._corpus.storage.save_adapters_config_async(
                    '/config.json', adapter)

                # The next document won't be top level, so reset the flag.
                options._is_top_level_document = False
        except Exception as e:
            logger.error(self._ctx, self._TAG, method_name,
                         doc.at_corpus_path,
                         CdmLogCode.ERR_PERSIST_FILE_WRITE_FAILURE, new_name,
                         e)
            return False

        # if we also want to save referenced docs, then it depends on what kind of thing just got saved
        # if a model.json there are none. If a manifest or definition doc then ask the docs to do the right things
        # definition will save imports, manifests will save imports, schemas, sub manifests
        if save_referenced and persistence_type == self.CDM_FOLDER:
            saved_linked_docs = await doc._save_linked_documents_async(options)
            if not saved_linked_docs:
                logger.error(self._ctx, self._TAG, method_name,
                             doc.at_corpus_path,
                             CdmLogCode.ERR_PERSIST_SAVE_LINK_DOCS, new_name)
                return False
        return True