def test_migrate_OP_VIP_datasets(self):
        """
        Migrate every package whose id is listed below to Virtuoso.

        Each id is looked up with find_any_in_database and the first match is
        migrated. Nothing is asserted afterwards: the test only fails if a
        lookup or a migration raises.
        """
        # Only the uncommented ids are migrated. The commented ones are kept
        # as they were found -- presumably toggled by hand; confirm before
        # removing them.
        vip_dataset_ids = [
            # 'a572e5ec-0e81-42df-9dde-aad55a50bd44',
            # 'db715fd8-0970-48bb-a1f4-6cb2bb10b36e',
            # 'ed21b53a-e5ff-4077-8191-a4f107ebde6f',
            # 'b941f99a-57da-4576-a544-4b8811acc327',
            # '150c8ae3-9d1f-4971-b23b-2129469abbb3',  (ea731c1b-422b-4b3c-a399-c400302a6c8b)
            # '309e9d59-1c9c-4c79-8394-72bfd8dc7200',
            # '9a2ef9a0-b50e-448d-996d-577e892148e2',
            # '68c15f0f-c77b-42c0-b411-16fbce223932',
            # '54dd2284-52e8-4131-8b9c-3eebb1d88b38',
            # 'e62c401b-e8d8-44c7-a758-abb78c2f62e6',
            # '9e8c7096-553d-40ac-9a74-4f01d552d583',
            'f3daf58c-3ab1-4fb9-8f68-33faf3f73625',
        ]

        # All lookups happen before the vocabulary is built and before any
        # migration starts.
        vip_packages = [
            find_any_in_database(TEST_CONFIG_FILE_PATH,
                                 Package.id == dataset_id,
                                 Package)[0]
            for dataset_id in vip_dataset_ids
        ]

        vocabulary = ControlledVocabulary()
        for vip_package in vip_packages:
            datasets_migration_manager.migrate_package_to_virtuoso(
                config_file_path=TEST_CONFIG_FILE_PATH,
                package=vip_package,
                controlled_vocabulary=vocabulary)
    def test_migrate_3_most_viewed_packages_to_virtuoso(self):
        """
        Migrate the TED, DGT translation and CORDIS H2020 packages, then
        assert that get_description_from_ts() returns True for the dataset
        URI of each of them.
        """
        package_ids = (TED_PACKAGE_ID,
                       DGT_TRANSLATION_PACKAGE_ID,
                       CORDISH2020PROJECTS_PACKAGE_ID)

        # One lookup per id, in the order listed above; the first match wins.
        most_viewed_packages = [
            find_any_in_database(TEST_CONFIG_FILE_PATH,
                                 Package.id == package_id,
                                 Package)[0]
            for package_id in package_ids
        ]

        vocabulary = ControlledVocabulary()
        for most_viewed_package in most_viewed_packages:
            datasets_migration_manager.migrate_package_to_virtuoso(
                config_file_path=TEST_CONFIG_FILE_PATH,
                package=most_viewed_package,
                controlled_vocabulary=vocabulary)

        # Every migrated dataset must be retrievable under its expected URI.
        expected_names = ("ted-1",
                          "cordisH2020projects",
                          "dgt-translation-memory")
        for expected_name in expected_names:
            migrated = DatasetDcatApOp(DATASET_URI_PREFIX + expected_name)
            result = migrated.get_description_from_ts()
            assert result is True
    def test_migrate_dataset_in_multiple_groups(self):
        """
        Migrate the package identified by CONNECT_SPARQL_ENDPOINT_ID and
        assert that its description can be fetched from the private graph.
        """
        matches = find_any_in_database(
            TEST_CONFIG_FILE_PATH,
            Package.id == CONNECT_SPARQL_ENDPOINT_ID,
            Package)
        sparql_endpoint_package = matches[0]

        vocabulary = ControlledVocabulary()
        datasets_migration_manager.migrate_package_to_virtuoso(
            config_file_path=TEST_CONFIG_FILE_PATH,
            package=sparql_endpoint_package,
            controlled_vocabulary=vocabulary)

        migrated_uri = DATASET_URI_PREFIX + "connect-sparql-endpoint"
        migrated = DatasetDcatApOp(migrated_uri,
                                   graph_name=DCATAPOP_PRIVATE_GRAPH_NAME)
        assert migrated.get_description_from_ts() is True
    def test_migrate_dataset_in_group(self):
        """
        Migrate the package identified by ECB_WEB_SERVICE_PACKAGE_ID and
        assert that its description can be fetched from the private graph.
        """
        matches = find_any_in_database(
            TEST_CONFIG_FILE_PATH,
            Package.id == ECB_WEB_SERVICE_PACKAGE_ID,
            Package)
        web_service_package = matches[0]

        vocabulary = ControlledVocabulary()
        datasets_migration_manager.migrate_package_to_virtuoso(
            config_file_path=TEST_CONFIG_FILE_PATH,
            package=web_service_package,
            controlled_vocabulary=vocabulary)

        migrated_uri = DATASET_URI_PREFIX + "ecb-web-service"
        migrated = DatasetDcatApOp(migrated_uri,
                                   graph_name=DCATAPOP_PRIVATE_GRAPH_NAME)
        assert migrated.get_description_from_ts() is True
    def test_migrate_most_viewed_package_to_virtuoso(self):
        """
        Migrate the package identified by TED_PACKAGE_ID and assert that the
        description of the "ted-1" dataset can be fetched afterwards.
        """
        # The vocabulary is built before the package lookup, as in the
        # original ordering of this test.
        vocabulary = ControlledVocabulary()

        matches = find_any_in_database(TEST_CONFIG_FILE_PATH,
                                       Package.id == TED_PACKAGE_ID,
                                       Package)
        most_viewed_package = matches[0]

        datasets_migration_manager.migrate_package_to_virtuoso(
            config_file_path=TEST_CONFIG_FILE_PATH,
            package=most_viewed_package,
            controlled_vocabulary=vocabulary)

        migrated = DatasetDcatApOp(DATASET_URI_PREFIX + "ted-1")
        assert migrated.get_description_from_ts() is True
    def test_migrate_100_packages_to_virtuoso(self):
        """
        Migrate the first 100 packages returned by
        postgresql_helper.get_all_active_packages and log the elapsed
        wall-clock time in seconds. Nothing is asserted.
        """
        # The clock starts before the vocabulary is built, so the logged
        # duration includes that construction.
        started_at = time.time()

        vocabulary = ControlledVocabulary()

        all_active = postgresql_helper.get_all_active_packages(
            TEST_CONFIG_FILE_PATH)
        first_hundred = all_active[:100]  # type:  list[Package]

        for active_package in first_hundred:
            datasets_migration_manager.migrate_package_to_virtuoso(
                config_file_path=TEST_CONFIG_FILE_PATH,
                package=active_package,
                controlled_vocabulary=vocabulary)

        log.info(time.time() - started_at)
# Example #7
# 0
#    published by the Free Software Foundation, either version 3 of the
#    License, or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
#    contact: <https://publications.europa.eu/en/web/about-us/contact>

from ckanext.ecportal.migration.datasets_migration_manager import ControlledVocabulary

# Vocabulary instance created once, when this module is imported.
controlled_vocabulary = ControlledVocabulary()

FILE_FORMAT_MAPPING = {
    "interactive": "http://publications.europa.eu/resource/authority/file-type/DCR",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "http://publications.europa.eu/resource/authority/file-type/XLSX",
    "image/jpeg": "http://publications.europa.eu/resource/authority/file-type/JPEG",
    "application/x-e00": "http://publications.europa.eu/resource/authority/file-type/E00",
    "text/comma-separated-values": "http://publications.europa.eu/resource/authority/file-type/CSV",
    "text/n3": "http://publications.europa.eu/resource/authority/file-type/RDF_N_TRIPLES",
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": "http://publications.europa.eu/resource/authority/file-type/PPTX",
    "application/ms-word": "http://publications.europa.eu/resource/authority/file-type/DOC",
    "application/vnd.ms-excel": "http://publications.europa.eu/resource/authority/file-type/XLS",
    "application/x-compress": "http://publications.europa.eu/resource/authority/file-type/TAR",
    "ZIP": "http://publications.europa.eu/resource/authority/file-type/ZIP",
    "application/x-mxd": "http://publications.europa.eu/resource/authority/file-type/MXD",
    "application/x-n3": "http://publications.europa.eu/resource/authority/file-type/RDF_TURTLE",
# Example #8
# 0
def convert_package_to_dataset(package=Package(), controlled_vocabulary=ControlledVocabulary(),
                               configuration_file=CONFIGURATION_FILE_PATH):
    """
    Convert a package and its related rows into a DatasetDcatApOp.

    The extras, tags, resources and groups of the package are fetched through
    the retrieve_* helpers and mapped onto a DatasetSchemaDcatApOp by the
    set_* helpers. The dataset is placed in DCATAPOP_PRIVATE_GRAPH_NAME when
    package.private is truthy, otherwise in DCATAPOP_PUBLIC_GRAPH_NAME.

    NOTE: the Package() and ControlledVocabulary() defaults are evaluated
    once, when this function is defined, and that single instance is reused
    by every call that omits the argument.

    :param package: package to convert; its name, private, version,
        metadata_modified, owner_org and id attributes are read.
    :param controlled_vocabulary: source of the controlled_* lookups handed
        to the set_* helpers.
    :param configuration_file: configuration file path forwarded to the
        retrieve_* helpers and to several set_* helpers.
    :return: the DatasetDcatApOp whose schema and schema_catalog_record have
        been filled in.
    """
    package_extra_list = \
        retrieve_package_extra_list_from_postgres(configuration_file, package)  # type: list[PackageExtra]

    tag_list = retrieve_tag_list_from_postgres(configuration_file, package)

    resource_list = retrieve_resource_list(configuration_file, package)

    dataset_uri = DATASET_URI_PREFIX + package.name
    dataset = DatasetDcatApOp(dataset_uri)

    # Public graph by default; private packages are moved to the private
    # graph and flagged as private.
    dataset.graph_name = DCATAPOP_PUBLIC_GRAPH_NAME
    if package.private:
        dataset.graph_name = DCATAPOP_PRIVATE_GRAPH_NAME
        dataset.privacy_state = PRIVACY_STATE_PRIVATE

    dataset_schema = DatasetSchemaDcatApOp(dataset_uri,
                                           graph_name=dataset.graph_name)  # 1...1
    #dataset_schema.identifier_adms['0'] = SchemaGeneric(dataset_uri)
    dataset.schema_catalog_record = set_catalog_record(package, package_extra_list, dataset_schema)

    dataset_schema.versionInfo_owl['0'] = ResourceValue(package.version)

    #dataset_schema.isPartOfCatalog_dcatapop['0'] = CatalogSchemaDcatApOp(uri_util.new_cataloge_uri_from_title())

    set_landing_page(dataset_schema, package)

    set_package_titles(configuration_file, dataset_schema, package)  # 0...n
    set_package_descriptions(configuration_file, dataset_schema, package)  # 0...n

    dataset_schema.ckanName_dcatapop['0'] = ResourceValue(package.name)  # 1...1

    dataset_schema.modified_dcterms['0'] = ResourceValue(str(package.metadata_modified))

    groups = retrieve_groups(configuration_file, package)
    # To process only once the groups, multiple set are done once.
    set_publisher_and_theme_and_group(dataset_schema, groups, controlled_vocabulary.controlled_publishers)  # 0...1
    if not dataset_schema.publisher_dcterms.get('0', None):
        # The groups gave no publisher: fall back to the owner organisation
        # and build the corporate-body URI from its upper-cased name.
        owner = model.Group.get(package.owner_org)
        if owner:
            dataset_schema.publisher_dcterms['0'] = AgentSchemaDcatApOp('http://publications.europa.eu/resource/authority/corporate-body/{0}'.format(owner.name.upper()), graph_name=dataset_schema.graph_name)
        else:
            log.warn('Dataset {0} has no publisher'.format(dataset_schema.uri))
            #raise MigrationError(message='Dataset {0} has no publisher'.format(dataset_schema.uri))

    for package_extra in package_extra_list:
        # Extras without a value carry nothing to migrate.
        if not package_extra.value:
            continue

        if package_extra.key == ACCRUAL_PERIODICITY:
            set_accrual_periodicity(dataset_schema, package_extra,
                                    controlled_vocabulary.controlled_frequencies)  # 0...1
        elif package_extra.key == TEMPORAL_COVERAGE_FROM:
            set_temporal(dataset_schema, package_extra)  # 0...1
        elif package_extra.key == TEMPORAL_COVERAGE_TO:
            set_temporal_to(dataset_schema, package_extra)  # 0...1
        elif package_extra.key == ALTERNATIVE_TITLE:
            set_alternative_titles(configuration_file, dataset_schema, package_extra)  # 0...n
        elif package_extra.key == IDENTIFIER:
            set_identifier(dataset_schema, package_extra)  # 0...n
        elif package_extra.key in (METADATA_LANGUAGE, CITATION, RELEASE_DATE, EVALUATION_DATE, SOURCE,
                                   ANALYST_IN_EXTRA_FIELD, THIS_IS_EXTRA_FIELD, MODIFIED_DATE,
                                   KIC, CLC, DATA_SOURCE, EIT):
            # Known keys that are not mapped onto the schema here.
            # RELEASE_DATE used to set issued_dcterms:
            # dataset_schema.issued_dcterms['0'] = ResourceValue(value_or_uri=str(package_extra.value),
            #                                          datatype=NAMESPACE_DCATAPOP.xsd + DATE_TIME)  # 0...1
            pass
        elif package_extra.key == 'version_description':
            set_version_note(dataset_schema, package_extra)

    # Status URI handed to set_distribution for every distribution; stays ""
    # when no status tag resolves to a controlled value.
    controlled_status = ""
    for tag in tag_list:  # type: Tag
        if not tag.name:
            continue

        if not tag.vocabulary_id:  # where voc = /
            set_keyword(dataset_schema, tag, configuration_file)  # 0...n
        elif tag.vocabulary_id == VOC_LANGUAGE_ID:  # where voc = language
            set_language(dataset_schema, tag, controlled_vocabulary.controlled_languages)  # 0...n
        elif tag.vocabulary_id == VOC_GEO_COVERAGE:  # where voc = geographical_coverage
            set_spatial(dataset_schema, tag, controlled_vocabulary.controlled_country)  # 0...n
        elif tag.vocabulary_id == VOC_DATASET_TYPE:  # where voc = dataset_type
            set_dataset_type(dataset_schema, tag)  # 0...1
        elif tag.vocabulary_id == VOC_CONCEPTS_EUROVOC:  # where voc = concepts_eurovoc
            set_subject(dataset_schema, tag)  # 0...1
        elif tag.vocabulary_id == VOC_STATUS:  # where voc = status
            package_status = tag.name  # 0...1
            # Only the last path segment of the tag name is compared,
            # upper-cased, against the controlled status values.
            package_status_upper_case = package_status.split('/')[-1].upper()
            if package_status_upper_case == 'UNDERDEVELOPMENT':
                package_status_upper_case = 'DEVELOP'
            # A default is passed to next() so that an unknown status does
            # not abort the whole conversion with StopIteration.
            matched_status = next(
                (uri for uri, value in controlled_vocabulary.controlled_status.iteritems()
                 if value == package_status_upper_case),
                None)
            if matched_status is None:
                log.warn('Dataset {0} has an unknown status {1}'.format(dataset_schema.uri, package_status))
            else:
                controlled_status = matched_status

        # TODO no property for that in new ontology
        # elif tag.vocabulary_id == '0311e5a2-c6a0-49c7-84cc-1ceec129fd7c':  # where voc = interoperability_level

    # TODO verify this field
    dataset_schema.issued_dcterms['0'] = ResourceValue(str(get_metadata_created_timestamp(package.id)),
                                                       datatype=NAMESPACE_DCATAPOP.xsd + DATE_TIME)  # 0...1

    for resource in resource_list:
        # NOTE(review): when resource_type is falsy and extras is None the
        # membership tests below raise TypeError -- confirm whether that can
        # happen with real data.
        resource_kind = resource.resource_type or resource.extras
        if MAIN_DOCUMENTATION in resource_kind \
                or RELATED_DOCUMENTATION in resource_kind \
                or WEB_RELATED_DOCUMENTATION in resource_kind:
            set_document(configuration_file,
                         dataset_schema,
                         resource,
                         controlled_vocabulary.controlled_file_types,
                         controlled_vocabulary.controlled_documentation_types)  # 0...n
        else:
            set_distribution(configuration_file,
                             dataset_schema,
                             resource,
                             controlled_status,
                             controlled_vocabulary.controlled_file_types,
                             controlled_vocabulary.controlled_distribution_types)

    set_contact_point(dataset_schema, package_extra_list)

    dataset.schema = dataset_schema

    return dataset