Beispiel #1
0
def retrieve_resource_list(configuration_file=CONFIGURATION_FILE_PATH, package=Package()):
    """Collect the resources of every resource group attached to ``package``.

    :param configuration_file: configuration handed unchanged to the database
        helpers.
    :param package: package whose resource groups are looked up.
    :return: one flat list holding, in group order, everything
        ``retrieve_resource_from_postgres`` yields for each resource group;
        an empty list when no resource group is found.
    """
    resource_group_list = retrieve_resource_group_list_from_postgres(configuration_file,
                                                                     package)  # type: list[ResourceGroup]
    resource_list = []
    for resource_group in resource_group_list:
        # Extend in place: rebuilding the list with ``+`` on every iteration
        # copies all previously collected resources again.
        resource_list.extend(retrieve_resource_from_postgres(configuration_file, resource_group))
    return resource_list
    def test_convert_package_and_resource_to_distribution(self):
        """Smoke-test ``convert_resource_to_distribution`` on a hand-built resource.

        The test makes no assertion on the result; it only exercises the
        conversion against the test configuration.
        """
        # NOTE(review): this package is built but never handed to the converter.
        package = Package()
        package.license_id = 'http://data.europa.eu/euodp/kos/licence/EuropeanCommission'

        resource = Resource()
        resource.resource_type = 'http://data.europa.eu/euodp/kos/documentation-type/MainDocumentation'
        resource.description = 'Download dataset in TSV format'
        resource.format = 'application/x-gzip'
        resource.created = '2017-06-20 08:24:55'
        resource.name = 'ESMS metadata (Euro-SDMX Metadata structure) SDMX'
        resource.last_modified = ''
        controlled_vocabulary = ControlledVocabulary()
        database_to_ontology_converter.convert_resource_to_distribution(
            TEST_CONFIG_FILE_PATH,
            resource=resource,
            file_types=controlled_vocabulary.controlled_file_types,
            # Pass an actual status entry, not the iterator's bound ``next`` method.
            status=next(controlled_vocabulary.controlled_status.itervalues()),
            distribution_types=controlled_vocabulary.controlled_distribution_types)
Beispiel #3
0
def set_package_titles(configuration_file=CONFIGURATION_FILE_PATH, dataset_schema=None,
                       package=Package()):
    """Fill ``dataset_schema.title_dcterms`` from the package title and its translations.

    The package title is stored under key ``'0'`` with the English language
    code. Every ``TermTranslation`` row found for that title and carrying a
    non-empty ``term_translation`` is appended under the next free numeric
    string key.

    :param configuration_file: configuration handed to ``find_any_in_database``.
    :param dataset_schema: schema to fill; a fresh ``DatasetSchemaDcatApOp("")``
        is created when it is missing.
    :param package: package providing the title; nothing is set when the
        title is empty.
    :return: the schema that was filled, so a schema created here is not lost.
    """
    if not dataset_schema:
        dataset_schema = DatasetSchemaDcatApOp("")
    if package.title:
        dataset_schema.title_dcterms['0'] = ResourceValue(package.title, lang=LanguagesConstants.LANGUAGE_CODE_EN)
        condition = TermTranslation.term == package.title
        titles = find_any_in_database(configuration_file, condition, TermTranslation)  # type: list[TermTranslation]
        for title in titles:
            if title.term_translation:
                # Keys are consecutive numeric strings: '0', '1', ...
                length = str(len(dataset_schema.title_dcterms))
                dataset_schema.title_dcterms[length] = ResourceValue(title.term_translation, lang=title.lang_code)
    return dataset_schema
Beispiel #4
0
def set_package_descriptions(configuration_file=CONFIGURATION_FILE_PATH, dataset_schema=None,
                             package=Package()):
    """Fill ``dataset_schema.description_dcterms`` from the package description.

    The description is ``package.notes`` or, when that is empty,
    ``package.description``. It is stored under key ``'0'`` with the English
    language code. Every ``TermTranslation`` row found for it and carrying a
    non-empty ``term_translation`` is appended under the next free numeric
    string key.

    :param configuration_file: configuration handed to ``find_any_in_database``.
    :param dataset_schema: schema to fill; a fresh ``DatasetSchemaDcatApOp("")``
        is created when it is missing.
    :param package: package providing the description; nothing is set when
        both ``notes`` and ``description`` are empty.
    :return: the schema that was filled, so a schema created here is not lost.
    """
    if not dataset_schema:
        dataset_schema = DatasetSchemaDcatApOp("")
    description = package.notes or package.description
    if not description:
        # Without this guard an empty value would be stored as the description
        # and, for ``None``, translations of the literal text u'None' would be
        # looked up.
        return dataset_schema
    dataset_schema.description_dcterms['0'] = ResourceValue(description, lang=LanguagesConstants.LANGUAGE_CODE_EN)
    condition = TermTranslation.term == u'{0}'.format(description)
    translations = find_any_in_database(configuration_file, condition, TermTranslation)  # type: list[TermTranslation]
    for translation in translations:
        # Same rule as for titles: rows without a translated text are skipped.
        if translation.term_translation:
            length = str(len(dataset_schema.description_dcterms))
            dataset_schema.description_dcterms[length] = ResourceValue(translation.term_translation,
                                                                       lang=translation.lang_code)
    return dataset_schema
def create_package(self):
    """Assemble a ``Package`` by hand from fixed sample values.

    NOTE(review): within this block the package is neither returned nor
    stored anywhere - confirm against the full source.
    """
    # Manual creation of a package
    package = Package()
    # (attribute, value) pairs, applied in this exact order.
    field_values = (
        ('creator_user_id', u'75d0190a-2f12-40ed-a863-b5bf30990d14'),
        ('id', u'cdd61a23-eb87-4e06-808c-ed4bd69d2247'),
        ('license_id', u'http://data.europa.eu/euodp/kos/licence/EuropeanCommission'),
        ('metadata_modified', date.today()),
        ('name', u'ted-1'),
        ('notes', u'The European Union together with its Member States is the world\'s largest'),
        ('owner_org', u'dbda9968-cb1e-47c3-b115-245a057e4d4a'),
        ('private', False),
        ('revision_id', u'9f663b6c-3467-4c33-baa1-f2e65dbe5cf1'),
        ('state', u'active'),
        ('title', u'Special Eurobarometer 453: Humanitarian aid'),
        ('type', u'dataset'),
        ('url', u'http://ted.europa.eu/TED/main/HomePage.do'),
        ('version', u'v1.00'),
    )
    for field_name, field_value in field_values:
        setattr(package, field_name, field_value)
Beispiel #6
0
def convert_package_to_dataset(package=Package(), controlled_vocabulary=ControlledVocabulary(),
                               configuration_file=CONFIGURATION_FILE_PATH):
    """Build a ``DatasetDcatApOp`` from a package and its related database rows.

    Package extras, tags, groups and resources are loaded through the
    ``retrieve_*`` helpers and mapped onto a ``DatasetSchemaDcatApOp`` by the
    ``set_*`` helpers.

    :param package: package to convert; its ``name`` is appended to
        ``DATASET_URI_PREFIX`` to form the dataset URI.
    :param controlled_vocabulary: provider of the controlled lists used during
        the mapping (publishers, frequencies, languages, countries, status,
        file, documentation and distribution types).
    :param configuration_file: configuration handed to the database helpers.
    :return: the dataset with ``schema`` and ``schema_catalog_record`` set; a
        private package is placed in the private graph.
    """
    package_extra_list = \
        retrieve_package_extra_list_from_postgres(configuration_file, package)  # type: list[PackageExtra]

    tag_list = retrieve_tag_list_from_postgres(configuration_file, package)

    resource_list = retrieve_resource_list(configuration_file, package)

    dataset_uri = DATASET_URI_PREFIX + package.name
    dataset = DatasetDcatApOp(dataset_uri)

    dataset.graph_name = DCATAPOP_PUBLIC_GRAPH_NAME
    if package.private:
        dataset.graph_name = DCATAPOP_PRIVATE_GRAPH_NAME
        dataset.privacy_state = PRIVACY_STATE_PRIVATE

    dataset_schema = DatasetSchemaDcatApOp(dataset_uri,
                                           graph_name=dataset.graph_name)  # 1...1
    #dataset_schema.identifier_adms['0'] = SchemaGeneric(dataset_uri)
    dataset.schema_catalog_record = set_catalog_record(package, package_extra_list, dataset_schema)

    dataset_schema.versionInfo_owl['0'] = ResourceValue(package.version)

    #dataset_schema.isPartOfCatalog_dcatapop['0'] = CatalogSchemaDcatApOp(uri_util.new_cataloge_uri_from_title())

    set_landing_page(dataset_schema, package)

    set_package_titles(configuration_file, dataset_schema, package)  # 0...n
    set_package_descriptions(configuration_file, dataset_schema, package)  # 0...n

    dataset_schema.ckanName_dcatapop['0'] = ResourceValue(package.name)  # 1...1

    dataset_schema.modified_dcterms['0'] = ResourceValue(str(package.metadata_modified))

    groups = retrieve_groups(configuration_file, package)
    # To process only once the groups, multiple set are done once.
    set_publisher_and_theme_and_group(dataset_schema, groups, controlled_vocabulary.controlled_publishers)  # 0...1
    if not dataset_schema.publisher_dcterms.get('0', None):
        # No publisher came out of the groups: fall back to the owning organisation.
        owner = model.Group.get(package.owner_org)
        if owner:
            dataset_schema.publisher_dcterms['0'] = AgentSchemaDcatApOp(
                'http://publications.europa.eu/resource/authority/corporate-body/{0}'.format(owner.name.upper()),
                graph_name=dataset_schema.graph_name)
        else:
            log.warn('Dataset {0} has no publisher'.format(dataset_schema.uri))
            #raise MigrationError(message='Dataset {0} has no publisher'.format(dataset_schema.uri))

    # Extras that are recognised but deliberately not migrated.
    # RELEASE_DATE is among them: issued_dcterms is filled further down from
    # get_metadata_created_timestamp instead.
    ignored_extra_keys = (METADATA_LANGUAGE, CITATION, RELEASE_DATE, EVALUATION_DATE, SOURCE,
                          ANALYST_IN_EXTRA_FIELD, THIS_IS_EXTRA_FIELD, MODIFIED_DATE, KIC, CLC,
                          DATA_SOURCE, EIT)

    for package_extra in package_extra_list:
        if package_extra.value:
            if package_extra.key == ACCRUAL_PERIODICITY:
                set_accrual_periodicity(dataset_schema, package_extra,
                                        controlled_vocabulary.controlled_frequencies)  # 0...1
            elif package_extra.key == TEMPORAL_COVERAGE_FROM:
                set_temporal(dataset_schema, package_extra)  # 0...1
            elif package_extra.key == TEMPORAL_COVERAGE_TO:
                set_temporal_to(dataset_schema, package_extra)  # 0...1
            elif package_extra.key == ALTERNATIVE_TITLE:
                set_alternative_titles(configuration_file, dataset_schema, package_extra)  # 0...n
            elif package_extra.key == IDENTIFIER:
                set_identifier(dataset_schema, package_extra)  # 0...n
            elif package_extra.key in ignored_extra_keys:
                pass
            elif package_extra.key == 'version_description':
                set_version_note(dataset_schema, package_extra)

    controlled_status = ""
    for tag in tag_list:  # type: Tag
        if tag.name:
            if not tag.vocabulary_id:  # where voc = /
                set_keyword(dataset_schema, tag, configuration_file)  # 0...n
            elif tag.vocabulary_id == VOC_LANGUAGE_ID:  # where voc = language
                set_language(dataset_schema, tag, controlled_vocabulary.controlled_languages)  # 0...n
            elif tag.vocabulary_id == VOC_GEO_COVERAGE:  # where voc = geographical_coverage
                set_spatial(dataset_schema, tag, controlled_vocabulary.controlled_country)  # 0...n
            elif tag.vocabulary_id == VOC_DATASET_TYPE:  # where voc = dataset_type
                set_dataset_type(dataset_schema, tag)  # 0...1
            elif tag.vocabulary_id == VOC_CONCEPTS_EUROVOC:  # where voc = concepts_eurovoc
                set_subject(dataset_schema, tag)  # 0...1
            elif tag.vocabulary_id == VOC_STATUS:  # where voc = status
                # Only the last path segment of the tag is compared with the vocabulary values.  # 0...1
                package_status_upper_case = tag.name.split('/')[-1].upper()
                if package_status_upper_case == 'UNDERDEVELOPMENT':
                    package_status_upper_case = 'DEVELOP'
                # A default is required here: a bare next() on an exhausted
                # generator raises StopIteration, which would abort the
                # conversion (or silently end an enclosing generator).
                matching_status = next(
                    (uri for uri, value in controlled_vocabulary.controlled_status.iteritems()
                     if value == package_status_upper_case),
                    None)
                if matching_status is None:
                    log.warn('Dataset %s has unknown status %s', dataset_schema.uri, tag.name)
                else:
                    controlled_status = matching_status

                    # TODO no property for that in new ontology
                    # elif tag.vocabulary_id == '0311e5a2-c6a0-49c7-84cc-1ceec129fd7c':  # where voc = interoperability_level

    # TODO verify this field
    dataset_schema.issued_dcterms['0'] = ResourceValue(str(get_metadata_created_timestamp(package.id)),
                                                       datatype=NAMESPACE_DCATAPOP.xsd + DATE_TIME)  # 0...1

    for resource in resource_list:
        # Fall back to '' so a resource with neither a type nor extras is
        # handled as a distribution instead of failing the ``in`` tests below.
        resource_type = resource.resource_type or resource.extras or ''
        if MAIN_DOCUMENTATION in resource_type \
                or RELATED_DOCUMENTATION in resource_type \
                or WEB_RELATED_DOCUMENTATION in resource_type:
            set_document(configuration_file,
                         dataset_schema,
                         resource,
                         controlled_vocabulary.controlled_file_types,
                         controlled_vocabulary.controlled_documentation_types)  # 0...n
        else:
            set_distribution(configuration_file,
                             dataset_schema,
                             resource,
                             controlled_status,
                             controlled_vocabulary.controlled_file_types,
                             controlled_vocabulary.controlled_distribution_types)

    set_contact_point(dataset_schema, package_extra_list)

    dataset.schema = dataset_schema

    return dataset
Beispiel #7
0
def retrieve_package_extra_list_from_postgres(configuration_file=CONFIGURATION_FILE_PATH, package=Package()):
    # type: (str, Package) -> list[PackageExtra]
    """Query the ``PackageExtra`` rows whose ``package_id`` equals ``package.id``.

    :param configuration_file: configuration handed to ``find_any_in_database``.
    :param package: package whose ``id`` is used as the filter.
    :return: whatever ``find_any_in_database`` yields for that filter.
    """
    return find_any_in_database(configuration_file,
                                PackageExtra.package_id == package.id,
                                PackageExtra)
Beispiel #8
0
def retrieve_package_tag_list(configuration_file=CONFIGURATION_FILE_PATH, package=Package()):
    # type: (str, Package) -> list[PackageTag]
    """Query the ``PackageTag`` rows whose ``package_id`` equals ``package.id``.

    :param configuration_file: configuration handed to ``find_any_in_database``.
    :param package: package whose ``id`` is used as the filter.
    :return: whatever ``find_any_in_database`` yields for that filter.
    """
    return find_any_in_database(configuration_file,
                                PackageTag.package_id == package.id,
                                PackageTag)
Beispiel #9
0
def retrieve_tag_list_from_postgres(configuration_file=CONFIGURATION_FILE_PATH, package=Package()):
    """Return the tags of the package's tag links that are in the active state.

    :param configuration_file: configuration handed to ``retrieve_package_tag_list``.
    :param package: package whose tag links are inspected.
    :return: list of the ``tag`` attribute of every link whose ``state``
        equals ``ACTIVE_STATE``, in the order the links were retrieved.
    """
    links = retrieve_package_tag_list(configuration_file, package)  # type: list[PackageTag]
    return [link.tag for link in links if link.state == ACTIVE_STATE]
Beispiel #10
0
def retrieve_resource_group_list_from_postgres(configuration_file=CONFIGURATION_FILE_PATH, package=Package()):
    """Query the ``ResourceGroup`` rows whose ``package_id`` equals ``package.id``.

    :param configuration_file: configuration handed to ``find_any_in_database``.
    :param package: package whose ``id`` is used as the filter.
    :return: whatever ``find_any_in_database`` yields for that filter.
    """
    return find_any_in_database(configuration_file,
                                ResourceGroup.package_id == package.id,
                                ResourceGroup)