def test_migrate_3_most_viewed_packages_to_virtuoso(self):
    """Migrate the TED, DGT translation and CORDIS H2020 packages.

    After migration, each of the three dataset URIs must report ``True``
    from ``get_description_from_ts()``.
    """
    package_ids = (
        TED_PACKAGE_ID,
        DGT_TRANSLATION_PACKAGE_ID,
        CORDISH2020PROJECTS_PACKAGE_ID,
    )
    # Look every package up before migrating anything; [0] takes the first
    # row returned for the id.
    packages_to_migrate = [
        find_any_in_database(TEST_CONFIG_FILE_PATH, Package.id == package_id, Package)[0]
        for package_id in package_ids
    ]

    controlled_vocabulary = ControlledVocabulary()
    for package in packages_to_migrate:
        datasets_migration_manager.migrate_package_to_virtuoso(
            config_file_path=TEST_CONFIG_FILE_PATH,
            package=package,
            controlled_vocabulary=controlled_vocabulary)

    for dataset_name in ("ted-1", "cordisH2020projects", "dgt-translation-memory"):
        dataset = DatasetDcatApOp(DATASET_URI_PREFIX + dataset_name)
        result = dataset.get_description_from_ts()
        assert result is True
def test_find_any(self):
    """Check find_any_in_database returns something with and without a condition."""
    term_filter = TermTranslation.term == "EuroVoc, the EU's multilingual thesaurus"
    titles = postgresql_helper.find_any_in_database(
        config_file_path=TEST_CONFIG_FILE_PATH,
        condition=term_filter,
        table=TermTranslation)
    assert titles is not None

    # Second call passes no condition argument at all.
    packages = postgresql_helper.find_any_in_database(
        config_file_path=TEST_CONFIG_FILE_PATH,
        table=Package)
    assert packages is not None
def test_migrate_OP_VIP_datasets(self):
    """Migrate the currently enabled OP VIP package(s) to Virtuoso."""
    # Disabled ids, kept for reference only (not migrated by this test):
    #   'a572e5ec-0e81-42df-9dde-aad55a50bd44'
    #   'db715fd8-0970-48bb-a1f4-6cb2bb10b36e'
    #   'ed21b53a-e5ff-4077-8191-a4f107ebde6f'
    #   'b941f99a-57da-4576-a544-4b8811acc327'
    #   '150c8ae3-9d1f-4971-b23b-2129469abbb3'
    #   'ea731c1b-422b-4b3c-a399-c400302a6c8b'
    #   '309e9d59-1c9c-4c79-8394-72bfd8dc7200'
    #   '9a2ef9a0-b50e-448d-996d-577e892148e2'
    #   '68c15f0f-c77b-42c0-b411-16fbce223932'
    #   '54dd2284-52e8-4131-8b9c-3eebb1d88b38'
    #   'e62c401b-e8d8-44c7-a758-abb78c2f62e6'
    #   '9e8c7096-553d-40ac-9a74-4f01d552d583'
    OP_VIP_DATASETS = ['f3daf58c-3ab1-4fb9-8f68-33faf3f73625']

    # Resolve each id to its Package row ([0] = first row returned).
    packages_to_migrate = [
        find_any_in_database(TEST_CONFIG_FILE_PATH, Package.id == dataset_id, Package)[0]
        for dataset_id in OP_VIP_DATASETS
    ]

    controlled_vocabulary = ControlledVocabulary()
    for package in packages_to_migrate:
        datasets_migration_manager.migrate_package_to_virtuoso(
            config_file_path=TEST_CONFIG_FILE_PATH,
            package=package,
            controlled_vocabulary=controlled_vocabulary)
def test_find_any_for_result_clause(self):
    """Check the multi-table and single-table finders both return something."""
    requested_tables = [Package, PackageExtra]
    titles = postgresql_helper.find_any_in_tables_database(
        config_file_path=TEST_CONFIG_FILE_PATH,
        result_clause=requested_tables)
    assert titles is not None

    packages = postgresql_helper.find_any_in_database(
        config_file_path=TEST_CONFIG_FILE_PATH,
        table=Package)
    assert packages is not None
def set_keyword(dataset_schema=None, tag=Tag(), configuration_file=CONFIGURATION_FILE_PATH):
    """Add a tag name and its translations to ``dataset_schema.keyword_dcat``.

    The tag name is stored with language ``'en'``. Every ``TermTranslation``
    row whose ``term`` equals the tag name is then stored with that row's
    ``lang_code``. Each entry is keyed by the stringified size of
    ``keyword_dcat`` at the moment of insertion.

    :param dataset_schema: schema updated in place; when falsy, a new
        ``DatasetSchemaDcatApOp("")`` is created and used instead.
    :param tag: object whose ``name`` is the keyword to add.
    :param configuration_file: passed through to ``find_any_in_database``.

    NOTE(review): nothing is returned, so a schema created here because
    ``dataset_schema`` was omitted is not reachable by the caller.
    """
    if not dataset_schema:
        dataset_schema = DatasetSchemaDcatApOp("")

    condition = TermTranslation.term == tag.name
    keywords_translation = find_any_in_database(
        configuration_file, condition, TermTranslation)  # type: list[TermTranslation]

    # The key is computed right before each insert so it always reflects the
    # current number of stored keywords.
    length = str(len(dataset_schema.keyword_dcat))
    dataset_schema.keyword_dcat[length] = ResourceValue(value_or_uri=tag.name, lang='en')

    for keyword in keywords_translation:
        length = str(len(dataset_schema.keyword_dcat))
        dataset_schema.keyword_dcat[length] = ResourceValue(
            value_or_uri=keyword.term_translation,
            lang=keyword.lang_code)
def set_distribution_titles(configuration_file=CONFIGURATION_FILE_PATH, distribution=None, resource=Resource()):
    """Fill ``distribution.title_dcterms`` from the resource name.

    Does nothing when ``resource.name`` is falsy. Otherwise key ``'0'`` gets
    the name tagged as English, and each ``TermTranslation`` row matching the
    name is appended under the next stringified index.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param distribution: schema updated in place; when falsy, a new
        ``DistributionSchemaDcatApOp('')`` is created and used instead.
    :param resource: object whose ``name`` provides the title.
    """
    if not distribution:
        distribution = DistributionSchemaDcatApOp('')
    if not resource.name:
        return

    distribution.title_dcterms['0'] = ResourceValue(
        resource.name or '', LanguagesConstants.LANGUAGE_CODE_EN)

    translations = find_any_in_database(
        configuration_file,
        TermTranslation.term == resource.name,
        TermTranslation)  # type: list[TermTranslation]
    for translation in translations:
        next_key = str(len(distribution.title_dcterms))
        distribution.title_dcterms[next_key] = ResourceValue(
            translation.term_translation, translation.lang_code)
def set_package_descriptions(configuration_file=CONFIGURATION_FILE_PATH, dataset_schema=None, package=Package()):
    """Fill ``dataset_schema.description_dcterms`` from the package.

    The description is ``package.notes`` or, when that is falsy,
    ``package.description``. It is stored under key ``'0'`` as English, and
    each ``TermTranslation`` row matching it is appended under the next
    stringified index.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param dataset_schema: schema updated in place; when falsy, a new
        ``DatasetSchemaDcatApOp("")`` is created and used instead.
    :param package: object providing ``notes`` / ``description``.
    """
    if not dataset_schema:
        dataset_schema = DatasetSchemaDcatApOp("")

    description = package.notes or package.description
    dataset_schema.description_dcterms['0'] = ResourceValue(
        description, lang=LanguagesConstants.LANGUAGE_CODE_EN)

    # The lookup term is the description formatted into a unicode string.
    term_filter = TermTranslation.term == u'{0}'.format(description)
    translations = find_any_in_database(
        configuration_file, term_filter, TermTranslation)  # type: list[TermTranslation]
    for translation in translations:
        next_key = str(len(dataset_schema.description_dcterms))
        dataset_schema.description_dcterms[next_key] = ResourceValue(
            translation.term_translation, lang=translation.lang_code)
def set_document_descriptions(configuration_file=CONFIGURATION_FILE_PATH, document=None, resource=Resource()):
    """Fill the document's titles and descriptions from the resource.

    For ``resource.name`` (titles) and ``resource.description``
    (descriptions), each handled only when truthy: the value is stored under
    key ``'0'`` as English, then every ``TermTranslation`` row matching it is
    appended under the next stringified index.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param document: schema updated in place; when falsy, a new
        ``DocumentSchemaDcatApOp('')`` is created and used instead.
    :param resource: object providing ``name`` and ``description``.
    """
    if not document:
        document = DocumentSchemaDcatApOp('')

    # Titles
    if resource.name:
        document.title_dcterms['0'] = ResourceValue(
            resource.name, LanguagesConstants.LANGUAGE_CODE_EN)
        title_translations = find_any_in_database(
            configuration_file,
            TermTranslation.term == resource.name,
            TermTranslation)  # type: list[TermTranslation]
        for translation in title_translations:
            next_key = str(len(document.title_dcterms))
            document.title_dcterms[next_key] = ResourceValue(
                translation.term_translation, translation.lang_code)

    # Descriptions
    if resource.description:
        document.description_dcterms['0'] = ResourceValue(
            resource.description, LanguagesConstants.LANGUAGE_CODE_EN)
        description_translations = find_any_in_database(
            configuration_file,
            TermTranslation.term == resource.description,
            TermTranslation)  # type: list[TermTranslation]
        for translation in description_translations:
            next_key = str(len(document.description_dcterms))
            document.description_dcterms[next_key] = ResourceValue(
                translation.term_translation, translation.lang_code)
def set_package_titles(configuration_file=CONFIGURATION_FILE_PATH, dataset_schema=None, package=Package()):
    """Fill ``dataset_schema.title_dcterms`` from the package title.

    Does nothing when ``package.title`` is falsy. Otherwise key ``'0'`` gets
    the title as English, and each matching ``TermTranslation`` row with a
    truthy ``term_translation`` is appended under the next stringified index.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param dataset_schema: schema updated in place; when falsy, a new
        ``DatasetSchemaDcatApOp("")`` is created and used instead.
    :param package: object whose ``title`` provides the title.
    """
    if not dataset_schema:
        dataset_schema = DatasetSchemaDcatApOp("")
    if not package.title:
        return

    dataset_schema.title_dcterms['0'] = ResourceValue(
        package.title, lang=LanguagesConstants.LANGUAGE_CODE_EN)

    translations = find_any_in_database(
        configuration_file,
        TermTranslation.term == package.title,
        TermTranslation)  # type: list[TermTranslation]
    for translation in translations:
        # Rows without a translated text are skipped.
        if not translation.term_translation:
            continue
        next_key = str(len(dataset_schema.title_dcterms))
        dataset_schema.title_dcterms[next_key] = ResourceValue(
            translation.term_translation, lang=translation.lang_code)
def set_alternative_titles(configuration_file=CONFIGURATION_FILE_PATH, dataset_schema=None, package_extra=PackageExtra()):
    """Fill ``dataset_schema.alternative_dcterms`` from ``package_extra.value``.

    Key ``'0'`` gets the value as English; each ``TermTranslation`` row whose
    ``term`` equals the value is appended under the next stringified index.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param dataset_schema: schema updated in place; when falsy, a new
        ``DatasetSchemaDcatApOp("")`` is created and used instead.
    :param package_extra: object whose ``value`` is the alternative title.
    """
    if not dataset_schema:
        dataset_schema = DatasetSchemaDcatApOp("")

    dataset_schema.alternative_dcterms['0'] = ResourceValue(
        value_or_uri=package_extra.value,
        lang=LanguagesConstants.LANGUAGE_CODE_EN)

    term_filter = TermTranslation.term == package_extra.value
    translations = find_any_in_database(
        configuration_file, term_filter, TermTranslation)  # type: list[TermTranslation]
    for translation in translations:
        next_key = str(len(dataset_schema.alternative_dcterms))
        dataset_schema.alternative_dcterms[next_key] = ResourceValue(
            value_or_uri=translation.term_translation,
            lang=translation.lang_code)
def test_migrate_most_viewed_package_to_virtuoso(self):
    """Migrate the TED package and check its dataset description can be read back."""
    controlled_vocabulary = ControlledVocabulary()
    # [0] takes the first row returned for the TED package id.
    ted_package = find_any_in_database(
        TEST_CONFIG_FILE_PATH, Package.id == TED_PACKAGE_ID, Package)[0]

    datasets_migration_manager.migrate_package_to_virtuoso(
        config_file_path=TEST_CONFIG_FILE_PATH,
        package=ted_package,
        controlled_vocabulary=controlled_vocabulary)

    migrated = DatasetDcatApOp(DATASET_URI_PREFIX + "ted-1")
    assert migrated.get_description_from_ts() is True
def test_convert_package_private_to_dataset(self):
    """Convert a package flagged private and check it targets the private graph.

    The package is loaded by id, marked ``private`` in memory, converted with
    ``convert_package_to_dataset`` and finally saved with ``save_to_ts()``.
    """
    condition = Package.id == "cdd61a23-eb87-4e06-808c-ed4bd69d2247"
    package = find_any_in_database(TEST_CONFIG_FILE_PATH, condition, Package)[0]
    package.private = True

    controlled_vocabulary = ControlledVocabulary()
    dataset = database_to_ontology_converter.convert_package_to_dataset(
        package, controlled_vocabulary, TEST_CONFIG_FILE_PATH)  # type: DatasetDcatApOp

    assert dataset is not None
    assert dataset.schema is not None
    # Compare by value: `is` only passes when both names refer to the very
    # same object, which is not what this test means to check.
    assert dataset.schema.graph_name == DCATAPOP_PRIVATE_GRAPH_NAME
    dataset.save_to_ts()
def test_big_dataset(self):
    """Convert one specific package and check it targets the public graph.

    The package is loaded by id, converted with ``convert_package_to_dataset``
    and finally saved with ``save_to_ts()``.
    """
    condition = Package.id == "ad49841e-b4f1-4efc-8b1c-f100364f1563"
    package = find_any_in_database(TEST_CONFIG_FILE_PATH, condition, Package)[0]

    controlled_vocabulary = ControlledVocabulary()
    dataset = database_to_ontology_converter.convert_package_to_dataset(
        package, controlled_vocabulary, TEST_CONFIG_FILE_PATH)  # type: DatasetDcatApOp

    assert dataset is not None
    assert dataset.schema is not None
    assert dataset.schema_catalog_record is not None
    # Compare by value: `is` only passes when both names refer to the very
    # same object, which is not what this test means to check.
    assert dataset.schema.graph_name == DCATAPOP_PUBLIC_GRAPH_NAME
    dataset.save_to_ts()
def test_migrate_dataset_in_multiple_groups(self):
    """Migrate the connect-sparql-endpoint package and read it back from the private graph."""
    connect_sparql_package = find_any_in_database(
        TEST_CONFIG_FILE_PATH,
        Package.id == CONNECT_SPARQL_ENDPOINT_ID,
        Package)[0]

    controlled_vocabulary = ControlledVocabulary()
    datasets_migration_manager.migrate_package_to_virtuoso(
        config_file_path=TEST_CONFIG_FILE_PATH,
        package=connect_sparql_package,
        controlled_vocabulary=controlled_vocabulary)

    migrated = DatasetDcatApOp(
        DATASET_URI_PREFIX + "connect-sparql-endpoint",
        graph_name=DCATAPOP_PRIVATE_GRAPH_NAME)
    assert migrated.get_description_from_ts() is True
def test_migrate_dataset_in_group(self):
    """Migrate the ecb-web-service package and read it back from the private graph."""
    package = find_any_in_database(
        TEST_CONFIG_FILE_PATH,
        Package.id == ECB_WEB_SERVICE_PACKAGE_ID,
        Package)[0]

    controlled_vocabulary = ControlledVocabulary()
    datasets_migration_manager.migrate_package_to_virtuoso(
        config_file_path=TEST_CONFIG_FILE_PATH,
        package=package,
        controlled_vocabulary=controlled_vocabulary)

    migrated = DatasetDcatApOp(
        DATASET_URI_PREFIX + "ecb-web-service",
        graph_name=DCATAPOP_PRIVATE_GRAPH_NAME)
    assert migrated.get_description_from_ts() is True
def migrate_OP_VIP_datasets(test_config_file):
    """Migrate the currently enabled OP VIP package(s) to Virtuoso.

    Each id in ``OP_VIP_DATASETS`` is resolved to its ``Package`` row and
    handed to ``migrate_package_to_virtuoso`` with one shared
    ``ControlledVocabulary`` instance.

    :param test_config_file: configuration file path, used both for the
        database lookup and for the migration call.
    """
    # Ids that are NOT migrated, kept for reference only.
    # Formerly assigned to OP_VIP_DATASETS and then immediately overwritten:
    #   'b07ecd46-d298-4a51-bfb4-0c11d4625134'
    #   'e33c79b9-a549-4f47-8a8c-617a1183540e'
    #   'c501a114-8b53-462d-aa28-2b9ff68fa5f8'
    #   'bfaeb715-171e-4050-a3ca-f27e04ebd34b'
    #   '67f1306d-dade-4820-a732-f6019e5a299a'
    #   '8abe51cd-4314-4d5a-8303-a6ec5d9d9f12'
    #   '45a6f9bf-9819-4881-befa-f79e000e0dc6'
    # Formerly commented-out appends:
    #   'a572e5ec-0e81-42df-9dde-aad55a50bd44'
    #   'db715fd8-0970-48bb-a1f4-6cb2bb10b36e'
    #   'ed21b53a-e5ff-4077-8191-a4f107ebde6f'
    #   'b941f99a-57da-4576-a544-4b8811acc327'
    #   '150c8ae3-9d1f-4971-b23b-2129469abbb3'
    #   '309e9d59-1c9c-4c79-8394-72bfd8dc7200'
    #   '9a2ef9a0-b50e-448d-996d-577e892148e2'
    #   '68c15f0f-c77b-42c0-b411-16fbce223932'
    #   '54dd2284-52e8-4131-8b9c-3eebb1d88b38'
    #   'e62c401b-e8d8-44c7-a758-abb78c2f62e6'
    #   '9e8c7096-553d-40ac-9a74-4f01d552d583'
    #   'f3daf58c-3ab1-4fb9-8f68-33faf3f73625'
    #   'ea731c1b-422b-4b3c-a399-c400302a6c8b'
    #   'b7c7c8d3-bb3b-49d8-80ad-20d04d1200c4'
    #   '7de8a996-9765-47da-af80-fcb575e596ab'
    #   '7fe111d2-635a-4017-9c5d-8dd9dfe163b2'  (Test DB!)
    OP_VIP_DATASETS = [
        '9a2e72ab-e470-4c8e-80f7-8da2607d4125',  # PZ DB!
    ]

    # Function-scope import, as this function has always done.
    from ckanext.ecportal.migration.postgresql.helpers.postgresql_helper import find_any_in_database

    # Resolve every id first ([0] = first row returned), then migrate.
    packages_to_migrate = [
        find_any_in_database(test_config_file, Package.id == dataset_id, Package)[0]
        for dataset_id in OP_VIP_DATASETS
    ]

    controlled_vocabulary = ControlledVocabulary()
    for package in packages_to_migrate:
        migrate_package_to_virtuoso(
            config_file_path=test_config_file,
            package=package,
            controlled_vocabulary=controlled_vocabulary)
def migrate_delta(configuration_file_path):
    """Migrate active packages that have no matching dataset in the graphs.

    Dataset subjects are read from the public and private graphs and reduced
    to the last ``/``-separated segment of their URI. Names of packages whose
    state is ``"active"`` are read from the database. For every package name
    missing from the graph-derived names, a dataset with that name is deleted
    from the triple store and then migrated again by package name.

    :param configuration_file_path: configuration file path used for the
        database query and for the migration calls.
    :return: the names derived from the graphs at the start of the call
        (before any of the missing ones are migrated).
    """
    dataset_type_triplet = Triplet(
        predicate=TYPE_PREDICATE,
        object=NAMESPACE_DCATAPOP.dcat + DATASET)
    properties_values = TripleStoreCRUDHelpers().find_any_in_graphs_for_where_clauses(
        [DCATAPOP_PUBLIC_GRAPH_NAME, DCATAPOP_PRIVATE_GRAPH_NAME],
        [dataset_type_triplet],
        result_clause=SUBJECT_WITH_SPACES)

    # Keep only the trailing segment of each subject URI.
    migrated_datasets = [
        row.get("s").get("value").split('/')[-1]
        for row in properties_values
    ]

    active_packages = postgresql_helper.find_any_in_database(
        config_file_path=configuration_file_path,
        condition=Package.state == "active",
        table=Package,
        result_clause=[Package.name])
    # Keep only the package names.
    all_packages_list = [row.name for row in active_packages]

    not_migrated_datasets_names = set(all_packages_list) - set(migrated_datasets)
    for missing_name in not_migrated_datasets_names:
        # Delete before migrating the package under that name.
        stale_dataset = DatasetDcatApOp(DATASET_URI_PREFIX + missing_name)
        DatasetDcatApOp.delete_from_ts(stale_dataset)
        migrate_with_package_name_to_virtuoso(configuration_file_path, missing_name)

    return migrated_datasets
def retrieve_groups(configuration_file, package):
    """Return the groups of a package after deleted/duplicated ones are removed.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param package: object whose ``id`` selects the ``Member`` rows.
    :return: result of ``remove_deleted_and_duplicated_groups`` applied to
        the ``Member`` rows found.
    """
    # Member rows that point at this package.
    membership_filter = and_(
        Member.table_id == package.id,
        Member.table_name == "package")
    members = find_any_in_database(
        configuration_file, membership_filter, Member)  # type: list[Member]
    return remove_deleted_and_duplicated_groups(members)
def retrieve_resource_group_list_from_postgres(configuration_file=CONFIGURATION_FILE_PATH, package=Package()):
    """Look up the ``ResourceGroup`` rows whose ``package_id`` equals ``package.id``.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param package: object whose ``id`` is matched.
    :return: whatever ``find_any_in_database`` returns for that filter.
    """
    return find_any_in_database(
        configuration_file,
        ResourceGroup.package_id == package.id,
        ResourceGroup)
def migrate_with_package_name_to_virtuoso(config_file_path, package_name):
    """Migrate every package whose ``name`` equals ``package_name``.

    :param config_file_path: configuration file path used for the lookup
        and passed on to ``migrate_packages_to_virtuoso``.
    :param package_name: package name to match.
    """
    matching_packages = postgresql_helper.find_any_in_database(
        config_file_path,
        Package.name == package_name,
        Package)  # type: list[Package]
    migrate_packages_to_virtuoso(config_file_path, matching_packages)
def retrieve_tag_from_postgres(configuration_file=CONFIGURATION_FILE_PATH, package_tag=PackageTag()):
    """Look up the ``Tag`` rows whose ``id`` equals ``package_tag.tag_id``.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param package_tag: object whose ``tag_id`` is matched.
    :return: whatever ``find_any_in_database`` returns for that filter.
    """
    # Must be `==`. A chained `condition = Tag.id = ...` would rebind the
    # `Tag.id` class attribute and pass the raw id value as the condition
    # instead of a comparison on the column.
    condition = Tag.id == package_tag.tag_id
    return find_any_in_database(configuration_file, condition, Tag)
def retrieve_package_tag_list(configuration_file=CONFIGURATION_FILE_PATH, package=Package()):
    # type: (str, Package) -> list[PackageTag]
    """Look up the ``PackageTag`` rows whose ``package_id`` equals ``package.id``.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param package: object whose ``id`` is matched.
    :return: whatever ``find_any_in_database`` returns for that filter.
    """
    return find_any_in_database(
        configuration_file,
        PackageTag.package_id == package.id,
        PackageTag)
def retrieve_resource_from_postgres(configuration_file=CONFIGURATION_FILE_PATH, resource_group=ResourceGroup()):
    """Look up the ``Resource`` rows of a resource group that are in ``ACTIVE_STATE``.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param resource_group: object whose ``id`` is matched against
        ``Resource.resource_group_id``.
    :return: whatever ``find_any_in_database`` returns for that filter.
    """
    # Both parts must hold: same resource group AND active state.
    active_in_group = and_(
        Resource.resource_group_id == resource_group.id,
        Resource.state == ACTIVE_STATE)
    return find_any_in_database(configuration_file, active_in_group, Resource)
def retrieve_package_extra_list_from_postgres(configuration_file=CONFIGURATION_FILE_PATH, package=Package()):
    # type: (str, Package) -> list[PackageExtra]
    """Look up the ``PackageExtra`` rows whose ``package_id`` equals ``package.id``.

    :param configuration_file: passed through to ``find_any_in_database``.
    :param package: object whose ``id`` is matched.
    :return: whatever ``find_any_in_database`` returns for that filter.
    """
    return find_any_in_database(
        configuration_file,
        PackageExtra.package_id == package.id,
        PackageExtra)