def test_get_description_from_ts(self):
    """Load public and private datasets from the triplestore and check
    their ckanName, keyword structure and telephone numbers."""
    ds1 = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
    ds2 = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V3")
    ds_private = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V4",
        privacy_state="private",
        graph_name=DCATAPOP_PRIVATE_GRAPH_NAME)
    # get_description_from_ts() populates each dataset's schema; the
    # return values were never used, so they are not kept in locals.
    ds1.get_description_from_ts()
    ds2.get_description_from_ts()
    ds_private.get_description_from_ts()
    ckan_name1 = ds1.schema.ckanName_dcatapop['0'].value_or_uri
    ckan_name_private = ds_private.schema.ckanName_dcatapop[
        '0'].value_or_uri
    self.assertTrue(
        ckan_name1 == "dgt-translation-memory-V1-2" and
        ckan_name_private == "dgt-translation-memory-V4",
        "TestDataSet: CkanName is not correct")
    keyword_ds1 = ds1.schema.keyword_dcat['0'].value_or_uri
    # Idiom fix: call len() instead of invoking __len__() directly.
    len_keyword_ds2 = len(ds2.schema.keyword_dcat)
    self.assertTrue(keyword_ds1 == "translation" and len_keyword_ds2 == 4,
                    "TestDataSet: Structure of keyword error ")
    self.assertTrue(ds1.get_telephone_numbers() == {'0': u'tel:086631722'})
def test_create_multi_lang_full_text(self):
    """Smoke-test building the multi-language full-text field."""
    # TODO finish it
    uri = "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2"
    ds = DatasetDcatApOp(uri)
    if not ds.get_description_from_ts():
        return
    mega_field = ds.create_multi_lang_full_text()
    # TODO add an assertion on mega_field
def create_dataset_schema_for_package_dict(data_dict):
    """Build a new DatasetDcatApOp (with a fresh catalog record) from a
    plain package dict, generating a DOI when the caller requested one.

    :param dict data_dict: package dict; must contain 'name'.
    :return: the transformed dataset object.
    """
    dataset_name = data_dict.get('name')
    dataset_uri = uri_util.new_dataset_uri_from_name(dataset_name)
    dataset = DatasetDcatApOp(dataset_uri)

    # Attach a catalog record stamped with the current timestamp.
    record = CatalogRecordSchemaDcatApOp(uri_util.new_catalog_record_uri())
    now = datetime.datetime.now().strftime('%Y-%m-%dT%H:%M:%S')
    record.issued_dcterms['0'] = ResourceValue(now, datatype=XSD.datetime)
    record.modified_dcterms['0'] = ResourceValue(now, datatype=XSD.datetime)
    record.primaryTopic_foaf['0'] = SchemaGeneric(dataset.schema.uri)
    dataset.schema_catalog_record = record

    generated_dataset = __dataset_old_model_transformation(
        dataset, data_dict, dataset.schema)

    # Generate a DOI only when explicitly asked for in the input dict.
    if _DOI_GENERATION_KEY in data_dict:
        doi = generate_doi_for_dataset(dataset,
                                       data_dict[_DOI_GENERATION_KEY])
        generated_dataset.set_doi(doi)
    return generated_dataset
def __safe_locally(self, context):
    """Build dataset objects from the posted form data and stage them as
    temporary files for packaging (no triplestore save).

    :param dict context: CKAN action context.
    """
    data = ecportal_logic.transform_to_data_dict(request.POST)
    split_id = data['manifest'][0]['publisher_uri'].split('/')[-1]
    org = get_action('organization_show')(
        context, {'id': split_id.lower(), 'include_datasets': 'false'})
    data['organization'] = org
    datasets = []
    for package in data.get('dataset', []):
        uri = package.get('uri')
        # Mint a real URI/name for brand-new packages that still carry
        # the placeholder URI.
        if not uri or '__temporal/uri' == uri:
            uri, name = uri_util.new_cataloge_uri_from_title(
                package.get('title', 'default'))
            package['name'] = name
        if not package.get('name'):
            package['name'] = uri_util.create_name_from_title(
                package.get('title', 'default'))
        dataset = DatasetDcatApOp(uri)
        try:
            dataset.create_dataset_schema_for_package_dict(
                package, {}, context)
            datasets.append(dataset)
        except Exception:
            import traceback
            # BUG FIX: traceback.print_exc() prints to stderr and returns
            # None, so log.error(traceback.print_exc()) logged "None".
            # format_exc() returns the traceback as a string.
            log.error(traceback.format_exc())
    data['dataset'] = datasets
    self._create_temporary_files_for_packaging(context, data)
    return
def __add_new_dataset(self, context):
    """Rebuild the UI form list of datasets from the posted data and
    append a placeholder entry for a brand-new dataset.

    :param dict context: CKAN action context.
    :return: the updated form data dict.
    """
    data = ecportal_logic.transform_to_data_dict(request.POST)
    tmp_list = data.get('dataset', [])
    result_list = []
    split_id = data['manifest'][0]['publisher_uri'].split('/')[-1]
    if not data.get('organization'):
        org = get_action('organization_show')(
            context, {'id': split_id.lower(), 'include_datasets': 'false'})
        data['organization'] = org
    for ds in tmp_list:
        dataset = DatasetDcatApOp(ds.get('uri'))
        try:
            dataset.create_dataset_schema_for_package_dict(ds, {}, context)
        except Exception:
            import traceback
            # BUG FIX: traceback.print_exc() returns None, so the old
            # log.error(traceback.print_exc()) logged "None"; format_exc()
            # returns the traceback text.
            log.error(traceback.format_exc())
        ui_ds = ui_util.transform_dcat_schema_to_form_schema(dataset)
        result_list.append(ui_ds)
    # Placeholder row so the UI renders an empty "new dataset" form.
    result_list.append({'id': 'new_dataset', 'uri': '__temporal/uri'})
    data['dataset'] = result_list
    return data
def test_serialize_dataset(self):
    """A pickling round-trip must preserve the schema, and the unpickled
    copy must be independent of the original object."""
    ds = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
    if not ds.get_description_from_ts():
        return
    phone_source = (ds.schema.contactPoint_dcat['0']
                    .hasTelephone_vcard['0'].hasValue_vcard['0'].uri)
    clone = pickle.loads(pickle.dumps(ds))
    phone = (clone.schema.contactPoint_dcat['0']
             .hasTelephone_vcard['0'].hasValue_vcard['0'].uri)
    self.assertEqual(
        phone_source, phone, "Test serialize dataset: Phone numbers "
        "should be equal ({0}) ({1})".format(phone_source, phone))
    # Mutating the original must not touch the unpickled clone.
    ds.schema.contactPoint_dcat['0'].hasTelephone_vcard[
        '0'].hasValue_vcard['0'].uri = "123456"
    phone_new = (ds.schema.contactPoint_dcat['0']
                 .hasTelephone_vcard['0'].hasValue_vcard['0'].uri)
    self.assertNotEqual(
        phone, phone_new, "Test serialize dataset: Phone numbers "
        "should be different ({0}) ({1})".format(phone_new, phone))
def __add_delete_action(self, context):
    """Append a new delete action to the posted form data and rebuild the
    dataset list for the UI.

    :param dict context: CKAN action context.
    :return: the updated form data dict.
    """
    data = ecportal_logic.transform_to_data_dict(request.POST)
    tmp_list = data.get('delete', [])
    tmp_list.append({'name': 'new_delete_action'})
    data['delete'] = tmp_list
    result_list = []
    for ds in data.get('dataset', []):
        dataset = DatasetDcatApOp(ds.get('uri'))
        try:
            dataset.create_dataset_schema_for_package_dict(ds, {}, context)
        except Exception:
            import traceback
            # BUG FIX: traceback.print_exc() returns None, so log.error
            # logged "None"; format_exc() returns the traceback text.
            log.error(traceback.format_exc())
        split_id = dataset.get_owner_org().split('/')[-1]
        if not data.get('organization'):
            org = get_action('organization_show')(
                context,
                {'id': split_id.lower(), 'include_datasets': 'false'})
            data['organization'] = org
        # BUG FIX: the transformer was called with no argument here, while
        # the sibling __add_new_dataset passes the dataset object; the
        # zero-argument call would raise TypeError.
        ui_ds = ui_util.transform_dcat_schema_to_form_schema(dataset)
        result_list.append(ui_ds)
    data['dataset'] = result_list
    data['manifest'][0]['publisher_uri'] = 'http://publications.europa.eu/resource/authority/corporate-body/{0}'.format(
        data.get('organization', {}).get('name', '').upper())
    return data
def test_export_to_json(self):
    """Smoke-test the JSON export of a dataset loaded from the
    triplestore."""
    ds = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V3")
    if ds.get_description_from_ts():
        # Removed unused local json_dict = {}.
        json_string = ds.get_dataset_as_json()
        # TODO: add an assertion on json_string
        pass
def test_save_to_ts_new(self):
    """Round-trip: load a dataset and save it back to the triplestore."""
    dataset = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory")
    dataset.get_description_from_ts()
    saved = dataset.save_to_ts()
    self.assertTrue(saved)
def test_at_most_one_by_language(self):
    """Validation of the card_1..n_en constraint on title_dcterms:
    a valid English title must pass; an empty, None, or non-English-only
    title must fail."""
    ds = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
    if ds.get_description_from_ts():
        ds.schema.title_dcterms['1'] = ResourceValue("new title", "fr")
        ds.schema.title_dcterms['2'] = ResourceValue("new title2", "it")
        validator = ValidationSchema(ds.schema, ds.schema.get_schema_type())
        report = validator.validate()
        # BUG FIX: validation_result was only assigned inside the loop
        # (the initialisation was commented out), so a report without a
        # matching entry raised NameError; default to True first.
        validation_result = True
        for result in report:
            if result.get("property") == "title_dcterms" and result.get(
                    "constraint") == "card_1..n_en":
                validation_result = False if result.get(
                    "result") == ValidationTypeResult.error else True
                break
        self.assertTrue(validation_result,
                        " Test validation of test_at_least_one_en failed")
        # test the case of empty value
        ds.schema.title_dcterms['0'].value_or_uri = ''
        validator = ValidationSchema(ds.schema, ds.schema.get_schema_type())
        report = validator.validate()
        # Default True so a missing error entry makes the assert fail
        # instead of silently reusing the previous loop's value.
        validation_result = True
        for result in report:
            if result.get("property") == "title_dcterms" and result.get(
                    "constraint") == "card_1..n_en":
                validation_result = False if result.get(
                    "result") == ValidationTypeResult.error else True
        self.assertTrue(not validation_result,
                        " Test validation of test_at_least_one_en failed")
        # None value must also be rejected.
        ds.schema.title_dcterms['0'].value_or_uri = None
        validator = ValidationSchema(ds.schema, ds.schema.get_schema_type())
        report = validator.validate()
        validation_result = True
        for result in report:
            if result.get("property") == "title_dcterms" and result.get(
                    "constraint") == "card_1..n_en":
                validation_result = False if result.get(
                    "result") == ValidationTypeResult.error else True
        self.assertTrue(not validation_result,
                        " Test validation of test_at_least_one_en failed")
        # No english title
        ds.schema.title_dcterms['0'] = ResourceValue("new title", "de")
        validator = ValidationSchema(ds.schema, ds.schema.get_schema_type())
        report = validator.validate()
        validation_result = True
        for result in report:
            if result.get("property") == "title_dcterms" and result.get(
                    "constraint") == "card_1..n_en":
                validation_result = False if result.get(
                    "result") == ValidationTypeResult.error else True
        self.assertTrue(not validation_result,
                        " Test validation of test_at_least_one_en failed")
def test_generate_list_properties(self):
    """Enumerate the schema properties of an empty dataset into an
    identity mapping."""
    schema_props = DatasetDcatApOp('').schema.__dict__  # type: dict[str,str]
    # remove the name space
    list_mapping = {prop: prop for prop in schema_props}
    pass
def test_data_change_in_ts(self):
    """Changing an identifier URI and saving back must succeed."""
    ds = DatasetDcatApOp(
        'http://data.europa.eu/88u/dataset/efsa-botanical-compendium')
    ds.get_description_from_ts()
    ds.schema.identifier_adms.get('0').uri = '10.5281/zenodo.1212387'
    saved = ds.save_to_ts()
    self.assertTrue(saved)
def test_validate(self):
    """Dataset-level validation must produce a report of exactly 4
    entries for the reference dataset."""
    # TODO finish the test
    ds = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
    if not ds.get_description_from_ts():
        return
    # compare lengths of the report and the validation rules
    # report = ds.schema.validate_schema()
    dataset_validation_report = ds.validate_dataset()
    self.assertEqual(len(dataset_validation_report), 4,
                     "Size of the report incorrect")
def test_get_dataset_as_rdfxml(self):
    """RDF/XML export must contain the ckanName element and the dcatapop
    namespace declaration."""
    ds = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V3")
    if not ds.get_description_from_ts():
        return
    rdf_xml = ds.get_dataset_as_rdfxml()
    expected_fragments = (
        "<dcatapop:ckanName>dgt-translation-memory-V3</dcatapop:ckanName>",
        'xmlns:dcatapop="http://data.europa.eu/88u/ontology/dcatapop#"',
    )
    for fragment in expected_fragments:
        self.assertTrue(fragment in rdf_xml)
def test_ts_and_cache_equality(self):
    """A dataset loaded from the triplestore must equal its pickled copy
    in the Redis cache."""
    name_or_id = uri_prefix_test + "dgt-translation-memory-V1-2"
    ts_dataset = DatasetDcatApOp(name_or_id)  # type: DatasetDcatApOp
    ts_dataset.get_description_from_ts()
    cached_blob = redis_cache.get_from_cache(
        name_or_id, pool=redis_cache.DATASET_POOL)
    if cached_blob:
        cache_dataset = pickle.loads(cached_blob)  # type: DatasetDcatApOp
        assert_equal(ts_dataset.schema.__dict__,
                     cache_dataset.schema.__dict__)
def test_migrate_3_most_viewed_packages_to_virtuoso(self):
    """Migrate the three most-viewed packages to Virtuoso and verify each
    can be read back from the triplestore."""
    package_ids = (TED_PACKAGE_ID,
                   DGT_TRANSLATION_PACKAGE_ID,
                   CORDISH2020PROJECTS_PACKAGE_ID)
    packages_to_migrate = []
    for package_id in package_ids:
        condition = Package.id == package_id
        packages_to_migrate.append(
            find_any_in_database(TEST_CONFIG_FILE_PATH, condition,
                                 Package)[0])
    controlled_vocabulary = ControlledVocabulary()
    for package in packages_to_migrate:
        datasets_migration_manager.migrate_package_to_virtuoso(
            config_file_path=TEST_CONFIG_FILE_PATH,
            package=package,
            controlled_vocabulary=controlled_vocabulary)
    # Each migrated dataset must now be resolvable from the triplestore.
    for suffix in ("ted-1", "cordisH2020projects",
                   "dgt-translation-memory"):
        dataset = DatasetDcatApOp(DATASET_URI_PREFIX + suffix)
        result = dataset.get_description_from_ts()
        assert result is True
def test_get_revisions(self):
    """Smoke-test fetching the ordered revision list of a dataset."""
    uri = ("http://data.europa.eu/88u/dataset"
           "/european-structural-investment-funds-esif-2014-2020-finance-implementation-details")
    ds = DatasetDcatApOp(uri)
    ds.get_description_from_ts()
    revisions = ds.get_list_revisions_ordred(2)
    # TODO: assert on revisions once expectations are defined
    # ds.save_to_ts()
    pass
def test_at_most_one(self):
    """Validation of the card_0..1 constraint on accessRights_dcterms:
    zero or one value passes, two values must raise an error."""
    ds = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
    if ds.get_description_from_ts():
        # BUG FIX: the original initialised validation_error but read
        # validation_result, which would raise NameError whenever a loop
        # found no matching report entry; each section now starts from a
        # default that makes a missing entry fail the assertion.
        # ds.schema.theme_dcat['1'] = ResourceValue("New CKAN")
        validator = ValidationSchema(ds.schema, ds.schema.get_schema_type())
        report = validator.validate()
        validation_result = True
        for result in report:
            if result.get(
                    "property") == "accessRights_dcterms" and result.get(
                    "constraint") == "card_0..1":
                validation_result = False if result.get(
                    "result") == ValidationTypeResult.error else True
                break
        self.assertTrue(validation_result,
                        " Test validation of must_have_one failed")
        # add another member: two values must now violate card_0..1
        ds.schema.accessRights_dcterms['1'] = SchemaGeneric("newthem")
        validator = ValidationSchema(ds.schema, ds.schema.get_schema_type())
        report = validator.validate()
        validation_result = False
        for result in report:
            if result.get(
                    "property") == "accessRights_dcterms" and result.get(
                    "constraint") == "card_0..1":
                validation_result = True if result.get(
                    "result") == ValidationTypeResult.error else False
                break
        self.assertTrue(
            validation_result,
            " Test validation of must_have_one (more than 1) failed")
        # cardinality 0 (property removed) is still valid for card_0..1
        ds.schema.accessRights_dcterms = None
        validator = ValidationSchema(ds.schema, ds.schema.get_schema_type())
        report = validator.validate()
        validation_result = True
        for result in report:
            if result.get(
                    "property") == "accessRights_dcterms" and result.get(
                    "constraint") == "card_0..1":
                validation_result = False if result.get(
                    "result") == ValidationTypeResult.error else True
                break
        self.assertTrue(
            validation_result,
            " Test validation of must_have_one (cardinality 0) failed")
def rollback_dataset_to_revision(self, dataset_uri):
    """
    Roll back the dataset to its first valid revision; when no valid
    revision exists, delete the dataset from the triplestore, the cache
    and the search index.

    :param str dataset_uri: URI of the dataset to roll back.
    :return: None
    """
    try:
        revision = self.get_first_valid_revision(
            dataset_uri=dataset_uri) or {}
        revision_dataset = revision.get('dataset', None)
        if revision_dataset:
            context = {"ignore_auth": True}
            result = update_exisiting_dataset(revision_dataset, None,
                                              context,
                                              {"uri": dataset_uri})
            if result:
                log.info(
                    "[ROLLBACK Dataset] [Successful] [URI:<{0}>]".format(
                        dataset_uri))
            else:
                log.error(
                    "[ROLLBACK Dataset] [Failed] [Updating dataset][URI:<{0}>]"
                    .format(dataset_uri))
        else:
            # Remove dataset
            log.error(
                "[ROLLBACK Dataset] [Revision None] [Try to delete Dataset][URI:<{0}>]"
                .format(dataset_uri))
            try:
                dataset_to_remove = DatasetDcatApOp(dataset_uri)
                result = dataset_to_remove.get_description_from_ts()
                if result:
                    dataset_to_remove.delete_from_ts()
                    log.info(
                        "[ROLLBACK Dataset] [Delete Dataset] [Successful] [URI:<{0}>]"
                        .format(dataset_uri))
                    redis_cache.delete_value_from_cache(
                        dataset_to_remove.dataset_uri)
                    package_index.remove_dict(dataset_to_remove)
            except BaseException:
                # BUG FIX: traceback.print_exc(e) passed the exception as
                # the `limit` argument and returns None, so log.error
                # logged "None"; format_exc() returns the traceback text.
                log.error(traceback.format_exc())
                log.error(
                    "[ROLLBACK Dataset] [Delete dataset] [Failed] [URI:<{0}>]"
                    .format(dataset_uri))
    except BaseException:
        log.error(traceback.format_exc())
        log.error("[ROLLBACK Dataset] [Failed] [Revision None][URI:<{0}>]".
                  format(dataset_uri))
def test_delete_from_ts(self):
    """After deletion the dataset's triplestore description must collapse
    to the 2-line empty form."""
    ds = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
    if not ds.get_description_from_ts():
        return
    ds.get_description_from_ts()
    ds.delete_from_ts()
    ds.get_description_from_ts()
    # splitlines() with a truthy keepends flag, as in the original call.
    count_ttl_lines = len(ds.ttl_as_in_ts.splitlines(2))
    self.assertTrue(count_ttl_lines == 2, "Delete dataset from ts failed")
def test_create_ds(self):
    """Two in-memory datasets must keep fully independent property maps."""
    first = DatasetDcatApOp("t1")
    second = DatasetDcatApOp("t2")
    first.schema.description_dcterms['5'] = ResourceValue("rien ds 111")
    second.schema.description_dcterms['5'] = ResourceValue("rien ds 222")
    first.schema.ckanName_dcatapop['6'] = ResourceValue("ckan name ds1")
    first.schema.description_dcterms['4'] = ResourceValue("rien 111")
    second.schema.ckanName_dcatapop['6'] = ResourceValue("ckan name ds2")
    self.assertNotEqual(first.schema.description_dcterms['5'].value_or_uri,
                        second.schema.description_dcterms['5'].value_or_uri,
                        "ddd")
    # first has one extra description entry ('4'), so the lengths differ.
    self.assertNotEqual(len(first.schema.description_dcterms),
                        len(second.schema.description_dcterms))
def test_migrate_dataset_in_group(self):
    """Migrate a package belonging to a group and verify it lands in the
    private graph."""
    condition = Package.id == ECB_WEB_SERVICE_PACKAGE_ID
    ecb_web_service_package = find_any_in_database(
        TEST_CONFIG_FILE_PATH, condition, Package)[0]
    datasets_migration_manager.migrate_package_to_virtuoso(
        config_file_path=TEST_CONFIG_FILE_PATH,
        package=ecb_web_service_package,
        controlled_vocabulary=ControlledVocabulary())
    dataset = DatasetDcatApOp(DATASET_URI_PREFIX + "ecb-web-service",
                              graph_name=DCATAPOP_PRIVATE_GRAPH_NAME)
    result = dataset.get_description_from_ts()
    assert result is True
def test_migrate_dataset_in_multiple_groups(self):
    """Migrate a package belonging to multiple groups and verify it lands
    in the private graph."""
    condition = Package.id == CONNECT_SPARQL_ENDPOINT_ID
    # FIX: local was misleadingly named ecb_web_service_package (copied
    # from the sibling single-group test); it holds the
    # connect-sparql-endpoint package.
    connect_sparql_package = find_any_in_database(TEST_CONFIG_FILE_PATH,
                                                  condition, Package)[0]
    controlled_vocabulary = ControlledVocabulary()
    datasets_migration_manager.migrate_package_to_virtuoso(
        config_file_path=TEST_CONFIG_FILE_PATH,
        package=connect_sparql_package,
        controlled_vocabulary=controlled_vocabulary)
    dataset = DatasetDcatApOp(
        DATASET_URI_PREFIX + "connect-sparql-endpoint",
        graph_name=DCATAPOP_PRIVATE_GRAPH_NAME)
    result = dataset.get_description_from_ts()
    assert result is True
def test_migrate_most_viewed_package_to_virtuoso(self):
    """Migrate the most viewed package (TED) and verify it can be read
    back from the triplestore."""
    controlled_vocabulary = ControlledVocabulary()
    condition = Package.id == TED_PACKAGE_ID
    ted_package = find_any_in_database(TEST_CONFIG_FILE_PATH, condition,
                                       Package)[0]
    datasets_migration_manager.migrate_package_to_virtuoso(
        config_file_path=TEST_CONFIG_FILE_PATH,
        package=ted_package,
        controlled_vocabulary=controlled_vocabulary)
    migrated = DatasetDcatApOp(DATASET_URI_PREFIX + "ted-1")
    assert migrated.get_description_from_ts() is True
def edit_save_to_ts():
    """Manual helper: edit a known dataset in several ways and persist it
    back to the triplestore, then re-load it for inspection."""
    ds1 = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
    if ds1.get_description_from_ts():
        ds1.privacy_state = "public"
        ds1.schema.ckanName_dcatapop['0'].value_or_uri = "NEW CKAN NAME"
        ds1.schema.keyword_dcat['fr'] = ResourceValue(u'la réussite',
                                                      lang="fr")
        ds1.schema.keyword_dcat['grg'] = ResourceValue(u'επιτυχία',
                                                       lang="gr")
        ds1.schema.contactPoint_dcat['0'].hasTelephone_vcard[
            '0'].hasValue_vcard['0'].uri = "TEL:213232323"
        if ds1.save_to_ts():
            # FIX: print as a function call — the single-argument form is
            # identical on Python 2 and keeps the module importable on 3.
            print(" Save done")
            ds1after = DatasetDcatApOp(
                "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
            ds1after.get_description_from_ts()
    pass
def test_save_as_public(self):
    """Load a private dataset, flip its privacy state to public and save
    it back to the triplestore."""
    dataset = DatasetDcatApOp(TRANSLATION_MEMORY_V_4,
                              privacy_state=PRIVACY_STATE_PRIVATE,
                              graph_name=DCATAPOP_PRIVATE_GRAPH_NAME)
    # Flip to public before loading/saving, exactly as the scenario
    # under test requires.
    dataset.privacy_state = PRIVACY_STATE_PUBLIC
    dataset.get_description_from_ts()
    dataset.save_to_ts()
def test_controlled_vocabulary_publisher_from_db(self):
    """publisher_dcterms of the reference dataset must pass the
    controlled_vocabulary constraint."""
    ds = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
    validation_result = True
    if ds.get_description_from_ts():
        checker = ValidationSchema(ds.schema, ds.schema.get_schema_type())
        for entry in checker.validate():
            matches_property = entry.get("property") == "publisher_dcterms"
            matches_constraint = (
                entry.get("constraint") == "controlled_vocabulary")
            if matches_property and matches_constraint:
                validation_result = (
                    entry.get("result") != ValidationTypeResult.error)
                break
        self.assertTrue(validation_result,
                        " Test validation of validation failed")
def test_validation(self):
    """The reference dataset must produce no validation errors at all."""
    ds = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
    if ds.get_description_from_ts():
        # must have one ckan name,
        # ds.schema.theme_dcat['1'] = ResourceValue("New CKAN")
        checker = ValidationSchema(ds.schema, ds.schema.get_schema_type())
        # Equivalent to the original loop: False as soon as any report
        # entry carries an error result, True otherwise (or when empty).
        validation_result = all(
            entry.get("result") != ValidationTypeResult.error
            for entry in checker.validate())
        self.assertTrue(validation_result,
                        " Test validation of validation failed")
def test_must_have_one(self):
    """Validation of card_1..1 on ckanName_dcatapop: a second value must
    produce a validation error."""
    ds = DatasetDcatApOp(
        "http://data.europa.eu/88u/dataset/dgt-translation-memory-V1-2")
    if ds.get_description_from_ts():
        # Force a second ckanName to violate the card_1..1 constraint.
        ds.schema.ckanName_dcatapop['1'] = ResourceValue("New CKAN")
        validator = ValidationSchema(ds.schema, ds.schema.get_schema_type())
        report = validator.validate()
        # BUG FIX: the original used a misspelled name (validation_resul),
        # declared an unused validation_error, and left the variable
        # unbound when the loop found no matching entry (NameError).
        # Default True makes a missing error entry fail the assertion.
        validation_result = True
        for result in report:
            if result.get(
                    "property") == "ckanName_dcatapop" and result.get(
                    "constraint") == "card_1..1":
                validation_result = False if result.get(
                    "result") == ValidationTypeResult.error else True
        self.assertTrue(not validation_result,
                        " Test validation of must_have_one failed")
def generate_pickled_dataset(activate=False):
    """Regenerate the pickled dataset fixtures (and their JSON mock files)
    from the triplestore.

    :param bool activate: safety switch; nothing happens unless True.
    """
    if activate:
        # Fixed set of "VIP" datasets whose fixtures are regenerated.
        list_ds_vip = [
            "http://data.europa.eu/88u/dataset/doi-test1",
            "http://data.europa.eu/88u/dataset/eurovoc",
            "http://data.europa.eu/88u/dataset/dgt-translation-memory"
        ]
        for ds_uri in list_ds_vip:
            ds = DatasetDcatApOp(ds_uri)
            ds.get_description_from_ts()
            # Fixture file name derives from the last URI segment.
            file_name = ds_uri.split("/")[-1] + ".pickle"
            # NOTE(review): this function is declared without a `self`
            # parameter, yet reads self._RESOURCE_FOLDER and
            # self._MOCK_DATASET_DATA below — with activate=True this
            # raises NameError unless `self` is otherwise in scope;
            # confirm whether this was meant to be a method.
            with open(self._RESOURCE_FOLDER + file_name, "w") as pickle_file:
                pickle.dump(ds, pickle_file)
            with open(self._RESOURCE_FOLDER + file_name + ".json", "w") as f:
                json.dump(self._MOCK_DATASET_DATA, f)