def load(self, meta_xml):
    """Extract every configured metadata value from a record and clean it.

    :param meta_xml: the record to read; when it is a string it is first
        parsed with ``loader.from_string``, otherwise it is passed to the
        attributes unchanged.
    :returns: the result of ``self._clean_dataset`` applied to a dict that
        maps each key from ``self.get_metadata_keys()`` to the value its
        attribute extracts from the XML.
    """
    # ``basestring`` only exists on Python 2. Fall back to ``str`` so the
    # string check does not raise NameError when run on Python 3.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    if isinstance(meta_xml, string_types):
        meta_xml = loader.from_string(meta_xml)

    dcat_metadata = {
        key: self.get_attribute(key).get_value(xml=meta_xml)
        for key in self.get_metadata_keys()
    }
    return self._clean_dataset(dcat_metadata)
def import_stage(self, harvest_object):  # noqa
    """Create or update the CKAN package for one harvested record.

    The record XML in ``harvest_object.content`` is converted to a package
    dict (dataset metadata plus distributions as resources). An existing
    package is updated; if none is found a new one is created.

    :param harvest_object: the harvest object to import
    :returns: ``True`` after the package was created or updated and the
        session committed; ``False`` when no harvest object was passed or
        when any exception occurred during the import (the error is
        recorded through ``self._save_object_error``).
    """
    log.debug("In GeocatHarvester import_stage")

    # Guard first: reading ``harvest_object.job`` on a missing object would
    # raise AttributeError before the error could be recorded.
    if not harvest_object:
        log.error("No harvest object received")
        self._save_object_error("No harvest object received", harvest_object)
        return False

    self._set_config(harvest_object.job.source.config)

    try:
        # Without a configured organization, fall back to the organization
        # of the harvest source dataset.
        if "organization" not in self.config:
            context = {"model": model, "session": Session, "ignore_auth": True}
            source_dataset = get_action("package_show")(
                context, {"id": harvest_object.source.id}
            )
            self.config["organization"] = (
                source_dataset.get("organization").get("name")
            )
        organization = self.config["organization"]

        xml_elem = loader.from_string(harvest_object.content)
        dataset_metadata = md.GeocatDcatDatasetMetadata()
        dist_metadata = md.GeocatDcatDistributionMetadata()

        pkg_dict = dataset_metadata.get_metadata(xml_elem)
        dist_list = dist_metadata.get_metadata(xml_elem)

        # Every distribution gets the configured rights statement (or the
        # most restrictive default).
        rights = self.config.get(
            "rights",
            "NonCommercialNotAllowed-CommercialNotAllowed-ReferenceRequired"  # noqa
        )
        for dist in dist_list:
            dist["rights"] = rights

        pkg_dict["identifier"] = "%s@%s" % (pkg_dict["identifier"], organization)

        # geocat returns see_alsos as UUID, check if there are
        # datasets from the same organization as the harvester
        existing_see_alsos = []
        for linked_uuid in pkg_dict["see_alsos"]:
            identifier = "%s@%s" % (linked_uuid, organization)
            try:
                self._find_existing_package({"identifier": identifier})
            except NotFound:
                continue
            existing_see_alsos.append(identifier)
        pkg_dict["see_alsos"] = existing_see_alsos

        pkg_dict["owner_org"] = organization
        pkg_dict["resources"] = dist_list
        pkg_dict["name"] = self._gen_new_name(pkg_dict["title"]["de"])

        # legal basis
        legal_basis_url = self.config.get("legal_basis_url", None)
        if legal_basis_url:
            pkg_dict["relations"].append(
                {"url": legal_basis_url, "label": "legal_basis"}
            )

        log.debug("package dict: %s", pkg_dict)

        package_context = {"ignore_auth": True}
        # NOTE(review): a NotFound raised by package_update itself also lands
        # in the create branch below - confirm that this is intended.
        try:
            existing = self._find_existing_package(pkg_dict)
            log.debug("Existing package found, updating %s...", existing["id"])
            # Keep name and id of the existing package so it is updated in
            # place instead of being renamed.
            pkg_dict["name"] = existing["name"]
            pkg_dict["id"] = existing["id"]
            updated_pkg = get_action("package_update")(package_context, pkg_dict)
            harvest_object.current = True
            harvest_object.package_id = updated_pkg["id"]
            harvest_object.save()
            log.debug("Updated PKG: %s", updated_pkg)
        except NotFound:
            log.debug("No package found, create a new one!")
            # Presumably deferred so the harvest object may reference the
            # package id before the package row exists - TODO confirm.
            model.Session.execute(
                "SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED"
            )
            model.Session.flush()
            created_pkg = get_action("package_create")(package_context, pkg_dict)
            harvest_object.current = True
            harvest_object.package_id = created_pkg["id"]
            harvest_object.add()
            log.debug("Created PKG: %s", created_pkg)

        Session.commit()
        return True

    except Exception as e:
        # Top-level boundary of the import: record the failure on the
        # harvest object and report it through the return value.
        self._save_object_error(
            "Exception in import stage: %r / %s" % (e, traceback.format_exc()),
            harvest_object
        )
        return False