예제 #1
0
    def load(self, meta_xml):
        """Collect every metadata value from an XML document and clean it.

        ``meta_xml`` may be a string, in which case it is first parsed with
        ``loader.from_string``; any other value is handed to the attribute
        getters unchanged.  For each key yielded by ``get_metadata_keys()``
        the matching attribute's ``get_value(xml=...)`` result is stored
        under that key, and the resulting dict is returned after being
        passed through ``_clean_dataset``.
        """
        xml_doc = (
            loader.from_string(meta_xml)
            if isinstance(meta_xml, basestring)
            else meta_xml
        )

        raw_values = {
            name: self.get_attribute(name).get_value(xml=xml_doc)
            for name in self.get_metadata_keys()
        }
        return self._clean_dataset(raw_values)
예제 #2
0
    def import_stage(self, harvest_object):  # noqa
        """Create or update the package described by one harvest object.

        The XML in ``harvest_object.content`` is parsed and mapped to a
        package dict (dataset metadata) plus a list of resources
        (distribution metadata).  If a package with the same identifier
        already exists it is updated, otherwise a new one is created.

        :param harvest_object: harvest object whose ``content`` holds the
            record XML and whose ``job.source.config`` holds the harvester
            configuration.
        :returns: ``True`` once the package has been created/updated and the
            session committed; ``False`` when no harvest object was passed
            or when any exception occurred (the error is recorded through
            ``_save_object_error``).
        """
        log.debug("In GeocatHarvester import_stage")

        # Guard before touching any attribute: reading
        # harvest_object.job on a missing object would raise before the
        # error could be recorded.
        if not harvest_object:
            log.error("No harvest object received")
            self._save_object_error("No harvest object received", harvest_object)
            return False

        self._set_config(harvest_object.job.source.config)

        try:

            # Fall back to the organization of the harvest source dataset
            # when the config does not name one explicitly.
            if "organization" not in self.config:
                context = {"model": model, "session": Session, "ignore_auth": True}
                source_dataset = get_action("package_show")(context, {"id": harvest_object.source.id})
                self.config["organization"] = source_dataset.get("organization").get("name")

            xml_elem = loader.from_string(harvest_object.content)
            dataset_metadata = md.GeocatDcatDatasetMetadata()
            dist_metadata = md.GeocatDcatDistributionMetadata()

            pkg_dict = dataset_metadata.get_metadata(xml_elem)
            dist_list = dist_metadata.get_metadata(xml_elem)

            # Every distribution gets the configured rights statement (or
            # the restrictive default below).
            for dist in dist_list:
                dist["rights"] = self.config.get(
                    "rights", "NonCommercialNotAllowed-CommercialNotAllowed-ReferenceRequired"  # noqa
                )

            # Identifiers are qualified with the organization name.
            pkg_dict["identifier"] = "%s@%s" % (pkg_dict["identifier"], self.config["organization"])

            # geocat returns see_alsos as UUID, check if there are
            # datasets from the same organization as the harvester
            existing_see_alsos = []
            for linked_uuid in pkg_dict["see_alsos"]:
                try:
                    identifier = "%s@%s" % (linked_uuid, self.config["organization"])
                    check_dict = {"identifier": identifier}
                    self._find_existing_package(check_dict)
                    existing_see_alsos.append(identifier)
                except NotFound:
                    # Linked dataset is unknown here: drop the reference.
                    continue
            pkg_dict["see_alsos"] = existing_see_alsos

            pkg_dict["owner_org"] = self.config["organization"]
            pkg_dict["resources"] = dist_list
            pkg_dict["name"] = self._gen_new_name(pkg_dict["title"]["de"])

            # legal basis
            legal_basis_url = self.config.get("legal_basis_url", None)
            if legal_basis_url:
                pkg_dict["relations"].append({"url": legal_basis_url, "label": "legal_basis"})

            log.debug("package dict: %s", pkg_dict)

            package_context = {"ignore_auth": True}
            try:
                existing = self._find_existing_package(pkg_dict)
                log.debug("Existing package found, updating %s...", existing["id"])
                # Keep the name and id of the package that is already there.
                pkg_dict["name"] = existing["name"]
                pkg_dict["id"] = existing["id"]
                updated_pkg = get_action("package_update")(package_context, pkg_dict)
                harvest_object.current = True
                harvest_object.package_id = updated_pkg["id"]
                harvest_object.save()
                log.debug("Updated PKG: %s", updated_pkg)
            except NotFound:
                log.debug("No package found, create a new one!")

                # Defer the foreign-key check on harvest_object.package_id
                # until commit (presumably so the object can point at the
                # package being created in this transaction -- confirm).
                model.Session.execute("SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED")
                model.Session.flush()

                created_pkg = get_action("package_create")(package_context, pkg_dict)

                harvest_object.current = True
                harvest_object.package_id = created_pkg["id"]
                harvest_object.add()

                log.debug("Created PKG: %s", created_pkg)

            Session.commit()
            return True

        except Exception as e:
            # Boundary handler: record the failure on the harvest object
            # (with traceback) rather than letting it propagate.
            self._save_object_error(
                ("Exception in import stage: %r / %s" % (e, traceback.format_exc())), harvest_object
            )
            return False