def test_get_package_id_by_data_pids(self):
    '''
    Looking up packages by a data PID returns the id of the package
    that owns that PID, for each of the two fixture datasets.
    '''
    package_1_id, package_2_id = self._create_datasets()

    package_id = utils.get_package_id_by_data_pids(
        {'pids': [{'type': 'data', 'id': 'some_data_pid_1'}]})
    # assertEquals is a deprecated alias; assertEqual is the canonical name
    self.assertEqual(package_1_id, package_id[0])

    package_id = utils.get_package_id_by_data_pids(
        {'pids': [{'type': 'data', 'id': 'some_data_pid_2'}]})
    self.assertEqual(package_2_id, package_id[0])
def package_show(context, data_dict):
    '''
    Return the metadata of a dataset (package) and its resources.

    Called before showing the dataset in some interface (browser, API),
    or when adding package to Solr index (no validation / conversions then).

    Besides fetching the package, this wrapper also performs side effects:
    it may create Related items from the ``erelated`` extra and may sync
    ``package.title`` from the ``title_0`` extra (triggering a Solr rebuild).

    :param id: the id or name of the dataset
    :type id: string

    :rtype: dictionary
    '''
    # Harvest sources are shown with their own dedicated schema
    if data_dict.get('type') == 'harvest':
        context['schema'] = Schemas.harvest_source_show_package_schema()

    if not data_dict.get('id') and not data_dict.get('name'):
        # Get package by data PIDs
        data_dict['id'] = utils.get_package_id_by_data_pids(data_dict)

    pkg_dict1 = ckan.logic.action.get.package_show(context, data_dict)
    pkg_dict1 = utils.resource_to_dataset(pkg_dict1)

    # Remove empty agents that come from padding the agent list in converters
    # (Python 2 filter() returns a list here)
    if 'agent' in pkg_dict1:
        agents = filter(None, pkg_dict1.get('agent', []))
        pkg_dict1['agent'] = agents or []

    # Normally logic function should not catch the raised errors
    # but here it is needed so action package_show won't catch it instead
    # Hiding information from API calls
    try:
        check_access('package_update', context)
    except NotAuthorized:
        pkg_dict1 = utils.hide_sensitive_fields(pkg_dict1)

    pkg = Package.get(pkg_dict1['id'])
    # Create Related items for each ';'-separated title in the 'erelated'
    # extra that does not already have one.
    if 'erelated' in pkg.extras:
        erelated = pkg.extras['erelated']
        if len(erelated):
            for value in erelated.split(';'):
                if len(Session.query(Related).filter(Related.title == value).all()) == 0:
                    # NOTE: intentionally shadows the data_dict parameter;
                    # it is not used for its original purpose after this point
                    data_dict = {'title': value,
                                 'type': _("Paper"),
                                 'dataset_id': pkg.id}
                    related_create(context, data_dict)

    # Update package.title to match package.extras.title_0
    extras_title = pkg.extras.get(u'title_0')
    if extras_title and extras_title != pkg.title:
        repo.new_revision()
        pkg.title = pkg.extras[u'title_0']
        pkg.save()
        rebuild(pkg.id)  # Rebuild solr-index for this dataset

    return pkg_dict1
# NOTE(review): fragment — the enclosing method definition and its opening
# ``try:`` are above this chunk and not visible here.
ddi_xml = BeautifulSoup(info["xml"], "xml")
# Python 2 except syntax; err.msg comes from lxml's XMLSyntaxError
except etree.XMLSyntaxError, err:
    self._save_object_error("Unable to parse XML! {er}".format(er=err.msg), harvest_object, "Import")
    # I presume source sent wrong data but it arrived correctly.
    # This could result in a case where incorrect source is tried
    # over and over again without success.
    del info["xml"]
    harvest_object.content = info["url"]
    # self._add_retry(harvest_object)
    return False
self.ddi_converter.read_fsd_ref()
package_dict = self.ddi_converter.ddi2ckan(ddi_xml, info["url"], info["xml"], harvest_object)
# Check if dataset already exists and use its id.
pkg_id = utils.get_package_id_by_data_pids(package_dict)
pkg = model.Session.query(model.Package).filter(model.Package.id == pkg_id).first() if pkg_id else None
if pkg:
    # Reuse the existing package id so the harvest updates it in place
    package_dict["id"] = pkg.id
    log.debug("Found existing package with PIDs: {pid}".format(pid=package_dict["pids"]))
# Report any conversion errors collected by the DDI converter as
# import-stage object errors.
errors = self.ddi_converter.get_errors()
if errors:
    # FIXME: Using line number here hazardous. Old _read_value doesn't support
    for er, line in errors:
        self._save_object_error(
            "Invalid or missing mandatory metadata in {ur}. "
            "{er}".format(ur=info["url"], er=er),
            harvest_object,
            "Import",
            line,
        )