def listIdentifiers(self, metadataPrefix, set=None, cursor=None, from_=None, until=None, batch_size=None):
    '''List all identifiers for this repository.

    Returns OAI-PMH headers for active, non-private datasets, optionally
    restricted to one set (a CKAN group) and/or a revision-timestamp window
    given by ``from_`` / ``until``.  ``cursor`` skips that many results.
    '''
    headers = []
    packages = []
    group = None
    if not set:
        # No set requested: query all public, active datasets.
        base = Session.query(Package).filter(Package.type=='dataset').\
            filter(Package.private!=True).filter(Package.state=='active')
        if not from_ and not until:
            packages = base.all()
        elif from_ and not until:
            packages = base.filter(PackageRevision.revision_timestamp > from_).\
                filter(Package.name==PackageRevision.name).all()
        elif until and not from_:
            packages = base.filter(PackageRevision.revision_timestamp < until).\
                filter(Package.name==PackageRevision.name).all()
        else:
            packages = base.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                filter(Package.name==PackageRevision.name).all()
    else:
        group = Group.get(set)
        if group:
            query = group.packages(return_query=True).filter(Package.type=='dataset').\
                filter(Package.private!=True).filter(Package.state=='active')
            if from_ and not until:
                query = query.filter(PackageRevision.revision_timestamp > from_).\
                    filter(Package.name==PackageRevision.name).filter(Package.state=='active')
            if until and not from_:
                query = query.filter(PackageRevision.revision_timestamp < until).\
                    filter(Package.name==PackageRevision.name).filter(Package.state=='active')
            if from_ and until:
                query = query.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                    filter(Package.name==PackageRevision.name).filter(Package.state=='active')
            packages = query.all()
    if cursor:
        packages = packages[cursor:]
    for package in packages:
        # setSpec preference: requested group > owning organisation > package name.
        spec = package.name
        if group:
            spec = group.name
        else:
            if package.owner_org:
                owner = Group.get(package.owner_org)
                if owner and owner.name:
                    spec = owner.name
        headers.append(common.Header('', package.id, package.metadata_created, [spec], False))
    return headers
def listRecords(self, metadataPrefix, set=None, cursor=None, from_=None, until=None, batch_size=None):
    '''Show a selection of records, basically lists all datasets.

    Only active, non-private datasets are returned.  ``set`` restricts the
    listing to one CKAN group, ``from_``/``until`` restrict by revision
    timestamp and ``cursor`` skips that many results.
    '''
    data = []
    packages = []
    group = None
    if not set:
        query = Session.query(Package).filter(Package.type=='dataset').\
            filter(Package.private!=True).filter(Package.state=='active')
        if from_ and not until:
            query = query.filter(PackageRevision.revision_timestamp > from_).\
                filter(Package.name==PackageRevision.name)
        elif until and not from_:
            query = query.filter(PackageRevision.revision_timestamp < until).\
                filter(Package.name==PackageRevision.name)
        elif from_ and until:
            query = query.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                filter(Package.name==PackageRevision.name)
        packages = query.all()
    else:
        group = Group.get(set)
        if group:
            # BUG FIX: the dataset/private/active filters were previously only
            # applied when a date range was given, so a plain set listing
            # leaked private, inactive and non-dataset packages.  Apply them
            # unconditionally, matching listIdentifiers.
            query = group.packages(return_query=True).\
                filter(Package.type=='dataset').filter(Package.private!=True).\
                filter(Package.state=='active')
            if from_ and not until:
                query = query.filter(PackageRevision.revision_timestamp > from_).\
                    filter(Package.name==PackageRevision.name)
            elif until and not from_:
                query = query.filter(PackageRevision.revision_timestamp < until).\
                    filter(Package.name==PackageRevision.name)
            elif from_ and until:
                query = query.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                    filter(Package.name==PackageRevision.name)
            packages = query.all()
    if cursor:
        packages = packages[cursor:]
    for res in packages:
        # setSpec preference: requested group > owning organisation > package name.
        spec = res.name
        if group:
            spec = group.name
        else:
            if res.owner_org:
                # Use a local name so the requested-set 'group' is not clobbered.
                owner = Group.get(res.owner_org)
                if owner and owner.name:
                    spec = owner.name
        data.append(self._record_for_dataset(res, spec))
    return data
def test_zaincremental_harvester(self):
    '''Incremental harvest: gather + fetch should pick up a package newer
    than the previous gather timestamp and deliver its records.'''
    # Serve our own OAI-PMH endpoint and point the harvester's client at it.
    client = CKANServer()
    registry = metadata.MetadataRegistry()
    registry.registerReader('oai_dc', oai_dc_reader)
    registry.registerWriter('oai_dc', oai_dc_writer)
    server = BatchingServer(client, metadata_registry=registry)
    oaipmh.client.Client = mock.Mock(return_value=ServerClient(server, registry))
    harvester = OAIPMHHarvester()
    # Build an incremental harvest job whose gather started "yesterday"
    # relative to the revision created below.
    harvest_job = HarvestJob()
    harvest_job.source = HarvestSource()
    harvest_job.source.title = "Test"
    harvest_job.source.url = "http://helda.helsinki.fi/oai/request"
    harvest_job.gather_started = datetime.now() + timedelta(days=1)
    harvest_job.source.config = '{"incremental":"True"}'
    harvest_job.source.type = "OAI-PMH"
    Session.add(harvest_job)
    # Create a package with a revision newer than gather_started.
    rev = model.repo.new_revision()
    rev.timestamp = datetime.now() + timedelta(days=2)
    pkg = Package(name='footest', revision=rev)
    Session.add(pkg)
    pkg.save()
    roger = Group.get('roger')
    roger.add_package_by_name('footest')
    Session.add(roger)
    roger.save()
    # Gather then fetch; the fetched content must contain records.
    gathered = harvester.gather_stage(harvest_job)
    harvest_object = HarvestObject.get(gathered[0])
    harvester.fetch_stage(harvest_object)
    harvobj = json.loads(harvest_object.content)
    self.assert_(harvobj['records'])
def get_discipline(context, data_dict):
    '''Return KATA discipline groups whose name contains any query term.

    :param context: CKAN action context (unused beyond convention)
    :param data_dict: may contain 'query' or 'q' (string or list of strings);
        a deprecated 'fields' key only triggers a warning
    :returns: list of child-group dicts of the 'KATA' group matching a term
    '''
    terms = data_dict.get('query') or data_dict.get('q') or []
    if isinstance(terms, basestring):
        terms = [terms]
    terms = [t.strip() for t in terms if t.strip()]
    if 'fields' in data_dict:
        log.warning('"fields" parameter is deprecated. '
                    'Use the "query" parameter instead')
    # NOTE: removed unused locals (offset/limit from data_dict and an unused
    # model.Session.query(model.Group)) — they had no effect on the result.
    # TODO: should we check for user authentication first?
    if not len(terms):
        # BUG FIX: previously returned the tuple ([], 0) here while the main
        # path below returns a plain list; keep the return type consistent.
        return []
    katagrp = Group.get('KATA')
    res = []
    for term in terms:
        escaped_term = misc.escape_sql_like_special_characters(term, escape='\\')
        for child in katagrp.get_children_groups():
            # Plain substring match on the (escaped) term.
            if escaped_term in child['name']:
                res.append(child)
    return res
def listRecords(self, metadataPrefix=None, set=None, cursor=None, from_=None, until=None, batch_size=None):
    '''Show a selection of records, basically lists all datasets.

    The record serialization depends on ``metadataPrefix``: 'rdf' -> DCAT,
    'oai_openaire' -> DataCite, anything else -> oai_dc.
    '''
    data = []
    packages, setspc = self._filter_packages(set, cursor, from_, until, batch_size)
    for package in packages:
        set_spec = []
        if setspc:
            set_spec.append(setspc)
        if package.owner_org:
            group = Group.get(package.owner_org)
            if group and group.name:
                set_spec.append(group.name)
        if not set_spec:
            set_spec = [package.name]
        # BUG FIX: the original used two independent ifs, so a 'rdf' request
        # fell through to the trailing else as well and appended the record
        # twice (once as DCAT, once as oai_dc). Use a single if/elif/else.
        if metadataPrefix == 'rdf':
            data.append(self._record_for_dataset_dcat(package, set_spec))
        elif metadataPrefix == 'oai_openaire':
            data.append(self._record_for_dataset_datacite(package, set_spec))
        else:
            data.append(self._record_for_dataset(package, set_spec))
    return data
def listIdentifiers(self, metadataPrefix=None, set=None, cursor=None, from_=None, until=None, batch_size=None):
    '''List all identifiers for this repository.

    Headers carry a setSpec: the requested set's group name when a set was
    given, otherwise the package's owning organisation name, otherwise the
    package name.
    '''
    data = []
    packages, group = self._filter_packages(set, cursor, from_, until, batch_size)
    for package in packages:
        spec = package.name
        if group:
            spec = group.name
        elif package.owner_org:
            # BUG FIX: look the organisation up into a local name. The
            # original reassigned 'group', so after the first package with an
            # owner_org every subsequent package reused that organisation's
            # name as its setSpec regardless of its own owner.
            owner = Group.get(package.owner_org)
            if owner and owner.name:
                spec = owner.name
        data.append(
            common.Header('', package.id, package.metadata_created, [spec], False))
    return data
def add_to_group(key, data, errors, context):
    '''Attach the package named in ``data[('name',)]`` to the group whose
    id/name is stored under ``key``.

    NOTE(review): assumes the group exists — Group.get returning None would
    raise AttributeError on add_package_by_name; confirm callers guarantee it.
    '''
    group_id = data.get(key)
    if not group_id:
        return
    repo.new_revision()
    target = Group.get(group_id)
    target.add_package_by_name(data[('name',)])
    target.save()
def listRecords(self, metadataPrefix=None, set=None, cursor=None, from_=None, until=None, batch_size=None):
    '''Show a selection of records, basically lists all datasets.

    'rdf' requests get DCAT records, everything else gets oai_dc records.
    '''
    data = []
    packages, group = self._filter_packages(set, cursor, from_, until, batch_size)
    for package in packages:
        spec = package.name
        if group:
            spec = group.name
        elif package.owner_org:
            # BUG FIX: use a local name for the owner organisation. The
            # original reassigned 'group', so after the first package with an
            # owner_org all later packages inherited that organisation's name
            # as their setSpec.
            owner = Group.get(package.owner_org)
            if owner and owner.name:
                spec = owner.name
        if metadataPrefix == 'rdf':
            data.append(self._record_for_dataset_dcat(package, spec))
        else:
            data.append(self._record_for_dataset(package, spec))
    return data
def test_records(self):
    """ Test record fetching via http-request to prevent accidental changes to interface """
    # A sysadmin user plus one organisation owning two public packages.
    model.User(name="test", sysadmin=True).save()
    organization = get_action('organization_create')(
        {'user': '******'},
        {'name': 'test-organization', 'title': "Test organization"})
    package_1_data = deepcopy(TEST_DATADICT)
    package_1_data['owner_org'] = organization['name']
    package_1_data['private'] = False
    package_2_data = deepcopy(package_1_data)
    for pid in package_1_data.get('pids', []):
        pid['id'] = utils.generate_pid()
    for pid in package_2_data.get('pids', []):
        pid['id'] = utils.generate_pid()
    packages = [get_action('package_create')({'user': '******'}, package_1_data),
                get_action('package_create')({'user': '******'}, package_2_data)]
    url = url_for('/oai')
    # ListSets must expose the organisation as an OAI set.
    result = self.app.get(url, {'verb': 'ListSets'})
    root = lxml.etree.fromstring(result.body)
    request_set = self._get_single_result(root, "//o:set")
    set_name = request_set.xpath("string(o:setName)", namespaces=self._namespaces)
    set_spec = request_set.xpath("string(o:setSpec)", namespaces=self._namespaces)
    self.assertEquals(organization['name'], set_spec)
    self.assertEquals(organization['title'], set_name)
    # ListIdentifiers for that set must return our packages' headers.
    result = self.app.get(url, {'verb': 'ListIdentifiers', 'set': set_spec,
                                'metadataPrefix': 'oai_dc'})
    root = lxml.etree.fromstring(result.body)
    fail = True
    package_identifiers = [package['id'] for package in packages]
    package_org_names = [Group.get(package['owner_org']).name
                         for package in packages]
    for header in root.xpath("//o:header", namespaces=self._namespaces):
        fail = False
        set_spec = header.xpath("string(o:setSpec)", namespaces=self._namespaces)
        identifier = header.xpath("string(o:identifier)",
                                  namespaces=self._namespaces)
        self.assertTrue(set_spec in package_org_names)
        self.assertTrue(identifier in package_identifiers)
        # Each identifier must also be retrievable via GetRecord.
        result = self.app.get(url, {'verb': 'GetRecord',
                                    'identifier': identifier,
                                    'metadataPrefix': 'oai_dc'})
        record_root = lxml.etree.fromstring(result.body)
        fail_record = True
        for record_result in record_root.xpath("//o:record",
                                               namespaces=self._namespaces):
            fail_record = False
            record_header = self._get_single_result(record_result, 'o:header')
            self._get_single_result(record_result, 'o:metadata')
            self.assertTrue(record_header.xpath(
                "string(o:identifier)",
                namespaces=self._namespaces) in package_identifiers)
            self.assertTrue(record_header.xpath(
                "string(o:setSpec)",
                namespaces=self._namespaces) in package_org_names)
        self.assertFalse(fail_record, "No records received")
    self.assertFalse(fail, "No headers (packages) received")
def getRecord(self, metadataPrefix, identifier):
    '''Simple getRecord for a dataset.

    :raises IdDoesNotExistError: when no dataset matches ``identifier``
    The setSpec is the owning organisation's name when available, otherwise
    the package name.
    '''
    package = Package.get(identifier)
    if package is None:
        raise IdDoesNotExistError("No dataset with id %s" % identifier)
    spec = package.name
    if package.owner_org:
        owner = Group.get(package.owner_org)
        if owner and owner.name:
            spec = owner.name
    return self._record_for_dataset(package, spec)
def _filter_packages(set, cursor, from_, until, batch_size):
    '''Get a part of datasets for "listNN" verbs.

    Returns (packages, setspc): the matching packages plus the special
    'openaire_data' set spec when that pseudo-set was requested, else None.
    '''
    packages = []
    setspc = None
    if not set:
        # No set: all active, non-private datasets, with optional
        # revision-timestamp window and limit/offset paging.
        query = Session.query(Package).filter(Package.type=='dataset'). \
            filter(Package.state == 'active').filter(Package.private!=True)
        if from_ and not until:
            query = query.filter(PackageRevision.revision_timestamp > from_).\
                filter(Package.name==PackageRevision.name)
        if until and not from_:
            query = query.filter(PackageRevision.revision_timestamp < until).\
                filter(Package.name==PackageRevision.name)
        if from_ and until:
            query = query.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                filter(Package.name==PackageRevision.name)
        if batch_size:
            query = query.limit(batch_size)
        if cursor:
            query = query.offset(cursor)
        packages = query.all()
    elif set == 'openaire_data':
        # Pseudo-set backed by a tag rather than a group.
        # NOTE(review): this branch ignores from_/until/batch_size/cursor,
        # unlike the others — confirm that is intentional.
        oa_tag = Session.query(Tag).filter(
            Tag.name == 'openaire_data').first()
        if oa_tag:
            packages = oa_tag.packages
        setspc = set
    else:
        group = Group.get(set)
        if group:
            # Note that group.packages never returns private datasets regardless of 'with_private' parameter.
            query = group.packages(return_query=True, with_private=False).\
                filter(Package.type=='dataset'). \
                filter(Package.state == 'active')
            if from_ and not until:
                query = query.filter(PackageRevision.revision_timestamp > from_).\
                    filter(Package.name==PackageRevision.name)
            if until and not from_:
                query = query.filter(PackageRevision.revision_timestamp < until).\
                    filter(Package.name==PackageRevision.name)
            if from_ and until:
                query = query.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                    filter(Package.name==PackageRevision.name)
            if batch_size:
                query = query.limit(batch_size)
            if cursor:
                query = query.offset(cursor)
            packages = query.all()
    return packages, setspc
def initdb(self):
    '''Create the KATA discipline group hierarchy if it does not exist yet,
    then run the module setup.'''
    kata = Group.get('KATA')
    if not kata:
        repo.new_revision()
        kata = Group(name="KATA", title="Tieteenalat")
        kata.save()
        # One child group per discipline, each linked to KATA via a Member row.
        for science in tieteet.tieteet:
            child = Group(description=science['description'],
                          name=science['name'],
                          title=science['title'])
            child.save()
            link = Member(group=kata, table_id=child.id, table_name="group")
            link.save()
    # NOTE(review): setup() placement reconstructed as unconditional —
    # confirm it was not meant to run only on first creation.
    setup()
def getRecord(self, metadataPrefix, identifier):
    '''Simple getRecord for a dataset.

    :raises IdDoesNotExistError: when no dataset matches ``identifier``
    Returns a DCAT record for the 'rdf' prefix, an oai_dc record otherwise;
    the setSpec is the owning organisation's name when available.
    '''
    package = Package.get(identifier)
    if package is None:
        raise IdDoesNotExistError("No dataset with id %s" % identifier)
    spec = package.name
    if package.owner_org:
        owner = Group.get(package.owner_org)
        if owner and owner.name:
            spec = owner.name
    if metadataPrefix == 'rdf':
        return self._record_for_dataset_dcat(package, spec)
    return self._record_for_dataset(package, spec)
def harvest_source_dictize(source, context):
    '''Dictize a harvest source, adding the publisher title and the
    source's current status.'''
    out = source.as_dict()
    # Default to empty; fill in only when the publisher group resolves.
    out['publisher_title'] = u''
    publisher_id = out.get('publisher_id')
    if publisher_id:
        publisher = Group.get(publisher_id)
        if publisher:
            out['publisher_title'] = publisher.title
    out['status'] = _get_source_status(source, context)
    return out
def listIdentifiers(self, metadataPrefix, set=None, cursor=None, from_=None, until=None, batch_size=None):
    '''List all identifiers for this repository.

    ``set`` restricts to one CKAN group's active packages; ``from_``/``until``
    restrict by revision timestamp; ``cursor`` resumes after that many results.
    '''
    data = []
    packages = []
    if not set:
        # Make the date conditions mutually exclusive. The original ran
        # separate .all() queries for from_, until and between and let the
        # last assignment win, executing up to three full queries.
        query = Session.query(Package)
        if from_ and until:
            query = query.filter(
                between(PackageRevision.revision_timestamp, from_, until))
        elif from_:
            query = query.filter(PackageRevision.revision_timestamp > from_)
        elif until:
            query = query.filter(PackageRevision.revision_timestamp < until)
        packages = query.all()
    else:
        group = Group.get(set)
        if group:
            query = group.active_packages()
            if from_ and until:
                query = query.filter(
                    between(PackageRevision.revision_timestamp, from_, until))
            elif from_:
                query = query.filter(PackageRevision.revision_timestamp > from_)
            elif until:
                query = query.filter(PackageRevision.revision_timestamp < until)
            packages = query.all()
    if cursor:
        # BUG FIX: was packages[:cursor], which re-sent the first 'cursor'
        # records on every resumption instead of continuing after them.
        packages = packages[cursor:]
    for package in packages:
        data.append(common.Header(package.id, package.metadata_created,
                                  [package.name], False))
    return data
def listIdentifiers(self, metadataPrefix=None, set=None, cursor=None, from_=None, until=None, batch_size=None):
    '''List all identifiers for this repository.

    The setSpec is the requested set's group name when a set was given,
    otherwise the owning organisation's name, otherwise the package name.
    '''
    data = []
    packages, group = self._filter_packages(set, cursor, from_, until, batch_size)
    for package in packages:
        spec = package.name
        if group:
            spec = group.name
        elif package.owner_org:
            # BUG FIX: use a local name for the owner organisation. The
            # original reassigned 'group', so after the first package with an
            # owner_org every later package reused that organisation's name
            # as its setSpec.
            owner = Group.get(package.owner_org)
            if owner and owner.name:
                spec = owner.name
        data.append(common.Header('', package.id, package.metadata_created,
                                  [spec], False))
    return data
def import_stage(self, harvest_object):
    '''
    The import stage will receive a HarvestObject object and will be
    responsible for:
        - performing any necessary action with the fetched object (e.g
          create a CKAN package).
          Note: if this stage creates or updates a package, a reference
          to the package must be added to the HarvestObject.
          Additionally, the HarvestObject must be flagged as current.
        - creating the HarvestObject - Package relation (if necessary)
        - creating and storing any suitable HarvestObjectErrors that may
          occur.
        - returning True if everything went as expected, False otherwise.

    :param harvest_object: HarvestObject object
    :returns: True if everything went right, False if errors were found
    '''
    # Do common tasks and then call different methods depending on what
    # kind of info the harvest object contains.
    self._set_config(harvest_object.job.source.config)
    ident = json.loads(harvest_object.content)
    registry = MetadataRegistry()
    if 'metadata_formats' in self.config:
        for mdp in self.config['metadata_formats']:
            registry.registerReader(mdp, kata_oai_dc_reader)
        if self.metadata_prefix_value not in self.config['metadata_formats']:
            registry.registerReader(self.metadata_prefix_value, kata_oai_dc_reader)
    else:
        registry.registerReader(self.metadata_prefix_value, kata_oai_dc_reader)
    client = oaipmh.client.Client(harvest_object.job.source.url, registry)
    client.updateGranularity()  # quickfix for granularity
    domain = ident['domain']
    group = Group.get(domain)  # Checked in gather_stage so exists.
    try:
        if ident['fetch_type'] == 'record':
            return self._fetch_import_record(harvest_object, ident, client, group)
        if ident['fetch_type'] == 'set':
            return self._fetch_import_set(harvest_object, ident, client, group)
        # This should not happen...
        log.error('Unknown fetch type: %s' % ident['fetch_type'])
        # FIX: return an explicit False for the unknown-type path instead of
        # falling off the function and returning None.
        return False
    except Exception:
        # Guard against miscellaneous stuff. Probably plain bugs.
        # Also very rare exceptions we haven't seen yet.
        self._add_retry(harvest_object)
        # BUG FIX: traceback.format_exc() takes an optional integer 'limit',
        # not the exception instance; passing 'e' misused the API.
        log.debug(traceback.format_exc())
        return False
def harvest_source_dictize(source, context, last_job_status=False):
    '''Dictize a harvest source with publisher title and status; when
    ``last_job_status`` is true, also include the last job's status dict.'''
    out = source.as_dict()
    out['publisher_title'] = u''
    publisher_id = out.get('publisher_id')
    if publisher_id:
        publisher = Group.get(publisher_id)
        if publisher:
            out['publisher_title'] = publisher.title
    out['status'] = _get_source_status(source, context)
    if last_job_status:
        source_status = logic.get_action('harvest_source_show_status')(
            context, {'id': source.id})
        out['last_job_status'] = source_status.get('last_job', {})
    return out
def add_to_group(key, data, errors, context):
    '''Add a new group if it doesn't yet exist.

    :param key: key
    :param data: data
    :param errors: validation errors
    :param context: context
    '''
    value = data.get(key)
    if not value:
        return
    repo.new_revision()
    existing = Group.get(value)
    # UI code needs group created if it does not match. Hence do so.
    if not existing:
        created = Group(name=value, description=value, title=value)
        setup_default_user_roles(created)
        created.save()
    repo.commit()
def listRecords(self, metadataPrefix, set=None, cursor=None, from_=None, until=None, batch_size=None):
    '''Show a selection of records, basically lists all datasets.

    ``set`` restricts to one CKAN group's active packages; ``from_``/``until``
    restrict by revision timestamp; ``cursor`` resumes after that many results.
    '''
    data = []
    packages = []
    if not set:
        # Make the date conditions mutually exclusive. The original ran
        # separate .all() queries for from_, until and between and let the
        # last assignment win, executing up to three full queries.
        query = Session.query(Package)
        if from_ and until:
            query = query.filter(
                between(PackageRevision.revision_timestamp, from_, until))
        elif from_:
            query = query.filter(PackageRevision.revision_timestamp > from_)
        elif until:
            query = query.filter(PackageRevision.revision_timestamp < until)
        packages = query.all()
    else:
        group = Group.get(set)
        if group:
            query = group.active_packages()
            if from_ and until:
                query = query.filter(
                    between(PackageRevision.revision_timestamp, from_, until))
            elif from_:
                query = query.filter(PackageRevision.revision_timestamp > from_)
            elif until:
                query = query.filter(PackageRevision.revision_timestamp < until)
            packages = query.all()
    if cursor:
        # BUG FIX: was packages[:cursor], which re-sent the first 'cursor'
        # records on every resumption instead of continuing after them.
        packages = packages[cursor:]
    for res in packages:
        data.append(self._record_for_dataset(res))
    return data
def getRecord(self, metadataPrefix, identifier):
    '''Simple getRecord for a dataset.

    :raises IdDoesNotExistError: when no dataset matches ``identifier``
    The setSpec contains the owning organisation's name and/or
    'openaire_data' when the package is tagged with it; falls back to the
    package name. Serialization depends on ``metadataPrefix``:
    'rdf' -> DCAT, 'oai_openaire' -> DataCite, otherwise oai_dc.
    '''
    package = Package.get(identifier)
    if not package:
        raise IdDoesNotExistError("No dataset with id %s" % identifier)
    set_spec = []
    if package.owner_org:
        group = Group.get(package.owner_org)
        if group and group.name:
            set_spec.append(group.name)
    # ROBUSTNESS FIX: as_dict() may yield no 'tags' entry; the original
    # "'openaire_data' in None" raised TypeError in that case.
    if 'openaire_data' in (package.as_dict().get('tags') or []):
        set_spec.append('openaire_data')
    if not set_spec:
        set_spec = [package.name]
    if metadataPrefix == 'rdf':
        return self._record_for_dataset_dcat(package, set_spec)
    if metadataPrefix == 'oai_openaire':
        return self._record_for_dataset_datacite(package, set_spec)
    return self._record_for_dataset(package, set_spec)
def _filter_packages(set, cursor, from_, until, batch_size):
    '''Get a part of datasets for "listNN" verbs.

    Returns (packages, group): the matching packages plus the CKAN group
    when a set was requested and found, else None.
    '''
    packages = []
    group = None
    if not set:
        query = Session.query(Package).filter(Package.type=='dataset'). \
            filter(Package.state == 'active').filter(Package.private!=True)
        if from_ and not until:
            query = query.filter(PackageRevision.revision_timestamp > from_).\
                filter(Package.name==PackageRevision.name)
        if until and not from_:
            query = query.filter(PackageRevision.revision_timestamp < until).\
                filter(Package.name==PackageRevision.name)
        if from_ and until:
            query = query.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                filter(Package.name==PackageRevision.name)
        packages = query.all()
    else:
        group = Group.get(set)
        if group:
            # Note that group.packages never returns private datasets regardless of 'with_private' parameter.
            query = group.packages(return_query=True, with_private=False).\
                filter(Package.type=='dataset'). \
                filter(Package.state == 'active')
            if from_ and not until:
                query = query.filter(PackageRevision.revision_timestamp > from_).\
                    filter(Package.name==PackageRevision.name)
            if until and not from_:
                query = query.filter(PackageRevision.revision_timestamp < until).\
                    filter(Package.name==PackageRevision.name)
            if from_ and until:
                query = query.filter(between(PackageRevision.revision_timestamp, from_, until)).\
                    filter(Package.name==PackageRevision.name)
            packages = query.all()
    if cursor is not None:
        # BUG FIX: the original computed cursor + batch_size unconditionally
        # and raised TypeError when batch_size was None (its default).
        if batch_size is not None:
            cursor_end = min(cursor + batch_size, len(packages))
            packages = packages[cursor:cursor_end]
        else:
            packages = packages[cursor:]
    return packages, group
def test_records(self):
    """ Test record fetching via http-request to prevent accidental changes to interface """
    # Sysadmin user and an organisation that owns two public packages.
    model.User(name="test", sysadmin=True).save()
    organization = get_action('organization_create')(
        {'user': '******'},
        {'name': 'test-organization', 'title': "Test organization"})
    package_1_data = deepcopy(TEST_DATADICT)
    package_1_data['owner_org'] = organization['name']
    package_1_data['private'] = False
    package_2_data = deepcopy(package_1_data)
    # Give every PID a fresh identifier so the two packages don't collide.
    for pid in package_1_data.get('pids', []):
        pid['id'] = utils.generate_pid()
    for pid in package_2_data.get('pids', []):
        pid['id'] = utils.generate_pid()
    packages = [
        get_action('package_create')({'user': '******'}, package_1_data),
        get_action('package_create')({'user': '******'}, package_2_data),
    ]
    url = url_for('/oai')
    # The organisation must appear as an OAI set.
    result = self.app.get(url, {'verb': 'ListSets'})
    root = lxml.etree.fromstring(result.body)
    request_set = self._get_single_result(root, "//o:set")
    set_name = request_set.xpath("string(o:setName)",
                                 namespaces=self._namespaces)
    set_spec = request_set.xpath("string(o:setSpec)",
                                 namespaces=self._namespaces)
    self.assertEquals(organization['name'], set_spec)
    self.assertEquals(organization['title'], set_name)
    # Identifiers listed for the set must belong to our packages.
    result = self.app.get(url, {'verb': 'ListIdentifiers',
                                'set': set_spec,
                                'metadataPrefix': 'oai_dc'})
    root = lxml.etree.fromstring(result.body)
    fail = True
    package_identifiers = [package['id'] for package in packages]
    package_org_names = [Group.get(package['owner_org']).name
                         for package in packages]
    for header in root.xpath("//o:header", namespaces=self._namespaces):
        fail = False
        set_spec = header.xpath("string(o:setSpec)",
                                namespaces=self._namespaces)
        identifier = header.xpath("string(o:identifier)",
                                  namespaces=self._namespaces)
        self.assertTrue(set_spec in package_org_names)
        self.assertTrue(identifier in package_identifiers)
        # Each listed identifier must be retrievable through GetRecord.
        result = self.app.get(url, {'verb': 'GetRecord',
                                    'identifier': identifier,
                                    'metadataPrefix': 'oai_dc'})
        record_root = lxml.etree.fromstring(result.body)
        fail_record = True
        for record_result in record_root.xpath("//o:record",
                                               namespaces=self._namespaces):
            fail_record = False
            record_header = self._get_single_result(record_result, 'o:header')
            self._get_single_result(record_result, 'o:metadata')
            self.assertTrue(
                record_header.xpath("string(o:identifier)",
                                    namespaces=self._namespaces)
                in package_identifiers)
            self.assertTrue(
                record_header.xpath("string(o:setSpec)",
                                    namespaces=self._namespaces)
                in package_org_names)
        self.assertFalse(fail_record, "No records received")
    self.assertFalse(fail, "No headers (packages) received")
def import_stage(self, harvest_object):
    '''
    The import stage will receive a HarvestObject object and will be
    responsible for:
        - performing any necessary action with the fetched object (e.g
          create a CKAN package).
          Note: if this stage creates or updates a package, a reference
          to the package must be added to the HarvestObject.
          Additionally, the HarvestObject must be flagged as current.
        - creating the HarvestObject - Package relation (if necessary)
        - creating and storing any suitable HarvestObjectErrors that may
          occur.
        - returning True if everything went as expected, False otherwise.

    :param harvest_object: HarvestObject object
    :returns: True if everything went right, False if errors were found
    '''
    model.repo.new_revision()
    master_data = json.loads(harvest_object.content)
    domain = master_data['domain']
    group = Group.get(domain)
    if not group:
        group = Group(name=domain, description=domain)
    if 'records' not in master_data:
        self._save_object_error('Could not receive any objects from fetch!',
                                harvest_object, stage='Import')
        return False
    records = master_data['records']
    set_name = master_data['set_name']
    for identifier, metadata, _ in records:
        if not metadata:
            continue
        # Derive a package name: title (or identifier) normalised to an
        # ASCII slug of letters and underscores, max 35 chars.
        name = metadata['title'][0] if len(metadata['title']) else identifier
        title = name
        norm_title = unicodedata.normalize('NFKD', name)\
            .encode('ASCII', 'ignore')\
            .lower().replace(' ', '_')[:35]
        name = ''.join(ch for ch in norm_title
                       if ch in string.ascii_letters + '_')
        creator = metadata['creator'][0] if len(metadata['creator']) else ''
        description = metadata['description'][0]\
            if len(metadata['description']) else ''
        pkg = Package.by_name(name)
        if not pkg:
            pkg = Package(name=name, title=title)
        # 'subject'/'type' values become tags; everything else is joined
        # into a package extra.
        extras = {}
        for key, value in metadata.items():
            if not len(value):
                continue
            if key == 'subject' or key == 'type':
                for tag in value:
                    if tag:
                        tag = munge_tag(tag[:100])
                        tag_obj = model.Tag.by_name(tag)
                        if not tag_obj:
                            tag_obj = model.Tag(name=tag)
                        if tag_obj:
                            pkgtag = model.PackageTag(tag=tag_obj,
                                                      package=pkg)
                            Session.add(tag_obj)
                            Session.add(pkgtag)
            else:
                extras[key] = ' '.join(value)
        pkg.author = creator
        pkg.author_email = creator
        pkg.title = title
        pkg.notes = description
        pkg.extras = extras
        pkg.url = \
            "%s?verb=GetRecord&identifier=%s&metadataPrefix=oai_dc"\
            % (harvest_object.job.source.url, identifier)
        pkg.save()
        harvest_object.package_id = pkg.id
        Session.add(harvest_object)
        setup_default_user_roles(pkg)
        # Attach the record's first http identifier as a resource.
        url = ''
        for ids in metadata['identifier']:
            if ids.startswith('http://'):
                url = ids
        title = metadata['title'][0] if len(metadata['title']) else ''
        description = metadata['description'][0]\
            if len(metadata['description']) else ''
        pkg.add_resource(url, description=description, name=title)
        # File the package under the domain group and a "<domain> - <set>"
        # subgroup, creating the subgroup on demand.
        group.add_package_by_name(pkg.name)
        subg_name = "%s - %s" % (domain, set_name)
        subgroup = Group.by_name(subg_name)
        if not subgroup:
            subgroup = Group(name=subg_name, description=subg_name)
        subgroup.add_package_by_name(pkg.name)
        Session.add(group)
        Session.add(subgroup)
        setup_default_user_roles(group)
        setup_default_user_roles(subgroup)
    # NOTE(review): commit placement reconstructed as once after the loop —
    # confirm it was not intended per record.
    model.repo.commit()
    return True