def dump_relations(self, obj):
    """Dump the relations to a dictionary.

    :param obj: Mapping being serialized. When it already carries a
        ``'relations'`` key, that value is returned untouched.
    :returns: ``obj['relations']`` if present, otherwise the result of
        ``serialize_relations`` for the PID stored under ``'pid'`` in
        ``self.context``.
    :raises KeyError: If relations have to be computed and
        ``self.context`` has no ``'pid'`` entry.
    """
    if 'relations' in obj:
        return obj['relations']
    # Deposits and non-deposits used to be handled by two identical
    # branches; both serialize the relations of the context PID.
    return serialize_relations(self.context['pid'])
def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Transform a deposit document before it is sent to Elasticsearch.

    Connected to the ``before_record_index`` signal. To avoid a record and
    its published deposit drifting apart (e.g. when an embargo task updates
    only the record), a published deposit is indexed with the content of
    its published record.

    :param sender: Sender of the signal.
    :param json: Document about to be indexed; it is modified in place.
    :param record: Deposit being indexed. It is wrapped in a
        ``ZenodoDeposit`` when it is not one already.
    :param index: Elasticsearch index name. Only names starting with
        ``deposits-records-`` are processed.
    :type index: str
    """
    if not index.startswith('deposits-records-'):
        return

    if not isinstance(record, ZenodoDeposit):
        record = ZenodoDeposit(record, model=record.model)

    if record['_deposit']['status'] != 'published':
        json['_updated'] = record.updated
    else:
        original_schema = json['$schema']
        published = record.fetch_published()[1]

        # Temporarily set to draft mode to ensure that `clear` can be called
        json['_deposit']['status'] = 'draft'
        json.clear()
        json.update(copy.deepcopy(published.replace_refs()))

        # Set back to published mode and restore schema.
        json['_deposit']['status'] = 'published'
        json['$schema'] = original_schema
        json['_updated'] = published.updated
    json['_created'] = record.created

    # Compute filecount and total file size
    file_entries = json.get('_files', [])
    json['filecount'] = len(file_entries)
    json['size'] = sum(entry.get('size', 0) for entry in file_entries)

    recid = record.get('recid')
    if not recid:
        return

    pid = PersistentIdentifier.get('recid', recid)
    versioning = PIDVersioning(child=pid)
    relations = serialize_relations(pid)
    if not versioning.exists:
        # No versioning relation: index it as the single, last version.
        relations = {'version': [{'is_last': True, 'index': 0}]}
    elif versioning.draft_child_deposit:
        # A draft child exists: flag whether this deposit is that draft
        # and bump the serialized version count by one.
        is_last = (versioning.draft_child_deposit.pid_value ==
                   record['_deposit']['id'])
        relations['version'][0]['is_last'] = is_last
        relations['version'][0]['count'] += 1

    if relations:
        json['relations'] = relations
def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Transform a record document before it is sent to Elasticsearch.

    Connected to the ``before_record_index`` signal. Nothing is done unless
    ``index`` starts with ``records-`` and the record has a ``$schema``.

    :param sender: Sender of the signal.
    :param json: Document about to be indexed; it is modified in place.
    :param record: Record being indexed.
    :param index: Elasticsearch index name.
    """
    if not index.startswith('records-'):
        return
    if record.get('$schema') is None:
        return

    hide_files = json['access_right'] != 'open' and '_files' in json
    if hide_files:
        # Remove files from index if record is not open access.
        del json['_files']
    else:
        # Compute file count and total size
        file_entries = json.get('_files', [])
        json['filecount'] = len(file_entries)
        json['size'] = sum(entry.get('size', 0) for entry in file_entries)

    pid = PersistentIdentifier.query.filter(
        PersistentIdentifier.object_uuid == record.id,
        PersistentIdentifier.pid_type == current_pidrelations.primary_pid_type,
    ).one_or_none()
    if pid:
        if PIDVersioning(child=pid).exists:
            relations = serialize_relations(pid)
        else:
            # No versioning relation: index it as the single, last version.
            relations = {'version': [{'is_last': True, 'index': 0}]}
        if relations:
            json['relations'] = relations

        related = serialize_related_identifiers(pid)
        if related:
            json.setdefault('related_identifiers', []).extend(related)

    # Remove internal data.
    if '_internal' in json:
        del json['_internal']
def indexer_receiver(sender, json=None, record=None, index=None,
                     **dummy_kwargs):
    """Connect to before_record_index signal to transform record for ES.

    Nothing is done unless ``index`` starts with ``records-`` and the record
    has a ``$schema``. Otherwise ``json`` is modified in place:

    * ``_files`` is dropped for non-open records, else ``filecount`` and
      ``size`` are computed from it;
    * ``relations`` and ``related_identifiers`` are filled from the record's
      ``recid`` persistent identifier, when one is found;
    * every location with both ``lat`` and ``lon`` gets a ``point`` entry;
    * ``_internal`` is removed, ``_stats`` is set and the custom fields
      returned by ``build_record_custom_fields`` are copied in.

    :param sender: Sender of the signal.
    :param json: Document about to be indexed.
    :param record: Record being indexed.
    :param index: Elasticsearch index name.
    """
    if not index.startswith('records-') or record.get('$schema') is None:
        return

    # Remove files from index if record is not open access.
    if json['access_right'] != 'open' and '_files' in json:
        del json['_files']
    else:
        # Compute file count and total size
        files = json.get('_files', [])
        json['filecount'] = len(files)
        json['size'] = sum(f.get('size', 0) for f in files)

    pid = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_value == str(record['recid']),
        PersistentIdentifier.pid_type == 'recid',
        PersistentIdentifier.object_uuid == record.id,
    ).one_or_none()
    if pid:
        pv = PIDVersioning(child=pid)
        if pv.exists:
            relations = serialize_relations(pid)
        else:
            # No versioning relation: index it as the single, last version.
            relations = {'version': [{'is_last': True, 'index': 0}]}
        if relations:
            json['relations'] = relations

        rels = serialize_related_identifiers(pid)
        if rels:
            json.setdefault('related_identifiers', []).extend(rels)

    for loc in json.get('locations', []):
        lat = loc.get('lat')
        lon = loc.get('lon')
        # Compare against None rather than relying on truthiness: 0 is a
        # valid coordinate (equator / prime meridian) and must not be
        # skipped.
        if lat is not None and lon is not None:
            loc['point'] = {'lat': lat, 'lon': lon}

    # Remove internal data.
    if '_internal' in json:
        del json['_internal']

    json['_stats'] = build_record_stats(record['recid'],
                                        record.get('conceptrecid'))

    custom_es_fields = build_record_custom_fields(json)
    for es_field, es_value in custom_es_fields.items():
        json[es_field] = es_value
def publish(self, pid=None, id_=None):
    """Publish the deposit and push its version relations to the index.

    After the parent class has published the deposit, the serialized
    version relation of the ``recid`` PID attached to this object is sent
    to ``self.indexer.update_relation_version_is_last``. When that relation
    names a previous version, the same is done for the previous PID.

    :param pid: Forwarded to the parent ``publish``.
    :param id_: Forwarded to the parent ``publish``.
    :returns: Whatever the parent ``publish`` returned, or ``None`` when a
        ``SQLAlchemyError`` was raised (the session is rolled back).
    """
    if self.data is None:
        self.data = self.get('_deposit', {})

    if 'control_number' in self:
        self.pop('control_number')

    if '$schema' not in self:
        jsonschemas = current_app.extensions['invenio-jsonschemas']
        self['$schema'] = jsonschemas.path_to_url(
            current_app.config['DEPOSIT_DEFAULT_JSONSCHEMA'])

    self.is_edit = True
    try:
        deposit = super(WekoDeposit, self).publish(pid, id_)

        # update relation version current to ES
        pid = PersistentIdentifier.query.filter_by(
            pid_type='recid', object_uuid=self.id).first()
        relations = serialize_relations(pid)
        if relations is not None and 'version' in relations:
            current_version = relations['version'][0]
            current_version['id'] = pid.object_uuid
            self.indexer.update_relation_version_is_last(current_version)

            # update relation version previous to ES
            has_previous = (
                current_version is not None
                and 'previous' in current_version
                and current_version['previous'] is not None
            )
            if has_previous:
                previous_value = current_version['previous']['pid_value']
                previous_pid = PersistentIdentifier.get(
                    'recid', previous_value)
                previous_relations = serialize_relations(previous_pid)
                if previous_relations is not None \
                        and 'version' in previous_relations:
                    previous_version = previous_relations['version'][0]
                    previous_version['id'] = previous_pid.object_uuid
                    self.indexer.update_relation_version_is_last(
                        previous_version)
        return deposit
    except SQLAlchemyError as ex:
        current_app.logger.debug(ex)
        db.session.rollback()
        return None
def predump_relations(self, obj):
    """Make sure ``obj['metadata']`` carries serialized relations.

    When the metadata has no ``'relations'`` entry, one is computed from
    the PID in ``self.context`` (or, for deposits, from the ``recid`` PID
    named in the metadata). For records, the ``draft_child_deposit`` field
    of the first version entry is stripped.

    :param obj: Mapping whose ``'metadata'`` entry is updated in place.
    """
    metadata = obj.get('metadata', {})

    if 'relations' not in metadata:
        pid = self.context['pid']
        if is_deposit(metadata):
            # For deposits serialize the record's relations
            pid = PersistentIdentifier.get('recid', metadata['recid'])
        metadata['relations'] = serialize_relations(pid)

    if not is_record(metadata):
        return

    # Remove some non-public fields
    versions = metadata['relations'].get('version', [])
    if versions:
        versions[0].pop('draft_child_deposit', None)
def dump_relations(self, obj):
    """Serialize the relations of the PID stored in ``self.context``.

    :param obj: Object being dumped; it is not consulted here.
    :returns: The result of ``serialize_relations`` for
        ``self.context['pid']``.
    """
    return serialize_relations(self.context['pid'])
def test_relations_serialization(app, db, deposit, deposit_file):
    """Check serialized PID relations across a publish/new-version cycle."""

    def version_relations(index, is_last, last_child, count, draft=None):
        """Build the expected serialization holding one version entry."""
        return {
            "version": [
                {
                    "draft_child_deposit": draft,
                    "index": index,
                    "is_last": is_last,
                    "last_child": {
                        "pid_type": "recid",
                        "pid_value": last_child,
                    },
                    "parent": {"pid_type": "recid", "pid_value": "1"},
                    "count": count,
                }
            ]
        }

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    assert serialize_relations(recid_v1) == version_relations(
        index=0, is_last=True, last_child="2", count=1)

    deposit_v1.newversion()

    # Should contain "draft_child_deposit" information
    assert serialize_relations(recid_v1) == version_relations(
        index=0, is_last=True, last_child="2", count=1,
        draft={"pid_type": "depid", "pid_value": "3"})

    # Publish the new version
    versioning = PIDVersioning(child=recid_v1)
    depid_v2 = versioning.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    # Should no longer contain "draft_child_deposit" info after publishing
    # and no longer be the last child
    assert serialize_relations(recid_v1) == version_relations(
        index=0, is_last=False, last_child="3", count=2)

    # New version should be the last child now
    assert serialize_relations(recid_v2) == version_relations(
        index=1, is_last=True, last_child="3", count=2)
def test_relations_serialization(app, db, deposit, deposit_file):
    """Verify PID relation serialization before and after a new version."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    # Version entry of the first record right after its publication; the
    # later expectations are derived from it.
    first_published = {
        "draft_child_deposit": None,
        "index": 0,
        "is_last": True,
        "last_child": {"pid_type": "recid", "pid_value": "2"},
        "parent": {"pid_type": "recid", "pid_value": "1"},
        "count": 1,
    }
    assert serialize_relations(recid_v1) == {"version": [first_published]}

    deposit_v1.newversion()

    # Should contain "draft_child_deposit" information
    with_draft = dict(
        first_published,
        draft_child_deposit={"pid_type": "depid", "pid_value": "3"},
    )
    assert serialize_relations(recid_v1) == {"version": [with_draft]}

    # Publish the new version
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    # Should no longer contain "draft_child_deposit" info after publishing
    # and no longer be the last child
    superseded = dict(
        first_published,
        is_last=False,
        last_child={"pid_type": "recid", "pid_value": "3"},
        count=2,
    )
    assert serialize_relations(recid_v1) == {"version": [superseded]}

    # New version should be the last child now
    latest = dict(superseded, index=1, is_last=True)
    assert serialize_relations(recid_v2) == {"version": [latest]}