def update(self, instance, validated_data):
    structures = validated_data.pop('structures', [])
    notes_data = validated_data.pop('notes', None)
    identifiers_data = validated_data.pop('identifiers', None)

    if identifiers_data is not None:
        NodeIdentifier.objects.filter(tag_version=instance).delete()
        self.create_identifiers(instance, identifiers_data)

    if notes_data is not None:
        NodeNote.objects.filter(tag_version=instance).delete()
        for note in notes_data:
            note.setdefault('create_date', timezone.now())
        self.create_notes(instance, notes_data)

    with transaction.atomic():
        for structure in structures:
            if not TagStructure.objects.filter(tag=instance.tag, structure__template=structure).exists():
                structure_instance, _ = structure.create_template_instance(instance.tag)
                for instance_unit in structure_instance.units.all():
                    StructureUnitDocument.from_obj(instance_unit).save()

        TagVersion.objects.filter(pk=instance.pk).update(**validated_data)
        instance.refresh_from_db()

        doc = Archive.from_obj(instance)
        doc.save()

    return instance
def update(self, instance: TagVersion, validated_data):
    structures = validated_data.pop('structures', [])
    notes_data = validated_data.pop('notes', None)
    identifiers_data = validated_data.pop('identifiers', None)
    appraisal_date = validated_data.pop('appraisal_date', instance.tag.appraisal_date)

    self.update_identifiers(instance, identifiers_data)
    self.update_notes(instance, notes_data)

    with transaction.atomic():
        for structure in structures:
            if not TagStructure.objects.filter(tag=instance.tag, structure__template=structure).exists():
                structure_instance, _ = structure.create_template_instance(instance.tag)
                for instance_unit in structure_instance.units.all():
                    StructureUnitDocument.from_obj(instance_unit).save()

        instance.tag.appraisal_date = appraisal_date
        instance.tag.save()

        TagVersion.objects.filter(pk=instance.pk).update(**validated_data)
        instance.refresh_from_db()

        doc = Archive.from_obj(instance)
        doc.save()

    return instance
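# A minimal sketch of the update_identifiers/update_notes helpers that the
# second update() variant calls. The bodies below are assumptions modelled on
# the inline delete-and-recreate logic in the first variant; the real helpers
# (presumably on the same serializer) may differ.
def update_identifiers(self, instance, identifiers_data):
    if identifiers_data is not None:
        # Replace the full identifier set with the submitted one.
        NodeIdentifier.objects.filter(tag_version=instance).delete()
        self.create_identifiers(instance, identifiers_data)


def update_notes(self, instance, notes_data):
    if notes_data is not None:
        # Replace all notes, stamping a create date where one is missing.
        NodeNote.objects.filter(tag_version=instance).delete()
        for note in notes_data:
            note.setdefault('create_date', timezone.now())
        self.create_notes(instance, notes_data)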
@classmethod
def parse_arkiv(cls, el, agent, task=None, ip=None):
    logger.info("Parsing arkiv...")
    name = el.xpath("va:arkivnamn", namespaces=cls.NSMAP)[0].text
    tag_type = cls.ARCHIVE_TYPE

    start_year = el.xpath("va:tidarkivf", namespaces=cls.NSMAP)[0].text
    start_date = None
    if start_year is not None:
        start_date = datetime(
            year=int(start_year), month=1, day=1,
            tzinfo=pytz.UTC,
        )

    end_year = el.xpath("va:tidarkivt", namespaces=cls.NSMAP)[0].text
    end_date = None
    if end_year is not None:
        end_date = datetime(
            year=int(end_year), month=12, day=31,
            tzinfo=pytz.UTC,
        )

    tag = Tag.objects.create(information_package=ip, task=task)
    tag_version = TagVersion.objects.create(
        tag=tag,
        elastic_index='archive',
        type=tag_type,
        name=name,
        create_date=cls.parse_archive_create_date(el),
        revise_date=cls.parse_archive_revise_date(el),
        import_date=timezone.now(),
        start_date=start_date,
        end_date=end_date,
    )
    structure = Structure.objects.create(
        name="Arkivförteckning för {}".format(name),
        type=cls.STRUCTURE_TYPE,
        is_template=True,
        published=True,
        published_date=timezone.now(),
        version='1.0',
        task=task,
    )
    structure_instance, tag_structure = structure.create_template_instance(tag)
    for instance_unit in structure_instance.units.all():
        StructureUnitDocument.from_obj(instance_unit).save()

    agent_tag_link = AgentTagLink.objects.create(
        agent=agent,
        tag_id=tag_version.id,
        type=cls.AGENT_TAG_LINK_RELATION_TYPE,
    )

    doc = Archive.from_obj(tag_version)
    doc.agents = [str(agent.pk)]

    logger.info("Parsed arkiv: {}".format(tag_version.pk))
    return doc.to_dict(include_meta=True), tag, tag_version, tag_structure, agent_tag_link
def parse_archive_xml(self, xmlfile):
    logger.info("Parsing archive XML elements...")
    tree = etree.parse(xmlfile, self.xmlparser)
    root = tree.getroot()

    for archive_el in root.xpath("Archive"):
        archive_doc, archive_tag, archive_tag_version, archive_tag_structure, inst_code = self.parse_archive(
            archive_el, task=self.task, ip=self.ip
        )
        structure = archive_tag_structure.structure
        structure_template = structure.template

        agent_hash = self.build_agent_hash(
            archive_el.xpath("ObjectParts/General/Archive.ArchiveOrigID")[0].text,
            archive_el.xpath("ObjectParts/General/ArchiveOrig.Name")[0].text,
        )
        archive_id = archive_el.xpath("ObjectParts/General/Archive.ArchiveID")[0].text
        archive_name = archive_el.xpath("ObjectParts/General/Archive.Name")[0].text
        archive_hash = self.build_archive_hash(
            archive_id,
            archive_name,
            agent_hash,
        )
        cache.set(archive_hash, archive_tag.pk, 300)

        for series_el in self.get_series(archive_el):
            series_template_structure_unit = self.parse_series(
                series_el,
                structure_template,
                inst_code,
                task=self.task,
            )
            unit = series_template_structure_unit.create_template_instance(structure)

            series_id = series_el.xpath("Series.SeriesID")[0].text
            series_signum = series_el.xpath("Series.Signum")[0].text
            series_title = series_el.xpath("Series.Title")[0].text
            series_hash = self.build_series_hash(
                series_id,
                series_signum,
                series_title,
                archive_hash,
            )
            cache.set(series_hash, unit.pk, 300)

            doc = StructureUnitDocument.from_obj(unit)
            yield doc.to_dict(include_meta=True)

        yield archive_doc

    logger.info("Archive XML elements parsed")
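# The build_*_hash helpers above are assumed to derive stable cache keys from
# their parts, so that parse_archive (further below) can resolve previously
# imported agents and archives via cache.get. A minimal sketch under that
# assumption; hashlib.md5 is an illustrative choice, not confirmed by the
# source:
import hashlib

def build_agent_hash(self, archive_orig_id, archive_orig_name):
    # Stable key from the origin id and name, matching the two values passed
    # to build_agent_hash in parse_archive_xml and parse_archive.
    return hashlib.md5(
        '{};{}'.format(archive_orig_id, archive_orig_name).encode()
    ).hexdigest()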
def create(self, validated_data):
    with transaction.atomic():
        agent = validated_data.pop('archive_creator')
        structures = validated_data.pop('structures')
        notes_data = validated_data.pop('notes', [])
        identifiers_data = validated_data.pop('identifiers', [])
        use_uuid_as_refcode = validated_data.pop('use_uuid_as_refcode', False)

        tag_version_id = uuid.uuid4()
        if use_uuid_as_refcode:
            validated_data['reference_code'] = str(tag_version_id)

        tag = Tag.objects.create()
        tag_version = TagVersion.objects.create(
            pk=tag_version_id,
            tag=tag,
            elastic_index='archive',
            **validated_data,
        )
        tag.current_version = tag_version
        tag.save()

        for structure in structures:
            structure_instance, _ = structure.create_template_instance(tag)
            for instance_unit in structure_instance.units.all():
                StructureUnitDocument.from_obj(instance_unit).save()

        org = self.context['request'].user.user_profile.current_organization
        org.add_object(tag)
        org.add_object(tag_version)

        tag_link_type, _ = AgentTagLinkRelationType.objects.get_or_create(
            creator=True, defaults={'name': 'creator'},
        )
        AgentTagLink.objects.create(agent=agent, tag=tag_version, type=tag_link_type)

        # Pass the new version to the helpers (the original passed the
        # serializer itself as the first argument).
        self.create_identifiers(tag_version, identifiers_data)
        self.create_notes(tag_version, notes_data)

        doc = Archive.from_obj(tag_version)
        doc.save()

    return tag
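# Minimal sketch of the create_identifiers/create_notes helpers called from
# create() and update(). The field handling is inferred from the NodeIdentifier
# and NodeNote usage elsewhere in this module and is an assumption, not the
# actual implementation:
def create_identifiers(self, tag_version, identifiers_data):
    NodeIdentifier.objects.bulk_create(
        NodeIdentifier(tag_version=tag_version, **identifier)
        for identifier in identifiers_data
    )


def create_notes(self, tag_version, notes_data):
    NodeNote.objects.bulk_create(
        NodeNote(tag_version=tag_version, **note)
        for note in notes_data
    )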
def test_add_results_to_appraisal(self):
    component_tag = Tag.objects.create()
    component_tag_version = TagVersion.objects.create(
        name='foo',
        tag=component_tag,
        type=self.component_type,
        elastic_index="component",
    )
    Component.from_obj(component_tag_version).save(refresh='true')

    component_tag2 = Tag.objects.create()
    component_tag_version2 = TagVersion.objects.create(
        name='bar',
        tag=component_tag2,
        type=self.component_type,
        elastic_index="component",
    )
    Component.from_obj(component_tag_version2).save(refresh='true')

    # test that we don't try to add structure units matched by query to job
    structure = Structure.objects.create(type=StructureType.objects.create(), is_template=False)
    structure_unit = StructureUnit.objects.create(
        name='foo',
        structure=structure,
        type=StructureUnitType.objects.create(structure_type=structure.type),
    )
    StructureUnitDocument.from_obj(structure_unit).save(refresh='true')

    appraisal_job = AppraisalJob.objects.create()

    res = self.client.get(self.url, data={'q': 'foo', 'add_to_appraisal': appraisal_job.pk})
    self.assertEqual(res.status_code, status.HTTP_200_OK)
    self.assertCountEqual(appraisal_job.tags.all(), [component_tag])

    res = self.client.get(self.url, data={'add_to_appraisal': appraisal_job.pk})
    self.assertEqual(res.status_code, status.HTTP_200_OK)
    self.assertCountEqual(appraisal_job.tags.all(), [component_tag, component_tag2])
def create(self, request, *args, **kwargs):
    parents_query_dict = self.get_parents_query_dict()
    if parents_query_dict:
        request.data.update(parents_query_dict)

    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    serializer.save()
    headers = self.get_success_headers(serializer.data)

    # Create elasticsearch document
    instance = serializer.instance
    doc = StructureUnitDocument.from_obj(instance)
    doc.save()

    return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)
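# Hedged usage sketch of the create() view through DRF's test client; the
# route and payload fields are hypothetical, assuming an existing `structure`,
# and are not taken from the project's URL conf.
from rest_framework.test import APIClient

client = APIClient()
response = client.post(
    '/api/structure-units/',  # hypothetical route
    {'name': 'Correspondence', 'reference_code': 'E1', 'structure': str(structure.pk)},
    format='json',
)
assert response.status_code == 201  # unit saved and indexed in Elasticsearch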
@classmethod
def parse_serie(cls, el, structure, structure_template, agent=None, task=None, ip=None):
    logger.debug("Parsing serie...")
    name = el.xpath("va:serierubrik", namespaces=cls.NSMAP)[0].text
    tag_type = {'series': cls.SERIE_TYPE}.get(el.get('level'), None)
    if tag_type is None:
        tag_type, _ = StructureUnitType.objects.get_or_create(
            name=el.get('level'),
            defaults={
                'structure_type': cls.STRUCTURE_TYPE,
            },
        )
    reference_code = el.get("signum")

    parent_unit_id = None
    parent_reference_code = reference_code
    cache_key_prefix = str(structure_template.pk)

    # Walk up the signum one space-separated level at a time until a
    # previously created ancestor is found in the cache.
    while len(parent_reference_code) > 1:
        parts = parent_reference_code.rsplit(maxsplit=1)
        if len(parts) < 2:
            # No more levels to strip; guard against looping forever on
            # reference codes without spaces.
            break
        parent_reference_code = parts[0]
        cache_key = '{}{}'.format(cache_key_prefix, parent_reference_code)
        parent_unit_id = cache.get(cache_key)
        if parent_unit_id is not None:
            break

    template_unit = StructureUnit.objects.create(
        structure=structure_template,
        name=name,
        parent_id=parent_unit_id,
        type=tag_type,
        reference_code=reference_code,
        task=task,
    )
    cache.set('{}{}'.format(cache_key_prefix, reference_code), str(template_unit.pk), 300)

    unit = template_unit.create_template_instance(structure)
    doc = StructureUnitDocument.from_obj(unit)
    doc.save()

    logger.debug("Parsed serie: {}".format(unit.pk))
    return unit
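# Worked example of the parent lookup in parse_serie, assuming space-separated
# signum parts (e.g. "F 1 a"): each rsplit(maxsplit=1)[0] strips the last
# part, probing the cache for the nearest ancestor created earlier in the
# import:
#
#   >>> "F 1 a".rsplit(maxsplit=1)[0]
#   'F 1'
#   >>> "F 1".rsplit(maxsplit=1)[0]
#   'F'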
def update(self, request, *args, **kwargs):
    parents_query_dict = self.get_parents_query_dict()
    if parents_query_dict:
        request.data.update(parents_query_dict)

    partial = kwargs.pop('partial', False)
    instance = self.get_object()
    serializer = self.get_serializer(instance, data=request.data, partial=partial)
    serializer.is_valid(raise_exception=True)
    serializer.save()

    # Update elasticsearch document
    doc = StructureUnitDocument.from_obj(instance)
    doc.save()

    if getattr(instance, '_prefetched_objects_cache', None):
        # If 'prefetch_related' has been applied to a queryset, we need to
        # forcibly invalidate the prefetch cache on the instance.
        instance._prefetched_objects_cache = {}

    return Response(serializer.data)
def destroy(self, request, *args, **kwargs):
    with transaction.atomic():
        instance = self.get_object()
        structure = instance.structure

        if not structure.is_template and not structure.type.editable_instances:
            raise exceptions.ValidationError(
                _('Cannot delete units in instances of type {}').format(structure.type)
            )

        # Delete elasticsearch document if exists
        try:
            doc = StructureUnitDocument.from_obj(instance)
            doc.get(doc.id)
        except elasticsearch.NotFoundError:
            pass
        else:
            # Document exists: remove both the unit and its document.
            instance.delete()
            doc.delete()
            return Response(status=status.HTTP_204_NO_CONTENT)

        # Document was never indexed (or already gone): remove just the unit.
        instance.delete()
        return Response(status=status.HTTP_204_NO_CONTENT)
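# The existence probe above relies on Document.get raising NotFoundError
# before anything is deleted. An equivalent formulation (a sketch, not the
# project's code) deletes the row first and swallows a missing document:
#
#   instance.delete()
#   try:
#       StructureUnitDocument.get(id=str(instance.pk)).delete()
#   except elasticsearch.NotFoundError:
#       pass
#   return Response(status=status.HTTP_204_NO_CONTENT)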
def parse_archive(self, el, task=None, ip=None):
    name = el.xpath('ObjectParts/General/Archive.Name')[0].text
    orig_name = el.xpath('ObjectParts/General/ArchiveOrig.Name')[0].text
    create_date = self.parse_archive_create_date(el)
    revise_date = self.parse_archive_revise_date(el)
    tag_type = self.ARCHIVE_TYPE

    tag = Tag.objects.create(information_package=ip, task=task)
    archive_id = uuid.uuid4()
    tag_version = TagVersion.objects.create(
        pk=archive_id,
        tag=tag,
        reference_code=el.xpath('ObjectParts/General/Archive.RefCode')[0].text,
        type=tag_type,
        name=name,
        elastic_index='archive',
        create_date=create_date,
        revise_date=revise_date,
        import_date=timezone.now(),
        start_date=self.parse_archive_start_date(el),
        end_date=self.parse_archive_end_date(el),
    )

    inst_code = el.xpath("ObjectParts/General/ArchiveInst.InstCode")[0].text
    archive_klara_id = el.xpath("ObjectParts/General/Archive.ArchiveID")[0].text
    NodeIdentifier.objects.create(
        identifier="{}/{}".format(inst_code, archive_klara_id),
        tag_version=tag_version,
        type=self.node_identifier_type_klara,
    )

    history_note_text = el.xpath("ObjectParts/History/Archive.History")[0].text
    if history_note_text:
        NodeNote.objects.create(
            text=html.unescape(history_note_text),
            tag_version=tag_version,
            type=self.node_note_type_historik,
            create_date=timezone.now(),  # TODO: use something else to get the date?
            revise_date=timezone.now(),  # TODO: use something else to get the date?
        )

    rule_convention_type, _ = RuleConventionType.objects.get_or_create(
        name=el.xpath("ObjectParts/General/ArchiveType.Name")[0].text,
    )
    structure = Structure.objects.create(
        name="Arkivförteckning för {}".format(orig_name),
        type=self.STRUCTURE_TYPE,
        is_template=True,
        published=True,
        published_date=timezone.now(),
        version='1.0',
        create_date=create_date,
        rule_convention_type=rule_convention_type,
        task=task,
    )
    structure_instance, tag_structure = structure.create_template_instance(tag)
    for instance_unit in structure_instance.units.all():
        StructureUnitDocument.from_obj(instance_unit).save()

    agent_hash = self.build_agent_hash(
        el.xpath('ObjectParts/General/Archive.ArchiveOrigID')[0].text,
        orig_name,
    )
    agent_id = cache.get(agent_hash)
    AgentTagLink.objects.create(
        agent_id=agent_id,
        tag=tag_version,
        type=self.tag_link_relation_type,
    )

    doc = Archive.from_obj(tag_version).to_dict(include_meta=True)
    return doc, tag, tag_version, tag_structure, inst_code