Esempio n. 1
0
def index_path(ip, path, parent=None):
    """
    Create the tag records for a filesystem path and index it in elasticsearch.

    A new ``Tag`` is created for ``ip`` together with a ``TagVersion`` named
    after the last component of ``path``. When ``parent`` is truthy, a
    ``TagStructure`` linking the new tag to it (inside the parent's
    structure) is created as well. The tag version is saved before the
    path is handed to the indexer.

    :param ip: The IP the path belongs to
    :type ip: InformationPackage
    :param path: The path of the file or directory
    :type path: str
    :param parent: The parent of the tag
    :type parent: TagStructure
    :return: The result of ``index_document`` or ``index_directory``
    :rtype: File or Directory
    """

    # Anything that is not a regular file is treated as a directory.
    is_file = os.path.isfile(path)
    version_id = str(uuid.uuid4())

    tag = Tag.objects.create(information_package=ip)
    tag_version = TagVersion(pk=version_id, tag=tag, name=os.path.basename(path))
    if parent:
        TagStructure.objects.create(tag=tag, parent=parent, structure=parent.structure)

    # The same label is used for both the elastic index and the type.
    kind = 'document' if is_file else 'directory'
    tag_version.elastic_index = kind
    tag_version.type = kind
    tag_version.save()

    indexer = index_document if is_file else index_directory
    return indexer(ip, path, version_id)
Esempio n. 2
0
    def update(self, instance: TagVersion, validated_data):
        """
        Apply validated data to a tag version and re-index it as an archive.

        The ``structures``, ``notes``, ``identifiers`` and ``appraisal_date``
        keys are removed from ``validated_data``; whatever remains is written
        straight to the ``TagVersion`` row.

        :param instance: The tag version being updated
        :type instance: TagVersion
        :param validated_data: The validated serializer data (mutated in place)
        :type validated_data: dict
        :return: The refreshed tag version
        :rtype: TagVersion
        """
        templates = validated_data.pop('structures', [])
        notes = validated_data.pop('notes', None)
        identifiers = validated_data.pop('identifiers', None)
        # Fall back to the current value when no new appraisal date is given.
        appraisal_date = validated_data.pop('appraisal_date', instance.tag.appraisal_date)

        # Identifiers and notes are handled before the transaction is opened.
        self.update_identifiers(instance, identifiers)
        self.update_notes(instance, notes)

        with transaction.atomic():
            for template in templates:
                already_instantiated = TagStructure.objects.filter(
                    tag=instance.tag,
                    structure__template=template,
                ).exists()
                if already_instantiated:
                    continue

                # Instantiate the template for this tag and index its units.
                created_structure, _ = template.create_template_instance(instance.tag)
                for unit in created_structure.units.all():
                    StructureUnitDocument.from_obj(unit).save()

            instance.tag.appraisal_date = appraisal_date
            instance.tag.save()
            # Queryset update followed by a refresh so that ``instance``
            # reflects what is stored in the database.
            TagVersion.objects.filter(pk=instance.pk).update(**validated_data)
            instance.refresh_from_db()

        Archive.from_obj(instance).save()

        return instance
Esempio n. 3
0
    def parse_errands(self, ip, rootdir, archive, errands_root):
        """
        Generate unsaved tag records and index documents for every errand.

        For each errand found under ``errands_root`` the items produced by
        ``parse_acts`` for that errand are yielded first (when the errand has
        an acts root), followed by the errand's own 4-tuple.

        :param ip: The IP the tags belong to
        :param rootdir: Root directory forwarded to ``parse_acts``
        :param archive: The archive whose active structure the errands join
        :param errands_root: The element that holds the errands
        :return: Generator of ``(Tag, TagVersion, TagStructure, dict)`` for
            each errand, interleaved with whatever ``parse_acts`` yields
        """
        active_structure = archive.get_active_structure()
        structure = active_structure.structure

        for errand_el in self.get_arkiv_objekt_arenden(errands_root):
            component, unit = self.parse_errand(errand_el, archive, ip, structure)

            errand_tag = Tag(information_package=ip, task=self.task)
            errand_version = TagVersion(
                pk=component.meta.id,
                tag=errand_tag,
                elastic_index=component._index._name,
                name=component.name,
                type=component.type,
                reference_code=component.reference_code,
            )
            # Tree fields are zeroed here; presumably the tree is rebuilt
            # after the records have been stored - TODO confirm.
            errand_node = TagStructure(
                tag=errand_tag,
                structure_unit=unit,
                structure=structure,
                parent=active_structure,
                tree_id=active_structure.tree_id,
                lft=0,
                rght=0,
                level=0,
            )

            acts_root = self.get_acts_root(errand_el)
            # Explicit length check: only the first entry is used.
            if len(acts_root) > 0:
                yield from self.parse_acts(ip, rootdir, component, acts_root[0], errand_node)

            yield errand_tag, errand_version, errand_node, component.to_dict(include_meta=True)
Esempio n. 4
0
    def parse_document(self, ip, rootdir, document, act, parent):
        """
        Build the index document and unsaved tag records for one attachment.

        The file referenced by the ``Lank`` attribute is read in full and
        embedded base64-encoded in the index document. Its path is recorded
        in ``self.indexed_files``.

        :param ip: The IP the file belongs to; when given, the link is
            resolved against ``ip.object_path`` and ``ip.sip_path``
        :param rootdir: Fallback base directory for the link when ``ip`` is
            ``None``; also the base that ``href`` is made relative to
        :param document: The element describing the attachment
        :param act: The parent act, supplying ``archive`` and ``ip``
        :param parent: The tag structure the new tag is placed under
        :type parent: TagStructure
        :return: ``(Tag, TagVersion, TagStructure, dict)`` where the dict is
            the index document with the ``ingest_attachment`` pipeline set
        :rtype: tuple
        """
        doc_id = str(uuid.uuid4())
        name = document.get("Namn")
        desc = document.get("Beskrivning")
        link = document.get('Lank')

        # Resolve the link against the IP first, then the root directory,
        # and otherwise use it as given.
        if ip is not None:
            filepath = os.path.join(ip.object_path, ip.sip_path, link)
        elif rootdir is not None:
            filepath = os.path.join(rootdir, link)
        else:
            filepath = link

        href = os.path.dirname(os.path.relpath(filepath, rootdir))
        if href == '.':
            href = ''
        filename = os.path.basename(filepath)
        _, dotted_ext = os.path.splitext(filepath)
        ext = dotted_ext[1:]  # drop the leading dot

        with open(filepath, 'rb') as f:
            encoded_content = base64.b64encode(f.read()).decode("ascii")

        size, _ = get_tree_size_and_count(filepath)
        modified = timestamp_to_datetime(os.stat(filepath).st_mtime)

        file_doc = File(
            _id=doc_id,
            name=name,
            type='Bilaga',
            archive=act.archive,
            desc=desc,
            filename=filename,
            href=href,
            extension=ext,
            data=encoded_content,
            size=size,
            modified=modified,
            current_version=True,
            ip=act.ip,
            task_id=str(self.task.pk),
        )

        tag = Tag(information_package=ip, task=self.task)
        tag_version = TagVersion(
            pk=file_doc.meta.id,
            tag=tag,
            elastic_index=file_doc._index._name,
            name=file_doc.name,
            type=file_doc.type,
            reference_code='',
        )
        tag_repr = TagStructure(
            tag=tag,
            parent=parent,
            structure=parent.structure,
            tree_id=parent.tree_id,
            lft=0,
            rght=0,
            level=0,
        )
        self.indexed_files.append(filepath)

        payload = file_doc.to_dict(include_meta=True)
        payload['pipeline'] = 'ingest_attachment'
        return tag, tag_version, tag_repr, payload
Esempio n. 5
0
    def update(self, instance: TagVersion, validated_data):
        """
        Apply validated data to a tag version, place it in a structure and
        re-index it.

        The relational keys (``structure_unit``, ``parent``, ``structure``,
        ``notes``, ``identifiers``, ``information_package``,
        ``appraisal_date`` and ``index``) are removed from ``validated_data``;
        whatever remains is written straight to the ``TagVersion`` row.

        :param instance: The tag version being updated
        :type instance: TagVersion
        :param validated_data: The validated serializer data (mutated in place)
        :type validated_data: dict
        :return: The refreshed tag version
        :rtype: TagVersion
        :raises ValueError: If the refreshed tag version has an
            ``elastic_index`` other than ``component`` or ``document``
        """
        structure_unit = validated_data.pop('structure_unit', None)
        parent = validated_data.pop('parent', None)
        structure = validated_data.pop('structure', None)
        notes_data = validated_data.pop('notes', None)
        identifiers_data = validated_data.pop('identifiers', None)
        # Keep the current values when no replacement is supplied.
        information_package = validated_data.pop('information_package', instance.tag.information_package)
        appraisal_date = validated_data.pop('appraisal_date', instance.tag.appraisal_date)
        # 'index' is not a TagVersion field, so it must not reach the
        # queryset update below.
        validated_data.pop('index', None)

        self.update_identifiers(instance, identifiers_data)
        self.update_notes(instance, notes_data)

        if structure is not None:
            tag = instance.tag

            if structure_unit is not None:
                # Placed in a structure unit: the parent becomes the root of
                # the structure's tag tree.
                parent = structure.tagstructure_set.first().get_root()
            elif parent is not None:
                # Placed under another tag: resolve that tag's node in the
                # requested structure.
                parent = parent.get_structures(structure).get()

            if parent or structure_unit:
                TagStructure.objects.update_or_create(tag=tag, structure=structure, defaults={
                    'parent': parent,
                    'structure_unit': structure_unit,
                })

        instance.tag.information_package = information_package
        instance.tag.appraisal_date = appraisal_date
        instance.tag.save()
        TagVersion.objects.filter(pk=instance.pk).update(**validated_data)
        instance.refresh_from_db()

        if instance.elastic_index == 'component':
            doc = Component.from_obj(instance)
        elif instance.elastic_index == 'document':
            doc = File.from_obj(instance)
        else:
            # Previously this fell through and failed with an
            # UnboundLocalError on ``doc``; fail with a clear message instead.
            raise ValueError(
                'Cannot index tag version {}: unsupported elastic_index "{}"'.format(
                    instance.pk, instance.elastic_index,
                )
            )

        doc.save()

        return instance
Esempio n. 6
0
def index_path(ip, path, parent=None):
    """
    Create the tag records for a filesystem path and index it in elasticsearch.

    A new ``Tag`` is created for ``ip`` together with a ``TagVersion`` named
    after the last component of ``path``. When ``parent`` is truthy, a
    ``TagStructure`` linking the new tag to it (inside the parent's
    structure) is created as well. The tag version is saved after the
    indexer has run.

    :param ip: The IP the path belongs to
    :type ip: InformationPackage
    :param path: The path of the file or directory
    :type path: str
    :param parent: The parent of the tag
    :type parent: TagStructure
    :return: Nothing in the code shown here.
        NOTE(review): the indexed document is computed but never returned -
        confirm whether callers expect it.
    """

    # Anything that is not a regular file is treated as a directory.
    is_file = os.path.isfile(path)
    version_id = str(uuid.uuid4())

    tag = Tag.objects.create(information_package=ip)
    tag_version = TagVersion(pk=version_id, tag=tag, name=os.path.basename(path))
    if parent:
        TagStructure.objects.create(
            tag=tag,
            parent=parent,
            structure=parent.structure,
        )

    logger.debug('indexing {}'.format(path))

    kind = 'document' if is_file else 'directory'
    tag_version.elastic_index = kind
    # TODO: minimize db queries
    tag_version.type, _ = TagVersionType.objects.get_or_create(
        name=kind, archive_type=False)

    indexer = index_document if is_file else index_directory
    # The indexer hands back the tag version to save alongside the document.
    doc, tag_version = indexer(tag_version, path)
    tag_version.save()
Esempio n. 7
0
    def parse_volym(cls, el, archive_version, parent_tag_structure, structure_unit, agent, task=None, ip=None):
        """
        Build the unsaved records and index document for one volume element.

        None of the model objects created here are saved by this method.

        :param el: The volume element; ``va:volnr`` gives the reference code
            and ``va:utseende`` the name
        :param archive_version: The archive passed on to ``Component.from_obj``
        :param parent_tag_structure: The node the volume is placed under
        :type parent_tag_structure: TagStructure
        :param structure_unit: The structure unit the volume belongs to
        :param agent: The agent linked to the volume
        :param task: Optional task recorded on the tag
        :param ip: Optional IP recorded on the tag
        :return: ``(dict, Tag, TagVersion, TagStructure, AgentTagLink)`` where
            the dict is the index document including meta
        :rtype: tuple
        """
        logger.debug("Parsing volym...")

        namespaces = cls.NSMAP
        # Both lookups take the first match and fail with IndexError when the
        # element is missing.
        reference_code = el.xpath("va:volnr", namespaces=namespaces)[0].text
        appearance = el.xpath("va:utseende", namespaces=namespaces)[0].text
        volume_type = cls.VOLUME_TYPE

        version_pk = uuid.uuid4()

        tag = Tag(information_package=ip, task=task)

        created = cls.parse_volume_create_date(el)
        revised = cls.parse_volume_revise_date(el)
        imported = timezone.now()
        tag_version = TagVersion(
            pk=version_pk,
            tag=tag,
            elastic_index='component',
            reference_code=reference_code,
            name=appearance,
            create_date=created,
            revise_date=revised,
            import_date=imported,
            type=volume_type,
        )

        # Tree fields are zeroed here; presumably the tree is rebuilt after
        # the records have been stored - TODO confirm.
        tag_structure = TagStructure(
            tag=tag,
            structure_unit=structure_unit,
            structure=parent_tag_structure.structure,
            parent=parent_tag_structure,
            tree_id=parent_tag_structure.tree_id,
            lft=0,
            rght=0,
            level=0,
        )

        agent_tag_link = AgentTagLink(
            agent=agent,
            tag_id=tag_version.id,
            type=cls.AGENT_TAG_LINK_RELATION_TYPE,
        )

        component_doc = Component.from_obj(tag_version, archive=archive_version)
        component_doc.agents = [str(agent.pk)]

        logger.debug("Parsed volym: {}".format(tag_version.pk))
        return (
            component_doc.to_dict(include_meta=True),
            tag,
            tag_version,
            tag_structure,
            agent_tag_link,
        )
Esempio n. 8
0
    def parse_acts(self, ip, rootdir, errand, acts_root, parent):
        """
        Generate unsaved tag records and index documents for every act.

        For each ``ArkivobjektHandling`` child of ``acts_root`` the results of
        ``parse_document`` for its ``Bilaga`` children are yielded first,
        followed by the act's own 4-tuple.

        :param ip: The IP the tags belong to
        :param rootdir: Root directory forwarded to ``parse_document``
        :param errand: The errand passed on to ``parse_act``
        :param acts_root: The element that holds the acts
        :param parent: The tag structure the acts are placed under
        :type parent: TagStructure
        :return: Generator of ``(Tag, TagVersion, TagStructure, dict)`` for
            each act, interleaved with whatever ``parse_document`` returns
        """
        act_elements = acts_root.xpath("*[local-name()='ArkivobjektHandling']")

        for act_element in act_elements:
            act = self.parse_act(act_element, errand)

            act_tag = Tag(information_package=ip, task=self.task)
            act_version = TagVersion(
                pk=act.meta.id,
                tag=act_tag,
                elastic_index=act._index._name,
                name=act.name,
                type=act.type,
                reference_code=act.reference_code,
            )
            # Tree fields are zeroed here; presumably the tree is rebuilt
            # after the records have been stored - TODO confirm.
            act_node = TagStructure(
                tag=act_tag,
                parent=parent,
                structure=parent.structure,
                tree_id=parent.tree_id,
                lft=0,
                rght=0,
                level=0,
            )

            # Attachments hang under the act's (still unsaved) node.
            attachments = act_element.xpath("*[local-name()='Bilaga']")
            for attachment in attachments:
                yield self.parse_document(ip, rootdir, attachment, act, act_node)

            yield act_tag, act_version, act_node, act.to_dict(include_meta=True)