Exemple #1
0
 def parse_eget_element(self, el):
     """
     Parse one ``EgetElement`` node, including its nested children.

     Args:
         el: Element exposing ``get`` and ``xpath`` (presumably an lxml
             element -- confirm against the caller).

     Returns:
         The mapping produced by ``self.parse_mappings`` extended with the
         keys ``namn``, ``datatyp``, ``format``, ``element`` (recursively
         parsed ``EgetElement`` children) and ``egenskaper`` (parsed
         ``Egenskaper/Egenskap`` children).
     """
     data = self.parse_mappings({'varde': 'Varde'}, el)

     name = el.get('Namn')
     if name is not None:
         # The prefixes are removed in this order: "Dokument/" first.
         for prefix in ("Dokument/", u"Ärende/"):
             name = remove_prefix(name, prefix)
     data['namn'] = name
     data['datatyp'] = el.get('DataTyp')
     data['format'] = el.get('Format')

     # Element names are matched on local-name() so any namespace is accepted.
     children = el.xpath('*[local-name()="EgetElement"]')
     data['element'] = [self.parse_eget_element(child) for child in children]

     properties = el.xpath('*[local-name()="Egenskaper"]/*[local-name()="Egenskap"]')
     data['egenskaper'] = [self.parse_egenskaper(prop) for prop in properties]

     return data
Exemple #2
0
    def path(self, path):
        """
        Store the normalized path in ``self._path``.

        Presumably a property setter; any decorator lies outside the visible
        source.

        Args:
            path: Explicit path to store. When ``None`` the path is looked up
                in ``self.el`` using the ``path`` entry of ``self.props``
                (a single expression or a list tried in order), the longest
                matching ``pathprefix`` entry is removed, the first path
                segment is dropped when ``path_includes_root`` is set, and
                leading slashes and spaces are stripped.
        """
        if path is not None:
            self._path = normalize_path(path)
            return

        candidates = self.props.get('path', [''])
        if isinstance(candidates, str):
            candidates = [candidates]
        self.paths = candidates

        # Keep the first candidate that yields a value.
        resolved = None
        for candidate in candidates:
            resolved = get_value_from_path(self.el, candidate)
            if resolved is not None:
                break

        self.path_prefix = self.props.get('pathprefix', [])
        # Longest prefixes first so the most specific match wins.
        longest_first = sorted(self.path_prefix, key=len, reverse=True)
        for prefix in longest_first:
            stripped = remove_prefix(resolved, prefix)
            if stripped != resolved:
                resolved = stripped
                break

        if self.props.get('path_includes_root', False):
            resolved = resolved.split('/', 1)[-1]

        self._path = normalize_path(resolved.lstrip('/ '))
Exemple #3
0
    def run(self, ip=None, xmlfile=None, validate_fileformat=True, validate_integrity=True, rootdir=None):
        """
        Create and run a parallel "Validate Files" step for an XML file.

        Every element in ``xmlfile`` whose local name is a key of
        ``settings.FILE_ELEMENTS`` is inspected. Depending on the flags, a
        file format task and/or a checksum task is added to the step for it.

        Args:
            ip: Information package attached to every created task. Its
                ``ObjectPath`` is used as ``rootdir`` when none is given.
            xmlfile: XML document to read the file elements from.
            validate_fileformat: Add a ``self.fileformat_task`` task for each
                element that has a format value.
            validate_integrity: Add a ``self.checksum_task`` task for each
                element that has a checksum value.
            rootdir: Directory the extracted file paths are joined onto.

        Returns:
            Whatever ``step.run().get()`` returns.
        """
        step = ProcessStep.objects.create(
            name="Validate Files",
            parallel=True,
            parent_step=self.taskobj.processstep
        )

        def add_task(name, params):
            # All created tasks share the log, package and responsible of
            # the task that is currently running.
            step.tasks.add(ProcessTask.objects.create(
                name=name,
                params=params,
                log=self.taskobj.log,
                information_package=ip,
                responsible=self.taskobj.responsible,
            ))

        if validate_fileformat or validate_integrity:
            if rootdir is None:
                rootdir = ip.ObjectPath

            doc = etree.ElementTree(file=xmlfile)

            # items() exists on both Python 2 and 3; iteritems() is gone in 3.
            for elname, props in settings.FILE_ELEMENTS.items():
                for f in doc.xpath('.//*[local-name()="%s"]' % elname):
                    fpath = get_value_from_path(f, props["path"])

                    if fpath:
                        fpath = remove_prefix(fpath, props.get("pathprefix", ""))

                    fformat = get_value_from_path(f, props.get("format"))
                    checksum = get_value_from_path(f, props.get("checksum"))
                    algorithm = get_value_from_path(f, props.get("checksumtype"))

                    if validate_fileformat and fformat is not None:
                        add_task(self.fileformat_task, {
                            "filename": os.path.join(rootdir, fpath),
                            "fileformat": fformat,
                        })

                    if validate_integrity and checksum is not None:
                        add_task(self.checksum_task, {
                            "filename": os.path.join(rootdir, fpath),
                            "checksum": checksum,
                            "algorithm": algorithm,
                        })

        # Detach the log from this task before the sub-step is run.
        self.taskobj.log = None
        self.taskobj.save(update_fields=['log'])
        self.set_progress(100, total=100)

        with allow_join_result():
            return step.run().get()
Exemple #4
0
    def run(self, dirname=None, files=[], files_reldir=None, xmlfile=None):
        """
        Verify that the files listed in an XML file match the physical files.

        Args:
            dirname: Directory that is walked to collect the physical files.
                Skipped when falsy.
            files: Extra physical file paths. The default list is only read,
                never mutated.
            files_reldir: When set, every entry of ``files`` is made relative
                to this directory before it is compared.
            xmlfile: XML document listing the logical files. The document
                itself is left out of the physical set when it is found
                under ``dirname``.

        Returns:
            The string ``"Success"`` when both sets are equal.

        Raises:
            AssertionError: If the logical and the physical sets differ.
        """
        if dirname:
            # Normalise the same way as the walked files below so both sides
            # of the comparison have the same form (os.path.relpath returns
            # OS-native separators; win_to_posix presumably converts them).
            xmlrelpath = os.path.relpath(xmlfile, dirname)
            xmlrelpath = win_to_posix(xmlrelpath)
            xmlrelpath = remove_prefix(xmlrelpath, "./")
        else:
            xmlrelpath = xmlfile

        doc = etree.ElementTree(file=xmlfile)

        logical_files = set()
        physical_files = set()

        # items() exists on both Python 2 and 3; iteritems() is gone in 3.
        for elname, props in settings.FILE_ELEMENTS.items():
            for f in doc.xpath('.//*[local-name()="%s"]' % elname):
                filename = get_value_from_path(f, props["path"])

                if filename:
                    filename = remove_prefix(filename, props.get("pathprefix", ""))
                    logical_files.add(filename)

        if dirname:
            for root, dirs, filenames in os.walk(dirname):
                reldir = os.path.relpath(root, dirname)
                for f in filenames:
                    relfile = os.path.join(reldir, f)
                    relfile = win_to_posix(relfile)
                    relfile = remove_prefix(relfile, "./")

                    # Compare full relative paths: a bare file name would
                    # miss an XML file in a subdirectory and would wrongly
                    # drop same-named files in other directories.
                    if relfile != xmlrelpath:
                        physical_files.add(relfile)

        for f in files:
            if files_reldir:
                f = os.path.relpath(f, files_reldir)
            physical_files.add(f)

        # Raised explicitly because an ``assert`` is removed under ``python -O``.
        if logical_files != physical_files:
            raise AssertionError("the logical representation differs from the physical")

        self.set_progress(100, total=100)
        return "Success"
Exemple #5
0
    def run(self,
            dirname=None,
            files=[],
            files_reldir=None,
            xmlfile=None,
            rootdir=""):
        """
        Verify that the files listed in an XML file match the physical files.

        Args:
            dirname: Directory that is walked to collect the physical files.
                Skipped when falsy.
            files: Extra physical file paths. The default list is only read,
                never mutated.
            files_reldir: When set, every entry of ``files`` is made relative
                to this directory. An entry equal to ``files_reldir`` itself
                is recorded by its base name instead.
            xmlfile: XML document passed to ``find_files`` to obtain the
                logical files. The document itself is left out of the
                physical set when it is found under ``dirname``.
            rootdir: Passed through to ``find_files``.

        Returns:
            The string ``"Success"`` when both sets are equal.

        Raises:
            AssertionError: If the logical and the physical sets differ.
        """
        if dirname:
            # Normalise the same way as the walked files below so both sides
            # of the comparison have the same form (os.path.relpath returns
            # OS-native separators; win_to_posix presumably converts them).
            xmlrelpath = os.path.relpath(xmlfile, dirname)
            xmlrelpath = win_to_posix(xmlrelpath)
            xmlrelpath = remove_prefix(xmlrelpath, "./")
        else:
            xmlrelpath = xmlfile

        logical_files = find_files(xmlfile, rootdir)
        physical_files = set()

        if dirname:
            for root, dirs, filenames in walk(dirname):
                reldir = os.path.relpath(root, dirname)
                for f in filenames:
                    relfile = os.path.join(reldir, f)
                    relfile = win_to_posix(relfile)
                    relfile = remove_prefix(relfile, "./")

                    if relfile != xmlrelpath:
                        physical_files.add(relfile)

        for f in files:
            if files_reldir:
                if f == files_reldir:
                    # relpath would give "." here; keep the file name instead.
                    physical_files.add(os.path.basename(f))
                    continue

                f = os.path.relpath(f, files_reldir)
            physical_files.add(f)

        # Raised explicitly because an ``assert`` is removed under ``python -O``.
        if logical_files != physical_files:
            raise AssertionError("the logical representation differs from the physical")

        return "Success"
Exemple #6
0
    def get_sorting(self, request):
        """
        Translate the ``ordering`` query parameter into verified sort entries.

        Args:
            request: Object whose ``query_params`` mapping may hold a
                comma-separated ``ordering`` value. A leading ``-`` on a
                field selects descending order.

        Returns:
            A list with one ``self.verify_sort_field`` result per field, or
            an empty list when no ordering was given.

        Raises:
            exceptions.ParseError: If ``verify_sort_field`` returns ``False``
                for a field.
        """
        raw = request.query_params.get('ordering', '').strip()
        if not raw:
            return []

        verified = []
        for token in raw.split(','):
            if token.startswith('-'):
                direction = 'desc'
            else:
                direction = 'asc'

            name = remove_prefix(token, '-')
            checked = self.verify_sort_field(name, direction)
            if checked is False:
                raise exceptions.ParseError('Invalid sort field: {}'.format(name))
            verified.append(checked)

        return verified
Exemple #7
0
    def get_sorting(self, request):
        """
        Build the list of sort entries requested through ``ordering``.

        Args:
            request: Object whose ``query_params`` mapping may hold a
                comma-separated ``ordering`` value; fields prefixed with
                ``-`` are sorted descending, all others ascending.

        Returns:
            The ``self.verify_sort_field`` results in request order; empty
            when the parameter is missing or blank.

        Raises:
            exceptions.ParseError: If a field is rejected, i.e.
                ``verify_sort_field`` returns ``False``.
        """
        requested = request.query_params.get('ordering', '').strip()
        result = []
        if requested == '':
            return result

        for entry in requested.split(','):
            descending = entry.startswith('-')
            field_name = remove_prefix(entry, '-')
            verified = self.verify_sort_field(field_name, 'desc' if descending else 'asc')
            if verified is False:
                raise exceptions.ParseError('Invalid sort field: {}'.format(field_name))
            result.append(verified)

        return result
Exemple #8
0
    def __init__(self, el, props, path=None, rootdir=None):
        '''
        Collect path, checksum, size and format information for a file element.

        args:
            el: lxml.etree._Element
            props: 'dict with properties from FILE_ELEMENTS'
            path: explicit path; when None the path is read from ``el`` using
                the ``path`` entry of ``props`` (one expression or a list
                tried in order) and then cleaned up
            rootdir: accepted but not used in this method
        '''

        if path is None:
            candidates = props.get('path', [''])
            if isinstance(candidates, six.string_types):
                candidates = [candidates]
            self.paths = candidates

            # Keep the first candidate that yields a value.
            for candidate in candidates:
                path = get_value_from_path(el, candidate)
                if path is not None:
                    break

            self.path_prefix = props.get('pathprefix', [])
            # Longest prefixes first so the most specific match wins.
            for prefix in sorted(self.path_prefix, key=len, reverse=True):
                trimmed = remove_prefix(path, prefix)
                if trimmed != path:
                    path = trimmed
                    break

            if props.get('path_includes_root', False):
                path = path.split('/', 1)[-1]

            path = path.lstrip('/ ')

        self.path = path

        # Checksum and checksum type are lower-cased when present.
        checksum = get_value_from_path(el, props.get('checksum', ''))
        self.checksum = checksum if checksum is None else checksum.lower()

        checksum_type = get_value_from_path(el, props.get('checksumtype', ''))
        self.checksum_type = checksum_type if checksum_type is None else checksum_type.lower()

        size = get_value_from_path(el, props.get('size', ''))
        self.size = None if size is None else int(size)

        self.format = get_value_from_path(el, props.get('format', ''))
Exemple #9
0
    def __init__(self, el, props):
        '''
        Collect path, checksum and format information for a file element.

        args:
            el: lxml.etree._Element
            props: 'dict with properties from FILE_ELEMENTS'
        '''

        found = get_value_from_path(el, props.get('path', ''))

        self.path_prefix = props.get('pathprefix', [])
        longest_first = sorted(self.path_prefix, key=len, reverse=True)
        # Lazily try each prefix and keep the first result that actually
        # changed the path; fall back to the untouched path otherwise.
        attempts = (remove_prefix(found, prefix) for prefix in longest_first)
        found = next((attempt for attempt in attempts if attempt != found), found)

        self.path = found.lstrip('/ ')

        self.checksum = get_value_from_path(el, props.get('checksum', ''))
        self.checksum_type = get_value_from_path(el, props.get('checksumtype', ''))

        self.format = get_value_from_path(el, props.get('format', ''))
Exemple #10
0
    def list(self, request):
        """
        List the packages found in the reception areas plus those in
        state ``Receiving``.

        The result is assembled from three sources, in this order:

        1. XML files in the reception and unidentified directories, parsed
           with ``self.parseFile`` and kept when no ``InformationPackage``
           with the parsed id exists.
        2. ``*.tar`` / ``*.zip`` files in the unidentified directory that no
           XML file in that directory references through its first
           ``FLocat`` element.
        3. Serialized ``InformationPackage`` rows whose state is
           ``Receiving``.

        Args:
            request: Request whose optional ``ordering`` query parameter
                names the key to sort on; a leading ``-`` reverses the order.
                Sorting is skipped when an entry lacks that key.

        Returns:
            The paginated response when the paginator yields a page,
            otherwise a ``Response`` holding the whole list.
        """
        reception = Path.objects.get(entity="path_ingest_reception").value
        uip = Path.objects.get(entity="path_ingest_unidentified").value
        ips = []

        xml_candidates = glob.glob(os.path.join(reception, "*.xml")) + glob.glob(os.path.join(uip, "*.xml"))
        for xmlfile in xml_candidates:
            if not os.path.isfile(xmlfile):
                continue

            srcdir = uip if xmlfile.startswith(uip) else reception
            parsed = self.parseFile(xmlfile, srcdir)
            if not InformationPackage.objects.filter(id=parsed['id']).exists():
                ips.append(parsed)

        containers = glob.glob(os.path.join(uip, "*.tar")) + glob.glob(os.path.join(uip, "*.zip"))
        for container_file in containers:
            label = os.path.basename(container_file)
            created = timestamp_to_datetime(creation_date(container_file))
            unidentified = {
                'Label': label,
                'CreateDate': str(created.isoformat()),
                'State': 'Unidentified',
                'status': 0,
                'step_state': celery_states.SUCCESS,
            }

            for xmlfile in glob.glob(os.path.join(uip, "*.xml")):
                if not os.path.isfile(xmlfile):
                    continue

                root = etree.parse(xmlfile).getroot()
                el = root.xpath('.//*[local-name()="%s"]' % "FLocat")[0]
                href = get_value_from_path(el, "@href")
                if label == href.split('file:///')[1]:
                    # An XML file already references this container.
                    break
            else:
                # No XML file references the container, so list it.
                ips.append(unidentified)

        receiving = InformationPackage.objects.filter(State='Receiving').prefetch_related(
            Prefetch('profileip_set', to_attr='profiles'),
        )
        serializer = InformationPackageSerializer(
            data=receiving, many=True, context={'request': request}
        )
        # The validation result is ignored; only the serialized data is used.
        serializer.is_valid()
        ips.extend(serializer.data)

        try:
            sort_key = request.query_params.get('ordering', '')
            descending = sort_key.startswith('-')
            sort_key = remove_prefix(sort_key, '-')
            ips = sorted(ips, key=lambda entry: entry[sort_key], reverse=descending)
        except KeyError:
            # Missing or unknown ordering key: keep the list unsorted.
            pass

        paginator = LinkHeaderPagination()
        page = paginator.paginate_queryset(ips, request)
        if page is None:
            return Response(ips)

        return paginator.get_paginated_response(page)