Esempio n. 1
0
    def import_from_upload(self, upload, frbr_uri, request):
        """ Create a new Document by importing it from a
        :class:`django.core.files.uploadedfile.UploadedFile` instance.

        The upload's ``content_type`` selects the import route:

        * XML is taken as-is and returned immediately, skipping the
          post-import analysis step;
        * docx is converted to HTML first and imported as ``.html`` text;
        * PDF goes through :meth:`import_from_pdf`;
        * anything else is handed to :meth:`import_from_file` by file name.

        ``request`` is accepted but not used by this method.
        """
        self.reformat = True
        mime_type = upload.content_type

        if mime_type in ('text/xml', 'application/xml'):
            # no validation here: the upload is assumed to be valid AKN xml
            imported = Document.randomized(frbr_uri)
            imported.content = upload.read().decode('utf-8')
            return imported

        if mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document':
            # docx is turned into HTML, which is then imported as text
            imported = self.import_from_text(
                self.docx_to_html(upload), frbr_uri, '.html')
        elif mime_type == 'application/pdf':
            imported = self.import_from_pdf(upload, frbr_uri)
        else:
            # unknown type: let slaw do its best with the raw file
            with self.tempfile_for_upload(upload) as tmp:
                imported = self.import_from_file(tmp.name, frbr_uri)

        self.analyse_after_import(imported)

        return imported
Esempio n. 2
0
    def import_from_file(self, fname, frbr_uri):
        """ Parse the file ``fname`` with the ``slaw`` command-line parser and
        wrap the result.

        The command is ``bundle exec slaw parse`` plus, when set on this
        importer, ``--fragment`` (with an optional ``--id-prefix``) and
        ``--section-number-position``, followed by ``--grammar`` and the
        file name.

        :param fname: path of the file to parse
        :param frbr_uri: FRBR URI given to the new document; not used when
            importing a fragment
        :returns: a ``Fragment`` when ``self.fragment`` is set, otherwise a
            ``Document`` carrying the parsed content
        :raises ValueError: if the command exits with a non-zero status (the
            exception carries its stderr), or if it produces no output
        """
        cmd = ['bundle', 'exec', 'slaw', 'parse']

        if self.fragment:
            cmd.extend(['--fragment', self.fragment])
            # an id prefix is only passed along when importing a fragment
            if self.fragment_id_prefix:
                cmd.extend(['--id-prefix', self.fragment_id_prefix])

        if self.section_number_position:
            cmd.extend(
                ['--section-number-position', self.section_number_position])

        cmd.extend(['--grammar', self.slaw_grammar])
        cmd.append(fname)

        code, stdout, stderr = self.shell(cmd)

        # Treat every non-zero status as a failure, not only positive ones.
        # NOTE(review): presumably self.shell reports a subprocess return
        # code, which is negative when the process is killed by a signal;
        # partial output from such a run must not be parsed as a document.
        if code != 0:
            raise ValueError(stderr)

        if not stdout:
            raise ValueError("We couldn't get any useful text out of the file")

        # decode once; both result types take the same text
        xml = stdout.decode('utf-8')

        if self.fragment:
            doc = Fragment(xml)
        else:
            doc = Document.randomized(frbr_uri)
            doc.content = xml
            doc.frbr_uri = frbr_uri  # reset it
            doc.title = None
            doc.copy_attributes()

        self.log.info("Successfully imported from %s" % fname)
        return doc
Esempio n. 3
0
def document(request, doc_id=None):
    """ Render the ``document/show.html`` page for one document.

    With a ``doc_id`` the matching Document is looked up (404 if there is
    none) and ``document_json`` is JSON ``null``. Without one, a new
    Document is created via ``Document.randomized`` for the requesting user
    and serialized into ``document_json``.

    The template context also carries the document's XML, the serialized
    document list, the edit form, and the subtype, language and country
    lookups, the countries additionally as JSON keyed by country code.
    """
    if not doc_id:
        # it's new!
        doc = Document.randomized(request.user, title='(untitled)')
        doc.tags = None
        serialized_doc = DocumentSerializer(
            instance=doc, context={'request': request}).data
        doc_json = json.dumps(serialized_doc)
    else:
        doc = get_object_or_404(Document, pk=doc_id)
        doc_json = json.dumps(None)

    form = DocumentForm(instance=doc)

    # country details as JSON, keyed by country code
    country_qs = Country.objects.select_related('country').prefetch_related(
        'locality_set', 'publication_set', 'country').all()
    countries_json = json.dumps(
        {country.code: country.as_json() for country in country_qs})

    list_serializer = DocumentListSerializer(context={'request': request})
    all_documents = DocumentViewSet.queryset.all()
    documents_json = json.dumps(list_serializer.to_representation(all_documents))

    context = {
        'document': doc,
        'document_json': doc_json,
        'document_content_json': json.dumps(doc.document_xml),
        'documents_json': documents_json,
        'form': form,
        'subtypes': Subtype.objects.order_by('name').all(),
        'languages': Language.objects.select_related('language').all(),
        'countries': Country.objects.select_related('country').all(),
        'countries_json': countries_json,
        'view': 'DocumentView',
    }
    return render(request, 'document/show.html', context)