def post(self, request):
    """Parse submitted plain text into XML using the locale's importer plugin.

    The request payload is validated with ``ParseSerializer``. The importer
    is looked up from the country, language and locality of the supplied
    FRBR URI and is configured with the ``fragment`` and ``id_prefix``
    values from the payload.

    Returns:
        A ``Response`` whose ``output`` key holds the XML after it has been
        round-tripped through ``Base(...).to_xml()``.

    Raises:
        ValidationError: keyed on ``content`` when the importer raises
            ``ValueError``.
    """
    serializer = ParseSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    validated = serializer.validated_data

    fragment = validated.get('fragment')
    frbr_uri = FrbrUri.parse(validated.get('frbr_uri'))

    importer = plugins.for_locale(
        'importer', frbr_uri.country, frbr_uri.language, frbr_uri.locality)
    importer.fragment = fragment
    importer.fragment_id_prefix = validated.get('id_prefix')

    try:
        text = validated.get('content')
        raw_xml = importer.import_from_text(text, frbr_uri.work_uri(), '.txt')
    except ValueError as e:
        reason = str(e)
        log.error("Error during import: %s" % reason, exc_info=e)
        raise ValidationError({'content': reason or "error during import"})

    # Parse and re-serialize so the caller gets clean XML with the
    # encoding sorted out.
    clean_xml = Base(raw_xml).to_xml()
    return Response({'output': clean_xml})
def post(self, request, document_id):
    """Parse submitted plain text into XML in the context of ``self.document``.

    The importer plugin is selected from the country, language and locality
    of ``self.document.work_uri``. ``document_id`` is not read in this
    method.

    Returns:
        A ``Response`` whose ``output`` key holds the unicode XML. For a
        fragment this is the importer's output re-serialized; otherwise it
        is a full document whose meta section is a deep copy of the existing
        document's.

    Raises:
        ValidationError: keyed on ``content`` when the importer raises
            ``ValueError``.
    """
    serializer = ParseSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    validated = serializer.validated_data

    fragment = validated.get('fragment')
    frbr_uri = self.document.work_uri

    importer = plugins.for_locale(
        'importer', frbr_uri.country, frbr_uri.language, frbr_uri.locality)
    importer.fragment = fragment
    importer.fragment_id_prefix = validated.get('id_prefix')

    try:
        text = validated.get('content')
        xml = importer.import_from_text(text, frbr_uri.work_uri(), '.txt')
    except ValueError as e:
        reason = str(e)
        log.warning("Error during import: %s" % reason, exc_info=e)
        raise ValidationError({'content': reason or "error during import"})

    if fragment:
        # Round-trip only, to clean up encodings.
        doc = AkomaNtosoDocument(xml)
    else:
        # The importer cannot produce a correct meta section on its own, so
        # the one from the existing document is folded in to give the caller
        # a complete document.
        document_class = StructuredDocument.for_document_type(frbr_uri.doctype)
        doc = document_class(xml)
        existing_meta = copy.deepcopy(self.document.doc.meta)
        doc.main.replace(doc.meta, existing_meta)

    return Response({'output': doc.to_xml(encoding='unicode')})
def link_publication_document(self, work, info):
    """Attach a publication document to ``work`` when exactly one is found.

    Looks up the ``publications`` plugin for this country/locality and
    searches with ``info['params']``. In each of the following cases the
    result of ``self.create_task(..., task_type='link-publication-document')``
    is returned instead:

    * there is no finder plugin, or the params carry no ``date``;
    * the finder raises ``requests.HTTPError``;
    * the finder does not return exactly one publication.

    Otherwise a ``PublicationDocument`` is saved for ``work`` and ``None``
    is returned.
    """
    task_type = 'link-publication-document'

    params = info.get('params')
    locality_code = None
    if self.locality:
        locality_code = self.locality.code
    finder = plugins.for_locale('publications', self.country.code, None, locality_code)

    if not (finder and params.get('date')):
        return self.create_task(work, info, task_type=task_type)

    try:
        found = finder.find_publications(params)
    except requests.HTTPError:
        return self.create_task(work, info, task_type=task_type)

    if len(found) != 1:
        return self.create_task(work, info, task_type=task_type)

    details = found[0]
    publication = PublicationDocument()
    publication.work = work
    publication.file = None
    publication.trusted_url = details.get('url')
    publication.size = details.get('size')
    publication.save()
def bulk_creator(self):
    """Return the ``bulk-creator`` plugin for this locale, creating it lazily.

    On first use (while ``self._bulk_creator`` is falsy) the plugin is
    looked up for ``self.country.code`` and the locality code, stored on
    ``self._bulk_creator``, and given this object's ``country`` and
    ``locality``. Later calls return the stored plugin.
    """
    if self._bulk_creator:
        return self._bulk_creator

    locality_code = None
    if self.locality:
        locality_code = self.locality.code

    # Store the plugin before configuring it, as the original does.
    self._bulk_creator = plugins.for_locale(
        'bulk-creator', self.country.code, None, locality_code)
    self._bulk_creator.country = self.country
    self._bulk_creator.locality = self.locality
    return self._bulk_creator
def link_publication_document(self, work, row):
    """Attach a publication document to ``work`` when exactly one is found.

    Looks up the ``publications`` plugin for this country/locality and
    searches with ``row.params``. In each of the following cases the result
    of ``self.create_task(..., task_type='link-gazette')`` is returned
    instead:

    * there is no finder plugin, or the params carry no ``date``;
    * the finder raises ``requests.HTTPError``;
    * the finder does not return exactly one publication.

    Otherwise a ``PublicationDocument`` is saved for ``work``, unless
    ``self.dry_run`` is set, and ``None`` is returned.
    """
    task_type = 'link-gazette'

    locality_code = None
    if self.locality:
        locality_code = self.locality.code
    finder = plugins.for_locale('publications', self.country.code, None, locality_code)

    if not (finder and row.params.get('date')):
        return self.create_task(work, row, task_type=task_type)

    try:
        found = finder.find_publications(row.params)
    except requests.HTTPError:
        return self.create_task(work, row, task_type=task_type)

    if len(found) != 1:
        return self.create_task(work, row, task_type=task_type)

    # A dry run must not write anything to the database.
    if self.dry_run:
        return

    details = found[0]
    publication = PublicationDocument()
    publication.work = work
    publication.file = None
    publication.trusted_url = details.get('url')
    publication.size = details.get('size')
    publication.save()
def get_publication_document(self, params, work, form):
    """Link a publication document to ``work`` if exactly one can be found.

    Looks up the ``publications`` plugin for ``self.country`` and
    ``self.locality`` and searches with ``params``:

    * exactly one result: a ``PublicationDocument`` is saved for ``work``
      and ``self.pub_doc_task`` is called with ``task_type='check'``;
    * any other number of results, or no plugin: ``self.pub_doc_task`` is
      called with ``task_type='link'``.

    Raises:
        ValidationError: keyed on ``message`` when a ``ValueError`` is
            raised while searching or linking.
    """
    finder = plugins.for_locale('publications', self.country, None, self.locality)
    if not finder:
        self.pub_doc_task(work, form, task_type='link')
        return

    try:
        publications = finder.find_publications(params)

        if len(publications) == 1:
            pub_doc_details = publications[0]
            pub_doc = PublicationDocument()
            pub_doc.work = work
            pub_doc.file = None
            pub_doc.trusted_url = pub_doc_details.get('url')
            pub_doc.size = pub_doc_details.get('size')
            pub_doc.save()
            self.pub_doc_task(work, form, task_type='check')
        else:
            self.pub_doc_task(work, form, task_type='link')
    except ValueError as e:
        # ValueError has no ``.message`` attribute on Python 3; reading it
        # here would raise AttributeError and mask the validation error.
        raise ValidationError({'message': str(e)})
def setUp(self):
    """Prepare ``self.creator``: the ``bulk-creator`` plugin for ``'za'``.

    The plugin is given the ``Country`` with primary key 1, no locality,
    and ``dry_run`` switched off.
    """
    plugin = plugins.for_locale('bulk-creator', 'za', None, None)
    country = Country.objects.get(pk=1)

    plugin.country = country
    plugin.locality = None
    plugin.dry_run = False

    self.creator = plugin
def validate(self, data):
    """Replace the content XML with an imported file upload, when one is given.

    Callers may pass a file upload in the ``file`` attribute. If present,
    it is removed from ``data``, imported with the ``importer`` plugin for
    the request's country, and the result overwrites ``data['content']``.
    The upload itself is kept as ``data['source_file']`` so it can be added
    as an attachment.

    Import options are read from ``data['file_options']``:
    ``section_number_position`` (default ``'guess'``) and ``cropbox`` (a
    comma-separated string, split into a list).

    Returns:
        The (possibly updated) ``data`` dict.

    Raises:
        ValidationError: keyed on ``file`` when the import raises
            ``ValueError``.
    """
    upload = data.pop('file', None)
    if not upload:
        return data

    frbr_uri = self.validate_frbr_uri(data.get('frbr_uri'))

    try:
        # import options
        opts = data.get('file_options', {})
        posn = opts.get('section_number_position', 'guess')
        cropbox = opts.get('cropbox', None)
        if cropbox:
            cropbox = cropbox.split(',')

        request = self.context['request']
        country = data.get('country') or request.user.editor.country_code

        importer = plugins.for_locale('importer', country, None, None)
        importer.section_number_position = posn
        importer.cropbox = cropbox
        document = importer.import_from_upload(upload, frbr_uri, request)
    except ValueError as e:
        # ValueError has no ``.message`` attribute on Python 3; reading it
        # here would raise AttributeError and mask the validation error.
        message = str(e)
        log.error("Error during import: %s" % message, exc_info=e)
        raise ValidationError({'file': message or "error during import"})

    data['content'] = document.content
    # add the document as an attachment
    data['source_file'] = upload
    return data
def get(self, request, country, locality=None):
    """List publications found for a country (and optional locality).

    ``country`` and ``locality`` are lower-cased before looking up the
    ``publications`` plugin. The request's query parameters are passed
    straight to the plugin's ``find_publications``.

    Returns:
        A ``Response`` with a ``publications`` list, which is empty when
        there is no plugin for the locale.

    Raises:
        ValidationError: keyed on ``message`` when the plugin raises
            ``ValueError``.
    """
    country = country.lower()
    if locality:
        locality = locality.lower()

    finder = plugins.for_locale('publications', country, None, locality)
    publications = []

    if finder:
        try:
            publications = finder.find_publications(request.GET)
        except ValueError as e:
            # ValueError has no ``.message`` attribute on Python 3; reading
            # it here would raise AttributeError and mask the validation
            # error.
            raise ValidationError({'message': str(e)})

    return Response({'publications': publications})
def form_valid(self, form):
    """Create works in bulk from the spreadsheet named in the form.

    The ``bulk-creator`` plugin for this country/locality turns the table
    fetched from ``spreadsheet_url`` into works. Links are then created,
    extra properties are added when the plugin defines any, and tasks are
    generated.

    A ``ValidationError`` raised along the way is not propagated; its
    ``message`` is passed to the template as ``error``.

    Returns:
        The rendered response, with ``works`` and ``error`` in its context.
    """
    locality_code = None
    if self.locality:
        locality_code = self.locality.code
    bulk_creator = plugins.for_locale(
        'bulk-creator', self.country.code, None, locality_code)
    extra_properties = bulk_creator.extra_properties

    works = None
    error = None
    try:
        spreadsheet_url = form.cleaned_data.get('spreadsheet_url')
        table = self.get_table(spreadsheet_url)
        works = bulk_creator.get_works(self, table)
        self.create_links(works, form)
        if extra_properties:
            self.add_extra_properties(works, extra_properties)
        self.get_tasks(works, form)
    except ValidationError as e:
        error = e.message

    context = self.get_context_data(works=works, error=error)
    return self.render_to_response(context)
def get(self, request, country, locality=None):
    """List publications found for a country (and optional locality).

    ``country`` and ``locality`` are lower-cased before looking up the
    ``publications`` plugin. The plugin is queried with the ``date``,
    ``number`` and ``publication`` query parameters plus the normalised
    country and locality.

    Returns:
        A ``Response`` with a ``publications`` list, which is empty when
        there is no plugin for the locale.

    Raises:
        ValidationError: keyed on ``message`` when the plugin raises
            ``ValueError``.
    """
    country = country.lower()
    locality = locality.lower() if locality else locality

    finder = plugins.for_locale('publications', country, None, locality)
    if not finder:
        return Response({'publications': []})

    query = request.GET
    params = {
        'date': query.get('date'),
        'number': query.get('number'),
        'publication': query.get('publication'),
        'country': country,
        'locality': locality,
    }

    try:
        found = finder.find_publications(params)
    except ValueError as e:
        raise ValidationError({'message': str(e)})

    return Response({'publications': found})
def post(self, request):
    """Parse an uploaded file or plain text into XML with the importer plugin.

    The request payload is validated with ``ParseSerializer``. The importer
    is looked up from the country, language and locality of the supplied
    FRBR URI. If the request carries a ``file`` it is imported; otherwise
    the ``content`` text is imported.

    Returns:
        A ``Response`` whose ``output`` key holds ``document.to_xml()`` for
        a fragment, or ``document.document_xml`` for a full document.

    Raises:
        ValidationError: keyed on ``file`` or ``content`` (whichever source
            was used) when the importer raises ``ValueError``; also raised
            when the importer yields nothing to parse.
    """
    serializer = ParseSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    fragment = serializer.validated_data.get('fragment')
    frbr_uri = FrbrUri.parse(serializer.validated_data.get('frbr_uri'))

    importer = plugins.for_locale(
        'importer', frbr_uri.country, frbr_uri.language, frbr_uri.locality)
    importer.fragment = fragment
    importer.fragment_id_prefix = serializer.validated_data.get('id_prefix')

    upload = self.request.data.get('file')
    # The error is reported against whichever input the caller supplied.
    source_field = 'file' if upload else 'content'

    try:
        if upload:
            # we got a file
            document = importer.import_from_upload(
                upload, frbr_uri.work_uri(), self.request)
        else:
            # plain text
            text = serializer.validated_data.get('content')
            document = importer.import_from_text(text, frbr_uri.work_uri())
    except ValueError as e:
        # ValueError has no ``.message`` attribute on Python 3; reading it
        # here would raise AttributeError and mask the validation error.
        message = str(e)
        log.error("Error during import: %s" % message, exc_info=e)
        raise ValidationError({source_field: message or "error during import"})

    if not document:
        raise ValidationError(
            "Nothing to parse! Either 'file' or 'content' must be provided.")

    # output
    if fragment:
        return Response({'output': document.to_xml()})
    return Response({'output': document.document_xml})