def get_frbr_uri(self, row):
    """ Build the lower-cased FRBR work URI for the act described by `row`.

    `row` is read with `.get()` for the `country`, `locality`, `subtype`,
    `year` and `number` keys. The doctype is always `'act'` and no actor
    is set.
    """
    uri_parts = {
        'country': row.get('country'),
        'locality': row.get('locality'),
        'doctype': 'act',
        'subtype': row.get('subtype'),
        'date': row.get('year'),
        'number': row.get('number'),
        'actor': None,
    }
    work_uri = FrbrUri(**uri_parts).work_uri()
    return work_uri.lower()
def get_frbr_uri(self, row):
    """ Build the lower-cased FRBR work URI from the attributes of `row`.

    Reads `country`, `locality`, `doctype`, `subtype`, `year` and `number`
    from `row`; `actor` is optional and defaults to None when `row` has no
    such attribute.
    """
    uri = FrbrUri(
        country=row.country,
        locality=row.locality,
        doctype=row.doctype,
        subtype=row.subtype,
        date=row.year,
        number=row.number,
        # not every row has an actor
        actor=getattr(row, 'actor', None),
    )
    work_uri = uri.work_uri()
    return work_uri.lower()
def test_dont_link_constitution_in_constitution(self):
    # Mentions of the Constitution inside the Constitution's own document
    # must be left alone: the XML after reference-finding has to equal the
    # XML before it.
    work = Work(frbr_uri='/akn/za/act/1996/constitution')
    document = Document(work=work, content=document_fixture(xml=""" <section id="section-1"> <num>1.</num> <heading>Tester</heading> <paragraph id="section-1.paragraph-0"> <content> <p>the Constitution</p> <p>the Constitution, 1996</p> <p>the Constitution of South Africa</p> <p>the Constitution of the Republic of South Africa</p> <p>the Constitution of the Republic of South Africa, 1996</p> <p>the Constitution of the Republic of South Africa 1996</p> <p>the Constitution of the Republic of South Africa Act, 1996</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act No 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa, 1996 ( Act No. 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa Act, 1996 ( Act No. 108 of 1996 )</p> <p>the Constitution of the Republic of South Africa Act 108 of 1996</p> <p>the Constitution of the Republic of South Africa (Act 108 of 1996)</p> </content> </paragraph> </section>"""))
    xml_before = document.document_xml

    # give the parsed document the same FRBR URI as the work itself
    document.doc.frbr_uri = FrbrUri.parse(work.frbr_uri)
    self.finder.find_references_in_document(document)

    self.assertMultiLineEqual(xml_before, document.document_xml)
def parse_frbr_uri(self, frbr_uri):
    """ Parse `frbr_uri` into a :class:`FrbrUri`, filling in language defaults.

    Returns None when the string is not a valid FRBR URI, or when it is
    really an atom feed URL (number `full` or `summary` with the `atom`
    format). Otherwise returns the parsed URI with `default_language` set
    to the primary language code of `self.country`, and `language`
    defaulted to it when missing.
    """
    # reset the class-level default before parsing
    FrbrUri.default_language = None

    try:
        parsed = FrbrUri.parse(frbr_uri)
    except ValueError:
        return None

    # ensure we haven't mistaken '/za-cpt/act/by-law/2011/full.atom' for a URI
    is_atom_feed = parsed.number in ['full', 'summary'] and self.format_kwarg == 'atom'
    if is_atom_feed:
        return None

    parsed.default_language = self.country.primary_language.code
    if not parsed.language:
        parsed.language = parsed.default_language

    # in a URL like
    #
    #   /act/1980/1/toc
    #
    # don't mistake 'toc' for a language, it's really equivalent to
    #
    #   /act/1980/1/eng/toc
    #
    # if eng is the default language.
    if parsed.language == 'toc':
        parsed.language = parsed.default_language
        parsed.expression_component = 'toc'

    return parsed
def post(self, request):
    """ Parse posted plain text into XML and return it as `{'output': xml}`.

    The request data is validated with `ParseSerializer`. The importer is
    chosen by the country, language and locality of the posted FRBR URI.
    A ValueError raised while importing is logged and reported as a
    ValidationError on the `content` field.
    """
    serializer = ParseSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    data = serializer.validated_data

    fragment = data.get('fragment')
    frbr_uri = FrbrUri.parse(data.get('frbr_uri'))

    importer = plugins.for_locale(
        'importer', frbr_uri.country, frbr_uri.language, frbr_uri.locality)
    importer.fragment = fragment
    importer.fragment_id_prefix = data.get('id_prefix')

    try:
        text = data.get('content')
        xml = importer.import_from_text(text, frbr_uri.work_uri(), '.txt')
    except ValueError as e:
        message = str(e)
        log.error("Error during import: %s" % message, exc_info=e)
        raise ValidationError({'content': message or "error during import"})

    # parse and re-serialize the XML to ensure it's clean, and sort out encodings
    clean_xml = Base(xml).to_xml()

    # output
    return Response({'output': clean_xml})
def get(self, request, **kwargs):
    """ Retrieve a single document, or list documents by URI prefix.

    The `frbr_uri` URL kwarg is first parsed as an FRBR URI. If that
    succeeds the matching document is retrieved; any ValueError raised
    along the way (including by the retrieval itself) makes this fall back
    to listing documents instead.
    """
    # try parse it as an FRBR URI, if that succeeds, we'll lookup the document
    # that document matches, otherwise we'll assume they're trying to
    # list documents with a prefix URI match.
    try:
        uri = FrbrUri.parse(self.kwargs['frbr_uri'])
        self.frbr_uri = uri

        # ensure we haven't mistaken '/za-cpt/act/by-law/2011/full.atom' for a URI
        is_atom_feed = uri.number in ['full', 'summary'] and self.format_kwarg == 'atom'
        if is_atom_feed:
            raise ValueError()

        # in a URL like
        #
        #   /act/1980/1/toc
        #
        # don't mistake 'toc' for a language, it's really equivalent to
        #
        #   /act/1980/1/eng/toc
        #
        # if eng is the default language.
        if uri.language == 'toc':
            uri.language = uri.default_language
            uri.expression_component = 'toc'

        return self.retrieve(request)
    except ValueError:
        return self.list(request)
def validate_frbr_uri(self, value):
    """ Validate `value` as an FRBR URI and return its work URI.

    The value is lower-cased before parsing. An empty value or one that
    fails to parse raises ValidationError.
    """
    try:
        if value:
            parsed = FrbrUri.parse(value.lower())
            return parsed.work_uri()
        # empty values are handled the same way as unparseable ones
        raise ValueError()
    except ValueError:
        raise ValidationError("Invalid FRBR URI: %s" % value)
def find_work(self, given_string):
    """ The string we get from the spreadsheet could be e.g.
        `/ug/act/1933/14 - Administrator-General’s Act` (new and preferred style)
        `Administrator-General’s Act` (old style)

    First see if the string before the first space can be parsed as an
    FRBR URI, and find a work based on that.
    If not, assume a title has been given and try to match on the whole string.

    Returns the matching work, or None when the string is empty or blank,
    no work has that FRBR URI, or the title doesn't match exactly one work
    in this country and locality.
    """
    tokens = given_string.split() if given_string else []
    if not tokens:
        # an empty or whitespace-only cell has nothing to match on
        return None

    first = tokens[0]
    try:
        FrbrUri.parse(first)
    except ValueError:
        # not an FRBR URI, so treat the whole string as a title; fetching at
        # most two rows is enough to tell whether the match is unique
        matches = list(Work.objects.filter(
            title=given_string, country=self.country, locality=self.locality)[:2])
        return matches[0] if len(matches) == 1 else None
    else:
        return Work.objects.filter(frbr_uri=first).first()
def get_for_frbr_uri(self, frbr_uri):
    """ Find a single document matching the FRBR URI.

    `frbr_uri` may be a :class:`FrbrUri` or a value that `FrbrUri.parse`
    accepts. Documents are matched on the work URI, narrowed by language
    when the URI has one, and then by expression date:

    - no expression date: the latest document at or before today
    - `@`: the earliest document
    - `@<date>`: the document at exactly that date
    - `:<date>`: the latest document at or before that date

    Raises ValueError if any part of the URI isn't valid, if no document
    matches, or if the match is not in the requested language.

    See http://docs.oasis-open.org/legaldocml/akn-nc/v1.0/cs01/akn-nc-v1.0-cs01.html#_Toc492651893
    """
    uri = frbr_uri if isinstance(frbr_uri, FrbrUri) else FrbrUri.parse(frbr_uri)

    documents = self.filter(frbr_uri=uri.work_uri())

    # filter on language
    if uri.language:
        documents = documents.filter(language__language__iso_639_2B=uri.language)

    # filter on expression date; no expression date is equivalent to the
    # "current" version, at time of retrieval
    expr_date = uri.expression_date or ':' + datetime.date.today().strftime("%Y-%m-%d")

    try:
        marker, date_part = expr_date[0], expr_date[1:]

        if marker == '@' and not date_part:
            # earliest document
            documents = documents.order_by('expression_date')

        elif marker == '@':
            # document at this date
            on_date = parse_date(date_part).date()
            documents = documents.filter(expression_date=on_date)

        elif marker == ':':
            # latest document at or before this date
            cutoff = parse_date(date_part).date()
            documents = documents.filter(expression_date__lte=cutoff)
            documents = documents.order_by('-expression_date')

        else:
            raise ValueError("The expression date %s is not valid" % expr_date)

    except ParseError:
        raise ValueError("The expression date %s is not valid" % expr_date)

    document = documents.first()
    if document is None:
        raise ValueError("Document doesn't exist")

    if uri.language and document.language.code != uri.language:
        raise ValueError(
            "The document %s exists but is not available in the language '%s'"
            % (uri.work_uri(), uri.language))

    return document
def new_frbr_uri(self, uri, forward, for_work=True):
    """ Return `uri` as a string with its prefix set to 'akn' (when
    `forward` is truthy) or removed (when it isn't).

    `uri` may be a :class:`FrbrUri` or a value that `FrbrUri.parse`
    accepts. The work URI is returned when `for_work` is truthy, otherwise
    the expression URI.
    """
    parsed = uri if isinstance(uri, FrbrUri) else FrbrUri.parse(uri)
    # note: when a FrbrUri instance is passed in, its prefix is changed in place
    parsed.prefix = 'akn' if forward else None
    return parsed.work_uri() if for_work else parsed.expression_uri()
def parse_frbr_uri(self, frbr_uri):
    """ Parse `frbr_uri` into a :class:`FrbrUri`, or return None if it is
    not valid.

    The parsed URI gets `default_language` set to the primary language code
    of `self.country`; `language` falls back to that default when the URI
    doesn't specify one.
    """
    # reset the class-level default before parsing
    FrbrUri.default_language = None

    try:
        parsed = FrbrUri.parse(frbr_uri)
    except ValueError:
        return None

    default_language = self.country.primary_language.code
    parsed.default_language = default_language
    if not parsed.language:
        parsed.language = parsed.default_language

    return parsed
def validate_frbr_uri(self, value):
    """ Validate `value` as an FRBR URI and return its work URI.

    The value is lower-cased before parsing. An empty value or one that
    fails to parse raises ValidationError. The work URI is then looked up
    with `Work.objects.get_for_frbr_uri`.
    """
    try:
        if value:
            work_uri = FrbrUri.parse(value.lower()).work_uri()
        else:
            raise ValueError()
    except ValueError:
        raise ValidationError("Invalid FRBR URI: %s" % value)

    # does a work exist for this frbr_uri?
    # raises ValueError if it doesn't
    # NOTE(review): this lookup sits outside the try block, so its ValueError
    # reaches the caller as-is rather than as a ValidationError -- confirm
    # that is intended.
    Work.objects.get_for_frbr_uri(work_uri)

    return work_uri
def parse_frbr_uri(self, frbr_uri):
    """ Parse `frbr_uri` and store the result on `self.frbr_uri`.

    The country is looked up from the parsed URI; its primary language code
    becomes the URI's default language, and also its language when the URI
    doesn't specify one. Raises Http404 if the country doesn't exist. A
    ValueError from `FrbrUri.parse` is not caught here.
    """
    # reset the class-level default before parsing
    FrbrUri.default_language = None
    parsed = FrbrUri.parse(frbr_uri)
    self.frbr_uri = parsed

    # validate the country and set the default language
    try:
        country = Country.for_frbr_uri(parsed)
        parsed.default_language = country.primary_language.code
    except Country.DoesNotExist:
        raise Http404("Country %s from FRBR URI not found" % parsed.country)

    if not parsed.language:
        parsed.language = parsed.default_language
def randomized(cls, frbr_uri, **kwargs):
    """ Helper to return a new document with a random FRBR URI.

    NOTE(review): nothing in this body randomizes anything; the document is
    built from the given `frbr_uri` -- confirm the name is still accurate.

    The work is looked up from the URI's work URI and the language is the
    primary language of the URI's country; both are passed to `cls` along
    with any other `kwargs`. `copy_attributes()` is called on the new
    document before it is returned.
    """
    # imported here rather than at module level, presumably to avoid circular imports
    from .works import Work
    from .places import Country

    parsed = FrbrUri.parse(frbr_uri)
    work = Work.objects.get_for_frbr_uri(parsed.work_uri())
    language = Country.for_frbr_uri(parsed).primary_language
    kwargs.update(work=work, language=language)

    document = cls(
        frbr_uri=parsed.work_uri(False),
        expression_date=parsed.expression_date,
        **kwargs)
    document.copy_attributes()
    return document
def clean(self):
    """ Validate `self.frbr_uri` and rewrite it in normalised form.

    The URI is parsed (raising ValidationError if invalid) and rebuilt with
    an `/akn/` prefix and this object's own country code (plus
    `-<locality code>` when a locality is set), then lower-cased.
    """
    # validate and clean the frbr_uri
    try:
        parsed = FrbrUri.parse(self.frbr_uri)
        work_uri = parsed.work_uri(work_component=False)
    except ValueError:
        raise ValidationError("Invalid FRBR URI")

    # Assume frbr_uri starts with /akn; `rest` is everything after the country/locality, e.g.
    # in `/akn/za-wc/act/2000/12`, `rest` is `act/2000/12`.
    rest = work_uri.split('/', 3)[3]

    # force akn prefix, country and locality codes in frbr uri
    place_codes = [self.country.code]
    if self.locality:
        place_codes.append(self.locality.code)
    prefix = '/akn/' + '-'.join(place_codes)

    self.frbr_uri = f'{prefix}/{rest}'.lower()
def test_dont_find_self(self):
    # A document must not get a link to its own work: in the expected XML
    # "Act 1 of 1991" stays plain text while the other two acts are wrapped
    # in <ref> elements.
    actual = Document(work=self.work, frbr_uri=self.work.frbr_uri, document_xml=document_fixture(xml=""" <section eId="sec_1"> <num>1.</num> <heading>Tester</heading> <paragraph eId="sec_1.paragraph-0"> <content> <p>Something to do with Act 1 of 1991.</p> <p>Something to do with Act no 22 of 2012.</p> <p>And another thing about Act 4 of 1998.</p> </content> </paragraph> </section>"""), language=self.eng)

    expected = Document(work=self.work, document_xml=document_fixture(xml=""" <section eId="sec_1"> <num>1.</num> <heading>Tester</heading> <paragraph eId="sec_1.paragraph-0"> <content> <p>Something to do with Act 1 of 1991.</p> <p>Something to do with Act <ref href="/akn/za/act/2012/22">no 22 of 2012</ref>.</p> <p>And another thing about Act <ref href="/akn/za/act/1998/4">4 of 1998</ref>.</p> </content> </paragraph> </section>"""), language=self.eng)

    # give the parsed document the same FRBR URI as the work itself
    actual.doc.frbr_uri = FrbrUri.parse(self.work.frbr_uri)
    self.finder.find_references_in_document(actual)

    # round-trip the expected XML through lxml before comparing
    expected_root = etree.fromstring(expected.content)
    expected.content = etree.tostring(expected_root, encoding='utf-8').decode('utf-8')

    self.assertEqual(expected.content, actual.content)
def post(self, request):
    """ Parse an uploaded file or posted plain text and return it as
    `{'output': xml}`.

    The request data is validated with `ParseSerializer`. The importer is
    chosen by the country, language and locality of the posted FRBR URI.
    If the request has a `file` it is imported, otherwise the `content`
    text is. A ValueError raised while importing is logged and reported as
    a ValidationError on the `file` or `content` field respectively.

    Raises ValidationError if the import produced no document.
    """
    serializer = ParseSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    fragment = serializer.validated_data.get('fragment')
    frbr_uri = FrbrUri.parse(serializer.validated_data.get('frbr_uri'))

    importer = plugins.for_locale('importer', frbr_uri.country, frbr_uri.language, frbr_uri.locality)
    importer.fragment = fragment
    importer.fragment_id_prefix = serializer.validated_data.get('id_prefix')

    upload = self.request.data.get('file')
    if upload:
        # we got a file
        try:
            document = importer.import_from_upload(upload, frbr_uri.work_uri(), self.request)
        except ValueError as e:
            # exceptions have no `.message` attribute on Python 3, so use
            # str(e) to avoid an AttributeError inside this handler
            message = str(e)
            log.error("Error during import: %s" % message, exc_info=e)
            raise ValidationError({'file': message or "error during import"})
    else:
        # plain text
        try:
            text = serializer.validated_data.get('content')
            document = importer.import_from_text(text, frbr_uri.work_uri())
        except ValueError as e:
            message = str(e)
            log.error("Error during import: %s" % message, exc_info=e)
            raise ValidationError({'content': message or "error during import"})

    if not document:
        raise ValidationError("Nothing to parse! Either 'file' or 'content' must be provided.")

    # output
    if fragment:
        return Response({'output': document.to_xml()})
    else:
        return Response({'output': document.document_xml})
def work_uri(self):
    """ The FRBR Work URI as a :class:`FrbrUri` instance that uniquely
    identifies this work universally.

    Parsed from `self.frbr_uri` on first use and cached in `self._work_uri`.
    """
    cached = self._work_uri
    if cached is None:
        cached = FrbrUri.parse(self.frbr_uri)
        self._work_uri = cached
    return cached