def test_find_simple(self):
    """Check that a "Cap. 12" mention is marked up as a ref to the matching work.

    Configures a ``cap`` work property for ``za``, saves a work whose ``cap``
    property is ``12``, runs the finder over a document that mentions
    "Cap. 12", and expects the mention to be wrapped in a ``<ref>`` whose
    ``href`` is that work's FRBR URI.
    """
    za = Country.objects.get(pk=1)
    user1 = User.objects.get(pk=1)

    # Put WORK_PROPERTIES back the way it was once the test is over, so the
    # value set below does not leak into tests that run afterwards.
    indigo_settings = settings.INDIGO
    had_work_properties = 'WORK_PROPERTIES' in indigo_settings
    previous_work_properties = indigo_settings.get('WORK_PROPERTIES')

    def restore_work_properties():
        if had_work_properties:
            indigo_settings['WORK_PROPERTIES'] = previous_work_properties
        else:
            indigo_settings.pop('WORK_PROPERTIES', None)

    self.addCleanup(restore_work_properties)

    settings.INDIGO['WORK_PROPERTIES'] = {
        'za': {
            'cap': 'Chapter (cap)',
        }
    }

    work = Work(
        frbr_uri='/akn/za/act/2002/5',
        title='Act 5 of 2002',
        country=za,
        created_by_user=user1,
    )
    work.properties['cap'] = '12'
    work.updated_by_user = user1
    work.save()

    document = Document(
        document_xml=document_fixture(
            xml=""" <section eId="sec_1"> <num>1.</num> <heading>Tester</heading> <paragraph eId="sec_1.paragraph-0"> <content> <p>Something to do with Cap. 12.</p> </content> </paragraph> </section>"""
        ),
        language=self.eng,
        work=work)

    expected = Document(
        document_xml=document_fixture(
            xml=""" <section eId="sec_1"> <num>1.</num> <heading>Tester</heading> <paragraph eId="sec_1.paragraph-0"> <content> <p>Something to do with <ref href="/akn/za/act/2002/5">Cap. 12</ref>.</p> </content> </paragraph> </section>"""
        ),
        language=self.eng,
        work=work)

    self.finder.find_references_in_document(document)

    # Parse and re-serialise the expected XML before comparing; presumably
    # this gives it the same serialisation as the processed document.
    root = etree.fromstring(expected.content)
    expected.content = etree.tostring(root, encoding='utf-8').decode('utf-8')
    self.assertEqual(expected.content, document.content)
def get_works(self, table):
    """Create a work for each usable row of ``table`` and report the outcome.

    The first row of ``table`` holds the column headers; every later row
    describes one work. Each header is reduced to its first space-separated
    word, lower-cased. Rows that are blank, or that have a value in the
    ``ignore`` column, are skipped. An empty ``table`` yields an empty list.

    Returns a list with one dict per processed row, containing:

    * ``row``: the row's 1-based position in ``table``
    * ``status``: ``'success'``, ``'duplicate'`` or ``'error'``
    * ``work``: the newly saved or already existing work (not set on error)
    * ``error_message``: what went wrong (only set on error)
    """
    works = []
    if not table:
        return works

    # clean up headers
    headers = [h.split(' ')[0].lower() for h in table[0]]

    # transform rows into list of dicts for easy access; cells missing from
    # a short row are left out, so row.get() returns None for them
    rows = [
        {header: row[i] for i, header in enumerate(headers) if header and i < len(row)}
        for row in table[1:]
    ]

    for idx, row in enumerate(rows):
        # ignore if it's blank or explicitly marked 'ignore' in the 'ignore' column
        if row.get('ignore') or not any(row.values()):
            continue

        info = {
            'row': idx + 2,
        }
        works.append(info)

        try:
            frbr_uri = self.get_frbr_uri(self.country, self.locality, row)
        except ValueError as e:
            info['status'] = 'error'
            # exceptions have no `.message` attribute on Python 3
            info['error_message'] = e.args[0] if e.args else ''
            continue

        try:
            work = Work.objects.get(frbr_uri=frbr_uri)
            info['work'] = work
            info['status'] = 'duplicate'
            # TODO one day: also mark first work as duplicate if user is trying
            # to import two of the same (currently only the second one will be)

        except Work.DoesNotExist:
            work = Work()

            work.frbr_uri = frbr_uri
            work.title = row.get('title')
            work.country = self.country
            work.locality = self.locality
            work.publication_name = row.get('publication_name')
            work.publication_number = row.get('publication_number')
            work.created_by_user = self.request.user
            work.updated_by_user = self.request.user

            try:
                work.publication_date = self.make_date(
                    row.get('publication_date'), 'publication_date')
                work.commencement_date = self.make_date(
                    row.get('commencement_date'), 'commencement_date')
                work.assent_date = self.make_date(
                    row.get('assent_date'), 'assent_date')
                work.full_clean()
                work.save()

                # signals
                work_changed.send(sender=work.__class__, work=work, request=self.request)

                info['status'] = 'success'
                info['work'] = work

            except ValidationError as e:
                info['status'] = 'error'
                if hasattr(e, 'message_dict'):
                    info['error_message'] = ' '.join([
                        '%s: %s' % (f, '; '.join(errs))
                        for f, errs in e.message_dict.items()
                    ])
                else:
                    # `.message` is not always present; `.messages` is
                    info['error_message'] = '; '.join(e.messages)

    return works
def get_works(self, table, form):
    """Create a work for each usable row of ``table`` and report the outcome.

    The first row of ``table`` holds the column headers; every later row
    describes one work. Each header is reduced to its first space-separated
    word, lower-cased. Rows that are blank, or that have a value in the
    ``ignore`` column, are skipped. An empty ``table`` yields an empty list.

    ``form`` is handed on unchanged to ``self.get_publication_document``
    when a new work's publication document is linked.

    Returns a list with one dict per processed row, containing:

    * ``row``: the row's 1-based position in ``table``
    * ``status``: ``'success'``, ``'duplicate'`` or ``'error'``
    * ``work``: the newly saved or already existing work (not set on error)
    * ``error_message``: what went wrong (only set on error)
    * for duplicates, the row's ``amends`` and ``commencement_date`` values,
      when given
    * for new works, the row's ``commenced_by``, ``amends``, ``repealed_by``,
      ``with_effect_from`` and ``parent_work`` values, when given
    """
    works = []
    if not table:
        return works

    # clean up headers
    headers = [h.split(' ')[0].lower() for h in table[0]]

    # transform rows into list of dicts for easy access; cells missing from
    # a short row are left out, so row.get() returns None for them
    rows = [
        {header: row[i] for i, header in enumerate(headers) if header and i < len(row)}
        for row in table[1:]
    ]

    for idx, row in enumerate(rows):
        # ignore if it's blank or explicitly marked 'ignore' in the 'ignore' column
        if row.get('ignore') or not any(row.values()):
            continue

        info = {
            'row': idx + 2,
        }
        works.append(info)

        try:
            frbr_uri = self.get_frbr_uri(self.country, self.locality, row)
        except ValueError as e:
            info['status'] = 'error'
            # exceptions have no `.message` attribute on Python 3
            info['error_message'] = e.args[0] if e.args else ''
            continue

        try:
            work = Work.objects.get(frbr_uri=frbr_uri)
            info['work'] = work
            info['status'] = 'duplicate'
            # carry these columns over for the existing work
            for key in ('amends', 'commencement_date'):
                if row.get(key):
                    info[key] = row.get(key)

        except Work.DoesNotExist:
            work = Work()

            work.frbr_uri = frbr_uri
            work.title = self.strip_title_string(row.get('title'))
            work.country = self.country
            work.locality = self.locality
            work.publication_name = row.get('publication_name')
            work.publication_number = row.get('publication_number')
            work.created_by_user = self.request.user
            work.updated_by_user = self.request.user
            # a work is a stub unless the 'primary' column has a value
            work.stub = not row.get('primary')

            try:
                work.publication_date = self.make_date(row.get('publication_date'), 'publication_date')
                work.commencement_date = self.make_date(row.get('commencement_date'), 'commencement_date')
                work.assent_date = self.make_date(row.get('assent_date'), 'assent_date')
                work.full_clean()
                work.save()

                # link publication document
                params = {
                    'date': row.get('publication_date'),
                    'number': work.publication_number,
                    'publication': work.publication_name,
                    'country': self.country.place_code,
                    'locality': self.locality.code if self.locality else None,
                }
                self.get_publication_document(params, work, form)

                # signals
                work_changed.send(sender=work.__class__, work=work, request=self.request)

                info['status'] = 'success'
                info['work'] = work

                # copy over whichever of these columns the row fills in
                for key in ('commenced_by', 'amends', 'repealed_by', 'with_effect_from', 'parent_work'):
                    if row.get(key):
                        info[key] = row.get(key)

            except ValidationError as e:
                info['status'] = 'error'
                if hasattr(e, 'message_dict'):
                    info['error_message'] = ' '.join(
                        ['%s: %s' % (f, '; '.join(errs)) for f, errs in e.message_dict.items()]
                    )
                else:
                    # `.message` is not always present; `.messages` is
                    info['error_message'] = '; '.join(e.messages)

    return works