def handle(self, *args, **options):
    """Import the XML file of each selected ``Source`` via ``ImportAkomaNtoso``.

    Options read: ``limit``, ``instance``, ``reimport`` and ``id``.

    Only sources with a non-null ``last_processing_success`` are considered.
    Unless ``reimport`` or ``id`` is given, sources that already have a
    ``sayit_section`` are left out; ``id`` narrows the run to one source.

    Raises:
        CommandError: if no ``Instance`` has the requested label.
    """
    max_sources = options['limit']

    try:
        instance = Instance.objects.get(label=options['instance'])
    except Instance.DoesNotExist:
        raise CommandError(
            "Instance specified not found (%s)" % options['instance'])

    candidates = Source.objects.filter(last_processing_success__isnull=False)
    if not options['reimport'] and not options['id']:
        candidates = candidates.filter(sayit_section__isnull=True)
    if options['id']:
        candidates = candidates.filter(id=options['id'])

    imported = []
    hansard_tag, _tag_created = Tag.objects.get_or_create(
        instance=instance, name="hansard")

    if max_sources:
        candidates = candidates[:max_sources]
    else:
        candidates = candidates.all()

    for source in candidates:
        xml_path = source.xml_file_path()
        if not xml_path:
            continue

        importer = ImportAkomaNtoso(
            instance=instance,
            popit_url='http://za-peoples-assembly.popit.mysociety.org/api/v0.1/')
        try:
            self.stdout.write("TRYING %s\n" % xml_path)
            new_section = importer.import_document(xml_path)
        except Exception as e:
            # Best effort: report the failure and carry on with the next source.
            self.stderr.write(
                'WARN: failed to import %d: %s' % (source.id, str(e)))
            continue

        imported.append(new_section)

        # Record on the source which section it produced and when.
        source.sayit_section = new_section
        source.last_sayit_import = datetime.datetime.now(pytz.utc)
        source.save()

        for speech in new_section.descendant_speeches():
            speech.tags.add(hansard_tag)

        # Get or create the chain of parent sections named by the source
        # and hang the freshly imported section underneath it.
        new_section.parent = Section.objects.get_or_create_with_parents(
            instance=instance, titles=source.section_parent_titles)
        new_section.save()

    summary = 'Imported %d / %d sections\n' % (len(imported), len(candidates))
    self.stdout.write(summary)
    imported_ids = [section.id for section in imported]
    self.stdout.write(str(imported_ids))
    self.stdout.write('\n')
def test_empty_title(self):
    """Import a document with untitled sections under every clobber mode.

    NOTE: the expectations are dicts keyed by section title. A dict literal
    that repeats a key keeps only the last value, so two sections titled
    'Untitled' can never both be compared here. Each literal below lists
    every title once, with the value that is effectively asserted; the
    per-section intent is recorded in the comments.
    """
    fixture = 'speeches/fixtures/test_inputs/test_empty_title.xml'

    # Intended sections: Untitled [Hello], Untitled [Howdy], Conclusions [Bye].
    single_import = {
        'Untitled': ['<p>Howdy</p>'],
        'Conclusions': ['<p>Bye</p>'],
    }

    self.importer.import_document(fixture)
    self.assertEqual(self._list_sections(), single_import)

    # 'skip' is expected to leave the existing sections as they are.
    ImportAkomaNtoso(
        instance=self.instance, commit=True,
        clobber='skip').import_document(fixture)
    self.assertEqual(self._list_sections(), single_import)

    # Intended after 'merge': Untitled [Hello, Hello, Howdy],
    # Untitled [Howdy], Conclusions [Bye, Bye].
    ImportAkomaNtoso(
        instance=self.instance, commit=True,
        clobber='merge').import_document(fixture)
    self.assertEqual(
        self._list_sections(),
        {
            'Untitled': ['<p>Howdy</p>'],
            'Conclusions': ['<p>Bye</p>', '<p>Bye</p>'],
        })

    # 'replace' is expected to bring back the single-import layout.
    ImportAkomaNtoso(
        instance=self.instance, commit=True,
        clobber='replace').import_document(fixture)
    self.assertEqual(self._list_sections(), single_import)

    # Without a clobber mode the document is expected to be imported a
    # second time; the duplicated titles still collapse to one entry each.
    ImportAkomaNtoso(instance=self.instance, commit=True).import_document(
        fixture)
    self.assertEqual(self._list_sections(), single_import)
def form_valid(self, form):
    """Run the Akoma Ntoso import described by a valid form.

    On success a message is queued reporting how many speakers, sections
    and speeches were created (or that there was nothing new), and the
    parent class's ``form_valid`` result is returned. If the import raises,
    a non-field error is attached to the form and ``form_invalid`` is
    returned instead.
    """
    try:
        importer = ImportAkomaNtoso(
            instance=self.request.instance,
            commit=True,
            clobber=form.cleaned_data.get('existing_sections'),
        )
        stats = importer.import_document(form.cleaned_data['location'])
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit are not turned into a form error.
        form._errors[NON_FIELD_ERRORS] = form.error_class(
            [_('Sorry - something went wrong with the import')])
        return self.form_invalid(form)

    # ``stats`` maps model classes to the number of objects created.
    speakers = stats.get(Speaker, 0)
    sections = stats.get(Section, 0)
    speeches = stats.get(Speech, 0)

    success = speakers or sections or speeches

    if success:
        messages.add_message(
            self.request,
            messages.SUCCESS,
            _('Created: ') + ', '.join((
                ungettext(
                    "%(speakers)d speaker",
                    "%(speakers)d speakers",
                    speakers,
                ) % {'speakers': speakers},
                ungettext(
                    "%(sections)d section",
                    "%(sections)d sections",
                    sections,
                ) % {'sections': sections},
                ungettext(
                    "%(speeches)d speech",
                    "%(speeches)d speeches",
                    speeches,
                ) % {'speeches': speeches},
            )))
    else:
        messages.add_message(
            self.request,
            messages.INFO,
            _('Nothing new to import.'),
        )

    return super(AkomaNtosoImportView, self).form_valid(form)
def form_valid(self, form):
    """Import the document named in the form and report what was created.

    Returns the parent class's ``form_valid`` response after queuing either
    a success message (counts of new speakers, sections and speeches) or an
    informational "nothing new" message. When the import raises, the form
    receives a non-field error and ``form_invalid`` is returned.
    """
    try:
        importer = ImportAkomaNtoso(
            instance=self.request.instance,
            commit=True,
            clobber=form.cleaned_data.get('existing_sections'),
        )
        stats = importer.import_document(form.cleaned_data['location'])
    except Exception:
        # A bare ``except`` would also swallow KeyboardInterrupt and
        # SystemExit; only ordinary errors become a form error.
        form._errors[NON_FIELD_ERRORS] = form.error_class(
            [_('Sorry - something went wrong with the import')])
        return self.form_invalid(form)

    # Creation counts, looked up by model class.
    speakers = stats.get(Speaker, 0)
    sections = stats.get(Section, 0)
    speeches = stats.get(Speech, 0)

    success = speakers or sections or speeches

    if success:
        messages.add_message(
            self.request,
            messages.SUCCESS,
            _('Created: ') + ', '.join(
                (
                    ungettext(
                        "%(speakers)d speaker",
                        "%(speakers)d speakers",
                        speakers,
                    ) % {'speakers': speakers},
                    ungettext(
                        "%(sections)d section",
                        "%(sections)d sections",
                        sections,
                    ) % {'sections': sections},
                    ungettext(
                        "%(speeches)d speech",
                        "%(speeches)d speeches",
                        speeches,
                    ) % {'speeches': speeches},
                )
            )
        )
    else:
        messages.add_message(
            self.request,
            messages.INFO,
            _('Nothing new to import.'),
        )

    return super(AkomaNtosoImportView, self).form_valid(form)
class AkomaNtosoImportTestCase(InstanceTestCase):
    """Import XML fixtures with ``ImportAkomaNtoso`` and inspect the results."""

    def setUp(self):
        """Give every test a committing importer for ``self.instance``."""
        super(AkomaNtosoImportTestCase, self).setUp()
        self.importer = ImportAkomaNtoso(instance=self.instance, commit=True)

    def test_import_sample_file(self):
        """The sample debate yields speeches of the expected types, in order."""
        fixture = 'speeches/fixtures/test_inputs/Debate_Bungeni_1995-10-31.xml'
        self.importer.import_document(fixture)

        # As a first sanity check, only the kind and order of the imported
        # speeches is verified.
        expected_types = [
            u'scene', u'other', u'narrative', u'speech', u'question',
            u'summary', u'speech', u'answer', u'narrative', u'speech',
            u'narrative',
        ]
        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, expected_types)

    def test_xpath_preface_elements(self):
        """The section title and speech date are picked up from the fixture."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')

        titles = [section.title for section in Section.objects.all()]
        self.assertEqual(titles, ['This is the title'])

        dates = [speech.start_date for speech in Speech.objects.all()]
        self.assertEqual(dates, [datetime.date(2014, 7, 24)])

    def test_unicode_character(self):
        """The unicode fixture imports as a single speech of type 'other'."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_unicode_character.xml')

        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, ['other'])

    def test_blank_speakers(self):
        """Check speaker and speaker_display on the first four speeches."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_blank_speakers.xml')

        speaker = Speaker.objects.get(name='Speaker')
        speeches = Speech.objects.all()
        only_speeches = Speech.objects.filter(type='speech')
        # Every imported item must be of type 'speech'.
        self.assertEqual(speeches.count(), only_speeches.count())

        # (speaker, speaker_display) expected at positions 0..3: the speaker
        # is set on odd positions, the display name from position 2 onwards.
        expected = [
            (None, None),
            (speaker, None),
            (None, 'Speaker'),
            (speaker, 'Speaker'),
        ]
        for position, (want_speaker, want_display) in enumerate(expected):
            self.assertEqual(speeches[position].speaker, want_speaker)
            self.assertEqual(speeches[position].speaker_display, want_display)
def test_already_imported(self):
    """Re-import over an existing section with each clobber mode in turn.

    The expectations are ordered lists of ``(title, speech texts)`` pairs
    as returned by ``self._list_sections()``.
    """
    clobber_doc = 'speeches/tests/data/fake_http/test_clobber.xml'
    first_import = [('This is the title', ['<p>Hello</p>'])]
    clobber_only = [
        ('This is the title', ['<p>Howdy</p>']),
        ('Conclusions', ['<p>Bye</p>']),
    ]

    self.importer.import_document(
        'speeches/fixtures/test_inputs/test_xpath.xml')
    self.assertEqual(self._list_sections(), first_import)

    # skip: nothing changes.
    skipping = ImportAkomaNtoso(
        instance=self.instance, commit=True, clobber='skip')
    skipping.import_document(clobber_doc)
    self.assertEqual(self._list_sections(), first_import)

    # merge: the new speech is appended and the new section is added.
    merging = ImportAkomaNtoso(
        instance=self.instance, commit=True, clobber='merge')
    merging.import_document(clobber_doc)
    self.assertEqual(
        self._list_sections(),
        [
            ('This is the title', ['<p>Hello</p>', '<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ])

    # replace: only the content of the new document remains.
    replacing = ImportAkomaNtoso(
        instance=self.instance, commit=True, clobber='replace')
    replacing.import_document(clobber_doc)
    self.assertEqual(self._list_sections(), clobber_only)

    # No clobber mode: the document's sections appear a second time.
    plain = ImportAkomaNtoso(instance=self.instance, commit=True)
    plain.import_document(clobber_doc)
    self.assertEqual(self._list_sections(), clobber_only + clobber_only)
def test_not_already_imported(self):
    """With no sections present, every clobber mode imports the same data.

    Each round imports the document, compares ``self._list_sections()``
    with the expected mapping and then deletes all sections again.
    """
    clobber_doc = 'speeches/tests/data/fake_http/test_clobber.xml'

    # The last entry constructs the importer without a clobber argument.
    importer_options = (
        {'clobber': 'skip'},
        {'clobber': 'merge'},
        {'clobber': 'replace'},
        {},
    )
    for extra in importer_options:
        fresh = ImportAkomaNtoso(
            instance=self.instance, commit=True, **extra)
        fresh.import_document(clobber_doc)
        self.assertEqual(self._list_sections(), {
            'This is the title': ['<p>Howdy</p>'],
            'Conclusions': ['<p>Bye</p>'],
        })
        Section.objects.all().delete()
def test_import(self):
    """Import NA200912.xml and check titles and resolved speakers.

    The import must return a section, no child section title may contain
    the mis-capitalised "Member'S", and at least 48 speakers must have a
    ``person`` attached.
    """
    document_path = os.path.join(self._in_fixtures, 'NA200912.xml')

    an = ImportAkomaNtoso(instance=self.instance, commit=True,
                          popit_url=popit_url, title_case=True)
    section = an.import_document(document_path)

    self.assertIsNotNone(section)

    # Check that all the sections have correct looking titles
    for sub in section.children.all():
        self.assertNotIn("Member'S", sub.title)

    speakers = Speaker.objects.all()
    # Build a list rather than calling filter(): on Python 3 filter()
    # returns an iterator, which has no len().
    resolved = [s for s in speakers if s.person is not None]
    threshold = 48

    logging.info(
        "%d above threshold %d/%d?",
        len(resolved), threshold, len(speakers))

    self.assertGreaterEqual(
        len(resolved), threshold,
        "%d above threshold %d/%d"
        % (len(resolved), threshold, len(speakers)))
def setUp(self):
    """Run the parent set-up, then attach a committing importer."""
    super(AkomaNtosoImportTestCase, self).setUp()
    importer = ImportAkomaNtoso(instance=self.instance, commit=True)
    self.importer = importer
class AkomaNtosoImportTestCase(InstanceTestCase):
    """Tests for ``ImportAkomaNtoso``, including its clobber modes.

    Section contents are compared as ordered ``(title, speech texts)``
    pairs. Keying by title would merge sections that share a title (for
    example two 'Untitled' sections) and hide differences between them.
    """

    def setUp(self):
        """Create a committing importer bound to the test instance."""
        super(AkomaNtosoImportTestCase, self).setUp()
        self.importer = ImportAkomaNtoso(instance=self.instance, commit=True)

    def _list_sections(self):
        """Return ``[(section title, [speech texts]), ...]`` for all sections.

        One pair is produced per section, in the order
        ``Section.objects.all()`` yields them, so sections with identical
        titles stay distinct. Speeches without a section are ignored.
        """
        texts_by_section = {
            section.id: [] for section in Section.objects.all()}
        for speech in Speech.objects.all():
            if speech.section_id:
                texts_by_section[speech.section_id].append(speech.text)
        return [(section.title, texts_by_section[section.id])
                for section in Section.objects.all()]

    def test_import_sample_file(self):
        """The sample debate yields speeches of the expected types, in order."""
        self.importer.import_document(
            'speeches/tests/data/fake_http/Debate_Bungeni_1995-10-31.xml')

        # To get us started, let's just check that we get the right kind of
        # speech in the right order.
        self.assertEqual([x.type for x in Speech.objects.all()], [
            u'scene', u'other', u'narrative', u'speech', u'question',
            u'summary', u'speech', u'answer', u'narrative', u'speech',
            u'narrative'
        ])

    def test_already_imported(self):
        """Re-import over an existing section with each clobber mode."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')
        self.assertEqual(
            self._list_sections(),
            [('This is the title', ['<p>Hello</p>'])])

        ImportAkomaNtoso(instance=self.instance, commit=True,
                         clobber='skip').import_document(
            'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(
            self._list_sections(),
            [('This is the title', ['<p>Hello</p>'])])

        ImportAkomaNtoso(instance=self.instance, commit=True,
                         clobber='merge').import_document(
            'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(
            self._list_sections(),
            [
                ('This is the title', ['<p>Hello</p>', '<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
            ])

        ImportAkomaNtoso(instance=self.instance, commit=True,
                         clobber='replace').import_document(
            'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(
            self._list_sections(),
            [
                ('This is the title', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
            ])

        # Without a clobber mode the sections are created a second time.
        ImportAkomaNtoso(instance=self.instance, commit=True).import_document(
            'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(
            self._list_sections(),
            [
                ('This is the title', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
                ('This is the title', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
            ])

    def test_not_already_imported(self):
        """With no sections present, all clobber modes import the same data."""
        expected = [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ]

        ImportAkomaNtoso(instance=self.instance, commit=True,
                         clobber='skip').import_document(
            'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), expected)
        Section.objects.all().delete()

        ImportAkomaNtoso(instance=self.instance, commit=True,
                         clobber='merge').import_document(
            'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), expected)
        Section.objects.all().delete()

        ImportAkomaNtoso(instance=self.instance, commit=True,
                         clobber='replace').import_document(
            'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), expected)
        Section.objects.all().delete()

        ImportAkomaNtoso(instance=self.instance, commit=True).import_document(
            'speeches/tests/data/fake_http/test_clobber.xml')
        self.assertEqual(self._list_sections(), expected)
        Section.objects.all().delete()

    def test_empty_title(self):
        """Sections without a title are listed as separate 'Untitled' pairs."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(
            self._list_sections(),
            [
                ('Untitled', ['<p>Hello</p>']),
                ('Untitled', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
            ])

        ImportAkomaNtoso(
            instance=self.instance, commit=True,
            clobber='skip').import_document(
            'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(
            self._list_sections(),
            [
                ('Untitled', ['<p>Hello</p>']),
                ('Untitled', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
            ])

        ImportAkomaNtoso(
            instance=self.instance, commit=True,
            clobber='merge').import_document(
            'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(
            self._list_sections(),
            [
                ('Untitled', ['<p>Hello</p>', '<p>Hello</p>', '<p>Howdy</p>']),
                ('Untitled', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>', '<p>Bye</p>']),
            ])

        ImportAkomaNtoso(
            instance=self.instance, commit=True,
            clobber='replace').import_document(
            'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(
            self._list_sections(),
            [
                ('Untitled', ['<p>Hello</p>']),
                ('Untitled', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
            ])

        # Without a clobber mode the sections are created a second time.
        ImportAkomaNtoso(instance=self.instance, commit=True).import_document(
            'speeches/fixtures/test_inputs/test_empty_title.xml')
        self.assertEqual(
            self._list_sections(),
            [
                ('Untitled', ['<p>Hello</p>']),
                ('Untitled', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
                ('Untitled', ['<p>Hello</p>']),
                ('Untitled', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
            ])

    def test_empty_docDate(self):
        """Check the section's date, title and source URL for this fixture."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_empty_docDate.xml')
        self.assertEqual(
            [(x.start_date, x.title, x.source_url)
             for x in Section.objects.all()],
            [(datetime.date(2012, 3, 7), 'Title', 'http://example.org')])

    def test_xpath_preface_elements(self):
        """The section title and speech date are picked up from the fixture."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')
        self.assertEqual([x.title for x in Section.objects.all()],
                         ['This is the title'])
        self.assertEqual([x.start_date for x in Speech.objects.all()],
                         [datetime.date(2014, 7, 24)])

    def test_unicode_character(self):
        """The unicode fixture imports as a single speech of type 'other'."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_unicode_character.xml')
        self.assertEqual([x.type for x in Speech.objects.all()], ['other'])

    def test_blank_speakers(self):
        """Check speaker and speaker_display on the first four speeches."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_blank_speakers.xml')

        speaker = Speaker.objects.get(name='Speaker')
        speeches = Speech.objects.all()
        speeches_s = Speech.objects.filter(type='speech')
        # Every imported item must be of type 'speech'.
        self.assertEqual(speeches.count(), speeches_s.count())

        for i in range(4):
            # Odd positions carry the speaker; positions 2 and 3 carry the
            # display name.
            s = speaker if i % 2 else None
            sd = 'Speaker' if i > 1 else None
            self.assertEqual(speeches[i].speaker, s)
            self.assertEqual(speeches[i].speaker_display, sd)

    def test_import_remote_file(self):
        """Importing by URL yields the same speech types as the local file."""
        self.importer.import_document(
            'http://example.com/Debate_Bungeni_1995-10-31.xml')

        # To get us started, let's just check that we get the right kind of
        # speech in the right order.
        self.assertEqual([x.type for x in Speech.objects.all()], [
            u'scene', u'other', u'narrative', u'speech', u'question',
            u'summary', u'speech', u'answer', u'narrative', u'speech',
            u'narrative'
        ])
class AkomaNtosoImportTestCase(InstanceTestCase):
    """Import XML documents with ``ImportAkomaNtoso`` and inspect the results.

    Section contents are compared as ordered ``(title, speech texts)`` pairs
    produced by ``_list_sections``.
    """

    def setUp(self):
        """Give every test a committing importer for ``self.instance``."""
        super(AkomaNtosoImportTestCase, self).setUp()
        self.importer = ImportAkomaNtoso(instance=self.instance, commit=True)

    def _list_sections(self):
        """Return a list of ``(section title, [speech texts])`` pairs.

        There is one pair per section; speeches without a section are
        left out.
        """
        texts_by_id = dict(
            (section.id, []) for section in Section.objects.all())
        for speech in Speech.objects.all():
            if not speech.section_id:
                continue
            texts_by_id[speech.section_id].append(speech.text)

        pairs = []
        for section in Section.objects.all():
            pairs.append((section.title, texts_by_id[section.id]))
        return pairs

    def test_import_sample_file(self):
        """The sample debate yields speeches of the expected types, in order."""
        self.importer.import_document(
            'speeches/tests/data/fake_http/Debate_Bungeni_1995-10-31.xml')

        # As a first sanity check, only the kind and order of the imported
        # speeches is verified.
        expected_types = [
            u'scene', u'other', u'narrative', u'speech', u'question',
            u'summary', u'speech', u'answer', u'narrative', u'speech',
            u'narrative',
        ]
        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, expected_types)

    def test_already_imported(self):
        """Re-import over an existing section with each clobber mode."""
        clobber_doc = 'speeches/tests/data/fake_http/test_clobber.xml'
        first_import = [('This is the title', ['<p>Hello</p>'])]
        clobber_only = [
            ('This is the title', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ]

        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')
        self.assertEqual(self._list_sections(), first_import)

        # skip: nothing changes.
        skipping = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='skip')
        skipping.import_document(clobber_doc)
        self.assertEqual(self._list_sections(), first_import)

        # merge: the new speech is appended and the new section is added.
        merging = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='merge')
        merging.import_document(clobber_doc)
        self.assertEqual(
            self._list_sections(),
            [
                ('This is the title', ['<p>Hello</p>', '<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>']),
            ])

        # replace: only the content of the new document remains.
        replacing = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='replace')
        replacing.import_document(clobber_doc)
        self.assertEqual(self._list_sections(), clobber_only)

        # No clobber mode: the document's sections appear a second time.
        plain = ImportAkomaNtoso(instance=self.instance, commit=True)
        plain.import_document(clobber_doc)
        self.assertEqual(self._list_sections(), clobber_only + clobber_only)

    def test_not_already_imported(self):
        """With no sections present, all clobber modes import the same data."""
        clobber_doc = 'speeches/tests/data/fake_http/test_clobber.xml'

        # The last entry constructs the importer without a clobber argument.
        importer_options = (
            {'clobber': 'skip'},
            {'clobber': 'merge'},
            {'clobber': 'replace'},
            {},
        )
        for extra in importer_options:
            fresh = ImportAkomaNtoso(
                instance=self.instance, commit=True, **extra)
            fresh.import_document(clobber_doc)
            self.assertEqual(
                self._list_sections(),
                [
                    ('This is the title', ['<p>Howdy</p>']),
                    ('Conclusions', ['<p>Bye</p>']),
                ])
            Section.objects.all().delete()

    def test_empty_title(self):
        """Sections without a title are listed as separate 'Untitled' pairs."""
        fixture = 'speeches/fixtures/test_inputs/test_empty_title.xml'
        single_import = [
            ('Untitled', ['<p>Hello</p>']),
            ('Untitled', ['<p>Howdy</p>']),
            ('Conclusions', ['<p>Bye</p>']),
        ]

        self.importer.import_document(fixture)
        self.assertEqual(self._list_sections(), single_import)

        skipping = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='skip')
        skipping.import_document(fixture)
        self.assertEqual(self._list_sections(), single_import)

        merging = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='merge')
        merging.import_document(fixture)
        self.assertEqual(
            self._list_sections(),
            [
                ('Untitled', ['<p>Hello</p>', '<p>Hello</p>', '<p>Howdy</p>']),
                ('Untitled', ['<p>Howdy</p>']),
                ('Conclusions', ['<p>Bye</p>', '<p>Bye</p>']),
            ])

        replacing = ImportAkomaNtoso(
            instance=self.instance, commit=True, clobber='replace')
        replacing.import_document(fixture)
        self.assertEqual(self._list_sections(), single_import)

        # No clobber mode: all three sections appear a second time.
        plain = ImportAkomaNtoso(instance=self.instance, commit=True)
        plain.import_document(fixture)
        self.assertEqual(self._list_sections(), single_import + single_import)

    def test_empty_docDate(self):
        """Check the section's date, title and source URL for this fixture."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_empty_docDate.xml')

        found = [
            (section.start_date, section.title, section.source_url)
            for section in Section.objects.all()
        ]
        self.assertEqual(
            found,
            [(datetime.date(2012, 3, 7), 'Title', 'http://example.org')])

    def test_xpath_preface_elements(self):
        """The section title and speech date are picked up from the fixture."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_xpath.xml')

        titles = [section.title for section in Section.objects.all()]
        self.assertEqual(titles, ['This is the title'])

        dates = [speech.start_date for speech in Speech.objects.all()]
        self.assertEqual(dates, [datetime.date(2014, 7, 24)])

    def test_unicode_character(self):
        """The unicode fixture imports as a single speech of type 'other'."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_unicode_character.xml')

        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, ['other'])

    def test_blank_speakers(self):
        """Check speaker and speaker_display on the first four speeches."""
        self.importer.import_document(
            'speeches/fixtures/test_inputs/test_blank_speakers.xml')

        speaker = Speaker.objects.get(name='Speaker')
        speeches = Speech.objects.all()
        only_speeches = Speech.objects.filter(type='speech')
        # Every imported item must be of type 'speech'.
        self.assertEqual(speeches.count(), only_speeches.count())

        # (speaker, speaker_display) expected at positions 0..3: the speaker
        # is set on odd positions, the display name from position 2 onwards.
        expected = [
            (None, None),
            (speaker, None),
            (None, 'Speaker'),
            (speaker, 'Speaker'),
        ]
        for position, (want_speaker, want_display) in enumerate(expected):
            self.assertEqual(speeches[position].speaker, want_speaker)
            self.assertEqual(speeches[position].speaker_display, want_display)

    def test_import_remote_file(self):
        """Importing by URL yields the same speech types as the local file."""
        self.importer.import_document(
            'http://example.com/Debate_Bungeni_1995-10-31.xml')

        # As a first sanity check, only the kind and order of the imported
        # speeches is verified.
        expected_types = [
            u'scene', u'other', u'narrative', u'speech', u'question',
            u'summary', u'speech', u'answer', u'narrative', u'speech',
            u'narrative',
        ]
        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, expected_types)
class AkomaNtosoImportTestCase(InstanceTestCase):
    """Import XML documents with ``ImportAkomaNtoso`` and inspect the results.

    In this version ``clobber`` is passed as a boolean (or omitted).
    """

    def setUp(self):
        """Give every test a committing importer for ``self.instance``."""
        super(AkomaNtosoImportTestCase, self).setUp()
        self.importer = ImportAkomaNtoso(instance=self.instance, commit=True)

    def test_import_sample_file(self):
        """The sample debate yields speeches of the expected types, in order."""
        fixture = "speeches/fixtures/test_inputs/Debate_Bungeni_1995-10-31.xml"
        self.importer.import_document(fixture)

        # As a first sanity check, only the kind and order of the imported
        # speeches is verified.
        expected_types = [
            u"scene", u"other", u"narrative", u"speech", u"question",
            u"summary", u"speech", u"answer", u"narrative", u"speech",
            u"narrative",
        ]
        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, expected_types)

    def test_import_remote_file(self):
        """Importing by URL yields the same speech types as the local file."""
        remote_url = "http://examples.akomantoso.org/php/download.php?file=Debate_Bungeni_1995-10-31.xml"  # noqa
        self.importer.import_document(remote_url)

        # As a first sanity check, only the kind and order of the imported
        # speeches is verified.
        expected_types = [
            u"scene", u"other", u"narrative", u"speech", u"question",
            u"summary", u"speech", u"answer", u"narrative", u"speech",
            u"narrative",
        ]
        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, expected_types)

    def test_already_imported(self):
        """Re-import over an existing section with clobber off, on and unset."""
        clobber_doc = "speeches/fixtures/test_inputs/test_clobber.xml"
        self.importer.import_document(
            "speeches/fixtures/test_inputs/test_xpath.xml")

        # (extra importer kwargs, speech texts expected afterwards)
        rounds = (
            ({"clobber": False}, ["<p>Hello</p>"]),
            ({"clobber": True}, ["<p>Howdy</p>"]),
            ({}, ["<p>Howdy</p>", "<p>Howdy</p>"]),
        )
        for extra, expected_texts in rounds:
            again = ImportAkomaNtoso(
                instance=self.instance, commit=True, **extra)
            again.import_document(clobber_doc)
            texts = [speech.text for speech in Speech.objects.all()]
            self.assertEqual(texts, expected_texts)

    def test_not_already_imported(self):
        """With no sections present, every clobber setting imports the title."""
        fixture = "speeches/fixtures/test_inputs/test_xpath.xml"

        # The last entry constructs the importer without a clobber argument.
        for extra in ({"clobber": False}, {"clobber": True}, {}):
            fresh = ImportAkomaNtoso(
                instance=self.instance, commit=True, **extra)
            fresh.import_document(fixture)
            titles = [section.title for section in Section.objects.all()]
            self.assertEqual(titles, ["This is the title"])
            Section.objects.all().delete()

    def test_empty_title(self):
        """Count the empty-titled sections after each repeated import."""
        fixture = "speeches/fixtures/test_inputs/test_empty_title.xml"
        self.importer.import_document(fixture)

        # (extra importer kwargs, section titles expected afterwards)
        rounds = (
            ({"clobber": False}, ["", ""]),
            ({"clobber": True}, ["", "", ""]),
            ({}, ["", "", "", ""]),
        )
        for extra, expected_titles in rounds:
            again = ImportAkomaNtoso(
                instance=self.instance, commit=True, **extra)
            again.import_document(fixture)
            titles = [section.title for section in Section.objects.all()]
            self.assertEqual(titles, expected_titles)

    def test_xpath_preface_elements(self):
        """The section title and speech date are picked up from the fixture."""
        self.importer.import_document(
            "speeches/fixtures/test_inputs/test_xpath.xml")

        titles = [section.title for section in Section.objects.all()]
        self.assertEqual(titles, ["This is the title"])

        dates = [speech.start_date for speech in Speech.objects.all()]
        self.assertEqual(dates, [datetime.date(2014, 7, 24)])

    def test_unicode_character(self):
        """The unicode fixture imports as a single speech of type 'other'."""
        self.importer.import_document(
            "speeches/fixtures/test_inputs/test_unicode_character.xml")

        imported_types = [speech.type for speech in Speech.objects.all()]
        self.assertEqual(imported_types, ["other"])

    def test_blank_speakers(self):
        """Check speaker and speaker_display on the first four speeches."""
        self.importer.import_document(
            "speeches/fixtures/test_inputs/test_blank_speakers.xml")

        speaker = Speaker.objects.get(name="Speaker")
        speeches = Speech.objects.all()
        only_speeches = Speech.objects.filter(type="speech")
        # Every imported item must be of type 'speech'.
        self.assertEqual(speeches.count(), only_speeches.count())

        # (speaker, speaker_display) expected at positions 0..3: the speaker
        # is set on odd positions, the display name from position 2 onwards.
        expected = [
            (None, None),
            (speaker, None),
            (None, "Speaker"),
            (speaker, "Speaker"),
        ]
        for position, (want_speaker, want_display) in enumerate(expected):
            self.assertEqual(speeches[position].speaker, want_speaker)
            self.assertEqual(speeches[position].speaker_display, want_display)