def parse(self, data):
    """Turn one transcript record into Section and Speech objects.

    Does nothing when ``self.skip_transcript(data)`` is truthy. Otherwise
    a top-level Section is fetched or created for the transcript, and each
    non-empty item yielded by ``self.parse_transcript(data)`` becomes a
    Speech attached either to its own sub-section or, when the item has
    no section, to the top-level one. Objects built directly here are
    saved only when ``self.commit`` is truthy.

    ``data`` must provide a ``'url'`` key; ``'date'`` is optional and is
    used as the fallback date for items that carry no date of their own.
    """
    if self.skip_transcript(data):
        return

    default_date = data.get('date')
    top_section = self.get_or_create(
        Section,
        instance=self.instance,
        source_url=data['url'],
        heading=self.top_section_heading(data),
        parent=self.get_parent_section(data),
    )

    for entry in self.parse_transcript(data):
        if not entry:
            continue

        # Resolve the section: fall back to the transcript-level section
        # unless the parsed item names one of its own.
        section = top_section
        if entry.section:
            section = entry.section.object
            if not section:
                section = Section(
                    instance=self.instance,
                    heading=self.prettify(entry.section.heading),
                    parent=top_section,
                )
                if self.commit:
                    section.save()
                # Remember the Section on the parsed section so that it is
                # found by the check above the next time it is seen.
                entry.section.object = section

        # Resolve the speaker, if the parsed item has one.
        speaker = None
        if entry.speaker:
            speaker = self.get_or_create(
                Speaker,
                instance=self.instance,
                name=self.prettify(entry.speaker),
            )

        # Items without an explicit type count as speeches when anyone is
        # credited with them, and as narrative otherwise.
        if not entry.type:
            if speaker or entry.speaker_display:
                entry.type = 'speech'
            else:
                entry.type = 'narrative'

        # Each element of entry.text is a sequence of strings making up
        # one paragraph; wrap every paragraph in <p>...</p>.
        paragraphs = (' '.join(parts) for parts in entry.text)
        html = '<p>%s</p>' % ('</p>\n<p>'.join(paragraphs))

        new_speech = Speech(
            instance=self.instance,
            section=section,
            text=html,
            speaker=speaker,
            speaker_display=entry.speaker_display,
            type=entry.type,
            start_date=entry.date or default_date,
            start_time=entry.time,
        )
        if self.commit:
            new_speech.save()
def parse(self, data):
    """Build a Speech from a five-item record.

    ``data`` is unpacked as ``(url, date, heading, speaker, text)``. The
    text and speaker are first passed through ``parse_speech``; the
    resulting speaker name is looked up (or created) as a Speaker, and a
    Speech of type ``'speech'`` is constructed from the pieces. The Speech
    is saved only when ``self.commit`` is truthy.
    """
    source_url, start_date, heading, raw_speaker, raw_text = data
    body, speaker_name = parse_speech(raw_text, raw_speaker)

    new_speech = Speech(
        instance=self.instance,
        text=body,
        speaker=self.get_or_create(
            Speaker,
            instance=self.instance,
            name=speaker_name,
        ),
        start_date=start_date,
        heading=heading,
        source_url=source_url,
        type='speech',
    )
    if self.commit:
        new_speech.save()