def _aparte_or_intervencao(text, limit, aparte_tag, intervencao_tag):
    """Return aparte_tag when text is shorter than limit characters, else intervencao_tag."""
    if len(text) < limit:
        return aparte_tag
    return intervencao_tag


def determine_entry_tag(e):
    """Classify a transcript entry and return its tag string.

    When the entry has a speaker (or a linked MP) the tag is chosen from the
    speaker label; otherwise it is chosen from the beginning or end of the
    entry text.

    Args:
        e: entry object exposing ``mp``, ``speaker``, ``text``, ``day.date``
            and ``save()``.

    Returns:
        One of the tag strings below, or ``''`` when no rule matches:
        deputado_*, pm_*, secestado_*, ministro_* (each ``_aparte`` or
        ``_intervencao``), continuacao, presidente, presidente_aparte,
        secretario, vozes_aparte, aplauso, protesto, riso, voto, pausa, nota,
        sumario, hora, chamada_presentes, chamada_ausentes, chamada_missao.

    Side effects:
        * 'O Orador' / 'A Oradora' entries are handed to find_cont_speaker().
        * 'Primeiro-Ministro' entries get ``e.mp`` assigned and are saved.
    """
    # A missing speaker (None) is treated like an empty one, so the prefix
    # tests below cannot raise AttributeError when only e.mp is set.
    speaker = e.speaker or ''

    is_pm = speaker.startswith('Primeiro-Ministro')
    is_minister = speaker.startswith('Ministr')
    is_secretari = speaker.startswith(u'Secretári')
    is_state_secretary = is_secretari and "Estado" in speaker

    # Entries linked to an MP are tagged as MP speech unless the speaker label
    # says the person is talking as a government member.
    if e.mp and not (is_pm or is_minister or is_state_secretary):
        return _aparte_or_intervencao(
            e.text, 60, 'deputado_aparte', 'deputado_intervencao')

    if speaker:
        if speaker in ('O Orador', 'A Oradora'):
            find_cont_speaker(e)
            return 'continuacao'
        if is_pm:
            from deputados.utils import get_pm_from_date
            from deputados.models import MP
            # NOTE(review): the helper's result is used here as an MP
            # shortname; confirm it returns a name and not an MP instance.
            pm_name = get_pm_from_date(e.day.date)
            e.mp = MP.objects.get(shortname=pm_name)
            e.save()
            return _aparte_or_intervencao(
                e.text, 30, 'pm_aparte', 'pm_intervencao')
        if is_state_secretary:
            return _aparte_or_intervencao(
                e.text, 30, 'secestado_aparte', 'secestado_intervencao')
        if is_minister:
            return _aparte_or_intervencao(
                e.text, 30, 'ministro_aparte', 'ministro_intervencao')
        if speaker.startswith('Presidente'):
            if re_concluir.search(e.text):
                return 'presidente_aparte'
            return 'presidente'
        if is_secretari and not is_state_secretary:
            return 'secretario'
        if speaker.startswith(('Vozes', 'Uma voz d')):
            return 'vozes_aparte'
        # A speaker label that matches none of the rules above.
        return ''

    # No speaker: classify from the text itself.
    text = e.text
    if text.startswith(u'Aplauso'):
        return 'aplauso'
    if text.startswith(u'Protesto'):
        return 'protesto'
    if text.startswith(u'Risos'):
        return 'riso'
    if re_voto.search(text):
        return 'voto'
    if text.strip() == 'Pausa.':
        return 'pausa'
    # The speaker is known to be empty here, so no extra speaker test is needed.
    if text.startswith('Entretanto, assumiu'):
        return 'nota'
    if text.startswith((u'SUMÁRIO', u'S U M Á R I O')):
        return 'sumario'
    if text.startswith(('ORDEM DO DIA', 'ANTES DA ORDEM DO DIA')):
        return 'nota'
    if text.startswith('Eram ') and text.strip().endswith('minutos.'):
        return 'hora'
    if (text.strip(' :.').endswith(u'presentes à sessão')
            or text.startswith('Estavam presentes os seguintes Srs. Deputados:')):
        return 'chamada_presentes'
    if text.endswith(u'faltaram à sessão:'):
        return 'chamada_ausentes'
    if text.endswith(u'por se encontrarem em missões internacionais:'):
        return 'chamada_missao'
    return ''
def parse_raw_text(self):
    """Split ``self.raw_text`` into speaker and text, then fill in the entry.

    parse_mp_from_raw_text() yields a ``(speaker, text)`` pair and this method
    branches on the speaker value:

    * an int is used as the id of an MP; mp and party are set from it;
    * ``'pm'`` marks the Prime Minister;
    * ``'ministro: ...'`` / ``'secestado: ...'`` mark government members, who
      are looked up through get_minister() by short name (when the label
      carries one in parentheses) or by post;
    * any other non-empty string is stored as the speaker label, cut to 100
      characters;
    * an empty speaker means the whole raw text becomes the entry text.

    Afterwards, unless the entry is already typed as a continuation or a
    government intervention, the type is determined and the entry is checked
    for being a continuation of the previous speaker.

    Returns:
        None. Does nothing when ``raw_text`` is empty. Otherwise the entry is
        modified in place and saved.
    """
    # NOTE(review): block nesting was reconstructed from whitespace-collapsed
    # source; confirm against version control before merging.
    if not self.raw_text:
        return None

    from parsing import parse_mp_from_raw_text, find_cont_speaker
    speaker, text = parse_mp_from_raw_text(self.raw_text)
    self.normalize_text()

    # Prefix emitted by the parser -> entry type for government members.
    government_prefixes = (
        ('ministro: ', 'ministro_intervencao'),
        ('secestado: ', 'secestado_intervencao'),
    )

    if isinstance(speaker, int):
        self.mp = MP.objects.get(id=speaker)
        self.party = self.mp.mandate_on(self.day.legislature.number).party.abbrev
        self.text = text
        self.save()
    elif speaker:
        government_type = None
        for prefix, entry_type in government_prefixes:
            if speaker.startswith(prefix):
                government_type = entry_type
                speaker = speaker.replace(prefix, '').strip()
                break

        if speaker == 'pm':
            from deputados.utils import get_pm_from_date
            # NOTE(review): the helper's result is used here as an MP
            # instance; confirm it does not return just a name.
            self.mp = get_pm_from_date(self.day.date)
            self.speaker = 'Primeiro-Ministro'
            self.party = self.mp.current_party
            if self.type == 'deputado_intervencao':
                self.type = 'pm_intervencao'
            self.save()
        elif government_type:
            from deputados.utils import get_minister
            if '(' in speaker:
                # "Post (Short Name)": look the person up by the short name.
                speaker = speaker.split('(')[1].rstrip(')')
                govpost = get_minister(self.day.date, shortname=speaker)
            else:
                govpost = get_minister(self.day.date, post=speaker)
            if govpost:
                if govpost.mp:
                    self.mp = govpost.mp
                else:
                    self.speaker = govpost.person_name
                self.party = govpost.name
            else:
                # Unknown post/person: keep the label as parsed.
                self.speaker = speaker
            self.type = government_type
        else:
            # Store at most 100 characters of the label (presumably the
            # column's max length -- TODO confirm). Previously an over-long
            # label was truncated locally but never assigned to the entry.
            self.speaker = speaker[:100]
        self.text = text
        self.save()
    else:
        self.text = self.raw_text
        self.save()

    # Entries already typed as a continuation or a government intervention
    # keep their type; everything else is (re)classified.
    if self.type not in ('continuacao', 'pm_intervencao',
                         'ministro_intervencao', 'secestado_intervencao'):
        self.determine_type()
        from parsing import guess_if_continuation
        if guess_if_continuation(self):
            self.type = 'continuacao'
            self.save()
            if not self.mp:
                find_cont_speaker(self)