Example #1
0
def make_fingerprint(engine, person):
    """Compute a fingerprint for ``person`` and persist it.

    The long name produced by ``make_long_name`` is passed through
    ``resolve_person``. If that step raises, an error is logged and
    whatever value ``long_name`` holds at that point is used instead.
    The result is upserted into the ``person`` table (together with its
    URL slug) and into the ``rolle`` table, both keyed on ``mdb_id``, and
    is finally stored under ``person['fingerprint']``.

    A ``BadReference`` raised outside the inner resolve step is logged
    and swallowed; the remaining steps are skipped in that case.

    :param engine: database handle forwarded to the ``sl`` helpers.
    :param person: mapping that provides at least ``mdb_id``; it is
        updated in place with the ``fingerprint`` key.
    """
    try:
        long_name = make_long_name(person)
        try:
            long_name = resolve_person(long_name)
            # Pass the argument separately so logging formats it lazily.
            log.info(" -> %s", long_name.strip())
        except Exception:
            # Deliberate best effort: continue with the current long_name.
            # ``Exception`` (instead of a bare ``except``) still covers
            # every resolver error but lets KeyboardInterrupt and
            # SystemExit propagate.
            log.error("Resolve did not work")

        Person = sl.get_table(engine, 'person')
        sl.upsert(engine, Person, {
            'fingerprint': long_name,
            'slug': url_slug(long_name),
            'mdb_id': person['mdb_id']
            }, unique=['mdb_id'])
        Rolle = sl.get_table(engine, 'rolle')
        sl.upsert(engine, Rolle, {
            'mdb_id': person['mdb_id'],
            'fingerprint': long_name
            }, unique=['mdb_id'])
        person['fingerprint'] = long_name
    except BadReference:
        log.error("Bad Reference %s", person)
Example #2
0
def match_beitrag(engine, beitrag, url):
    """Resolve the person behind ``beitrag`` and make sure a record exists.

    The long name of ``beitrag`` is resolved via ``resolve_person``. When
    no row in the ``person`` table carries the resolved fingerprint,
    ``make_person`` is called to create one.

    :returns: the resolved value, or ``None`` when ``BadReference`` is
        raised anywhere inside the lookup/creation step.
    """
    long_name = make_long_name(beitrag)
    # ASCII-safe rendering, used only for the log messages.
    printable = long_name.encode('ascii', 'replace')
    log.info("Matching: %s", printable)
    try:
        value = resolve_person(long_name)
        person_table = sl.get_table(engine, 'person')
        existing = sl.find_one(engine, person_table, fingerprint=value)
        if existing is None:
            make_person(engine, beitrag, value, url)
        return value
    except BadReference:
        log.info("Beitrag person is unknown: %s", printable)
        return None
Example #3
0
def resolve_stimmen(engine, source_url):
    """Attach fingerprints to the ``abstimmung`` rows of one source URL.

    Every row found for ``source_url`` has its ``person`` value run
    through ``resolve_person``. The outcome is upserted back into the
    same table keyed on ``person``: ``fingerprint`` holds the resolved
    value (``None`` on ``BadReference``) and ``matched`` records whether
    resolution succeeded.
    """
    table = sl.get_table(engine, 'abstimmung')
    for row in sl.find(engine, table, source_url=source_url):
        try:
            fingerprint = resolve_person(row['person'])
        except BadReference:
            log.info("No match for: %s", row['person'])
            fingerprint = None
        record = {
            'person': row.get('person'),
            'matched': fingerprint is not None,
            'fingerprint': fingerprint,
        }
        sl.upsert(engine, table, record, unique=['person'])
Example #4
0
def resolve_stimmen(engine, source_url):
    """Resolve the voters recorded for ``source_url`` to fingerprints.

    Rows of the ``abstimmung`` table matching ``source_url`` are
    processed one by one: the ``person`` value is resolved with
    ``resolve_person`` and the result is written back with an upsert
    keyed on ``person``. A ``BadReference`` leaves ``fingerprint`` as
    ``None`` and ``matched`` as ``False``.
    """
    table = sl.get_table(engine, 'abstimmung')
    for vote in sl.find(engine, table, source_url=source_url):
        try:
            resolved = resolve_person(vote['person'])
        except BadReference:
            log.info("No match for: %s", vote['person'])
            resolved = None
        was_matched = resolved is not None
        update = {
            'person': vote.get('person'),
            'matched': was_matched,
            'fingerprint': resolved,
        }
        sl.upsert(engine, table, update, unique=['person'])
def load_profiles(engine):
    """Link profiles from the feed at ``FEED_URL`` to known persons.

    For each ``PROFIL`` element a lookup name is assembled from the
    ``VORNAME`` and ``NACHNAME`` texts plus the party, where the party is
    translated through ``PARTEI_MAPPING`` (falling back to the raw
    value). The name is resolved with ``resolve_person`` and the
    profile's ``url`` is upserted as ``awatch_url`` into the ``person``
    table, keyed on ``fingerprint``. Profiles without ``VORNAME`` and
    profiles that raise ``BadReference`` are skipped.
    """
    doc = etree.parse(FEED_URL)
    person_table = sl.get_table(engine, 'person')
    for profile in doc.findall('//PROFIL'):
        first_name = profile.findtext('.//VORNAME')
        if first_name is None:
            continue
        last_name = profile.findtext('.//NACHNAME')
        partei = profile.findtext('.//PARTEI')
        party_label = PARTEI_MAPPING.get(partei, partei)
        # NOTE(review): a missing NACHNAME or PARTEI would presumably
        # yield None here and make this concatenation raise TypeError.
        full_name = first_name + ' ' + last_name + ' ' + party_label
        try:
            fp = resolve_person(full_name)
            record = {'awatch_url': profile.get('url'), 'fingerprint': fp}
            sl.upsert(engine, person_table, record, unique=['fingerprint'])
        except BadReference:
            continue
 def parse_pois(self, group):
     """Split a group of interjections and yield one tuple per entry.

     ``group`` is split on ``' - '``. An entry of the form
     ``"<speaker>: <text>"`` yields ``(speaker, fingerprint, text)``,
     where the fingerprint comes from ``resolve_person`` applied to the
     speaker with the prefix ``'Gegenruf des Abg. '`` removed; it stays
     ``None`` when resolution fails. A ``BadReference`` additionally sets
     ``self.missing_recon``. Entries without ``': '`` yield
     ``(None, None, entry)``.
     """
     for entry in group.split(' - '):
         head, sep, tail = entry.partition(': ')
         if not sep:
             # No speaker prefix: the whole entry is the text.
             yield (None, None, entry)
             continue
         fingerprint = None
         lookup_name = head.replace('Gegenruf des Abg. ', '')
         try:
             fingerprint = resolve_person(lookup_name)
         except InvalidReference:
             pass
         except BadReference:
             self.missing_recon = True
         yield (head, fingerprint, tail)
Example #7
0
 def parse_pois(self, group):
     """Yield ``(speaker_name, fingerprint, text)`` for each interjection.

     The ``group`` string is cut at every ``' - '``. Each piece is split
     once at ``': '``; if that separator is present, the left part is the
     speaker name and the right part the text, and the speaker (minus the
     ``'Gegenruf des Abg. '`` prefix) is passed to ``resolve_person``.
     ``InvalidReference`` is ignored, ``BadReference`` sets
     ``self.missing_recon``; in both cases the fingerprint is ``None``.
     Pieces without the separator are yielded with no speaker.
     """
     for piece in group.split(' - '):
         name, marker, remark = piece.partition(': ')
         if marker:
             resolved = None
             try:
                 resolved = resolve_person(
                     name.replace('Gegenruf des Abg. ', ''))
             except InvalidReference:
                 pass
             except BadReference:
                 self.missing_recon = True
             yield (name, resolved, remark)
         else:
             yield (None, None, piece)
Example #8
0
def make_person(engine, beitrag, fp, source_url):
    """Create or update a ``person`` row from the data in ``beitrag``.

    ``fp`` is resolved with ``resolve_person`` and the row is upserted
    keyed on ``fingerprint``. ``vorname`` and ``nachname`` must be
    present in ``beitrag``; ``ort``, ``ressort``, ``land`` and
    ``fraktion`` are optional and stored as ``None`` when absent.

    :returns: the resolved fingerprint, or the ``fp`` that was passed in
        when ``BadReference`` is raised before it could be replaced.
    """
    try:
        fp = resolve_person(fp)
        person = {
            'fingerprint': fp,
            'slug': url_slug(fp),
            'source_url': source_url,
            'vorname': beitrag['vorname'],
            'nachname': beitrag['nachname'],
        }
        for optional_key in ('ort', 'ressort', 'land', 'fraktion'):
            person[optional_key] = beitrag.get(optional_key)
        person_table = sl.get_table(engine, 'person')
        sl.upsert(engine, person_table, person, unique=['fingerprint'])
    except BadReference:
        pass
    return fp
Example #9
0
def speakers_webtv(engine, wp, session):
    """Resolve the distinct ``webtv`` speakers of one session.

    Each distinct non-``None`` ``speaker`` value for ``wp``/``session``
    is normalised with ``speaker_name_transform`` and resolved with
    ``resolve_person``. The result is upserted keyed on ``speaker``:

    * success: resolved fingerprint, ``matched`` is ``True``
    * ``InvalidReference``: fingerprint ``None``, ``matched`` ``True``
    * ``BadReference``: fingerprint ``None``, ``matched`` ``False``
    """
    table = sl.get_table(engine, 'webtv')
    rows = sl.distinct(engine, table, 'speaker', wp=wp, session=session)
    for row in rows:
        raw_speaker = row['speaker']
        if raw_speaker is None:
            continue
        normalized = speaker_name_transform(raw_speaker)
        try:
            fp = resolve_person(normalized)
            matched = True
        except InvalidReference:
            fp, matched = None, True
        except BadReference:
            fp, matched = None, False
        record = {
            'fingerprint': fp,
            'matched': matched,
            'speaker': raw_speaker,
        }
        sl.upsert(engine, table, record, unique=['speaker'])
Example #10
0
def load_profiles(engine):
    """Store the profile URL of every resolvable person in the feed.

    The document at ``FEED_URL`` is parsed and its ``PROFIL`` elements
    are visited. The lookup name is ``VORNAME``, ``NACHNAME`` and the
    party joined by single spaces, with the party mapped through
    ``PARTEI_MAPPING`` when it has an entry. On successful resolution
    the profile's ``url`` is upserted as ``awatch_url`` on the ``person``
    row with that fingerprint. Elements without ``VORNAME`` are skipped
    and ``BadReference`` is ignored.
    """
    doc = etree.parse(FEED_URL)
    Person = sl.get_table(engine, 'person')
    for profile in doc.findall('//PROFIL'):
        vorname = profile.findtext('.//VORNAME')
        if vorname is None:
            continue
        nachname = profile.findtext('.//NACHNAME')
        partei = profile.findtext('.//PARTEI')
        partei_name = PARTEI_MAPPING.get(partei, partei)
        # NOTE(review): presumably NACHNAME/PARTEI can be absent as well;
        # a None here makes the concatenation raise TypeError.
        name = vorname + ' ' + nachname + ' ' + partei_name
        try:
            fp = resolve_person(name)
            values = {
                'awatch_url': profile.get('url'),
                'fingerprint': fp,
            }
            sl.upsert(engine, Person, values, unique=['fingerprint'])
        except BadReference:
            continue
Example #11
0
def make_person(engine, beitrag, fp, source_url):
    """Upsert a ``person`` record built from ``beitrag``.

    The fingerprint is obtained by resolving ``fp`` with
    ``resolve_person``; the record also carries the URL slug of that
    fingerprint, ``source_url`` and the name/location fields copied from
    ``beitrag`` (``vorname`` and ``nachname`` are required, the rest
    default to ``None``). The upsert is keyed on ``fingerprint``.

    :returns: the resolved fingerprint; if ``BadReference`` is raised
        before resolution completes, the original ``fp`` argument.
    """
    try:
        fp = resolve_person(fp)
        record = {
            'fingerprint': fp,
            'slug': url_slug(fp),
            'source_url': source_url,
            'vorname': beitrag['vorname'],
            'nachname': beitrag['nachname'],
        }
        record.update(
            (field, beitrag.get(field))
            for field in ('ort', 'ressort', 'land', 'fraktion'))
        table = sl.get_table(engine, 'person')
        sl.upsert(engine, table, record, unique=['fingerprint'])
    except BadReference:
        pass
    return fp
    def __iter__(self):
        """Iterate over the transcript in ``self.fh`` and yield its parts.

        Lines are decoded as latin-1. Everything before the first line
        matching ``BEGIN_MARK`` is ignored, and iteration stops at the
        first line matching ``END_MARK``. In between, text lines are
        buffered and yielded as dicts with the keys ``speaker``, ``type``
        (``'chair'``, ``'speech'`` or ``'poi'``), ``fingerprint`` and
        ``text``.

        A line matching ``SPEAKER_MARK`` (that is neither a ``TOP_MARK``
        line nor contains a stop word) flushes the buffered text and, if
        ``resolve_person`` accepts the name, starts a new speaker. A line
        matching ``POI_MARK`` whose content does not start with 'siehe'
        flushes the buffer and yields one ``'poi'`` dict per entry from
        ``self.parse_pois``.

        Side effects: sets ``self.in_session`` and, when a speaker cannot
        be resolved because of ``BadReference``, ``self.missing_recon``.
        """
        self.in_session = False
        speaker = None
        fingerprint = None
        # One-element list so that emit() can modify the flag in place.
        chair_ = [False]
        text = []

        def emit(reset_chair=True):
            # Package the buffered text for the current speaker, then
            # clear the buffer so the next contribution starts empty.
            data = {
                'speaker': speaker,
                'type': 'chair' if chair_[0] else 'speech',
                'fingerprint': fingerprint,
                'text': "\n\n".join(text).strip()
                }
            if reset_chair:
                chair_[0] = False
            # Empty the shared list in place (the closure keeps a
            # reference to this exact object).
            del text[:]
            return data

        for line in self.fh:
            line = line.decode('latin-1')
            line = line.replace(u'\u2014', '-')
            line = line.replace(u'\x96', '-')
            if not self.in_session and BEGIN_MARK.match(line):
                self.in_session = True
                continue
            elif not self.in_session:
                continue

            if END_MARK.match(line):
                # NOTE(review): text buffered since the last emit is not
                # yielded on this path - confirm that this is intended.
                return

            if not line.strip():
                continue

            is_top = TOP_MARK.match(line) is not None

            lowered = line.lower()
            has_stopword = any(sw.lower() in lowered
                               for sw in SPEAKER_STOPWORDS)

            m = SPEAKER_MARK.match(line)
            if m is not None and not is_top and not has_stopword:
                # The pending contribution is flushed before the new
                # speaker is resolved.
                if speaker is not None:
                    yield emit()
                _speaker = m.group(1)
                role = line.strip().split(' ')[0]
                try:
                    fingerprint = resolve_person(_speaker)
                    speaker = _speaker
                    chair_[0] = role in CHAIRS
                    continue
                except InvalidReference:
                    # Not a usable speaker: fall through and treat the
                    # line like any other.
                    pass
                except BadReference:
                    self.missing_recon = True

            m = POI_MARK.match(line)
            if m is not None:
                if not m.group(1).lower().strip().startswith('siehe'):
                    yield emit(reset_chair=False)
                    for _speaker, _fingerprint, _text in self.parse_pois(m.group(1)):
                        yield {
                            'speaker': _speaker,
                            'type': 'poi',
                            'fingerprint': _fingerprint,
                            'text': _text
                                }
                    continue

            text.append(line)
        yield emit()
Example #13
0
    def __iter__(self):
        """Iterate over the transcript in ``self.fh`` and yield its parts.

        All lines are read up front with ``readlines()`` and decoded as
        latin-1 where possible. Lines before the first ``BEGIN_MARK``
        match are ignored and iteration stops at the first ``END_MARK``
        match. Buffered text is yielded as dicts with the keys
        ``speaker``, ``in_writing``, ``type`` (``'chair'``, ``'speech'``
        or ``'poi'``), ``fingerprint`` and ``text``.

        ``in_writing`` is switched on by a ``WRITING_BEGIN`` line and off
        by a ``WRITING_END`` line or by a processed ``POI_MARK`` line. A
        ``SPEAKER_MARK`` line (that is neither a ``TOP_MARK`` line nor
        contains a stop word) flushes the buffer and, if
        ``resolve_person`` accepts the name, starts a new speaker. A
        ``POI_MARK`` line whose content does not start with 'siehe'
        flushes the buffer and yields one ``'poi'`` dict per entry from
        ``self.parse_pois``.

        Side effects: sets ``self.in_session`` and, when a speaker cannot
        be resolved because of ``BadReference``, ``self.missing_recon``.
        """
        self.in_session = False
        speaker = None
        fingerprint = None
        in_writing = False
        # One-element list so that emit() can modify the flag in place.
        chair_ = [False]
        text = []

        def emit(reset_chair=True):
            # Package the buffered text for the current speaker, then
            # clear the buffer so the next contribution starts empty.
            data = {
                'speaker': speaker,
                'in_writing': in_writing,
                'type': 'chair' if chair_[0] else 'speech',
                'fingerprint': fingerprint,
                'text': "\n\n".join(text).strip()
                }
            if reset_chair:
                chair_[0] = False
            # Empty the shared list in place (the closure keeps a
            # reference to this exact object).
            del text[:]
            return data

        for line in self.fh.readlines():
            try:
                line = line.decode('latin-1')
            except (UnicodeError, AttributeError):
                # Presumably the line is already decoded text; keep it
                # unchanged. Narrowed from a bare ``except`` so that
                # KeyboardInterrupt and unrelated errors are not hidden.
                pass
            line = line.replace(u'\u2014', '-')
            line = line.replace(u'\x96', '-')
            # Variant with non-breaking spaces normalised, used for the
            # marker checks below.
            rline = line.replace(u'\xa0', ' ').strip()

            if not self.in_session and BEGIN_MARK.match(line):
                self.in_session = True
                continue
            elif not self.in_session:
                continue

            if END_MARK.match(rline):
                # NOTE(review): text buffered since the last emit is not
                # yielded on this path - confirm that this is intended.
                return

            if WRITING_BEGIN.match(rline):
                in_writing = True

            if WRITING_END.match(rline):
                in_writing = False

            if not line.strip():
                continue

            is_top = TOP_MARK.match(rline) is not None

            lowered = line.lower()
            has_stopword = any(sw.lower() in lowered
                               for sw in SPEAKER_STOPWORDS)

            m = SPEAKER_MARK.match(line)
            if m is not None and not is_top and not has_stopword:
                # The pending contribution is flushed before the new
                # speaker is resolved.
                if speaker is not None:
                    yield emit()
                _speaker = m.group(1)
                role = line.strip().split(' ')[0]
                try:
                    fingerprint = resolve_person(_speaker)
                    speaker = _speaker
                    chair_[0] = role in CHAIRS
                    continue
                except InvalidReference:
                    # Not a usable speaker: fall through and treat the
                    # line like any other.
                    pass
                except BadReference:
                    self.missing_recon = True

            m = POI_MARK.match(line)
            if m is not None:
                if not m.group(1).lower().strip().startswith('siehe'):
                    yield emit(reset_chair=False)
                    in_writing = False
                    for _speaker, _fingerprint, _text in self.parse_pois(m.group(1)):
                        yield {
                            'speaker': _speaker,
                            'in_writing': False,
                            'type': 'poi',
                            'fingerprint': _fingerprint,
                            'text': _text
                                }
                    continue

            text.append(line)
        yield emit()