Example #1
0
def load_zitate(ws):
    """Create or update one ``Zitat`` per record in ``ws['speech']``.

    Each speech is attached to the ``Sitzung`` identified by its
    ``wahlperiode``/``sitzung`` values.  A ``Zitat`` that already exists
    for that ``Sitzung`` and ``sequence`` is updated in place, otherwise
    a new one is created.  After every quote the session is flushed and
    ``load_debatte_zitate`` is called; the session is committed once at
    the end and ``SPME_CACHE`` is cleared.

    Progress is reported on stdout: a header, then one dot per 1000
    speeches.

    :param ws: mapping providing the ``'mediathek'`` and ``'speech'``
        record collections.
    """
    # Mediathek records keyed by the source URL of the speech they refer to.
    mediathek = {entry['speech_source_url']: entry
                 for entry in ws['mediathek']}
    # (wahlperiode, sitzung) -> Sitzung, so every session is queried once.
    # A miss (None) is cached as well, hence the membership test below.
    sitzung_cache = {}

    # (Zitat attribute, speech key) pairs copied verbatim, in this order.
    copied_fields = (
        ('sequenz', 'sequence'),
        ('text', 'text'),
        ('typ', 'type'),
        ('sprecher', 'speaker'),
        ('source_url', 'source_url'),
    )

    out = sys.stdout
    out.write("Loading transcripts")
    out.flush()

    for count, speech in enumerate(ws['speech']):
        if not count % 1000:
            out.write(".")
            out.flush()

        key = (speech['wahlperiode'], speech['sitzung'])
        if key not in sitzung_cache:
            sitzung_query = Sitzung.query.filter_by(
                wahlperiode=speech.get('wahlperiode'),
                nummer=speech.get('sitzung'),
            )
            sitzung_cache[key] = sitzung_query.first()
        sitzung = sitzung_cache[key]

        zitat_query = Zitat.query.filter_by(
            sitzung=sitzung,
            sequenz=speech['sequence'],
        )
        zitat = zitat_query.first()
        if zitat is None:
            zitat = Zitat()

        zitat.sitzung = sitzung
        for attribute, field in copied_fields:
            setattr(zitat, attribute, speech[field])

        # The person is only (re)assigned for a truthy fingerprint; an
        # existing Zitat otherwise keeps whatever person it already had.
        fingerprint = speech['fingerprint']
        if fingerprint:
            person_query = Person.query.filter_by(fingerprint=fingerprint)
            zitat.person = person_query.first()

        session = db.session
        session.add(zitat)
        session.flush()
        load_debatte_zitate(ws, zitat, mediathek)

    db.session.commit()
    SPME_CACHE.clear()
Example #2
0
def load_zitate(engine, debatte, zitate, speeches):
    """Create or update the ``Zitat`` rows belonging to one debate.

    For every entry in ``zitate`` the matching record in ``speeches`` is
    looked up by (wahlperiode, sitzung, sequence), compared as integers,
    and its fields are copied onto a ``Zitat`` which is added to
    ``db.session``.  An existing ``Zitat`` with the same ``debatte`` and
    ``sequenz`` is reused, otherwise a new one is created.  This function
    neither flushes nor commits the session.

    :param engine: not used by this function.
    :param debatte: debate the quotes are attached to; its ``sitzung`` is
        copied onto every ``Zitat``.
    :param zitate: iterable of mappings with the keys ``wp``, ``session``,
        ``sequence``, ``speech_id`` and ``speaker``.
    :param speeches: collection of mappings with the keys ``wahlperiode``,
        ``sitzung``, ``sequence``, ``text``, ``type``, ``speaker``,
        ``source_url`` and ``fingerprint``; it is scanned once per entry
        of ``zitate``.
    :raises IndexError: if no speech matches an entry of ``zitate``.
    """
    for data in zitate:
        # Convert the lookup key once per entry instead of once per speech.
        wp = int(data['wp'])
        session = int(data['session'])
        sequence = int(data['sequence'])

        # A list comprehension instead of ``filter(...).pop()``: on
        # Python 3 ``filter`` returns an iterator that has no ``pop``.
        matches = [s for s in speeches
                   if int(s['wahlperiode']) == wp
                   and int(s['sitzung']) == session
                   and int(s['sequence']) == sequence]
        if not matches:
            raise IndexError(
                "no speech for wp=%s session=%s sequence=%s"
                % (wp, session, sequence))
        # ``pop()`` took the last match; keep that choice for duplicates.
        speech = matches[-1]

        zitat = Zitat.query.filter_by(
                debatte=debatte,
                sequenz=speech['sequence']).first()
        if zitat is None:
            zitat = Zitat()
        zitat.sitzung = debatte.sitzung
        zitat.debatte = debatte
        zitat.sequenz = speech['sequence']
        zitat.text = speech['text']
        zitat.typ = speech['type']
        zitat.speech_id = data['speech_id']
        zitat.sprecher = speech['speaker']
        zitat.redner = data['speaker']
        zitat.source_url = speech['source_url']

        # Resolve the person through the module-level SPEAKERS cache so
        # each fingerprint is queried at most once (misses are cached as
        # None too).  Without a fingerprint the person is left untouched.
        fingerprint = speech['fingerprint']
        if fingerprint:
            if fingerprint not in SPEAKERS:
                SPEAKERS[fingerprint] = Person.query.filter_by(
                    fingerprint=fingerprint
                    ).first()
            zitat.person = SPEAKERS[fingerprint]

        db.session.add(zitat)
Example #3
0
def load_zitate(engine, indexer, debatte, zitate, speeches, reden):
    """Create or update the ``Zitat`` rows belonging to one debate.

    For every entry in ``zitate`` the matching record in ``speeches`` is
    looked up by (wahlperiode, sitzung, sequence), compared as integers,
    and its fields are copied onto a ``Zitat``.  An existing ``Zitat``
    with the same ``sitzung`` and ``sequenz`` is reused, otherwise a new
    one is created.  Each quote is added to ``db.session`` and the
    session is flushed; nothing is committed here.

    :param engine: passed through to ``lazyload_person``.
    :param indexer: passed through to ``lazyload_person``.
    :param debatte: debate the quotes are attached to; its ``sitzung`` is
        copied onto every ``Zitat``.
    :param zitate: iterable of mappings with the keys ``wp``, ``session``,
        ``sequence`` and ``speech_id``.
    :param speeches: collection of mappings with the keys ``wahlperiode``,
        ``sitzung``, ``sequence``, ``text``, ``type``, ``speaker``,
        ``source_url`` and ``fingerprint``; it is scanned once per entry
        of ``zitate``.
    :param reden: mapping from ``speech_id`` to the object stored as
        ``Zitat.rede``.
    :raises IndexError: if no speech matches an entry of ``zitate``.
    :raises KeyError: if ``reden`` is a dict lacking an entry's
        ``speech_id``.
    """
    for data in zitate:
        # Convert the lookup key once per entry instead of once per speech.
        wp = int(data['wp'])
        session = int(data['session'])
        sequence = int(data['sequence'])

        # A list comprehension instead of ``filter(...).pop()``: on
        # Python 3 ``filter`` returns an iterator that has no ``pop``.
        matches = [s for s in speeches
                   if int(s['wahlperiode']) == wp
                   and int(s['sitzung']) == session
                   and int(s['sequence']) == sequence]
        if not matches:
            raise IndexError(
                "no speech for wp=%s session=%s sequence=%s"
                % (wp, session, sequence))
        # ``pop()`` took the last match; keep that choice for duplicates.
        speech = matches[-1]

        zitat = Zitat.query.filter_by(
                sitzung=debatte.sitzung,
                sequenz=speech['sequence']).first()
        if zitat is None:
            zitat = Zitat()
        zitat.sitzung = debatte.sitzung
        zitat.debatte = debatte
        zitat.sequenz = speech['sequence']
        zitat.rede = reden[data['speech_id']]
        zitat.text = speech['text']
        zitat.typ = speech['type']
        zitat.sprecher = speech['speaker']
        zitat.source_url = speech['source_url']
        # Unlike the other fields, the person is resolved by a helper
        # from the speech's fingerprint rather than copied.
        zitat.person = lazyload_person(engine, indexer,
                speech['fingerprint'])

        db.session.add(zitat)
        db.session.flush()