Example #1
0
def parse(input_file):
    term = Term.objects.get(name=TERM)

    f = open(input_file, 'r')
    reader = csv.reader(f, delimiter=',', quotechar='"')
    for row in reader:
        first_name = row[1].strip()
        last_name = row[0].strip()
        budget = row[4].strip().replace(',', '')
        name = "%s %s" % (last_name, first_name)
        name = parse_tools.fix_mp_name(name)
        print "%-20s %-20s %10s" % (first_name, last_name, budget)
        try:
            member = Member.objects.get(name=name)
            tm = TermMember.objects.get(member=member, term=term)
        except Member.DoesNotExist:
            continue
        except TermMember.DoesNotExist:
            continue
        ms = MemberStats.objects.get(begin=term.begin, end=term.end, member=member)
        tm.election_budget = budget
        tm.save()
        ms.election_budget = budget
        ms.save()

    f.close()
Example #2
0
def handle_row(src, q_info, row):
    name = parse_tools.fix_mp_name(row[0])
    mp = Member.objects.filter(name=name)
    if not mp:
        print "%s not found" % name
        return
    mp = mp[0]
    print "%s" % mp
    for q in q_info:
        q_idx, i_idx, order, txt = q
        if not row[q_idx]:
            print "\tMissing value for column %d" % q_idx
            continue
        val = int(row[q_idx])
        opt_idx = int(val * OPTION_STEPS // ANSWER_MAX)
        if opt_idx == OPTION_STEPS:
            opt_idx = OPTION_STEPS - 1
        que = Question.objects.get(source=src, order=order)
        opt = Option.objects.get(question=que, order=opt_idx)
        try:
            ans = Answer.objects.get(member=mp, question=que)
        except Answer.DoesNotExist:
            ans = Answer(member=mp, question=que)
        ans.option = opt
        ans.explanation = None
        ans.save()
Example #3
0
def parse(input_file):
    mp_list = Member.objects.active_in_term(term)
    mp_dict = {}
    for mp in mp_list:
        mp_dict[mp.name] = mp
        mp.found = False
    f = open(input_file, 'r')
    reader = csv.reader(f, delimiter=';', quotechar="'")
    reader.next() # skip header
    for row in reader:
        first_names = row[0].strip(" '").decode('utf8').split(' ')
        last_name = row[1].strip(" '").decode('utf8')
        last_name = unicode(last_name).replace(u'é', u'\u00e9')
        name = '%s %s' % (last_name, first_names[0])
        name = parse_tools.fix_mp_name(name)
        try:
            member = mp_dict[name]
        except KeyError:
#            print "MP '%s' not found" % name
            continue
        member.found = True
        funding = float(row[6].replace(',', '.'))
        print "%30s: %.2f" % (unicode(member), funding)
        ms = MemberStats.objects.get(begin=term.begin, end=term.end, member=member)
        ms.election_budget = funding
        ms.save()
#        print unicode(member)
    for mp in mp_list:
        if mp.found:
            continue
        print "Funding data for MP '%s' not found" % unicode(mp)

        """budget = row[4].strip().replace(',', '')
        name = "%s %s" % (last_name, first_name)
        name = parse_tools.fix_mp_name(name)
        print "%-20s %-20s %10s" % (first_name, last_name, budget)
        try:
            member = Member.objects.get(name=name)
            tm = TermMember.objects.get(member=member, term=term)
        except Member.DoesNotExist:
            continue
        except TermMember.DoesNotExist:
            continue
        ms = MemberStats.objects.get(begin=term.begin, end=term.end, member=member)
        tm.election_budget = budget
        tm.save()
        ms.election_budget = budget
        ms.save()"""
    f.close()
Example #4
0
def process_signatures(sign_el):
    """Extract the signing date and signer names from a signatures element.

    ``sign_el`` must have exactly one ``paivays`` child; its ``pvm``
    attribute becomes the ``'date'`` entry of the result.  Every
    ``edustaja/henkilo`` element whose ``numero`` attribute is at most 10000
    contributes one name, built as "<sukunimi> <etunimi>" and normalized
    with ``parse_tools.fix_mp_name``.  Entries numbered above 10000 are
    ignored (the reason is not visible in this function).

    :returns: dict with keys ``'date'`` (attribute string) and ``'mps'``
        (list of normalized names in document order).
    """
    date_els = sign_el.xpath("./paivays")
    assert len(date_els) == 1

    result = {'date': date_els[0].attrib['pvm'], 'mps': []}
    signers = result['mps']

    for person in sign_el.xpath("./edustaja/henkilo"):
        if int(person.attrib['numero']) > 10000:
            continue
        first = person.xpath("./etunimi")[0].text.strip()
        last = person.xpath("./sukunimi")[0].text.strip()
        full_name = "%s %s" % (last, first)
        signers.append(parse_tools.fix_mp_name(full_name))

    return result
Example #5
0
def handle_row(src, row, writer=None):
    (district, party, last_name, first_name, age, gender, is_indep) = row[0:7]
    (county, edu, intro, is_county_off, is_mp, is_mep, profession) = row[7:14]
    (home_page, rss_feed) = row[14:16]
    answer_list = row[16::3]
    importance_list = row[17::3]
    comment_list = row[18::3]

    name = ' '.join((last_name, first_name))
    name = parse_tools.fix_mp_name(name)

    if writer:
        if district.endswith(' vaalipiiri'):
            district = district.replace(' vaalipiiri', '')
        row = [name, district, party, age, gender, edu]
        row = [s.encode('utf8') if s else '' for s in row]
        writer.writerow(row)

    mp = Member.objects.filter(name=name)
    if not mp:
        if is_mp != '0':
            print name
        return
    mp = mp[0]

    for idx, ans in enumerate(answer_list):
        if idx in SKIP_QUESTIONS:
            continue
        comment = comment_list[idx]
        if not ans and not comment:
            continue
        que = Question.objects.get(source=src, order=idx)
        if ans:
            opt = Option.objects.get(question=que, name=ans)
        else:
            opt = None
        try:
            ans = Answer.objects.get(member=mp, question=que)
        except Answer.DoesNotExist:
            ans = Answer(member=mp, question=que)
        ans.option = opt
        ans.explanation = comment
        ans.save()
Example #6
0
def process_district(district):
    url = URL_BASE % district
    s = http_cache.open_url(url, 'funding')
    doc = html.fromstring(s)
    doc.make_links_absolute(url)

    el_list = doc.xpath(".//div[@class='listing_table']")
    for el in el_list:
        rows = el.xpath(".//tr")
        for row in rows:
            ch = row.getchildren()[0]
            if ch.tag == 'th':
                continue
#            print ch.text
            m = re.match('([\w -]+)[ ]{2,}([\w -]+)', ch.text, re.U)
            if not m:
                print "Skipping %s" % ch.text
                continue
            fnames = m.groups()[0].strip()
            lname = m.groups()[1].strip()
            name = "%s %s" % (lname, fnames.split(' ')[0])
            name = parse_tools.fix_mp_name(name)

            mp = Member.objects.filter(name=name)
            if not mp:
                continue
            mp = mp[0]
            links = row.xpath('.//a')
            link = None
            for l in links:
                href = l.attrib['href']
                if l.text.strip() == "Ennakkoilmoitus":
                    if not link:
                        link = href
                elif l.text.strip() == "Vaalirahoitusilmoitus":
                    link = href
                else:
                    assert False
            assert link
            process_mp(mp, link)
Example #7
0
def parse_mp(src, lname, fname, href):
    name = "%s %s" % (lname, fname)
    name = parse_tools.fix_mp_name(name)
    mp = Member.objects.filter(name=name)
    if not mp:
        return
    mp = mp[0]

    if name in mp_dict:
        mp = mp_dict[name]
        mp.found = True

    print mp

    s = http_cache.open_url(href, 'opinions')
    doc = html.fromstring(s)
    q_list = doc.xpath(".//div[@class='em-compare-container']")
    for q_idx, q_el in enumerate(q_list):
        if q_idx in SKIP_QUESTIONS:
            continue

        el = q_el.xpath("./h3")
        assert len(el) == 1
        q_text = el[0].text.strip()
        m = re.match(r'\d+\.\s+(.+)', q_text, re.U)
        assert m
        q_text = m.groups()[0]

        a_list = q_el.xpath(".//td[@class='em-text']")
        a_text_list = []
        for a_idx, a_text in enumerate(a_list):
            a_text = a_text.text.strip()
            a_text_list.append(a_text)

        q_obj = add_question(src, q_text, q_idx, a_text_list)

        a_list = q_el.xpath(".//table[@class='em-compare-alts ']/tr")
        assert len(a_list) == len(a_text_list)
        chosen = None
        for a_idx, el in enumerate(a_list):
            if el.xpath(".//acronym"):
                assert not chosen
                chosen = a_idx
        if chosen == None:
            continue

        comm_el = q_el.xpath(".//div[@class='em-comment']")
        if comm_el:
            assert len(comm_el) == 1
            comm_el = comm_el[0]
            text_list = []
            for br in comm_el.xpath(".//br"):
                if not br.tail:
                    continue
                s = br.tail.strip()
                if s:
                    text_list.append(s)
            comm_text = '\n'.join(text_list)
            assert comm_text[0] == '"' and comm_text[-1] == '"'
            comm_text = comm_text[1:-1]
        else:
            comm_text = None

        opt = q_obj.opt_dict[chosen]
        try:
            ans = Answer.objects.get(member=mp, question=q_obj)
        except:
            ans = Answer(member=mp, question=q_obj)
        ans.option = opt
        ans.explanation = comm_text
        ans.save()