def process_keywords():
    mpage = http_cache.open_url(KEYWORD_URL_BASE + KEYWORD_LIST_URL, 'keyword')
    # BeautifulSoup's SGML parser will break at the following pattern,
    # so remove it before handing over for parsing
    pat = 'document.write\("<SCR"\+"IPT Language=.JavaScript. SRC=."\+"' + \
          'http://"\+gDomain\+"/"\+gDcsId\+"/wtid.js"\+".></SCR"\+"IPT>"\);'
    massage = [(re.compile(pat), lambda match: '')]
    dir_soup = BeautifulSoup(mpage, markupMassage=massage,
                             fromEncoding='iso-8859-1',
                             convertEntities=BeautifulSoup.HTML_ENTITIES)
    dir_list = dir_soup.find('p', text='A) Valitse asiasana aakkosittain'). \
        parent.findNextSiblings('a')
    max_len = Keyword._meta.get_field_by_name('name')[0].max_length
    trim_re = re.compile(' \[$')
    for dir_elem in dir_list:
        kpage_url = KEYWORD_URL_BASE + dir_elem['href']
        kpage = http_cache.open_url(kpage_url, 'keyword')
        ksoup = BeautifulSoup(kpage, markupMassage=massage,
                              fromEncoding='iso-8859-1',
                              convertEntities=BeautifulSoup.HTML_ENTITIES)
        anchor = ksoup.find('p', text=' Suorita haku asiasanalla:')
        elem = anchor.parent.parent.nextSibling.nextSibling
        kword_list = elem.findAll('li')
        for kword in kword_list:
            insert_keyword(kword, max_len, trim_re)
def process_minutes(full_update):
    start_from = from_pl
    stop_after = None
    member_list = Member.objects.all()
    # Build a "Firstname Lastname" -> Member lookup table
    member_dict = {}
    for mem in member_list:
        (last, first) = mem.name.split(' ', 1)
        name = ' '.join((first, last))
        if name in member_dict:
            raise Exception("Duplicate MP name: %s" % name)
        member_dict[name] = mem
    next_link = url_base + MINUTES_URL
    while next_link:
        (info_list, next_link) = read_listing('minutes', next_link,
                                              new_only=not full_update)
        print 'Got links for total of %d minutes' % len(info_list)
        for idx, info in enumerate(info_list):
            url = info['minutes_link']
            print '%4d. %s' % (idx, info['id'])
            if start_from:
                if info['id'] == start_from:
                    start_from = None
                else:
                    continue
            if stop_after and info['id'] != stop_after:
                return
            s = http_cache.open_url(url, 'minutes')
            tmp_url = 'http://www.eduskunta.fi/faktatmp/utatmp/akxtmp/'
            minutes = minutes_parser.parse_minutes(s, tmp_url)
            if not minutes:
                continue
            minutes['url'] = url
            try:
                mins = Minutes.objects.get(plenary_session__name=info['id'])
                if not full_update:
                    return
            except Minutes.DoesNotExist:
                mins = None
            pl_sess = insert_minutes(minutes, mins)
            try:
                for l in minutes['cnv_links']:
                    print l
                    s = http_cache.open_url(l, 'minutes')
                    disc = minutes_parser.parse_discussion(s, l)
                    insert_discussion(full_update, pl_sess, disc,
                                      minutes['cnv_links'].index(l), member_dict)
            except:
                Minutes.objects.get(plenary_session=pl_sess).delete()
                Statement.objects.filter(plenary_session=pl_sess).delete()
                raise
            transaction.commit()
            db.reset_queries()
            if until_pl and info['id'] == until_pl:
                stop_after = until_pl
def read_listing(list_type, url, new_only=False):
    assert list_type in ('minutes', 'votes', 'docs')
    ret = []
    while True:
        s = http_cache.open_url(url, list_type, skip_cache=new_only)
        doc = html.fromstring(s)
        el_list = doc.xpath(".//div[@class='listing']/div/p")
        doc.make_links_absolute(url)
        for el in el_list:
            parsed_el = process_list_element(list_type, el)
            ret.append(parsed_el)
        # Check if last page of links
        if len(el_list) >= 50:
            fwd_link = doc.xpath(".//input[@name='forward']")
            url = url_base + fwd_link[0].attrib['value']
        else:
            url = None
            break
        if new_only:
            break
    return (ret, url)
def get_wikipedia_links():
    MP_LINK = 'http://fi.wikipedia.org/wiki/Luokka:Nykyiset_kansanedustajat'
    print "Populating Wikipedia links to MP's..."
    mp_list = Member.objects.all()
    mp_names = [mp.name for mp in mp_list]
    s = http_cache.open_url(MP_LINK, 'misc')
    doc = html.fromstring(s)
    links = doc.xpath(".//table//a[starts-with(@href, '/wiki')]")
    doc.make_links_absolute(MP_LINK)
    for l in links:
        href = l.attrib['href']
        if 'Toiminnot:Haku' in href:
            continue
        name = l.text
        if '(' in name:
            name = name.split('(')[0].strip()
        # Reorder "Firstname Lastname" into "Lastname Firstname"
        a = name.split()
        a = list((a[-1],)) + a[0:-1]
        name = ' '.join(a)
        try:
            mp = Member.objects.get(name=name)
        except Member.DoesNotExist:
            matches = difflib.get_close_matches(name, mp_names, cutoff=0.8)
            if len(matches) > 1:
                raise Exception("Multiple matches for '%s'" % name)
            elif not matches:
                print "No match found for '%s'" % name
                continue
            print "Mapping '%s' to '%s'" % (name, matches[0])
            mp = Member.objects.get(name=matches[0])
        mp.wikipedia_link = href
        get_mp_homepage_link(mp)
        mp.save()
def parse():
    s = http_cache.open_url(CSV_URL, 'opinions')
    src, c = QuestionSource.objects.get_or_create(name='MTV3 vaalikone', year=2007,
                                                  url_name='mtv2007')
    reader = csv.reader(s.splitlines(), delimiter=',', quotechar='"')
    reader.next()
    hdr = reader.next()
    # 2d questions
    q_list = [idx for idx, s in enumerate(hdr) if s.startswith('[2d_x]')]
    i_list = [idx for idx, s in enumerate(hdr) if s.startswith('[2d_y]')]
    # 1d questions
    q2_list = [idx for idx, s in enumerate(hdr) if s.startswith('[1d_x]')]
    q_list.extend(q2_list)
    i_list.extend([-1] * len(q2_list))
    o_list = range(0, len(q_list))
    txt_list = [hdr[idx][7:].replace('_', ',') for idx in q_list]
    for i in o_list:
        if q_list[i] in range(56, 64):
            txt_list[i] = "Hallituspuolueena " + txt_list[i]
    q_info_list = zip(q_list, i_list, o_list, txt_list)
    for q in q_info_list:
        insert_question(src, q)
    for row in reader:
        handle_row(src, q_info_list, row)
def process_mp(mp, url):
    tm = TermMember.objects.filter(member=mp, term=term)
    if not tm:
        return
    s = http_cache.open_url(url, 'funding')
    doc = html.fromstring(s)
    election_budget = None
    rows = doc.xpath(".//tr")
    for row in rows:
        th = row.getchildren()[0]
        if not th.tag == 'th' or not th.text:
            continue
        if th.text.strip().startswith('2. Vaalikampanjan rahoitus'):
            scr = row[1][0]
            assert scr.tag == 'script'
            m = re.search(r"addSpaces\('([\d,.]*)'\)", scr.text)
            assert m
            s = m.groups()[0].replace(',', '.')
            if not s:
                continue
            election_budget = float(s)
    if not election_budget:
        return
    global mp_count
    mp_count += 1
    ms = MemberStats.objects.get(begin=term.begin, end=term.end, member=mp)
    ms.election_budget = election_budget
    ms.save()
    print "%30s: %.0f" % (mp.name, election_budget)
def process_parties(db_insert):
    s = http_cache.open_url(party_url_base + party_list_url, 'party')
    parser = party_list_parser.Parser()
    parser.feed(s)
    parser.close()
    party_list = parser.get_list()
    parser = party_info_parser.Parser()
    for party in party_list:
        if party['name'] == 'vr':
            continue
        s = http_cache.open_url(party_url_base + party['info_link'], 'party')
        parser.reset()
        parser.feed(s)
        parser.close()
        party.update(parser.get_desc())
        logo_url = party_url_base + party['logo']
        fname = party['name'].encode('iso8859-1') + '.jpg'
        party['logo'] = fname
        fname = static_path + party_logo_path + fname
        create_path_for_file(fname)
        if not os.path.exists(fname):
            print 'Fetching logo ' + logo_url
            s = http_cache.open_url(logo_url, 'party')
            f = open(fname, 'wb')
            f.write(s)
            f.close()
        else:
            print 'Skipping logo ' + party['logo']
        if not db_insert:
            continue
        try:
            p = Party.objects.get(name=party['name'])
        except Party.DoesNotExist:
            p = None
        if not p:
            p = Party()
            p.name = party['name']
        if not p.full_name:
            p.full_name = party['fullname']
        p.logo = party_logo_path + party['logo']
        p.info_link = party_url_base + party['info_link']
        p.save()
    return party_list
def get_mp_homepage_link(mp, force_update=False):
    if mp.homepage_link and not force_update:
        return
    s = http_cache.open_url(mp.wikipedia_link, 'misc')
    doc = html.fromstring(s)
    b = doc.xpath(".//b[.='Kotisivu']")
    if not b:
        return
    elem = b[0].getparent()
    href = elem.getnext().getchildren()[0].attrib['href']
    print "%s: %s" % (mp.name, href)
    # Try to fetch the homepage
    s = http_cache.open_url(href, 'misc', skip_cache=True, error_ok=True)
    if s:
        mp.homepage_link = href
    else:
        print "\tFailed to fetch"
def parse():
    s = http_cache.open_url(OPTIONS_URL, 'opinions')
    src, c = QuestionSource.objects.get_or_create(name='HS vaalikone', year=2011,
                                                  url_name='hs2011')
    parse_option_order(s, src)
    s = http_cache.open_url(CSV_URL, 'opinions')
    reader = csv.reader(s.splitlines(), delimiter=',', quotechar='"')
    hdr = reader.next()
    questions = [s.decode('utf8') for s in hdr[16::3]]
    for idx, q in enumerate(questions):
        if idx in SKIP_QUESTIONS:
            continue
        q_obj = Question.objects.get(source=src, text=q)
        assert q_obj.order == idx
    q_list = Question.objects.filter(source=src).order_by('order')
    writer = None
    for row in reader:
        row = [(s.decode('utf8'), None)[s == '-'] for s in row]
        handle_row(src, row, writer)
def download_processing_info(doc):
    url = DOC_PROCESS_URL % (doc.type, doc.name)
    logger.info('updating processing info for %s' % doc)
    s = http_cache.open_url(url, 'docs')
    html_doc = html.fromstring(s)
    ret = {}
    subj_el = html_doc.xpath(".//div[@class='listing']/div[1]/div[1]/h3")
    assert len(subj_el) == 1
    ret['subject'] = clean_string(subj_el[0].text)
    for box_el in html_doc.xpath(".//div[@class='listborder']"):
        hdr_el = box_el.xpath("./div[@class='header']")
        if not hdr_el:
            continue
        assert len(hdr_el) == 1
        hdr = hdr_el[0].text_content().strip()
        if doc.type == 'VK':
            date_hdr_str = 'Kysymys j'
        elif doc.type == 'HE':
            date_hdr_str = 'Annettu eduskunnalle'
        elif doc.type == 'VNT':
            date_hdr_str = 'Ilmoitettu saapuneeksi'
        else:
            date_hdr_str = 'Aloite j'
        if hdr.startswith(date_hdr_str):
            date_el = box_el.xpath(".//div[.='Pvm']")
            assert len(date_el) == 1
            date = date_el[0].tail.strip()
            (d, m, y) = date.split('.')
            ret['date'] = '-'.join((y, m, d))
    assert 'date' in ret
    kw_list = []
    kw_el_list = html_doc.xpath(".//div[@id='vepsasia-asiasana']//div[@class='linkspace']/a")
    for kw in kw_el_list:
        kw = kw.text.strip()
        kw_list.append(kw)
    assert len(kw_list)
    ret['keywords'] = kw_list
    return ret
def process_counties(db_insert):
    s = http_cache.open_url(STAT_URL_BASE + STAT_COUNTY_URL, 'county')
    # strip first 4 lines of header and any blank/empty lines at EOF
    for line in s.rstrip().split('\n')[4:]:
        dec_line = line.decode('iso8859-1').rstrip().split('\t')
        (county_id, county_name, district_id, district_name) = dec_line
        if not db_insert:
            continue
        try:
            c = County.objects.get(name=county_name)
        except:
            c = None
        if not c:
            c = County()
            c.name = county_name
        c.district = district_name
        c.save()
def parse_district(district):
    base = URL_BASE % district
    s = http_cache.open_url(base, 'opinions')
    doc = html.fromstring(s)
    doc.make_links_absolute(base)
    el_list = doc.xpath(".//td[@class='em-cell-name']/a")
    cand_list = []
    for el in el_list:
        href = el.attrib['href']
        # Party links have rt-2 in them
        if '.rt-2.' in href:
            continue
        assert '.rt-1.' in href
        m = re.match(r'(\d+) ([\w -]+), ([\w \-\"\.()]+)$', el.text, re.U)
        if not m:
            print "Skipping %s" % el.text.encode('utf8')
            continue
        last_name, first_name = m.groups()[1:]
        cand_list.append((last_name, first_name, href))
    return cand_list
def process_district(district):
    url = URL_BASE % district
    s = http_cache.open_url(url, 'funding')
    doc = html.fromstring(s)
    doc.make_links_absolute(url)
    el_list = doc.xpath(".//div[@class='listing_table']")
    for el in el_list:
        rows = el.xpath(".//tr")
        for row in rows:
            ch = row.getchildren()[0]
            if ch.tag == 'th':
                continue
            # print ch.text
            m = re.match('([\w -]+)[ ]{2,}([\w -]+)', ch.text, re.U)
            if not m:
                print "Skipping %s" % ch.text
                continue
            fnames = m.groups()[0].strip()
            lname = m.groups()[1].strip()
            name = "%s %s" % (lname, fnames.split(' ')[0])
            name = parse_tools.fix_mp_name(name)
            mp = Member.objects.filter(name=name)
            if not mp:
                continue
            mp = mp[0]
            links = row.xpath('.//a')
            link = None
            for l in links:
                href = l.attrib['href']
                if l.text.strip() == "Ennakkoilmoitus":
                    if not link:
                        link = href
                elif l.text.strip() == "Vaalirahoitusilmoitus":
                    link = href
                else:
                    assert False
            assert link
            process_mp(mp, link)
def parse_mp(src, lname, fname, href):
    name = "%s %s" % (lname, fname)
    name = parse_tools.fix_mp_name(name)
    mp = Member.objects.filter(name=name)
    if not mp:
        return
    mp = mp[0]
    if name in mp_dict:
        mp = mp_dict[name]
        mp.found = True
    print mp
    s = http_cache.open_url(href, 'opinions')
    doc = html.fromstring(s)
    q_list = doc.xpath(".//div[@class='em-compare-container']")
    for q_idx, q_el in enumerate(q_list):
        if q_idx in SKIP_QUESTIONS:
            continue
        el = q_el.xpath("./h3")
        assert len(el) == 1
        q_text = el[0].text.strip()
        m = re.match(r'\d+\.\s+(.+)', q_text, re.U)
        assert m
        q_text = m.groups()[0]
        a_list = q_el.xpath(".//td[@class='em-text']")
        a_text_list = []
        for a_idx, a_text in enumerate(a_list):
            a_text = a_text.text.strip()
            a_text_list.append(a_text)
        q_obj = add_question(src, q_text, q_idx, a_text_list)
        a_list = q_el.xpath(".//table[@class='em-compare-alts ']/tr")
        assert len(a_list) == len(a_text_list)
        chosen = None
        for a_idx, el in enumerate(a_list):
            if el.xpath(".//acronym"):
                assert not chosen
                chosen = a_idx
        if chosen == None:
            continue
        comm_el = q_el.xpath(".//div[@class='em-comment']")
        if comm_el:
            assert len(comm_el) == 1
            comm_el = comm_el[0]
            text_list = []
            for br in comm_el.xpath(".//br"):
                if not br.tail:
                    continue
                s = br.tail.strip()
                if s:
                    text_list.append(s)
            comm_text = '\n'.join(text_list)
            assert comm_text[0] == '"' and comm_text[-1] == '"'
            comm_text = comm_text[1:-1]
        else:
            comm_text = None
        opt = q_obj.opt_dict[chosen]
        try:
            ans = Answer.objects.get(member=mp, question=q_obj)
        except:
            ans = Answer(member=mp, question=q_obj)
        ans.option = opt
        ans.explanation = comm_text
        ans.save()
def process_mops(party_list, update=False, db_insert=False):
    s = http_cache.open_url(url_base + mp_list_url, 'member')
    # Skip past the malformed tag at the start of the page; it breaks the parser
    BAD_HTML = '<! hx4600.thw>'
    idx = s.find(BAD_HTML)
    if idx >= 0:
        s = s[idx + len(BAD_HTML) + 1:]
    parser = mop_list_parser.Parser()
    parser.feed(s)
    parser.close()
    mop_list = parser.get_mop_list()
    parser = mop_info_parser.Parser()
    for mp in mop_list:
        print '%3d: %s, %s' % (mop_list.index(mp), mp['surname'], mp['firstnames'])
        s = http_cache.open_url(url_base + mp['link'], 'member')
        parser.reset(is_lame_frame=True)
        parser.feed(s)
        parser.close()
        mp.update(parser.get_desc())
        print '%3d: person number %s' % (mop_list.index(mp), mp['hnro'])
        try:
            member = Member.objects.get(pk=mp['hnro'])
        except Member.DoesNotExist:
            member = None
        if member and not update:
            continue
        s = http_cache.open_url(url_base + heti_url % mp['hnro'], 'member')
        parser.reset(is_lame_frame=False)
        parser.feed(s)
        parser.close()
        mp.update(parser.get_desc())
        photo_url = url_base + mp['photo']
        ext = os.path.splitext(mp['photo'])[-1]
        fname = slugify(mp['name'])
        mp['photo'] = fname + ext
        photo_fname = static_path + mp_photo_path + mp['photo']
        create_path_for_file(photo_fname)
        if not os.path.exists(photo_fname):
            print 'Fetching photo ' + photo_url
            s = http_cache.open_url(photo_url, 'member')
            f = open(photo_fname, 'wb')
            f.write(s)
            f.close()
        else:
            print 'Skipping photo ' + mp['photo']
        party_name = None
        if 'party' in mp:
            party_name = find_party(party_list, mp['party'])
            if not party_name:
                raise Exception('Unknown party')
        for assoc in mp['assoc']:
            if 'end' not in assoc:
                end = None
            else:
                end = assoc['end']
            party = find_party(party_list, assoc['name'])
            if party == None:
                if not end:
                    print assoc
                    raise Exception('party not found')
                # FIXME: Maybe add the party?
                assoc['name'] = None
            else:
                assoc['name'] = party
        # Find last party association
        last_assoc = sorted(mp['assoc'], key=operator.itemgetter('start'))[-1]
        if 'end' in last_assoc:
            if party_name:
                raise Exception('party set for inactive MP')
            party_name = last_assoc['name']
        if not db_insert:
            continue
        if not member:
            member = Member()
            member.id = mp['hnro']
        member.name = mp['name']
        member.party_id = party_name
        member.photo = mp_photo_path + mp['photo']
        member.info_link = url_base + heti_url % mp['hnro']
        member.birth_date = mp['birthdate']
        member.given_names = mp['firstnames']
        member.surname = mp['surname']
        if 'phone' in mp:
            member.phone = mp['phone']
        if 'email' in mp:
            member.email = mp['email']
        member.save()
        PartyAssociation.objects.filter(member=member).delete()
        for assoc in mp['assoc']:
            if not assoc['name']:
                continue
            if 'end' not in assoc:
                end = None
            else:
                end = assoc['end']
            if assoc['name'] == 'vr':
                assoc['name'] = 'vas'
            party = Party.objects.get(name=assoc['name'])
            pa = PartyAssociation()
            pa.member = member
            pa.party_id = party.pk
            pa.begin = assoc['start']
            pa.end = end
            pa.save()
        DistrictAssociation.objects.filter(member=member).delete()
        for assoc in mp['district']:
            if 'end' not in assoc:
                end = None
            else:
                end = assoc['end']
            da = DistrictAssociation()
            da.member = member
            da.name = assoc['name']
            da.begin = assoc['start']
            da.end = end
            da.save()
    return mop_list
def download_he(info, doc):
    assert doc
    p_info = download_processing_info(doc)
    doc.date = p_info['date']
    doc.subject = p_info['subject']
    doc.save()
    logger.info('%s: %s' % (doc, doc.subject))
    m = re.match('(\d+)/(\d{4})', info['id'])
    number, year = map(int, m.groups())
    url = HE_URL % (number, year)
    s = http_cache.open_url(url, 'docs', error_ok=True)
    if len(s) > 2*1024*1024:
        logger.warning('response too big (%d bytes)' % len(s))
        return doc
    if not s:
        (s, url) = http_cache.open_url(info['doc_link'], 'docs', return_url=True)
        if '<!-- akxereiloydy.thw -->' in s or '<!-- akx5000.thw -->' in s:
            print "\tNot found!"
            return doc
        html_doc = html.fromstring(s)
        frames = html_doc.xpath(".//frame")
        link_elem = None
        for f in frames:
            if f.attrib['src'].startswith('temp/'):
                link_elem = f
                break
        html_doc.make_links_absolute(url)
        url = link_elem.attrib['src']
        print "\tGenerated and found!"
        s = http_cache.open_url(url, 'docs')
    # First check that it's a valid HE doc; the surest way to
    # detect it appears to be the length. *sigh*
    if len(s) < 1500:
        print "\tJust PDF"
        return doc
    html_doc = html.fromstring(s)
    elem_list = html_doc.xpath(".//p[@class='Normaali']")
    ELEM_CL = ['LLEsityksenPaaSis',
               'LLEsityksenp-00e4-00e4asiallinensis-00e4lt-00f6',
               'LLVSEsityksenp-00e4-00e4asiallinensis-00e4lt-00f6',
               'LLPaaotsikko']
    for cl in ELEM_CL:
        elem = html_doc.xpath(".//p[@class='%s']" % cl)
        if elem:
            break
    if not elem:
        print "\tNo header found: %d" % len(s)
        print http_cache.get_fname(url, 'docs')
        return doc
    # Choose the first header. Sometimes they are replicated. *sigh*
    elem = elem[0].getnext()
    p_list = []
    if 'class' in elem.attrib and elem.attrib['class'] == 'LLNormaali' and \
       elem.getnext().attrib['class'] == 'LLKappalejako':
        elem = elem.getnext()
    while elem is not None:
        if elem.tag != 'p':
            print elem.tag
            break
        OK_CLASS = ('LLKappalejako', 'LLJohtolauseKappaleet',
                    'LLVoimaantulokappale',
                    'LLKappalejako-0020Char-0020Char-0020Char',  # WTF
                    )
        if not 'class' in elem.attrib or elem.attrib['class'] not in OK_CLASS:
            break
        p_list.append(elem)
        elem = elem.getnext()
    BREAK_CLASS = ('LLNormaali', 'LLYleisperustelut', 'LLPerustelut',
                   'LLNormaali-0020Char', 'Normaali', 'LLSisallysluettelo')
    if 'class' in elem.attrib and elem.attrib['class'] not in BREAK_CLASS:
        print "\tMystery class: %s" % elem.attrib
        print http_cache.get_fname(url, 'docs')
        return doc
    if not p_list:
        print "\tNo summary found"
        print http_cache.get_fname(url, 'docs')
        return doc
    text_list = []

    def append_text(elem, no_append=False):
        text = ''
        if elem.text:
            text = elem.text.replace('\r', '').replace('\n', '').strip()
            text = text.replace(u'\xa0', '')  # drop non-breaking spaces
        if elem.getchildren():
            for ch in elem.getchildren():
                text += append_text(ch, no_append=True)
        if len(text) < 15 and u'\u2014' in text:
            return
        if no_append:
            return text
        text = text.strip()
        if text:
            text_list.append(text)

    for p in p_list:
        append_text(p)
    doc.summary = '\n'.join(text_list)
    attach_keywords(doc, p_info['keywords'])
    if 'docs' in info:
        download_related_docs(doc, info['docs'])
    doc.save()
    return doc
def download_doc(info, doc):
    logger.info("downloading %s %s" % (info['type'], info['id']))
    if not doc:
        assert not Document.objects.filter(type=info['type'], name=info['id'])
        doc = Document(type=info['type'], name=info['id'])
    url = DOC_DL_URL % (info['type'], info['id'])
    doc.info_link = url
    if not should_download_doc(info):
        logger.warning("skipping %s %s" % (info['type'], info['id']))
        doc.save()
        return doc
    if info['type'] == 'HE':
        return download_he(info, doc)
    if info['type'] == 'VNT':
        p_info = download_processing_info(doc)
        doc.date = p_info['date']
        doc.subject = p_info['subject']
        doc.save()
        attach_keywords(doc, p_info['keywords'])
        return doc
    s = http_cache.open_url(url, 'docs')
    html_doc = html.fromstring(s)
    html_doc.make_links_absolute(url)
    el_list = html_doc.xpath(".//a[contains(., 'Rakenteinen asiakirja')]")
    assert el_list and len(el_list) == 1
    sgml_url = el_list[0].attrib['href']
    s = http_cache.open_url(sgml_url, 'docs')
    f = open("/tmp/%s%s.xml" % (info['type'], info['id'].replace('/', '-')), "w")
    f.write(s)
    f.close()
    sgml_doc = html.fromstring(s)
    el_list = sgml_doc.xpath('.//ident/nimike')
    assert len(el_list) >= 1
    el = el_list[0]
    text = clean_string(el.text)
    logger.info('%s: %s' % (doc, text))
    doc.subject = text
    if doc.type.endswith('VM'):
        el_name_list = ('asianvir', 'emasianv')
    else:
        el_name_list = ('peruste', 'paasis', 'yleisper')
    for el_name in el_name_list:
        summ_el_list = sgml_doc.xpath('.//%s' % el_name)
        if not len(summ_el_list):
            continue
        assert len(summ_el_list) == 1
        break
    p_list = summ_el_list[0].xpath('//te')
    summary = []
    for p_el in p_list:
        text = clean_string(p_el.text_content())
        summary.append(text)
    doc.summary = '\n'.join(summary)
    doc.save()
    process_doc_signatures(doc, sgml_doc)
    # no processing info for committee reports
    if not doc.type.endswith('VM'):
        p_info = download_processing_info(doc)
        attach_keywords(doc, p_info['keywords'])
    if 'docs' in info:
        download_related_docs(doc, info['docs'])
    return doc
def process_session_votes(url, pl_sess_name):
    parser = vote_list_parser.Parser()
    s = http_cache.open_url(url, 'votes')
    parser.reset()
    parser.feed(s)
    parser.close()
    votes = parser.get_votes()
    desc = parser.get_desc()
    desc['nr'] = int(desc['nr'])
    desc['pl_session'] = pl_sess_name
    if pl_sess_name in pl_sess_list:
        pl_sess = pl_sess_list[pl_sess_name]
    else:
        try:
            pl_sess = PlenarySession.objects.get(name=pl_sess_name)
        except PlenarySession.DoesNotExist:
            pl_sess = PlenarySession(name=pl_sess_name)
        pl_sess_list[pl_sess_name] = pl_sess
        pl_sess.date = desc['date']
        pl_sess.term = Term.objects.get_for_date(pl_sess.date)
        pl_sess.info_link = url_base + desc['session_link']
        pl_sess.save()
    try:
        sess = Session.objects.get(plenary_session=pl_sess, number=desc['nr'])
    except Session.DoesNotExist:
        sess = Session(plenary_session=pl_sess, number=desc['nr'])
    sess.time = desc['time']
    sess.info = '\n'.join(desc['info'])
    sess.subject = desc['subject']
    sess.info_link = None
    sess.save()
    sess.docs.clear()
    sess.keywords.clear()
    for idx, doc_info in enumerate(desc['docs']):
        doc = Document.objects.filter(type=doc_info['type'], name=doc_info['id'])
        if not doc:
            doc = download_doc(doc_info, None)
        else:
            doc = doc[0]
        sd = SessionDocument(session=sess, doc=doc, order=idx)
        sd.save()
        for kw in doc.keywords.all():
            sess.keywords.add(kw)
    sess.vote_set.all().delete()
    for v in votes:
        vote = Vote()
        vote.session = sess
        vote.member_name = v[0]
        vote.party = v[1]
        vote.vote = v[2]
        if not vote.member_name in mem_name_list:
            member = Member.objects.get(name=vote.member_name)
            mem_name_list[vote.member_name] = member
        vote.member = mem_name_list[vote.member_name]
        vote.save()
    sess.count_votes()
    sess.save()
    db.reset_queries()
    return sess