def handle_row(src, q_info, row):
    """Record one member's numeric survey answers from a single data row.

    ``row[0]`` is normalised with ``parse_tools.fix_mp_name`` and used to
    look up the ``Member``.  If no member matches, a message is printed and
    the row is skipped.  For every entry of ``q_info`` the value found in
    that entry's column is scaled onto an option index and stored as the
    member's ``Answer`` to the question (created when missing, otherwise
    overwritten).

    Args:
        src: Value passed as ``source`` when looking up each ``Question``.
        q_info: Iterable of 4-tuples ``(q_idx, i_idx, order, txt)``.  Only
            ``q_idx`` (column index into ``row``) and ``order`` (question
            order within ``src``) are used here.
        row: Indexable row of values; column 0 holds the member name.

    Raises:
        ValueError: If a non-empty answer cell cannot be parsed by ``int``.
    """
    name = parse_tools.fix_mp_name(row[0])
    mp = Member.objects.filter(name=name)
    if not mp:
        # print() with a single argument behaves the same on Python 2 and 3.
        print("%s not found" % name)
        return
    mp = mp[0]
    print("%s" % mp)

    for q_idx, i_idx, order, txt in q_info:
        if not row[q_idx]:
            print("\tMissing value for column %d" % q_idx)
            continue
        val = int(row[q_idx])

        # Scale the raw value onto OPTION_STEPS buckets.  A value equal to
        # ANSWER_MAX lands one past the last bucket, so fold it back in.
        opt_idx = int(val * OPTION_STEPS // ANSWER_MAX)
        if opt_idx == OPTION_STEPS:
            opt_idx = OPTION_STEPS - 1

        que = Question.objects.get(source=src, order=order)
        opt = Option.objects.get(question=que, order=opt_idx)

        # Reuse the member's existing answer to this question if present.
        try:
            ans = Answer.objects.get(member=mp, question=que)
        except Answer.DoesNotExist:
            ans = Answer(member=mp, question=que)
        ans.option = opt
        ans.explanation = None
        ans.save()
def handle_row(src, row, writer=None):
    """Import one row of answers and optionally export a summary line.

    The first 16 columns are fixed-position fields.  From column 16 on the
    row is read with a stride of three: answers at 16, 19, ..., comments at
    18, 21, ....  The member name is built as "<last_name> <first_name>" and
    normalised with ``parse_tools.fix_mp_name``.

    Args:
        src: Value passed as ``source`` when looking up each ``Question``.
        row: Sequence of column values laid out as described above.
        writer: Optional object with a ``writerow`` method.  When truthy, a
            summary row (name, district, party, age, gender, education) is
            written, each non-empty field encoded as UTF-8.

    If no ``Member`` has the name, the name is printed (unless the
    ``is_mp`` column equals ``'0'``) and nothing is stored.
    """
    # Every field is unpacked, including the unused ones, so that a row
    # which is too short fails here with ValueError.
    (district, party, last_name, first_name,
     age, gender, _is_indep) = row[0:7]
    (_county, edu, _intro, _is_county_off,
     is_mp, _is_mep, _profession) = row[7:14]
    _home_page, _rss_feed = row[14:16]

    answers = row[16::3]
    _importances = row[17::3]  # read but not used by this importer
    comments = row[18::3]

    name = parse_tools.fix_mp_name(' '.join((last_name, first_name)))

    if writer:
        if district.endswith(' vaalipiiri'):
            district = district.replace(' vaalipiiri', '')
        summary = [name, district, party, age, gender, edu]
        encoded = [field.encode('utf8') if field else '' for field in summary]
        writer.writerow(encoded)

    matches = Member.objects.filter(name=name)
    if not matches:
        if is_mp != '0':
            print(name)
        return
    member = matches[0]

    for order, choice in enumerate(answers):
        if order in SKIP_QUESTIONS:
            continue
        remark = comments[order]
        if not (choice or remark):
            # Neither an answer nor a comment: nothing to store.
            continue

        question = Question.objects.get(source=src, order=order)
        option = None
        if choice:
            option = Option.objects.get(question=question, name=choice)

        # Update the stored answer when one exists, otherwise start a new one.
        try:
            answer = Answer.objects.get(member=member, question=question)
        except Answer.DoesNotExist:
            answer = Answer(member=member, question=question)
        answer.option = option
        answer.explanation = remark
        answer.save()
def handle_row(src, q_info, row):
    """Store the numeric answers of the member named in ``row[0]``.

    The name is cleaned with ``parse_tools.fix_mp_name`` and matched against
    ``Member``.  An unknown name is reported on stdout and the row ignored.
    Each ``q_info`` entry is a 4-tuple ``(column, importance column, question
    order, text)``; only the column and the order are used.  The cell value
    is converted to an option index and saved as the member's ``Answer`` for
    that question, replacing any previous option and clearing the
    explanation.

    Args:
        src: Value passed as ``source`` when looking up each ``Question``.
        q_info: Iterable of 4-tuples as described above.
        row: Indexable row of values; column 0 holds the member name.
    """
    member_name = parse_tools.fix_mp_name(row[0])
    matches = Member.objects.filter(name=member_name)
    if not matches:
        print("%s not found" % member_name)
        return
    member = matches[0]
    print("%s" % member)

    for column, _importance_column, order, _text in q_info:
        cell = row[column]
        if not cell:
            print("\tMissing value for column %d" % column)
            continue

        # Map the raw value onto one of OPTION_STEPS buckets; the maximum
        # value would fall one past the end, so it goes in the last bucket.
        bucket = int(int(cell) * OPTION_STEPS // ANSWER_MAX)
        if bucket == OPTION_STEPS:
            bucket = OPTION_STEPS - 1

        question = Question.objects.get(source=src, order=order)
        option = Option.objects.get(question=question, order=bucket)

        try:
            answer = Answer.objects.get(member=member, question=question)
        except Answer.DoesNotExist:
            answer = Answer(member=member, question=question)
        answer.option = option
        answer.explanation = None
        answer.save()
def parse_mp(src, lname, fname, href):
    """Scrape one member's opinion page and store the answers found on it.

    The member is looked up by the normalised name "<lname> <fname>"; when
    no ``Member`` matches, nothing is done.  If the name is a key of
    ``mp_dict``, that object is used instead and its ``found`` attribute is
    set to True.  The page at ``href`` is fetched through
    ``http_cache.open_url`` and every ``em-compare-container`` div is
    treated as one question, numbered by its position on the page.

    Args:
        src: Source passed on to ``add_question``.
        lname: Member's last name.
        fname: Member's first name.
        href: URL of the member's opinion page.

    Raises:
        AssertionError: If the page does not have the expected structure
            (exactly one heading per question, a numbered heading, one table
            row per alternative, at most one chosen alternative, at most one
            comment block, comment wrapped in double quotes).
    """
    name = parse_tools.fix_mp_name("%s %s" % (lname, fname))
    mp = Member.objects.filter(name=name)
    if not mp:
        return
    mp = mp[0]
    if name in mp_dict:
        mp = mp_dict[name]
        mp.found = True
    print(mp)

    s = http_cache.open_url(href, 'opinions')
    doc = html.fromstring(s)
    q_list = doc.xpath(".//div[@class='em-compare-container']")
    for q_idx, q_el in enumerate(q_list):
        if q_idx in SKIP_QUESTIONS:
            continue

        # Heading has the form "<number>. <question text>"; keep the text.
        el = q_el.xpath("./h3")
        assert len(el) == 1
        q_text = el[0].text.strip()
        m = re.match(r'\d+\.\s+(.+)', q_text, re.U)
        assert m
        q_text = m.group(1)

        a_text_list = [td.text.strip()
                       for td in q_el.xpath(".//td[@class='em-text']")]
        q_obj = add_question(src, q_text, q_idx, a_text_list)

        # The table row containing an <acronym> element is taken as the
        # chosen alternative.
        a_list = q_el.xpath(".//table[@class='em-compare-alts ']/tr")
        assert len(a_list) == len(a_text_list)
        chosen = None
        for a_idx, el in enumerate(a_list):
            if el.xpath(".//acronym"):
                # Compare against None: index 0 is a valid earlier choice
                # and must still trip this check.
                assert chosen is None
                chosen = a_idx
        if chosen is None:
            continue

        comm_el = q_el.xpath(".//div[@class='em-comment']")
        if comm_el:
            assert len(comm_el) == 1
            comm_el = comm_el[0]
            # The comment is assembled from the text following each <br>.
            text_list = []
            for br in comm_el.xpath(".//br"):
                if not br.tail:
                    continue
                s = br.tail.strip()
                if s:
                    text_list.append(s)
            comm_text = '\n'.join(text_list)
            # NOTE(review): a comment block with no text after any <br>
            # gives an empty string, and the indexing below then raises
            # IndexError.
            assert comm_text[0] == '"' and comm_text[-1] == '"'
            comm_text = comm_text[1:-1]
        else:
            comm_text = None

        opt = q_obj.opt_dict[chosen]
        # Only a missing answer means "create a new one"; any other lookup
        # failure should surface rather than be hidden.
        try:
            ans = Answer.objects.get(member=mp, question=q_obj)
        except Answer.DoesNotExist:
            ans = Answer(member=mp, question=q_obj)
        ans.option = opt
        ans.explanation = comm_text
        ans.save()
def parse_mp(src, lname, fname, href):
    """Scrape one member's opinion page and store the answers found on it.

    The member is looked up by the normalised name "<lname> <fname>"; when
    no ``Member`` matches, nothing is done.  If the name is a key of
    ``mp_dict``, that object is used instead and its ``found`` attribute is
    set to True.  The page at ``href`` is fetched through
    ``http_cache.open_url`` and every ``em-compare-container`` div is
    treated as one question, numbered by its position on the page.

    Args:
        src: Source passed on to ``add_question``.
        lname: Member's last name.
        fname: Member's first name.
        href: URL of the member's opinion page.

    Raises:
        AssertionError: If the page does not have the expected structure
            (exactly one heading per question, a numbered heading, one table
            row per alternative, at most one chosen alternative, at most one
            comment block, comment wrapped in double quotes).
    """
    name = parse_tools.fix_mp_name("%s %s" % (lname, fname))
    mp = Member.objects.filter(name=name)
    if not mp:
        return
    mp = mp[0]
    if name in mp_dict:
        mp = mp_dict[name]
        mp.found = True
    # print() with a single argument behaves the same on Python 2 and 3.
    print(mp)

    s = http_cache.open_url(href, 'opinions')
    doc = html.fromstring(s)
    q_list = doc.xpath(".//div[@class='em-compare-container']")
    for q_idx, q_el in enumerate(q_list):
        if q_idx in SKIP_QUESTIONS:
            continue

        # Heading has the form "<number>. <question text>"; keep the text.
        el = q_el.xpath("./h3")
        assert len(el) == 1
        q_text = el[0].text.strip()
        m = re.match(r'\d+\.\s+(.+)', q_text, re.U)
        assert m
        q_text = m.group(1)

        a_text_list = [td.text.strip()
                       for td in q_el.xpath(".//td[@class='em-text']")]
        q_obj = add_question(src, q_text, q_idx, a_text_list)

        # The table row containing an <acronym> element is taken as the
        # chosen alternative.
        a_list = q_el.xpath(".//table[@class='em-compare-alts ']/tr")
        assert len(a_list) == len(a_text_list)
        chosen = None
        for a_idx, el in enumerate(a_list):
            if el.xpath(".//acronym"):
                # Compare against None: index 0 is a valid earlier choice
                # and must still trip this check.
                assert chosen is None
                chosen = a_idx
        if chosen is None:
            continue

        comm_el = q_el.xpath(".//div[@class='em-comment']")
        if comm_el:
            assert len(comm_el) == 1
            comm_el = comm_el[0]
            # The comment is assembled from the text following each <br>.
            text_list = []
            for br in comm_el.xpath(".//br"):
                if not br.tail:
                    continue
                s = br.tail.strip()
                if s:
                    text_list.append(s)
            comm_text = '\n'.join(text_list)
            # NOTE(review): a comment block with no text after any <br>
            # gives an empty string, and the indexing below then raises
            # IndexError.
            assert comm_text[0] == '"' and comm_text[-1] == '"'
            comm_text = comm_text[1:-1]
        else:
            comm_text = None

        opt = q_obj.opt_dict[chosen]
        # Only a missing answer means "create a new one"; any other lookup
        # failure should surface rather than be hidden.
        try:
            ans = Answer.objects.get(member=mp, question=q_obj)
        except Answer.DoesNotExist:
            ans = Answer(member=mp, question=q_obj)
        ans.option = opt
        ans.explanation = comm_text
        ans.save()