def process_parties(db_insert):
    """Fetch the party list, download missing logos and optionally store parties.

    The party list page is fetched through ``http_cache`` and parsed with
    ``party_list_parser``.  For every party except the one named ``'vr'``
    the info page is fetched and parsed, the resulting description is merged
    into the party dict, and the logo is written below ``static_path`` unless
    a file with that name already exists.

    Args:
        db_insert: when true, create or update a ``Party`` row per party.

    Returns:
        The list of party dicts produced by the list parser.  Processed
        entries are updated in place with the info-page description, and
        their ``'logo'`` value is replaced by the local logo file name.
    """
    s = http_cache.open_url(party_url_base + party_list_url, 'party')
    parser = party_list_parser.Parser()
    parser.feed(s)
    parser.close()
    party_list = parser.get_list()

    # One info parser instance is reused (reset) for every party.
    parser = party_info_parser.Parser()
    for party in party_list:
        # The 'vr' entry is skipped entirely: no info page, logo or DB row.
        if party['name'] == 'vr':
            continue

        s = http_cache.open_url(party_url_base + party['info_link'], 'party')
        parser.reset()
        parser.feed(s)
        parser.close()
        party.update(parser.get_desc())

        # Remember the remote logo URL before 'logo' is overwritten with the
        # local file name.
        logo_url = party_url_base + party['logo']
        fname = party['name'].encode('iso8859-1') + '.jpg'
        party['logo'] = fname
        fname = static_path + party_logo_path + fname
        create_path_for_file(fname)
        if not os.path.exists(fname):
            # Single-argument parenthesised print is output-identical to the
            # Python 2 print statement.
            print('Fetching logo ' + logo_url)
            s = http_cache.open_url(logo_url, 'party')
            f = open(fname, 'wb')
            try:
                f.write(s)
            finally:
                # Always release the handle, even if the write fails.
                f.close()
        else:
            print('Skipping logo ' + party['logo'])

        if not db_insert:
            continue

        try:
            p = Party.objects.get(name=party['name'])
        except Party.DoesNotExist:
            p = Party()
            p.name = party['name']
        # An already stored full name is never overwritten.
        if not p.full_name:
            p.full_name = party['fullname']
        p.logo = party_logo_path + party['logo']
        p.info_link = party_url_base + party['info_link']
        p.save()

    return party_list
def process_mops(party_list, update=False, db_insert=False):
    """Fetch the MP list, download missing photos and optionally store members.

    For each entry of the parsed MP list the member's info page is fetched
    and merged into the entry.  Members that already exist in the database
    are skipped unless ``update`` is true.  For the remaining members a
    second page (``heti_url``) is fetched and merged, the photo is written
    below ``static_path`` unless the file already exists, and the party
    names of all associations are resolved through ``find_party``.

    Args:
        party_list: party list passed on to ``find_party`` for name lookups.
        update: when true, members already in the database are re-processed.
        db_insert: when true, the ``Member`` row is saved and the member's
            ``PartyAssociation`` and ``DistrictAssociation`` rows are deleted
            and recreated from the parsed data.

    Returns:
        The list of MP dicts produced by the list parser, updated in place.

    Raises:
        Exception: if the member's current party is unknown, if an
            association without an end date names an unknown party, or if a
            party is set for a member whose latest association has ended.
    """
    s = http_cache.open_url(url_base + mp_list_url, 'member')
    # Cut the page off just past the BAD_HTML marker (the marker plus one
    # following character) before handing it to the parser.
    BAD_HTML = '<! hx4600.thw>'
    idx = s.find(BAD_HTML)
    if idx >= 0:
        s = s[idx + len(BAD_HTML) + 1:]
    parser = mop_list_parser.Parser()
    parser.feed(s)
    parser.close()
    mop_list = parser.get_mop_list()

    # One info parser instance is reused (reset) for every page.
    parser = mop_info_parser.Parser()
    # enumerate() supplies the position directly instead of searching the
    # list with index() on every iteration.
    for mp_idx, mp in enumerate(mop_list):
        # Single-argument parenthesised print is output-identical to the
        # Python 2 print statement.
        print('%3d: %s, %s' % (mp_idx, mp['surname'], mp['firstnames']))
        s = http_cache.open_url(url_base + mp['link'], 'member')
        parser.reset(is_lame_frame=True)
        parser.feed(s)
        parser.close()
        mp.update(parser.get_desc())

        print('%3d: person number %s' % (mp_idx, mp['hnro']))

        try:
            member = Member.objects.get(pk=mp['hnro'])
        except Member.DoesNotExist:
            member = None

        # Known members are only re-processed when an update was requested.
        if member and not update:
            continue

        s = http_cache.open_url(url_base + heti_url % mp['hnro'], 'member')
        parser.reset(is_lame_frame=False)
        parser.feed(s)
        parser.close()
        mp.update(parser.get_desc())

        # Remember the remote photo URL before 'photo' is overwritten with
        # the local file name.
        photo_url = url_base + mp['photo']
        ext = os.path.splitext(mp['photo'])[-1]
        fname = slugify(mp['name'])
        mp['photo'] = fname + ext
        photo_fname = static_path + mp_photo_path + mp['photo']
        create_path_for_file(photo_fname)
        if not os.path.exists(photo_fname):
            print('Fetching photo ' + photo_url)
            s = http_cache.open_url(photo_url, 'member')
            f = open(photo_fname, 'wb')
            try:
                f.write(s)
            finally:
                # Always release the handle, even if the write fails.
                f.close()
        else:
            print('Skipping photo ' + mp['photo'])

        party_name = None
        if 'party' in mp:
            party_name = find_party(party_list, mp['party'])
            if not party_name:
                raise Exception('Unknown party')

        # Replace each association's party name with the value returned by
        # find_party; ended associations with unknown parties get None.
        for assoc in mp['assoc']:
            end = assoc.get('end')
            party = find_party(party_list, assoc['name'])
            if party is None:
                if not end:
                    print(assoc)
                    raise Exception('party not found')
                    # FIXME: Maybe add the party?
                assoc['name'] = None
            else:
                assoc['name'] = party

        # Find last party association
        last_assoc = sorted(mp['assoc'], key=operator.itemgetter('start'))[-1]
        if 'end' in last_assoc:
            if party_name:
                raise Exception('party set for inactive MP')
            party_name = last_assoc['name']

        if not db_insert:
            continue

        if not member:
            member = Member()
            member.id = mp['hnro']
        member.name = mp['name']
        member.party_id = party_name
        member.photo = mp_photo_path + mp['photo']
        member.info_link = url_base + heti_url % mp['hnro']
        member.birth_date = mp['birthdate']
        member.given_names = mp['firstnames']
        member.surname = mp['surname']
        if 'phone' in mp:
            member.phone = mp['phone']
        if 'email' in mp:
            member.email = mp['email']
        member.save()

        # Party associations are rebuilt from scratch on every insert.
        PartyAssociation.objects.filter(member=member).delete()
        for assoc in mp['assoc']:
            # Associations whose party could not be resolved are not stored.
            if not assoc['name']:
                continue
            # Associations named 'vr' are stored under the 'vas' party.
            if assoc['name'] == 'vr':
                assoc['name'] = 'vas'
            party = Party.objects.get(name=assoc['name'])
            pa = PartyAssociation()
            pa.member = member
            pa.party_id = party.pk
            pa.begin = assoc['start']
            pa.end = assoc.get('end')
            pa.save()

        # District associations are rebuilt from scratch as well.
        DistrictAssociation.objects.filter(member=member).delete()
        for assoc in mp['district']:
            da = DistrictAssociation()
            da.member = member
            da.name = assoc['name']
            da.begin = assoc['start']
            da.end = assoc.get('end')
            da.save()

    return mop_list