def process_candidates():
    """Import candidates from ``candidates.csv`` and attach them to elections.

    Every line of the file is split on commas and read positionally:

    * column 0 -- area name, resolved through ``area_getter``
    * column 1 -- kind of election; the election is looked up
      case-insensitively under the name ``"<kind> por <area.name>"``
    * column 2 -- candidate name
    * columns 3, 4, 5 -- pacto, sub pacto and partido; each one is stored
      as a ``PersonalData`` entry when it is not empty
    * column 6 -- contact e-mail; stored as a ``CandidacyContact`` when it
      is not empty

    Columns 3 to 6 are optional: a row that ends early is treated as if the
    missing cells were empty.  After a row has been imported,
    ``send_candidate_username_and_password`` is called with the new
    candidate.

    The import stops at the first row whose area or election cannot be
    found.  Candidates created from earlier rows are left in place (there
    is no rollback).
    """
    counter = 0
    # The context manager closes the file on every exit path, including the
    # early returns below.
    with codecs.open("candidates.csv", 'r', encoding='utf-8') as reader:
        for line in reader:
            row = line.split(u',')

            area_name = row[0].title().strip()
            area = area_getter(area_name)
            if area is None:
                print(u'no encontré a ' + area_name)
                return

            kind_of = row[1].title().strip()
            election_name = kind_of + u' por ' + area.name
            # A single get() replaces the former filter()-then-get() pair;
            # more than one match still raises MultipleObjectsReturned.
            try:
                election = Election.objects.get(name__iexact=election_name)
            except Election.DoesNotExist:
                print(u"no pillé a " + election_name)
                return

            candidate = Candidate(name=row[2].title().strip())
            candidate.save()
            election.candidates.add(candidate)
            # One pre-formatted argument prints the same text whether
            # ``print`` is a statement or a function.
            print('%s %s' % (election, candidate))

            # Slicing never raises, so a short row simply yields fewer cells;
            # pad them so the four optional values always unpack.
            optional = [cell.strip() for cell in row[3:7]]
            optional += [u''] * (4 - len(optional))
            pacto, sub_pacto, partido, mail = optional

            mail = mail.lower()
            if mail:
                CandidacyContact.objects.create(candidate=candidate, mail=mail)

            personal_data = (
                (u'Pacto', pacto.title()),
                (u'Sub Pacto', sub_pacto.title()),
                (u'Partido', partido.title()),
            )
            for label, value in personal_data:
                if value:
                    PersonalData.objects.create(candidate=candidate,
                                                label=label,
                                                value=value)

            send_candidate_username_and_password(candidate)

            counter += 1
            if counter % 1000 == 0:
                # Progress marker every 1000 imported candidates.
                print(u'van' + str(counter))
def process_candidates():
    """Import candidates from ``candidates.csv`` and attach them to elections.

    NOTE(review): this file contains a second definition of
    ``process_candidates`` with the same body; if both live at module level
    the later one is the one that stays bound -- consider removing one.

    Every line of the file is split on commas and read positionally:

    * column 0 -- area name, resolved through ``area_getter``
    * column 1 -- kind of election; the election is looked up
      case-insensitively under the name ``"<kind> por <area.name>"``
    * column 2 -- candidate name
    * columns 3, 4, 5 -- pacto, sub pacto and partido; each one is stored
      as a ``PersonalData`` entry when it is not empty
    * column 6 -- contact e-mail; stored as a ``CandidacyContact`` when it
      is not empty

    Columns 3 to 6 are optional: a row that ends early is treated as if the
    missing cells were empty.  After a row has been imported,
    ``send_candidate_username_and_password`` is called with the new
    candidate.

    The import stops at the first row whose area or election cannot be
    found.  Candidates created from earlier rows are left in place (there
    is no rollback).
    """
    counter = 0
    # The context manager closes the file on every exit path, including the
    # early returns below.
    with codecs.open("candidates.csv", 'r', encoding='utf-8') as reader:
        for line in reader:
            row = line.split(u',')

            area_name = row[0].title().strip()
            area = area_getter(area_name)
            if area is None:
                print(u'no encontré a ' + area_name)
                return

            kind_of = row[1].title().strip()
            election_name = kind_of + u' por ' + area.name
            # A single get() replaces the former filter()-then-get() pair;
            # more than one match still raises MultipleObjectsReturned.
            try:
                election = Election.objects.get(name__iexact=election_name)
            except Election.DoesNotExist:
                print(u"no pillé a " + election_name)
                return

            candidate = Candidate(name=row[2].title().strip())
            candidate.save()
            election.candidates.add(candidate)
            # One pre-formatted argument prints the same text whether
            # ``print`` is a statement or a function.
            print('%s %s' % (election, candidate))

            # Slicing never raises, so a short row simply yields fewer cells;
            # pad them so the four optional values always unpack.
            optional = [cell.strip() for cell in row[3:7]]
            optional += [u''] * (4 - len(optional))
            pacto, sub_pacto, partido, mail = optional

            mail = mail.lower()
            if mail:
                CandidacyContact.objects.create(candidate=candidate, mail=mail)

            personal_data = (
                (u'Pacto', pacto.title()),
                (u'Sub Pacto', sub_pacto.title()),
                (u'Partido', partido.title()),
            )
            for label, value in personal_data:
                if value:
                    PersonalData.objects.create(candidate=candidate,
                                                label=label,
                                                value=value)

            send_candidate_username_and_password(candidate)

            counter += 1
            if counter % 1000 == 0:
                # Progress marker every 1000 imported candidates.
                print(u'van' + str(counter))
counts = table.find_all(class_="read") days = table.find_all(class_="date") for tit, count, day in zip(t**s, counts, days): title = tit.a.get_text() link = tit.a.get('href') read = count.get_text() date = day.get_text() temp_dict = { 'day': date, 'title': title, 'count': read, 'link': link } temp_list.append(temp_dict) #toJson(temp_list) return temp_list #parsing() if __name__ == '__main__': parsed_data = parsing() for i in range(len(parsed_data)): new_candidate = Candidate(name=parsed_data[i]["day"], introduction=parsed_data[i]["title"], area=parsed_data[i]["count"], party_number=parsed_data[i]["link"]) new_candidate.save()
def handle_label(self, label, **options):
    """Create or refresh ``Candidate`` rows from a pipe-delimited file.

    ``label`` is the path of the file to read.  Each row is read
    positionally: column 0 is the politician id and columns 1-20 map, in
    order, onto the ``Candidate`` attributes listed in ``bio_fields`` below
    (column 20 is the row's timestamp).

    For every row:

    * no candidate has that ``politician_id`` -- a new one is created, with
      ``politician_id``, ``ap_candidate_id`` and ``candidate_number`` all
      set to the politician id;
    * a candidate exists and its ``timestamp`` differs from the row's --
      its biography attributes are overwritten and it is saved;
    * a candidate exists with the same ``timestamp`` -- the row is skipped.

    ``options`` is accepted for interface compatibility and not used.
    """
    import csv

    # Candidate attribute for each of columns 1..20, in column order.
    bio_fields = (
        'first_name', 'middle_name', 'last_name', 'junior',
        'year_first_elected', 'birth_date', 'birth_place', 'birth_state',
        'birth_province', 'birth_country', 'residence_place',
        'residence_state', 'gender', 'ethnicity', 'hispanic', 'religion',
        'biography', 'profile', 'campaigns', 'timestamp',
    )

    # The context manager closes the file even when a row fails to parse.
    with open(label, 'rb') as bio_file:
        for row in csv.reader(bio_file, delimiter='|'):
            row[0] = int(row[0])  # politician id
            row[5] = int(row[5]) if row[5] else None  # year first elected
            row[20] = datetime.datetime.strptime(
                row[20], "%m-%d-%Y %I:%M:%S %p")  # timestamp
            # Gender is reduced to its first character; slicing (rather
            # than indexing) tolerates an empty field.
            row[13] = row[13][:1]
            if row[6]:
                row[6] = datetime.datetime.strptime(
                    row[6], "%Y-%m-%d").date()  # birthdate
            else:
                row[6] = None

            # Only the lookup sits inside the try, so a DoesNotExist raised
            # from anywhere else is not mistaken for a missing candidate.
            try:
                candidate = Candidate.objects.get(politician_id=row[0])
            except Candidate.DoesNotExist:
                print('Adding %s %s' % (row[1], row[3]))
                candidate = Candidate()
                candidate.politician_id = row[0]
                candidate.ap_candidate_id = row[0]
                candidate.candidate_number = row[0]
            else:
                if candidate.timestamp == row[20]:
                    print("Skipping %s %s. No change." % (row[1], row[3]))
                    continue
                print('Updating %s %s' % (row[1], row[3]))

            # Shared by the create and update paths.
            for column, field in enumerate(bio_fields, 1):
                setattr(candidate, field, row[column])
            candidate.save()