def load_cantidates_from_url(url, election):
    """Load the candidates of *election* from the paginated listing at *url*.

    Pages are requested as ``url + "&number=<n>"`` for n = 1, 2, ... and the
    loop stops at the first page that has no element with id ``test`` or
    whose table contains no ``<tr>`` rows.

    For every row, a ``Person`` (looked up by name and birthdate) and an
    ``Info`` (looked up by person and election) are fetched, or created and
    saved when missing.  ``load_info`` is then called with the candidate
    link, party, district and the ``Info`` record.

    Args:
        url: Base URL of the candidate listing.  The page number is appended
            with ``&``, so the URL presumably already carries a query
            string -- verify against the caller.
        election: Election object; its ``name`` is used in log messages and
            the object itself keys the ``Info`` lookup.
    """
    page = 1
    while True:
        soup = get_soup(url + "&number=" + str(page))
        page += 1

        table = soup.find(id="test")
        if not table:
            logger.info('Нет таблицы кандидатов для %s', election.name)
            break

        rows = table.find_all("tr")
        if not rows:
            logger.info('Таблица кандидатов пустая %s', election.name)
            break

        for row in rows:
            cells = row.find_all("td")

            # The candidate link sits one level down inside the name cell
            # (inside a <nobr> tag, per the original author's note).
            anchor = cells[1].contents[0].contents[0]
            name = anchor.text.strip()
            logger.info('Загружен кандидат %s, выборы %s', name, election.name)
            link = fix_link(anchor['href'])

            # Strip like the other cells: strptime rejects surrounding
            # whitespace that is not part of the format.
            birthdate = datetime.strptime(cells[2].text.strip(), '%d.%m.%Y')
            party = cells[3].text.strip()
            district = cells[4].text.strip()

            try:
                person = Person.objects.get(name=name, birthdate=birthdate)
            except Person.DoesNotExist:
                person = Person(name=name, birthdate=birthdate)
                person.save()

            try:
                info = Info.objects.get(person=person, election=election)
            except Info.DoesNotExist:
                info = Info(person=person, election=election)
                info.save()

            load_info(link, party, district, info)
def load_from_json(json_file):
    """Import elections and their candidates from a JSON document.

    The decoded document is iterated as a sequence of election records
    indexed as ``[name, date, url, candidates]``.  Each candidate is indexed
    as ``[name, url, birthdate, party, address, edu, firm, job, dep,
    criminal, status, district]``.  Dates are parsed with ``%d.%m.%Y``.

    An ``Election`` is looked up by name and date; it is created when
    missing, and its ``url`` is updated when it differs.  For each candidate
    a ``Person`` (by name and birthdate) and an ``Info`` (by person and
    election) are fetched or created, and the ``Info`` fields are
    overwritten from the record and saved.

    Args:
        json_file: File-like object whose ``read()`` returns the JSON
            document, either as text or as UTF-8 encoded bytes.
    """
    raw = json_file.read()
    if not isinstance(raw, str):
        # Binary-mode files and uploads hand back bytes; text-mode files
        # already yield str and must not be decoded again.
        raw = raw.decode('utf-8')
    records = json.loads(raw)

    for record in records:
        name = record[0]
        date = datetime.strptime(record[1], '%d.%m.%Y')
        url = record[2]

        try:
            election = Election.objects.get(name=name, date=date)
        except Election.DoesNotExist:
            election = Election(name=name, date=date, url=url)
            election.save()
        else:
            # Existing election: only write when the stored URL is stale.
            if election.url != url:
                election.url = url
                election.save()

        for candidate in record[3]:
            # Parse once; the value is used for both lookup and creation.
            birthdate = datetime.strptime(candidate[2], '%d.%m.%Y')
            try:
                person = Person.objects.get(name=candidate[0],
                                            birthdate=birthdate)
            except Person.DoesNotExist:
                person = Person(name=candidate[0], birthdate=birthdate)
                person.save()

            try:
                info = Info.objects.get(person=person, election=election)
            except Info.DoesNotExist:
                # Not saved here: the single save below writes the new
                # record once, after its fields have been filled in.
                info = Info(person=person, election=election)

            info.url = candidate[1]
            info.party = candidate[3]
            info.address = candidate[4]
            info.edu = candidate[5]
            info.firm = candidate[6]
            info.job = candidate[7]
            info.dep = candidate[8]
            info.criminal = candidate[9]
            info.status = candidate[10]
            info.district = candidate[11]
            info.save()