def load_from_url(root_url, html_text, filter_string):
    """Load elections listed on an index page, then each election's candidates.

    The index is parsed from ``html_text`` when it is non-empty; otherwise it
    is fetched from ``root_url`` with ``get_soup``. Every ``<a>`` element with
    class ``vibLink`` on the index is one election: its stripped text is the
    election name and its ``href`` (passed through ``fix_link``) is the
    election URL.

    For each selected election the election page is fetched, the voting date
    is read from it, the matching ``Election`` row (looked up by name and
    date) is fetched or created, candidates are loaded through
    ``load_cantidates_from_url`` and ``delete_old_infos`` is called.

    Args:
        root_url: URL of the index page; only used when ``html_text`` is empty.
        html_text: Raw HTML of the index page, or an empty value to fetch
            ``root_url`` instead.
        filter_string: Case-insensitive substring that an election's link text
            must contain. An empty string (or ``None``) selects every election
            and also enables the 12-hour "recently updated" skip.
    """
    logger.info('Стартует загрузка данных по выборам')

    if html_text:
        # Strip each line and join them without separators before parsing.
        html = "".join(line.strip() for line in html_text.split("\n"))
        soup = BeautifulSoup(html, "html.parser")
    else:
        soup = get_soup(root_url)

    # Lower-case the filter once instead of once per link.
    unfiltered = not filter_string
    needle = "" if unfiltered else filter_string.lower()
    election_urls = [
        (link.text.strip(), fix_link(link['href']))
        for link in soup.find_all('a', class_="vibLink")
        if unfiltered or needle in link.text.lower()
    ]
    logger.info('Начинается загрузка, число выборов в списке - %s', len(election_urls))

    for name, url in election_urls:
        page = get_soup(url)
        candidates_link = page.find("a", text="Сведения о кандидатах")

        date_elem = page.find(text="Дата голосования")
        if not date_elem:
            logger.info('Нет даты голосования для %s', name)
            continue
        # The date is the text of the second <td> found under the label's
        # great-grandparent element.
        date_text = date_elem.parent.parent.parent.find_all("td")[1].text
        # Strip so surrounding whitespace in the cell does not break strptime.
        date = datetime.strptime(date_text.strip(), '%d.%m.%Y')

        try:
            election = Election.objects.get(name=name, date=date)
        except Election.DoesNotExist:
            election = Election(name=name, date=date, url=url)
            election.save()
        else:
            # If without filter and last update too close: skip this election.
            if unfiltered and django.utils.timezone.now() - election.updated_at < timedelta(hours=12):
                logger.info('Выборы обновлялись меньше чем 12 часов назад %s', name)
                continue
            if election.url != url:
                # Persisted by the save() after the candidates are loaded.
                election.url = url

        if not candidates_link:
            logger.info('Нет линка на кандидадтов %s', name)
            continue

        load_cantidates_from_url(fix_link(candidates_link['href']), election)
        # Original note: "date update anyway" - saved unconditionally after a
        # successful load (presumably to refresh updated_at; confirm on model).
        election.save()
        # Original note: sometimes TIKs change candidate date, need to remove old.
        delete_old_infos(election)

    logger.info("ЗАГРУЗКА ПРОШЛА НОРМАЛЬНО, все выборы отбработаны")
def load_from_json(json_file):
    """Import elections and their candidates from a JSON dump.

    ``json_file`` must provide ``read()`` returning UTF-8 encoded bytes that
    decode to a JSON array. Each element is indexed as::

        [name, date "DD.MM.YYYY", url, candidates]

    and each entry of ``candidates`` is indexed as::

        [name, url, birthdate "DD.MM.YYYY", party, address, edu, firm,
         job, dep, criminal, status, district]

    ``Election`` rows are matched by (name, date), ``Person`` rows by
    (name, birthdate) and ``Info`` rows by (person, election). Missing rows
    are created; an existing election is re-saved only when its URL differs;
    every ``Info`` row has its fields overwritten from the record and saved.

    Args:
        json_file: Binary file-like object holding the JSON document.
    """
    date_format = '%d.%m.%Y'
    data = json.loads(json_file.read().decode('utf-8'))

    for d in data:
        name = d[0]
        date = datetime.strptime(d[1], date_format)
        url = d[2]

        try:
            election = Election.objects.get(name=name, date=date)
        except Election.DoesNotExist:
            election = Election(name=name, date=date, url=url)
            election.save()
        else:
            if election.url != url:
                election.url = url
                election.save()

        candidates = d[3]
        for c in candidates:
            # Parse once; the value is used for both the lookup and creation.
            birthdate = datetime.strptime(c[2], date_format)
            try:
                person = Person.objects.get(name=c[0], birthdate=birthdate)
            except Person.DoesNotExist:
                person = Person(name=c[0], birthdate=birthdate)
                person.save()

            try:
                info = Info.objects.get(person=person, election=election)
            except Info.DoesNotExist:
                # Not saved yet: the single save() below writes the new row
                # fully populated instead of a blank row followed by an update.
                info = Info(person=person, election=election)

            info.url = c[1]
            info.party = c[3]
            info.address = c[4]
            info.edu = c[5]
            info.firm = c[6]
            info.job = c[7]
            info.dep = c[8]
            info.criminal = c[9]
            info.status = c[10]
            info.district = c[11]
            info.save()