コード例 #1
0
ファイル: worker.py プロジェクト: Kulikovpavel/elections
def load_from_url(root_url, html_text, filter_string):
  """Load the elections listed on an index page and refresh their candidates.

  The index page is parsed from ``html_text`` when it is non-empty; otherwise
  it is fetched from ``root_url`` with ``get_soup``. Every ``a.vibLink``
  anchor whose text contains ``filter_string`` (case-insensitively) is
  followed; an empty or ``None`` filter selects every anchor.

  For each selected election the voting date is read from its page, the
  matching ``Election`` row is fetched or created, candidates are loaded with
  ``load_cantidates_from_url`` and ``delete_old_infos`` is called afterwards.
  Elections without a readable voting date or without a candidates link are
  logged and skipped. When no filter is given, elections whose ``updated_at``
  is less than 12 hours old are skipped as well.

  Args:
    root_url: URL of the index page; used only when ``html_text`` is falsy.
    html_text: Raw HTML of the index page, or a falsy value to fetch it.
    filter_string: Substring the election name must contain; empty or
      ``None`` disables filtering.
  """
  logger.info('Стартует загрузка данных по выборам')
  if html_text:
    # Strip every line and glue them together before handing the markup to
    # the parser.
    html = "".join(line.strip() for line in html_text.split("\n"))
    index_page = BeautifulSoup(html, "html.parser")
  else:
    index_page = get_soup(root_url)

  # Treat None like the empty string and lower-case the filter only once.
  unfiltered = not filter_string
  needle = "" if unfiltered else filter_string.lower()
  election_urls = [
    (a.text.strip(), fix_link(a['href']))
    for a in index_page.find_all('a', class_="vibLink")
    if unfiltered or needle in a.text.lower()
  ]
  logger.info('Начинается загрузка, число выборов в списке - %s' % len(election_urls))

  for name, url in election_urls:
    page = get_soup(url)
    candidates_link = page.find("a", text="Сведения о кандидатах")

    date_elem = page.find(text="Дата голосования")
    if not date_elem:
      logger.info('Нет даты голосования для %s' % name)
      continue
    try:
      # The date is taken from the second <td> found under the label's
      # great-grandparent element.
      date_text = date_elem.parent.parent.parent.find_all("td")[1].text
      date = datetime.strptime(date_text.strip(), '%d.%m.%Y')
    except (IndexError, ValueError):
      # One malformed page must not abort the whole batch; skip it the same
      # way a missing date is skipped.
      logger.info('Не удалось разобрать дату голосования для %s' % name)
      continue

    try:
      election = Election.objects.get(name=name, date=date)
    except Election.DoesNotExist:
      election = Election(name=name, date=date, url=url)
      election.save()
    else:
      # Without a filter, do not re-scrape elections refreshed recently.
      recently_updated = (
        django.utils.timezone.now() - election.updated_at < timedelta(hours=12)
      )
      if unfiltered and recently_updated:
        logger.info('Выборы обновлялись меньше чем 12 часов назад %s' % name)
        continue
      if election.url != url:
        election.url = url
        # Persist right away (as load_from_json does); otherwise the new URL
        # is lost when the page has no candidates link and we skip below.
        election.save()

    if not candidates_link:
      logger.info('Нет линка на кандидадтов %s' % name)
      continue

    load_cantidates_from_url(fix_link(candidates_link['href']), election)
    election.save()  # date update anyway

    # Sometimes TIKs change candidate data, so stale infos must be removed.
    delete_old_infos(election)

  logger.info("ЗАГРУЗКА ПРОШЛА НОРМАЛЬНО, все выборы отбработаны")
コード例 #2
0
ファイル: worker.py プロジェクト: Kulikovpavel/elections
def load_from_json(json_file):
  """Import elections and their candidates from a JSON dump.

  ``json_file.read()`` must return UTF-8 encoded bytes. The decoded document
  is iterated as a sequence of election records indexed as
  ``[name, date, url, candidates]``, where ``date`` is ``DD.MM.YYYY`` text.
  Each candidate is itself an indexable record: index 0 is the person's name,
  index 2 the birthdate (``DD.MM.YYYY``), and indexes 1 and 3-11 are copied
  onto the ``Info`` row linking the person to the election.

  Missing ``Election``, ``Person`` and ``Info`` rows are created; an existing
  election only has its ``url`` updated when it differs.

  Args:
    json_file: File-like object whose ``read()`` yields the encoded JSON.
  """
  date_format = '%d.%m.%Y'
  # (index in the candidate record, Info attribute), in assignment order.
  info_columns = (
    (1, 'url'),
    (3, 'party'),
    (4, 'address'),
    (5, 'edu'),
    (6, 'firm'),
    (7, 'job'),
    (8, 'dep'),
    (9, 'criminal'),
    (10, 'status'),
    (11, 'district'),
  )

  records = json.loads(json_file.read().decode('utf-8'))
  for record in records:
    election_name = record[0]
    election_date = datetime.strptime(record[1], date_format)
    election_url = record[2]
    try:
      election = Election.objects.get(name=election_name, date=election_date)
      if election.url != election_url:
        election.url = election_url
        election.save()
    except Election.DoesNotExist:
      election = Election(name=election_name, date=election_date, url=election_url)
      election.save()

    for candidate in record[3]:
      person_name = candidate[0]
      birthdate = datetime.strptime(candidate[2], date_format)
      try:
        person = Person.objects.get(name=person_name, birthdate=birthdate)
      except Person.DoesNotExist:
        person = Person(name=person_name, birthdate=birthdate)
        person.save()

      try:
        info = Info.objects.get(person=person, election=election)
      except Info.DoesNotExist:
        info = Info(person=person, election=election)
        info.save()

      # Copy the remaining candidate columns onto the Info row, then persist.
      for index, attribute in info_columns:
        setattr(info, attribute, candidate[index])
      info.save()