Exemple #1
0
}
ACT_TYPE_OTHER = "інше"

print('getting acts list')
acts = rada.list_acts()

# Tag each act with the label of every ACT_TYPES prefix that its third field
# starts with.  Acts that still hold exactly three fields afterwards (i.e.
# nothing matched) receive the fallback label instead.
for act_number in acts:
    act = acts[act_number]
    act.extend([
        ACT_TYPES[prefix]
        for prefix in ACT_TYPES
        if act[2].startswith(prefix)
    ])
    if len(act) == 3:
        act.append(ACT_TYPE_OTHER)

print('getting deputies list')
mp_list = rada.list_deputy_links()

# Output files for the csv writers (assumes `writer` is csv.writer).
# They are opened with newline='' as the csv module requires: the writer
# emits its own '\r\n' terminators, and without newline='' platforms that
# translate '\n' on write end up with '\r\r\n' (blank rows between records).
# NOTE(review): the handles are left open because the rows are presumably
# written by code further down; make sure both are closed afterwards.
fh = open(OUTPUT_FILE, 'w', newline='')
csvwriter = writer(fh)
csvwriter.writerow(ROW_HEADERS)
cf = open(EDITS_FILE, "w", newline='')
cor_writer = writer(cf)
cor_writer.writerow(COR_HEADERS)


for link in [link.attrib['href'] for link in mp_list]:
    sleep(SLEEP_TIME)
    deputy_id_matched = DEPUTY_ID_RE.fullmatch(link)
    if deputy_id_matched:
        template_arguments['deputy_id'] = \
            int(deputy_id_matched.group('deputy_id'))
        district = ditrict_matched.group('district_number')
        print(district)
    region_matched = REGION_SELECTOR.search(b_info)
    if region_matched:
        region = region_matched.group('region')
        print(region)
    party_matched = PARTY_SELECTOR.search(b_info)
    if party_matched:
        party = party_matched.group('party')
        print(party)
    return ([party, number, region, district])


# Export one CSV row per deputy: the deputy's name followed by the values
# returned by get_basic_info() for the deputy's page.
#
# The file is managed by a `with` block so it is flushed and closed even if
# a page fetch or parse raises part-way through, and it is opened with
# newline='' as the csv module requires (otherwise platforms that translate
# '\n' on write produce '\r\r\n' row terminators).
with open(OUTPUT_FILE, 'w', newline='') as fh:
    csvwriter = writer(fh)
    csvwriter.writerow(HEADERS)

    pre_link_list = rada.list_deputy_links()
    link_list = [pq(anchor).attr('href') for anchor in pre_link_list]

    for link in link_list:
        page = pq(link)
        name = pq(page(NAME_SELECTOR)).text()
        info = get_basic_info(page)
        output_row = [name] + info
        csvwriter.writerow(output_row)
        # Pause between requests so the remote site is not hammered.
        sleep(SLEEP_TIME)
}
ACT_TYPE_OTHER = "інше"

print('getting acts list')
acts = rada.list_acts()

# Walk every act and compare its third field against each known type
# prefix, appending the matching type label(s).  The act number is echoed
# once per prefix checked.  An act left with exactly three fields gets the
# fallback label.
for act_number in acts:
    record = acts[act_number]
    for act_type, type_label in ACT_TYPES.items():
        print(act_number)
        if not record[2].startswith(act_type):
            continue
        record.append(type_label)
    if len(record) == 3:
        record.append(ACT_TYPE_OTHER)

print('getting deputies list')
mp_list = rada.list_deputy_links()

# Output file for the csv writer (assumes `writer` is csv.writer).
# Opened with newline='' as the csv module requires: the writer emits its
# own '\r\n' terminators, and without newline='' platforms that translate
# '\n' on write end up with '\r\r\n' (blank rows between records).
# NOTE(review): the handle is left open because the rows are presumably
# written by the loop that follows; make sure it is closed afterwards.
fh = open(OUTPUT_FILE, 'w', newline='')
csvwriter = writer(fh)
csvwriter.writerow(ROW_HEADERS)

for link in [link.attrib['href'] for link in mp_list]:
    sleep(SLEEP_TIME)
    deputy_id_matched = DEPUTY_ID_RE.fullmatch(link)
    if deputy_id_matched:
        template_arguments['deputy_id'] = \
            int(deputy_id_matched.group('deputy_id'))
        legislative_link = LINK_TEMPLATE.format(**template_arguments)
        legislative_page = pq(legislative_link)
        deputy_name = legislative_page(NAME_SELECTOR).text()\
            .replace(NAME_STRIP, '')
        cells = list(map(lambda x: pq(x).text(), cols))
        start_date = change_date_format(cells[1])
        if cells[2] != "-":
            end_date = change_date_format(cells[2])
        else:
            end_date = ""
        faction_title = cells[0]
        output_row = [name, faction_title, start_date, end_date]
        output_csv_writer.writerow(output_row)


# Export faction-membership changes: for every deputy link, derive the
# deputy id from the link, build the faction-changes URL and hand it to
# parse_changes_table().
#
# NOTE: `name` and `output_csv_writer` are deliberately kept as module-level
# names with their original spelling — the row-writing code earlier in this
# file reads both as globals (presumably the body of parse_changes_table;
# confirm before renaming either).
#
# The file is managed by a `with` block so it is flushed and closed when the
# export finishes or fails (the original never closed it), and it is opened
# with newline='' as the csv module requires.
with open(OUTPUT_FILE, 'w', newline='') as output_csv:
    output_csv_writer = writer(output_csv)
    output_csv_writer.writerow(HEADERS)

    mps = rada.list_deputy_links()
    for mp in mps:
        name = pq(mp).text()
        page_link = pq(mp).attr('href')
        # Trace output retained from the original script.
        print(page_link)
        print(MP_PAGE_RE)
        page_link_matched = MP_PAGE_RE.fullmatch(page_link)
        print(page_link_matched)
        if page_link_matched is None:
            # The link does not look like a deputy page: skip it instead of
            # crashing on `.group()` of None.  No request was made, so no
            # pause is needed either.
            continue
        mp_id = int(page_link_matched.group('ID'))
        faction_changes_link = FACTION_CHANGES_URL.format(
            ex_page=EX_PAGE_PARTICLE,
            mp_id=mp_id,
        )
        parse_changes_table(faction_changes_link)
        # Pause between requests so the remote site is not hammered.
        sleep(SLEEP_TIME)
Exemple #5
0
  7: 'Генеральному прокурору України',
  8: 'Місцевим органам влади і управління'
}

# Selector for the rows to scan: every <tr> of the table picked by
# `table:eq(1)`.
# NOTE(review): `:eq()` is a jQuery-style, zero-based pseudo-class (so this
# would be the second table on the page) — presumably evaluated by pyquery;
# confirm against the code that uses this constant.
ROW_SELECTOR = 'table:eq(1) tr'
# Per-row cell selectors, one per column position 1-4, alternating between
# the THEAD3 and THEAD21 cell classes.  The first selector targets the <a>
# inside the cell rather than the cell itself.
ROW_SUBSELECTORS = (
    'td.THEAD3:nth-child(1) a',
    'td.THEAD21:nth-child(2)',
    'td.THEAD3:nth-child(3)',
    'td.THEAD21:nth-child(4)',
)



print("getting deputies list")
mps = rada.list_deputy_links()
# Visible text of every deputy link, in the same order as `mps`.
names = [pq(mp).text() for mp in mps]

# Output file for the csv writer (assumes `writer` is csv.writer).
# Opened with newline='' as the csv module requires: the writer emits its
# own '\r\n' terminators, and without newline='' platforms that translate
# '\n' on write end up with '\r\r\n' (blank rows between records).
# NOTE(review): the handle is left open because the rows are presumably
# written by the loop that follows; make sure it is closed afterwards.
fh = open(OUTPUT_FILE, 'w', newline='')
csvwriter = writer(fh)
csvwriter.writerow(ROW_HEADERS)
mp_number = 0
for link in [link.attrib['href'] for link in mps]:
    deputy_id_matched = DEPUTY_ID_RE.fullmatch(link)
    if deputy_id_matched:
        template_arguments['deputy_id'] = int(
            deputy_id_matched.group('deputy_id'))
        for type_id in QUERY_TYPES.keys():
            sleep(SLEEP_TIME)
            template_arguments['type_id'] = type_id
            queries_link = LINK_TEMPLATE.format(**template_arguments)