# NOTE(review): this line is a whitespace-collapsed fragment. The nesting
# described below is inferred from statement order, not from indentation, and
# the leading `}` closes a literal that starts outside this fragment.
# NOTE(review): as stored on one line, the `#` in `#print(act_number)` turns
# the remainder of the line into a comment; the original line breaks have to
# be restored before this can run.
#
# Purpose: label every act returned by rada.list_acts() with a type, then
# begin writing per-deputy output.
#   - For each act, element [2] is tested with startswith() against every key
#     of ACT_TYPES; on a match the mapped ACT_TYPES value is appended.
#   - An entry whose length is still 3 afterwards gets ACT_TYPE_OTHER appended
#     (presumably "no prefix matched" -- assumes entries start with exactly
#     three items; TODO confirm against rada.list_acts()).
#   - OUTPUT_FILE and EDITS_FILE are opened for writing and each receives a
#     header row (ROW_HEADERS / COR_HEADERS) through a `writer` object
#     (presumably csv.writer -- the import is not visible here).
#   - The final loop sleeps SLEEP_TIME per deputy link, matches the href with
#     DEPUTY_ID_RE.fullmatch() and stores the integer 'deputy_id' group in
#     template_arguments. The rest of the loop body is outside this fragment.
# NOTE(review): both files are opened without `with`, and no close() is
# visible in this fragment -- confirm they are closed later.
} ACT_TYPE_OTHER = "інше" print('getting acts list') acts = rada.list_acts() for act_number in acts: for act_type in ACT_TYPES: #print(act_number) if acts[act_number][2].startswith(act_type): acts[act_number].append(ACT_TYPES[act_type]) if len(acts[act_number]) == 3: acts[act_number].append(ACT_TYPE_OTHER) print('getting deputies list') mp_list = rada.list_deputy_links() fh = open(OUTPUT_FILE, 'w') csvwriter = writer(fh) csvwriter.writerow(ROW_HEADERS) cf = open(EDITS_FILE, "w") cor_writer = writer(cf) cor_writer.writerow(COR_HEADERS) for link in [link.attrib['href'] for link in mp_list]: sleep(SLEEP_TIME) deputy_id_matched = DEPUTY_ID_RE.fullmatch(link) if deputy_id_matched: template_arguments['deputy_id'] = \ int(deputy_id_matched.group('deputy_id'))
# NOTE(review): this line is a whitespace-collapsed fragment; the nesting
# described below is inferred from statement order, not from indentation.
#
# Function tail: the first statements belong to a function whose header is
# outside this fragment (presumably get_basic_info, which is called further
# down -- TODO confirm). The visible part reads the 'district_number' group
# from `ditrict_matched`, and the 'region' and 'party' groups from
# REGION_SELECTOR.search(b_info) and PARTY_SELECTOR.search(b_info) when those
# searches succeed, prints each value, and returns
# [party, number, region, district].
# NOTE(review): `ditrict_matched` looks like a misspelling of "district"; its
# assignment is not visible here, so check that both spellings agree.
# NOTE(review): `region` and `party` are only assigned when their search
# matched; whether they have defaults earlier in the function cannot be seen.
#
# Script part: writes HEADERS to OUTPUT_FILE, collects the href of every entry
# from rada.list_deputy_links(), and for each link loads the page with pq(),
# takes the text of NAME_SELECTOR as the name, and writes
# [name] + get_basic_info(page) as one row, sleeping SLEEP_TIME after each
# page. The file is closed after the loop.
# NOTE(review): fh is not opened with `with`; presumably an exception inside
# the loop would skip fh.close().
district = ditrict_matched.group('district_number') print(district) region_matched = REGION_SELECTOR.search(b_info) if region_matched: region = region_matched.group('region') print(region) party_matched = PARTY_SELECTOR.search(b_info) if party_matched: party = party_matched.group('party') print(party) return ([party, number, region, district]) fh = open(OUTPUT_FILE, 'w') csvwriter = writer(fh) csvwriter.writerow(HEADERS) pre_link_list = rada.list_deputy_links() link_list = [pq(l).attr('href') for l in pre_link_list] for link in link_list: page = pq(link) name = page(NAME_SELECTOR) name = pq(name).text() info = get_basic_info(page) output_row = [name] + info csvwriter.writerow(output_row) sleep(SLEEP_TIME) fh.close()
# NOTE(review): this line is a whitespace-collapsed fragment. The nesting
# described below is inferred from statement order, not from indentation, and
# the leading `}` closes a literal that starts outside this fragment.
#
# Purpose: label every act returned by rada.list_acts() with a type, then
# start fetching one legislative page per deputy.
#   - For each act, element [2] is tested with startswith() against every key
#     of ACT_TYPES; on a match the mapped ACT_TYPES value is appended.
#     print(act_number) appears to sit inside the ACT_TYPES loop, so it would
#     print once per act/type pair -- looks like leftover debug output.
#   - An entry whose length is still 3 afterwards gets ACT_TYPE_OTHER appended
#     (presumably "no prefix matched" -- assumes entries start with exactly
#     three items; TODO confirm against rada.list_acts()).
#   - OUTPUT_FILE is opened for writing and receives the ROW_HEADERS row
#     through a `writer` object (presumably csv.writer).
#   - Per deputy link: sleep SLEEP_TIME, match the href with
#     DEPUTY_ID_RE.fullmatch(), store the integer 'deputy_id' group in
#     template_arguments, format LINK_TEMPLATE with template_arguments, load
#     the result with pq(), and take the NAME_SELECTOR text with NAME_STRIP
#     removed as deputy_name. The rest of the loop is outside this fragment.
# NOTE(review): it cannot be determined from this fragment whether the page
# fetch runs only when the id matched or for every link; if it is outside the
# `if`, a non-matching link would reuse the previous deputy_id.
# NOTE(review): fh is opened without `with`, and no close() is visible here.
} ACT_TYPE_OTHER = "інше" print('getting acts list') acts = rada.list_acts() for act_number in acts: for act_type in ACT_TYPES: print(act_number) if acts[act_number][2].startswith(act_type): acts[act_number].append(ACT_TYPES[act_type]) if len(acts[act_number]) == 3: acts[act_number].append(ACT_TYPE_OTHER) print('getting deputies list') mp_list = rada.list_deputy_links() fh = open(OUTPUT_FILE, 'w') csvwriter = writer(fh) csvwriter.writerow(ROW_HEADERS) for link in [link.attrib['href'] for link in mp_list]: sleep(SLEEP_TIME) deputy_id_matched = DEPUTY_ID_RE.fullmatch(link) if deputy_id_matched: template_arguments['deputy_id'] = \ int(deputy_id_matched.group('deputy_id')) legislative_link = LINK_TEMPLATE.format(**template_arguments) legislative_page = pq(legislative_link) deputy_name = legislative_page(NAME_SELECTOR).text()\ .replace(NAME_STRIP, '')
# NOTE(review): this line is a whitespace-collapsed fragment; the nesting
# described below is inferred from statement order, not from indentation.
#
# Function tail: the first statements belong to a function whose header is
# outside this fragment (presumably parse_changes_table, which is called
# further down -- TODO confirm). For one set of `cols` it extracts the text of
# each column with pq(x).text(), converts cells[1] with change_date_format()
# as the start date, converts cells[2] the same way as the end date unless it
# is "-" (then the end date is the empty string), takes cells[0] as the
# faction title, and writes [name, faction_title, start_date, end_date] via
# output_csv_writer.
# NOTE(review): `name` and `output_csv_writer` are not assigned in the visible
# tail; presumably they are the module-level names set by the script below.
#
# Script part: opens OUTPUT_FILE, writes HEADERS, and for every entry from
# rada.list_deputy_links() takes its text as `name` and its href as
# `page_link`, matches the href with MP_PAGE_RE.fullmatch(), converts the 'ID'
# group to int, formats FACTION_CHANGES_URL with 'ex_page' and 'mp_id', calls
# parse_changes_table() on the resulting link, and sleeps SLEEP_TIME.
# The three print() calls look like leftover debug output.
# NOTE(review): page_link_matched is used without a None check; if
# MP_PAGE_RE is a compiled `re` pattern (presumably), a non-matching href
# would make .group('ID') raise AttributeError.
# NOTE(review): output_csv is opened without `with`, and no close() is
# visible in this fragment.
cells = list(map(lambda x: pq(x).text(), cols)) start_date = change_date_format(cells[1]) if cells[2] != "-": end_date = change_date_format(cells[2]) else: end_date = "" faction_title = cells[0] output_row = [name, faction_title, start_date, end_date] output_csv_writer.writerow(output_row) output_csv = open(OUTPUT_FILE, 'w') output_csv_writer = writer(output_csv) output_csv_writer.writerow(HEADERS) mps = rada.list_deputy_links() for mp in mps: name = pq(mp).text() page_link = pq(mp).attr('href') print(page_link) print(MP_PAGE_RE) page_link_matched = MP_PAGE_RE.fullmatch(page_link) print(page_link_matched) mp_id = int(page_link_matched.group('ID')) faction_changes_link = FACTION_CHANGES_URL.format(**{ 'ex_page': EX_PAGE_PARTICLE, 'mp_id': mp_id }) parse_changes_table(faction_changes_link) sleep(SLEEP_TIME)
# NOTE(review): this line is a whitespace-collapsed fragment; the nesting
# described below is inferred from statement order, not from indentation.
#
# The leading entries (integer keys 7 and 8 mapped to label strings) are the
# end of a dict literal that starts outside this fragment (presumably
# QUERY_TYPES, whose keys are iterated below -- TODO confirm).
#
# Constants: ROW_SELECTOR is the selector string for table rows, and
# ROW_SUBSELECTORS holds four per-column selector strings (columns 1-4 by
# their :nth-child index).
#
# Script part: fetches the deputy links from rada.list_deputy_links(),
# collects each link's text into `names`, opens OUTPUT_FILE and writes the
# ROW_HEADERS row through a `writer` object (presumably csv.writer), and
# initialises mp_number to 0. For every href it matches the link with
# DEPUTY_ID_RE.fullmatch() and stores the integer 'deputy_id' group in
# template_arguments; then, for each key of QUERY_TYPES, it sleeps
# SLEEP_TIME, stores the key as 'type_id' and formats LINK_TEMPLATE into
# queries_link. The rest of the loop body is outside this fragment.
# NOTE(review): it cannot be determined from this fragment whether the
# QUERY_TYPES loop runs only when the id matched or for every link.
# NOTE(review): fh is opened without `with`, and no close() is visible here.
7: 'Генеральному прокурору України', 8: 'Місцевим органам влади і управління' } ROW_SELECTOR = 'table:eq(1) tr' ROW_SUBSELECTORS = ( 'td.THEAD3:nth-child(1) a', 'td.THEAD21:nth-child(2)', 'td.THEAD3:nth-child(3)', 'td.THEAD21:nth-child(4)', ) print("getting deputies list") mps = rada.list_deputy_links() names = list(map(lambda x: pq(x).text(), mps)) fh = open(OUTPUT_FILE, 'w') csvwriter = writer(fh) csvwriter.writerow(ROW_HEADERS) mp_number = 0 for link in [link.attrib['href'] for link in mps]: deputy_id_matched = DEPUTY_ID_RE.fullmatch(link) if deputy_id_matched: template_arguments['deputy_id'] = int( deputy_id_matched.group('deputy_id')) for type_id in QUERY_TYPES.keys(): sleep(SLEEP_TIME) template_arguments['type_id'] = type_id queries_link = LINK_TEMPLATE.format(**template_arguments)