# Reference page:
# http://watch.peoplepower21.org/New/cm_info.php?member_seq=775&info_page=cm_info_act_mVote.php
# NOTE(review): the reference page above names cm_info_act_mVote.php, but this
# constant points at cm_info_act_mAttend.php -- confirm which endpoint is
# intended.
VOTE_MAIN_MEETING = "http://watch.peoplepower21.org/New/cm_info_act_mAttend.php?"

# Law list endpoint.
# Reference page:
# http://watch.peoplepower21.org/New/cm_info.php?member_seq=775&info_page=cm_info_act_law.php
LAW_LIST = "http://watch.peoplepower21.org/New/cm_info_act_law.php?"

# Attendance information for sub-meetings.
# Reference page:
# http://watch.peoplepower21.org/New/cm_info.php?member_seq=775&info_page=cm_info_act_sAttend.php
ATTEND_SUB_MEETING = "http://watch.peoplepower21.org/New/cm_info_act_sAttend.php?"

# Temporary value: exclusive upper bound for the page numbers walked by
# range(1, CRAWILING_THRESHOLD) below.
CRAWILING_THRESHOLD = 100

# Path under which result files are saved (resolved through path_config).
RESULT_PATH = path_config.get_data_dir_path('attendance_results')


def convertStrTimeToLong(strTime):
    """Convert a ``YYYY-MM-DD`` date string to epoch milliseconds.

    Args:
        strTime: Date text such as ``2015-07-15``.

    Returns:
        The whole-second timestamp produced by ``time.mktime`` for midnight
        of that date, multiplied by 1000.

    Raises:
        ValueError: If ``strTime`` does not match ``%Y-%m-%d``.
    """
    parsed = datetime.datetime.strptime(strTime, '%Y-%m-%d')
    # time.mktime interprets the struct_time as local time.
    epoch_seconds = int(time.mktime(parsed.timetuple()))
    return epoch_seconds * 1000


# NOTE(review): only the opening statements of this function are visible in
# this chunk; they are reproduced unchanged and the remainder is not shown.
def refreshAttedingInfoMainMeeting(recentDate, assemblyId):
    results = []
    for pageNum in range(1, CRAWILING_THRESHOLD):
        url = ATTENDING_MAIN_MEETING + "member_seq=" + \
            str(assemblyId) + "&page=" + str(pageNum)
        r = requests.get(url)
# watch.peoplepower21.org
import json
import os
import re

import requests
from bs4 import BeautifulSoup

import path_config

# Directory for plenary-session results, resolved through path_config.
DATA_DIR = path_config.get_data_dir_path('plenary_session_results')

# Captures the page number N from "javascript:goToPage(N);".
go_to_page_regex = re.compile(r'javascript:goToPage\((\d+)\);')
# Captures the digits that follow "mbill=" and precede a double quote.
mbill_regex = re.compile(r'mbill=(\d+)"')
# Captures the digits that follow "member_seq=" and precede an ampersand.
member_seq_regex = re.compile(r'member_seq=(\d+)&')


def _compute_has_next(html, cur_page):
    """Report whether ``html`` links to a page beyond ``cur_page``.

    Args:
        html: Page markup that is scanned for ``javascript:goToPage(N);``.
        cur_page: Page number the caller is currently on.

    Returns:
        True if any captured page number is greater than ``cur_page``;
        False otherwise, including when there are no matches at all.
    """
    linked_pages = go_to_page_regex.findall(html)
    return any(int(page_no) > cur_page for page_no in linked_pages)


# NOTE(review): the source of fetch_sessions is cut off inside its docstring
# in this view. The visible fragment is kept verbatim below as a comment so
# the module stays parseable; restore the full definition from the original.
# def fetch_sessions(page): """ { 'sessions': [{ 'bill': 9373, '처리날짜': '2015-09-08', '회차': '제337회 04차',