def parse_all_missions_in_page(html_page, country_nb):
    """Parse every mission row found in the missions table of *html_page*.

    The table is the text located between ``MISSIONS_BEGIN_TABLE_HTML`` and
    ``MISSIONS_END_TABLE_HTML``; it is cut into rows on
    ``MISSIONS_LINE_SPLIT_HTML`` and each row is handed to
    ``parse_one_mission`` together with *country_nb*.

    Args:
        html_page: Page content passed to ``everything_between``.
        country_nb: Value forwarded unchanged to ``parse_one_mission``.

    Returns:
        list: The truthy results of ``parse_one_mission``, in row order.
    """
    missions_table = everything_between(
        html_page, MISSIONS_BEGIN_TABLE_HTML, MISSIONS_END_TABLE_HTML
    )
    # The chunk before the first separator is dropped; only what follows a
    # separator is treated as a mission row.
    raw_rows = missions_table.split(MISSIONS_LINE_SPLIT_HTML)[1:]
    parsed_rows = (parse_one_mission(row, country_nb) for row in raw_rows)
    # Rows for which the parser returns a falsy value are left out.
    return [mission for mission in parsed_rows if mission]
def read_mp():
    """Fetch every message listed in the PM box and return their text.

    The box page is requested from ``PM_BOX_URL``; each id that
    ``get_pm_ids`` extracts from it is opened through ``PM_OPEN_URL``. For
    each opened page the text between ``PM_BEGIN_MESSAGE_PATTERN`` and
    ``PM_END_MESSAGE_PATTERN`` is taken, the part preceding the
    ``<br /><br /></td>`` trailer is kept, and the result is passed through
    ``strip_tags``.

    Returns:
        dict: ``{'mp_nb': <number of ids found>, 'result': <text>}`` where
        ``result`` is the concatenation of all message texts, each followed
        by three newlines.
    """
    # Raw string: '\S' and '\s' are not valid string escapes, so a plain
    # literal triggers an invalid-escape warning on recent Pythons. Compiled
    # once here because it does not change between iterations.
    body_pattern = re.compile(r'(.[\S+\n\r\s]+)<br /><br /></td>')

    pm_box_page = get_request(PM_BOX_URL)
    pm_ids = get_pm_ids(pm_box_page)

    messages = []
    for pm_id in pm_ids:
        page = get_request(PM_OPEN_URL.format(pm_id=pm_id))
        raw_content = everything_between(
            page, PM_BEGIN_MESSAGE_PATTERN, PM_END_MESSAGE_PATTERN
        )
        match = body_pattern.search(raw_content)
        # If the trailer is missing, fall back to the whole extracted content
        # instead of raising IndexError and losing every other message.
        body = match.group(1) if match else raw_content
        messages.append(strip_tags(body) + '\n\n\n')

    return {
        'mp_nb': len(pm_ids),
        'result': ''.join(messages)
    }