def build_airports_list(page):
    """Collect the airports listed in the sale section of ``page``.

    The page is first cut down to the text between the
    ``SALE_AIRPORTS_BEGIN_HTML`` and ``SALE_AIRPORTS_END_HTML`` markers, then
    split into one chunk per airport using the per-airport begin/end markers.

    Returns a list of dicts with the keys ``airport_id``, ``cash``,
    ``capacity``, ``reputation`` and ``price``. Nothing is added for an
    iteration in which the seller pseudo equals ``OWN_PSEUDO``.
    """
    section_start = page.find(SALE_AIRPORTS_BEGIN_HTML)
    section_end = page.find(SALE_AIRPORTS_END_HTML)
    section = page[section_start:section_end]

    # Only every other begin-marker match is kept; presumably the marker
    # occurs twice per airport entry -- TODO confirm against the real HTML.
    starts = [
        match.start()
        for match in re.finditer(SALE_ONE_AIRPORT_BEGIN_HTML, section)
    ][::2]
    ends = [
        match.start()
        for match in re.finditer(SALE_ONE_AIRPORT_END_HTML, section)
    ]

    airports = []
    for position, start in enumerate(starts):
        airport_html = section[start:ends[position]]

        airport_id = get_value_from_regex(SALE_AIRPORT_ID_REGEX, airport_html)
        capacity_reputation = get_values_from_regex(
            SALE_AIRPORT_REPUTATION_REGEX, airport_html
        )
        cash = get_amount(
            get_values_from_regex(SALE_AIRPORT_CASH_REGEX, airport_html)[1]
        )
        price = get_amount(
            get_value_from_regex(SALE_AIRPORT_PRICE_REGEX, airport_html)
        )

        # NOTE(review): the seller is searched in the whole sale section, not
        # in this airport's chunk -- confirm that this is intended.
        seller = get_value_from_regex(SALE_AIRPORT_PSEUDO, section)
        if seller == OWN_PSEUDO:
            continue

        airports.append({
            "airport_id": int(airport_id),
            "cash": cash,
            "capacity": int(capacity_reputation[0]),
            "reputation": get_amount(capacity_reputation[1]),
            "price": price,
        })
    return airports
def parse_one_mission(mission_html, country_nb):
    """Turn the HTML of a single mission into a dict of its attributes.

    Returns ``None`` when ``string_contains`` reports
    ``MISSION_TOO_SHORT_HTML`` for ``mission_html``. Otherwise returns a dict
    with the keys ``country_nb``, ``mission_nb``, ``travellers_nb``,
    ``contract_amount``, ``reputation``, ``pilots_nb``,
    ``flight_attendants_nb``, ``time_before_departure`` and ``km_nb``.
    """
    # NOTE: a second filter on the "no plane matching this mission" message
    # used to be sketched here; it is disabled.
    if string_contains(MISSION_TOO_SHORT_HTML, mission_html):
        return None

    # The amount is captured with ',' separators, which are stripped before
    # the integer conversion.
    raw_amount = get_value_from_regex(MISSION_AMOUNT_REGEX, mission_html)
    contract_amount = int(raw_amount.replace(',', ''))
    time_before_departure = parse_duration_before_departure(mission_html)

    return {
        'country_nb': int(country_nb),
        'mission_nb': get_int_from_regex(MISSION_ID_REGEX, mission_html),
        'travellers_nb': get_int_from_regex(
            MISSION_PASSENGERS_CARGO_NB_REGEX, mission_html
        ),
        'contract_amount': contract_amount,
        'reputation': get_int_from_regex(MISSION_REPUTATION_REGEX, mission_html),
        'pilots_nb': get_int_from_regex(MISSIONS_PILOTS_NB_REGEX, mission_html),
        'flight_attendants_nb': get_int_from_regex(
            MISSIONS_FLIGHT_ATTENDANTS_NB_REGEX, mission_html
        ),
        'time_before_departure': time_before_departure,
        'km_nb': get_int_from_regex(MISSION_DISTANCE_REGEX, mission_html),
    }
def parse_duration_before_departure(html_mission):
    """Return the time remaining before the mission in ``html_mission`` departs.

    The departure date is captured with ``MISSION_DEPARTURE_DATE_REGEX``,
    converted by ``date_from_regex_result`` and passed to
    ``compute_time_before_departure``, whose result is returned unchanged.
    """
    raw_departure = get_value_from_regex(MISSION_DEPARTURE_DATE_REGEX, html_mission)
    departure = date_from_regex_result(raw_departure)
    return compute_time_before_departure(departure)
def get_country(page):
    """Return what ``AIRPORT_COUNTRY_NAME_REGEX`` captures in ``page``."""
    return get_value_from_regex(AIRPORT_COUNTRY_NAME_REGEX, page)
def get_airport_name(page):
    """Return the UTF-8 encoded value captured by ``AIRPORT_NAME_REGEX`` in ``page``."""
    airport_name = get_value_from_regex(AIRPORT_NAME_REGEX, page)
    return airport_name.encode('utf-8')