def build_airports_list(page):
    """Parse the airports-for-sale section of ``page`` into a list of dicts.

    The text between the first occurrences of ``SALE_AIRPORTS_BEGIN_HTML``
    and ``SALE_AIRPORTS_END_HTML`` is cut out of ``page`` and split into one
    chunk per airport. For each chunk the id, capacity, reputation, cash and
    price are extracted; the entry is kept only when the seller name found
    via ``SALE_AIRPORT_PSEUDO`` differs from ``OWN_PSEUDO``.

    Args:
        page: HTML text of the sale page.

    Returns:
        A list (possibly empty) of dicts with the keys ``airport_id``,
        ``cash``, ``capacity``, ``reputation`` and ``price``, in page order.

    Raises:
        IndexError: if fewer end markers than (every second) begin markers
            are found in the section.
    """
    section_start = page.find(SALE_AIRPORTS_BEGIN_HTML)
    section_stop = page.find(SALE_AIRPORTS_END_HTML)
    section = page[section_start:section_stop]

    # Only every second begin-marker match is used as a chunk start
    # (presumably the marker occurs twice per airport -- TODO confirm).
    chunk_starts = [
        match.start()
        for match in re.finditer(SALE_ONE_AIRPORT_BEGIN_HTML, section)
    ][::2]
    chunk_stops = [
        match.start()
        for match in re.finditer(SALE_ONE_AIRPORT_END_HTML, section)
    ]

    airports = []
    for position, start in enumerate(chunk_starts):
        chunk = section[start:chunk_stops[position]]

        raw_id = get_value_from_regex(SALE_AIRPORT_ID_REGEX, chunk)
        capacity_and_reputation = get_values_from_regex(
            SALE_AIRPORT_REPUTATION_REGEX, chunk
        )
        cash_matches = get_values_from_regex(SALE_AIRPORT_CASH_REGEX, chunk)
        cash = get_amount(cash_matches[1])
        price = get_amount(
            get_value_from_regex(SALE_AIRPORT_PRICE_REGEX, chunk)
        )

        # NOTE(review): the seller is searched in the whole section, not in
        # this airport's chunk, so the same value is presumably obtained for
        # every airport. This looks unintended -- confirm whether ``chunk``
        # was meant here before changing it.
        seller = get_value_from_regex(SALE_AIRPORT_PSEUDO, section)
        if seller == OWN_PSEUDO:
            continue

        airports.append(
            {
                "airport_id": int(raw_id),
                "cash": cash,
                "capacity": int(capacity_and_reputation[0]),
                "reputation": get_amount(capacity_and_reputation[1]),
                "price": price,
            }
        )
    return airports
def extract_available_offers(page):
    """Extract the offers listed on a shop page, cheapest first.

    Args:
        page: HTML text of the shop page.

    Returns:
        ``None`` when ``SHOP_NO_SALE`` matches anywhere in ``page``.
        Otherwise a list of dicts sorted by ascending ``price``, each with:
        ``sell_id`` (the radio button value, as a string), ``quantity``
        (the result of ``get_amount`` on the quantity cell) and ``price``
        (a float).

    Raises:
        IndexError: if an offer row has no quantity cell or no price cell
            in the expected format.
    """
    if re.search(SHOP_NO_SALE, page):
        return None

    # Raw strings: these patterns contain regex escapes (\d, \., \$, \S, \s)
    # that are invalid *string* escapes and trigger a SyntaxWarning on
    # Python 3.12+ when written in ordinary string literals.
    row_pattern = (
        r'<td class="Brocante1"><input type="radio" name="mon_champ" '
        r'value="\d+"></td>[.\S+\n\r\s]*?</tr>'
    )
    id_pattern = r'<input type="radio" name="mon_champ" value="(\d+)">'
    quantity_pattern = r'<td class="Brocante3">(.+)</td>'
    price_pattern = r'<td class="Brocante2">(\d+\.?\d*) \$</td>'

    offers = []
    for row_html in re.findall(row_pattern, page):
        # findall(...)[0] is kept on purpose so a malformed row still
        # surfaces as IndexError instead of being silently skipped.
        sell_id = re.findall(id_pattern, row_html)[0]
        quantity = get_amount(re.findall(quantity_pattern, row_html)[0])
        price = float(re.findall(price_pattern, row_html)[0])
        offers.append(
            {
                'sell_id': sell_id,
                'quantity': quantity,
                'price': price,
            }
        )

    offers.sort(key=lambda offer: offer['price'])
    return offers