def _format_data(self, fee, list): urls = [] listings = [] for item in list: if 'NumPosts' in item: urls.append(item['url']) continue bedrooms = int(float(item['Bedrooms'])) if bedrooms != 1 or 'studio' in item['PostingTitle'].lower(): continue apartment = Apartment( SOURCE, item['PostingTitle'], int(float(item['Ask'])), BASE_URL + item['PostingURL']) apartment.set_location(item['Latitude'], item['Longitude']) apartment.set_posting_timestamp(item['PostedDate']) apartment.set_has_fee(fee == Fees.FEE) listings.append(apartment) map(lambda listing: self._load_more_data(listing), listings) return (listings, urls)
def _get_fields(self, s):
    """Parse the next listing out of an HTML fragment.

    Args:
        s: HTML text still to be scanned.

    Returns:
        ``(listing, rest)`` where ``listing`` is the parsed ``Apartment`` and
        ``rest`` is the text following the fee field. ``(None, '')`` is
        returned when ``MARKER`` is not found or when the parsed price
        exceeds ``MAX_PRICE``.
    """
    start = s.find(MARKER)
    if start < 0:
        return (None, '')
    s = s[start + len(MARKER):]

    (price, s) = html_helper.find_in_between(s, '$', '<')
    # Prices may contain thousands separators, e.g. "2,500".
    price = int(float(price.replace(',', '')))
    if price > MAX_PRICE:
        return (None, '')

    (link, s) = html_helper.find_in_between(s, LINK_START, '"')
    (title, s) = html_helper.find_in_between(s, '>', '<')
    (fee_text, s) = html_helper.find_in_between(s, FEE_START, '<')
    no_fee = 'no' in fee_text.lower()

    # The posting date is the last space-separated token before LINE_END.
    # If LINE_END is missing, use the whole remainder: slicing with the -1
    # returned by find() would silently drop the final character.
    ending = s.find(LINE_END)
    end_listing = s[:ending] if ending >= 0 else s
    # rpartition keeps the whole text when there is no space, whereas
    # slicing from rfind()'s -1 would keep only the last character.
    posting_date = end_listing.rpartition(' ')[2].strip()

    listing = Apartment(SOURCE, title, price, link)
    listing.set_has_fee(not no_fee)
    listing.set_posting_date(posting_date)
    return (listing, s)
def _get_result(self, s):
    """Parse the next search result out of an HTML fragment.

    Args:
        s: HTML text still to be scanned.

    Returns:
        ``(listing, rest)`` where ``listing`` is the parsed ``Apartment`` and
        ``rest`` is the text following the price field. ``(None, s)`` is
        returned when no ``se:map:point`` attribute is found.

    Raises:
        ValueError: If the coordinates, price, or square-footage text cannot
            be converted to a number.
    """
    (latlong, s) = html_helper.find_in_between(s, "se:map:point='", "'")
    if latlong is None:
        return (None, s)
    (lat, lng) = latlong.split(',')

    # The no-fee banner, when present, is looked for before the photo block.
    photo_at = s.find("<div class='photo'>")
    has_no_fee = 'banner no_fee' in s[:photo_at]
    s = s[photo_at:]

    (url, s) = html_helper.find_in_between(s, 'href="', '"')
    url = BASE_URL + url
    (title, s) = html_helper.advance_and_find(
        s, '"details-title">', '"true">', '<')
    (price, s) = html_helper.advance_and_find(s, "'price'", '$', '<')
    price = int(float(price.replace(',', '')))

    listing = Apartment(SOURCE, title, price, url)
    listing.set_location(float(lat), float(lng))
    listing.set_has_fee(not has_no_fee)

    # Square footage is optional: look for a '²' before the end of the div
    # that follows the first detail cell.
    start = s.find("class='first_detail_cell'")
    section_end = s.find('</div', start)
    end = s[:section_end].find('²')
    if end >= 0:
        section = s[:end]
        # The number is the first space-delimited token after the last tag.
        start = section.rfind('>') + 1
        end = section.find(' ', start)
        if end < 0:
            # No space after the number: take everything up to the '²'
            # instead of letting a -1 slice bound drop the last digit.
            end = len(section)
        listing.set_sqft(int(section[start:end].replace(',', '')))
    return (listing, s)