def ParseAsoListing(url):
    """Fetch the listing page at ``url`` and scrape it into a ``Listing``.

    The title is the text of the first ``div`` inside the
    ``adSpecView-header-Descr`` container. Price, registration, serial,
    airframe hours and state are read from the leading text of labelled
    ``span`` elements. Year and model are extracted from the title. Engine
    hours and overhaul type come from the ``enginePropView`` table when it
    has exactly two rows. The raw HTML is also handed to ``FindGps`` and
    ``FindTransponder``.

    Optional fields whose markup is missing or unparseable are simply not
    assigned, so they keep whatever value ``Listing()`` gave them.

    Args:
        url: Address of the listing page; also stored on ``listing.url``.

    Returns:
        The populated ``Listing``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
        AttributeError: If the title markup is not present on the page.
    """
    request = urllib.request.Request(url, headers=_HEADERS)
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being leaked.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8')
    soup = bs4.BeautifulSoup(html, 'lxml')

    listing = Listing()
    listing.title = soup.find(
        'div', class_='adSpecView-header-Descr').find('div').string.strip()
    listing.url = url

    def to_float(text):
        """Return the digits and dots of ``text`` as a float, else None."""
        # The previous class ``[^(\d\.)]`` also kept literal parentheses,
        # which made float() raise on values such as "(1234)".
        try:
            return float(re.sub(r'[^\d.]', '', text))
        except ValueError:
            return None

    # Compile the label patterns once rather than on every span.
    price_label = re.compile(r'Price')
    registration_label = re.compile(r'Reg #')
    serial_label = re.compile(r'Serial #')
    airframe_label = re.compile(r'TTAF:')
    location_label = re.compile(r'Location:')

    for span in soup.find_all('span'):
        text = span.next_element
        # Only the text that directly follows the opening tag can be
        # parsed; a nested tag there would break the string operations.
        if not isinstance(text, str):
            continue
        if span.find(string=price_label):
            price = to_float(text)
            if price is not None:
                listing.price = price
        elif span.find(string=registration_label):
            words = text.split()
            if words:
                listing.registration = words[-1].upper()
        elif span.find(string=serial_label):
            words = text.split()
            if words:
                listing.serial = words[-1].upper()
        elif span.find(string=airframe_label):
            hours = to_float(text)
            if hours is not None:
                listing.airframe_hours = hours
        elif span.find(string=location_label):
            words = text.split()
            # The second whitespace-separated word is taken as the state.
            if len(words) > 1:
                listing.state = SanitizeState(words[1].strip(' ,'))

    year = re.search(r'(19|20)\d\d', listing.title)
    if year:
        listing.year = int(year.group(0))
    model = re.search(r'M20[A-Z]\s*(\d{3})?', listing.title.upper())
    if model:
        listing.model = model.group(0)

    engine_table = soup.find('table', class_='enginePropView')
    if engine_table is not None:
        rows = engine_table.find_all('tr')
        if len(rows) == 2:
            header_cells = rows[0].find_all('td')
            for index, cell in enumerate(rows[1].find_all('td')):
                cell_text = cell.string
                # The first all-digit cell is the engine time. Cells with
                # no single string (None) or no digits at all are skipped.
                if cell_text is None or not re.fullmatch(r'\d+', cell_text):
                    continue
                listing.engine_hours = float(cell_text)
                # The header cell in the same column names the overhaul
                # type; tolerate a short or text-less header row.
                if index < len(header_cells):
                    header_text = header_cells[index].string
                    if header_text is not None:
                        listing.overhaul_type = header_text.upper()
                break

    listing.gps = FindGps(html)
    listing.transponder = FindTransponder(html)
    return listing
def ParseTradeAPlaneListing(url):
    """Fetch the listing page at ``url`` and scrape it into a ``Listing``.

    Each field is looked up through ``FindTradeAPlaneSpec``, which is given
    the located element, the name of the attribute to read from it, and an
    optional converter. The raw HTML is also handed to ``FindGps`` and
    ``FindTransponder``.

    Args:
        url: Address of the listing page; also stored on ``listing.url``.

    Returns:
        The populated ``Listing``. ``registration`` is set to ``None`` when
        no value containing a digit was found.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    request = urllib.request.Request(url, headers=_HEADERS)
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being leaked.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8')
    soup = bs4.BeautifulSoup(html, 'lxml')

    listing = Listing()
    listing.title = FindTradeAPlaneSpec(soup.find('h1'), 'string')
    listing.url = url
    listing.price = FindTradeAPlaneSpec(
        soup.find('span', itemprop='price'), 'string', float)
    listing.year = FindTradeAPlaneSpec(
        soup.find('label', string='Year:'), 'next_sibling', int)

    listing.registration = FindTradeAPlaneSpec(
        soup.find('label', string='Registration #:'), 'next_sibling')
    # A registration without any digit is treated as a placeholder. The
    # truthiness check first keeps re.search from being handed a missing
    # (falsy) value, which used to raise TypeError.
    if not (listing.registration
            and re.search(r'\d', listing.registration)):
        listing.registration = None

    listing.model = FindTradeAPlaneSpec(
        soup.find('span', itemprop='manufacturer'), 'next_sibling')
    listing.serial = FindTradeAPlaneSpec(
        soup.find('label', string='Serial #:'), 'next_sibling')
    listing.airframe_hours = FindTradeAPlaneSpec(
        soup.find('label', string='Total Time:'), 'next_sibling', float)

    overhaul_str = FindTradeAPlaneSpec(
        soup.find('label', string='Engine 1 Overhaul Time:'), 'next_sibling')
    if overhaul_str:
        overhaul_strs = overhaul_str.split()
        if overhaul_strs:
            try:
                listing.engine_hours = float(
                    overhaul_strs[0].replace(',', ''))
            except ValueError:
                # The first word is not a number; leave engine_hours
                # unset instead of aborting the whole parse.
                pass
        if len(overhaul_strs) > 1:
            listing.overhaul_type = overhaul_strs[1]

    location_str = FindTradeAPlaneSpec(
        soup.find('label', string='Location:'), 'next_sibling')
    if location_str:
        location_strs = location_str.split(',')
        if len(location_strs) == 1:
            state_part = location_strs[0]
        elif len(location_strs) == 2:
            # Strip the city as ParseAirplaneMartListing does.
            listing.city = location_strs[0].strip()
            state_part = location_strs[1]
        else:
            state_part = ''
        # Only the first word of the state part is used; split() already
        # discards surrounding whitespace and newlines, and an empty part
        # no longer raises IndexError.
        state_words = state_part.split()
        if state_words:
            listing.state = SanitizeState(state_words[0])

    listing.gps = FindGps(html)
    listing.transponder = FindTransponder(html)
    return listing
def ParseAirplaneMartListing(url):
    """Fetch the listing page at ``url`` and scrape it into a ``Listing``.

    The title is the bold text inside the size-5 ``font`` element. The other
    fields are looked up by label through ``FindAirplaneMartSpec``. Year and
    model are extracted from the title. The raw HTML is also handed to
    ``FindGps`` and ``FindTransponder``. Bytes that are not valid UTF-8 are
    dropped while decoding the response.

    Optional fields whose value is missing or unparseable are not assigned,
    so they keep whatever value ``Listing()`` gave them.

    Args:
        url: Address of the listing page; also stored on ``listing.url``.

    Returns:
        The populated ``Listing``.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        AttributeError: If the title markup is not present on the page.
    """
    request = urllib.request.Request(url, headers=_HEADERS)
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being leaked.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8', 'ignore')
    soup = bs4.BeautifulSoup(html, 'lxml')

    listing = Listing()
    listing.title = soup.find('font', size='5').find('b').string.strip()
    listing.url = url

    price_text = FindAirplaneMartSpec(soup, 'Price:')
    if price_text:
        # Keep digits and dots only. The previous class ``[^(\d\.)]`` also
        # kept literal parentheses, which made float() raise.
        try:
            listing.price = float(re.sub(r'[^\d.]', '', price_text))
        except ValueError:
            # No usable number in the price text; leave price unset.
            pass

    listing.registration = FindAirplaneMartSpec(soup, 'Registration:')
    listing.serial = FindAirplaneMartSpec(soup, 'Serial:')
    listing.airframe_hours = FindAirplaneMartSpec(
        soup, 'Airframe Time:', float)

    # Raw string: same value as before, without invalid escape sequences.
    engine_str = FindAirplaneMartSpec(soup, r'Engine Time\(s\):')
    if engine_str:
        # Use the first numeric run that really parses as a number, so
        # stray punctuation ("approx. 500 SMOH") is skipped and thousands
        # separators ("1,200 SMOH") are accepted.
        for match in re.finditer(r'([0-9,\.]+)(?:\s+([A-Z]+))?',
                                 engine_str.upper()):
            try:
                engine_hours = float(match.group(1).replace(',', ''))
            except ValueError:
                continue
            listing.engine_hours = engine_hours
            # Group 2 is None when no word follows the number; do not
            # overwrite overhaul_type in that case.
            if match.group(2):
                listing.overhaul_type = match.group(2)
            break

    location_str = FindAirplaneMartSpec(soup, 'Aircraft Location:')
    if location_str:
        # Drop any parenthesised remark before splitting city and state.
        location_str = re.sub(r'\s*\(.*\)\s*', '', location_str)
    if location_str:
        locations = location_str.split(',')
        listing.city = locations[0].strip()
        if len(locations) >= 2:
            # Only the first word after the comma is used as the state.
            state_words = locations[1].split()
            if state_words:
                listing.state = SanitizeState(state_words[0])

    year = re.search(r'(19|20)\d\d', listing.title)
    if year:
        listing.year = int(year.group(0))
    model = re.search(r'M20[A-Z]\s*(\d{3})?', listing.title.upper())
    if model:
        listing.model = model.group(0)

    listing.gps = FindGps(html)
    listing.transponder = FindTransponder(html)
    return listing
def ParseControllerListing(url):
    """Fetch the listing page at ``url`` and scrape it into a ``Listing``.

    The title is the text of the first ``h1``. The price is read from an
    ``h4`` containing "For Sale Price:". The other fields are looked up by
    label through ``FindControllerSpec``. City and state come from the
    ``machinelocation`` link. The raw HTML is also handed to ``FindGps`` and
    ``FindTransponder``.

    Optional fields whose markup is missing or unparseable are not assigned,
    so they keep whatever value ``Listing()`` gave them.

    Args:
        url: Address of the listing page; also stored on ``listing.url``.

    Returns:
        The populated ``Listing``. ``registration`` is set to ``None`` when
        no value containing a digit was found.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
        AttributeError: If the page has no ``h1`` with a single text child.
    """
    request = urllib.request.Request(url, headers=_HEADERS)
    # Use the response as a context manager so the connection is closed
    # deterministically instead of being leaked.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8')
    soup = bs4.BeautifulSoup(html, 'lxml')

    listing = Listing()
    listing.title = soup.find('h1').string.strip()
    listing.url = url

    price_label = re.compile(r'For Sale Price:')
    for h4 in soup.find_all('h4'):
        if not h4.find(string=price_label):
            continue
        price_text = h4.next_element
        # Only text directly following the opening tag can be parsed.
        if not isinstance(price_text, str):
            continue
        # Keep digits and dots only. The previous class ``[^(\d\.)]`` also
        # kept literal parentheses, which made float() raise.
        try:
            listing.price = float(re.sub(r'[^\d.]', '', price_text))
        except ValueError:
            # This heading carries no usable number; try the next one.
            continue
        break

    listing.year = FindControllerSpec(soup, 'Year', int)

    listing.registration = FindControllerSpec(soup, 'Registration #')
    # A registration without any digit is treated as a placeholder. The
    # truthiness check first keeps re.search from being handed a missing
    # (falsy) value, which used to raise TypeError.
    if not (listing.registration
            and re.search(r'\d', listing.registration)):
        listing.registration = None

    listing.model = FindControllerSpec(soup, 'Model')
    listing.serial = FindControllerSpec(soup, 'Serial #')
    listing.airframe_hours = FindControllerSpec(soup, 'Total Time', float)

    overhaul_str = FindControllerSpec(soup, 'Overhaul')
    if overhaul_str:
        # Use the first numeric run that really parses as a number; a lone
        # "." or "," used to make float() raise.
        for candidate in re.findall(r'[0-9,\.]+', overhaul_str):
            try:
                listing.engine_hours = float(candidate.replace(',', ''))
            except ValueError:
                continue
            break
        type_match = re.search(r'(?:[0-9,\.]+)\s*([a-zA-Z]+)', overhaul_str)
        if type_match:
            listing.overhaul_type = type_match.group(1)

    # Check that the link exists before reading its text; the lookup used
    # to dereference the result ahead of the None test.
    location_link = soup.find('a', class_='machinelocation')
    location = location_link.string if location_link is not None else None
    if location:
        location_strs = location.split(',')
        if len(location_strs) == 1:
            listing.state = SanitizeState(location_strs[0].strip())
        elif len(location_strs) == 2:
            # Strip the city as ParseAirplaneMartListing does.
            listing.city = location_strs[0].strip()
            listing.state = SanitizeState(location_strs[1].strip())

    listing.gps = FindGps(html)
    listing.transponder = FindTransponder(html)
    return listing