Beispiel #1
0
def ParseAsoListing(url):
    """Download an ASO listing page and scrape it into a Listing.

    Args:
        url: Address of the listing page; also stored on the result.

    Returns:
        A Listing with title and url set, plus whichever of price,
        registration, serial, airframe_hours, state, year, model,
        engine_hours and overhaul_type could be extracted from the page.
        gps and transponder are set from FindGps / FindTransponder, which
        are given the raw HTML.

    Raises:
        AttributeError: If the title header element is missing from the
            page (the title lookup is intentionally left unguarded).
    """
    request = urllib.request.Request(url, headers=_HEADERS)
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8')
    soup = bs4.BeautifulSoup(html, 'lxml')
    listing = Listing()

    listing.title = soup.find(
        'div', class_='adSpecView-header-Descr').find('div').string.strip()
    listing.url = url

    for span in soup.find_all('span'):
        # Values are read from the node that immediately follows the span's
        # opening tag. Skip spans where that node is not text, because the
        # string operations below would raise on a Tag.
        text = span.next_element
        if not isinstance(text, str):
            continue

        if span.find(string=re.compile(r'Price')):
            # Keep only digits and the decimal point. Parentheses must not
            # appear in the class: inside [...] they are literal characters
            # and would survive into float().
            price_str = re.sub(r'[^\d.]', '', text)
            if price_str:
                listing.price = float(price_str)
        elif span.find(string=re.compile(r'Reg #')):
            listing.registration = text.split()[-1].upper()
        elif span.find(string=re.compile(r'Serial #')):
            listing.serial = text.split()[-1].upper()
        elif span.find(string=re.compile(r'TTAF:')):
            hours_str = re.sub(r'[^\d.]', '', text)
            if hours_str:
                listing.airframe_hours = float(hours_str)
        elif span.find(string=re.compile(r'Location:')):
            locations = text.split()
            if len(locations) > 1:
                listing.state = SanitizeState(locations[1].strip(' ,'))

    # Year and model are not separate fields here; pull them from the title.
    year = re.search(r'(19|20)\d\d', listing.title)
    if year:
        listing.year = int(year.group(0))

    model = re.search(r'M20[A-Z]\s*(\d{3})?', listing.title.upper())
    if model:
        listing.model = model.group(0)

    engine_table = soup.find('table', class_='enginePropView')
    if engine_table:
        rows = engine_table.find_all('tr')
        if len(rows) == 2:
            header_cells = rows[0].find_all('td')
            for i, col in enumerate(rows[1].find_all('td')):
                # The first cell consisting solely of digits is taken as the
                # engine time. Cells with no single string (None) or with
                # empty text are skipped rather than fed to float().
                hours_text = col.string
                if not hours_text or not re.fullmatch(r'\d+', hours_text):
                    continue
                listing.engine_hours = float(hours_text)
                # The header cell in the same column names the overhaul
                # type; it may be absent or have no single string.
                if i < len(header_cells) and header_cells[i].string:
                    listing.overhaul_type = header_cells[i].string.upper()
                break

    listing.gps = FindGps(html)
    listing.transponder = FindTransponder(html)

    return listing
Beispiel #2
0
def ParseTradeAPlaneListing(url):
    """Download a Trade-A-Plane listing page and scrape it into a Listing.

    Most fields are obtained through FindTradeAPlaneSpec, which is given the
    element located in the page, the name of the attribute to read from it
    and, for numeric fields, a conversion callable.

    Args:
        url: Address of the listing page; also stored on the result.

    Returns:
        A Listing with title, url, price, year, registration, model, serial
        and airframe_hours assigned from the page, plus engine_hours,
        overhaul_type, city and state when the corresponding text is
        present. gps and transponder are set from FindGps / FindTransponder,
        which are given the raw HTML.
    """
    request = urllib.request.Request(url, headers=_HEADERS)
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8')
    soup = bs4.BeautifulSoup(html, 'lxml')
    listing = Listing()

    listing.title = FindTradeAPlaneSpec(soup.find('h1'), 'string')
    listing.url = url
    listing.price = FindTradeAPlaneSpec(soup.find('span', itemprop='price'),
                                        'string', float)
    listing.year = FindTradeAPlaneSpec(soup.find('label', string='Year:'),
                                       'next_sibling', int)

    registration = FindTradeAPlaneSpec(
        soup.find('label', string='Registration #:'), 'next_sibling')
    # A registration without any digit is treated as missing. The spec
    # lookup is checked for emptiness elsewhere in this function, so guard
    # here too instead of handing a possible None to re.search().
    if not (registration and re.search(r'\d', registration)):
        registration = None
    listing.registration = registration

    listing.model = FindTradeAPlaneSpec(
        soup.find('span', itemprop='manufacturer'), 'next_sibling')
    listing.serial = FindTradeAPlaneSpec(
        soup.find('label', string='Serial #:'), 'next_sibling')
    listing.airframe_hours = FindTradeAPlaneSpec(
        soup.find('label', string='Total Time:'), 'next_sibling', float)

    overhaul_str = FindTradeAPlaneSpec(
        soup.find('label', string='Engine 1 Overhaul Time:'), 'next_sibling')
    if overhaul_str:
        # First word is the hour count (possibly with thousands separators),
        # the optional second word is the overhaul type.
        overhaul_strs = overhaul_str.split()
        if overhaul_strs:
            listing.engine_hours = float(overhaul_strs[0].replace(',', ''))
        if len(overhaul_strs) > 1:
            listing.overhaul_type = overhaul_strs[1]

    location_str = FindTradeAPlaneSpec(soup.find('label', string='Location:'),
                                       'next_sibling')
    if location_str:
        location_strs = location_str.split(',')
        if len(location_strs) == 1:
            state_part = location_strs[0]
        elif len(location_strs) == 2:
            # Stripped for consistency with the other listing parsers.
            listing.city = location_strs[0].strip()
            state_part = location_strs[1]
        else:
            # More than one comma: the layout is not recognised.
            state_part = ''
        # split() already discards all whitespace, newlines included, and
        # yields an empty list for blank text, so only index when non-empty.
        state_words = state_part.split()
        if state_words:
            listing.state = SanitizeState(state_words[0])

    listing.gps = FindGps(html)
    listing.transponder = FindTransponder(html)

    return listing
Beispiel #3
0
def ParseAirplaneMartListing(url):
    """Download an AirplaneMart listing page and scrape it into a Listing.

    Labelled fields are obtained through FindAirplaneMartSpec; year and
    model are derived from the title text.

    Args:
        url: Address of the listing page; also stored on the result.

    Returns:
        A Listing with title, url, registration, serial and airframe_hours
        assigned, plus price, engine_hours, overhaul_type, city, state, year
        and model when they can be extracted. gps and transponder are set
        from FindGps / FindTransponder, which are given the raw HTML.

    Raises:
        AttributeError: If the title element is missing from the page (the
            title lookup is intentionally left unguarded).
    """
    request = urllib.request.Request(url, headers=_HEADERS)
    # Close the HTTP response deterministically instead of leaking it.
    # Undecodable bytes are dropped rather than failing the whole page.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8', 'ignore')
    soup = bs4.BeautifulSoup(html, 'lxml')
    listing = Listing()

    listing.title = soup.find('font', size='5').find('b').string.strip()
    listing.url = url

    # The spec lookup is checked for emptiness at its other call sites
    # below, so do not pass a possibly-missing value to re.sub().
    price_spec = FindAirplaneMartSpec(soup, 'Price:')
    # Keep only digits and the decimal point. Parentheses must not appear
    # in the class: inside [...] they are literal characters.
    price_str = re.sub(r'[^\d.]', '', price_spec) if price_spec else ''
    if price_str:
        listing.price = float(price_str)

    listing.registration = FindAirplaneMartSpec(soup, 'Registration:')
    listing.serial = FindAirplaneMartSpec(soup, 'Serial:')
    listing.airframe_hours = FindAirplaneMartSpec(soup, 'Airframe Time:',
                                                  float)

    # Raw string: "\(" is not a valid escape in a normal string literal.
    engine_str = FindAirplaneMartSpec(soup, r'Engine Time\(s\):')
    if engine_str:
        # Group 1: a number that may contain thousands separators and must
        # contain at least one digit. Group 2: optional overhaul type word.
        match = re.search(
            r'(\d[\d,]*(?:\.\d*)?|\.\d+)(?:\s+([A-Z]+))?',
            engine_str.upper())
        if match:
            listing.engine_hours = float(match.group(1).replace(',', ''))
            # group(2) is None when no type word follows the number.
            listing.overhaul_type = match.group(2)

    location_str = FindAirplaneMartSpec(soup, 'Aircraft Location:')
    if location_str:
        # Drop any parenthesised remark before splitting "city, state".
        location_str = re.sub(r'\s*\(.*\)\s*', '', location_str)
        if location_str:
            locations = location_str.split(',')
            listing.city = locations[0].strip()
            if len(locations) >= 2:
                # split() yields an empty list for blank text (for example
                # after a trailing comma), so only index when non-empty.
                state_words = locations[1].split()
                if state_words:
                    listing.state = SanitizeState(state_words[0])

    year = re.search(r'(19|20)\d\d', listing.title)
    if year:
        listing.year = int(year.group(0))

    model = re.search(r'M20[A-Z]\s*(\d{3})?', listing.title.upper())
    if model:
        listing.model = model.group(0)

    listing.gps = FindGps(html)
    listing.transponder = FindTransponder(html)

    return listing
Beispiel #4
0
def ParseControllerListing(url):
    """Download a Controller listing page and scrape it into a Listing.

    Labelled fields are obtained through FindControllerSpec; the price is
    read from the first h4 heading containing "For Sale Price:".

    Args:
        url: Address of the listing page; also stored on the result.

    Returns:
        A Listing with title, url, year, registration, model, serial and
        airframe_hours assigned, plus price, engine_hours, overhaul_type,
        city and state when they can be extracted. gps and transponder are
        set from FindGps / FindTransponder, which are given the raw HTML.

    Raises:
        AttributeError: If the page has no h1 element with a single text
            child (the title lookup is intentionally left unguarded).
    """
    request = urllib.request.Request(url, headers=_HEADERS)
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8')
    soup = bs4.BeautifulSoup(html, 'lxml')
    listing = Listing()

    listing.title = soup.find('h1').string.strip()
    listing.url = url

    for h4 in soup.find_all('h4'):
        if h4.find(string=re.compile(r'For Sale Price:')):
            # The price is read from the node right after the opening tag;
            # ignore it when that node is a Tag rather than text.
            price_text = h4.next_element
            if isinstance(price_text, str):
                # Keep only digits and the decimal point. Parentheses must
                # not appear in the class: inside [...] they are literal.
                price_str = re.sub(r'[^\d.]', '', price_text)
                if price_str:
                    listing.price = float(price_str)
            # Only the first matching heading is considered.
            break

    listing.year = FindControllerSpec(soup, 'Year', int)

    registration = FindControllerSpec(soup, 'Registration #')
    # A registration without any digit is treated as missing. The spec
    # lookup is checked for emptiness elsewhere in this function, so guard
    # here too instead of handing a possible None to re.search().
    if not (registration and re.search(r'\d', registration)):
        registration = None
    listing.registration = registration

    listing.model = FindControllerSpec(soup, 'Model')
    listing.serial = FindControllerSpec(soup, 'Serial #')
    listing.airframe_hours = FindControllerSpec(soup, 'Total Time', float)

    overhaul_str = FindControllerSpec(soup, 'Overhaul')
    if overhaul_str:
        # Hours: first run of digits, commas and dots. Type: the first word
        # that follows such a run.
        hours_match = re.search(r'([0-9,\.]+)', overhaul_str)
        type_match = re.search(r'(?:[0-9,\.]+)\s*([a-zA-Z]+)', overhaul_str)
        if hours_match:
            listing.engine_hours = float(hours_match.group(1).replace(',', ''))
        if type_match:
            listing.overhaul_type = type_match.group(1)

    # The location anchor may be absent, and .string is None when the anchor
    # does not hold exactly one text node.
    location_tag = soup.find('a', class_='machinelocation')
    location = location_tag.string if location_tag is not None else None
    if location:
        location_strs = location.split(',')
        if len(location_strs) == 1:
            listing.state = SanitizeState(location_strs[0])
        elif len(location_strs) == 2:
            listing.city = location_strs[0]
            listing.state = SanitizeState(location_strs[1].strip())

    listing.gps = FindGps(html)
    listing.transponder = FindTransponder(html)

    return listing