Esempio n. 1
0
    def __getProjectListing(self, listing, isRedesign=False):
        """Build a Project, including its child properties, from a listing node.

        Args:
            listing: Parsed HTML node of the project listing. It is queried
                with BeautifulSoup-style ``find``/``find_all`` calls.
            isRedesign: When true, the ``listing-result-redesign__*`` CSS
                classes are looked up instead of the ``listing-result__*``
                ones.

        Returns:
            The populated Project, with one child Property added per child
            listing anchor found in ``listing``.

        Raises:
            ValueError: If the project address, the project URL, any child
                listing, a child URL, a child price, the child features or
                the child address on the detail page cannot be found, or if
                the region/postcode part of the address does not split into
                exactly two space-separated tokens.
            IndexError: If the address text has fewer than three
                comma-separated parts.

        Each child property's detail page is fetched with ``urlopen``; errors
        raised by that call propagate unchanged.
        """
        classPrefix = ('listing-result-redesign__'
                       if isRedesign else 'listing-result__')
        addressNodeClass = classPrefix + 'project-address'
        titleNodeClass = classPrefix + 'project-title'
        urlNodeClass = classPrefix + 'project-title-wrapper'
        featureNodeClass = classPrefix + 'project-features'
        childNodeClass = classPrefix + 'listing'
        priceNodeClass = classPrefix + 'price'

        addressNode = listing.find('span', class_=addressNodeClass)
        if not addressNode:
            print(str(listing))
            raise ValueError('No project address found.')

        # The code expects "<line 1>, <locality>, <region> <postcode>".
        addressParts = addressNode.text.strip().split(',')
        try:
            addressLine1 = addressParts[0].strip()
            addressLocality = addressParts[1].strip()
            addressRegion, postalCode = addressParts[2].strip().split(' ')
        except (IndexError, ValueError):
            # Dump the offending text for diagnosis, then let the original
            # exception propagate.
            print(addressNode.text)
            raise

        project = Project(addressLine1, addressLocality, addressRegion,
                          postalCode)

        titleNode = listing.find('h2', class_=titleNodeClass)
        if titleNode:
            project.setTitle(titleNode.text.strip())

        urlNode = listing.find('a', class_=urlNodeClass)
        if not (urlNode and urlNode.has_attr('href')):
            print(str(listing))
            raise ValueError('No project URL found.')
        projectUrl = urlNode['href']
        project.setUrl(projectUrl)
        print(projectUrl)

        featureNode = listing.find('ul', class_=featureNodeClass)
        if featureNode:
            # Only the items of the project feature list are project
            # features; searching the whole listing would also pick up any
            # unrelated <li> elements.
            for feature in featureNode.find_all('li'):
                project.setFeature(feature.text.strip())

        # get the child properties
        childListings = listing.find_all('a', class_=childNodeClass)
        if not childListings:
            print(str(listing))
            raise ValueError('No child property found.')

        for childListing in childListings:
            print(str(childListing))
            if not childListing.has_attr('href'):
                print(str(childListing))
                raise ValueError('No child URL found.')
            propertyUrl = childListing['href']

            # Reset per child so a missing price node can neither leave the
            # name unbound nor silently reuse the previous child's price.
            price = 0
            priceNode = childListing.find('h3', class_=priceNodeClass)
            if priceNode:
                price = self.__getPrice(priceNode.text.strip())
            if price == 0 and addressLine1:
                price = self.__getPriceFromAddress(addressLine1,
                                                   addressLocality,
                                                   addressRegion, postalCode)
            if price == 0:
                print(str(childListing))
                raise ValueError('No child price found.')

            childProperty = Property(price)
            childProperty.setUrl(propertyUrl)

            # NOTE(review): this searches the whole project listing, so every
            # child receives the same feature texts. Possibly
            # childListing.find_all(...) was intended -- confirm against the
            # page markup before changing.
            childFeatureNodes = listing.find_all(
                'span', class_='property-feature__feature-text-container')
            if not childFeatureNodes:
                print(str(listing))
                raise ValueError('No child feature found.')
            for childFeatureNode in childFeatureNodes:
                childProperty.setFeature(childFeatureNode.text.strip())

            childPage = urlopen(propertyUrl)
            try:
                childDom = BeautifulSoup(childPage, 'html.parser')
            finally:
                # Release the HTTP response as soon as it has been parsed;
                # one is opened per child property.
                childPage.close()

            childAddressNode = childDom.find(
                'button', class_='listing-details__project-title-address')
            if not childAddressNode:
                raise ValueError('No child address found.')
            childAddressText = childAddressNode.text.strip()
            print(childAddressText)
            # The text before the first '/' is something like:
            # Type A, Type B, Type C, Courtyard, etc.
            # str.split always yields at least one element, so index 0 is safe.
            childPropertyType = childAddressText.split('/')[0].strip()
            childProperty.setChildPropertyType(childPropertyType)

            project.addChildProperty(childProperty)

        return project
Esempio n. 2
0
    def __getPropertyListing(self, listing):
        """Build a Property from a single (non-project) listing node.

        The price, URL, address and feature texts are read from ``listing``;
        the property type, land area and description are read from the
        property's detail page, which is fetched with ``urlopen``.

        Args:
            listing: Parsed HTML node of the listing. It is queried with
                BeautifulSoup-style ``find``/``find_all`` calls. Both the
                ``listing-result__*`` and ``listing-result-redesign__*``
                class names are tried for the price and address nodes.

        Returns:
            The populated Property.

        Raises:
            ValueError: If the price tag, property URL, second address line,
                description, property type or feature texts cannot be found.
            IndexError: If the second address line has fewer than three
                ``span`` descendants.

        Errors raised by ``urlopen`` propagate unchanged.
        """
        priceNode = listing.find('p', class_='listing-result__price')
        if priceNode is None:
            priceNode = listing.find('p',
                                     class_='listing-result-redesign__price')
        if priceNode is None:
            print(str(listing))
            raise ValueError('No price tag found.')
        priceText = priceNode.text.strip()

        addressNode = listing.find('a', class_='listing-result__address')
        if addressNode is None:
            addressNode = listing.find(
                'a', class_='listing-result-redesign__address')
        if not (addressNode and addressNode.has_attr('href')):
            print(str(listing))
            raise ValueError('No property URL found.')
        propertyUrl = addressNode['href']

        addressLine1 = ''
        addressLine1Node = addressNode.find('span', class_='address-line1')
        if addressLine1Node:
            addressLine1 = addressLine1Node.text.strip()
            # Drop the single trailing comma that separates line 1 from
            # line 2 in the markup.
            if addressLine1.endswith(','):
                addressLine1 = addressLine1[:-1]

        addressLine2Node = addressNode.find('span', class_='address-line2')
        if not addressLine2Node:
            print(str(listing))
            raise ValueError('No address line 2 found.')
        # The first three nested spans are read as locality, region, postcode.
        addressLine2Parts = addressLine2Node.select('span')
        addressLocality = addressLine2Parts[0].text.strip()
        addressRegion = addressLine2Parts[1].text.strip()
        postalCode = addressLine2Parts[2].text.strip()

        price = self.__getPrice(priceText)
        if price == 0 and addressLine1:
            # No usable price in the listing text; fall back to the
            # address-based lookup.
            price = self.__getPriceFromAddress(addressLine1, addressLocality,
                                               addressRegion, postalCode)

        # Named listedProperty rather than property to avoid shadowing the
        # builtin.
        listedProperty = Property(price, addressLine1, addressLocality,
                                  addressRegion, postalCode)
        listedProperty.setUrl(propertyUrl)

        page = urlopen(propertyUrl)
        try:
            dom = BeautifulSoup(page, 'html.parser')
        finally:
            # Release the HTTP response as soon as it has been parsed.
            page.close()

        keyFeatureNodes = dom.find_all(
            'div', class_='listing-details__key-features--item')
        for keyFeatureNode in keyFeatureNodes:
            keyNode = keyFeatureNode.find(
                'div', class_='listing-details__key-features--key')
            valueNode = keyFeatureNode.find(
                'div', class_='listing-details__key-features--value')
            if keyNode is None or valueNode is None:
                # Incomplete key/value pair: skip it instead of failing with
                # an opaque AttributeError. A missing property type is still
                # reported by the explicit check further down.
                continue
            key = keyNode.text.strip()
            value = valueNode.text.strip()
            if re.search(r'Property type', key, re.IGNORECASE):
                listedProperty.setType(value)
            elif re.search(r'Land area', key, re.IGNORECASE):
                # Only the first run of digits in the value is used.
                match = re.search(r'(\d+)', value)
                if match:
                    listedProperty.setLandArea(int(match.group(1)))

        description = self.__getPropertyDescription(dom)
        if not description:
            print(propertyUrl)
            raise ValueError('No description found.')
        listedProperty.setDescription(description)

        if listedProperty.getType() == '':
            print(propertyUrl)
            raise ValueError('No property type found.')

        featureNodes = listing.find_all(
            'span', class_='property-feature__feature-text-container')
        if not featureNodes:
            print(str(listing))
            raise ValueError('No feature found.')
        for featureNode in featureNodes:
            listedProperty.setFeature(featureNode.text.strip())

        return listedProperty