Ejemplo n.º 1
0
    def get_price(self):
        """Return the listing price as an ``(amount, unit)`` tuple.

        The amount is the first run of digits in the text of the first
        ``div.price`` result, after ``punctuation_cleaner`` has been applied,
        converted to ``float``.  It is ``0.0`` when there is no such element
        or when its text contains no digits.  The unit is the text of the
        first ``div.price b`` result, or ``u"元/月"`` when there is none.
        """
        price = 0.0
        raw = find_by_css(self.soup, 'div.price')
        if raw:
            # A price element without any digit used to crash on
            # ``None.group()``; treat it the same as a missing price.
            m = re.search(r'\d+', punctuation_cleaner.sub('', raw[0].text))
            if m:
                price = float(m.group())

        raw = find_by_css(self.soup, 'div.price b')
        unit = raw[0].text if raw else u"元/月"

        return price, unit
Ejemplo n.º 2
0
    def __get_house_infos(self):
        """Collect attribute/value pairs from the page into a dict.

        Keys are the texts of the ``td.td-center`` results and values are the
        stripped texts of the ``td.td-white`` results, paired by position.
        When a key appears more than once, only its first value is kept.
        """
        labels = find_by_css(self.soup, 'td.td-center')
        cells = find_by_css(self.soup, 'td.td-white')

        details = {}
        for label, cell in zip(labels, cells):
            # First occurrence wins; later duplicates are ignored.
            if label.text not in details:
                details[label.text] = cell.text.strip()

        return details
Ejemplo n.º 3
0
    def __get_house_infos(self):
        """Collect attribute/value pairs from ``#mainInfo`` into a dict.

        Keys come from the ``#mainInfo th`` results with every ideographic
        space (U+3000) removed; values are the stripped texts of the
        ``#mainInfo td`` results, paired by position.  When a key appears
        more than once, only its first value is kept.
        """
        headers = find_by_css(self.soup, '#mainInfo th')
        cells = find_by_css(self.soup, '#mainInfo td')

        details = {}
        for header, cell in zip(headers, cells):
            name = header.text.replace(u'\u3000', '')

            # First occurrence wins; later duplicates are ignored.
            if name not in details:
                details[name] = cell.text.strip()

        return details
Ejemplo n.º 4
0
    def get_host_role(self):
        """Classify the poster from the text of the first ``.status`` result.

        Returns ``u'屋主'`` when that text contains ``u'房東'`` and
        ``u'仲介'`` otherwise, including when there is no ``.status`` element.
        """
        raw = find_by_css(self.soup, '.status')
        status = raw[0].text if raw else ''

        # Both labels are unicode literals.  On Python 2 a bare '仲介' is a
        # byte string and never compares equal to u'仲介', which would break
        # callers that test the returned role against the unicode label.
        return u'屋主' if u'房東' in status else u'仲介'
Ejemplo n.º 5
0
    def get_host_company(self):
        """Return ``"<host name> <company>"`` for agent listings, else ``''``.

        Only listings whose ``get_host_role()`` is ``u'仲介'`` carry a
        company.  The company text is taken from the last
        ``#sideMenu section li span`` result; when there is no such element
        an empty string is returned.
        """
        if self.get_host_role() != u'仲介':
            return ''

        # The document has to be passed to find_by_css; the original call
        # omitted it and passed the selector as the only argument.
        raw = find_by_css(self.soup, '#sideMenu section li span')
        if not raw:
            return ''

        # get_host_name must be *called*; joining the bound method itself
        # raises TypeError.
        return ' '.join([self.get_host_name(), raw[-1].text])
Ejemplo n.º 6
0
    def __get_house_infos(self):
        """Collect attribute/value pairs from the page into a dict.

        Two sources are merged:

        * ``.labelList .one`` / ``.labelList .two`` results, paired by
          position, with every ``u':'`` removed from the value text;
        * ``ul.attr li`` rows, whose text (after ``punctuation_cleaner``) is
          split at the first ``':'`` into attribute and value.

        Rows without a ``':'`` are skipped.  Later entries overwrite earlier
        ones that share a key.
        """
        infos = {}

        attr_raw = find_by_css(self.soup, '.labelList .one')
        value_raw = find_by_css(self.soup, '.labelList .two')

        for attr, value in zip(attr_raw, value_raw):
            infos[attr.text] = value.text.replace(u':', '')

        for row in find_by_css(self.soup, 'ul.attr li'):
            clean_text = punctuation_cleaner.sub('', row.text)
            # partition never raises: a row with no colon is skipped and a
            # row with several colons keeps the remainder in the value,
            # where ``attr, value = text.split(':')`` raised ValueError.
            attr, sep, value = clean_text.partition(':')
            if not sep:
                continue
            infos[attr] = value

        return infos
Ejemplo n.º 7
0
    def get_case_name(self):
        """Return the listing title with the case number removed.

        The title is the text of the first ``div.h1table h1`` result with
        every occurrence of ``get_case_number()`` removed, then passed
        through ``punctuation_cleaner``.  An empty title is used when the
        heading is missing.
        """
        number = self.get_case_number()
        heading = find_by_css(self.soup, 'div.h1table h1')

        if heading:
            title = heading[0].text.replace(number, '')
        else:
            title = ''

        return punctuation_cleaner.sub('', title)
Ejemplo n.º 8
0
    def get_host_company(self):
        """Return ``"<company> <branch>"`` parsed from the host box, or ``''``.

        The text of the first ``div.auatarSonBox`` result is passed through
        ``punctuation_cleaner`` and matched against the
        ``公司名:...分店:...`` pattern.  An empty string is returned when the
        element is missing or the pattern does not match.
        """
        raw = find_by_css(self.soup, 'div.auatarSonBox')
        clean_text = punctuation_cleaner.sub('', raw[0].text) if raw else ''

        # Backslashes are doubled so the literal has no invalid escape
        # sequence; the compiled pattern is unchanged.
        # NOTE(review): with Python 3 str patterns, \W does not match CJK
        # word characters -- confirm this pattern still captures names there.
        m = re.search(u'公司名:(\\W+)分店:(\\W+)', clean_text)
        if m is None:
            # Joining two empty parts would yield a lone " ", which is
            # truthy and looks like a real value to callers.
            return ''

        return ' '.join([m.group(1), m.group(2)])
Ejemplo n.º 9
0
    def get_host_phonenumber(self):
        """Rebuild the phone number from the class names of ``.tel span``.

        For each ``.tel span`` result, the first digit found in its first
        class name is appended to the number.  Spans without a class, or
        whose first class name has no digit, contribute nothing.
        """
        digits = []
        for raw in find_by_css(self.soup, '.tel span'):
            try:
                text = raw['class'][0]
            except (KeyError, IndexError):
                # A span with no (or an empty) class attribute carries no
                # digit; skip it instead of aborting the whole number.
                continue

            m = re.search(r'\d', text)
            if m:
                digits.append(m.group())

        return ''.join(digits)
Ejemplo n.º 10
0
    def get_latitude_longtitude(self):
        """Return ``(lat, lng)`` parsed from the ``#static_map2`` image URL.

        The ``src`` attribute of the first ``#static_map2`` result is
        searched for ``<float>_<float>.png``; the two numbers are returned as
        floats in that order.  ``(0., 0.)`` is returned when the element is
        missing or its ``src`` does not contain such a pair.
        """
        raw = find_by_css(self.soup, '#static_map2')
        # Guard against a page without the map image, as the other getters
        # do, instead of raising IndexError on ``raw[0]``.
        text = raw[0]['src'] if raw else ''

        # The dot before "png" is escaped so it only matches a literal '.'.
        lat_lng_pattern = re.compile(r'(\d+\.\d+)_(\d+\.\d+)\.png')
        m = lat_lng_pattern.search(text)
        if m is None:
            return 0., 0.

        return float(m.group(1)), float(m.group(2))
Ejemplo n.º 11
0
    def __get_house_infos(self):
        """Collect attribute/value pairs from ``div.object-list li`` rows.

        Each row's text is passed through ``punctuation_cleaner`` and split
        at the first ``':'`` into attribute and value.  Rows without a
        ``':'`` are skipped; a later row overwrites an earlier one that has
        the same attribute.
        """
        infos = {}

        for row in find_by_css(self.soup, 'div.object-list li'):
            clean_text = punctuation_cleaner.sub('', row.text)
            # Split only at the first colon: a value that itself contains
            # ':' made ``attr, value = text.split(':')`` raise ValueError.
            attr, sep, value = clean_text.partition(':')
            if sep:
                infos[attr] = value

        return infos
Ejemplo n.º 12
0
    def get_host_name(self):
        """Return the text of the first ``span.font_15_r`` result, or ``''``."""
        spans = find_by_css(self.soup, 'span.font_15_r')
        if not spans:
            return ''
        return spans[0].text
Ejemplo n.º 13
0
    def get_case_name(self):
        """Return the listing title with punctuation removed.

        The title is the text of the first ``div.top h1`` result (empty when
        there is none), passed through ``punctuation_cleaner``.
        """
        headings = find_by_css(self.soup, 'div.top h1')
        if headings:
            title = headings[0].text
        else:
            title = ''

        return punctuation_cleaner.sub('', title)
Ejemplo n.º 14
0
    def get_host_phonenumber(self):
        """Return the text of the first ``div.infoblock h2`` result, or ``""``."""
        headings = find_by_css(self.soup, 'div.infoblock h2')
        if not headings:
            return ""
        return headings[0].text
Ejemplo n.º 15
0
    def get_host_company(self):
        """Return the stripped text of the first ``div.infoblock h3`` result.

        An empty string is returned when there is no such element.
        """
        headings = find_by_css(self.soup, 'div.infoblock h3')
        if not headings:
            return ""
        return headings[0].text.strip()
Ejemplo n.º 16
0
    def get_expire_date(self):
        """Return the expiry text from the first ``span.ft-rt`` result.

        The ``u'有效期:'`` label is removed from the element text.  An empty
        string is returned when the page has no such element.
        """
        raw = find_by_css(self.soup, 'span.ft-rt')
        # Guard against a missing element, as the sibling getters do,
        # instead of raising IndexError on ``raw[0]``.
        if not raw:
            return ''

        return raw[0].text.replace(u'有效期:', '')
Ejemplo n.º 17
0
 def get_case_number(self):
     """Return the case number text with punctuation removed, or ``''``.

     The text is taken from the first ``div.h1table span.color-gray``
     result and passed through ``punctuation_cleaner``.
     """
     spans = find_by_css(self.soup, 'div.h1table span.color-gray')
     if not spans:
         return ''
     return punctuation_cleaner.sub('', spans[0].text)
Ejemplo n.º 18
0
 def get_case_name(self):
     """Return the text of the first ``span.houseInfoTitle`` result, or ``""``."""
     titles = find_by_css(self.soup, 'span.houseInfoTitle')
     if not titles:
         return ""
     return titles[0].text
Ejemplo n.º 19
0
    def get_host_name(self):
        """Return the host name, falling back to the company name.

        The name is the text of the first ``div.infoblock span`` result;
        when there is none, the result of ``get_host_company()`` is returned.
        """
        spans = find_by_css(self.soup, 'div.infoblock span')
        if spans:
            return spans[0].text
        return self.get_host_company()
Ejemplo n.º 20
0
    def get_host_role(self):
        """Return the role label found in the first ``div.avatarRight`` result.

        The element text is searched for one of ``屋主``, ``仲介`` or
        ``代理人`` and the first hit is returned.  An empty string is returned
        when the element is missing or contains none of these labels.
        """
        raw = find_by_css(self.soup, 'div.avatarRight')
        if not raw:
            return ""

        # The element may exist without any known label; ``re.search`` then
        # returns None, which used to crash on ``.group()``.
        m = re.search(u'屋主|仲介|代理人', raw[0].text)
        return m.group() if m else ""
Ejemplo n.º 21
0
    def get_host_name(self):
        """Return the text of the first ``div.avatarRight i`` result, or ``""``."""
        tags = find_by_css(self.soup, 'div.avatarRight i')
        if not tags:
            return ""
        return tags[0].text
Ejemplo n.º 22
0
    def get_host_phonenumber(self):
        """Return the text of the first ``span.font_13_666_tel`` result, or ``''``."""
        spans = find_by_css(self.soup, 'span.font_13_666_tel')
        if not spans:
            return ''
        return spans[0].text
Ejemplo n.º 23
0
    def get_host_name(self):
        """Return the text of the first ``div.landlord`` result, or ``''``."""
        blocks = find_by_css(self.soup, 'div.landlord')
        if not blocks:
            return ''
        return blocks[0].text
Ejemplo n.º 24
0
    def get_host_company(self):
        """Return the text of the first ``span.font_15_r`` result, or ``''``."""
        spans = find_by_css(self.soup, 'span.font_15_r')
        if not spans:
            return ''
        return spans[0].text
Ejemplo n.º 25
0
    def get_address(self):
        """Return the text of the first ``span.addr`` result, or ``''``."""
        spans = find_by_css(self.soup, 'span.addr')
        if not spans:
            return ''
        return spans[0].text