def get_price(self):
    """Return ``(price, unit)`` parsed from the ``div.price`` element.

    The price is the first run of digits found in the punctuation-cleaned
    text of the first ``div.price`` element, converted to ``float``.  It is
    ``0.0`` when the element is missing or contains no digits.  The unit is
    the text of the first ``div.price b`` element, or a default unit
    string when that element is missing.
    """
    price = 0.0
    price_nodes = find_by_css(self.soup, 'div.price')
    if price_nodes:
        cleaned = punctuation_cleaner.sub('', price_nodes[0].text)
        match = re.search(r'\d+', cleaned)
        # re.search returns None when there are no digits; keep the 0.0
        # fallback instead of failing on ``None.group()``.
        if match:
            price = float(match.group())

    unit_nodes = find_by_css(self.soup, 'div.price b')
    unit = unit_nodes[0].text if unit_nodes else u"元/月"
    return price, unit
def __get_house_infos(self):
    """Build a dict from paired ``td.td-center`` / ``td.td-white`` cells.

    Keys are the label cell texts, values are the stripped value cell
    texts.  When a label occurs more than once, the first value wins.
    """
    labels = find_by_css(self.soup, 'td.td-center')
    cells = find_by_css(self.soup, 'td.td-white')
    result = {}
    for label, cell in zip(labels, cells):
        # Only the first occurrence of a label is recorded.
        if label.text not in result:
            result[label.text] = cell.text.strip()
    return result
def __get_house_infos(self):
    """Build a dict from the ``th`` / ``td`` pairs inside ``#mainInfo``.

    Ideographic spaces (U+3000) are removed from each header text to form
    the key; values are the stripped cell texts.  When a key occurs more
    than once, the first value wins.
    """
    headers = find_by_css(self.soup, '#mainInfo th')
    cells = find_by_css(self.soup, '#mainInfo td')
    result = {}
    for header, cell in zip(headers, cells):
        label = header.text.replace(u'\u3000', '')
        # Only the first occurrence of a label is recorded.
        if label not in result:
            result[label] = cell.text.strip()
    return result
def get_host_role(self):
    """Return the host role derived from the first ``.status`` element.

    Returns the owner label when the status text contains the landlord
    marker, otherwise the agent label.  A missing ``.status`` element is
    treated as empty text and therefore yields the agent label.

    Both labels are unicode literals so that callers comparing the result
    against a unicode string behave the same on Python 2 and Python 3.
    """
    status_nodes = find_by_css(self.soup, '.status')
    status_text = status_nodes[0].text if status_nodes else ''
    if u'房東' in status_text:
        return u'屋主'
    return u'仲介'
def get_host_company(self):
    """Return the host's company string, or ``''`` when not applicable.

    Only hosts whose role is the agent label get a company: the host name
    joined by a space with the text of the last
    ``#sideMenu section li span`` element.  An empty string is returned
    for any other role, or when no such element is found.
    """
    if self.get_host_role() != u'仲介':
        return ''
    # find_by_css needs the parsed document as its first argument.
    spans = find_by_css(self.soup, '#sideMenu section li span')
    if not spans:
        return ''
    # get_host_name must be called; joining the bound method itself would
    # raise TypeError.
    return ' '.join([self.get_host_name(), spans[-1].text])
def __get_house_infos(self):
    """Collect house attributes from the label list and attribute rows.

    First, ``.labelList .one`` / ``.labelList .two`` elements are paired
    into key/value entries, with colons removed from the value text.
    Then each ``ul.attr li`` row is punctuation-cleaned and split on its
    first colon into key and value.  Rows without a colon are skipped.
    Later entries overwrite earlier ones with the same key.
    """
    infos = {}
    labels = find_by_css(self.soup, '.labelList .one')
    values = find_by_css(self.soup, '.labelList .two')
    for attr, value in zip(labels, values):
        infos[attr.text] = value.text.replace(u':', '')

    for row in find_by_css(self.soup, 'ul.attr li'):
        clean_text = punctuation_cleaner.sub('', row.text)
        if ':' not in clean_text:
            # Not a "key:value" row; unpacking the split would fail.
            continue
        # Split on the first colon only, so values that contain a colon
        # themselves are kept intact instead of raising ValueError.
        attr, value = clean_text.split(':', 1)
        infos[attr] = value
    return infos
def get_case_name(self):
    """Return the case name taken from the ``div.h1table h1`` heading.

    The case number (from ``get_case_number``) is removed from the text
    of the first heading, then the result is punctuation-cleaned.  A
    missing heading is treated as empty text.
    """
    number = self.get_case_number()
    headings = find_by_css(self.soup, 'div.h1table h1')
    title = ''
    if headings:
        title = headings[0].text.replace(number, '')
    return punctuation_cleaner.sub('', title)
def get_host_company(self):
    """Return "<company> <branch>" parsed from ``div.auatarSonBox``.

    The punctuation-cleaned text of the first matching element is
    searched for the company / branch pattern.  When the element is
    missing or the pattern does not match, both parts are empty and the
    result is a single space.
    """
    boxes = find_by_css(self.soup, 'div.auatarSonBox')
    cleaned = ''
    if boxes:
        cleaned = punctuation_cleaner.sub('', boxes[0].text)

    found = re.search(u'公司名:(\W+)分店:(\W+)', cleaned)
    if found:
        parts = [found.group(1), found.group(2)]
    else:
        parts = ["", ""]
    return ' '.join(parts)
def get_host_phonenumber(self):
    """Return the phone number rebuilt from ``.tel span`` class names.

    For each matching span, the first digit found in its first class
    name contributes one character.  Spans whose class name has no digit
    contribute nothing.
    """
    digits = []
    for span in find_by_css(self.soup, '.tel span'):
        found = re.search('\d', span['class'][0])
        if found:
            digits.append(found.group())
    return ''.join(digits)
def get_latitude_longtitude(self):
    """Return ``(lat, lng)`` parsed from the ``#static_map2`` image URL.

    The ``src`` attribute of the first ``#static_map2`` element is
    searched for a ``<float>_<float>.png`` fragment.  Returns
    ``(0.0, 0.0)`` when the element is missing or the pattern does not
    match.
    """
    map_nodes = find_by_css(self.soup, '#static_map2')
    if not map_nodes:
        # Same fallback as a non-matching URL, instead of an IndexError.
        return 0., 0.

    match = re.search(r'(\d+\.\d+)_(\d+\.\d+).png', map_nodes[0]['src'])
    if not match:
        return 0., 0.
    return float(match.group(1)), float(match.group(2))
def __get_house_infos(self):
    """Collect "key:value" attributes from ``div.object-list li`` rows.

    Each row's text is punctuation-cleaned and split on its first colon
    into key and value.  Rows without a colon are skipped.  Later rows
    overwrite earlier ones with the same key.
    """
    infos = {}
    for row in find_by_css(self.soup, 'div.object-list li'):
        clean_text = punctuation_cleaner.sub('', row.text)
        if ':' not in clean_text:
            continue
        # Split on the first colon only, so a value that itself contains
        # a colon does not raise ValueError on unpacking.
        attr, value = clean_text.split(':', 1)
        infos[attr] = value
    return infos
def get_host_name(self):
    """Return the text of the first ``span.font_15_r`` element, or ``''``."""
    matches = find_by_css(self.soup, 'span.font_15_r')
    if not matches:
        return ''
    return matches[0].text
def get_case_name(self):
    """Return the punctuation-cleaned text of the first ``div.top h1``.

    A missing heading is treated as empty text.
    """
    headings = find_by_css(self.soup, 'div.top h1')
    title = ''
    if headings:
        title = headings[0].text
    return punctuation_cleaner.sub('', title)
def get_host_phonenumber(self):
    """Return the text of the first ``div.infoblock h2`` element, or ``""``."""
    matches = find_by_css(self.soup, 'div.infoblock h2')
    if not matches:
        return ""
    return matches[0].text
def get_host_company(self):
    """Return the stripped text of the first ``div.infoblock h3``, or ``""``."""
    matches = find_by_css(self.soup, 'div.infoblock h3')
    if not matches:
        return ""
    return matches[0].text.strip()
def get_expire_date(self):
    """Return the expiry date text from the first ``span.ft-rt`` element.

    The leading validity-period label is removed from the element text.
    Returns ``''`` when no such element exists.
    """
    matches = find_by_css(self.soup, 'span.ft-rt')
    if not matches:
        # Indexing an empty result would raise IndexError.
        return ''
    return matches[0].text.replace(u'有效期:', '')
def get_case_number(self):
    """Return the punctuation-cleaned case number text, or ``''``.

    The text comes from the first ``div.h1table span.color-gray`` element.
    """
    matches = find_by_css(self.soup, 'div.h1table span.color-gray')
    if not matches:
        return ''
    return punctuation_cleaner.sub('', matches[0].text)
def get_case_name(self):
    """Return the text of the first ``span.houseInfoTitle``, or ``""``."""
    titles = find_by_css(self.soup, 'span.houseInfoTitle')
    if titles:
        return titles[0].text
    return ""
def get_host_name(self):
    """Return the text of the first ``div.infoblock span`` element.

    Falls back to ``get_host_company()`` when no such element exists.
    """
    matches = find_by_css(self.soup, 'div.infoblock span')
    if matches:
        return matches[0].text
    return self.get_host_company()
def get_host_role(self):
    """Return the first role word found in ``div.avatarRight``.

    The text of the first ``div.avatarRight`` element is searched for
    one of the known role words.  Returns ``""`` when the element is
    missing or none of the role words appears in it.
    """
    matches = find_by_css(self.soup, 'div.avatarRight')
    if not matches:
        return ""
    found = re.search(u'屋主|仲介|代理人', matches[0].text)
    # re.search returns None when no role word is present.
    return found.group() if found else ""
def get_host_name(self):
    """Return the text of the first ``div.avatarRight i`` element, or ``""``."""
    matches = find_by_css(self.soup, 'div.avatarRight i')
    if not matches:
        return ""
    return matches[0].text
def get_host_phonenumber(self):
    """Return the text of the first ``span.font_13_666_tel``, or ``''``."""
    matches = find_by_css(self.soup, 'span.font_13_666_tel')
    if not matches:
        return ''
    return matches[0].text
def get_host_name(self):
    """Return the text of the first ``div.landlord`` element, or ``''``."""
    matches = find_by_css(self.soup, 'div.landlord')
    if not matches:
        return ''
    return matches[0].text
def get_host_company(self):
    """Return the text of the first ``span.font_15_r`` element, or ``''``."""
    matches = find_by_css(self.soup, 'span.font_15_r')
    if not matches:
        return ''
    return matches[0].text
def get_address(self):
    """Return the text of the first ``span.addr`` element, or ``''``."""
    matches = find_by_css(self.soup, 'span.addr')
    if not matches:
        return ''
    return matches[0].text