def get_case_name(self):
    """Return the cleaned case name, or an empty string when it is absent.

    The element is looked up with
    ``__find_by_class_name('h1', 'detail-title-content')``. Its text is
    passed through ``clean.sub`` and then ``punctuation_cleaner.sub``.
    """
    title_node = self.__find_by_class_name('h1', 'detail-title-content')
    if title_node is None:
        title = ""
    else:
        title = clean.sub('', title_node.text)
    # The punctuation pass runs even on the empty fallback.
    return punctuation_cleaner.sub('', title)
def get_host_role(self):
    """Return the host role text with punctuation stripped.

    The element is looked up with
    ``__find_by_class_name('span', 'info-span-msg')``; when the lookup
    yields ``None`` the result is an empty string.
    """
    role_node = self.__find_by_class_name('span', 'info-span-msg')
    if role_node is None:
        role_text = ""
    else:
        role_text = role_node.text
    return punctuation_cleaner.sub('', role_text)
def get_price(self):
    """Return the rent as an ``(amount, unit)`` tuple.

    The rent entry of ``self.infos`` is used when ``self.__is_key(key)`` is
    truthy; otherwise the text ``"0"`` is used. After punctuation is
    stripped, the first run of digits is converted to ``float``.

    Returns:
        tuple: ``(float amount, unit string)``. The amount is ``0.0`` when
        the rent text contains no digits at all, instead of raising
        ``AttributeError`` on a failed regex match.
    """
    key = u'租金'
    rent_text = self.infos[key] if self.__is_key(key) else "0"
    rent_text = punctuation_cleaner.sub('', rent_text)
    # re.search returns None when there is no digit run, so guard before
    # calling .group().
    digits = re.search(r'\d+', rent_text)
    amount = float(digits.group()) if digits else 0.0
    return amount, u'元/月'
def get_case_name(self):
    """Return the case name with the case number and punctuation removed.

    The text of the first ``div.h1table h1`` match is used, with every
    occurrence of ``self.get_case_number()`` deleted from it. An empty
    string is used when the selector matches nothing.
    """
    case_no = self.get_case_number()
    headings = find_by_css(self.soup, 'div.h1table h1')
    if headings:
        title = headings[0].text.replace(case_no, '')
    else:
        title = ''
    return punctuation_cleaner.sub('', title)
def get_price(self):
    """Return the price as an ``(amount, unit)`` tuple.

    The amount is the first run of digits found in the punctuation-stripped
    text of the first ``div.price`` match. The unit is the text of the
    first ``div.price b`` match.

    Returns:
        tuple: ``(float amount, unit string)``. The amount is ``0.0`` when
        ``div.price`` is missing or its text contains no digits (the
        latter used to raise ``AttributeError``). The unit falls back to
        the default per-month unit string when ``div.price b`` is missing.
    """
    amount = 0.0
    price_nodes = find_by_css(self.soup, 'div.price')
    if price_nodes:
        price_text = punctuation_cleaner.sub('', price_nodes[0].text)
        # re.search returns None when no digits are present.
        digits = re.search(r'\d+', price_text)
        if digits:
            amount = float(digits.group())

    unit_nodes = find_by_css(self.soup, 'div.price b')
    unit = unit_nodes[0].text if unit_nodes else u"元/月"
    return amount, unit
def get_host_company(self):
    """Return ``"<company> <branch>"`` parsed from ``div.auatarSonBox``.

    The punctuation-stripped text of the first match is searched for the
    company/branch pattern. Missing pieces are empty strings, so when
    nothing matches the result is a single space.
    """
    boxes = find_by_css(self.soup, 'div.auatarSonBox')
    if boxes:
        box_text = punctuation_cleaner.sub('', boxes[0].text)
    else:
        box_text = ''

    # NOTE(review): on Python 3 str patterns the non-word class does not
    # match CJK characters, so this pattern may only behave as intended on
    # Python 2 -- confirm the target interpreter.
    match = re.search(u'公司名:(\W+)分店:(\W+)', box_text)
    if match:
        company, branch = match.group(1), match.group(2)
    else:
        company, branch = "", ""
    return ' '.join([company, branch])
def __get_house_infos(self):
    """Collect ``attribute -> value`` pairs from ``div.object-list li`` rows.

    Each row's text is stripped with ``punctuation_cleaner`` and split at
    the first ``':'``. Rows without a colon are skipped.

    Returns:
        dict: text before the first colon mapped to everything after it.
        A value may itself contain further colons; such rows previously
        raised ``ValueError`` during unpacking.
    """
    infos = {}
    for row in find_by_css(self.soup, 'div.object-list li'):
        row_text = punctuation_cleaner.sub('', row.text)
        # partition splits only on the first colon and reports, via `sep`,
        # whether a colon was present at all.
        attr, sep, value = row_text.partition(':')
        if sep:
            infos[attr] = value
    return infos
def __get_house_infos(self):
    """Collect ``attribute -> value`` pairs from two page sections.

    1. ``.labelList .one`` / ``.labelList .two`` elements are paired
       positionally with ``zip``; colons are removed from the value text.
    2. ``ul.attr li`` rows are stripped with ``punctuation_cleaner`` and
       split at the first ``':'``. Entries from this pass overwrite
       same-named entries from the first pass.

    Returns:
        dict: attribute text mapped to value text. Rows in the second
        section that contain no colon are skipped, and values may contain
        further colons; both cases previously raised ``ValueError``.
    """
    infos = {}

    labels = find_by_css(self.soup, '.labelList .one')
    values = find_by_css(self.soup, '.labelList .two')
    for label, value in zip(labels, values):
        infos[label.text] = value.text.replace(u':', '')

    for row in find_by_css(self.soup, 'ul.attr li'):
        row_text = punctuation_cleaner.sub('', row.text)
        # partition never raises: `sep` is empty when the row has no colon.
        attr, sep, value = row_text.partition(':')
        if sep:
            infos[attr] = value
    return infos
def get_host_company(self):
    """Return ``"<headquarter> <branch>"`` parsed from the host detail box.

    The element is looked up with
    ``__find_by_class_name('div', 'info-detail-show')``. Its text is passed
    through ``clean.sub`` and ``punctuation_cleaner.sub`` and then searched
    for the company/branch pattern.

    Returns:
        str: the two captured groups joined by a space, or an empty string
        when the element is missing or the pattern does not match. The
        no-match case is handled explicitly rather than by catching and
        printing the ``AttributeError`` raised on a ``None`` match.
    """
    detail_node = self.__find_by_class_name('div', 'info-detail-show')
    text = clean.sub('', detail_node.text) if detail_node is not None else ""
    text = punctuation_cleaner.sub('', text)

    # NOTE(review): on Python 3 str patterns the non-word class does not
    # match CJK characters -- confirm the target interpreter.
    match = re.search(u'公司名:(\W+)分公司:(\W+)', text)
    if match is None:
        return ""
    return " ".join([match.group(1), match.group(2)])
def get_case_name(self):
    """Return the punctuation-stripped text of the first ``div.top h1``.

    An empty string is used as the input when the selector matches nothing.
    """
    headings = find_by_css(self.soup, 'div.top h1')
    if headings:
        title = headings[0].text
    else:
        title = ''
    return punctuation_cleaner.sub('', title)
def get_case_number(self):
    """Return the case number text with punctuation stripped.

    The text of the first ``div.h1table span.color-gray`` match is used.
    An empty string is returned when the selector matches nothing.
    """
    spans = find_by_css(self.soup, 'div.h1table span.color-gray')
    if not spans:
        return ''
    return punctuation_cleaner.sub('', spans[0].text)