Ejemplo n.º 1
0
    def get_case_name(self):
        """Return the listing title taken from the page heading.

        The heading is the ``h1`` element with class
        ``detail-title-content``. Its text is passed through the ``clean``
        and ``punctuation_cleaner`` substitutions. An empty string is
        returned when the element is not found.
        """
        title_node = self.__find_by_class_name('h1', 'detail-title-content')

        if title_node is None:
            title = ""
        else:
            title = clean.sub('', title_node.text)

        return punctuation_cleaner.sub('', title)
Ejemplo n.º 2
0
    def get_host_role(self):
        """Return the host role text from the ``span.info-span-msg`` element.

        The text is passed through ``punctuation_cleaner``. An empty string
        is returned when the element is not found.
        """
        role_node = self.__find_by_class_name('span', 'info-span-msg')

        if role_node is None:
            role_text = ""
        else:
            role_text = role_node.text

        return punctuation_cleaner.sub('', role_text)
Ejemplo n.º 3
0
    def get_price(self):
        """Return the rent as an ``(amount, unit)`` pair.

        The value is read from ``self.infos`` under the rent key, passed
        through ``punctuation_cleaner``, and the first run of digits in the
        result is used as the amount.

        Returns:
            tuple: ``(float, unicode)``. The amount is ``0.0`` when the key
            is missing or when the stored value contains no digits (for
            example a textual "negotiable" price). The unit is always the
            fixed per-month string.
        """
        key = u'租金'
        price = self.infos[key] if self.__is_key(key) else "0"
        price = punctuation_cleaner.sub('', price)

        # A raw pattern avoids the invalid-escape warning, and the match is
        # checked before use so a digit-less value no longer raises
        # AttributeError on ``None.group()``.
        match = re.search(r'\d+', price)
        amount = float(match.group()) if match else 0.0

        return amount, u'元/月'
Ejemplo n.º 4
0
    def get_case_name(self):
        """Return the listing title with the case number removed.

        The title is the text of the first ``div.h1table h1`` match. Every
        occurrence of the value returned by ``get_case_number`` is stripped
        from it, then the result is passed through ``punctuation_cleaner``.
        An empty string is returned when no heading is found.
        """
        number = self.get_case_number()

        headings = find_by_css(self.soup, 'div.h1table h1')
        if headings:
            title = headings[0].text.replace(number, '')
        else:
            title = ''

        return punctuation_cleaner.sub('', title)
Ejemplo n.º 5
0
    def get_price(self):
        """Return the rent as an ``(amount, unit)`` pair.

        The amount is the first run of digits found in the text of the
        first ``div.price`` element after ``punctuation_cleaner`` has been
        applied. The unit is the text of the first ``div.price b`` element.

        Returns:
            tuple: ``(float, unicode)``. The amount is ``0.0`` when the price
            element is missing or its text contains no digits. The unit
            falls back to the fixed per-month string when no unit element
            is found.
        """
        price = 0.0
        price_nodes = find_by_css(self.soup, 'div.price')
        if price_nodes:
            cleaned = punctuation_cleaner.sub('', price_nodes[0].text)
            # Check the match before calling .group(): a price element
            # without digits used to raise AttributeError here.
            match = re.search(r'\d+', cleaned)
            if match:
                price = match.group()

        unit_nodes = find_by_css(self.soup, 'div.price b')
        unit = unit_nodes[0].text if unit_nodes else u"元/月"

        return float(price), unit
Ejemplo n.º 6
0
    def get_host_company(self):
        """Return the host's company and branch joined by a single space.

        The text of the first ``div.auatarSonBox`` element is passed through
        ``punctuation_cleaner`` and then searched for the company and branch
        labels. When the element is missing or the labels are not found,
        both parts are empty and the result is a single space.
        """
        boxes = find_by_css(self.soup, 'div.auatarSonBox')
        if boxes:
            text = punctuation_cleaner.sub('', boxes[0].text)
        else:
            text = ''

        # NOTE(review): the capture groups use the non-word class. That only
        # matches CJK names when the pattern is in ASCII mode (Python 2
        # unicode patterns without re.UNICODE); under Python 3 str patterns
        # CJK characters count as word characters -- confirm the target
        # interpreter.
        match = re.search(u'公司名:(\W+)分店:(\W+)', text)
        if match:
            company, branch = match.group(1), match.group(2)
        else:
            company = branch = ""

        return ' '.join([company, branch])
Ejemplo n.º 7
0
    def __get_house_infos(self):
        """Collect ``attribute: value`` pairs from the listing's detail rows.

        Each ``div.object-list li`` row is passed through
        ``punctuation_cleaner`` and split on its first colon. Rows without a
        colon are skipped.

        Returns:
            dict: maps the text before the first colon to the text after it.
        """
        infos = {}

        for row in find_by_css(self.soup, 'div.object-list li'):
            clean_text = punctuation_cleaner.sub('', row.text)
            # Split on the first colon only: a value that itself contains a
            # colon used to make the two-name unpacking raise ValueError.
            attr, sep, value = clean_text.partition(':')
            if sep:
                infos[attr] = value

        return infos
Ejemplo n.º 8
0
    def __get_house_infos(self):
        """Collect listing attributes from the label list and attribute rows.

        Two sources are merged into one dict:

        * ``.labelList .one`` / ``.labelList .two`` elements are paired in
          document order as attribute / value, with colons removed from the
          value text.
        * Each ``ul.attr li`` row is passed through ``punctuation_cleaner``
          and split on its first colon. Rows without a colon are skipped.

        Entries from the second source overwrite same-named entries from
        the first.

        Returns:
            dict: maps attribute text to value text.
        """
        infos = {}

        attr_raw = find_by_css(self.soup, '.labelList .one')
        value_raw = find_by_css(self.soup, '.labelList .two')

        for attr, value in zip(attr_raw, value_raw):
            infos[attr.text] = value.text.replace(u':', '')

        for row in find_by_css(self.soup, 'ul.attr li'):
            clean_text = punctuation_cleaner.sub('', row.text)
            # partition never raises: rows with no colon are skipped and
            # rows with several colons keep everything after the first one
            # as the value. The former two-name unpacking of split(':')
            # raised ValueError in both cases.
            attr, sep, value = clean_text.partition(':')
            if sep:
                infos[attr] = value

        return infos
Ejemplo n.º 9
0
    def get_host_company(self):
        """Return the host's company and branch joined by a single space.

        The text of the ``div.info-detail-show`` element is passed through
        the ``clean`` and ``punctuation_cleaner`` substitutions and then
        searched for the company and branch labels.

        Returns:
            unicode: ``"<company> <branch>"``, or an empty string when the
            element is missing or the labels are not found.
        """
        raw = self.__find_by_class_name('div', 'info-detail-show')

        text = clean.sub('', raw.text) if raw is not None else ""
        text = punctuation_cleaner.sub('', text)

        # NOTE(review): the capture groups use the non-word class. That only
        # matches CJK names when the pattern is in ASCII mode (Python 2
        # unicode patterns without re.UNICODE) -- confirm the target
        # interpreter.
        m = re.search(u'公司名:(\W+)分公司:(\W+)', text)

        # A failed search is the expected "no company info" case, so test
        # for it directly instead of catching (and printing) the
        # AttributeError raised by calling .group() on None.
        if m is None:
            return ""

        return " ".join([m.group(1), m.group(2)])
Ejemplo n.º 10
0
    def get_case_name(self):
        """Return the listing title from the first ``div.top h1`` element.

        The text is passed through ``punctuation_cleaner``. An empty string
        is returned when no heading is found.
        """
        headings = find_by_css(self.soup, 'div.top h1')

        if headings:
            title = headings[0].text
        else:
            title = ''

        return punctuation_cleaner.sub('', title)
Ejemplo n.º 11
0
 def get_case_number(self):
     """Return the case number shown next to the listing heading.

     Uses the text of the first ``div.h1table span.color-gray`` element,
     passed through ``punctuation_cleaner``. An empty string is returned
     when no such element is found.
     """
     matches = find_by_css(self.soup, 'div.h1table span.color-gray')
     if not matches:
         return ''
     return punctuation_cleaner.sub('', matches[0].text)