コード例 #1
0
def get_land_info_by_url(land_url, session):
    """Process every ``LandInfo`` row whose ``land_url`` equals *land_url*.

    For each matching row the row's URL is printed, a new ``BasicSpider``
    is built for ``settings.host_url + land_url``, a soup is requested via
    ``get_html_beautiful_soup_without_cookies`` and that soup is handed,
    together with the row, to ``get_detailed_land_info``.  The session is
    committed exactly once after the loop, whether or not any row matched.

    :param land_url: string appended to ``settings.host_url``
        (presumably a site-relative path -- confirm against callers).
    :param session: object providing ``query(...)`` and ``commit()``
        (looks like a SQLAlchemy session -- TODO confirm).
    """
    matching_rows = session.query(LandInfo).filter(
        LandInfo.land_url == land_url)
    for row in matching_rows:
        # Single-argument parenthesised form prints the same text under
        # both the Python 2 print statement and the print function.
        print(row.land_url)
        page_url = settings.host_url + land_url
        crawler = BasicSpider(page_url, 0)
        soup = crawler.get_html_beautiful_soup_without_cookies(land_url)
        get_detailed_land_info(soup, row)
    session.commit()
コード例 #2
0
 def get_detailed_info_by_land_info_url(self, land_info_url):
     """Request the page for *land_info_url*, then run the detail update.

     A ``BasicSpider`` is created for ``settings.host_url + land_info_url``
     with ``self.step`` and asked for a soup through
     ``get_html_beautiful_soup_without_cookies``.  Afterwards
     ``land_detailed_info.update_land_detailed_info()`` is called with no
     arguments.

     :param land_info_url: string appended to ``settings.host_url``.
     """
     full_url = settings.host_url + land_info_url
     crawler = BasicSpider(full_url, self.step)
     # The returned soup is not consumed here; the request itself is kept
     # because it may have side effects inside the spider.
     # NOTE(review): an earlier, now-disabled variant passed this soup to a
     # detail parser -- confirm whether this request is still needed.
     crawler.get_html_beautiful_soup_without_cookies(land_info_url)
     land_detailed_info.update_land_detailed_info()
コード例 #3
0
 def get_basic_info_by_land_info_page_url_list(self,
                                               land_info_page_url,
                                               district_row_id=-1):
     """Request one land-info listing page and pass it to the spider.

     Builds a ``BasicSpider`` for ``settings.host_url + land_info_page_url``
     with ``self.step``, obtains a soup through
     ``get_html_beautiful_soup_without_cookies`` and forwards it, along
     with *district_row_id*, to
     ``spider.get_land_basic_info_by_land_info_url``.  Nothing is returned.

     :param land_info_page_url: string appended to ``settings.host_url``.
     :param district_row_id: value forwarded unchanged to the spider;
         defaults to ``-1`` (presumably "no district" -- TODO confirm).
     """
     full_url = settings.host_url + land_info_page_url
     crawler = BasicSpider(full_url, self.step)
     soup = crawler.get_html_beautiful_soup_without_cookies(land_info_page_url)
     # The spider's return value is intentionally discarded, as before.
     crawler.get_land_basic_info_by_land_info_url(soup, district_row_id)
コード例 #4
0
def get_trade_info_by_url(land_url):
    """Build and return a ``TradeInfo`` for *land_url*.

    A soup is first requested without cookies.  When
    ``check_need_cookies_or_not`` returns a truthy value for that soup, it
    is requested again with the cookies produced by
    ``get_cookies(settings.cookies)``.  The resulting soup and a new
    ``TradeInfo(land_url)`` are then passed to ``get_trade_info``.

    :param land_url: string appended to ``settings.host_url``.
    :returns: the ``TradeInfo`` instance that was passed to
        ``get_trade_info``.
    """
    page_url = settings.host_url + land_url
    crawler = BasicSpider(page_url, 0)
    soup = crawler.get_html_beautiful_soup_without_cookies(land_url)

    cookies_required = check_need_cookies_or_not(soup)
    if cookies_required:
        # Retry the same URL, this time with the configured cookies.
        soup = crawler.get_html_beautiful_soup_with_cookies(
            land_url,
            cookies=get_cookies(settings.cookies),
        )

    record = TradeInfo(land_url)
    get_trade_info(soup, record)
    return record
コード例 #5
0
 def get_land_info_url_list_by_district_url(self, district_url):
     """Walk the land-info page URLs the spider reports for a district.

     A ``BasicSpider`` is created for ``settings.host_url + district_url``
     with ``self.step``.  It is asked for a soup, for the list of page
     URLs (``get_land_url_list_by_distrct_url``) and for the region info
     (``search_region_info_by_url``).  If the URL list is ``None`` the
     method stops.  Otherwise, for every entry it prints the entry's
     length and ``district_info[0]`` and calls
     ``self.get_basic_info_by_land_info_page_url_list`` with both.

     :param district_url: string appended to ``settings.host_url``.
     """
     full_url = settings.host_url + district_url
     crawler = BasicSpider(full_url, self.step)
     soup = crawler.get_html_beautiful_soup_without_cookies(district_url)
     page_urls = crawler.get_land_url_list_by_distrct_url(soup, district_url)
     region = crawler.search_region_info_by_url(district_url)

     if page_urls is None:
         return

     for page_url in page_urls:
         # A disabled legacy call to
         # self.get_basic_info_by_land_info_url_lsit(...) used to sit here.
         print(len(page_url))
         print(region[0])
         # region[0] is presumably the district's row id -- TODO confirm.
         self.get_basic_info_by_land_info_page_url_list(page_url, region[0])
コード例 #6
0
 def get_district_url_by_city_url(self, city_url):
     """Request a city page and hand it to ``get_district_url_list``.

     Builds a ``BasicSpider`` for ``settings.host_url + city_url`` with
     ``self.step``, obtains a soup through
     ``get_html_beautiful_soup_without_cookies`` and passes the soup and
     *city_url* to ``spider.get_district_url_list``.  Nothing is returned.

     :param city_url: string appended to ``settings.host_url``.
     """
     full_url = settings.host_url + city_url
     crawler = BasicSpider(full_url, self.step)
     soup = crawler.get_html_beautiful_soup_without_cookies(city_url)
     crawler.get_district_url_list(soup, city_url)
コード例 #7
0
 def get_city_url_by_province_url(self, province_url):
     """Request a province page and hand it to ``get_city_url_list``.

     Builds a ``BasicSpider`` for ``settings.host_url + province_url`` with
     ``self.step``, obtains a soup through
     ``get_html_beautiful_soup_without_cookies`` and passes the soup and
     *province_url* to ``spider.get_city_url_list``.  Nothing is returned.

     :param province_url: string appended to ``settings.host_url``.
     """
     full_url = settings.host_url + province_url
     crawler = BasicSpider(full_url, self.step)
     soup = crawler.get_html_beautiful_soup_without_cookies(province_url)
     crawler.get_city_url_list(soup, province_url)
コード例 #8
0
 def get_all_province_url(self):
     """Request ``self.url`` and hand the soup to ``get_province_url_list``.

     Builds a ``BasicSpider`` from ``self.url`` and ``self.step``, obtains
     a soup for ``self.url`` through
     ``get_html_beautiful_soup_without_cookies`` and passes it to
     ``spider.get_province_url_list``.  Nothing is returned.
     """
     crawler = BasicSpider(self.url, self.step)
     soup = crawler.get_html_beautiful_soup_without_cookies(self.url)
     crawler.get_province_url_list(soup)
コード例 #9
0
def make_spider(cls):
    """Create a ``BasicSpider`` and store it on *cls* as ``cls.spider``.

    A fresh ``Options`` object is created with ``headless`` set to
    ``False`` and passed as the ``options`` keyword, together with
    ``base_url``, to ``BasicSpider``.

    :param cls: object that receives the ``spider`` attribute
        (presumably a class, e.g. a test case -- TODO confirm; no
        decorator is visible here).
    """
    # ``Options`` looks like a browser-driver options object -- confirm
    # against the file's imports.
    browser_options = Options()
    browser_options.headless = False

    cls.spider = BasicSpider(base_url, options=browser_options)