Ejemplo n.º 1
0
 def __init__(self, **kwargs):
     """Set up crawl bookkeeping, request headers and a PhantomJS browser.

     Args:
         **kwargs: Forwarded unchanged to the parent spider constructor.
     """
     # Run the parent constructor exactly once, with the caller's kwargs.
     # A second, argument-less call used to follow the browser setup; it
     # only repeated the base-class initialisation and has been dropped.
     super(CarSpider, self).__init__(**kwargs)

     # Crawl bookkeeping.
     self.tag = 'original'
     self.counts = 0
     self.carnum = 300000
     self.dbname = 'usedcar'

     # `spider_original_Init` and `website` are defined outside this method.
     spider_original_Init(dbname=self.dbname,
                          website=website,
                          carnum=self.carnum)
     self.df = 'none'
     self.fa = 'none'

     # Headers and cookie jar stored on the instance for later requests.
     self.headers = {
         "User-Agent":
         "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
     }
     self.cookies_for_request = {}

     # Headless browser; the binary location comes from the project settings.
     self.browser = webdriver.PhantomJS(
         executable_path=settings['PHANTOMJS_PATH'])
     self.browser.set_page_load_timeout(30)

     # Have `spider_closed` invoked when the spider_closed signal fires.
     dispatcher.connect(self.spider_closed, signals.spider_closed)
Ejemplo n.º 2
0
    def __init__(self, **kwargs):
        """Set up crawl bookkeeping and a PhantomJS browser with a custom user agent.

        Args:
            **kwargs: Forwarded unchanged to the parent spider constructor.
        """
        # Run the parent constructor exactly once, with the caller's kwargs.
        # A second, argument-less call used to follow the browser setup; it
        # only repeated the base-class initialisation and has been dropped.
        super(CarSpider, self).__init__(**kwargs)

        # Crawl bookkeeping.
        self.tag = 'original'
        self.counts = 0
        self.carnum = 5000000
        self.dbname = 'usedcar'

        # `spider_original_Init` and `website` are defined outside this method.
        spider_original_Init(dbname=self.dbname,
                             website=website,
                             carnum=self.carnum)
        self.df = 'none'
        self.fa = 'none'

        # Work on a copy so the library's PHANTOMJS capability dict itself
        # is left untouched, then override the user-agent string.
        desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
        desired_capabilities[
            "phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
        self.browser = webdriver.PhantomJS(
            executable_path=settings['PHANTOMJS_PATH'],
            desired_capabilities=desired_capabilities)
        self.browser.set_page_load_timeout(20)
        self.browser.implicitly_wait(100)

        # Have `spider_closed` invoked when the spider_closed signal fires.
        dispatcher.connect(self.spider_closed, signals.spider_closed)
Ejemplo n.º 3
0
 def __init__(self, **kwargs):
     """Initialise the spider's bookkeeping attributes.

     Args:
         **kwargs: Passed straight through to the parent constructor.
     """
     super(CarSpider, self).__init__(**kwargs)

     # Plain instance state.
     self.tag, self.counts = 'original', 0
     self.carnum, self.dbname = 5000000, 'usedcar'

     # `spider_original_Init` and `website` come from outside this method;
     # the helper receives the values just stored on the instance.
     init_args = dict(dbname=self.dbname,
                      website=website,
                      carnum=self.carnum)
     spider_original_Init(**init_args)

     self.df = self.fa = 'none'
Ejemplo n.º 4
0
 def __init__(self, part=0, parts=1, *args, **kwargs):
     """Initialise bookkeeping attributes and record the part/parts pair.

     Args:
         part: Stored on the instance as ``int(part)``.
             NOTE(review): presumably the index of the slice this run
             handles; confirm against the caller.
         parts: Stored on the instance as ``int(parts)``.
             NOTE(review): presumably the total number of slices; confirm.
         *args: Forwarded to the parent constructor.
         **kwargs: Forwarded to the parent constructor.
     """
     super(CarSpider, self).__init__(*args, **kwargs)

     # Plain instance state.
     self.tag, self.counts = 'original', 0
     self.carnum, self.dbname = 20000000, 'usedcar_evaluation'

     # `spider_original_Init` and `website` come from outside this method.
     init_args = dict(dbname=self.dbname,
                      website=website,
                      carnum=self.carnum)
     spider_original_Init(**init_args)

     self.df = self.fa = 'none'

     # Coerce to int; the values may arrive as strings.
     self.part = int(part)
     self.parts = int(parts)
Ejemplo n.º 5
0
 def __init__(self, **kwargs):
     """Initialise bookkeeping attributes and the request headers.

     Args:
         **kwargs: Passed straight through to the parent constructor.
     """
     super(CarSpider, self).__init__(**kwargs)

     # Plain instance state.
     self.tag, self.counts = 'original', 0
     self.carnum, self.dbname = 2000000, 'usedcar'

     # `spider_original_Init` and `website` come from outside this method.
     init_args = dict(dbname=self.dbname,
                      website=website,
                      carnum=self.carnum)
     spider_original_Init(**init_args)

     self.df = self.fa = 'none'

     # Headers kept on the instance for later requests.
     user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
     self.headers = {
         'User-agent': user_agent,
         'Host': 'used.xcar.com.cn',
         'Upgrade-Insecure-Requests': '1',
     }
Ejemplo n.º 6
0
 def __init__(self, part=0, parts=1, *args, **kwargs):
     """Initialise bookkeeping, the part/parts pair and request headers.

     Args:
         part: Stored on the instance as ``int(part)``.
             NOTE(review): presumably the index of the slice this run
             handles; confirm against the caller.
         parts: Stored on the instance as ``int(parts)``.
             NOTE(review): presumably the total number of slices; confirm.
         *args: Forwarded to the parent constructor.
         **kwargs: Forwarded to the parent constructor.
     """
     super(CarSpider, self).__init__(*args, **kwargs)

     # Plain instance state.
     self.tag, self.counts = 'original', 0
     self.carnum, self.dbname = 20000000, 'usedcar_evaluation'

     # `spider_original_Init` and `website` come from outside this method.
     init_args = dict(dbname=self.dbname,
                      website=website,
                      carnum=self.carnum)
     spider_original_Init(**init_args)

     self.df = self.fa = 'none'

     # Coerce to int; the values may arrive as strings.
     self.part = int(part)
     self.parts = int(parts)

     # Headers kept on the instance for later requests.
     user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
     self.headers = {'User-Agent': user_agent}
Ejemplo n.º 7
0
 def __init__(self, **kwargs):
     """Initialise bookkeeping and load the URL list from the update helper.

     Args:
         **kwargs: Passed straight through to the parent constructor.
     """
     super(CarSpider, self).__init__(**kwargs)

     # Plain instance state.
     self.tag, self.counts = 'original', 0
     self.carnum, self.dbname = 2000000, 'usedcar'

     # `spider_original_Init`, `spider_update_Init` and `website` come from
     # outside this method.
     spider_original_Init(dbname=self.dbname,
                          website=website,
                          carnum=self.carnum)

     # The update helper is called with a fixed website name rather than
     # the `website` variable, and still sees the initial carnum value.
     url_list = spider_update_Init(dbname=self.dbname,
                                   website='youxinpai',
                                   carnum=self.carnum)
     self.urllist = url_list

     # From here on carnum is the number of URLs the helper returned.
     self.carnum = len(self.urllist)

     self.df = self.fa = 'none'
    def __init__(self, part=0, parts=1, *args, **kwargs):
        """Initialise bookkeeping, the part/parts pair and request headers.

        Args:
            part: Stored on the instance as ``int(part)``.
                NOTE(review): presumably the index of the slice this run
                handles; confirm against the caller.
            parts: Stored on the instance as ``int(parts)``.
                NOTE(review): presumably the total number of slices; confirm.
            *args: Forwarded to the parent constructor.
            **kwargs: Forwarded to the parent constructor.
        """
        super(CarSpider, self).__init__(*args, **kwargs)

        # Plain instance state.
        self.tag, self.counts = 'original', 0
        self.carnum, self.dbname = 20000000, 'usedcar_evaluation'

        # `spider_original_Init` and `website` come from outside this method.
        init_args = dict(dbname=self.dbname,
                         website=website,
                         carnum=self.carnum)
        spider_original_Init(**init_args)

        self.df = self.fa = 'none'

        # Coerce to int; the values may arrive as strings.
        self.part = int(part)
        self.parts = int(parts)

        # Headers kept on the instance for later requests.
        user_agent = 'activity/3.4.2.18 (Linux; Android 7.0; RNE-AL00; Build/HUAWEIRNE-AL00)'
        self.headers = {'user-agent': user_agent}
Ejemplo n.º 9
0
    def __init__(self, **kwargs):
        """Set up crawl bookkeeping and a PhantomJS browser.

        Args:
            **kwargs: Forwarded unchanged to the parent spider constructor.
        """
        # Run the parent constructor exactly once, with the caller's kwargs.
        # A second, argument-less call used to follow the browser setup; it
        # only repeated the base-class initialisation and has been dropped.
        super(CarSpider, self).__init__(**kwargs)

        # Crawl bookkeeping.
        self.tag = 'original'
        self.counts = 0
        self.carnum = 1000000
        self.dbname = 'usedcar'

        # `spider_original_Init` and `website` are defined outside this method.
        spider_original_Init(dbname=self.dbname,
                             website=website,
                             carnum=self.carnum)
        self.df = 'none'
        self.fa = 'none'

        # Headless browser; the binary location comes from the project settings.
        self.browser = webdriver.PhantomJS(
            executable_path=settings['PHANTOMJS_PATH'])
        self.browser.set_page_load_timeout(10)

        # Have `spider_closed` invoked when the spider_closed signal fires.
        dispatcher.connect(self.spider_closed, signals.spider_closed)
Ejemplo n.º 10
0
    def __init__(self, **kwargs):
        """Set up crawl bookkeeping and a PhantomJS browser with a custom user agent.

        Args:
            **kwargs: Forwarded unchanged to the parent spider constructor.
        """
        # Run the parent constructor exactly once, with the caller's kwargs.
        # A second, argument-less call used to follow the browser setup; it
        # only repeated the base-class initialisation and has been dropped.
        super(CarSpider, self).__init__(**kwargs)

        # Crawl bookkeeping.
        self.tag = 'original'
        self.counts = 0
        self.carnum = 1500000
        self.dbname = 'usedcar'

        # `spider_original_Init` and `website` are defined outside this method.
        spider_original_Init(dbname=self.dbname,
                             website=website,
                             carnum=self.carnum)
        self.df = 'none'
        self.fa = 'none'

        # Work on a copy so the library's PHANTOMJS capability dict itself
        # is left untouched, then override the user-agent string.
        self.desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
        self.desired_capabilities[
            "phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'

        # The capabilities must be handed to the driver to take effect;
        # previously they were built but never passed, so the user-agent
        # override was silently ignored.
        self.browser = webdriver.PhantomJS(
            executable_path=settings['PHANTOMJS_PATH'],
            desired_capabilities=self.desired_capabilities)

        # Have `spider_closed` invoked when the spider_closed signal fires.
        dispatcher.connect(self.spider_closed, signals.spider_closed)