def __init__(self, **kwargs):
    """Set up crawl bookkeeping, default request headers and a PhantomJS browser.

    Args:
        **kwargs: Forwarded unchanged to the base spider initializer.
    """
    # Initialise the base spider exactly once, with the caller's arguments.
    super(CarSpider, self).__init__(**kwargs)

    # Crawl bookkeeping.
    self.tag = 'original'
    self.counts = 0
    self.carnum = 300000
    self.dbname = 'usedcar'

    # Project-level setup helper; `website` is a module-level name defined
    # outside this method.
    spider_original_Init(dbname=self.dbname, website=website, carnum=self.carnum)

    # Both start out as the literal string 'none' (not the None object).
    self.df = 'none'
    self.fa = 'none'

    # Default headers and an initially empty cookie jar kept on the spider.
    self.headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    }
    self.cookies_for_request = {}

    # Headless browser; the executable location comes from project settings.
    self.browser = webdriver.PhantomJS(
        executable_path=settings['PHANTOMJS_PATH'])
    self.browser.set_page_load_timeout(30)  # seconds

    # Have self.spider_closed invoked when the spider_closed signal fires.
    dispatcher.connect(self.spider_closed, signals.spider_closed)
def __init__(self, **kwargs):
    """Set up crawl bookkeeping and a PhantomJS browser with a desktop user agent.

    Args:
        **kwargs: Forwarded unchanged to the base spider initializer.
    """
    # Initialise the base spider exactly once, with the caller's arguments.
    super(CarSpider, self).__init__(**kwargs)

    # Crawl bookkeeping.
    self.tag = 'original'
    self.counts = 0
    self.carnum = 5000000
    self.dbname = 'usedcar'

    # Project-level setup helper; `website` is a module-level name defined
    # outside this method.
    spider_original_Init(dbname=self.dbname, website=website, carnum=self.carnum)

    # Both start out as the literal string 'none' (not the None object).
    self.df = 'none'
    self.fa = 'none'

    # Copy the stock PhantomJS capabilities so the shared class-level dict is
    # not mutated, then override the user agent the browser reports.
    desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
    desired_capabilities["phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'

    # Headless browser; the executable location comes from project settings.
    self.browser = webdriver.PhantomJS(
        executable_path=settings['PHANTOMJS_PATH'],
        desired_capabilities=desired_capabilities)
    self.browser.set_page_load_timeout(20)  # seconds
    self.browser.implicitly_wait(100)  # seconds

    # Have self.spider_closed invoked when the spider_closed signal fires.
    dispatcher.connect(self.spider_closed, signals.spider_closed)
def __init__(self, **kwargs):
    """Initialise the spider's bookkeeping fields and run the shared setup helper.

    Args:
        **kwargs: Passed straight through to the base spider initializer.
    """
    super(CarSpider, self).__init__(**kwargs)

    # Values handed to the project-level setup helper below.
    self.dbname = 'usedcar'
    self.carnum = 5000000

    # Remaining per-run state.
    self.tag = 'original'
    self.counts = 0

    spider_original_Init(
        dbname=self.dbname,
        website=website,  # module-level name, defined outside this method
        carnum=self.carnum,
    )

    # Both start out as the literal string 'none' (not the None object).
    self.df = self.fa = 'none'
def __init__(self, part=0, parts=1, *args, **kwargs):
    """Initialise bookkeeping fields and record the part/parts arguments.

    Args:
        part: Stored on the spider as ``int(part)``.
        parts: Stored on the spider as ``int(parts)``.
        *args: Forwarded to the base spider initializer.
        **kwargs: Forwarded to the base spider initializer.
    """
    super(CarSpider, self).__init__(*args, **kwargs)

    # Values handed to the project-level setup helper below.
    self.dbname = 'usedcar_evaluation'
    self.carnum = 20000000

    # Remaining per-run state.
    self.tag = 'original'
    self.counts = 0

    spider_original_Init(
        dbname=self.dbname,
        website=website,  # module-level name, defined outside this method
        carnum=self.carnum,
    )

    # Both start out as the literal string 'none' (not the None object).
    self.df = self.fa = 'none'

    # NOTE(review): presumably these arrive as strings from the command line,
    # hence the int() coercion -- confirm against how the spider is launched.
    self.part, self.parts = int(part), int(parts)
def __init__(self, **kwargs):
    """Initialise bookkeeping fields and the default headers kept on the spider.

    Args:
        **kwargs: Passed straight through to the base spider initializer.
    """
    super(CarSpider, self).__init__(**kwargs)

    # Values handed to the project-level setup helper below.
    self.dbname = 'usedcar'
    self.carnum = 2000000

    # Remaining per-run state.
    self.tag = 'original'
    self.counts = 0

    spider_original_Init(
        dbname=self.dbname,
        website=website,  # module-level name, defined outside this method
        carnum=self.carnum,
    )

    # Both start out as the literal string 'none' (not the None object).
    self.df = self.fa = 'none'

    # Desktop-browser style headers pinned to the used.xcar.com.cn host.
    desktop_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
    self.headers = {
        'User-agent': desktop_agent,
        'Host': 'used.xcar.com.cn',
        'Upgrade-Insecure-Requests': '1',
    }
def __init__(self, part=0, parts=1, *args, **kwargs):
    """Initialise bookkeeping fields, the part/parts arguments and default headers.

    Args:
        part: Stored on the spider as ``int(part)``.
        parts: Stored on the spider as ``int(parts)``.
        *args: Forwarded to the base spider initializer.
        **kwargs: Forwarded to the base spider initializer.
    """
    super(CarSpider, self).__init__(*args, **kwargs)

    # Values handed to the project-level setup helper below.
    self.dbname = 'usedcar_evaluation'
    self.carnum = 20000000

    # Remaining per-run state.
    self.tag = 'original'
    self.counts = 0

    spider_original_Init(
        dbname=self.dbname,
        website=website,  # module-level name, defined outside this method
        carnum=self.carnum,
    )

    # Both start out as the literal string 'none' (not the None object).
    self.df = self.fa = 'none'

    # NOTE(review): presumably these arrive as strings from the command line,
    # hence the int() coercion -- confirm against how the spider is launched.
    self.part, self.parts = int(part), int(parts)

    # Desktop Chrome user agent kept on the spider for its requests.
    desktop_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
    self.headers = {'User-Agent': desktop_agent}
def __init__(self, **kwargs):
    """Initialise bookkeeping fields and load the URL list to process.

    After the shared setup helper runs, ``spider_update_Init`` supplies
    ``self.urllist`` and ``self.carnum`` is reset to that list's length.

    Args:
        **kwargs: Passed straight through to the base spider initializer.
    """
    super(CarSpider, self).__init__(**kwargs)

    # Values handed to both project-level helpers below.
    self.dbname = 'usedcar'
    self.carnum = 2000000

    # Remaining per-run state.
    self.tag = 'original'
    self.counts = 0

    spider_original_Init(
        dbname=self.dbname,
        website=website,  # module-level name, defined outside this method
        carnum=self.carnum,
    )

    # NOTE(review): this call hard-codes 'youxinpai' instead of using the
    # module-level `website` -- confirm the two are meant to differ.
    pending_urls = spider_update_Init(
        dbname=self.dbname,
        website='youxinpai',
        carnum=self.carnum,
    )
    self.urllist = pending_urls

    # From here on carnum reflects the size of the loaded list, not the
    # initial 2000000 passed to the helpers.
    self.carnum = len(pending_urls)

    # Both start out as the literal string 'none' (not the None object).
    self.df = self.fa = 'none'
def __init__(self, part=0, parts=1, *args, **kwargs):
    """Initialise bookkeeping fields, the part/parts arguments and default headers.

    Args:
        part: Stored on the spider as ``int(part)``.
        parts: Stored on the spider as ``int(parts)``.
        *args: Forwarded to the base spider initializer.
        **kwargs: Forwarded to the base spider initializer.
    """
    super(CarSpider, self).__init__(*args, **kwargs)

    # Values handed to the project-level setup helper below.
    self.dbname = 'usedcar_evaluation'
    self.carnum = 20000000

    # Remaining per-run state.
    self.tag = 'original'
    self.counts = 0

    spider_original_Init(
        dbname=self.dbname,
        website=website,  # module-level name, defined outside this method
        carnum=self.carnum,
    )

    # Both start out as the literal string 'none' (not the None object).
    self.df = self.fa = 'none'

    # NOTE(review): presumably these arrive as strings from the command line,
    # hence the int() coercion -- confirm against how the spider is launched.
    self.part, self.parts = int(part), int(parts)

    # Android-app style user agent kept on the spider for its requests.
    app_agent = 'activity/3.4.2.18 (Linux; Android 7.0; RNE-AL00; Build/HUAWEIRNE-AL00)'
    self.headers = {'user-agent': app_agent}
def __init__(self, **kwargs):
    """Set up crawl bookkeeping and a PhantomJS browser.

    Args:
        **kwargs: Forwarded unchanged to the base spider initializer.
    """
    # Initialise the base spider exactly once, with the caller's arguments.
    super(CarSpider, self).__init__(**kwargs)

    # Crawl bookkeeping.
    self.tag = 'original'
    self.counts = 0
    self.carnum = 1000000
    self.dbname = 'usedcar'

    # Project-level setup helper; `website` is a module-level name defined
    # outside this method.
    spider_original_Init(dbname=self.dbname, website=website, carnum=self.carnum)

    # Both start out as the literal string 'none' (not the None object).
    self.df = 'none'
    self.fa = 'none'

    # Headless browser; the executable location comes from project settings.
    self.browser = webdriver.PhantomJS(
        executable_path=settings['PHANTOMJS_PATH'])
    self.browser.set_page_load_timeout(10)  # seconds

    # Have self.spider_closed invoked when the spider_closed signal fires.
    dispatcher.connect(self.spider_closed, signals.spider_closed)
def __init__(self, **kwargs):
    """Set up crawl bookkeeping and a PhantomJS browser with a desktop user agent.

    Args:
        **kwargs: Forwarded unchanged to the base spider initializer.
    """
    # Initialise the base spider exactly once, with the caller's arguments.
    super(CarSpider, self).__init__(**kwargs)

    # Crawl bookkeeping.
    self.tag = 'original'
    self.counts = 0
    self.carnum = 1500000
    self.dbname = 'usedcar'

    # Project-level setup helper; `website` is a module-level name defined
    # outside this method.
    spider_original_Init(dbname=self.dbname, website=website, carnum=self.carnum)

    # Both start out as the literal string 'none' (not the None object).
    self.df = 'none'
    self.fa = 'none'

    # Copy the stock PhantomJS capabilities so the shared class-level dict is
    # not mutated, then override the user agent the browser reports.
    self.desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
    self.desired_capabilities["phantomjs.page.settings.userAgent"] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'

    # Pass the capabilities to the driver; without this the user-agent
    # override above has no effect on the browser session.
    self.browser = webdriver.PhantomJS(
        executable_path=settings['PHANTOMJS_PATH'],
        desired_capabilities=self.desired_capabilities)

    # Have self.spider_closed invoked when the spider_closed signal fires.
    dispatcher.connect(self.spider_closed, signals.spider_closed)