def download(self):
    """Download data for every code in ``self.allCode`` with worker threads.

    A private copy of ``self.conf`` is extended with the ``handle``,
    ``date`` and ``queue`` entries and handed to ``self.threadNum``
    ``MultiThreadHandler`` workers.  The call blocks until every queued
    code has been marked as done on the queue.

    Returns:
        True once the queue has been fully processed.

    Example:
        >>> app=MultiThreadDownloader(conf)
        >>> app.stock.allCode
        >>> app.download()
        True
    """
    logging.debug("Start downloading data...\nCrawl mode is mutil.")

    # Work on a copy so the per-run entries never leak into self.conf.
    conf = {}
    conf.update(self.conf)
    conf['handle'] = self.handle
    conf['date'] = self.date

    oQueue = queue.Queue()
    for code in self.allCode:
        # Integer codes are converted to their string form via Util.getCode.
        if isinstance(code, int):
            code = Util.getCode(code)
        oQueue.put(code)

    # The queue is the same for every worker, so register it once.
    conf["queue"] = oQueue
    for _ in range(self.threadNum):
        worker = MultiThreadHandler(conf=conf)
        # Thread.setDaemon() is deprecated since Python 3.10; the ``daemon``
        # attribute is the supported spelling.
        # NOTE(review): assumes MultiThreadHandler subclasses threading.Thread.
        worker.daemon = True
        worker.start()

    # Blocks until task_done() has been called for every queued code.
    oQueue.join()
    return True
def getUrl(num,date,sourceType="qq2",conf=None):
    """Build the detail-data download URL for one stock on one day.

    Args:
        num: Stock code; anything accepted by ``int()``.
        date: Trading day in ``YYYYMMDD`` form (converted with ``str()``).
        sourceType: Which URL template to use: ``"qq"``, ``"sina"`` or
            ``"qq2"`` (the default).
        conf: Optional configuration mapping.  Its ``"DETAIL_SOURCE"`` entry,
            when present, overrides the module-level ``DETAIL_SOURCE``
            template table.

    Returns:
        The formatted URL string, or ``None`` when ``sourceType`` is not one
        of the supported values.

    Raises:
        ValueError: If ``num`` is not an integer literal, or if ``date`` does
            not match ``%Y%m%d`` (only checked for ``"sina"`` and ``"qq2"``).

    Example:
        >>> getUrl(num="601919",date="20110817",conf = conf)
        'http://stock.gtimg.cn/data/index.php?appn=detail&action=download&c=sh601919&d=20110817'
    """
    # A None sentinel avoids sharing one mutable default dict across calls.
    if conf is None:
        conf = {}
    num = int(num)
    date = str(date)

    if sourceType not in ("qq", "sina", "qq2"):
        # Unknown sources have always yielded None; keep that contract explicit.
        return None

    detailSource = conf.get("DETAIL_SOURCE", DETAIL_SOURCE)

    # Codes below 600000 get the "sz" prefix, all others "sh".
    prefix = "sz" if num < 600000 else "sh"
    symbol = prefix + Util.getCode(num)

    if sourceType == "qq":
        return detailSource.get("qq") % (symbol + ".js")

    # Round-tripping through strptime rejects malformed dates.
    parsedDate = time.strptime(date, "%Y%m%d")
    if sourceType == "sina":
        formatDate = time.strftime("%Y-%m-%d", parsedDate)
        return detailSource.get("sina") % (formatDate, symbol)

    # sourceType == "qq2"
    formatDate = time.strftime("%Y%m%d", parsedDate)
    return detailSource.get("qq2") % (symbol, formatDate)
def download(self,date=None):
    """Download detail data for every stock code, optionally resuming.

    When ``self.conf["restart"]`` is truthy, only codes numerically greater
    than the last successfully fetched code (as reported by
    ``Util.getLastCode`` for the day's data directory) are downloaded, and
    the ``restart`` flag is then cleared.  Otherwise every code in
    ``self.stock.allCode`` is downloaded.

    In ``"multi"`` thread mode the codes are put on a queue consumed by
    ``self.threadNum`` ``MultiThreadDetail`` workers; in any other mode the
    codes are handled one by one with ``handleDetail``.

    Args:
        date: Day to download; falls back to ``self.date`` when falsy.

    Returns:
        True once all codes have been processed.

    Example (not executed as a doctest):
        >> detail=Detail(conf)
        >> detail.stock.allCode
        >> detail.download()
        True
    """
    date = date or self.date
    logging.debug("Start downloading Detail data...\nCrawl mode is %s." % self.threadMode)

    if self.conf.get("restart"):
        # Restart mode: resume a run that was interrupted part-way through.
        # lastCode is the last stock code fetched successfully before the
        # interruption.
        lastCode = Util.getLastCode(
            path=os.path.join(
                self.conf.get("SYS_HOME"),
                self.conf.get("DETAIL_DATA_PATH"),
                date,
            )
        )
        lastNum = int(lastCode)
        # Keep only the codes that still need fetching.  The original used
        # map(), which produced a sequence of booleans instead of codes.
        codes = [code for code in self.stock.allCode if int(code) > lastNum]
        if codes:
            logging.info("Downloader is in restart mode.Restart begin at " + str(codes[0]))
        else:
            logging.info("Downloader is in restart mode.No remaining codes after " + str(lastCode))
        self.conf['restart'] = False
    else:
        codes = self.stock.allCode

    if self.threadMode == "multi":
        # Multi-thread mode.
        oQueue = queue.Queue()
        for code in codes:
            # Integer codes are converted to their string form via Util.getCode.
            if isinstance(code, int):
                code = Util.getCode(code)
            oQueue.put(code)

        # The queue is the same for every worker, so register it once.
        self.conf["queue"] = oQueue
        for _ in range(self.threadNum):
            worker = MultiThreadDetail(date=date, parser=self.parser, conf=self.conf)
            # Thread.setDaemon() is deprecated since Python 3.10; the
            # ``daemon`` attribute is the supported spelling.
            # NOTE(review): assumes MultiThreadDetail subclasses threading.Thread.
            worker.daemon = True
            worker.start()

        # Blocks until task_done() has been called for every queued code.
        oQueue.join()
    else:
        # Single-thread mode.
        for code in codes:
            handleDetail(code=code, date=date, parser=self.parser, conf=self.conf)
    return True