class MultiThreadDownloader:
    """Base class that downloads data for every known code using worker threads.

    The constructor loads the list of codes from a ``Data`` store;
    :meth:`download` puts them on a queue and starts ``threadNum``
    ``MultiThreadHandler`` workers. Subclasses must override :meth:`handle`.
    """

    def __init__(self, conf=None):
        """Read settings from *conf* and load the list of codes.

        Args:
            conf: Optional settings mapping. Keys read here are ``'date'``
                (defaults to ``Date.getDate()``), ``'SOURCE_NAME'`` and
                ``'THREAD_NUM'`` (defaults to the module-level ``THREAD_NUM``).
                When omitted, a new empty dict is used.
        """
        # A fresh dict per instance: a ``{}`` default would be shared (and
        # mutated) across every instance created without an explicit conf.
        if conf is None:
            conf = {}
        self.conf = conf
        self.dataHandler = Data(conf)
        self.allCode = self.dataHandler.get("allCode")
        # Date whose detail data will be fetched.
        self.date = conf.get('date', Date.getDate())
        self.sourceName = conf.get('SOURCE_NAME')
        # Number of worker threads used when crawling in multi-thread mode.
        self.threadNum = int(conf.get('THREAD_NUM', THREAD_NUM))

    def download(self):
        """Queue every code, start the workers and wait for the queue to drain.

        A copy of ``self.conf`` extended with ``'handle'``, ``'date'`` and
        ``'queue'`` is handed to each ``MultiThreadHandler``; ``self.conf``
        itself is not modified. Integer codes are converted with
        ``Util.getCode`` before being queued.

        Returns:
            ``True`` once ``queue.join()`` returns.

        >>> app = MultiThreadDownloader(conf)
        >>> app.download()
        True
        """
        logging.debug("Start downloading data...\nCrawl mode is mutil.")

        codeQueue = queue.Queue()
        for code in self.allCode:
            if isinstance(code, int):
                code = Util.getCode(code)
            codeQueue.put(code)

        # Every worker receives the same settings dict, so build it once.
        workerConf = dict(self.conf)
        workerConf['handle'] = self.handle
        workerConf['date'] = self.date
        workerConf["queue"] = codeQueue

        for _ in range(self.threadNum):
            worker = MultiThreadHandler(conf=workerConf)
            # NOTE(review): presumably a threading.Thread subclass; daemon
            # workers would not keep the process alive after join() returns.
            worker.setDaemon(True)
            worker.start()

        # Presumably each worker marks items done on the queue; join() blocks
        # until every queued code has been accounted for.
        codeQueue.join()
        return True

    def handle(self, code, date):
        """Process one *code* for *date*; must be overridden by subclasses.

        Passed to the workers as ``conf['handle']``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError
class Stock(object):
    """Cursor-style accessor over the stock data held by a ``Data`` store.

    An instance keeps a current ``code`` and ``date``. Item access
    (``stock['601919']['20110805']``) moves that cursor, and attribute access
    (``stock.close``) asks the store for the named field at the cursor.
    """

    def __init__(self, conf=None):
        """Create an accessor with no code or date selected.

        Args:
            conf: Optional settings mapping forwarded to ``Data``. When
                omitted, a new empty dict is used.
        """
        # A fresh dict per instance: a ``{}`` default would be shared across
        # every instance created without an explicit conf.
        self.conf = {} if conf is None else conf
        self.code = None
        self.date = None
        self.data = Data(self.conf)  # stores and caches the data

    def __iter__(self):
        """Iterate over dates, or over the codes of the selected date.

        With no date selected, yields the keys of ``self.data.adv``. With a
        date selected, yields ``(code, record)`` pairs taken from the mapping
        the store returns for that date, or nothing when the store has no
        data for it.

        >>> stock.data.adv={'20110804':{'601919':{'close':'11.11'},'601920':{'close':'22.22'}}}
        >>> stock.date = None
        >>> list(stock)
        ['20110804']
        """
        if not self.date:
            return iter(self.data.adv.keys())

        data = self.data.get(name="adv", conf={"date": self.date, "code": "all"})
        if not data:
            return iter([])
        # The original called map() without an iterable, which raises
        # TypeError; pair every code in the mapping with its record instead.
        return iter([(code, data.get(code, {})) for code in data])

    def __getitem__(self, value):
        """Select a code or a date and return ``self`` so lookups can chain.

        A value of length 6 is stored as the current code and a value of
        length 8 as the current date; any other length leaves the cursor
        untouched.

        NOTE(review): the original comment said that ``[]`` changes the
        stock's base value while ``index()`` does not, but ``index()`` as
        written also reassigns ``self.date``.

        >>> stock.data.adv={'20110805':{'601919':{'close':'10.0'},'601920':{'close':'22.22'}}}
        >>> stock['601919']['20110805'].close
        '10.0'
        """
        size = len(value)
        if size == 6:
            self.code = value
        elif size == 8:
            self.date = value
        return self

    def __getattr__(self, value):
        """Look up the field named *value* at the current code and date.

        Only called for names that are not regular attributes. The lookup is
        delegated to ``self.data.get``; a ``None`` result is reported as
        ``0``.

        Raises:
            AttributeError: if ``data`` itself is missing, which happens on
                instances created without ``__init__`` (for example while
                being copied or unpickled).

        >>> stock.data.adv={'20110804':{'601919':{'close':'11.11','volume':'111','high':'12','low':'10',"sequence": [ 7.34]},'601920':{'close':'22.22'}}}
        >>> stock['601919']['20110804'].close
        '11.11'
        >>> stock.volume
        '111'
        >>> stock.high
        '12'
        >>> stock.low
        '10'
        >>> stock['20110804']['601919'].sequence
        [7.34]
        """
        if value == "data":
            # Without this guard the ``self.data`` access below re-enters
            # __getattr__ and recurses until RecursionError.
            raise AttributeError(value)
        result = self.data.get(name=value, conf={"date": self.date, "code": self.code})
        return 0 if result is None else result

    def __len__(self):
        """Return the number of codes reported by the store.

        ``allCode`` is resolved through ``__getattr__``, which yields ``0``
        when the store has nothing; that case is reported as length 0.

        >>> len(stock) > 1000
        True
        """
        codes = self.allCode
        return len(codes) if codes else 0

    def index(self, index):
        """Set the current date to ``Date.getDate(index, self.date)``; return ``self``."""
        self.date = Date.getDate(index, self.date)
        return self

    def _rangeStat(self, name, dateRange):
        """Ask the store for statistic *name* over *dateRange* at the cursor; 0 if falsy."""
        query = {"date": self.date, "code": self.code, "dateRange": dateRange}
        return self.data.get(name=name, conf=query) or 0

    def ma(self, dateRange):
        """Return the store's ``"ma"`` value (average price over *dateRange*), or 0."""
        return self._rangeStat("ma", dateRange)

    def max(self, dateRange):
        """Return the store's ``"max"`` value for *dateRange*, or 0."""
        return self._rangeStat("max", dateRange)

    def min(self, dateRange):
        """Return the store's ``"min"`` value for *dateRange*, or 0."""
        return self._rangeStat("min", dateRange)