class MultiThreadDownloader:
    """Run a per-code ``handle`` callback across several worker threads.

    Every code known to the data handler is placed on a queue; worker
    threads (``MultiThreadHandler``) are started with a configuration dict
    that carries the queue, the target date and the ``handle`` callback.
    Subclasses must override :meth:`handle`.
    """

    def __init__(self, conf=None):
        """Read the downloader settings from ``conf``.

        ``conf`` is a dict-like configuration. The keys read here are
        ``date`` (defaults to ``Date.getDate()``), ``SOURCE_NAME`` and
        ``THREAD_NUM`` (defaults to the module-level ``THREAD_NUM``).
        """
        # A mutable ``conf={}`` default would be one dict shared by every
        # instance created without arguments, so build a fresh one instead.
        if conf is None:
            conf = {}
        self.conf = conf
        self.dataHandler = Data(conf)
        self.allCode = self.dataHandler.get("allCode")
        # Date whose detail data should be fetched.
        self.date = conf.get('date', Date.getDate())
        self.sourceName = conf.get('SOURCE_NAME')
        # Number of worker threads used when crawling in multi-thread mode.
        self.threadNum = int(conf.get('THREAD_NUM', THREAD_NUM))

    def download(self):
        """Queue every code, start the workers and wait for the queue to drain.

        Integer codes are converted with ``Util.getCode`` before queuing.
        Returns ``True`` once ``queue.join()`` has returned.

        NOTE(review): this relies on ``MultiThreadHandler`` calling
        ``task_done()`` for every item it takes; that class is not visible
        here, so confirm against its implementation.
        """
        logging.debug("Start downloading data...\nCrawl mode is mutil.")
        conf = {}
        conf.update(self.conf)
        conf['handle'] = self.handle
        conf['date'] = self.date

        oQueue = queue.Queue()
        # Tolerate a data handler that has no code list yet.
        for code in self.allCode or []:
            if isinstance(code, int):
                code = Util.getCode(code)
            oQueue.put(code)
        # The queue is the same for every worker, so set it once.
        conf["queue"] = oQueue

        workerCount = self.threadNum
        if workerCount < 1 and not oQueue.empty():
            # With work queued and no worker, join() below would block forever.
            logging.warning("THREAD_NUM is %s; falling back to 1 worker.", workerCount)
            workerCount = 1

        for _ in range(workerCount):
            worker = MultiThreadHandler(conf=conf)
            # Daemon threads do not keep the interpreter alive at exit.
            worker.daemon = True
            worker.start()
        oQueue.join()
        return True

    def handle(self, code, date):
        """Process one ``code`` for ``date``; subclasses must implement this."""
        raise NotImplementedError
# --- Example no. 2 (snippet separator left by the source page; score: 0) ---
class Stock(object):
    """Cursor-style accessor over the ``Data`` store.

    The instance keeps a current ``code`` and ``date``. Unknown attributes
    (``close``, ``volume``, ``allCode``, ...) are looked up in the data
    store for that code/date pair, so ``stock['601919']['20110804'].close``
    first selects the code and date and then reads the value.
    """

    # Attributes owned by the instance itself; they must never be resolved
    # through the data store (see __getattr__).
    _OWN_FIELDS = ("conf", "code", "date", "data")

    def __init__(self, conf=None):
        """Create a stock accessor backed by ``Data(conf)``."""
        # A mutable ``conf={}`` default would be shared between instances.
        self.conf = {} if conf is None else conf
        self.code = None
        self.date = None
        self.data = Data(self.conf)  # store and cache data

    def __iter__(self):
        """Iterate over the stock data.

        With no current date, iterate over the keys of ``self.data.adv``.
        With a current date, iterate over ``(code, entry)`` pairs taken
        from the "adv" data for that date; an empty iterator is returned
        when the store has nothing for the date.

        NOTE(review): the per-date branch assumes the store returns a
        mapping keyed by code -- confirm against ``Data.get``.
        """
        if not self.date:
            return iter(self.data.adv.keys())

        data = self.data.get(name="adv", conf={"date": self.date, "code": "all"})
        if not data:
            return iter([])
        # The original called map() without an iterable, which raises
        # TypeError; iterate over the codes of the mapping instead.
        pairs = [(code, data.get(code, {})) for code in data]
        return iter(pairs)

    def __getitem__(self, value):
        """Select the current code or date and return ``self`` for chaining.

        A 6-character value is taken as a code, an 8-character value as a
        date; any other length leaves the selection untouched. The
        selection is stored on the instance, so it persists for later
        attribute reads:

            stock['601919']['20110805'].close
        """
        if len(value) == 6:
            self.code = value
        if len(value) == 8:
            self.date = value
        return self

    def __getattr__(self, value):
        """Resolve unknown attributes through the data store.

        The lookup uses the current ``date`` and ``code``; a missing value
        (``None``) is reported as ``0``.

        Raises:
            AttributeError: for the instance's own fields and for dunder
                names, which are never data-store entries.
        """
        # Without this guard a missing ``data`` attribute (bare, copied or
        # unpickled instance) makes ``self.data`` below recurse forever, and
        # protocol probes such as ``__getstate__`` would be answered with 0.
        if value in self._OWN_FIELDS or (value.startswith('__') and value.endswith('__')):
            raise AttributeError(value)
        result = self.data.get(name=value, conf={"date": self.date, "code": self.code})
        if result is None:
            return 0
        return result

    def __len__(self):
        """Return the number of codes in the stock data (0 when unavailable)."""
        codes = self.allCode
        # ``allCode`` resolves to 0 when the store has no code list.
        return len(codes) if codes else 0

    def index(self, index):
        """Set the current date to ``Date.getDate(index, self.date)``; return ``self``."""
        self.date = Date.getDate(index, self.date)
        return self

    def _rangeStat(self, name, dateRange):
        """Fetch the ``name`` statistic for the current code/date over ``dateRange``."""
        conf = {"date": self.date, "code": self.code, "dateRange": dateRange}
        return self.data.get(name=name, conf=conf) or 0

    def ma(self, dateRange):
        """Return the average price over ``dateRange``, or 0 when unavailable."""
        return self._rangeStat("ma", dateRange)

    def max(self, dateRange):
        """Return the "max" statistic over ``dateRange``, or 0 when unavailable."""
        return self._rangeStat("max", dateRange)

    def min(self, dateRange):
        """Return the "min" statistic over ``dateRange``, or 0 when unavailable."""
        return self._rangeStat("min", dateRange)