Example #1
    def __init__(self):
        # Locate const.json at the project root by stripping the module sub-path
        const_path = sys.path[0].replace("\\craw_data\\stock_list", "")
        with open(const_path + '\\const.json', 'r', encoding='utf8') as f:
            self.consts = json.load(f)
        
        self.downloader = download.Downloader()
        # self.downloader.init_zhilian_ip()     # use a paid proxy IP
        # self.downloader.init_ip_pool()        # use a free proxy IP

        self.arr = []
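
The sys.path[0].replace(...) lookup only works with Windows separators and when the script is launched from that exact directory. A minimal, platform-independent sketch of the same config loading, assuming const.json sits two directories above the module (the layout implied by the stripped sub-path):

    import json
    from pathlib import Path

    def load_consts():
        # __file__-relative lookup avoids the Windows-only backslash replace;
        # parents[2] assumes the craw_data/stock_list layout shown above
        root = Path(__file__).resolve().parents[2]
        with open(root / 'const.json', 'r', encoding='utf8') as f:
            return json.load(f)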
Example #2
    def __init__(self, end_date='00000000'):

        const_path = sys.path[0].replace("\\craw_data\\dayline", "")
        with open(const_path + "\\const.json", "r", encoding="utf8") as f:
            self.consts = json.load(f)

        self.stock_list_file = self.consts['stock_list_file']                        # CSV file listing all stocks
        self.save_path_prefix = self.consts['day_line_file_prefix']['ease_money']    # directory for saved day-line files
        self.end_date = end_date                                                     # end date
        self.codes = self.get_codes()

        self.downloader = download.Downloader()                                      # downloader
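
For reference, the keys these snippets read from const.json (here and in Example #3 below) imply a structure roughly like the following; the paths are placeholders, not values recovered from the source:

    {
        "stock_list_file": "path/to/stock_list.csv",
        "day_line_file_prefix": {
            "ease_money": "path/to/dayline/ease_money/",
            "netease": "path/to/dayline/netease/"
        }
    }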
Example #3
    def __init__(self, end_date='99999999', thread_num=1, timeout=10):

        const_path = sys.path[0].replace("\\craw_data\\dayline", "")
        with open(const_path + "\\const.json", "r", encoding='utf8') as f:
            consts = json.load(f)

        self.stock_list_file = consts['stock_list_file']  # CSV file listing all stocks
        self.save_path_prefix = consts['day_line_file_prefix']['netease']  # directory for saved day-line files
        self.end_date = end_date  # end date
        self.thread_num = thread_num  # number of threads
        self.timeout = timeout  # per-thread timeout

        self.downloader = download.Downloader()  # downloader
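
The thread_num and timeout fields suggest the day lines are fetched concurrently. A minimal sketch of how they might drive a standard-library thread pool; fetch_day_line is a hypothetical stand-in for whatever per-stock request the real class issues through download.Downloader:

    import concurrent.futures

    def fetch_day_line(code, timeout):
        # Hypothetical per-stock fetch; timeout would be passed to the request
        ...

    def download_all(codes, thread_num=1, timeout=10):
        # One worker per configured thread
        with concurrent.futures.ThreadPoolExecutor(max_workers=thread_num) as pool:
            futures = {pool.submit(fetch_day_line, code, timeout): code
                       for code in codes}
            for future in concurrent.futures.as_completed(futures):
                code = futures[future]
                try:
                    future.result()
                except Exception as exc:
                    print('download failed for {}: {}'.format(code, exc))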
Example #4
    def run(self):
        """Run the pipeline according to the passed command-line args."""
        # Get some initialization info required for the pipeline to run
        initializer_obj = initialize.Initializer(self.instr, self.cfg)
        initializer_obj.initialize_dates()
        initializer_obj.get_date_ranges()
        initializer_obj.get_processed_ranges()

        if self.initialize:
            initializer_obj.initialize_HDF5(chunks=self.chunks)

        # Initialize the downloader
        downloader = download.Downloader(instr=self.instr,
                                         instr_cfg=self.instr_cfg)

        # Divide up the dates into chunks
        date_chunks = np.array_split(initializer_obj.dates, self.chunks)
        for i, chunk in enumerate(date_chunks):
            for (start, stop) in chunk:
                failed = False
                results = None
                if '{} {}'.format(start.iso, stop.iso) in \
                        initializer_obj.previously_analyzed:
                    LOG.info('Already analyzed {} to {}\n'.format(
                        start.iso, stop.iso))
                    continue

                # Start the analysis
                LOG.info('Analyzing data from {} to {}'.format(
                    start.iso, stop.iso))
                if self.download:
                    download_time = self.run_downloader(date_range=(start,
                                                                    stop),
                                                        downloader=downloader)
                    self.processing_times['download'] = download_time

                self.flist = glob.glob(self.search_pattern)

                if self.process:
                    process_time = self.run_processing(start, stop)
                    self.processing_times['cr_rejection'] = process_time

                # Analyze the images and extract the results iff files
                # were successfully processed through CR rejection AND
                # the analyze flag is True.
                if self.analyze and self.flist:
                    analysis_time, results = self.run_labeling_all(
                        chunk_num=i + 1)
                    self.processing_times['analysis'] = analysis_time
                else:
                    failed = True

                self.processing_times['total'] = sum(
                    self.processing_times.values())

                # Clean up downloaded files and write out the range we just processed
                self._pipeline_cleanup(start, stop, failed)

                # Send the final email iff there were results computed
                if results:
                    self.send_email(start, stop, results)
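
The chunking above relies on np.array_split, which, unlike np.split, accepts a chunk count that does not evenly divide the input. A small self-contained illustration with placeholder (start, stop) pairs (the real dates entries are objects exposing an .iso attribute):

    import numpy as np

    # Five (start, stop) pairs split into three chunks of sizes 2, 2, 1
    dates = np.array([('d1', 'd2'), ('d3', 'd4'), ('d5', 'd6'),
                      ('d7', 'd8'), ('d9', 'd10')])
    for i, chunk in enumerate(np.array_split(dates, 3)):
        for (start, stop) in chunk:
            print('chunk {}: {} -> {}'.format(i + 1, start, stop))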