def generate_all_data(self, cdate, black_list=ct.BLACK_LIST):
    """Collect per-stock data for ``cdate`` for every known code into one frame.

    The code list is read from ``CStockInfo`` and, when ``black_list`` is
    non-empty, the black-listed codes are removed from it. Each code is passed
    to ``self.get_stock_data(cdate, code)`` through a gevent pool; a result
    whose second element is ``None`` is treated as a failure and the code is
    retried on the next round. When a round makes no progress at all the
    method sleeps for 600 seconds before trying again, so it only returns
    once every code has produced data.

    Args:
        cdate: Date value forwarded unchanged to ``get_stock_data``.
        black_list: Codes to exclude from the fetch.

    Returns:
        A DataFrame with all fetched rows plus a ``code`` column, with
        duplicates dropped, sorted ascending by ``date`` and re-indexed from
        zero. An empty DataFrame is returned when there is nothing to fetch.
    """
    from gevent.pool import Pool

    obj_pool = Pool(5000)
    failed_list = CStockInfo(redis_host=self.redis_host).get(
        redis=self.redis).code.tolist()
    if len(black_list) > 0:
        failed_list = list(set(failed_list).difference(set(black_list)))

    # Frames are gathered in a list and concatenated once at the end:
    # growing a DataFrame row-block by row-block is quadratic, and
    # DataFrame.append no longer exists in pandas >= 2.0.
    frames = []
    last_length = len(failed_list)
    cfunc = partial(self.get_stock_data, cdate)
    while last_length > 0:
        self.logger.info("all stock list:%s, cdate:%s", len(failed_list), cdate)
        # Successes are recorded separately instead of being removed from
        # failed_list while the pool is still consuming that same list.
        # NOTE(review): assumes stock codes are hashable (e.g. str) - confirm.
        succeeded = set()
        for code_data in obj_pool.imap_unordered(cfunc, failed_list):
            tem_df = code_data[1]
            if tem_df is None:
                continue
            tem_df['code'] = code_data[0]
            frames.append(tem_df)
            succeeded.add(code_data[0])
        failed_list = [code for code in failed_list if code not in succeeded]

        if len(failed_list) != last_length:
            self.logger.debug("last failed list:%s, current failed list:%s",
                              last_length, len(failed_list))
            last_length = len(failed_list)
        else:
            # No code succeeded in this round: back off before retrying.
            time.sleep(600)

    obj_pool.join(timeout=5)
    obj_pool.kill()

    if not frames:
        # Nothing was fetched; sorting a column-less frame by 'date' would
        # raise KeyError, so hand back an empty frame instead.
        return pd.DataFrame()

    all_df = pd.concat(frames)
    all_df = all_df.drop_duplicates()
    all_df = all_df.sort_values(by='date', ascending=True)
    all_df = all_df.reset_index(drop=True)
    return all_df
def generate_all_data(self, cdate):
    """Collect per-stock data for ``cdate`` for every known code into one frame.

    The code list is read from ``CStockInfo``. Each code is passed to
    ``self.get_stock_data(cdate, code)`` through a gevent pool; a result whose
    second element is ``None`` is treated as a failure and the code is retried
    on the next round. The method only returns once every code has produced
    data.

    Args:
        cdate: Date value forwarded unchanged to ``get_stock_data``.

    Returns:
        A DataFrame with all fetched rows plus a ``code`` column, with
        duplicates dropped, sorted ascending by ``date`` and re-indexed from
        zero. An empty DataFrame is returned when there is nothing to fetch.
    """
    from gevent.pool import Pool

    obj_pool = Pool(4000)
    failed_list = CStockInfo(redis_host=self.redis_host).get(
        redis=self.redis).code.tolist()
    cfunc = partial(self.get_stock_data, cdate)

    # Frames are gathered in a list and concatenated once at the end:
    # growing a DataFrame row-block by row-block is quadratic, and
    # DataFrame.append no longer exists in pandas >= 2.0.
    frames = []
    # NOTE(review): failed codes are retried immediately with no back-off, so
    # a persistently failing code keeps this loop spinning - confirm intended.
    while failed_list:
        print("all stock list:%s, cdate:%s" % (len(failed_list), cdate))
        # Successes are recorded separately instead of being removed from
        # failed_list while the pool is still consuming that same list.
        # NOTE(review): assumes stock codes are hashable (e.g. str) - confirm.
        succeeded = set()
        for code_data in obj_pool.imap_unordered(cfunc, failed_list):
            tem_df = code_data[1]
            if tem_df is None:
                continue
            tem_df['code'] = code_data[0]
            frames.append(tem_df)
            succeeded.add(code_data[0])
        failed_list = [code for code in failed_list if code not in succeeded]

    obj_pool.join(timeout=5)
    obj_pool.kill()

    if not frames:
        # Nothing was fetched; sorting a column-less frame by 'date' would
        # raise KeyError, so hand back an empty frame instead.
        return pd.DataFrame()

    all_df = pd.concat(frames)
    all_df = all_df.drop_duplicates()
    all_df = all_df.sort_values(by='date', ascending=True)
    all_df = all_df.reset_index(drop=True)
    return all_df