Example #1
 def upload_test(self):
     start_time = time.time()
     q = Manager().Queue()
     plist = []
     for i in range(self.upload_user):
         proc = Process(target=self.upload_one_user, args=(q,))
         plist.append(proc)
     for proc in plist:
         proc.start()
     for proc in plist:
         proc.join()
     while True:
         if q.empty():
             break
         else:
             if q.get() == 0:
                 self.upload_success += 1
             else:
                 self.upload_fail += 1
     use_time = time.time() - start_time
     table = PrettyTable(["key", "value"])
     table.add_row(["One File Size (M)", self.upload_file_size])
     table.add_row(["All File Size (M)", self.upload_file_size * self.upload_number * self.upload_user])
     table.add_row(["Process Count(user)", self.upload_user])
     table.add_row(["Upload Count", self.upload_number * self.upload_user])
     table.add_row(["Interval Time(s)", self.upload_time])
     table.add_row(["Success count", self.upload_success])
     table.add_row(["Fail count", self.upload_fail])
     table.add_row(["Success ratio (%)",
                    (round(self.upload_success / float(self.upload_number * self.upload_user), 4) * 100)])
     table.add_row(["Use time (s)", "%.2f" % use_time])
     print(table)
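
The worker method upload_one_user is not shown in this snippet; a minimal sketch of what such a worker might look like, assuming a hypothetical do_upload helper and the counters used above, is:

    def upload_one_user(self, q):
        # Hypothetical worker: run self.upload_number uploads and push one
        # status code per upload into the shared Manager queue
        # (0 = success, anything else = failure, matching the reader above).
        for _ in range(self.upload_number):
            try:
                self.do_upload()               # assumed upload helper
                q.put(0)
            except Exception:
                q.put(1)
            time.sleep(self.upload_time)       # pause between uploads
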
Example #2
def main():
    arg_parser = argparse.ArgumentParser(description='bbd compressing program')
    arg_parser.add_argument('-compress_from_dir', type=str, default='.',
                            help='directory that needs to be compressed')
    arg_parser.add_argument('-compress_to_dir', type=str, default='.',
                            help='directory where the compressed files are put')
    arg_parser.add_argument('-compress_method', default='bz2', choices=['bz2', 'gz'],
                            help='compression method; bz2 and gz are supported, '
                                 'bz2 is the default')
    arg_parser.add_argument('-compress_dir_match', default=None,
                            help='regular expression that matches '
                                 'which directories should be compressed')
    arg_parser.add_argument('-compress_file_match', default=None,
                            help='regular expression that matches '
                                 'which files should be compressed')

    args = arg_parser.parse_args()
    kwargs = dict()
    kwargs['compress_from_dir'] = os.path.abspath(args.compress_from_dir)
    kwargs['compress_to_dir'] = os.path.abspath(args.compress_to_dir)
    kwargs['compress_method'] = args.compress_method
    kwargs['compress_dir_match'] = args.compress_dir_match
    kwargs['compress_file_match'] = args.compress_file_match
    print('Operating parameters are as follows:')
    print('\t' + '\n\t'.join(['{}: {}'.format(k, v) for k, v in kwargs.items()]))

    if check_compress_proc_is_alive():
        return

    if kwargs['compress_from_dir'] == kwargs['compress_to_dir']:
        print(kwargs['compress_from_dir'], kwargs['compress_to_dir'])
        compress_to_dir = os.path.join(kwargs['compress_to_dir'], 'flume_compressed_data')
        kwargs['compress_to_dir'] = compress_to_dir
        os.makedirs(compress_to_dir, exist_ok=True)

    max_worker = cpu_count() if cpu_count() <= 8 else 8
    pool_cls = Pool
    compressed_queue = Manager().Queue()
    print('using multiple processes to compress files')

    path_mgr = PathUtils(**kwargs)
    compressed_data_dir = Path(kwargs['compress_to_dir']) / 'bbd_compressed_data_dir'
    compress_method = kwargs['compress_method']
    for file_path in path_mgr.match_need_compress_files():
        from_path = str(file_path.absolute())
        to_path = str((compressed_data_dir / file_path.name).absolute())
        compressed_queue.put((from_path, to_path, compress_method))

    if compressed_queue.empty():
        print('there are no files to compress, waiting for the next check')
        return

    multi_workers(max_worker=max_worker, pool_cls=pool_cls, work=compress_file,
                  compressed_queue=compressed_queue)
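
compress_file and multi_workers are imported from elsewhere in the project; as a rough sketch (names and behaviour assumed, not taken from the original module), a compatible compress_file worker could be:

import bz2
import gzip
import shutil

def compress_file(from_path, to_path, method):
    # Hypothetical worker: compress one file with bz2 or gz and write it
    # next to the requested target path with the matching extension.
    opener = bz2.open if method == 'bz2' else gzip.open
    with open(from_path, 'rb') as src, opener('{}.{}'.format(to_path, method), 'wb') as dst:
        shutil.copyfileobj(src, dst)
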
Example #3
 def upload_begin(self):
     plist = []
     q = Manager().Queue()
     with open(self.list_path, 'r') as fp:
         for i in fp:
             if not i:
                 break
             md5_crc32 = i.strip()[:41]
             if md5_crc32 not in self.tmp_list and len(md5_crc32) == 41:
                 self.tmp_list.append(md5_crc32)
                 self.upload_num += 1
     print(self.upload_num)
     for md5_crc32_list in self.chunks(self.tmp_list, self.work_count):
         proc = Process(target=self.upload_file, args=(q, md5_crc32_list,))
         plist.append(proc)
     for proc in plist:
         proc.start()
     for proc in plist:
         proc.join()
     while True:
         if q.empty():
             break
         else:
             r = q.get()
             if r == 0:
                 self.success += 1
             elif r == 1:
                 self.fail += 1
             elif r == 2:
                 self.download_fail += 1
             else:
                 pass
     use_time = time.time() - self.start_time
     table = PrettyTable(["key", "value"])
     table.add_row(["Upload Count", len(set(self.tmp_list))])
     table.add_row(["Success count", self.success])
     table.add_row(["Fail count", self.fail])
     table.add_row(["Download Fail", self.download_fail])
     table.add_row(["Use time (s)", "%.2f" % use_time])
     print(table)
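
The chunks helper used above is not part of the snippet; assuming work_count is the number of worker processes, a hypothetical implementation could be:

    def chunks(self, items, n):
        # Hypothetical helper: split items into n roughly equal slices,
        # one slice per worker process.
        k, m = divmod(len(items), n)
        return [items[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
                for i in range(n)]
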
def startServer(host, port, options):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    s.bind((host, port))
    s.listen(0)

    queue = Manager().Queue()
    while True:
        print("main: waiting for connection")
        conn, addr = s.accept()
        print('main: Connected by', addr)

        data = conn.recv(1024)
        print('received port request')
        p = Process(target=serverNewClient, args=(queue, options,))
        p.start()
        while queue.empty():
            time.sleep(0.05)
            print("queue is still empty")
        port = queue.get()
        conn.sendall(('%d\r\n' % port).encode())
        print("assigned port %d to new client" % port)
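
serverNewClient is defined elsewhere; a minimal sketch of a child process that binds an ephemeral port and reports it back through the Manager queue (the handling logic is an assumption) might be:

def serverNewClient(queue, options):
    # Hypothetical child: bind port 0 so the OS picks a free port, report
    # the chosen port through the shared queue, then serve the client.
    srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    srv.bind(('', 0))
    srv.listen(1)
    queue.put(srv.getsockname()[1])
    conn, addr = srv.accept()
    # ... handle the client according to `options` ...
    conn.close()
    srv.close()
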
Example #5
    # submit 2 tasks to the pool
    #f1 = pool.submit(return_future,"hello")
    #    with Manager() as mgr:
    #        l = mgr.list()
    #        l=[]
    #        l.append(0)
    ###q = Queue()  # use a plain Queue() when spawning with Process directly (not with a pool)
    q = Manager().Queue()  # use a Manager queue when working with a pool  #*
    #    f2 = pool.submit(return_future,q)  #*
    ###p=Process(target=return_future, args=(q,))
    ###p.start()
    #p.join()
    while 1:
        tim = time.time()
        print('c')
        print('main:', os.getpid())
        time.sleep(0.4)
        if not e.is_set():  # if the event is not set, submit another task to the pool
            f2 = pool.submit(return_future, q, e)

        if not q.empty():
            value = q.get(True)
            d = value[0] + 1
            print()
            print("d=", d)
            print()
            if d == 11:
                break
#                pool.terminate()
        print(time.time() - tim)
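
The fragment above depends on names defined elsewhere (pool, e, return_future); a self-contained sketch of the same Manager-queue-with-a-pool pattern, with assumed stand-ins for those names, could look like this:

import os
import time
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager

def return_future(q, e):
    # Assumed worker: report its pid through the Manager queue
    # unless the shared event has been set.
    if not e.is_set():
        q.put((os.getpid(),))

if __name__ == '__main__':
    mgr = Manager()
    q = mgr.Queue()     # a Manager queue can be passed to pool workers
    e = mgr.Event()
    with ProcessPoolExecutor(4) as pool:
        for _ in range(10):
            pool.submit(return_future, q, e)
            time.sleep(0.1)
        e.set()         # stop the workers from producing more results
    while not q.empty():
        print(q.get())
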
Example #6
def getData(q):
    while True:
        print('move data %s out from queue'%(q.get()))
        time.sleep(2)



if __name__ == '__main__':
    q = Manager().Queue()

    pool = Pool(8)
    for i in range(8):
        pool.apply_async(generateData, args=(q,))
    # generator = Process(target=generateData, args=(q,))
    pool.close()

    mover = Process(target=getData, args=(q,))
    # generator.start()
    mover.start()
    pool.join()
    # generator.join()
    while True:
        if q.empty():
            break
    mover.terminate()  # getData loops forever, so stop it once the queue is drained
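
generateData is assumed to exist elsewhere; assuming os and time are imported alongside the multiprocessing imports, a hypothetical producer compatible with the pool call above could be as simple as:

def generateData(q):
    # Hypothetical producer: each pool worker pushes a handful of items
    # into the shared Manager queue for the mover process to drain.
    for i in range(5):
        q.put('%s-%d' % (os.getpid(), i))
        time.sleep(1)
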





    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Calculate the factor loadings of the sample stocks on the given dates and save them to the factor database.
        Parameters:
        --------
        :param start_date: datetime-like, str
            start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            end date; if None, only the factor loading of start_date is calculated; format: YYYY-MM-DD or YYYYMMDD
        :param month_end: bool, default True
            if True, only calculate factor loadings at month ends
        :param save: bool, default False
            whether to save the loadings to the factor database
        :param kwargs:
            'multi_proc': bool, True = use multiple processes, False = use a single process, default False
        :return: dict
            factor loadings
        """
        # Get the trading-day series and the stock basics table
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # Iterate over the trading days and calculate the DASTD factor loadings
        dict_dastd = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info('[%s] Calc DASTD factor loading.' %
                         Utils.datetimelike_to_str(calc_date))
            # Iterate over the stocks and calculate each stock's DASTD factor value
            s = (calc_date - datetime.timedelta(
                days=risk_ct.DASTD_CT.listed_days)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]
            ids = []  # list of stock codes
            dastds = []  # list of DASTD factor values

            if 'multi_proc' not in kwargs:
                kwargs['multi_proc'] = False
            if not kwargs['multi_proc']:
                # Calculate DASTD factor values in a single process
                for _, stock_info in stock_basics.iterrows():
                    logging.info(
                        "[%s] Calc %s's DASTD factor loading." %
                        (calc_date.strftime('%Y-%m-%d'), stock_info.symbol))
                    dastd_data = cls._calc_factor_loading(
                        stock_info.symbol, calc_date)
                    if dastd_data is not None:
                        ids.append(dastd_data['code'])
                        dastds.append(dastd_data['dastd'])
            else:
                # Calculate DASTD factor values in parallel with multiple processes
                q = Manager().Queue()  # queue for inter-process communication, holds the loadings computed by each worker
                p = Pool(4)  # process pool, at most 4 concurrent processes
                for _, stock_info in stock_basics.iterrows():
                    p.apply_async(cls._calc_factor_loading_proc,
                                  args=(
                                      stock_info.symbol,
                                      calc_date,
                                      q,
                                  ))
                p.close()
                p.join()
                while not q.empty():
                    dastd_data = q.get(True)
                    ids.append(dastd_data['code'])
                    dastds.append(dastd_data['dastd'])

            date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            dict_dastd = {
                'date': [date_label] * len(ids),
                'id': ids,
                'factorvalue': dastds
            }
            if save:
                Utils.factor_loading_persistent(
                    cls._db_file,
                    Utils.datetimelike_to_str(calc_date, dash=False),
                    dict_dastd, ['date', 'id', 'factorvalue'])
            # Pause for 180 seconds
            logging.info('Suspending for 180s.')
            time.sleep(180)
        return dict_dastd
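
The multi-process branch hands each symbol to cls._calc_factor_loading_proc, which is not shown; judging from how the results are consumed (a dict with 'code' and 'dastd' keys), a hypothetical sketch could be:

    @classmethod
    def _calc_factor_loading_proc(cls, code, calc_date, q):
        # Hypothetical worker: compute one stock's DASTD loading in a pool
        # process and push the result dict onto the shared Manager queue.
        logging.info("[%s] Calc %s's DASTD factor loading (proc)." %
                     (Utils.datetimelike_to_str(calc_date), code))
        dastd_data = cls._calc_factor_loading(code, calc_date)
        if dastd_data is not None:
            q.put(dastd_data)   # expected keys: 'code', 'dastd'
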
Example #8
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        Calculate the factor loadings of the sample stocks on the given dates and save them to the factor database.
        Parameters:
        --------
        :param start_date: datetime-like, str
            start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            end date; if None, only the factor loading of start_date is calculated; format: YYYY-MM-DD or YYYYMMDD
        :param month_end: bool, default True
            if True, only calculate factor loadings at month ends
        :param save: bool, default False
            whether to save the loadings to the factor database
        :param kwargs:
        :return: dict
            factor loadings
        """
        # Get the trading-day series and the stock basics table
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # Iterate over the trading days and calculate the factor loadings
        dict_beta = {}
        dict_hsigma = {}
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            logging.info('[%s] Calc BETA factor loading.' %
                         Utils.datetimelike_to_str(calc_date))
            # Iterate over the stocks and calculate each stock's BETA factor value
            s = (calc_date - datetime.timedelta(days=180)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]
            ids = []  # list of stock codes
            betas = []  # BETA factor values
            hsigmas = []  # HSIGMA factor values

            # Single-process calculation of BETA and HSIGMA factor values:
            # for _, stock_info in stock_basics.iterrows():
            #     logging.info("[%s] Calc %s's BETA and HSIGMA factor data." % (calc_date.strftime('%Y-%m-%d'), stock_info.symbol))
            #     beta_data = cls._calc_factor_loading(stock_info.symbol, calc_date)
            #     if beta_data is not None:
            #         ids.append(beta_data['code'])
            #         betas.append(beta_data['beta'])
            #         hsigmas.append(beta_data['hsigma'])

            # Calculate BETA and HSIGMA factor values in parallel with multiple processes
            q = Manager().Queue()  # queue for inter-process communication, holds the loadings computed by each worker
            p = Pool(4)  # process pool, at most 4 concurrent processes
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc,
                              args=(
                                  stock_info.symbol,
                                  calc_date,
                                  q,
                              ))
            p.close()
            p.join()
            while not q.empty():
                beta_data = q.get(True)
                ids.append(beta_data['code'])
                betas.append(beta_data['beta'])
                hsigmas.append(beta_data['hsigma'])

            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_beta = {
                'date': [date_label] * len(ids),
                'id': ids,
                'factorvalue': betas
            }
            dict_hsigma = {
                'date': [date_label] * len(ids),
                'id': ids,
                'factorvalue': hsigmas
            }
            if save:
                Utils.factor_loading_persistent(
                    cls._db_file,
                    Utils.datetimelike_to_str(calc_date, dash=False),
                    dict_beta, ['date', 'id', 'factorvalue'])
                hsigma_path = os.path.join(factor_ct.FACTOR_DB.db_path,
                                           risk_ct.HSIGMA_CT.db_file)
                Utils.factor_loading_persistent(
                    hsigma_path,
                    Utils.datetimelike_to_str(calc_date, dash=False),
                    dict_hsigma, ['date', 'id', 'factorvalue'])
            # Pause for 180 seconds
            logging.info('Suspending for 180s.')
            time.sleep(180)
        return dict_beta
Example #9
    def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
        """
        Calculate the factor loadings of the sample stocks on the given dates and save them to the factor database.
        Parameters
        --------
        :param start_date: datetime-like, str
            start date, format: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            end date; if None, only the factor loading of start_date is calculated; format: YYYY-MM-DD or YYYYMMDD
        :param month_end: bool, default True
            if True, only calculate factor loadings at month ends
        :param save: bool, default False
            whether to save the loadings to the factor database
        :param kwargs:
            'multi_proc': bool, True = parallel calculation with multiple processes, False = single process, default False
        :return: factor loadings, DataFrame
        --------
            factor loadings, DataFrame
            0. date: date
            1. id: security symbol
            2. m0: overnight-session momentum
            3. m1: momentum of the first trading hour
            4. m2: momentum of the second trading hour
            5. m3: momentum of the third trading hour
            6. m4: momentum of the fourth trading hour
            7. m_normal: conventional momentum
        """
        # Get the trading-day series and the stock basics table
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
        # all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # Iterate over the trading days and calculate intraday momentum factor values
        dict_intraday_momentum = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue

            # Calculate the momentum factor for each intraday period
            dict_intraday_momentum = {'date': [], 'id': [], 'm0': [], 'm1': [],
                                      'm2': [], 'm3': [], 'm4': [], 'm_normal': []}
            # Iterate over the stocks and calculate each stock's intraday momentum
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = Utils.get_stock_basics(s)

            if 'multi_proc' not in kwargs:
                kwargs['multi_proc'] = False
            if not kwargs['multi_proc']:
                # Single-process calculation
                for _, stock_info in stock_basics.iterrows():
                    momentum_data = cls._calc_factor_loading(stock_info.symbol, calc_date)
                    if momentum_data is not None:
                        logging.info("[%s] %s's intraday momentum = (%0.4f,%0.4f,%0.4f,%0.4f,%0.4f,%0.4f)" % (calc_date.strftime('%Y-%m-%d'),stock_info.symbol, momentum_data.m0, momentum_data.m1, momentum_data.m2, momentum_data.m3, momentum_data.m4, momentum_data.m_normal))
                        dict_intraday_momentum['id'].append(Utils.code_to_symbol(stock_info.symbol))
                        dict_intraday_momentum['m0'].append(round(momentum_data.m0, 6))
                        dict_intraday_momentum['m1'].append(round(momentum_data.m1, 6))
                        dict_intraday_momentum['m2'].append(round(momentum_data.m2, 6))
                        dict_intraday_momentum['m3'].append(round(momentum_data.m3, 6))
                        dict_intraday_momentum['m4'].append(round(momentum_data.m4, 6))
                        dict_intraday_momentum['m_normal'].append(round(momentum_data.m_normal, 6))
            else:
                # Calculate intraday momentum factor loadings in parallel with multiple processes
                q = Manager().Queue()   # queue for inter-process communication, holds the loadings computed by each worker
                p = Pool(4)             # process pool, at most 4 concurrent processes
                for _, stock_info in stock_basics.iterrows():
                    p.apply_async(cls._calc_factor_loading_proc, args=(stock_info.symbol, calc_date, q,))
                p.close()
                p.join()
                while not q.empty():
                    momentum_data = q.get(True)
                    dict_intraday_momentum['id'].append(momentum_data[0])
                    dict_intraday_momentum['m0'].append(round(momentum_data[1], 6))
                    dict_intraday_momentum['m1'].append(round(momentum_data[2], 6))
                    dict_intraday_momentum['m2'].append(round(momentum_data[3], 6))
                    dict_intraday_momentum['m3'].append(round(momentum_data[4], 6))
                    dict_intraday_momentum['m4'].append(round(momentum_data[5], 6))
                    dict_intraday_momentum['m_normal'].append(round(momentum_data[6], 6))

            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_intraday_momentum['date'] = [date_label] * len(dict_intraday_momentum['id'])

            # Save the factor loadings to the factor database
            if save:
                # Utils.factor_loading_persistent(cls._db_file, calc_date.strftime('%Y%m%d'), dict_intraday_momentum)
                cls._save_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), dict_intraday_momentum, 'periodmomentum', factor_type='raw')

            # Calculate and save the Rank IC vector of the intraday period momentum factors
            cls._calc_periodmomentum_ic(calc_date, 'month')

            # Calculate the optimized weights
            if alphafactor_ct.INTRADAYMOMENTUM_CT['optimized']:
                cls._optimize_periodmomentum_weight(calc_date)

            # Calculate the synthetic intraday momentum factor
            if alphafactor_ct.INTRADAYMOMENTUM_CT['synthesized']:
                logging.info('[%s] calc synthetic intraday momentum factor loading.' % Utils.datetimelike_to_str(calc_date))
                dict_intraday_momentum = {'date': [], 'id': [], 'factorvalue': []}
                # Read the intraday period momentum factor values
                # period_momentum_path = os.path.join(SETTINGS.FACTOR_DB_PATH, alphafactor_ct.INTRADAYMOMENTUM_CT.db_file, 'raw/periodmomentum')
                # df_factor_loading = Utils.read_factor_loading(period_momentum_path, Utils.datetimelike_to_str(calc_date, False))
                df_factor_loading = cls._get_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), factor_name='periodmomentum', factor_type='raw', drop_na=False)
                if df_factor_loading.shape[0] <= 0:
                    logging.info("[%s] Intraday momentum factor loading does not exist." % Utils.datetimelike_to_str(calc_date))
                    return
                df_factor_loading.fillna(0, inplace=True)
                # Read the optimal factor weights
                factor_weight = cls._get_factor_weight(calc_date)
                if factor_weight is None:
                    logging.info("[%s] Factor weight does not exist." % Utils.datetimelike_to_str(calc_date))
                    return
                # Calculate the synthetic momentum factor; winsorize and standardize the period factors before combining them
                arr_factor_loading = np.array(df_factor_loading[['m0', 'm1', 'm2', 'm3', 'm4']])
                arr_factor_loading = Utils.normalize_data(arr_factor_loading, treat_outlier=True)
                arr_factor_weight = np.array(factor_weight.drop('date')).reshape((5, 1))
                arr_synthetic_factor = np.dot(arr_factor_loading, arr_factor_weight)
                dict_intraday_momentum['date'] = list(df_factor_loading['date'])
                dict_intraday_momentum['id'] = list(df_factor_loading['id'])
                dict_intraday_momentum['factorvalue'] = list(arr_synthetic_factor.astype(float).round(6).reshape((arr_synthetic_factor.shape[0],)))
                # Standardize the synthetic momentum factor
                df_std_intradaymonmentum = Utils.normalize_data(pd.DataFrame(dict_intraday_momentum), columns='factorvalue', treat_outlier=True, weight='eq')
                # Save the synthetic factor
                if save:
                    # Utils.factor_loading_persistent(synthetic_db_file, Utils.datetimelike_to_str(calc_date, False), dict_intraday_momentum)
                    cls._save_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), dict_intraday_momentum, 'IntradayMomentum', factor_type='raw', columns=['date', 'id', 'factorvalue'])
                    cls._save_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), df_std_intradaymonmentum, 'IntradayMomentum', factor_type='standardized', columns=['date', 'id', 'factorvalue'])

            # Pause for 360 seconds
            logging.info('Suspending for 360s.')
            time.sleep(360)
        return dict_intraday_momentum
class Queue_server(object):
    
    '''
    Initialize the WeChat official-account queue
    @param Tuple wx_lists  list of official accounts
    '''
    def __init__(self, wx_lists=()):
        self.__queue = Manager().Queue(-1)
        self.init_wx_lists(wx_lists)
        self.__fail_list = Manager().list()
    '''
    Load the official-account list into the queue
    @param Tuple wx_lists  list of official accounts
    '''
    def init_wx_lists(self, wx_lists=()):
        for wx in wx_lists:
            self.put(wx)
    '''
    Add an element
    @param mixed value  the element to add
    '''
    def put(self, value):
        self.__queue.put(value)
    
    '''
    Pop an element
    @return mixed
    '''
    def get(self):
        if not self.empty():
            return self.__queue.get()
        return False
    
    '''
    Get the underlying queue
    @return mixed
    '''
    def get_wx_lists_queue(self):
        return self.__queue
    
    '''
    Get the queue size
    @return int
    '''
    def get_size(self):
        return self.__queue.qsize()
    
    '''
    Whether the queue is empty
    @return bool
    '''
    def empty(self):
        return self.__queue.empty()
    
    '''
    Record a failed account
    @param tuple wx_data  official-account info
    @return bool
    '''
    def put_fail_wx(self, wx_data):
        self.__fail_list.append(wx_data)

    '''
    Print the failure list
    '''
    def print_fail_list(self, flush=None):
        if len(self.__fail_list) > 0:
            for fail in self.__fail_list:
                self.put(fail)
                print('the fail wx: {0}'.format(fail))
            if not flush:
                self.__fail_list = Manager().list()
        elif flush:
            print('all success')

    # Check whether there were failures
    def is_have_failed(self):
        # Check whether any failed accounts were re-queued
        return not self.empty()
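
A minimal usage sketch of Queue_server (the account names and the crawl function are assumptions):

qs = Queue_server(wx_lists=('account_a', 'account_b'))
while not qs.empty():
    wx = qs.get()
    try:
        crawl(wx)               # hypothetical crawling function
    except Exception:
        qs.put_fail_wx(wx)      # remember the failure
qs.print_fail_list()            # re-queue failures and report them
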
Example #11
class MPResult(object):
    """
    Sync result between processes
    """

    MATCH = {}  # id -> instance

    def __init__(self, result):
        from multiprocessing import Manager

        # Test result instance
        self.result = result

        # Result queue
        self.queue = Manager().Queue()

    def __getattr__(self, item):
        return getattr(self.result, item)

    @staticmethod
    def pack_result_storage(storage):
        """
        Pack result from storage
        """
        return [(get_master_id(s[0]), s[1]) for s in storage]

    def unpack_result_storage(self, storage):
        """
        Unpack result from storage
        """
        unpack_storage = []

        for master_id, message in storage:
            unpack_storage.append(
                (self.MATCH[master_id], message),
            )

        return unpack_storage

    def match(self, suite):
        """
        Match id of master process to instance
        """
        self.MATCH[get_suite_master_id(suite)] = suite

        def match(s):
            for o in s:
                if isinstance(o, BaseSuite):
                    self.MATCH[get_suite_master_id(o)] = o
                    match(o)
                else:
                    self.MATCH[get_case_master_id(o)] = o

        match(suite)

    def save_result(self):
        """
        Save result in queue
        """
        self.queue.put(
            (
                (
                    self.pack_result_storage(self.result.errors),
                    self.pack_result_storage(self.result.skipped),
                    self.pack_result_storage(self.result.failures),
                ),
                self.result.testsRun,
            ),
        )

    def make_result(self):
        """
        Merge result from queue to result instance
        """
        while not self.queue.empty():
            (errors, skipped, failures), run_tests = self.queue.get()

            self.result.errors.extend(self.unpack_result_storage(errors))
            self.result.skipped.extend(self.unpack_result_storage(skipped))
            self.result.failures.extend(self.unpack_result_storage(failures))

            self.result.testsRun += run_tests
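
A rough usage sketch of MPResult (the surrounding runner is an assumption): each worker process records its outcome with save_result(), and the master merges everything once the workers have joined:

mp_result = MPResult(result)    # wrap an existing unittest result object
mp_result.match(suite)          # map master-process ids to suite/case objects
# in every worker process, after it has run its share of the suite:
#     mp_result.save_result()
# back in the master process, after all workers have been joined:
mp_result.make_result()
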
def query_request_new(kb_ids, issue, kb_vers_map):
    """
    tips: cache layout: cache.set(box, (kb, 0/1), expiry)
    requirement: (1) compare the box list against the boxes in the cache; if a box is idle, assign it a
                     knowledge base, otherwise look in the cache for a box that already holds this KB.
                 (2) take the BOX address of every knowledge base in the request, send each request from a
                     separate process, and put the results into the queue.

    param: kb_ids  list
    return:
    """

    # Fetch the kbs already present in the cache
    logger.debug('start get add')
    start_get_add = time()

    kb_add_dict = cache.get_many(kb_ids)

    no_kbs = list(set(kb_ids) - set(kb_add_dict.keys()))  # keep a list so it can be sliced below
    boxs = cache.get('boxs')
    box_addr_dict = cache.get('box_infos')
    box_kb_dict = cache.get_many(boxs)

    box_kb_rest = list(
        filter(lambda x: (x[1][0] in boxs) and (not x[1][1]),
               box_kb_dict.items()))
    boxs_idle = list(filter(lambda x: not cache.get(x), boxs))
    logger.debug('boxs_idle:%s' % boxs_idle)

    # Assign boxes to the kbs that are not yet in the cache
    boxs_free = []
    if kb_add_dict:
        boxs_free = set(dict(boxs).keys()) - set(
            dict(kb_add_dict.values()).keys())
    else:
        boxs_free = set(dict(boxs).keys())

    if len(boxs_free) < len(no_kbs):
        rest_kbs = no_kbs[len(boxs_free):]
        kb_ids = list(set(kb_ids) - set(rest_kbs))  # keep a list so it can be indexed below

    # Write into the cache
    boxs_free_info = filter(lambda x: x[0] in boxs_free, boxs)
    temp_kb_box_list = list(zip(no_kbs, boxs_free_info))
    cache_ret = map(lambda x: cache.set(x[0], x[1], 30 * 60), temp_kb_box_list)
    logger.debug('cache_ret:%s' % list(cache_ret))

    kb_add_dict = cache.get_many(kb_ids)
    logger.debug('kb_add_dict:%s' % kb_add_dict)
    logger.debug('------get address time:%.5f' % (time() - start_get_add))
    logger.debug('start box-request ')
    start_request = time()
    num = len(kb_ids)
    q = Manager().Queue()
    p_list = []
    for i in range(0, num):
        kb = kb_ids[i]
        version = kb_vers_map[kb]
        add = kb_add_dict[kb][1]
        logger.debug('Target:%s Add:%s' % (kb, add))
        temp_p = Process(target=_writer, args=(q, kb, version, add, issue))
        p_list.append(temp_p)
        temp_p.start()

    for pr in p_list:
        pr.join()
    logger.debug('------box-request time:%.5f' % (time() - start_request))

    start_get_msg = time()
    i = 0
    ret = {'no_box': [], 'ans': [], 'not_match': [], 'fail': []}
    while not q.empty():
        msg = q.get()
        if 'not_match' in msg.keys():
            ret['not_match'].append(msg['not_match'])
        elif 'fail' in msg.keys():
            ret['fail'].append(msg['fail'])
        else:
            ret['ans'].append(msg)
        logger.debug('------%d msg:%s' % (i, msg))
        i += 1
    logger.debug('------get answers time:%.5f' % (time() - start_get_msg))

    # Asynchronously write to zk
    # set_box_zk.delay(temp_kb_box_list)

    return ret
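
The actual request is delegated to _writer, which is not shown; a rough sketch (the endpoint path, payload shape, and the use of requests are assumptions) could be:

import requests

def _writer(q, kb, version, add, issue):
    # Hypothetical worker: query one box and push either the parsed
    # answer or a failure marker onto the shared Manager queue.
    try:
        resp = requests.post('http://%s/query' % add,
                             json={'kb': kb, 'version': version, 'issue': issue},
                             timeout=10)
        q.put(resp.json())
    except Exception:
        q.put({'fail': kb})
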
Example #13
def find_pro_ite(ite,url,m_page,max_asin):
    head_csv={'A': 'ID', 'B': 'Type', 'C': 'SKU', 'D': 'Name', 'E': 'Published', 'F': 'Is featured?', 'G': 'Visibility in catalog', 'H': 'Short description',
            'I': 'Description', 'J': 'Date sale price starts', 'K': 'Date sale price ends', 'L': 'Tax status', 'M': 'Tax class', 'N': 'In stock?', 'O': 'Stock',
            'P': 'Low stock amount', 'Q': 'Backorders allowed?', 'R': 'Sold individually?', 'S': 'Weight (kg)', 'T': 'Length (cm)', 'U': 'Width (cm)', 
            'V': 'Height (cm)', 'W': 'Allow customer reviews?', 'X': 'Purchase note', 'Y': 'Sale price', 'Z': 'Regular price', 'AA': 'Categories', 'AB': 'Tags', 
            'AC': 'Shipping class', 'AD': 'Images', 'AE': 'Download limit', 'AF': 'Download expiry days', 'AG': 'Parent', 'AH': 'Grouped products', 'AI': 'Upsells', 
            'AJ': 'Cross-sells', 'AK': 'External URL', 'AM': 'Button text', 'AL': 'Position', 'AN': 'Attribute 1 name', 'AO': 'Attribute 1 value(s)', 
            'AP': 'Attribute 1 visible', 'AQ': 'Attribute 1 global', 'AR': 'Attribute 2 name', 'AS': 'Attribute 2 value(s)', 'AT': 'Attribute 2 visible', 
            'AU': 'Attribute 2 global', 'AV': 'Meta: pf_size_chart', 'AW': 'Meta: _primary_term_product_cat'
            }
    headers= {
            'Accept-Language':'en-us,en;q=0.9',
            'accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36',
            'cookie':r'session-id-time=2082787201l; session-id=143-7283193-0434042; ubid-main=135-4071739-0100158; i18n-prefs=USD; session-token=F1LPasjCZIhIQ3/UGQzCK5nsUrdENnnmnEtR39OUktmBknNz2vdXXWxPOt7TEoa6vmDlLxrS6DtUP45Q//el4TorL3P/dxmPofxCIQNxJTdAm4VegAQpnjMFvd1iytSxmCo8A8EpGmHFurR6fehGlCCvQl1+XDM9qcGXvlg6bdKcz2LQv0xkdqye+tM3mHl+vQ8A39yGrNXr0Zdf0zh4t5AWSZAEtibFA7ijzLXESlwl85N8bV4MacnWQl46mWYM; csm-hit=tb:s-M0ECGT4J6MCDVSNCDN6F|1601001176170&t:1601001180260&adb:adblk_no'
            }
    # Related settings
    url=url
    csv_path="./pro/{ite}.csv".format(ite=ite)
    asin_path="./asin/{ite}.txt".format(ite=ite)  # no need to modify

    # Create or truncate the asin file
    open(asin_path,"w",encoding="utf8",newline="")
    # Collect ASINs
    for i in find_x():
        if m_page == -1:
            pass
        elif i > m_page:
            print('Finished collecting ASINs from {i} pages'.format(i=i))
            break
        else:
            pass
        try:
            # Get the next-page URL and the product URLs on this page
            bs=find_bs(url,headers)
            print('Successfully fetched the page soup')
            page_url,asins=find_page_url(bs,asin_path)
            print('Successfully got the product ASINs and the next page')
            str_asins="".join(asins)
            print("page_url-----",page_url)
            print("asins------",str_asins)

        except Exception as e:
            print(e)
            print("Collection finished!")
            print("The last page collected was", url)
            break
        url=page_url

    # Write the CSV header
    with open(csv_path,"w",encoding='utf8',newline='') as f:
        f_csv=csv.DictWriter(f,head_csv)
        f_csv.writerow(head_csv)

    with open(asin_path,'r',encoding='utf8') as f:
        q_asin_list=set()
        all_asin=[]
        q_asins=Manager().Queue()
        q = Manager().Queue()
        q_asin=Manager().Queue()
        for asin in f:
            t=5
            main_asin=re.sub("\n",'',asin)
            try:
                find_pro(csv_path,main_asin,headers,head_csv,t,q_asin,q_asin_list,q,q_asins,all_asin,max_asin)
                print('Finished collecting {asin}'.format(asin=main_asin))
            except Exception as e:
                print('Failed to collect {asin}, skipping'.format(asin=main_asin), e)
                while not q.empty():
                    q.get()
                print('Successfully drained q')
Example #14
    FPS_clock = pygame.time.Clock()
    game_state = state.GameState()
    game_gui = gui.GUI(game_state)
    game_event_handler = event_handler.EventLogic(game_state, game_gui)
    game_gui.add_handler(game_event_handler)
    game_gui.draw(game_state.get_state())
    pygame.display.update()
    commandQueue = Manager().Queue()
    listeningProcess = Process(target=voice_listener, args=(game_event_handler, commandQueue,))
    while True:
        game_gui.draw(game_state.get_state())
        game_event_handler.event_handler()
        if game_state.get_state() == "SSH season voice mode" or game_state.get_state() == "Web season voice mode":
            if not game_event_handler.queue.empty():
                val = game_event_handler.queue.get()
                if val:
                    listeningProcess.start()
                else:
                    listeningProcess.terminate()
                    listeningProcess.join()
                    listeningProcess = Process(target=voice_listener, args=(game_event_handler, commandQueue,))
            if not commandQueue.empty():
                voice_command = commandQueue.get()
                try:
                    game_event_handler.pipi.say(voice_command %
                                                game_gui.bool_to_text[str(game_gui.light_to_string[voice_command])])
                except KeyError:
                    pass
        pygame.display.update()
        FPS_clock.tick(30)
class HostScanner:
    def __init__(self,
                 function,
                 number,
                 method,
                 port_range=None,
                 ip_range=None,
                 verbose=False,
                 write_json=False,
                 json_file='./result.json'):
        self.function = function
        self.number = number
        self.method = method
        self.port_range = port_range
        self.ip_range = ip_range
        self.write_json = write_json
        self._verbose = verbose
        self.json_file = json_file

        self._PoolExecutor = ThreadPoolExecutor if self.method == 'thread' else ProcessPoolExecutor
        self.que = Manager().Queue(10)

    def _update_json_file(self):
        if self._verbose:
            print(f"Write the results in the queue to {self.json_file}")
        json_update = {}
        while not self.que.empty():
            json_update.update(self.que.get())

        if os.path.exists(self.json_file):
            with open(self.json_file, 'r') as fr:
                json_content = json.loads(fr.read())
                json_content.update(json_update)

            with open(self.json_file, 'w') as fw:
                fw.write(json.dumps(json_content, indent=4))
        else:
            with open(self.json_file, 'w') as fw:
                fw.write(json.dumps(json_update, indent=4))

    def _ping_host_ip(self, ip):
        if self._verbose:
            print("pid is %s" % os.getpid())
        try:
            res = subprocess.call('ping -c 2 -t 2 %s' % ip,
                                  shell=True,
                                  stdout=subprocess.PIPE)
            status = 'Active' if res == 0 else 'Inactive'
            print(f'{ip} {status}')

            if self.write_json:
                if self.method == 'proc':
                    with WRITE_LOCK_PROC:
                        if self.que.full():
                            self._update_json_file()
                elif self.method == 'thread':
                    with WRITE_LOCK_THREAD:
                        if self.que.full():
                            self._update_json_file()
                self.que.put({ip: status})

        except Exception as e:
            print('Failed to get status for {}: {}'.format(ip, e))

    def _scan_host_port(self, port):
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((self.ip_range, port))

            print(f'{port} OPEN')
            if self.write_json:
                if self.method == 'proc':
                    with WRITE_LOCK_PROC:
                        if self.que.full():
                            self._update_json_file()
                elif self.method == 'thread':
                    with WRITE_LOCK_THREAD:
                        if self.que.full():
                            self._update_json_file()
                self.que.put({port: 'OPEN'})
        except Exception as e:
            # Ignore the failed port
            pass
        finally:
            s.close()

    def _runMultiWorks(self):
        with self._PoolExecutor(self.number) as Executor:
            if self.function == 'tcp':
                print(f'The scanned host is {self.ip_range}')
                Executor.map(self._scan_host_port, list(self.port_range))
            elif self.function == 'ping':
                Executor.map(self._ping_host_ip, self.ip_range)

    def run(self):
        # In order to support multiple debugging,
        # delete the generated json file in the first run
        if os.path.exists(self.json_file):
            os.remove(self.json_file)

        if self._verbose:
            print('Start')
            print('*' * 20)

        start_time = time.time()
        self._runMultiWorks()
        end_time = time.time()

        if self._verbose:
            print('*' * 20)
            print('End')
            print("Total time spent: %0.2f" % (end_time - start_time))

        if self.write_json:
            print("Writing into {}".format(self.json_file))
            self._update_json_file()
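
A minimal usage sketch of HostScanner (the address range is an assumption):

if __name__ == '__main__':
    scanner = HostScanner(function='ping',
                          number=16,
                          method='thread',
                          ip_range=['192.168.30.%d' % i for i in range(1, 255)],
                          verbose=True,
                          write_json=True)
    scanner.run()
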
def query_request(kb_ids, issue, kb_vers_map):
    """
    requirement: (1) look up the knowledge-base addresses in the cache; for KBs not in the cache, pick a BOX
                     not yet occupied by another kb and asynchronously write its TARGET.
                 (2) walk the children of /B/vm/, find idle nodes, assign each a knowledge base, send each
                     request from a separate process, and put the results into the queue.

    param: kb_ids  list
    return:
    """

    # Fetch the kbs already present in the cache
    logger.debug('start get add')
    start_get_add = time()

    kb_add_dict = cache.get_many(kb_ids)

    no_kbs = list(set(kb_ids) - set(kb_add_dict.keys()))  # keep a list so it can be sliced below

    logger.debug('no_kbs:%s' % no_kbs)
    # Assign boxes to the kbs that are not yet in the cache
    boxs = [('/B/83c4ee846cf2/B70/', '192.168.30.187:8000/70'),
            ('/B/83c4ee846cf2/B74/', '192.168.30.187:8000/74'),
            ('/B/83c4ee846cf2/B73/', '192.168.30.187:8000/73'),
            ('/B/83c4ee846cf2/B72/', '192.168.30.187:8000/72'),
            ('/B/83c4ee846cf2/B71/', '192.168.30.187:8000/71'),
            ('/B/83c4ee846cf2/B30/', '192.168.30.187:8000/30'),
            ('/B/83c4ee846cf2/B23/', '192.168.30.187:8000/23'),
            ('/B/83c4ee846cf2/B22/', '192.168.30.187:8000/22'),
            ('/B/83c4ee846cf2/B21/', '192.168.30.187:8000/21'),
            ('/B/83c4ee846cf2/B20/', '192.168.30.187:8000/20'),
            ('/B/83c4ee846cf2/B27/', '192.168.30.187:8000/27'),
            ('/B/83c4ee846cf2/B26/', '192.168.30.187:8000/26'),
            ('/B/83c4ee846cf2/B25/', '192.168.30.187:8000/25'),
            ('/B/83c4ee846cf2/B24/', '192.168.30.187:8000/24'),
            ('/B/83c4ee846cf2/B66/', '192.168.30.187:8000/66'),
            ('/B/83c4ee846cf2/B67/', '192.168.30.187:8000/67'),
            ('/B/83c4ee846cf2/B64/', '192.168.30.187:8000/64'),
            ('/B/83c4ee846cf2/B29/', '192.168.30.187:8000/29'),
            ('/B/83c4ee846cf2/B65/', '192.168.30.187:8000/65'),
            ('/B/83c4ee846cf2/B28/', '192.168.30.187:8000/28'),
            ('/B/83c4ee846cf2/B68/', '192.168.30.187:8000/68'),
            ('/B/83c4ee846cf2/B69/', '192.168.30.187:8000/69'),
            ('/B/83c4ee846cf2/B5/', '192.168.30.187:8000/5'),
            ('/B/83c4ee846cf2/B4/', '192.168.30.187:8000/4'),
            ('/B/83c4ee846cf2/B81/', '192.168.30.187:8000/81'),
            ('/B/83c4ee846cf2/B3/', '192.168.30.187:8000/3'),
            ('/B/83c4ee846cf2/B80/', '192.168.30.187:8000/80'),
            ('/B/83c4ee846cf2/B2/', '192.168.30.187:8000/2'),
            ('/B/83c4ee846cf2/B83/', '192.168.30.187:8000/83'),
            ('/B/83c4ee846cf2/B9/', '192.168.30.187:8000/9'),
            ('/B/83c4ee846cf2/B82/', '192.168.30.187:8000/82'),
            ('/B/83c4ee846cf2/B8/', '192.168.30.187:8000/8'),
            ('/B/83c4ee846cf2/B85/', '192.168.30.187:8000/85'),
            ('/B/83c4ee846cf2/B7/', '192.168.30.187:8000/7'),
            ('/B/83c4ee846cf2/B84/', '192.168.30.187:8000/84'),
            ('/B/83c4ee846cf2/B6/', '192.168.30.187:8000/6'),
            ('/B/83c4ee846cf2/B40/', '192.168.30.187:8000/40'),
            ('/B/83c4ee846cf2/B41/', '192.168.30.187:8000/41'),
            ('/B/83c4ee846cf2/B32/', '192.168.30.187:8000/32'),
            ('/B/83c4ee846cf2/B31/', '192.168.30.187:8000/31'),
            ('/B/83c4ee846cf2/B34/', '192.168.30.187:8000/34'),
            ('/B/83c4ee846cf2/B33/', '192.168.30.187:8000/33'),
            ('/B/83c4ee846cf2/B36/', '192.168.30.187:8000/36'),
            ('/B/83c4ee846cf2/B35/', '192.168.30.187:8000/35'),
            ('/B/83c4ee846cf2/B38/', '192.168.30.187:8000/38'),
            ('/B/83c4ee846cf2/B37/', '192.168.30.187:8000/37'),
            ('/B/83c4ee846cf2/B75/', '192.168.30.187:8000/75'),
            ('/B/83c4ee846cf2/B76/', '192.168.30.187:8000/76'),
            ('/B/83c4ee846cf2/B39/', '192.168.30.187:8000/39'),
            ('/B/83c4ee846cf2/B77/', '192.168.30.187:8000/77'),
            ('/B/83c4ee846cf2/B78/', '192.168.30.187:8000/78'),
            ('/B/83c4ee846cf2/B79/', '192.168.30.187:8000/79'),
            ('/B/83c4ee846cf2/B1/', '192.168.30.187:8000/1'),
            ('/B/83c4ee846cf2/B19/', '192.168.30.187:8000/19'),
            ('/B/83c4ee846cf2/B17/', '192.168.30.187:8000/17'),
            ('/B/83c4ee846cf2/B18/', '192.168.30.187:8000/18'),
            ('/B/83c4ee846cf2/B90/', '192.168.30.187:8000/90'),
            ('/B/83c4ee846cf2/B51/', '192.168.30.187:8000/51'),
            ('/B/83c4ee846cf2/B11/', '192.168.30.187:8000/11'),
            ('/B/83c4ee846cf2/B52/', '192.168.30.187:8000/52'),
            ('/B/83c4ee846cf2/B12/', '192.168.30.187:8000/12'),
            ('/B/83c4ee846cf2/B50/', '192.168.30.187:8000/50'),
            ('/B/83c4ee846cf2/B10/', '192.168.30.187:8000/10'),
            ('/B/83c4ee846cf2/B15/', '192.168.30.187:8000/15'),
            ('/B/83c4ee846cf2/B16/', '192.168.30.187:8000/16'),
            ('/B/83c4ee846cf2/B13/', '192.168.30.187:8000/13'),
            ('/B/83c4ee846cf2/B14/', '192.168.30.187:8000/14'),
            ('/B/83c4ee846cf2/B49/', '192.168.30.187:8000/49'),
            ('/B/83c4ee846cf2/B48/', '192.168.30.187:8000/48'),
            ('/B/83c4ee846cf2/B47/', '192.168.30.187:8000/47'),
            ('/B/83c4ee846cf2/B46/', '192.168.30.187:8000/46'),
            ('/B/83c4ee846cf2/B45/', '192.168.30.187:8000/45'),
            ('/B/83c4ee846cf2/B44/', '192.168.30.187:8000/44'),
            ('/B/83c4ee846cf2/B43/', '192.168.30.187:8000/43'),
            ('/B/83c4ee846cf2/B42/', '192.168.30.187:8000/42'),
            ('/B/83c4ee846cf2/B88/', '192.168.30.187:8000/88'),
            ('/B/83c4ee846cf2/B89/', '192.168.30.187:8000/89'),
            ('/B/83c4ee846cf2/B86/', '192.168.30.187:8000/86'),
            ('/B/83c4ee846cf2/B87/', '192.168.30.187:8000/87'),
            ('/B/83c4ee846cf2/B60/', '192.168.30.187:8000/60'),
            ('/B/83c4ee846cf2/B61/', '192.168.30.187:8000/61'),
            ('/B/83c4ee846cf2/B62/', '192.168.30.187:8000/62'),
            ('/B/83c4ee846cf2/B63/', '192.168.30.187:8000/63'),
            ('/B/83c4ee846cf2/B58/', '192.168.30.187:8000/58'),
            ('/B/83c4ee846cf2/B57/', '192.168.30.187:8000/57'),
            ('/B/83c4ee846cf2/B59/', '192.168.30.187:8000/59'),
            ('/B/83c4ee846cf2/B54/', '192.168.30.187:8000/54'),
            ('/B/83c4ee846cf2/B53/', '192.168.30.187:8000/53'),
            ('/B/83c4ee846cf2/B56/', '192.168.30.187:8000/56'),
            ('/B/83c4ee846cf2/B55/', '192.168.30.187:8000/55'),
            ('/B/d204c1d12b8a/B70/', '192.168.30.186:8000/70'),
            ('/B/d204c1d12b8a/B74/', '192.168.30.186:8000/74'),
            ('/B/d204c1d12b8a/B73/', '192.168.30.186:8000/73'),
            ('/B/d204c1d12b8a/B72/', '192.168.30.186:8000/72'),
            ('/B/d204c1d12b8a/B71/', '192.168.30.186:8000/71'),
            ('/B/d204c1d12b8a/B30/', '192.168.30.186:8000/30'),
            ('/B/d204c1d12b8a/B23/', '192.168.30.186:8000/23'),
            ('/B/d204c1d12b8a/B22/', '192.168.30.186:8000/22'),
            ('/B/d204c1d12b8a/B21/', '192.168.30.186:8000/21'),
            ('/B/d204c1d12b8a/B20/', '192.168.30.186:8000/20'),
            ('/B/d204c1d12b8a/B27/', '192.168.30.186:8000/27'),
            ('/B/d204c1d12b8a/B26/', '192.168.30.186:8000/26'),
            ('/B/d204c1d12b8a/B25/', '192.168.30.186:8000/25'),
            ('/B/d204c1d12b8a/B24/', '192.168.30.186:8000/24'),
            ('/B/d204c1d12b8a/B66/', '192.168.30.186:8000/66'),
            ('/B/d204c1d12b8a/B67/', '192.168.30.186:8000/67'),
            ('/B/d204c1d12b8a/B64/', '192.168.30.186:8000/64'),
            ('/B/d204c1d12b8a/B29/', '192.168.30.186:8000/29'),
            ('/B/d204c1d12b8a/B65/', '192.168.30.186:8000/65'),
            ('/B/d204c1d12b8a/B28/', '192.168.30.186:8000/28'),
            ('/B/d204c1d12b8a/B68/', '192.168.30.186:8000/68'),
            ('/B/d204c1d12b8a/B69/', '192.168.30.186:8000/69'),
            ('/B/d204c1d12b8a/B5/', '192.168.30.186:8000/5'),
            ('/B/d204c1d12b8a/B4/', '192.168.30.186:8000/4'),
            ('/B/d204c1d12b8a/B81/', '192.168.30.186:8000/81'),
            ('/B/d204c1d12b8a/B3/', '192.168.30.186:8000/3'),
            ('/B/d204c1d12b8a/B80/', '192.168.30.186:8000/80'),
            ('/B/d204c1d12b8a/B2/', '192.168.30.186:8000/2'),
            ('/B/d204c1d12b8a/B83/', '192.168.30.186:8000/83'),
            ('/B/d204c1d12b8a/B9/', '192.168.30.186:8000/9'),
            ('/B/d204c1d12b8a/B82/', '192.168.30.186:8000/82'),
            ('/B/d204c1d12b8a/B8/', '192.168.30.186:8000/8'),
            ('/B/d204c1d12b8a/B85/', '192.168.30.186:8000/85'),
            ('/B/d204c1d12b8a/B7/', '192.168.30.186:8000/7'),
            ('/B/d204c1d12b8a/B84/', '192.168.30.186:8000/84'),
            ('/B/d204c1d12b8a/B6/', '192.168.30.186:8000/6'),
            ('/B/d204c1d12b8a/B40/', '192.168.30.186:8000/40'),
            ('/B/d204c1d12b8a/B41/', '192.168.30.186:8000/41'),
            ('/B/d204c1d12b8a/B32/', '192.168.30.186:8000/32'),
            ('/B/d204c1d12b8a/B31/', '192.168.30.186:8000/31'),
            ('/B/d204c1d12b8a/B34/', '192.168.30.186:8000/34'),
            ('/B/d204c1d12b8a/B33/', '192.168.30.186:8000/33'),
            ('/B/d204c1d12b8a/B36/', '192.168.30.186:8000/36'),
            ('/B/d204c1d12b8a/B35/', '192.168.30.186:8000/35'),
            ('/B/d204c1d12b8a/B38/', '192.168.30.186:8000/38'),
            ('/B/d204c1d12b8a/B37/', '192.168.30.186:8000/37'),
            ('/B/d204c1d12b8a/B75/', '192.168.30.186:8000/75'),
            ('/B/d204c1d12b8a/B76/', '192.168.30.186:8000/76'),
            ('/B/d204c1d12b8a/B39/', '192.168.30.186:8000/39'),
            ('/B/d204c1d12b8a/B77/', '192.168.30.186:8000/77'),
            ('/B/d204c1d12b8a/B78/', '192.168.30.186:8000/78'),
            ('/B/d204c1d12b8a/B79/', '192.168.30.186:8000/79'),
            ('/B/d204c1d12b8a/B1/', '192.168.30.186:8000/1'),
            ('/B/d204c1d12b8a/B19/', '192.168.30.186:8000/19'),
            ('/B/d204c1d12b8a/B17/', '192.168.30.186:8000/17'),
            ('/B/d204c1d12b8a/B18/', '192.168.30.186:8000/18'),
            ('/B/d204c1d12b8a/B90/', '192.168.30.186:8000/90'),
            ('/B/d204c1d12b8a/B51/', '192.168.30.186:8000/51'),
            ('/B/d204c1d12b8a/B11/', '192.168.30.186:8000/11'),
            ('/B/d204c1d12b8a/B52/', '192.168.30.186:8000/52'),
            ('/B/d204c1d12b8a/B12/', '192.168.30.186:8000/12'),
            ('/B/d204c1d12b8a/B50/', '192.168.30.186:8000/50'),
            ('/B/d204c1d12b8a/B10/', '192.168.30.186:8000/10'),
            ('/B/d204c1d12b8a/B15/', '192.168.30.186:8000/15'),
            ('/B/d204c1d12b8a/B16/', '192.168.30.186:8000/16'),
            ('/B/d204c1d12b8a/B13/', '192.168.30.186:8000/13'),
            ('/B/d204c1d12b8a/B14/', '192.168.30.186:8000/14'),
            ('/B/d204c1d12b8a/B49/', '192.168.30.186:8000/49'),
            ('/B/d204c1d12b8a/B48/', '192.168.30.186:8000/48'),
            ('/B/d204c1d12b8a/B47/', '192.168.30.186:8000/47'),
            ('/B/d204c1d12b8a/B46/', '192.168.30.186:8000/46'),
            ('/B/d204c1d12b8a/B45/', '192.168.30.186:8000/45'),
            ('/B/d204c1d12b8a/B44/', '192.168.30.186:8000/44'),
            ('/B/d204c1d12b8a/B43/', '192.168.30.186:8000/43'),
            ('/B/d204c1d12b8a/B42/', '192.168.30.186:8000/42'),
            ('/B/d204c1d12b8a/B88/', '192.168.30.186:8000/88'),
            ('/B/d204c1d12b8a/B89/', '192.168.30.186:8000/89'),
            ('/B/d204c1d12b8a/B86/', '192.168.30.186:8000/86'),
            ('/B/d204c1d12b8a/B87/', '192.168.30.186:8000/87'),
            ('/B/d204c1d12b8a/B60/', '192.168.30.186:8000/60'),
            ('/B/d204c1d12b8a/B61/', '192.168.30.186:8000/61'),
            ('/B/d204c1d12b8a/B62/', '192.168.30.186:8000/62'),
            ('/B/d204c1d12b8a/B63/', '192.168.30.186:8000/63'),
            ('/B/d204c1d12b8a/B58/', '192.168.30.186:8000/58'),
            ('/B/d204c1d12b8a/B57/', '192.168.30.186:8000/57'),
            ('/B/d204c1d12b8a/B59/', '192.168.30.186:8000/59'),
            ('/B/d204c1d12b8a/B54/', '192.168.30.186:8000/54'),
            ('/B/d204c1d12b8a/B53/', '192.168.30.186:8000/53'),
            ('/B/d204c1d12b8a/B56/', '192.168.30.186:8000/56'),
            ('/B/d204c1d12b8a/B55/', '192.168.30.186:8000/55')]
    boxs_free = []
    if kb_add_dict:
        boxs_free = set(dict(boxs).keys()) - set(
            dict(kb_add_dict.values()).keys())
    else:
        boxs_free = set(dict(boxs).keys())

    if len(boxs_free) < len(no_kbs):
        rest_kbs = no_kbs[len(boxs_free):]
        kb_ids = list(set(kb_ids) - set(rest_kbs))  # keep a list so it can be indexed below

    # Write into the cache
    boxs_free_info = filter(lambda x: x[0] in boxs_free, boxs)
    temp_kb_box_list = list(zip(no_kbs, boxs_free_info))
    cache_ret = map(lambda x: cache.set(x[0], x[1], 30 * 60), temp_kb_box_list)
    logger.debug('cache_ret:%s' % list(cache_ret))

    kb_add_dict = cache.get_many(kb_ids)
    logger.debug('kb_add_dict:%s' % kb_add_dict)
    logger.debug('------get address time:%.5f' % (time() - start_get_add))
    logger.debug('start box-request ')
    start_request = time()
    num = len(kb_ids)
    q = Manager().Queue()
    p_list = []
    for i in range(0, num):
        kb = kb_ids[i]
        version = kb_vers_map[kb]
        add = kb_add_dict[kb][1]
        logger.debug('Target:%s Add:%s' % (kb, add))
        temp_p = Process(target=_writer, args=(q, kb, version, add, issue))
        p_list.append(temp_p)
        temp_p.start()

    for pr in p_list:
        pr.join()
    logger.debug('------box-request time:%.5f' % (time() - start_request))

    start_get_msg = time()
    i = 0
    ret = {'no_box': [], 'ans': [], 'not_match': [], 'fail': []}
    while not q.empty():
        msg = q.get()
        if 'not_match' in msg.keys():
            ret['not_match'].append(msg['not_match'])
        elif 'fail' in msg.keys():
            ret['fail'].append(msg['fail'])
        else:
            ret['ans'].append(msg)
        logger.debug('------%d msg:%s' % (i, msg))
        i += 1
    logger.debug('------get answers time:%.5f' % (time() - start_get_msg))

    # Asynchronously write to zk
    # set_box_zk.delay(temp_kb_box_list)

    return ret
def query_request_0(kb_ids, issue, kb_vers_map):
    """
    requirement: (1) look up the vms under the /B/ node in zk
                 (2) walk the children of /B/vm/, find idle nodes, assign each a knowledge base, send each
                     request from a separate process, and put the results into the queue.

    param: kb_ids  list
    return:
    """
    try:
        zk = KazooClient(hosts=ZOOKEEPER['HOST'])
        zk.start()
    except Exception as e:
        err_log.error(e)
        raise Exception(1910)

    _node_list = zk.get_children('/B/')
    logger.debug('vm:%s' % _node_list)
    q = Manager().Queue()
    ret = {'no_box': [], 'ans': [], 'not_match': [], 'fail': []}

    p_list = []
    random.shuffle(_node_list)
    for vm in _node_list:

        box_list = zk.get_children('/B/' + vm + '/')
        random.shuffle(box_list)

        for box in box_list:
            node = '/B/' + vm + '/' + box + '/'
            _str, _ = zk.get(node)
            _dict = json.loads(_str)
            if _dict['status'] == '0':
                target = kb_ids.pop()
                logger.debug('------Target:%s Add:%s' % (target, _dict['Add']))

                temp_p = Process(target=_writer,
                                 args=(q, target, kb_vers_map[target],
                                       _dict['Add'], issue))
                p_list.append(temp_p)
                temp_p.start()

            if not kb_ids:
                break

        if not kb_ids:
            break
    else:
        if kb_ids:
            ret['no_box'] = kb_ids
    for pr in p_list:
        pr.join()
    zk.stop()

    i = 0
    while not q.empty():
        msg = q.get()
        if 'not_match' in msg.keys():
            ret['not_match'].append(msg['not_match'])
        elif 'fail' in msg.keys():
            ret['fail'].append(msg['fail'])
        else:
            ret['ans'].append(msg)
        logger.debug('------%d msg:%s' % (i, msg))
        i += 1
    logger.debug('get answers finished')

    return ret
# Set up Processes
number_of_processes = 16
for i in range(number_of_processes):
    worker = MD5Cracker(work_queue, global_namespace)
    worker.start()
    workers.append(worker)

print("Target Hash: {}".format(hash))

maxChars = 13
while_count = 1
for baseWidth in range(1, maxChars + 1):

    while global_namespace.finished is False:
        if work_queue.empty():
            print("checking passwords width [" + str(baseWidth) + "]")

            # set is width, position, baseString
            work_queue.put({'width': baseWidth, 'position': 0, 'baseString': ""})
            break
        else:

            if while_count % 10 == 0:
                global_namespace.count = 0
                while_count = 1
            else:
                print("{:,d} passwords/sec".format(global_namespace.count // while_count))
                while_count += 1

            print("Queue Size: {}".format(work_queue.qsize()))
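
MD5Cracker itself is not included in the snippet; a hypothetical worker consistent with the queue items above ({'width', 'position', 'baseString'}) and the shared namespace could look roughly like this (the target hash is assumed to live on the namespace as ns.target):

import hashlib
import string
from multiprocessing import Process

class MD5Cracker(Process):
    CHARS = string.ascii_lowercase + string.digits

    def __init__(self, work_queue, namespace):
        super(MD5Cracker, self).__init__()
        self.work_queue = work_queue
        self.ns = namespace

    def run(self):
        # Hypothetical brute-force worker: extend the base string one
        # character at a time and check each candidate's MD5 digest.
        while not self.ns.finished:
            job = self.work_queue.get()
            for c in self.CHARS:
                candidate = job['baseString'] + c
                self.ns.count += 1
                if hashlib.md5(candidate.encode()).hexdigest() == self.ns.target:
                    self.ns.finished = True
                    print("Found: %s" % candidate)
                    return
                if len(candidate) < job['width']:
                    self.work_queue.put({'width': job['width'],
                                         'position': job['position'] + 1,
                                         'baseString': candidate})
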
def main():
    import argparse
    import logging
    import os
    import yaml

    parser = argparse.ArgumentParser()
    parser.add_argument('classifier')
    parser.add_argument('--postprocess', action="store_true",
                        help='Run postprocessing, close blobs and remove noise')
    parser.add_argument('videolist', help='A file listed all the videos to be indexed')
    parser.add_argument('cores', type=int, help='Number of parallel processes')
    args = parser.parse_args()

    logging.basicConfig(level=logging.WARNING,
                        format="%(asctime)s - %(message)s")

    classifier = zipfile.ZipFile(args.classifier)
    global forest0, svmmodels, training_bosts, hist0
    forest0, hist0, forest1, hist1, training_bosts, svmmodels, prior = \
        load_from_classifier(classifier)
    classifier.close()

    KEY_FRAME_PERIOD = 2 # in seconds
    #queue = Queue.Queue()
    #data_queue = Queue.Queue()
    queue = Manager().Queue()
    data_queue = Manager().Queue()

    for processes in [4]:    
        video_list = open(args.videolist, 'r')
        log_file = open('statistics%d.txt' % processes, 'w')

        fps = 0
        fps_count = 0

        for video_file in video_list:
            video_file = video_file.strip()
            name = os.path.splitext(video_file)[0]
            file_path = os.path.join(VIDEO_RESOURCE, video_file)
            log_file.write(file_path+"\n")

            capture = cv.CaptureFromFile(file_path)
            frame_rate = cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FPS)
            total_frames = cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_COUNT)
            log_file.write("frame rate: %.3f, total frames: %d\n" % (frame_rate, total_frames)) 

            start_time0 = time.time()
            key_frame_counter = 0    
            frame = cv.QueryFrame(capture)
            os.makedirs("tmp")
            while frame:
                cv.SaveImage("tmp/" + name + "%d.png" % key_frame_counter, frame)
                for i in xrange(int(KEY_FRAME_PERIOD * frame_rate)):
                    frame = cv.QueryFrame(capture)
                key_frame_counter += 1
            for i in xrange(key_frame_counter):
                data_queue.put(i)

            start_time = time.time()

            ps = []
            for group in xrange(processes):
                p = Process(target = calculate_class, args=(name, queue, data_queue, ))
                #p = threading.Thread(target = calculate_class, args=(name, queue, data_queue, ))
                p.start()
                ps.append(p)
            for p in ps:
                p.join()

            elapse_time = time.time() - start_time

            accuracy_file = open('360.txt', 'w')
            while not queue.empty():
                q_entry = queue.get()
                frame_counter = q_entry[0]
                ILP = q_entry[1]
                accuracy_file.write('%d' % frame_counter)
                for class_index, score in enumerate(ILP):
                    accuracy_file.write(',%.02f' % score)
                accuracy_file.write('\n')
            accuracy_file.close()

            os.system("rm -rf tmp")

            log_file.write("decoding time: %.2f, total time: %.2f, key frames: %d, frame per sec: %.3f\n" \
                % (start_time - start_time0, elapse_time, key_frame_counter, key_frame_counter / elapse_time))
            fps += key_frame_counter / elapse_time
            fps_count += 1

            #time.sleep(10)

        video_list.close()
        log_file.write("average fps: %.3f\n" % (fps/fps_count))
        log_file.close()
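
# calculate_class is not defined in this snippet. Based on how the queues are wired above
# (frame indices go in through data_queue, (frame_counter, ILP_scores) come back through
# queue), a hypothetical worker could look like this; classify_keyframe is a stub standing
# in for the real classifier:
try:
    from Queue import Empty   # Python 2, matching the snippet above
except ImportError:
    from queue import Empty   # Python 3

def classify_keyframe(image_path):
    """Stub: the real worker would run the loaded classifier on the saved key frame."""
    return [0.0]

def calculate_class(name, queue, data_queue):
    while True:
        try:
            frame_counter = data_queue.get_nowait()
        except Empty:
            break
        image_path = "tmp/" + name + "%d.png" % frame_counter
        queue.put((frame_counter, classify_keyframe(image_path)))
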
Exemple #20
0
def sub_cmd_multisearch(args):
    if not (args.m and args.sc):
        exit(1)

    config = xq.get_strategy_config(args.sc)
    pprint.pprint(config)

    module_name = config["module_name"].replace("/", ".")
    class_name = config["class_name"]
    symbol = config['symbol']
    md = DBMD(args.m, kl.KLINE_DATA_TYPE_JSON)
    start_time, end_time = get_time_range(md, symbol, args.r)

    count = args.count
    cpus = cpu_count()
    print("count: %s,  cpus: %s" % (count, cpus) )

    result_q = Manager().Queue()  # only a Manager's Queue works together with Pool
    task_q = Manager().Queue()  # only a Manager's Queue works together with Pool
    for index in range(count):
        task_q.put(index)

    print('Parent process %s.' % os.getpid())
    p = Pool(cpus)
    for i in range(cpus):
        #p.apply_async(child_process_test, args=(i, task_q, result_q))
        p.apply_async(child_process, args=(i, task_q, result_q, args.m, config, module_name, class_name, start_time, end_time))
    print('Waiting for all subprocesses done...')
    p.close()

    start_time = datetime.now()
    result = []
    while len(result) < count:
        if result_q.empty():
            time.sleep(1)
        else:
            value = result_q.get()
            print("result value: ", value)
            result.append(value)

        sys.stdout.write(
            "  %d/%d,  cost: %s,  progress: %g%% \r"
            % (
                len(result),
                count,
                datetime.now() - start_time,
                round((len(result) / count) * 100, 2)
            )
        )
        sys.stdout.flush()

    print("")
    #print("result queue(len: %s)" % (result_q.qsize()))

    p.join()
    print('All subprocesses done.')

    sorted_rs = sorted(result, key=lambda x: x[1][0], reverse=True)
    for r in sorted_rs:
        #print("r: ", r)
        info = "%6s    %30s    %s " % r
        print(info)
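
# The comments above note that only a Manager's Queue can be handed to Pool workers: passing
# a plain multiprocessing.Queue through apply_async typically fails with a RuntimeError about
# queues only being shareable through inheritance, while a Manager().Queue() is a picklable
# proxy. A minimal self-contained sketch of the same Pool + Manager queue pattern:
from multiprocessing import Manager, Pool

def worker(i, task_q, result_q):
    result_q.put((i, task_q.get()))

if __name__ == '__main__':
    m = Manager()
    task_q, result_q = m.Queue(), m.Queue()
    for n in range(4):
        task_q.put(n)
    pool = Pool(4)
    for i in range(4):
        pool.apply_async(worker, args=(i, task_q, result_q))
    pool.close()
    pool.join()
    while not result_q.empty():
        print(result_q.get())
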
def query_request_z(kb_ids, issue, kb_vers_map):
    """
    requirement: (1)cache中查找知识库的地址,cache中没有的,则为其在cache中没有被其他kb占用的box中选取BOX,并异步写入其TARGET
                 (2)遍历/B/vm/下的子节点,找到空闲节点,为其赋一个知识库,单独起进程发送请求,请求结果放入queue

    param: kb_ids 列表
    return:
    """

    # Fetch the kbs already present in the cache
    # start_get_add = time()

    # kb_add_dict = cache.get_many(kb_ids)

    # no_kbs = set(kb_ids) - set(kb_add_dict.keys())

    # logger.debug('no_kbs:%s' % no_kbs)
    # Assign a box to each kb that is not in the cache

    if cache.ttl("boxs") == 0:
        cache.set("boxs", str(_acquire_zk_node()), timeout=None)

    boxs = eval(cache.get("boxs"))
    add_dict = list(dict(boxs).values())

    # boxs_free = []
    # if kb_add_dict:
    #     boxs_free = set(dict(boxs).keys()) - set(dict(kb_add_dict.values()).keys())
    # else:
    #     boxs_free = set(dict(boxs).keys())

    # if len(boxs_free) < len(no_kbs):
    #     rest_kbs = no_kbs[len(boxs_free):]
    #     kb_ids = set(kb_ids) - set(rest_kbs)

    # Write to the cache
    # boxs_free_info = filter(lambda x: x[0] in boxs_free, boxs)
    # temp_kb_box_list = list(zip(no_kbs, boxs_free_info))
    # cache_ret = map(lambda x: cache.set(x[0], x[1], 30 * 60), temp_kb_box_list)
    # logger.debug('cache_ret:%s' % list(cache_ret))

    # kb_add_dict = cache.get_many(kb_ids)
    # logger.debug('kb_add_dict:%s' % kb_add_dict)
    # logger.debug('------get address time:%.5f' % (time() - start_get_add))

    # start_request = time()
    num = len(kb_ids)
    global lock_label
    while True:
        if lock_label:
            lock_label = False
            break

    global top_label
    seed = int(top_label)
    temp = (seed + 1) % len(boxs)
    top_label = temp

    lock_label = True

    logger.debug(seed)

    q = Manager().Queue()
    p_list = []
    for i in range(0, num):
        kb = kb_ids[i]
        version = kb_vers_map[kb]
        # add = kb_add_dict[kb][1]
        add = add_dict[(seed + i) % len(boxs)]
        # logger.debug('Target:%s Add:%s' % (kb, add))
        temp_p = Process(target=_writer, args=(q, kb, version, add, issue))
        p_list.append(temp_p)
        temp_p.start()

    for pr in p_list:
        pr.join()

    # logger.debug('test.q is ' + q.empty())
    # logger.debug('------box-request time:%.5f' % (time() - start_request))

    # start_get_msg = time()
    i = 0
    ret = {'no_box': [], 'ans': [], 'not_match': [], 'fail': []}
    while not q.empty():
        msg = q.get()
        if 'not_match' in msg.keys():
            ret['not_match'].append(msg['not_match'])
        elif 'fail' in msg.keys():
            ret['fail'].append(msg['fail'])
        else:
            ret['ans'].append(msg)
        logger.debug('------%d msg:%s' % (i, msg))
        i += 1
    # logger.debug('------get answers time:%.5f' % (time() - start_get_msg))

    # Write to zk asynchronously
    # set_box_zk.delay(temp_kb_box_list)

    return ret
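
# The lock_label busy-wait above is not atomic, so two concurrent requests can both grab the
# same seed or spin indefinitely. A sketch of the same round-robin seed selection guarded by
# a real lock, assuming (as the module-level globals suggest) that the counter is shared
# between threads of a single process:
import threading

_rr_lock = threading.Lock()
_rr_counter = 0

def next_seed(n_boxes):
    """Return the next round-robin starting index in [0, n_boxes)."""
    global _rr_counter
    with _rr_lock:
        seed = _rr_counter
        _rr_counter = (seed + 1) % n_boxes
    return seed
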
Exemple #22
0
    def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
        """
        计算指定日期的样本个股的因子载荷, 并保存至因子数据库
        Parameters:
        --------
        :param start_date: datetime-like, str
            开始日期, 格式: YYYY-MM-DD or YYYYMMDD
        :param end_date: datetime-like, str
            结束日期, 如果为None, 则只计算start_date日期的因子载荷, 格式: YYYY-MM-DD or YYYYMMDD
        :param month_end: bool, 默认为True
            如果为True, 则只计算月末时点的因子载荷
        :param save: bool, 默认为True
            是否保存至因子数据库
        :param kwargs:
            'multi_proc': bool, True=采用多进程, False=采用单进程, 默认为False
        :return: dict
            因子载荷数据
        """
        # Get the trading-day series and the stock basics table
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
        all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # Iterate over the trading days and compute the LIQUIDITY factor loadings
        dict_raw_liquidity = None
        for calc_date in trading_days_series:
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            dict_stom = None
            dict_stoq = None
            dict_stoa = None
            dict_raw_liquidity = None
            logging.info('[%s] Calc Liquidity factor loading.' % Utils.datetimelike_to_str(calc_date))
            # Iterate over the stocks and compute each stock's LIQUIDITY factor value
            s = (calc_date - datetime.timedelta(days=risk_ct.LIQUIDITY_CT.listed_days)).strftime('%Y%m%d')
            stock_basics = all_stock_basics[all_stock_basics.list_date < s]
            ids = []
            stoms = []
            stoqs = []
            stoas = []
            raw_liquidities = []

            if 'multi_proc' not in kwargs:
                kwargs['multi_proc'] = False
            if not kwargs['multi_proc']:
                # Compute LIQUIDITY factor values in a single process
                for _, stock_info in stock_basics.iterrows():
                    logging.debug("[%s] Calc %s's LIQUIDITY factor loading." % (Utils.datetimelike_to_str(calc_date, dash=True), stock_info.symbol))
                    liquidity_data = cls._calc_factor_loading(stock_info.symbol, calc_date)
                    if liquidity_data is not None:
                        ids.append(liquidity_data['code'])
                        stoms.append(liquidity_data['stom'])
                        stoqs.append(liquidity_data['stoq'])
                        stoas.append(liquidity_data['stoa'])
                        raw_liquidities.append(liquidity_data['liquidity'])
            else:
                # Compute LIQUIDITY factor values with multiple processes
                q = Manager().Queue()
                p = Pool(4)
                for _, stock_info in stock_basics.iterrows():
                    p.apply_async(cls._calc_factor_loading_proc, args=(stock_info.symbol, calc_date, q,))
                p.close()
                p.join()
                while not q.empty():
                    liquidity_data = q.get(True)
                    ids.append(liquidity_data['code'])
                    stoms.append(liquidity_data['stom'])
                    stoqs.append(liquidity_data['stoq'])
                    stoas.append(liquidity_data['stoa'])
                    raw_liquidities.append(liquidity_data['liquidity'])

            date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
            dict_stom = dict({'date': [date_label]*len(ids), 'id': ids, 'factorvalue': stoms})
            dict_stoq = dict({'date': [date_label]*len(ids), 'id': ids, 'factorvalue': stoqs})
            dict_stoa = dict({'date': [date_label]*len(ids), 'id': ids, 'factorvalue': stoas})
            dict_raw_liquidity = dict({'date': [date_label]*len(ids), 'id': ids, 'factorvalue': raw_liquidities})
            # Load the Size factor values and orthogonalize the liquidity factor against Size
            size_factor_path = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.SIZE_CT.db_file)
            df_size = Utils.read_factor_loading(size_factor_path, Utils.datetimelike_to_str(calc_date, dash=False))
            df_size.drop(columns='date', inplace=True)
            df_size.rename(columns={'factorvalue': 'size'}, inplace=True)
            df_liquidity = pd.DataFrame(dict({'id': ids, 'liquidity': raw_liquidities}))
            df_liquidity = pd.merge(left=df_liquidity, right=df_size, how='inner', on='id')
            arr_liquidity = Utils.normalize_data(Utils.clean_extreme_value(np.array(df_liquidity['liquidity']).reshape((len(df_liquidity), 1))))
            arr_size = Utils.normalize_data(Utils.clean_extreme_value(np.array(df_liquidity['size']).reshape((len(df_liquidity), 1))))
            model = sm.OLS(arr_liquidity, arr_size)
            results = model.fit()
            df_liquidity['liquidity'] = results.resid
            df_liquidity.drop(columns='size', inplace=True)
            df_liquidity.rename(columns={'liquidity': 'factorvalue'}, inplace=True)
            df_liquidity['date'] = date_label
            # Persist the factor loadings
            if save:
                str_date = Utils.datetimelike_to_str(calc_date, dash=False)
                factor_header = ['date', 'id', 'factorvalue']
                Utils.factor_loading_persistent(cls._db_file, 'stom_{}'.format(str_date), dict_stom, factor_header)
                Utils.factor_loading_persistent(cls._db_file, 'stoq_{}'.format(str_date), dict_stoq, factor_header)
                Utils.factor_loading_persistent(cls._db_file, 'stoa_{}'.format(str_date), dict_stoa, factor_header)
                Utils.factor_loading_persistent(cls._db_file, 'rawliquidity_{}'.format(str_date), dict_raw_liquidity, factor_header)
                Utils.factor_loading_persistent(cls._db_file, str_date, df_liquidity.to_dict('list'), factor_header)

            # Pause for 180 seconds
            # logging.info('Suspending for 180s.')
            # time.sleep(180)
        return dict_raw_liquidity
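
# The orthogonalization step above regresses the raw liquidity factor on the Size factor
# (no intercept, as in the sm.OLS call) and keeps the residuals, so the persisted loading is
# the part of liquidity not explained by size. A stripped-down sketch with synthetic data:
import numpy as np
import statsmodels.api as sm

rng = np.random.RandomState(0)
size = rng.normal(size=(100, 1))
liquidity = 0.8 * size + rng.normal(scale=0.3, size=(100, 1))

results = sm.OLS(liquidity, size).fit()
orthogonal_liquidity = results.resid                # liquidity stripped of its size component
print(float(orthogonal_liquidity @ size[:, 0]))     # ~0: residuals are orthogonal to the regressor
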
Exemple #23
0
    def calc_factor_loading(cls,
                            start_date,
                            end_date=None,
                            month_end=True,
                            save=False,
                            **kwargs):
        """
        计算指定日期的样本个股的因子载荷,并保存至因子数据库
        Parameters
        --------
        :param start_date: datetime-like, str
            开始日期
        :param end_date: datetime-like, str,默认None
            结束日期,如果为None,则只计算start_date日期的因子载荷
        :param month_end: bool,默认True
            只计算月末时点的因子载荷,该参数只在end_date不为None时有效,并且不论end_date是否为None,都会计算第一天的因子载荷
        :param save: 是否保存至因子数据库,默认为False
        :param kwargs:
            'multi_proc': bool, True=采用多进程并行计算, False=采用单进程计算, 默认为False
        :return: 因子载荷,DataFrame
        --------
            因子载荷,DataFrame
            0: id, 证券ID
            1: factorvalue, 因子载荷
            如果end_date=None,返回start_date对应的因子载荷数据
            如果end_date!=None,返回最后一天的对应的因子载荷数据
            如果没有计算数据,返回None
        """
        # 1.取得交易日序列及股票基本信息表
        start_date = Utils.to_date(start_date)
        if end_date is not None:
            end_date = Utils.to_date(end_date)
            trading_days_series = Utils.get_trading_days(start=start_date,
                                                         end=end_date)
        else:
            trading_days_series = Utils.get_trading_days(end=start_date,
                                                         ndays=1)
        # all_stock_basics = CDataHandler.DataApi.get_secu_basics()
        # 2. Iterate over the trading days and compute the APM factor loadings
        dict_apm = None
        for calc_date in trading_days_series:
            dict_apm = {'date': [], 'id': [], 'factorvalue': []}
            if month_end and (not Utils.is_month_end(calc_date)):
                continue
            # 2.1. For each stock, compute the APM stat statistic and the trailing 20-day return, appending them to stat_lst and ret20_lst
            s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d')
            stock_basics = Utils.get_stock_basics(s)
            stat_lst = []
            ret20_lst = []
            symbol_lst = []

            if 'multi_proc' not in kwargs:
                kwargs['multi_proc'] = False
            if not kwargs['multi_proc']:
                # Single-process computation
                for _, stock_info in stock_basics.iterrows():
                    stat_i = cls._calc_factor_loading(stock_info.symbol,
                                                      calc_date)
                    ret20_i = Utils.calc_interval_ret(stock_info.symbol,
                                                      end=calc_date,
                                                      ndays=20)
                    if stat_i is not None and ret20_i is not None:
                        stat_lst.append(stat_i)
                        ret20_lst.append(ret20_i)
                        symbol_lst.append(
                            Utils.code_to_symbol(stock_info.symbol))
                        logging.info('APM of %s = %f' %
                                     (stock_info.symbol, stat_i))
            else:
                # Parallel computation with multiple processes
                q = Manager().Queue()
                p = Pool(4)  # at most 4 concurrent processes
                for _, stock_info in stock_basics.iterrows():
                    p.apply_async(cls._calc_factor_loading_proc,
                                  args=(
                                      stock_info.symbol,
                                      calc_date,
                                      q,
                                  ))
                p.close()
                p.join()
                while not q.empty():
                    apm_value = q.get(True)
                    symbol_lst.append(apm_value[0])
                    stat_lst.append(apm_value[1])
                    ret20_lst.append(apm_value[2])

            assert len(stat_lst) == len(ret20_lst)
            assert len(stat_lst) == len(symbol_lst)

            # 2.2. Build the APM factor
            # 2.2.1. Cross-sectionally regress the stat statistic on the momentum factor ret20: stat_j = \beta * Ret20_j + \epsilon_j
            #     the residual vector is the APM factor of the corresponding stocks
            # Before the regression, winsorize and standardize both the stat statistic and the momentum factor
            stat_arr = np.array(stat_lst).reshape((len(stat_lst), 1))
            ret20_arr = np.array(ret20_lst).reshape((len(ret20_lst), 1))
            stat_arr = Utils.clean_extreme_value(stat_arr)
            stat_arr = Utils.normalize_data(stat_arr)
            ret20_arr = Utils.clean_extreme_value(ret20_arr)
            ret20_arr = Utils.normalize_data(ret20_arr)
            # Regression analysis
            # ret20_arr = sm.add_constant(ret20_arr)
            apm_model = sm.OLS(stat_arr, ret20_arr)
            apm_result = apm_model.fit()
            apm_lst = list(np.around(apm_result.resid, 6))  # APM factor loadings rounded to 6 decimal places
            assert len(apm_lst) == len(symbol_lst)
            # 2.2.2. Build the APM factor dict and persist it
            date_label = Utils.get_trading_days(calc_date, ndays=2)[1]
            dict_apm = {
                'date': [date_label] * len(symbol_lst),
                'id': symbol_lst,
                'factorvalue': apm_lst
            }
            df_std_apm = Utils.normalize_data(pd.DataFrame(dict_apm),
                                              columns='factorvalue',
                                              treat_outlier=True,
                                              weight='eq')
            if save:
                # Utils.factor_loading_persistent(cls._db_file, calc_date.strftime('%Y%m%d'), dict_apm)
                cls._save_factor_loading(cls._db_file,
                                         Utils.datetimelike_to_str(calc_date,
                                                                   dash=False),
                                         dict_apm,
                                         'APM',
                                         factor_type='raw',
                                         columns=['date', 'id', 'factorvalue'])
                cls._save_factor_loading(cls._db_file,
                                         Utils.datetimelike_to_str(calc_date,
                                                                   dash=False),
                                         df_std_apm,
                                         'APM',
                                         factor_type='standardized',
                                         columns=['date', 'id', 'factorvalue'])

            # # 2.3. Build the PureAPM factor
            # # Convert stat_arr to a DataFrame; at this point stat_arr has already been winsorized and standardized
            # df_stat = DataFrame(stat_arr, index=symbol_lst, columns=['stat'])
            # # Get the dependent factors used for purification
            # df_dependent_factor = cls.get_dependent_factors(calc_date)
            # # Concatenate df_stat with the dependent factors
            # df_data = pd.concat([df_stat, df_dependent_factor], axis=1, join='inner')
            # # OLS regression to purify the APM factor
            # arr_data = np.array(df_data)
            # pure_apm_model = sm.OLS(arr_data[:, 0], arr_data[:, 1:])
            # pure_apm_result = pure_apm_model.fit()
            # pure_apm_lst = list(np.around(pure_apm_result.resid, 6))
            # pure_symbol_lst = list(df_data.index)
            # assert len(pure_apm_lst) == len(pure_symbol_lst)
            # # Build the pure_apm factor dict and persist it
            # dict_pure_apm = {'date': [date_label]*len(pure_symbol_lst), 'id': pure_symbol_lst, 'factorvalue': pure_apm_lst}
            # pure_apm_db_file = os.path.join(factor_ct.FACTOR_DB.db_path, factor_ct.APM_CT.pure_apm_db_file)
            # if save:
            #     Utils.factor_loading_persistent(pure_apm_db_file, calc_date.strftime('%Y%m%d'), dict_pure_apm)
            # # Sleep for 360 seconds
            # logging.info('Suspended for 360s.')
            # time.sleep(360)
        return dict_apm
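
    # _calc_factor_loading_proc is not shown here. A hypothetical worker consistent with the
    # single-process branch above (compute the stat statistic and the 20-day return, enqueue
    # a (symbol, stat, ret20) tuple only when both are available):
    @classmethod
    def _calc_factor_loading_proc(cls, code, calc_date, q):
        stat_i = cls._calc_factor_loading(code, calc_date)
        ret20_i = Utils.calc_interval_ret(code, end=calc_date, ndays=20)
        if stat_i is not None and ret20_i is not None:
            q.put((Utils.code_to_symbol(code), stat_i, ret20_i))
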
Exemple #24
0
def main():
    import argparse
    import logging
    import os
    import yaml
    import cv

    global processes
    global forest0, svmmodels, training_bosts, hist0

    parser = argparse.ArgumentParser()
    parser.add_argument('classifier')
    parser.add_argument('cores', type=int, help='Number of parallel processes')
    parser.add_argument('--postprocess', action="store_true",
                        help='Run postprocessing, close blobs and remove noise')
    args = parser.parse_args()

    logging.basicConfig(level=logging.WARNING,
                        format="%(asctime)s - %(message)s")

    classifier = zipfile.ZipFile(args.classifier)
    forest0, hist0, forest1, hist1, training_bosts, svmmodels, prior = \
        load_from_classifier(classifier)
    classifier.close()
    
    processes = args.cores
    pool = Pool(processes = processes)

    KEY_FRAME_PERIOD = 2 # in seconds
    q = Manager().Queue()
    total_frame = 0

    new_flag = True
    while True:
        if not new_flag:
            print "wait..."
            time.sleep(1)
        stream_list = get_list(CLOUDLET_RESOURCE, STREAM_RESOURCE)
        new_flag = False
        prev_stream = None
        for stream in stream_list:
            if stream.get("stream_description").find("denatured") == -1 or stream.get("stream_description").find("video") == -1 or stream.get("stream_description").find("pstf") != -1:
                prev_stream = stream
                continue
            ILP_max = [] 
            for i in xrange(len(CLASSES)):
                ILP_max.append(0)
            ILP_list = []
            for i in xrange(len(CLASSES)):
                ILP_list.append([])
            path, name = stream.get("path").replace("mnt", "cloudletstore").rsplit('/', 1)
            print os.path.join(path, name)
            path_p, name_p = prev_stream.get("path").replace("mnt", "cloudletstore").rsplit('/', 1)
            print os.path.join(path_p, name_p)
            statinfo = os.stat(os.path.join(path_p, name_p))      
            prev_stream = stream
           
            if statinfo.st_size == 0:
                continue

            new_flag = True
            frame_rate = 30
     
            capture = cv.CaptureFromFile(os.path.join(path, name))
            frame_rate = cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FPS)
            total_frames = cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_COUNT)
            frame = cv.QueryFrame(capture)
            print frame_rate, total_frames
            print capture
   
            start_time = time.time()
            
            key_frame_counter_base = 0    
            while frame:
                process_num = 0
                while frame:
                    cv.SaveImage("indexing" + "%d.png" % process_num, frame)
                    for i in xrange(int(KEY_FRAME_PERIOD * frame_rate)):
                        frame = cv.QueryFrame(capture)
                    process_num += 1
                    if process_num == processes:
                        break
                pool.map(calculate_class, [(q, x) for x in xrange(key_frame_counter_base, key_frame_counter_base + process_num)])
          
            while not q.empty():
                q_entry = q.get()
                key_frame_counter = q_entry[0]
                ILP = q_entry[1]
                for class_index, score in enumerate(ILP): 
                    if score > SCORE_THRESHOLD:
                        ILP_list[class_index].append((key_frame_counter * int(KEY_FRAME_PERIOD * frame_rate) + 1, score))
                        print (CLASSES[class_index], "%.02f" % score),
                    if score > ILP_max[class_index]:
                        ILP_max[class_index] = score
                print

                key_frame_counter_base += process_num

            for class_index, frame_list in enumerate(ILP_list):
                if not frame_list:
                    continue
                frame_list_split = split_frame_list(frame_list, int(KEY_FRAME_PERIOD * frame_rate) * 2)
                for frame_list, local_max_score in frame_list_split: 
                    tag_entry = {}
                    tag_entry["tag"] = CLASSES[class_index] + ":%d" % (ILP_max[class_index] * 100)
                    tag_entry["tag_value"] = local_max_score
                    tag_entry["offset"] = frame_list[0] / frame_rate
                    tag_entry["duration"] = (frame_list[-1] - frame_list[0]) / frame_rate
                    tag_entry["segment"] = stream.get("segment")
                    print tag_entry
                    ret_dict = post(CLOUDLET_RESOURCE, TAG_RESOURCE, tag_entry)        
        
            if stream.get("stream_description").find("pstf") == -1:
                stream_entry = {"stream_description": stream.get("stream_description") + "pstf;"}
                ret_dict = put(CLOUDLET_RESOURCE, stream.get("resource_uri"), stream_entry)        
           
            elapse_time = time.time() - start_time
            print "max score:"
            print [(CLASSES[class_index], "%.02f" % score) for class_index, score in enumerate(ILP_max)]
            print "total time: %.2f, key frames: %d, frame per sec: %.2f" \
               % (elapse_time, key_frame_counter_base, key_frame_counter_base / elapse_time)
            print
Exemple #25
0
            print('Exiting')
            break


if __name__ == '__main__':
    print('Main process started')
    # Build a task queue
    # pageQueue = Queue(300)
    pageQueue = Manager().Queue(300)
    for i in range(1, 50):
        pageQueue.put(i)
    # Build a result queue to store the fetched responses
    # dataQueue = Queue()
    dataQueue = Manager().Queue()
    # Print to check whether the queues are empty
    print(pageQueue.empty(), dataQueue.empty())

    downloadProcess = []
    for i in range(0, 3):
        p1 = Process(target=getdata, args=(pageQueue, dataQueue))
        p1.start()
        downloadProcess.append(p1)

    for process in downloadProcess:
        process.join()

    print('Total count', dataQueue.empty(), dataQueue.qsize())

    parseProcess = []
    for i in range(0, 3):
        parse1 = Process(target=parsedata, args=(dataQueue, ))
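
# getdata and parsedata are not shown (the truncated lines at the top of this example are
# the tail of one of them). Hypothetical workers matching how the queues are wired -- the
# URL pattern is an assumption, the real spider's endpoint is not given:
import requests

def getdata(pageQueue, dataQueue):
    while not pageQueue.empty():
        page = pageQueue.get()
        resp = requests.get('http://example.com/list?page=%d' % page, timeout=10)
        dataQueue.put(resp.text)

def parsedata(dataQueue):
    while True:
        if dataQueue.empty():
            print('Exiting')
            break
        print('parsed %d bytes' % len(dataQueue.get()))
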
Exemple #26
0
    def launch_expeditions( self , task_request_list , moon_name_list=None ):
        
        global expedition
        
        # ---[ 1 ]------------------------------------------------------
        
        self.log.show( 'Checking Moon list sent by user' )
        
        working_moons = []
        
        if not moon_name_list :
            
            self.log.show( 'Traveling to available Moons on Orbit' )
            
            working_moons = self.orbit.values()
            
        else :
            
            self.log.show( 'Traveling to ' + str( moon_name_list ) )
            
            working_moons = [ self.orbit.get_moon( moon_name ) for moon_name in moon_name_list ]
            
        # ---[ 2 ]------------------------------------------------------
        
        self.log.show( 'Build Thread-safe Queues with no maximum size' )
        
        recv_queue = Manager().Queue( )#len(task_request_list) )
        
        send_queue  = Manager().Queue( )#len(task_request_list) )

        # ---[ 3 ]------------------------------------------------------
        
        self.log.show( 'Enqueue tasks on "send_queue" object' )
        
        for task_obj in task_request_list : 
            
            send_queue.put_nowait( str(task_obj) ) # "Normal" objects are not thread safe!
            
        self.log.show( 'send_queue = ' + str(send_queue.qsize())+'/'+str(len(task_request_list)) + 'tasks')
        
        # ---[ 4 ]------------------------------------------------------
        
        self.log.show( 'Starting up Process Pool' )
                
        pool = Pool(processes=len(working_moons))

        

        for moon in working_moons :
            
            #running_expeditions.append( Process( target=expedition , args=(self.name , moon.name , moon.ip , moon.port , taskrequest_queue , taskresponse_queue, ) ) ) # Process Object
            pool.apply_async( func=expedition , args=(self.name , moon.name , moon.ip , moon.port , send_queue , recv_queue , ) )

        # ---[ 5 ]------------------------------------------------------
        
        pool.close()
        pool.join()
        
        self.log.show( 'recv_queue = '+ str(recv_queue.qsize())+'/'+str(len(task_request_list)) + 'tasks' )
        
        tmp = []
        while not recv_queue.empty() :
            
            tmp.append( recv_queue.get() )
            
        self.log.show( 'closing queue' )
        
        self.log.show( 'return results' )
        
        return tmp
Exemple #27
0
def update_heatmap(slaid,
                   model,
                   mpp_inve,
                   li_ij_tissue,
                   wh_inve,
                   num_ij,
                   n_proc,
                   batch_size,
                   transphorm,
                   use_gpu,
                   is_debug,
                   slide_id_ij_input_output_last_tissue,
                   one_over_n_sliding=1):
    # make zero heatmap
    im_heatmap_ij = make_zero_heatmap(num_ij, one_over_n_sliding)
    im_count_ij = make_zero_heatmap(num_ij, one_over_n_sliding)
    # set the model as test mode
    model.eval()
    # for each tissue position
    if n_proc > 0:
        li_xy_inve_tissue = [
            tuple(map(lambda a, b: a * b, ij_tissue, wh_inve))
            for ij_tissue in li_ij_tissue
        ]
        queue_size = batch_size * n_proc
        queue = Manager().Queue(queue_size)
        pool = Pool(n_proc)

        split_points = []
        for i in range(n_proc):
            split_points.append(li_xy_inve_tissue[i::n_proc])
        result = pool.map_async(
            read_region_tissue,
            [(queue, slaid, mpp_inve, wh_inve, li_xy_inve, transphorm)
             for li_xy_inve in split_points])
        li_ij, li_patch_inve = [], []
        while True:
            if queue.empty():
                if not result.ready():
                    time.sleep(0.5)
                elif result.ready() and 0 == len(li_patch_inve):
                    break
            else:
                patch_inve, i, j = queue.get()
                li_ij.append((i, j))
                li_patch_inve.append(patch_inve)

            if len(li_patch_inve) == batch_size or \
                    (result.ready() and queue.empty() and len(li_patch_inve) > 0):
                batch = Variable(torch.FloatTensor(np.stack(li_patch_inve)),
                                 volatile=True)
                if use_gpu:
                    batch = batch.cuda()
                start_time = time.time()
                output = model(batch)
                elapsed_batch = time.time() - start_time
                output = output.cpu().data.numpy()[:, 0]
                logging.debug(f'elapsed time for computing one batch is ' +
                              f'{elapsed_batch:.3f}')
                n_img_in_this_batch = batch.size(0)
                for ii in range(n_img_in_this_batch):
                    i, j = li_ij[ii]
                    im_heatmap_ij[j, i] = output[ii]
                logging.debug(queue.qsize())
                li_ij, li_patch_inve = [], []

        if not result.successful():
            logging.debug('[!] Error: something wrong in result.')
        pool.close()
        pool.join()
    else:
        im_heatmap_ij = update_heatmap_no_parallel(
            slaid, model, im_heatmap_ij, mpp_inve, transphorm, li_ij_tissue,
            wh_inve, batch_size, use_gpu, is_debug,
            slide_id_ij_input_output_last_tissue)
    return im_heatmap_ij, im_count_ij
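
# The consumption loop above follows a general pattern worth isolating: poll the shared
# queue, batch what arrives, and stop only once the async map is finished AND the queue has
# been drained. A stripped-down, self-contained sketch of that pattern:
import time
from multiprocessing import Manager, Pool

def producer(args):
    q, items = args
    for x in items:
        q.put(x * x)

if __name__ == '__main__':
    q = Manager().Queue()
    pool = Pool(2)
    result = pool.map_async(producer, [(q, range(0, 5)), (q, range(5, 10))])
    collected = []
    while True:
        if q.empty():
            if result.ready():
                break
            time.sleep(0.1)
        else:
            collected.append(q.get())
    pool.close()
    pool.join()
    print(sorted(collected))
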