Beispiel #1
0
    def save_point_to_tmp(self, sql, rdd, once_size=1000):
        """
        Execute a statement for the rows of an RDD in batches.

        Every collected row is prefixed with a fresh 'point' sequence value
        obtained from a CodeGenerator. Rows are taken from the end of the
        collected list, and the connection is committed after each batch
        that executes successfully.

        :param sql: statement to execute for every batch
        :param rdd: source RDD; collect() is expected to yield a list of
                    tuples such as [(a, b, c), (d, e, f)]
        :param once_size: number of rows per batch, 1000 by default
        :return: None when every batch was executed; otherwise the tuple
                 (exception, rows of the batch that failed)
        """
        generator = CodeGenerator()
        pending = rdd.collect()
        batch = []

        while pending:
            row = [generator.get_sequence('point')]
            row.extend(pending.pop())
            batch.append(row)

            # Keep accumulating until the batch is full or the input is
            # exhausted.
            if len(batch) < once_size and pending:
                continue

            try:
                self.executemany_without_commit(sql, batch)
            except Exception as err:
                # The failure is handed back to the caller together with the
                # rows that could not be written; nothing is committed here.
                return err, batch
            batch.clear()
            self.cnn.commit()
Beispiel #2
0
class Runner:
    """
    Run point rules for one data date and track every attempt in the
    T_POINT_SPARK_JOB table.

    A job row is inserted with RESULT='1' when an attempt starts and is
    updated with the final result afterwards; rows with RESULT='0' are what
    the duplicate-run check treats as a successful execution.
    """

    def __init__(self, data_date):
        """
        :param data_date: data date the rules are executed for; written to
                          the DATA_DATE column of every job row
        """
        self.data_date = data_date
        # Upper bound on the number of attempts _run_rule makes per rule.
        self.MAX_FAIL_TIMES = 1
        self.point_engine = PointEngine()
        self.cg = CodeGenerator()

    def _get_rule_by_id(self, rule_id, special_flag=0):
        """
        Look up a single rule through the point engine.

        :param rule_id: id of the rule
        :param special_flag: special flag of the rule, 0 by default
        :return: whatever PointEngine.get_rule_by_id returns
        """
        return self.point_engine.get_rule_by_id(rule_id, special_flag)

    def _get_rules_by_ids(self, ids):
        """
        Resolve a sequence of rule keys into rules.

        :param ids: iterable of argument tuples for _get_rule_by_id, i.e.
                    (rule_id,) or (rule_id, special_flag)
        :return: list of rules in the same order as ``ids``
        """
        return [self._get_rule_by_id(*rule_key) for rule_key in ids]

    def _run_rule(self, rule, sum_to_account=False):
        """
        Execute one rule, making at most MAX_FAIL_TIMES attempts.

        :param rule: rule object exposing ``rule_id`` and ``special_flag``
        :param sum_to_account: when True the rule is executed with
            execute_and_sum and its result is written to the job row right
            away; when False only execute is called and the job row is left
            for the caller to finalise
        :return: None when the rule was skipped because it already ran
            successfully for this data date, or when ``sum_to_account`` is
            True and the rule succeeded; the job id when ``sum_to_account``
            is False
        :raises MaxFailureError: when no attempt succeeded
        """
        # Duplicate-run check. The batch id cannot be used for it because it
        # has not been generated yet at this point.
        # NOTE(review): if Global.logger is a stdlib logging.Logger, warn()
        # is a deprecated alias of warning() - confirm before switching.
        if self._is_already_executed(rule.rule_id, rule.special_flag):
            Global.logger.warn('数据日期:%s 规则已经成功执行过, 请勿重复执行!:%s' %
                               (self.data_date, str(rule)))
            return None

        # One batch id is shared by all attempts of this rule.
        batch_id = self.cg.get_random(10)
        success_flag = False
        for _ in range(self.MAX_FAIL_TIMES):
            # Every attempt gets its own job id.
            job_id = self.cg.get_sequence('job')
            try:
                Global.logger.info('START JOB_ID:%s ----- %s' %
                                   (job_id, str(rule)))
                # Record the start of the attempt.
                self._start_job(job_id, batch_id, rule.rule_id,
                                rule.special_flag)
                if sum_to_account:
                    result = self.point_engine.execute_and_sum(
                        rule, self.data_date, job_id)
                    # Persist the outcome of this attempt.
                    self._update_result(job_id, result)
                    if result == Global.SUCCESS:
                        success_flag = True
                        break
                else:
                    # Details are not summarised into the account table
                    # here; the caller does that and then finalises the job.
                    # NOTE(review): the return value of execute() is not
                    # inspected on this path - confirm that failures are
                    # always signalled by an exception.
                    self.point_engine.execute(rule, self.data_date, job_id)
                    return job_id

            except Exception as e:
                # Mark the attempt as failed and fall through to the next
                # attempt, if any is left.
                self._update_result(job_id, Global.FAILURE)
                Global.logger.error('ERROR %s' % e)

        if not success_flag:
            raise MaxFailureError('规则:%s 以达到最大失败次数%s' %
                                  (str(rule), self.MAX_FAIL_TIMES))

    def run_by_cycle(self, rule_cycle):
        """
        Execute every rule that belongs to the given cycle.

        :param rule_cycle: cycle used to select the rules
        :return: None
        """
        rules = self.point_engine.get_rules_by_cycle(rule_cycle)
        self.run_batch(rules)

    def run_single(self, rule_id, special_flag=0):
        """
        Execute a single rule and summarise it into the account table.

        :param rule_id: id of the rule
        :param special_flag: special flag of the rule, 0 by default
        :return: None
        :raises MaxFailureError: propagated from _run_rule
        """
        # Start from empty temporary tables.
        self.point_engine.clear_point_detail_tmp()
        self.point_engine.clear_point_account_tmp()

        rule = self._get_rule_by_id(rule_id, special_flag)
        self._run_rule(rule, sum_to_account=True)

    def run_batch(self, rules):
        """
        Execute a list of rules, then summarise all details at once.

        :param rules: list of rules, or list of tuples accepted by
                      _get_rules_by_ids (detected from the first element)
        :return: None
        :raises MaxFailureError: propagated from _run_rule
        """
        if not rules:
            Global.logger.info('run_batch--没有要执行的规则')
            return

        # Start from empty temporary tables.
        self.point_engine.clear_point_detail_tmp()
        self.point_engine.clear_point_account_tmp()

        if isinstance(rules[0], tuple):
            rules = self._get_rules_by_ids(rules)

        job_ids = []
        for rule in rules:
            job_id = self._run_rule(rule, sum_to_account=False)
            # _run_rule returns None for a rule that was skipped as already
            # executed; there is no job row to finalise for it.
            if job_id is not None:
                job_ids.append(job_id)

            time.sleep(Global.RUN_INTERVAL)

        # Summarise the details of all rules, then mark their jobs as done.
        sum_result = self.point_engine.sum_detail_to_account()
        if sum_result == Global.SUCCESS:
            for job_id in job_ids:
                self._update_result(job_id, '0')

    def _is_already_executed(self, rule_id, special_flag):
        """
        Tell whether the rule already ran successfully for this data date.

        The batch id cannot be used for this check because it is not known
        yet when the check runs.

        :param rule_id: id of the rule
        :param special_flag: special flag of the rule
        :return: True when a job row with RESULT='0' exists for the rule and
                 data date, False otherwise
        """
        check_sql = "SELECT count(1) FROM T_POINT_SPARK_JOB WHERE RULE_ID=? AND SPECIAL_FLAG=? AND DATA_DATE=? AND RESULT='0'"
        count = Global.db2_helper.fetchone(
            check_sql, [rule_id, special_flag, self.data_date])[0]
        return count > 0

    def _start_job(self, job_id, batch_id, rule_id, special_flag):
        """
        Insert the job row for a new attempt with RESULT='1'.

        :param job_id: id of this attempt
        :param batch_id: id shared by all attempts of the rule
        :param rule_id: id of the rule
        :param special_flag: special flag of the rule
        :return: None
        """
        # Attempt counter within the batch.
        repetition = self._get_repetition(batch_id)
        start_sql = "insert into T_POINT_SPARK_JOB(JOB_ID, BATCH_ID, RULE_ID, SPECIAL_FLAG, REPETITION, DATA_DATE, START_TIME, RESULT) values(?,?,?,?,?,?,CURRENT_TIMESTAMP,'1')"
        Global.db2_helper.execute(start_sql, [
            job_id, batch_id, rule_id, special_flag, repetition, self.data_date
        ])

    def _update_result(self, job_id, result):
        """
        Store the result and end time of a job.

        :param job_id: id of the job row to update
        :param result: value written to the RESULT column
        :return: None
        """
        update_sql = "update T_POINT_SPARK_JOB set RESULT=? , END_TIME=CURRENT_TIMESTAMP where JOB_ID=?"
        Global.db2_helper.execute(update_sql, [result, job_id])

    def _get_repetition(self, batch_id):
        """
        Compute the attempt number for the next job of a batch.

        :param batch_id: id shared by all attempts of the rule
        :return: 1 when the batch has no job rows yet, otherwise the highest
                 stored REPETITION plus one
        """
        repetition_sql = "SELECT max(REPETITION) FROM T_POINT_SPARK_JOB WHERE BATCH_ID=?"

        # The single column is None when no row exists for the batch.
        latest = Global.db2_helper.fetchone(repetition_sql, [
            batch_id,
        ])[0]
        return 1 if latest is None else latest + 1