Example #1
0
 def run(self):
     """Collect this program's quality result and publish it for crawlers.

     The value returned by ``self.get_quality_result()`` is always fetched.
     Only when ``self.program_type`` is 0 (a crawler program, per the
     original author's note) is it appended, keyed by ``str(self.sid)``,
     to the per-version list in the module-level ``_quality_result``; the
     whole mapping is then JSON-encoded and written through ``r.set``
     under the key ``'quality_result'``.
     """
     global _quality_result
     failed_tables = self.get_quality_result()
     if self.program_type != 0:
         # Only crawler programs are recorded.
         return
     version_key = self.version
     entry = {str(self.sid): failed_tables}
     # setdefault creates the per-version list on first use.
     _quality_result.setdefault(version_key, []).append(entry)
     r.set('quality_result', json.dumps(_quality_result))
Example #2
0
 def program_start_running(self):
     """Drop this program from its version's pending calculate queue.

     Looks up ``self.version`` in the module-level
     ``_programs_calculate_info_dic`` and removes every queued program
     whose ``'sid'`` equals ``self.sid`` (both compared as ``int``).
     After each removal the mapping is written through ``r.set`` under
     ``'maintain_plans_programs_calculate_info_dic'``.

     This is best-effort: an unknown version, a non-numeric sid, or any
     other error is printed and swallowed; nothing is raised to the
     caller. Returns ``None``.
     """
     global _programs_calculate_info_dic
     version = self.version
     sid = self.sid
     try:
         if version not in _programs_calculate_info_dic:
             print("SetProgramsCalculateInfoDic.program_start_running:"
                   "version is not in _programs_calculate_info_dic")
             return
         target_sid = int(sid)
         queue = _programs_calculate_info_dic[version]
         # Iterate over a snapshot: removing from the list that is being
         # iterated makes the iterator skip the element that follows each
         # removal, so adjacent entries with the same sid used to survive.
         for program in list(queue):
             if target_sid == int(program['sid']):
                 queue.remove(program)
                 # NOTE(review): the raw dict is passed to r.set here,
                 # while other writers JSON-encode first -- confirm the
                 # client accepts it.
                 r.set('maintain_plans_programs_calculate_info_dic',
                       _programs_calculate_info_dic)
     except Exception as e:
         print(str(e))
Example #3
0
def loop_flush_crawler_info():
    """Poll forever for changes to the crawler program list.

    Each cycle deep-copies the module-level ``_programs_crawler_info`` and
    hands it, with the SQL from ``sql_codes.get_crawler_sql_codes()``, to
    ``diff_programs_info``.  When that call returns a truthy value, a deep
    copy of it becomes the new ``_programs_crawler_info``,
    ``DistributePlans().distribute()`` is run, and the new value is written
    through ``r.set`` under ``'maintain_plans_programs_crawler_info'``.

    Any exception raised inside a cycle is printed and passed to
    ``write_log``.  The loop sleeps 30 seconds after every cycle, whether
    it succeeded or not, and never returns.
    """
    global _programs_crawler_info
    while True:
        snapshot = copy.deepcopy(_programs_crawler_info)
        try:
            crawler_sql = sql_codes.get_crawler_sql_codes()
            refreshed = diff_programs_info(crawler_sql, snapshot, 'crawler')
            if not refreshed:
                # Nothing to publish; the finally clause still sleeps.
                continue
            _programs_crawler_info = copy.deepcopy(refreshed)
            DistributePlans().distribute()
            r.set('maintain_plans_programs_crawler_info',
                  _programs_crawler_info)
        except Exception as err:
            print(err)
            write_log(str(err))
        finally:
            time.sleep(30)
Example #4
0
def loop_flush_calculate_info():
    """Poll forever and keep the per-version calculate queues current.

    Each cycle deep-copies ``_programs_calculate_info_last`` as the previous
    snapshot and asks ``diff_programs_info`` to compare it with the SQL from
    ``sql_codes.get_calculate_sql_codes()``.  Then, with ``version`` being
    the value of ``last_version()``:

    * truthy result, version has no queue yet: the queue is seeded with a
      deep copy of the result;
    * truthy result, version already queued: the result becomes the new
      snapshot (also written through ``r.set``), programs that newly
      appeared are appended to the queue and programs that disappeared are
      removed from it;
    * falsy result, version has no queue yet, previous snapshot non-empty:
      the queue is seeded with a deep copy of the previous snapshot.

    Whenever the queue mapping changes it is written through ``r.set`` under
    ``'maintain_plans_programs_calculate_info_dic'``.  Exceptions are printed
    and passed to ``write_log``.  The loop sleeps 30 seconds after every
    cycle and never returns.
    """
    global _programs_calculate_info_last
    global _programs_calculate_info_dic
    while True:
        previous = list(copy.deepcopy(_programs_calculate_info_last))
        try:
            version = last_version()
            calculate_sql = sql_codes.get_calculate_sql_codes()
            refreshed = diff_programs_info(calculate_sql, previous,
                                           'calculate')
            if refreshed:
                if version in _programs_calculate_info_dic:
                    # Update the latest version's queue of scheduled
                    # calculate programs.
                    added = [prog for prog in refreshed
                             if prog not in previous]
                    dropped = [prog for prog in previous
                               if prog not in refreshed]
                    _programs_calculate_info_last = copy.deepcopy(refreshed)
                    # Write the latest snapshot to the cache.
                    r.set('maintain_plans_programs_calculate_info_last',
                          _programs_calculate_info_last)
                    queue = _programs_calculate_info_dic[version]
                    for prog in added:
                        if prog not in queue:
                            queue.append(prog)
                    for prog in dropped:
                        if prog in queue:
                            queue.remove(prog)
                else:
                    # The list changed and the latest version has no queue
                    # yet: use the refreshed list as that version's queue.
                    # NOTE(review): the snapshot
                    # (_programs_calculate_info_last) is not refreshed on
                    # this path -- confirm that is intended.
                    _programs_calculate_info_dic[version] = copy.deepcopy(
                        refreshed)
                # Write the queue mapping to the cache.
                r.set('maintain_plans_programs_calculate_info_dic',
                      _programs_calculate_info_dic)
            elif version not in _programs_calculate_info_dic and previous:
                # The list did not change and the latest version has no
                # queue yet: build it from the current snapshot.
                _programs_calculate_info_dic[version] = copy.deepcopy(
                    previous)
                # Write the queue mapping to the cache.
                r.set('maintain_plans_programs_calculate_info_dic',
                      _programs_calculate_info_dic)
        except Exception as err:
            print(err)
            write_log(str(err))
        finally:
            time.sleep(30)
Example #5
0
    def loop_check_program_result(self):
        """Process the programs executed in the current id window.

        ``self._get_max_flag()`` supplies ``(start_id, end_id)``.  Nothing
        happens when the two are equal or either one is 0.  Otherwise:

        1. the executed calculate and crawling programs for the window are
           fetched;
        2. ``self.crawling_procedure`` and ``self.failed_procedure`` are run
           in parallel threads, each only if its program collection is
           non-empty, and both are joined;
        3. if ``self.program_rerun_dict`` is non-empty, its non-empty
           entries are re-keyed with string keys, JSON-encoded and written
           through ``r.set`` under ``'crawling_program_rerun_dict'``
           (failures here are printed, not raised);
        4. finally ``update_flag`` formatted with ``id=end_id`` is always
           written via ``mysql.Mysql().mysql_write`` (presumably advancing
           the processed-flag to ``end_id`` -- confirm against the SQL).

        Returns ``None``.
        """
        start_id, end_id = self._get_max_flag()
        if start_id == end_id or start_id == 0 or end_id == 0:
            return

        calculate_programs = self._get_execute_calculate_programs(
            start_id, end_id)
        crawling_programs = self._get_execute_crawling_programs(
            start_id, end_id)

        threads = []
        if len(crawling_programs) != 0:
            threads.append(threading.Thread(
                target=self.crawling_procedure,
                args=(crawling_programs,)))
        if len(calculate_programs) != 0:
            threads.append(threading.Thread(
                target=self.failed_procedure,
                args=(calculate_programs,)))
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()

        try:
            if self.program_rerun_dict:
                print("self.program_rerun_dict:", self.program_rerun_dict)
                # Build a fresh inner dict per outer key.  Sharing one
                # inner dict across keys (as before) made every key point
                # at the merged entries of all keys.
                rerun_dict = json.dumps({
                    str(outer_key): {
                        str(inner_key): inner_value
                        for inner_key, inner_value in inner.items()
                    }
                    for outer_key, inner in self.program_rerun_dict.items()
                    if len(inner) != 0
                })
                print("rerun_dict:", rerun_dict)
                r.set('crawling_program_rerun_dict', rerun_dict)
        except Exception as e:
            print("loop_check_program_result.loop_check_program_result Err: "
                  "[program_rerun_dict] {err}".format(err=str(e)))
        finally:
            # Always record end_id, even if publishing the rerun dict failed.
            sql = mysql.Mysql()
            sql.mysql_write(update_flag.format(id=end_id))