def get_db_result(_id):
    """Return a JSON dump of a single result document.

    Resolves attachments for each test case (when an output dir exists),
    renders stored review comments to HTML via markdown, and marks the
    current user's related 'new-comment' notifications as read.
    """
    user = User(session['user'])
    result = Mongo().result_by_id(_id)
    output_dir = result.output_dir
    review = result.review

    if output_dir:
        for case in result.results:
            try:
                case_config = result.ref_problem[case.id]
                if case_config:
                    case.attachments = case_config.get_attachments(
                        user_dir=Env.root.joinpath(output_dir))
            except AttributeError:
                # a case may have no matching problem config; log and continue
                # (original had a redundant `pass` after the log call)
                logger.exception('attachments')

    if review:
        # render stored markdown comment text to HTML in place
        for line, comments in review.items():
            for i, c in enumerate(comments):
                comments[i]['text'] = markdown.markdown(c['text'])
        mark_as_read = Mongo().mark_as_read(to=user.id, _id=_id,
                                            event='new-comment')
        logger.info('mark-as-read: {}', mark_as_read)

    return flask.json.dumps(result)
def perma_result(_id):
    """Permalink page for a single result document (full result view)."""
    user = User(session['user'])
    document = Mongo().result_by_id(_id)
    course = document.ref_course
    problem = document.ref_problem
    crumbs = Breadcrumbs.new(
        Link.CoursesBtn(),
        Link.CourseBtn(course),
        Link.ProblemBtn(course, problem),
    )
    return render_template_ext(
        'view_result.njk',
        user=user,
        notifications=Mongo().load_notifications(user.id),
        results=[document],
        result=None,
        requestReview=False,
        title='Problem %s' % problem.name,
        breadcrumbs=crumbs,
        js=[
            '//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.15.6/highlight.min.js',
            '/static/js/lib/highlightjs-line-numbers.js'
        ],
        js_no_cache=['sockets.js', 'process.js'])
def __init__(self, config_file=None, product=None):
    """Load configuration and wire up DB handles and alerting.

    Args:
        config_file: path to the YAML configuration file.
        product: product identifier used in log/alert messages.
    """
    config = self.set_config(config_file)
    self.config = config
    self.product = product
    # relational source for risk data; the concrete table is bound later
    self.mysql_risk = MySql(**config['mysql_risk'])
    self.mysql_risk_table = None
    # mongo source for derived features; the collection is bound later
    self.mongo_derivable = Mongo(**config['mongo_derivable'])
    self.mongo_derivable_table = None
    # exception reports are pushed to the configured dingding robots
    self.except_handler = DingdingExceptionHandler(config['robots'])
    # credentials for fetching pickled models from the remote host
    self.ssh_config = config['model_file_remote_ssh']
def request_review():
    """Create a 'codereview' notification for every teacher of the course.

    Returns JSON: ``result=ok`` with the list of notified reviewers, or a
    warning when every notification already existed (request already sent).
    """
    user = User(session['user'])
    data = request.json
    _id = data['_id']
    document = Mongo().result_by_id(_id)
    from_user = document.user or user.id

    # notify all teachers
    reviewers = []
    for reviewer_obj in document.ref_course.teachers:
        # teachers may be stored either as plain ids or as {'id': ...} dicts;
        # isinstance replaces the non-idiomatic `type(x) is dict`
        if isinstance(reviewer_obj, dict):
            reviewer = str(reviewer_obj.get('id', reviewer_obj))
        else:
            reviewer = reviewer_obj
        event_document = {
            'from': from_user,
            'to': reviewer,
            'course': document.course,
            'problem': document.problem,
            'document': _id,
            'event': 'codereview',
            'title': f'Code review requested by {from_user}',
            'description': f'Student {from_user} has requested code review for the problem {document.ref_problem.id}'
        }
        if Mongo().add_notification(event_document):
            logger.info(f'add-notification: {event_document}')
            reviewers.append(reviewer)
        else:
            logger.warning(f'notification already exists: {event_document}')

    # remember when the review was (last) requested
    Mongo().update_fields(_id, review_request=datetime.datetime.now())

    if reviewers:
        return flask.json.dumps(dict(result='ok', reviewers=reviewers))
    return flask.json.dumps(
        dict(result='warning', message='Request was already sent'))
def __init__(self):
    """Load the collections used during preprocessing.

    Attributes:
        self.train: records of the ``train`` collection as a DataFrame.
        self.test: records of the ``test`` collection as a DataFrame.
        self.dict_map: raw records of the ``target_map`` collection.
    """
    self.train = pd.DataFrame(Mongo().find("train"))
    self.test = pd.DataFrame(Mongo().find("test"))
    self.dict_map = Mongo().find("target_map")
def view_course(course_name, course_year):
    """Course overview page with problems grouped by category."""
    user = User(session['user'])
    course = Courses().find_one(name=course_name,
                                year=course_year,
                                only_active=False)
    problems: List[Problem] = sorted(
        course.problem_db.find(disabled=(None, False)),
        key=problem_cat_getter)
    languages = Languages.db().find(disabled=(None, False))

    # students only see problems flagged as visible
    if not user.is_admin():
        problems = [p for p in problems if p.is_visible()]

    cat_problems = OrderedDict(
        (cat, list(items))
        for cat, items in groupby(problems, key=problem_cat_getter))

    return render_template_ext(
        'view_course.njk',
        user=user,
        notifications=Mongo().load_notifications(user.id),
        course=course,
        languages=languages,
        has_categories=len(cat_problems) > 1,
        problems=problems,
        cat_problems=cat_problems,
        title=course.name,
        subtitle=course.year,
        breadcrumbs=Breadcrumbs.new(Link.CoursesBtn(), ),
        js_no_cache=['solution.js'])
def process_solution(course_name, course_year):
    """Accept a solution from the submit form, store it, and redirect to
    the live result page.

    On malformed form data, logs the exception and falls through
    (returns None, which the framework reports as a server error).
    """
    user = User(session['user'])
    try:
        course = Courses().find_one(name=course_name,
                                    year=course_year,
                                    only_active=False)
        problem = course.problem_db[request.form['prob-id']]
        lang = Languages.db()[request.form['lang-id']]
        solution = request.form['src']
        use_docker = request.form.get('use-docker', 'off') == 'on'

        test_result = crates.TestResult(
            user=user.id,
            problem=problem.id,
            lang=lang.id,
            course=course.id,
            docker=use_docker,
            solution=solution,
            action='solve',
        )
        # save to the db and redirect with _id
        insert_result = Mongo().save_result(test_result.peek())
        return redirect(
            url_for('view_result',
                    course_name=course.name,
                    course_year=course.year,
                    problem_id=problem.id,
                    _id=str(insert_result.inserted_id)))
    except Exception:
        # narrowed from a bare `except:` (which also swallowed SystemExit
        # and KeyboardInterrupt); behavior otherwise unchanged
        logger.exception('Could not parse data')
def read_notifications():
    """Mark one notification as read and return the updated list as JSON."""
    user = User(session['user'])
    payload = request.json
    notifications = Mongo().read_notifications(user.id, n_id=payload['_id'])
    return flask.json.dumps(dict(notifications=notifications))
def save_model(self, model):
    """Persist a trained model.

    Inserts the model's parameters into the ``models`` collection in
    MongoDB and pickles the estimator to ``$model_path``.

    Args:
        model: trained model exposing ``get_params()``.

    Returns:
        None
    """
    models_params = {
        "name": self.model_name,
        "X": self.X_columns,
        "y": self.y_columns,
        "params": model.get_params()
    }
    Mongo().insert_one("models", models_params)
    # context manager guarantees the file handle is closed (the original
    # passed a bare open() into pickle.dump and never closed it)
    # NOTE(review): the original dumps ``self.model`` while reading params
    # from the ``model`` argument -- confirm both refer to the same object.
    with open(f"{os.environ['model_path']}{self.model_name}", "wb") as fh:
        pickle.dump(self.model, fh)
def classify(self, model):
    """Classify the test records and store predictions in MongoDB.

    Args:
        model: trained model used for prediction.

    Returns:
        None
    """
    features = self.test[self.X_columns]
    predictions = model.predict(features)
    pred = pd.DataFrame({'id': self.test['id'], 'type': predictions})
    # map numeric predictions back to the original target labels
    mapped_pred = PreProcessing().target_map(dataframe=pred, invert=True)
    payload = {
        "model_name": self.model_name,
        "predict_results": list(json.loads(mapped_pred.T.to_json()).values()),
    }
    Mongo().insert_one("results", payload)
def view_courses():
    """Landing page listing courses (admins also see inactive ones)."""
    user = User(session['user'])
    only_active = not user.is_admin()
    courses = list(Courses().find(only_active=only_active))
    return render_template_ext(
        'view_courses.njk',
        title='Course list',
        user=user,
        notifications=Mongo().load_notifications(user.id),
        courses=courses)
def perma_result(_id):
    """Permalink page rendering a single result document."""
    user = User(session['user'])
    document = Mongo().result_by_id(_id)
    course = document.ref_course
    problem = document.ref_problem
    crumbs = [
        Link.CoursesBtn(),
        Link.CourseBtn(course),
        Link.ProblemBtn(course, problem),
    ]
    return render_template_ext(
        'results.njk',
        user=user,
        notifications=Mongo().read_notifications(user.id),
        results=[document],
        result=None,
        requestReview=False,
        title='Problem %s' % problem.name,
        breadcrumbs=Breadcrumbs.new(*crumbs),
    )
def _process_solution_by_id(_id, rerun=False):
    """(Re)run processing for a stored solution document.

    Processing is skipped when the document already carries a result,
    unless ``rerun`` is set.
    """
    document = Mongo().result_by_id(_id)
    if document.result is not None and not rerun:
        return
    _process_solution(
        User(dict(id=document.user)),
        document.action,
        not document.docker,
        document.problem,
        document.course,
        document.lang,
        document.solution,
        document._id,
    )
def student_process_solution(data):
    """Socket handler: (re)process a previously saved solution document.

    Never propagates exceptions into the socket layer; failures are
    logged only.
    """
    print(data)
    user = User(session['user'])
    try:
        document = Mongo().result_by_id(data['_id'])
        _process_solution(
            user=user,
            action=document.action,
            skip_docker=not document.docker,
            problem_id=document.problem,
            course_id=document.course,
            lang_id=document.lang,
            src=document.solution,
            _id=data['_id'],
        )
    except Exception:
        # narrowed from a bare `except:`; still best-effort by design
        logger.exception('Error while processing solution')
def view_result(course_name, course_year, problem_id, _id=None):
    """Student result page: current result plus recent attempts.

    Admins are redirected to the problem-management view.
    """
    user = User(session['user'])
    if user.is_admin():
        return redirect(
            url_for('admin_problem',
                    course_name=course_name,
                    course_year=course_year,
                    problem_id=problem_id))

    course = Courses().find_one(name=course_name,
                                year=course_year,
                                only_active=False)
    problem = course.problem_db[problem_id]
    results = list()
    result = None
    breadcrumbs = [Link.CoursesBtn(), Link.CourseBtn(course)]

    # TODO check access
    if _id:
        document = Mongo().result_by_id(_id)
        if document:
            # add to previous solution if already executed
            if document.result:
                results.append(document.peek())
            else:
                result = document.peek()
    breadcrumbs.append(Link.ProblemBtn(course, problem))

    if Env.use_database:
        for prev in Mongo().peek_last_n_results(20, user.id, course.id,
                                                problem.id):
            # push only valid result
            if prev.get('result') and str(prev['_id']) != str(_id):
                results.append(prev)

    if _id:
        # highlight the attempt the user navigated to
        for r in results:
            if str(r['_id']) == str(_id):
                r['active'] = 'active'

    def get_attempt(obj):
        # older documents may miss 'attempt'; the original lambda returned
        # None there, and sorting None against int raises TypeError on py3
        # (the sibling object-based view already guards this way)
        try:
            return int(obj.get('attempt'))
        except (TypeError, ValueError):
            return 0

    results = sorted(results, reverse=True, key=get_attempt)
    return render_template_ext(
        'results.njk',
        user=user,
        notifications=Mongo().read_notifications(user.id),
        results=results,
        result=result,
        requestReview=True,
        title='Problem %s' % problem.name,
        breadcrumbs=Breadcrumbs.new(*breadcrumbs),
    )
def view_course(course_name, course_year):
    """Solution submit page for a single course."""
    user = User(session['user'])
    course = Courses().find_one(name=course_name,
                                year=course_year,
                                only_active=False)
    # only offer problems/languages that are not disabled
    enabled = dict(disabled=(None, False))
    problems = list(course.problem_db.find(**enabled))
    languages = Languages.db().find(**enabled)
    return render_template_ext(
        'submit.njk',
        user=user,
        notifications=Mongo().read_notifications(user.id),
        course=course,
        languages=languages,
        problems=problems,
        title=course.name,
        subtitle=course.year,
        breadcrumbs=Breadcrumbs.new(Link.CoursesBtn(), ),
    )
def clear_notification():
    """Delete every notification tied to a result document.

    Returns a dict with ``result`` (ok/warning/error) and a human
    readable ``message``.
    """
    data = request.json
    _id = data['_id']
    result = dict(result="ok", message="ok")
    try:
        delete_many_result = Mongo().events.delete_many(dict(document=_id))
        deleted = delete_many_result.deleted_count
        if deleted > 0:
            result['message'] = (
                f"Ok deleted {deleted} notification related to this result")
            return result
        # deleted == 0: nothing matched
        result['result'] = "warning"
        result['message'] = "No notification related to this result found"
        return result
    except Exception:
        # narrowed from a bare `except:`; message text kept as-is
        result['result'] = 'error'
        result['message'] = "No notification related to this result found"
        return result
def admin_problem(course_name, course_year, problem_id):
    """Problem management page for admins."""
    user = User(session['user'])
    course = Courses().find_one(name=course_name,
                                year=course_year,
                                only_active=False)
    # comma-separated ids of every problem in the course (incl. disabled)
    all_problems = list(course.problem_db.find())
    problems_ids = ','.join(x.id for x in all_problems)
    problem = course.problem_db[problem_id]
    languages = Languages.db().find(disabled=(None, False))
    return render_template_ext(
        'problem.njk',
        user=user,
        notifications=Mongo().read_notifications(user.id),
        course=course,
        languages=languages,
        problem=problem,
        problems_ids=problems_ids,
        title='Manage problem %s' % problem.name,
        breadcrumbs=Breadcrumbs.new(Link.CoursesBtn(), Link.CourseBtn(course)),
    )
def get_side_by_side_diff(doc_id, case_id):
    """Return an HTML side-by-side diff of reference vs generated output
    for one test case, or a plain error-message string.

    Returns None when the document has no output directory (unchanged
    from the original).
    """
    # renamed from `result`, which the original later shadowed with the
    # comparison result
    document = Mongo().result_by_id(doc_id)
    output_dir = document.output_dir
    if output_dir:
        try:
            case_config = document.ref_problem[case_id]
            if not case_config:
                logger.error(f'Could not find case {case_id}')
                return f'Could not find case {case_id}'
            attachments = case_config.get_path_to_output_files(
                user_dir=Env.root.joinpath(output_dir))
            from utils import comparison
            diff = comparison.line_by_line_diff(
                Env.root / attachments.reference,
                Env.root / attachments.generated)
            return diff.html
        except FileNotFoundError:
            logger.exception('Could not find files for comparison')
            return 'Could not find files'
        except Exception:
            # narrowed from a bare `except:`; behavior unchanged
            logger.exception('Error while comparing')
            return 'Error while comparison'
def _process_solution(user, action, skip_docker, problem_id, course_id,
                      lang_id=None, src=None, _id=None):
    """Run one processing request, guarded by the module-level semaphore.

    Side effects: emits socket events, appends/removes the request on the
    module-level ``queue``, and (when Env.use_database) logs the request
    and stores the result document (replacing ``_id`` when given).
    """
    # only admins may skip docker or generate input/output files
    if not user.is_admin() and (skip_docker or action in (
            ProcessRequestType.GENERATE_INPUT,
            ProcessRequestType.GENERATE_OUTPUT)):
        Emittor.error('Operation not permitted', [
            'You do not have sufficient privileges to perform action:',
            ' %s (skip docker: %s)' % (action, skip_docker), '',
            'Please contact [email protected]',
            'if you want to gain the privileges.'
        ])
        return

    request = processing.request.ProcessRequest(
        user=user,
        lang=lang_id,
        problem=problem_id,
        course=course_id,
        src=src,
        type=action,
        # docker can only be disabled by admins
        docker=False if (skip_docker and user.is_admin()) else True,
    )
    if Env.use_database:
        Mongo().save_log(request.get_log_dict())

    # ignore problems which are past due
    if request.problem.time_left < 0:
        return

    Emittor.register_events(request)
    Emittor.queue_status(queue_status())

    queue.append(request)
    Emittor.queue_push(request)

    # put a barrier here so only certain amount fo users can process code at once
    # while other will see queue list
    with thread_lock:
        try:
            request.process()
        except ConfigurationException as e:
            if user.is_admin():
                logger.exception('[visible to admin only] invalid yaml config')
            Emittor.exception(e)
        except Exception as e:
            logger.exception('process error:')
            Emittor.exception(e)
        finally:
            # always persist whatever was produced and release resources
            output_dir, attempt = request.save_result()
            if Env.use_database:
                # replace document instead of creating new one
                Mongo().save_result(
                    request.get_result_dict(),
                    _id=_id,
                    output_dir=output_dir,
                    attempt=attempt,
                )
            request.destroy()

    queue.remove(request)
    Emittor.queue_pop(request)
class BaseFeatureMonitor(object):
    """Monitors feature loss rate and PSI drift for risk models.

    Pulls model metadata from MySQL, derived features from MongoDB,
    downloads pickled models over SFTP, and pushes alerts through a
    DingDing exception handler. Product-specific subclasses implement
    ``get_model_path_from_mysql``.
    """

    def __init__(self, config_file=None, product=None):
        # wiring only: config, db handles, alerting, ssh credentials
        self.config = self.set_config(config_file)
        self.mysql_risk = MySql(**self.config['mysql_risk'])
        self.mysql_risk_table = None
        self.mongo_derivable = Mongo(**self.config['mongo_derivable'])
        self.mongo_derivable_table = None
        self.except_handler = DingdingExceptionHandler(self.config['robots'])
        self.product = product
        self.ssh_config = self.config['model_file_remote_ssh']

    def set_config(self, config_file):
        """Read the YAML config file and return it as a dict."""
        with open(config_file, 'r') as f:
            file = f.read()
        # NOTE(review): yaml.load without an explicit Loader is deprecated
        # and unsafe on untrusted input -- consider yaml.safe_load.
        config = yaml.load(file)
        return config

    def get_model_path_from_mysql(self, table=None):
        """Return model path records; implemented by product subclasses."""
        pass

    def get_top_features(self, monitor_flag):
        """Collect the union of each matching model's top-30 features,
        minus a fixed blacklist of always-excluded columns.
        """
        model_path_list = self.get_model_path_from_mysql()
        final_features = []
        # connect to the remote model server
        ssh_client = paramiko.Transport(self.ssh_config['hostname'],
                                        self.ssh_config['port'])
        ssh_client.connect(username=self.ssh_config['username'],
                           password=self.ssh_config['password'])
        sftp = paramiko.SFTPClient.from_transport(ssh_client)
        for model_dict in [model for model in model_path_list
                           if model['monitor_flag'] == monitor_flag]:
            remote_model_path = model_dict['model_path']
            # create the local directory for the model file if missing
            if not os.path.isdir(os.path.split(remote_model_path)[0]):
                os.makedirs(os.path.split(remote_model_path)[0])
            # download the remote file to the same path locally
            sftp.get(remote_model_path, remote_model_path)
            with open(remote_model_path, 'rb') as f:
                model_info = pickle.load(f)
            top_columns = []
            try:
                model = model_info['model']
                enum = model.get_params()['enum']
                mm = model.get_params()['clf']
                top_columns = []
                # rank columns by (rounded) feature importance, keep top 31
                for i, v in enumerate(
                        sorted(zip(map(lambda x: round(x, 4),
                                       mm.feature_importances_),
                                   enum.clean_col_names),
                               reverse=True)):
                    if i <= 30:
                        top_columns.append(v[1])
            except Exception as e:
                logging.error(e)
            final_features.extend(top_columns)
        sftp.close()
        no_final_features = ['ALPHA_Behavior_submit_date',
                             'ALPHA_Behavior_submit_hour',
                             'ALPHA_Behavior_submit_weekday',
                             'X_DNA_Behavior_submit_date',
                             'X_DNA_Behavior_submit_hour',
                             'X_DNA_Behavior_submit_weekday']  # never monitored
        final_features = list(set(final_features) - set(no_final_features))
        logging.info('{}-top_features: {}'.format(self.product, final_features))
        return final_features

    def get_appid_from_mysql(self, start_time, diff_day, diff_hour):
        """Fetch all app_id rows for the 11-day monitoring window."""
        end_time = (datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S') +
                    timedelta(days=diff_day)).strftime("%Y-%m-%d %H:%M:%S")
        start_hour = 0
        end_hour = start_hour + diff_hour
        sql = '''select upper(app_id) as app_id,flow_type,work_flag,date(create_time) as date from {} where create_time >= '{}' and create_time < '{}' and hour(create_time) >= {} and hour(create_time) <= {} '''.format(
            self.mysql_risk_table, start_time, end_time, start_hour, end_hour)
        res = self.mysql_risk.query(sql)
        return pd.DataFrame(res)

    def get_features(self, df_appid, top_feature):
        """Load the requested feature columns for the given app_ids."""
        appids = list(set(df_appid['app_id'].tolist()))
        qry = {'_id': {'$in': appids}}
        # projection: only the monitored features
        qry1 = {feature: 1 for feature in top_feature}
        res = self.mongo_derivable.get_collection(
            self.mongo_derivable_table).find(qry, qry1, batch_size=500)
        res_list = list(res)
        return pd.DataFrame(res_list)

    def psi(self, df_feature_1, df_feature_2, feature, bin_num=10):
        """Compute the PSI of ``feature`` between two samples.

        Returns (group_df, psi); psi is 999 for unusable high-cardinality
        categorical features and 99 when the feature is missing from the
        combined frame.
        """
        df_feature_1['label'] = 0
        df_feature_2['label'] = 1
        df_feature = pd.concat([df_feature_1, df_feature_2])
        # normalize string nulls before numeric coercion
        df_feature = df_feature.replace('null', np.nan)
        df_feature = df_feature.replace('NaN', np.nan)
        df_feature = df_feature.apply(pd.to_numeric, errors='ignore')
        enum = EnumMapper(maximum_enum_num=100)
        enum.fit(df_feature)
        df_feature = enum.transform(df_feature)
        if feature in df_feature.columns.tolist():
            df_psi = df_feature[[feature, 'label']].copy()
            if df_psi[feature].dtype not in ['int', 'float'] and \
                    df_psi[feature].unique().shape[0] > 20:
                # print("The unique number of feature is {}".format(df_psi[feature].unique().shape[0]))
                return None, 999
            else:
                if df_psi[feature].unique().shape[0] > 2:
                    # continuous-ish: decile bins, NaNs into a '-999' bucket
                    df_psi['bins'] = pd.qcut(df_psi[feature], 10,
                                             precision=2, duplicates='drop')
                    nan_df = df_psi[df_psi[feature].map(
                        lambda x: pd.isnull(x))].reset_index(drop=True)
                    if not nan_df.empty:
                        df_psi['bins'] = df_psi['bins'].cat.add_categories('-999')
                        df_psi['bins'] = df_psi['bins'].fillna('-999')
                else:
                    # binary feature: the value itself is the bin
                    df_psi['bins'] = df_psi[feature].map(
                        lambda x: -999 if pd.isnull(x) else x)
                group_df = df_psi.groupby(['bins', 'label']).size().unstack('label')
                group_df = group_df.fillna(0)
                group_df['b_rate'] = group_df[0] / group_df[0].sum()
                group_df['a_rate'] = group_df[1] / group_df[1].sum()
                # epsilon avoids log(0) / division by zero in empty bins
                e = 0.000000000001
                group_df['psi_part'] = group_df.apply(
                    lambda group_df: (group_df['a_rate'] - group_df['b_rate']) * math.log(
                        (group_df['a_rate'] + e) / (group_df['b_rate'] + e)), axis=1)
                return group_df, group_df.psi_part.sum()
        else:
            return None, 99

    def psi_classified(self, start_time, diff_day, diff_hour, timedetail):
        """Run loss-rate and PSI monitoring for every flow type and push
        the collected alert lines to dingding.
        """
        ls_top_loss_rate = []  # loss-rate alert lines sent to dingding
        ls_top_psi = []  # psi alert lines sent to dingding
        # all appids of the 11-day window
        total_appids_df = self.get_appid_from_mysql(start_time, diff_day, diff_hour)
        # cast the date column to str for the string-based queries below
        total_appids_df.date = total_appids_df.date.map(lambda x: str(x))
        # card-opening precheck
        top_features = self.get_top_features(monitor_flag='cp')
        cp_ls_top_loss_rate, cp_ls_top_psi = self.psi_distr(
            start_time, total_appids_df, top_features,
            flow_type='c', work_flag='precheck')
        if cp_ls_top_loss_rate:
            ls_top_loss_rate.append('#######开卡初审#######')
            ls_top_loss_rate.extend(cp_ls_top_loss_rate)
        if cp_ls_top_psi:
            ls_top_psi.append('#######开卡初审#######')
            ls_top_psi.extend(cp_ls_top_psi)
        # card-opening final check
        top_features = self.get_top_features(monitor_flag='cf')
        cf_ls_top_loss_rate, cf_ls_top_psi = self.psi_distr(
            start_time, total_appids_df, top_features,
            flow_type='c', work_flag='finalcheck')
        if cf_ls_top_loss_rate:
            ls_top_loss_rate.append('#######开卡复审#######')
            ls_top_loss_rate.extend(cf_ls_top_loss_rate)
        if cf_ls_top_psi:
            ls_top_psi.append('#######开卡复审#######')
            ls_top_psi.extend(cf_ls_top_psi)
        # first-loan withdrawal precheck
        top_features = self.get_top_features(monitor_flag='fp')
        fp_ls_top_loss_rate, fp_ls_top_psi = self.psi_distr(
            start_time, total_appids_df, top_features,
            flow_type='f', work_flag='precheck')
        if fp_ls_top_loss_rate:
            ls_top_loss_rate.append('#######首贷提现初审#######')
            ls_top_loss_rate.extend(fp_ls_top_loss_rate)
        if fp_ls_top_psi:
            ls_top_psi.append('#######首贷提现初审#######')
            ls_top_psi.extend(fp_ls_top_psi)
        # first-loan withdrawal final check
        top_features = self.get_top_features(monitor_flag='ff')
        ff_ls_top_loss_rate, ff_ls_top_psi = self.psi_distr(
            start_time, total_appids_df, top_features,
            flow_type='f', work_flag='finalcheck')
        if ff_ls_top_loss_rate:
            ls_top_loss_rate.append('#######首贷提现复审#######')
            ls_top_loss_rate.extend(ff_ls_top_loss_rate)
        if ff_ls_top_psi:
            ls_top_psi.append('#######首贷提现复审#######')
            ls_top_psi.extend(ff_ls_top_psi)
        # repeat-loan precheck
        top_features = self.get_top_features(monitor_flag='wp')
        wp_ls_top_loss_rate, wp_ls_top_psi = self.psi_distr(
            start_time, total_appids_df, top_features,
            flow_type='w', work_flag='precheck')
        if wp_ls_top_loss_rate:
            ls_top_loss_rate.append('#######复贷初审#######')
            ls_top_loss_rate.extend(wp_ls_top_loss_rate)
        if wp_ls_top_psi:
            ls_top_psi.append('#######复贷初审#######')
            ls_top_psi.extend(wp_ls_top_psi)
        # repeat-loan final check
        top_features = self.get_top_features(monitor_flag='wf')
        wf_ls_top_loss_rate, wf_ls_top_psi = self.psi_distr(
            start_time, total_appids_df, top_features,
            flow_type='w', work_flag='finalcheck')
        if wf_ls_top_loss_rate:
            ls_top_loss_rate.append('#######复贷复审#######')
            ls_top_loss_rate.extend(wf_ls_top_loss_rate)
        if wf_ls_top_psi:
            ls_top_psi.append('#######复贷复审#######')
            ls_top_psi.extend(wf_ls_top_psi)
        # settle-and-raise-quota
        top_features = self.get_top_features(monitor_flag='q')
        q_ls_top_loss_rate, q_ls_top_psi = self.psi_distr(
            start_time, total_appids_df, top_features,
            flow_type='q', work_flag='finalcheck')
        if q_ls_top_loss_rate:
            ls_top_loss_rate.append('#######结清调额#######')
            ls_top_loss_rate.extend(q_ls_top_loss_rate)
        if q_ls_top_psi:
            ls_top_psi.append('#######结清调额#######')
            ls_top_psi.extend(q_ls_top_psi)
        if ls_top_loss_rate:
            ls_top_loss_rate.insert(0, '*******{}-丢失率报警*******'.format(self.product))
            ls_top_loss_rate.insert(1, '时间:{}'.format(
                datetime.now().strftime('%Y-%m-%d ') + timedetail))
            self.except_handler.handle(msg=ls_top_loss_rate)
        if ls_top_psi:
            ls_top_psi.insert(0, '*******{}-psi报警*******'.format(self.product))
            ls_top_psi.insert(1, '时间:{}'.format(
                datetime.now().strftime('%Y-%m-%d ') + timedetail))
            self.except_handler.handle(msg=ls_top_psi)

    def psi_distr(self, start_time, total_appids_df, top_features, flow_type, work_flag):
        """Build loss-rate and PSI alert lines for one flow_type/work_flag."""
        # the monitored date = window start + 10 days
        the_psi_date = (datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S') +
                        timedelta(days=10)).strftime('%Y-%m-%d')
        logging.info('所监控的日期为:{}'.format(the_psi_date))
        # app ids of the 10 preceding (baseline) days for this flow type
        df_appid1 = total_appids_df.query(
            "flow_type=='{}' and work_flag=='{}' and date!='{}'".format(
                flow_type, work_flag, the_psi_date)).reset_index(drop=True)
        df_appid1 = df_appid1.sample(min(10000, df_appid1.shape[0]))
        logging.info('flow_type:{} work_flag:{} 前十天全部app_id个数:{}'.format(
            flow_type, work_flag, len(df_appid1)))
        # app ids of the monitored date for this flow type
        df_appid2 = total_appids_df.query(
            "flow_type=='{}' and work_flag=='{}' and date=='{}'".format(
                flow_type, work_flag, the_psi_date)).reset_index(drop=True)
        df_appid2 = df_appid2.sample(min(1000, df_appid2.shape[0]))
        logging.info('flow_type:{} work_flag:{} 所监控的app_id个数:{}'.format(
            flow_type, work_flag, len(df_appid2)))
        dict_report = {}
        ls_top_psi = []
        ls_top_loss_rate = []
        df_feature_all_1 = self.get_features(df_appid1, top_features)
        df_feature_all_2 = self.get_features(df_appid2, top_features)
        for feature in top_features:
            df_feature_1 = pd.DataFrame(df_feature_all_1, columns=[feature])
            df_feature_2 = pd.DataFrame(df_feature_all_2, columns=[feature])
            # missing-value rate on the monitored day
            feature_precent = df_feature_2.iloc[:, 0].isna().tolist().count(True) / df_feature_2.shape[0]
            if feature_precent > 0.7:
                ls_top_loss_rate.append("{}--loss_rate:{}".format(
                    feature, round(feature_precent, 3)))
            dict_report[feature] = self.psi(df_feature_1, df_feature_2,
                                            feature, bin_num=10)[1]
            if dict_report[feature] > 0.25:
                ls_top_psi.append("{}--psi:{}".format(
                    feature, round(dict_report[feature], 3)))
        return ls_top_loss_rate, ls_top_psi

    # previous day
    def job1(self):
        """Compare yesterday's top-feature distribution with the 10 days
        preceding it.
        """
        logging.info('{} start handle feature_monitor job1!'.format(self.product))
        # window start: monitored day plus its 10 baseline days
        start_time = (datetime.now() - timedelta(days=11)).strftime(
            '%Y-%m-%d') + ' 00:00:00'
        diff_day = 11  # fetch 11 days of data from the start time
        diff_hour = 24  # each day from hour 0 to 24
        self.psi_classified(start_time, diff_day, diff_hour, timedetail='上午')
        logging.info('{} end handle feature_monitor job1!'.format(self.product))

    # current day
    def job2(self):
        """Compare today's (0-15h) top-feature distribution with the
        previous 10 days.
        """
        logging.info('{} start handle feature_monitor job2!'.format(self.product))
        start_time = (datetime.now() - timedelta(days=10)).strftime(
            '%Y-%m-%d') + ' 00:00:00'
        diff_day = 11  # fetch 11 days of data from the start time
        diff_hour = 15  # each day from hour 0 to 15
        self.psi_classified(start_time, diff_day, diff_hour, timedetail='下午')
        logging.info('{} end handle feature_monitor job2!'.format(self.product))
from flask_socketio import emit
from loguru import logger

import processing.request
from database.mongo import Mongo
from database.objects import User
from exceptions import ConfigurationException
from processing import ProcessRequestType
from www import socketio
from www.emittor import Emittor

# socket.io namespace (None == default namespace)
namespace = None
# in-flight processing requests, shown to waiting users
queue = list()
# at most this many requests are processed concurrently
thread_lock_max = 10
thread_lock = Semaphore(value=thread_lock_max)
mongo = Mongo()


def get_datetime(value=None):
    """Format ``value`` (default: now) as a yymmdd_HHMMSS stamp."""
    # NOTE(review): relies on ``dt`` (a datetime-module alias) imported
    # elsewhere in this file -- confirm it is in scope.
    return (value if value else dt.datetime.now()).strftime('%y%m%d_%H%M%S')


def queue_status():
    """Snapshot of the processing queue for the UI."""
    return dict(items=queue, maximum=thread_lock_max, current=len(queue))


def broadcast_queue_status():
    """Emit the current queue status to connected socket clients."""
    emit('queue-status', dict(status=200, queue=queue_status()))
def add_comment():
    """Attach review comments to a result and notify all participants.

    Adds each submitted comment to the document's per-line review map,
    creates a 'new-comment' notification for everyone involved (the
    solution author and all previous commenters) except the sender, and
    marks the sender's pending 'codereview' notifications as read.
    """
    data = request.json
    user = User(session['user'])
    _id = data['_id']
    document = Mongo().result_by_id(_id)
    review = document.review or dict()
    now = time.time()
    from_user = user.id
    author_user = document.user

    for comment in data['comments']:
        line, text = str(comment['line']), comment['comment']
        # setdefault replaces the manual membership test + reassignment
        review.setdefault(line, []).append(dict(
            user=user.id,
            time=now,
            text=text,
        ))

    # everyone who ever commented, plus the author and the sender
    recipients = {from_user, author_user}
    for cmts in review.values():
        for cmt in cmts:
            recipients.add(cmt['user'])

    for recipient in recipients:
        if recipient == from_user:
            logger.info('Not creating notification for self')
            continue
        event_document = {
            'from': from_user,
            'to': recipient,
            'course': document.course,
            'problem': document.problem,
            'document': _id,
            'event': 'new-comment',
            'title': f'New comment from {from_user}',
            'description': f'{document.ref_problem.id} User {from_user} commented your code in problem '
        }
        if Mongo().add_notification(event_document):
            logger.info('add-notification: {}', event_document)
        else:
            logger.warning('notification already exists: {}', event_document)

    mark_as_read = Mongo().mark_as_read(_id=_id, event='codereview', to=None)
    logger.info('mark-as-read: {}', mark_as_read)
    update_one = Mongo().update_fields(_id, review=review)
    logger.info('document-updated: {}', update_one)
    return flask.json.dumps(dict(result='ok'))
def stats():
    """Aggregate results per user for the stats page.

    Builds a mongo ``$match`` filter from the posted filters, groups
    results by user with an aggregation pipeline, applies course-tag
    filtering in Python, and returns the sorted, per-user-limited list
    as JSON.
    """
    data = request.json
    filters = {}

    def add_filter(n, v=None, l=None):
        # copy data[n] (or data['filters'][x] for dotted names) into the
        # mongo filter dict, optionally renamed to ``v`` and mapped by ``l``
        r = data
        if n.find('.') != -1:
            r, n = data['filters'], n.split('.')[1]
        if r.get(n, None):
            val = l(r.get(n)) if l else r.get(n)
            if val is not SKIP:
                filters[v or n] = val

    def dummy_object_id(period):
        # ObjectId lower bound whose embedded timestamp encodes the period
        if period == 'day':
            gen_time = datetime.datetime.today() - datetime.timedelta(days=1)
        elif period == 'week':
            gen_time = datetime.datetime.today() - datetime.timedelta(days=7)
        elif period == 'two weeks':
            gen_time = datetime.datetime.today() - datetime.timedelta(days=14)
        elif period == 'month':
            gen_time = datetime.datetime.today() - datetime.timedelta(days=31)
        else:
            gen_time = datetime.datetime.today() - datetime.timedelta(days=365 * 5)
        return ObjectId.from_datetime(gen_time)

    # example payload:
    # {'course': 'TST-2019', 'problem': 'problem-1', 'filters':
    # {'daterange': 'week', 'status': 'all', 'limit-per-user': '******', 'has-review-flag': 'no', 'search': 'a'}}
    limit_per_user = data['filters']['limit-per-user']
    if limit_per_user == 'all':
        limit_per_user = 1000
    else:
        limit_per_user = int(limit_per_user)

    has_review_flag = data['filters']['has-review-flag']
    if has_review_flag == 'yes':
        filters['review_request'] = {'$ne': None}
    if has_review_flag == 'no':
        filters['review_request'] = {'$exists': False}

    sort_by_inner = data['filters']['sort-by-inner']
    sort_by_outer = data['filters']['sort-by-outer']

    # substring match on username
    search = str(data['filters']['search']).strip()
    if search:
        filters['user'] = {'$regex': f".*{search}.*"}

    add_filter('course')
    add_filter('filters.problem', 'problem', skip_if_all)
    # add_filter('filters.course', 'course', skip_if_all)
    add_filter('filters.status', 'result.status', skip_if_all)
    add_filter('filters.daterange', '_id', lambda x: {'$gte': dummy_object_id(x)})

    base_properties = {x: 1 for x in Mongo().base_properties}
    pipeline = [
        {
            '$match': filters
        },
        {
            '$project': {
                'review': 1,
                **base_properties
            }
        },
        {
            '$sort': {
                sort_by_inner: -1
            }
        },
        {
            # one bucket per user, each holding all matching results ($$ROOT)
            '$group': {
                '_id': '$user',
                'results': {
                    '$push': '$$ROOT'
                }
            }
        },
    ]
    # print(pipeline, limit_per_user)
    items = list(Mongo().data.aggregate(pipeline))

    try:
        course = Courses()[data['course']]
    except:
        course = None

    if course:
        # tag-* filters are resolved against course metadata in Python
        for key in data['filters'].keys():
            if key.startswith('tag-'):
                tag = key[4:]
                value = data['filters'][key]
                if value == 'all':
                    continue
                items = [
                    x for x in items
                    if course.student_has_tag(x['_id'], tag, value)
                ]
    # tags = .get('tag-group', None)

    def add_fields(x):
        # _id is the grouped username, expected shape 'first.last'
        x['firstname'] = str(x['_id']).split('.')[0]
        x['lastname'] = str(x['_id']).split('.')[-1]
        return x

    items = map(add_fields, items)
    items = sorted(items, key=lambda x: x[sort_by_outer])

    result = list()
    for item in items:
        item_copy = deepcopy(item)
        # cap attempts per user and derive a timestamp from the ObjectId
        item_copy['results'] = item_copy['results'][0:limit_per_user]
        for attempt in item_copy['results']:
            attempt['time'] = datetime.datetime.timestamp(
                attempt['_id'].generation_time)
        # item_copy['results'] = sorted(item_copy['results'], key=lambda x: x['time'], reverse=True)
        if 'results' in item_copy:
            item_copy['results'] = [
                r for r in item_copy['results'] if 'result' in r
            ]
        result.append(item_copy)
    return flask.json.dumps(result)
def load_notifications():
    """Return the current user's notifications as JSON."""
    user = User(session['user'])
    notifications = Mongo().load_notifications(user.id).peek()
    return flask.json.dumps(dict(notifications=notifications))
def get_linkedin_profile():
    """Dump LinkedIn person profiles from mongo into one text file per
    profile, plus an index-to-id mapping file.

    Python 2 code (print statements, dict.has_key); missing fields are
    reported on stdout as [DEBUG] lines.
    """
    path = settings.DATA_PATH + "\\linkedin\\"
    mongo = Mongo()
    col = mongo.db['person_profiles']
    index = 0
    res = col.find(skip=index)
    # NOTE(review): id_map is never closed/flushed explicitly -- confirm
    id_map = codecs.open(settings.DATA_PATH + "\\idmap" + str(index) + ".txt", 'w', encoding="utf-8")
    for item in res:
        id_map.write(str(index) + ' ' + item['_id'] + '\n')
        index += 1
        # one output file per profile, named from the sanitized _id
        out = codecs.open(path + item['_id'].strip().replace('"', ' ').split('?')[0], 'w', encoding="utf-8")
        print str(index)
        try:
            print item['_id'] + '\n'
        except Exception, e:
            print e
        if item.has_key('interests'):
            out.write(item['interests'] + '\n')
        else:
            print '[DEBUG]No Interests'
        if item.has_key('education'):
            for e in item['education']:
                out.write(e['name'] + '\n')
                if e.has_key('desc'):
                    out.write(e['desc'] + '\n')
        else:
            print '[DEBUG]No Education'
        if item.has_key('group'):
            if item['group'].has_key('member'):
                out.write(item['group']['member'] + '\n')
            if item['group'].has_key('affilition'):
                for a in item['group']['affilition']:
                    out.write(a + '\n')
        else:
            print '[DEBUG]No Group'
        out.write(item['name']['family_name'] + ' ' + item['name']['given_name'])
        if item.has_key('overview_html'):
            # strip the HTML down to its text content
            soup = BeautifulSoup(item['overview_html'])
            out.write(' '.join(list(soup.strings)) + '\n')
        else:
            print '[DEBUG]No Overview'
        if item.has_key('locality'):
            out.write(item['locality'] + '\n')
        else:
            print '[DEBUG]No Locality'
        if item.has_key('skills'):
            for s in item['skills']:
                out.write(s + '\n')
        else:
            print "[DEBUG]No Skills"
        if item.has_key('industry'):
            out.write(item['industry'] + '\n')
        else:
            print "[DEBUG]No Industry"
        if item.has_key('experience'):
            for e in item['experience']:
                if e.has_key('org'):
                    out.write(e['org'] + '\n')
                if e.has_key('title'):
                    out.write(e['title'] + '\n')
        else:
            print "[DEBUG]No Experience"
        if item.has_key('summary'):
            out.write(item['summary'] + '\n')
        else:
            print "[DEBUG]No Summary"
        # NOTE(review): writes the literal string 'url' -- looks like a
        # placeholder for a profile URL field; confirm intent.
        out.write('url')
        if item.has_key('specilities'):
            out.write(item['specilities'] + '\n')
        else:
            print "[DEBUG]No Specilities"
        if item.has_key('homepage'):
            for k in item['homepage'].keys():
                for h in item['homepage'][k]:
                    out.write(h + '\n')
        else:
            print "[DEBUG]No Homepage"
        if item.has_key('honors'):
            for h in item['honors']:
                out.write(h + '\n')
        else:
            print "[DEBUG]No Honors"
        out.close()
def convert_db(from_db, to_db):
    """One-off migration: copy result documents from ``from_db`` into
    ``to_db``, renaming legacy fields and backfilling scores.

    Documents whose _id already exists in ``to_db`` are skipped.
    """
    from database.mongo import Mongo
    from plucky import plucks
    from processing.statuses import Status
    mongo = Mongo()

    def rename(document, old_name, new_name):
        # in-place key rename (no-op when old_name is absent)
        if old_name in document:
            document[new_name] = document[old_name]
            del document[old_name]
        return document

    def delete(document, old_name):
        # in-place key delete (no-op when absent)
        if old_name in document:
            del document[old_name]
        return document

    def compute_score(statuses):
        # total score plus per-code counters (100 / 101 / 200-201)
        return dict(
            score=sum(plucks(statuses, 'score')),
            scores=[
                len([s.score for s in statuses if s.code == 100]),
                len([s.score for s in statuses if s.code == 101]),
                len([s.score for s in statuses if s.code in (200, 201)]),
            ])

    processed_ids = [
        str(x['_id'])
        for x in mongo.db.get_collection(to_db).find({}, {'_id': 1})
    ]
    items = mongo.db.get_collection(from_db).find()
    updated = list()
    for item in items:
        # skip already processed items
        if str(item['_id']) in processed_ids:
            continue

        rename(item, 'language', 'lang')
        rename(item, 'tests', 'results')
        delete(item, 'datetime')
        result = item.get('result', {})

        # synthesize an attempt number from the ObjectId creation time
        if 'attempt' not in item:
            item['attempt'] = int('{:%Y%H%M%S}'.format(
                item['_id'].generation_time))

        # backfill per-case and total scores for solve actions
        if item.get('action') == 'solve' and 'score' not in result:
            results = item.get('results', [])
            for r in results:
                if 'score' not in r and 'status' in r:
                    r.update(compute_score([Status[r.get('status')]]))
            statuses = list(map(Status.get, plucks(results, 'status')))
            result.update(compute_score(statuses))

        # normalize the legacy summary-row label
        if 'id' in result:
            if str(result['id']).upper() in ('FINAL RESULT', 'EVALUATION'):
                result['id'] = 'Result'
        updated.append(item)

    if updated:
        ack = mongo.db.get_collection(to_db).insert_many(updated)
        print(ack)
        print(ack.acknowledged)
        print(len(ack.inserted_ids))
def student_submit_solution(data):
    """Validate, queue and execute a student's solution-processing request.

    *data* is the submit-event payload; keys used here: ``type`` (a
    ProcessRequestType value), ``lang``, ``prob``, ``course``, ``src`` and
    optional ``docker`` (default True).  Emits progress/error events via
    Emittor, persists log and result documents when the database is enabled,
    and returns nothing.
    """
    print(data)
    user = User(session['user'])
    # Read the raw value first: the original indexed data['type'] inside the
    # except handler, so a missing 'type' key raised KeyError *in the error
    # path* instead of producing the error message.
    raw_type = data.get('type')
    try:
        action = ProcessRequestType(str(raw_type))
        skip_docker = not data.get('docker', True)
    except ValueError:
        # unknown/missing action value (str(None) is not a valid member either)
        Emittor.error('Unsupported action', [
            'Given action is not supported:',
            ' %s' % raw_type, '',
            'Please contact [email protected]',
            'if you think this is a mistake.'
        ])
        return

    # non-admins may neither skip docker nor (re)generate reference data
    if not user.is_admin() and (skip_docker or action in (
            ProcessRequestType.GENERATE_INPUT,
            ProcessRequestType.GENERATE_OUTPUT)):
        Emittor.error('Operation not permitted', [
            'You do not have sufficient privileges to perform action:',
            ' %s (skip docker: %s)' % (action, skip_docker), '',
            'Please contact [email protected]',
            'if you want to gain the privileges.'
        ])
        return

    request = processing.request.ProcessRequest(
        user=user,
        lang=data['lang'],
        problem=data['prob'],
        course=data['course'],
        src=data['src'],
        type=action,
        # docker is only skipped for admins that explicitly asked for it
        docker=not (skip_docker and user.is_admin()),
    )

    if Env.use_database:
        Mongo().save_log(request.get_log_dict())

    # ignore problems which are past due
    if request.problem.time_left < 0:
        return

    Emittor.register_events(request)
    Emittor.queue_status(queue_status())
    time.sleep(0.1)
    queue.append(request)
    Emittor.queue_push(request)
    time.sleep(0.1)

    # put a barrier here so only a certain amount of users can process code
    # at once while the others see the queue list
    with thread_lock:
        try:
            request.process()
        except ConfigurationException as e:
            if user.is_admin():
                logger.exception(
                    '[visible to admin only] invalid yaml config')
            Emittor.exception(e)
        except Exception as e:
            logger.exception('process error:')
            Emittor.exception(e)
        finally:
            # always persist and tear down, even on failure
            output_dir, attempt = request.save_result()
            if Env.use_database:
                Mongo().save_result(
                    request.get_result_dict(),
                    output_dir=output_dir,
                    attempt=attempt,
                )
            request.destroy()

    queue.remove(request)
    Emittor.queue_pop(request)
def view_result(course_name, course_year, problem_id, _id=None):
    """Render a student's results page for one problem.

    Admins are redirected to the admin problem view.  If *_id* is given,
    that document is shown as the live "result" when it has no final result
    yet, otherwise it joins the history list.  Up to 10 previous valid
    results are loaded from the database and sorted by attempt number,
    newest first; the selected one is marked active.
    """
    user = User(session['user'])
    if user.is_admin():
        return redirect(
            url_for('admin_problem',
                    course_name=course_name,
                    course_year=course_year,
                    problem_id=problem_id))

    course = Courses().find_one(name=course_name,
                                year=course_year,
                                only_active=False)
    problem = course.problem_db[problem_id]
    results = list()
    result = None
    breadcrumbs = [Link.CoursesBtn(), Link.CourseBtn(course)]

    # TODO check access
    if _id:
        document = Mongo().result_by_id(_id)
        if document:
            # add to previous solution if already executed
            if document.result:
                results.append(document)
            else:
                result = document

    breadcrumbs.append(Link.ProblemBtn(course, problem))

    if Env.use_database:
        for prev in Mongo().peek_last_n_results(10, user.id, course.id,
                                                problem.id):
            # push only valid result
            if prev.result and str(prev._id) != str(_id):
                results.append(prev)

    # highlight the requested document in the history list
    if _id:
        for r in results:
            if str(r._id) == str(_id):
                r.active = 'active'

    def get_attempt(obj):
        # Sort key: attempt number; documents with a missing or malformed
        # attempt sort last (0).  Narrowed from a bare `except:` so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        try:
            return int(obj.attempt)
        except (TypeError, ValueError, AttributeError):
            return 0

    results = sorted(results, reverse=True, key=get_attempt)

    return render_template_ext(
        'view_result.njk',
        user=user,
        notifications=Mongo().load_notifications(user.id),
        results=results,
        result=result,
        requestReview=True,
        title='Problem %s' % problem.name,
        breadcrumbs=Breadcrumbs.new(*breadcrumbs),
        js=[
            '//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.15.6/highlight.min.js',
            '/static/js/lib/highlightjs-line-numbers.js'
        ],
        js_no_cache=['sockets.js', 'process.js'])
class BaseFeatureMonitor(object):
    """Base class for model-feature PSI (population stability index) monitoring.

    Workflow: pull model metadata from MySQL, download pickled models over
    SFTP and extract each model's top features, fetch feature values for the
    relevant application ids from MongoDB, compute PSI between a reference
    window (the 10 preceding days) and the monitored day, and push alert
    lines to a DingDing robot when any feature's PSI exceeds 0.25.

    Subclasses are expected to fill in ``mysql_risk_table`` /
    ``mongo_derivable_table`` and implement ``get_model_path_from_mysql``.
    """

    def __init__(self, config_file=None, product=None):
        # parsed YAML configuration (see set_config)
        self.config = self.set_config(config_file)
        # MySQL connection for risk data; table name is set by subclasses
        self.mysql_risk = MySql(**self.config['mysql_risk'])
        self.mysql_risk_table = None
        # MongoDB connection holding derived feature values; collection name
        # is set by subclasses
        self.mongo_derivable = Mongo(**self.config['mongo_derivable'])
        self.mongo_derivable_table = None
        # DingDing robot(s) used to broadcast PSI alerts
        self.except_handler = DingdingExceptionHandler(
            self.config['robots_psi'])
        self.product = product
        # SSH credentials for fetching model pickle files from the remote host
        self.ssh_config = self.config['model_file_remote_ssh']

    def set_config(self, config_file):
        """Read *config_file* and return the parsed YAML mapping."""
        with open(config_file, 'r') as f:
            file = f.read()
        # NOTE(review): yaml.load without an explicit Loader is deprecated and
        # unsafe on untrusted input — consider yaml.safe_load.
        config = yaml.load(file)
        return config

    def get_model_path_from_mysql(self, table=None):
        """Return model-path records from MySQL; implemented by subclasses."""
        pass

    def get_top_features(self):
        """Return a DataFrame with columns ``monitor_flag`` and
        ``top_features``: for each flag, the union of the top-30 features of
        every model registered for it."""
        model_path_list = self.get_model_path_from_mysql()
        model_path_df = pd.DataFrame(model_path_list)
        # one row per monitor_flag with the unique model paths for that flag
        group_df = model_path_df.groupby('monitor_flag').apply(
            lambda x: x.model_path.unique()).rename(
                'model_path_list').reset_index()
        group_df['top_features'] = group_df['model_path_list'].map(
            lambda x: self.top30_features(x))
        return group_df[['monitor_flag', 'top_features']]

    def top30_features(self, model_path):
        """Download each pickled model in *model_path* over SFTP, extract its
        most important feature names, and return their deduplicated union
        minus a blacklist of submit-time features."""
        final_features = []
        # connect to the remote server
        ssh_client = paramiko.Transport(self.ssh_config['hostname'],
                                        self.ssh_config['port'])
        ssh_client.connect(username=self.ssh_config['username'],
                           password=self.ssh_config['password'])
        sftp = paramiko.SFTPClient.from_transport(ssh_client)
        for remote_model_path in model_path:
            # remote_model_path = model_dict['model_path']
            # create the local directory for the model file if it is missing
            if not os.path.isdir(os.path.split(remote_model_path)[0]):
                os.makedirs(os.path.split(remote_model_path)[0])
            # download the remote file to the same path locally
            sftp.get(remote_model_path, remote_model_path)
            with open(remote_model_path, 'rb') as f:
                model_info = pickle.load(f)
            top_columns = []
            try:
                model = model_info['model']
                enum = model.get_params()['enum']
                mm = model.get_params()['clf']
                top_columns = []
                # sort (importance, column) pairs descending, keep the head
                # NOTE(review): `i <= 30` keeps 31 entries, not 30 — confirm.
                for i, v in enumerate(
                        sorted(zip(
                            map(lambda x: round(x, 4),
                                mm.feature_importances_),
                            enum.clean_col_names),
                               reverse=True)):
                    if i <= 30:
                        top_columns.append(v[1])
            except Exception as e:
                logging.error(e)
            final_features.extend(top_columns)
        sftp.close()
        no_final_features = [
            'ALPHA_Behavior_submit_date', 'ALPHA_Behavior_submit_hour',
            'ALPHA_Behavior_submit_weekday', 'X_DNA_Behavior_submit_date',
            'X_DNA_Behavior_submit_hour', 'X_DNA_Behavior_submit_weekday'
        ]  # these are excluded from monitoring
        final_features = list(set(final_features) - set(no_final_features))
        # logging.info('{}-top_features: {}'.format(self.product, final_features))
        return final_features

    def get_appid_from_mysql(self, start_time, diff_day, diff_hour):
        """Fetch all application ids for the 11-day monitoring window.

        Selects rows created in [start_time, start_time + diff_day days)
        whose hour-of-day lies in [0, diff_hour]; returns a DataFrame with
        app_id, flow_type, work_flag and date columns.
        """
        end_time = (datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S') +
                    timedelta(days=diff_day)).strftime("%Y-%m-%d %H:%M:%S")
        start_hour = 0
        end_hour = start_hour + diff_hour
        sql = '''select upper(app_id) as app_id,flow_type,work_flag,date(create_time) as date
        from {} where create_time >= '{}' and create_time < '{}'
        and hour(create_time) >= {} and hour(create_time) <= {} '''.format(
            self.mysql_risk_table, start_time, end_time, start_hour, end_hour)
        res = self.mysql_risk.query(sql)
        return pd.DataFrame(res)

    def get_features(self, df_appid, top_feature):
        """Load the given features for the app ids in *df_appid* from Mongo
        and return them as a DataFrame."""
        appids = list(set(df_appid['app_id'].tolist()))
        qry = {'_id': {'$in': appids}}
        qry1 = {feature: 1 for feature in top_feature}  # projection
        res = self.mongo_derivable.get_collection(
            self.mongo_derivable_table).find(qry, qry1, batch_size=500)
        res_list = list(res)
        return pd.DataFrame(res_list)

    @staticmethod
    def cal_psi(x, y):
        """PSI contribution of one bin: (x - y) * ln(x / y), smoothed with a
        small epsilon when either rate is zero."""
        e = 0.00001
        if x == 0 or y == 0:
            psi = (x - y) * math.log((x + e) / (y + e))
        else:
            psi = (x - y) * math.log(x / y)
        return round(psi, 3)

    def psi(self, df_feature_1, df_feature_2, feature, bin_num=10):
        """Compute the PSI of *feature* between the two frames.

        Labels frame 1 as the reference population (0) and frame 2 as the
        monitored one (1), bins the feature (quantile bins, NaNs in their own
        pseudo-bin) and returns (per-bin DataFrame, psi).  Sentinels:
        (None, 999) for a high-cardinality non-numeric feature, (None, 99)
        when the column is absent after enum mapping.  Note ``bin_num`` is
        currently unused — qcut is hard-coded to 10 bins.
        """
        df_feature_1['label'] = 0   # reference population
        df_feature_2['label'] = 1   # monitored population
        df_feature = pd.concat([df_feature_1, df_feature_2])
        df_feature = df_feature.replace('null', np.nan)
        df_feature = df_feature.replace('NaN', np.nan)
        df_feature = df_feature.apply(pd.to_numeric, errors='ignore')
        enum = EnumMapper(maximum_enum_num=100)
        enum.fit(df_feature)
        df_feature = enum.transform(df_feature)
        if feature in df_feature.columns.tolist():
            df_psi = df_feature[[feature, 'label']].copy()
            if df_psi[feature].dtype not in [
                    'int', 'float'
            ] and df_psi[feature].unique().shape[0] > 20:
                # print("The unique number of feature is {}".format(df_psi[feature].unique().shape[0]))
                return None, 999
            else:
                if df_psi[feature].unique().shape[0] > 2:
                    df_psi['bins'] = pd.qcut(df_psi[feature],
                                             10,
                                             precision=2,
                                             duplicates='drop')
                    nan_df = df_psi[df_psi[feature].map(
                        lambda x: pd.isnull(x))].reset_index(drop=True)
                    if not nan_df.empty:
                        # NaNs get their own pseudo-bin
                        df_psi['bins'] = df_psi['bins'].cat.add_categories(
                            '(-999.1, -999]')
                        df_psi['bins'] = df_psi['bins'].fillna(
                            '(-999.1, -999]')
                else:
                    # binary/constant feature: use raw values, NaN -> -999
                    df_psi['bins'] = df_psi[feature].map(
                        lambda x: -999 if pd.isnull(x) else x)
                group_df = df_psi.groupby(['bins',
                                           'label']).size().unstack('label')
                group_df = group_df.fillna(0)
                group_df['b_rate'] = group_df[0] / group_df[0].sum()  # before
                group_df['a_rate'] = group_df[1] / group_df[1].sum()  # after
                # NOTE(review): DataFrame.map requires pandas >= 2.1 (older
                # versions spell this applymap) — confirm deployed version.
                group_df = group_df.map(lambda x: round(x, 4))
                group_df['psi_part'] = list(
                    map(lambda x, y: self.cal_psi(x, y), group_df.b_rate,
                        group_df.a_rate))
                group_df = group_df.apply(lambda x: round(x, 3))
                group_df = group_df.reset_index()
                return group_df, group_df.psi_part.sum()
        else:
            return None, 99

    @staticmethod
    def define_mf(x, y):
        """Map (flow_type, work_flag) to a short monitor flag, e.g.
        ('c', 'precheck') -> 'cp'; returns None for unknown combinations."""
        if x == 'c' and y == 'precheck':
            return 'cp'
        elif x == 'c' and y == 'finalcheck':
            return 'cf'
        elif x == 'f' and y == 'precheck':
            return 'fp'
        elif x == 'f' and y == 'finalcheck':
            return 'ff'
        elif x == 'w' and y == 'precheck':
            return 'wp'
        elif x == 'w' and y == 'finalcheck':
            return 'wf'
        elif x == 'q' and y == 'finalcheck':
            return 'q'

    def psi_classified(self, start_time, diff_day, diff_hour, timedetail):
        """Run PSI monitoring per monitor_flag category and send one DingDing
        alert message when any feature drifts."""
        total_appids_df = self.get_appid_from_mysql(
            start_time, diff_day, diff_hour)  # all app ids for the 11-day window
        total_appids_df.date = total_appids_df.date.map(
            lambda x: str(x))  # normalize the date column to str
        total_appids_df['monitor_flag'] = list(
            map(lambda x, y: self.define_mf(x, y), total_appids_df.flow_type,
                total_appids_df.work_flag))
        cp_ls_top_psi = []  # PSI alert lines to send to DingDing
        features_df = self.get_top_features()
        for monitor_flag in total_appids_df.monitor_flag.unique().tolist():
            top_features = features_df.query(
                "monitor_flag=='{}'".format(monitor_flag)).top_features.values
            if top_features:
                top_psi = self.psi_distr(start_time, total_appids_df,
                                         top_features[0], monitor_flag)
                # section header per business stage (card pre/re-check,
                # first-loan withdrawal pre/re-check, repeat-loan pre/re-check,
                # settled quota adjustment)
                if monitor_flag == 'cp' and top_psi:
                    cp_ls_top_psi.append('=======开卡初审=======')
                elif monitor_flag == 'cf' and top_psi:
                    cp_ls_top_psi.append('=======开卡复审=======')
                elif monitor_flag == 'fp' and top_psi:
                    cp_ls_top_psi.append('=======首贷提现初审=======')
                elif monitor_flag == 'ff' and top_psi:
                    cp_ls_top_psi.append('=======首贷提现复审=======')
                elif monitor_flag == 'wp' and top_psi:
                    cp_ls_top_psi.append('=======复贷初审=======')
                elif monitor_flag == 'wf' and top_psi:
                    cp_ls_top_psi.append('=======复贷复审=======')
                elif monitor_flag == 'q' and top_psi:
                    cp_ls_top_psi.append('=======结清调额=======')
                else:
                    pass
                cp_ls_top_psi.extend(top_psi)
        logging.info('warming psi list: {}'.format(cp_ls_top_psi))
        if cp_ls_top_psi:
            # prepend the product banner and timestamp, then push to DingDing
            cp_ls_top_psi.insert(0,
                                 '*******{}-psi报警*******'.format(self.product))
            cp_ls_top_psi.insert(
                1, '时间:{}'.format(datetime.now().strftime('%Y-%m-%d ') +
                                  timedetail))
            self.except_handler.handle(msg=cp_ls_top_psi)

    def psi_distr(self, start_time, total_appids_df, top_features,
                  monitor_flag):
        """Compare feature distributions between the monitored day and the 10
        preceding days for one monitor_flag; return alert lines for features
        whose PSI exceeds 0.25."""
        the_psi_date = (datetime.strptime(start_time, '%Y-%m-%d %H:%M:%S') +
                        timedelta(days=10)).strftime('%Y-%m-%d')  # monitored date
        logging.info('所监控的日期为:{}'.format(the_psi_date))
        # app ids of this type for the 10 days preceding the monitored date
        # (sampled down to at most 10000)
        df_appid1 = total_appids_df.query(
            "monitor_flag=='{}' and date!='{}'".format(
                monitor_flag, the_psi_date)).reset_index(drop=True)
        df_appid1 = df_appid1.sample(min(10000, df_appid1.shape[0]))
        logging.info('monitor_flag:{} 前十天全部app_id个数:{}'.format(
            monitor_flag, len(df_appid1)))
        # app ids of this type on the monitored date (at most 1000)
        df_appid2 = total_appids_df.query(
            "monitor_flag=='{}' and date=='{}'".format(
                monitor_flag, the_psi_date)).reset_index(drop=True)
        df_appid2 = df_appid2.sample(min(1000, df_appid2.shape[0]))
        logging.info('monitor_flag:{} 所监控的app_id个数:{}'.format(
            monitor_flag, len(df_appid2)))
        ls_top_psi = []
        df_feature_all_1 = self.get_features(df_appid1, top_features)
        df_feature_all_2 = self.get_features(df_appid2, top_features)
        psi_dict = {}
        for feature in top_features:
            df_feature_1 = pd.DataFrame(df_feature_all_1, columns=[feature])
            df_feature_2 = pd.DataFrame(df_feature_all_2, columns=[feature])
            df, psi = self.psi(df_feature_1, df_feature_2, feature,
                               bin_num=10)
            psi_dict.update({feature: psi})
            # NOTE(review): the sentinel values 99/999 returned by self.psi
            # also satisfy `psi > 0.25` with df=None, so df['bins'] below
            # would raise — confirm whether sentinels should be skipped here.
            if psi > 0.25:
                ls_top_psi.append("{}--psi:{}".format(feature, round(psi, 3)))
                df['bins'] = df['bins'].map(lambda x: str(x))
                # bin contributing the most to the PSI
                max_index = df.query("psi_part=={}".format(
                    df.psi_part.max()))['bins'].values[0]
                str_text = ''
                if str(max_index) == '(-999.1, -999]':
                    str_text += '原因:缺失值变化导致, '
                else:
                    str_text += '原因:区间{}变化所致, '.format(max_index)
                if df.query("bins=='{}'".format(max_index)).a_rate.values[0] > \
                        df.query("bins=='{}'".format(max_index)).b_rate.values[0]:
                    str_text += '当前比例大于过去比例。'
                else:
                    str_text += '当前比例小于过去比例。'
                ls_top_psi.append(str_text)
                ls_top_psi.append('==' * 18)
                ls_top_psi.append(str(df))
                ls_top_psi.append('==' * 18)
        logging.info('{} calculate psi done :{}'.format(
            monitor_flag, psi_dict))
        return ls_top_psi

    # previous day
    def job1(self):
        """Compare yesterday's top-feature distribution (full day) with that
        of the 10 days before it."""
        logging.info('{} start handle psi_monitor job1!'.format(self.product))
        start_time = (datetime.now() - timedelta(days=11)).strftime(
            '%Y-%m-%d') + ' 00:00:00'  # window start: 10 reference days before the monitored day
        diff_day = 11  # fetch 11 days of data from the start time
        diff_hour = 24  # fetch hours 0-24 of each day
        self.psi_classified(start_time,
                            diff_day,
                            diff_hour,
                            timedetail='前一天0-24时分布变化')
        logging.info('{} end handle psi_monitor job1!'.format(self.product))

    # current day
    def job2(self):
        """Compare today's (00:00-16:00) top-feature distribution with the
        same hours over the previous 10 days."""
        logging.info('{} start handle feature_monitor job2!'.format(
            self.product))
        start_time = (datetime.now() - timedelta(days=10)).strftime(
            '%Y-%m-%d') + ' 00:00:00'  # window start: 10 reference days before today
        diff_day = 11  # fetch 11 days of data from the start time
        diff_hour = 15  # fetch hours 0-15 (i.e. before 16:00) of each day
        self.psi_classified(start_time,
                            diff_day,
                            diff_hour,
                            timedetail='当天0-16时分布变化')
        logging.info('{} end handle psi_monitor job2!'.format(self.product))

    # current day
    def job3(self):
        """Compare today's (00:00-18:00) top-feature distribution with the
        same hours over the previous 10 days."""
        # NOTE(review): this log line says "job2" — likely a copy-paste slip
        # in the runtime string; left unchanged here.
        logging.info('{} start handle feature_monitor job2!'.format(
            self.product))
        start_time = (datetime.now() -
                      timedelta(days=10)).strftime('%Y-%m-%d') + ' 00:00:00'
        diff_day = 11  # fetch 11 days of data from the start time
        diff_hour = 17  # fetch hours 0-17 (i.e. before 18:00) of each day
        self.psi_classified(start_time,
                            diff_day,
                            diff_hour,
                            timedetail='当天0-18时分布变化')
        logging.info('{} end handle psi_monitor job3!'.format(self.product))