def equity_interest_pledge_too_high(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                                    database: DatabaseEntry, context: AnalysisContext,
                                    **kwargs) -> AnalysisResult:
    """Score a security by how high its recent equity pledge ratio has been.

    Pledge status rows queried for (years_ago(2), now()) are visited from the
    newest ``due_date`` to the oldest.  A row is only evaluated when its pledge
    count ('质押次数') differs from the previously visited row's.  The result
    keeps the worst score seen: 0 if any evaluated ratio is above 50, 60 if any
    is above 20, otherwise 100.

    ``time_serial`` is not used; ``database``, ``context`` and ``kwargs`` are
    only handed to ``nop``.

    Returns an AnalysisResult.  SCORE_NOT_APPLIED is used when the readable
    field names are rejected by the data center or the query yields no rows.
    """
    nop(database, context, kwargs)

    query_fields = ['质押次数', '无限售股质押数量', '限售股份质押数量', '总股本', '质押比例']
    if not data_hub.get_data_center().check_readable_name(query_fields):
        return AnalysisResult(securities, None, AnalysisResult.SCORE_NOT_APPLIED,
                              '无法识别的字段名', '无法识别的字段名')

    df = data_hub.get_data_center().query(
        'Stockholder.PledgeStatus', securities, (years_ago(2), now()),
        fields=query_fields + ['stock_identity', 'due_date'], readable=True)
    if df is None or len(df) == 0:
        return AnalysisResult(securities, None, AnalysisResult.SCORE_NOT_APPLIED,
                              '没有数据', '没有数据')

    df = df.sort_values('due_date', ascending=False)

    score = 100
    high_pledge_records = []
    previous_pledge_times = 0
    for _, row in df.iterrows():
        pledge_times = row['质押次数']
        if pledge_times == previous_pledge_times:
            # Same pledge count as the previous row: not a new pledge event.
            continue
        previous_pledge_times = pledge_times

        pledge_rate = row['质押比例']
        if pledge_rate > 20.0:
            # Keep the worst score seen so far.  Previously the ">20" branch
            # overwrote the 0 assigned by the ">50" branch, and a later, milder
            # record could raise the score again.
            score = min(score, 0 if pledge_rate > 50.0 else 60)
            high_pledge_records.append(
                '%s: 质押比例:%.2f%%' % (str(row['due_date'].date()), pledge_rate))

    # NOTE(review): the messages say "近4年" while the query window above is two
    # years - confirm which one is intended.
    # NOTE(review): as before, only the summary line is reported; the per-record
    # details are collected for counting but not returned.
    if high_pledge_records:
        brief = '近4年有%s次超过20%%的质押记录' % len(high_pledge_records)
    else:
        # Plain literal (no % formatting applied), so a single '%' is correct.
        brief = '近4年没有超过20%的质押记录'
    return AnalysisResult(securities, None, score, brief, brief)
def analysis_investigation(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                           database: DatabaseEntry, context: AnalysisContext,
                           **kwargs) -> AnalysisResult:
    """Fail a security that has investigation records newer than years_ago(5).

    The 'Market.Investigation' query result is stored in ``context.cache``
    under 'investigation' and reused on later calls.  When
    ``check_gen_report_when_data_missing`` returns a report, that report is
    returned unchanged.  Otherwise the score is 0 if at least one matching
    record exists and 100 if none does; every record is listed in the reason.
    """
    nop(time_serial, database, )

    cache = context.cache
    if cache.get('investigation', None) is None:
        cache['investigation'] = data_hub.get_data_center().query('Market.Investigation')
    df = cache.get('investigation', None)

    missing_report = check_gen_report_when_data_missing(
        df, securities, 'Market.Investigation',
        ['stock_identity', 'investigate_date', 'investigate_topic', 'investigate_reason'])
    if missing_report is not None:
        return missing_report

    own_records = df[df['stock_identity'] == securities]
    recent_records = own_records[own_records['investigate_date'] > years_ago(5)]

    reason = [
        '%s: <<%s>> -- %s' % (date2text(record['investigate_date']),
                              record['investigate_topic'],
                              record['investigate_reason'])
        for _, record in recent_records.iterrows()
    ]

    # Any record at all zeroes the score.
    score = 0 if reason else 100
    if not reason:
        reason.append('近四年无立案调查记录')

    # NOTE(review): other analyzers pass a brief text in this last position -
    # confirm that the weight constant belongs here.
    return AnalysisResult(securities, None, score, reason, AnalysisResult.WEIGHT_ONE_VOTE_VETO)
def analysis_location_limitation(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                                 database: DatabaseEntry, context: AnalysisContext,
                                 **kwargs) -> [AnalysisResult]:
    """Report the security's area and whether it is on the exclusion list.

    The 'Market.SecuritiesInfo' query result is stored in ``context.cache``
    under 'securities_info' and reused on later calls.  The exclusion list is
    empty here, so no area is excluded unless the list is filled in.
    """
    nop(kwargs)
    nop(database)
    nop(time_serial)

    cache = context.cache
    if cache.get('securities_info', None) is None:
        cache['securities_info'] = data_hub.get_data_center().query('Market.SecuritiesInfo')
    info = cache.get('securities_info', None)

    own_info = info[info['stock_identity'] == securities]
    area = get_dataframe_slice_item(own_info, 'area', 0, '')

    # Add your exclude area here
    excluded_areas = []
    exclude = area in excluded_areas

    # Listing the stock code here helps resolve cases where a report line's
    # stock and its result do not match.
    reason = '%s: 地域为%s' % (securities, str(area))
    brief = '排除' if exclude else '正常'
    return AnalysisResult(securities, None, not exclude, reason, brief)
def analysis_current_and_quick_ratio(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                                     database: DatabaseEntry, context: AnalysisContext,
                                     **kwargs) -> [AnalysisResult]:
    """Score each December period on its current ratio and quick ratio.

    Each period starts at 100 and loses 50 when '流动比率' is below 2.0 and
    another 50 when '速动比率' is below 1.0.  One AnalysisResult per period is
    returned in a list; when the factor query yields nothing, a single
    SCORE_NOT_APPLIED result is returned instead of a list.
    """
    nop(database, context, kwargs)

    df = data_hub.get_data_center().query_from_factor(
        'Factor.Finance', securities, time_serial,
        fields=['流动比率', '速动比率'], readable=True)
    if df is None or len(df) == 0:
        return AnalysisResult(securities, None, AnalysisResult.SCORE_NOT_APPLIED, '')

    # (field, minimum, message when below the minimum, message otherwise)
    checks = (
        ('流动比率', 2.0, '%s: 流动比率为%.2f < 2.0', '%s: 流动比率为%.2f - 合格'),
        ('速动比率', 1.0, '%s: 速动比率为%.2f < 1.0', '%s: 速动比率为%.2f - 合格'),
    )

    # Annual report
    annual = df[df['period'].dt.month == 12]

    results = []
    for _, row in annual.iterrows():
        period = row['period']
        score = 100
        reason = []
        for field, minimum, below_text, ok_text in checks:
            if row[field] < minimum:
                score -= 50
                reason.append(below_text % (str(period), row[field]))
            else:
                reason.append(ok_text % (str(period), row[field]))
        results.append(AnalysisResult(securities, period, score, reason))
    return results
def analysis_inquiry(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                     database: DatabaseEntry, context: AnalysisContext,
                     **kwargs) -> AnalysisResult:
    """Lower the score when the security received sensitive enquiry letters.

    Enquiries newer than years_ago(5) whose topic contains '问询函' or
    '关注函' set the score to 59 and are listed in the reason.  An empty query
    result is reported as SCORE_PASS, with a note that data may be missing.
    When ``check_gen_report_when_data_missing`` returns a report, that report
    is returned unchanged.
    """
    nop(time_serial, database, )

    df = data_hub.get_data_center().query('Market.Enquiries', securities)
    if df is None or len(df) == 0:
        return AnalysisResult(securities, None, AnalysisResult.SCORE_PASS,
                              '四年内无问询记录(也可能是数据缺失)')

    error_report = check_gen_report_when_data_missing(
        df, securities, 'Market.Enquiries',
        ['stock_identity', 'enquiry_date', 'enquiry_topic'])
    if error_report is not None:
        return error_report

    df_slice = df[df['stock_identity'] == securities]
    recent_enquiries = df_slice[df_slice['enquiry_date'] > years_ago(5)]

    score = 100
    reason = []
    for _, row in recent_enquiries.iterrows():
        enquiry_topic = row['enquiry_topic']
        # A missing topic (NaN / None) is not a string; the substring test
        # below would raise TypeError on it, so such rows are skipped.
        if not isinstance(enquiry_topic, str):
            continue
        if '问询函' in enquiry_topic or '关注函' in enquiry_topic:
            score = 59
            # 'enquiry_title' is not among the columns validated above, so it
            # is read defensively instead of raising KeyError when absent.
            enquiry_title = row.get('enquiry_title', '')
            reason.append('%s: <<%s>> -- %s' %
                          (date2text(row['enquiry_date']), enquiry_topic, enquiry_title))

    if not reason:
        reason.append('近四年无敏感问询')
    return AnalysisResult(securities, None, score, reason)
def analysis_finance_report_sign(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                                 database: DatabaseEntry, context: AnalysisContext,
                                 **kwargs) -> [AnalysisResult]:
    """Grade every audited period of a security by its audit conclusion.

    The 'Finance.Audit' query result (all securities, ``time_serial`` window)
    is stored in ``context.cache`` under 'finance_audit' and reused on later
    calls.  Per period: a null conclusion gives SCORE_NOT_APPLIED, anything
    other than '标准无保留意见' gives SCORE_FAIL, and the standard conclusion
    gives SCORE_PASS with an empty reason.  No time window is applied here.
    When ``check_gen_report_when_data_missing`` returns a report, that report
    is returned as-is instead of a list.
    """
    nop(database, kwargs)

    cache = context.cache
    if cache.get('finance_audit', None) is None:
        cache['finance_audit'] = data_hub.get_data_center().query('Finance.Audit', None, time_serial)
    audit = cache.get('finance_audit', None)

    missing_report = check_gen_report_when_data_missing(
        audit, securities, 'Finance.Audit', ['stock_identity', 'period', 'conclusion'])
    if missing_report is not None:
        return missing_report

    own_audit = audit[audit['stock_identity'] == securities]

    results = []
    for _, record in own_audit.iterrows():
        period = record['period']
        conclusion = record['conclusion']
        reason = []

        # The null test must come first: a NaN conclusion would otherwise be
        # treated as a non-standard opinion.
        if pd.isnull(conclusion):
            score = AnalysisResult.SCORE_NOT_APPLIED
            reason.append(date2text(period) + ' : No sign data.')
        elif conclusion == '标准无保留意见':
            score = AnalysisResult.SCORE_PASS
        else:
            score = AnalysisResult.SCORE_FAIL
            reason.append(date2text(period) + ' : ' + conclusion)

        results.append(AnalysisResult(securities, period, score, reason,
                                      AnalysisResult.WEIGHT_ONE_VOTE_VETO))
    return results
def analysis_finance_report_sign(securities: str, data_hub: DataHubEntry,
                                 database: DatabaseEntry, context: AnalysisContext) -> AnalysisResult:
    """Fail a security whose recent audit conclusions are not all standard.

    The 'Finance.Audit' query result is stored in ``context.cache`` under
    'finance_audit' and reused on later calls.  Periods newer than
    years_ago(5) are inspected; any conclusion other than '标准无保留意见'
    sets the score to 0 and is listed in the reason.  When
    ``check_gen_report_when_data_missing`` returns a report, that report is
    returned unchanged.
    """
    nop(database)

    cache = context.cache
    if cache.get('finance_audit', None) is None:
        cache['finance_audit'] = data_hub.get_data_center().query('Finance.Audit')
    audit = cache.get('finance_audit', None)

    missing_report = check_gen_report_when_data_missing(
        audit, securities, 'Finance.Audit', ['stock_identity', 'period', 'conclusion'])
    if missing_report is not None:
        return missing_report

    own_audit = audit[audit['stock_identity'] == securities]
    recent_audit = own_audit[own_audit['period'] > years_ago(5)]

    reason = []
    for _, record in recent_audit.iterrows():
        period = record['period']
        conclusion = record['conclusion']
        if conclusion != '标准无保留意见':
            reason.append(date2text(period) + ' : ' + conclusion)

    # Only non-standard conclusions have been collected so far, so one entry
    # is enough to fail.
    score = 0 if reason else 100
    if not reason:
        reason.append('近四年均为标准无保留意见')
    return AnalysisResult(securities, score, reason, AnalysisResult.WEIGHT_ONE_VOTE_VETO)
def analysis_dispersed_ownership(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                                 database: DatabaseEntry, context: AnalysisContext,
                                 **kwargs) -> AnalysisResult:
    """Fail a security whose largest top-10 shareholder holds less than 0.1.

    December periods from the (years_ago(3), now()) query are visited newest
    first.  Periods whose 'stockholder_top10' is not a 10-element list/tuple
    are skipped.  For each remaining period the holder with the largest
    'hold_ratio' is reported; a largest ratio below 0.1 sets the score to 0.
    A period whose largest ratio stays 0.0 ends the analysis immediately with
    SCORE_NOT_APPLIED.  SCORE_NOT_APPLIED is also returned when no holder
    record carrying both required keys was seen.
    """
    nop(database, context, kwargs)

    df = data_hub.get_data_center().query(
        'Stockholder.Statistics', securities, (years_ago(3), now()), )
    if df is None or len(df) == 0:
        return AnalysisResult(securities, None, AnalysisResult.SCORE_NOT_APPLIED, '没有数据')

    year_end = df[df['period'].dt.month == 12]
    year_end = year_end.sort_values('period', ascending=False)

    required_keys = ('hold_ratio', 'holder_name')
    score = 100
    reason = []
    applied = False

    for _, row in year_end.iterrows():
        period = row['period']
        top10 = row['stockholder_top10']
        # Read but not used afterwards; the lookup is kept so that a missing
        # column surfaces exactly as it did before.
        top10_nt = row['stockholder_top10_nt']

        if not isinstance(top10, (list, tuple)) or len(top10) != 10:
            continue

        largest_ratio, biggest_holder = 0.0, ''
        for holder in top10:
            # Stop at the first record lacking a required key; records seen
            # before it still count.
            if any(key not in holder.keys() for key in required_keys):
                break
            applied = True
            ratio = holder.get('hold_ratio')
            if ratio > largest_ratio:
                largest_ratio = ratio
                biggest_holder = holder.get('holder_name')

        if largest_ratio == 0.0:
            return AnalysisResult(securities, None, AnalysisResult.SCORE_NOT_APPLIED,
                                  '缺少必要数据,请确保数据包含tushare pro数据源')

        if largest_ratio < 0.1:
            score = 0
            text = '%s: 最大股东 %s 持股比例为%.2f%%,小于10%%'
        else:
            text = '%s: 最大股东 %s 持股比例为%.2f%%'
        reason.append(text % (str(period), biggest_holder, largest_ratio * 100))

    if not reason:
        reason.append('没有数据')

    final_score = score if applied else AnalysisResult.SCORE_NOT_APPLIED
    return AnalysisResult(securities, None, final_score, reason)
def analysis_repurchase(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                        database: DatabaseEntry, context: AnalysisContext,
                        **kwargs) -> AnalysisResult:
    """Summarise share repurchase plans approved by the shareholders' meeting.

    Rows from the (years_ago(1), now()) query whose 'proc' equals
    '股东大会通过' are described one per reason line, and their volumes are
    summed for the brief.  SCORE_PASS is returned when at least one such row
    exists, SCORE_JUST otherwise (including when the query yields nothing).
    """
    nop(time_serial, database, context, kwargs)

    df = data_hub.get_data_center().query('Stockholder.Repurchase', securities, (years_ago(1), now()))
    if df is None or len(df) == 0:
        return AnalysisResult(securities, None, AnalysisResult.SCORE_JUST, '前后一年内没有回购数据', '无回购数据')

    # TS_ISSUE: Some fields may be missing from the data source.
    has_end_date = 'end_date' in df.columns
    has_vol = 'vol' in df.columns
    has_low_limit = 'low_limit' in df.columns
    has_high_limit = 'high_limit' in df.columns

    # Running total across rows.  It previously shared the name of the
    # per-row volume, so every row overwrote the total and then doubled it.
    total_volume = 0
    reasons = []
    for _, row in df.iterrows():
        # A plan passes through several stages; counting only the approved
        # stage avoids counting the same plan more than once.
        if row['proc'] != '股东大会通过':
            continue

        ann_date = row['ann_date']
        end_date = row['end_date'] if has_end_date else None
        row_volume = row['vol'] if has_vol else None
        low_limit = row['low_limit'] if has_low_limit else None
        high_limit = row['high_limit'] if has_high_limit else None

        # pd.NaT also passes the isinstance check, hence the explicit test.
        if isinstance(end_date, datetime.datetime) and end_date is not pd.NaT:
            end_date_text = '截止%s' % end_date.date()
        else:
            end_date_text = ''

        has_low = not pd.isnull(low_limit)
        has_high = not pd.isnull(high_limit)
        if has_low and has_high:
            if low_limit == high_limit:
                price_text = '将以%s元的价格' % low_limit
            else:
                price_text = '将以%s - %s元的价格' % (low_limit, high_limit)
        elif has_low:
            price_text = '将以最低%s元的价格' % low_limit
        elif has_high:
            price_text = '将以最高%s元的价格' % high_limit
        else:
            price_text = ''

        if not pd.isnull(row_volume):
            row_volume = int(row_volume)
            total_volume += row_volume
            volume_text = '%s股' % row_volume
        else:
            volume_text = ''

        reasons.append('%s: 股东大会通过,%s%s回购%s股票' %
                       (ann_date.date(), end_date_text, price_text, volume_text))

    if not reasons:
        return AnalysisResult(securities, None, AnalysisResult.SCORE_JUST, '前后一年内没有回购数据', '近一年无数据')

    brief = '期间计划回购%s股' % total_volume
    return AnalysisResult(securities, None, AnalysisResult.SCORE_PASS, reasons, brief)
def analysis_roe_roa(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                     database: DatabaseEntry, context: AnalysisContext,
                     **kwargs) -> [AnalysisResult]:
    """Score each December period on return on assets and return on equity.

    Each period starts at 100.  '总资产收益率' outside [0.05, 0.15] costs 50.
    '净资产收益率' below 0.15 costs 50, and above 0.40 forces the score to 0.
    One AnalysisResult per period is returned in a list; when the factor query
    yields nothing, a single SCORE_NOT_APPLIED result is returned instead.
    """
    nop(database, context, kwargs)

    df = data_hub.get_data_center().query_from_factor(
        'Factor.Finance', securities, time_serial,
        fields=['总资产收益率', '净资产收益率'], readable=True)
    if df is None or len(df) == 0:
        return AnalysisResult(securities, None, AnalysisResult.SCORE_NOT_APPLIED, '')

    # Annual report
    annual = df[df['period'].dt.month == 12]

    results = []
    for _, row in annual.iterrows():
        period = row['period']
        score = 100
        reason = []

        roa = row['总资产收益率']
        if roa < 0.05:
            score -= 50
            roa_text = '%s: 总资产收益率为%s%% - 过低'
        elif roa > 0.15:
            score -= 50
            roa_text = '%s: 总资产收益率为%s%% - 偏高,需要引起注意'
        else:
            # Theory: 7.5% - 13%, but we use 5% - 15% for wider tolerance.
            roa_text = '%s: 总资产收益率为%s%% - 合理'
        reason.append(roa_text % (str(period), format_pct(roa)))

        roe = row['净资产收益率']
        if roe < 0.15:
            score -= 50
            roe_text = '%s: 净资产收益率为%s%% - 偏低'
        elif roe > 0.40:
            # Overrides whatever the ROA check left.
            score = 0
            roe_text = '%s: 净资产收益率为%s%% - 过高,可能是造假或偶然因素'
        else:
            # Theory: 15% - 39%.
            roe_text = '%s: 净资产收益率为%s%% - 合理'
        reason.append(roe_text % (str(period), format_pct(roe)))

        results.append(AnalysisResult(securities, period, score, reason))
    return results
def analysis_stock_unlock(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                          database: DatabaseEntry, context: AnalysisContext,
                          **kwargs) -> AnalysisResult:
    """Fail a security that has restricted shares unlocking around today.

    Unlock records dated later than days_ago(90) and no later than
    days_after(180) are grouped by unlock date; each date contributes one
    reason line, and the unlocked shares are summed for the brief.
    SCORE_FAIL is returned when any such record exists, SCORE_PASS otherwise.
    """
    nop(time_serial, database, context, kwargs)

    no_data_result = AnalysisResult(securities, None, AnalysisResult.SCORE_PASS,
                                    '前三个月或后半年内没有解禁数据', '无解禁数据')

    df: pd.DataFrame = data_hub.get_data_center().query(
        'Stockholder.StockUnlock', securities, (years_ago(2), now()))
    if df is None or df.empty:
        return no_data_result

    # Work on an explicit copy: assigning a column on a boolean-filtered slice
    # is chained assignment and triggers pandas' SettingWithCopyWarning.
    df = df[df['unlock_date'].notna()].copy()
    # The dates may still be text rather than datetime values.
    df['unlock_date'] = df['unlock_date'].apply(text_auto_time)

    in_window = (df['unlock_date'] > days_ago(90)) & (df['unlock_date'] <= days_after(180))
    df = df.loc[in_window]
    if df.empty:
        return no_data_result

    reasons = []
    float_share_sum = 0
    # Several unlock records can share one day; report them per day.
    for unlock_date, day_records in df.groupby('unlock_date'):
        float_share = sum(day_records['float_share'])
        float_ratio = sum(day_records['float_ratio'])
        float_share_sum += float_share
        reasons.append('%s: 解禁%s股,占总股份%s%%' % (unlock_date.date(), float_share, float_ratio))

    if not reasons:
        return no_data_result

    brief = '共解禁%s股' % float_share_sum
    return AnalysisResult(securities, None, AnalysisResult.SCORE_FAIL, reasons, brief)
def analysis_location_limitation(securities: str, data_hub: DataHubEntry,
                                 database: DatabaseEntry, context: AnalysisContext) -> AnalysisResult:
    """Exclude securities whose area is '黑龙江', '辽宁' or '吉林'.

    The 'Market.SecuritiesInfo' query result is stored in ``context.cache``
    under 'securities_info' and reused on later calls.
    """
    nop(database)

    cache = context.cache
    if cache.get('securities_info', None) is None:
        cache['securities_info'] = data_hub.get_data_center().query('Market.SecuritiesInfo')
    info = cache.get('securities_info', None)

    own_info = info[info['stock_identity'] == securities]
    area = get_dataframe_slice_item(own_info, 'area', 0, '')

    excluded_areas = ('黑龙江', '辽宁', '吉林')
    exclude = area in excluded_areas

    reason = securities + '地域为' + str(area)
    return AnalysisResult(securities, not exclude, reason)
def analysis_less_than_3_years(securities: str, data_hub: DataHubEntry,
                               database: DatabaseEntry, context: AnalysisContext) -> AnalysisResult:
    """Exclude securities listed fewer than three calendar years ago.

    The 'Market.SecuritiesInfo' query result is stored in ``context.cache``
    under 'securities_info' and reused on later calls.  Only the year parts of
    today's date and the listing date are compared; ``now()`` is the fallback
    listing date handed to ``get_dataframe_slice_item``.
    """
    nop(database)

    cache = context.cache
    if cache.get('securities_info', None) is None:
        cache['securities_info'] = data_hub.get_data_center().query('Market.SecuritiesInfo')
    info = cache.get('securities_info', None)

    own_info = info[info['stock_identity'] == securities]
    listing_date = get_dataframe_slice_item(own_info, 'listing_date', 0, now())

    years_listed = now().year - listing_date.year
    exclude = years_listed < 3

    verdict = '小于三年' if exclude else '大于三年'
    reason = '上市日期' + str(listing_date) + verdict
    return AnalysisResult(securities, not exclude, reason)
def analysis_exclude_industries(securities: str, data_hub: DataHubEntry,
                                database: DatabaseEntry, context: AnalysisContext) -> AnalysisResult:
    """Exclude securities whose industry is one of the listed farming sectors.

    The 'Market.SecuritiesInfo' query result is stored in ``context.cache``
    under 'securities_info' and reused on later calls.
    """
    nop(database)

    cache = context.cache
    if cache.get('securities_info', None) is None:
        cache['securities_info'] = data_hub.get_data_center().query('Market.SecuritiesInfo')
    info = cache.get('securities_info', None)

    own_info = info[info['stock_identity'] == securities]
    industry = get_dataframe_slice_item(own_info, 'industry', 0, '')

    excluded_industries = ('种植业', '渔业', '林业', '畜禽养殖', '农业综合')
    exclude = industry in excluded_industries

    verdict = ']属于农林牧渔' if exclude else ']不属于农林牧渔'
    reason = '所在行业[' + str(industry) + verdict
    return AnalysisResult(securities, not exclude, reason)
def analysis_less_than_3_years(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                               database: DatabaseEntry, context: AnalysisContext,
                               **kwargs) -> [AnalysisResult]:
    """Fail securities listed fewer than three calendar years ago.

    The 'Market.SecuritiesInfo' query result is stored in ``context.cache``
    under 'securities_info' and reused on later calls.  Only the year parts of
    today's date and the listing date are compared; ``now()`` is the fallback
    listing date handed to ``get_dataframe_slice_item``.
    """
    nop(kwargs)
    nop(database)
    nop(time_serial)

    cache = context.cache
    if cache.get('securities_info', None) is None:
        cache['securities_info'] = data_hub.get_data_center().query('Market.SecuritiesInfo')
    info = cache.get('securities_info', None)

    own_info = info[info['stock_identity'] == securities]
    listing_date = get_dataframe_slice_item(own_info, 'listing_date', 0, now())

    years_listed = now().year - listing_date.year
    less_than_3_years = years_listed < 3

    verdict = '小于三年' if less_than_3_years else '大于三年'
    reason = '上市日期 %s (%s)' % (str(listing_date), verdict)
    return AnalysisResult(securities, None, not less_than_3_years, reason)
def analysis_black_list(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                        database: DatabaseEntry, context: AnalysisContext,
                        **kwargs) -> [AnalysisResult]:
    """Fail securities that appear in the black list table.

    The table comes from the 'black_list' extra module of ``data_hub`` and is
    stored in ``context.cache`` under 'black_table'.  When no non-empty
    DataFrame is available, SCORE_NOT_APPLIED is returned.  For a listed
    security the reason is the 'content' of its first matching row.
    """
    nop(kwargs)
    nop(database)
    nop(time_serial)

    cache = context.cache
    if cache.get('black_table', None) is None:
        black_list_module = data_hub.get_data_extra('black_list')
        if black_list_module is not None:
            cache['black_table'] = black_list_module.get_black_list_data()
    black_table = cache.get('black_table', None)

    # Guard clause: nothing usable was loaded.
    if not isinstance(black_table, pd.DataFrame) or black_table.empty:
        return AnalysisResult(securities, None, AnalysisResult.SCORE_NOT_APPLIED, '载入黑名单模块失败')

    matches = black_table[black_table['security'] == securities]
    in_black_list = len(matches) > 0
    if in_black_list:
        reason = get_dataframe_slice_item(matches, 'content', 0, '')
    else:
        reason = '不在黑名单中'
    return AnalysisResult(securities, None, not in_black_list, reason)
def analysis_increase_decrease(securities: str, time_serial: tuple, data_hub: DataHubEntry,
                               database: DatabaseEntry, context: AnalysisContext,
                               **kwargs) -> AnalysisResult:
    """Judge a security by its shareholders' net increase or decrease in holdings.

    Records from the (years_ago(2), now()) query are kept when their begin or
    close date lies strictly between days_ago(365) and days_after(365).  'IN'
    records add their volume to the net total and 'DE' records subtract it;
    other records are ignored.  A negative net volume gives SCORE_FAIL, any
    other gives SCORE_PASS.  SCORE_NOT_APPLIED is returned when there is no
    usable record.
    """
    nop(time_serial, database, context, kwargs)

    no_data_result = AnalysisResult(securities, None, AnalysisResult.SCORE_NOT_APPLIED,
                                    '前后一年内没有增减持数据', '近一年无数据')

    df = data_hub.get_data_center().query('Stockholder.ReductionIncrease', securities, (years_ago(2), now()))
    if df is None or len(df) == 0:
        return no_data_result

    holder_type_names = {'G': '高管', 'P': '个人', 'C': '公司'}
    # 'in_de' value -> (sign applied to the volume, text used in the report)
    operations = {'IN': (1, '增持'), 'DE': (-1, '减持')}

    has_change_vol = 'change_vol' in df.columns
    has_change_ratio = 'change_ratio' in df.columns
    has_avg_price = 'avg_price' in df.columns
    has_begin_date = 'begin_date' in df.columns
    has_close_date = 'close_date' in df.columns

    window_start = days_ago(365)
    window_end = days_after(365)

    volume = 0
    reasons = []
    for _, row in df.iterrows():
        begin_date = text_auto_time(row['begin_date']) if has_begin_date else None
        close_date = text_auto_time(row['close_date']) if has_close_date else None

        in_window = \
            (begin_date is not None and window_start < begin_date < window_end) or \
            (close_date is not None and window_start < close_date < window_end)
        if not in_window:
            continue

        operation = operations.get(row['in_de'])
        if operation is None:
            continue
        sign, operation_text = operation

        change_vol = row['change_vol'] if has_change_vol else '?'
        change_ratio = row['change_ratio'] if has_change_ratio else '?'
        avg_price = row['avg_price'] if has_avg_price else '?'

        # Only real numbers may enter the net total: the '?' placeholder used
        # for a missing column raised TypeError here, and a NaN volume turned
        # the total into NaN.  NaN is the only value unequal to itself.
        if has_change_vol and change_vol is not None and change_vol == change_vol:
            volume += sign * change_vol

        reasons.append(
            '%s - %s: %s[%s]以平均价格%s元%s%s股,占流通股%s%%' %
            (begin_date.date() if begin_date is not None else '?',
             close_date.date() if close_date is not None else '?',
             holder_type_names.get(row['holder_type'], ''), row['stock_holder'],
             avg_price, operation_text, change_vol, change_ratio))

    if not reasons:
        return no_data_result

    if volume > 0:
        conclusion = '此期间净增持%s股' % volume
    elif volume < 0:
        conclusion = '此期间净减持%s股' % -volume
    else:
        conclusion = '此期间增持减持持平'
    reasons.append(conclusion)

    final_score = AnalysisResult.SCORE_FAIL if volume < 0 else AnalysisResult.SCORE_PASS
    # The result expression must sit on the same line as `return`; split across
    # lines, the function returns None and the expression is left dangling.
    return AnalysisResult(securities, None, final_score, reasons, conclusion.replace('此', ''))