def _run(self, code, start, end):
    """Collect fund and social-security-fund holding figures for one stock.

    Two Eastmoney F10 endpoints are queried: ``ShareholderResearchAjax`` for
    the list of report dates (its ``zlcc_rz`` field), then
    ``MainPositionsHodlerAjax`` once per report date.

    Args:
        code: Stock code string. A code starting with ``'6'`` is prefixed
            with ``'sh'``; every other code is prefixed with ``'sz'``.
        start: Not used by this method.
        end: Not used by this method.

    Returns:
        A DataFrame with one row per report date and the columns ``code``,
        ``fin_year``, ``fin_season`` and ``fin_type`` (always 1), plus
        ``fund_holding`` / ``sb_holding`` when the response contains a row
        of the matching institution type.
    """
    dates_url = 'http://emweb.securities.eastmoney.com/PC_HSF10/ShareholderResearch/ShareholderResearchAjax?code='
    holders_url = 'http://emweb.securities.eastmoney.com/PC_HSF10/ShareholderResearch/MainPositionsHodlerAjax?date='
    # Institution type ('jglx') -> output column receiving the parsed 'zltgbl'.
    holding_columns = {u'基金': 'fund_holding', u'社保基金': 'sb_holding'}

    code_d = 'sh' + code if code[0] == '6' else 'sz' + code
    f_data_list = []
    df = pd.DataFrame()

    # The context manager releases the pooled connections even if a request
    # or a JSON decode raises.
    with requests.session() as ss:
        # Fetch the list of report dates.
        ctx = ss.get(url=dates_url + code_d, headers=header_dict)
        # An empty body is treated as "no dates" instead of failing in json.loads.
        if ctx.content:
            f_data_list = json.loads(ctx.content)['zlcc_rz']
        # str() keeps the join safe if the dates are not plain strings.
        logger.info('funds date:' + ",".join(str(f_d) for f_d in f_data_list))

        for idx, f_d in enumerate(f_data_list):
            url = holders_url + str(f_d) + '&code=' + code_d
            logger.debug(url)
            ctx = ss.get(url=url, headers=header_dict)

            df.loc[idx, 'code'] = code
            df.loc[idx, 'fin_year'] = DateUtil.getYear(f_d)
            df.loc[idx, 'fin_season'] = DateUtil.getSeason(f_d)
            df.loc[idx, 'fin_type'] = 1

            if not ctx.content:
                continue
            for d in json.loads(ctx.content):
                column = holding_columns.get(d['jglx'])
                if column is not None:
                    df.loc[idx, column] = StrUtil.parse_field(d['zltgbl'])
    return df
def _parse(self,list,code): df = pd.DataFrame(); for idx, obj in enumerate(list): df.loc[idx,'code']=code; df.loc[idx, 'fin_date']=obj['date']; df.loc[idx, 'fin_year'] = DateUtil.getYear(obj['date']) df.loc[idx, 'fin_season'] = DateUtil.getSeason(obj['date']) df.loc[idx, 'fin_type'] = self._get_type(); df.loc[idx, 'jll'] = self._filterField(obj['jll']); df.loc[idx, 'mll'] = self._filterField(obj['mll']) df.loc[idx, 'jbmgsy'] =self._filterField(obj['jbmgsy']) df.loc[idx, 'mgjyxjl'] = self._filterField(obj['mgjyxjl']) df.loc[idx, 'yyzsr'] = self._filterField(obj['yyzsr']) df.loc[idx, 'gsjlr'] = self._filterField(obj['gsjlr']) df.loc[idx, 'kfjlr'] = self._filterField(obj['kfjlr']) df.loc[idx, 'yyzsrtbzz'] =self._filterField(obj['yyzsrtbzz']) df.loc[idx, 'gsjlrtbzz'] = self._filterField(obj['gsjlrtbzz']) df.loc[idx, 'kfjlrtbzz'] = self._filterField(obj['kfjlrtbzz']) return df;
def to_csv(dict, type):
    """Append one failure-log row per failed key to the CSV at ConfigDict['k_fail_log'].

    Args:
        dict: Mapping of key -> outcome. A key is logged when its value
            compares equal to ``False``.
        type: Value written to the ``type`` column of every logged row.

    Returns:
        None. Nothing is written when no key failed.
    """
    print(dict)
    failures = pd.DataFrame()
    # Equality, not identity or truthiness, is kept on purpose: 0 counts as a
    # failure while None and empty containers do not.
    failed_keys = (key for key in dict.keys() if dict[key] == False)  # noqa: E712
    for row_no, key in enumerate(failed_keys):
        failures.loc[row_no, 'date'] = DateUtil.getLongFormat(
            datetime.datetime.now())
        failures.loc[row_no, 'type'] = type
        failures.loc[row_no, 'code'] = key
    if not failures.empty:
        # Append without a header so repeated calls accumulate in one file.
        failures.to_csv(ConfigDict['k_fail_log'], mode='a', header=False)
def pull_data_hk(start_date=None, start_code=None, retry=False, retryDict=None):
    """Pull HK holding data for each date from ``start_date`` and log the failures.

    Args:
        start_date: Passed to ``DateUtil.getDateSeq`` to produce the dates to pull.
        start_code: Not used by this function.
        retry: When true, only dates listed in ``retryDict['ph']`` are pulled.
        retryDict: Mapping of job type -> keys to retry. A missing mapping
            or a missing ``'ph'`` entry means there is nothing to retry.

    Returns:
        None. Per-date outcomes are handed to ``KManager.to_csv`` under type ``'ph'``.
    """
    # NOTE(review): the query result is not used here. The call is retained in
    # case it has side effects. Confirm, then drop it.
    SharePuller().query_from_mysql()

    # Resolved only in retry mode; None/absent entries become "retry nothing"
    # instead of raising TypeError/KeyError.
    retry_dates = (retryDict or {}).get('ph', ()) if retry else None

    results = {}
    hkp = HkHoldPuller()
    for dt in DateUtil.getDateSeq(start_date):
        if retry and dt not in retry_dates:
            continue
        results[dt] = hkp.pull(dt)
        # Pause between successive pulls.
        time.sleep(3)
    KManager.to_csv(results, 'ph')
def count_kpi(start_date, s=True, m=True, start_code=None, retry=False, retryDict=None):
    """Run the single-indicator and/or multi-indicator KPI calculations.

    Args:
        start_date: ``'%Y-%m-%d'`` string or None. With None, the
            multi-indicator pass runs once for today's date.
        s: Run the per-code single-indicator pass (``KManager.kpi_s``).
        m: Run the per-date multi-indicator pass (``KManager.kpi_m``).
        start_code: When set, codes that compare less than it are skipped
            in the single-indicator pass.
        retry: When true, only keys listed in ``retryDict['ks']`` (codes)
            and ``retryDict['km']`` (dates) are processed.
        retryDict: Mapping of job type -> keys to retry. A missing mapping
            or entry means there is nothing to retry for that pass.

    Returns:
        None. Outcomes are handed to ``KManager.to_csv`` under ``'ks'`` / ``'km'``.
    """
    shp = SharePuller()
    df = shp.query_from_mysql()
    end = datetime.datetime.now().strftime('%Y-%m-%d')
    # Resolved only in retry mode; None/absent entries become "retry nothing"
    # instead of raising TypeError/KeyError.
    retry_lists = (retryDict or {}) if retry else {}

    # ================================
    # Single-indicator calculation
    # ================================
    if s:
        retry_codes = retry_lists.get('ks', ())
        results = {}
        for _, row in df.iterrows():
            code = row['code']
            start = str(row['timeToMarket'])
            if start_code and code < start_code:
                continue
            if retry and code not in retry_codes:
                continue
            if start_date is not None and start < start_date:
                start = start_date
            # NOTE(review): `start` (row['timeToMarket'] raised to at least
            # start_date) is computed above, yet kpi_s receives start_date.
            # Left unchanged; confirm which argument was intended.
            KManager.kpi_s(code, start_date, pm, results)
        KManager.to_csv(results, 'ks')

    # ================================
    # Multi-indicator calculation
    # ================================
    if m:
        retry_dates = retry_lists.get('km', ())
        results = {}
        if start_date is None:
            start_date = end
            results[start_date] = KManager.kpi_m(start_date, pm)
        elif start_date <= end:
            for dt in DateUtil.getDateSeq(start_date):
                # The retry filter is keyed by date in this pass.
                if retry and dt not in retry_dates:
                    continue
                results[dt] = KManager.kpi_m(dt, pm)
                # Flag that trade date's rows as valid.
                pm.execute(
                    "update share_data_day set valid=1 where trade_date='%s'" %
                    (dt))
        KManager.to_csv(results, 'km')
def job_every_day():
    """Scheduled daily job: pull data, pull HK holdings, then compute KPIs.

    Returns at once when ``isWeekDay()`` is truthy. Otherwise it runs, for
    today's date: ``KManager.pull_data``, ``KManager.pull_data_hk`` for today
    and for ``DateUtil.getLastDay(today)``, and finally ``KManager.count_kpi``.

    NOTE(review): by its name the guard skips weekdays. Confirm what
    ``isWeekDay`` actually returns.
    """
    # Function-scope imports, kept in their original order. Not all of them
    # are referenced below.
    from k.manager.FinManager import FinManager
    from k.puller.SharePuller import SharePuller
    from k.manager.Kmanager import KManager
    from k.util.Logger import logger
    from k.util.DateUtil import DateUtil
    from k.util.PandasToMysql import pm

    if isWeekDay():
        return

    now = datetime.datetime.now()
    today = now.strftime('%Y-%m-%d')
    logger.info('schedule job start to work:' + today)

    KManager.pull_data(today)
    KManager.pull_data_hk(today)
    previous_day = DateUtil.getLastDay(today)
    KManager.pull_data_hk(previous_day)
    KManager.count_kpi(today)
def __process(self, df, start_date):
    """Normalise ``df`` in place and return the row position to start from.

    ``df`` is sorted by ``Config.db_date``, given a fresh 0..n-1 index and
    has that column converted to strings. All three changes are made on the
    caller's frame, and the returned value is a label in the new index.

    Args:
        df: Non-empty DataFrame containing the ``Config.db_date`` column.
            An empty frame raises ``KeyError`` on the first lookup.
        start_date: Date string, compared lexicographically with the
            stringified date column.

    Returns:
        0 when ``start_date`` is at or before the first date; the last index
        when it is at or after the last date; otherwise the index of the
        first row whose date equals ``start_date`` or the nearest later day
        that occurs in ``df``. None if no such day is found.
    """
    date_col = Config.db_date
    df.sort_values(by=[date_col], inplace=True)  # order rows by date
    df.index = np.arange(0, df.shape[0], 1)  # make labels follow the sorted order
    # Builtin str instead of the np.str alias, which newer NumPy no longer provides.
    df[date_col] = df[date_col].astype(str)

    last = df.shape[0] - 1
    first_date = df.loc[0, date_col]
    last_date = df.loc[last, date_col]

    start = None
    if start_date <= first_date:
        start = 0
    elif start_date >= last_date:
        start = last
    else:
        # Step forward one day at a time until a date present in df is hit.
        while start is None and start_date <= last_date:
            print('while', start, start_date)
            matches = df[df[date_col] == start_date].index.values
            if len(matches) > 0:
                start = matches[0]
            else:
                start_date = DateUtil.getNextDay(start_date)
    print('start:', start, ' start_date:', start_date)
    return start
def query_month_data_and_count_reverse(self, code, dt, close):
    """Check ``close`` against the previous month's high/low parameters for ``code``.

    The month row is looked up in ``self.month_data`` under the key
    ``code + '_' + month`` and fetched through ``pm.query`` on a cache miss.
    The fetched frame is cached, including an empty one.

    Args:
        code: Stock code used in the cache key and the query filter.
        dt: Date string. Its first 7 characters are passed to
            ``DateUtil.getLastMonthForShort`` to obtain the month to look up.
        close: Price handed to ``FactorUtil.is_reverse``.

    Returns:
        1 when the high-side check (``ha``/``hb``) reports a reversal,
        2 when the low-side check (``la``/``lb``, ``direct='l'``) does,
        0 otherwise, including when no month row exists.
    """
    dt = DateUtil.getLastMonthForShort(dt[0:7])
    cache_key = code + '_' + dt
    mdf = self.month_data.get(cache_key)
    if mdf is None:
        mdf = pm.query(DbCreator.share_data_month,
                       where=' code="%s" and trade_date="%s"' % (code, dt))
        self.month_data[cache_key] = mdf

    if mdf.empty:
        return 0

    # A pair that is entirely zero is skipped.
    # Presumably that means the parameters were never computed; confirm.
    ha = mdf.loc[0, 'ha']
    hb = mdf.loc[0, 'hb']
    if (ha != 0 or hb != 0) and FactorUtil.is_reverse(ha, hb, close) == 1:
        return 1

    la = mdf.loc[0, 'la']
    lb = mdf.loc[0, 'lb']
    # The low-side check uses the low-side pair it was gated on.
    if (la != 0 or lb != 0) and FactorUtil.is_reverse(
            la, lb, close, direct='l') == 1:
        return 2
    return 0