def get_report_path(self):
    date_str = util.timestamp2date_str(
        self.current_report_metadata.announcementTime / 1000)
    report_path = self.report_path_pattern.format(
        code=self.current_stock.stock_code,
        id=self.current_report_metadata.announcementId,
        cob=util.date_str2cob(date_str),
        title=self.current_report_metadata.announcementTitle)
    target_path = self.base_path + report_path.replace("*", "")
    target_url = self.report_url.format(
        exchange_name=self.current_stock.exchange_name,
        id=self.current_report_metadata.announcementId)
    return date_str, target_path, target_url
def download_report_metadata_category(self, category):
    has_new_report_metadata = False
    self.current_category_metadata = self.current_stock.get_report_metadata(category)
    if self.current_category_metadata.is_report_metadata_need_download(self.fromCob, self.toCob):
        try:
            page_num = 1
            while True:
                r = requests.post(
                    self.report_metadata_url,
                    files={
                        "stock": (None, self.current_stock.stock_code),
                        "category": (None, category),
                        "pageNum": (None, str(page_num)),
                        "pageSize": (None, "30"),
                        "column": (None, self.current_stock.exchange_name),
                        "tabName": (None, "fulltext"),
                        "seDate": (None, util.cob2date_range_string(self.fromCob, self.toCob)),
                    },
                    timeout=45,
                    stream=False,
                    headers={'Connection': 'close'})
                if r.status_code == requests.codes.ok:
                    result = json.loads(r.content)
                    reports = result['announcements']
                    for report in reports:
                        if not util.array_contains(report['announcementTitle'],
                                                   self.report_ignore_patterns):
                            report_metadata = ReportMetadata(report['announcementId'],
                                                             report['announcementTitle'],
                                                             report['announcementTime'])
                            if self.current_category_metadata.add_report_metadata(report_metadata):
                                has_new_report_metadata = True
                    if result['hasMore']:
                        page_num += 1
                    else:
                        break
                else:
                    logging.error('download report metadata for ' + self.current_stock.stock_code)
                    return None
            if (self.current_category_metadata.update_effective_cob(self.fromCob, self.toCob)
                    or has_new_report_metadata):
                self.serialization_single_stock_data()
        except (IOError, AttributeError, RuntimeError):
            logging.exception(self.current_stock.stock_code + ' save report metadata failed')
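# Illustrative sketch, not part of the loader: download_report_metadata_category
# passes plain form fields through requests' files= argument as (None, value)
# tuples, which forces a multipart/form-data body without attaching any file.
# The URL below is just a public echo service used to show the effect; the field
# values are made up.
import requests

resp = requests.post(
    'https://httpbin.org/post',
    files={
        'stock': (None, '000001'),
        'pageNum': (None, '1'),
    },
    timeout=45)
print(resp.json()['form'])  # echoes back {'stock': '000001', 'pageNum': '1'}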
def get_contract_data_jisilu(contract_id):
    url = 'https://www.jisilu.cn/data/index_future/if_hist_list/' + contract_id
    json_data = get_data_from_jisilu(url)
    contract_ids = []
    rows = json_data['rows']
    for row in rows:
        contract_ids.append(util.json_object_to_convert(row['id']))
    return contract_ids
def collect_data_from_stock_dir(self, stock_code):
    folder = self.base_path + stock_code
    if not os.path.exists(folder):
        os.mkdir(folder)
    else:
        obj = util.deserialization_object(folder + '/metadata.json')
        if obj is not None:  # use this one as benchmark
            self.current_stock = obj
            self.stocks[stock_code] = self.current_stock
def download_report(self):
    date_str, target_path, target_url = self.get_report_path()
    if os.path.exists(target_path):
        self.current_report_metadata.is_download = True
        return True
    try:
        logging.warn("download " + target_path)
        r = requests.get(target_url, stream=True, params={"announceTime": date_str})
        with open(target_path, 'wb') as fd:
            for chunk in r.iter_content(chunk_size=16384):
                fd.write(chunk)
        self.current_report_metadata.is_download = True
        return True
    except (IOError, RuntimeError):
        logging.exception(self.current_stock.stock_code + ' save report failed. ' +
                          self.current_report_metadata.announcementTitle)
        # remove any partially written file so a retry starts clean
        util.delete_file(target_path)
        return False
def save_fund_data(df, file_path):
    if df is None:
        return
    old_df = util.get_dataframe_from_file(file_path)
    if old_df is not None:
        old_df = old_df.set_index('price_dt')
        # accept the new data, but use combine_first so rows that only exist
        # in the old file are not lost
        result = df.combine_first(old_df)
        result.to_csv(file_path)
    else:
        df.to_csv(file_path)
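# Illustrative sketch of the combine_first merge used above (the example data
# is made up): on overlapping index entries the newly downloaded frame wins,
# while rows that only exist in the old file are preserved.
import pandas as pd

new = pd.DataFrame({'price_dt': ['2020-01-02', '2020-01-03'],
                    'a_price': [1.01, 1.02]}).set_index('price_dt')
old = pd.DataFrame({'price_dt': ['2020-01-01', '2020-01-02'],
                    'a_price': [1.00, 0.99]}).set_index('price_dt')
merged = new.combine_first(old)
# 2020-01-01 -> 1.00 (kept from old), 2020-01-02 -> 1.01 (new wins), 2020-01-03 -> 1.02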
def check_report_integrity(self):
    for category in self.current_stock.report_category:
        self.current_category_metadata = self.current_stock.report_category[category]
        has_integrity_checked = False
        to_delete_report_ids = []
        for report_id in self.current_category_metadata.report_metadata:
            self.current_report_metadata = self.current_category_metadata.report_metadata[report_id]
            date_str, target_path, target_url = self.get_report_path()
            if util.array_contains(self.current_report_metadata.announcementTitle,
                                   self.report_ignore_patterns):
                # title matches an ignore pattern: drop the file and the metadata
                if self.current_report_metadata.is_download:
                    util.delete_file(target_path)
                to_delete_report_ids.append(report_id)
            elif self.current_report_metadata.is_download:
                if not self.current_report_metadata.is_valid:
                    if util.is_invalid_pdf(target_path):
                        # corrupt download: delete it and mark it for re-download
                        has_integrity_checked = True
                        self.current_report_metadata.is_download = False
                        self.current_report_metadata.is_valid = False
                        util.delete_file(target_path)
                    else:
                        has_integrity_checked = True
                        self.current_report_metadata.is_valid = True
        if len(to_delete_report_ids) > 0:
            has_integrity_checked = True
            for to_delete_report_id in to_delete_report_ids:
                logging.warn(self.current_stock.stock_code + " delete report " +
                             self.current_category_metadata.report_metadata[to_delete_report_id].announcementTitle)
                del self.current_category_metadata.report_metadata[to_delete_report_id]
        if has_integrity_checked:
            self.serialization_single_stock_data()
def do_cmd(self, cmd_str):
    if cmd_str == 'delete_un_downloadable_reports' or cmd_str == 'fix_metadata':
        # raw_input is Python 2; use input() when running under Python 3
        is_sure = raw_input("are you sure to continue? (y/n)\n")
        if is_sure != 'y':
            return
    all_stock_codes = util.get_all_stock_codes()
    for code in all_stock_codes:
        if code is not None and not code.startswith('IDX'):
            stock_code = code[2:]
            if stock_code not in self.stocks:
                self.current_stock = StockData(stock_code)
                self.stocks[stock_code] = self.current_stock
            else:
                self.current_stock = self.stocks[stock_code]
            self.do_cmd_single(cmd_str, stock_code)
def parse_jisilu_data(data):
    price_dt = []
    a_price = []
    a_price_increase_rt = []   # A-tranche price change
    a_profit_rt = []           # A-tranche yield
    a_amount = []              # A-tranche units outstanding (10k units)
    a_amount_increase = []     # newly added A-tranche units (10k units)
    a_amount_increase_rt = []  # A-tranche unit growth rate
    a_discount_rt = []         # A-tranche discount rate
    b_discount_rt = []         # B-tranche premium rate
    b_net_leverage_rt = []     # B-tranche NAV leverage
    b_price_leverage_rt = []   # B-tranche price leverage
    base_discount_rt = []      # overall premium of the combined fund
    net_value = []             # parent fund NAV
    base_est_val = []          # parent fund estimated NAV
    est_err = []               # parent fund estimation error
    rows = data['rows']
    for row in rows:
        row_data = row['cell']
        price_dt.append(util.json_object_to_convert(row_data['price_dt']))
        a_price.append(util.json_object_to_convert(row_data['a_price']))
        a_price_increase_rt.append(util.json_object_to_convert(row_data['a_price_increase_rt']))
        a_profit_rt.append(util.json_object_to_convert(row_data['a_profit_rt']))
        a_amount.append(util.json_object_to_convert(row_data['a_amount']))
        a_amount_increase.append(util.json_object_to_convert(row_data['a_amount_increase']))
        a_amount_increase_rt.append(util.json_object_to_convert(row_data['a_amount_increase_rt']))
        a_discount_rt.append(util.json_object_to_convert(row_data['a_discount_rt']))
        b_discount_rt.append(util.json_object_to_convert(row_data['b_discount_rt']))
        b_net_leverage_rt.append(util.json_object_to_convert(row_data['b_net_leverage_rt']))
        b_price_leverage_rt.append(util.json_object_to_convert(row_data['b_price_leverage_rt']))
        base_discount_rt.append(util.json_object_to_convert(row_data['base_discount_rt']))
        net_value.append(util.json_object_to_convert(row_data['net_value']))
        base_est_val.append(util.json_object_to_convert(row_data['base_est_val']))
        est_err.append(util.json_object_to_convert(row_data['est_err']))
    d = {
        'price_dt': price_dt,
        'a_price': a_price,
        'a_price_increase_rt': a_price_increase_rt,
        'a_profit_rt': a_profit_rt,
        'a_amount': a_amount,
        'a_amount_increase': a_amount_increase,
        'a_amount_increase_rt': a_amount_increase_rt,
        'a_discount_rt': a_discount_rt,
        'b_discount_rt': b_discount_rt,
        'b_net_leverage_rt': b_net_leverage_rt,
        'b_price_leverage_rt': b_price_leverage_rt,
        'base_discount_rt': base_discount_rt,
        'net_value': net_value,
        'base_est_val': base_est_val,
        'est_err': est_err
    }
    df = pd.DataFrame(d)
    df = df.set_index('price_dt')
    return df
def serialization_stock_data(self):
    util.serialization_object(self.base_path + 'stocks.json', self.stocks)
def deserialization_stock_data(self):
    obj = util.deserialization_object(self.base_path + 'stocks.json')
    if obj is not None:
        self.stocks = obj
def load_all_funds_file(file_path):
    df = util.get_dataframe_from_file(file_path)
    if df is not None:
        df = df.set_index('fundb_base_fund_id')
    return df
def parse_data(data):
    fundb_base_fund_id = []
    funda_id = []
    funda_name = []
    fundb_id = []
    fundb_name = []
    maturity_dt = []
    coupon_descr_s = []             # coupon / interest-rate rule
    fundb_nav_dt = []
    fundb_discount_rt = []          # B-tranche premium rate
    fundb_price_leverage_rt = []    # price leverage
    fundb_capital_rasising_rt = []  # financing cost
    fundb_lower_recalc_rt = []      # parent-fund drop that triggers a downward reset
    fundb_upper_recalc_rt = []      # parent-fund gain that triggers an upward reset
    fundb_base_est_dis_rt = []      # overall premium rate
    abrate = []                     # A/B unit ratio
    fundb_base_price = []           # parent fund NAV
    funda_current_price = []        # A-tranche current price
    fundb_current_price = []        # B-tranche current price
    fundb_value = []                # B-tranche NAV
    rows = data['rows']
    for row in rows:
        row_data = row['cell']
        fundb_base_fund_id.append(util.json_object_to_convert(row_data['fundb_base_fund_id']))
        funda_id.append(util.json_object_to_convert(row_data['funda_id']))
        funda_name.append(util.json_object_to_convert(row_data['funda_name']))
        fundb_id.append(util.json_object_to_convert(row_data['fundb_id']))
        fundb_name.append(util.json_object_to_convert(row_data['fundb_name']))
        maturity_dt.append(util.json_object_to_convert(row_data['maturity_dt']))
        coupon_descr_s.append(util.json_object_to_convert(row_data['coupon_descr_s']))
        fundb_nav_dt.append(util.json_object_to_convert(row_data['fundb_nav_dt']))
        fundb_discount_rt.append(util.json_object_to_convert(row_data['fundb_discount_rt']))
        fundb_price_leverage_rt.append(util.json_object_to_convert(row_data['fundb_price_leverage_rt']))
        fundb_capital_rasising_rt.append(util.json_object_to_convert(row_data['fundb_capital_rasising_rt']))
        fundb_lower_recalc_rt.append(util.json_object_to_convert(row_data['fundb_lower_recalc_rt']))
        fundb_base_est_dis_rt.append(util.json_object_to_convert(row_data['fundb_base_est_dis_rt']))
        abrate.append(util.json_object_to_convert(row_data['abrate']))
        fundb_base_price.append(util.json_object_to_convert(row_data['fundb_base_price']))
        fundb_upper_recalc_rt.append(util.json_object_to_convert(row_data['fundb_upper_recalc_rt']))
        funda_current_price.append(util.json_object_to_convert(row_data['funda_current_price']))
        fundb_current_price.append(util.json_object_to_convert(row_data['fundb_current_price']))
        fundb_value.append(util.json_object_to_convert(row_data['fundb_value']))
    d = {
        'fundb_base_fund_id': fundb_base_fund_id,
        'funda_id': funda_id,
        'fundb_id': fundb_id,
        'funda_name': funda_name,
        'fundb_name': fundb_name,
        'maturity_dt': maturity_dt,
        'coupon_descr_s': coupon_descr_s,
        'fundb_nav_dt': fundb_nav_dt,
        'fundb_discount_rt': fundb_discount_rt,
        'fundb_price_leverage_rt': fundb_price_leverage_rt,
        'fundb_capital_rasising_rt': fundb_capital_rasising_rt,
        'fundb_lower_recalc_rt': fundb_lower_recalc_rt,
        'fundb_base_est_dis_rt': fundb_base_est_dis_rt,
        'abrate': abrate,
        'fundb_upper_recalc_rt': fundb_upper_recalc_rt,
        'funda_current_price': funda_current_price,
        'fundb_current_price': fundb_current_price,
        'fundb_value': fundb_value,
        'fundb_base_price': fundb_base_price
    }
    df = pd.DataFrame(d)
    df = df.set_index('fundb_base_fund_id')
    return df
if __name__ == '__main__':
    url = 'https://www.jisilu.cn/data/sfnew/fundb_list/'
    my_props = PropertiesReader.get_properties()
    df = parse_data(util.json_data_get(url))
    old_df = load_all_funds_file(my_props['MktDataLoader.Fund.AllFundsRelationship.Path'])
    if old_df is not None:
        # the freshly parsed frame may carry different dtypes than the saved file,
        # so write it to a temp CSV and read it back before merging
        df.to_csv(my_props['MktDataLoader.Fund.Tmp.Path'])
        df = load_all_funds_file(my_props['MktDataLoader.Fund.Tmp.Path'])
        # accept the new data, but keep old rows via combine_first
        result = df.combine_first(old_df)
        result.to_csv(my_props['MktDataLoader.Fund.AllFundsRelationship.Path'])
    else:
        df.to_csv(my_props['MktDataLoader.Fund.AllFundsRelationship.Path'])
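# Illustrative sketch (made-up data, not part of the script) of why the temp-CSV
# round trip above is done before combine_first: values parsed straight from the
# JSON feed may come back as strings, while the CSV already on disk is read back
# as floats, and combining mismatched dtypes would give inconsistent columns.
import pandas as pd

fresh = pd.DataFrame({'fundb_base_fund_id': ['150022'],
                      'fundb_value': ['1.013']}).set_index('fundb_base_fund_id')
print(fresh['fundb_value'].dtype)     # object: the value is still a string
fresh.to_csv('tmp.csv')
reloaded = pd.read_csv('tmp.csv', dtype={'fundb_base_fund_id': str})
reloaded = reloaded.set_index('fundb_base_fund_id')
print(reloaded['fundb_value'].dtype)  # float64, matching the previously saved file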
def serialization_single_stock_data(self):
    logging.warn('save metadata for ' + self.current_stock.stock_code)
    util.serialization_object(
        self.base_path + self.current_stock.stock_code + '/metadata.json',
        self.current_stock)