def __get_data_fhpx(self):
    """Download the page at ``self.__url_fhpx`` and fill the ``fhpx_*`` lists.

    When the HTTP status is 200, every ``fhpx_*`` attribute is reset to an
    empty list and then receives one entry per table row. For any other
    status the attributes are not touched. Nothing is returned.

    The field meanings in the comments are carried over from the original
    annotations; ``change_text`` is defined elsewhere in the file.
    """
    response = requests.get(self.__url_fhpx)
    if response.status_code == 200:
        document = etree.HTML(response.text)
        rows = document.cssselect(
            'body > div.area > div:nth-child(5) > table > tr')

        self.fhpx_ggrq = []   # announcement date
        self.fhpx_years = []  # dividend year
        self.fhpx_sg = []     # bonus shares
        self.fhpx_zz = []     # shares converted from reserves
        self.fhpx_px = []     # cash dividend
        self.fhpx_cqcxr = []  # ex-rights / ex-dividend date

        for row in rows:
            cells = row.findall('td')
            if len(cells) == 1:
                # A single-cell row is the "no data yet" placeholder:
                # stop reading the table.
                break
            self.fhpx_ggrq.append(cells[0].text)
            self.fhpx_years.append(cells[1].text)
            self.fhpx_sg.append(change_text(cells[2].text, 0))
            self.fhpx_zz.append(change_text(cells[3].text, 0))
            self.fhpx_px.append(change_text(cells[4].text, 0))
            # Cell 5 is not read; cell 6 is stored as the ex-date.
            self.fhpx_cqcxr.append(change_text(cells[6].text, to_type=str))

    # Drop the response explicitly and trigger a collection pass.
    del response
    gc.collect()
def __get_data_lrb(self):
    """Read the CSV at ``self.__url_lrb`` and fill the ``lrb_*`` lists.

    ``self.lrb_years`` receives the four-character year prefix of every
    column header after the first one, keeping only headers that are not
    blank and whose first four characters are digits. For each of those
    years the column named ``'<year>-12-31'`` is read and one value is
    appended to each ``lrb_*`` list. Nothing is returned.

    The row meanings (and their CSV line numbers) in the comments are
    carried over from the original annotations. ``change_text`` is defined
    elsewhere in the file.
    """
    frame = pd.read_csv(self.__url_lrb, encoding=self.encoding)

    # Skip the first column; keep the year part of the remaining headers.
    years = []
    for header in frame.columns.to_list()[1:]:
        if header.strip() == '':
            continue
        if header[:4].isdigit():
            years.append(header[:4])
    self.lrb_years = years

    self.lrb_jlr_gm = []   # net profit attributable to owners of the parent  CSV_LINE:42 DF_INDEX:40
    self.lrb_jlr = []      # net profit  CSV_LINE:41 DF_INDEX:39
    self.lrb_yysr = []     # operating revenue  CSV_LINE:2 DF_INDEX:0
    self.lrb_yysr_hj = []  # total operating revenue  CSV_LINE:3+4+5+6+7+8 DF_INDEX:1+2+3+4+5+6
    self.lrb_yycb = []     # operating cost  CSV_LINE:10 DF_INDEX:8
    self.lrb_yycb_hj = []  # total operating cost  CSV_LINE:10~21 DF_INDEX:8~19
    self.lrb_yylr = []     # operating profit  CSV_LINE:34 DF_INDEX:32
    self.lrb_mgyy = []     # earnings per share  CSV_LINE:45 DF_INDEX:43

    for year in self.lrb_years:
        # NOTE(review): every year is looked up under its 12-31 header;
        # presumably the CSV only carries year-end columns - confirm.
        column = frame[f'{year}-12-31']

        self.lrb_jlr_gm.append(change_text(column[40], 0))
        self.lrb_jlr.append(change_text(column[39], 0))
        self.lrb_yysr.append(change_text(column[0], 0))

        revenue_parts = [change_text(cell, 0) for cell in column[1:7]]
        self.lrb_yysr_hj.append(sum(revenue_parts))

        self.lrb_yycb.append(change_text(column[8], 0))

        cost_parts = [change_text(cell, 0) for cell in column[8:20]]
        self.lrb_yycb_hj.append(sum(cost_parts))

        self.lrb_yylr.append(change_text(column[32], 0))
        self.lrb_mgyy.append(change_text(column[43], 0))

    # Drop the frame explicitly and trigger a collection pass.
    del frame
    gc.collect()
def __get_data_gszl(self):
    """Download the page at ``self.__url_gszl`` and set the profile fields.

    When the HTTP status is 200, each attribute below is taken from the
    text of the first node matched by a fixed absolute XPath, so a lookup
    that matches nothing fails on the ``[0]`` index. For any other status
    no attribute is set. Nothing is returned.

    The field meanings in the comments are carried over from the original
    annotations. ``change_text``, ``pinyin`` and ``self.market`` are
    defined elsewhere.
    """
    response = requests.get(self.__url_gszl)
    if response.status_code == 200:
        page = etree.HTML(response.text)

        def first_text(expression):
            # Text of the first node matched by the given XPath.
            return page.xpath(expression)[0].text

        # organization form
        self.zzxs = change_text(
            first_text('/html/body/div[2]/div[4]/table/tr[1]/td[2]'),
            to_type=str)
        # region
        self.dy = first_text('/html/body/div[2]/div[4]/table/tr[1]/td[4]')
        # Chinese short name
        self.zwjc = first_text('/html/body/div[2]/div[4]/table/tr[2]/td[2]')
        # pinyin initials of the short name (per the original annotation)
        self.zwjc_py = pinyin(self.zwjc)
        # full company name
        self.gsqc = first_text('/html/body/div[2]/div[4]/table/tr[3]/td[2]')

        # The website row is read from an HTML comment inside the table:
        # the comment text is parsed as its own fragment and queried.
        hidden_markup = first_text(
            '/html/body/div[2]/div[4]/table/comment()')
        hidden_row = etree.fromstring(hidden_markup)
        # company website
        self.gswz = change_text(hidden_row.xpath('/tr/td[2]')[0].text,
                                to_type=str)

        # main business
        self.zyyw = first_text(
            '/html/body/div[2]/div[4]/table/tr[10]/td[2]').strip()
        # business scope
        self.jyfw = first_text(
            '/html/body/div[2]/div[4]/table/tr[11]/td[2]').strip()

        # date of establishment
        self.clrq = change_text(
            first_text('/html/body/div[2]/div[5]/table/tr[1]/td[2]'),
            to_type=str)
        # listing date
        self.ssrq = change_text(
            first_text('/html/body/div[2]/div[5]/table/tr[2]/td[2]'),
            to_type=str)
        # listing market
        self.sssc = self.market()
        # lead underwriter
        self.zcxs = change_text(
            first_text('/html/body/div[2]/div[5]/table/tr[16]/td[2]'),
            to_type=str)
        # listing sponsor
        self.ssbjr = change_text(
            first_text('/html/body/div[2]/div[5]/table/tr[17]/td[2]'),
            to_type=str)
        # accounting firm
        self.kjssws = change_text(
            first_text('/html/body/div[2]/div[5]/table/tr[18]/td[2]'),
            to_type=str)

    # Drop the response explicitly and trigger a collection pass.
    del response
    gc.collect()
def __get_data_xjllb(self):
    """Read the CSV at ``self.__url_xjllb`` and fill the ``xjllb_*`` lists.

    ``self.xjllb_years`` receives the four-character year prefix of every
    column header after the first one, keeping only headers that are not
    blank and whose first four characters are digits. For each of those
    years the column named ``'<year>-12-31'`` is read and one value is
    appended to each ``xjllb_*`` list. Nothing is returned.

    The row meanings (and their CSV line numbers) in the comments are
    carried over from the original annotations. ``change_text`` is defined
    elsewhere in the file.
    """
    frame = pd.read_csv(self.__url_xjllb, encoding=self.encoding)

    # Skip the first column; keep the year part of the remaining headers.
    self.xjllb_years = [
        header[:4]
        for header in frame.columns.to_list()[1:]
        if header.strip() != '' and header[:4].isdigit()
    ]

    self.xjllb_yyhdxjll = []  # cash flow from operating activities  CSV_LINE:26 DF_INDEX:24
    self.xjllb_tzhdxjll = []  # cash flow from investing activities  CSV_LINE:41 DF_INDEX:39
    self.xjllb_czhdxjll = []  # cash flow from financing activities  CSV_LINE:53 DF_INDEX:51
    self.xjllb_xjgl = []      # cash dividends  CSV_LINE:49 DF_INDEX:47
    self.xjllb_zbzc = []      # capital expenditure  CSV_LINE:34 DF_INDEX:32
    self.xjllb_chjse = []     # decrease in inventories  CSV_LINE:76 DF_INDEX:74

    # (destination list, row index), listed in the order the rows are read.
    row_targets = (
        (self.xjllb_yyhdxjll, 24),
        (self.xjllb_tzhdxjll, 39),
        (self.xjllb_czhdxjll, 51),
        (self.xjllb_xjgl, 47),
        (self.xjllb_zbzc, 32),
        (self.xjllb_chjse, 74),
    )

    for year in self.xjllb_years:
        column = frame[f'{year}-12-31']
        for target, row in row_targets:
            target.append(change_text(column[row], 0))

    # Drop the frame explicitly and trigger a collection pass.
    del frame
    gc.collect()
def __get_data_zcfzb(self):
    """Read the CSV at ``self.__url_zcfzb`` and fill the ``zcfzb_*`` lists.

    ``self.zcfzb_years`` receives the four-character year prefix of every
    column header after the first one, keeping only headers that are not
    blank and whose first four characters are digits. For each of those
    years the column named ``'<year>-12-31'`` is read and one value is
    appended to each ``zcfzb_*`` list. Nothing is returned.

    The row meanings (and their CSV line numbers) in the comments are
    carried over from the original annotations. ``change_text`` is defined
    elsewhere in the file.
    """
    frame = pd.read_csv(self.__url_zcfzb, encoding=self.encoding)

    # Skip the first column; keep the year part of the remaining headers.
    self.zcfzb_years = [
        header[:4]
        for header in frame.columns.to_list()[1:]
        if header.strip() != '' and header[:4].isdigit()
    ]

    self.zcfzb_zgb = []      # total share capital  CSV_LINE:96 DF_INDEX:94
    self.zcfzb_xjyydxj = []  # cash and cash equivalents  CSV_LINE:2+3+4+5+6 DF_INDEX:0+1+2+3+4
    self.zcfzb_yszk = []     # accounts receivable  CSV_LINE:8 DF_INDEX:6
    self.zcfzb_ch = []       # inventory  CSV_LINE:21 DF_INDEX:19
    self.zcfzb_ldzc = []     # current assets  CSV_LINE:26 DF_INDEX:24
    self.zcfzb_yfzk = []     # accounts payable  CSV_LINE:61 DF_INDEX:59
    self.zcfzb_yfkx = []     # prepayments  CSV_LINE:9 DF_INDEX:7
    self.zcfzb_ldfz = []     # current liabilities  CSV_LINE:85 DF_INDEX:83
    self.zcfzb_cqfz = []     # long-term liabilities  CSV_LINE:94 DF_INDEX:92
    self.zcfzb_gdqy = []     # shareholders' equity  CSV_LINE:108 DF_INDEX:106
    self.zcfzb_gdqy_gm = []  # equity attributable to parent shareholders  CSV_LINE:106 DF_INDEX:104
    self.zcfzb_gdzc = []     # fixed assets  CSV_LINE:38 DF_INDEX:36
    self.zcfzb_zjgc = []     # construction in progress  CSV_LINE:39 DF_INDEX:37
    self.zcfzb_gcwz = []     # construction materials  CSV_LINE:40 DF_INDEX:38
    self.zcfzb_zfz = []      # total liabilities  CSV_LINE:95 DF_INDEX:93
    self.zcfzb_zzc = []      # total assets  CSV_LINE:53 DF_INDEX:51

    # Single-row fields read after the cash total, as
    # (destination list, row index) in the order the rows are read.
    single_rows = (
        (self.zcfzb_yszk, 6),
        (self.zcfzb_yfkx, 7),
        (self.zcfzb_ch, 19),
        (self.zcfzb_ldzc, 24),
        (self.zcfzb_yfzk, 59),
        (self.zcfzb_ldfz, 83),
        (self.zcfzb_cqfz, 92),
        (self.zcfzb_gdqy, 106),
        (self.zcfzb_gdqy_gm, 104),
        (self.zcfzb_zfz, 93),
        (self.zcfzb_gdzc, 36),
        (self.zcfzb_zjgc, 37),
        (self.zcfzb_gcwz, 38),
        (self.zcfzb_zzc, 51),
    )

    for year in self.zcfzb_years:
        column = frame[f'{year}-12-31']

        # total share capital
        self.zcfzb_zgb.append(change_text(column[94], 0))

        # cash and cash equivalents: rows 0 through 4 added left to right
        cash = [change_text(column[row], 0) for row in (0, 1, 2, 3, 4)]
        self.zcfzb_xjyydxj.append(
            cash[0] + cash[1] + cash[2] + cash[3] + cash[4])

        for target, row in single_rows:
            target.append(change_text(column[row], 0))

    # Drop the frame explicitly and trigger a collection pass.
    del frame
    gc.collect()