def lhb():
    """Fetch the daily LHB (龙虎榜) trade-report details for today, write a CSV
    snapshot per date, and append the rows to the `lhb` table.

    Errors are accumulated and dumped to <path>/data/lhb/error.csv.
    """
    # sql_date = "select distinct `date` from `indexdb` WHERE `date`>='2017-01-01' ORDER BY `date` ASC "
    # list_date = pd.read_sql(sql_date,localconn())['date'].values
    print("LHB:正在获取成交回报信息...")
    today = datetime.date.today()
    list_date = [today]  # only today's session; historic backfill is commented out above
    errorlist = []
    for date in list_date:
        df_lhbdetail = pd.DataFrame()
        # FIX: `code` used to be unbound in the except handler whenever
        # get_lhblist() itself raised, turning the real error into a NameError.
        code = None
        try:
            lhb_list = get_lhblist(str(date), proxy=0)
            print(str(date), len(lhb_list))
            if len(lhb_list) == 0:
                errorlist.append((str(date), 0))
            for code in lhb_list:
                # print(str(date),code)
                tmp_lhbdetail = get_lhbdetail(code, str(date), proxy=0)
                # tmp_lhbdetail =tmp_lhbdetail.drop_duplicates()
                df_lhbdetail = pd.concat((tmp_lhbdetail, df_lhbdetail))
                sleep(random() / 10 + 1)  # polite throttle between per-stock requests
        except Exception as e:
            errorlist.append((str(date), code, e))
        df_lhbdetail.to_csv('./data/lhb/' + str(date) + '.csv', encoding='utf-8')
        try:
            df_lhbdetail.to_sql('lhb', conn(), schema='stockdata', if_exists='append',
                                index=True, chunksize=10000)
        except Exception as e:
            print(e)
    df_error = pd.DataFrame(errorlist)
    df_error.to_csv(path() + '/data/lhb/error.csv')
    print("LHB:更新完毕!")
def analysis():
    """Poll the live 'unusual movement' feed, archive the whole day's feed
    after the market close, and insert the last five minutes of events into
    the `unusual` table.

    :return: DataFrame of the events from the last five minutes
    """
    # print("UNUSUAL: Running...")
    today = datetime.date.today()
    # only events newer than this cutoff are inserted (feed is polled repeatedly)
    now = datetime.datetime.today() - datetime.timedelta(minutes=5)
    data = json.loads(unusual())
    table = [re.split(",", ele) for ele in data]
    for elem in table:
        # drop the trailing character of the code field — feed quirk; TODO confirm format
        elem[0] = elem[0][:-1]
    df = pd.DataFrame(
        table,
        columns=['code', 'name', 'time', 'tcode', 'type', 'data', 'goodorbad'])
    js = df.to_json(orient='records', force_ascii=False).encode('utf-8')
    # after 15:00:00 (A-share market close) archive the full feed, gzip-compressed
    if int(time.strftime("%H%M%S")) > 150000:
        with open(path() + "/data/unusual/" + str(today) + ".jz", 'wb') as f:
            f.write(gzip.compress(js, compresslevel=9))
    df['date'] = str(today)
    df['datetime'] = df['date'] + " " + df['time']
    df['datetime'] = pd.to_datetime(df['datetime'])
    df = df[['datetime', 'code', 'type', 'data', 'goodorbad']]
    df = df[df['datetime'] >= now]
    engine = conn()
    for param in df.values:
        # INSERT IGNORE dedupes rows already stored by a previous poll
        sql_update = "insert ignore into `unusual`(`datetime`, `code`, `type`, `data`, `goodorbad`) value(%s,%s,%s,%s,%s)"
        param = (str(param[0]), param[1], param[2], param[3], param[4])
        engine.execute(sql_update, tuple(param))
    # print("UNUSUAL: Done!")
    # df.to_sql('unusual',localconn(),flavor='mysql',schema='stockdata',if_exists='append',index=False)
    # counts = Counter(df['code'].values).items()
    # print(pd.DataFrame(list(counts),columns=['code','times']).to_json(orient='records',force_ascii= False))
    return df
def amorank(self):
    """Rank every stock by daily turnover (`amo`), compute the rank change
    versus the last rank stored in `usefuldata`, attach the adj-factor
    percentage and TA-model membership flags, and append the result to the
    `usefuldata` table.

    :return: the assembled result DataFrame
    """
    df_data = self.df_dayline
    df_data = df_data.sort_values(by=['amo'], ascending=False)
    # rank 1 = highest turnover of the day
    df_data['amorank'] = pd.Series(np.arange(len(df_data['date'])) + 1,
                                   index=df_data.index)
    df_data = df_data[['code', 'date', 'amorank']]
    df_data = df_data.reset_index(drop=True)
    result = []
    df_list = calc.adjfactor(self)
    taresultlist1, taresultlist2 = calc.tamodel(self)
    # print(taresultlist)
    print("CALC:正在按成交额进行排序...")
    for i in range(len(df_data)):
        code = df_data['code'][i]
        date = df_data['date'][i]
        fAmorank = df_data['amorank'][i]
        # latest previous rank for this stock. NOTE(review): one query per
        # stock (N+1 pattern), and the SQL is %-interpolated — values come from
        # our own DB, but a parameterized query would be safer.
        sql_refar = "select `amorank` from `usefuldata` WHERE `code` ='%s' and `date`<'%s' and `amorank` is NOT NULL" \
                    " ORDER BY `date` DESC LIMIT 0,1" % (code, date)
        df_refar = pd.read_sql(sql_refar, self.con)
        if df_refar.empty == False:
            ref_ar = df_refar.values[0][0]
            ARaise = fAmorank - ref_ar  # positive = rank number grew (moved down the ranking)
        else:
            ARaise = np.nan  # no earlier rank on record
        percentage = df_list[df_list['code'] == code]['percentage'].values[0]
        # membership in the two TA-model hit lists, encoded as a string flag
        if code in taresultlist1 and code in taresultlist2:
            taresult = '1,2'
        elif code in taresultlist1 and code not in taresultlist2:
            taresult = '1'
        elif code in taresultlist2 and code not in taresultlist1:
            taresult = '2'
        else:
            taresult = '0'
        # taresult = '1,2' if code in taresultlist1 else '0'
        result.append([code, date, fAmorank, ARaise, percentage, taresult])
    result = pd.DataFrame(result, columns=[
        'code', 'date', 'amorank', 'araise', 'percentage', 'taresult'
    ])
    print("CALC:正在将成交量信息写入数据库...")
    errorlist = []
    try:
        result.to_sql('usefuldata', self.con, schema='stockdata',
                      if_exists='append', index=False, chunksize=10000)
    except Exception as e:
        print(e)
        errorlist.append(e)
    dferrorlist = pd.DataFrame(errorlist)
    dferrorlist.to_csv(path() + '/error/amorank.csv')
    return result
def mo(pages, proxy=0):
    """Scrape managerial-ownership change records (东方财富) for the given
    result pages, save each page to CSV, and INSERT IGNORE today's rows into
    the `managerial` table.

    :param pages: iterable of result-page numbers to fetch
    :param proxy: forwarded to myspyder
    :return: list of page numbers that failed to download/parse
    """
    today = datetime.date.today()  # - datetime.timedelta(days=1)
    failed_pages = []
    combined = pd.DataFrame()
    for page in pages:
        print("MO:Page:", page)
        try:
            url = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=GG&sty=GGMX&p=%s&ps=1000" % (
                page)
            html = myspyder(url, proxy=proxy).content.decode('utf-8')[1:-1]
            sleep(random() / 10 + 3)  # throttle between page requests
            # each quoted record is a comma-joined field list
            records = [re.split("\,", rec) for rec in re.findall(r'\"([^"]+)\"', html)]
            frame = pd.DataFrame(records, columns=[
                '变动比例', '董监高人员姓名', 'code', '变动人', '持股种类', '日期',
                '变动股数', '变动后持股数', '成交均价', '名称', '变动人与董监高的关系',
                11, '变动方式', '变动金额', '职务', 15
            ])
            frame = frame[[
                'code', '日期', '变动人', '持股种类', '变动股数', '变动后持股数',
                '成交均价', '变动人与董监高的关系', '变动方式', '变动金额', '职务',
                '变动比例', '董监高人员姓名'
            ]]
            frame.to_csv(path() + "/data/managerial_ownership/" + str(page) + ".csv",
                         encoding='utf-8')
            combined = pd.concat((frame, combined), ignore_index=True)
        except Exception as e:
            print(e)
            failed_pages.append(page)
    combined = combined.drop_duplicates()
    combined['日期'] = combined['日期'].astype('datetime64[ns]')
    combined = combined[combined['日期'] >= today]  # keep only today's changes
    engine = conn()
    if not combined.empty:
        sql_update_managerial = "INSERT IGNORE INTO `managerial`(`code`, `日期`, `变动人`, `持股种类`, `变动股数`, " \
                                "`变动后持股数`, `成交均价`, `变动人与董监高的关系`, `变动方式`, `变动金额`, `职务`," \
                                " `变动比例`, `董监高人员姓名`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        for record in combined.values:
            engine.execute(sql_update_managerial, [str(field) for field in record])
    return failed_pages
def stcn_news():
    """Scrape the stcn.com fast-news index pages and insert new headlines into
    the `news` table; links already stored in the last two days are skipped.
    Errors are dumped to <path>/error/update_news.csv.
    """
    import datetime
    # ===================
    lastday = datetime.date.today() - datetime.timedelta(days=2)
    sql_check = "select `link` from `news` where `datetime`>='%s'" % (lastday)
    engine = conn()
    # links from the last two days — used to avoid redundant inserts
    linklist = pd.read_sql(sql_check, engine)['link'].values
    # =================================================
    pages = range(1, 2)
    url = "http://kuaixun.stcn.com/index_%s.shtml"
    source = 'stcn.com'
    urllist = [url % (page_id) for page_id in pages]
    newsresult = pd.DataFrame()
    errorlist = []
    for i in range(len(urllist)):
        try:
            result = get_news(urllist[i], proxy=0)
            newsresult = pd.concat((result, newsresult), ignore_index=True)
            sleep(random() / 10 + 1)  # polite throttle
        except Exception as e:
            print(e)
            errorlist.append(e)
    for j in range(len(newsresult)):
        try:
            stype = newsresult['type'][j]
            title = newsresult['title'][j]
            link = newsresult['link'][j]
            # FIX: this local used to be named `datetime`, shadowing the module
            # imported above after the first loop iteration.
            news_dt = newsresult['datetime'][j]
            if link not in linklist:
                sql_update = "insert ignore INTO `news`(`source`, `type`, `title`, `link`, `datetime`) VALUES (%s,%s,%s,%s,%s)"
                param = (source, stype, title, link, str(news_dt))
                engine.execute(sql_update, param)
        except Exception as e:
            print(e)
            errorlist.append(e)
    dfErrorList = pd.DataFrame(errorlist)
    dfErrorList.to_csv(path() + '/error/update_news.csv')
def update_capitalchange(ser):
    """Insert capital-structure change rows newer than the latest stored
    `变动日期` into the `capitalchange` table.

    :param ser: "server", "local", or anything else for both
    :return: list of (code, exception) tuples for rows that failed
    """
    sql_last = "select `变动日期` from `capitalchange` ORDER BY `变动日期` DESC limit 1"
    lastdate = pd.read_sql(sql_last, localconn()).values[0][0]
    # print(lastdate)
    Errorlist = []
    sqli = "INSERT IGNORE INTO `capitalchange`(`股票代码`, `变动日期`, `变动原因`, `总股本_变动`, `流通A股_变动`, " \
           "`流通B股_变动`, `总股本_前值`, `流通A股_前值`, `流通B股_前值`, `总股本`, `流通A股`, `流通B股`) VALUES " \
           "(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    table = pd.read_csv(path() + "/data/dzhdata/capitalchange.csv", dtype='object')
    table['变动日期'] = pd.to_datetime(table['变动日期'])
    table = table[table['变动日期'] > lastdate].values  # incremental: only new rows
    for elem in table:
        try:
            param = [str(elem[i]) for i in range(len(elem))]
            # FIX: the server/local/both branches were three copy-pasted blocks;
            # pick the target connection(s) once per row (server first, as before).
            if ser == "server":
                targets = [serverconn()]
            elif ser == "local":
                targets = [localconn()]
            else:
                targets = [serverconn(), localconn()]
            for dbconn in targets:
                cur = dbconn.cursor()
                cur.execute(sqli, tuple(param))
                dbconn.commit()
                dbconn.close()  # FIX: per-row connections were never closed (leak)
        except Exception as e:
            print(param[0], e)
            Errorlist.append((param[0], e))
    return Errorlist
def update_financial(filename, conn=None):
    """Load one exported financial-statement CSV (GBK encoded) from
    <path>/data/dzhfinancial/ and INSERT IGNORE every row into `financial`.

    :param filename: CSV basename (without the ".csv" extension)
    :param conn: DB connection/engine; defaults to localconn()
    :return: list of exceptions raised while inserting rows
    """
    # FIX: the default used to be `conn=localconn()` — a call evaluated once at
    # function-definition (import) time, opening a DB connection as a side
    # effect of importing the module. Resolve it lazily per call instead.
    if conn is None:
        conn = localconn()
    file_path = path() + "/data/dzhfinancial/" + filename + ".csv"
    df_data = pd.read_csv(file_path, encoding='gbk', dtype='object')
    list_data = df_data.values
    errorlist = []
    # One %s placeholder per CSV column (the CSV columns map 1:1 onto the
    # fields enumerated below); generating the run avoids a hand-counted literal.
    placeholders = ",".join(["%s"] * len(df_data.columns))
    # Loop-invariant statement text, hoisted out of the per-row loop.
    sql = ("INSERT IGNORE INTO `financial`(`名称`, `报表日期`, `代码`, `摊薄每股收益`, `净资产收益率`, `每股经营活动现金流量`, `每股净资产`, `每股资本公积金`, `每股未分配利润`, `每股主营收入`, `扣除非经常损益每股收益`, `货币资金`, `交易性金融资产`, `应收票据`, `应收账款`, `预付款项`, `应收利息`, `应收股利`, `其他应收款`, `应收关联公司款`, `存货`, `消耗性生物资产`, `一年内到期的非流动资产`, `其他流动资产`, `流动资产合计`, `可供出售金融资产`, `持有至到期投资`, `长期应收款`, `长期股权投资`, `投资性房地产`, `固定资产`, `在建工程`, `工程物资`, `固定资产清理`, `生产性生物资产`, `油气资产`, `无形资产`, `开发支出`, `商誉`, `长期待摊费用`, `递延所得税资产`, `其他非流动资产`, `非流动资产合计`, `资产总计`, `短期借款`, `交易性金融负债`, `应付票据`, `应付账款`, `预收账款`, `应付职工薪酬`, `应交税费`, `应付利息`, `应付股利`, `其他应付款`, `应付关联公司款`, `一年内到期的非流动负债`, `其他流动负债`, `流动负债合计`, `长期借款`, `应付债券`, `长期应付款`, `专项应付款`, `预计负债`, `递延所得税负债`, `其他非流动负债`, `非流动负债合计`, `负债合计`, `实收资本或股本`, `资本公积`, `库存股`, `盈余公积`, `未分配利润`, `外币报表折算差额`, `非正常经营项目收益调整`, `股东权益合计不含少数股东权益`, `少数股东权益`, `股东权益合计含少数股东权益`, `负债和股东权益合计`, `营业收入`, `营业成本`, `营业税金及附加`, `销售费用`, `管理费用`, `堪探费用`, `财务费用z`, `资产减值损失`, `公允价值变动净收益`, `投资收益`, `对联合营企业的投资收益`, `影响营业利润的其他科目`, `营业利润`, `补贴收入`, `营业外收入`, `营业外支出`, `非流动资产处置净损失`, `影响利润总额的其他科目`, `利润总额`, `所得税费用`, `影响净利润的其他科目`, `净利润含少数股东损益`, `净利润不含少数股东损益`, `少数股东损益`, `销售商品、提供劳务收到的现金`, `收到的税费返还`, `收到的其他与经营活动有关的现金`, `经营活动现金流入小计`, `购买商品、接受劳务支付的现金`, `支付给职工以及为职工支付的现金`, `支付的各项税费`, `支付的其他与经营活动有关的现金`, `经营活动现金流出小计`, `经营活动产生的现金流量净额`, `收回投资所收到的现金`, `取得投资收益所收到的现金`, `处置固定、无形和其他长期资产收回的现金净额`, `处置子公司及其他营业单位收到的现金净额`, `收到的其他与投资活动有关的现金`, `投资活动现金流入小计`, `购建固定资产、无形资产和其他长期资产支付的现金`, `投资所支付的现金`, `取得子公司及其他营业单位支付的现金净额`, `支付其他与投资活动有关的现金`, `投资活动现金流出小计`, `投资活动产生的现金流量净额`, `吸收投资所收到的现金`, `子公司吸收少数股东权益性投资收到的现金`, `取得借款收到的现金`, `收到其他与筹资活动有关的现金`, `筹资活动现金流入小计`, `偿还债务支付的现金`, `分配股利、利润或偿付利息支付的现金`, `子公司支给付少数股东的股利、利润`, `支付其他与筹资活动有关的现金`, `筹资活动现金流出小计`, `筹资活动产生的现金流量净额`, `汇率变动对现金的影响`, `其他原因对现金的影响`, `现金及现金等价物净增加额`, `期初现金及现金等价物余额`, `期末现金及现金等价物余额`, `净利润`, `加:资产减值准备`, `固定资产折旧、油气资产折耗、生产性生物资产折旧`, `无形资产摊销`, `长期待摊费用摊销`, `处置固定资产、无形资产和其他长期资产的损失`, `固定资产报废损失`, `公允价值变动损失`, `财务费用l`, `投资损失`, `递延所得税资产减少`, `递延所得税负债增加`, `存货的减少`, `经营性应收项目的减少`, `经营性应付项目的增加`, `其他`, `债务转为资本`, `一年内到期的可转换公司债券`, `融资租入固定资产`, `现金的期末余额`, `现金的期初余额`, `现金等价物的期末余额`, `现金等价物的期初余额`, `流动比率`, `速动比率`, `现金比率`, `负债权益比率`, `股东权益比率1`, `股东权益对负债比率`, `权益乘数`, `长期债务与营运资金比`, `长期负债比率1`, `利息支付倍数`, `股东权益与固定资产比`, `固定资产对长期负债比`, `有形净值债务率`, `清算价值比率`, `债务保障率`, `现金流量比率`, `每股有形资产净值`, `每股营运资金`, `债务总额EBITDA`, `营业周期`, `存货周转天数`, `应收账款周转天数`, `流动资产周转天数`, `总资产周转天数`, `存货周转率`, `应收账款周转率`, `流动资产周转率`, `固定资产周转率`, `总资产周转率`, `净资产周转率`, `股东权益周转率`, `营运资金周转率`, `存货同比增长率`, `应收帐款同比增长率`, `主营业务收入增长率`, `营业利润增长率`, `利润总额增长率`, `净利润增长率`, `净资产增长率`, `流动资产增长率`, `固定资产增长率`, `总资产增长率`, `摊薄每股收益增长率`, `每股净资产增长率`, `每股经营性现金流量增长率`, `三年平均净资收益率`, `总资产净利润率`, `投入资本回报率ROIC`, `成本费用利润率`, `营业利润率`, `主营业务成本率`, `销售净利率`, `总资产报酬率`, `销售毛利率`, `三项费用比重`, `营业费用率`, `管理费用率`, `财务费用率`, `非主营比重`, `营业利润比重`, `每股息税折旧摊销前利润`, `每股息税前利润EBIT`, `EBITDA主营业务收入`, `资产负债率`, `股东权益比率`, `长期负债比率`, `股东权益与固定资产比率`, `负债与所有者权益比率`, `长期资产与长期资金比率`, `资本化比率`, `资本固定化比率`, `固定资产比重`, `经营现金净流量对销售收入比率`, `资产的经营现金流量回报率`, `经营现金净流量与净利润的比率`, `经营现金净流量对负债比率`, `每股营业现金流量`, `每股经营活动现金流量净额`, `每股投资活动产生现金流量净额`, `每股筹资活动产生现金流量净额`, `每股现金及现金等价物净增加额`, `现金流量满足率`, `现金营运指数`) VALUES (" + placeholders + ")")
    for i in range(len(list_data)):
        sql_param = tuple(list_data[i])
        # one context-managed transaction per row, as in the original flow
        with conn as con:
            try:
                con.execute(sql, sql_param)
            except Exception as e:
                errorlist.append(e)
                print(e)
    return errorlist
def update_dzhcontrol(ser):
    """Update the controller-information columns of `basedata` from the
    exported control.csv.

    :param ser: "server", "local", or anything else for both
    :return: list of (code, exception) tuples for rows that failed
    """
    Errorlist = []
    sqli = "UPDATE `basedata` SET `实际控制人名称` = %s, `实际控制人类型` = %s,`央企控制人名称`=%s,`控股股东名称`=%s,`控股股东类型`=%s WHERE `basedata`.`证券代码` = %s ;"
    with open(path() + "/data/dzhdata/control.csv", encoding='utf-8') as f:
        f_csv = csv.DictReader(f)
        for row in f_csv:
            try:
                print(row['证券代码'], row['实际控制人名称'], row['实际控制人类型'],
                      row['央企控制人名称'], row['控股股东名称'], row['控股股东类型'])
                param = (row['实际控制人名称'], row['实际控制人类型'], row['央企控制人名称'],
                         row['控股股东名称'], row['控股股东类型'], row['证券代码'])
                # FIX: the server/local/both branches were three copy-pasted
                # blocks; select the target connection(s) once (server first,
                # preserving the original order).
                if ser == "server":
                    targets = [serverconn()]
                elif ser == "local":
                    targets = [localconn()]
                else:
                    targets = [serverconn(), localconn()]
                for dbconn in targets:
                    cur = dbconn.cursor()
                    cur.execute(sqli, param)
                    dbconn.commit()
                    dbconn.close()  # FIX: per-row connections were never closed
            except Exception as e:
                print(row['证券代码'], e)
                Errorlist.append((row['证券代码'], e))
    # (the explicit f.close() was redundant inside `with` and has been removed)
    return Errorlist
def update_embasedata(stocklist, proxy=0):
    """Scrape company-profile pages (eastmoney F9) for every stock in
    `stocklist` and UPDATE the matching `basedata` row.

    :param stocklist: DataFrame with at least '证券代码' and '证券简称' columns
    :param proxy: forwarded to myspyder
    :return: DataFrame of (code, name, error) for stocks that failed
    """
    Errorlist = []
    engine = conn()
    # loop-invariant UPDATE statement — hoisted; it used to be rebuilt per stock
    sqli = "UPDATE `basedata` SET `证券简称` = %s, `公司名称` = %s, `英文名称` = %s, " \
           " `曾用名` = %s, `公司简介` = %s, `成立日期` = %s, `工商登记号` = %s, `注册资本` = %s, `法人代表` = %s, " \
           " `所属证监会行业` = %s, `员工总数` = %s, `总经理` = %s, `董事会秘书` = %s, `省份` = %s, `城市` = %s, " \
           "`注册地址` = %s, `办公地址` = %s, `邮编` = %s, `电话` = %s, `传真` = %s, `电子邮件` = %s, " \
           "`公司网站` = %s, `审计机构` = %s, `法律顾问` = %s, `经营分析` = %s, `简史` = %s, `核心题材` = %s " \
           "WHERE `basedata`.`证券代码` = %s"
    for i in range(len(stocklist)):
        # market suffix: leading '6' = Shanghai ("01"), otherwise Shenzhen ("02")
        symbol = stocklist['证券代码'][i] + "01" if stocklist['证券代码'][i][
            0] == "6" else stocklist['证券代码'][i] + "02"
        sName = stocklist['证券简称'][i]
        try:
            Intr = myspyder(
                'http://soft-f9.eastmoney.com/soft/gp3.php?code=%s' % (symbol),
                proxy=proxy).content
            Conc = myspyder(
                'http://soft-f9.eastmoney.com/soft/gp30.php?code=%s' % (symbol),
                proxy=proxy).content
            IntrSoup = bs(Intr, 'html5lib')
            ConcSoup = bs(Conc, 'html5lib')
            stockdata = []
            stockdata.append(sName)
            for tr in IntrSoup.find_all(width=880):
                stockdata.append(txt_pre(tr.text.strip()))
            point = ConcSoup.p
            del point['style']  # drop inline styling before storing the HTML fragment
            stockdata.append(str(point))
            stockdata.append(symbol[:-2])  # WHERE key: bare code without market suffix
            print("BASEDATA:", symbol[:-2], sName, i + 1, "/", len(stocklist),
                  round((i + 1) / (len(stocklist)) * 100, 2))
            engine.execute(sqli, tuple(stockdata))
            sleep((random() / 10 + 1))  # polite throttle between requests
        except Exception as e:
            print(symbol[:-2], sName, e)
            Errorlist.append((symbol[:-2], sName, e))
    Errorlist = pd.DataFrame(Errorlist, columns=['证券代码', '证券简称', 'error'])
    Errorlist.to_csv(path() + '/error/update_basedata.csv')
    return Errorlist
def update_stocklist(ser='both'):
    """Refresh names/pinyin in the `stocklist` table for every row of the
    exported stocklist.csv whose name (`证券简称`) is not yet in the DB.

    :param ser: "server", "local", or anything else for both (default)
    :return: Errorlist (kept for interface compatibility; currently the
        per-row try/except is disabled, so errors propagate instead)
    """
    table = pd.read_csv(path() + "/data/dzhdata/stocklist.csv",
                        dtype='object').values
    stocklist = pd.read_sql("select * from `stocklist`", localconn())['证券简称'].values
    Errorlist = []
    for row in table:
        if row[1] in stocklist:
            continue  # name already present — nothing to update
        print(row[1])
        sql = "update `stocklist` set `证券简称`=%s,`拼音缩写`=%s WHERE `证券代码`=%s"
        pinyin = getpinyin(row[1])
        # bank names: 行 is read "háng", so YX must become YH
        pinyin = pinyin if '银行' not in row[1] else pinyin.replace('YX', 'YH')
        param = [row[1], pinyin, row[0]]
        print(param)
        # FIX: the server/local/both branches were three copy-pasted blocks;
        # select the target connection(s) once (server first, as before).
        if ser == "server":
            targets = [serverconn()]
        elif ser == "local":
            targets = [localconn()]
        else:
            targets = [serverconn(), localconn()]
        for dbconn in targets:
            cur = dbconn.cursor()
            cur.execute(sql, tuple(param))
            dbconn.commit()
            dbconn.close()  # FIX: per-row connections were never closed
    return Errorlist
cur.execute(sql, tuple(param)) conn2.commit() # except Exception as e: # print(param[0], e) # Errorlist.append((param[0], e)) return Errorlist if __name__ == '__main__': gbk_to_utf8() # concept_errorlist = update_dzhconcept(ser='both') # 注意导出数据是否完整 文件编码 # dfErrorList = pd.DataFrame( concept_errorlist) # dfErrorList.to_csv(path() + '/error/dzhconcept.csv') # print(dfErrorList) # control_errorlist = update_dzhcontrol(ser='both') # 注意导出数据是否完整 文件编码 # dfErrorList2 = pd.DataFrame( control_errorlist) # dfErrorList2.to_csv(path() + '/error/dzhcontrol.csv') # print(dfErrorList2) capitalchange_errorlist = update_capitalchange(ser='both') # 改抬头,删字段,数字格式 dfErrorList3 = pd.DataFrame(capitalchange_errorlist) dfErrorList3.to_csv(path() + '/error/capitalchange.csv') print(dfErrorList3) # buyback_errorlist = update_buyback(ser='both') # 改抬头,删字段,数字格式,文件编码 # dfErrorList4 = pd.DataFrame(buyback_errorlist) # dfErrorList4.to_csv(path() + '/error/update_buyback.csv') # print(dfErrorList4) # incentive_errorlist = update_incentive(ser='both') # 改抬头,删字段,数字格式,文件编码 # dfErrorList5 = pd.DataFrame(incentive_errorlist) # dfErrorList5.to_csv(path() + '/error/update_incentive.csv') # print(dfErrorList5) update_stocklist(ser='both')
def update_incentive(ser):
    """
    Incrementally load equity-incentive (股权激励) records newer than the most
    recent `薪酬委员会预案公告日` already stored, then bulk-append via to_sql.

    :param ser: server,local or both
    :return: Errorlist — always empty here (to_sql errors are not collected);
        kept for interface compatibility with the sibling update_* functions
    """
    sql_last = "select `薪酬委员会预案公告日` from `incentive` ORDER BY `薪酬委员会预案公告日` DESC limit 1"
    lastdate = pd.read_sql(sql_last, localconn()).values[0][0]
    Errorlist = []
    # row-by-row INSERT statement, now only referenced by the disabled legacy path
    sqli = "INSERT IGNORE INTO `incentive`(`股票代码`, `本期计划制定年度`, `本期计划激励次数`, `方案进度`, `激励标的物`, " \
           "`标的股票来源`, `激励总数_万`, `激励总数占当时总股本的比例`, `计划授权授予股票价格`, `本期计划有效期_年`, " \
           "`股权激励授予条件说明`, `薪酬委员会预案公告日`, `董事会修订方案日`, `股东大会通过日`, `独立财务顾问`, `律师事务所`," \
           " `备注`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    table = pd.read_csv(path() + "/data/dzhdata/incentive.csv", dtype='object')
    # print(table)
    table['薪酬委员会预案公告日'] = pd.to_datetime(table['薪酬委员会预案公告日'])
    # print(table.dtypes)
    table = table[table['薪酬委员会预案公告日'] > lastdate]  # incremental filter
    # NOTE(review): `flavor` was removed from DataFrame.to_sql in newer pandas —
    # this code assumes the legacy pandas version pinned for this project; verify
    # before upgrading.
    if ser == "local" or ser == "both":
        table.to_sql('incentive', localconn(), flavor='mysql',
                     schema='stockdata', index=False, if_exists='append')
    if ser == "server" or ser == "both":
        table.to_sql('incentive', serverconn(), flavor='mysql',
                     schema='stockdata', index=False, if_exists='append')
    # (legacy row-by-row csv.DictReader + cursor.execute insert path removed
    # from the comments for brevity; see VCS history — it executed `sqli` above
    # per row against server/local/both connections.)
    return Errorlist
def get_forecast(proxy=0, lastday=0, update=1):
    """Fetch earnings-forecast (业绩预告) records from eastmoney for the four
    most recent report quarters and INSERT IGNORE them into `forecast`.

    :param proxy: 设置是否使用ip代理,0为不开启,1为开启
    :param lastday: 上次更新距离今天多长时间,默认当天为0
    :param update: 设置更新还是重建,更新设为1,其他为重建
    :return: DataFrame of collected errors
    """
    errorList = []
    today = datetime.date.today() - datetime.timedelta(days=lastday)
    print("FORECAST:", today)
    iyear = int(str(today)[0:4])
    imonth = int(str(today)[5:7])
    List_stock = get_stocklist()
    # request-page size: stock count rounded up to the next thousand
    iLong = int((round(len(List_stock) / 1000, 0) + 1) * 1000)
    # choose each quarter's report date (previous vs. current year) by month
    Q4 = datetime.datetime(iyear - 1, 12, 31).strftime('%Y-%m-%d') if imonth <= 2 else \
        datetime.datetime(iyear, 12, 31).strftime('%Y-%m-%d')
    Q3 = datetime.datetime(iyear - 1, 9, 30).strftime('%Y-%m-%d') if imonth < 8 else \
        datetime.datetime(iyear, 9, 30).strftime('%Y-%m-%d')
    Q2 = datetime.datetime(iyear - 1, 6, 30).strftime('%Y-%m-%d') if imonth < 5 else \
        datetime.datetime(iyear, 6, 30).strftime('%Y-%m-%d')
    Q1 = datetime.datetime(iyear, 3, 31).strftime('%Y-%m-%d')
    List_Quarter = [Q1, Q2, Q3, Q4]
    df_forecast = pd.DataFrame()
    for Quarter in List_Quarter:
        url = 'http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=YJYG&fd=%s&st=4&sr=true&p=1&ps=%s' \
              % (Quarter, iLong)
        # retry up to 3 times while the fetch sentinel "error!" comes back
        content = "error!"
        times_retry = 3
        while content == "error!" and times_retry != 0:
            content = myspyder(url, proxy=proxy).content.decode('utf-8')
            times_retry -= 1
        print("FORECAST:", Quarter, "数据抓取完毕,正在对数据进行处理...")
        try:
            return_list = re.findall("\"(.*?)\"", content)
            fctable = []
            for j in range(len(return_list)):
                appd = re.split("\,", return_list[j])
                fctable.append(appd)
            fctable = pd.DataFrame(fctable, columns=['code', '股票简称', '业绩变动', '变动幅度',
                                                     '预告类型', '同期净利润', '预喜预悲',
                                                     'date', '财报日期'])
            fctable = fctable[['code', '业绩变动', '变动幅度', '预告类型', '同期净利润',
                               'date', '财报日期']]
            df_forecast = pd.concat((df_forecast, fctable)).sort_values(
                'date', ascending=False).drop_duplicates()
        except Exception as e:
            errorList.append(e)
    # FIX: the keyword was misspelled `error='ignore'`; pandas' astype takes
    # `errors=`. With the typo the argument was silently dropped (old pandas)
    # or raised (new pandas).
    df_forecast['date'] = df_forecast['date'].astype('datetime64', errors='ignore')
    df_forecast['财报日期'] = df_forecast['财报日期'].astype('datetime64', errors='ignore')
    print("FORECAST: 数据处理完毕,正在更新数据库...")
    if update == 1:
        df_forecast = df_forecast[df_forecast['date'] >= today]
    # FIX: after concat/sort/drop_duplicates the index holds duplicate labels,
    # so the positional get_value(j, ...) lookups below failed and were
    # silently swallowed by the except; rebuild a clean 0..n-1 index first.
    df_forecast = df_forecast.reset_index(drop=True)
    engine = conn()
    for j in range(len(df_forecast)):
        try:
            Scode = df_forecast.get_value(j, 'code')
            Sdate = str(df_forecast.get_value(j, 'date'))
            Schange = df_forecast.get_value(j, '业绩变动')
            Spercent = df_forecast.get_value(j, '变动幅度')
            Stype = df_forecast.get_value(j, '预告类型')
            Sprofit = df_forecast.get_value(j, '同期净利润')
            Srepdate = str(df_forecast.get_value(j, '财报日期'))
            # split "lower~upper" percent range; a single value goes to `upper`
            ul = re.split("~", Spercent)
            if len(ul) == 2:
                upper = ul[1].replace('%', '')
                lower = ul[0].replace('%', '')
            elif len(ul) == 1 and ul[0] != '':
                upper = ul[0].replace('%', '')
                lower = None
            else:
                upper = None
                lower = None
            sql_update = "INSERT IGNORE INTO `forecast`(`code`, `date`, `业绩变动`, `变动幅度`, `预告类型`, `同" \
                         "期净利润`, `财报日期`,`上限`,`下限`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)"
            param = (Scode, Sdate, Schange, Spercent, Stype, Sprofit, Srepdate, upper, lower)
            engine.execute(sql_update, param)
        except Exception as e:
            errorList.append(e)
    dfErrorList = pd.DataFrame({'error': errorList})
    dfErrorList.to_csv(path() + '/error/update_forecast.csv')
    print("FORECAST: 更新数据库完毕!")
    return dfErrorList
pinyin = getpinyin(name) if '银行' not in name else getpinyin( name).replace('YX', 'YH') market = '上海证券交易所' if code[0] == '6' else '深圳证券交易所' if code not in stocklist or code in ipocheck: try: sql_xg = "INSERT ignore INTO `stocklist`(`证券代码`, `证券简称`, `上市市场`,`拼音缩写`) VALUES (%s,%s,%s,%s)" sql_ipo = "update `basedata` set `首发日期`=%s ,`首发价格`=%s WHERE `证券代码`=%s" engine.execute(sql_xg, (code, name, market, pinyin)) # conn.commit() engine.execute(sql_ipo, (str(ipodate), ipoprice, code)) # conn.commit() print("STOCKLIST:", code, ":更新成功!") except Exception as e: print("STOCKLIST:", code, ":更新失败!", e) Errorlist.append(code) output = str(datetime.date.today()) + (" 更新完成!" if len(Errorlist) == 0 else " 更新出错!请检查!") print("STOCKLIST:", output) return Errorlist except: return ['数据获取失败...'] if __name__ == "__main__": error = update_stocklist(proxy=0) df_error = pd.DataFrame(error) df_error.to_csv(path() + '/error/update_stocklist.csv')
def update_buyback(ser):
    """
    Incrementally load share-buyback (回购) records newer than the latest
    stored `董事会通过日` and bulk-append them via to_sql.

    :param ser: server,local or both
    :return: Errorlist — always empty here (to_sql errors are not collected);
        kept for interface compatibility with the sibling update_* functions
    """
    sql_last = "select `董事会通过日` from `buyback` ORDER BY `董事会通过日` DESC limit 1"
    lastdate = pd.read_sql(sql_last, localconn()).values[0][0]
    Errorlist = []
    # sqli="insert ignore into `buyback`(`证劵代码`, `方案进度`, `董事会通过日`, `股东大会通过日`, `国资委通过日`, " \
    #      "`证监会通过日`, `回购资金上限_CNY`, `回购价格上限_CNY`, `回购股份预计_万`, `占总股本`, `占实际流通股`, `股份种类`," \
    #      " `回购资金来源`, `回购股份方式`, `回购股份实施期限`, `备注`) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    table = pd.read_csv(path() + "/data/dzhdata/buyback.csv", dtype='object')
    # print(table)
    table['董事会通过日'] = pd.to_datetime(table['董事会通过日'])
    # print(table.dtypes)
    table = table[table['董事会通过日'] > lastdate].values  # incremental filter
    # the .values round-trip discards column labels/dtypes, hence the rebuild
    # and re-parse below
    table = pd.DataFrame(table, columns=[
        '证劵代码', '方案进度', '董事会通过日', '股东大会通过日', '国资委通过日', '证监会通过日',
        '回购资金上限_CNY', '回购价格上限_CNY', '回购股份预计_万', '占总股本', '占实际流通股',
        '股份种类', '回购资金来源', '回购股份方式', '回购股份实施期限', '备注'
    ])
    table['董事会通过日'] = pd.to_datetime(table['董事会通过日'])
    table['回购资金上限_CNY'] = table['回购资金上限_CNY'].astype('float')
    table['回购价格上限_CNY'] = table['回购价格上限_CNY'].astype('float')
    table['回购股份预计_万'] = table['回购股份预计_万'].astype('float')
    table['占实际流通股'] = table['占实际流通股'].astype('float')
    table['占总股本'] = table['占总股本'].astype('float')
    print(table.dtypes)
    print(table)
    # NOTE(review): `flavor` was removed from DataFrame.to_sql in newer pandas —
    # this assumes the legacy pandas version pinned for this project; verify
    # before upgrading.
    if ser == "local" or ser == "both":
        table.to_sql('buyback', localconn(), flavor='mysql',
                     schema='stockdata', index=False, if_exists='append')
    if ser == "server" or ser == "both":
        table.to_sql('buyback', serverconn(), flavor='mysql',
                     schema='stockdata', index=False, if_exists='append')
    # (legacy row-by-row csv.DictReader + cursor.execute insert path removed
    # from the comments for brevity; see VCS history — it executed the
    # commented `sqli` above per row against server/local/both connections.)
    return Errorlist
def news_content():
    """Backfill/refresh the `content` column of the `news` table by fetching
    each article's full text from the stcn mobile API; failures are logged to
    <path>/error/update_newscontent.csv.
    """
    # ===================
    engine = conn()
    today = datetime.date.today() - datetime.timedelta(days=5)
    # ===================set requests================== #
    # rqs = rq.session()
    # rqs.keep_alive = False
    # ===================get news content================ #
    # rows with no content yet, plus recent "更新中" (still-updating) articles
    sql_news_null = text(
        "SELECT * FROM `news` WHERE `content` IS NULL OR (`title` LIKE :ud and `datetime`>=:dt)"
    )
    df_listurl = pd.read_sql(sql_news_null, engine, params={
        "ud": "%更新中%",
        "dt": str(today)
    })
    list_url = df_listurl['link'].values
    list_title = df_listurl['title'].values
    list_content = df_listurl['content'].values
    errorlist = []
    # list_url=['http://kuaixun.stcn.com/2017/1110/13761584.shtml']
    # ip_list =pd.read_csv('ip.csv')['ip'].values
    for i in range(len(list_url)):
        time.sleep(random.random() / 10 + 3)  # polite throttle
        # article id = filename portion of the link, without the extension
        newsid = re.split("\.", re.split("\/", list_url[i])[-1])[0]
        newurl = "http://app.stcn.com/?app=article&controller=article&action=fulltext&contentid=%s" % (
            newsid)
        try:
            # [1:-2] trims the wrapper characters around the JSON payload
            html = myspyder(newurl, proxy=0).content.decode('utf-8')[1:-2]
            newscontent = json.loads(html)['content']
            # print(newscontent)
            # (an older BeautifulSoup-based extraction path was removed here;
            # see VCS history)
            # strip |STCNTTTP|...|STCNTTTP| embedded placeholders from the body
            newscontent = "".join(
                re.split("\|STCNTTTP\|.+\|STCNTTTP\|", newscontent))
            if newscontent == list_content[i]:
                pass  # unchanged — skip the UPDATE
            else:
                print("NEW:", list_title[i])
                # print(newscontent)
                sql_update_newscontent = "update `news` set `content`=%s WHERE `link`=%s"
                param = (newscontent, list_url[i])
                engine.execute(sql_update_newscontent, param)
        except Exception as e:
            # print(url,e)
            errorlist.append((list_url[i], e))
    df_errorlist = pd.DataFrame(errorlist, columns=['link', 'error'])
    df_errorlist.to_csv(path() + '/error/update_newscontent.csv')
def spo(proxy=0):
    """
    http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=ZF&p=1&ps=5000&st=5
    Update SPO (secondary public offering, 增发) records from eastmoney.com:
    keep the last ~100 days, snapshot them to CSV, and INSERT IGNORE into
    `spo_done`.

    :param proxy: use proxy set proxy=1 if not, proxy=0; default 0
    :return: errorlist
    """
    print("SPO: Running...")
    errorlist = []
    today = datetime.date.today() - datetime.timedelta(days=100)
    url = "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=ZF&p=1&ps=1000&st=5"
    html = myspyder(url, proxy=proxy).content
    # SECURITY NOTE: eval() on a remote HTTP response executes arbitrary code
    # if the endpoint is ever compromised or tampered with; the payload is a
    # JS-style array literal — consider parsing it safely (e.g. normalize to
    # JSON + json.loads) instead.
    table = eval(html.decode('utf-8'))
    # FIX: the row list used to be named `list`, shadowing the builtin
    rows = []
    for ele in table:
        rows.append(re.split("\,", ele))
    df_spo = pd.DataFrame(rows, columns=['code', 'name', '发行方式', '发行总数', '发行价格',
                                         '现价', '发行日期', '增发上市日期', '8', '增发代码',
                                         '网上发行', '中签号公布日', '中签率', '13', '14',
                                         '15', '16'])
    df_spo = df_spo[['code', 'name', '发行方式', '发行总数', '发行价格', '发行日期',
                     '增发上市日期', '增发代码', '网上发行', '中签号公布日', '中签率']]
    df_spo = df_spo.drop_duplicates()
    # df_spo = df_spo.replace('-','')
    df_spo['发行日期'] = df_spo['发行日期'].astype('datetime64[ns]')
    spo = df_spo[df_spo['发行日期'] >= today]  # keep the last ~100 days
    spo.to_csv(path() + '/data/spo_done/spo_' + str(today) + '.csv', encoding='utf-8')
    engine = conn()
    # loop-invariant INSERT statement — hoisted; it used to be rebuilt per row
    sql_update_spo = "INSERT IGNORE INTO `spo_done`(`code`, `name`, `发行方式`, `发行总数`, `发行价格`, " \
                     "`发行日期`, `增发上市日期`, `增发代码`, `网上发行`, `中签号公布日`, `中签率`) VALUES" \
                     " (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
    try:
        for elem in spo.values:
            # '-' marks a missing value in the feed -> store NULL
            params = [str(param) if param != '-' else None for param in elem]
            engine.execute(sql_update_spo, params)
        print("SPO: Done!")
    except Exception as e:
        print("SPO:", e)
        errorlist.append(e)
    return errorlist
result=engine.execute(sql_update_spo,params) # result.close() # conn.commit() # spo.to_sql('spo_done',localconn(),flavor='mysql',schema='stockdata',if_exists='append', # index=False,chunksize=10000) # if ser == 'server' or ser == 'both': # conns = serverconn() # curs = conns.cursor() # results=conns.execute(sql_update_spo, params) # results.close() # conns.commit() # spo.to_sql('spo_done',serverconn(),flavor='mysql',schema='stockdata',if_exists='append', # index=False,chunksize=10000) # if ser == 'local' or ser == 'both': # conn.close() # if ser == 'server' or ser == 'both': # conns.close() print("SPO: Done!") except Exception as e: print("SPO:",e) errorlist.append(e) return errorlist """ "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx?type=SR&sty=ZF&p=1&ps=50&st=5" """ if __name__ == '__main__': errorlist = spo(proxy=0) df = pd.DataFrame(errorlist) df.to_csv(path()+'/error/update_spo.csv')