def get_all_hushen_data():
    """
    Fetch data for every listed stock and store it in MySQL.

    Symbols are read from ``get_pro_stock_basic()``; each one is fetched with
    ``get_hist_data`` and every non-empty result is passed to ``saveData``.
    Per the original author's note, the data only covers the recent period:
    many columns, but not many rows.

    A text progress bar is printed every 100 symbols. A symbol whose fetch
    or save fails is reported on stdout and skipped.

    :return: None
    """
    basic = get_pro_stock_basic()
    symbols = basic['symbol'].tolist()
    total = len(basic['name'].tolist())

    for position, code in enumerate(symbols):
        if position % 100 == 0:
            done = '=' * (position // 100)
            todo = " " * ((total - position) // 100)
            print('[' + done + '>' + todo + ']')

        try:
            frame = get_hist_data(code)
        except Exception:
            print("%s 这只股票有问题!" % code)
            continue

        try:
            if not frame.empty:
                saveData(frame, code)
        except AttributeError:
            # presumably get_hist_data returned an object without ``.empty``
            # (e.g. None) — TODO confirm against get_hist_data
            print("在%s 没拉取成功" % code)
def all_stock():
    """
    Load about one year of daily data for each stock, cached as CSV on disk.

    Codes come from the ``ts_code`` column of ``get_pro_stock_basic()``;
    codes starting with ``'3'`` or ``'688'`` are excluded. For each remaining
    code the frame is read from ``stock/<ts_code>.csv`` when that file
    exists, otherwise it is downloaded with ``get_stock_daily`` for the
    window returned by ``get_Date_base_gap(0, 365)`` and written to that
    path. Downloads shorter than 80 rows and cached files shorter than 10
    rows are skipped.

    Error handling: a short list of known error messages marks the stock as
    unusable and it is skipped. Any other exception is printed, the function
    sleeps 61 seconds and retries the same stock (presumably to wait out an
    API rate limit — TODO confirm).

    :return: ``(stock_data, ok_stock)`` where ``stock_data`` maps ts_code to
        its DataFrame and ``ok_stock`` lists the loaded ts_codes in order.
    """
    all_codes = get_pro_stock_basic()['ts_code'].tolist()
    # startswith() is safe on an empty code, where code[0] would raise.
    stocks = [code for code in all_codes if not code.startswith(('3', '688'))]

    today, ago = get_Date_base_gap(0, 365)
    total = len(stocks)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs('stock', exist_ok=True)
    # A set gives O(1) membership tests inside the loop.
    cached_files = set(os.listdir('stock'))

    # Messages that mean "no usable data for this stock": skip, do not retry.
    skippable_errors = (
        'not enough values to unpack',
        'object is not subscriptable',
        "object of type 'NoneType' has no len()",
    )

    stock_data = {}
    ok_stock = []
    count = 0
    # A while loop (not for) so a failed stock can be retried by leaving
    # ``count`` unchanged.
    while count < total:
        if count % 10 == 0:
            filled = int(count * 20 // total)
            print("\r【%s%s/%s%s】" % ('>' * filled, count, total,
                                     '=' * (20 - filled)), end='')
        code = stocks[count]
        try:
            if code + '.csv' not in cached_files:
                df = get_stock_daily(ts_code=code, start_date=ago, end_date=today)
                if len(df) < 80:
                    count += 1
                    continue
                df.to_csv('stock/%s.csv' % code, index=False)
            else:
                df = pd.read_csv('stock/%s.csv' % code)
            if len(df) < 10:
                print(code + '.csv' + " not long enough!")
                count += 1
                continue
            stock_data[code] = df
            ok_stock.append(code)
            count += 1
        except Exception as e:
            message = str(e)
            print(e)
            if any(fragment in message for fragment in skippable_errors):
                count += 1
                continue
            # Unknown failure: wait, then retry the same stock.
            time.sleep(61)
    return stock_data, ok_stock
def get_stock_info(field='industry'):
    """
    Build lookup tables between stocks and one column of the basic stock list.

    :param field: name of the ``get_pro_stock_basic()`` column to group by.
        Defaults to ``'industry'``. (Previously this argument was accepted
        but ignored, and ``'industry'`` was always used.)
    :return: ``(stock_industry, industry_stock)``. The first dict maps each
        ``ts_code`` to its value in ``field``; the second maps each value to
        the list of ``ts_code`` entries carrying it, in table order.
    """
    data = get_pro_stock_basic()
    stock_industry = {}
    industry_stock = {}
    # Iterate the two columns in lockstep instead of re-indexing ``data`` in
    # place, so the frame returned by get_pro_stock_basic() is left untouched.
    for ts_code, value in zip(data['ts_code'], data[field]):
        stock_industry[ts_code] = value
        industry_stock.setdefault(value, []).append(ts_code)
    return stock_industry, industry_stock
def update_neo4j_stock_finance_info(graph, start=2574):
    """
    Update the financial indicators stored on each stock node in the graph.

    For every symbol from position ``start`` onward in the ``symbol`` column
    of ``get_pro_stock_basic()``, the stock node is looked up or created via
    ``getNode``. The indicators are then fetched with ``get_fina_indicator``,
    and the first row's ratios are written onto the node as properties named
    by the Chinese labels in ``indicator_labels``.

    :param graph: graph handle; only ``graph.run(cypher)`` is used here.
    :param start: position in the symbol list to start from. Defaults to
        2574, the offset that was previously hard-coded.
    :return: None
    """
    basic = get_pro_stock_basic()
    codes = list(basic['symbol'])
    length = len(codes)
    code_ts = dict(zip(basic['symbol'], basic['ts_code']))
    code_name = dict(zip(basic['symbol'], basic['name']))
    # Indicator column -> property name written on the node.
    indicator_labels = {
        'current_ratio': "流动比率",
        'quick_ratio': "速动比率",
        'debt_to_assets': "资产负债率",
        'dp_assets_to_eqt': "权益乘数(杜邦分析)",
        'roe': "净资产收益率(ROE)",
        'roe_waa': "加权平均净资产收益率(ROE_WAA)",
        'roe_dt': "净资产收益率(扣除非经常损益,ROE_DT)",
        'roe_yearly': "年化净资产收益率(ROE_YEARLY)",
    }
    # Report progress about 50 times; max() keeps the step non-zero when
    # there are fewer than 50 symbols (the old int(length/50) divided by 0).
    step = max(1, length // 50)

    for count, symbol in enumerate(codes[start:], start=1):
        if symbol is None or symbol == '0':
            continue
        # Left-pad short codes to six digits for the node id. The lookups
        # below keep using the raw ``symbol``, because code_name/code_ts are
        # keyed by the unpadded value.
        stock_id = symbol.rjust(6, '0') if symbol else symbol
        if count % step == 0:
            print("%s / %s" % (count, length))
        getNode(graph, '股票', ['stock_id', 'name'],
                [stock_id, code_name[symbol]], createNode=True)
        try:
            df = get_fina_indicator(ts_code=code_ts[symbol])
            time.sleep(0.75)
            if df.empty:
                print("没有获取到数据")
                continue
        except TypeError:
            print("Code:%s, TypeError!!!" % stock_id)
            continue
        except AttributeError:
            print("Code:%s, AttributeError!" % stock_id)
            continue

        for column, label in indicator_labels.items():
            try:
                cypher = "match (n:`股票`{stock_id:'%s'}) set n.`%s` = '%s'" % (
                    stock_id, label, df.at[0, column])
                graph.run(cypher)
            except (IndexError, KeyError):
                # DataFrame.at raises KeyError for a missing row label or
                # column, so it is handled together with IndexError.
                print("索引 %s 出错" % stock_id)
                print(df)
def update_all_hushen_data():
    """
    Update the MySQL data of every listed stock.

    Intended to be used together with ``get_all_hushen_data``. Each symbol
    from ``get_pro_stock_basic()`` is passed to ``updateData``; a text
    progress bar is printed every 100 symbols.

    :return: None
    """
    codes = list(get_pro_stock_basic()['symbol'])
    total = len(codes)

    for position, code in enumerate(codes):
        if position % 100 == 0:
            bar = '=' * (position // 100) + '>' + " " * ((total - position) // 100)
            print('[' + bar + ']')

        try:
            updateData(code)
        except AttributeError:
            print("在%s 没拉取成功" % code)
        except pymysql.err.ProgrammingError as sql_error:
            # On a SQL programming error, fall back to a full fetch-and-save
            # (presumably the symbol's table is missing — TODO confirm).
            print(sql_error)
            saveData(get_hist_data(code), code)
        except Exception:
            print("%s 这只股票有问题" % code)
def get_all_hist_data_by_pro():
    """
    Fetch more than ten years of daily history through the pro interface
    and insert it into MySQL.

    Steps:
      1. Create a table (``createTable``) for every symbol that has none.
      2. For each stock, read the smallest ``TIME`` already stored, or use
         today's date when the table is empty. Then request daily bars from
         ``'2010-01-04'`` up to that date with ``get_pro_daily``.
      3. Turn every returned row into an ``insert ignore`` statement and
         execute the collected statements in one ``executeSQL`` call.

    The connection is closed even when a step raises.

    :return: None
    """
    all_data = get_pro_stock_basic()
    ts_codes = list(all_data.ts_code)
    symbols = list(all_data.symbol)
    connect, conn = connectSQL()
    try:
        # Column names as returned by the API, and the matching MySQL
        # column names (same order).
        source_columns = ['open', 'high', 'low', 'close', 'change', 'pct_chg', 'vol']
        mysql_columns = ['open', 'high', 'low', 'close', 'price_change', 'p_change', 'volume']
        column_list = ', '.join(mysql_columns)

        # A set makes the per-symbol existence check O(1).
        exist_tables = {
            row[0] for row in executeSQL(connect, conn, 'show tables;', True)
        }
        for code in symbols:
            if code not in exist_tables:
                createTable(code)

        insert_sql = []
        total = len(symbols)
        # zip() pairs the columns positionally, so this no longer depends on
        # the DataFrame having a default 0..n-1 index.
        for count, (ts_code, symbol) in enumerate(zip(ts_codes, symbols)):
            if count % 100 == 0:
                print("Finish %s/%s Stock~ " % (count, total))

            get_date_sql = "select TIME from `" + symbol + "` order by TIME limit 1;"
            try:
                earliest = executeSQL(connect, conn, get_date_sql,
                                      query=True)[0][0].isoformat()
            except IndexError:
                # Empty table: nothing stored yet, so fetch up to today.
                earliest = str(datetime.date.today().isoformat())

            data = get_pro_daily(ts_code=ts_code, start_date='2010-01-04',
                                 end_date=str(earliest))
            time.sleep(0.12)

            for row in data.index:
                raw = str(data.at[row, 'trade_date'])
                # YYYYMMDD -> YYYY-MM-DD
                date = raw[:4] + '-' + raw[4:6] + '-' + raw[6:]
                values = ', '.join(str(data.at[row, key]) for key in source_columns)
                insert_sql.append(
                    "insert ignore into `%s`(TIME, %s) values( '%s', %s);"
                    % (symbol, column_list, date, values))

        # NOTE(review): the batch is executed once, after all symbols have
        # been collected — confirm this matches the intended placement.
        executeSQL(connect, conn, insert_sql)
    finally:
        closeSQL(connect, conn)
def _stocks_by_column(stock_info, column):
    """Map each value of ``column`` to the list of ts_codes that carry it."""
    grouped = {}
    for ts_code, value in zip(stock_info['ts_code'], stock_info[column]):
        grouped.setdefault(value, []).append(ts_code)
    return grouped


def all_base():
    """
    Plot the average daily percentage change per industry, concept and area.

    Stocks are grouped three ways: by the ``industry`` and ``area`` columns
    of ``get_pro_stock_basic()``, and by concept membership. Concept
    membership is read with ``get_stock_concepts`` for the first 90 concept
    codes returned by ``get_concept()``. For each group the ``pct_chg``
    values from ``pro.daily()`` are summed over its members and divided by
    the member count. The five best groups are printed together with their
    members, and the best 50 are passed to ``plot_``.

    Each group is labelled with its name followed by its member count.

    :return: None
    """
    stock_info = get_pro_stock_basic()
    concept_info = get_concept()
    concept_codes = concept_info['code']
    concept_code2name = dict(zip(concept_codes, concept_info['name']))
    concept_total = len(concept_codes)

    concept_stock = {}
    for count, code in enumerate(concept_codes[:90], start=1):
        concept_stock[code] = get_stock_concepts(code)['ts_code']
        # Pause after every 100 requests (not reached while only the first
        # 90 concepts are fetched).
        if count % 100 == 0:
            percent = int(count * 100 // concept_total)
            print("\r【%s%s%s】" % ('>>' * percent, percent,
                                  '=' * (100 - int(count // concept_total))))
            time.sleep(60)

    # Group by iterating the columns directly. The old code reused a
    # ``length`` variable that the concept loop had overwritten, so only
    # len(concepts) rows were visited.
    industry_stock = _stocks_by_column(stock_info, 'industry')
    area_stock = _stocks_by_column(stock_info, 'area')

    df = pro.daily()
    df.set_index(["ts_code"], inplace=True)
    # De-duplicate: keep the first row for each ts_code.
    df = df.loc[~df.index.duplicated(keep='first')].copy()

    groupings = (
        ('industry', industry_stock),
        ('concept', concept_stock),
        ('area', area_stock),
    )
    for name, name_stock in groupings:
        label_value = {}
        label_key = {}
        for key, members in name_stock.items():
            total = 0
            for ts_code in members:
                try:
                    total += df.at[ts_code, 'pct_chg']
                except KeyError as e:
                    # Stock has no row in today's data; it still counts in
                    # the divisor, as before.
                    print(e)
            size = len(members)
            # Guard against a group with no members.
            average = round(total / size, 4) if size else 0.0
            # Only concept keys are codes that need translating to a name.
            base = concept_code2name[key] if name == 'concept' else key
            label = str(base) + str(size)
            label_value[label] = average
            label_key[label] = key

        ranked = sorted(label_value.items(), key=lambda item: item[1], reverse=True)
        for label, value in ranked[:5]:
            print((label, value), name_stock[label_key[label]])

        ranked = ranked[:50]
        names = [label for label, _ in ranked]
        num_list = [value for _, value in ranked]
        print("NumX:%s, NumY:%s, NAME:%s " % (len(names), len(num_list), name))
        plot_(names, num_list, name)
round(close_price[x - 1] - price, 2)]) price = close_price[x] if not month_change.get(mon): month_change[mon] = [round(close_price[x - 1] - price, 2)] else: month_change[mon].append(round(close_price[x - 1] - price, 2)) # print(table) ret_m_c = {} for x in month_change.keys(): # print(x,' ---> ', month_change[x], '==>', round(sum(month_change[x])/ len(month_change[x]),4)) ret_m_c[x] = round(sum(month_change[x]) / len(month_change[x]), 4) return ret_m_c if __name__ == '__main__': stocks = get_pro_stock_basic() codes = [x for x in stocks['ts_code'] if x[0] != 3 and x[:2] != '68'] names = [x for x in stocks['name']] rate = {} for x in range(len(codes)): if names[x].find('ST') != -1 or x % 100 == 0: continue # print("\n\n模拟%s 周期观察" % names[x]) sleep(0.1) d = CycleObservation(codes[x], '20150101', '20210110', names[x]) for x in d.keys(): if not rate.get(x): rate[x] = [d[x]] else: rate[x].append(d[x]) for x in rate.keys():