import os import gc import myfunc import re from matplotlib import pyplot as plt from sklearn.linear_model import LinearRegression from fuzzywuzzy import process ######################################导入源数据 data = pd.read_excel('e:\\jm\\附件1.xlsx') data.to_csv('e:\\jm\\data1.csv') data1 = pd.read_csv('e:\\jm\\data1.csv') data2 = pd.read_sas('e:\\jm\\data.sas7bdat') myfunc.search('map') #######################################导入数据字典 lineData = [] with open('e:\\jm\\数据字典.txt') as txtData: lines = txtData.readlines() for line in lines: lineData = lineData + [line.strip()] def var_dic(n): var_lx = lineData[n].replace(')', ')').replace('(', '(').replace(' ', '').split('、') t = [re.split('[()]', x) for x in var_lx] var_lx = dict() for x in t:
from queue import Queue import threading import os import datetime import pandas as pd import tushare as ts import numpy as np from sqlalchemy import create_engine from sqlalchemy import types import myfunc from imp import reload reload(myfunc) myfunc.search('strp') # # 获取所有股票数据,利用股票代码获取复权数据 # stock_basics = ts.get_stock_basics() # cal_dates = ts.trade_cal() # 返回交易所日历,类型为DataFrame, calendarDate isOpen # stock_basics.to_csv('e:\\data\\stock_basics.csv',encoding='GBK') # cal_dates.to_csv('e:\\data\\cal_dates.csv',encoding='GBK') stock_basics = pd.read_csv('e:\\data\\stock_basics.csv', encoding='GBK') stock_basics.index = stock_basics.code.astype('str').str.zfill(6) cal_dates = pd.read_csv('e:\\data\\cal_dates.csv', encoding='GBK').iloc[:, -2:] # 本地实现判断市场开市函数 # date: str类型日期eg.'2017-11-23' def get_date_list(begin_date, end_date): date_list = []
import gc
import os
import time
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import tushare as ts

import myfunc

# Scratch session: a series of quick interactive checks, kept in their
# original order.

np.nan < 1  # comparison against NaN
os.chdir("e:\\stock")  # change the current working directory

help(pd.DataFrame.isnull)
help(pd.DataFrame.count)
myfunc.search('corr')

b'\xe6\xb1\xbd\xe8\xbd\xa6'.decode('utf8')  # decode a UTF-8 byte string
1319-1491

myfunc.search('eval')
a = 'search'
b = 'myfunc'
# Looks the module up through the name stored in ``b`` and calls its search().
eval(b).search('s')

a = ['a', 'f']
a.append(['d', 'd'])  # appends the inner list as one element
list(['d', 'f'])
list('f')
# NOTE(review): ``a`` is a list at this point, so indexing it with a string
# raises TypeError - confirm what was intended here.
a['g'] = [3, 3]
a = 1  # save
import datetime
import math
import os
from time import time
from urllib.request import urlretrieve

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tushare as ts
from arch import arch_model
from matplotlib.font_manager import FontProperties
from sklearn.linear_model import LinearRegression

import myfunc

# SimSun font object for rendering Chinese text in matplotlib.
font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=12)

myfunc.search('pytorch')

# Download the China Everbright Bank price chart up to 2017-04-05
# import tushare as ts
# df = ts.lpr_data()  # data for the current year
# #df = ts.lpr_data(2014)  # data for 2014
# df.sort('date', ascending=False).head(10)

# Load the convertible-bond data and fill missing values.
file = pd.read_excel(r'e:\bond\2018年上市转债数据.xlsx')
# Drop the last two rows; copy so the assignment below writes to this frame
# rather than to a slice of the sheet that was read.
file = file[:-2].copy()
# Fill missing values of the ninth column with 70.  The previous
# ``file.iloc[:, 8].fillna(70, inplace=True)`` filled a temporary Series, so
# the frame itself could stay unfilled; assign the result back explicitly.
file.iloc[:, 8] = file.iloc[:, 8].fillna(70)

###### Risk-free rate
shibor = pd.read_csv(r'e:\bond\Shibor1Y.csv', encoding='GBK')
nums[i][j] += nums[i-1][j] else: nums[i][j] += min(nums[i-1][j],nums[i][j-1]) nums[i][j] class ListNode: def __init__(self, x): self.val = x self.next = None a=ListNode([1,2,3,54]) a.val a.next import myfunc myfunc.search('sort_values') import pandas as pd a=pd.DataFrame([[1,2],[1,3]],columns=list('ab')) a.sort_values('a').drop_duplicates('a') [1,2,3,1,2,3,5].count(5) import numpy as np a=np.matrix([[1,2],[3,4]]) a*a.T X=np.array([1,2,3,4,5]) X=X.reshape(-1, 1) Y=np.array([0,2,3,4,6]) Y=Y.reshape(-1, 1) from sklearn.linear_model import LinearRegression
d = a.sort_values('nkill').tail(1000) d = np.mat(d.values) N = len(d) # 归一化 from sklearn import preprocessing min_max_scaler = preprocessing.MinMaxScaler() d2 = min_max_scaler.fit_transform(d) # 正太化 from sklearn.preprocessing import StandardScaler scaler = StandardScaler().fit(d) X = scaler.transform(d) X.shape myfunc.search('font') ################绘制频率直方图 t = X[:, -1] t = t[t < 0.3] from matplotlib.font_manager import FontProperties font_set = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=12) fig, (ax0, ax1) = plt.subplots(nrows=2, figsize=(9, 6)) ax0.hist(t, 80, normed=1, histtype='bar', facecolor='yellowgreen', alpha=0.75) ##pdf概率分布图,一万个数落在某个区间内的数有多少个 ax0.set_title('恐怖袭击死亡人数pdf', fontproperties=font_set) ax1.hist(t, 20, normed=1, histtype='bar', facecolor='pink', alpha=0.75,
def prim(G):
    """Build a minimum spanning tree over the points in the global ``x``.

    The vertices are the indices of ``x`` and the weight of the edge between
    two vertices ``u`` and ``v`` is ``abs(x[u] - x[v])``.  The tree is grown
    from vertex 0 with a heap of candidate edges (Prim's algorithm).

    Args:
        G: Kept for interface compatibility; it is not read.  The points are
            taken from the module-level sequence ``x``.

    Returns:
        A list of ``((p, q), w)`` tuples, one per tree edge in the order the
        edges were added: ``p`` is the vertex already in the tree, ``q`` the
        vertex being attached and ``w`` the edge weight.  The list is empty
        when ``x`` has fewer than two elements.
    """
    vertex_count = len(x)
    v = 0        # start from the first vertex
    s = {v}      # vertices already in the tree
    edges = []   # heap of candidate edges as (weight, from, to)
    res = []     # accumulated result

    # A spanning tree over ``vertex_count`` vertices has ``vertex_count - 1``
    # edges.  The earlier version subtracted one twice and therefore stopped
    # one edge short, leaving a vertex unconnected.
    for _ in range(vertex_count - 1):
        # Offer every edge from the newest tree vertex to a vertex that is
        # still outside the tree.
        for u, value in enumerate(x):
            if u not in s:
                heapq.heappush(edges, (abs(value - x[v]), v, u))
        # Take the cheapest edge that reaches a new vertex; entries whose
        # target has meanwhile joined the tree are discarded.
        while edges:
            w, p, q = heapq.heappop(edges)
            if q not in s:
                s.add(q)
                res.append(((p, q), w))
                v = q
                break
    return res


import queue
import stack
import myfunc

myfunc.search('ShowProcess')
# Load the holdings table from the SAS dataset.
b = pd.read_sas('e:\\wind\\hold.sas7bdat')
# b.drop('VAR3',axis=1,inplace=True)

# VAR2 holds bytes; decode them as UTF-8 text.
b['VAR2'] = b['VAR2'].str.decode('utf8')  # str(b.VAR2, encoding = "utf8")
# VAR4 becomes a string of digits (parsed below with the %Y%m%d format).
b['VAR4'] = b['VAR4'].astype('int').astype(str)

# Keep only the 06-30 and 12-31 records.
b['date'] = pd.to_datetime(b['VAR4'], format='%Y%m%d')
is_mid_year = b['VAR4'].str.contains('0630')
is_year_end = b['VAR4'].str.contains('1231')
b1 = b[is_mid_year | is_year_end]
b1 = b1.iloc[:, :3]
b1.to_csv('e:/wind/manager.csv', index=False)

myfunc.search('to_csv')
help(pd.DataFrame.to_csv)
len(b)
len(b1)

t = [
    '000650.XSHE', '002014.XSHE', '002088.XSHE', '002107.XSHE', '002117.XSHE',
    '002136.XSHE', '002148.XSHE', '002270.XSHE', '002365.XSHE', '002561.XSHE',
    '002553.XSHE', '002706.XSHE', '002718.XSHE', '002732.XSHE', '002749.XSHE',
    '002763.XSHE', '002793.XSHE', '002790.XSHE', '002836.XSHE', '002853.XSHE',
    '002849.XSHE', '002900.XSHE', '002880.XSHE', '002887.XSHE', '002896.XSHE',
    '002913.XSHE', '002868.XSHE', '002882.XSHE', '300075.XSHE', '300081.XSHE',
    '300231.XSHE', '300258.XSHE', '300286.XSHE', '300295.XSHE', '300341.XSHE',
    '300371.XSHE', '300396.XSHE', '300445.XSHE', '300410.XSHE', '300428.XSHE',
    '300446.XSHE', '300427.XSHE', '300394.XSHE', '300547.XSHE', '300519.XSHE',
    '300548.XSHE', '300561.XSHE', '300600.XSHE', '300637.XSHE', '300635.XSHE',
    '300648.XSHE', '300656.XSHE', '300687.XSHE', '300590.XSHE', '300599.XSHE',
    '300605.XSHE', '300695.XSHE', '300701.XSHE', '300696.XSHE', '300709.XSHE',
    '300727.XSHE', '300735.XSHE', '300723.XSHE', '300707.XSHE', '300739.XSHE',
    '300711.XSHE', '300738.XSHE', '300732.XSHE', '600305.XSHG', '600345.XSHG',
    '600419.XSHG', '600668.XSHG', '600732.XSHG', '600844.XSHG', '600892.XSHG',
    '600995.XSHG', '603023.XSHG', '603037.XSHG', '603158.XSHG', '603266.XSHG',
    '603086.XSHG', '603079.XSHG', '603110.XSHG', '603226.XSHG', '603106.XSHG',
    '603136.XSHG', '603283.XSHG', '603058.XSHG', '603599.XSHG', '603339.XSHG',
    '603306.XSHG', '603585.XSHG', '603535.XSHG', '603326.XSHG', '603500.XSHG',
    '603496.XSHG', '603608.XSHG', '603639.XSHG', '603808.XSHG', '603669.XSHG',
    '603809.XSHG', '603829.XSHG', '603722.XSHG', '603607.XSHG', '603617.XSHG',
    '603856.XSHG', '603889.XSHG', '603987.XSHG', '603968.XSHG', '603928.XSHG',
    '603978.XSHG', '603938.XSHG', '603960.XSHG',
]
len(t)

############################## Jim Slater's Zulu Principle investing method ##############################
# Stock-selection criteria:
# 1. Total market cap < market-average total market cap * 1.0.
# 2. After-tax net profit positive in each of the past five years.
# 3. After-tax net profit growth rate >= 15% in each of the past three years.
# 4. Forecast after-tax net profit growth rate >= 15%.
import datetime
import gc
import os

import numpy as np
import pandas as pd
import tushare as ts
from matplotlib import pyplot as plt

import myfunc

os.chdir("e:\\stock\\siyinzi")
myfunc.search('sas', 1)

# Load the forward-adjusted closing prices.
close = pd.read_csv('closeADJ.csv')
close = close.iloc[:, 1:]  # drop the first column of the CSV
# Tickers are turned into zero-padded six-character strings.
close['ticker'] = close['ticker'].astype(str).str.zfill(6)
# One row per trade date, one column per ticker.
close1 = pd.pivot_table(close, columns='ticker', values='closePrice',
                        index='tradeDate')


def mad(x):
    """Return the simple return across a two-element window: x[1] / x[0] - 1."""
    return x[1] / x[0] - 1


# raw=True passes each window as a plain ndarray, so ``x[0]`` / ``x[1]`` are
# positional.  Without it current pandas passes a Series indexed by trade
# date, on which integer keys are label lookups.
close2 = close1.rolling(window=2).apply(mad, raw=True)

# Back to long format: one row per (tradeDate, ticker) with the value in 'rate'.
close = close2.stack()
close.name = 'rate'
close = close.reset_index()

# Load the market-cap data (the original note says PB; the file is shizhi_PE.csv).
shizhi_PE = pd.read_csv('shizhi_PE.csv')
shizhi_PE = shizhi_PE.iloc[:, 1:]  # drop the first column of the CSV
shizhi_PE['ticker'] = shizhi_PE['ticker'].astype(str).str.zfill(6)
return alldata def init(): line.set_data([], []) return line def animate(i): axes.set_xlim(0, i + 10) x = range(i + 1) y = dapan('sh000001') line.set_data(x, y) return line anim = animation.FuncAnimation(fig, animate, init_func=init, frames=10000, interval=5000) plt.show() import myfunc myfunc.search('sys.') ############################## ts.get_hist_data('600848',start='2018-01-05',end='2018-01-09') from pandas import DataFrame, Series import pandas as pd import matplotlib.pyplot as plt from matplotlib import dates as mdates from matplotlib import ticker as mticker from matplotlib.finance import candlestick_ohlc from matplotlib.dates import DateFormatter, WeekdayLocator, DayLocator, MONDAY, YEARLY from matplotlib.dates import MonthLocator, MONTHLY import datetime as dt import pylab daylinefilespath = 'G:\\dayline\\'
z.pop(1) final = Final(z) final1 = final[final['Unnamed: 1'].str.contains('资产总计') | final['Unnamed: 1'].str.contains('49\.') | final['Unnamed: 1'].str.contains('59\.') | final['Unnamed: 1'].str.contains('6\d\.')] t = pivot(final1, [4], col='Unnamed: 1') add_sheet(t, 'temp', '16') ################################################ ################################################ ################################################ t.str.replace(r'\d\.\d*', '') import myfunc myfunc.search('正则', dirFlag=1) def pivot(final1, x, col='name', ind='year'): for i in x: final1.iloc[:, i] = final1.iloc[:, i].astype(float) a = pd.pivot_table(final1, columns=col, values=final1.columns[x], index=ind) return a def Final(z): df = pd.DataFrame({ 'num': get_num(z),
import datetime
import gc
import os

import numpy as np
import pandas as pd
import tushare as ts
from matplotlib import pyplot as plt

os.chdir("e:\\temp")
import myfunc

myfunc.search('filter', )

# Example of reading a file.  The file is opened in text mode with the
# platform default encoding; the context manager closes the handle, which the
# earlier bare ``open`` never did.
with open("shiti.txt") as f:
    lines = f.readlines()

# One list entry per line of the file (line endings are kept).
a = list(lines)
b = pd.DataFrame({'a': a})

# Mark the lines containing '考试' and number the sections they start:
# ``c1`` is the running count of such lines, ``t1`` their positions.
c = b.a.str.contains('考试')
c1 = np.cumsum(c)
t1 = np.where(c)
b['c'] = c1

# Positions of the lines matching either answer-heading pattern.
c = b.a.str.contains('(参考答案)|(真题(卷二)答案)')
t2 = np.where(c)
import datetime
import gc
import os

import numpy as np
import pandas as pd
import tushare as ts
from matplotlib import pyplot as plt

os.chdir("e:\\stock\\temp")
import myfunc
from importlib import reload

reload(myfunc)
myfunc.search('npy')

############## Stock list
data = ts.get_stock_basics()
code = data.index
code = code.sort_values()
np.save('code.npy', code)
# The saved index is an object array, which NumPy stores pickled; recent
# NumPy refuses to load that unless allow_pickle is set.  The file is the one
# written on the line above.
code = np.load('code.npy', allow_pickle=True)

data = pd.DataFrame()
bug = []  # codes for which no history came back
process_bar = myfunc.ShowProcess(len(code))
for x in code:
    t = ts.get_hist_data(x, start='2018-03-01', end='2018-05-09')
    # e.g. ts.get_hist_data('601965') comes back empty
    if t is None:
        bug.append(x)
    else:
        # Reuse the frame already fetched instead of requesting it a second
        # time; keep the first five columns and tag the rows with the code.
        t = t.iloc[:, :5].copy()
        t['code'] = x
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        data = pd.concat([data, t])
import datetime
import gc
import os

import numpy as np
import pandas as pd
import scipy as sp
import tushare as ts
from matplotlib import pyplot as plt
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# ``sklearn.cross_validation`` was removed in scikit-learn 0.20; the function
# lives in ``sklearn.model_selection``.  Fall back to the old module only on
# installations that predate the new one.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

import psm
import myfunc

os.chdir("e:\\stock\\siyinzi")
myfunc.search('gbk')

# Parse the file once: column 0 is the label, columns 1-13 are the features.
_wine = np.loadtxt("wine.data", delimiter=",", usecols=range(14))
x = _wine[:, 1:]  # feature set
y = _wine[:, 0]   # label set
print(x)  # inspect the samples

# Split the data set: 80% for training, 20% for testing.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Fit a logistic regression.
model = LogisticRegression()
model.fit(x_train, y_train)
print(model)  # print the model

# make predictions