def musicdetails(href):
    """Scrape the detail page of each of the 50 ranked releases.

    Args:
        href: Object indexable by the integers 1..50. Each item is either a
            path relative to ``https://www.oricon.co.jp`` or the string
            ``'nan'`` when the release has no detail page.

    Returns:
        pandas.DataFrame indexed 1..50 with the columns ``highestrank``,
        ``rankingtime``, ``selldata``, ``publisher``, ``PN``, ``price`` and
        ``main`` (the first track title matched on the page).

    Raises:
        ValueError: If a page yields a number of ``composition-info-content``
            fields other than 4 or 6 (raised by the reshape below).
    """
    base_url = 'https://www.oricon.co.jp'
    title_pattern = 'music-title">[0-9].(.*?)</div>'
    info_pattern = 'composition-info-content">(.*?)</'

    rows = []
    for i in range(1, 51):
        if href[i] != 'nan':
            page = requests.get(base_url + href[i]).text
            titles = re.findall(title_pattern, page, re.S)
            info = re.findall(info_pattern, page, re.S)
        else:
            # No detail page: fill the whole row with placeholders.
            titles = ['nan']
            info = ['nan'] * 6

        if len(info) == 4:
            # Pages with only four info fields get two leading placeholder
            # fields so that every row has six.
            info = ['<span>nan', '<span>nan'] + info

        # Fall back to the placeholder instead of raising IndexError when the
        # page has no track title.
        main_title = titles[0] if titles else 'nan'
        rows.append(np.append(np.array(info).reshape(1, 6), main_title))

    rank = np.linspace(1, 50, 50, dtype=int)
    return pd.DataFrame(rows, index=rank, columns=[
        "highestrank", "rankingtime", "selldata", "publisher", "PN", "price",
        "main"
    ])
def train(training_inputs, yd, training_iterations):
    """Train a two-weight sigmoid unit and plot the resulting line.

    Relies on the module-level names ``sigmoid``, ``sigmoid_derivative``,
    ``learning_rate``, ``np`` and ``plt``.

    Args:
        training_inputs: 2-D array with two columns (the weight vector is
            created with shape ``(2, 1)``).
        yd: Desired outputs, compatible with
            ``sigmoid(np.dot(training_inputs, w))``.
        training_iterations: Number of weight-update steps to run.

    Returns:
        The trained weight array of shape ``(2, 1)``.
    """
    w = np.random.randn(2, 1)
    for _ in range(training_iterations):
        # Forward pass: current output of the unit.
        y = sigmoid(np.dot(training_inputs, w))
        # Error against the desired output.
        error = yd - y
        # Weight adjustment from the inputs and error * sigmoid_derivative.
        adjustments = np.dot(training_inputs.T, error * sigmoid_derivative(y))
        w += learning_rate * adjustments

    # Plot the data passed in (not a global array), on a single figure.
    plt.figure(figsize=(10, 7))
    plt.scatter(training_inputs[:, 0], yd, s=100, alpha=0.3)
    xs = np.linspace(0, 70, 200)
    ys2 = w[1] + w[0] * xs
    plt.plot(xs, ys2, 'g', linewidth=1)
    plt.show()
    return w
def yearrank(year):
    """Scrape the Oricon yearly ranking (ten pages of ten entries).

    Args:
        year: Year segment of the ranking URL; converted with ``str``.

    Returns:
        pandas.DataFrame indexed 1..100 with the columns ``href``, ``title``,
        ``name``, ``selldata`` and ``publisher``. Entries without a detail
        link get ``'nan'`` as their ``href``.

    Raises:
        ValueError: If a page does not yield exactly ten entries (raised by
            the DataFrame constructor because the index has ten labels).
    """
    columns = ["href", "title", "name", "selldata", "publisher"]
    # One pattern requires a detail link, the other does not. Comparing both
    # result lists reveals the entries that have no link.
    linked_pattern = re.compile(
        r'box-rank-entry.*?href="(.*?)" itemprop.*?name">(.*?)</h2>.*?name">(.*?)</p>.*?<li>発売日：\s+(.*?)\s+</li>\s+<li>(.*?)\s+</li>',
        re.S)
    entry_pattern = re.compile(
        r'box-rank-entry.*?name">(.*?)</h2>.*?name">(.*?)</p>.*?<li>発売日：\s+(.*?)\s+</li>\s+<li>(.*?)\s+</li>',
        re.S)
    url_loc = 'https://www.oricon.co.jp/rank/js/y/' + str(year) + '/'

    frames = []
    for page in range(1, 11):
        # The first page lives at the base URL, later pages under p/<n>/.
        url = url_loc if page == 1 else url_loc + 'p/' + str(page) + '/'
        html = requests.get(url).text
        linked = linked_pattern.findall(html)
        entries = entry_pattern.findall(html)

        if len(linked) != len(entries):
            # Walk the complete entry list so a link-less entry at the end of
            # the page is padded as well.
            for pos, entry in enumerate(entries):
                if pos >= len(linked) or linked[pos][1] != entry[0]:
                    linked.insert(pos, ('nan',) + tuple(entry))

        index = np.arange(page * 10 - 9, page * 10 + 1)
        frames.append(pd.DataFrame(linked, index=index, columns=columns))

    return pd.concat(frames)
def monthrank(month):
    """Scrape the Oricon monthly ranking (five pages of ten entries).

    Prints the base URL before fetching.

    Args:
        month: Month segment of the ranking URL; converted with ``str``.

    Returns:
        pandas.DataFrame indexed 1..50 with the columns ``state``, ``href``,
        ``title``, ``name``, ``selldata`` and ``num``. Entries without a
        detail link get ``'nan'`` as their ``href``.

    Raises:
        ValueError: If a page does not yield exactly ten entries (raised by
            the DataFrame constructor because the index has ten labels).
    """
    columns = ["state", "href", "title", "name", "selldata", "num"]
    # One pattern requires a detail link, the other does not. Comparing both
    # result lists reveals the entries that have no link.
    linked_pattern = re.compile(
        r'<p class="status (.*?)">.*?<a href="(.*?)".*?itemprop="name">(.*?)</h2>\s*<p class="name">(.*?)</p>.*?<li>発売日：\s*(.*?)\s*</li>\s*<li>推定売上枚数：(.*?)</li>',
        re.S)
    entry_pattern = re.compile(
        r'<p class="status (.*?)">.*?itemprop="name">(.*?)</h2>\s*<p class="name">(.*?)</p>.*?<li>発売日：\s*(.*?)\s*</li>\s*<li>推定売上枚数：(.*?)</li>',
        re.S)
    url_loc = 'https://www.oricon.co.jp/rank/js/m/' + str(month) + '/'
    print(url_loc)

    frames = []
    for page in range(1, 6):
        # The first page lives at the base URL, later pages under p/<n>/.
        url = url_loc if page == 1 else url_loc + 'p/' + str(page) + '/'
        html = requests.get(url).text
        linked = linked_pattern.findall(html)
        entries = entry_pattern.findall(html)

        if len(linked) != len(entries):
            # Some albums have no detail page. Walk the complete entry list so
            # a link-less entry at the end of the page is padded as well.
            for pos, entry in enumerate(entries):
                if pos >= len(linked) or linked[pos][2] != entry[1]:
                    padded = (entry[0], 'nan') + tuple(entry[1:])
                    linked.insert(pos, padded)

        index = np.arange(page * 10 - 9, page * 10 + 1)
        frames.append(pd.DataFrame(linked, index=index, columns=columns))

    return pd.concat(frames)
# Plot normal-distribution probability density functions.
import math

import numpy as np
import matplotlib.pyplot as plt


def normal_pdf(x, mean, std):
    """Return the normal probability density at *x* for the given mean and std."""
    return np.exp(-(x - mean) ** 2 / (2 * std ** 2)) / (math.sqrt(2 * math.pi) * std)


u = 0  # mean (mu)
u01 = -2
sig = math.sqrt(0.2)  # standard deviation (sigma)
sig01 = math.sqrt(1)
sig02 = math.sqrt(5)
sig_u01 = math.sqrt(0.5)

# Sample grids; every range is expressed in multiples of `sig` around `u`.
x = np.linspace(u - 3 * sig, u + 3 * sig, 50)
x_01 = np.linspace(u - 6 * sig, u + 6 * sig, 50)
x_02 = np.linspace(u - 10 * sig, u + 10 * sig, 50)
x_u01 = np.linspace(u - 10 * sig, u + 1 * sig, 50)

y_sig = normal_pdf(x, u, sig)
y_sig01 = normal_pdf(x_01, u, sig01)
y_sig02 = normal_pdf(x_02, u, sig02)
y_sig_u01 = normal_pdf(x_u01, u01, sig_u01)

plt.plot(x, y_sig, "r-", linewidth=2)
plt.plot(x_01, y_sig01, "g-", linewidth=2)
plt.plot(x_02, y_sig02, "b-", linewidth=2)
plt.plot(x_u01, y_sig_u01, "m-", linewidth=2)
plt.grid(True)
plt.show()
# Interactive-session transcript exploring np.genfromtxt options and the
# ndarray `base` / `flags.owndata` attributes. Several statements are
# misspelled or premature first attempts that are retried on the next line.
# NOTE(review): presumably a pasted shell history; it is not runnable
# top to bottom as written.

# NOTE(review): `data` is not assigned anywhere above in this block, and
# `dtypr` is a misspelling of `dtype` (retried on the next line).
np.genfromtxt(data,dtypr=(int,float,int))
np.genfromtxt(data,dtype=(int,float,int))

# Converter turning a bytes percentage such as b"2.3%" into a fraction.
convertfunc = lambda x: float(x.strip(b"%"))/100.
data = u"1,2.3%,45.\n6,78.9%,0"
# NOTE(review): `names` is only assigned after this first call; the call is
# repeated once it exists.
np.genfromtxt(StringIO(data),delimiter='',names=names,converters={1:convertfunc})
names = ['a','b','c']
np.genfromtxt(StringIO(data),delimiter='',names=names,converters={1:convertfunc})

# Missing-value handling: the first `kwargs` is immediately replaced by the
# fuller version that also supplies filling values.
data = u"N/A, 2, 3\n4,,???"
kwargs = dict(delimiter=',',dtype=int,names='a,b,c',missing_values={0:"N"})
kwargs = dict(delimiter=',',dtype=int,names='a,b,c',missing_values={0:"N/A",'b':0,2:"???"},filling_values={0:0,'b':0,2:-999})
# NOTE(review): `gemfromtxt` and `SringIO` are misspellings; the third line is
# the correctly spelled call.
np.gemfromtxt(SringIO(data),**kwargs)
np.genfromtxt(SringIO(data),**kwargs)
np.genfromtxt(StringIO(data),**kwargs)

# Inspecting which arrays own their data.
x = np.arange(10,1,-1)
x
y = np.linspace(1,19,4)
y
x[np.array([3,3,1,8])]
z = x[np.array([3,3,1,8])]
z.base
# NOTE(review): `base` is accessed as an attribute on the line above and
# called here; presumably the call was a mistaken attempt.
z.base()
y = x  # rebinds y to the same array object as x
y.base
y.base()
# NOTE(review): `np.base` looks like another mistaken attempt; confirm.
np.base(y)
x.base
z.flags.owndata
y.flags.owndata
x.flags.owndata
# NOTE(review): `a` is not assigned anywhere in this block; the comparison is
# repeated against `x` on the next line.
y.base is a
y.base is x
# NOTE(review): the next two statements look like the tail of a function whose
# `def` line is not part of this line; they are kept exactly as found.
plt.show()
return w
# training_inputs = np.array([(0, 0, 1),
#                             (0, 1, 1),
#                             (1, 0, 1),
#                             (1, 1, 1)])
# yd = np.array([[0, 0, 0, 1]]).T

# Train for 15000 iterations on the module-level `x` and `yd`.
synaptic_weights = train(x, yd, 15000)
# NOTE(review): the weights are printed before their label.
print(synaptic_weights)
print("Ending Weights After Training: ")
# k = -synaptic_weights[0] / synaptic_weights[1]
# b = -synaptic_weights[2] / synaptic_weights[1]

# NOTE(review): `fig` and `ax` are not used below, and a second figure is
# created right after them; presumably this leaves an extra empty window.
fig = plt.figure()
ax = plt.subplot()
# ax=fig.add_axes([0, 0, 5, 5])
plt.figure(figsize=(10, 7))
# plt.plot(x_train.data.numpy(),y_train.data.numpy(),'*')
# Scatter the first input column against the targets, then overlay the line
# weights[1] + weights[0] * xs for xs in [0, 70].
plt.scatter(x[:, 0], yd, s=100, alpha=0.3)
xs = np.linspace(0, 70, 200)
ys2 = synaptic_weights[1] + synaptic_weights[0] * xs
plt.plot(xs, ys2, 'g', linewidth=1)
plt.show()
# Fit a straight line to six sample points with scipy.optimize.leastsq and
# plot the samples together with the fitted line.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from scipy.optimize import leastsq

X = np.array([1, 5, 7, 9, 13, 16])
Y = np.array([37, 66, 71, 79, 85, 99])

# Dimensions of the sample array.
print(X.shape)

plt.scatter(x=X, y=Y, marker='o', c='r')


def hypothesis_function(p, x):
    """Evaluate the line ``a * x + b`` for parameters ``p = (a, b)``."""
    a, b = p
    y = a * x + b
    return y


def cost_function(p, x, y):
    """Return the residuals between the line for ``p`` and the targets ``y``."""
    return hypothesis_function(p, x) - y


# Initial guess for (a, b).
p = [50, 100]

pa = leastsq(cost_function, p, args=(X, Y))
# The first element of the leastsq result holds the fitted parameters.
a, b = pa[0]
print(a, b)

# Draw the fitted line as 1000 closely spaced points.
x = np.linspace(0, 16, 1000)
y = a * x + b
plt.scatter(x, y, color="red")
plt.show()
def plot_break_point():
    """Draw a horizontal segment from 0 to -25 at y = 0 and mark the break point.

    Reads the module-level ``breakPoint`` for the marker's x position and
    draws on the current matplotlib axes; returns nothing.
    """
    x = np.linspace(0, -25, 10)
    y = np.zeros_like(x)
    # Line widths are passed as numbers, the type matplotlib documents.
    plt.plot(x, y, color='black', linewidth=4)
    plt.scatter(breakPoint, 0, color="green", marker="o", linewidth=5)