Esempio n. 1
0
    def musicdetails(href):
        """Scrape the detail page of each of the 50 ranked releases.

        For every rank ``i`` in 1..50, ``href[i]`` is either a site-relative
        path (appended to ``https://www.oricon.co.jp``) or the string
        ``'nan'`` when the release has no detail page. Pages are fetched with
        ``requests.get`` and parsed with regular expressions.

        Args:
            href: Anything indexable by the integers 1..50.

        Returns:
            A ``pandas.DataFrame`` indexed 1..50 with the columns
            ``highestrank, rankingtime, selldata, publisher, PN, price, main``.
            The first six come from the ``composition-info-content`` matches,
            ``main`` is the first ``music-title`` match.

        Raises:
            IndexError: A fetched page contains no ``music-title`` match.
            ValueError: A page yields a number of detail fields other than
                4 or 6, so the row cannot be reshaped to six columns.
        """
        # Plain local variables are used for the per-page intermediates.
        # Writing through ``locals()`` inside a function is not supported and
        # the written keys are lost on Python 3.13+.
        rows = []
        for i in range(1, 51):
            if href[i] != 'nan':
                page = requests.get('https://www.oricon.co.jp' + href[i])
                titles = re.findall('music-title">[0-9].(.*?)</div>',
                                    page.text, re.S)
                details = re.findall('composition-info-content">(.*?)</',
                                     page.text, re.S)
            else:
                titles = ['nan']
                details = ['nan'] * 6

            if len(details) == 4:
                # Pages with only four fields get two '<span>nan' placeholders
                # in front so that every row has six detail columns.
                details[:0] = ['<span>nan', '<span>nan']

            rows.append(
                np.append(np.array(details).reshape(1, 6), titles[0]))

        rank = np.linspace(1, 50, 50, dtype=int)
        return pd.DataFrame(rows,
                            index=rank,
                            columns=[
                                "highestrank", "rankingtime", "selldata",
                                "publisher", "PN", "price", "main"
                            ])
Esempio n. 2
0
def train(training_inputs, yd, training_iterations):
    """Train a two-weight sigmoid unit and plot the fitted line every iteration.

    The weights start from ``np.random.randn(2, 1)`` and are updated in place
    each iteration by ``learning_rate * X.T @ (error * sigmoid_derivative(y))``.
    After each update a new figure shows the samples (first input column
    against ``yd``) together with the line ``w[1] + w[0] * xs``.

    Relies on ``sigmoid``, ``sigmoid_derivative``, ``learning_rate`` and
    ``plt`` being defined in the enclosing module.

    Args:
        training_inputs: Array whose ``.T`` and ``[:, 0]`` are used; it is
            multiplied with the (2, 1) weight vector via ``np.dot``.
        yd: Desired outputs, subtracted element-wise from the unit's output.
        training_iterations: Number of update steps (and of figures shown).

    Returns:
        The trained weight array of shape (2, 1).
    """
    w = np.random.randn(2, 1)
    for _ in range(training_iterations):
        # Forward pass: current output of the unit.
        y = sigmoid(np.dot(training_inputs, w))
        # Error against the desired output.
        error = yd - y
        # Weight adjustment: inputs times (error * sigmoid_derivative).
        adjustments = np.dot(training_inputs.T, error * sigmoid_derivative(y))
        w += learning_rate * adjustments

        # One figure per iteration. The samples are taken from the
        # ``training_inputs`` argument rather than from a module-level array,
        # and no extra empty figure is opened alongside the plot.
        plt.figure(figsize=(10, 7))
        plt.scatter(training_inputs[:, 0], yd, s=100, alpha=0.3)
        xs = np.linspace(0, 70, 200)
        ys2 = w[1] + w[0] * xs
        plt.plot(xs, ys2, 'g', linewidth=1)

        plt.show()
    return w
Esempio n. 3
0
  def yearrank(year):
    """Scrape the ten pages of a yearly Oricon ranking into one DataFrame.

    Page 1 is fetched from ``https://www.oricon.co.jp/rank/js/y/<year>/`` and
    pages 2..10 from the same URL followed by ``p/<n>/``. Each page is parsed
    twice: once with a pattern that captures the detail-page link and once
    with a pattern that does not. When the two disagree in length, entries
    that are missing from the link-bearing list get a row whose ``href`` is
    the string ``'nan'``, so that every page contributes one row per entry.

    Args:
        year: The year as a string; it is concatenated into the URL.

    Returns:
        A ``pandas.DataFrame`` with columns
        ``href, title, name, selldata, publisher``. Page ``n`` is indexed
        ``10*n-9 .. 10*n`` and the pages are concatenated in order.

    Raises:
        ValueError: A page does not yield exactly ten rows after alignment
            (the per-page index always has ten labels).
    """
    url_loc = 'https://www.oricon.co.jp/rank/js/y/' + year + '/'
    columns = ["href", "title", "name", "selldata", "publisher"]
    # Raw strings keep ``\s`` intact without invalid-escape warnings.
    with_href = re.compile(
        r'box-rank-entry.*?href="(.*?)" itemprop.*?name">(.*?)</h2>.*?name">(.*?)</p>.*?<li>発売日:\s+(.*?)\s+</li>\s+<li>(.*?)\s+</li>',
        re.S)
    without_href = re.compile(
        r'box-rank-entry.*?name">(.*?)</h2>.*?name">(.*?)</p>.*?<li>発売日:\s+(.*?)\s+</li>\s+<li>(.*?)\s+</li>',
        re.S)

    frames = []
    for page in range(1, 11):
      page_url = url_loc if page == 1 else url_loc + 'p/' + str(page) + '/'
      html = requests.get(page_url).text
      entries = with_href.findall(html)
      all_entries = without_href.findall(html)

      if len(entries) != len(all_entries):
        # Walk the complete list so that a missing entry at the very end of
        # the page is padded too, not only those in the middle.
        for pos, entry in enumerate(all_entries):
          if pos >= len(entries) or entries[pos][1] != entry[0]:
            entries.insert(pos, ('nan',) + tuple(entry))

      page_ranks = np.linspace(page * 10 - 9, page * 10, 10, dtype=int)
      frames.append(pd.DataFrame(entries, index=page_ranks, columns=columns))

    return pd.concat(frames)
Esempio n. 4
0
    def monthrank(month):
        """Scrape the five pages of a monthly Oricon ranking into one DataFrame.

        Page 1 is fetched from ``https://www.oricon.co.jp/rank/js/m/<month>/``
        (the URL is printed first) and pages 2..5 from the same URL followed
        by ``p/<n>/``. Each page is parsed twice: once with a pattern that
        captures the detail-page link and once with a pattern that does not.
        When the two disagree in length, entries missing from the link-bearing
        list get a row whose ``href`` is the string ``'nan'``.

        Args:
            month: The month identifier as a string; it is concatenated into
                the URL.

        Returns:
            A ``pandas.DataFrame`` with columns
            ``state, href, title, name, selldata, num``. Page ``n`` is indexed
            ``10*n-9 .. 10*n`` and the pages are concatenated in order.

        Raises:
            ValueError: A page does not yield exactly ten rows after alignment
                (the per-page index always has ten labels).
        """
        columns = ["state", "href", "title", "name", "selldata", "num"]
        url_loc = 'https://www.oricon.co.jp/rank/js/m/' + month + '/'
        print(url_loc)

        # Raw strings keep ``\s`` intact without invalid-escape warnings.
        with_href = re.compile(
            r'<p class="status (.*?)">.*?<a href="(.*?)".*?itemprop="name">(.*?)</h2>\s*<p class="name">(.*?)</p>.*?<li>発売日:\s*(.*?)\s*</li>\s*<li>推定売上枚数:(.*?)</li>',
            re.S)
        without_href = re.compile(
            r'<p class="status (.*?)">.*?itemprop="name">(.*?)</h2>\s*<p class="name">(.*?)</p>.*?<li>発売日:\s*(.*?)\s*</li>\s*<li>推定売上枚数:(.*?)</li>',
            re.S)

        frames = []
        for page in range(1, 6):
            page_url = url_loc if page == 1 else url_loc + 'p/' + str(page) + '/'
            html = requests.get(page_url).text
            entries = with_href.findall(html)
            all_entries = without_href.findall(html)

            # Some albums have no detail page, so the link-bearing pattern
            # can return fewer entries than the link-free one.
            if len(entries) != len(all_entries):
                # Walk the complete list so that a missing entry at the very
                # end of the page is padded too.
                for pos, entry in enumerate(all_entries):
                    if pos >= len(entries) or entries[pos][2] != entry[1]:
                        padded = (entry[0], 'nan') + tuple(entry[1:])
                        entries.insert(pos, padded)

            page_ranks = np.linspace(page * 10 - 9, page * 10, 10, dtype=int)
            frames.append(
                pd.DataFrame(entries, index=page_ranks, columns=columns))

        return pd.concat(frames)
Esempio n. 5
0
    # Plot normal-distribution probability density curves.
    import numpy as np
    import matplotlib.pyplot as plt
    import math
    # Parameters of the four curves.
    u = 0   # mean μ
    u01 = -2
    sig = math.sqrt(0.2)  # standard deviation σ
    sig01 = math.sqrt(1)
    sig02 = math.sqrt(5)
    sig_u01 = math.sqrt(0.5)
    # NOTE(review): every x range below is built from ``u`` and ``sig``, also
    # for the curves that use other means/deviations — confirm this is the
    # intended plotting window.
    x = np.linspace(u - 3*sig, u + 3*sig, 50)
    x_01 = np.linspace(u - 6 * sig, u + 6 * sig, 50)
    x_02 = np.linspace(u - 10 * sig, u + 10 * sig, 50)
    x_u01 = np.linspace(u - 10 * sig, u + 1 * sig, 50)
    # Density: exp(-(x - mean)^2 / (2 * sd^2)) / (sqrt(2 * pi) * sd).
    y_sig = np.exp(-(x - u) ** 2 /(2* sig **2))/(math.sqrt(2*math.pi)*sig)
    y_sig01 = np.exp(-(x_01 - u) ** 2 /(2* sig01 **2))/(math.sqrt(2*math.pi)*sig01)
    y_sig02 = np.exp(-(x_02 - u) ** 2 / (2 * sig02 ** 2)) / (math.sqrt(2 * math.pi) * sig02)
    y_sig_u01 = np.exp(-(x_u01 - u01) ** 2 / (2 * sig_u01 ** 2)) / (math.sqrt(2 * math.pi) * sig_u01)
    plt.plot(x, y_sig, "r-", linewidth=2)
    plt.plot(x_01, y_sig01, "g-", linewidth=2)
    plt.plot(x_02, y_sig02, "b-", linewidth=2)
    plt.plot(x_u01, y_sig_u01, "m-", linewidth=2)
    plt.grid(True)
    plt.show()
Esempio n. 6
0
# Interactive-session transcript exploring np.genfromtxt and ndarray.base.
# Several statements are typed once with a mistake and then repeated in
# corrected form on a following line; the earlier attempts are kept as typed.
# NOTE(review): `data`, `StringIO` and `np` are not defined in the visible
# part of this snippet — presumably set up earlier in the session.

# `dtypr` looks like a typo for `dtype`; the next line repeats the call with
# the keyword spelled `dtype`.
np.genfromtxt(data,dtypr=(int,float,int))
np.genfromtxt(data,dtype=(int,float,int))
# Converter for a percentage field: strips b"%" from both ends of a bytes
# value, converts to float and divides by 100.
convertfunc = lambda x: float(x.strip(b"%"))/100.
data = u"1,2.3%,45.\n6,78.9%,0"
# `names` is first assigned two lines below, so this call is repeated
# unchanged right after that assignment.
# NOTE(review): delimiter is the empty string although `data` is
# comma-separated — confirm this is intended.
np.genfromtxt(StringIO(data),delimiter='',names=names,converters={1:convertfunc})
names = ['a','b','c']
np.genfromtxt(StringIO(data),delimiter='',names=names,converters={1:convertfunc})
data = u"N/A, 2, 3\n4,,???"
# First draft of the keyword arguments; it is overwritten on the next line
# by a version with per-column missing_values and filling_values.
kwargs = dict(delimiter=',',dtype=int,names='a,b,c',missing_values={0:"N"})
kwargs = dict(delimiter=',',dtype=int,names='a,b,c',missing_values={0:"N/A",'b':0,2:"???"},filling_values={0:0,'b':0,2:-999})
# `gemfromtxt` and `SringIO` look like typos for `genfromtxt` and `StringIO`;
# they are corrected one at a time over the next two lines.
np.gemfromtxt(SringIO(data),**kwargs)
np.genfromtxt(SringIO(data),**kwargs)
np.genfromtxt(StringIO(data),**kwargs)
# Indexing experiments: x holds the integers 10 down to 2.
x = np.arange(10,1,-1)
x
y = np.linspace(1,19,4)
y
# Index x with an integer array (repeated and out-of-order positions).
x[np.array([3,3,1,8])]
z = x[np.array([3,3,1,8])]
# `base` is read as a plain attribute here and called with () on the next
# line. NOTE(review): the call form is presumably a mistaken attempt.
z.base
z.base()
# y is rebound to the same object as x (plain assignment, no copy).
y = x
y.base
y.base()
# NOTE(review): `np.base` is presumably not a NumPy function — another
# attempt at inspecting the base.
np.base(y)
x.base
# Compare the owndata flag of the three names.
z.flags.owndata
y.flags.owndata
x.flags.owndata
# NOTE(review): `a` is not defined in the visible snippet; the next line
# repeats the identity check against `x`.
y.base is a
y.base is x
Esempio n. 7
0
        plt.show()
    return w


# training_inputs = np.array([(0, 0, 1),
#                             (0, 1, 1),
#                             (1, 0, 1),
#                             (1, 1, 1)])

# yd = np.array([[0, 0, 0, 1]]).T

# Train on the module-level samples and report the learned weights; the
# label is printed before the values it announces.
synaptic_weights = train(x, yd, 15000)
print("Ending Weights After Training: ")
print(synaptic_weights)

# k = -synaptic_weights[0] / synaptic_weights[1]
# b = -synaptic_weights[2] / synaptic_weights[1]

# Plot the samples and the fitted line on one figure. `fig` and `ax` refer to
# the figure that is actually drawn on, so no empty extra figure is shown.
fig = plt.figure(figsize=(10, 7))
ax = plt.subplot()
plt.scatter(x[:, 0], yd, s=100, alpha=0.3)
xs = np.linspace(0, 70, 200)
ys2 = synaptic_weights[1] + synaptic_weights[0] * xs
plt.plot(xs, ys2, 'g', linewidth=1)

plt.show()
Esempio n. 8
0
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Sample points to fit: X holds the inputs, Y the observed values.
X = np.array([1, 5, 7, 9, 13, 16])
Y = np.array([37, 66, 71, 79, 85, 99])
# Dimensions of the input array (print() call form, valid on Python 3).
print(X.shape)
plt.scatter(x=X, y=Y, marker='o', c='r')

def hypothesis_function(p, x):
    """Evaluate the straight line ``p[0] * x + p[1]``.

    Args:
        p: Two-element sequence unpacked as (slope, intercept).
        x: Value or array the line is evaluated at.

    Returns:
        ``slope * x + intercept``, with whatever type the arithmetic yields.
    """
    slope, intercept = p
    return slope * x + intercept


def cost_function(p, x, y):
    """Return the residuals of the line described by ``p`` against ``y``.

    Args:
        p: Parameters forwarded unchanged to ``hypothesis_function``.
        x: Inputs forwarded unchanged to ``hypothesis_function``.
        y: Observed values subtracted from the prediction.

    Returns:
        ``hypothesis_function(p, x) - y``.
    """
    predicted = hypothesis_function(p, x)
    residuals = predicted - y
    return residuals

from scipy.optimize import leastsq

# Starting values handed to leastsq; cost_function receives them as `p`.
p = [50, 100]
pa = leastsq(cost_function, p, args=(X, Y))
# The first element of the leastsq result holds the fitted parameters.
a, b = pa[0]
# print() call form, valid on Python 3.
print(a, b)

# Draw the fitted line as 1000 points between 0 and 16.
x = np.linspace(0, 16, 1000)
y = a * x + b
plt.scatter(x, y, color="red")
plt.show()
Esempio n. 9
0
def plot_break_point():
    """Draw a black segment along y = 0 and mark ``breakPoint`` on it.

    Reads ``breakPoint`` from the enclosing scope for the marker's x
    coordinate and draws through ``plt`` (presumably ``matplotlib.pyplot``).
    """
    # Ten evenly spaced x values running from 0 down to -25.
    x = np.linspace(0, -25, 10)
    # Ten matching y values, all zero.
    y = np.linspace(0, 0, 10)
    # NOTE(review): both line widths below are passed as strings rather than
    # numbers — confirm the plotting backend accepts that.
    plt.plot(x, y, color='black', linewidth='4')
    plt.scatter(breakPoint, 0, color="green", marker="o", linewidth='5')