Esempio n. 1
0
def draw_learning_curves(X, y, estimator, num_trainings):
    """Plot mean training and cross-validation scores for ``estimator``.

    Args:
        X: Feature data forwarded to ``learning_curve``.
        y: Target data forwarded to ``learning_curve``.
        estimator: Estimator object whose learning curve is drawn.
        num_trainings: Number of evenly spaced training-set fractions
            between 0.1 and 1.0 that are evaluated.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Use the caller's estimator and data; the previous version ignored all
    # three arguments in favour of a hard-coded class and undefined names.
    train_sizes, train_scores, test_scores = learning_curve(
        estimator,
        X,
        y,
        cv=None,
        n_jobs=1,
        train_sizes=np.linspace(.1, 1.0, num_trainings))

    # axis=1 averages each row, i.e. one mean per evaluated training size.
    train_scores_mean = np.mean(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)

    # plt.grid(linestyle=":", color="r") would draw a styled grid instead;
    # linestyle sets the line style and color sets the line colour.
    plt.grid()

    plt.title("learning curve")
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    # Plot against the training-set sizes so the x axis matches its label.
    plt.plot(train_sizes, train_scores_mean, 'o-', color="g",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="y",
             label="Cross-validation score")

    plt.legend(loc="best")

    plt.show()
Esempio n. 2
0
# Count how many words appear in qlist in total, and how many of them are
# distinct.
# Only simple tokenisation is needed here; English text is split on spaces.
qlist, alist = read_corpus(qa_corpus_path)
q_dict = get_dict(qlist)
word_total_q = sum(q_dict.values())
n_distinctive_words_q = len(q_dict)
print('There are {} words and {} distinctive tokens in question texts'.format(
    word_total_q, n_distinctive_words_q))
print(word_total_q)

# TODO: count the frequency of every word in qlist and sort the frequencies.
# Plot them with matplotlib; the y axis is the word frequency.
# NOTE(review): nothing here sorts the counts -- presumably get_dict already
# returns them in the intended order; confirm.

# Skip the first 100 entries and show at most the next 10000. The x positions
# follow the slice length so a smaller vocabulary does not break plt.bar.
q_frequencies = list(q_dict.values())[100:10100]
plt.bar(np.arange(len(q_frequencies)), q_frequencies)
plt.ylabel('Frequency')
plt.xlabel('Word Order')
plt.title('Word Frequencies of the Question Corpus')
plt.show()

a_dict = get_dict(alist)
print('The 10 frequentist words in question list (qlist) are :\n{}'.format(
    '|'.join(get_topk(10, q_dict))))


class TextNormalizer:
    """Bundle a ``WordNetLemmatizer`` with caller-supplied resources.

    Only the constructor is visible here; it stores its arguments unchanged
    and creates a fresh lemmatizer.
    """

    def __init__(self, stopwords, filter_vocab, re_cleaners):
        """Store the given resources and create the lemmatizer.

        Args:
            stopwords: Kept as ``self.stopwords``.
            filter_vocab: Kept as ``self.filter_vocab``.
            re_cleaners: Kept as ``self.re_cleaners``
                (presumably regex-based cleaners -- confirm against usage).
        """
        # Caller-provided resources, stored in parameter order.
        self.stopwords = stopwords
        self.filter_vocab = filter_vocab
        self.re_cleaners = re_cleaners
        # Each normalizer owns its own lemmatizer instance.
        self.lemmatizer = WordNetLemmatizer()
Esempio n. 3
0
# Sanity check: features and targets should have the same length.
print(len(X), len(y))

# Hold out 20% of the samples for scoring.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.2)

clf = LinearRegression()
clf.fit(X_train, y_train)
# Score of the fitted model on the held-out split.
accuracy = clf.score(X_test, y_test)

# Predict for the rows in X_lately.
forecast_set = clf.predict(X_lately)
print(forecast_set, accuracy, forecast_out)

# The index label of the last row is the starting point for the new dates.
# NOTE(review): assumes the index holds pandas Timestamps -- confirm.
last_date = df.iloc[-1].name
# Timestamp.to_datetime() no longer exists in pandas; to_pydatetime() is the
# replacement, and the datetime method is timetuple(), not time_tuple().
last_unix = time.mktime(last_date.to_pydatetime().timetuple())
one_day = 86400  # seconds per day
next_unix = last_unix + one_day

# Append one row per forecast value, one day apart: every column except the
# last is NaN and the last column receives the forecast.
for i in forecast_set:
    next_date = datetime.datetime.fromtimestamp(next_unix)
    next_unix += one_day
    df.loc[next_date] = [np.nan] * (len(df.columns) - 1) + [i]

df['adj_close'].plot()
df['Forecast'].plot()
plt.legend(loc=4)
plt.xlabel('Date')
plt.ylabel('Price')
plt.show()
Esempio n. 4
0
# Display whatever figure was built before this section.
plt.show()

# Visualising the test set results
from matplotlib.colors import ListedColormap
X_set, Y_set = X_test, Y_test
# One colour per class, shared by the decision regions and the data points.
class_colors = ListedColormap(('red', 'green', 'blue'))
# Dense grid (step 0.01) covering both feature ranges with a margin of 1.
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1,
              step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
              stop=X_set[:, 1].max() + 1,
              step=0.01))
# Classify every grid point and fill the plane with the predicted class.
plt.contourf(X1,
             X2,
             classifier.predict(np.array([X1.ravel(),
                                          X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75,
             cmap=class_colors)
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
# Overlay the actual samples, one scatter call per class label.
for i, j in enumerate(np.unique(Y_set)):
    # `color=` is the unambiguous way to give all points a single colour;
    # a lone RGBA tuple passed through `c=` can be misread as per-point data.
    plt.scatter(X_set[Y_set == j, 0],
                X_set[Y_set == j, 1],
                color=class_colors(i),
                label=j)
plt.xlabel("LD1")
plt.ylabel("LD2")
# NOTE(review): the data plotted here is the test set, yet the title says
# "training set" -- confirm which wording is intended.
plt.title("KNN(on training set)")
plt.legend()
plt.show()
Esempio n. 5
0
# Take the test period plus the 60 values that precede it, so that every
# test day has a full 60-step history window.
inputs = dataset_total[len(dataset_total) - len(dataset_test) -
                       60:].values
inputs = inputs.reshape(-1, 1)  # single column, as passed to the scaler
inputs = sc.transform(inputs)

X_test = []
for i in range(60, 80):  # the test set has 20 days
    # Window of the 60 values preceding day i (column 0).
    X_test.append(inputs[i - 60:i, 0])

X_test = np.array(X_test)
# Add a trailing axis of size 1: (samples, 60 timesteps, 1 value per step).
X_test = np.reshape(
    X_test, (X_test.shape[0], X_test.shape[1], 1)
)

predicted_stock_price = regressor.predict(X_test)
# Undo the scaling so predictions are in the original units.
predicted_stock_price = sc.inverse_transform(predicted_stock_price)

# Plot real and predicted series with distinct colours and labels so the two
# curves can be told apart in the legend.
plt.plot(real_stock_price, color='red',
         label='Real Google Stock Price')
plt.plot(predicted_stock_price, color='blue',
         label='Predicted Google Stock Price')
plt.title("google stock price prediction")
plt.xlabel('Time')
plt.ylabel('google stock price')
plt.legend()  # no arguments: uses the labels given to plt.plot above
plt.show()

# The accuracy could be increased by changing the scoring method to
# accuracy or neg_mean_squared_error.
# NOTE(review): `d`, `g`, `puntos`, `theta`, `observacion_1` and
# `observacion_2` are defined outside this excerpt. `distancia` is recomputed
# on every pass but never read here, and `puntos` is resampled inside the
# loop -- confirm this structure is intended.
for i in range(len(observacion_1)):
    # v**2 * sin(2 * theta) / g for the i-th sample.
    distancia = (puntos[i]**2 * np.sin(2 * theta[i])) / g

    puntos = puntos[d]

    # Resample one million values (with replacement) from the selection.
    puntos = np.random.choice(puntos, 1000000)

    # Truncate theta to at most the new length of puntos.
    theta = theta[:len(puntos)]

    print(len(puntos))

for i in range(len(observacion_2)):
    distancia = (puntos[i]**2 * np.sin(2 * theta[i])) / g

    puntos = puntos[d]

    puntos = np.random.choice(puntos, 1000000)

    theta = theta[:len(puntos)]

    print(len(puntos))

    # NOTE(review): the figure is rebuilt and the same file overwritten on
    # every iteration; this may have been meant to run once after the loop.
    plt.figure()

    plt.xlabel('Velocidad')
    plt.ylabel('Probabilidad de la Velocidad')
    # `normed` was removed from Matplotlib's hist; `density=True` is the
    # replacement and normalises the histogram the same way.
    plt.hist(puntos, 1000, density=True)
    plt.legend()
    plt.savefig('Histograma_vel.png')
# Making predictions

data_test = pd.read_csv('Google_Stock_Price_Test.csv')
# Column 1 kept as a 2-D array (the 1:2 slice preserves the column axis).
real_result = data_test.iloc[:, 1:2].values

# Stack the training and test 'Open' columns into one series.
total_data = pd.concat((data_train['Open'], data_test['Open']), axis=0)

# Test period plus the 60 values before it, so each test day has a full
# 60-step history window.
inputs = total_data[len(total_data) - len(data_test) - 60:].values
inputs = inputs.reshape(-1, 1)

inputs = sc.transform(inputs)

xtest = []

# NOTE(review): 60..80 assumes the test file holds 20 rows -- confirm.
for i in range(60, 80):
    xtest.append(inputs[i - 60:i, 0])
xtest = np.array(xtest)

# Add a trailing axis of size 1: (samples, 60 timesteps, 1 value per step).
xtest = np.reshape(xtest, (xtest.shape[0], xtest.shape[1], 1))

prediction = model.predict(xtest)
# Undo the scaling so predictions are in the original units.
prediction = sc.inverse_transform(prediction)

plt.plot(real_result, color='red', label='google stock price')
plt.plot(prediction, color='blue', label='predicted result')
plt.title('google stock price prediciton')
plt.xlabel('date')
plt.ylabel('price')
plt.legend()
plt.show()
def plot(x, y):
    """Draw ``y`` against ``x`` as a line chart and display it.

    Args:
        x: Values for the horizontal axis (labelled as hour intervals).
        y: Values for the vertical axis (labelled as bugs per interval).
    """
    plt.plot(x, y)
    plt.xlabel("Hour interval of releases")
    plt.ylabel("Bugs in Interval")
    plt.title("Development of Fixes in Hours")
    plt.show()
# Feature scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
# Fit the scaler on the training features only, then reuse it for the test
# features.
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
y_train = sc_y.fit_transform(y_train)

# Fitting simple linear regression to the training set
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# Predicting the test set result (in the scaled target space)
y_pred = regressor.predict(X_test)

# Visualizing the training set result
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Visualizing the test set result: scatter the held-out points, scaled with
# the same target scaler as the model, against the fitted regression line.
# NOTE(review): assumes the held-out targets are named y_test -- confirm.
plt.scatter(X_test, sc_y.transform(y_test), color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Test set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()