def test_wrong_row_name_number():
    """Check that heatmap rejects row_names whose length differs from the row count.

    A 10x5 matrix is paired with only three row names, which is expected
    to raise an AssertionError.
    """
    with pytest.raises(AssertionError) as excinfo:
        heatmap(np.random.random((10, 5)), row_names=['a', 'b', 'c'])

    # The message check has to live outside the ``with`` block: any statement
    # placed after the raising call inside the block is never executed.
    # ``str(excinfo.value)`` is used because exceptions carry no ``.message``
    # attribute on Python 3.
    message = str(excinfo.value)

    # Only the reported counts are pinned. The previously expected text began
    # with 'len(column_names)', which does not match the argument under test.
    assert '(got 3)' in message
    assert '(expect 10)' in message
Beispiel #2
0
def plot_corr(X, y, output_path='./output/plot', plot_it=False):
    '''
    Plot a heatmap of the Pearson correlation between features.

    Input: X, y: preprocessed dataset, expected to be normalized before the
                 correlation is computed; X must contain the 'name' and
                 'date' columns, which are dropped here
            output_path: directory the outputs are saved to
            plot_it: whether to display the figure
    Output: corr_matrix.png: the heatmap
            col_dict.json: feature name for each index shown on the plot
    '''

    # drop() returns a new frame, so adding 'y' does not touch the caller's X
    X = X.drop(['name', 'date'], axis=1)
    X['y'] = y

    # Pearson correlation coefficient matrix
    cm = X.corr(method='pearson')

    # Axes are labelled with column positions; col_dict maps them back to names
    col_dict = dict(enumerate(X.columns))
    heatmap(np.array(cm),
            row_names=list(col_dict),
            column_names=list(col_dict),
            figsize=(40, 40))
    plt.title('Features Correlation Heatmap', fontsize=20)

    # output plot and col_dict
    # exist_ok avoids the race between checking for the directory and creating it
    os.makedirs(output_path, exist_ok=True)
    plt.savefig(os.path.join(output_path, 'corr_matrix.png'), dpi=300)
    with open(os.path.join(output_path, 'col_dict.json'), 'w') as f:
        json.dump(col_dict, f)

    if plot_it:
        plt.show()
    def plot_correlations(self, df: pd.DataFrame, cols=None, postfix='') -> None:
        """Plot, save and show a correlation heatmap for columns of ``df``.

        Args:
            df: data frame containing a 'date' column, which is dropped
                before the correlation is computed.
            cols: names of the columns to correlate. When None or empty,
                every column remaining after the drop is used.
            postfix: text inserted into the output file name,
                ``correlation_matrix{postfix}.png``.
        """
        no_date_df = df.drop(columns=['date'])

        # The default is None rather than a shared mutable list; an explicit
        # empty list still selects every column.
        if not cols:
            cols = list(no_date_df.columns)

        # np.corrcoef treats each row as a variable, hence the transpose
        corr = np.corrcoef(no_date_df[cols].values.T)
        heatmap(corr, row_names=cols, column_names=cols)

        # Save before showing: once a blocking show() returns, the figure has
        # been closed and savefig would write a new, empty one.
        plt.savefig(f'correlation_matrix{postfix}.png')
        plt.show()
Beispiel #4
0
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import matplotlib.pyplot as plt
from mlxtend.plotting import scatterplotmatrix
import numpy as np
from mlxtend.plotting import heatmap

# Read the CSV and replace its header with short labels: the eight feature
# columns followed by 'OUT'
features = ['PREG', 'GLU', 'BP', 'SKIN', 'INSU', 'BMI', 'DPF', 'AGE']
diabetes = pd.read_csv('diabetes.csv', header=0)
diabetes.columns = features + ['OUT']
X = diabetes[features].values
y = diabetes['OUT'].T

# Exploratory analysis: correlation heatmap across all columns (np.corrcoef
# treats rows as variables, hence the transpose) ...
cm = np.corrcoef(diabetes[diabetes.columns].values.T)
hm = heatmap(cm,
             row_names=diabetes.columns,
             column_names=diabetes.columns)

# ... followed by a pairwise scatter-plot matrix of the same columns
scatterplotmatrix(diabetes[diabetes.columns].values,
                  figsize=(10, 8),
                  names=diabetes.columns,
                  alpha=0.4)
plt.show()
# In[9]:

from mlxtend.plotting import heatmap

# In[10]:

import numpy as np

# In[11]:

# Correlate the selected columns; rowvar=False makes np.corrcoef treat each
# column as a variable, so no transpose is needed.
cm = np.corrcoef(df[cols].values, rowvar=False)

# In[12]:

# Draw the correlation matrix with the column names on both axes.
hm = heatmap(cm, column_names=cols, row_names=cols)
plt.show()

# In[13]:


class LinearRegressionGD(object):
    def __init__(self, eta=0.001, n_iter=20):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []

        for i in range(self.n_iter):
def test_defaults():
    """Smoke test: heatmap accepts a random 10x5 matrix with no optional arguments."""
    matrix = np.random.random((10, 5))
    heatmap(matrix)
Beispiel #7
0
          y_val=y_test,
          epochs=args_dict['epochs'])
print("Fitting time: {:.3f} seconds".format(time() - start_))

# For every row of the model output, take the index of its largest value
# as the predicted class.
ans = model.predict(x_test)
y_pred = np.argmax(ans, axis=1)
print(y_pred)
print(collections.Counter(np.equal(y_pred, y_test1)))
#prediction counter
print(collections.Counter(np.round(y_pred)))
x_ = (np.equal(np.round(y_test1), np.round(y_pred)))
print(collections.Counter(x_))
#confusion matrix heat map
x = (confusion_matrix(np.round(y_test1), np.round(y_pred)))
# confusion_matrix builds its axes from every label that occurs in either the
# true or the predicted values, so the tick labels must cover that same
# sorted union. Labelling with the predicted classes alone yields too few
# names whenever a class is never predicted, and heatmap rejects a name count
# that differs from the matrix shape.
class_labels = np.union1d(np.round(y_test1), np.round(y_pred))
hm = heatmap(x,
             column_names=class_labels,
             row_names=class_labels,
             figsize=(30, 30))
plt.show()

#Jaccard Score
#j_score = jaccard_score(np.round(y_test1),np.round(y_pred),	average='micro')
#print('Jaccard Score: ',j_score)

#Accuracy Score
accuracy = accuracy_score(np.round(y_test1), np.round(y_pred))
print('Accuracy Score: ', accuracy)

#classification report
print('classification_report')
print(classification_report(np.round(y_test1), np.round(y_pred)))
#Distinct classes
Beispiel #8
0
 def heat_map(self, data, cols):
     """Show a heatmap of the correlation between the ``cols`` columns of ``data``."""
     # np.corrcoef treats each row as a variable, so the selected values
     # are transposed to put one column per row.
     selected = data[cols].values
     corr_map = np.corrcoef(selected.T)
     heatmap(corr_map, row_names=cols, column_names=cols)
     plt.show()