예제 #1
0
파일: _graph.py 프로젝트: sabiz/datit
def check_normal_dist(x: pd.Series):
    """
    Plot a visual normality check for a series

    Two axes are created: the first shows the distribution of ``x`` with a
    fitted normal curve, the second shows the ``scipy.stats.probplot``
    output for ``x``. A series of dtype ``object`` is skipped silently.
    """
    from scipy import stats

    is_object = x.dtype == 'object'
    if is_object:
        return

    _, axes = _visu._create_fig(ncols=2)
    dist_ax, prob_ax = axes[0], axes[1]
    sns.distplot(x, fit=stats.norm, ax=dist_ax)
    stats.probplot(x, plot=prob_ax)
    _visu._plot()
예제 #2
0
파일: _graph.py 프로젝트: sabiz/datit
def value_count(data: pd.Series):
    """
    Plot value counts

    Draws one bar per distinct value of ``data``; the bar height is the
    number of occurrences reported by ``Series.value_counts``.
    """
    fig, ax = _visu._create_fig()
    counts = data.value_counts()
    ax.bar(counts.index, counts.values)
    # Use the input's own name for the title: since pandas 2.0 the series
    # returned by value_counts() is named 'count', not after the input.
    ax.set(title=f'Value Count: {data.name}')
    _visu._plot()
예제 #3
0
파일: _graph.py 프로젝트: sabiz/datit
def histgram(data: pd.Series, bins: Optional[int] = None):
    """
    Plot a histogram of a numeric series

    Parameters
    ----------
    data : pd.Series
        Values to plot. A series of dtype ``object`` is skipped with a
        printed message.
    bins : int, optional
        Number of bins. When omitted it is derived from the number of
        samples ``n`` as ``int(1 + log2(n))``.
    """
    if data.dtype == 'object':
        print("Ignore non number type")
        return
    bin_number = bins
    if bin_number is None:
        # Clamp the sample count to 1: for an empty series log2(0) is -inf
        # and int(-inf) raises OverflowError. One bin is used instead.
        bin_number = int(1 + np.log2(max(len(data), 1)))
    fig, ax = _visu._create_fig()
    ax.hist(data, bins=bin_number)
    ax.set(title=f'Histgram {data.name}(bin:{bin_number})',
           xlabel=data.name,
           ylabel='Frequency')
    _visu._plot()
예제 #4
0
파일: _graph.py 프로젝트: sabiz/datit
def density_plot(data: pd.Series, band_width: Optional[float] = None):
    """
    Plot a shaded kernel density estimate of a series

    ``band_width`` is forwarded to seaborn as the ``bw`` argument; when it
    is ``None`` the string ``'scott'`` is used instead. A series of dtype
    ``object`` is skipped with a printed message.
    """
    if data.dtype == 'object':
        print("Ignore non number type")
        return

    band = 'scott' if band_width is None else band_width

    _, axis = _visu._create_fig()
    sns.kdeplot(data, shade=True, ax=axis, bw=band)
    labels = {
        'title': f'Density {data.name}(band width:{band})',
        'xlabel': data.name,
        'ylabel': 'Frequency',
    }
    axis.set(**labels)
    _visu._plot()
예제 #5
0
파일: _graph.py 프로젝트: sabiz/datit
def density_2d_plot(x: pd.Series,
                    y: pd.Series,
                    band_width: Optional[float] = None):
    """
    Plot a shaded two-dimensional kernel density estimate of ``x`` and ``y``

    ``band_width`` is forwarded to seaborn as the ``bw`` argument; when it
    is ``None`` the string ``'scott'`` is used instead. If either series
    has dtype ``object`` a message is printed and nothing is drawn.
    """
    if any(series.dtype == 'object' for series in (x, y)):
        print("Ignore non number type")
        return

    band = 'scott' if band_width is None else band_width

    _, axis = _visu._create_fig()
    sns.kdeplot(x, y, shade=True, ax=axis, bw=band)
    labels = {
        'title': f'Density {x.name} x {y.name} (band width:{band})',
        'xlabel': x.name,
        'ylabel': y.name,
    }
    axis.set(**labels)
    _visu._plot()
예제 #6
0
파일: _graph.py 프로젝트: sabiz/datit
def pareto(data: pd.Series):
    """
    Plot pareto chart

    Bars show the count of each distinct value of ``data`` (in the order
    returned by ``Series.value_counts``); a line on a twin y-axis shows the
    cumulative share of the total. Only series of dtype ``object`` are
    plotted; any other dtype returns silently.
    """
    if data.dtype != 'object':
        return
    fig, ax = _visu._create_fig()
    counts = data.value_counts()
    ax.bar(counts.index, counts.values, label='count')
    # Use the input's own name for the title: since pandas 2.0 the series
    # returned by value_counts() is named 'count', not after the input.
    ax.set(title=f'Pareto: {data.name}')
    ax2 = ax.twinx()
    ratio = counts / counts.sum()
    ax2.plot(ratio.index, ratio.cumsum(), label='ratio', c=palette[1])
    # Leave a little headroom above 1.0 so the end of the line is visible.
    ax2.set(ylim=(0, 1.05))
    # Merge the handles of both axes so a single legend lists bars and line.
    h1, l1 = ax.get_legend_handles_labels()
    h2, l2 = ax2.get_legend_handles_labels()
    _visu.plt.legend(h1 + h2, l1 + l2)
    _visu._plot()
예제 #7
0
파일: _metrics.py 프로젝트: sabiz/datit
def roccurve(y, y_prob):
    """
    Plot the ROC curve of a model next to reference curves

    ``y`` and ``y_prob`` are passed unchanged to scikit-learn's
    ``roc_auc_score`` and ``roc_curve``. The model curve is drawn together
    with a 'Perfect' step curve and a dashed 'Random' diagonal, and the AUC
    is shown in the title.
    """
    from sklearn.metrics import roc_auc_score
    from sklearn.metrics import roc_curve

    auc = roc_auc_score(y, y_prob)
    fpr, tpr, _ = roc_curve(y, y_prob)

    _, axis = _visu._create_fig()
    # Reference: near-vertical rise at x=0, then flat at 1.
    axis.plot([0, 1e-10, 1], [0, 1, 1], color=palette[0], label='Perfect')
    axis.plot(fpr, tpr, color=palette[1], label='Model')
    # Reference: the diagonal.
    axis.plot([0, 1], [0, 1],
              linestyle='dashed', color=palette[2], label='Random')
    axis.set(title=f"ROC curve (AUC:{auc:.04})",
             xlabel="FP rate",
             ylabel="TP rate")
    axis.legend()
    _visu._plot()
예제 #8
0
파일: _metrics.py 프로젝트: sabiz/datit
def capcurve(y, y_prob):
    """
    Plot the CAP curve of a model and report its Gini index

    Samples are sorted by ``y_prob`` in descending order; the curve shows
    the cumulative share of ``sum(y)`` captured against the share of
    samples seen. 'Perfect' and 'Random' reference curves are drawn too.
    The Gini index in the title is
    ``(area_model - area_random) / (area_perfect - area_random)``.

    NOTE(review): presumably ``y`` holds 0/1 labels and ``y_prob`` holds
    scores for the positive class - confirm against callers.
    """
    from scipy import integrate
    # scipy.integrate.simps was deprecated and later removed from SciPy in
    # favour of simpson; fall back to simps only on releases without it.
    simpson = getattr(integrate, 'simpson', None)
    if simpson is None:
        simpson = integrate.simps

    num_sum = np.sum(y)
    num_count = len(y)
    rate_val = float(num_sum) / float(num_count)
    # Row 0 holds x values, row 1 holds y values of the 'Perfect' curve.
    ideal = np.array([[0, rate_val, 1], [0, 1, 1]])

    y_cap_df = (pd.DataFrame({'y': y, 'yp': y_prob})
                .sort_values('yp', ascending=False)
                .reset_index(drop=True))
    y_cap_df['y_rate'] = np.cumsum(y_cap_df.y)/num_sum
    y_cap_df['x_rate'] = np.arange(num_count)/num_count

    # x is passed by keyword: newer SciPy no longer accepts it positionally.
    perfect = simpson(ideal[1, :], x=ideal[0, :])
    model = simpson(y_cap_df.y_rate, x=y_cap_df.x_rate)
    _random = simpson(y_cap_df.x_rate, x=y_cap_df.x_rate)
    gini = (model - _random) / (perfect - _random)

    fig, ax = _visu._create_fig()
    ax.plot(ideal[0, :], ideal[1, :],
            color=palette[0], label='Perfect')
    ax.plot(y_cap_df.x_rate, y_cap_df.y_rate,
            color=palette[1], label='Model')
    ax.plot(y_cap_df.x_rate, y_cap_df.x_rate,
            linestyle='dashed', color=palette[2], label='Random')
    ax_params = {
            "title": f"CAP Curve (Gini index:{gini:.04})",
            "xlabel": "data ratio",
            "ylabel": "positive ratio",
            }
    ax.set(**ax_params)
    ax.legend()
    _visu._plot()
예제 #9
0
파일: _graph.py 프로젝트: sabiz/datit
def box_plot(x: pd.Series, y: pd.Series):
    """
    Plot a box plot of ``y`` grouped by ``x`` with the raw points overlaid

    The individual observations are drawn on top of the boxes as small,
    jittered, semi-transparent black markers.
    """
    fig, ax = _visu._create_fig()
    # Draw on the axes created above rather than on whatever pyplot
    # considers the current axes, as the other plotting helpers do.
    sns.boxplot(x=x, y=y, ax=ax)
    sns.stripplot(x=x, y=y, color='black', size=2, jitter=1, alpha=0.5,
                  ax=ax)
    ax.set(title=f'Box {x.name} x {y.name}', xlabel=x.name, ylabel=y.name)
    _visu._plot()
예제 #10
0
파일: _graph.py 프로젝트: sabiz/datit
def violin_plot(x: pd.Series, y: pd.Series):
    """
    Plot a violin plot of ``y`` grouped by ``x``

    Violins are drawn with ``scale='width'`` and ``inner='quartile'``.
    """
    fig, ax = _visu._create_fig()
    # Draw on the axes created above rather than on whatever pyplot
    # considers the current axes, as the other plotting helpers do.
    sns.violinplot(x=x, y=y, scale='width', inner='quartile', ax=ax)
    ax.set(title=f'Violin {x.name} x {y.name}', xlabel=x.name, ylabel=y.name)
    _visu._plot()
예제 #11
0
파일: _graph.py 프로젝트: sabiz/datit
def scatter_plot(x: pd.Series, y: pd.Series):
    """
    Plot a scatter chart of ``y`` against ``x``

    The series names are used for the title and the axis labels.
    """
    _, axis = _visu._create_fig()
    axis.scatter(x, y)
    labels = {
        'title': f'Scatter {x.name} x {y.name}',
        'xlabel': x.name,
        'ylabel': y.name,
    }
    axis.set(**labels)
    _visu._plot()