Python fitの例

プログラミング言語: Python

名前空間/パッケージ名: Kmeans_python.fit

メソッド/関数: fit

hotexamples.comのコード掲載数: 6

Python fit - 6件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのKmeans_python.fit.fitの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: test_fit.py プロジェクト: sreejithmunthikodu/Kmeans_python

def test_edge():
    """Check that one cluster on constant data is centred on that constant.

    Every sample is (0, 1), so the only cluster centre must be (0, 1).
    """
    test_df = pd.DataFrame({'X1': np.zeros(10), 'X2': np.ones(10)})

    centers, labels = fit(test_df, 1)

    print(labels)

    # Compare the values themselves. `a.all() == b.all()` only compares two
    # booleans and is True for almost any pair of arrays.
    assert np.allclose(np.ravel(centers), [0, 1])

コード例 #2

ファイルを表示

ファイル: test_fit.py プロジェクト: sreejithmunthikodu/Kmeans_python

def test_center():
    """Check the cluster centres returned by ``fit`` on two data sets.

    * A diagonal of nine points with one cluster: centre must be (4, 4).
    * Iris petal length/width with two clusters: centres must match the
      reference values below.
    """
    # Helper data
    iris = datasets.load_iris()  # loading the iris dataset
    features = iris.data
    test_df1 = pd.DataFrame({'X1': features[:, 2], 'X2': features[:, 3]})

    test_df2 = pd.DataFrame({'X1': np.arange(9), 'X2': np.arange(9)})
    # getting centers of clusters
    centers1, _ = fit(test_df2, 1)

    centers2, _ = fit(test_df1, 2)

    expected2 = np.array([[4.92525253, 1.68181818],
                          [1.49215686, 0.2627451]])

    # The order in which clusters are numbered is arbitrary, so sort both
    # sides by the first coordinate before comparing.
    centers2 = np.asarray(centers2, dtype=float)
    centers2 = centers2[np.argsort(centers2[:, 0])]
    expected2 = expected2[np.argsort(expected2[:, 0])]

    # Compare values with a tolerance. `a.all() == b.all()` only compares
    # two booleans and never inspects the coordinates.
    assert np.allclose(centers2, expected2, atol=1e-6), \
        "Centers did not match"

    assert np.allclose(np.ravel(centers1), [4, 4]), "Centers did not match"

コード例 #3

ファイルを表示

def silhouette(X, k_array):
    """
    Compute and plot a silhouette score for every k in ``k_array``.

    For each k the data is clustered with ``fit`` and the resulting labels
    are scored with ``sil_score``.

    Parameters
    ----------
    X : 2-d array, shape=(n_samples, n_features)
        - The data to be clustered. Must have an integer or floating
          point dtype.
    k_array : array
         - A non-empty sequence of all contending k values. Every entry
           must be an integer (Python ``int`` or numpy integer).

    Returns
    -------
    pandas Series
        - The silhouette scores in the same order as k_array.

    Altair chart object
        - An Altair chart displaying silhouette scores
        with their corresponding k values.

    Raises
    ------
    ValueError
        If X is not numeric, or if k_array is empty or contains a
        non-integer entry.

    Examples
    --------
    >>> X = np.array([[1, 2], [1, 4], [1, 0],
    ...               [10, 2], [10, 4], [10, 0]])
    >>> k_array = [2, 3, 4, 5]
    >>> silhouette(X, k_array)

    """

    # Check the dtype category rather than comparing against the strings
    # "float"/"int", which only match the platform default widths.
    x_dtype = np.asarray(X).dtype
    if not (np.issubdtype(x_dtype, np.integer)
            or np.issubdtype(x_dtype, np.floating)):
        raise ValueError("Input X must be numeric")

    if len(k_array) == 0:
        raise ValueError("Input k_array must not be empty")

    # Validate every entry, not just the first one.
    if not all(isinstance(k, (int, np.integer)) for k in k_array):
        raise ValueError("Input k_array must be type int")

    scores = []
    for k in k_array:
        k = int(k)  # normalise numpy integers to plain int
        _, labels = fit(X, k)
        scores.append([k, sil_score(X, labels)])

    scores = pd.DataFrame(scores, columns=["k", "Score"])

    chart = (alt.Chart(scores).mark_line().encode(
        alt.X('k:O', axis=alt.Axis(title='k')),
        alt.Y('Score:Q', axis=alt.Axis(title='Silhouette score')),
    ).properties(title="Silhouette scores", width=800))

    return (scores["Score"], chart)

コード例 #4

ファイルを表示

ファイル: test_fit.py プロジェクト: sreejithmunthikodu/Kmeans_python

def test_exceptions():
    """Check that ``fit`` raises ``ValueError`` for each kind of bad input.

    Each case must raise; a call that returns normally fails the test.
    """
    # Helper data
    test_df4 = "this is a python package"
    test_df2 = pd.DataFrame({'X1': np.arange(9), 'X2': np.arange(9)})
    test_df5 = pd.DataFrame({'X1': [1, 2, 3, 4], 'X2': [1, 2, "A", "BC"]})

    K = -2
    num_init = 0
    max_iteration = 4.5

    # (positional args, keyword args, message shown if nothing is raised)
    invalid_calls = [
        ((test_df4, 2), {},
         "Should throw an error if data is not in "
         "a dataframe or numpy array"),
        ((test_df2, K), {},
         "Should throw an error if the number of clusters is negative"),
        ((test_df5, 2), {},
         "Should throw an error if data contains non-numeric values"),
        ((test_df2, 1), {"n_init": num_init},
         "Should throw an error if n_init is zero"),
        ((test_df2, 1), {"max_iter": max_iteration},
         "Should throw an error if max_iter is not an integer"),
    ]

    # checking the exception handling of the function
    for args, kwargs, reason in invalid_calls:
        try:
            fit(*args, **kwargs)
        except ValueError:
            continue
        # Reaching this line means fit accepted the invalid input.
        raise AssertionError(reason)

コード例 #5

ファイルを表示

ファイル: test_fit.py プロジェクト: sreejithmunthikodu/Kmeans_python

def test_label():
    """Check that ``fit`` splits two well-separated groups correctly.

    The data is five points on the diagonal from 5 to 9 followed by five
    points from 15 to 19, so the first five samples must share one label
    and the last five must share a different one.
    """
    # Helper data
    test_df3 = pd.DataFrame({
        'X1':
        np.concatenate((np.arange(5, 10), np.arange(15, 20)), axis=0),
        'X2':
        np.concatenate((np.arange(5, 10), np.arange(15, 20)), axis=0)
    })

    # getting the labels for the helper data
    _, labels = fit(test_df3, 2)

    labels = np.ravel(labels)
    assert labels.shape == (10,), "labels did not match"

    # Which group receives label 0 is arbitrary, so check the partition
    # rather than the exact label ids. `a.all() == b.all()` would only
    # compare two booleans.
    low_group, high_group = labels[:5], labels[5:]
    assert (np.all(low_group == low_group[0])
            and np.all(high_group == high_group[0])
            and low_group[0] != high_group[0]), "labels did not match"

コード例 #6

ファイルを表示

def _all_real_numbers(values):
    """Return True when every element of *values* is an int or a float."""
    flat = np.asarray(values).reshape(-1)
    if flat.dtype == object:
        # Mixed containers (e.g. a DataFrame with a text column) arrive as
        # object arrays, so each element has to be inspected.
        return all(
            isinstance(v, (int, float, np.integer, np.floating))
            and not isinstance(v, (bool, np.bool_))
            for v in flat)
    return bool(np.issubdtype(flat.dtype, np.integer)
                or np.issubdtype(flat.dtype, np.floating))


def elbow(X, centers_list):
    """
    Creates a plot of inertia vs number of cluster centers
    as per the elbow method. Calculates and returns the inertia
    values for all cluster centers. Useful for identifying the optimal
    number of clusters while using k-means clustering algorithm.

    The inertia reported for one k is the sum, over all k clusters, of the
    Frobenius norm of (cluster samples - cluster center).

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Input data that is to be clustered. Must be a pandas DataFrame or
        a numpy array holding only integer or floating point values.
    centers_list : list or 1-d array-like
        A list of all possible numbers of cluster centers. Every entry
        must be a whole number between 1 and n_samples.

    Returns
    -------
    tuple
        A tuple of an altair plot object containing a line plot of
        k (number of cluster centers) vs inertia and inertia for all k.

    Raises
    ------
    ValueError
        If either argument has the wrong type, shape or contents, or if a
        requested number of clusters is outside 1..n_samples.

    Examples
    --------
    >>> from Kmeans_python.elbow import elbow
    >>> import numpy as np
    >>> X = np.array([[1, 2], [1, 4], [1, 0],
    ...               [10, 2], [10, 4], [10, 0]])
    >>> centers = [2, 3, 4, 5]
    >>> chart, inertia = elbow(X, centers)
    """

    # Check if number of centers is contained in an array or list
    if not isinstance(centers_list, (list, np.ndarray)):
        raise ValueError(
            "Invalid input type for list of numbers of clusters. "
            "centers_list must be list or a numpy array.")

    # Ensure input arguments are valid
    if not isinstance(X, (pd.DataFrame, np.ndarray)):
        raise ValueError(
            "Invalid input type for samples. X must be "
            "pandas dataframe or a numpy array.")

    # Check if there are atleast two samples
    if not X.shape[0] >= 2:
        raise ValueError("At least two samples should be there in data")

    # Prompt user to reshape if data has only one feature
    if len(X.shape) == 1:
        raise ValueError(
            "If you have only one feature in the dataset "
            "please reshape your data using X.reshape(-1, 1)")

    # Check that *every* requested number of centers is numeric
    k_values = np.asarray(centers_list).reshape(-1)
    if k_values.size == 0 or not _all_real_numbers(k_values):
        raise ValueError(
            "Invalid input type for centers. Centers_list must contain "
            "only numeric values.")

    # Check if all number of centers are integers (2.0 is fine, 2.5 is not)
    k_as_float = k_values.astype(float)
    if (not np.all(np.isfinite(k_as_float))
            or np.any(k_as_float != np.floor(k_as_float))):
        raise ValueError("Number of centers should be integers")

    # Check that *every* data point is a number (floats included)
    if not _all_real_numbers(X):
        raise ValueError(
            "Invalid input type for samples. X must contain "
            "only numeric values.")

    # Check if the range of number of centers is valid
    if np.min(k_as_float) < 1 or np.max(k_as_float) > X.shape[0]:
        raise ValueError(
            "Invalid values in list of numbers of clusters. "
            "Number of clusters should be between 1 and number of samples")

    if isinstance(X, pd.DataFrame):
        X = X.to_numpy()

    # Convert all integer types to int
    centers_list = [int(k) for k in k_values]

    # Iterate through centers list and get inertia
    inertia = []
    for k in centers_list:
        # Fit Kmeans algorithm to get cluster centers and labels
        centers, labels = fit(X, k, n_init=10, max_iter=200)
        labels = np.ravel(labels)
        # Accumulate the contribution of every cluster; keeping only the
        # last cluster's value would under-report the inertia.
        cluster_inertias = [
            np.linalg.norm(X[labels == cluster] - centers[cluster])
            for cluster in range(k)
        ]
        inertia.append(np.sum(cluster_inertias))
    # Save results to a dataframe
    results = pd.DataFrame({"k": centers_list, "inertia": inertia})

    # Create a plot object of K vs Inertia
    p = alt.Chart(results).mark_line().encode(
        alt.X("k:Q", title="k"),
        alt.Y("inertia:Q", title="Inertia")).properties(
            title="Optimal K Using Elbow Method", width=700,
            height=300).configure_axis(
                labelFontSize=20,
                titleFontSize=20).configure_title(fontSize=20)

    return p, inertia