Example #1
    def dbscan(self, min_samples, eps=0.5, window=None):
        """Cluster tickers on their fundamental data with DBSCAN and
        return the candidate pairs in each cluster.

        Requires: itertools, numpy as np, pandas as pd,
        sklearn.cluster.DBSCAN, sklearn.preprocessing.StandardScaler.
        """
        self.window = window
        # Disabled alternative: cluster on normalised price data instead.
        # self.tickers = window.snp_live_tickers
        # flat_data = self.average_over_time(window)
        # self.normalised = StandardScaler().fit_transform(flat_data)
        fundamental = self.window.get_fundamental()

        def get_ticker(index_list):
            # Collect the unique tickers (first element of each index
            # entry), preserving the order of first appearance.
            result = []
            for i in index_list:
                ticker = i[0]
                if ticker not in result:
                    result.append(ticker)
            return result

        fundamental_tickers = get_ticker(fundamental.index)
        # Reshape to one row per ticker (7 fundamental features each),
        # standardise the features, and drop tickers with missing values.
        num_of_feature = 7
        fundamental_reshaped = fundamental.values.reshape(-1, num_of_feature)
        fundamental_normalised = StandardScaler().fit_transform(
            fundamental_reshaped)
        fundamental_normalised = pd.DataFrame(fundamental_normalised,
                                              index=fundamental_tickers)
        fundamental_normalised = fundamental_normalised.dropna()
        self.tickers = fundamental_normalised.index
        dbscan = DBSCAN(eps=eps,
                        min_samples=min_samples).fit(fundamental_normalised)

        # Cluster labels (-1 marks noise) and a boolean mask of the
        # core samples found by DBSCAN.
        self.dbscan_labels = labels = dbscan.labels_
        core_samples_mask = np.zeros_like(dbscan.labels_, dtype=bool)
        core_samples_mask[dbscan.core_sample_indices_] = True
        self.dbscan_core_indices = dbscan.core_sample_indices_
        self.dbscan_core_length = len(dbscan.core_sample_indices_)
        self.dbscan_core_mask = core_samples_mask

        # Number of clusters, excluding the noise label (-1).
        self.unique_labels = set(labels)
        self.n_clusters = n_clusters = len(
            set(labels)) - (1 if -1 in labels else 0)
        self.n_noise = list(labels).count(-1)
        self.noise = np.where(labels == -1)[0]

        # Build every within-cluster pair; report tickers rather than
        # positional indices when a window was supplied.
        clusters = {}
        for j in range(n_clusters):
            pairs = []
            for i in itertools.combinations(np.where(labels == j)[0], 2):
                pair = (i[0], i[1])
                if window is not None:
                    pair = (self.tickers[i[0]], self.tickers[i[1]])
                pairs.append(pair)
            clusters[j] = pairs
        pair_count = sum(len(pairs) for pairs in clusters.values())
        print('total pairs: ', pair_count)
        return clusters
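
A minimal sketch of how eps might be chosen before calling this method, assuming a standalone standardised feature matrix like fundamental_normalised above (suggest_eps and its percentile cutoff are illustrative, not part of the original class). The k-distance heuristic sorts each point's distance to its k-th nearest neighbour; the elbow of that curve is a common eps choice for DBSCAN.

import numpy as np
from sklearn.neighbors import NearestNeighbors

def suggest_eps(features, k=4):
    # Ask for k+1 neighbours because each query point is returned as
    # its own nearest neighbour at distance zero.
    nn = NearestNeighbors(n_neighbors=k + 1).fit(features)
    distances, _ = nn.kneighbors(features)
    kth_distances = np.sort(distances[:, -1])
    # Crude stand-in for an elbow read off the plot: take a high
    # percentile of the sorted k-distances.
    return float(np.percentile(kth_distances, 90))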
Example #2
import pandas as pd
from sklearn import neighbors
from sklearn.preprocessing import StandardScaler


def fill_knn(df, train_cols, label_cols):
    """Impute missing values in label_cols in place, using a KNN
    regressor trained on the standardised train_cols."""
    # Disabled: optionally drop constant columns before training.
    # del_cols = []
    # for col in train_cols:
    #     if (df[col] == df[col][0]).all():
    #         del_cols.append(col)
    # for col in del_cols:
    #     train_cols.remove(col)
    # Standardise the training features; columns with any missing
    # values are dropped so the KNN only sees complete features.
    data = StandardScaler().fit_transform(df.loc[:, train_cols].values)
    data = pd.DataFrame(data, columns=train_cols)
    data.dropna(axis=1, how='any', inplace=True)
    knn_train_cols = [col for col in train_cols if col in data.columns]
    for col in label_cols:
        # Fit on the rows where the label is present ...
        data_train = data.loc[df[col].notnull().tolist(),
                              knn_train_cols].values
        data_label = df.loc[df[col].notnull().tolist(), col].values
        knn = neighbors.KNeighborsRegressor()
        knn.fit(data_train, data_label)
        # ... and predict it for the rows where it is missing.
        df.loc[df[col].isnull(),
               col] = knn.predict(data.loc[df[col].isnull().tolist(),
                                           knn_train_cols].values)
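
A hypothetical usage sketch (the toy frame is mine, not from the original): impute a column with gaps from two complete numeric columns. Note fill_knn mutates df in place, and the default KNeighborsRegressor needs at least 5 non-null training rows.

import numpy as np
import pandas as pd

df = pd.DataFrame({
    'a': [1., 2., 3., 4., 5., 6., 7., 8.],
    'b': [2., 4., 6., 8., 10., 12., 14., 16.],
    'c': [1., np.nan, 3., 4., 5., np.nan, 7., 8.],
})
fill_knn(df, train_cols=['a', 'b'], label_cols=['c'])
print(df['c'])  # the two NaNs are now KNN estimates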
Example #3
# Choosing the optimal number of components
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

pca = PCA().fit(df)
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel("Number of Components")
plt.ylabel("Cumulative Variance Ratio")
plt.show()
print(pca.explained_variance_ratio_)

# Final model
pca = PCA(n_components=3)
pca_fit = pca.fit_transform(df)
# The explained variance ratio shows how much of the information in the
# data the selected components still capture.
print(pca.explained_variance_ratio_)
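
A short sketch of the same component-selection idea done automatically, assuming the numeric df used above: passing a float in (0, 1) as n_components tells scikit-learn's PCA to keep just enough components to explain that share of the variance.

from sklearn.decomposition import PCA

pca_auto = PCA(n_components=0.95)  # keep components covering 95% of variance
reduced = pca_auto.fit_transform(df)
print(pca_auto.n_components_, "components retained")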

# Worked example

df = pd.read_csv("diabetes.csv", sep=",")
df = df.dropna()

# Note: get_dummies leaves purely numeric columns unchanged, so dms is
# simply a copy of these three columns.
dms = pd.get_dummies(df[['Age', 'DiabetesPedigreeFunction', 'Insulin']])
y = df["Outcome"]
# The unreadable (missing) values need to be dropped, done above.

print(df.head())

X_ = df.drop(['Outcome', 'Age', 'DiabetesPedigreeFunction', 'Insulin'],
             axis=1).astype('float64')
X = pd.concat([X_, dms[['DiabetesPedigreeFunction', 'Insulin']]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=42)
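
A possible continuation, not in the original snippet: fit a baseline classifier on the split, chaining the PCA step into a Pipeline so the reduction is learned on the training fold only (the logistic-regression choice is an assumption for illustration).

from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

model = Pipeline([
    ('pca', PCA(n_components=3)),           # same reduction as above
    ('clf', LogisticRegression(max_iter=1000)),
])
model.fit(X_train, y_train)
print("test accuracy:", model.score(X_test, y_test))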