# Example #1
# 0
def main(params, inputs, outputs):
    """Standardize the continuous feature columns of a pickled DataFrame.

    Reads a DataFrame from the pickle path ``inputs.df``; treats the last
    column as the target and every 'object'-dtype column as categorical.
    Continuous columns are z-scored with StandardScaler, rejoined with the
    categorical columns, and the result is written to ``outputs.df_new``.

    NOTE(review): the target column (last column) is excluded from the
    output — confirm this is intended by downstream consumers.
    """
    ### Read input data ###
    df = pd.read_pickle(inputs.df)

    ### Split features: categorical (object dtype) vs. continuous ###
    df_x = df.iloc[:, :-1]
    df_label = df_x.select_dtypes(['object'])
    df_numeric = df_x.drop(columns=df_label.columns)

    ### Standardize continuous variables ###
    # Bug fix: the original referenced the misspelled name `df_standrad`
    # here, which raised NameError at runtime.  The original also rebuilt
    # the frame without the source index, so the join below could
    # misalign rows whenever df has a non-default index — preserve it.
    scaled = StandardScaler().fit_transform(df_numeric)
    df_standard = pd.DataFrame(scaled,
                               columns=df_numeric.columns,
                               index=df_numeric.index)

    ### Recombine and write the output dataset ###
    df_combine = df_standard.join(df_label)
    df_combine.to_pickle(outputs.df_new)
### Finish the elbow-curve plot (figure is built earlier in the file) ###
plt.ylabel('Score')
plt.title('Elbow Curve')
#plt.show()

### Run KMeans and plot the number of clients per cluster ###
kmeans = KMeans(n_clusters=7).fit(users)
df_labeled = pd.DataFrame(kmeans.labels_, columns=['labels'])
df_labeled['labels'] = df_labeled['labels'].astype('category')
plt.figure(figsize=(10, 8))
df_labeled['labels'].value_counts().plot.bar(color='y')
plt.xlabel("Cluster")
plt.ylabel("Número de clientes")
plt.title("Número clientes por Cluster")
#plt.show()

### Attach the KMeans cluster labels to `users` ###
users = users.join(df_labeled)

### Dendrogram for hierarchical clustering ###
plt.figure(figsize=(20, 10))
# Bug fix: the positional `axis` argument to DataFrame.drop was
# deprecated in pandas 1.1 and removed in 2.0 — use the keyword form.
merg = linkage(users.drop('labels', axis=1), method='ward')
dendrogram(merg, leaf_rotation=360)
plt.title('Dendrogram')
#plt.show()

### Agglomerative (hierarchical) clustering ###
# NOTE(review): `affinity` was renamed to `metric` in scikit-learn 1.2
# and removed in 1.4 — update once the pinned sklearn version allows.
hier_clus = AgglomerativeClustering(n_clusters=5,
                                    affinity='euclidean',
                                    linkage='ward')
cluster = hier_clus.fit_predict(users.drop('labels', axis=1))
### Attach the hierarchical cluster labels to `users` ###
users['Agg_label'] = cluster
### Labels for the CH plot ###
df_labeled = pd.DataFrame(hier_clus.labels_, columns=['labels'])
# Example #3
# 0
 def pca(self, x, components=None):
     """Apply (and cache) a PCA transform to the columns in ``self.cols_to_pca``.

     Columns not listed in ``self.cols_to_pca`` pass through untouched and
     are outer-joined back onto the transformed columns.  The PCA object is
     stored on ``self.pcaler`` on first use and reused afterwards.

     Bug fix: the original block's indentation was mangled (branch bodies
     at the same column as their `if`), so it could not compile; the
     structure below restores the evident intent.  `== None` comparisons
     were also replaced with the idiomatic `is None`.

     NOTE(review): `fit_transform` is called even when a cached `pcaler`
     exists, re-fitting it each call — confirm whether `transform` was
     intended for the cached path.
     NOTE(review): `columns=x.columns` only works when `components` equals
     the number of selected columns — verify callers never pass fewer.
     """
     if self.cols_to_pca is None:
         return x
     # Split the frame into PCA columns and pass-through columns.
     other_pos = list(set(range(len(x.columns))) - set(self.cols_to_pca))
     x_other = x.iloc[:, other_pos]
     x = x.iloc[:, self.cols_to_pca]
     scaled_x = StandardScaler().fit_transform(x)
     if self.pcaler is None:
         # Default to keeping every component.
         if components is None:
             components = scaled_x.shape[1]
         pcaler = PCA(n_components=components)
         self.pcaler = pcaler
     else:
         pcaler = self.pcaler
     pca_x = pcaler.fit_transform(scaled_x)
     scaled_x = pd.DataFrame(pca_x, columns=x.columns, index=x.index)
     x = scaled_x.join(x_other, how='outer')
     return x
 #Rolling scaling to ensure out of sample testing
 #TODO
# def roll_scale(self, x_test, x):
# if self.cols_to_scale == None:
# return x_test
# x_other = x.iloc[:,list(set(list(range(0, len(x.columns))))-set(self.cols_to_scale))]
# x = x.iloc[:,self.cols_to_scale]
# x_test = x_test.iloc[:,self.cols_to_scale]
# x_all = x.append(x_test)
# scaled_x = x_all.expanding(min_periods=len(x)).apply(lambda x: self.scale(pd.DataFrame(x).iloc[len(x)-1:,], pd.DataFrame(x)[:len(x)-1]))
# scaled_x = pd.DataFrame(scaled_x, columns = x_all.columns, index = x_all.index)
# x = scaled_x.join(x_other, how='outer')
# return x
 
 #Merge data_dict into one dataframe
 def merge_data(self, data_dict):
     """Outer-join every frame in *data_dict* into one DataFrame on the index.

     Values are joined in dict insertion order, gaps are forward-filled,
     rows still containing NaN (i.e. before the first observation of some
     series) are dropped, and the columns are renamed to the dict keys.
     The result is cached on ``self.data_df`` and returned.

     NOTE(review): renaming via ``data_dict.keys()`` assumes each value
     frame contributes exactly one column — confirm with callers.
     """
     from functools import reduce  # stdlib; local import keeps the block self-contained

     frames = list(data_dict.values())
     merged = reduce(lambda left, right: left.join(right, how='outer'), frames)
     # `.ffill()` replaces the deprecated `fillna(method='pad')` form.
     merged = merged.ffill()
     self.data_df = merged.dropna(axis=0)
     #self.data_df.sort_index(inplace=True)
     self.data_df.columns = list(data_dict.keys())
     return self.data_df
 
 def get_traintest(self, x, y, threshold = 0.8):
     """Chronologically split *x*/*y* into train and test partitions.

     Rows are first aligned on the indices common to both inputs, joined
     into one frame, and rows that are entirely NaN are removed.  The
     first ``floor(len * threshold)`` rows form the training set; the
     remainder (starting at ``cutoff + 1``) forms the test set.

     NOTE(review): the row at position ``cutoff`` lands in neither split —
     confirm this one-row gap is deliberate (e.g. leakage avoidance).

     Returns ``y_train, y_test, x_train, x_test`` — the y parts as 1-D
     numpy arrays, the x parts as DataFrames.
     """
     shared = x.index.intersection(y.index)
     aligned = y.loc[shared].to_frame().join(x.loc[shared])
     aligned = aligned.dropna(how='all')

     # First column is the target; the rest are features.
     target = aligned.iloc[:, 0]
     features = aligned.iloc[:, 1:]

     cutoff = int(np.floor(len(aligned) * threshold))
     last = len(aligned)

     y_train = target.iloc[:cutoff]
     y_test = target.iloc[cutoff + 1:last]
     x_train = features.iloc[:cutoff, :]
     x_test = features.iloc[cutoff + 1:last, :]
     return (y_train.to_frame().values.ravel(),
             y_test.to_frame().values.ravel(),
             x_train, x_test)