Python transform Examples

Programming Language: Python

Namespace/Package Name: sklearn.preprocessing.scale

Method/Function: transform

Examples at hotexamples.com: 3

Python transform - 3 examples found. These are the top-rated real-world Python examples of sklearn.preprocessing.scale.transform, extracted from open source projects. You can rate examples to help us improve the quality of the examples.

Example #1

Show file

def data():
    """Load the ASTRAL train/test CSV files and return L1-selected features.

    Both CSV files are read from hard-coded paths. In each file every column
    after the first one is used as a feature and the ``class`` column holds
    the label.

    The scaler, the label encoder and the feature selector are all fitted on
    the training split only and then applied unchanged to the test split, so
    that both splits share one feature scale, one label-to-integer mapping
    and one set of selected columns.

    Returns:
        tuple: ``(X_train1, X_test1, y_train, y_test)`` where the ``X`` arrays
        contain the standardized, selected features and the ``y`` arrays are
        the outputs of ``np_utils.to_categorical`` with a common class count.

    Raises:
        ValueError: If the test file contains a class label that does not
            occur in the training file (raised by ``LabelEncoder.transform``).
    """
    PATH = "/ASTRAL186/train1/astral_train.csv"
    dataset = pd.read_csv(PATH)
    col = dataset.columns.values.tolist()
    col1 = col[1:]  # skip the first column; the rest are used as features
    print(len(col1))  # original author's note: 2460
    X_train = np.array(dataset[col1])
    label_encoder = preprocessing.LabelEncoder()
    y_train = label_encoder.fit_transform(dataset['class'])
    print(len(y_train))  # original author's note: 5273
    scale = StandardScaler().fit(X_train)
    X_train = scale.transform(X_train)

    PATH_ = "/ASTRAL186/test1/astral_test.csv"
    dataset_ = pd.read_csv(PATH_)
    col_ = dataset_.columns.values.tolist()
    col1_ = col_[1:]
    print(len(col1_))  # original author's note: 2460
    X_test = np.array(dataset_[col1_])
    # Reuse the encoder fitted on the training labels so a given class maps
    # to the same integer in both splits.
    y_test = label_encoder.transform(dataset_['class'])
    print(len(y_test))  # original author's note: 1319
    # Reuse the training-set statistics: the classifier and the selector
    # below are fitted on features scaled with them, so the test features
    # must be scaled the same way instead of with a scaler refit on test data.
    X_test = scale.transform(X_test)

    # Feature selection with an L1-penalized linear model.
    # Original author's experiment notes (apparently dataset / threshold /
    # number of selected features -- unverified):
    #   LogisticRegression(penalty='l1', C=0.1, solver='liblinear',
    #                      random_state=0):
    #     edd+1.25mean(676)/tg+1.25mean(584)/astral+1.25mean(549)/
    #     astral_train+1.0*mean(867)
    #   LinearSVC: dd+1.5mean(584)/le+mean(554)/astral_train1+1.25mean(794)
    clf = LinearSVC(penalty='l1', C=0.1, dual=False, random_state=0)
    clf.fit(X_train, y_train)
    # Per-feature importance: sum of absolute coefficients over the rows of
    # clf.coef_ (column-wise L1 norm).
    importance = np.linalg.norm(clf.coef_, axis=0, ord=1)
    mean = np.mean(importance)
    model = SelectFromModel(clf, prefit=True, threshold=1.25 * mean)

    X_train1 = model.transform(X_train)
    print(X_train1.shape[1])  # original author's note: 867/794
    X_test1 = model.transform(X_test)
    print(X_test1.shape[1])  # original author's note: 867/794

    # Pass the class count explicitly (positionally) so both label matrices
    # have the same width even if the test split lacks the highest class id.
    n_classes = len(label_encoder.classes_)
    y_train = np_utils.to_categorical(y_train, n_classes)
    y_test = np_utils.to_categorical(y_test, n_classes)

    return X_train1, X_test1, y_train, y_test

Example #2

Show file

def read_xy(datapath):
    """Read a CSV file and return standardized features with encoded labels.

    Every column after the first one is used as a feature; the ``class``
    column supplies the labels. The number of feature columns and the number
    of samples are printed along the way.

    Args:
        datapath: Location of the CSV file, passed straight to
            ``pd.read_csv``.

    Returns:
        tuple: ``(X_train, y_train, f_dim)`` -- the features after
        ``StandardScaler``, the integer-encoded labels passed through
        ``np_utils.to_categorical`` (presumably Keras one-hot encoding --
        confirm against the file's imports), and the number of feature
        columns.
    """
    frame = pd.read_csv(datapath)
    feature_names = frame.columns.values.tolist()[1:]
    print(len(feature_names))

    raw_features = np.array(frame[feature_names])
    label_ids = preprocessing.LabelEncoder().fit_transform(frame['class'])
    print(len(label_ids))

    # Fit the scaler on these features and apply it in one step.
    scaled_features = StandardScaler().fit_transform(raw_features)
    n_features = scaled_features.shape[1]
    return scaled_features, np_utils.to_categorical(label_ids), n_features

Example #3

Show file

def read_xy(PATH):
    """Read a CSV file, standardize it and keep the L1-selected features.

    Every column after the first one is used as a feature; the ``class``
    column supplies the labels. An L1-penalized logistic regression is
    fitted on the standardized features, and ``SelectFromModel`` keeps the
    features whose importance reaches ``2.0 *`` the mean importance.

    Args:
        PATH: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        tuple: ``(X_train, y_train, f_dim)`` -- the selected, standardized
        features, the integer-encoded labels passed through
        ``np_utils.to_categorical``, and the number of selected features
        (which is also printed).
    """
    frame = pd.read_csv(PATH)  # read the raw data with pandas
    header = frame.columns.values.tolist()  # column names (header row)
    feature_names = header[1:]  # feature columns
    print(len(feature_names))  # feature dimensionality

    raw_features = np.array(frame[feature_names])  # feature values
    # Encode the class labels as integers.
    label_ids = preprocessing.LabelEncoder().fit_transform(frame['class'])
    print(len(label_ids))

    # Standardize the feature matrix. (Original note: not needed for
    # probabilistic models or tree-based models that do not rely on distance
    # computations.)
    scaled = StandardScaler().fit_transform(raw_features)

    # Feature selection via SelectFromModel, with a penalized linear model as
    # the base estimator. Original notes: a smaller C leads to fewer selected
    # features; with Lasso, a larger alpha leads to fewer selected features.
    #
    # Alternatives the original author kept around (disabled):
    #   * regressors whose coef_ is 1 x n_features -- Lasso, LassoCV, Ridge,
    #     ElasticNet, LinearRegression -- with importance = np.abs(coef_);
    #   * LogisticRegression(penalty='l2', C=0.1, random_state=0);
    #   * LR(threshold=0.5, C=0.1), where threshold is the cut-off on the
    #     difference between weight coefficients;
    #   * LinearSVC with penalty='l1' (dual=False) or penalty='l2'.
    #
    # Original solver notes: 'newton-cg', 'sag' and 'lbfgs' only support L2
    # regularization; 'liblinear' supports both L1 and L2; with dual=True
    # only the L2 penalty is supported.
    selector_base = LogisticRegression(
        penalty='l1', C=0.1, solver='liblinear', random_state=0
    )  # coef_ is n_classes x n_features
    selector_base.fit(scaled, label_ids)

    # Every class has one weight per feature; adding up the absolute weights
    # of a feature across classes gives that feature's importance score.
    # (Original note: np.sum(np.abs(coef_), axis=0) is the alternative way
    # to compute it.)
    importance = np.linalg.norm(selector_base.coef_, axis=0, ord=1)
    mean_importance = np.mean(importance)

    # Original notes on `threshold`: string or float, optional, default None;
    # 'median', 'mean' or a form such as '1.25*mean' may be used. If the
    # estimator uses an L1 penalty the default threshold is 1e-5, otherwise
    # the mean is used.
    selector = SelectFromModel(
        selector_base, prefit=True, threshold=2.0 * mean_importance
    )

    selected = selector.transform(scaled)
    n_selected = selected.shape[1]
    print(n_selected)
    return selected, np_utils.to_categorical(label_ids), n_selected