コード例 #1
0
    def fit(self, X, y):
        """Greedily drop features one at a time until ``self.k_features`` remain.

        The data is split once into a train and a test part.  Starting from
        the full set of column indices, each round scores every subset that
        is one column smaller (via ``self._cal_score``) and keeps the
        highest-scoring one.

        Sets ``indices_`` (final column subset), ``subsets_`` (subset kept at
        each round, full set first), ``scores_`` (score of each kept subset)
        and ``k_score_`` (score of the final subset).

        :param X: 2-D feature array; ``X.shape[1]`` is the number of columns
        :param y: target values, passed through to ``train_test_split``
        :return: self
        """
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=self.test_size, random_state=self.random_state
        )

        n_features = X.shape[1]
        self.indices_ = tuple(range(n_features))
        self.subsets_ = [self.indices_]
        # Baseline: score obtained with every column present.
        self.scores_ = [
            self._cal_score(X_train, X_test, y_train, y_test, self.indices_)
        ]

        while n_features > self.k_features:
            # All ways of removing exactly one column from the current subset.
            candidates = list(combinations(self.indices_, r=n_features - 1))
            candidate_scores = [
                self._cal_score(X_train, X_test, y_train, y_test, subset)
                for subset in candidates
            ]

            winner = np.argmax(candidate_scores)
            self.indices_ = candidates[winner]
            self.subsets_.append(self.indices_)
            self.scores_.append(candidate_scores[winner])
            n_features -= 1

        self.k_score_ = self.scores_[-1]
        return self
コード例 #2
0
def model_auto_tpot(
    df,
    colX, coly,
    outfolder="aaserialize/",
    model_type="regressor/classifier",
    train_size=0.5,
    generation=1,
    population_size=5,
    verbosity=2,
):
    """Run a TPOT search on df[colX] -> df[coly] and export the pipeline code.

    The search is a very slow process; use a small number of samples.

    :param df: table supporting ``df[cols].values`` (e.g. a pandas DataFrame)
    :param colX: column selector for the features
    :param coly: column selector for the target
    :param outfolder: folder the generated pipeline file is written to
    :param model_type: ``"regressor"`` or ``"classifier"``
    :param train_size: share (or count) of rows used for training; the rest
        is used to score the fitted model
    :param generation: number of TPOT generations
    :param population_size: TPOT population size
    :param verbosity: TPOT verbosity level
    :return: path of the exported pipeline file
    :raises ValueError: if ``model_type`` is not one of the two supported values
    """
    import os

    # Fail before the expensive search instead of hitting an unbound `clf` later.
    if model_type not in ("regressor", "classifier"):
        raise ValueError(
            "model_type must be 'regressor' or 'classifier', got %r" % (model_type,)
        )

    tpot = import_("tpot")

    X = df[colX].values
    y = df[coly].values

    # Honour the caller's train_size; test_size defaults to its complement.
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_size)

    if model_type == "regressor":
        estimator_cls = tpot.TPOTRegressor
    else:
        estimator_cls = tpot.TPOTClassifier
    clf = estimator_cls(
        generations=generation, population_size=population_size, verbosity=verbosity
    )

    print("Start")
    clf.fit(X_train, y_train)

    # score/export live on the fitted estimator, not on the tpot module.
    score = clf.score(X_test, y_test)
    print("score", score)

    # File name prefix is kept as-is for both model types so existing
    # consumers of the output folder keep finding the files.
    file1 = os.path.join(
        outfolder,
        "tpot_regression_pipeline_" + str(np.random.randint(1000, 9999)) + ".py",
    )
    clf.export(file1)
    return file1
コード例 #3
0
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# Single feature per sample, shaped as one column.
X = np.array(name_letters).reshape(-1, 1)
# Binary target: 0 where column 1 of the selected rows equals 'male', else 1.
y = np.where(labeled_names[ind, 1] == 'male', 0, 1)

# Binarize the feature values so they can be fed to the multinomial model.
lb = preprocessing.LabelBinarizer()
lb.fit(X)
X2 = lb.transform(X)

# train_test_split returns (X_train, X_test, y_train, y_test) for two inputs.
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2,
                                                        y,
                                                        test_size=0.4,
                                                        random_state=42)

# Same split (same seed and size) on the raw, non-binarized feature.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.4,
                                                    random_state=42)

clf = MultinomialNB(alpha=0.1, fit_prior=True)
clf.fit(X_train2, y_train2)
y_train_pred = clf.predict(X_train2)
y_test_pred = clf.predict(X_test2)
コード例 #4
0
def train_val_test(df, y):
    """Split ``df`` into train / validation / test parts, stratified on ``y``.

    The first split keeps 30% of the rows for training; the remaining 70% is
    then halved into validation and test.  Both splits use ``random_state=42``.

    :param df: rows to split
    :param y: labels aligned with ``df``, used only for stratification
    :return: ``(train, val, test)``
    """
    # Split the labels alongside the rows so the second split can stratify on
    # labels that match the remaining rows (the full-length y would not).
    train, remainder, _, y_remainder = train_test_split(
        df, y, test_size=.70, stratify=y, random_state=42
    )

    val, test = train_test_split(
        remainder, test_size=.50, stratify=y_remainder, random_state=42
    )

    return train, val, test
コード例 #5
0
ファイル: model.py プロジェクト: elshazly1996/NTI-Assginment
 def train_test(self, test_size):
     """Split ``self.x`` / ``self.y`` and store the four parts on the instance.

     Sets ``x_train``, ``x_test``, ``y_train`` and ``y_test``; the split uses
     ``random_state=0``.

     :param test_size: forwarded to ``train_test_split`` as ``test_size``
     """
     parts = train_test_split(
         self.x, self.y, random_state=0, test_size=test_size)
     self.x_train, self.x_test, self.y_train, self.y_test = parts
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# Importing the dataset (must happen before `dataset` is indexed below)
dataset = pd.read_csv('50_Startups.csv')
# Features: every column except the last; target: column 4.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, 4].values

# Encoding Categorical Data
# NOTE: `categorical_features` was removed in scikit-learn 0.22; on newer
# versions this step needs sklearn.compose.ColumnTransformer instead.
labelencoder = LabelEncoder()
X[:, 3] = labelencoder.fit_transform(X[:, 3])
onehotencoder = OneHotEncoder(categorical_features=[3])
X = onehotencoder.fit_transform(X).toarray()
# Drop column 0 of the encoded matrix
X = X[:, 1:]

# Split the data into a training set and a test set.
# train_test_split returns (X_train, X_test, y_train, y_test) for two inputs.
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=1 / 3,
                                                    random_state=0)
コード例 #7
0
# Response vector: last column of the dataset.
r_matr = dataset.iloc[:, -1].values

# Fill in missing data in feature columns 1 and 2 with the column mean.
# NOTE: Imputer was removed in scikit-learn 0.22 (replaced by
# sklearn.impute.SimpleImputer); kept here to match the rest of this script.
from sklearn.preprocessing import Imputer
imputer = Imputer(missing_values="NaN", strategy="mean", axis=0)
imputer = imputer.fit(f_matr[:, [1,2]])
f_matr[:, [1,2]] = imputer.transform(f_matr[:, [1,2]])

# Categorical data encoding
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

country_encoder = LabelEncoder()
f_matr[:, 0] = country_encoder.fit_transform(f_matr[:, 0])

# One-hot encode column 0 of the matrix that was just label-encoded.
# NOTE: `categorical_features` was removed in scikit-learn 0.22.
onehotencoder = OneHotEncoder(categorical_features=[0])
f_matr = onehotencoder.fit_transform(f_matr).toarray()

response_encoder = LabelEncoder()
r_matr = response_encoder.fit_transform(r_matr)

# Separating training and testing set
from sklearn.model_selection import train_test_split
f_matr_train, f_matr_test,\
r_matr_train, r_matr_test = train_test_split(f_matr, r_matr, test_size=0.2, random_state=0)

# Feature Scaling -- age and salary columns should be same range
from sklearn.preprocessing import StandardScaler
standard_scaler = StandardScaler()
f_matr_train = standard_scaler.fit_transform(f_matr_train)
# Reuse the statistics learned on the training set; re-fitting on the test
# set would scale the two sets differently.
f_matr_test = standard_scaler.transform(f_matr_test)