def test_polynomialfeatures_vs_sklearn():
    # Compare msmbuilder.preprocessing.PolynomialFeatures
    # with sklearn.preprocessing.PolynomialFeatures

    polynomialfeaturesr = PolynomialFeaturesR()
    polynomialfeaturesr.fit(np.concatenate(trajs))

    polynomialfeatures = PolynomialFeatures()
    polynomialfeatures.fit(trajs)

    y_ref1 = polynomialfeaturesr.transform(trajs[0])
    y1 = polynomialfeatures.transform(trajs)[0]

    np.testing.assert_array_almost_equal(y_ref1, y1)
Example #2
def get_polynomial_features(df, interaction_sign=' x ', **kwargs):
    """
    Gets polynomial features for the given data frame using the given sklearn.PolynomialFeatures arguments
    :param df: DataFrame to create new features from
    :param kwargs: Arguments for PolynomialFeatures
    :return: DataFrame with labeled polynomial feature values
    """
    pf = PolynomialFeatures(**kwargs)
    feats = _get_polynomial_features(df.columns.tolist(), pf.fit(df), interaction_sign=interaction_sign)
    return pd.DataFrame(pf.transform(df), columns=feats)
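# The _get_polynomial_features helper used above is not shown in this snippet.
# A minimal sketch of what it might look like (hypothetical -- it assumes the
# labels are built from the fitted transformer's powers_ attribute):
def _get_polynomial_features(columns, fitted_pf, interaction_sign=' x '):
    names = []
    for powers in fitted_pf.powers_:  # one exponent vector per output feature
        terms = [col if p == 1 else '{}^{}'.format(col, p)
                 for col, p in zip(columns, powers) if p > 0]
        names.append(interaction_sign.join(terms) if terms else '1')
    return names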
Example #3
    def _polynomial_features(self, input_df):
        """Uses Scikit-learn's PolynomialFeatures to construct new degree-2 polynomial features from the existing feature set

        Parameters
        ----------
        input_df: pandas.DataFrame {n_samples, n_features+['class', 'group', 'guess']}
            Input DataFrame to scale

        Returns
        -------
        modified_df: pandas.DataFrame {n_samples, n_constructed_features + ['guess', 'group', 'class']}
            Returns a DataFrame containing the constructed features

        """

        training_features = input_df.loc[input_df['group'] == 'training'].drop(['class', 'group', 'guess'], axis=1)

        if len(training_features.columns.values) == 0:
            return input_df.copy()
        elif len(training_features.columns.values) > 700:
            # Too many features to produce - skip this operator
            return input_df.copy()

        # The feature constructor must be fit on only the training data
        poly = PolynomialFeatures(degree=2, include_bias=False)
        poly.fit(training_features.values.astype(np.float64))
        constructed_features = poly.transform(input_df.drop(['class', 'group', 'guess'], axis=1).values.astype(np.float64))

        modified_df = pd.DataFrame(data=constructed_features)
        modified_df['class'] = input_df['class'].values
        modified_df['group'] = input_df['group'].values
        modified_df['guess'] = input_df['guess'].values

        new_col_names = {}
        for column in modified_df.columns.values:
            if type(column) != str:
                new_col_names[column] = str(column).zfill(10)
        modified_df.rename(columns=new_col_names, inplace=True)

        return modified_df.copy()
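    # For reference: with degree=2 and include_bias=False, two input columns [a, b]
    # expand to [a, b, a^2, a*b, b^2] -- five constructed features.  A quick check:
    #   >>> PolynomialFeatures(degree=2, include_bias=False).fit_transform(np.array([[2., 3.]]))
    #   array([[2., 3., 4., 6., 9.]])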
Example #4
y = np.array(y)
y = y.reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=5)

poly = PolynomialFeatures(degree=2)
print("Printing first row of X before Polynomial Features have been applied:",
      X_train[0])
X_poly = poly.fit_transform(X_train)
print(
    "Printing first row of X after Polynomial Features (w/ deg =2) have been applied:",
    X_poly[0])
# PolynomialFeatures ignores y and has already been fitted by fit_transform above;
# the test set only needs to be transformed with the same expander.
X_test_poly = poly.transform(X_test)

clf = Perceptron()
clf.fit(X_poly, y_train)

y_test_pred = clf.predict(X_test_poly)
finalscore = clf.score(X_test_poly, y_test)
coef = clf.coef_
intercept = clf.intercept_

print("Final Parameters values:", coef)
print("Intercept:", intercept)
print("Final Score:", finalscore)
Example #5
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Importing the dataset
datas = pd.read_csv('data.csv')
datas

X = datas.iloc[:, 0:1].values
y = datas.iloc[:, 1].values

poly = PolynomialFeatures(degree=1)
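# note: degree=1 only prepends the bias column of ones, so the fit below is
# effectively a plain linear regression on the original feature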
X_poly = poly.fit_transform(X)

poly.fit(X_poly, y)
lin2 = LinearRegression()
lin2.fit(X_poly, y)

lin = LinearRegression()
lin.fit(X, y)

plt.scatter(X, y, color='blue')
plt.plot(X, lin.predict(X), color='red')
plt.title('Linear Regression')
plt.xlabel('SF')
plt.ylabel('numPersons')

plt.show()

plt.scatter(X, y, color='blue')
Example #6
# Simple Linear Regression
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X, y)
y_pred_sim = regressor.predict(X)

# Polynomial Regression
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)
poly_reg.fit(X_poly, y)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)
y_pred_poly = lin_reg_2.predict(poly_reg.fit_transform(X))

#RandomForest Regression
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=280, random_state=0)
regressor.fit(X, y)
y_pred_ran = regressor.predict(X)

#DecisionTree Regression
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X, y)
y_pred_dec = regressor.predict(X)
Example #7
file = ["1", "2", "3", "4", "5", "6"]
for i in file:
    data = pd.read_csv("dim_reduced_to_" + i + ".csv")
    print("For dim_reduced_to_" + i + ".csv ")
    data.drop("CreationTime", inplace=True, axis=1)
    col = data["InBandwidth"]
    data.drop("InBandwidth", inplace=True, axis=1)
    X_train, X_test, Y_train, Y_test = train_test_split(data,
                                                        col,
                                                        test_size=0.33,
                                                        random_state=42)
    degrees = [2, 3, 4]
    for deg in degrees:
        regressor = PolynomialFeatures(degree=deg)
        x_poly = regressor.fit_transform(X_train)
        lin_reg = LinearRegression()
        lin_reg.fit(x_poly, Y_train)
        Y_pred_test = lin_reg.predict(regressor.transform(X_test))
        plt.scatter(Y_test, Y_pred_test)
        plt.xlabel("Y_test")
        plt.ylabel("Y_pred_test")
        plt.show()
        print("R2 score when degree =", deg, "is: ",
              r2_score(Y_test.values, Y_pred_test))
        print("rmse score when degree=", deg, "is:",
              rms(Y_test.values, Y_pred_test))
        Y_pred_train = lin_reg.predict(regressor.transform(X_train))
        plt.scatter(Y_train, Y_pred_train)
        plt.xlabel("Y_train")
        plt.ylabel("Y_pred_train")
Example #8
# Let's proceed with seeing how we can invoke some 
# machine learning functionalities in scikit-learn
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
# Data Preprocessing routines
x = np.asmatrix([[1,2],[2,4]])
# instantiate polynomial features
poly = PolynomialFeatures(degree = 2)
# build model
poly.fit(x)
x_poly = poly.transform(x)

print ("Original x variable shape",x.shape)
print (x)
print ('\n##############################\n')
print ("Transformed x variables",x_poly.shape)
print (x_poly)
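# For this 2x2 input the degree-2 expansion has 6 columns per row:
# [1, x1, x2, x1^2, x1*x2, x2^2], e.g. [1, 2] -> [1, 1, 2, 1, 2, 4]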

#alternatively 
x_poly = poly.fit_transform(x)
print ('##################alternatively')
print(x_poly)



from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
#Load data: Let's use the iris dataset to see how the tree algorithm can be used

data = load_iris()
# We will load the iris dataset in the x and y variables.
np.mean((prediction - test_y)**2)

pd.DataFrame({
    'actual': test_y,
    'prediction': prediction,
    'diff': (test_y - prediction)
})
#end of linear Regression model

#start of Polynomial Regression

from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=4)
x_poly = poly.fit_transform(train_x)
poly.fit(x_poly, train_y)
lin2 = LinearRegression()
lin2.fit(x_poly, train_y)

prediction2 = lin2.predict(poly.fit_transform(test_x))
#calculating error
np.mean((prediction2 - test_y)**2)

pd.DataFrame({
    'actual': test_y,
    'prediction': prediction2,
    'diff': (test_y - prediction2)
})
#end of Polynomial Regression

#start of DecisionTreeRegressor
ds = pd.read_csv("Position_Salaries.csv")

x = ds.iloc[:,1:2].values
y = ds.iloc[:,2].values

from sklearn.linear_model import LinearRegression as LR  # for comparison purposes
linreg1 = LR()
linreg2 = LR()
linreg1.fit(x,y)
y_pred1 = linreg1.predict(x)
y_pred1 = np.array(y_pred1,dtype = 'int64')

from sklearn.preprocessing import PolynomialFeatures as PF  # polynomial regression object
polyreg = PF(degree = 2)
x_poly = polyreg.fit_transform(x)# adding new features like x0
x_poly = np.array(x_poly,dtype = 'int64')
polyreg.fit(x_poly,y)
linreg2.fit(x_poly,y)
y_pred2 = linreg2.predict(polyreg.fit_transform(x))

#plotting the results
plt.scatter(x,y,c = 'r')
plt.plot(x,y_pred1,c = 'b')# plotting the linear model which is bad
plt.plot(x,y_pred2,c = 'g')
plt.xlabel("Position")
plt.ylabel("Salary")


plt.show()
Example #11
def poly2_regr(x, y):
    from sklearn.preprocessing import PolynomialFeatures
    poly = PolynomialFeatures(degree=2)
    poly.fit(x, y)
    return poly
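# Note: despite its name, poly2_regr returns only the fitted feature expander;
# a regressor (e.g. LinearRegression) would still need to be fit on poly.transform(x).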
regression_lineaire.fit(X, y)


#instead, use the polynomial model that lives in the preprocessing package
"""
this model takes the degree of the function and approximates it until the curve
matches the shape of our data
"""
from sklearn.preprocessing import PolynomialFeatures

regression_polynomial = PolynomialFeatures(degree=4)

X_optimal_data = regression_polynomial.fit_transform(X)

#training on the X_optimal_data values
regression_polynomial.fit(X_optimal_data, y)

regression_lineaire_X_optimal = LinearRegression()
regression_lineaire_X_optimal.fit(X_optimal_data, y)

#visualising the linear regression
plt.scatter(X, y, color='red')
plt.plot(X, regression_lineaire.predict(X), color='blue')
plt.title('plot with linear regression')
plt.xlabel('personality ranking')
plt.ylabel('average annual salary')

plt.show()

#visualising the optimal (polynomial-feature) linear regression
"""plt.scatter(X_optimal_data, y[:,np.newaxis], color='red')
Example #13
poly_target = poly_features['TARGET']

poly_features = poly_features.drop(columns=['TARGET'])

# Need to impute missing values
poly_features = imputer.fit_transform(poly_features)
poly_features_test = imputer.transform(poly_features_test)

from sklearn.preprocessing import PolynomialFeatures

# Create the polynomial object with specified degree
poly_transformer = PolynomialFeatures(degree=3)

# Train the polynomial features
poly_transformer.fit(poly_features)

# Transform the features
poly_features = poly_transformer.transform(poly_features)
poly_features_test = poly_transformer.transform(poly_features_test)
print('Polynomial Features shape: ', poly_features.shape)

poly_transformer.get_feature_names(input_features=[
    'EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3', 'DAYS_BIRTH'
])[:15]
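# get_feature_names returns labels such as '1', 'EXT_SOURCE_1',
# 'EXT_SOURCE_1 EXT_SOURCE_2', 'EXT_SOURCE_1^2', ..., matching the constructed columns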

# Create a dataframe of the features
poly_features = pd.DataFrame(poly_features,
                             columns=poly_transformer.get_feature_names([
                                 'EXT_SOURCE_1', 'EXT_SOURCE_2',
                                 'EXT_SOURCE_3', 'DAYS_BIRTH'
>>> import pandas as pd
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> from sklearn.linear_model import LinearRegression
>>> from sklearn.preprocessing import PolynomialFeatures
>>> df = pd.read_csv(r"C:\Users\shashikant\Desktop\polynomial_regression\polynomial.csv")
>>> x = df[['level']].values
>>> y = df[['salary']].values
>>> model = LinearRegression()
>>> model.fit(x,y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
>>> poly = PolynomialFeatures()
>>> p_x = poly.fit_transform(x)
>>> poly.fit(p_x,y)
PolynomialFeatures(degree=2, include_bias=True, interaction_only=False,
                   order='C')
>>> model1 = LinearRegression()
>>> model1.fit(p_x,y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
>>> plt.title('Linear Model')
Text(0.5, 1.0, 'Linear Model')
>>> plt.xlabel('Position Level')
Text(0.5, 0, 'Position Level')
>>> plt.ylabel('salary')
Text(0, 0.5, 'salary')
>>> plt.scatter(x,y,color = 'r')
<matplotlib.collections.PathCollection object at 0x0000000018C33F70>
>>> plt.plot(x,model.predict(x),color = 'b')
[<matplotlib.lines.Line2D object at 0x0000000018C4E4C0>]
                'Part_2:_Regression/Section 6 - Polynomial'
                ' Regression/Polynomial_Regression/'
                'Position_Salaries.csv')

X = D.iloc[:, 1:2].values  # independent variable
y = D.iloc[:, 2].values  # dependent variable

from sklearn.linear_model import LinearRegression
modelLinear = LinearRegression().fit(X=X, y=y)
# y_artificial = modelLinear.predict(X)

from sklearn.preprocessing import PolynomialFeatures

polyRegression = PolynomialFeatures(degree=4)
X_poly = polyRegression.fit_transform(X)
polyRegression.fit(X_poly, y)

modelPoly = LinearRegression().fit(X_poly, y)

# Visualization Linear Model
plt.scatter(X, y, color='red')
plt.plot(X, modelLinear.predict(X), color='blue')
plt.title('Truth or Bluff')
plt.show()

# Visualization Polynomial Model
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape(len(X_grid), 1)

plt.scatter(X, y, color='red')
plt.plot(X_grid,
Example #16
    async def deprocessing(self, event):
        x_train = event['value'][0]
        y_train = event['value'][1]
        Type = event['value'][2]

        if Type == "logistic":
            if len(x_train) == 0:
                return

            if len(set(y_train)) <= 1:
                return

            clf = LogisticRegression()
            clf.fit(np.array(x_train), np.array(y_train))

            w = clf.coef_
            b = clf.intercept_

            x = np.array([0, 1])
            y = -(x * w[0][0] + b) / w[0][1]

            await self.send(
                text_data=json.dumps({
                    'y1': y[0],
                    'y2': y[1],
                    'intercept': clf.intercept_.tolist(),
                    'slope': clf.coef_.tolist()
                }))
        elif Type == "linear-reg":

            if len(x_train) == 0:
                return

            clf = LinearRegression()
            clf.fit(
                np.array(x_train).reshape(-1, 1),
                np.array(y_train).reshape(-1, 1))

            x_test = [0, 1]

            y_pred = clf.predict(np.array(x_test).reshape(-1, 1))
            await self.send(text_data=json.dumps({'y_pred': y_pred.tolist()}))

        elif Type == "poly-reg":

            if len(x_train) == 0:
                return

            poly_reg = PolynomialFeatures(degree=4)
            X_poly = poly_reg.fit_transform(np.array(x_train).reshape(-1, 1))

            # print(X_poly)

            poly_reg.fit(X_poly, np.array(y_train))

            clf = LinearRegression()
            clf.fit(X_poly, np.array(y_train).reshape(-1, 1))

            x_test = np.arange(0.0, 1.0, 0.02)

            y_pred = clf.predict(poly_reg.fit_transform(x_test.reshape(-1, 1)))

            await self.send(text_data=json.dumps({
                'y_pred': y_pred.tolist(),
                'x_test': x_test.tolist()
            }))
Y = data.iloc[:, -1]

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 42)


from sklearn.linear_model import LinearRegression
linear = LinearRegression()
linear = linear.fit(X_train, Y_train)

"""from sklearn import metrics
print('Mean Absolute Error:', metrics.mean_absolute_error(Y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(Y_test, y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(Y_test, y_pred)))"""

from sklearn.preprocessing import PolynomialFeatures

polyR = PolynomialFeatures(degree = 4)
x_poly = polyR.fit_transform(X)

polyR.fit(x_poly, Y)
polymodel = linear.predict(X_test)

pickle.dump(linear, open('model.pk1', 'wb'))

model = pickle.load(open('model.pk1','rb'))



x, y = make_circles()
plt.close('all')
plt.figure(1)
plt.scatter(x[:,0], x[:,1], c=y)

x, y = make_moons()
plt.figure(2)
plt.scatter(x[:,0], x[:,1], c=y)

# plt.show()

from sklearn.preprocessing import PolynomialFeatures
# Data Preprocessing routines
x = np.asmatrix([[1,2],[2,4]])
poly = PolynomialFeatures(degree = 2)
poly.fit(x)
x_poly = poly.transform(x)
print "Original x variable shape", x.shape
print x
print
print "Transformed x variables", x_poly.shape
print x_poly

# alternatively
x_poly = poly.fit_transform(x)

from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris

data = load_iris()
x = data['data']
# Fitting Simple Linear Regression to the Training set

from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(Years_train, x_train)

# Predicting the Test set results

Y_pred = regressor.predict(Years_test)

# Fitting Polynomial Regression to the dataset

from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=5)
Years_poly = poly_reg.fit_transform(Years)
poly_reg.fit(Years_poly, x)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(Years_poly, x)

# Visualising the Training set results

plt.scatter(Years_train, x_train, color='yellow')
plt.plot(Years_train, regressor.predict(Years_train), color='red')
plt.title('Annual Temp Years (Training set)')
plt.xlabel('Years ')
plt.ylabel('MEAN TEMP')
plt.show()

# Visualising the Test set results

plt.scatter(Years_test, x_test, color='brown')
Example #20
class PolynomialTransformation(Transformer):
    def __init__(self,
                 degree=2,
                 interaction_only='True',
                 include_bias='False',
                 random_state=1):
        super().__init__("polynomial_regression", 34)
        self.input_type = [DISCRETE, NUMERICAL]
        self.compound_mode = 'concatenate'
        self.best_idxs = list()
        if degree == 2:
            self.bestn = 25
        elif degree == 3:
            self.bestn = 10
        elif degree == 4:
            self.bestn = 6

        self.output_type = NUMERICAL
        self.degree = degree
        self.interaction_only = check_for_bool(interaction_only)
        self.include_bias = check_for_bool(include_bias)
        self.random_state = random_state

    @ease_trans
    def operate(self, input_datanode, target_fields):
        from sklearn.preprocessing import PolynomialFeatures
        from lightgbm import LGBMRegressor
        X, y = input_datanode.data

        if not self.best_idxs:
            lgb = LGBMRegressor(random_state=1)
            lgb.fit(X, y)
            _importance = lgb.feature_importances_
            idx_importance = np.argsort(-_importance)
            cur_idx = 0
            while len(self.best_idxs) < self.bestn and cur_idx < len(
                    _importance):
                if idx_importance[cur_idx] in target_fields:
                    self.best_idxs.append(idx_importance[cur_idx])
                cur_idx += 1

        X_new = X[:, self.best_idxs]
        if not self.model:
            self.degree = int(self.degree)

            self.model = PolynomialFeatures(
                degree=self.degree,
                interaction_only=self.interaction_only,
                include_bias=self.include_bias)
            self.model.fit(X_new)

        _X = self.model.transform(X_new)
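        # with include_bias=False the first X_new.shape[1] output columns are the
        # original features, so the slice below keeps only the newly constructed terms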
        _X = _X[:, X_new.shape[1]:]
        return _X

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None):
        degree = UniformIntegerHyperparameter("degree",
                                              lower=2,
                                              upper=4,
                                              default_value=2)
        interaction_only = CategoricalHyperparameter("interaction_only",
                                                     ["False", "True"],
                                                     default_value="False")
        include_bias = UnParametrizedHyperparameter("include_bias", "False")

        cs = ConfigurationSpace()
        cs.add_hyperparameters([degree, interaction_only, include_bias])

        return cs
Example #21
class HigherOrderSimulator(BaseSimulator):
    def __init__(self,
                 n,
                 p,
                 noise_var=0.1,
                 x_var=1.,
                 degree=3,
                 with_input_blocks=False,
                 drop_a=0.2,
                 drop_i=0.8,
                 discretize_beta=False,
                 discretize_x=False,
                 *args,
                 **kwargs):
        """
        A vanilla simulator that simulates an arbitrary high-order Polynomial,
        for benchmarking interaction effects
        Args:
            n:
            p:
            noise_var:
            degree:
            with_input_blocks:
            drop_a:
            drop_i:
            discretize_beta:
            discretize_x:
            max_x:
        """
        self.n = n
        self.p = p
        self.with_input_blocks = with_input_blocks
        self.noise_var = noise_var
        self.x_var = x_var
        self.degree = degree
        self.polynomial_fitter = PolynomialFeatures(degree=degree,
                                                    interaction_only=False,
                                                    include_bias=False)
        self.polynomial_fitter.fit(np.zeros((self.n, self.p)))
        self.beta_a = np.zeros(p)
        self.beta_i = np.zeros(self.polynomial_fitter.n_output_features_ - p)
        self.powers_i_ = self.polynomial_fitter.powers_[p:]
        self.drop_a = drop_a
        self.drop_i = drop_i
        if discretize_beta:
            self.beta_rng = lambda p: np.random.choice(range(-1, 2), p)
        else:
            self.beta_rng = lambda p: np.random.uniform(-1, 1, p)
        if discretize_x:
            self.x_rng = lambda n: np.random.poisson(x_var, n)
        else:
            self.x_rng = lambda n: np.random.normal(0, np.sqrt(x_var), n)
        self.is_beta_built = False

    def sample_effect(self):
        # additive
        a_idx = np.random.choice(self.p,
                                 int(np.ceil(self.p * (1 - self.drop_a))),
                                 replace=False)
        self.beta_a[a_idx] = self.beta_rng(len(a_idx))
        # interaction
        i_idx = np.random.choice(
            len(self.beta_i),
            int(np.ceil(len(self.beta_i) * (1 - self.drop_i))),
            replace=False)
        self.beta_i[i_idx] = self.beta_rng(len(i_idx))
        self.is_beta_built = True

    def set_effect(self, beta_a, beta_i):
        self.beta_a = beta_a
        self.beta_i = beta_i
        self.is_beta_built = True

    def sample_data(self, N=None, *args, **kwargs):
        N = self.n if N is None else N
        X = self.x_rng(N * self.p).reshape(N, self.p)
        X_s = self.polynomial_fitter.transform(X)
        if not self.is_beta_built:
            self.sample_effect()
        beta = np.concatenate([self.beta_a, self.beta_i])
        y = X_s.dot(beta) + np.random.normal(0, np.sqrt(self.noise_var), N)
        if self.with_input_blocks:
            X = [
                X[:, i] if len(X.shape) > 2 else X[:,
                                                   i].reshape(X.shape[0], 1)
                for i in range(X.shape[1])
            ]
        return X, y

    def get_ground_truth(self, X):
        if self.with_input_blocks:
            X_ = np.concatenate(X, axis=1)
        else:
            X_ = X
        X_s = self.polynomial_fitter.transform(X_)
        beta = np.concatenate([self.beta_a, self.beta_i])
        return X_s.dot(beta)

    def get_nonzero_powers(self):
        if not self.is_beta_built:
            self.sample_effect()
            self.is_beta_built = True
        return self.powers_i_[np.where(self.beta_i != 0)]
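# A minimal usage sketch (hypothetical parameter values):
#   sim = HigherOrderSimulator(n=100, p=5, degree=2)
#   X, y = sim.sample_data()   # X: (100, 5) design matrix, y: polynomial response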
class ValueFunction():
    """
    The member functions of this class compute action-value function, epsGreedyPolicy,
    or perform a semi-gradient training step.
    """
    def __init__(self, in_len, out_len, degree=1):
        """
        Takes number of features in the state vector, number of actions, and polynomial degree. 
        """
        self.in_len = in_len
        self.out_len = out_len
        self.featureTransfromer = PolynomialFeatures(degree=degree,
                                                     interaction_only=False,
                                                     include_bias=False)
        self.featureTransfromer.fit(np.zeros(in_len).reshape(1, -1))
        self.weights = np.zeros(
            (len(self.featureTransfromer.get_feature_names()), out_len))
        self.old_weights = np.zeros(
            self.weights.shape
        )  # old_weights are used for dutch eligibility trace
        self.eligibility_trace = np.zeros(self.weights.shape)

    def _checkDims(self, state):
        if state.shape[0] != self.in_len:
            raise TypeError('Length of state must be equal to', self.in_len)

    def _transformState(self, state):
        self._checkDims(state)
        return self.featureTransfromer.transform(state.reshape(1, -1))[0]

    def computeVF(self, state):
        """
        Takes a state vector and returns an array containing the value of each possible action.
        """
        transformed_state = self._transformState(state)
        return np.matmul(transformed_state, self.weights)

    def epsGreedyPolicy(self, state, eps):
        """
        Takes a state vector and epsilon; returns an epsilon greedy action.
        """
        probs = np.zeros(self.out_len)
        probs[np.argmax(self.computeVF(state))] = 1 - eps
        probs = probs + (eps / len(probs))
        return np.argmax(np.random.multinomial(1, probs, 1))

    def softmaxPolicy(self, state, temperature=1.0):
        """
        Choose a soft-max action with respect to the actionVF.
        It is possible to set a temperature parameter, default is 1.0.
        """
        expVF = np.exp(self.computeVF(state) / temperature)
        probs = expVF / np.sum(expVF)
        return np.argmax(np.random.multinomial(1, probs, 1))

    def trainSemiGrad(self, state, action, td_error, learning_rate):
        """
        Performs a semi-gradient training step:
        state:         a state vector in which to train
        action:        an action chosen from the state vector
        td_error:      the TD error at the current state, usually denoted delta
        learning_rate: learning rate for training, usually denoted alpha
        """
        # To derive gradient, realize that the value function is a matrix multiplication
        # of state (1,4)-matrix and weight (4,2)-matrix and gives (1,2)-matrix (two actions).
        # Gradient of this matrix multiplication w.r.t. weight vector gives two matrices
        # of shape (4,2), one for each action. For the action 0, the first column of its gradient
        # matrix is basically the state vector the other column is full of zeros; for the other
        # action the columns are interchanged.
        grad = np.zeros(self.weights.shape)
        grad[:, action] = self._transformState(state)
        self.weights = self.weights + learning_rate * td_error * grad

    def trainEligibTraceSemiGrad(self, state, action, td_error, discount,
                                 decay_factor, learning_rate):
        """
        Perform a semi-gradient training step with eligibility trace:
        state:         a state vector in which to train
        action:        an action chosen from the state vector
        td_error:      the TD error at the current state, usually denoted delta
        discount:      discount of the future rewards, usually denoted gamma
        decay_factor:  decay of trace elements, usually denoted lambda
        learning_rate: learning rate for training, usually denoted alpha
        """
        grad = np.zeros(self.weights.shape)
        grad[:, action] = self._transformState(state)
        self.eligibility_trace = discount * decay_factor * self.eligibility_trace + grad
        self.weights = self.weights + learning_rate * td_error * self.eligibility_trace

    def trainDutchTraceSemiGrad(self, state, action, td_error, discount,
                                decay_factor, learning_rate):
        grad = np.zeros(self.weights.shape)
        grad[:, action] = self._transformState(state)
        lr = learning_rate
        ddf = discount * decay_factor
        lrddf = lr * ddf

        self.eligibility_trace = ddf*self.eligibility_trace + grad \
                                -lrddf*np.matmul(grad, np.matmul(self.eligibility_trace.T, grad))
        temp = self.weights
        self.weights = self.weights + lr*td_error*self.eligibility_trace \
                      +lr*np.matmul( self.eligibility_trace - grad, \
                                     np.matmul(self.weights.T - self.old_weights.T, grad) )
        self.old_weights = temp

    def reset(self):
        """
        Reset the weight vector and eligibility trace to zeros.
        """
        self.weights = np.zeros(self.weights.shape)
        self.old_weights = np.zeros(self.old_weights.shape)
        self.eligibility_trace = np.zeros(self.eligibility_trace.shape)

    def resetTraces(self):
        """
        Reset eligibility trace back to zeros.
        """
        self.old_weights = np.zeros(self.old_weights.shape)
        self.eligibility_trace = np.zeros(self.eligibility_trace.shape)
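# Usage sketch (hypothetical dimensions, e.g. a task with a 4-dimensional state
# and 2 actions):
#   vf = ValueFunction(in_len=4, out_len=2, degree=2)
#   action = vf.epsGreedyPolicy(np.random.randn(4), eps=0.1)
#   vf.trainSemiGrad(np.random.randn(4), action, td_error=0.5, learning_rate=0.01)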
Example #23
)
# --- Linear model ---

print("\n--- Linear model ---\n")
resp = input(
    'Do you want to run the hyperparameter search and plot generation? Approximate time: 30 min - 1 hour. (Y/N)'
)

if resp == 'Y':
    print('\n\nStudy of the polynomial variability of the data')
    X_copy = X.copy()
    for i in range(1, 3):
        print('Study with degree: ', i)
        poly = PolynomialFeatures(i)
        np.random.seed(0)
        X = poly.fit_transform(X)
        X_tst_poly = poly.transform(X_tst)
        clf = LR(random_state=0)
        clf.fit(X, y)
        resultados(clf, X, y, X_tst_poly, y_tst)
        X = X_copy.copy()
    input("\n--- Press enter to continue ---\n")

    print('\n\nStudy of the linear regularisation strength (this takes a while).')

    acu = []
    fsc = []
    x_axis = [i for i in range(-5, 10)]
    for i in x_axis:
        clf = LR(penalty='l2', random_state=0, solver='liblinear', C=10**i)
Example #24
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Generate the input data using random numbers
size = 20
x = np.random.randint(1, 100, size=size)
error = np.random.rand(size)
#error = np.zeros(size)
y = x * x + error
#print(error)
#print(x)
#print(y)

X = x.reshape((-1, 1))
#print(X)

transformer = PolynomialFeatures(degree=2, include_bias=False)
transformer.fit(X)
X = transformer.transform(X)
# X = PolynomialFeatures(degree=2, include_bias=False).fit_transform(X)
#print(X)


model = LinearRegression().fit(X, y)
r_sq = model.score(X, y)
print('coefficient of determination:', r_sq)
print('intercept:', model.intercept_)
print('coefficients:', model.coef_)
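# Since y = x**2 + noise and the features are [x, x**2], the fit should recover
# coefficients close to [0, 1] and an intercept near the mean of the uniform noise (~0.5)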

Example #25
#Get all of the y values except the last n rows
y = y[:-forecast_out]
print(y)

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

#Create and train the Polynomial Regression Model
poly_reg = PolynomialFeatures(degree = 3)
X_poly = poly_reg.fit_transform(x_train)
poly_reg.fit(X_poly, y_train)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y_train)
#plt.scatter(X, y, color = 'red')
#plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)), color = 'blue')
#plt.title('Truth or Bluff (Polynomial Regression)')
#plt.xlabel('Position level')
#plt.ylabel('Salary')
#plt.show()

#Testing Model
pr_confidence = lin_reg_2.score(poly_reg.fit_transform(x_test), y_test)
print(pr_confidence)

predictionss = lin_reg_2.predict(poly_reg.fit_transform(x_test))
lin_reg2.fit(X2, y)
y_predict2 = lin_reg2.predict(X2)
print("绘制多元回归结果图")
plt.scatter(x, y)
plt.plot(np.sort(x), y_predict2[np.argsort(x)], color='r')
plt.show()

print("\n新特征处理下,多元线性回归:", )
print("系数:{}".format(lin_reg2.coef_))
print("截距:{}\n".format(lin_reg2.intercept_))

# sklearn中多元线性回归
# 特征准备
# 这个degree表示我们使用多少次幂的多项式
poly = PolynomialFeatures(degree=2)
poly.fit(X)
X2 = poly.transform(X)
X2.shape  # output: (100, 3)
reg = LinearRegression()
reg.fit(X2, y)
y_predict = reg.predict(X2)
print("绘制sklearn多元回归结果图")
plt.scatter(x, y)
plt.plot(np.sort(x), y_predict2[np.argsort(x)], color='r')
plt.show()
print("\nsklearn中的多元线性回归:", )
print("系数:{}".format(reg.coef_))
print("截距:{}\n".format(reg.intercept_))

# Multivariate polynomial regression
print("Three-variable polynomial regression")
warnings.filterwarnings(action="ignore",
                        module="scipy",
                        message="^internal gelsd")

data = pd.read_csv('Position_Salaries.csv')

# does this so that x is a matrix; upper bound is non-inclusive so this matrix
# will only contain column 1
x = data.iloc[:, 1:2].values
y = data.iloc[:, -1].values

# No need to split set into training and test set because the dataset is very
# small
poly_reg = PolynomialFeatures(degree=4)
x_poly = poly_reg.fit_transform(x)
poly_reg.fit(x_poly, y)
x_grid = np.arange(min(x), max(x), 0.1)
x_grid = x_grid.reshape((len(x_grid), 1))
lin_reg = LinearRegression()
lin_reg.fit(x_poly, y)
y_pred = lin_reg.predict(poly_reg.fit_transform(x_grid))

plt.scatter(x, y, color='red')
plt.plot(x_grid, y_pred, color='blue')
plt.title('Salary vs Position')
plt.xlabel('Position')
plt.ylabel('Salary')

salary = lin_reg.predict(poly_reg.transform([[6.5]]))[0]
print('Projected salary for position 6.5: %0.2f' % (salary))
Example #28
extrapolation_days = 4  # how many extrapolate in future
previous_days = 7  # how many days regression in past

X = [[x] for x in day_of_march[-previous_days:]]
first_future_day = current_day + 1
trendline_dates = [[x] for x in range(first_future_day, first_future_day +
                                      extrapolation_days)]
X_ = X + trendline_dates
y = log2_hosp[-previous_days:]
print(f"Days of March:               {trendline_dates}")

# linear (degree 1)
poly_1 = PolynomialFeatures(degree=1)
X_poly_1 = poly_1.fit_transform(X)

poly_1.fit(X_poly_1, y)
lin2_1 = LinearRegression()
lin2_1.fit(X_poly_1, y)

trend_1 = [pow(2, x) for x in lin2_1.predict(poly_1.fit_transform(X_))]

log2_hosp_trend_1 = lin2_1.predict(poly_1.fit_transform(trendline_dates))
print(
    f"Trendline LINEAR numbers:    {[int(pow(2, x)) for x in log2_hosp_trend_1]}"
)

# quadratic (degree 2)
poly_2 = PolynomialFeatures(degree=2)
X_poly_2 = poly_2.fit_transform(X)

poly_2.fit(X_poly_2, y)
Example #29

import numpy as np 
import matplotlib.pyplot as plt 
# generate a synthetic dataset
x = np.random.uniform(-3,3,size = 100)
X = x.reshape(-1,1)  
y = 0.5 * x**2 + x + 2 + np.random.normal(0,1,size = 100)

from sklearn.preprocessing import PolynomialFeatures
# degree: how many powers of the data to add as features
poly = PolynomialFeatures(degree = 5)
poly.fit(X)
X2 = poly.transform(X)
# the transform already prepends a column of ones, so there is no need to add a separate all-ones X0 column
from sklearn.linear_model import LinearRegression,Ridge
lin_reg2 = LinearRegression()
lin_reg2.fit(X2,y)
y_predict2 = lin_reg2.predict(X2)
ridge=Ridge(alpha=60)
ridge.fit(X2,y)
y_pre=ridge.predict(X2)
plt.scatter(x,y)
plt.plot(np.sort(x),y_predict2[np.argsort(x)],color = 'r')
plt.plot(np.sort(x),y_pre[np.argsort(x)],color = 'g')
plt.show()
print(lin_reg2.coef_)
print(ridge.coef_)
Example #30
    x, y = get_data()

    # Divide the data into Train, dev and test
    x_train, x_test_all, y_train, y_test_all = train_test_split(x,
                                                                y,
                                                                test_size=0.3,
                                                                random_state=9)
    x_dev, x_test, y_dev, y_test = train_test_split(x_test_all,
                                                    y_test_all,
                                                    test_size=0.3,
                                                    random_state=9)

    #Prepare some polynomial features
    poly_features = PolynomialFeatures(interaction_only=True)
    poly_features.fit(x_train)
    x_train_poly = poly_features.transform(x_train)
    x_dev_poly = poly_features.transform(x_dev)

    #choosen_model,choosen_subset,low_mse = subset_selection(x_train_poly,y_train)
    choosen_model = build_model(x_train_poly, y_train, 20)
    #print choosen_subse
    predicted_y = choosen_model.predict(x_train_poly)
    print "\n Model Performance in Training set (Polynomial features)\n"
    mse = model_worth(y_train, predicted_y)

    # Apply the model on dev set
    predicted_y = choosen_model.predict(x_dev_poly)
    print "\n Model Performance in Dev set  (Polynomial features)\n"
    model_worth(y_dev, predicted_y)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv('bluegills.csv')
x = df.iloc[:, 0:1].values
y = df.iloc[:, -1].values

from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=4)
X = poly.fit_transform(x)
poly.fit(X, y)

from sklearn.linear_model import LinearRegression
linear = LinearRegression()
linear.fit(X, y)

x_pred = linear.predict(X)

plt.scatter(x, y, color='red')
plt.plot(x, linear.predict(X), color='blue')
plt.xlabel('Position_level')
plt.ylabel('Salary')
plt.show()
Example #32
    plt.subplots_adjust(top=.96)
    plt.ylim(-1000, 8000)
    plt.xlim(-2.03, 2.03)
    plt.legend(loc='lower center', borderaxespad=0, borderpad=0, ncol=2)

    style_figs.light_axis()
    plt.savefig('tide_polynome_%d.pdf' % d, facecolor='none', edgecolor='none')

# %%
# Plot the corresponding basis

plt.figure(figsize=[5.12, 3])

for d in (10, 100, 1000):
    transformer = PolynomialFeatures(degree=d)
    transformer.fit(t.reshape(-1, 1), y)
    basis = transformer.transform(t_test.reshape(-1, 1))
    for i in range(2, 10):
        this_signal = basis[:, -i]
        this_signal /= this_signal.max()
        plt.plot(t_test, this_signal, linewidth=2, color='.75')

    this_signal = basis[:, -3]
    this_signal /= this_signal.max()

    this_signal = basis[:, -1]
    this_signal /= this_signal.max()
    plt.plot(t_test, this_signal, label='Degree %d' % d)

    #style_figs.no_axis()
    plt.subplots_adjust(top=.96)
X_poly = poly.fit_transform(X)

# create training and test sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
         X_poly, y, test_size=0.3, random_state=0)

## POLYNOMIAL
# Create linear regression object
poly = linear_model.LinearRegression(normalize=True)

# Train the model using the training sets
X_train_no_intercept = X_train
X_train = X_train.reshape(-1, X_train.shape[1])
poly.fit(X_train, y_train)

# The intercept
print('Intercept: \n', poly.intercept_)
# The coefficients
print('Coefficients: \n', poly.coef_)
# The mean square error
print("Residual sum of squares, training data: %.2f"
      % np.mean((poly.predict(X_train) - y_train) ** 2))
print("Residual sum of squares, test data: %.2f"
      % np.mean((poly.predict(X_test) - y_test) ** 2))
var_to_graph['multReg_poly'] = np.mean((poly.predict(X_test) - y_test) ** 2)
# Explained variance score: 1 is perfect prediction
print('Variance score, training data: %.2f' % poly.score(X_train, y_train))
#vector of prediction error
print('Distribution of prediction error on training data:')
Example #34
x_train = traindata['Father'].values.reshape(-1, 1)
y_train = traindata['Son'].values.reshape(-1, 1)
x_test = testdata['Father'].values.reshape(-1, 1)
y_test = testdata['Son'].values.reshape(-1, 1)
from sklearn.metrics import mean_squared_error
from math import sqrt
polyreg = PolynomialFeatures(degree=10)
x_modified_train = polyreg.fit_transform(x_train)
x_modified_test = polyreg.fit_transform(x_test)
model = linear_model.Lasso(alpha=0.5)
model.fit(x_modified_train, y_train)
y_predicted_test = model.predict(x_modified_test)
y_predicted_train = model.predict(x_modified_train)
print('RMSE Train:', sqrt(mean_squared_error(y_train, y_predicted_train)))
print('RMSE Test:', sqrt(mean_squared_error(y_test, y_predicted_test)))
train_err = []
test_err = []
alpha_vals = np.linspace(0, 1, 9)
for alpha_v in alpha_vals:
    polyreg = linear_model.Lasso(alpha=alpha_v)
    polyreg.fit(x_train, y_train)
    train_err.append(
        sqrt(mean_squared_error(y_train, polyreg.predict(x_train))))
    test_err.append(sqrt(mean_squared_error(y_test, polyreg.predict(x_test))))
plt.title('Lasso')
plt.xlabel('Alpha value')
plt.ylabel('RMSE')
plt.plot(np.linspace(0, 1, 9), train_err, 'bo-', label='Train')
plt.plot(np.linspace(0, 1, 9), test_err, 'ro-', label='Test')
plt.legend()
plt.show()
Example #35
print('Mean of MAE after 1000 tests: ',np.mean(MAE_arr,axis=0))
print('STD of MAE after 1000 tests: ', np.std(MAE_arr,axis=0))

#Create loop for P equal 1,2,3,4
for n in range (1,5):
	RMSE_list = []
	#Create empty array
	error_array = np.zeros(shape=(1,1))

	#create loop for 1000 tests
	for i in range(0,1000):
		#define polynomial degree
		poly_reg_model = PolynomialFeatures(degree=n)
		X_train, X_test, y_train, y_test = train_test_split(X_matrix,y_matrix,test_size=20,train_size=372)
		#train a linear model on the expanded features (LinearRegression assumed imported above)
		reg_model_train = LinearRegression()
		reg_model_train.fit(poly_reg_model.fit_transform(X_train),y_train)
		#predict on the transformed test set
		y_predict = reg_model_train.predict(poly_reg_model.transform(X_test))
		#calculate mean squared error
		RMSE_list.append(sqrt(mean_squared_error(y_predict,y_test)))
		RMSE_arr = np.array(RMSE_list)

		#subtract test and predict
		error = y_test - y_predict

		#add to array for plotting
		error_array = np.concatenate((error_array,error),axis=0)
		

	print('Mean of squared Error when P equals ',n,np.mean(RMSE_arr,axis=0))
	print('STD of squared Error when P equals ',n,np.std(RMSE_arr,axis=0))
"""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

data = pd.read_csv("Income_Data.csv")
reg = LinearRegression()
polyf = PolynomialFeatures(degree=6)
features = data.iloc[:, 0:1]
labels = data.iloc[:, 1:]

features_poln = polyf.fit_transform(features)
polyf.fit(features_poln)
reg.fit(features_poln, labels)
""" we will have to change the value too in the polynomial regression 
format in order to predict the value """

print(reg.predict(polyf.fit_transform([[6.5]])))
""" OBSERVING THE RESULT OF POLYNOMIAL REGRESSION """

plt.scatter(features, labels, color='red')
plt.plot(features, reg.predict(polyf.fit_transform(features)), color='blue')
plt.title('Truth or Bluff (Polynomial Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Polynomial Regression results (for higher resolution and smoother curve)


if __name__ == "__main__":
    
    x,y = get_data()
    
    # Divide the data into Train, dev and test    
    x_train,x_test_all,y_train,y_test_all = train_test_split(x,y,test_size = 0.3,random_state=9)
    x_dev,x_test,y_dev,y_test = train_test_split(x_test_all,y_test_all,test_size=0.3,random_state=9)
    
    
    
    #Prepare some polynomial features
    poly_features = PolynomialFeatures(2,interaction_only=True)
    poly_features.fit(x_train)
    x_train_poly = poly_features.transform(x_train)
    x_dev_poly   = poly_features.transform(x_dev)
    
    # Build model with polynomial features
    model_poly = build_model(x_train_poly,y_train)
    predicted_y = model_poly.predict(x_train_poly)
    print "\n Model Performance in Training set (Polynomial features)\n"
    model_worth(y_train,predicted_y)  

    # View model details
    view_model(model_poly)
    
    # Apply the model on dev set
    predicted_y = model_poly.predict(x_dev_poly)
    print "\n Model Performance in Dev set  (Polynomial features)\n"
Example #38
class HiddenStateSimulator(HigherOrderSimulator):
    def __init__(self,
                 n,
                 x_index,
                 h_index=None,
                 degree=2,
                 interaction_strength=None,
                 *args,
                 **kwargs):
        """
        Args:
            n:
            x_index:
            h_index:
            degree:
            interaction_strength: interaction strength defines drop_i as well as beta_rng for interaction terms
                effect sizes
            *args:
            **kwargs:
        """
        if "noise_var" in kwargs:
            assert kwargs[
                'noise_var'] == 0, "HiddenStateSimulator must set Noise_var=0; got %s" % kwargs[
                    'noise_var']
        self.x_index = x_index
        self.x_len = len(self.x_index)
        self.interaction_strength = interaction_strength
        self.h_index = h_index if h_index is not None else []
        self.h_len = len(self.h_index)
        # the order for concat is x + h
        p = self.x_len + self.h_len
        if interaction_strength is None:
            super().__init__(n=n,
                             p=p,
                             degree=degree,
                             noise_var=0,
                             drop_a=0,
                             *args,
                             **kwargs)
        else:
            super().__init__(n=n,
                             p=p,
                             degree=degree,
                             noise_var=0,
                             drop_a=0,
                             drop_i=1 - interaction_strength,
                             *args,
                             **kwargs)
        # overwrite
        self.polynomial_fitter = PolynomialFeatures(degree=degree,
                                                    interaction_only=True,
                                                    include_bias=False)
        self.polynomial_fitter.fit(np.zeros((self.n, self.p)))
        self.beta_a = np.zeros(p)
        self.beta_i = np.zeros(self.polynomial_fitter.n_output_features_ - p)
        self.powers_i_ = self.polynomial_fitter.powers_[p:]
        if self.interaction_strength is None:
            self.beta_i_rng = self.beta_rng
        else:
            # normal distribution has 95% prob. of falling within mu +/- 2*sigma
            self.beta_i_rng = lambda n: np.sign(
                np.random.uniform(-1, 1, n)) * np.random.uniform(
                    self.interaction_strength, 0.1, n)

    def sample_effect(self):
        # additive
        a_idx = np.random.choice(self.p,
                                 int(np.ceil(self.p * (1 - self.drop_a))),
                                 replace=False)
        self.beta_a[a_idx] = self.beta_rng(len(a_idx))
        # interaction
        i_idx = np.random.choice(
            len(self.beta_i),
            int(np.ceil(len(self.beta_i) * (1 - self.drop_i))),
            replace=False)
        self.beta_i[i_idx] = self.beta_i_rng(len(i_idx))
        self.is_beta_built = True

    def sample_data(self, N=None, hs=None, *args, **kwargs):
        assert self.h_len == 0 or hs is not None, "If h_index is not empty, must parse `hs` in argument"
        N = self.n if N is None else N
        X = self.x_rng(N * self.x_len).reshape(N, self.x_len)
        if hs is not None:
            h = hs[:, self.h_index]
            X = np.concatenate([X, h], axis=1)
        X_s = self.polynomial_fitter.transform(X)
        if not self.is_beta_built:
            self.sample_effect()
        beta = np.concatenate([self.beta_a, self.beta_i], )
        y = X_s.dot(beta) + np.random.normal(0, np.sqrt(self.noise_var), N)
        if self.with_input_blocks:
            X = [
                X[:, i] if len(X.shape) > 2 else X[:,
                                                   i].reshape(X.shape[0], 1)
                for i in range(X.shape[1])
            ]
        return X, y
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import PolynomialFeatures

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR')
training_indices, testing_indices = train_test_split(tpot_data.index, stratify = tpot_data['class'].values, train_size=0.75, test_size=0.25)

result1 = tpot_data.copy()

# Use Scikit-learn's PolynomialFeatures to construct new features from the existing feature set
training_features = result1.loc[training_indices].drop('class', axis=1)

if len(training_features.columns.values) > 0 and len(training_features.columns.values) <= 700:
    # The feature constructor must be fit on only the training data
    poly = PolynomialFeatures(degree=2, include_bias=False)
    poly.fit(training_features.values.astype(np.float64))
    constructed_features = poly.transform(result1.drop('class', axis=1).values.astype(np.float64))
    result1 = pd.DataFrame(data=constructed_features)
    result1['class'] = tpot_data['class'].values  # restore the class labels from the original frame
else:
    result1 = result1.copy()

result2 = result1.copy()
# Perform classification with an Ada Boost classifier
adab2 = AdaBoostClassifier(learning_rate=0.15, n_estimators=500, random_state=42)
adab2.fit(result2.loc[training_indices].drop('class', axis=1).values, result2.loc[training_indices, 'class'].values)

result2['adab2-classification'] = adab2.predict(result2.drop('class', axis=1).values)
# Feature Scaling
"""from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)"""

# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree = 5)
X_poly = poly_reg.fit_transform(X)
poly_reg.fit(X_poly, y)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)

# Visualising the Linear Regression results
plt.scatter(X, y, color = 'red')
plt.plot(X, lin_reg.predict(X), color = 'blue')
plt.title('Truth or Bluff (Linear Regression)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Polynomial Regression results
plt.scatter(X, y, color = 'red')
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)), color = 'blue')
plt.title('Truth or Bluff (Polynomial Regression)')