Example no. 1
            # print("Conditions: " , comb)
            print("Conditions shape: ", comb.shape)
            # print("Coefficients: " , models_b)
            print("Coefficients shape: ", models_b.shape)

            hyper_model = MultiOutputRegressor(make_pipeline(PolynomialFeatures(hyper_degree),
                                                             linear_model.LinearRegression(fit_intercept=True,
                                                                                           normalize=True)))
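            # Note: the 'normalize' argument was deprecated in scikit-learn 1.0 and removed in 1.2;
            # on recent versions, drop it and add a StandardScaler() step to the pipeline instead.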

            ##################################################################
            # Predict
            hyper_model.fit(comb, models_b)
            pred = hyper_model.predict(comb)

            score = hyper_model.score(comb, models_b)

            print("Score: ", score)

        # Hyper-Model implementation
        else:

            coeffs = []
            error_models = []

            for i in range(comb.shape[0]):

                print('Target: ', comb[i, 0], ',', comb[i, 1])

                y = beta.pdf(x, comb[i, 0], comb[i, 1])
Example no. 2
regr_multirf = MultiOutputRegressor(RandomForestRegressor(max_depth=max_depth,
                                                          random_state=0))
regr_multirf.fit(X_train, y_train)

regr_rf = RandomForestRegressor(max_depth=max_depth, random_state=2)
regr_rf.fit(X_train, y_train)

# Predict on new data
y_multirf = regr_multirf.predict(X_test)
y_rf = regr_rf.predict(X_test)

# Plot the results
plt.figure()
s = 50
a = 0.4
plt.scatter(y_test[:, 0], y_test[:, 1], edgecolor='k',
            c="navy", s=s, marker="s", alpha=a, label="Data")
plt.scatter(y_multirf[:, 0], y_multirf[:, 1], edgecolor='k',
            c="cornflowerblue", s=s, alpha=a,
            label="Multi RF score=%.2f" % regr_multirf.score(X_test, y_test))
plt.scatter(y_rf[:, 0], y_rf[:, 1], edgecolor='k',
            c="c", s=s, marker="^", alpha=a,
            label="RF score=%.2f" % regr_rf.score(X_test, y_test))
plt.xlim([-6, 6])
plt.ylim([-6, 6])
plt.xlabel("target 1")
plt.ylabel("target 2")
plt.title("Comparing random forests and the multi-output meta estimator")
plt.legend()
plt.show()
Example no. 3
y = y_data.values
x_pred = test.values

x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    train_size=0.8,
                                                    random_state=33)

# model = DecisionTreeRegressor(max_depth =4)                     # does not separate well once max_depth goes above a certain value
# model = RandomForestRegressor(n_estimators = 200, max_depth=3)
# model = GradientBoostingRegressor()
# model = XGBRegressor()

model = MultiOutputRegressor(GradientBoostingRegressor())
model.fit(x_train, y_train)
score = model.score(x_test, y_test)
print(score)
y4 = model.predict(test.values)

# The definition and the for loop mentioned here exist because GB and XGB only accept a scalar (1-D) target,
# so to run those two models the current dataset has to be split column by column (4 times, one per column)
# and each column fitted as a scalar target; the for loop carries that out (a minimal per-column sketch follows below).
# The other models, random forest and decision tree, run fine either way, even with the multi-column target.

# y_predict = tree_fit(y_train, y_test)

print(y4.shape)
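
# A minimal, hypothetical sketch of the per-column approach described above: fit one
# GradientBoostingRegressor (or XGBRegressor) per target column and stack the 1-D predictions
# back together. Assumes x_train, y_train, np, and GradientBoostingRegressor from earlier in this script.
def fit_per_column(x_tr, y_tr, x_te):
    preds = []
    for col in range(y_tr.shape[1]):
        m = GradientBoostingRegressor()
        m.fit(x_tr, y_tr[:, col])      # scalar (1-D) target, as GB/XGB expect
        preds.append(m.predict(x_te))
    return np.column_stack(preds)      # back to shape (n_samples, n_targets)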
'''
# submission
a = np.arange(10000,20000)
submission = pd.DataFrame(y4, a)
submission.to_csv('D:/Study/Bitcamp/Dacon/comp1/sub_GB.csv',index = True, header=['hhb','hbo2','ca','na'],index_label='id')
Example no. 4
    days = observe_days + predict_days

    for i in range(num_times - days +1):
        x_data.append(np.concatenate([infected[i:observe_days+i],deaths[i:observe_days+i],]))
        y_data.append(np.concatenate([infected[observe_days+i:days+i],deaths[observe_days+i:days+i]]))
# %% split the dataset into train and test
x_data = np.array(x_data)
y_data = np.array(y_data)
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.25, random_state=87)
# %% load dataset in xgboost format
from sklearn.multioutput import MultiOutputRegressor
import xgboost as xgb  # not imported in the visible snippet; added so this cell runs standalone
multioutputregressor = MultiOutputRegressor(xgb.XGBRegressor()).fit(x_train, y_train)

# %%
multioutputregressor.score(x_train,y_train)
# %%
multioutputregressor.score(x_test,y_test)
#%%
((multioutputregressor.predict(x_test) - y_test)**2).sum()
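# (sum of squared errors over the whole test set; dividing by y_test.size gives the mean squared error)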
# %%
test_target = 7777
#multioutputregressor.predict(x_test)[test_target]
# %%
import matplotlib.pyplot as plt
plt.figure()
plt.title('infected')
plt.plot(np.arange(observe_days),x_test[test_target][0:observe_days],label='observe')
plt.plot(np.arange(predict_days)+observe_days,y_test[test_target][0:predict_days],'-o',label='true')
plt.plot(np.arange(predict_days)+observe_days,multioutputregressor.predict(x_test)[test_target][0:predict_days],'-o',label='predict')
plt.legend()
Example no. 6
# run with Python 3
import numpy as np

training_data = np.loadtxt("/home/pi/git-repos/hello-world/training_data")

x = training_data[:, 0:3]
y = training_data[:, 3:5]

from sklearn.model_selection import train_test_split
from sklearn.datasets import make_regression
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor

x_train, x_test, y_train, y_test = train_test_split(x, y)
print("x_train.shape", x_train.shape)
print("y_train.shape", y_train.shape)

reg = MultiOutputRegressor(GradientBoostingRegressor()).fit(x_train, y_train)

print("reg.score(x_train, y_train)", reg.score(x_train, y_train))
print("reg.score(x_test, y_test)", reg.score(x_test, y_test))

output = reg.predict(x_test)
Example no. 7
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)  # reuse the scaler fitted on the training data instead of refitting on the test set

# Build the model

# model = XGBRegressor(max_depth=4)
model = MultiOutputRegressor(
    XGBRegressor(random_state=12, n_estimators=500,
                 max_depth=30)).fit(x_train, y_train)

# Train (already fitted above via the chained .fit call; refitting here is redundant but harmless)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

score = model.score(x_test, y_test)  # score(X, y) expects features and targets, not (y_true, y_pred)

print("score : ", score)

# Evaluation and prediction

# loss, mse = model.evaluate(x_test, y_test, batch_size=1)

x_predict = np.array([[795, 1550, 1746, 1690]])
x_predict = scaler.transform(x_predict)  # transform only; refitting the scaler on a single sample would distort it

y_real = np.array([777, 1559, 1762, 1659])
y_predict = model.predict(x_predict)

# print("loss : ", loss)
# print("mse : ", mse)
Example no. 8
def main():
    ## FIRST

    print("######### MULTI-OUTPUT REGRESSION #########")

    #Read data
    X=np.loadtxt('fileEnd_X.pos')
    y=np.loadtxt("fileEnd_Y.pof")


    #Split into training and test sets
    X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2)


    #Training model
    clf=MultiOutputRegressor(Ridge(random_state=123)).fit(X_train, y_train)

    #Make predictions
    Y_pred = clf.predict(X_test) #Values of test set
    print(clf.score(X_train,y_train))

    # Creating dataset from the test-set predictions
    z = Y_pred[:,0]
    x = Y_pred[:,1]
    y = Y_pred[:,2]

    # Creating dataset from the true test-set values
    z1 = y_test[:,0]
    x1 = y_test[:,1]
    y1 = y_test[:,2]

 
    # Creating figure
    fig = plt.figure(figsize = (10, 7))
    ax = plt.axes(projection ="3d")
 
    # Creating plot
    ax.scatter3D(x, y, z, color = "blue", alpha=0.1)

    ax.scatter3D(x1, y1, z1, color = "red")
    plt.title("Predicted Y vs. test set")

 
    # show plot
    plt.show()

    ## SECOND


    print("######### POLYNOMIAL REGRESSION #########")
    ## Prepare the data
    data = pd.read_csv(filename,sep="\t",header=0)

    # Understanding the data
    #print('Dataset information')
    #print(data.shape)
    #print(data.head(78))
    #print(data.columns)


    #### PREPARE DATA FOR POLYNOMIAL REGRESSION ###

    # Define input X: only column 6 ('age')
    X_p = data['age']

    # Define Y
    y_p = data['length']


    
    # Define the algorithm to use
    pr = linear_model.LinearRegression()

    # Define the degree of the polynomial
    print("Enter the value for degree")
    input_degree = input()
    deg = int(input_degree)
    poli_reg = PolynomialFeatures(degree = deg)
    
    precision = 0
    data = data.values
    k_iterations = 100
    n_size = len(data)
    print("n_size", n_size)
    

    # Start bootstrapping
    for i in range(k_iterations):
        train = resample(data,n_samples = n_size)
        test = np.array([x for x in data if x.tolist() not in train.tolist()])
        
        X_train = train[:,0].reshape(-1,1) # column 0 is age, column 1 is length
        y_train = train[:,1].reshape(-1,1)
        X_test = test[:,0].reshape(-1,1)
        y_test = test[:,1].reshape(-1,1)
         
        X_train_poli = poli_reg.fit_transform(X_train)
        X_test_poli = poli_reg.fit_transform(X_test)
        
        # train
        pr.fit(X_train_poli,y_train)

        # score
        Y_pred_pr = pr.predict(X_test_poli)
    
        #print("Actual values")
        #print(y_test)

        #print("Predicted values")
        #print(Y_pred_pr)

        
        # Accumulate the score for each bootstrap iteration
        precision += pr.score(X_train_poli, y_train)
        
    plt.scatter(X_p,y_p) # full data set
    plt.scatter(X_test,Y_pred_pr,color="red",linewidth=3)
    plt.show()


    print("Average score")
    print(precision/k_iterations)

    mse = mean_squared_error(y_test,Y_pred_pr)
    print("MSE ", mse)
Example no. 9
SEQUENCE = np.load('SequenceArray.npz')['SequenceArray']  # load the matrix
print(SEQUENCE[0])
print('Data loaded')
print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
NUMBER = len(SEQUENCE)
SEQUENCE2 = []
for i in range(int(NUMBER / 2)):
    SEQUENCE2.append(SEQUENCE[2 * i])
#Step = 3
SEQUENCE3 = []
for i in range(int(NUMBER / 3)):
    SEQUENCE3.append(SEQUENCE[3 * i])
#def GetSequence()
BASIC_SEQUENCE = np.zeros((NUMBER - Frames, Weight * Height))
NEXT_SEQUENCE = np.zeros((NUMBER - Frames, Frames * Weight))
for i in range(Frames):
    print(i)
    BASIC_SEQUENCE[:, i] = SEQUENCE[i:i + NUMBER - Frames]
    NEXT_SEQUENCE[:, i] = SEQUENCE[i + 1:i + NUMBER - Frames + 1]
train_X, test_X, train_y, test_y = train_test_split(BASIC_SEQUENCE[:10],
                                                    NEXT_SEQUENCE[:10],
                                                    test_size=0.2,
                                                    random_state=4)
model = MultiOutputRegressor(LinearSVR(loss='squared_epsilon_insensitive', C=1.0))  # 'mean_square_error' is not a valid LinearSVR loss
model.fit(train_X, train_y)
score = model.score(test_X, test_y)
train_loss = mean_squared_error(train_y, model.predict(train_X))
val_loss = mean_squared_error(test_y, model.predict(test_X))
print("Score", score)
print("train loss: %.4f - val_loss: %.4f" % (train_loss, val_loss))
Example no. 10
def run_sklearn_poly(filename):
	dataframe = pandas.read_csv(filename, header=None)
	dataset = dataframe.values
	X = dataset[:,0].astype(float)
	# Y = dataset[:,1:].astype(float)
	# Y = dataset[:,257:].astype(float) # predict just one curve
	Y = dataset[:,1:].astype(float) # predict just one curve

	m = dataframe.shape[0] # ROWS or test samples
	X_test = X[m-1]
	Y_test = Y[m-1]

	# preprocess
	# min_max_scaler = preprocessing.MinMaxScaler()
	# min_max_scaler.fit(X)
	# X = min_max_scaler.transform(X)

	# min_max_scaler = preprocessing.MinMaxScaler()
	# min_max_scaler.fit(Y)
	# Y = min_max_scaler.transform(Y)

	# print(Y)
	# Y = Y.reshape(-1, 1)


	# 1000: 56
	# 2000: 60




	##
	X = X.reshape(-1, 1)

	# X = np.sort(5 * np.random.rand(40, 1), axis=0)
	# Y = np.sin(X).ravel()
	# print(X)
	# print(Y)
	# model = make_pipeline(PolynomialFeatures(500), Ridge())
	# svr_rbf = SVR(kernel='rbf')

	svr_multi = MultiOutputRegressor(SVR(kernel='rbf', C=1e6),n_jobs=-1)
	svr_multi.fit(X, Y)
	y_rbf = svr_multi.predict(X)

	# svr_rbf = SVR(kernel='rbf', C=10000)
	# y_rbf = svr_rbf.fit(X, Y).predict(X)

	# svr_poly = SVR(kernel='poly', C=1e3, degree=2)

	# Score
	print("SCORE=%.2f" % svr_multi.score(X, Y))  # score against the true targets, not the model's own predictions

	f, subplots = plt.subplots(2)

	# model.fit(X, Y)
	subplots[0].plot(Y[2])
	# subplots[1].plot(Y)

	Out = np.fft.ifft(Y[2])
	Out = Out * 100

	subplots[1].plot(Out)

	# mx = np.max(Out)
	mx = 32767
	audio = np.fromiter((s * mx for s in Out.real), dtype=np.int16)  # take the real part; np.fft.ifft returns complex values
	wavio.write('out.wav', audio, 44100)
	# plt.plot(X, y_rbf)
	# plt.plot(X, Y)
	
	np.savetxt("predsvm.csv", y_rbf, fmt='%.7f', delimiter=",")


	# Plot against freqs
	Fs = 44100
	samples = 512
	f = Fs * np.mgrid[0:512/2 + 1]/512
	# plt.plot(Y)

	plt.show()
Example no. 11
class HyperProcessModel:
    def __init__(self):
        self.in_shape = None
        self.shapes = []
        self.conditions = []
        self.b_params = []
        self.degree = -1
        self.optimization = False
        self.hyper_model = False

    # HPM
    def decomposition(self):
        """
        Performs the decomposition of all shapes using eigenvectors
        :return: all eigenvalues, all eigenvectors
        """
        print('Start Decomposing...')
        models = self.shapes.transpose()

        covariance = np.cov(models)
        eigenvalues, eigenvectors = np.linalg.eig(covariance)

        print('Decomposing complete!')

        return eigenvalues.real, eigenvectors.real

    # gives preference to the max_variance
    def get_suitable_eigen(self,
                           eigenvals,
                           n_components=None,
                           max_variance=0.95):
        """
        Select the most suitable eigenvectors to use in the SSM. In this particular case,
        when both a number of components and a maximum variance are specified, preference is
        given to variance: iterating through all eigenvalues, if the cumulative variance reaches
        the target first, the corresponding number of components is returned; otherwise, if the
        requested number of components is reached first, that number is returned.
        :param eigenvals: array with all eigenvalues
        :param n_components: int, default=None - number of components to be included
        :param max_variance: float, default=0.95 - variance to be reached
        :return: int - number of suitable components
        """

        sum_eigenvals = sum(eigenvals)
        variance = eigenvals / sum_eigenvals
        variance = [
            value if np.abs(value) > 0.00001 else 0 for value in variance
        ]

        cumulative_sum = 0

        for i in range(0, len(eigenvals)):
            cumulative_sum += variance[i]

            # if (cumulative_sum >= variance_max and i > 0):
            if (cumulative_sum >= max_variance):
                return i + 1

            if n_components is not None:
                if i + 1 == n_components:
                    return i + 1

        return len(eigenvals)

    def get_b_param(self, mean, shape, evec):
        """
        According to the SSM, the b parameters are the deformable parameters that allow the original
        shape to be reproduced from the decomposed shapes (eigenvectors) and the mean shape.
        :param mean: array (nsamples*nfeatures) - array with all values for the mean shape.
        :param shape: array (nsamples*nfeatures) - shape used to calculate b parameters from the SSM
        :param evec: ndarray - eigenvectors to be used for the b parameter transformation
        :return: array - b parameters / deformable parameters for the corresponding shape
        """
        sub = (shape - mean)
        return np.dot(np.transpose(evec), np.transpose(sub))

    def get_in_shape(self):
        """
        Return the input used to generate shapes for all process models
        :return: ndarray - (nsamples, nfeatures)
        """
        return self.in_shape

    def generate_shape(self, b):
        """
        Based on a deformable parameter (b), generates the corresponding shape
        :param b: array - set of deformable parameters
        :return: array (nsamples*nfeatures) - generated shape that needs to be reshaped as (nsamples, nfeatures)
        """
        return self.mean_shape + np.transpose(np.dot(self.eigenvectors, b))

    def set_pol_degree(self, degree):
        """
        Set the polynomial degree for the hyper model
        :param degree: int
        :return: None
        """
        self.degree = degree

    def stochastic_factorial_design(self, granularity, n_samples, min, max):
        """
        Creates random combinations of feature values, bounded by a minimum and maximum, drawn from a factorial design with "granularity" levels per feature
        :param granularity: int - level of detail (granularity) or steps in each feature. Linear space between min and max with n "granularity" values.
        :param n_samples: int - number of samples to be produced
        :param min: array (nfeatures) - minimum value for all features
        :param max: array (nfeatures) - maximum value for all features
        :return: ndarray - all combinations
        """

        if granularity < 2:
            print('Granularity provided is less than 2')
        elif len(np.array(min)) == 1:
            self.in_shape = np.linspace(min, max, granularity).transpose()[0]
        else:
            array_temp = np.array([])
            factor = len(min)

            for i in range(factor):
                array_temp = np.append(
                    array_temp, np.linspace(min[i], max[i], granularity))

            matrix_temp = array_temp.reshape(factor, granularity)

            combo = matrix_temp
            my_sample = []

            while len(my_sample) < n_samples:
                # Choose one random item from each list; that forms an element
                elem = [
                    comp[random.randint(0,
                                        len(comp) - 1)] for comp in combo
                ]
                # Append the sampled element (note: duplicates are not filtered out here)
                my_sample.append(elem)

            result = np.matrix(my_sample)

            self.in_shape = result

        return self.in_shape

    def add_shape(self, shape):
        """
        Adds a shape to be used in the SSM. Conditions should be added in the same order as shapes
        :param shape: array (nsamples*nfeatures) - shape to be added
        :return: None
        """
        # Sample 100 datapoints from the trained source models - Produce the shapes
        if len(self.shapes) == 0:
            self.shapes = np.matrix(shape)
        else:
            self.shapes = np.vstack((self.shapes, shape))

    def add_condition(self, cond):
        """
        Adds a certain condition to be used by the hyper model. Shapes should be added in the same order as conditions
        :param cond: array - conditions
        :return: None
        """
        # Sample 100 datapoints from the trained source models - Produce the shapes
        if len(self.conditions) == 0:
            self.conditions = np.matrix(cond)
        else:
            self.conditions = np.vstack((self.conditions, cond))

    def get_mean_shape(self):
        """
        Calculates and returns the mean shape based on all previously added shapes.
        :return: array (nsamples*nfeatures) - Mean shape for SSM
        """
        self.mean_shape = np.mean(self.shapes, axis=0)
        return self.mean_shape

    def get_eigen(self, n_components, max_variance):
        """
        Calculates all eigenvectors and eigenvalues and returns only the most suitable ones. Meanwhile, all deformable
        parameters are calculated for all available shapes. This is a combination of previously existing functions to
        automate the calculation process.
        :param n_components: int - number of components
        :param max_variance: float - variance to be reached
        :return: eigenvalues (array), eigenvectors (ndarray)
        """
        # eigenvectors
        self.eigenvalues, self.eigenvectors = self.decomposition()

        # suitable eigenvectors and not all of them
        modes_def = self.get_suitable_eigen(self.eigenvalues, n_components,
                                            max_variance)

        # filter suitable eigenvectors
        self.eigenvalues = self.eigenvalues[0:modes_def]
        self.eigenvectors = np.transpose(
            np.transpose(self.eigenvectors)[0:modes_def])

        ##########################################
        # Calculate B params
        for i in range(len(self.shapes)):
            if len(self.b_params) == 0:
                self.b_params = np.matrix(
                    self.get_b_param(self.mean_shape, self.shapes[i],
                                     self.eigenvectors)).transpose()
            else:
                self.b_params = np.vstack(
                    (self.b_params,
                     self.get_b_param(self.mean_shape, self.shapes[i],
                                      self.eigenvectors).transpose()))
        ##########################################

        return self.eigenvalues, self.eigenvectors

    def train_hyper_model(self, n_components=None, max_variance=0.95):
        """
        Train the hyper model. The normal and most interesting scenario is when the number of conditions (c) is
        higher than the number of deformable parameters (b), so the hyper model can be trained as h: c -> b.
        However, it might be the case that b is higher than c, so ideally the model would be trained
        as h: b -> c. In that case, either 1) the inverse of h needs to be calculated or 2) an optimization
        problem needs to be formulated to estimate b based on a target c. To ease this latter case,
        a MultiOutputRegressor is used for the hyper model, meaning that one model per target is trained, so the case
        of b being higher than c is no longer a problem.
        :param n_components: int - number of components
        :param max_variance: float - variance
        :return: float - score (R^2) of the trained model (not to be confused with an error metric)
        """
        self.get_mean_shape()
        self.get_eigen(n_components, max_variance)

        if self.degree == -1:
            print("Please define first the degree of the Hyper Model")
            exit()

        self.hyper_model = MultiOutputRegressor(
            make_pipeline(
                PolynomialFeatures(self.degree),
                linear_model.LinearRegression(fit_intercept=True,
                                              normalize=True)))
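        # Note: the 'normalize' argument was deprecated in scikit-learn 1.0 and removed in 1.2;
        # on recent versions, drop it and add a StandardScaler() step to the pipeline instead.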

        print("Dimension of conditions:", self.conditions.shape[1])
        print("Dimension of b parameters:", self.b_params.shape[1])
        ##################################################################
        # Predict
        self.hyper_model.fit(self.conditions, self.b_params)
        score = self.hyper_model.score(self.conditions, self.b_params)

        return score

    def predict(self, new_cond):
        """
        Makes a prediction of the deformable parameters to be used in the SSM based on the new conditions provided.
        :param new_cond: array - new conditions
        :return: array - deformable parameters
        """
        return self.hyper_model.predict(new_cond)

    def get_new_shape(self, new_cond):
        """
        Based on the new conditions, returns the generated shape to be used for further training.
        :param new_cond: array - new conditions
        :return: array (nsamples*nfeatures) - new generated shape
        """
        result_def = self.predict(new_cond)[0]

        new_gen_shape = self.generate_shape(result_def)
        new_gen_shape = np.array(new_gen_shape)[0]

        return new_gen_shape

    def set_hyper_model(self, model):
        """
        This function should be used if a new method needs to be used for the hyper model instead of the
        default polynomial.
        :param model: Predictor
        :return: None
        """
        self.hyper_model = model
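
# A minimal usage sketch of the HyperProcessModel class above. The beta-pdf "shapes", the condition
# pairs, and the polynomial degree are hypothetical placeholders chosen only to illustrate the call
# order. Assumes an older scikit-learn release that still accepts np.matrix inputs and
# LinearRegression(normalize=True), which the class relies on internally.
if __name__ == "__main__":
    import numpy as np
    from scipy.stats import beta as beta_dist

    hpm = HyperProcessModel()
    hpm.set_pol_degree(2)

    x = np.linspace(0.01, 0.99, 100)
    for a, b in [(2.0, 5.0), (3.0, 4.0), (4.0, 3.0), (5.0, 2.0)]:
        hpm.add_shape(beta_dist.pdf(x, a, b))   # one sampled "shape" per process condition
        hpm.add_condition([a, b])

    print("Hyper model R^2:", hpm.train_hyper_model(max_variance=0.95))

    # Generate the shape predicted for an unseen condition
    new_shape = hpm.get_new_shape(np.array([[2.5, 4.5]]))
    print("Generated shape length:", len(new_shape))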
Example no. 12
import pandas as pd

# Pre-processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures

# Regressors
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import GradientBoostingRegressor

# Load data from file
TimeConstraint = pd.read_csv("../Data/time_constraint.csv")
le = LabelEncoder()
TimeConstraint.topology = le.fit_transform(TimeConstraint.topology)
x = TimeConstraint.throughput
y = TimeConstraint.drop(['throughput'], axis=1)
x = np.array(x).reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.20,
                                                    random_state=0)
clf = MultiOutputRegressor(RandomForestRegressor(random_state=0))
clf.fit(x_train, y_train)  # fit on the training split so the test score below is not inflated by leakage
print(clf.predict([[47817.84]]))
print(clf.score(x_test, y_test))
Example no. 13
print(len(X.columns))
y = df.iloc[:, -33:]

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
#X_train, explainedVarianceRatio, X_test = principalComponent(X_train, X_test)
# rf1 = RandomForestRegressor(n_estimators = 500, n_jobs = -1, random_state = 42)
# rf1.fit(X, y.iloc[:,0])

regressor = MultiOutputRegressor(RandomForestRegressor())

regressor.fit(X_train, y_train)

score = regressor.score(X_test, y_test)
print(score)

# feat_imp = pd.DataFrame({'importance': rf1.feature_importances_})
# feat_imp['Feature Index'] = X.columns
# feat_imp.sort_values(by = 'importance', ascending = True, inplace = True)
'''plt.barh(range(len(feat_imp)), feat_imp['importance'], color = 'b', align = 'center')
plt.yticks(range(len(feat_imp)), feat_imp['Feature Index'], size = 6)
plt.xlabel('Feature importance')
plt.ylabel('Index')
plt.tight_layout()

plt.savefig('feat_imp1.jpg', format = 'jpg', dpi = 500)'''
Example no. 14
X_test, y_test = df_test.loc[:, [
    'voltage_min', 'cycle_count', 'soc', 'temperature_max'
]], df_test.loc[:, ['age', 'capacity']]

SVR_model = SVR()

model = MultiOutputRegressor(estimator=SVR_model)
print(model)

import time
start_time = time.time()

model.fit(X_train, y_train)

score = model.score(X_train, y_train)
#print("Training score:", score)
preds_train = model.predict(X_train)

preds_test = model.predict(X_test)


ypred_X_test = model.predict(X_test)
ypred_X_train = model.predict(X_train)

y_test = pd.DataFrame(y_test)
ypred_X_test = pd.DataFrame(ypred_X_test)
ypred_X_train = pd.DataFrame(ypred_X_train)
Example no. 15
"""
EFS
==========
"""
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from sparsereg.model.efs import EFS

x, y = make_regression(n_samples=1000,
                       n_features=10,
                       n_informative=10,
                       n_targets=3)
x_train, x_test, y_train, y_test = train_test_split(x, y)
steps = ("scaler", StandardScaler()), ("estimator",
                                       EFS(mu=1, q=3, max_stall_iter=5))
model = MultiOutputRegressor(Pipeline(list(steps)))
model.fit(x_train, y_train)
print(model.score(x_test, y_test))