def linearRegreSin(url, degree):
    # Fit a polynomial regression via the normal equations, then report the
    # error and plot the fit on held-out data.
    [a, b] = getData(url)
    trainA = a[0:140]
    trainB = b[0:140]
    testA = a[140:]
    testB = b[140:]
    poly = PolynomialFeatures(degree)
    trainA = np.float64(poly.fit_transform(trainA))
    testA = np.float64(poly.fit_transform(testA))
    # normal equation: theta = (X^T X)^-1 X^T y
    theta = np.dot(np.dot(np.linalg.inv(np.dot(trainA.T, trainA)), trainA.T), trainB)
    plt.figure(1)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('data')
    plt.plot(trainA[:, 1], trainB, "r*")
    y = np.dot(trainA, theta)
    print(pow(sum((y - trainB) ** 2), 1 / 2) / 140)  # root of training SSE, scaled by n
    y = np.dot(testA, theta)
    #plt.plot(testA[:,1], testB, "r.")
    plt.plot(testA[:, 1], y, "k*")
    print(pow(sum((y - testB) ** 2), 1 / 2) / 60)  # root of testing SSE, scaled by n
    plt.show()
    print(theta)
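# A minimal follow-up sketch (not part of the original): solving the same
# least-squares problem with np.linalg.lstsq, which avoids explicitly
# inverting trainA.T @ trainA and is better conditioned at high degrees.
import numpy as np

def fit_least_squares(X, y):
    # SVD-based solver for min ||X @ theta - y||^2
    theta, _, _, _ = np.linalg.lstsq(X, y, rcond=None)
    return theta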
def main():
    # linear model (using polynomial features)
    model = linear_model.Ridge(alpha=0.1)
    # get data
    out = getData()
    X = out["x_train"]
    y = out["y_train"]
    perc = int(0.75 * len(X))
    X_train = X[0:perc]
    X_cv = X[perc:]
    y_train = y[0:perc]
    y_cv = y[perc:]
    X_test = out["x_test"]
    Id = out["test_id"]
    # add polynomial features (includes the bias unit)
    poly = PolynomialFeatures(degree=3)
    X_train = poly.fit_transform(X_train)
    # train model
    model.fit(X_train, y_train)
    # score
    X_cv = poly.transform(X_cv)
    print model.score(X_cv, y_cv)
    # predict
    X_test = poly.transform(X_test)
    pred = model.predict(X_test)
    with open("submissions/PolyReg3.csv", "w") as f:
        f.write("Id,Hazard\n")
        for i in xrange(len(pred)):
            f.write(str(Id[i]) + "," + str(pred[i]) + "\n")
def batterLife_chargeMoreThan4(chargeTime):
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.preprocessing import PolynomialFeatures
    from sklearn import linear_model

    trainDataArr = np.genfromtxt("trainingdata_batteryLife.txt", delimiter=",")
    # keep only the samples with a charge time above 4 hours
    trainDataArr = trainDataArr[trainDataArr[:, 0] > 4]
    trainData = trainDataArr[:, 0]
    trainData = trainData.reshape(-1, 1)
    trainValue = trainDataArr[:, 1]
    testData = np.array(chargeTime)
    testData = testData.reshape(-1, 1)

    # Plot outputs
    plt.scatter(trainData, trainValue, color='black')
    plt.xticks(())
    plt.yticks(())
    plt.show()

    # Fit regression model
    poly = PolynomialFeatures(degree=1)
    trainData_ = poly.fit_transform(trainData)
    testData_ = poly.fit_transform(testData)
    clf = linear_model.LinearRegression()
    clf.fit(trainData_, trainValue)
    return clf.predict(testData_)
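# Hypothetical usage of the function above, assuming trainingdata_batteryLife.txt
# exists alongside the script; the charge times here are illustrative only.
if __name__ == "__main__":
    predicted = batterLife_chargeMoreThan4([4.5, 6.0, 8.0])
    print(predicted)  # one predicted battery life per charge time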
def logistic_regression(x, y):
    """
    Iterative multi-feature logistic regression.
    Find the best theta by updating it in each iteration.
    Print the training and testing errors for each mapping.
    """
    errors_training_fmeasure = []
    errors_training_accuracy = []
    errors_testing_fmeasure = []
    errors_testing_accuracy = []
    regr = LogisticRegressionKClasses()
    poly = PolynomialFeatures(degree=1)
    # Cross validation
    cv = KFold(len(x), n_folds=10)
    for train_idx, test_idx in cv:
        x_train = x[train_idx]
        x_test = x[test_idx]
        y_train = y[train_idx]
        y_test = y[test_idx]
        x_ = poly.fit_transform(x_train)
        x_2 = poly.fit_transform(x_test)
        regr.fit(x_, y_train)
        # Predict over the testing data and compute the error measures
        predicted_y = regr.predict(x_2)
        conf_matrix = confusion_matrix(y_test, predicted_y)
        precision, recall, f_measure, accuracy = get_measures(conf_matrix, len(set(y_train)))
        print 'Precision:', precision, ' Recall:', recall, ' Accuracy:', accuracy, ' F-Measure:', f_measure
def newtonRaphson(inputFiles):
    pol = PolynomialFeatures(2)
    errors = []
    for File in inputFiles:
        data = tools.readData(File)
        X = data[:, :-1]
        Y = data[:, -1]
        kf = KFold(len(Y), n_folds=10)
        trainError = 0
        testError = 0
        for train, test in kf:
            Z = pol.fit_transform(X[train])
            row, col = Z.shape
            # start from a zero vector; np.empty would leave theta uninitialised
            theta = np.zeros(col, dtype='float')
            meanDiff = 1.0
            i = 1
            #print "Theta iteration %s: \n%s" % ('0', str(theta))
            while abs(meanDiff) > 1.0e-15:
                theta_new = recalculateTheta(theta, Z, Y[train])
                diff = np.subtract(theta_new, theta)
                meanDiff = np.mean(diff)
                #print "Theta iteration %s: \n%s" % (str(i), str(theta_new))
                #print "Diff: %s" % str(meanDiff)
                theta = theta_new
                i += 1
            Z_test = pol.fit_transform(X[test])
            Y_hat_test = np.dot(Z_test, theta)
            Y_hat = np.dot(Z, theta)
            trainError += tools.findError(Y_hat, Y[train])
            testError += tools.findError(Y_hat_test, Y[test])
        trainError = trainError / len(kf)
        testError = testError / len(kf)
        iterative_error = [trainError, testError]
        errors.append(iterative_error)
    return np.asarray(errors)
def polyRegressionKFold(inputFiles, deg=2):
    print "***************************"
    print "Degree: %s" % deg
    start_time = time.time()
    errors = []
    for File in inputFiles:
        print "___________________________"
        print "Data Set: %s" % File
        data = tools.readData(File)
        data = data[np.argsort(data[:, 0])]
        X = data[:, :-1]
        Y = data[:, -1]
        kf = KFold(len(data), n_folds=10, shuffle=True)
        TrainError = 0
        TestError = 0
        for train, test in kf:
            pol = PolynomialFeatures(deg)
            Z = pol.fit_transform(X[train])
            Z_test = pol.fit_transform(X[test])
            theta = regress(Z, Y[train])
            Y_hat = np.dot(Z, theta)
            Y_hat_test = np.dot(Z_test, theta)
            TrainError += mean_squared_error(Y[train], Y_hat)
            TestError += mean_squared_error(Y[test], Y_hat_test)
        TestError /= len(kf)
        TrainError /= len(kf)
        errors.append([TestError, deg])
        print "---------------------------"
        print "Test Error: %s" % TestError
        print "Train Error: %s" % TrainError
    # elapsed time (the original subtraction was reversed and gave a negative value)
    time_taken = time.time() - start_time
    print "Time Taken for primal: %s" % str(time_taken)
    return np.asarray(errors)
def housing_polynomial_regression():
    '''
    Single-feature polynomial fits (degrees 1-3) on the housing data
    :return:
    '''
    X = df[['LSTAT']].values
    y = df['MEDV'].values
    regr = LinearRegression()

    # create polynomial features
    quadratic = PolynomialFeatures(degree=2)
    cubic = PolynomialFeatures(degree=3)
    X_quad = quadratic.fit_transform(X)
    X_cubic = cubic.fit_transform(X)

    # linear fit
    X_fit = np.arange(X.min(), X.max(), 1)[:, np.newaxis]
    regr = regr.fit(X, y)
    y_lin_fit = regr.predict(X_fit)
    linear_r2 = r2_score(y, regr.predict(X))

    # quadratic fit
    regr = regr.fit(X_quad, y)
    y_quad_fit = regr.predict(quadratic.fit_transform(X_fit))
    quadratic_r2 = r2_score(y, regr.predict(X_quad))

    # cubic fit
    regr = regr.fit(X_cubic, y)
    y_cubic_fit = regr.predict(cubic.fit_transform(X_fit))
    cubic_r2 = r2_score(y, regr.predict(X_cubic))

    # plot results
    plt.scatter(X, y, label='training points', color='lightgray')
    plt.plot(X_fit, y_lin_fit, label='linear (d=1), $R^2=%.2f$' % linear_r2,
             color='blue', lw=2, linestyle=':')
    plt.plot(X_fit, y_quad_fit, label='quadratic (d=2), $R^2=%.2f$' % quadratic_r2,
             color='red', lw=2, linestyle='-')
    plt.plot(X_fit, y_cubic_fit, label='cubic (d=3), $R^2=%.2f$' % cubic_r2,
             color='green', lw=2, linestyle='--')
    plt.xlabel('% lower status of the population [LSTAT]')
    plt.ylabel('Price in $1000\'s [MEDV]')
    plt.legend(loc='upper right')
    plt.show()
def get_cl(tau, consider='EE', degree=5):
    if consider == 'EE':
        values = values_EE
    else:
        values = values_BB
    v = values  # [:100]
    p = points  # [:100]
    poly = PolynomialFeatures(degree=degree)
    # Vandermonde matrix of pre-computed parameter values.
    X_ = poly.fit_transform(p.reshape(-1, 1))
    predict = np.array([tau]).reshape(1, -1)
    # Creates the matrix of values you want to estimate from the existing
    # measurements. Computation speed scales very slowly when you ask for
    # estimates of many sets of parameters.
    predict_ = poly.fit_transform(predict)
    clf = LinearRegression()
    estimate = []
    for l in range(2, v.shape[1]):
        values_l = v[:, l]
        clf.fit(X_, values_l)
        estimate_l = clf.predict(predict_)
        estimate.append(estimate_l)
    estimate = np.array(estimate)
    ell = np.arange(2, l + 1)
    Z = 2 * np.pi / (ell * (ell + 1))
    return ell, Z * estimate[:, 0]
def polynomial_expansion(self, rank=2):
    """
    Expand the features with polynomials of rank `rank`
    """
    # honour the rank argument (the original hard-coded degree=2)
    pf = PolynomialFeatures(degree=rank)
    self.X_red = pf.fit_transform(self.X_red)
    self.X_white = pf.fit_transform(self.X_white)
def lassoRegression(X, y):
    print("\n### ~~~~~~~~~~~~~~~~~~~~ ###")
    print("Lasso Regression")
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    myDegree = 40
    polynomialFeatures = PolynomialFeatures(degree=myDegree, include_bias=False)
    Xp = polynomialFeatures.fit_transform(X)
    myScaler = StandardScaler()
    scaled_Xp = myScaler.fit_transform(Xp)
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    lassoRegression = Lasso(alpha=1e-7)
    lassoRegression.fit(scaled_Xp, y)
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    dummyX = np.arange(0, 2, 0.01)
    dummyX = dummyX.reshape((dummyX.shape[0], 1))
    dummyXp = polynomialFeatures.fit_transform(dummyX)
    scaled_dummyXp = myScaler.transform(dummyXp)
    dummyY = lassoRegression.predict(scaled_dummyXp)
    outputFILE = 'plot-lassoRegression.png'
    fig, ax = plt.subplots()
    fig.set_size_inches(h=6.0, w=10.0)
    ax.axis([0, 2, 0, 15])
    ax.scatter(X, y, color="black", s=10.0)
    ax.plot(dummyX, dummyY, color='red', linewidth=1.5)
    # savefig takes the file name as its first positional argument
    plt.savefig(outputFILE, bbox_inches='tight', pad_inches=0.2, dpi=600)
    ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
    return (None)
def myTradingSystem(DATE, OPEN, HIGH, LOW, CLOSE, VOL, OI, P, R, RINFO, exposure, equity, settings):
    """ This system uses linear regression to allocate capital into the desired equities"""
    # Get parameters from settings
    nMarkets = len(settings['markets'])
    lookback = settings['lookback']
    dimension = settings['dimension']
    threshold = settings['threshold']
    pos = np.zeros(nMarkets, dtype=np.float)
    poly = PolynomialFeatures(degree=dimension)
    for market in range(nMarkets):
        reg = linear_model.LinearRegression()
        try:
            reg.fit(poly.fit_transform(np.arange(lookback).reshape(-1, 1)), CLOSE[:, market])
            trend = (reg.predict(poly.fit_transform(np.array([[lookback]]))) - CLOSE[-1, market]) / CLOSE[-1, market]
            if abs(trend[0]) < threshold:
                trend[0] = 0
            pos[market] = np.sign(trend)
        # for NaN data set position to 0
        except ValueError:
            pos[market] = .0
    return pos, settings
def detrend(data, degree=1):
    """
    Take 2D (i.e. image) data and remove the background using a polynomial fit.
    Eventually this may be generalized to data of any dimension.

    Parameters
    ----------
    data : ndarray (NxM)
        data to detrend
    degree : int
        the degree of the polynomial with which to model the background

    Returns
    -------
    out : tuple of ndarrays (NxM)
        (data without background, background)
    """
    x = np.arange(data.shape[1])
    y = np.arange(data.shape[0])
    xx, yy = np.meshgrid(x, y)
    # We have to take our 2D data and transform it into a list of 2D coordinates
    X = np.dstack((xx.ravel(), yy.ravel())).reshape((np.prod(data.shape), 2))
    # We have to ravel our data so that it is a list of points
    vector = data.ravel()
    # now we can continue as before
    predict = X
    poly = PolynomialFeatures(degree)
    X_ = poly.fit_transform(X)
    predict_ = poly.fit_transform(predict)
    clf = linear_model.RANSACRegressor()
    # try the fit a few times, as it seems prone to failure
    ntries = 10
    for i in range(ntries):
        try:
            # try the fit
            clf.fit(X_, vector)
        except ValueError as e:
            # swallow the error and retry, unless the number of tries
            # has been reached
            if i == ntries - 1:
                # then raise the error
                raise e
        else:
            # if no error is thrown, break out of the loop.
            break
    # we have to reshape our fit to mirror our original data
    background = clf.predict(predict_).reshape(data.shape)
    data_nb = data - background
    return data_nb, background
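# A small self-contained check (assumed, not from the original source):
# detrend a synthetic image made of a linear ramp plus noise and confirm
# that the recovered background approximates the ramp. Assumes numpy and the
# sklearn linear_model import used by detrend are available.
import numpy as np

rng = np.random.default_rng(0)
yy, xx = np.mgrid[0:64, 0:64]
ramp = 0.5 * xx + 0.2 * yy                          # known linear background
img = ramp + rng.normal(scale=0.1, size=ramp.shape)
flattened, background = detrend(img, degree=1)
print(np.allclose(background, ramp, atol=1.0))      # approximately recovers the ramp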
def regression_polynomiale(dicon, value, test, test_value, degre):
    X = list(dicon.values())
    poly = PolynomialFeatures(degree=degre)
    # transform the input to the polynomial model
    poly.fit_transform(X)
    print(dicon.values())
    model = Pipeline([('poly', PolynomialFeatures(degree=degre)),
                      ('linear', linear_model.LinearRegression(fit_intercept=False))])
    model.fit(X, list(value.values()))
    model.named_steps['linear'].coef_
    # the sample to predict must be 2-D (one row of features)
    print("polynomial model prediction: ",
          model.named_steps['linear'].predict(poly.fit_transform(
              [[4.0, 5.0, 9, 2, 91.1, 132.3, 812.1, 12.5, 15.9, 38.0, 5.4]])))
def fitting(self, XTrain, YTrain, XTest, YTest):
    ### non-linear transformation of the features
    for degree in range(2, 3):
        poly = PolynomialFeatures(degree=degree, include_bias=False)
        XTrain_transf = poly.fit_transform(XTrain)
        XTest_transf = poly.fit_transform(XTest)
        ## centering the data
        XTrain_transf, YTrain_, X_mean, y_mean, X_std = center_data(
            XTrain_transf, YTrain, fit_intercept=True, normalize=True)
        XTest_transf, YTest_ = center_test(XTest_transf, YTest, X_mean, y_mean, X_std)
        new_loss, _ = compute_lasso(XTrain_transf, YTrain_, XTest_transf, YTest_, score="r2_score")
        print("degree-" + str(degree) + " polynomial loss:", new_loss)
def interactor(df):
    """
    This function takes in a data frame and creates binary interaction terms
    from all numerical and categorical variables as well as the assessment
    questions, and outputs a data frame
    """
    my_data_complete = df.dropna()  # interactions can only be done for non-missings
    colnames = list(my_data_complete.columns.values)
    # id and date columns
    id_cols_list = [
        x for x in colnames  # only for continuous vars
        if not (bool(re.search("_N$", x)) | bool(re.search("_C$", x)) | bool(re.search("_Q$", x)))
    ]
    # actual feature columns - to make interactions from
    new_cols_list = [
        x for x in colnames  # only for continuous vars
        if (bool(re.search("_N$", x)) | bool(re.search("_C$", x)) | bool(re.search("_Q$", x)))
    ]
    othervars = my_data_complete[id_cols_list]
    little_df = my_data_complete[new_cols_list]
    # computing all binary interaction terms
    poly = PolynomialFeatures(degree=2, interaction_only=True)
    theints = pd.DataFrame(poly.fit_transform(little_df))
    theints = theints.drop(theints.columns[0], axis=1)  # dropping the first column
    theints.columns = list(new_cols_list + list(itertools.combinations(new_cols_list, 2)))
    # concatenating the interaction terms to the original data frame
    df = pd.DataFrame(othervars.join(theints))
    new_features = theints.columns.values
    return df, new_features
def learning_curve(classifier, X, y, cv, sample_sizes, degree=1, pickle_path=None, verbose=True):
    """ Learning curve """
    learning_curves = []
    for i, (train_index, test_index) in enumerate(cv):
        X_train = X[train_index]
        X_test = X[test_index]
        y_train = y[train_index]
        y_test = y[test_index]
        if degree > 1:
            poly = PolynomialFeatures(degree=degree, interaction_only=False, include_bias=True)
            X_train = poly.fit_transform(X_train)
            X_test = poly.transform(X_test)
        lc = []
        for sample in sample_sizes:
            classifier.fit(X_train[:sample], y_train[:sample])
            # apply classifier on test set
            y_pred = classifier.predict(X_test)
            confusion = metrics.confusion_matrix(y_test, y_pred)
            lc.append(balanced_accuracy_expected(confusion))
        learning_curves.append(lc)
        if verbose:
            print(i, end=' ')
    # pickle learning curve
    if pickle_path:
        with open(pickle_path, 'wb') as f:
            pickle.dump(learning_curves, f, protocol=4)
    if verbose:
        print()
class SoftmaxLayer:
    """Class encapsulating the functionality of the final softmax layer
    pertaining to a neural network."""
    num_neurons = None
    neuron_outs = None
    learn_rate = 0.
    momentum = 0.
    num_inputs_per_neuron = 0
    z = None
    y = None
    diffs = None
    weights = None
    __prev_delta_weights = None
    __poly = None

    def __init__(self, num_outs, max_num_inputs, learnRate=.0001, momentum=.0005, randomStartWeights=False):
        self.neuron_outs = np.zeros(num_outs)
        self.num_neurons = num_outs
        self.learn_rate = learnRate
        self.momentum = momentum
        self.num_inputs_per_neuron = max_num_inputs + 1  # 1 is added to incorporate the bias weight.
        if randomStartWeights is True:
            self.weights = np.random.rand(self.num_neurons, self.num_inputs_per_neuron)
        else:
            self.weights = np.ones([self.num_neurons, self.num_inputs_per_neuron])
        self.__prev_delta_weights = np.zeros([self.num_neurons, self.num_inputs_per_neuron])
        self.__poly = PolynomialFeatures(1)

    def load_Zs(self, z):
        self.z = copy.deepcopy(self.__poly.fit_transform(z))

    def load_Ys(self, y):
        self.y = copy.deepcopy(y)

    def softmax(self, arr, j):
        if arr[j] < 10:
            res = np.exp(arr[j]) / np.sum(np.exp(arr))
        else:
            # shift for numerical stability before exponentiating
            arr -= arr.max()
            res = np.exp(arr[j]) / np.sum(np.exp(arr))
        return res

    def estimate_Ys(self, z=None):
        if z is not None:
            self.load_Zs(z)
        prods = np.dot(self.z, self.weights.T)
        self.neuron_outs = np.array([[self.softmax(prod, i) for i in xrange(prods.shape[1])]
                                     for prod in prods])

    def train_layer(self):
        self.diffs = self.neuron_outs - self.y
        delta_weights = np.array([np.sum([d[i] * self.z[i] for i in xrange(d.shape[0])], axis=0)
                                  for d in self.diffs.T])
        self.weights -= delta_weights * self.learn_rate + self.__prev_delta_weights * self.momentum
        self.__prev_delta_weights = copy.deepcopy(delta_weights)
def predict(self, x):
    # the model was trained on polynomial features, so transform x the same way;
    # keep the transformed sample 2-D for sklearn's predict
    poly = PolynomialFeatures(degree=self.degree)
    polynomial_features = poly.fit_transform(x)
    return self.model.predict(polynomial_features)
def main():
    testfile = sys.argv[1]
    modelfile = sys.argv[2]
    polyorder = int(sys.argv[3])
    testweeks = sys.argv[4]
    test_data = np.genfromtxt(testfile, delimiter=',', skip_header=1)
    X = test_data[:, :-1]
    y = test_data[:, -1]
    poly = PolynomialFeatures(degree=polyorder)
    Xpoly = poly.fit_transform(X)
    with open(modelfile, 'rb') as model, open(testweeks) as weeks:
        lr = pickle.load(model)
        games_per_week = (int(line) for line in weeks)
        ranges = []
        pos = 0
        for week in games_per_week:
            newpos = pos + week
            ranges.append((pos, newpos))
            pos = newpos
    print('W\tL\tPoints')
    weekly_results = (evaluate_week(week, Xpoly, y, lr) for week in ranges)
    for result in weekly_results:
        print('\t'.join(str(piece) for piece in result))
def mvr(data):
    x = data[:, 0:len(data[0]) - 1]
    y = data[:, -1]
    minTestingError = np.inf
    for dim in xrange(1, 3):
        if dim > 1:
            print("Mapping into higher dimension of {} \n".format(dim))
        else:
            evaluateGradientDesc(data)
            print("Explicit solution\n")
        poly = PolynomialFeatures(dim)
        z = poly.fit_transform(x)
        theta = fitModel(z, y)
        print("Intercept : {} \nCoefficients : {}\n".format(theta[0], theta[1:]))
        testingError, sol = evaluateModel(z, y, False)
        if dim == 1:
            print("Testing Error : {}".format(testingError))
        if testingError < minTestingError:
            minTestingError = testingError
            optimalDimension = dim
            optSol = sol
    print("Optimal Dimension : {}, Testing Error : {}".format(optimalDimension, minTestingError))
    return optSol
def test_polynomial_fits(x, y, n_comps, model, k_folds=3):
    for i in range(1, 6):
        poly = PolynomialFeatures(degree=i)
        poly_x = poly.fit_transform(x)
        r2_mean, r2_std, mse_mean, mse_std = run_conventional_linkage(poly_x, y, n_comps, model)
        print r2_mean, r2_std, mse_mean, mse_std
        print
def computeZs(w, x_train):
    z = []
    for i in xrange(x_train.shape[0]):
        z.append(np.array([sigmoid(w[j], x_train[i]) for j in xrange(w.shape[0])]))
    poly = PolynomialFeatures(1)
    z = poly.fit_transform(z)
    return np.array(z)
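# Equivalent formulation (a sketch): with default settings,
# PolynomialFeatures(1) simply prepends a bias column of ones, so the
# transform in computeZs could be replaced by a plain hstack.
import numpy as np

z = np.array([[0.2, 0.7], [0.9, 0.1]])
with_bias = np.hstack([np.ones((z.shape[0], 1)), z])
# PolynomialFeatures(1).fit_transform(z) yields the same matrix as with_bias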
def prob_max(x):
    # Map the input into degree-2 polynomial feature space, score it against
    # four pre-computed one-vs-rest logistic-regression coefficient vectors
    # (theta_0 .. theta_3), and return the index of the most probable class.
    poly = PolynomialFeatures(degree=2)
    x = poly.fit_transform(x)

    #### define best fit coefficient arrays
    # Each theta_* is a long pre-computed coefficient vector with one entry
    # per polynomial feature; only the leading entries are reproduced here.
    theta_0 = np.array([5.017034759466216798e+00, -4.953976374628412532e-02,
                        -5.853604893727188709e-03, -1.732076056200582692e-01])
    theta_1 = np.array([-9.861773710361221745e+00, 1.930173314618176539e-01,
                        6.178220889126870112e-03, 9.590504030349773779e-02])
    theta_2 = np.array([-2.604982997969506187e+01, 2.522175474048852784e-01,
                        6.275718741926675920e-03, 1.273176496282046599e-01])
    theta_3 = np.array([-2.972725322795918140e-02, -2.504227156229747453e-01,
                        -9.722118342779062158e-03, 1.229149113213241912e-01])

    #### calculate the per-class logistic probabilities
    z_0 = np.sum(x * theta_0)
    z_1 = np.sum(x * theta_1)
    z_2 = np.sum(x * theta_2)
    z_3 = np.sum(x * theta_3)
    p_0 = 1. / (1. + np.exp(-z_0))
    p_1 = 1. / (1. + np.exp(-z_1))
    p_2 = 1. / (1. + np.exp(-z_2))
    p_3 = 1. / (1. + np.exp(-z_3))
    prob_arra = np.array([p_0, p_1, p_2, p_3])
    return prob_arra.argmax()
def init_predict(mode):
    """
    Assemble the X matrix used for prediction.
    input: features
    output: X
    """
    import scipy.io as sio
    import scipy as sp
    from sklearn.preprocessing import PolynomialFeatures
    uid_ave = sio.loadmat('predict_cut_uid_ave.mat')['X']
    poly = PolynomialFeatures(degree=2)
    poly_uid_ave = poly.fit_transform(uid_ave)
    combined_list = [sp.sparse.csc_matrix(poly_uid_ave)]
    if mode == 'f':
        X_words = sio.loadmat('predict_cut_Xf.mat')['X']
    elif mode == 'c':
        X_words = sio.loadmat('predict_cut_Xc.mat')['X']
    else:
        X_words = sio.loadmat('predict_cut_Xl.mat')['X']
    #transformer = TfidfTransformer()
    #X_tfidf = transformer.fit_transform(X_words)
    combined_list.append(X_words)
    X = sp.sparse.hstack(combined_list)
    print(X.shape)
    return X
def predict(self, X, coefs):
    # first column of Z is time
    # we will replace the other columns with regressed data
    # clean-up from before
    Z = self.X.copy()
    print type(Z), Z.head()
    print type(coefs), coefs.head()
    poly = PolynomialFeatures(degree=self.n)
    for trial_index, (coefficients, x) in enumerate(izip(coefs, Z)):
        print trial_index, coefficients.shape, x.shape
        # 2-D shape required by fit_transform
        t = poly.fit_transform((x[:, 0]).reshape(-1, 1))
        # only regress on data past reference time
        t = t[self.reference_time:]
        z = np.zeros(x.shape)
        # first column is time
        z[:, 0] = x[:, 0]
        # columns up to reference time are just 0 and were not regressed
        z[:self.reference_time, 1:] = 0
        # columns after reference_time were regressed with coefficients
        print t.shape, z.shape, coefficients.shape
        z[self.reference_time:, 1:] = np.dot(t, coefficients)
        Z.iloc[trial_index] = z
    return Z
def set_pdynome_degree(degree, lis):
    # wrap the single sample so fit_transform sees a 2-D input
    lis = [lis]
    poly = PolynomialFeatures(degree)
    result = poly.fit_transform(lis)
    result = result.tolist()
    result = result[0]
    return result
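# Example usage (illustrative): a degree-2 expansion of the two-feature
# point [1, 2] yields [1, x1, x2, x1^2, x1*x2, x2^2].
print(set_pdynome_degree(2, [1, 2]))  # [1.0, 1.0, 2.0, 1.0, 2.0, 4.0]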
def prepare(file, survived_info=True):
    df = pd.read_csv(file, header=0)
    df = pd.concat([df, pd.get_dummies(df['Embarked'], prefix='Embarked')], axis=1)
    df = pd.concat([df, pd.get_dummies(df['Sex'], prefix='Sex')], axis=1)
    df = pd.concat([df, pd.get_dummies(df['Pclass'], prefix='Pclass')], axis=1)
    df = df.fillna(value={'Age': df['Age'].dropna().median(),
                          'Fare': df['Fare'].dropna().median()})
    survived = None
    if survived_info:
        survived = df['Survived'].values
        df = df.drop(['Survived'], axis=1)
    ids = df['PassengerId'].values
    df = df.drop(['PassengerId', 'Pclass', 'Name', 'Sex', 'Ticket', 'Cabin', 'Embarked'], axis=1)
    poly = PolynomialFeatures(interaction_only=True)
    polydata = poly.fit_transform(df)
    cols = np.hstack((['1s'], df.columns, [None] * (polydata.shape[1] - len(df.columns) - 1)))
    polydf = pd.DataFrame.from_records(polydata, columns=cols)
    if survived_info:
        polydf['Survived'] = survived
    return (polydf, ids)
def log_reg_kclass(x, y, nfolds=4, degree=1, limit=None):
    """Performs logistic regression experiments on Iris dataset for k class discrimination."""
    #print 'Training k Class classifier on Iris dataset'
    if limit is not None:
        print 'Considering only', limit, ' datapoints'
        x = x[:limit]
        y = y[:limit]
    #x /= x.max(axis=0)
    poly = PolynomialFeatures(degree)
    x = poly.fit_transform(x)
    num_classes = len(set(y))
    avg_accuracy = 0.
    avg_precision = np.zeros(num_classes)
    avg_recall = np.zeros(num_classes)
    avg_fscore = np.zeros(num_classes)
    avg_conf_mat = np.zeros([num_classes, num_classes])
    kf = KFold(y.shape[0], n_folds=nfolds, shuffle=True)
    for train_ids, test_ids in kf:
        thetas = log_reg_trainer_kclass(x[train_ids], y[train_ids])
        y_pred = log_reg_pred_kclass(thetas, x[test_ids])
        acc = accuracy_score(y[test_ids], y_pred)
        avg_accuracy += acc
        precision1, recall1, fscore1, supp1 = precision_recall_fscore_support(y[test_ids], y_pred)
        conf_mat = confusion_matrix(y[test_ids], y_pred)
        avg_precision += precision1
        avg_recall += recall1
        avg_fscore += fscore1
        avg_conf_mat += conf_mat
    return avg_accuracy / nfolds, avg_precision / nfolds, avg_recall / nfolds, avg_fscore / nfolds, avg_conf_mat / nfolds
def poly_model(ins, outs, degrees):
    poly = PolynomialFeatures(degree=degrees)
    X = poly.fit_transform(ins)
    regr = linear_model.LinearRegression()
    regr.fit(X, outs)
    print_model("poly-" + str(degrees), regr, X, outs)
def hidden_layer(self, X, w):
    # Z has shape m x (R + 1): the hidden activations plus one extra
    # constant column of ones for the bias.
    Z = sigmoid(np.dot(X, w.T))
    p = PolynomialFeatures(degree=1)
    Z = p.fit_transform(Z)
    return Z
dataset = pd.read_csv("Position_Salaries.csv") #preparing attributes and dependent values X = dataset.iloc[:, 1:2].values #attributes Y = dataset.iloc[:, 2:3].values #dependent values #__________________________________________POLYNOMIAL REGRESSION__________________________________________ # >>> Fitting first linear regression to the dataset lin_reg = LinearRegression() lin_reg.fit(X, Y) # >>> Fitting Polynomial regression to the dataset poly_reg = PolynomialFeatures(degree=4) X_poly = poly_reg.fit_transform(X) # >>> Fitting second linear regression to the polynomial dataset lin_reg2 = LinearRegression() lin_reg2.fit(X_poly, Y) #__________________________________________VISUALIZATION__________________________________________ # >>> Linear regression plot plt.scatter(X, Y, color="red") plt.plot(X, lin_reg.predict(X)) plt.title("Linear Regression") plt.xlabel("Position") plt.ylabel("Salary") plt.show()
Y = dataset.iloc[:, 2:].values

#Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, Y)

#Fitting Polynomial Regression to the dataset
#The PolynomialFeatures object below is a transformer tool
#that will transform our matrix of features X into
#a new matrix of features named X_poly
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=3)
X_poly = poly_reg.fit_transform(X)
poly_reg.fit(X_poly, Y)

#Once this new matrix of polynomial features X_poly is created,
#a new linear regression object is fitted to X_poly and Y
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, Y)

#Visualising the Linear Regression Results
plt.scatter(X, Y, color="red")
plt.plot(X, lin_reg.predict(X), color="blue")
plt.title("Truth or Bluff (Linear Regression)")
plt.xlabel("Position Level")
plt.ylabel("Salary")
plt.show()
#Resetting the dataset1 index
dataset1 = dataset1.reset_index()

#Splitting the Date column
dataset1['Date'] = pd.to_datetime(dataset1['Date'])
dataset1.insert(1, "year", dataset1.Date.dt.year, True)
dataset1.insert(2, "month", dataset1.Date.dt.month, True)
dataset1.insert(3, "Day", dataset1.Date.dt.day, True)

#Dropping the Date column
dataset1.drop('Date', axis=1, inplace=True)

#Dividing dataset1 into the X and Y matrices
X = dataset1.filter(['year', 'month', 'Day'])
Y = dataset1.filter(['Total cases'])

#Dividing dataset1 into training and testing data
#from sklearn.model_selection import train_test_split
#X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.1, random_state = 0)

#Training the model with the training data
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, Y)

#Predicting with the model for the given values (numeric year/month/day)
Y_pred = lin_reg_2.predict(poly_reg.fit_transform([[2020, 3, 10]]))
p = dataset.loc[(dataset.Source == 'GCAG'), ['Year']]
q = dataset.loc[(dataset.Source == 'GCAG'), ['Mean']]

# Splitting the dataset into the Training set and Test set
"""from sklearn.cross_validation import train_test_split
p_train, p_test, q_train, q_test = train_test_split(p, q, test_size = 0.2, random_state = 0)"""

# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(p, q)

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
p_poly = poly_reg.fit_transform(p)
poly_reg.fit(p_poly, q)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(p_poly, q)

# Visualising the Linear Regression results
plt.scatter(p, q, color='brown')  # Creating Scatter Plot
plt.plot(p, lin_reg.predict(p), color='orange', label='Best Fit Line')  # Creating Best Fit Line
tnrfont = {'fontname': 'Times New Roman'}  # Setting the font "Times New Roman"
plt.title('Linear Regression for GCAG', **tnrfont)  # Setting the Title
plt.xlabel('Year', **tnrfont)  # Labelling x-axis
plt.ylabel('Mean Temperature', **tnrfont)  # Labelling y-axis
plt.grid(color='grey', linestyle='-', linewidth=0.25, alpha=0.5)  # Creating Grid
leg = plt.legend(fancybox=True, framealpha=1, shadow=True, borderpad=1)
from sklearn.preprocessing import PolynomialFeatures
from sklearn.grid_search import GridSearchCV

X = [[6], [8], [10], [14], [18]]
Y = [[7], [9], [13], [17.5], [18]]
poly2 = PolynomialFeatures(degree=2)
print(poly2.fit_transform(X))
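# A short continuation (sketch): each x above maps to [1, x, x^2]; fitting a
# linear model on those quadratic features and predicting a new point.
from sklearn.linear_model import LinearRegression

regressor = LinearRegression()
regressor.fit(poly2.fit_transform(X), Y)
print(regressor.predict(poly2.transform([[12]])))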
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
sc_y = StandardScaler()
Y_train = sc_y.fit_transform(y_train.reshape(-1, 1))

#Linear regression
from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_train, y_train)

#poly regression
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)
poly_reg.fit(X_poly, y)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, y)
lin_reg_2.predict(poly_reg.fit_transform([[6.5]]))

#svr
from sklearn.svm import SVR
regressor = SVR(kernel='rbf')
regressor.fit(X, y)

#decision tree regression
from sklearn.tree import DecisionTreeRegressor
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X, y)
# Polynomial Regression

# Importing Libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Position_salaries.csv')
X = dataset.iloc[:, 1:2].values
Y = dataset.iloc[:, 2].values

# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, Y)

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree = 4)
X_poly = poly_reg.fit_transform(X)
lin_reg_2 = LinearRegression()
lin_reg_2.fit(X_poly, Y)

# Visualising the Polynomial Regression and Linear Regression together
plt.scatter(X, Y, color = 'red')
plt.plot(X, lin_reg_2.predict(poly_reg.fit_transform(X)), color = 'blue')
plt.plot(X, lin_reg.predict(X), color = 'violet')
plt.xlabel('Position Level of an Employee')
plt.ylabel('Salary')
plt.title('Truth or Bluff (Polynomial Regression)')
plt.show()
y = dataset.iloc[:, 2].values  #y is a vector

#check the kind of relationship so obtained
plt.plot(x, y)

#do not divide into train and test set, because there is little data and more accuracy is necessary

#make a linear regression model just as a reference
from sklearn.linear_model import LinearRegression
linreg = LinearRegression()
linreg.fit(x, y)

#make the polynomial regression
from sklearn.preprocessing import PolynomialFeatures
polyreg = PolynomialFeatures(degree=4)
x_poly = polyreg.fit_transform(x)
linreg2 = LinearRegression()
linreg2.fit(x_poly, y)

#visualising linear model
plt.scatter(x, y, color='red')
plt.plot(x, linreg.predict(x), color='blue')
plt.show()

#visualising polynomial model
plt.scatter(x, y, color='red')
#do not pass x_poly here; transform x on the fly so the model generalises to any input
plt.plot(x, linreg2.predict(polyreg.fit_transform(x)), color='blue')
plt.show()
elanet = ElasticNet(alpha=1.0, l1_ratio=0.5)
# if we set the l1_ratio to 1.0, elanet would be equal to Lasso

"""# Turning a linear regression model into a curve - polynomial regression"""

# AN EXAMPLE
# First adding a second degree polynomial term
from sklearn.preprocessing import PolynomialFeatures

X = np.array([258.0, 270.0, 294.0, 320.0, 342.0,
              368.0, 396.0, 446.0, 480.0, 586.0])[:, np.newaxis]
y = np.array([236.4, 234.4, 252.8, 298.6, 314.2,
              342.2, 360.8, 368.0, 391.2, 390.8])
lr = LinearRegression()
pr = LinearRegression()
quadratic = PolynomialFeatures(degree=2)
X_quad = quadratic.fit_transform(X)

# Fitting a linear regression model for comparison
lr.fit(X, y)
X_fit = np.arange(250, 600, 10)[:, np.newaxis]
y_lin_fit = lr.predict(X_fit)

# Fitting a multiple regression model on the transformed features for
# polynomial regression
pr.fit(X_quad, y)
y_quad_fit = pr.predict(quadratic.fit_transform(X_fit))

# Plotting the results
plt.scatter(X, y, label='Training Points')
plt.plot(X_fit, y_lin_fit, label='Linear Fit', linestyle='--')
plt.plot(X_fit, y_quad_fit, label='Quadratic Fit')
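# A hedged follow-up: comparing the two fits numerically with r2_score
# instead of only by eye (r2_score import assumed from sklearn.metrics).
from sklearn.metrics import r2_score

print('linear R^2:    %.3f' % r2_score(y, lr.predict(X)))
print('quadratic R^2: %.3f' % r2_score(y, pr.predict(X_quad)))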
"""
@author: lopes
"""
import pandas as pd
import numpy as np

df = pd.read_csv('house_prices.csv')
X = df.iloc[:, 3:19].values
y = df.iloc[:, 2].values

from sklearn.model_selection import train_test_split
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(X, y, random_state=0)

from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=4)
X_treinamento_poly = poly.fit_transform(X_treinamento)
X_teste_poly = poly.transform(X_teste)

from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X_treinamento_poly, y_treinamento)
score = regressor.score(X_teste_poly, y_teste)
previsoes = regressor.predict(X_teste_poly)

from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(y_teste, previsoes)
threshold = 2
df_label = df_label[(z < threshold)]

# Reset the index for the polynomial features merge
df_label = df_label.reset_index(drop=True)

# Get polynomial features
polyTrans = PolynomialFeatures(degree=2, include_bias=False)
df_label_Num = df_label[["level", "temperature", "usage", "Brightness", "RAM"]]
# Drop them to get back later the polynomial transform of them
df_label = df_label.drop(["level", "temperature", "usage", "Brightness", "RAM"], axis=1)
polyData_Num = polyTrans.fit_transform(df_label_Num)
columnNames = polyTrans.get_feature_names(["level", "temperature", "usage", "Brightness", "RAM"])
df_label_Num = pandas.DataFrame(polyData_Num, columns=columnNames)
for column in columnNames:
    df_label[column] = pandas.Series(df_label_Num[column])

# Get dataframes
y_label = df_label["output"]
X_label = df_label.drop(["output"], axis=1)

# Split data training and testing ...
X_train_label, X_test_label, y_train_label, y_test_label = train_test_split(
    X_label, y_label, test_size=0.25, random_state=42)
for i in range(0, train_rows):
    row = raw_input().split(" ")
    row = [float(v) for v in row]
    train_features.append(row[0:2])
    train_value.append(row[-1])

test_rows = int(raw_input())
for i in range(0, test_rows):
    row = raw_input().split(" ")
    row = [float(v) for v in row]
    test_features.append(row[0:2])

poly_feature = PolynomialFeatures(degree=3)
train_features = poly_feature.fit_transform(train_features)
model = LinearRegression().fit(train_features, train_value)
test_features = poly_feature.fit_transform(test_features)
prediction = model.predict(test_features)
for i in prediction:
    print round(i, 2)
data_set = pd.read_csv('./Position_Salaries.csv')
print(data_set)

X = data_set.iloc[:, 1:2].values
y = data_set.iloc[:, 2:3].values
print(X)
print(y)

# Note: this split is computed but never used; the model below is fit on all of X
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

poly_reg = PolynomialFeatures(degree=10)
X_poly = poly_reg.fit_transform(X)
linear_reg = LinearRegression()
linear_reg.fit(X_poly, y)

print(X)
pred_y = linear_reg.predict(X_poly)
print(pred_y)

plt.scatter(X, y, color='red')
plt.plot(X, pred_y, color='green')
plt.show()
def get_poly_features(X, degree):
    # interaction_only=True keeps products of distinct features and drops pure powers
    poly = PolynomialFeatures(degree, interaction_only=True)
    X2 = poly.fit_transform(X)
    return X2
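A quick, made-up usage example showing what interaction_only=True drops relative to the full expansion:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

X = np.array([[2.0, 3.0]])

full = PolynomialFeatures(degree=2).fit_transform(X)
inter = PolynomialFeatures(degree=2, interaction_only=True).fit_transform(X)
print(full)   # [[1. 2. 3. 4. 6. 9.]]  -> 1, x1, x2, x1^2, x1*x2, x2^2
print(inter)  # [[1. 2. 3. 6.]]        -> the pure powers x1^2 and x2^2 are dropped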
df = pd.read_csv("Position_Salaries.csv")
X = df.iloc[:, 1:2].values
y = df.iloc[:, 2].values

from sklearn.linear_model import LinearRegression
regressor = LinearRegression()
regressor.fit(X, y)

"""In this part, PolynomialFeatures is a tool used to transform a matrix into
polynomial form, i.e. creating the squared (and higher-order) terms. Once that
is done, you fit another linear model on the polynomial matrix."""
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X)
regressor_2 = LinearRegression()
regressor_2.fit(X_poly, y)

plt.scatter(X, y, color='red')
plt.plot(X, regressor.predict(X), color='blue')
plt.show()

# A denser grid gives a smooth polynomial curve
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color='red')
plt.plot(X_grid, regressor_2.predict(poly_reg.fit_transform(X_grid)), color='blue')
plt.show()
class ContinuousToPolynomialBasisHypergridAdapter(HypergridAdapter):
    """ Adds polynomial basis function features for each continuous dimension in the adaptee hypergrid
    using https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html.
    All non-continuous adaptee dimensions will be present in the target hypergrid.
    Beware: Because HierarchicalHypergrids may have NaN values for some points, these NaNs will be replaced by zeros.

    Parameters
    ----------
    degree : integer
        The degree of the polynomial features. Default = 2.
    interaction_only : boolean
        If true, only interaction features are produced: features that are products of at most
        degree distinct input features (so not x[1] ** 2, x[0] * x[2] ** 3, etc.). Default = False
    include_bias : boolean
        If True, then include a bias column, the feature in which all polynomial powers are zero
        (i.e. a column of ones - acts as an intercept term in a linear model). Default = True
    """

    def __init__(self, adaptee: Hypergrid, degree: int = 2, include_bias: bool = True, interaction_only: bool = False):
        if not HypergridAdapter.is_like_simple_hypergrid(adaptee):
            raise ValueError("Adaptee must implement a Hypergrid Interface.")

        HypergridAdapter.__init__(self, name=adaptee.name, random_state=adaptee.random_state)
        self._adaptee: Hypergrid = adaptee
        self._polynomial_features_kwargs = {
            'degree': degree,
            'interaction_only': interaction_only,
            'include_bias': include_bias,
            'order': 'C'
        }
        self._target: Hypergrid = None
        if self._adaptee.is_hierarchical():
            self._adaptee = HierarchicalToFlatHypergridAdapter(adaptee=self._adaptee)

        # Record which adaptee dimensions are continuous
        self._adaptee_contains_dimensions_to_transform = False
        self._adaptee_dimension_names_to_transform = []
        for adaptee_dimension in self._adaptee.dimensions:
            if isinstance(adaptee_dimension, ContinuousDimension):
                self._adaptee_dimension_names_to_transform.append(adaptee_dimension.name)
        self._num_dimensions_to_transform = len(self._adaptee_dimension_names_to_transform)
        self._adaptee_contains_dimensions_to_transform = self._num_dimensions_to_transform > 0

        # See definition of _get_polynomial_feature_names() for usage
        self._internal_feature_name_terminal_char = '_'

        # Since sklearn PolynomialFeatures does not accept NaNs and these may appear in data frames from hierarchical hypergrids,
        # the NaNs will be replaced with an imputed (finite) value. The following sets the value used.
        self._nan_imputed_finite_value = 0

        # Instantiate sklearn's polynomial features instance
        self._polynomial_features = PolynomialFeatures(**self._polynomial_features_kwargs)

        # Because the exact number of additional dimensions that will be added depends on the parameters to sklearn's PF,
        # *and* the sklearn PF instance above doesn't determine this information until after the .fit() method is called (requiring a dataframe),
        # *and* the target hypergrid can not be constructed without knowing the resulting number of continuous dimensions,
        # a trivial dataframe is constructed (all 1s) and .fit_transform() of the _polynomial_features instance is called.
        trivial_continuous_dim_x = np.ones((1, self._num_dimensions_to_transform))
        trivial_polynomial_features_y = self._polynomial_features.fit_transform(trivial_continuous_dim_x)
        self._polynomial_features_powers = self._polynomial_features.powers_
        self._num_polynomial_basis_dimensions_in_target = trivial_polynomial_features_y.shape[1]
        self._target_polynomial_feature_map = {}  # keys are target dimension names, values are index in features
        self._build_simple_hypergrid_target()

    def _build_simple_hypergrid_target(self) -> None:
        self._target = SimpleHypergrid(
            name=self._adaptee.name,
            dimensions=None,
            random_state=self._adaptee.random_state)

        # Add non-transformed adaptee dimensions to the target
        for adaptee_dimension in self._adaptee.dimensions:
            if adaptee_dimension.name not in self._adaptee_dimension_names_to_transform:
                self._target.add_dimension(adaptee_dimension.copy())

        if not self._adaptee_contains_dimensions_to_transform:
            return

        # Add new dimensions to be created by sklearn PolynomialFeatures.
        # Construct target dim names using adaptee dim names and the polynomial feature powers matrix.
        # This logic is worked out explicitly here so we have control over the derived dimension names.
        # Currently, the code only substitutes adaptee feature names into the default feature_names produced by
        # sklearn's PolynomialFeatures .get_feature_names() method.
        poly_feature_dim_names = self._get_polynomial_feature_names()
        for i, poly_feature_name in enumerate(poly_feature_dim_names):
            ith_terms_powers = self._polynomial_features_powers[i]

            if not self._polynomial_features_kwargs['include_bias'] and ith_terms_powers.sum() == 0:
                # The constant term is skipped
                continue

            # Replace adaptee dim names for poly feature name {x0_, x1_, ...} representatives
            target_dim_name = poly_feature_name
            for j, adaptee_dim_name in enumerate(self._adaptee_dimension_names_to_transform):
                adaptee_dim_power = ith_terms_powers[j]
                if adaptee_dim_power == 0:
                    continue
                if adaptee_dim_power == 1:
                    poly_feature_adaptee_dim_name_standin = f'x{j}{self._internal_feature_name_terminal_char}'
                    adaptee_dim_replacement_name = adaptee_dim_name
                else:  # power > 1 cases
                    poly_feature_adaptee_dim_name_standin = f'x{j}{self._internal_feature_name_terminal_char}^{adaptee_dim_power}'
                    adaptee_dim_replacement_name = f'{adaptee_dim_name}^{adaptee_dim_power}'
                target_dim_name = target_dim_name.replace(poly_feature_adaptee_dim_name_standin, adaptee_dim_replacement_name)

            # Add the target dimension.
            # min and max are placed at -Inf and +Inf since .random() on the target hypergrid is generated on the original
            # hypergrid and passed through the adapters.
            self._target.add_dimension(ContinuousDimension(name=target_dim_name, min=-math.inf, max=math.inf))
            self._target_polynomial_feature_map[target_dim_name] = i

    @property
    def adaptee(self) -> Hypergrid:
        return self._adaptee

    @property
    def target(self) -> Hypergrid:
        return self._target

    @property
    def polynomial_features_kwargs(self) -> dict:
        return self._polynomial_features_kwargs

    @property
    def nan_imputed_finite_value(self):
        return self._nan_imputed_finite_value

    def get_column_names_for_polynomial_features(self, degree=None):
        # Column names ordered by target dimension index, as this coincides with the polynomial_features.powers_ table
        sorted_by_column_index = {
            k: v for k, v in sorted(self._target_polynomial_feature_map.items(), key=lambda item: item[1])
        }
        if degree is None:
            return list(sorted_by_column_index.keys())
        dim_names = []
        for ith_terms_powers, poly_feature_name in zip(self._polynomial_features_powers, self._get_polynomial_feature_names()):
            if ith_terms_powers.sum() == degree:
                dim_names.append(poly_feature_name)
        return dim_names

    def get_polynomial_feature_powers_table(self):
        return self._polynomial_features_powers

    def get_num_polynomial_features(self):
        return self._polynomial_features_powers.shape[0]

    def _get_polynomial_feature_names(self):
        # The default polynomial feature names returned from .get_feature_names() look like:
        #   ['1', 'x0', 'x1', 'x0^2', 'x0 x1', 'x1^2']
        # They are altered below by adding a terminal char so string substitutions don't confuse
        # a derived feature named 'x1 x12' with another potentially derived feature named 'x10 x124'
        replaceable_feature_names = []
        for i in range(len(self._adaptee_dimension_names_to_transform)):
            replaceable_feature_names.append(f'x{i}{self._internal_feature_name_terminal_char}')
        return self._polynomial_features.get_feature_names(replaceable_feature_names)

    def _project_dataframe(self, df: DataFrame, in_place=True) -> DataFrame:
        if not in_place:
            df = df.copy(deep=True)

        # Replace NaNs with zeros
        df.fillna(self._nan_imputed_finite_value, inplace=True)

        # Transform the continuous columns and add the higher order columns to the df.
        # Filtering columns to transform b/c dataframes coming from hierarchical hypergrid points
        # may not contain all possible dimensions knowable from the hypergrid
        x_to_transform = np.zeros((len(df.index), len(self._adaptee_dimension_names_to_transform)))
        for i, dim_name in enumerate(self._adaptee_dimension_names_to_transform):
            if dim_name in df.columns.values:
                x_to_transform[:, i] = df[dim_name]

        all_poly_features = self._polynomial_features.transform(x_to_transform)
        for target_dim_name in self._target_polynomial_feature_map:
            target_dim_index = self._target_polynomial_feature_map[target_dim_name]
            df[target_dim_name] = all_poly_features[:, target_dim_index]
        return df

    def _unproject_dataframe(self, df: DataFrame, in_place=True) -> DataFrame:
        if not in_place:
            df = df.copy(deep=True)

        # Unprojecting simply drops the monomial columns whose degree is not 1
        polynomial_feature_powers = self.get_polynomial_feature_powers_table()
        column_names_to_drop = []
        for target_dim_name, powers_table_index in self._target_polynomial_feature_map.items():
            target_powers = polynomial_feature_powers[powers_table_index]
            if target_powers.sum() == 1:
                continue
            column_names_to_drop.append(target_dim_name)
        df.drop(columns=column_names_to_drop, inplace=True)
        return df
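The name-substitution logic above is driven by sklearn's powers_ table, one row of exponents per output feature. A small standalone sketch (using the same trivial all-ones fit as __init__, and the older get_feature_names API this class targets) of what the naming loop iterates over:

import numpy as np
from sklearn.preprocessing import PolynomialFeatures

pf = PolynomialFeatures(degree=2, include_bias=True)
pf.fit(np.ones((1, 2)))  # the trivial all-ones fit used in __init__

print(pf.powers_)
# [[0 0]   -> '1' (bias)
#  [1 0]   -> 'x0_'
#  [0 1]   -> 'x1_'
#  [2 0]   -> 'x0_^2'
#  [1 1]   -> 'x0_ x1_'
#  [0 2]   -> 'x1_^2']
print(pf.get_feature_names(['x0_', 'x1_']))  # removed in newer sklearn; get_feature_names_out replaces it there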
# Generate test inputs from the training minimum to the maximum in steps of 1
X_test = np.arange(X_train.min(), X_train.max(), 1)[:, np.newaxis]
#X_test = np.arange(X_train.min(), X_train.max(), 1).reshape(-1,1)
#print(X_test)

# Linear regression
model_boston = LinearRegression()
model_boston.fit(X_train, y_train)
linear_pred = model_boston.predict(X_test)

# Polynomial regression, degree 2
poly_linear_model2 = LinearRegression()
polynomial2 = PolynomialFeatures(degree=2)
# Transform the data into the higher-degree feature space
X_train_transformed2 = polynomial2.fit_transform(X_train)
#print("X_train_transformed.shape :", X_train_transformed.shape)
poly_linear_model2.fit(X_train_transformed2, y_train)  # fit on the training data
X_test_transformed2 = polynomial2.transform(X_test)
pre2 = poly_linear_model2.predict(X_test_transformed2)

# Polynomial regression, degree 5
poly_linear_model5 = LinearRegression()
polynomial5 = PolynomialFeatures(degree=5)  # degree matches the variable naming
X_train_transformed5 = polynomial5.fit_transform(X_train)
#print("X_train_transformed.shape :", X_train_transformed.shape)
poly_linear_model5.fit(X_train_transformed5, y_train)
def map_feature(x, degree=2):
    poly = PolynomialFeatures(degree)
    # Promote a 1-D vector to a single-column matrix, as sklearn expects 2-D input
    if len(x.shape) == 1:
        x = x.reshape(len(x), 1)
    return poly.fit_transform(x)
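A quick usage sketch of map_feature with made-up inputs, showing the 1-D promotion:

import numpy as np

x = np.array([1.0, 2.0, 3.0])   # 1-D input is reshaped to (3, 1)
print(map_feature(x))           # [[1. 1. 1.] [1. 2. 4.] [1. 3. 9.]]

X = np.array([[1.0, 2.0]])      # 2-D input passes through unchanged
print(map_feature(X))           # [[1. 1. 2. 1. 2. 4.]]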
# TODO: Add import statements
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Assign the data to predictor and outcome variables
# TODO: Load the data
train_data = pd.read_csv('poly.csv')
X = train_data['Var_X'].values.reshape(-1, 1)
y = train_data['Var_Y'].values

# Create polynomial features
# TODO: Create a PolynomialFeatures object, then fit and transform the
# predictor feature
poly_feat = PolynomialFeatures(degree=4)
X_poly = poly_feat.fit_transform(X)

# Make and fit the polynomial regression model
# TODO: Create a LinearRegression object and fit it to the polynomial predictor
# features
poly_model = LinearRegression(fit_intercept=False).fit(X_poly, y)
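fit_intercept=False is used above because PolynomialFeatures already emits a bias column of ones, so a second intercept would be redundant. A minimal sketch, with made-up data, of the two equivalent choices:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

rng = np.random.RandomState(0)
X = rng.rand(30, 1)
y = 1.0 + 2.0 * X[:, 0] + 3.0 * X[:, 0] ** 2

# Option A: keep the bias column, disable the model intercept
a = LinearRegression(fit_intercept=False).fit(
    PolynomialFeatures(degree=2).fit_transform(X), y)
# Option B: drop the bias column, let the model fit the intercept
b = LinearRegression(fit_intercept=True).fit(
    PolynomialFeatures(degree=2, include_bias=False).fit_transform(X), y)
print(a.coef_[0], b.intercept_)  # both estimate the same constant term (~1.0)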
"""from sklearn.preprocessing import StandardScaler sc_X = StandardScaler() X_train = sc_X.fit_transform(X_train) X_test = sc_X.transform(X_test) sc_y = StandardScaler() y_train = sc_y.fit_transform(y_train)""" # Fitting Linear Regression to the dataset from sklearn.linear_model import LinearRegression lin_reg = LinearRegression() lin_reg.fit(X, y) # Fitting Polynomial Regression to the dataset from sklearn.preprocessing import PolynomialFeatures poly_reg = PolynomialFeatures(degree = 10) X_poly = poly_reg.fit_transform(X) lin_reg_2 = LinearRegression() lin_reg_2.fit(X_poly, y) # Visualizing the Linear Regression results plt.scatter(X, y, color = 'red') plt.plot(X, lin_reg.predict(X), color = 'blue') plt.title('Truth or Bluff (Linear Regression)') plt.xlabel('Position Level') plt.ylabel('Salary') plt.show() # Visualizing the polynomial Regression results X_grid = np.arange(min(X), max(X), 0.1) X_grid = X_grid.reshape((len(X_grid), 1)) plt.scatter(X, y, color = 'red')
#Y = df.pop("Loan_Status")
#t = df.pop("Loan_ID")
a1, b1 = df.shape

# ---------------------------------------------- important section below
#x_col = ['Gender','Married','Education','Self_Employed','Property_Area']
cat_feature = [
    "Gender", "Married", "Dependents", "Education", "Self_Employed",
    "Loan_Amount_Term", "Credit_History", "Property_Area"
]
hotencoder = OneHotEncoder(sparse=False)
onehot = hotencoder.fit_transform(df[cat_feature])
onehotpd = pd.DataFrame(onehot)

poly = PolynomialFeatures(12)
dfnum = pd.DataFrame(poly.fit_transform(df[num_feature]))

X1 = pd.concat([onehotpd, dfnum], axis=1)
X11 = X1.head(a1 - a)
X12 = X1.tail(a)

Xres, Yres = smote_learn(X11, Y)
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X11, Y, test_size=0.3, random_state=42)
print("Nimai 1")

# ---------------------------------------------------------------- XGBoost
start_time = time.time()
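degree=12 here is aggressive: PolynomialFeatures on n inputs at degree d emits C(n+d, d) columns including the bias, so width grows combinatorially. A quick check; len(num_feature) is unknown in this excerpt, so n=5 is an assumed example:

import numpy as np
from math import comb
from sklearn.preprocessing import PolynomialFeatures

n, d = 5, 12  # n stands in for len(num_feature), which this excerpt doesn't show
print(comb(n + d, d))  # 6188 columns, bias included

pf = PolynomialFeatures(degree=d).fit(np.ones((1, n)))
print(pf.powers_.shape[0])  # same count, straight from sklearn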
y_pred_test = lm.predict(X_test)

print('LINEAR REGRESSION')
print('\nTraining set performance:')
print('MSE = %.3f' % mean_squared_error(Y_train, y_pred_train))
print('RMSE = %.3f' % math.sqrt(mean_squared_error(Y_train, y_pred_train)))
print('R2 = %.3f' % r2_score(Y_train, y_pred_train))
print('\nTest set performance:')
print('MSE = %.3f' % mean_squared_error(y_test, y_pred_test))
print('RMSE = %.3f' % math.sqrt(mean_squared_error(y_test, y_pred_test)))
print('R2 = %.3f' % r2_score(y_test, y_pred_test))

#################################################################################
## PolynomialFeatures + LinearRegression #######################################
from sklearn.preprocessing import PolynomialFeatures
pf = PolynomialFeatures(2)
X_train_poly = pf.fit_transform(X_train)
X_test_poly = pf.transform(X_test)  # reuse the transformer fitted on the training set

lm = LinearRegression()
lm.fit(X_train_poly, Y_train)
y_pred_train = lm.predict(X_train_poly)
y_pred_test = lm.predict(X_test_poly)

print('POLYNOMIAL LINEAR REGRESSION')
print('\nTraining set performance:')
print('MSE = %.3f' % mean_squared_error(Y_train, y_pred_train))
print('RMSE = %.3f' % math.sqrt(mean_squared_error(Y_train, y_pred_train)))
print('R2 = %.3f' % r2_score(Y_train, y_pred_train))
print('\nTest set performance:')
print('MSE = %.3f' % mean_squared_error(y_test, y_pred_test))
print('RMSE = %.3f' % math.sqrt(mean_squared_error(y_test, y_pred_test)))
print('R2 = %.3f' % r2_score(y_test, y_pred_test))
#################################################################################
cat_impute = SimpleImputer(strategy='constant')
X_train[:, i] = cat_impute.fit_transform(X_train[:, i].reshape(-1, 1)).reshape(-1)
onehot = OneHotEncoder()
onehot_model = onehot.fit(X_train[:, i].reshape(-1, 1))
onehot_cats.append(onehot_model.categories_)

tmp_l = list()
for l in onehot_cats:
    tmp_l.append(l[0].tolist())

X_train = preprocessing(X_train, categories=tmp_l)
X_test = preprocessing(X_test, categories=tmp_l)
X_pred = preprocessing(data_set["pred_X"].values, categories=tmp_l)

poly = PolynomialFeatures(degree=2)
X_train = poly.fit_transform(X_train)
# Reuse the transformer fitted on the training set
X_test = poly.transform(X_test)
X_pred = poly.transform(X_pred)

linear = LinearRegression(fit_intercept=False, normalize=False)
# ridge = RidgeCV()
lasso = LassoCV(n_alphas=1000)
model = lasso
model = model.fit(X_train, y_train)
train = model.predict(X_train)
test = model.predict(X_test)
error1 = mean_squared_error(y_train, train)
print "R2 score =", round(sm.r2_score(y_test, y_test_pred), 2) print "\nRIDGE:" print "Mean absolute error =", round( sm.mean_absolute_error(y_test, y_test_pred_ridge), 2) print "Mean squared error =", round( sm.mean_squared_error(y_test, y_test_pred_ridge), 2) print "Median absolute error =", round( sm.median_absolute_error(y_test, y_test_pred_ridge), 2) print "Explained variance score =", round( sm.explained_variance_score(y_test, y_test_pred_ridge), 2) print "R2 score =", round(sm.r2_score(y_test, y_test_pred_ridge), 2) # Polynomial regression from sklearn.preprocessing import PolynomialFeatures polynomial = PolynomialFeatures(degree=10) X_train_transformed = polynomial.fit_transform(X_train) datapoint = [0.39, 2.78, 7.11] poly_datapoint = polynomial.fit_transform(datapoint) poly_linear_model = linear_model.LinearRegression() poly_linear_model.fit(X_train_transformed, y_train) print "\nLinear regression:\n", linear_regressor.predict(datapoint) print "\nPolynomial regression:\n", poly_linear_model.predict(poly_datapoint) # Stochastic Gradient Descent regressor sgd_regressor = linear_model.SGDRegressor(loss='huber', n_iter=50) sgd_regressor.fit(X_train, y_train) print "\nSGD regressor:\n", sgd_regressor.predict(datapoint)
import pickle
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

dataset = pd.read_csv('losatank50mg.csv')

# Flatten the 7 readings in each row into a single (x, y) series
tempX_set = {'x': []}
tempY_set = {'y': []}
count = 0
for index, row in dataset.iterrows():
    for i in range(7):
        tempX_set['x'].append(count)
        tempY_set['y'].append(row[i + 1])
        count = count + 1

X = pd.DataFrame(tempX_set)
Y = pd.DataFrame(tempY_set)

poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)
regressor = LinearRegression()
regressor.fit(X_poly, Y)

pickle.dump(regressor, open('losatank_model.pkl', 'wb'))
#print(x_train.shape)
#print(y_train.shape)
#print(x_test.shape)
#print(y_test.shape)

# In[4]:

train_err = []
test_err = []

# In[5]:

# Due to memory limitations this was performed manually; errors were recorded and populated into the arrays.
gc.collect()
poly = PolynomialFeatures(degree=10)
# Polynomial features apply to the inputs only; the targets stay untouched
x_train = poly.fit_transform(x_train)
x_test = poly.transform(x_test)

alpha_vals = np.linspace(1, 20, 20)
for alpha_v in alpha_vals:
    regr = Lasso(alpha=alpha_v, normalize=True, max_iter=10**6)
    regr.fit(x_train, y_train)
    train_err.append(math.sqrt(mean_squared_error(y_train, regr.predict(x_train))))
    test_err.append(math.sqrt(mean_squared_error(y_test, regr.predict(x_test))))

# In[6]:
X = dataset[['Level']].values
y = dataset['Salary'].values
plt.scatter(X, y)
plt.show()

# Fitting Linear Regression to the dataset
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(X, y)
y_pred_lin = lin_reg.predict(X)

# Fitting Polynomial Regression to the dataset
from sklearn.preprocessing import PolynomialFeatures
poly_reg = PolynomialFeatures(degree=2)
poly_X = poly_reg.fit_transform(X)  # expand X into polynomial features
lin_reg_2 = LinearRegression()
lin_reg_2.fit(poly_X, y)
y_pred_poly = lin_reg_2.predict(poly_X)

# Visualising the Linear Regression results
plt.scatter(X, y)
plt.plot(X, y_pred_lin)
plt.title('Linear Regression')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Polynomial Regression results
plt.scatter(X, y)
from sklearn.preprocessing import Imputer
imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
imp.fit([[1, 2], [np.nan, 3], [7, 6]])
X = [[np.nan, 2], [6, np.nan], [7, 6]]
print(imp.transform(X))

import scipy.sparse as sp
X = sp.csc_matrix([[1, 2], [0, 3], [7, 6]])
imp = Imputer(missing_values=0, strategy='mean', axis=0)
imp.fit(X)
X_test = sp.csc_matrix([[0, 2], [6, 0], [7, 6]])
print(imp.transform(X_test))

'''
In many cases it is very effective to increase model complexity by considering
nonlinear features of the input data. A simple and common approach is to use
polynomial features, which capture higher-order and interaction terms.
'''
from sklearn.preprocessing import PolynomialFeatures
x = np.arange(6).reshape(3, 2)
poly = PolynomialFeatures(2)
print(poly.fit_transform(x))
# The feature vector X is transformed from (X_1, X_2) into (1, X_1, X_2, X_1^2, X_1*X_2, X_2^2).

# In some cases, only the interaction terms are needed: interaction_only=True
x = np.arange(9).reshape(3, 3)
poly = PolynomialFeatures(degree=3, interaction_only=True)
print(poly.fit_transform(x))

# Custom transformers
from sklearn.preprocessing import FunctionTransformer
transformer = FunctionTransformer(np.log1p)
x = np.array([[0, 1], [2, 3]])
transformer.transform(x)
train_data = pd.DataFrame(lines)  # training data
train_data = train_data.apply(toFloat, axis=1)  # convert the data from string to float
X_pred = pd.DataFrame(lines2).apply(toFloat, axis=1)  # rows whose price (y) is to be predicted

#################### Preprocessing ####################
##### polynomial features #####
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression

X_train = train_data.iloc[:, :-1]
Y_train = train_data.iloc[:, -1]
poly = PolynomialFeatures(degree=3)
poly_features = poly.fit_transform(X_train)  # polynomial features for training
X_pred_poly = poly.transform(X_pred)  # reuse the fitted transformer for prediction

##### model #####
model = LinearRegression()
model.fit(poly_features, Y_train)  # training the model

##### make predictions #####
#print(X_pred)
predictions = model.predict(X_pred_poly)
for i in predictions:
    print(i)
# Matrix of features (independent variables)
X = dataset.iloc[:, 1:2].values
# Dependent variable vector
y = dataset.iloc[:, -1].values

# Fitting Linear Regression
from sklearn.linear_model import LinearRegression
linear_regressor = LinearRegression()
linear_regressor.fit(X, y)

# Fitting Polynomial Regression
from sklearn.preprocessing import PolynomialFeatures
poly_regressor = PolynomialFeatures(degree=4)
X_poly = poly_regressor.fit_transform(X)
lin_poly_reg = LinearRegression()
lin_poly_reg.fit(X_poly, y)

# Visualize the results
# 1. Linear model
plt.scatter(X, y, color='red')
plt.plot(X, linear_regressor.predict(X), color='green')
plt.title('Linear Regression plot')
plt.xlabel('Position level')
plt.ylabel('Salary')

# 2. Polynomial model, on a denser grid
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))