def fit_KNeighbors(features_train, labels_train, features_pred, n_neighbors=5):
	model = KNeighborsRegressor(n_neighbors=n_neighbors)
	model.fit(features_train, labels_train)
	labels_pred = model.predict(features_pred)
	score = model.score(features_train, labels_train)
	print "KNeighbors - coefficient of determination R^2 of the prediction: ", score
	return labels_pred
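A quick usage sketch on synthetic data (hypothetical; assumes numpy and the KNeighborsRegressor import this snippet elides). Note that the printed score is in-sample R^2, computed on the training data:

import numpy as np
from sklearn.neighbors import KNeighborsRegressor

rng = np.random.RandomState(0)
features_train = rng.rand(100, 3)
labels_train = features_train @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.randn(100)
features_pred = rng.rand(10, 3)

labels_pred = fit_KNeighbors(features_train, labels_train, features_pred, n_neighbors=5)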
Example #2
def fill_income(df):

    income_imputer = KNeighborsRegressor(n_neighbors=2)
    df_w_monthly_income = df[df.monthly_income.notnull()].copy()
    df_w_null_monthly_income = df[df.monthly_income.isnull()].copy()
    cols = ["number_real_estate_loans_or_lines", "number_of_open_credit_lines_and_loans"]
    income_imputer.fit(df_w_monthly_income[cols], df_w_monthly_income.monthly_income)
    new_values = income_imputer.predict(df_w_null_monthly_income[cols])
    df_w_null_monthly_income.loc[:, "monthly_income"] = new_values
    df2 = pd.concat([df_w_monthly_income, df_w_null_monthly_income])
    return df2
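A toy call with made-up values, assuming a DataFrame that has the three columns the function hard-codes:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "monthly_income": [3000.0, np.nan, 4500.0, np.nan, 5200.0],
    "number_real_estate_loans_or_lines": [1, 2, 0, 1, 3],
    "number_of_open_credit_lines_and_loans": [4, 6, 2, 5, 9],
})
df_filled = fill_income(df)  # the two missing incomes are KNN-imputed from the loan counts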
Example #3
def calc_linear_regression(reg_training_path):
    dataset = read_reg_train_data(reg_training_path)
    rmse = 0
    n_folds = 5
    folds = KFold(n_splits=n_folds, shuffle=False)

    fold = 0
    for train_indices, test_indices in folds.split(dataset):
        fold += 1
        training_set = [dataset[i] for i in train_indices]
        test_set = [dataset[i] for i in test_indices]
        training_dataframe = get_data_frame(training_set)
        test_dataframe = get_data_frame(test_set)
        column_names = ['cf_item', 'cf_user', 'svd', 'content_item', 'actual_rating']
        training_dataframe.columns = column_names
        test_dataframe.columns = column_names

        actual_rating_training_column = training_dataframe['actual_rating']
        #actual_rating_test_column = test_dataframe['actual_rating']

        training_dataframe = training_dataframe.drop('actual_rating', axis=1)
        test_dataframe = test_dataframe.drop('actual_rating', axis=1)

        neigh = KNeighborsRegressor(n_neighbors=10)
        #print('Initialized k nearest neighbors regressor with k =', i)
        neigh.fit(training_dataframe, actual_rating_training_column)
        #print('Fit data models')
        predict_set = neigh.predict(test_dataframe)
        print(predict_set)
        rmse += mean_squared_error([rec[4] for rec in test_set], predict_set) ** 0.5
        print("Fold (%d) finished with accumulated RMSE of (%f) (%s)" % (fold, rmse, time.strftime('%y_%m_%d_%H_%M_%S')))
    return rmse / float(n_folds)
def run_kNeighbors(distances, loadings, test_vars, 
                   weightings=('uniform',), k_list=(3,)):
    """
    Run Knearest neighbor using precomputed distances to create an ontological mapping
    
    Args:
        distances: square distance matrix to pass to KNeighborsRegressors
        loadings: loading matrix for training
        test_vars: variable to reconstruct
        weightings: (optional) list of weightings to pass to KNeighbors
        k_list: list of k values to pass to KNeighbors as n_neighbors
    """
    train_distances = distances.loc[loadings.index, loadings.index]
    test_distances = distances.loc[test_vars, loadings.index]
    to_return = pd.DataFrame()
    for weighting in weightings:
        for k in k_list:
            clf = KNeighborsRegressor(metric='precomputed', n_neighbors=k, weights=weighting)
            clf.fit(train_distances, loadings)
            out = clf.predict(test_distances)
            out = pd.DataFrame(out, columns=loadings.columns)
            out['var'] = test_vars
            out['k'] = k
            out['weighting'] = weighting
            # add neighbors and distances
            neighbors = clf.kneighbors(test_distances)
            out['distances'] = tuple(neighbors[0])
            out['neighbors'] = tuple(np.asarray(test_distances.columns)[neighbors[1]])  # 2-D fancy indexing needs an ndarray, not an Index
            to_return = pd.concat([to_return, out], sort=False)
    return to_return
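The metric='precomputed' pattern expects a square train-to-train distance matrix in fit and a test-to-train matrix in predict. A self-contained sketch with made-up data (all names here are hypothetical):

import numpy as np
import pandas as pd
from scipy.spatial.distance import cdist

rng = np.random.RandomState(0)
points = pd.DataFrame(rng.rand(8, 2), index=list("abcdefgh"))
distances = pd.DataFrame(cdist(points, points), index=points.index, columns=points.index)
loadings = pd.DataFrame(rng.rand(6, 3), index=list("abcdef"), columns=["f1", "f2", "f3"])
result = run_kNeighbors(distances, loadings, test_vars=["g", "h"],
                        weightings=("uniform", "distance"), k_list=(3,))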
Example #5
def knn_model(train, y_train, test):
    model = KNeighborsRegressor(n_neighbors = 10, weights='distance', n_jobs=-1)
    model.fit(train, y_train)
    test_probs = model.predict(test)
    indices = test_probs < 0
    test_probs[indices] = 0
    return test_probs
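A usage sketch with synthetic data (assumes the KNeighborsRegressor import; flooring negative predictions at zero only makes sense for non-negative targets):

import numpy as np

rng = np.random.RandomState(0)
train = rng.rand(200, 4)
y_train = rng.rand(200)  # non-negative target
test = rng.rand(20, 4)
test_probs = knn_model(train, y_train, test)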
def main(featureFile, outputfolder):
    with open(featureFile, 'r') as csvfile:
        my_data = pd.read_csv(csvfile, delimiter="\t", low_memory=False)

    random_indices = permutation(my_data.index)
    # Hold out roughly a third of the data for the test set.
    test_cutoff = math.floor(len(my_data) / 3)
    test = my_data.loc[random_indices[:test_cutoff]]

    # Generate the training set with the rest of the data.
    train = my_data.loc[random_indices[test_cutoff:]]

    x_columns = ["Row"=="1", "Student ID"=="2", "Problem Hierarchy" == "3", "Problem Name"=="4", "Problem View" == "5", "Step Name" == "6",
            "KC(Default)"=="7", "Opportunity (Default)" == "8"]
    x_columns = [int(i) for i in x_columns]
    # y columns show the predicted feature, in this case, the correct first attempt
    y_column = ["Correct First Attempt"]

    # Look at the Ten closest neighbors, to offset potential noise in the data
    knn = KNeighborsRegressor(n_neighbors=10)
    knn.fit(train[x_columns], train[y_column])

    # Make point predictions on the test set using the fit model.
    predictions = knn.predict(test[x_columns])
    actual = test[y_column]
    result = test[['Anon Student Id','Correct First Attempt']]
    result.to_csv(outputfolder, sep='\t')

    # Compute the root mean squared error of our predictions.
    rmse = math.sqrt((((predictions - actual) ** 2).sum()) / len(predictions))
    print('RMSE =', rmse)
def apply_knn():
    regr = KNeighborsRegressor()
    regr.fit(Xtr, Ytr)

    pred = regr.predict(Xte)
    temp = mean_squared_error(Yte, pred)
    return pred, temp
    def transform(self, X, y=None):
        """
        :param X: multidimensional numpy array like.
        """
        rows, features = X.shape

        mask = np.isnan(X).any(axis=1)
        criteria_for_bad = np.where(mask)[0]
        criteria_for_good = np.where(~mask)[0]

        X_bad = X[criteria_for_bad]
        X_good = X[criteria_for_good]

        knn = KNeighborsRegressor(n_neighbors=self.k)

        for idx, x_bad in zip(criteria_for_bad.tolist(), X_bad):
            missing = np.isnan(x_bad)
            bad_dim = np.where(missing)[0]
            good_dim = np.where(missing == False)[0]

            for d in bad_dim:
                x = X_good[:, good_dim]
                y = X_good[:, d]
                knn.fit(x, y)

                X[idx, d] = knn.predict(x_bad[good_dim].reshape(1, -1))[0]

        return X
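scikit-learn ships a transformer implementing the same idea (fill each missing entry from the nearest rows under a NaN-aware distance), KNNImputer; a minimal demonstration:

import numpy as np
from sklearn.impute import KNNImputer

X = np.array([[1.0, 2.0, np.nan],
              [3.0, 4.0, 3.0],
              [np.nan, 6.0, 5.0],
              [8.0, 8.0, 7.0]])
print(KNNImputer(n_neighbors=2).fit_transform(X))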
Example #9
def kNN(X_train, y_train, X_test, y_test, uselog=False):
  '''
  Fit a KNN regressor on scaled training features and predict the test set.

  :param X_train: training features
  :param y_train: training targets
  :param X_test: test features
  :param y_test: unused; kept for a uniform signature
  :return: predictions for X_test
  '''

  scaler = StandardScaler()
  print(X_train.shape)
  print(X_test.shape)

  X = scaler.fit_transform(X_train)
  test = scaler.transform(X_test)

  clf = KNeighborsRegressor(n_neighbors=550)

  clf.fit(X, y_train)

  result = clf.predict(test)

  if uselog:
    result = [math.log(1 + x) for x in result]

  return result
def knnPredictor(df):

    dataTrainX, dataTrainY, dataTestX, dataTestY = sample(df)
    corelationCoefficiantDictionary = {}
    corelationCoefficiantArray = []

    for k in range(1, 200, 1):
        knnModel = KNeighborsRegressor(n_neighbors=k)

        knnModel.fit(dataTrainX, dataTrainY)

        knnpredicted = knnModel.predict(dataTestX)
        corelationCoefficient = pearsonr(dataTestY, knnpredicted)
        corelationCoefficiantDictionary[k] = corelationCoefficient[0]
        corelationCoefficiantArray.append(corelationCoefficient[0])

    # plotter.plot(corelationCoefficiantArray)
    bestK = max(corelationCoefficiantDictionary, key=corelationCoefficiantDictionary.get)

    knnModelBest = KNeighborsRegressor(n_neighbors=bestK)
    knnModelBest.fit(dataTrainX, dataTrainY)
    print("K = ")
    print(bestK)
    print("Corelation Coeff:")
    print(corelationCoefficiantDictionary[bestK])

    knnpredictedBest = knnModelBest.predict(dataTestX)

    fig, ax = plotter.subplots()
    corelationCoefficient = pearsonr(dataTestY, knnpredictedBest)
    print(corelationCoefficient[0])
    ax.set_ylabel('Predicted KNN Weekly')
    ax.scatter(dataTestY, knnpredictedBest)
    ax.set_xlabel('Measured')
    plotter.show()
def k_nearest_neighbours():
    filepath = "bondchanges.arff"
    all_data = arff_read_to_array(filepath)
    X_data = all_data["data"]
    Y_data = all_data["target"]
    Y_data_map = {}
    new_Y_data = np.array([])
    i = 1
    for index, data in enumerate(Y_data):
        data1 = data.split('_')[0]
        split_data = ".".join(data1.split('.')[:1])
        if split_data not in Y_data_map:
            Y_data_map[split_data] = i
            i += 1
        print(split_data)
        new_Y_data = np.append(new_Y_data, [Y_data_map[split_data]], 0)  # create
    split = int(0.9 * len(X_data))  # slice indices must be integers
    X_training = X_data[:split]
    Y_training = new_Y_data[:split]
    print(X_training)
    print()
    print(Y_training)
    X_test = X_data[split:]
    Y_test = new_Y_data[split:]
    #svc = svm.SVC(C=1, kernel='')
    knn = KNeighborsClassifier()
    knnr = KNeighborsRegressor(n_neighbors=20000)
    print(knnr.fit(X_training, Y_training).score(X_test, Y_test))
Example #12
	def __init__(self,dataFrame):
		self.dataFrameKNN = {}
		self.KNNWeightage = {'Avg-High Ratio':100,'Avg-Low Ratio':100,'Deliverable Qty':300,'Turnover':100,'Growth':150,'Trend':100,'Output':100}
		self.valid = True
		self.KNNModelHash = {}
		self.dataFrameKNN = pd.DataFrame()
		self.dataFrameKNN['Avg-High Ratio'] = dataFrame['High Price'][1:] - dataFrame['Average Price'][1:]
		self.dataFrameKNN['Avg-Low Ratio'] = dataFrame['Average Price'][1:] - dataFrame['Low Price'][1:]
		self.dataFrameKNN['Deliverable Qty'] = dataFrame['Deliverable Qty'][1:]
		self.dataFrameKNN['Turnover'] = dataFrame['Turnover in Lacs'][1:]
		self.dataFrameKNN['Growth'] = dataFrame['Close Price'][1:]-dataFrame['Prev Close'][1:]
		self.dataFrameKNN['Trend'] = dataFrame['Turnover in Lacs'][1:]
		self.dataFrameKNN['Output'] = dataFrame['High Price'][1:]-dataFrame['Prev Close'][1:]
		self.KNNModelHash['mean'] = self.dataFrameKNN['Output'].mean()
		self.KNNModelHash['std'] = self.dataFrameKNN['Output'].std()
		for key in self.dataFrameKNN:
			self.normalizeKNNModel(key)
		#trainData has the data to be trained, but the last data is the testData
		trainData =	self.dataFrameKNN[['Avg-High Ratio','Avg-Low Ratio','Deliverable Qty','Growth']][:-1].values
		testData = self.dataFrameKNN[['Avg-High Ratio','Avg-Low Ratio','Deliverable Qty','Growth']][-1:].values
		#trainOutput contains the output corresponding to train Data but the first one is garbage
		trainOutput = self.dataFrameKNN['Output'][1:].values
		KNNModel = KNeighborsRegressor(n_neighbors=3,weights = 'distance')
		KNNModel.fit(trainData[100:400],trainOutput[100:400])
		prediction = KNNModel.predict(trainData[400:450])
		weightage = self.KNNWeightage['Output']
		for i in range(50):
			prediction[i] = ((prediction[i]*self.KNNModelHash['std'])+self.KNNModelHash['mean'])/weightage
			trainOutput[400+i] = ((trainOutput[400+i]*self.KNNModelHash['std'])+self.KNNModelHash['mean'])/weightage
			print "%-40s %-40s " %(prediction[i],trainOutput[400+i])
Example #13
    def smooth(self, X, y):
        # KNN regression used to smooth y as a function of X
        nbrs = KNeighborsRegressor(n_neighbors=20)
        X = X.reshape(-1, 1)
        nbrs.fit(X, y)
        proba = nbrs.predict(X)
        return proba
def predictKnn(data, priceToPredict):
    corelationCoefficiantDictionary = {}
    corelationCoefficiantArray = []
    openingPriceTrain, openingPriceTest, closingPriceTrain, closingPriceTest = \
        data["openingPriceTrain"], data["openingPriceTest"], data["closingPriceTrain"], data["closingPriceTest"]

    for k in range( 1 , 100 , 1):
        neigh = KNeighborsRegressor(n_neighbors=k)
        #n = 7 best fits
        neigh.fit(openingPriceTrain, closingPriceTrain)

        closingPriceTestArray = np.reshape(closingPriceTest,-1)
        knnpr = neigh.predict(openingPriceTest)
        predictedArray = np.reshape(knnpr,-1)

        corelationCoefficient = pearsonr(closingPriceTestArray,predictedArray)
        corelationCoefficiantDictionary[k] = corelationCoefficient[0]
        corelationCoefficiantArray.append(corelationCoefficient[0])
    plotter.plot(corelationCoefficiantArray)
    # plotter.show()

    bestK = max(corelationCoefficiantDictionary, key=corelationCoefficiantDictionary.get)
    neighBest = KNeighborsRegressor(n_neighbors=bestK)
    neighBest.fit(openingPriceTrain, closingPriceTrain)
    openingPriceToPredict = np.array([[priceToPredict]])
    print("K =", bestK)
    print(neighBest.predict(openingPriceToPredict))
Example #15
def Round2(X, y):
    # Set parameters
    min_score = {}
    for neigh in [5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000]:

        model = KNeighborsRegressor(n_neighbors=neigh)
        n = len(y)

        # Perform 5-fold cross validation
        scores = []
        kf = KFold(n_splits=5, shuffle=True)

        # Calculate mean absolute deviation for train/test for each fold
        for train_idx, test_idx in kf.split(X):
            X_train, X_test = X[train_idx], X[test_idx]
            y_train, y_test = y[train_idx], y[test_idx]
            model.fit(X_train, y_train)
            prediction = model.predict(X_test)
            rmse = np.sqrt(mean_squared_error(y_test, prediction))
            # score = model.score(X_test, y_test)
            scores.append(rmse)
        if len(min_score) == 0:
            min_score['neighbor'] = neigh
            min_score['scores'] = scores
        else:
            if np.mean(scores) < np.mean(min_score['scores']):
                min_score['neighbor'] = neigh
                min_score['scores'] = scores
        print "Neighbors:", neigh
        print scores
        print np.mean(scores)
    return min_score
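The same search over n_neighbors can be written with GridSearchCV and a negated-RMSE scorer; a compact sketch (the candidate values must not exceed the training size of a fold):

from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor

def round2_gridsearch(X, y):
    param_grid = {"n_neighbors": [5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000]}
    search = GridSearchCV(KNeighborsRegressor(), param_grid,
                          scoring="neg_root_mean_squared_error", cv=5)
    search.fit(X, y)
    return search.best_params_["n_neighbors"]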
Example #16
def run_network(mdl=None, data=None):
    global_start_time = time.time()
    sequence_length = 10

    if data is None:
        print('Loading data... ')
        X_train, y_train, X_test, y_test = train_test_traffic_data(15773, sequence_length)
    else:
        X_train, y_train, X_test, y_test = data

    print('\nData Loaded...\n')

    if mdl is None:
        mdl = KNeighborsRegressor(5, weights='distance')

    try:
        mdl.fit(X_train, y_train)
        predicted_traffic = mdl.predict(X_test)
    except KeyboardInterrupt:
        print('Training duration (s) : ', time.time() - global_start_time)
        return mdl, y_test, 0

    print('Training duration (s) : ', time.time() - global_start_time)

    return mdl, y_test, predicted_traffic
Example #17
    def fit(self, start_date, end_date):

        for ticker in self.tickers:
            self.stocks[ticker] = Stock(ticker)

        params_svr = [{
            'n_neighbors': [2, 5, 10, 15]}]
        params = ParameterGrid(params_svr)

        # Find the split for training and CV
        mid_date = train_test_split(start_date, end_date)
        for ticker, stock in self.stocks.items():

            # pdb.set_trace()
            X_train, y_train = stock.get_data(start_date, mid_date, fit=True)
            X_cv, y_cv = stock.get_data(mid_date, end_date)

            lowest_mse = np.inf
            for i, param in enumerate(params):
                knn = KNeighborsRegressor(**param)
                # ada = AdaBoostRegressor(knn)
                knn.fit(X_train.values, y_train.values)
                mse = mean_squared_error(y_cv, knn.predict(X_cv.values))
                if mse <= lowest_mse:
                    lowest_mse = mse
                    self.models[ticker] = knn

        return self
    def train(self, x, y, param_names, random_search=100, **kwargs):
        start = time.time()
        scaled_x = self._set_and_preprocess(x=x, param_names=param_names)

        # Check that each input is between 0 and 1
        self._check_scaling(scaled_x=scaled_x)

        if self._debug:
            print "Shape of training data: ", scaled_x.shape
            print "Param names: ", self._used_param_names
            print "First training sample\n", scaled_x[0]
            print "Encode: ", self._encode

        # Do a random search
        n_neighbors = self._random_search(random_iter=random_search, x=scaled_x, y=y)

        # Now train model
        knn = KNeighborsRegressor(n_neighbors=n_neighbors,
                                  weights='uniform',
                                  algorithm='auto',
                                  leaf_size=30,
                                  p=2,
                                  metric='minkowski')
        knn.fit(scaled_x, y)
        self._model = knn

        duration = time.time() - start
        self._training_finished = True
        return duration
Example #19
class ModelNNReg(ScikitPredictor):
    '''Nearest neighbor regression'''

    def generate_model(self):
        self.model = KNeighborsRegressor(**self.model_kwargs)

    def fit_model(self, x, y):
        self.model.fit(x, y)
    def predictDayType (self,week,day):
        
        knn = KNeighborsRegressor(n_neighbors=5)
        knn.fit(self.rawData, self.dayType)

        X = np.array([[week, day]])
        predictions = knn.predict(X)
        return predictions
Example #21
def nnVerify_2(city_data,x,y):
    """ Using SKLearn's KNeighborsRegressor """
    X,Y = city_data.data, city_data.target
    clf = KNeighborsRegressor(n_neighbors=2)
    clf.fit(X,Y)
    y_pred = clf.predict(x)
    print("KNeighborsRegressor")
    print("Y pred(KNN) : ", y_pred)
Example #22
def calculateKNearestNeighborsModel(data, numberOfNeighbors):
	# Select input variables as x and typecast to numpy array
	x = np.array(data.iloc[0:,0:11])
	# Select output variable (quality) as y and typecast to numpy array
	y = np.array(data.quality)
	neighbors = KNeighborsRegressor(n_neighbors=numberOfNeighbors)
	neighbors.fit(x, y)
	return neighbors
Example #23
def main():
    # read the images
    image_from = io.imread(name_from) / 256
    image_to = io.imread(name_to) / 256

    # change to hsv domain (if requested)
    if args.use_hsv:
        image_from[:] = rgb2hsv(image_from)
        image_to[:] = rgb2hsv(image_to)

    # get shapes
    shape_from = image_from.shape
    shape_to = image_to.shape

    # flatten
    X_from = im2mat(image_from)
    X_to = im2mat(image_to)

    # number of pixes
    n_pixels_from = X_from.shape[0]
    n_pixels_to = X_to.shape[0]

    # subsample
    X_from_ss = X_from[np.random.randint(0, n_pixels_from-1, n_pixels),:]
    X_to_ss = X_to[np.random.randint(0, n_pixels_to-1, n_pixels),:]

    if save_col_distribution:
        import matplotlib.pyplot as plt
        import seaborn as sns
        sns.set_style('white')

        fig, axes = plt.subplots(nrows=2, figsize=(5, 10))
        for ax, X in zip(axes, [X_from_ss, X_to_ss]):
            ax.scatter(X[:, 0], X[:, 1], color=X)
            if args.use_hsv:
                ax.set_xlabel('hue')
                ax.set_ylabel('value')
            else:
                ax.set_xlabel('red')
                ax.set_ylabel('green')
        axes[0].set_title('distr. from')
        axes[1].set_title('distr. to')
        fig.tight_layout()
        fig.savefig('color_distributions.png')

    # optimal transportation
    ot_color = OptimalTransport(X_to_ss, X_from_ss, lam=lam,
                                    distance_metric=distance_metric)

    # model transfer
    transfer_model = KNeighborsRegressor(n_neighbors=n_neighbors)
    transfer_model.fit(X_to_ss, n_pixels * ot_color.P @ X_from_ss)
    X_transferred = transfer_model.predict(X_to)

    image_transferred = minmax(mat2im(X_transferred, shape_to))
    if args.use_hsv:
        image_transferred[:] = hsv2rgb(image_transferred)
    io.imsave(name_out, image_transferred)
Example #24
class Knn(ContextEngineBase):
    y_Test = np.empty([0])
    # Knn object
    knnRegressor = None

    def __init__(self, numInputs, outputClassifier, inputClassifiers, appFieldsDict):
        ContextEngineBase.__init__(self, numInputs, outputClassifier, inputClassifiers, appFieldsDict)
        # Passed parameters
        self.n_neighbors = appFieldsDict['n_neighbors']
        self.weights = appFieldsDict['weights']
        self.algorithm = appFieldsDict['algorithm']
        self.n_jobs = appFieldsDict['n_jobs']
        # Defining a Knn object with given parameters
        self.knnRegressor = KNeighborsRegressor(n_neighbors = self.n_neighbors, 
                                                weights = self.weights,
                                                algorithm = self.algorithm,
                                                n_jobs = self.n_jobs)

    #  Add a set of training observations, with newInputObsMatrix being a
    #  matrix of doubles whose column count must match the number of inputs
    #  and whose row count must match the number of observations,
    #  and newOutputVector being a column vector of doubles
    def addBatchObservations(self, newInputObsMatrix, newOutputVector):
        if(len(newInputObsMatrix.shape) == 2 and newInputObsMatrix.shape[1] == self.numInputs
            and newOutputVector.shape[0] == newInputObsMatrix.shape[0]):
            # print("All good!")
            newOutputVector = newOutputVector.ravel()
            i = 0
            for newInputVector in newInputObsMatrix:
                newOutputValue = newOutputVector[i]
                self.addSingleObservation(newInputVector, newOutputValue)
                i += 1
        else:
            print("Wrong dimensions!")

    #  Train the coefficients on the existing observation matrix if there are
    #  enough observations.
    def train(self):
        if (self.numObservations > 0):
            # print("Training started")
            self.knnRegressor.fit(self.observationMatrix, self.outputVector)
            return True
        else:
            print("Not enough observations to train!")
            return False

    #  Execute the trained matrix against the given input observation
    #  inputObsVector is a row vector of doubles
    def execute(self, inputObsVector):
        if(len(inputObsVector) == self.numInputs):
            # print("Begin execute")
            #x_Test = np.vstack((self.x_Test,inputObsVector))
            x_Test = np.reshape(inputObsVector,(1,self.numInputs))
            self.y_Test = self.knnRegressor.predict(x_Test)
            return self.y_Test[0]
        else:
            print("Wrong dimensions, fail to execute")
            return None
Example #25
def knn_regressor(features, solutions, verbose=0):
    columns = solutions.columns

    clf = KNeighborsRegressor(n_neighbors=5, weights='distance')

    print('Training Model... ')
    clf.fit(features, solutions)
    print('Done Training')
    return (clf, columns)
Example #26
def impute_KNN(df, var, features, k):
    var_imputer = KNeighborsRegressor(n_neighbors=k)
    df_full = df[df[var].notnull()].copy()
    df_null = df[df[var].isnull()].copy()
    var_imputer.fit(df_full[features], df_full[var])
    df_null[var] = var_imputer.predict(df_null[features])
    df = pd.concat([df_full, df_null])
    return df
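A toy call with hypothetical column names:

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "age": [25, 32, 47, 51, 38],
    "tenure": [1, 4, 9, 12, 6],
    "salary": [30000.0, np.nan, 72000.0, np.nan, 52000.0],
})
df = impute_KNN(df, var="salary", features=["age", "tenure"], k=2)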
Example #27
class kNN():
    '''
        kNN classifier
        -------------
    '''

    def __init__(self,N_i,N_o,k=5,n=20):
        # note: N_o=1 assumed for now
        self.N_i = N_i
        self.n = n
        self.i = 0
        self.k = k
        self.X = zeros((self.n,N_i))
        self.y = zeros((self.n))
        self.h = KNeighborsRegressor(n_neighbors=k, weights='distance')
        self.c = 0
        #self.error_rate = 0

    def predict(self,x):
        '''
            Predict
            --------------
        '''

        if self.c < 1.:
            print "[Warning!] No training examples!"
            return 0.0
        elif self.c <= self.k:
            dist,ind = self.h.kneighbors(self.X[0:self.c],n_neighbors=1)
            i_max = argmax(ind)
            return self.y[i_max]

        return self.h.predict(x)#.reshape(1,-1))

#    def samples_X(self):
#        ''' return samples of the WEIGHTS '''
#        if self.c <= 0:
#            return self.X[0,:]
#        return self.X[0:self.c,:]

    def update(self, x, y):
        '''
            Update
            --------------
        '''
        self.X[self.i,:] = x
        self.y[self.i] = y

        #self.error_rate = (y - self.predict(x))**2

        self.i = (self.i + 1) % self.n

        if self.c < self.n:
            self.c = self.c + 1

        self.h.fit(self.X[0:self.c,:], self.y[0:self.c])
Example #28
def nearest_neighbors_impute(df, coordinate_columns, data_columns, knr_params={}):
    from sklearn.neighbors import KNeighborsRegressor
    for column in data_columns:
        not_null = df[column].notnull()
        if (~not_null).sum() == 0:
            continue
        knr = KNeighborsRegressor(**knr_params)
        knr.fit(df.loc[not_null,coordinate_columns], df.loc[not_null,[column]])
        predicted = knr.predict(df.loc[~not_null,coordinate_columns])
        df.loc[ (~not_null),[column]] = predicted
Example #29
    def addJKRegionLabels(self):
        data = list(zip(self.data['RA'], self.data['DEC']))
        randoms = list(zip(self.randoms['RA'], self.randoms['DEC']))

        finder = KMeans(n_clusters=self.config['n_jackknife'])
        self.data_jk_indices = finder.fit_predict(data)

        nbrs = KNeighborsRegressor(n_neighbors=1)
        nbrs.fit(data, self.data_jk_indices)
        self.random_jk_indices = nbrs.predict(randoms)
Example #30
def knn(X, Y):
    neigh = KNeighborsRegressor()
    neigh.fit(X, Y)

    def explore(x):
        # negate so that minimizing explore maximizes the KNN prediction;
        # index [0] returns a scalar, as differential_evolution expects
        score = -1 * neigh.predict([x])[0]
        return score

    minimized = differential_evolution(explore, ((0, 1), (0, 1), (0, 1), (0, 1), (0, 1)))
    return {
        'X_min': list(minimized.x),
        'score': neigh.score(X, Y)
    }
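A usage sketch: explore negates the KNN prediction, so minimizing it over the unit 5-cube finds the input with the highest predicted value; X must therefore have five features (synthetic data here, with the scipy.optimize and sklearn imports assumed):

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(50, 5)
Y = np.sin(X).sum(axis=1)
print(knn(X, Y))  # {'X_min': [...], 'score': ...}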
Example #31
'''------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------'''
'''fit knn to the model'''
from sklearn.neighbors import KNeighborsRegressor  # KNeighborsRegressor for regression (KNeighborsClassifier for classification)
knn = KNeighborsRegressor() 

'''find the optimal parameters in KNN'''
param_dict = {
                'n_neighbors': [5,10,15],
                'weights': ['uniform', 'distance' ],
                'p' :[1, 2]          
             }

from sklearn.model_selection import GridSearchCV
knn = GridSearchCV(knn,param_dict)
knn.fit(X_train,y_train)
knn.best_params_ 
knn.best_score_


'''refit knn to the model with optimal parameters'''
knn=KNeighborsRegressor(n_neighbors= 15, p=1, weights='uniform')
knn.fit(X_train,y_train)
#predictions for test
y_pred2 = knn.predict(X_test)


'''R2 and adjusted R2, and rmse'''
r2=knn.score(X_test,y_test)
print(r2)
adj_r2 = 1 - ((len(X_test)-1)/(len(X_test)-X_test.shape[1]-1))*(1-knn.score(X_test, y_test))
print(adj_r2)
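The header above also promises an RMSE; one more line, assuming numpy and sklearn.metrics are available:

import numpy as np
from sklearn.metrics import mean_squared_error

rmse = np.sqrt(mean_squared_error(y_test, y_pred2))
print(rmse)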
Example #32
data = data.swapaxes(0,1)
y.shape = (1,-1)
y = y.swapaxes(0,1)

print('sample dataset is generated')


X = data
X[isnan(X)]=0
y[isnan(y)]=0

Xp = cube2[:].reshape(cube2.shape[0],-1).swapaxes(0,1)

from sklearn.neighbors import KNeighborsRegressor
estimator = KNeighborsRegressor(weights='distance')
estimator.fit(X, y)
y_pred = estimator.predict(Xp)


dvf = ma.array(y_pred).reshape(128, 128, -1)  # keep all channels; dvf[:, :, 1] below needs at least two
dvf.mask=dvf < 5e3
figure()
subplot(121)
imshow(dvf[:,:,0],cmap='seismic',vmin=dvf.min(),vmax=dvf.max())
contour(cube.max(0),levels=linspace(0.2,40,12))
subplot(122)
imshow(dvf[:,:,1],cmap='seismic',vmin=dvf.min(),vmax=dvf.max())
contour(cube.max(0),levels=linspace(0.2,40,12))
y = y.reshape(y.shape)

B2 = linspace(0,37,n_grid)
Example #33
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.decomposition import PCA
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import make_pipeline

train = pd.read_csv('mercedes_train.csv')
test = pd.read_csv('mercedes_test.csv')

y_train = train['y'].values
id_test = test['ID']

num_train = len(train)
df_all = pd.concat([train, test])
df_all.drop(['ID', 'y'], axis=1, inplace=True)

# One-hot encoding of categorical/strings
df_all = pd.get_dummies(df_all, drop_first=True)

train = df_all[:num_train]
test = df_all[num_train:]

from sklearn.neighbors import KNeighborsRegressor

knn = KNeighborsRegressor(n_neighbors=5)

knn.fit(train, y_train)

df_sub = pd.DataFrame({'ID': id_test, 'y': knn.predict(test)})
df_sub.to_csv('mercedes-submission.csv', index=False)
Example #34
#y_train = ss_y.fit_transform(y_train)
#y_test = ss_y.fit_transform(y_test)

# Import KNeighborsRegressor (the k-nearest-neighbors regressor) from sklearn.neighbors.
from sklearn.neighbors import KNeighborsRegressor
mse1 = []
mse2 = []
mae1 = []
mae2 = []
r21 = []
r22 = []
for i in range(4, 40):
    n_neighbors = i
    # Initialize a KNN regressor configured to predict with a plain average: weights='uniform'.
    uni_knr = KNeighborsRegressor(weights='uniform', n_neighbors=n_neighbors)
    uni_knr.fit(X_train, y_train)
    uni_knr_y_predict = uni_knr.predict(X_test)

    # Initialize a KNN regressor configured to predict with distance-weighted averaging: weights='distance'.
    dis_knr = KNeighborsRegressor(weights='distance', n_neighbors=n_neighbors)
    dis_knr.fit(X_train, y_train)
    dis_knr_y_predict = dis_knr.predict(X_test)

    from sklearn.metrics import mean_absolute_error, mean_squared_error
    # Evaluate the uniform-weighted KNN model on the test set with R-squared, MSE, and MAE.
    mse1.append(mean_squared_error(y_test, uni_knr_y_predict))
    mse2.append(mean_squared_error(y_test, dis_knr_y_predict))
    print('R-squared value of uniform-weighted KNeighborsRegressor:',
          uni_knr.score(X_test, y_test))  # this is R^2
    print('The mean squared error of uniform-weighted KNeighborsRegressor:',
          mean_squared_error(y_test, uni_knr_y_predict))
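    # (sketch) the mae1/mae2 and r21/r22 lists declared above are never filled;
    # presumably the intent was to record these per iteration as well:
    mae1.append(mean_absolute_error(y_test, uni_knr_y_predict))
    mae2.append(mean_absolute_error(y_test, dis_knr_y_predict))
    r21.append(uni_knr.score(X_test, y_test))
    r22.append(dis_knr.score(X_test, y_test))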
Example #35
                sdpval = str(float(sd[1]))
                open(
                    "/home/paul/mesa_models/python_ml_models/results/" +
                    train_pop + "_2_" + pop + "_svr_rbf_cor_test_chr" +
                    str(chrom) + ".txt",
                    "a").write(gene + "\t" + gene_name + "\t" + pacoef + "\t" +
                               papval + "\t" + pbcoef + "\t" + pbpval + "\t" +
                               sccoef + "\t" + scpval + "\t" + sdcoef + "\t" +
                               sdpval + "\n")

                #KNN
                #knn_t0 = time.time()#time it
                #knn_cv = str(float(mean(cross_val_score(knn, cis_gt, adj_exp.ravel(), cv=5))))
                #knn_t1 = time.time()
                #knn_tt = str(float(knn_t1 - knn_t0))
                knn.fit(cis_gt, adj_exp.ravel())
                ypred = knn.predict(test_cis_gt)

                #prepare ypred for writing out to a file
                ypred_pd = pd.DataFrame(ypred)

                ypred_pd.columns = gg
                ypred_pd.index = test_ids
                ypred_frame_knn = pd.concat([ypred_frame_knn, ypred_pd],
                                            axis=1,
                                            sort=True)

                pa = stats.pearsonr(test_adj_exp, ypred)
                pacoef = str(float(pa[0]))
                papval = str(float(pa[1]))
                pb = stats.pearsonr(test_yobs, ypred)
Example #36
from open3d import read_point_cloud, write_point_cloud, Vector3dVector
from sklearn.neighbors import KNeighborsRegressor
from numpy import array, concatenate

cloud = read_point_cloud('tree.ply')
calibrate = read_point_cloud('photo_test.ply')

neigh0 = KNeighborsRegressor(n_neighbors=4, weights='distance', n_jobs=-1)
neigh0.fit(calibrate.points, calibrate.colors)

arr_points = array(cloud.points)
arr_colors = array(cloud.colors)
arr_filter = (array(cloud.normals)[:, 0] != 0) & (arr_colors[:, 2] > 0.5)
points_other = arr_points.compress(~arr_filter, 0)
colors_other = arr_colors.compress(~arr_filter, 0)

neigh1 = KNeighborsRegressor(n_neighbors=1, n_jobs=-1)
neigh1.fit(points_other, colors_other)

points_abn = arr_points.compress(arr_filter, 0)
colors_abn = (neigh0.predict(points_abn) + neigh1.predict(points_abn)) / 2
cloud.points = Vector3dVector(concatenate((points_other, points_abn)))
cloud.colors = Vector3dVector(concatenate((colors_other, colors_abn)))
cloud.normals = Vector3dVector()
write_point_cloud('corr.ply', cloud)
Example #37
def task2(data):

	df = data

	dfreg = df.loc[:,['Adj Close','Volume']]
	dfreg['HL_PCT'] = (df['High'] - df['Low']) / df['Close'] * 100.0
	dfreg['PCT_change'] = (df['Close'] - df['Open']) / df['Open'] * 100.0

	# Drop missing value
	dfreg.fillna(value=-99999, inplace=True)
	# We want to separate 1 percent of the data to forecast
	forecast_out = int(math.ceil(0.01 * len(dfreg)))
	# Separating the label here, we want to predict the AdjClose
	forecast_col = 'Adj Close'
	dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
	X = np.array(dfreg.drop(['label'], axis=1))
	# Scale the X so that everyone can have the same distribution for linear regression
	X = preprocessing.scale(X)
	# Finally, separate the late X (to forecast) from the early X (train) series for model generation and evaluation
	X_lately = X[-forecast_out:]
	X = X[:-forecast_out]
	# Separate label and identify it as y
	y = np.array(dfreg['label'])
	y = y[:-forecast_out]
	
	#Split data
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

	##################
	##################
	##################


	# Linear regression
	clfreg = LinearRegression(n_jobs=-1)
	# 1 - First save the models to local device in models folder
	# filename = 'models/clfreg_model.sav'
	# pickle.dump(clfreg, open(filename, 'wb'))

	# 2 - load the models from disk once the first instruction has been run once.
	# clfreg = pickle.load(open(filename, 'rb'))
	clfreg.fit(X_train, y_train)


	# Quadratic Regression 2
	clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
	#Save model to a pickle
	# filename1 = 'models/clfpoly2_model.sav'
	# pickle.dump(clfpoly2, open(filename1, 'wb'))
	
	# 2 - load the models from disk once the first instruction has been run once.
	# clfpoly2 = pickle.load(open(filename1, 'rb'))
	clfpoly2.fit(X_train, y_train)


	# Quadratic Regression 3
	clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
	#Save model to a pickle
	# filename2 = 'models/clfpoly3_model.sav'
	# pickle.dump(clfpoly3, open(filename2, 'wb'))
	
	# 2 - load the models from disk once the first instruction has been run once.
	# clfpoly3 = pickle.load(open(filename2, 'rb'))
	clfpoly3.fit(X_train, y_train)


	# KNN Regression
	clfknn = KNeighborsRegressor(n_neighbors=2)
	#Save model to a pickle
	# filename3 = 'models/clfknn_model.sav'
	# pickle.dump(clfknn, open(filename3, 'wb'))
	
	# 2 - load the models from disk once the first instruction has been run once.
	# clfknn = pickle.load(open(filename3, 'rb'))
	clfknn.fit(X_train, y_train)


	# Lasso Regression
	clflas = Lasso()
	#Save model to a pickle
	# filename4 = 'models/clflas_model.sav'
	# pickle.dump(clflas, open(filename4, 'wb'))
	
	# 2 - load the models from disk once the first instruction has been run once.
	# clflas = pickle.load(open(filename4, 'rb'))
	clflas.fit(X_train, y_train)


	# Multitask Lasso Regression
	# clfmtl = MultiTaskLasso(alpha=1.)
	# clfmtl.fit(X_train, y_train).coef_


	# Bayesian Ridge Regression
	clfbyr = BayesianRidge()
	clfbyr.fit(X_train, y_train)
	#Save model to a pickle
	# filename5 = 'models/clfbyr_model.sav'
	# pickle.dump(clfbyr, open(filename5, 'wb'))
	
	# 2 - load the models from disk once the first instruction has been run once.
	# clfbyr = pickle.load(open(filename5, 'rb'))


	# Lasso LARS Regression
	clflar = LassoLars(alpha=.1)
	clflar.fit(X_train, y_train)
	#Save model to a pickle
	# filename6 = 'models/clflar_model.sav'
	# pickle.dump(clflar, open(filename6, 'wb'))
	
	# 2 - load the models from disk once the first instruction has been run once.
	# clflar = pickle.load(open(filename6, 'rb'))


	# Orthogonal Matching Pursuit Regression
	clfomp = OrthogonalMatchingPursuit(n_nonzero_coefs=2)
	clfomp.fit(X_train, y_train)
	#Save model to a pickle
	# filename7 = 'models/clfomp_model.sav'
	# pickle.dump(clfomp, open(filename7, 'wb'))
	
	# 2 - load the models from disk once the first instruction has been run once.
	# clfomp = pickle.load(open(filename7, 'rb'))


	# Automatic Relevance Determination Regression
	clfard = ARDRegression(compute_score=True)
	clfard.fit(X_train, y_train)
	#Save model to a pickle
	# filename8 = 'models/clfard_model.sav'
	# pickle.dump(clfard, open(filename8, 'wb'))
	
	# 2 - load the models from disk once the first instruction has been run once.
	# clfard = pickle.load(open(filename8, 'rb'))


	# Logistic Regression
	# clflgr = linear_model.LogisticRegression(penalty='l1', solver='saga', tol=1e-6, max_iter=int(1e6), warm_start=True)
	# coefs_ = []
	# for c in cs:
	#   clflgr.set_params(C=c)
	#   clflgr.fit(X_train, y_train)
	#   coefs_.append(clflgr.coef_.ravel().copy())


	#SGD Regression
	clfsgd = SGDRegressor(random_state=0, max_iter=1000, tol=1e-3)
	clfsgd.fit(X_train, y_train)
	#Save model to a pickle
	# filename9 = 'models/clfsgd_model.sav'
	# pickle.dump(clfsgd, open(filename9, 'wb'))
	
	# 2 - load the models from disk once the first instruction has been run once.
	# clfsgd = pickle.load(open(filename9, 'rb'))


	##################
	##################
	##################


	#Create confidence scores
	confidencereg = clfreg.score(X_test, y_test)
	confidencepoly2 = clfpoly2.score(X_test,y_test)
	confidencepoly3 = clfpoly3.score(X_test,y_test)
	confidenceknn = clfknn.score(X_test, y_test)
	confidencelas = clflas.score(X_test, y_test)
	# confidencemtl = clfmtl.score(X_test, y_test)
	confidencebyr = clfbyr.score(X_test, y_test)
	confidencelar = clflar.score(X_test, y_test)
	confidenceomp = clfomp.score(X_test, y_test)
	confidenceard = clfard.score(X_test, y_test)
	confidencesgd = clfsgd.score(X_test, y_test)

	# results
	print('The linear regression confidence is:',confidencereg*100)
	print('The quadratic regression 2 confidence is:',confidencepoly2*100)
	print('The quadratic regression 3 confidence is:',confidencepoly3*100)
	print('The knn regression confidence is:',confidenceknn*100)
	print('The lasso regression confidence is:',confidencelas*100)
	# print('The lasso regression confidence is:',confidencemtl*100)
	print('The Bayesian Ridge regression confidence is:',confidencebyr*100)
	print('The Lasso LARS regression confidence is:',confidencelar*100)
	print('The OMP regression confidence is:',confidenceomp*100)
	print('The ARD regression confidence is:',confidenceard*100)
	print('The SGD regression confidence is:',confidencesgd*100)

	#Create new columns
	forecast_reg = clfreg.predict(X_lately)
	forecast_pol2 = clfpoly2.predict(X_lately)
	forecast_pol3 = clfpoly3.predict(X_lately)
	forecast_knn = clfknn.predict(X_lately)
	forecast_las = clflas.predict(X_lately)
	forecast_byr = clfbyr.predict(X_lately)
	forecast_lar = clflar.predict(X_lately)
	forecast_omp = clfomp.predict(X_lately)
	forecast_ard = clfard.predict(X_lately)
	forecast_sgd = clfsgd.predict(X_lately)

	#Process all new columns data
	dfreg['Forecast_reg'] = np.nan

	last_date = dfreg.iloc[-1].name
	last_unix = last_date
	next_unix = last_unix + datetime.timedelta(days=1)

	for i in forecast_reg:
	    next_date = next_unix
	    next_unix += datetime.timedelta(days=1)
	    dfreg.loc[next_date] = [np.nan for _ in range(len(dfreg.columns))]
	    dfreg['Forecast_reg'].loc[next_date] = i
	    
	dfreg['Forecast_pol2'] = np.nan

	last_date = dfreg.iloc[-26].name
	last_unix = last_date
	next_unix = last_unix + datetime.timedelta(days=1)
	    
	for i in forecast_pol2:
	    next_date = next_unix
	    next_unix += datetime.timedelta(days=1)
	    dfreg['Forecast_pol2'].loc[next_date] = i

	dfreg['Forecast_pol3'] = np.nan

	last_date = dfreg.iloc[-26].name
	last_unix = last_date
	next_unix = last_unix + datetime.timedelta(days=1)
	    
	for i in forecast_pol3:
	    next_date = next_unix
	    next_unix += datetime.timedelta(days=1)
	    dfreg['Forecast_pol3'].loc[next_date] = i
	    
	dfreg['Forecast_knn'] = np.nan

	last_date = dfreg.iloc[-26].name
	last_unix = last_date
	next_unix = last_unix + datetime.timedelta(days=1)
	    
	for i in forecast_knn:
	    next_date = next_unix
	    next_unix += datetime.timedelta(days=1)
	    dfreg['Forecast_knn'].loc[next_date] = i
	        
	dfreg['Forecast_las'] = np.nan

	last_date = dfreg.iloc[-26].name
	last_unix = last_date
	next_unix = last_unix + datetime.timedelta(days=1)
	    
	for i in forecast_las:
	    next_date = next_unix
	    next_unix += datetime.timedelta(days=1)
	    dfreg['Forecast_las'].loc[next_date] = i
	    
	dfreg['Forecast_byr'] = np.nan

	last_date = dfreg.iloc[-26].name
	last_unix = last_date
	next_unix = last_unix + datetime.timedelta(days=1)
	    
	for i in forecast_byr:
	    next_date = next_unix
	    next_unix += datetime.timedelta(days=1)
	    dfreg['Forecast_byr'].loc[next_date] = i
	    
	dfreg['Forecast_lar'] = np.nan

	last_date = dfreg.iloc[-26].name
	last_unix = last_date
	next_unix = last_unix + datetime.timedelta(days=1)
	    
	for i in forecast_lar:
	    next_date = next_unix
	    next_unix += datetime.timedelta(days=1)
	    dfreg['Forecast_lar'].loc[next_date] = i
	    
	dfreg['Forecast_omp'] = np.nan

	last_date = dfreg.iloc[-26].name
	last_unix = last_date
	next_unix = last_unix + datetime.timedelta(days=1)
	    
	for i in forecast_omp:
	    next_date = next_unix
	    next_unix += datetime.timedelta(days=1)
	    dfreg['Forecast_omp'].loc[next_date] = i
	    
	dfreg['Forecast_ard'] = np.nan

	last_date = dfreg.iloc[-26].name
	last_unix = last_date
	next_unix = last_unix + datetime.timedelta(days=1)
	    
	for i in forecast_ard:
	    next_date = next_unix
	    next_unix += datetime.timedelta(days=1)
	    dfreg['Forecast_ard'].loc[next_date] = i
	    
	dfreg['Forecast_sgd'] = np.nan

	last_date = dfreg.iloc[-26].name
	last_unix = last_date
	next_unix = last_unix + datetime.timedelta(days=1)
	    
	for i in forecast_sgd:
	    next_date = next_unix
	    next_unix += datetime.timedelta(days=1)
	    dfreg['Forecast_sgd'].loc[next_date] = i

	return dfreg.index.format(formatter=lambda x: x.strftime('%Y-%m-%d')), dfreg['Adj Close'].to_list(), dfreg['Forecast_reg'].to_list(), dfreg['Forecast_pol2'].to_list(), dfreg['Forecast_pol3'].to_list(), dfreg['Forecast_knn'].to_list(), dfreg['Forecast_las'].to_list(), dfreg['Forecast_byr'].to_list(), dfreg['Forecast_lar'].to_list(), dfreg['Forecast_omp'].to_list(), dfreg['Forecast_ard'].to_list(), dfreg['Forecast_sgd'].to_list()
Example #38
def regression_subset(predictions, train, test, method):

    mean_error = []
    if (method == 1):
        machine_learn = KNeighborsRegressor(n_neighbors=5, weights='distance')
    elif (method == 2):
        machine_learn = MLPRegressor(random_state=0)
    #for each building
    for i in range(3):

        new_train = train.loc[
            train['BUILDINGID'] ==
            i]  #select for training only buildings with that label (0,1, or 2)
        indexes = [x for x in range(len(predictions)) if predictions[x] == i
                   ]  #get the position of the samples that have building == i

        if (indexes):  #if list is not empty
            #training, samples with building == i
            X_train = new_train.iloc[:, 0:519]
            Y_train = new_train[['LONGITUDE', 'LATITUDE']]
            machine_learn.fit(X_train, Y_train)

            #testing samples with prediction building == i
            new_test = test.iloc[indexes, :]
            X_test = new_test.iloc[:, 0:519]
            Y_test = new_test[['LONGITUDE', 'LATITUDE']]

            #Turn into list
            predicts_lon_lat = machine_learn.predict(X_test).tolist()
            Y_test = Y_test.values.tolist()

            distance = []
            for j in range(len(predicts_lon_lat)):

                #change the latitude and longitude unit
                myProj = Proj(
                    "+proj=utm +zone=23K, +south +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
                )
                lon_pred, lat_pred = myProj(predicts_lon_lat[j][0],
                                            predicts_lon_lat[j][1],
                                            inverse=True)
                lon_Y, lat_Y = myProj(Y_test[j][0], Y_test[j][1], inverse=True)

                #join in a unique list
                Y = []
                Y.append(lon_Y)
                Y.append(lat_Y)
                predict = []
                predict.append(lon_pred)
                predict.append(lat_pred)

                #The distance between the two latitudes is the error
                distance.append(vincenty(Y, predict).meters)

                #If you want to use haversine distance, uncomment the line below
                #print haversine(lon_Y, lat_Y, lon_pred, lat_pred)

            mean_error.append(np.mean(distance))
            #print(np.mean(distance))

    return np.mean(mean_error)
Example #39
    def train_knn(self, data, target, n_neighbors):
        model = KNeighborsRegressor(n_neighbors)

        model.fit(data, target)
        return model
def test_knn_imputer_weight_distance(na):
    X = np.array([
        [0, 0],
        [na, 2],
        [4, 3],
        [5, 6],
        [7, 7],
        [9, 8],
        [11, 10]
    ])

    # Test with "distance" weight
    nn = KNeighborsRegressor(metric="euclidean", weights="distance")
    X_rows_idx = [0, 2, 3, 4, 5, 6]
    nn.fit(X[X_rows_idx, 1:], X[X_rows_idx, 0])
    knn_imputed_value = nn.predict(X[1:2, 1:])[0]

    # Manual calculation
    X_neighbors_idx = [0, 2, 3, 4, 5]
    dist = nan_euclidean_distances(X[1:2, :], X, missing_values=na)
    weights = 1 / dist[:, X_neighbors_idx].ravel()
    manual_imputed_value = np.average(X[X_neighbors_idx, 0], weights=weights)

    X_imputed_distance1 = np.array([
        [0, 0],
        [manual_imputed_value, 2],
        [4, 3],
        [5, 6],
        [7, 7],
        [9, 8],
        [11, 10]
    ])

    # NearestNeighbor calculation
    X_imputed_distance2 = np.array([
        [0, 0],
        [knn_imputed_value, 2],
        [4, 3],
        [5, 6],
        [7, 7],
        [9, 8],
        [11, 10]
    ])

    imputer = KNNImputer(weights="distance", missing_values=na)
    assert_allclose(imputer.fit_transform(X), X_imputed_distance1)
    assert_allclose(imputer.fit_transform(X), X_imputed_distance2)

    # Test with weights = "distance" and n_neighbors=2
    X = np.array([
        [na, 0, 0],
        [2, 1, 2],
        [3, 2, 3],
        [4, 5, 5],
    ])

    # neighbors are rows 1, 2, the nan_euclidean_distances are:
    dist_0_1 = np.sqrt((3/2)*((1 - 0)**2 + (2 - 0)**2))
    dist_0_2 = np.sqrt((3/2)*((2 - 0)**2 + (3 - 0)**2))
    imputed_value = np.average([2, 3], weights=[1 / dist_0_1, 1 / dist_0_2])

    X_imputed = np.array([
        [imputed_value, 0, 0],
        [2, 1, 2],
        [3, 2, 3],
        [4, 5, 5],
    ])

    imputer = KNNImputer(n_neighbors=2, weights="distance", missing_values=na)
    assert_allclose(imputer.fit_transform(X), X_imputed)

    # Test with varying missingness patterns
    X = np.array([
        [1, 0, 0, 1],
        [0, na, 1, na],
        [1, 1, 1, na],
        [0, 1, 0, 0],
        [0, 0, 0, 0],
        [1, 0, 1, 1],
        [10, 10, 10, 10],
    ])

    # Get weights of donor neighbors
    dist = nan_euclidean_distances(X, missing_values=na)
    r1c1_nbor_dists = dist[1, [0, 2, 3, 4, 5]]
    r1c3_nbor_dists = dist[1, [0, 3, 4, 5, 6]]
    r1c1_nbor_wt = 1 / r1c1_nbor_dists
    r1c3_nbor_wt = 1 / r1c3_nbor_dists

    r2c3_nbor_dists = dist[2, [0, 3, 4, 5, 6]]
    r2c3_nbor_wt = 1 / r2c3_nbor_dists

    # Collect donor values
    col1_donor_values = np.ma.masked_invalid(X[[0, 2, 3, 4, 5], 1]).copy()
    col3_donor_values = np.ma.masked_invalid(X[[0, 3, 4, 5, 6], 3]).copy()

    # Final imputed values
    r1c1_imp = np.ma.average(col1_donor_values, weights=r1c1_nbor_wt)
    r1c3_imp = np.ma.average(col3_donor_values, weights=r1c3_nbor_wt)
    r2c3_imp = np.ma.average(col3_donor_values, weights=r2c3_nbor_wt)

    X_imputed = np.array([
        [1, 0, 0, 1],
        [0, r1c1_imp, 1, r1c3_imp],
        [1, 1, 1, r2c3_imp],
        [0, 1, 0, 0],
        [0, 0, 0, 0],
        [1, 0, 1, 1],
        [10, 10, 10, 10],
    ])

    imputer = KNNImputer(weights="distance", missing_values=na)
    assert_allclose(imputer.fit_transform(X), X_imputed)

    X = np.array([
        [0, 0, 0, na],
        [1, 1, 1, na],
        [2, 2, na, 2],
        [3, 3, 3, 3],
        [4, 4, 4, 4],
        [5, 5, 5, 5],
        [6, 6, 6, 6],
        [na, 7, 7, 7]
    ])

    dist = pairwise_distances(X, metric="nan_euclidean", squared=False,
                              missing_values=na)

    # Calculate weights
    r0c3_w = 1.0 / dist[0, 2:-1]
    r1c3_w = 1.0 / dist[1, 2:-1]
    r2c2_w = 1.0 / dist[2, (0, 1, 3, 4, 5)]
    r7c0_w = 1.0 / dist[7, 2:7]

    # Calculate weighted averages
    r0c3 = np.average(X[2:-1, -1], weights=r0c3_w)
    r1c3 = np.average(X[2:-1, -1], weights=r1c3_w)
    r2c2 = np.average(X[(0, 1, 3, 4, 5), 2], weights=r2c2_w)
    r7c0 = np.average(X[2:7, 0], weights=r7c0_w)

    X_imputed = np.array([
        [0, 0, 0, r0c3],
        [1, 1, 1, r1c3],
        [2, 2, r2c2, 2],
        [3, 3, 3, 3],
        [4, 4, 4, 4],
        [5, 5, 5, 5],
        [6, 6, 6, 6],
        [r7c0, 7, 7, 7]
    ])

    imputer_comp_wt = KNNImputer(missing_values=na, weights="distance")
    assert_allclose(imputer_comp_wt.fit_transform(X), X_imputed)
def knnRegressor(train_data, train_targets, test_data, k):
    classifier = KNeighborsRegressor(k)
    classifier.fit(train_data, train_targets)
    predictions = classifier.predict(test_data)
    return predictions
y_train = y

# Linear regression
clfreg = LinearRegression(n_jobs=-1)
clfreg.fit(X_train, y_train)
# Quadratic Regression 2
clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())
clfpoly2.fit(X_train, y_train)

# Quadratic Regression 3
clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())
clfpoly3.fit(X_train, y_train)

# KNN Regression
clfknn = KNeighborsRegressor(n_neighbors=2)
clfknn.fit(X_train, y_train)

X_train.shape
y_train.shape

X_test.shape
y_test.shape

X_test = X[-forecast_out:]
y_test = y[-forecast_out:]

confidencereg = clfreg.score(X_test, y_test)
confidencepoly2 = clfpoly2.score(X_test, y_test)
confidencepoly3 = clfpoly3.score(X_test, y_test)
confidenceknn = clfknn.score(X_test, y_test)
def coord_regression(predictions_b,predictions,train,test,method):
		
	mean_error = []

	if(method==1):
		machine_learn = KNeighborsRegressor(n_neighbors=5, weights = 'distance')
	elif(method==2):
		#machine_learn = MLPClassifier(solver='sgd',learning_rate = 'adaptive',verbose='true',activation='tanh',alpha=1e-5)		
		machine_learn = MLPClassifier(solver='sgd', learning_rate='adaptive', verbose=False, activation='tanh', alpha=1e-5, max_iter=400) #THE BEST
		#machine_learn = MLPClassifier(hidden_layer_sizes=(100,5), solver='sgd',learning_rate = 'adaptive',verbose='true',activation='tanh',alpha=1e-5,max_iter=500)
		#model = MLPClassifier(learning_rate = 'adaptive')
		#solvers = ['lbfgs', 'sgd', 'adam']
		#activations = ['identity', 'logistic', 'tanh', 'relu']
		#max_its = [200,400,600]
		#machine_learn = GridSearchCV(estimator=model, param_grid=dict(activation =activations,max_iter=max_its),n_jobs=7) #GRID

			#for each building
	for j in range(3):
		new_train1 = train.loc[train['BUILDINGID'] == j] #select for training only buildings with that label (0,1, or 2)
		ind = [x for x in range(len(predictions_b)) if predictions_b[x]==j] #get the position of the samples that have building == i	
		new_test1 = test.iloc[ind,:]

		if(ind):
		#for each floor
			for i in range(5):
				
				new_train2 = new_train1.loc[new_train1['FLOOR'] == i]
				if(not new_train2.empty): 
					indexes = [x for x in range(len(predictions)) if (predictions[x]==i and predictions_b[x]==j)] #get the position of the samples that have building == i
				else:
					indexes = []

				if (indexes): #if list is not empty

					X_train = new_train2.iloc[:, 0:519]
					Y_train = new_train2[['LONGITUDE','LATITUDE']]
					machine_learn.fit(X_train,Y_train)                                   
					
					#testing samples with prediction building == i
					new_test2 = test.iloc[indexes,:]
					X_test = new_test2.iloc[:, 0:519]
					Y_test = new_test2[['LONGITUDE','LATITUDE']]

					#Turn into list
					predicts_lon_lat = machine_learn.predict(X_test).tolist()
					Y_test = Y_test.values.tolist()

					distance = []
					for m in range(len(predicts_lon_lat)):  # m, not j: j is the building-loop index
					
						#change the latitude and longitude unit
						myProj = Proj("+proj=utm +zone=23K, +south +ellps=WGS84 +datum=WGS84 +units=m +no_defs")
						lon_pred,lat_pred = myProj(predicts_lon_lat[m][0], predicts_lon_lat[m][1], inverse=True)
						lon_Y, lat_Y = myProj(Y_test[m][0], Y_test[m][1], inverse=True)
					
						#join in a unique list
						Y = []
						Y.append(lon_Y)
						Y.append(lat_Y)
						predict = []
						predict.append(lon_pred)
						predict.append(lat_pred)			

						#The distance between the two latitudes is the error
						distance.append(vincenty(Y, predict).meters)
						print "distance"
						print distance
						#If you want to use haversine distance, uncomment the line below
						#print haversine(lon_Y, lat_Y, lon_pred, lat_pred)

					mean_error.append(np.mean(distance))	
					#print(np.mean(distance))
		
	return np.mean(mean_error)
Example #44
    # interpolate to form time-interval representation
    X = []
    y = []

    for ii in range(len(ts) - 1):
        X.append(ts[ii])
        y.append(ps[ii])
        X.append(ts[ii + 1] - delta)
        y.append(ps[ii])

    X = np.asarray(X)
    y = np.asarray(y)
    from sklearn.neighbors import KNeighborsRegressor
    interpolator = KNeighborsRegressor(1)
    interpolator.fit(X.reshape(-1, 1), y)
    X = np.arange(0, np.max(ts) + 2 * Fs, 1.0 / Fs)  # add a little buffer
    TIntRep = interpolator.predict(X.reshape(-1, 1))

    # now break up time-interval representation into parts
    numparts = np.floor(len(TIntRep) / (Fs * 6 * 8))
    TIntRep = TIntRep[0:int(numparts * Fs * 6 * 8)]
    TIntRep_re = TIntRep.reshape((int(numparts), int(Fs * 6 * 8)))
    cumsumfs = np.cumsum(TIntRep_re, axis=1) / Fs
    cumsumfs_meancentered = cumsumfs.T - cumsumfs.mean(axis=1)

    FX = np.fft.fft(TIntRep_re / Fs)
    cauto = np.fft.ifft(FX * FX.conj()).real

    TIHist = np.zeros((int(numparts), len(binsforhistogram) - 1))
    for ii in range(int(numparts)):
#####################################################################

#####################################################################
## H2O                                                             ##
#####################################################################

#####################################################################
# Nearest Neighbors                                                 #
#####################################################################

#####################################################################
## Scikit Learn                                                    ##
#####################################################################

knn_model = KNeighborsRegressor()
knn_model.fit(x_train_values, y_train_values)
knn_model_predictions = knn_model.predict(x_test_values)
generate_submission_file(knn_model_predictions, test_data["Id"],
                         "../results/" + user + "_KNN.csv")

param_list = {"n_neighbors": [2, 4, 6]}
knn_gridsearch = GridSearchCV(KNeighborsRegressor(), param_list)
knn_gridsearch.fit(x_train_values, y_train_values)
knn_best_model_predictions = knn_gridsearch.best_estimator_.predict(
    x_test_values)
generate_submission_file(knn_best_model_predictions, test_data["Id"],
                         "../results/" + user + "_KNN_GridSearchCV.csv")

#####################################################################
# Decision Trees                                                    #
#####################################################################
Example #46
x1 = np.reshape(x1, (506, 1))

X = np.asarray(bos['AGE'])
x2 = np.reshape(X, (506, 1))

#X_train= np.concatenate(x1,x2)

X_train = np.asarray([x1, x2])
X_train = np.reshape(X_train, (506, 2))

xnew = np.concatenate((x1, x2), axis=1)

lm.fit(xnew, y)

lm.predict([[6.5, 80]])

lm.coef_

lm.intercept_

lm.score(xnew, y)

#%% K Nearest Neighbors

from sklearn.neighbors import KNeighborsRegressor

kreg = KNeighborsRegressor()
kreg.fit(xnew, y)

kreg.score(xnew, y)
Example #47
N = 300
X = np.expand_dims(np.linspace(0, np.pi * 2, N), axis=1).astype(np.float32)
y = np.sin(X).astype(np.float32)

X += np.random.normal(0, 0.1, X.shape)
y += np.random.normal(0, 0.1, y.shape)

sampleN = 100
samples = np.expand_dims(np.linspace(0, np.pi * 2, sampleN),
                         axis=1).astype(np.float32)

n_neighbors = 20

# numpy version

neigh = KNeighborsRegressor(n_neighbors=n_neighbors)  #, weights='distance')
neigh.fit(X, y)
y_pred1 = neigh.predict(samples)

save('result1.json', X.reshape(N), y.reshape(N), sampleN,
     samples.reshape(sampleN), y_pred1.reshape(sampleN))

# PANENE version

neigh = KNNRegressor(X, y, n_neighbors=n_neighbors)  #, weights='distance')
y_pred2 = neigh.predict(samples)

save('result2.json', X.reshape(N), y.reshape(N), sampleN,
     samples.reshape(sampleN), y_pred2.reshape(sampleN))
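
# Sanity check (a minimal sketch): with identical data and n_neighbors, the
# sklearn and PANENE predictions should agree closely.
print(np.abs(y_pred1.reshape(sampleN) - y_pred2.reshape(sampleN)).max())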
Beispiel #48
0
def build_k_nearest_neighbours_model():
    print("\n--- CREATING K NEAREST NEIGHBOURS REGRESSOR MODEL ---")
    # Creating a K Nearest Neighbours Regressor Model
    knn_model = KNeighborsRegressor()
    knn_model.fit(train_X, train_y)

    # Model Validation
    test_pred = knn_model.predict(test_X)

    # y_true comes first; r2_score in particular is not symmetric
    mae = mean_absolute_error(test_y, test_pred)
    r2 = r2_score(test_y, test_pred)

    print("\n" + "Mean Absolute Error:", mae)
    print("R2 Score:", r2)

    # Outputting first couple of rows
    print(test_pred[:5])
    print(test_y[:5])

    # Model Improvement
    # Creating discrete hyperparameter amounts to trial
    print("\n--- BEGINNING MODEL IMPROVEMENTS ---")
    n_neighbors = [1, 2, 3, 5, 10, 15, 50, 100, 1000]
    leaf_sizes = [2, 5, 10, 30, 50, 100]
    algorithms = ["auto", "ball_tree", "kd_tree", "brute"]

    print("\n--- ADJUSTING N NEIGHBOURS ---")

    best_n_neighbors_data = [0, 0]
    for n_neighbor in n_neighbors:
        knn_model = KNeighborsRegressor(n_neighbors=n_neighbor)
        knn_model.fit(train_X, train_y)
        preds = knn_model.predict(test_X)
        score = r2_score(test_y, preds)
        print("\nN Neighbors:", n_neighbor)
        print("R2 Score:", score)
        if score > best_n_neighbors_data[1]:
            best_n_neighbors_data = [n_neighbor, score]

    best_n_neighbors = best_n_neighbors_data[0]
    print("\nOptimal amount of n neighbours:", best_n_neighbors)

    print("\n--- ADJUSTING LEAF SIZE ---")

    best_leaf_size_data = [0, 0]
    for leaf_size in leaf_sizes:
        knn_model = KNeighborsRegressor(leaf_size=leaf_size)
        knn_model.fit(train_X, train_y)
        preds = knn_model.predict(test_X)
        score = r2_score(test_y, preds)
        print("\nLeaf size:", leaf_size)
        print("R2 Score:", score)
        if score > best_leaf_size_data[1]:
            best_leaf_size_data = [leaf_size, score]

    best_leaf_size = best_leaf_size_data[0]
    print("\nOptimal leaf size:", best_leaf_size)

    print("\n--- FINDING OPTIMAL ALGORITHM ---")

    best_algorithm_data = ["", 0]
    for algorithm in algorithms:
        knn_model = KNeighborsRegressor(algorithm=algorithm)
        knn_model.fit(train_X, train_y)
        preds = knn_model.predict(test_X)
        score = r2_score(test_y, preds)
        print("\nAlgorithm:", algorithm)
        print("R2 Score:", score)
        if score > best_algorithm_data[1]:
            best_algorithm_data = [algorithm, score]

    best_algorithm = best_algorithm_data[0]
    print("\nOptimal algorithm:", best_algorithm)

    print("---CREATING FINAL MODEL ---")
    model = KNeighborsRegressor(n_neighbors=best_n_neighbors,
                                leaf_size=best_leaf_size,
                                algorithm=best_algorithm)
    model.fit(train_X, train_y)
    preds = model.predict(test_X)
    score = r2_score(preds, test_y)
    print("R2 Score:", score)
Beispiel #49
0
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

N = 200
X = np.linspace(0, 10, N).reshape(N, 1)
Y = np.sin(X)

Ntrain = 20
idx = np.random.choice(N, Ntrain)
Xtrain = X[idx]
Ytrain = Y[idx]

kneigh = KNeighborsRegressor(n_neighbors=2, weights='distance')
kneigh.fit(Xtrain, Ytrain)
Yhat = kneigh.predict(X)

deci = DecisionTreeRegressor()
deci.fit(Xtrain, Ytrain)
YhatDeci = deci.predict(X)

plt.scatter(Xtrain, Ytrain)
plt.plot(X, Y)
plt.plot(X, Yhat, label="Knn Regressor")
plt.plot(X, YhatDeci, label="DecisionTree Regressor")
plt.legend()
plt.savefig("images/KnnDistAndDecisionTreeSklearn")  # save before show(), which clears the figure
plt.show()
Beispiel #50
0
File: a.py Project: zeronek/ai
gildong = LinearRegression()
gildong.fit(train_df_part1[features], train_df_part1['price'])
score = gildong.score(train_df_part2[features], train_df_part2['price'])
print(format(score, '.3f'))

# ### 3.4 Predicting with the K-NN algorithm

# > **So far we have predicted house prices using a linear regression model. From here on we will try a few other models. First up is the K-nearest neighbors method.**

# In[ ]:

from sklearn.neighbors import KNeighborsRegressor

babo = KNeighborsRegressor(n_neighbors=10)
babo.fit(train_df_part1[features], train_df_part1['price'])
score = babo.score(train_df_part2[features], train_df_part2['price'])
print(format(score, '.3f'))

# ### 3.5 Predicting with the decision tree algorithm

# > **This is the decision tree model. We print the score and the predicted house prices.**

# In[ ]:

youngja = DecisionTreeRegressor(random_state=0)
youngja.fit(train_df_part1[features], train_df_part1['price'])
score = youngja.score(train_df_part2[features], train_df_part2['price'])
print(format(score, '.3f'))

predicted = youngja.predict(train_df_part2[features])
Beispiel #51
0
"""
KNN regression model
author: Alsu Vakhitova
"""
import training.datahelper as datahelper
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split

x, y = datahelper.get_xy('data/', num_hours=3, error_minutes=15)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

for n in range(1, 10):
    print("\nNumber of neighbors: ", n)
    neigh = KNeighborsRegressor(n_neighbors=n)
    neigh.fit(x_train, y_train)

    print("R^2 for all measurements: ", neigh.score(x_test, y_test), '\n')

    m = mean_absolute_error(y_test,
                            neigh.predict(x_test),
                            multioutput='raw_values')
    print('Average mean absolute error: ', np.average(m))
    print("Mean absolute error for measurements:")
    for col, err in zip(list(x.columns.values), m):
        print(col, ": ", err)
                case_training_data[:, 1][i]) + "\n"
    fd.write(writeRow)

fd.close()

#train model
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.neighbors import KNeighborsRegressor

df = pd.read_csv('Case_01_training.csv')
X = df.loc[:, 'YbyH':'dummy3']
Y = df.loc[:, 'UbyVw']

knn = KNeighborsRegressor()
knn.fit(X, Y)

#test model
case_test_data = np.loadtxt('Case_01_sim_test.dat')

X_test = np.zeros((case_test_data.shape[0], 7))
#X_test[0,:]=0.0,case_VW,case_H,case_DPDX,case_d1,case_d2,case_d3

for i in range(0, case_test_data.shape[0]):
    X_test[i, :] = case_test_data[:, 0][
        i], case_VW, case_H, case_DPDX, case_d1, case_d2, case_d3

Y_predicted = knn.predict(X_test)
#print Y_predicted

plt.plot(case_test_data[:, 0], case_test_data[:, 1], '-b', label='sim')
        X_train.append(streamline_moving)
        Y_train.append(streamline_fixed - streamline_moving)
        # X_train[i*N_points:(i+1)*N_points, :] = streamline_moving
        # Y_train[i*N_points:(i+1)*N_points, :] = streamline_fixed - streamline_moving

    X_train = np.vstack(X_train)
    Y_train = np.vstack(Y_train)

    #%% KNeighborsRegressor
    print("Performs KNeighborsRegressor...")
    neigh = KNeighborsRegressor(n_neighbors=n_neighbors,
                                n_jobs=-1,
                                weights='distance')
    #neigh = RadiusNeighborsRegressor(n_jobs=1, radius=radius, weights='uniform')
    neigh.fit(X_train, Y_train)
    #%% Meshgrid
    suffix = suffix + "_nn" + str(n_neighbors)

    #X_offset=aff_moving[:,-1][0]*aff_moving[0,0]
    #Y_offset=aff_moving[:,-1][1]*aff_moving[1,1]
    #Z_offset=aff_moving[:,-1][2]*aff_moving[2,2]

    step = 1
    X_range = np.arange(0, X_grid_size, step).astype(int)  #+X_offset
    Y_range = np.arange(0, Y_grid_size, step).astype(int)  #+Y_offset
    Z_range = np.arange(0, Z_grid_size, step).astype(int)  #+Z_offset

    #Y_range=np.arange(0,X_grid_size,step)#+X_offset

    XX, YY, ZZ = np.meshgrid(X_range, Y_range, Z_range)
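
    # A minimal sketch (assuming the meshgrid above): stack the grid
    # coordinates into an (n_points, 3) array and evaluate the fitted
    # regressor to get a dense displacement field.
    grid_points = np.column_stack([XX.ravel(), YY.ravel(), ZZ.ravel()])
    displacements = neigh.predict(grid_points)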
Beispiel #54
0
### LONGITUDE, LATITUDE and FLOOR

## Define models
modelkNN_long = KNeighborsRegressor(n_neighbors=7, weights='distance')
modelRF_long = RandomForestRegressor()

modelkNN_lat = KNeighborsRegressor(n_neighbors=2, weights='distance')
modelRF_lat = RandomForestRegressor()

modelkNN_floor = KNeighborsClassifier(n_neighbors=2, weights='distance')
modelRF_floor = RandomForestClassifier()


## Fit models
modelkNN_long.fit(X_train_normObs_casc, y_train_long)
modelRF_long.fit(X_train_normObs_casc, y_train_long)

modelkNN_lat.fit(X_train_normObs_casc, y_train_lat)
modelRF_lat.fit(X_train_normObs_casc, y_train_lat)

modelkNN_floor.fit(X_train_normObs_casc, y_train_floor)
modelRF_floor.fit(X_train_normObs_casc, y_train_floor)


## Make predictions
pred_long_kNN = modelkNN_long.predict(X_val_normObs_casc)
pred_long_RF = modelRF_long.predict(X_val_normObs_casc)

pred_lat_kNN = modelkNN_lat.predict(X_val_normObs_casc)
pred_lat_RF  = modelRF_lat.predict(X_val_normObs_casc)
    for idx, i in enumerate(pca_X_test):
        # add sentiment features
        pca_X_test[idx].append(float(emotion_test[idx][0]))
        pca_X_test[idx].append(float(emotion_test[idx][1]))
        pca_X_test[idx].append(float(emotion_test[idx][2]))
        # add firm features
        pca_X_test[idx].append(float(firm_test[idx][0]))
        pca_X_test[idx].append(float(firm_test[idx][1]))
        pca_X_test[idx].append(float(firm_test[idx][2]))
        pca_X_test[idx].append(float(firm_test[idx][3]))
        pca_X_test[idx].append(float(firm_test[idx][4]))
        pca_X_test[idx].append(float(firm_test[idx][5]))
    '''KNN regression'''
    uni_knr = KNeighborsRegressor(
        weights='distance')  # 'uniform' averages neighbors equally; 'distance' weights by inverse distance
    uni_knr.fit(pca_X_train, y_train)
    uni_knr_y_pred = uni_knr.predict(pca_X_test)
    print 'KNR R^2:', uni_knr.score(pca_X_test, y_test)
    final_pred_knr.append(uni_knr_y_pred[0])
for i in final_pred_knr:
    if i >= 0: di_pred_knr.append(1)
    else: di_pred_knr.append(0)

print 'KNR report:'
print classification_report(di_pred_knr, di_real_test)
'''Write the prediction results to a text file; paste this block wherever it is needed'''
f = open(
    r'E:\study\master of TJU\0Subject research\code\Important\5_1_mock_trading\pred_result.txt',
    'w')
testlen = len(date_list) - ori_split_num
for i in range(0, testlen):
Beispiel #56
0
clock = pygame.time.Clock()

# file to take data
#sample = open("game.csv", "w")
#print("x,y,vx,vy,paddle.y", file=sample)

pong = pd.read_csv('game.csv')
pong = pong.drop_duplicates()

X = pong.drop(columns="paddle.y")
y = pong['paddle.y']

from sklearn.neighbors import KNeighborsRegressor

clf = KNeighborsRegressor(n_neighbors=3)
clf.fit(X,y)

df = pd.DataFrame(columns=['x', 'y', 'vx', 'vy'])

while True:
    e = pygame.event.poll()
    if e.type == pygame.QUIT:
        break
    
    clock.tick(FRAMERATE)
    
    pygame.display.flip()
    
    toPredict = df.append({'x': ball.x, 'y': ball.y, 'vx': ball.vx,
                           'vy': ball.vy,}, ignore_index=True)
    
Beispiel #57
0
]]

housing_not_missing = housing[housing.total_bedrooms.notna()]

X_train = housing_not_missing[[
    'longitude', 'latitude', 'housing_median_age', 'total_rooms', 'population',
    'households', 'median_income', 'median_house_value', '1h_ocean', 'island',
    'inland', 'near_ocean', 'near_bay'
]]  # data  (X) -> longitude, latitude, total_rooms, population, households, median_house_value

y_train = housing_not_missing[[
    'total_bedrooms'
]]  # label (y) -> total_bedrooms (column we want to predict)

regressor = KNeighborsRegressor(n_neighbors=5)
regressor.fit(X_train, y_train)

predicted_values = []
for index in range(0, len(housing_missing)):
    values = housing_missing.iloc[index].tolist()
    y_pred = regressor.predict([values])
    predicted_values.append(y_pred[0][0])

housing.loc[housing.total_bedrooms.isna(), 'total_bedrooms'] = predicted_values
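
# Equivalent vectorized sketch (assuming housing_missing holds the same
# feature columns as X_train): a single predict call over all missing rows
# replaces the row-by-row loop above.
#   housing.loc[housing.total_bedrooms.isna(), 'total_bedrooms'] = (
#       regressor.predict(housing_missing).ravel())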

print("\n3. replace with values from nearest neighbour")
print("--------------------------------")
knn_model = regression(housing)
knn_model['name'] = 'replaced missing values with KNN'

# 4. use regression with the values in the total_rooms column as prior knowledge
Beispiel #58
0
X = df_dummies.loc[:, df_dummies.columns !='Item_Outlet_Sales'].values
y = df_dummies.loc[:, 'Item_Outlet_Sales'].values
reg = LinearRegression(fit_intercept=True)
reg.fit(X,y)
y_result=reg.predict(X)
score = reg.score(X, y)
print('Linear Score: ',score)


# # Method 2 ------------------KNN Regression:
X=df_dummies.loc[:,df_dummies.columns !='Item_Outlet_Sales']
# print(X.shape)
y=df_dummies['Item_Outlet_Sales']
# print(y.shape)
knn = KNeighborsRegressor(n_neighbors=3)
knn.fit(X, y)
predict = knn.predict(X)
score=knn.score(X,y)
print('KNN Score: ',score)

# print('KNN Model fits better than the Linear Regression ')

# Method 3 -------------------- 

# #instantiate the Random Forest Regressor Model


X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42)

clf = RandomForestRegressor(n_estimators=500,max_depth=5,min_samples_split=3,bootstrap=True,oob_score=True)
clf.fit(X_train,y_train)
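
# Note: the linear and KNN scores above are computed on the training data,
# which favors small-k KNN (it can memorize). A minimal sketch of a held-out
# comparison using the same split as the random forest:
knn_holdout = KNeighborsRegressor(n_neighbors=3)
knn_holdout.fit(X_train, y_train)
print('KNN test score: ', knn_holdout.score(X_test, y_test))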
Beispiel #59
0
    #Array of non-linear regressors
    nonLRegression = []
    for i in range(numberOfLS):
        # Generate a dataset
        dataset = generate_dataset(trainingSize)
        X.append(dataset[0])
        Y.append(dataset[1])

        # Create and train a linear regressor on the dataset
        lr = LinearRegression()
        lr.fit(dataset[0], dataset[1])
        lRegressions.append(lr)

        # Create and train a non-linear regressor on the dataset
        knn = KNeighborsRegressor(nNeighbors)
        knn.fit(dataset[0], dataset[1])
        nonLRegression.append(knn)

    # Generate datasets for each x
    x = np.linspace(-4, 4, 100)
    datasetsX0 = []
    for i in x:
        datasetsX0.append(generate_y_dataset(i, trainingSize))

    # Residual error
    y = []

    # Compute residual error for each x
    for data in datasetsX0:
        y.append(np.var(data))
Beispiel #60
0
split_one = dc_listings.iloc[0:1862].copy()
split_two = dc_listings.iloc[1862:].copy()

## 2. Holdout Validation ##

import numpy as np
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

train_one = split_one
test_one = split_two
train_two = split_two
test_two = split_one
# First half
model = KNeighborsRegressor()
model.fit(train_one[["accommodates"]], train_one["price"])
test_one["predicted_price"] = model.predict(test_one[["accommodates"]])
iteration_one_rmse = mean_squared_error(test_one["price"],
                                        test_one["predicted_price"])**(1 / 2)

# Second half
model.fit(train_two[["accommodates"]], train_two["price"])
test_two["predicted_price"] = model.predict(test_two[["accommodates"]])
iteration_two_rmse = mean_squared_error(test_two["price"],
                                        test_two["predicted_price"])**(1 / 2)

avg_rmse = np.mean([iteration_two_rmse, iteration_one_rmse])

print(iteration_one_rmse, iteration_two_rmse, avg_rmse)

## 3. K-Fold Cross Validation ##