Esempio n. 1
0
def createRadiusNeighborsRegressor(params=None):
    """Placeholder factory for a RadiusNeighborsRegressor estimator bundle.

    The function is currently disabled: it logs an error and immediately
    returns a dictionary whose "estimator" and "params" entries are both
    None. Everything after that first ``return`` is unreachable and only
    documents the intended implementation.

    Args:
        params: Optional parameter overrides. Only read by the unreachable
            code below, so it currently has no effect.

    Returns:
        dict: ``{"estimator": None, "params": None}``.
    """
    info("Creating Radius Neighbors Regressor", ind=4)
    error("This doesn't work")
    return {"estimator": None, "params": None}

    # NOTE: dead code from here on (see the unconditional return above).
    ## Params
    params = mergeParams(RadiusNeighborsRegressor(), params)
    tuneParams = getRadiusNeighborsRegressorParams()
    grid = tuneParams['grid']

    # Resolve each hyper-parameter via setParam and log the chosen value.
    info("With Parameters", ind=4)
    algorithm = setParam('algorithm', params, grid, force=False)
    info("Param: algorithm = {0}".format(algorithm), ind=6)

    leaf_size = setParam('leaf_size', params, grid, force=False)
    info("Param: leaf_size = {0}".format(leaf_size), ind=6)

    metric = setParam('metric', params, grid, force=False)
    info("Param: metric = {0}".format(metric), ind=6)

    radius = setParam('radius', params, grid, force=False)
    info("Param: radius = {0}".format(radius), ind=6)

    weights = setParam('weights', params, grid, force=False)
    info("Param: weights = {0}".format(weights), ind=6)

    ## Estimator
    reg = RadiusNeighborsRegressor(algorithm=algorithm,
                                   leaf_size=leaf_size,
                                   metric=metric,
                                   radius=radius,
                                   weights=weights)

    return {"estimator": reg, "params": tuneParams}
Esempio n. 2
0
def sampling_fix(df, name, start, stop, radius, medianFilter, plot):
    """Fit a radius-neighbours regressor of a median-filtered column on depth.

    Args:
        df: DataFrame holding a 'Measured Depth m' column and the ``name``
            column.
        name: Column to smooth and regress.
        start: Exclusive lower bound on 'Measured Depth m'.
        stop: Exclusive upper bound on 'Measured Depth m'.
        radius: Neighbourhood radius handed to the regressor.
        medianFilter: Window passed to ``rolling`` for the median filter.
        plot: ``0`` to return the fitted regressor; any other value plots
            the filtered data and the prediction instead.

    Returns:
        The fitted regressor when ``plot == 0``, otherwise ``None``.
    """
    from sklearn.neighbors import RadiusNeighborsRegressor

    depth_col = 'Measured Depth m'

    # Keep only rows strictly inside the requested depth window.
    inside_window = (df[depth_col] > start) & (df[depth_col] < stop)
    df = df[inside_window]
    # Drop rows whose target value is not finite.
    df = df[np.isfinite(df[name])]

    # The regressor needs a 2-D feature matrix with a single column.
    depth = df[depth_col]
    X = depth.values.reshape(depth.shape[0], 1)

    # Rolling median; back-fill the NaNs the window leaves at the start.
    y = df[name].rolling(medianFilter).median().bfill()

    reg = RadiusNeighborsRegressor(radius=radius, weights='uniform')
    reg.fit(X, y)

    if plot == 0:
        return reg

    # Plot the filtered data together with the regression curve.
    import matplotlib.pyplot as plt
    plt.scatter(X, y, label=name)
    plt.plot(X, reg.predict(X), c='r', label="prediction")
    plt.legend()
    plt.show()
Esempio n. 3
0
def get_best_rnn_radius(low, high, step):
    """Pick the radius with the lowest test MAE and save a diagnostic plot.

    Every radius in ``np.arange(low, high + step, step)`` is used to fit a
    distance-weighted radius-neighbours regressor on the module-level
    ``train_df`` and is scored with mean absolute error on ``test_df``.
    The error curve is written to 'rnn_param.png'.

    Returns:
        The radius whose mean absolute error is smallest.
    """
    feature_cols = ['temperatura', 'vacuo']

    def _mae_for(radius):
        # Fit on the training frame, score on the test frame.
        model = RadiusNeighborsRegressor(radius=radius, weights='distance')
        model.fit(train_df[feature_cols], train_df[['energia']])
        predicted = model.predict(test_df[feature_cols])
        return metrics.mean_absolute_error(test_df['energia'], predicted)

    radii = list(np.arange(low, high + step, step))
    mae_rnn = [_mae_for(radius) for radius in radii]

    best_radius = radii[np.argmin(mae_rnn)]

    fig, ax = plt.subplots()
    ax.set_title('Parameter evaluation for RNN')
    ax.set_xlabel('Radius')
    ax.set_ylabel('Mean absolute error')
    ax.set_xlim(low, high)
    # Add an explicit tick at the winning radius.
    ax.set_xticks(list(ax.get_xticks()) + [best_radius])
    ax.plot(radii, mae_rnn, c='orange', linewidth=2)
    fig.savefig('rnn_param.png')

    return best_radius
Esempio n. 4
0
def rNeighbours2dPlot(X,y,r=0.5,res=100,dist_scale='normalize',im_kws={},reg_kws={},ax=None):
    """Draw a radius-neighbours regression surface over a 2-D feature space.

    A RadiusNeighborsRegressor is fitted on the (optionally rescaled) two
    feature columns and evaluated on a ``res`` x ``res`` grid spanning the
    data; the predictions are rendered with ``imshow``.

    Args:
        X: Two-column feature array, or a DataFrame (converted via
            ``.values``).
        y: Target values.
        r: Radius passed to the regressor.
        res: Number of grid points along each axis.
        dist_scale: ``'normalize'`` divides each column by its range, any
            other non-None value is used as a divisor, ``None`` leaves X
            unscaled.
        im_kws: Extra keyword arguments for ``imshow``. Missing 'origin',
            'extent' and 'aspect' entries are filled in from the unscaled
            data. The caller's dict is not modified.
        reg_kws: Extra keyword arguments for the regressor.
        ax: Axes to draw on; ``plt.imshow`` is used when None.

    Returns:
        The object returned by ``imshow``.
    """
    if isinstance(X, pd.core.frame.DataFrame):
        X = X.values

    # Copy so neither the shared default dict nor the caller's dict is
    # mutated between calls.
    im_kws = dict(im_kws)

    # The image defaults belong to im_kws (the original tested reg_kws for
    # 'origin', which overwrote any user-supplied origin).
    if 'origin' not in im_kws:
        im_kws['origin'] = 'lower'

    # Extent and aspect are derived from the data before any rescaling so
    # the axes show the original units.
    if 'extent' not in im_kws:
        im_kws['extent'] = (X[:, 0].min(), X[:, 0].max(),
                            X[:, 1].min(), X[:, 1].max())

    if 'aspect' not in im_kws:
        im_kws['aspect'] = ((X[:, 0].max() - X[:, 0].min())
                            / (X[:, 1].max() - X[:, 1].min()))

    if dist_scale is not None:
        if dist_scale == 'normalize':
            X = X / (X.max(axis=0) - X.min(axis=0))
        else:
            X = X / dist_scale

    kneighbours = RadiusNeighborsRegressor(radius=r, **reg_kws)
    kneighbours.fit(X, y)

    # Regular grid covering the (scaled) data range.
    xx, yy = np.meshgrid(np.linspace(X[:, 0].min(), X[:, 0].max(), res),
                         np.linspace(X[:, 1].min(), X[:, 1].max(), res))
    X_grid = np.vstack([xx.ravel(), yy.ravel()]).T

    y_hat = kneighbours.predict(X_grid)
    Y_hat = y_hat.reshape((res, res))
    if ax is None:
        return plt.imshow(Y_hat, **im_kws)
    return ax.imshow(Y_hat, **im_kws)
Esempio n. 5
0
 def __init__(self, args, env_params):
     """Store the configuration and create an unfitted neighbour model.

     ``args.neighbor_radius`` is used as the radius of the regressor.
     """
     self.args = args
     self.env_params = env_params
     # Uniformly weighted radius-neighbours regressor.
     radius = args.neighbor_radius
     self.knn_model = RadiusNeighborsRegressor(radius=radius, weights='uniform')
     # Nothing has been fitted yet.
     self.is_fit = False
Esempio n. 6
0
 def __init__(self, in_dim, radius, out_dim):
     """Record the dimensions and radius and create an unfitted model."""
     self.in_dim, self.radius, self.out_dim = in_dim, radius, out_dim
     # Uniformly weighted radius-neighbours regressor using the
     # 'manhattan' metric.
     model_options = {'weights': 'uniform', 'metric': 'manhattan'}
     self.knn_model = RadiusNeighborsRegressor(radius=radius, **model_options)
     # Nothing has been fitted yet.
     self.is_fit = False
Esempio n. 7
0
def plot_std_dev(folder):
    """Show per-channel mean/std statistics of an image stack.

    For every channel of the array returned by ``load_images(folder)`` two
    figures are shown:

    1. a 2x2 summary: mean image, standard-deviation image, mean/std ratio
       and a histogram of all values in the channel;
    2. a scatter of per-pixel std against per-pixel mean, overlaid with a
       distance-weighted radius-neighbours fit (red) and a degree-1
       polynomial fit (orange).

    Args:
        folder: Passed unchanged to ``load_images``.
            NOTE(review): presumably the loaded array is indexed
            (image, row, column, channel) - confirm against load_images.
    """
    data = load_images(folder)

    for channel in range(data.shape[3]):
        channel_stack = data[:, :, :, channel]
        # Statistics are taken across axis 0 of the stack.
        std_dev_img = np.std(channel_stack, axis=0)
        mean_img = np.mean(channel_stack, axis=0)
        # print(std_dev_img)
        # print(np.mean(std_dev_img))
        # Always-true switch around the summary figure.
        if 1:
            plt.subplot(2, 2, 1)
            # plt.imshow(mean_img)
            display_image(mean_img, z=1)
            plt.title('mean')
            plt.subplot(2, 2, 2)
            display_image(std_dev_img, z=1)
            plt.title('std')

            plt.subplot(2, 2, 3)
            display_image(mean_img / std_dev_img, z=1)
            plt.title('mean / std')

            plt.subplot(2, 2, 4)
            # Unit-width bins from the smallest to the largest value.
            bins = np.arange(np.min(channel_stack), np.max(channel_stack) + 1)
            plt.hist(channel_stack.flatten(), bins=bins)
            plt.grid(True)
            plt.show()

        # skip = 10
        # for img_channel in channel_stack:
        #     plt.scatter(img_channel.flatten()[::skip], mean_img.flatten()[::skip], alpha = 0.1, color='black', s=1)

        # Non-parametric fit of std as a function of mean (one feature).
        rnr = RadiusNeighborsRegressor(radius=10, weights='distance')
        rnr.fit(np.expand_dims(mean_img.flatten(), axis=1),
                std_dev_img.flatten())

        # Evaluate both fits on a unit-spaced grid over the mean range.
        line_x = np.arange(np.min(mean_img), np.max(mean_img) + 1)
        line_y = rnr.predict(np.expand_dims(line_x, axis=1))

        fit = np.polyfit(mean_img.flatten(), std_dev_img.flatten(), deg=1)
        linear_y = np.polyval(fit, line_x)

        # for d in range(deg+1):
        #     fits[y//n, :, channel, d] = section_fits[d]

        plt.scatter(mean_img.flatten(),
                    std_dev_img.flatten(),
                    alpha=0.1,
                    color='black',
                    s=1)
        plt.plot(line_x, line_y, 'r')
        plt.plot(line_x, linear_y, 'orange')
        plt.grid(True)
        plt.show()
Esempio n. 8
0
class KNNDynamicsResidual:
    """Radius-neighbours model that predicts zeros wherever it has no data."""

    def __init__(self, args, env_params):
        """Keep the configuration and create an unfitted neighbour model."""
        self.args = args
        self.env_params = env_params
        # Uniformly weighted regressor; radius comes from the arguments.
        self.knn_model = RadiusNeighborsRegressor(
            radius=args.neighbor_radius, weights='uniform')
        # Flipped to True by fit().
        self.is_fit = False

    def fit(self, X, y):
        '''
        Fit the neighbour model and return the loss on the training data.

        X should be the data matrix N x d, where each row is a 4D vector
        consisting of object pos and gripper pos
        y should be target matrix N x d, where each row is a 4D vector
        consisting of next object pos and next gripper pos
        '''
        self.knn_model.fit(X, y)
        self.is_fit = True
        return self.loss(X, y)

    def predict(self, X):
        '''
        Predict targets for X; rows without any neighbour stay at zero.

        X should be the data matrix N x d, where each row is a 4D vector
        consisting of object pos and gripper pos
        '''
        ypred = np.zeros(X.shape)
        if not self.is_fit:
            # An unfitted model predicts all zeros.
            return ypred
        # Second element of the result holds the neighbour indices per row.
        neighbor_indices = self.knn_model.radius_neighbors(X)[1]
        # True for every query row that has at least one neighbour.
        has_neighbors = [idx.shape[0] != 0 for idx in neighbor_indices]
        if not any(has_neighbors):
            return ypred
        # Only rows with neighbours get the model's prediction.
        ypred[has_neighbors] = self.knn_model.predict(X[has_neighbors])
        return ypred

    def get_num_neighbors(self, X):
        """Return the number of neighbours within the radius for each row."""
        if not self.is_fit:
            return np.zeros(X.shape[0])
        neighbor_indices = self.knn_model.radius_neighbors(X)[1]
        return np.array([idx.shape[0] for idx in neighbor_indices])

    def loss(self, X, y):
        """Mean Euclidean distance between predictions and targets."""
        residual = self.predict(X) - y
        return np.linalg.norm(residual, axis=1).mean()
Esempio n. 9
0
    def estimate_ns_act(self, term, coords=None, **kwargs):
        """
        Uses a radius-neighbours regressor to estimate Neurosynth term
        activation (tf-idf) at specified coordinates. If no coordinates are
        passed, ABA sampled locations in corresponding NsabaBase are used.

        The fitted regressor and its predictions are stored under
        ``self.term[term]['classifier']`` and ``self.term[term]['act']``.

        Parameters
        ----------
        term : str
            NS term whose activation is to be estimated

        coords : np.array [int], optional
            Coordinates where NS term activation is to be estimated.

        kwargs : dict, optional
                'rnn_args' : dict
                    SKLearn RadiusNeighborsRegressor() optional arguments.
                    http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.RadiusNeighborsRegressor.html
                    for default arguments. 'radius' defaults to 5 when
                    omitted. The dict passed in is not modified.
                'coord_type' : optional
                    Stored as the term's 'coord_type' when explicit
                    ``coords`` are given.

        Raises
        ------
        ValueError
            If ``term`` is not a registered term.

        """
        if not self.is_term(term):
            raise ValueError("'%s' is not a registered term." % term)

        self.term[term] = {}

        if coords is None:
            coords = self._aba['mni_coords'].data
            self.term[term]['coord_type'] = 'ABA MNI'
        else:
            self.term[term]['coords'] = coords
            if 'coord_type' in kwargs:
                self.term[term]['coord_type'] = kwargs['coord_type']

        ns_coord_tree, ns_coord_act_df = self._term_to_coords(term, 0)

        # Copy before filling in the default so the caller's dict is left
        # untouched.
        rnn_args = dict(kwargs.get('rnn_args', {}))
        rnn_args.setdefault('radius', 5)
        classifier = RadiusNeighborsRegressor(**rnn_args)
        self.term[term]['classifier'] = classifier

        X = ns_coord_tree.data
        # `.values` replaces `.as_matrix()`, which pandas removed in 1.0.
        y = ns_coord_act_df[term].values

        classifier.fit(X, y)

        # Warnings raised by predict() are deliberately silenced.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            self.term[term]['act'] = classifier.predict(coords)
Esempio n. 10
0
 def test_onnx_simple_text_plot_knnr(self):
     """The text rendering of a converted regressor contains key fragments."""
     features = numpy.random.randn(10, 3)
     targets = numpy.random.randn(10)
     regressor = RadiusNeighborsRegressor(3)
     regressor.fit(features, targets)
     onnx_model = to_onnx(
         regressor, features.astype(numpy.float32), target_opset=15)
     rendered = onnx_simple_text_plot(onnx_model, verbose=False)
     # Each fragment must appear somewhere in the rendered graph.
     fragments = (
         "              Neg(arange_y0) -> arange_Y0",
         ", to=7)",
         ", keepdims=0)",
         ", perm=[1,0])",
     )
     for fragment in fragments:
         self.assertIn(fragment, rendered)
Esempio n. 11
0
 def __init__(self,
              radius=1.0,
              weights='uniform',
              algorithm='auto',
              leaf_size=30,
              p=2,
              metric='minkowski',
              metric_params=None,
              **kwargs):
     """Initialise both base classes of the wrapper.

     The named arguments are forwarded positionally, in signature order,
     to ``_RadiusNeighborsRegressor.__init__`` (``kwargs`` are forwarded as
     keywords); ``BaseWrapperReg.__init__`` is then called with no
     arguments.

     NOTE(review): presumably ``_RadiusNeighborsRegressor`` is
     scikit-learn's RadiusNeighborsRegressor. Recent scikit-learn releases
     only accept these arguments as keywords - confirm the supported
     version before relying on the positional call.
     """
     # Explicit base-class calls (not super()) so each base gets its own
     # argument list.
     _RadiusNeighborsRegressor.__init__(self, radius, weights, algorithm,
                                        leaf_size, p, metric, metric_params,
                                        **kwargs)
     BaseWrapperReg.__init__(self)
Esempio n. 12
0
class _RadiusNeighborsRegressorImpl:
    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)
Esempio n. 13
0
def GridSearchCVRadiusNeighborsRegressor(X_train, y_train):
    """Grid-search a RadiusNeighborsRegressor with 3-fold cross-validation.

    The grid covers both weighting schemes, integer radii 1..99 and all
    four neighbour-search algorithms; models are scored and refitted on
    'r2'.

    Args:
        X_train: Training features.
        y_train: Training targets.

    Returns:
        tuple: ``(best_estimator_, best_params_, best_score_)`` of the
        fitted search.
    """
    # (The original also built an unused list `d`; it has been removed.)
    param_grid = {
        'weights': ['uniform', 'distance'],
        'radius': range(1, 100),
        'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    }

    model = RadiusNeighborsRegressor()

    scores = [
        'r2',
    ]

    # NOTE(review): `iid` was deprecated in scikit-learn 0.22 and removed
    # in 0.24; it is kept so results on older releases do not change, but
    # it must be dropped before upgrading scikit-learn.
    reg = GridSearchCV(model,
                       cv=3,
                       param_grid=param_grid,
                       verbose=0,
                       n_jobs=-1,
                       scoring=scores,
                       refit='r2',
                       iid=True)

    reg.fit(X_train, y_train)

    return reg.best_estimator_, reg.best_params_, reg.best_score_
 def test_model_knn_regressor_radius(self):
     """ONNX output of a default regressor matches scikit-learn ('cdist')."""
     model, X = self._fit_model(RadiusNeighborsRegressor())
     model_onnx = convert_sklearn(
         model, "KNN regressor",
         [("input", FloatTensorType([None, 4]))],
         target_opset=TARGET_OPSET,
         options={id(model): {'optim': 'cdist'}})
     session = InferenceSession(model_onnx.SerializeToString())
     got = session.run(None, {'input': X.astype(numpy.float32)})[0]
     exp = model.predict(X.astype(numpy.float32))

     if not numpy.isnan(got.ravel()).any():
         assert_almost_equal(exp.ravel(), got.ravel(), decimal=3)
         return

     # The model is unexpectedly producing nan values
     # not on all platforms: dump every intermediate output.
     report = ['--EXP--', str(exp), '--GOT--', str(got),
               '--EVERY-OUTPUT--']
     for out in enumerate_model_node_outputs(model_onnx, add_node=False):
         partial = select_model_inputs_outputs(model_onnx, out)
         partial_session = InferenceSession(partial.SerializeToString())
         partial_result = partial_session.run(
             None, {'input': X.astype(numpy.float32)})
         report.append('--{}--'.format(out))
         report.append(str(partial_result))
     # The failure is tolerated on onnxruntime releases before 1.4.0.
     if (StrictVersion(onnxruntime.__version__) <
             StrictVersion("1.4.0")):
         return
     raise AssertionError('\n'.join(report))
 def __init__(self,
              regression=True,
              radius=1.0,
              weights='distance',
              algorithm='auto',
              leaf_size=30,
              p=2,
              metric='minkowski',
              outlier_label=None,
              metric_params=None):
     """Build a radius-neighbours regressor or classifier.

     Args:
         regression: True builds a RadiusNeighborsRegressor, False a
             RadiusNeighborsClassifier.
         radius, weights, algorithm, leaf_size, p, metric, metric_params:
             Forwarded by keyword to the chosen estimator.
         outlier_label: Forwarded to the classifier only.

     All arguments are also stored on underscore-prefixed attributes.
     """
     self._regression = regression
     self._radius = radius
     self._weights = weights
     self._algorithm = algorithm
     self._leaf_size = leaf_size
     self._p = p
     self._metric = metric
     self._metric_params = metric_params
     self._outlier_label = outlier_label
     # Keywords are required here: the classifier's 7th positional slot is
     # `outlier_label`, so the former positional call passed
     # `metric_params` as the outlier label and dropped `outlier_label`.
     shared = dict(radius=radius,
                   weights=weights,
                   algorithm=algorithm,
                   leaf_size=leaf_size,
                   p=p,
                   metric=metric,
                   metric_params=metric_params)
     if regression:
         self._model = RadiusNeighborsRegressor(**shared)
     else:
         self._model = RadiusNeighborsClassifier(
             outlier_label=outlier_label, **shared)
     super().__init__()
Esempio n. 16
0
    def fit(self, featureMatrix, labels):
        """Create the model selected by ``self.method`` and fit it.

        ``self.method`` is matched case-insensitively: 'lwr' (locally
        weighted regression), 'rnn' (radius neighbours), 'knn' (k-nearest
        neighbours); anything else falls back to linear regression without
        an intercept. Hyper-parameters are read from ``self.params``.
        """
        method = self.method.lower()

        if method == 'lwr':
            # locally weighted regression
            self.model = local_regression.LWRegressor(
                kernel=self.params['kernel'],
                fit_intercept=self.params['fit_intercept'],
                alpha=self.params['alpha'])
        elif method == 'rnn':
            # radius neighbors regression
            self.model = RadiusNeighborsRegressor(
                radius=self.params['radius'],
                weights=self.params['weights'],
                leaf_size=self.params['leaf_size'])
        elif method == 'knn':
            # k-nearest neighbors regression
            self.model = KNeighborsRegressor(
                n_neighbors=self.params['n_neighbors'],
                weights=self.params['weights'],
                leaf_size=self.params['leaf_size'],
                p=self.params['p'])
        else:
            # linear regression
            self.model = linear_model.LinearRegression(fit_intercept=False)

        # fit model to data
        self.model.fit(featureMatrix, labels)
Esempio n. 17
0
    def _check_coords_for_distance_weighting(self, coords, check_radius, check_weights, X, y_mean):
        """
        Checks that coords won't break the distance weighting function

        A RadiusNeighborsRegressor built with ``check_radius`` and
        ``check_weights`` is fitted on ``(X, y_mean)`` and asked to predict
        each coordinate on its own.

        Returns the list of indices into ``coords`` whose prediction did
        not raise ZeroDivisionError.
        """
        if len(coords) == 0:
            # Nothing to probe, so (as before) no model is fitted.
            return []

        # The probe model does not depend on the coordinate, so it is
        # fitted once instead of once per coordinate.
        probe = RadiusNeighborsRegressor(radius=check_radius, weights=check_weights)
        probe.fit(X, y_mean)

        valid_inds = []
        # `range` works on both Python 2 and 3 (the original used xrange).
        for ind in range(len(coords)):
            try:
                probe.predict([coords[ind]])
            except ZeroDivisionError:
                continue
            valid_inds.append(ind)
        return valid_inds
Esempio n. 18
0
def get_hyperparameters_model():
    """Return the search configuration for a default RadiusNeighborsRegressor.

    The result maps 'radius_neighbors_regressor' to a dict holding the
    estimator ('model') and an empty 'param_distributions' dict.
    """
    return {
        'radius_neighbors_regressor': {
            'model': RadiusNeighborsRegressor(),
            'param_distributions': {},
        }
    }
Esempio n. 19
0
def get_author_list_with_pruning_method(feature_list, author_list, qp, radius):
    """
        Find the training points lying within ``radius`` of the query point.

        feature_list - the feature list to indicate the stylometric features
        author_list - the author list to indicate a paragraph is written by whom
        qp - the query point, mostly represents a document
        radius - the neighbourhood radius around the query point

        The returned value is whatever ``radius_neighbors`` yields with
        ``return_distance=True``. It is meant to shrink the training set by
        discarding data points too far from the query point, since
        computing the Hausdorff distance is expensive.

        Please refer to the following link for more information
        http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.RadiusNeighborsRegressor.html#sklearn.neighbors.RadiusNeighborsRegressor
    """
    pruner = RadiusNeighborsRegressor(radius=radius, algorithm='brute', p=2)
    pruner.fit(feature_list, author_list)
    within_radius = pruner.radius_neighbors(qp, return_distance=True)
    return within_radius
def build_model(args, C, seed):
    """Return the regressor selected by the flags on ``args``.

    ``args.dc_tree`` selects a decision tree, ``args.nn_radius`` a
    radius-neighbours regressor with radius 1.0; otherwise a linear SVR is
    built.
    """
    if args.dc_tree:
        return DecisionTreeRegressor(random_state=seed)
    if args.nn_radius:
        return RadiusNeighborsRegressor(radius=1.0)
    # NOTE(review): the parameter `C` is never used; the SVR instead reads
    # `complexities[comp]`, which must come from an enclosing/global scope
    # - confirm whether `C=C` was intended.
    return svm.LinearSVR(C=complexities[comp], random_state=seed)
Esempio n. 21
0
    def predict(self):
        """
         Fit a scikit-learn RadiusNeighborsRegressor and score it.

         The regressor (radius taken from ``get_ohe_config().rnr_radius``)
         is trained on ``self.X_train`` / ``self.y_train`` and used to
         predict ``self.X_test``. The predictions are compared with
         ``self.y_test`` via ``OneHotPredictor.get_accuracy``; the result
         is stored in ``self.acc`` and returned.
         """
        radius = get_ohe_config().rnr_radius
        regressor = RadiusNeighborsRegressor(radius=radius)
        regressor.fit(self.X_train, self.y_train)
        predictions = list(regressor.predict(self.X_test))
        self.acc = OneHotPredictor.get_accuracy(predictions, self.y_test)
        return self.acc
 def test_model_knn_regressor_yint_radius(self):
     """Convert a regressor fitted with ``label_int=True`` and dump it."""
     regressor = RadiusNeighborsRegressor()
     model, X = self._fit_model(regressor, label_int=True)
     input_spec = [("input", FloatTensorType([None, 4]))]
     model_onnx = convert_sklearn(model, "KNN regressor", input_spec,
                                  target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)
     # Only the first seven rows are used for the dump.
     sample = X.astype(numpy.float32)[:7]
     dump_data_and_model(
         sample, model, model_onnx,
         basename="SklearnRadiusNeighborsRegressorYInt")
Esempio n. 23
0
def compare_multiple_stacks(folder):
    """Compare mean vs. sigma-clipped mean across several image stacks.

    Every entry of ``folder`` is loaded with ``load_images``. For each of
    the first three channels one figure is shown with one curve per stack:
    a uniformly weighted radius-neighbours fit of the relative difference
    between the plain mean and the sigma-clipped mean (scaled by 1E3) as a
    function of the mean value.

    Args:
        folder: Directory whose entries are each passed to ``load_images``.
            NOTE(review): presumably every stack has the same shape,
            indexed (image, row, column, channel) - confirm.
    """
    subfolders = os.listdir(folder)

    all_data = []

    for subfolder in tqdm.tqdm(subfolders):
        all_data.append(load_images(os.path.join(folder, subfolder)))

    all_data = np.array(all_data)
    print(all_data.shape)

    # The channel count is hard-coded to 3 here.
    for channel in range(3):
        for subfolder_index in range(all_data.shape[0]):

            channel_stack = all_data[subfolder_index][:, :, :, channel]

            # Plain mean and sigma-clipped (sigma=2) mean along axis 0.
            img_mean = np.mean(channel_stack, axis=0)
            img_sigma_clip = np.mean(astropy.stats.sigma_clip(channel_stack,
                                                              sigma=2,
                                                              axis=0),
                                     axis=0)

            # Relative deviation of the plain mean from the clipped mean.
            img_sigma_ratio = (img_mean / img_sigma_clip - 1) * 1E3
            # Sub-sampling step; 1 keeps every pixel.
            skip = 1
            flat_ratios = img_sigma_ratio.flatten()[::skip]
            mean_values = img_mean.flatten()[::skip]

            # plt.scatter(mean_values, flat_ratios, alpha=0.1, color='black', s=1)

            rnr = RadiusNeighborsRegressor(radius=50, weights='uniform')
            rnr.fit(np.expand_dims(mean_values, axis=1), flat_ratios.flatten())

            # Evaluate every 10 units, staying 200 units inside both ends
            # of the observed mean range.
            x = np.arange(
                np.min(mean_values) + 200,
                np.max(mean_values) + 1 - 200, 10)
            line_y = rnr.predict(np.expand_dims(x, axis=1))
            plt.plot(x, line_y, label=str(subfolder_index))

        # One figure per channel, one labelled curve per stack.
        plt.legend()
        plt.grid(True)
        plt.show()
Esempio n. 24
0
def compare_error_vs_brightness(folder):
    """Plot the mean / sigma-clipped-mean deviation against brightness.

    For every channel of the stack returned by ``load_images(folder)`` one
    figure is shown containing: a per-pixel scatter of the relative
    deviation (scaled by 1E3) against the mean, two red reference curves
    derived from ``bit_flip_change``, and a distance-weighted
    radius-neighbours fit in green.

    Args:
        folder: Passed unchanged to ``load_images``.
            NOTE(review): presumably the loaded array is indexed
            (image, row, column, channel) - confirm against load_images.
    """
    data = load_images(folder)

    for channel in range(data.shape[3]):
        channel_stack = data[:, :, :, channel]

        # Plain mean and sigma-clipped (sigma=2) mean along axis 0.
        img_mean = np.mean(channel_stack, axis=0)
        img_sigma_clip = np.mean(astropy.stats.sigma_clip(channel_stack,
                                                          sigma=2,
                                                          axis=0),
                                 axis=0)

        # Relative deviation of the plain mean from the clipped mean.
        img_sigma_ratio = (img_mean / img_sigma_clip - 1) * 1E3

        x = np.arange(np.min(img_mean), np.max(img_mean) + 1)
        # Channel 1 uses 128, every other channel 256.
        bit_flip_change = 128 if channel == 1 else 256
        # Reference curves: the same ratio when the stack sum
        # (number of images * x) is shifted by -/+ bit_flip_change.
        y_top = ((channel_stack.shape[0] * x) /
                 (channel_stack.shape[0] * x - bit_flip_change) - 1) * 1E3
        y_bottom = ((channel_stack.shape[0] * x) /
                    (channel_stack.shape[0] * x + bit_flip_change) - 1) * 1E3
        plt.plot(x, y_top, 'r')
        plt.plot(x, y_bottom, 'r')
        plt.scatter(img_mean.flatten(),
                    img_sigma_ratio.flatten(),
                    alpha=0.1,
                    color='black',
                    s=1)

        rnr = RadiusNeighborsRegressor(radius=50, weights='distance')
        rnr.fit(np.expand_dims(img_mean.flatten(), axis=1),
                img_sigma_ratio.flatten())

        # Same grid as above, recomputed for the regression curve.
        x = np.arange(np.min(img_mean), np.max(img_mean) + 1)
        line_y = rnr.predict(np.expand_dims(x, axis=1))
        plt.plot(x, line_y, 'g')

        plt.grid(True)
        plt.show()
    def RadiusNeighborsRegressor(radius=1.0,
                                 weights='distance',
                                 algorithm='auto',
                                 p=2):
        """Build a RadiusNeighborsRegressor with a few fixed settings.

        ``leaf_size`` (30), ``metric`` ('minkowski') and ``metric_params``
        (None) are fixed; the remaining arguments are forwarded.

        NOTE(review): this function shares its name with the estimator it
        calls, so the inner name must resolve to the imported class rather
        than to this function - confirm the enclosing scope.
        """
        fixed_settings = {
            'leaf_size': 30,
            'metric': 'minkowski',
            'metric_params': None,
        }
        return RadiusNeighborsRegressor(radius=radius,
                                        weights=weights,
                                        algorithm=algorithm,
                                        p=p,
                                        **fixed_settings)
def grid_points_2d(mesh, cell_size=10):
    """Resample a mesh's elevation onto a regular 2-D grid surface.

    A voxel grid is built from ``mesh``; the third coordinate of the mesh
    points is interpolated at every cell centre with a distance-weighted
    radius-neighbours regressor. The search radius starts at
    ``0.75 * cell_size`` and grows by a factor of 1.5 until every cell has
    a value.

    Args:
        mesh: Source mesh exposing ``points``.
        cell_size: Grid cell size handed to ``vtk_Voxel.from_mesh``.

    Returns:
        The extracted grid surface with the interpolated 'Elevation'
        written into the third coordinate of its points.
    """
    grid = vtk_Voxel.from_mesh(mesh, cell_size, 2)

    cells = grid.cell_centers().points

    radius = cell_size * 0.5
    # NaN marks cells that have no value yet.
    tmat = np.full(cells.shape[0], np.nan)
    print("sample min", np.min(mesh.points[:, 2]), "max",
          np.max(mesh.points[:, 2]))
    while np.any(np.isnan(tmat)):
        # keep increasing radius until all cells have values
        radius *= 1.5
        print("RadiusNeighborsRegressor =", radius, "m")
        # `weights` must be a keyword: current scikit-learn only accepts
        # `radius` positionally.
        neigh = RadiusNeighborsRegressor(radius=radius, weights='distance')
        neigh.fit(mesh.points[:, :2], mesh.points[:, 2])
        rmat = neigh.predict(cells[:, :2])
        # Only fill cells that are still empty; earlier (smaller-radius)
        # estimates are kept.
        np.putmask(tmat, np.isnan(tmat), rmat)
    print("regression min", np.min(tmat), "max", np.max(tmat))
    grid.cell_arrays['Elevation'] = tmat
    surf = grid.extract_surface()
    surf = surf.ctp()
    surf.points[:, 2] = surf.point_arrays['Elevation']

    return surf
def powerproduction():
    """Predict power output for the wind speed submitted in a POST form.

    The 'speed' form field is parsed as a float, a distance-weighted
    radius-neighbours regressor is fitted on the non-zero-power rows of
    'powerproduction.csv', and the prediction for that speed is returned.

    Returns:
        dict: ``{'value': <predicted power>}`` for POST requests.
        NOTE(review): for any other method the function falls through and
        returns None - confirm that the route only accepts POST.
    """
    if fl.request.method == "POST":
        # (A dead `speed = {}` initialisation was removed here.)
        speed = float(fl.request.form['speed'])
        # speed = requests.get(data['input_s'])
        # import csv data and convert to pandas dataframe
        df = pd.read_csv("powerproduction.csv")

        # remove all zeros
        df = df[df.power != 0]

        # put rows in order of speed
        df = df.sort_values('speed')

        # set each column to a numpy array for processing
        S = df['speed'].to_numpy()
        p = df['power'].to_numpy()

        neigh_radius = RadiusNeighborsRegressor(radius=1.7, weights='distance', p=2)
        # A single feature, so the speeds are reshaped into one column.
        neigh_radius.fit(S.reshape(-1, 1), p)

        p_pred = neigh_radius.predict([[speed]])

        return {'value': p_pred[0]}
 def test_model_knn_regressor2_1_radius(self):
     """ONNX output of a two-target brute-force regressor matches sklearn.

     When the ONNX result contains NaN, every intermediate node output is
     collected into the failure message; on onnxruntime 1.4/1.5 a NaN
     result is downgraded to a warning.
     """
     model, X = self._fit_model_simple(
         RadiusNeighborsRegressor(algorithm="brute"), n_targets=2)
     X = X[:-1]
     model_onnx = convert_sklearn(
         model,
         "KNN regressor", [("input", FloatTensorType([None, X.shape[1]]))],
         target_opset=TARGET_OPSET)
     self.assertIsNotNone(model_onnx)
     sess = InferenceSession(model_onnx.SerializeToString())
     got = sess.run(None, {'input': X.astype(numpy.float32)})[0]
     exp = model.predict(X.astype(numpy.float32))
     if any(numpy.isnan(got.ravel())):
         # The model is unexpectedly producing nan values
         # not on all platforms.
         # It happens when two matrices are multiplied,
         # one is (2, 20, 20), second is (20, 20)
         # and contains only 0 or 1 values.
         # The output contains nan values on the first row
         # but not on the second one.
         rows = [
             '--EXP--',
             str(exp), '--GOT--',
             str(got), '--EVERY-OUTPUT--'
         ]
         for out in enumerate_model_node_outputs(model_onnx,
                                                 add_node=False):
             onx = select_model_inputs_outputs(model_onnx, out)
             sess = InferenceSession(onx.SerializeToString())
             res = sess.run(None, {'input': X.astype(numpy.float32)})
             rows.append('--{}--'.format(out))
             rows.append(str(res))
         # Built once up front: the 1.3/win32 branch below used to
         # reference `whole` before it was ever assigned.
         whole = '\n'.join(rows)
         if onnxruntime.__version__.startswith(('1.4.', '1.5.')):
             # TODO: investigate the regression in onnxruntime 1.4
             # One broadcasted multiplication unexpectedly produces nan.
             if "[        nan" in whole:
                 warnings.warn(whole)
                 return
             raise AssertionError(whole)
         if (onnxruntime.__version__.startswith('1.3.')
                 and sys.platform == 'win32'):
             # Same error but different line number for further
             # investigation.
             raise AssertionError(whole)
         raise AssertionError(whole)
     assert_almost_equal(exp, got, decimal=5)
 def test_model_knn_regressor_double_radius(self):
     """Convert with a double input tensor and dump two float64 samples."""
     model, X = self._fit_model(RadiusNeighborsRegressor())
     input_spec = [("input", DoubleTensorType([None, 4]))]
     model_onnx = convert_sklearn(
         model, "KNN regressor", input_spec,
         target_opset=TARGET_OPSET,
         options={id(model): {'optim': 'cdist'}})
     self.assertIsNotNone(model_onnx)
     # First the fitted inputs themselves, then the same inputs shifted
     # by 0.1; only the first seven rows are dumped each time.
     for batch in (X, X + 0.1):
         dump_data_and_model(
             batch.astype(numpy.float64)[:7],
             model, model_onnx,
             basename="SklearnRadiusNeighborsRegressor64")
 def test_model_knn_regressor_weights_distance_11_radius(self):
     """Distance-weighted regressor converts correctly for several opsets."""
     regressor = RadiusNeighborsRegressor(
         weights="distance", algorithm="brute", radius=100)
     model, X = self._fit_model_simple(regressor)
     # Opsets 11, 12 and TARGET_OPSET, skipping any above TARGET_OPSET.
     opsets = [op for op in sorted({TARGET_OPSET, 12, 11})
               if op <= TARGET_OPSET]
     for op in opsets:
         with self.subTest(opset=op):
             input_spec = [("input", FloatTensorType([None, X.shape[1]]))]
             model_onnx = convert_sklearn(
                 model, "KNN regressor", input_spec, target_opset=op)
             self.assertIsNotNone(model_onnx)
             session = InferenceSession(model_onnx.SerializeToString())
             feed = {'input': X.astype(numpy.float32)}
             got = session.run(None, feed)[0]
             exp = model.predict(X.astype(numpy.float32))
             assert_almost_equal(exp, got.ravel(), decimal=3)
Esempio n. 31
0
def initializeModel(name,
                    param_1=5,
                    neighbors=5,
                    radius=1.0,
                    weights='uniform'):
    """Create an unfitted estimator selected by ``name``.

    Args:
        name: One of 'knn', 'tree', 'forest', 'knnr' or 'rnr'.
        param_1: ``n_neighbors`` for the 'knn' classifier.
        neighbors: ``n_neighbors`` for the 'knnr' regressor.
        radius: Radius for the 'rnr' regressor.
        weights: Weighting scheme for the 'rnr' regressor.

    Returns:
        The newly constructed estimator.

    Raises:
        ValueError: If ``name`` is not one of the supported keys
            (previously this surfaced as an UnboundLocalError).
    """
    if name == 'knn':
        return KNeighborsClassifier(n_neighbors=param_1)
    if name == 'tree':
        return tree.DecisionTreeClassifier()
    if name == 'forest':
        return RandomForestClassifier()
    if name == 'knnr':
        return KNeighborsRegressor(n_neighbors=neighbors)
    if name == 'rnr':
        return RadiusNeighborsRegressor(radius=radius,
                                        weights=weights,
                                        n_jobs=-1)
    raise ValueError(
        "Unknown model name {!r}; expected one of "
        "'knn', 'tree', 'forest', 'knnr', 'rnr'".format(name))
Esempio n. 32
0
def mydist(x, y):
    """Count in how many of the first three positions ``x`` and ``y`` differ.

    Positions 0, 2 and 1 (compared in that order) each contribute 1.0 on a
    mismatch and 0.0 on a match, so the result is a float between 0.0 and
    3.0. Any further positions are ignored.
    """
    compared_positions = (0, 2, 1)
    return sum(0. if x[i] == y[i] else 1. for i in compared_positions)

#dist = neighbors.DistanceMetric.get_metric('pyfunc', func=distance)

# Load and preprocess the data, keeping only the four listed columns.
preprocessing = fp.feature_preprocessing()
preprocessing.full_preprocess(used_columns=['ASS_ID', 'WEEK_DAY', 'TIME', 'CSPL_RECEIVED_CALLS'])
# Work on the first 1000 rows only.
data = preprocessing.data[:1000]
# Target is the received-calls column; every other column is a feature.
Y = data['CSPL_RECEIVED_CALLS']
X = data.drop(['CSPL_RECEIVED_CALLS'], axis=1)

# Hold out 10% of the rows for testing (fixed seed).
# NOTE(review): `sklearn.cross_validation` was removed in scikit-learn 0.20
# in favour of `sklearn.model_selection` - confirm the targeted version.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, Y, test_size=0.1, random_state=0)

# Radius-neighbours regressor using the custom distance `mydist`.
# NOTE(review): `metric='pyfunc', func=...` looks like the legacy way of
# passing a Python metric; newer releases take the callable as `metric`.
neigh = RadiusNeighborsRegressor(radius=0.5, metric='pyfunc', func=mydist, algorithm='auto')
print('fitting...')
neigh.fit(X_train, y_train)
print('fitted')
#error = neigh.score(X_test, y_test)

#print(error)

# Predictions for the held-out rows (not used further in this snippet).
y_pred = neigh.predict(X_test)

Esempio n. 33
0
from sklearn.naive_bayes import MultinomialNB
from sklearn import svm
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import RidgeClassifierCV
from sklearn.ensemble import GradientBoostingClassifier

# Instantiate the candidate models. Only `forest`, `adaboost`, `gdc` and
# `rd` are fitted in the loop below.
# NOTE(review): ExtraTreesClassifier, RandomForestRegressor and KFold are
# not among the imports visible above - confirm they are imported elsewhere.
gdc = GradientBoostingClassifier()
lr = LogisticRegression()
clf = svm.SVR()
et = ExtraTreesClassifier()
rgr = RadiusNeighborsRegressor()
forest = RandomForestRegressor(n_estimators = 100, n_jobs = 2, oob_score=True)
adaboost = AdaBoostRegressor()
nb = GaussianNB()
rd = RidgeClassifierCV()
# 5-fold split over the rows of `report`.
# NOTE(review): `KFold(n, n_folds=...)` and iterating the object directly
# look like the legacy sklearn.cross_validation API - confirm the version.
kf = KFold(report.shape[0], n_folds = 5)

for train_index, test_index in kf:
    #print("TRAIN:", train_index, "TEST:", test_index)
    # Select this fold's rows; the target is the 'survey_participant' column.
    # NOTE(review): `.ix` was removed in pandas 1.0 - confirm the version.
    X_train, X_test = variables.ix[list(train_index),], variables.ix[list(test_index),]
    y_train = report['survey_participant'].ix[list(train_index),]
    y_test = report['survey_participant'].ix[list(test_index),]
    # Fit four of the models on the training fold.
    forest.fit(X_train,y_train)
    adaboost.fit(X_train,y_train)
    gdc.fit(X_train, y_train)
    rd.fit(X_train, y_train)
Esempio n. 34
0
import pandas as pd
import numpy as np
from sklearn.neighbors import RadiusNeighborsRegressor
from sklearn import cross_validation

# Command-line driver: argv[1] = training HDF file, argv[2] = test HDF file,
# argv[3] = output CSV path.
# `sys` is used below but was never imported by this script.
import sys

# Read the training and test data
df = pd.read_hdf(sys.argv[1])
tdf = pd.read_hdf(sys.argv[2])

# Convert to the numpy arrays used by scikit-learn.
# `.values` replaces `.as_matrix()`, which pandas removed in 1.0.
X_train = df[['lat', 'lon']].values
y_train = df.length.values * 15
X_test = tdf[['lat', 'lon']].values
y_test = tdf.length.values * 15

id_test = tdf.index.to_series().values

# Initialise the model
model = RadiusNeighborsRegressor(radius=0.0005, weights='distance')

# Training
model.fit(X_train, y_train)

# Prediction
y_try = model.predict(X_test)

# Write the results, indexed by the test rows' original index
resdf = pd.DataFrame({'idx': id_test, 'predict': (y_try), 'actual': (y_test)}).set_index('idx')

resdf.to_csv(sys.argv[3])
Esempio n. 35
0
# Python 2 script fragment (print statements). `lin3`, `X_train`, `y_train`,
# `X_test`, `y_test`, `threeYrXcol`, `threeYrycol`, `df_3avg`, `df`, `Xvar`,
# `KNReg` and `RNReg` are defined outside this excerpt.

# Report the scores, intercept and per-column coefficients of `lin3`.
print "Train: ", lin3.score(X_train, y_train)
print "Test: ", lin3.score(X_test, y_test)
print "Intercept: ", lin3.intercept_
for k, v in enumerate(lin3.coef_[0]):
	print threeYrXcol[k], ": ", v

# KNeighborsRegressor
kn3 = KNReg(weights='uniform')
#kn3.fit(df_3avg[threeYrXcol].values, df_3avg[threeYrycol].values)
kn3.fit(X_train, y_train)
print "Train: ", kn3.score(X_train, y_train)
print "Test: ", kn3.score(X_test, y_test)
# print kn3.score(df_3avg[threeYrXcol].values, df_3avg[threeYrycol].values)

# RadiusNeighborsRegressor
rn3 = RNReg(radius=7.0)
#rn3.fit(df_3avg[threeYrXcol].values, df_3avg[threeYrycol].values)
rn3.fit(X_train, y_train)
print "Train: ", rn3.score(X_train, y_train)
print "Test: ", rn3.score(X_test, y_test)
# Score on the full three-year-average frame as well.
print rn3.score(df_3avg[threeYrXcol].values, df_3avg[threeYrycol].values)

# Test 2010/11/12 stats and 2013 projections against 2013 actuals
y=2013
# The three preceding years, most recent first.
y3 = [y-1,y-2,y-3]
# Teams present in both the most recent and the oldest of those years.
tms_include = np.intersect1d(df[df.Year == y3[0]].Team.values, df[df.Year == y3[2]].Team.values)
# Per-team three-year averages ('_3yr_avg') joined with the most recent
# year's per-team means ('_yr3').
df2012 = pd.merge(df[(df.Year.isin(y3)) & (df.Team.isin(tms_include))].groupby('Team')[Xvar].mean(), df[(df.Year == y3[0]) & (df.Team.isin(tms_include))].groupby('Team')[Xvar].mean(), how='left',left_index=True, right_index=True, suffixes=['_3yr_avg','_yr3'])
# Project the target year with the linear model.
df2012['f2013'] = lin3.predict(df2012.values)
# Rank by the 'f_yr3' column, then re-sort by the projection.
# NOTE(review): `DataFrame.sort` was removed in later pandas releases in
# favour of `sort_values` - confirm the pandas version.
df2012.sort('f_yr3', ascending=False, inplace=True)
df2012['rnk_2012'] = range(1,df2012.shape[0]+1)
df2012.sort('f2013', ascending=False, inplace=True)