class KNNDynamicsResidual:
    """Radius-neighbours regression model used as a dynamics residual.

    Wraps a ``RadiusNeighborsRegressor`` built with uniform weights and the
    radius given by ``args.neighbor_radius``. Query rows that have no
    training sample inside that radius are predicted as all-zero rows, and
    so is every row while the model has not been fit yet.
    """

    def __init__(self, args, env_params):
        """Store the configuration and build the (still unfit) regressor.

        args must expose ``neighbor_radius``; env_params is only stored.
        """
        # Save args
        self.args, self.env_params = args, env_params
        # Create the KNN model
        self.knn_model = RadiusNeighborsRegressor(
            radius=args.neighbor_radius, weights='uniform')
        # Flag: set to True by fit(); predict() returns zeros until then
        self.is_fit = False

    def fit(self, X, y):
        """Fit the regressor and return the loss on the training data.

        X should be the data matrix N x d, where each row is a 4D vector
        consisting of object pos and gripper pos.
        y should be the target matrix N x d, where each row is a 4D vector
        consisting of next object pos and next gripper pos.
        """
        self.knn_model.fit(X, y)
        self.is_fit = True
        return self.loss(X, y)

    def _neighbor_counts(self, X):
        """Return an integer array with the number of in-radius neighbours per row of X."""
        # Only the index lists are needed, so skip returning the distances.
        neighbor_indices = self.knn_model.radius_neighbors(
            X, return_distance=False)
        return np.array([len(idx) for idx in neighbor_indices], dtype=int)

    def predict(self, X):
        """Predict targets for X, leaving zeros where no neighbour exists.

        X should be the data matrix N x d, where each row is a 4D vector
        consisting of object pos and gripper pos (any array-like is
        accepted). The result has the same shape as X, i.e. targets are
        assumed to have the same dimensionality as the inputs.
        """
        X = np.asarray(X)
        ypred = np.zeros(X.shape)
        if not self.is_fit:
            # KNN model is not fit
            return ypred

        # Rows without any neighbour inside the radius cannot be averaged
        # by the regressor, so they keep the zero prediction.
        has_neighbors = self._neighbor_counts(X) > 0
        if not has_neighbors.any():
            # None of the rows has a neighbour
            return ypred

        # For the rows that do have neighbours use the KNN prediction
        ypred[has_neighbors] = self.knn_model.predict(X[has_neighbors])
        return ypred

    def get_num_neighbors(self, X):
        """Return, per row of X, how many training samples lie within the radius.

        Always an integer array of length N; all zeros while the model is
        not fit.
        """
        X = np.asarray(X)
        if not self.is_fit:
            # Integer dtype keeps the result consistent with the fitted case.
            return np.zeros(X.shape[0], dtype=int)
        return self._neighbor_counts(X)

    def loss(self, X, y):
        """Return the mean Euclidean distance between predictions and targets."""
        ypred = self.predict(X)
        # Loss is just the mean distance between predictions and true targets
        row_errors = np.linalg.norm(ypred - y, axis=1)
        return row_errors.mean()
def get_author_list_with_pruning_method(feature_list, author_list, qp, radius):
    """Find the training points that lie within ``radius`` of the query point.

    feature_list - the feature list to indicate the stylometric features
    author_list - the author list to indicate a paragraph is written by whom
    qp - the query point, mostly represents a document
    radius - the search radius passed to the neighbours model

    A brute-force ``RadiusNeighborsRegressor`` with the p=2 metric parameter
    is fit on (feature_list, author_list) and queried with qp. The value
    returned is the result of ``radius_neighbors(qp, return_distance=True)``,
    i.e. the distances to, and the indices of, the neighbours found; the
    caller is expected to use it to drop data points that are too far from
    the query point. Since it takes time to calculate the Hausdorff
    distance, reducing the size of the set this way can speed up the
    process.

    Please refer to the following link for more information
    http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.RadiusNeighborsRegressor.html#sklearn.neighbors.RadiusNeighborsRegressor
    """
    # fit() hands back the estimator itself, so the query can be chained.
    fitted_model = RadiusNeighborsRegressor(
        p=2, algorithm='brute', radius=radius,
    ).fit(feature_list, author_list)
    nearby = fitted_model.radius_neighbors(qp, return_distance=True)
    return nearby