def _apply_weights(self, neighbor_vals, neighbor_weights):
    # weighted mean/mode of neighbors for a single regression target
    if neighbor_vals.ndim == 2:
        if self.measure == "mean":
            X = np.ma.average(neighbor_vals, weights=neighbor_weights, axis=1)
        else:
            X, _ = weighted_mode(neighbor_vals, neighbor_weights, axis=1)
    # weighted mean/mode of neighbors for a multi-target regression
    # neighbor_vals = (n_samples, n_neighbors, n_targets)
    else:
        X = np.zeros((neighbor_vals.shape[0], neighbor_vals.shape[2]))
        if self.measure == "mean":
            for i in range(neighbor_vals.shape[-1]):
                X[:, i] = np.ma.average(neighbor_vals[:, :, i],
                                        weights=neighbor_weights, axis=1)
        else:
            for i in range(neighbor_vals.shape[-1]):
                X[:, i], _ = weighted_mode(neighbor_vals[:, :, i],
                                           neighbor_weights, axis=1)
    return X
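# A minimal standalone sketch of the aggregation above, with made-up data
# (`vals` and `weights` are hypothetical, not from the original): each row
# holds one sample's neighbor values, and the weighted mode picks the value
# whose weights sum highest per row.
import numpy as np
from sklearn.utils.extmath import weighted_mode

vals = np.array([[1, 1, 2],
                 [3, 2, 2]])
weights = np.array([[0.2, 0.3, 0.9],
                    [0.5, 0.4, 0.4]])

mean_agg = np.ma.average(vals, weights=weights, axis=1)  # weighted means
mode_agg, _ = weighted_mode(vals, weights, axis=1)       # weighted modes
print(mean_agg)  # per-row weighted averages
print(mode_agg)  # [[2.] [2.]] -- 2 wins both rows by total weight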
def predict(self, X):
    """Predict the class labels for the provided data.

    Parameters
    ----------
    X : array
        A 2-D array representing the test points.

    Returns
    -------
    labels : array
        List of class labels (one for each data sample).
    """
    X = np.atleast_2d(X)
    neigh_dist, neigh_ind = self.kneighbors(X)
    pred_labels = self._y[neigh_ind]

    weights = _get_weights(neigh_dist, self.weights)
    if weights is None:
        mode, _ = smart_mode(pred_labels, axis=1)
    else:
        mode, _ = weighted_mode(pred_labels, weights, axis=1)

    # np.int was removed from NumPy; the builtin int is equivalent here
    return mode.flatten().astype(int)
def simKNNpredict(X, Y, Xtest, Ytest, L, k=3, weighted=True, method='brute'):
    if not isinstance(X, np.matrix):
        X = np.matrix(X)
    if not isinstance(Xtest, np.matrix):
        Xtest = np.matrix(Xtest)
    # transform data
    X = X * L.T
    Xtest = L * Xtest.T
    n = Xtest.shape[1]
    y_pred = np.empty(n, dtype=Ytest.dtype)
    if method == 'tree':
        # build tree
        print('not implemented')
    else:
        # resort to brute force
        for x in range(n):
            w = np.squeeze(np.asarray(X * Xtest[:, x]))
            neighb = np.argpartition(-w, k)[:k]
            # cannot use bottleneck unless numpy version >= 1.9 available
            # argpartsort(np.squeeze(np.asarray(X * Xtest[:, x])), k)[:k]
            # unweighted
            # lmode, num = mode(Y[neighb], axis=0)
            # weighted
            lmode, weight = weighted_mode(Y[neighb], w[neighb])
            y_pred[x] = lmode
    return y_pred
def predict_loo(self):
    """Predict the class labels for the training data via leave-one-out.

    Returns
    -------
    y : ndarray of shape (n_queries,) or (n_queries, n_outputs)
        Class labels for each training data sample.
    """
    neigh_dist, neigh_ind = self.kneighbors()
    classes_ = self.classes_
    _y = self._y
    if not self.outputs_2d_:
        _y = self._y.reshape((-1, 1))
        classes_ = [self.classes_]

    n_outputs = len(classes_)
    n_queries = len(neigh_dist)
    weights = _get_weights(neigh_dist, self.weights)

    y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)
    for k, classes_k in enumerate(classes_):
        if weights is None:
            mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
        else:
            mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)

        mode = np.asarray(mode.ravel(), dtype=np.intp)
        y_pred[:, k] = classes_k.take(mode)

    if not self.outputs_2d_:
        y_pred = y_pred.ravel()

    return y_pred
def get_bin_indices(hits, bins, Rmax=207):
    segclass = 'segclass'
    binclass = 'binclass'
    fiducial_cut = (hits.x**2 + hits.y**2) < Rmax**2
    binsX, binsY, binsZ = bins
    boundary_cut = (hits.x >= binsX.min()) & (hits.x <= binsX.max()) \
                 & (hits.y >= binsY.min()) & (hits.y <= binsY.max()) \
                 & (hits.z >= binsZ.min()) & (hits.z <= binsZ.max())
    hits_act = hits[fiducial_cut & boundary_cut].reset_index(drop=True)

    xbin = pd.cut(hits_act.x, binsX, labels=np.arange(0, len(binsX) - 1)).astype(int)
    ybin = pd.cut(hits_act.y, binsY, labels=np.arange(0, len(binsY) - 1)).astype(int)
    zbin = pd.cut(hits_act.z, binsZ, labels=np.arange(0, len(binsZ) - 1)).astype(int)
    hits_act = hits_act.assign(xbin=xbin, ybin=ybin, zbin=zbin)
    hits_act.event_id = hits_act.event_id.astype(np.int64)

    if segclass not in hits.columns:
        hits_act = hits_act.assign(segclass=-1)
    if binclass not in hits.columns:
        hits_act = hits_act.assign(binclass=-1)

    # outputs df with bins index and energy, and optional label
    out = hits_act.groupby(['xbin', 'ybin', 'zbin', 'event_id']).apply(
        lambda df: pd.Series({
            'energy': df['energy'].sum(),
            segclass: int(weighted_mode(df[segclass], df['energy'])[0][0]),
            binclass: int(df[binclass].unique()[0])})).reset_index()
    out[segclass] = out[segclass].astype(int)
    out[binclass] = out[binclass].astype(int)
    return out
def infer(query, samples=None, db=None, sample_db_fn=None, depth=None, d_type='d1'):
    ''' infer a query, return its AP

      arguments
        query        : a dict with three keys, see the template
                       {
                         'img':  <path_to_img>,
                         'cls':  <img class>,
                         'hist': <img histogram>
                       }
        samples      : a list of dicts in the same format as `query`
        db           : an instance of class Database
        sample_db_fn : a function making samples, should be given if db is not None
        depth        : retrieved depth during inference, the default depth is equal to database size
        d_type       : distance type
    '''
    assert samples is not None or (db is not None and sample_db_fn is not None), \
        "need to give either samples or db plus sample_db_fn"
    if db:
        samples = sample_db_fn(db)

    q_img, q_cls, q_hist = query['img'], query['cls'], query['hist']
    results = []
    for idx, sample in enumerate(samples):
        s_img, s_cls, s_hist = sample['img'], sample['cls'], sample['hist']
        if q_img == s_img:
            continue
        results.append({
            'dis': distance(q_hist, s_hist, d_type=d_type),
            'cls': s_cls,
            'img': s_img
        })
    results = sorted(results, key=lambda x: x['dis'])
    if depth and depth <= len(results):
        results = results[:depth]
    print(q_img)
    list_im = [sub['img'] for sub in results]
    print(list_im)
    pred = [sub['cls'] for sub in results]
    weig = [sub['dis'] for sub in results]
    weig = np.reciprocal(weig)
    pred2 = weighted_mode(pred, weig)
    pred = np.array_str(pred2[0])[2:-2]
    ap = AP(q_cls, results, sort=False)

    return ap, pred
def predict_classification(train_data, train_target, test_row, num_neighbors, p, weight_mode):
    neighbors, neighbors_w = get_neighbors(train_data, train_target, test_row,
                                           num_neighbors, p, weight_mode)
    result = weighted_mode(neighbors, neighbors_w)
    prediction = int(result[0])
    return prediction
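# How the pieces above might fit together -- a self-contained sketch with a
# hypothetical get_neighbors (the original's helper is not shown here): it
# returns the labels of the k nearest rows under the Minkowski p-norm plus
# inverse-distance weights.
import numpy as np
from sklearn.utils.extmath import weighted_mode

def get_neighbors(train_data, train_target, test_row, num_neighbors, p, weight_mode):
    # Minkowski distance from the query row to every training row
    dist = np.sum(np.abs(train_data - test_row) ** p, axis=1) ** (1.0 / p)
    idx = np.argsort(dist)[:num_neighbors]
    if weight_mode == 'distance':
        # a small epsilon guards against a zero distance (exact duplicate row)
        weights = 1.0 / (dist[idx] + 1e-12)
    else:
        weights = np.ones(num_neighbors)
    return train_target[idx], weights

train_data = np.array([[0.0, 0.0], [0.1, 0.0], [1.0, 1.0], [1.1, 0.9]])
train_target = np.array([0, 0, 1, 1])
print(predict_classification(train_data, train_target,
                             np.array([0.05, 0.05]), 3, 2, 'distance'))  # -> 0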
def test_uniform_weights():
    # with uniform weights, results should be identical to stats.mode
    x = np.random.randint(10, size=(10, 5))
    weights = np.ones(x.shape)

    for axis in (None, 0, 1):
        mode, score = stats.mode(x, axis)
        mode2, score2 = weighted_mode(x, weights, axis)

        assert np.all(mode == mode2)
        assert np.all(score == score2)
def predict(self, X):
    train_X, train_Y = self.data_
    dist = self.pairwise_distance(train_X, X)
    assert np.all(dist >= 0)
    idx = np.argsort(dist, axis=1)
    nn_idx = idx[:, :self.K]
    nn_dist = dist[np.arange(len(X))[:, None], nn_idx]
    nn_labels = train_Y[nn_idx]
    weights = _get_weights(nn_dist, 'distance')
    # Weighted KNN
    a, _ = weighted_mode(nn_labels, weights, axis=1)
    return a.reshape(-1)
def test_uniform_weights():
    # with uniform weights, results should be identical to stats.mode
    rng = np.random.RandomState(0)
    x = rng.randint(10, size=(10, 5))
    weights = np.ones(x.shape)

    for axis in (None, 0, 1):
        mode, score = stats.mode(x, axis)
        mode2, score2 = weighted_mode(x, weights, axis)

        assert_array_equal(mode, mode2)
        assert_array_equal(score, score2)
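# A quick standalone check of the property these tests assert, runnable
# outside any test harness; note that recent SciPy (>= 1.9) needs
# keepdims=True to keep the (n, 1) shape that weighted_mode returns:
import numpy as np
from scipy import stats
from sklearn.utils.extmath import weighted_mode

rng = np.random.RandomState(0)
x = rng.randint(10, size=(10, 5))
mode, score = stats.mode(x, axis=1, keepdims=True)
mode2, score2 = weighted_mode(x, np.ones(x.shape), axis=1)
assert np.all(mode == mode2) and np.all(score == score2)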
def predict(self, X):
    """Predict the class labels for the provided data

    Parameters
    ----------
    X : sktime-format pandas dataframe or array-like, shape (n_query, n_features), \
        or (n_query, n_indexed) if metric == 'precomputed'
        Test samples.

    Returns
    -------
    y : array of shape [n_samples] or [n_samples, n_outputs]
        Class labels for each data sample.
    """
    self.check_is_fitted()

    # temporarily swap in a time-series-aware check_array implementation
    if hasattr(check_array, '__wrapped__'):
        temp = check_array.__wrapped__.__code__
        check_array.__wrapped__.__code__ = _check_array_ts.__code__
    else:
        temp = check_array.__code__
        check_array.__code__ = _check_array_ts.__code__

    neigh_dist, neigh_ind = self.kneighbors(X)
    classes_ = self.classes_
    _y = self._y
    if not self.outputs_2d_:
        _y = self._y.reshape((-1, 1))
        classes_ = [self.classes_]

    n_outputs = len(classes_)
    n_samples = X.shape[0]
    weights = _get_weights(neigh_dist, self.weights)

    y_pred = np.empty((n_samples, n_outputs), dtype=classes_[0].dtype)
    for k, classes_k in enumerate(classes_):
        if weights is None:
            mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
        else:
            mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)

        mode = np.asarray(mode.ravel(), dtype=np.intp)
        y_pred[:, k] = classes_k.take(mode)

    if not self.outputs_2d_:
        y_pred = y_pred.ravel()

    # restore the original check_array
    if hasattr(check_array, '__wrapped__'):
        check_array.__wrapped__.__code__ = temp
    else:
        check_array.__code__ = temp

    return y_pred
def predict(self, x):
    """Predict the label of the given point

    Parameters
    ----------
    x : numpy.ndarray
        Point to be predicted. Array of shape (1, m) where m is the number of features

    Returns
    -------
    predict_label : int
        Predicted label/class
    """
    if not isinstance(x, np.ndarray):
        x = np.array(x)
    k = self.n_neighbours
    # the metric object used to calculate the distance between training data and new data
    if self.metric_params is None:
        dist_metric = DistMetric.get_metric(self.metric)
    else:
        dist_metric = DistMetric.get_metric(self.metric, *self.metric_params)
    dist = dist_metric.dist(self.X_train_, x)
    # if self.weights is 'distance', the weights are the inverse of the
    # distance between training data and the point
    weights = _get_weights(dist, self.weights)
    if weights is None:
        # zip together in order to sort, then unzip to get back the sorted lists
        zipped = sorted(zip(dist, self.classes_))
        dist, class_labels = zip(*zipped)
        # get the labels of the k closest points to x
        class_labels = np.array(class_labels)[:k]
        # get the most common label
        predict_label = Counter(class_labels).most_common(1)[0][0]
        return predict_label
    else:
        zipped = sorted(zip(dist, self.classes_, weights))
        dist, class_labels, weights = zip(*zipped)
        class_labels = np.array(class_labels)[:k]
        weights = np.array(weights)[:k]
        predict_label = weighted_mode(class_labels, weights)[0][0]
        return predict_label
def predict(self, X):
    X = X.astype(np.float32)
    X = np.ascontiguousarray(X)
    if X.ndim == 1:
        X = X[np.newaxis]
    D, I = self.index.search(X, self.n_neighbors)
    outputs = np.squeeze(self.labels[I])
    weights = _get_weights(D, self.weights)
    if weights is None:
        y_pred, _ = mode(outputs, axis=1)
    else:
        y_pred, _ = weighted_mode(outputs, weights, axis=1)
    return y_pred
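# The core of the FAISS-backed predict above, shown standalone with a small
# random dataset (faiss assumed installed; IndexFlatL2 does exact L2 search
# and returns squared distances, which still give monotone weights):
import numpy as np
import faiss
from sklearn.utils.extmath import weighted_mode

rng = np.random.RandomState(0)
xb = rng.rand(100, 8).astype(np.float32)   # training vectors
yb = rng.randint(3, size=100)              # training labels
xq = rng.rand(5, 8).astype(np.float32)     # queries

index = faiss.IndexFlatL2(8)
index.add(xb)
D, I = index.search(xq, 5)                 # distances, neighbor indices
w = 1.0 / np.maximum(D, 1e-12)             # inverse-distance weights
y_pred, _ = weighted_mode(yb[I], w, axis=1)
print(y_pred.ravel())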
def predict(self, X, idx=None):
    neigh_dist, neigh_ind = self.kneighbors(X, idx)
    pred_labels = self._y[neigh_ind]
    weights = _get_weights(neigh_dist, self.weights)
    if weights is None:
        mode, _ = smart_mode(pred_labels, axis=1)
    else:
        # axis=1 was missing here: the mode must be taken across each
        # sample's neighbors, as in the unweighted branch
        mode, _ = weighted_mode(pred_labels, weights, axis=1)
    return mode.flatten().astype(int)
def test_random_weights():
    # set this up so that each row should have a weighted mode of 6,
    # with a score that is easily reproduced
    mode_result = 6

    x = np.random.randint(mode_result, size=(100, 10))
    w = np.random.random(x.shape)

    x[:, :5] = mode_result
    w[:, :5] += 1

    mode, score = weighted_mode(x, w, axis=1)

    assert np.all(mode == mode_result)
    assert np.all(score.ravel() == w[:, :5].sum(1))
def predict(self, X, idx=None):
    neigh_dist, neigh_ind = self.kneighbors(X, idx)
    pred_labels = self._y[neigh_ind]
    weights = _get_weights(neigh_dist, self.weights)
    if weights is None:
        mode, _ = stats.mode(pred_labels, axis=1)
    else:
        # Randomly permute the neighbors to tie-break randomly if necessary.
        # The original referenced undefined `n_neighbors`, `ind` and `axis`;
        # permuting the neighbor columns of both arrays carries out the
        # stated intent (note sklearn's weighted_mode itself breaks ties by
        # value order, so this only matters for order-sensitive modes).
        perm = np.random.permutation(pred_labels.shape[1])
        pred_labels = pred_labels[:, perm]
        weights = weights[:, perm]
        mode, _ = weighted_mode(pred_labels, weights, axis=1)
    return mode.flatten().astype(int)
def test_random_weights():
    # set this up so that each row should have a weighted mode of 6,
    # with a score that is easily reproduced
    mode_result = 6

    rng = np.random.RandomState(0)
    x = rng.randint(mode_result, size=(100, 10))
    w = rng.random_sample(x.shape)

    x[:, :5] = mode_result
    w[:, :5] += 1

    mode, score = weighted_mode(x, w, axis=1)

    assert_array_equal(mode, mode_result)
    assert_array_almost_equal(score.ravel(), w[:, :5].sum(1))
def test_random_weights():
    # set this up so that each row should have a weighted mode of 6,
    # with a score that is easily reproduced
    mode_result = 6

    rng = np.random.RandomState(0)
    x = rng.randint(mode_result, size=(100, 10))
    w = rng.random_sample(x.shape)

    x[:, :5] = mode_result
    w[:, :5] += 1

    mode, score = weighted_mode(x, w, axis=1)

    np.testing.assert_array_equal(mode, mode_result)
    np.testing.assert_array_almost_equal(score.ravel(), w[:, :5].sum(1))
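# Why these tests hold, as a bite-sized sketch: randint(6) draws from [0, 6),
# so the value 6 appears only in the five forced columns; its weighted count
# is exactly w[:, :5].sum(1), five weights in [1, 2) totalling at least 5,
# while any other value collects at most 5 sub-unit weights.
import numpy as np
from sklearn.utils.extmath import weighted_mode

x = np.array([[6, 6, 0, 1, 0]])
w = np.array([[1.2, 1.1, 0.9, 0.8, 0.7]])
mode, score = weighted_mode(x, w, axis=1)
print(mode, score)  # [[6.]] [[2.3]] -- 6's total weight is 1.2 + 1.1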
def majority_vote(scores, n_classes=2, weights=None):
    """Combination method to merge the scores from multiple estimators
    by majority vote.

    Parameters
    ----------
    scores : numpy array of shape (n_samples, n_estimators)
        Score matrix from multiple estimators on the same samples.

    n_classes : int, optional (default=2)
        The number of classes in scores matrix

    weights : numpy array of shape (1, n_estimators)
        If specified, using weighted majority weight.

    Returns
    -------
    combined_scores : numpy array of shape (n_samples, )
        The combined scores.
    """
    scores = check_array(scores)

    # assert only discrete scores are combined with majority vote
    check_classification_targets(scores)
    assert len(np.unique(scores)) == n_classes

    n_samples, n_estimators = scores.shape[0], scores.shape[1]
    vote_results = np.zeros([n_samples, ])

    if weights is not None:
        assert_equal(scores.shape[1], weights.shape[1])
    # equal weights if not set
    else:
        weights = np.ones([1, n_estimators])

    for i in range(n_samples):
        vote_results[i] = weighted_mode(scores[i, :], weights)[0][0]

    return vote_results.ravel()
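# The per-row loop above can also be vectorized: weighted_mode accepts 2-D
# input, so one call with axis=1 combines every sample at once (here the
# per-estimator weights are tiled to the score matrix's shape so no
# broadcasting is left implicit). A sketch with made-up votes:
import numpy as np
from sklearn.utils.extmath import weighted_mode

scores = np.array([[0, 1, 1],
                   [1, 1, 0],
                   [0, 0, 1]])
weights = np.tile([1.0, 1.0, 1.5], (scores.shape[0], 1))
combined, _ = weighted_mode(scores, weights, axis=1)
print(combined.ravel())  # [1. 1. 0.]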
def predict(self, X):
    """Predict the class labels for the provided data

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Test samples.
    """
    X_ = to_time_series_dataset(X)
    neigh_dist, neigh_ind = self.kneighbors(X_)

    weights = _get_weights(neigh_dist, self.weights)

    if weights is None:
        mode, _ = stats.mode(self._fit_y[neigh_ind], axis=1)
    else:
        mode, _ = weighted_mode(self._fit_y[neigh_ind], weights, axis=1)
    return mode[:, 0]
def predict(self, X, E):  # IY
    """Predict the class labels for the provided data taking into account
    data uncertainties (chi^2 distances)

    Parameters
    ----------
    X : array-like, shape (n_query, n_features), \
        or (n_query, n_indexed) if metric == 'precomputed'
        Test samples.
    E : array-like, same shape as X
        Data errors

    Returns
    -------
    y : array of shape [n_samples] or [n_samples, n_outputs]
        Class labels for each data sample.
    """
    X = check_array(X, accept_sparse='csr')

    neigh_dist, neigh_ind = self.kneighbors(X, E=E)
    classes_ = self.classes_
    _y = self._y
    if not self.outputs_2d_:
        _y = self._y.reshape((-1, 1))
        classes_ = [self.classes_]

    n_outputs = len(classes_)
    n_samples = X.shape[0]
    weights = _get_weights(neigh_dist, self.weights)

    y_pred = np.empty((n_samples, n_outputs), dtype=classes_[0].dtype)
    for k, classes_k in enumerate(classes_):
        if weights is None:
            mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
        else:
            mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)

        mode = np.asarray(mode.ravel(), dtype=np.intp)
        y_pred[:, k] = classes_k.take(mode)

    if not self.outputs_2d_:
        y_pred = y_pred.ravel()

    return y_pred
def predict(self, X):
    """Predict the class labels for the provided data

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Test samples.
    """
    if self.metric == "min_dist" and self.variables_size > 1:
        X_ = X
    else:
        X_ = to_time_series_dataset(X, self.variables_size)
    neigh_dist, neigh_ind = self.kneighbors(X_, self.multivariate_output, None, True)

    weights = _get_weights(neigh_dist, self.weights)

    if weights is None:
        mode, _ = stats.mode(self._fit_y[neigh_ind], axis=1)
    else:
        mode, _ = weighted_mode(self._fit_y[neigh_ind], weights, axis=1)
    return mode[:, 0]
def infer(query, samples=None, db=None, sample_db_fn=None, depth=None, d_type='d1'):
    assert samples is not None or (db is not None and sample_db_fn is not None), \
        "need to give either samples or db plus sample_db_fn"
    if db:
        samples = sample_db_fn(db)

    q_img, q_cls, q_hist = query['img'], query['cls'], query['hist']
    results = []
    for idx, sample in enumerate(samples):
        s_img, s_cls, s_hist = sample['img'], sample['cls'], sample['hist']
        if q_img == s_img:
            continue
        results.append({
            'dis': distance(q_hist, s_hist, d_type=d_type),
            'cls': s_cls,
            'img': s_img
        })
    results = sorted(results, key=lambda x: x['dis'])
    if depth and depth <= len(results):
        results = results[:depth]

    # list_images = [sub['img'] for sub in results]
    pred = [sub['cls'] for sub in results]
    weig = [sub['dis'] for sub in results]
    weig = np.reciprocal(weig)
    pred2 = weighted_mode(pred, weig)
    pred = np.array_str(pred2[0])[2:-2]
    ap = customAP(q_cls, results, sort=False)

    return ap, pred
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, required=True, help='data')
    parser.add_argument('--valid-keys', type=str, required=True, help='validation keys')
    parser.add_argument('--input-dim', default=40, type=int, help='feats dim')
    parser.add_argument('--seq-len', default=500, type=int, help='input sequence length')
    parser.add_argument('--batch-size', default=64, type=int, help='mini-batch size')
    parser.add_argument('--models', nargs='+', help='weights file')
    parser.add_argument('--model-types', nargs='+', help='model type')
    parser.add_argument('--output', type=str, help='output file')
    parser.add_argument('--filters', type=int, default=64, help='number of filters')
    parser.add_argument('--groups', type=int, default=16, help='number of groups')
    parser.add_argument('--mtl', type=str, default=None, help='MTL training mode')
    parser.add_argument('--units', type=int, default=16, help='number of LSTM cells')
    parser.add_argument('--lstm-depth', type=int, default=2, help='number of LSTM layers')
    parser.add_argument('--norm', type=str, help='normalization file')
    parser.add_argument('--decision', type=str, default='vote', help='decision method')
    # type=float added: argparse yields strings otherwise, which breaks weighted_mode
    parser.add_argument('--weights', nargs='+', type=float, help='voting weights')
    args = parser.parse_args()

    valid_keys = []
    with open(args.valid_keys, 'r') as f:
        key = f.readline().strip()
        while key:
            valid_keys.append(key)
            key = f.readline().strip()

    # read best model
    models = []
    input = Input((args.input_dim, args.seq_len, 1))
    for n, type in enumerate(args.model_types):
        if type == 'base':
            output = DCASE_CNN_2019()(input)
        elif type == 'cnn4':
            output = CNN4()(input)
        elif type == 'cnn8':
            output = CNN8()(input)
        elif type == 'gcnn':
            output = GCNN_GRU()(input)
        elif type == 'gcnn-lstm':
            output = GCNN_GRU(lstm=True)(input)
        elif type == 'vgg_fcn':
            output = VGG_FCN(init_filters=args.filters)(input)
        elif type == 'vgg_lstm':
            output = VGG_LSTM(units=args.units, lstm_depth=args.lstm_depth,
                              init_filters=args.filters)(input)
        model = Model(input, output)
        model.load_weights(args.models[n])
        models.append(model)

    validation_generator = ASCDataGenerator(args.data, keys=valid_keys,
                                            dim=(args.input_dim, args.seq_len),
                                            batch_size=args.batch_size, mode=args.mtl)
    norm_path = os.path.join(args.norm)
    validation_generator.load_norm(norm_path)

    # get the total number of batches
    per_epoch = validation_generator.__len__()

    csv = os.path.join(args.output)
    mat = np.zeros((ASC_CLASS, ASC_CLASS), dtype=int)
    # loop over all batches
    for n in range(per_epoch):
        # fetch the data: x_batch = input features, y_batch = ground-truth labels
        x_batch, y_batch = validation_generator.__getitem__(n)
        # compute the predictions of every model
        preds = []
        for model in models:
            pred = model.predict(x_batch, x_batch.shape[0], verbose=1)
            preds.append(np.expand_dims(pred, axis=0))
        preds = np.concatenate(preds, axis=0)

        if args.decision == 'vote':
            # majority vote: take the index of the highest probability per model
            y_pred = np.argmax(preds, axis=2)
            y_pred, counts = stats.mode(y_pred, axis=0)
        elif args.decision == 'weight_vote':
            y_pred = np.argmax(preds, axis=2)
            # reshape the per-model weights to (n_models, 1) so they
            # broadcast across the batch dimension
            w = np.asarray(args.weights, dtype=float)[:, None]
            y_pred, score = weighted_mode(y_pred, w, axis=0)
        elif args.decision == 'mean':
            y_pred = np.mean(preds, axis=0)
            # the original took argmax of `pred` (the last model only);
            # the ensemble mean `y_pred` is what should be reduced here
            y_pred = np.argmax(y_pred, axis=1)

        y_true = np.argmax(y_batch, axis=1)
        # `batch_size` was undefined in the original; use the CLI value
        y_pred = np.reshape(y_pred, (args.batch_size, ))

        # build the matrix used for the accuracy computation
        cmat = confusion_matrix(y_true, y_pred)
        mat = np.add(cmat, mat)

    np.savetxt(csv, mat.astype(int))
def main(args):
    # Use the digits dataset.
    data, target = sklearn.datasets.load_digits(n_class=args.classes, return_X_y=True)
    data = sklearn.preprocessing.MinMaxScaler().fit_transform(data)

    # Split the dataset into a train set and a test set.
    train_data, test_data, train_target, test_target = sklearn.model_selection.train_test_split(
        data, target, test_size=args.test_size, random_state=args.seed)

    # Group the train and test samples by class (0..9); this replaces the
    # original's ten hand-written if/elif branches with the same result.
    xs = [train_data[train_target == c] for c in range(10)]
    ys = [np.full(len(xs[c]), c) for c in range(10)]
    xs_t = [test_data[test_target == c] for c in range(10)]
    ys_t = [np.full(len(xs_t[c]), c) for c in range(10)]

    empty = np.empty((len(test_target), 1))
    result = np.empty((len(test_target), 1))
    d = dict()
    # One-vs-one: train a binary SVM for every class pair (i, j), i < j.
    for i in range(args.classes):
        d[i] = dict()
        for j in range(args.classes):
            if j <= i:
                continue
            d[i][j] = dict()
            copy_res_x = train_data[(train_target == i) | (train_target == j)]
            copy_res_y = train_target[(train_target == i) | (train_target == j)]
            copy_res_y = [1 if copy_res_y[m] == i else -1 for m in range(len(copy_res_y))]

            # =============================== New Prediction ===============================
            x_votes = test_data
            s_vectors, s_weights, b, _, _ = smo(args, copy_res_x, copy_res_y,
                                                copy_res_x, copy_res_y)

            def predict_votes(row):
                sums = 0
                for oo in range(len(s_weights)):
                    sums += s_weights[oo] * test_kernels_votes[row, oo]
                return sums + b

            test_kernels_votes = np.empty((len(x_votes), len(s_vectors)), dtype=float)
            for x1, x1_ in enumerate(x_votes):
                for x2, x2_ in enumerate(s_vectors):
                    test_kernels_votes[x1][x2] = kernel(args, x1_, x2_)[0]

            votes = []
            for t in range(len(x_votes)):
                my_predict_votes = predict_votes(t)
                if my_predict_votes >= 0:
                    votes.append(i)
                else:
                    votes.append(j)
            votes = np.array(votes).reshape(-1, 1)
            if i == 0 and j == 1:
                result = np.c_[empty, votes]
            else:
                result = np.c_[result, votes]

    # Majority vote over the pairwise classifiers (uniform weights).
    my_test = []
    for row in result:
        row = np.delete(row, 0)  # drop the placeholder first column
        select = weighted_mode(row, np.full(len(row), 1))
        my_test.append(int(select[0]))
    my_test = np.array(my_test)

    test_accuracy = sklearn.metrics.accuracy_score(my_test, test_target)
    return test_accuracy
def decisionBoundary(self, X, y):
    # function to return plots
    assert len(list(X.columns)) == 2
    color = ["r", "y", "b"]
    lookup = {"Setosa": 0, "Versicolor": 1, "Virginica": 2}
    fig1, ax1 = plt.subplots(1, len(self.trees), figsize=(5 * len(self.trees), 4))
    x_min, x_max = X.iloc[:, 0].min(), X.iloc[:, 0].max()
    y_min, y_max = X.iloc[:, 1].min(), X.iloc[:, 1].max()
    x_range = x_max - x_min
    y_range = y_max - y_min
    Zs = []
    for i, tree in enumerate(self.trees):
        xx, yy = np.meshgrid(
            np.arange(x_min - 0.2, x_max + 0.2, x_range / 50),
            np.arange(y_min - 0.2, y_max + 0.2, y_range / 50))
        Z = tree.predict(
            pd.DataFrame(np.c_[xx.ravel(), yy.ravel()],
                         columns=list(X.columns))).to_numpy()
        Z = np.vectorize(lambda x: lookup[x])(Z)
        Z = Z.reshape(xx.shape)
        cs = ax1[i].contourf(xx, yy, Z, cmap=plt.cm.RdYlBu)
        fig1.colorbar(cs, ax=ax1[i], shrink=0.9)
        ax1[i].set_ylabel("X2")
        ax1[i].set_xlabel("X1")
        Zs.append(Z)
        for y_label in y.unique():
            idx = y == y_label
            id = list(y.cat.categories).index(y[idx].iloc[0])
            ax1[i].scatter(X[idx].iloc[:, 0], X[idx].iloc[:, 1], c=color[id],
                           cmap=plt.cm.RdYlBu, edgecolor='black', s=30,
                           label="Class: " + str(y_label))
        ax1[i].set_title("Decision Surface Tree: " + str(i + 1))
        ax1[i].legend()
    fig1.tight_layout()

    fig2, ax2 = plt.subplots(1, 1, figsize=(6, 4))
    Zs = np.array(Zs)
    # combined surface: per-pixel (unweighted) vote across the trees
    com_surface, _ = weighted_mode(Zs, np.ones(Zs.shape))
    Z = np.mean(Zs, axis=0)
    cs = ax2.contourf(xx, yy, com_surface[0], cmap=plt.cm.RdYlBu)
    for y_label in y.unique():
        idx = y == y_label
        id = list(y.cat.categories).index(y[idx].iloc[0])
        ax2.scatter(X[idx].iloc[:, 0], X[idx].iloc[:, 1], c=color[id],
                    cmap=plt.cm.RdYlBu, edgecolor='black', s=30,
                    label="Class: " + str(y_label))
    ax2.set_ylabel("X2")
    ax2.set_xlabel("X1")
    ax2.legend()
    ax2.set_title("Common Decision Surface")
    fig2.colorbar(cs, ax=ax2, shrink=0.9)

    # Saving Figures
    fig1.savefig(os.path.join("figures", "Q7_Fig1.png"))
    fig2.savefig(os.path.join("figures", "Q7_Fig2.png"))
    return fig1, fig2
def predict(self, X):
    """Predict the class labels for the provided data

    Parameters
    ----------
    X : array-like, shape (n_query, n_features), \
        or (n_query, n_indexed) if metric == 'precomputed'
        Test samples.

    Returns
    -------
    y : array of shape [n_samples] or [n_samples, n_outputs]
        Class labels for each data sample.
    """
    X = check_array(X, accept_sparse='csr')

    neigh_dist, neigh_ind = self.kneighbors(X)
    classes_ = self.classes_
    _y = self._y
    if not self.outputs_2d_:
        _y = self._y.reshape((-1, 1))
        classes_ = [self.classes_]

    n_outputs = len(classes_)
    n_samples = X.shape[0]
    weights = _get_weights(neigh_dist, self.weights)

    if issparse(self._y):
        y_pred = lil_matrix((n_outputs, n_samples), dtype=classes_[0].dtype)
        if weights is None:
            register_parallel_pytest_cov()
            with mp.Pool(processes=self.n_jobs) as pool:
                k_cls = list(tqdm(
                    pool.imap_unordered(
                        func=partial(_sparse_multilabel_classification,
                                     y=_y, neigh_ind=neigh_ind),
                        iterable=enumerate(classes_),
                        chunksize=10),
                    disable=False if self.verbose else True,
                    total=len(classes_),
                    unit='classes',
                    desc='Multilabel classification'))
            for k, cls in tqdm(k_cls, desc='Collecting results',
                               disable=False if self.verbose else True):
                y_pred[k] = cls
        else:
            raise NotImplementedError
        y_pred = y_pred.tocsc().T
    else:
        y_pred = np.empty((n_samples, n_outputs), dtype=classes_[0].dtype)
        for k, classes_k in enumerate(classes_):
            if weights is None:
                mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
            else:
                mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)

            mode = np.asarray(mode.ravel(), dtype=np.intp)
            y_pred[:, k] = classes_k.take(mode)

    if not self.outputs_2d_:
        y_pred = y_pred.ravel()

    return y_pred
def resample_pointdata(source, target, data, is_sphere=False, source_mask=None,
                       target_mask=None, red_func='mean', k=3, fill=0,
                       n_jobs=1, append=False, key=None):
    """Resample point data in source to target surface.

    Parameters
    ----------
    source : vtkPolyData or BSPolyData
        Source surface.
    target : vtkPolyData or BSPolyData
        Target surface.
    data : str, 1D ndarray or list of str and ndarray
        Point data in source surface to resample.
    is_sphere : bool, optional
        If True, assume source and target are provided as spheres that are
        aligned. Default is False.
    source_mask : str or 1D ndarray, optional
        Boolean mask. If str, it must be in the point data attributes of
        `source`. Default is None. If specified, only consider points within
        the mask.
    target_mask : str or 1D ndarray, optional
        Boolean mask. If str, it must be in the point data attributes of
        `target`. Default is None. If specified, only consider points within
        the mask.
    red_func : {'mean', 'weighted_mean', 'mode', 'weighted_mode'}, optional
        Reduction function. Default is 'mean'.
    k : int, optional
        Number of closest points to consider during resampling. Only used
        when ``is_sphere == False``. Default is 3.
    fill : int or float, optional
        Value used for entries out of the mask. Only used if `target_mask`
        is provided. Default is 0.
    n_jobs : int, optional
        Number of parallel jobs. Only used when ``is_sphere == False``.
        Default is 1.
    append : bool, optional
        If True, append array to point data attributes of target surface and
        return surface. Otherwise, only return resampled arrays. Default is
        False.
    key : str or list of str, optional
        Array names to append to target's point data attributes. Only used
        if ``append == True``. If None, use names in `source_name`. Default
        is None.

    Returns
    -------
    output : vtkPolyData, BSPolyData or list of ndarray
        Resampled point data. Return ndarray or list of ndarray if
        ``append == False``. Otherwise, return target surface with the new
        arrays.

    Notes
    -----
    This function is meant for the same source and target surfaces but with
    different number of points. For other types of resampling, see
    vtkResampleWithDataSet.
    """
    opt = ['mean', 'mode', 'weighted_mean', 'weighted_mode']

    is_list = True
    if not isinstance(data, list):
        data = [data]
        is_list = False

    if isinstance(red_func, str):
        red_func = [red_func] * len(data)

    if isinstance(source_mask, str):
        source_mask = source.PointData[source_mask]
    if isinstance(target_mask, str):
        # the original looked this up on `source`, which appears to be a
        # copy-paste slip: the target mask lives in the target surface
        target_mask = target.PointData[target_mask]

    if not is_sphere:
        use_weights = False
        if k > 1 and np.isin(red_func, opt[2:]).any():
            use_weights = True
        pids = _get_pids_naive(source, target, k=k, source_mask=source_mask,
                               target_mask=target_mask, n_jobs=n_jobs,
                               return_weights=use_weights)
        if use_weights:
            pids, w = pids
    else:
        pids, w = _get_pids_sphere(source, target, source_mask=source_mask,
                                   target_mask=target_mask)
        k = None
        for i, rf in enumerate(red_func):
            if rf in ['mean', 'mode']:
                red_func[i] = 'weighted_%s' % rf

    resampled = [None] * len(data)
    for i, d in enumerate(data):
        if isinstance(d, str):
            d = source.PointData[d]
        if source_mask is not None:
            d = d[source_mask]

        if k == 1:
            feat = d[pids]
        elif red_func[i] == 'mean':
            feat = np.mean(d[pids], axis=1)
        elif red_func[i] == 'weighted_mean':
            feat = np.average(d[pids], weights=w, axis=1)
        elif red_func[i] == 'mode':
            feat = mode(d[pids], axis=1)[0].squeeze()
        elif red_func[i] == 'weighted_mode':
            feat = weighted_mode(d[pids], w, axis=1)[0].squeeze()
            feat = feat.astype(d.dtype)
        else:
            raise ValueError('Unknown red_func: {0}'.format(red_func[i]))

        if target_mask is not None:
            feat = map_to_mask(feat, mask=target_mask, fill=fill)
        resampled[i] = feat

    if append and key is not None:
        for i, feat in enumerate(resampled):
            target.append_array(feat, name=key[i], at='p')

    return resampled if is_list else resampled[0]
def predict(self, X):
    """Predict the class labels for the provided data

    Parameters
    ----------
    X : array-like, shape (n_query, n_features), \
        or (n_query, n_indexed) if metric == 'precomputed'
        Test samples.

    Returns
    -------
    y : array of shape [n_samples]
        Class labels for each data sample.
    """
    X = check_array(X, accept_sparse="csr")
    n_samples = X.shape[0]

    neigh_dist, neigh_ind = self.radius_neighbors(X)
    inliers = [i for i, nind in enumerate(neigh_ind) if len(nind) != 0]
    outliers = [i for i, nind in enumerate(neigh_ind) if len(nind) == 0]

    classes_ = self.classes_
    _y = self._y
    if not self.outputs_2d_:
        _y = self._y.reshape((-1, 1))
        classes_ = [self.classes_]
    n_outputs = len(classes_)

    if self.outlier_function is None and outliers:
        raise ValueError(
            "No neighbors found for test samples %r, "
            "you can try using larger radius, "
            "give a function for outliers, "
            "or consider removing them from your dataset." % outliers
        )

    if type(neigh_ind) is int:
        neigh_ind = [neigh_ind]

    weights = self.weight_function(neigh_dist=neigh_dist, neigh_ind=neigh_ind,
                                   target_space=self.target_space)

    y_pred = np.empty((n_samples, n_outputs), dtype=classes_[0].dtype)
    for k, classes_k in enumerate(classes_):
        pred_labels = np.array([_y[ind, k] for ind in neigh_ind], dtype=object)
        if weights is None:
            # np.int was removed from NumPy; the builtin int is equivalent
            mode = np.array([stats.mode(pl)[0] for pl in pred_labels[inliers]],
                            dtype=int)
        else:
            mode = np.array(
                [weighted_mode(pl, w)[0]
                 for (pl, w) in zip(pred_labels[inliers], weights)],
                dtype=int
            )
        mode = mode.ravel()
        y_pred[inliers, k] = classes_k.take(mode)

    if outliers:
        for outlier in outliers:
            y_pred[outlier, 0] = self.outlier_function.predict(X[outlier])

    if not self.outputs_2d_:
        y_pred = y_pred.ravel()

    return y_pred
# Fragment of a larger reduction dispatch; the enclosing if/elif chain was
# lost in extraction, so the leading condition below is an assumed
# reconstruction (only the weighted_mode branch and the dangling
# `elif red_op == 'sum':` survive from the original):
if red_op == 'weighted_mode':
    def fred(x, w):
        return weighted_mode(x, w, axis=axis)
elif red_op == 'sum':
    ...
def _custom_weighting(self, neighbor_vals, neighbor_dist):
    neighbor_weights = self.weights(neighbor_dist)
    new_X = weighted_mode(neighbor_vals, neighbor_weights, axis=1)
    return new_X
def _distance_weighting(neighbor_vals, neighbor_dist):
    neighbor_weights = 1 / neighbor_dist
    X = weighted_mode(neighbor_vals, neighbor_weights, axis=1)
    return X
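# `1 / neighbor_dist` divides by zero whenever a query coincides with a
# training point. A guarded variant (a sketch, not the original's behavior)
# gives such exact matches a large finite weight instead:
import numpy as np
from sklearn.utils.extmath import weighted_mode

def _distance_weighting_safe(neighbor_vals, neighbor_dist):
    # clip distances away from zero before inverting
    neighbor_weights = 1 / np.maximum(neighbor_dist, 1e-12)
    X = weighted_mode(neighbor_vals, neighbor_weights, axis=1)
    return X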
def infer(query, samples=None, db=None, sample_db_fn=None, depth=None, d_type='d1'):
    ''' infer a query, return its AP

      arguments
        query        : a dict with three keys, see the template
                       {
                         'img':  <path_to_img>,
                         'cls':  <img class>,
                         'hist': <img histogram>
                       }
        samples      : a list of dicts in the same format as `query`
        db           : an instance of class Database
        sample_db_fn : a function making samples, should be given if db is not None
        depth        : retrieved depth during inference, the default depth is equal to database size
        d_type       : distance type
    '''
    assert samples is not None or (db is not None and sample_db_fn is not None), \
        "need to give either samples or db plus sample_db_fn"
    if db:
        samples = sample_db_fn(db)

    q_img, q_cls, q_hist = query['img'], query['cls'], query['hist']
    results = []
    for idx, sample in enumerate(samples):
        s_img, s_cls, s_hist = sample['img'], sample['cls'], sample['hist']
        if q_img == s_img:
            continue
        results.append({
            'img': s_img,
            'dis': distance(q_hist, s_hist, d_type=d_type),
            'cls': s_cls
        })
    results = sorted(results, key=lambda x: x['dis'])
    if depth and depth <= len(results):
        results = results[:depth]
    print(q_img)
    list_im = [sub['img'] for sub in results]
    print(list_im)
    pred = [sub['cls'] for sub in results]
    weig = [sub['dis'] for sub in results]
    weig = np.reciprocal(weig)
    pred2 = weighted_mode(pred, weig)
    pred = np.array_str(pred2[0])[2:-2]

    # prepend the query image once (the original inserted it twice)
    list_im.insert(0, q_img)
    imgs = [PIL.Image.open(i) for i in list_im]
    min_shape = sorted([(np.sum(i.size), i.size) for i in imgs])[0][1]
    # np.hstack needs a sequence, not a generator, on recent NumPy
    imgs_comb = np.hstack([np.asarray(i.resize(min_shape)) for i in imgs])
    plt.imshow(imgs_comb / 255.)
    plt.pause(0.1)
    plt.close()

    ap = myAP(q_cls, results, sort=True)

    return ap, pred
def predict(self, X):
    """Predict the class labels for the provided data

    Parameters
    ----------
    X : array-like, shape (n_query, n_features), \
        or (n_query, n_indexed) if metric == 'precomputed'
        Test samples.

    Returns
    -------
    y : array of shape [n_samples] or [n_samples, n_outputs]
        Class labels for each data sample.
    """
    X = check_array(X, accept_sparse='csr')
    n_samples = X.shape[0]

    neigh_dist, neigh_ind = self.radius_neighbors(X)
    inliers = [i for i, nind in enumerate(neigh_ind) if len(nind) != 0]
    outliers = [i for i, nind in enumerate(neigh_ind) if len(nind) == 0]

    classes_ = self.classes_
    _y = self._y
    if not self.outputs_2d_:
        _y = self._y.reshape((-1, 1))
        classes_ = [self.classes_]
    n_outputs = len(classes_)

    if self.outlier_label is not None:
        neigh_dist[outliers] = 1e-6
    elif outliers:
        raise ValueError('No neighbors found for test samples %r, '
                         'you can try using larger radius, '
                         'give a label for outliers, '
                         'or consider removing them from your dataset.'
                         % outliers)

    weights = _get_weights(neigh_dist, self.weights)

    y_pred = np.empty((n_samples, n_outputs), dtype=classes_[0].dtype)
    for k, classes_k in enumerate(classes_):
        pred_labels = np.zeros(len(neigh_ind), dtype=object)
        pred_labels[:] = [_y[ind, k] for ind in neigh_ind]
        if weights is None:
            # np.int was removed from NumPy; the builtin int is equivalent
            mode = np.array(
                [stats.mode(pl)[0] for pl in pred_labels[inliers]],
                dtype=int)
        else:
            mode = np.array([
                weighted_mode(pl, w)[0]
                for (pl, w) in zip(pred_labels[inliers], weights[inliers])
            ], dtype=int)

        mode = mode.ravel()
        y_pred[inliers, k] = classes_k.take(mode)

    if outliers:
        y_pred[outliers, :] = self.outlier_label

    if not self.outputs_2d_:
        y_pred = y_pred.ravel()

    return y_pred
def update(self, v):
    # check if data exists (the original tested `self.target_markers` twice;
    # the second check should cover the Row/Col marker stream used below)
    if (self.target_markers is not None) and (self.rolcol_markers is not None) \
            and (self.pred_labels is not None):
        # obtain the chunks from the target/non-target stream
        target_chunk = self.target_markers.chunks['markers']
        target_marker_time = target_chunk.block.axes[0].times
        target_marker_rec = target_chunk.block.axes[0].data

        # obtain the chunks from the Row/Col stream
        rowcol_chunk = self.rolcol_markers.chunks['markers']
        rowcol_marker_time = rowcol_chunk.block.axes[0].times
        rowcol_marker_rec = rowcol_chunk.block.axes[0].data

        # obtain the chunks from the predicted labels
        y_pred_chunk, y = extract_chunks(self.pred_labels)

        if (len(target_marker_rec) > 0) and (len(rowcol_marker_rec) > 0) \
                and (y_pred_chunk is not None):
            import numpy as np
            import sklearn.metrics as metrics
            import scipy.stats as stats
            from sklearn.utils.extmath import weighted_mode

            target_marker = []
            target_marker_num = []
            for i in range(len(target_marker_rec)):
                curr_marker = target_marker_rec[i][0]
                target_marker.append(curr_marker)
                if curr_marker == 'NonTarget':
                    target_marker_num.append(0)
                elif curr_marker == 'Target':
                    target_marker_num.append(1)
                else:
                    target_marker_num.append(np.nan)
            target_marker_num = np.stack(target_marker_num, axis=-1)
            if np.any(np.isnan(target_marker_num)):
                raise Exception('Faulty naming with target/non target labels')

            rowcol_marker = []
            rowcol_marker_num = []
            for i in range(len(rowcol_marker_rec)):
                curr_marker = rowcol_marker_rec[i][0]
                rowcol_marker.append(curr_marker)
                if curr_marker == 'Col1':
                    rowcol_marker_num.append(1)
                elif curr_marker == 'Col2':
                    rowcol_marker_num.append(2)
                elif curr_marker == 'Col3':
                    rowcol_marker_num.append(3)
                elif curr_marker == 'Col4':
                    rowcol_marker_num.append(4)
                elif curr_marker == 'Col5':
                    rowcol_marker_num.append(5)
                elif curr_marker == 'Col6':
                    rowcol_marker_num.append(6)
                elif curr_marker == 'Row1':
                    rowcol_marker_num.append(7)
                elif curr_marker == 'Row2':
                    rowcol_marker_num.append(8)
                elif curr_marker == 'Row3':
                    rowcol_marker_num.append(9)
                elif curr_marker == 'Row4':
                    rowcol_marker_num.append(10)
                elif curr_marker == 'Row5':
                    rowcol_marker_num.append(11)
                elif curr_marker == 'Row6':
                    rowcol_marker_num.append(12)
                else:
                    rowcol_marker_num.append(np.nan)
            rowcol_marker_num = np.stack(rowcol_marker_num, axis=-1)
            if np.any(np.isnan(rowcol_marker_num)):
                raise Exception('Faulty naming with row/column labels')

            bool_stream = y_pred_chunk.props['is_streaming']

            # obtain the predicted labels and the time stamps for the events
            y_pred_marker_time = y_pred_chunk.block.axes[0].times
            y_pred_score = y_pred_chunk.block.data
            if y_pred_score.shape[1] == 1:
                y_pred = y_pred_score.reshape(-1)
                y_pred_score_target = np.ones_like(y_pred)
            elif y_pred_score.shape[1] == 2:
                y_pred = np.argmax(y_pred_score, axis=1)
                y_pred_score_target = y_pred_score[:, 1]
            else:
                y_pred = None
                y_pred_score_target = None

            if not bool_stream:
                # np.int was removed from NumPy; the builtin int is equivalent
                y = y.astype(int)
                if not np.all(target_marker_num == y):
                    raise Exception(
                        "Ground truth label streaming doesn't match label extracted from marker")

                # sanity check with all the time stamps
                if not np.all(y_pred_marker_time == target_marker_time) \
                        and np.all(y_pred_marker_time == rowcol_marker_time):
                    raise Exception("Time stamps of markers don't agree")

                # compute the accuracy, recall, precision and AUC using sklearn
                acc = metrics.accuracy_score(y, y_pred)
                recall = metrics.recall_score(y, y_pred)
                precision = metrics.precision_score(y, y_pred)
                auc = metrics.roc_auc_score(y, y_pred)

                # display the statistics
                logger.info('Current Accuracy: {}'.format(acc))
                logger.info('Current Recall: {}'.format(recall))
                logger.info('Current Precision: {}'.format(precision))
                logger.info('Current AUC: {}'.format(auc))

                # now try to decipher the characters
                # case 1: ALS dataset, when there are multiple words in each file
                n_flashes = len(y)
                if len(self.n_char_per_word) == 1:  # offline version
                    # sanity check
                    if n_flashes % (self.n_char_per_word[0] * self.n_flash_per_char) != 0:
                        raise Exception("Number of flashes doesn't match number of characters")

                    n_flash_per_char = self.n_flash_per_char
                    n_char_per_word_curr = self.n_char_per_word[0]
                    n_flashes_per_word = n_char_per_word_curr * n_flash_per_char

                    # loop through the words
                    for idx_word in range(int(n_flashes / n_flashes_per_word)):
                        rowcol_marker_num_word_curr = rowcol_marker_num[
                            idx_word * n_flashes_per_word:(idx_word + 1) * n_flashes_per_word]
                        y_pred_word_curr = y_pred[
                            idx_word * n_flashes_per_word:(idx_word + 1) * n_flashes_per_word]
                        y_word_curr = y[
                            idx_word * n_flashes_per_word:(idx_word + 1) * n_flashes_per_word]
                        y_pred_score_target_word_curr = y_pred_score_target[
                            idx_word * n_flashes_per_word:(idx_word + 1) * n_flashes_per_word]

                        str_word_pred = ''
                        str_word = ''
                        logger.info('Predicting the {}-th word'.format(idx_word + 1))

                        # loop through the characters
                        for idx_char in range(n_char_per_word_curr):
                            logger.info('{}-th character'.format(idx_char + 1))
                            rowcol_marker_num_char_curr = rowcol_marker_num_word_curr[
                                idx_char * n_flash_per_char:(idx_char + 1) * n_flash_per_char]
                            y_pred_char_curr = y_pred_word_curr[
                                idx_char * n_flash_per_char:(idx_char + 1) * n_flash_per_char]
                            y_char_curr = y_word_curr[
                                idx_char * n_flash_per_char:(idx_char + 1) * n_flash_per_char]
                            y_pred_score_target_char_curr = y_pred_score_target_word_curr[
                                idx_char * n_flash_per_char:(idx_char + 1) * n_flash_per_char]

                            # obtain where the flashes are targets
                            y_pred_char_hit_curr = rowcol_marker_num_char_curr[
                                y_pred_char_curr.astype(bool)]
                            y_char_hit_curr = rowcol_marker_num_char_curr[
                                y_char_curr.astype(bool)]
                            y_pred_score_target_char_hit_curr = y_pred_score_target_char_curr[
                                y_pred_char_curr.astype(bool)]

                            # test for the rows and columns separately
                            y_char_hit_curr_row, _ = stats.mode(
                                y_char_hit_curr[y_char_hit_curr >= 7])
                            y_char_hit_curr_row = y_char_hit_curr_row[0]
                            y_char_hit_curr_col, _ = stats.mode(
                                y_char_hit_curr[y_char_hit_curr < 7])
                            y_char_hit_curr_col = y_char_hit_curr_col[0]

                            # (an earlier unweighted stats.mode version of the
                            # row/column prediction was left here commented out;
                            # the weighted_mode calls below supersede it)
                            if np.sum(y_pred_char_hit_curr >= 7) > 0:
                                y_pred_char_hit_curr_row, _ = weighted_mode(
                                    y_pred_char_hit_curr[y_pred_char_hit_curr >= 7],
                                    y_pred_score_target_char_hit_curr[y_pred_char_hit_curr >= 7])
                                y_pred_char_hit_curr_row = int(y_pred_char_hit_curr_row[0])
                            else:
                                # random guess if not available
                                y_pred_char_hit_curr_row = 7
                            if np.sum(y_pred_char_hit_curr < 7) > 0:
                                y_pred_char_hit_curr_col, _ = weighted_mode(
                                    y_pred_char_hit_curr[y_pred_char_hit_curr < 7],
                                    y_pred_score_target_char_hit_curr[y_pred_char_hit_curr < 7])
                                y_pred_char_hit_curr_col = int(y_pred_char_hit_curr_col[0])
                            else:
                                # random guess if not available
                                y_pred_char_hit_curr_col = 1

                            str_y_pred_char_curr = self._char_mapping(
                                y_pred_char_hit_curr_row, y_pred_char_hit_curr_col)
                            str_y_char_curr = self._char_mapping(
                                y_char_hit_curr_row, y_char_hit_curr_col)
                            str_word_pred += str_y_pred_char_curr
                            str_word += str_y_char_curr
                            logger.info('Current character: {}, Predicted Character: {}'
                                        .format(str_y_char_curr, str_y_pred_char_curr))

                        logger.info('The correct word is: {}'.format(str_word))
                        logger.info('The predicted word is: {}\n'.format(str_word_pred))
                    self.finished = True
                # case 2: BCI Comp dataset, when there is a single word in each file
                else:
                    pass
            # online version
            else:
                y = self.pred_labels.chunks['segmented-markers'].block.data
                y = y.astype(int)
                # could be missing some markers so skip the check

                # sanity check with all the time stamps
                if not np.all(rowcol_marker_time == target_marker_time):
                    raise Exception("Time stamps of markers don't agree")

                # case 1: ALS dataset, when there are multiple words in each file
                if len(self.n_char_per_word) == 1:
                    if ((len(self.y_pred_buffer) + len(y_pred)) <
                            self.n_flash_per_char * self.n_char_per_word[0]) and \
                            ((len(self.target_markers_num_buffer) + len(target_marker_num)) <
                             self.n_flash_per_char * self.n_char_per_word[0]):
                        if len(self.y_pred_buffer) == 0:
                            self.target_markers_num_buffer = target_marker_num
                            self.target_markers_time_buffer = target_marker_time
                            self.y_pred_buffer = y_pred
                            self.y_pred_marker_time_buffer = y_pred_marker_time
                            self.y_buffer = y
                            self.y_pred_score_target_buffer = y_pred_score_target
                            self.rowcol_marker_num_buffer = rowcol_marker_num
                            self.rowcol_marker_time_buffer = rowcol_marker_time
                            self.y_buffer_full = y
                            self.y_pred_buffer_full = y_pred
                        else:
                            self.target_markers_num_buffer = np.concatenate(
                                (self.target_markers_num_buffer, target_marker_num), axis=0)
                            self.target_markers_time_buffer = np.concatenate(
                                (self.target_markers_time_buffer, target_marker_time), axis=0)
                            self.y_pred_buffer = np.concatenate(
                                (self.y_pred_buffer, y_pred), axis=0)
                            self.y_pred_marker_time_buffer = np.concatenate(
                                (self.y_pred_marker_time_buffer, y_pred_marker_time), axis=0)
                            self.y_buffer = np.concatenate((self.y_buffer, y), axis=0)
                            self.y_pred_score_target_buffer = np.concatenate(
                                (self.y_pred_score_target_buffer, y_pred_score_target), axis=0)
                            self.rowcol_marker_num_buffer = np.concatenate(
                                (self.rowcol_marker_num_buffer, rowcol_marker_num), axis=0)
                            self.rowcol_marker_time_buffer = np.concatenate(
                                (self.rowcol_marker_time_buffer, rowcol_marker_time), axis=0)
                            self.y_buffer_full = np.concatenate(
                                (self.y_buffer_full, y), axis=0)
                            self.y_pred_buffer_full = np.concatenate(
                                (self.y_pred_buffer_full, y_pred), axis=0)
                    else:
                        target_markers_num_curr = np.concatenate(
                            (self.target_markers_num_buffer, target_marker_num), axis=0)
                        target_markers_time_curr = np.concatenate(
                            (self.target_markers_time_buffer, target_marker_time), axis=0)
                        y_pred_curr = np.concatenate((self.y_pred_buffer, y_pred), axis=0)
                        y_pred_marker_time_curr = np.concatenate(
                            (self.y_pred_marker_time_buffer, y_pred_marker_time), axis=0)
                        y_curr = np.concatenate((self.y_buffer, y), axis=0)
                        y_pred_score_target_curr = np.concatenate(
                            (self.y_pred_score_target_buffer, y_pred_score_target), axis=0)
                        rowcol_marker_num_curr = np.concatenate(
                            (self.rowcol_marker_num_buffer, rowcol_marker_num), axis=0)
                        rowcol_marker_time_curr = np.concatenate(
                            (self.rowcol_marker_time_buffer, rowcol_marker_time), axis=0)
                        self.y_buffer_full = np.concatenate((self.y_buffer_full, y), axis=0)
                        self.y_pred_buffer_full = np.concatenate(
                            (self.y_pred_buffer_full, y_pred), axis=0)

                        # reset the buffers for future iterations
                        n_flash_per_char = self.n_flash_per_char
                        n_char_per_word_curr = self.n_char_per_word[0]
                        n_flashes_per_word = n_char_per_word_curr * n_flash_per_char

                        # if extra epochs are around
                        if len(self.target_markers_num_buffer) + len(target_marker_num) > n_flashes_per_word:
                            self.target_markers_num_buffer = target_markers_num_curr[n_flashes_per_word:]
                            self.target_markers_time_buffer = target_markers_time_curr[n_flashes_per_word:]
                            self.rowcol_marker_num_buffer = rowcol_marker_num_curr[n_flashes_per_word:]
                            self.rowcol_marker_time_buffer = rowcol_marker_time_curr[n_flashes_per_word:]
                        # if everything is perfectly aligned: empty the buffers
                        else:
                            self.target_markers_num_buffer = []
                            self.target_markers_time_buffer = []
                            self.rowcol_marker_num_buffer = []
                            self.rowcol_marker_time_buffer = []

                        if len(self.y_buffer) + len(y_pred) > n_flashes_per_word:
                            self.y_pred_buffer = y_pred_curr[n_flashes_per_word:]
                            self.y_pred_marker_time_buffer = y_pred_marker_time_curr[n_flashes_per_word:]
                            self.y_buffer = y_curr[n_flashes_per_word:]
                            self.y_pred_score_target_buffer = y_pred_score_target_curr[n_flashes_per_word:]
                        else:
                            self.y_pred_buffer = []
                            self.y_pred_marker_time_buffer = []
                            self.y_buffer = []
                            self.y_pred_score_target_buffer = []

                        # try to obtain all information corresponding to current character
                        target_marker_num = target_markers_num_curr[:n_flashes_per_word]
                        target_marker_time = target_markers_time_curr[:n_flashes_per_word]
                        y_pred = y_pred_curr[:n_flashes_per_word]
                        y_pred_marker_time = y_pred_marker_time_curr[:n_flashes_per_word]
                        y = y_curr[:n_flashes_per_word]
                        y_pred_score_target = y_pred_score_target_curr[:n_flashes_per_word]
                        rowcol_marker_num = rowcol_marker_num_curr[:n_flashes_per_word]
                        rowcol_marker_time = rowcol_marker_time_curr[:n_flashes_per_word]

                        # sanity check with all the time stamps and data
                        if not np.all(target_marker_num == y):
                            raise Exception(
                                "Ground truth label streaming doesn't match label extracted from marker")
                        if not np.all(y_pred_marker_time == target_marker_time) \
                                and np.all(y_pred_marker_time == rowcol_marker_time):
                            raise Exception("Time stamps of markers don't agree")

                        # loop through the words
                        if self.idx_word is None:
                            self.idx_word = 0
                        else:
                            self.idx_word += 1
                        idx_word = self.idx_word

                        str_word_pred = ''
                        str_word = ''
                        logger.info('Predicting the {}-th word'.format(idx_word + 1))

                        # loop through the characters
                        for idx_char in range(n_char_per_word_curr):
                            logger.info('{}-th character'.format(idx_char + 1))
                            rowcol_marker_num_char_curr = rowcol_marker_num[
                                idx_char * n_flash_per_char:(idx_char + 1) * n_flash_per_char]
                            y_pred_char_curr = y_pred[
                                idx_char * n_flash_per_char:(idx_char + 1) * n_flash_per_char]
                            y_char_curr = y[
                                idx_char * n_flash_per_char:(idx_char + 1) * n_flash_per_char]
                            y_pred_score_target_char_curr = y_pred_score_target[
                                idx_char * n_flash_per_char:(idx_char + 1) * n_flash_per_char]

                            # obtain where the flashes are targets
                            y_pred_char_hit_curr = rowcol_marker_num_char_curr[
                                y_pred_char_curr.astype(bool)]
                            y_char_hit_curr = rowcol_marker_num_char_curr[
                                y_char_curr.astype(bool)]
                            y_pred_score_target_char_hit_curr = y_pred_score_target_char_curr[
                                y_pred_char_curr.astype(bool)]

                            # test for the rows and columns separately
                            y_char_hit_curr_row, _ = stats.mode(
                                y_char_hit_curr[y_char_hit_curr >= 7])
                            y_char_hit_curr_row = y_char_hit_curr_row[0]
                            y_char_hit_curr_col, _ = stats.mode(
                                y_char_hit_curr[y_char_hit_curr < 7])
                            y_char_hit_curr_col = y_char_hit_curr_col[0]

                            if np.sum(y_pred_char_hit_curr >= 7) > 0:
                                y_pred_char_hit_curr_row, _ = weighted_mode(
                                    y_pred_char_hit_curr[y_pred_char_hit_curr >= 7],
                                    y_pred_score_target_char_hit_curr[y_pred_char_hit_curr >= 7])
                                y_pred_char_hit_curr_row = int(y_pred_char_hit_curr_row[0])
                            else:
                                # random guess if not available
                                y_pred_char_hit_curr_row = 7
                            if np.sum(y_pred_char_hit_curr < 7) > 0:
                                y_pred_char_hit_curr_col, _ = weighted_mode(
                                    y_pred_char_hit_curr[y_pred_char_hit_curr < 7],
                                    y_pred_score_target_char_hit_curr[y_pred_char_hit_curr < 7])
                                y_pred_char_hit_curr_col = int(y_pred_char_hit_curr_col[0])
                            else:
                                # random guess if not available
                                y_pred_char_hit_curr_col = 1

                            str_y_pred_char_curr = self._char_mapping(
                                y_pred_char_hit_curr_row, y_pred_char_hit_curr_col)
                            str_y_char_curr = self._char_mapping(
                                y_char_hit_curr_row, y_char_hit_curr_col)
                            str_word_pred += str_y_pred_char_curr
                            str_word += str_y_char_curr
                            logger.info('Current character: {}, Predicted Character: {}'
                                        .format(str_y_char_curr, str_y_pred_char_curr))

                        logger.info('The correct word is: {}'.format(str_word))
                        logger.info('The predicted word is: {}\n'.format(str_word_pred))

                        # compute the accuracy, recall, precision and AUC using sklearn
                        if self.idx_word == 3:
                            acc = metrics.accuracy_score(self.y_buffer_full, self.y_pred_buffer_full)
                            recall = metrics.recall_score(self.y_buffer_full, self.y_pred_buffer_full)
                            precision = metrics.precision_score(self.y_buffer_full, self.y_pred_buffer_full)
                            auc = metrics.roc_auc_score(self.y_buffer_full, self.y_pred_buffer_full)

                            # display the statistics
                            logger.info('\nFinal Test Set Results')
                            logger.info('Current Accuracy: {}'.format(acc))
                            logger.info('Current Recall: {}'.format(recall))
                            logger.info('Current Precision: {}'.format(precision))
                            logger.info('Current AUC: {}'.format(auc))
                # case 2: BCI Comp dataset, when there is a single word in each file
                else:
                    pass