class Classifier:
    """Classify scalar values with a ScalarTransformer feeding a PatternClassifier.

    The transformer output is attached as child 0 of the classifier input, so
    every call first encodes the scalar and then runs the classifier on it.
    """

    def __init__(
            self,
            configs=(),      # block configuration
            num_l=2,         # number of labels
            min_val=0.0,     # minimum input value
            max_val=1.0,     # maximum input value
            num_i=1024,      # number of input statelets
            num_ai=128,      # number of active input statelets
            num_s=512,       # number of statelets
            num_as=8,        # number of active statelets
            pct_pool=0.8,    # pooling percentage
            pct_conn=0.5,    # initially connected percentage
            pct_learn=0.3):  # learn percentage
        """Build the encoder and classifier blocks and wire them together.

        ``configs`` is accepted but not used by this constructor.
        """
        # Fixed permanence settings handed to the PatternClassifier.
        perm_thr, perm_inc, perm_dec = 20, 2, 1

        # seed the random number generator
        # bb.seed(0) # TODO: fix seeding

        self.st = ScalarTransformer(min_val, max_val, num_i, num_ai)
        self.pc = PatternClassifier(
            num_l, num_s, num_as,
            perm_thr, perm_inc, perm_dec,
            pct_pool, pct_conn, pct_learn)

        # The classifier reads the encoder output as its first child.
        self.pc.input.add_child(self.st.output, 0)
        self.pc.init()

    # NOTE: persistence helpers are currently disabled.
    # def save(self, path='./', name='classifier'):
    #     self.pc.save(path + name + "_pc.bin")

    # def load(self, path='./', name='classifier'):
    #     self.pc.load(path + name + "_pc.bin")

    def fit(self, value=0.0, label=0):
        """Train on one (value, label) pair and return the classifier probabilities."""
        encoder, classifier = self.st, self.pc
        encoder.set_value(value)
        classifier.set_label(label)
        encoder.feedforward()
        classifier.feedforward(learn=True)
        probabilities = classifier.get_probabilities()
        return probabilities

    def predict(self, value=0.0):
        """Run one value through the network without learning; return the probabilities."""
        encoder, classifier = self.st, self.pc
        encoder.set_value(value)
        encoder.feedforward()
        classifier.feedforward(learn=False)
        probabilities = classifier.get_probabilities()
        return probabilities
class BBClassifier:
    """scikit-learn style classifier: HyperGrid Transform + Distributed Pattern Classifier.

    Inputs are encoded by a ``HyperGridTransform``; the encoded bits are placed
    on a ``BlankBlock`` whose output feeds a ``PatternClassifier``.
    """

    def __init__(
            self,

            # Training Arguments
            num_epochs=3,
            use_undefined_class=False,

            # Distributed Pattern Classifier Arguments
            num_l=2,  # number of labels
            num_s=512,  # number of statelets
            num_as=8,  # number of active statelets
            pct_pool=0.8,  # percent pooled
            pct_conn=0.8,  # percent initially connected
            pct_learn=0.3,  # percent learn
            seed=0,

            # HyperGrid Transform Arguments
            num_bins=4,
            num_acts=1,
            num_grids=64,
            num_subspace_dims=1,
            origin=None,
            num_input_dims=None,
            max_period=2.0,
            min_period=0.05,
            use_orthogonal_bases=False,
            use_normal_dist_bases=False,
            use_standard_bases=False,
            set_bases=None,
            set_periods=None,
            use_random_uniform_periods=False,
            use_evenly_spaced_periods=False,
            random_state=None):
        """Classify N-dimensional inputs using Hypergrid Transform and Distributed Pattern Classifier

        Parameters
        ----------
        num_epochs: integer
            Number of training epochs

        use_undefined_class: boolean
            Whether to reserve a class for test samples that have no training data

        num_l: integer
            Number of labels. Overwritten in ``fit`` with the number of
            classes found in the training targets.

        num_s: integer
            Number of class detectors to allocate for the distributed pattern classifier

        num_as: integer
            Number of active class detectors per time step for the distributed pattern classifier

        pct_pool: float, Between 0.0 and 1.0
            Percentage of random bits an individual detector has potential access to.

        pct_conn: float, Between 0.0 and pct_pool
            Percentage of random bits an individual detector is currently connected to.

        pct_learn: float, Between 0.0 and 1.0
            Percentage of bits to update when training occurs.

        seed: integer
            Forwarded to the pattern classifier as its ``seed`` argument.

        num_bins: integer
            Number of bins to create for each grid.

        num_acts: integer
            Number of contiguous bins to activate along each subspace dimension for each grid.

        num_grids: integer
            Number of grids to generate.

        num_subspace_dims: integer
            Dimensionality of subspaces to map input to

        origin: array-like, shape {num_features}
            Point of origin in input space for embedding a sample into the grids.

        num_input_dims: integer or None
            Forwarded unchanged to the HyperGrid Transform
            (presumably the input dimensionality -- confirm against HyperGridTransform).

        max_period: float
            Maximum bound on grid period

        min_period: float
            Minimum bound on grid period

        use_orthogonal_bases: boolean
            Generate random orthogonal basis vectors for each subspace

        use_normal_dist_bases: boolean
            Generate normal distribution of basis vectors, points sampled on a sphere

        use_standard_bases: boolean
            Use randomly selected standard basis vectors for each grid

        set_bases: array-like, shape {num_grids, num_subspace_dims, num_features}
            Use manually specified subspace basis vectors for each grid

        set_periods: array-like, shape {num_grids, num_subspace_dims}
            Use manually specified periods for each grid and its subspaces

        use_random_uniform_periods: boolean
            Use random periods for subspace grids

        use_evenly_spaced_periods: boolean
            Use evenly spaced periods for subspace grids over the interval min_period to max_period

        random_state: integer
            Seed for random number generators
        """
        self.num_epochs = num_epochs
        self.use_undefined_class = use_undefined_class

        self._y = []
        self.classes_ = np.array([])
        self.outputs_2d_ = False

        # Keyword arguments for the PatternClassifier built in fit().
        self.dpc_config = dict(num_l=num_l, num_s=num_s, num_as=num_as,
                               perm_thr=20, perm_inc=2, perm_dec=1,
                               pct_pool=pct_pool, pct_conn=pct_conn,
                               pct_learn=pct_learn,
                               num_t=2, seed=seed)

        # Keyword arguments for the HyperGridTransform built in fit().
        self.hgt_config = dict(
            num_grids=num_grids, num_bins=num_bins, num_acts=num_acts,
            num_subspace_dims=num_subspace_dims, origin=origin,
            num_input_dims=num_input_dims,
            max_period=max_period, min_period=min_period,
            use_normal_dist_bases=use_normal_dist_bases,
            use_standard_bases=use_standard_bases,
            use_orthogonal_bases=use_orthogonal_bases,
            use_evenly_spaced_periods=use_evenly_spaced_periods,
            use_random_uniform_periods=use_random_uniform_periods,
            set_bases=set_bases, set_periods=set_periods,
            random_state=random_state,
            flatten_output=True)

    def __del__(self):
        pass

    def _generate_config(self, num_labels=2):
        # connect BlankBlock to DPC
        # optionally add PatternPooler
        pass

    def reset(self):
        pass

    def fit(self, X, y):
        """Fit the model using X as training data and y as target values

        Parameters
        ----------
        X : {array-like, sparse matrix}
            Training data. If array or matrix, shape [num_samples, num_features],

        y : {array-like, sparse matrix}
            Target values of shape = [num_samples] or [num_samples, num_outputs]

        Returns
        -------
        self : BBClassifier
            The fitted estimator.
        """
        X, y = check_X_y(X, y, multi_output=True)

        if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1:
            if y.ndim != 1:
                warnings.warn(
                    "A column-vector y was passed when a 1d array "
                    "was expected. Please change the shape of y to "
                    "(num_samples, ), for example using ravel().",
                    DataConversionWarning, stacklevel=2)

            self.outputs_2d_ = False
            y = y.reshape((-1, 1))
        else:
            self.outputs_2d_ = True

        check_classification_targets(y)
        self.classes_ = []
        self.class_indices_ = []

        # Encode every target column as integer indices into its sorted
        # unique classes. The builtin int replaces the np.int alias, which
        # was removed from NumPy.
        self._y = np.empty(y.shape, dtype=int)
        for k in range(self._y.shape[1]):
            classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)
            self.classes_.append(classes)

        if not self.outputs_2d_:
            self.classes_ = self.classes_[0]
            self._y = self._y.ravel()

        # class for undefined and unseen values
        if self.use_undefined_class:
            undef_class = np.max(self.classes_) + 1
            self.classes_ = np.append(self.classes_, undef_class)

        # update labels in DPC config
        self.dpc_config['num_l'] = len(self.classes_)

        # instantiate the HyperGrid Transform
        self.gridEncoder = HyperGridTransform(**self.hgt_config)

        # fit the HyperGrid transform to the data
        X_new = self.gridEncoder.fit_transform(X)

        # get the number of bits being used for transformed output
        self.num_bits = self.gridEncoder.num_bits
        self.num_act_bits = self.gridEncoder.num_act_bits

        # Blank Block to hold the hypergrid output
        self.blankBlock = BlankBlock(num_s=self.num_bits)

        # Create PatternClassifier block
        self.dpc = PatternClassifier(**self.dpc_config)

        # connect blocks together
        self.dpc.input.add_child(self.blankBlock.output, 0)

        # Train Network
        self._fit(X_new, self._y)

        return self

    def predict(self, X):
        """Predict the class labels for the provided data

        Parameters
        ----------
        X : array-like, shape (num_query, num_features)
            Test samples.

        Returns
        -------
        y : array of shape [num_samples] or [num_samples, num_outputs]
            Class labels for each data sample. For single-output targets the
            values are the original labels seen in ``fit`` (entries of
            ``classes_``), not the internal label indices.
        """
        X = check_array(X)

        num_samples = X.shape[0]
        if self._y.ndim == 1 or self._y.ndim == 2 and self._y.shape[1] == 1:
            num_outputs = 1
        else:
            num_outputs = self._y.shape[1]

        # transform data
        X_new = self.gridEncoder.transform(X)

        probabilities = self._predict(X_new)

        classes_ = self.classes_
        if not self.outputs_2d_:
            classes_ = [self.classes_]

        y_pred = np.empty((num_samples, num_outputs), dtype=classes_[0].dtype)

        for k in range(num_samples):
            y_best = np.argmax(probabilities[k, :])
            if self.outputs_2d_:
                # Multi-output targets: the winning index is broadcast to
                # every output column, as before.
                y_pred[k, :] = y_best
            else:
                # The classifier was trained on indices into classes_, so
                # translate the winning index back to the original label.
                y_pred[k, 0] = classes_[0][y_best]

        if not self.outputs_2d_:
            y_pred = y_pred.ravel()

        return y_pred

    def predict_proba(self, X):
        """Return probability estimates for the test data X.

        Parameters
        ----------
        X : array-like, shape (num_query, num_features)
            Test samples.

        Returns
        -------
        p : array
            One row per sample holding the values returned by the pattern
            classifier's ``get_probabilities()``; columns presumably follow
            the order of ``classes_`` (confirm against PatternClassifier).
        """
        X = check_array(X)

        # transform data
        X_new = self.gridEncoder.transform(X)

        return self._predict(X_new)

    def score(self, X, y, sample_weight=None):
        """Returns the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy
        which is a harsh metric since you require for each sample that
        each label set be correctly predicted.

        Parameters
        ----------
        X : array-like, shape = (num_samples, num_features)
            Test samples.

        y : array-like, shape = (num_samples) or (num_samples, num_outputs)
            True labels for X.

        sample_weight : array-like, shape = [num_samples], optional
            Sample weights.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) wrt. y.
        """
        from sklearn.metrics import accuracy_score
        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)

    def _fit(self, X, y):
        """Train the pattern classifier for ``num_epochs`` passes over (X, y).

        Returns an array stacking, per epoch and per sample, the probabilities
        reported by the classifier right after each training step.
        """
        probabilities = []

        # train pattern classifier
        for _ in range(self.num_epochs):
            epoch_probs = []

            for k in range(y.shape[0]):
                # Present the encoded sample on the blank block, then let the
                # classifier learn the matching label.
                self.blankBlock.output.bits = X[k, :]
                self.blankBlock.feedforward()
                self.dpc.set_label(y[k])
                self.dpc.feedforward(learn=True)
                epoch_probs.append(self.dpc.get_probabilities())

            probabilities.append(epoch_probs)

        return np.asarray(probabilities)

    def _predict(self, X):
        """Run every encoded sample through the classifier without learning.

        Returns an array with one row of classifier probabilities per sample.
        """
        probabilities = []

        for k in range(X.shape[0]):
            self.blankBlock.output.bits = X[k, :]
            self.blankBlock.feedforward()
            self.dpc.feedforward(learn=False)
            probabilities.append(self.dpc.get_probabilities())

        return np.asarray(probabilities)
num_s=512, # number of statelets num_as=8, # number of active statelets perm_thr=20, # receptor permanence threshold perm_inc=2, # receptor permanence increment perm_dec=1, # receptor permanence decrement pct_pool=0.8, # percent pooled pct_conn=0.5, # percent initially connected pct_learn=0.3) # percent learn # Connect blocks pp.input.add_child(st.output, 0) # Fit for i in range(len(x_trains)): st.set_value(x_trains[i]) pp.set_label(y_trains_ints[i]) st.feedforward() pp.feedforward(learn=True) # Predict probs = [] for i in range(len(x_tests)): st.set_value(x_tests[i]) st.feedforward() pp.feedforward(learn=True) probs.append(pp.get_probabilities()) # Print output print("x, p_a, p_b") for i in range(len(x_tests)): print("%0.1f, %0.1f, %0.1f" % (x_tests[i], probs[i][0], probs[i][1]))
pct_conn=1.0, pct_learn=0.3) classifier.input.add_child(blankblock.output, 0) # Train BrainBlocks classifier bb_train_time = 0 print("Training...", flush=True) for _ in range(num_epochs): for i in range(num_trains): bitimage = binarize(x_train[i], pixel_thresh) blankblock.output.bits = flatten(bitimage) blankblock.feedforward() t0 = time.time() classifier.set_label(y_train[i]) classifier.feedforward(learn=True) t1 = time.time() bb_train_time += t1 - t0 # Test BrainBlocks classifier num_error = 0 bb_test_time = 0 print("Testing...", flush=True) for i in range(num_tests): bitimage = binarize(x_test[i], pixel_thresh) blankblock.output.bits = flatten(bitimage) blankblock.feedforward() t0 = time.time() classifier.feedforward(learn=False)
# Column header for the per-value report printed below.
print('val scr lbl prob ae output_active_statelets')

for i, value in enumerate(values):
    # Encode the value, let the sequence learner update, and query the
    # classifier without learning.
    st.set_value(value)
    st.feedforward()
    sl.feedforward(learn=True)
    pc.feedforward(learn=False)

    score = sl.get_anomaly_score()
    probs = pc.get_probabilities()
    abnormal_event = aed.compute(score)

    # On an abnormal event, train the classifier repeatedly on the current
    # state under a fresh label, then advance the label counter.
    if abnormal_event:
        for _ in range(50):
            pc.set_label(new_label)
            pc.feedforward(learn=True)
        new_label += 1

    # Only report the winning label when its probability is high enough.
    winner = np.argmax(probs)
    winner_str = str(winner) if probs[winner] >= 0.75 else '-'

    sl_acts = '[' + ', '.join(str(act) for act in sl.output.acts) + ']'
    report = (values[i], score, winner_str, probs[winner], abnormal_event, sl_acts)
    print('%0.1f %0.1f %3s %0.2f %2d %s' % report)