class Prediction2FeaturesTestCase(unittest.TestCase):
    """ Test Prediction2FeaturesNode

    :Author: Titiruck Nuntapramote ([email protected])
    :Created: 2011/11/16
    """

    def setUp(self):
        """ Build scalar and list-valued predictions and convert them """
        ran1 = random.randint(2, 100)
        ran2 = random.randint(2, 100)
        list1 = range(ran1)
        list2 = range(ran2)
        self.pv1 = PredictionVector(prediction=1)
        self.pv2 = PredictionVector(prediction=2)
        self.pv3 = PredictionVector(prediction=list1)
        self.pv4 = PredictionVector(prediction=list2)
        self.fv1 = Prediction2FeaturesNode()._execute(self.pv1)
        self.fv2 = Prediction2FeaturesNode(name="test")._execute(self.pv2)
        self.fv3 = Prediction2FeaturesNode()._execute(self.pv3)
        self.fv4 = Prediction2FeaturesNode(name="test")._execute(self.pv4)

    def test_array(self):
        """ The feature values have to equal the prediction values """
        self.assertEqual(self.fv1.view(numpy.ndarray),
                         self.pv1.view(numpy.ndarray),
                         "fv1 is incorrect")
        self.assertEqual(self.fv2.view(numpy.ndarray),
                         self.pv2.view(numpy.ndarray),
                         "fv2 is incorrect")
        # The vectors are equal only if *no* entry of the difference is
        # non-zero. ``not diff.all()`` would already pass if a single entry
        # matched, so ``any`` is the correct reduction here.
        diff = np.array(self.fv3) - np.array(self.pv3)
        self.assertTrue(not diff.any(), "fv3 is incorrect")
        diff2 = np.array(self.fv4) - np.array(self.pv4)
        self.assertTrue(not diff2.any(), "fv4 is incorrect")

    def test_name(self):
        """ The feature names have to be derived from the node name """
        self.assertEqual(self.fv1.feature_names, ["prediction"],
                         "fv1 is incorrect")
        self.assertEqual(self.fv2.feature_names, ["testprediction"],
                         "fv2 is incorrect")

        # compare size
        self.assertEqual(self.fv3.shape[1], len(self.fv3.feature_names),
                         "len(fv3.feature_names) is incorrect")
        self.assertEqual(self.fv4.shape[1], len(self.fv4.feature_names),
                         "len(fv4.feature_names) is incorrect")

        msg = ""
        equal = True
        for i, name in enumerate(self.fv3.feature_names):
            if name != "prediction_" + str(i):
                equal = False
                msg = str(i)
                break
        self.assertTrue(equal, "fv3.feature_names is incorrect at" + msg)

        # Use separate flag/message for fv4; the former code wrote to the
        # flag of the fv3 check, so this assertion could never fail.
        msg2 = ""
        equal2 = True
        for i, name in enumerate(self.fv4.feature_names):
            if name != "testprediction_" + str(i):
                equal2 = False
                msg2 = str(i)
                break
        self.assertTrue(equal2, "fv4.feature_names is incorrect at" + msg2)
def _execute(self, x):
    """ Executes the classifier on the given data vector

    In the linear case the parent implementation is used
    (prediction value = <w,data>+b). Otherwise the prediction is *b* plus
    the sum of ``dual_solution[i] * bi[i] * kernel_func(data, samples[i])``
    over all stored samples with a non-zero dual weight.

    If ``zero_training`` is set and no samples are available, *w*, *b* and
    the dual solution are initialized with zeros and the first class is
    returned with a prediction of 0.

    :Returns: PredictionVector with label, prediction value and this node
              as predictor
    """
    if self.zero_training and self.num_samples == 0:
        # builtin ``float`` replaces the ``numpy.float`` alias, which newer
        # NumPy releases no longer provide (same dtype)
        self.w = numpy.zeros(x.shape[1], dtype=float)
        self.b = 0.0
        self.dual_solution = numpy.zeros(self.num_samples)
        return PredictionVector(label=self.classes[0], prediction=0,
                                predictor=self)
    if self.kernel_type == 'LINEAR':
        return super(SorSvmNode, self)._execute(x)
    # kernel case
    data = x.view(numpy.ndarray)
    data = data[0, :]
    prediction = self.b
    for i in range(self.num_samples):
        dual = self.dual_solution[i]
        # samples with zero dual weight do not contribute
        if dual != 0:
            prediction += dual * self.bi[i] * \
                self.kernel_func(data, self.samples[i])
    # Look up class label
    # prediction --> {-1,1} --> {0,1} --> Labels
    if prediction > 0:
        label = self.classes[1]
    else:
        label = self.classes[0]
    return PredictionVector(label=label, prediction=prediction,
                            predictor=self)
def _execute(self, data):
    """ Executes the classifier on the given data vector *data*

    The label with the most votes in ``data.label`` wins and the mean of
    ``data.prediction`` is used as prediction value. If several labels
    share the highest number of votes, only the entries voting for one of
    these labels are kept and passed to the parent class for the decision.
    """
    prediction_value = numpy.mean(list(data.prediction))
    votes_counter = defaultdict(int)
    for label in data.label:
        votes_counter[label] += 1
    # ``items`` instead of the Python 2 only ``iteritems``;
    # ``sorted`` consumes either of them in the same way
    voting = sorted((votes, label) for label, votes in votes_counter.items())
    max_label = [label for votes, label in voting if votes == voting[-1][0]]

    if len(max_label) == 1:
        majority_vote = voting[-1][1]
        return PredictionVector(prediction=prediction_value,
                                label=majority_vote, predictor=self)

    # tie between several labels: restrict to the tied votes
    relevant_indices = [index for index, label in enumerate(data.label)
                        if label in max_label]
    new_data = PredictionVector(
        prediction=[data.prediction[i] for i in relevant_indices],
        label=[data.label[i] for i in relevant_indices],
        predictor=[data.predictor[i] for i in relevant_indices])
    return super(LabelVotingGatingNode, self)._execute(new_data)
def test_PredictionVector(self):
    """ Check the construction of PredictionVector objects """
    # Without input_array and without prediction a TypeError is expected
    self.assertRaises(TypeError, PredictionVector.__new__)

    # construction from an input array only
    from_array = PredictionVector([[1, 2, 3, 4, 5, 6]])
    self.assertEqual(from_array.prediction, [1, 2, 3, 4, 5, 6])

    # construction from a scalar prediction only
    from_scalar = PredictionVector(prediction=1)
    self.assertEqual(from_scalar.prediction, 1)

    # construction with both arguments (only has to succeed)
    from_both = PredictionVector([[1, 2]], prediction=[1, 2])
def _execute(self, x):
    """ Executes the classifier on the given data vector in the linear case

    prediction value = <w,data>+b

    For any other kernel type nothing is computed and None is returned.
    """
    if self.kernel_type != 'LINEAR':
        return None

    data = x.view(numpy.ndarray)
    # Let the SVM classify the given data: <w,data>+b
    if self.w is not None:
        prediction_value = float(numpy.dot(self.w.T, data[0, :])) + self.b
    else:
        # no weights available yet: start with a zero vector
        prediction_value = 0
        self.w = numpy.zeros(x.shape[1])

    rest_handling = "REST" in self.classes and self.multinomial
    if rest_handling:
        # one-class multinomial handling of REST class:
        # always return the class that is not "REST"
        if self.classes[0] == "REST":
            label = self.classes[1]
        elif self.classes[1] == "REST":
            label = self.classes[0]
            prediction_value *= -1
    elif prediction_value > 0:
        # prediction_value --> {-1,1} --> {0,1} --> Labels
        label = self.classes[1]
    else:
        label = self.classes[0]

    return PredictionVector(label=label, prediction=prediction_value,
                            predictor=self)
def _execute(self, data):
    """ Executes the classifier on the given data vector

    A positive decision value results in the first class,
    everything else in the second class.
    """
    # put a constant one (bias term) in front of the transposed data
    extended = numpy.vstack((numpy.array([1]), data.T))

    # offset due to prior probabilities
    prior_ratio = float(self.prior_probability[0]) / \
        float(self.prior_probability[1])
    prior_shift = numpy.log(prior_ratio)

    # prediction is [0,delta mu]*iECM*[1,x] (eq. 45 in [2])
    # (this is equivalent to [b,w]*[1,x].T (eq. 39))
    delta_mu = numpy.hstack((numpy.array([0]), self.mu_p1 - self.mu_m1))
    projection = numpy.dot(numpy.dot(delta_mu.T, self.iECM), extended)
    m = projection[0] + prior_shift

    predicted_class = self.classes[0] if m > 0 else self.classes[1]
    return PredictionVector(label=predicted_class, prediction=m,
                            predictor=self)
def _execute(self, data):
    """ Executes the classifier on the given data vector

    The decision value *D* is the difference of the two class terms
    ``xFx + logdet`` plus a shift for the prior probabilities.
    A negative *D* results in the first class, else the second class.
    """
    # put a constant one (bias term) in front of the data
    extended = numpy.vstack((numpy.array([1]), data.T)).T

    # The evaluation is a likelihood ratio test. The likelihood for
    # class j is
    # (2*pi*det(Sigma_j))^(-1/2) * exp(-1/2 xF_jx.T) where
    # F_j = (x-mu_j) * iSigma_j * (x-mu_j).T
    #     = [1,x] * {iECM_j - [1,0; 0,0]} * [1,x].T)
    # The log of the likelihood ratio boils down to:
    # {xFx+log(det(Sigma))}_i - {xFx+log(det(Sigma))}_j
    corner = numpy.zeros_like(self.iECM_p1)
    corner[0, 0] = 1  # corner:=[1,0; 0,0]

    # xFx terms:
    form_p1 = numpy.dot(extended, numpy.dot(self.iECM_p1 - corner, extended.T))
    xFx_p1 = float(form_p1)
    form_m1 = numpy.dot(extended, numpy.dot(self.iECM_m1 - corner, extended.T))
    xFx_m1 = float(form_m1)

    # offset due to prior probabilities
    prior_ratio = float(self.prior_probability[0]) / \
        float(self.prior_probability[1])
    prior_shift = 2 * numpy.log(prior_ratio)

    D = (xFx_p1 + self.logdet_p1) - (xFx_m1 + self.logdet_m1) + prior_shift

    predicted_class = self.classes[0] if D < 0 else self.classes[1]
    return PredictionVector(label=predicted_class, prediction=D,
                            predictor=self)
def _execute(self, data): x = data.view(numpy.ndarray) try: prediction = self.scikits_alg.predict(x)[0] except Exception as e: raise type(e), \ type(e)("in node %s:\n\t"%self.__class__.__name__+e.args[0]), \ sys.exc_info()[2] if hasattr(self.scikits_alg, "predict_proba"): try: score = self.scikits_alg.predict_proba(x)[0, 1] except Exception as e: warnings.warn("%s in node %s:\n\t" \ %(type(e).__name__,self.__class__.__name__)+e.args[0]) try: score = self.scikits_alg.decision_function(x)[0] except: score = prediction elif hasattr(self.scikits_alg, "decision_function"): score = self.scikits_alg.decision_function(x)[0] else: # if nothing else works, we set the score of the # prediction to be equal to the prediction itself. score = prediction return PredictionVector(label=prediction, prediction=score, predictor=self)
def _execute(self, x):
    """ Executes the classifier on the given data vector x

    The second label is assigned if at least ``num_channels_above``
    columns of the data contain a non-zero entry, else the first label.
    An invalid ``num_channels_above`` is corrected with a warning.
    """
    data = x.view(numpy.ndarray)
    num_channels = numpy.size(x, 1)

    # keep the required number of channels inside [1, num_channels]
    if self.num_channels_above <= 0:
        warnings.warn(
            "num_channels_above_threshold was set to %d. "
            "The value has to be greater then zero, now its set to 1"
            % (self.num_channels_above))
        self.num_channels_above = 1
    elif self.num_channels_above > num_channels:
        warnings.warn(
            "num_channels_above_threshold was set to %d. "
            "But only %d channels are retained, now its set to %d"
            % (self.num_channels_above, num_channels, num_channels))
        self.num_channels_above = num_channels

    # count the channels with at least one non-zero sample
    channels_with_movement = sum(
        1 for channel in range(num_channels)
        if numpy.any(data[:, channel]))

    # enough channels with onsets: positive label, else negative label
    if channels_with_movement >= self.num_channels_above:
        label = self.labels[1]
    else:
        label = self.labels[0]

    return PredictionVector(label=label,
                            prediction=self.labels.index(label),
                            predictor=self)
def _execute(self, data): x = data.view(numpy.ndarray) try: prediction = self.scikit_alg.predict(x)[0] except Exception as e: raise type(e), \ type(e)("in node %s:\n\t"%self.__class__.__name__+e.args[0]), \ sys.exc_info()[2] if hasattr(self.scikit_alg, "predict_proba"): try: score = self.scikit_alg.predict_proba(x)[0, 1] except Exception as e: warnings.warn("%s in node %s:\n\t"\ %(type(e).__name__,self.__class__.__name__)+e.args[0]) try: score = self.scikit_alg.decision_function(x)[0] except: score = prediction elif hasattr(self.scikit_alg, "decision_function"): score = self.scikit_alg.decision_function(x)[0] else: score = prediction label = self.class_labels[prediction] return PredictionVector(label=label, prediction=score, predictor=self)
def _execute(self, data):
    """ Assign a randomly chosen label; *data* is not evaluated

    The prediction value is the position of the chosen label
    in ``self.labels``.
    """
    chosen = random.choice(self.labels)
    position = self.labels.index(chosen)
    return PredictionVector(label=chosen, prediction=position,
                            predictor=self)
def setUp(self):
    """ Create two scalar and two list-valued predictions and convert them

    The conversion is done once with the default node and once with a node
    named 'test' for each kind of prediction.
    """
    # lengths of the list-valued predictions (drawn in this order)
    first_length = random.randint(2, 100)
    second_length = random.randint(2, 100)

    self.pv1 = PredictionVector(prediction=1)
    self.pv2 = PredictionVector(prediction=2)
    self.pv3 = PredictionVector(prediction=range(first_length))
    self.pv4 = PredictionVector(prediction=range(second_length))

    self.fv1 = Prediction2FeaturesNode()._execute(self.pv1)
    self.fv2 = Prediction2FeaturesNode(name='test')._execute(self.pv2)
    self.fv3 = Prediction2FeaturesNode()._execute(self.pv3)
    self.fv4 = Prediction2FeaturesNode(name='test')._execute(self.pv4)
def _execute(self, data):
    """ Extract the prediction features from the given data

    Every entry of the first row of *data* is used as prediction value.
    Values smaller or equal to zero get the first class label,
    all others the second one.

    :Returns: PredictionVector with a list of labels and a list of
              prediction values
    """
    assert (type(data) == FeatureVector), \
        "Features2PredictionNode requires FeatureVector inputs " \
        "not %s" % type(data)
    values = data.view(numpy.ndarray)[0, :]
    # list comprehension instead of ``map``: identical on Python 2 and
    # still a real list (not a lazy iterator) on Python 3
    labels = [self.class_labels[0] if value <= 0 else self.class_labels[1]
              for value in values]
    return PredictionVector(label=labels, prediction=list(values))
def _execute(self, x):
    """ (x+o)*s < d

    The prediction is shifted by the offset and then scaled. Without a
    decision boundary the incoming label is kept. Otherwise the first
    class label is used if the boundary is smaller than the new
    prediction and the second one if not.
    """
    new_prediction = (x.prediction + self.offset) * self.scaling
    boundary = self.decision_boundary
    if boundary is None:
        new_label = x.label
    elif boundary < new_prediction:
        new_label = self.class_labels[0]
    else:
        new_label = self.class_labels[1]
    return PredictionVector(prediction=new_prediction, label=new_label,
                            predictor=x.predictor)
def _execute(self, data):
    """ Classify by the distance of *data* to the center

    The prediction is the norm of the difference to the center minus
    the radius. A positive prediction results in the second class,
    else the first class is assigned.
    """
    sample = data.view(numpy.ndarray)
    # without a center, the first sample seen becomes the center
    if self.center is None:
        self.center = sample
    offset = self.center - sample
    prediction = float(numpy.linalg.norm(offset)) - self.radius
    label = self.classes[1] if prediction > 0 else self.classes[0]
    return PredictionVector(prediction=prediction, label=label,
                            predictor=self)
def _execute(self, data):
    """ Collect label and prediction of every ensemble member for *data*

    Results are looked up in a cache, which uses the flow path and the
    hash of the flattened data as keys. For missing entries, the ensemble
    is loaded (if not yet done), the corresponding node is executed and
    the result is stored in the cache.

    :Returns: PredictionVector with the lists of labels and predictions;
              ``dim_names`` is set to ``self.feature_names``
    """
    # Compute data's hash
    data_hash = hash(tuple(data.flatten()))
    # Load ensemble's cache (identity check instead of ``== None``)
    if self.cache is None:
        if self.cache_dir:
            self._load_cache()
        else:  # Caching disabled
            self.cache = defaultdict(dict)
    # Try to lookup the result of this ensemble for the given data
    # in the cache
    labels = []
    predictions = []
    for i, flow_path in enumerate(self.flow_pathes):
        if data_hash in self.cache[flow_path]:
            label, prediction = self.cache[flow_path][data_hash]
        else:
            self.cache_updated = True
            if self.ensemble is None:
                # Load ensemble since data is not cached
                self._load_ensemble()
            node_result = self.ensemble.nodes[i].execute(data)
            label = node_result.label
            prediction = node_result.prediction
            self.cache[flow_path][data_hash] = (label, prediction)
        labels.append(label)
        predictions.append(prediction)

    result = PredictionVector(label=labels, prediction=predictions,
                              predictor=self)
    result.dim_names = self.feature_names
    return result
def _execute(self, data):
    """ Shift the RMM2Node prediction by one and determine the label

    With ``outer_boundary``, predictions below half of ``-range + 1``
    are mirrored at this value. A positive prediction results in the
    second class, else the first class is assigned.
    """
    parent_result = RMM2Node._execute(self, data)
    label = parent_result.label
    prediction = parent_result.prediction + 1

    if self.outer_boundary:
        if prediction < (-1.0 * self.range + 1) * 0.5:
            prediction = (-1.0 * self.range + 1) - prediction

    if prediction > 0:
        label = self.classes[1]
    elif prediction <= 0:
        label = self.classes[0]

    return PredictionVector(prediction=prediction, label=label,
                            predictor=self)
def _execute(self, data):
    """ Label with highest sum of prediction values wins

    With ``enforce_absolute_values`` the absolute prediction
    values are summed up.
    """
    score_per_label = defaultdict(float)
    for position, label in enumerate(data.label):
        value = data.prediction[position]
        if self.enforce_absolute_values:
            value = abs(value)
        score_per_label[label] += value

    # ascending order by the summed score, the winner is the last entry
    ranking = list(score_per_label.items())
    ranking.sort(key=lambda entry: entry[1])
    winner_label, winner_score = ranking[-1]

    return PredictionVector(prediction=winner_score, label=winner_label,
                            predictor=self)
def _execute(self, data):
    """ Executes the classifier on the given data vector *data*

    Every (stripped) label in ``data.label`` is mapped to -1 or 1 using
    its index in ``class_labels``. Classifies as the second class label if
    the dot product of weights and mapped labels is larger than the
    classification threshold, else as the first class label.

    .. todo:: Check mapping
    """
    # A list (instead of ``map``) is also a valid input for ``numpy.dot``
    # on Python 3, where ``map`` only returns a lazy iterator.
    signed_votes = [self.class_labels.index(label.strip()) * 2 - 1
                    for label in data.label]
    value = numpy.dot(self.weights, signed_votes)
    if value > self.classification_threshold:
        vote = self.class_labels[1]
    else:
        vote = self.class_labels[0]
    return PredictionVector(prediction=value, label=vote, predictor=self)
def _execute(self, x):
    """ Evaluate each prediction with the linear mapping learned.

    Predictions below ``-max_range[0]`` are mapped to 0 and predictions
    of at least ``max_range[1]`` to 1. In between, the mapping uses the
    range entry that belongs to the label of *x*.
    """
    score = x.prediction
    if score < -1.0 * self.max_range[0]:
        new_prediction = 0.0
    elif score < self.max_range[1]:
        # range of the class, the sample has been assigned to
        label_range = self.max_range[self.class_labels.index(x.label)]
        new_prediction = (score + label_range) / (2.0 * label_range)
    else:
        new_prediction = 1.0
    return PredictionVector(label=x.label, prediction=new_prediction,
                            predictor=x.predictor)
def _execute(self, data):
    """ Executes the classifier on the given data vector x

    For every class, the entry of ``ap`` is multiplied with the product
    of the Gaussian densities of *data*. The index of the class with the
    largest value is used for the label and as prediction.
    """
    res = numpy.zeros(len(self.ap))
    for index, class_weight in enumerate(self.ap):
        variance = self.var[index]
        normalization = 1.0 / (numpy.sqrt(2.0 * numpy.pi * variance))
        exponential = numpy.exp(
            -0.5 * ((data - self.mu[index]) ** 2 / (variance)))
        densities = normalization * exponential
        res[index] = class_weight * densities.prod()
    best_index = res.argmax()
    return PredictionVector(label=self.class_labels[best_index],
                            prediction=best_index, predictor=self)
def _execute(self, data):
    """ Process the data through the internal nodes.

    Labels, predictions and predictors of all nodes are collected
    in lists in the order of the nodes.
    """
    labels, predictions, predictors = [], [], []
    for node in self.nodes:
        outcome = node.execute(data)
        labels.append(outcome.label)
        predictions.append(outcome.prediction)
        predictors.append(outcome.predictor)
    return PredictionVector(label=labels, prediction=predictions,
                            predictor=predictors)
def _execute(self, data):
    """ Shift the RmmPerceptronNode prediction by one and determine the label

    Positive predictions get the second class. Non-positive predictions
    above ``-range + 1`` get the first class. Predictions at or below
    this value get the second class and are shifted by ``range - 1``
    if ``outer_boundary`` is set, else they get the first class.
    """
    parent_result = RmmPerceptronNode._execute(self, data)
    label = parent_result.label
    prediction = parent_result.prediction + 1

    if prediction > 0:
        label = self.classes[1]
    else:
        outer_limit = -1.0 * self.range + 1
        if 0 >= prediction > outer_limit:
            label = self.classes[0]
        elif outer_limit >= prediction:
            if self.outer_boundary:
                label = self.classes[1]
                prediction += self.range - 1
            else:
                label = self.classes[0]

    return PredictionVector(prediction=prediction, label=label,
                            predictor=self)
def _execute(self, data):
    """ Evaluate each prediction with the sigmoid mapping learned.

    Values of at most 0.5 result in the first class label,
    larger values in the second one.
    """
    # code simply copied from PlattsSigmoidFitNode for eventual
    # future changes
    fApB = data.prediction * self.A + self.B
    if fApB < 0:
        new_prediction = 1 / (1.0 + numpy.exp(fApB))
    else:
        decay = numpy.exp(-fApB)
        new_prediction = decay / (decay + 1.0)
    # enforce mapping to interval [0,1]
    new_prediction = max(0, min(1, new_prediction))
    if new_prediction <= 0.5:
        new_label = self.class_labels[0]
    else:
        new_label = self.class_labels[1]
    return PredictionVector(label=new_label, prediction=new_prediction,
                            predictor=data.predictor)
def _execute(self, x):
    """ Evaluate each prediction with the sigmoid mapping learned.

    Values of at most 0.5 result in the first class label, larger values
    in the second one. With ``store_probabilities`` each new prediction
    is appended together with its label to ``self.probabilities``.
    """
    fApB = x.prediction * self.A + self.B
    if fApB < 0:
        new_prediction = 1 / (1.0 + numpy.exp(fApB))
    else:
        decay = numpy.exp(-fApB)
        new_prediction = decay / (decay + 1.0)
    # enforce mapping to interval [0,1]
    new_prediction = max(0, min(1, new_prediction))
    if new_prediction <= 0.5:
        new_label = self.class_labels[0]
    else:
        new_label = self.class_labels[1]
    # Save the new calculated probabilities
    if self.store_probabilities:
        self.probabilities.append([new_prediction, new_label])
    return PredictionVector(label=new_label, prediction=new_prediction,
                            predictor=x.predictor)
def _execute(self, data):
    """ Executes the classifier on the given data vector *data*

    The prediction is the share of the weighted votes for the first class
    label. This label is assigned if the share reaches
    ``required_vote_ratio``, else the second class label is used.
    """
    # Sum up the weights of the votes for each label
    weighted_votes = defaultdict(int)
    for position, voted_label in enumerate(data.label):
        weight = self.weights[position][voted_label.strip()]
        weighted_votes[voted_label] += weight

    # Ratio of the weighted votes for the first class
    first_class_votes = float(weighted_votes[self.class_labels[0]])
    vote_ratio = first_class_votes / sum(weighted_votes.values())

    # Compare with the threshold "self.required_vote_ratio"
    if vote_ratio >= self.required_vote_ratio:
        vote = self.class_labels[0]
    else:
        vote = self.class_labels[1]
    return PredictionVector(prediction=vote_ratio, label=vote,
                            predictor=self)
def _execute(self, data):
    """ Executes the classifier on the given data vector *data*

    The distance to a training example is the number of positions, where
    its label vector differs from ``data.label``. The labels of the
    ``self.n`` training examples with the smallest distance are counted
    and the label with the most votes is returned.

    :Returns: PredictionVector with the majority label (no prediction
              value is given)
    """
    distance_fct = lambda x, y: sum((numpy.array(x) != numpy.array(y)))
    label_distance = ((label, distance_fct(training_data.label, data.label))
                      for training_data, label in self.training_examples)
    nearest = heapq.nsmallest(self.n, label_distance, key=lambda x: x[1])
    # list comprehension and ``items`` replace the Python 2 only forms
    # (list returning ``map`` and ``iteritems``) with the same result
    n_smallest_labels = [label for label, _ in nearest]

    votes_counter = defaultdict(int)
    for label in n_smallest_labels:
        votes_counter[label] += 1
    voting = sorted((votes, label) for label, votes in votes_counter.items())
    majority_vote = voting[-1][1]
    return PredictionVector(label=majority_vote, predictor=self)
def _execute(self, data):
    """ Executes the classifier on the given data vector x

    A negative value of the product of ``b_w`` with the extended data
    results in the first class label, else the second one is assigned.
    """
    # append a feature that is constantly one (bias term)
    extended = numpy.vstack((data.transpose(), numpy.array(1)))
    # compute mean of predictive distributions
    m = float(numpy.dot(self.b_w.transpose(), extended))
    predicted_class = \
        self.class_labels[0] if m < 0 else self.class_labels[1]
    return PredictionVector(label=predicted_class, prediction=m,
                            predictor=self)
def _execute(self, data):
    """ Shift the data with the new offset

    Depending on ``orientation_up`` the second class is assigned for
    predictions above or below the threshold. The score is kept with
    ``preserve_score``, else it is reduced by the difference between
    the threshold and the classifier threshold.
    """
    score = data.prediction
    if self.orientation_up:
        second_class = score > self.threshold
    else:
        second_class = score < self.threshold
    predicted_label = self.classes[1] if second_class else self.classes[0]

    if self.preserve_score:
        prediction_score = score
    else:
        shift = self.threshold - self.classifier_threshold
        prediction_score = score - shift

    return PredictionVector(label=predicted_label,
                            prediction=prediction_score,
                            predictor=self)
def _execute(self, x, fda_range=None):
    """ Executes the classifier on the given data vector x

    The first column of the output of ``MDPflow`` for *x* is used as
    prediction. Values larger than zero result in the positive class,
    else the negative class is assigned.

    On the first call, the positive class is determined by executing the
    flow on the stored mean of the first class label.

    :Parameters:
        :x: data vector to be classified
        :fda_range: second argument for ``MDPflow.execute``
    """
    if self.positive_class is None:
        # The FDA_Classifier_Node does not provide a mapping
        # from its continuous output to a class label
        # In order to do that, we test the class means and see
        # whether they yield positive or negative results
        label_0 = self.class_labels[0]
        classifier_result = \
            self.MDPflow.execute(self.MDPflow[0].means[label_0])
        if classifier_result[:, 0] > 0.0:
            self.positive_class = self.class_labels[0]
            self.negative_class = self.class_labels[1]
        else:
            self.positive_class = self.class_labels[1]
            self.negative_class = self.class_labels[0]

    data = x.view(numpy.ndarray)
    f_projection = self.MDPflow.execute(data, fda_range)
    classifications = numpy.where(f_projection[:, 0] > 0.0,
                                  self.positive_class,
                                  self.negative_class)
    return PredictionVector(label=classifications[0],
                            prediction=float(f_projection[:, 0]),
                            predictor=self)
def _execute(self, data):
    """ Process the data through the internal nodes

    Every node is executed on the same *data* and the results are merged
    into a single object of the common output type:

    * FeatureVector: merged with ``add_feature_vector``
    * PredictionVector: labels, predictions and predictors are
      collected in lists
    * TimeSeries: the results are concatenated along the second axis

    All nodes have to return the same type.
    """
    names = []
    result_array = None
    result_label = []
    result_predictor = []
    result_prediction = []
    # For all node-layers
    for node_index, node in enumerate(self.nodes):
        # Compute node's result
        node_result = node.execute(data)
        # Determine the output type of the node
        if self.output_type is None:
            self.output_type = type(node_result)
        else:
            assert (self.output_type == type(node_result)), \
                "SameInputLayerNode requires that all of its layers return "\
                "the same type. Types found: %s %s" \
                % (self.output_type, type(node_result))

        # Merge the nodes' outputs depending on the type
        if self.output_type == FeatureVector:
            result_array = \
                self.add_feature_vector(node_result, node_index,
                                        result_array, names)
        elif self.output_type == PredictionVector:
            if isinstance(node_result.label, list):
                result_label.extend(node_result.label)
            else:
                # a single classification is expected here
                result_label.append(node_result.label)
            if isinstance(node_result.prediction, list):
                result_prediction.extend(node_result.prediction)
            else:
                result_prediction.append(node_result.prediction)
            if isinstance(node_result.predictor, list):
                result_predictor.extend(node_result.predictor)
            else:
                result_predictor.append(node_result.predictor)
        else:
            assert (self.output_type == TimeSeries), \
                "SameInputLayerNode can not merge data of type %s." \
                % self.output_type
            if self.names is None and not self.unique:
                names.extend(node_result.channel_names)
            elif self.names is None and self.unique:
                # prefix with the node index to get unique names
                for name in node_result.channel_names:
                    names.append("%i_%s" % (node_index, name))

            # Identity check is required here: ``== None`` on an array
            # is evaluated element-wise and is no valid condition.
            if result_array is None:
                result_array = node_result
                if self.dtype is None:
                    self.dtype = node_result.dtype
            else:
                result_array = numpy.concatenate(
                    (result_array, node_result), axis=1)

    # Construct output with correct type and names
    if self.names is None:
        self.names = names

    if self.output_type == FeatureVector:
        return FeatureVector(result_array, self.names)
    elif self.output_type == PredictionVector:
        return PredictionVector(label=result_label,
                                prediction=result_prediction,
                                predictor=result_predictor)
    else:
        return TimeSeries(result_array, self.names,
                          node_result.sampling_frequency,
                          node_result.start_time, node_result.end_time,
                          node_result.name, node_result.marker_name)