def generate_affine_backtransformation(self):
    """ Probe ``self._execute`` with synthetic samples to get its affine form

    An all-zero copy of ``self.example`` is executed first and the result
    is stored as ``self.offset``. Afterwards one unit sample per component
    (a single entry set to 1.0, all others 0) is executed and the response
    minus the offset is written to the matching entry of ``self.trafo``.

    Only examples whose type is exactly FeatureVector or TimeSeries are
    handled; for every other type nothing is set.
    """
    if type(self.example) == FeatureVector:
        blank = FeatureVector.replace_data(
            self.example, numpy.zeros(self.example.shape))
        self.offset = numpy.longdouble(self._execute(blank))
        self.trafo = FeatureVector.replace_data(
            self.example, numpy.zeros(self.example.shape))
        # one probe per feature: unit vector in direction j
        for j, _ in enumerate(self.example.feature_names):
            probe = FeatureVector.replace_data(
                self.example, numpy.zeros(self.example.shape))
            probe[0][j] = 1.0
            response = self._execute(probe)
            self.trafo[0][j] = numpy.longdouble(response - self.offset)
    elif type(self.example) == TimeSeries:
        blank = TimeSeries.replace_data(
            self.example, numpy.zeros(self.example.shape))
        self.offset = numpy.longdouble(
            numpy.squeeze(self._execute(blank)))
        self.trafo = TimeSeries.replace_data(
            self.example, numpy.zeros(self.example.shape))
        # one probe per (time point, channel) entry
        for i in range(self.example.shape[0]):
            for j in range(self.example.shape[1]):
                probe = TimeSeries.replace_data(
                    self.example, numpy.zeros_like(self.example))
                probe[i][j] = 1.0
                response = numpy.squeeze(self._execute(probe))
                self.trafo[i][j] = numpy.longdouble(
                    response - self.offset)
def calculate_classification_vector(self, model):
    """ Calculate classification vector w and the offset b

    The libsvm *model* is evaluated on the zero vector to obtain the offset
    ``self.b`` and on every unit vector to obtain the entries of
    ``self.w`` (decision value minus offset). Signs are flipped when the
    model reports its labels as ``[0, 1]``.

    Side effects: sets ``self.b``, ``self.w``, ``self.features``,
    ``self.num_retained_features``, ``self.print_w`` and the
    ``"~~Num_Retained_Features~~"`` entry of
    ``self.classifier_information``.

    :param model: trained libsvm model as accepted by
        ``svmutil.svm_predict``
    """
    # ctypes libsvm bindings
    # TODO get parameter maybe easier
    try:
        self.b = svmutil.svm_predict(
            [0], [[0.0] * self.dim], model, "-q")[2][0][0]
    except ValueError:
        # retry without the "-q" option
        self.b = svmutil.svm_predict(
            [0], [[0.0] * self.dim], model)[2][0][0]
    except IndexError:
        self._log("Classification failed. " +
                  "Did you specify the parameters correctly?",
                  level=logging.ERROR)
        self.b = 0
        self.w = numpy.zeros(self.dim)
        self.features = FeatureVector(
            numpy.atleast_2d(self.w).astype(numpy.float64),
            self.feature_names)
    # the label order of the model decides on the sign of w and b;
    # it is queried once instead of once per dimension
    flip_sign = model.get_labels() == [0, 1]
    if flip_sign:
        self.b = -self.b
    self.w = numpy.zeros(self.dim)
    for i in range(self.dim):
        e = [0.0] * self.dim
        e[i] = 1.0
        try:
            self.w[i] = svmutil.svm_predict(
                [0], [e], model, "-q")[2][0][0]
        except ValueError:
            try:
                self.w[i] = svmutil.svm_predict([0], [e], model)[2][0][0]
            except IndexError:
                # best effort: keep the zero entry
                pass
        except IndexError:
            # best effort: keep the zero entry
            pass
        if flip_sign:
            self.w[i] = -self.w[i]
        self.w[i] -= self.b
    self.features = FeatureVector(
        numpy.atleast_2d(self.w).astype(numpy.float64),
        self.feature_names)
    wf = []
    try:
        for i, feature in enumerate(self.feature_names):
            if not self.w[i] == 0:
                wf.append((self.w[i], feature))
        wf.sort()
        w = numpy.array(wf, dtype='|S200')
    except ValueError:
        self._log('w could not be converted.', level=logging.WARNING)
        # Previously ``w`` stayed unbound here and the code below raised
        # an UnboundLocalError. Fall back to the unconverted
        # (weight, name) pairs collected so far.
        w = wf
    except IndexError:
        self._log('There are more feature names than features. '
                  'Please check your feature generation and input data.',
                  level=logging.CRITICAL)
        self.b = 0
        w = numpy.zeros(self.dim)
        self.w = w
    # only features without zero multiplier are relevant
    self.num_retained_features = len(w)
    self.classifier_information["~~Num_Retained_Features~~"] = \
        self.num_retained_features
    self.print_w = w
def setUp(self):
    """ Create the full feature vector and the sub-vectors used by the tests """
    all_names = ["a", "b", "ab", "cb", "c4", "abc"]
    # complete vector with six features
    self.x = FeatureVector([[0, 1, 2, 3, 4, 5]], all_names)
    # entries of x whose name contains an "a"
    self.a = FeatureVector([[0, 2, 5]], ["a", "ab", "abc"])
    # the remaining entries of x
    self.na = FeatureVector([[1, 3, 4]], ["b", "cb", "c4"])
    # like self.a, additionally with feature "c4"
    self.a4 = FeatureVector([[0, 2, 4, 5]], ["a", "ab", "c4", "abc"])
def generate_affine_backtransformation(self):
    """ Determine offset and linear part of ``self._execute`` by probing it

    The method executes an all-zero copy of ``self.example`` (result is
    stored in ``self.offset``) and then, entry by entry, a copy that is
    zero except for a single 1.0. The response to each of these unit
    samples minus the offset becomes the corresponding entry of
    ``self.trafo``.

    Nothing happens if ``self.example`` is neither exactly a FeatureVector
    nor exactly a TimeSeries.
    """
    example_type = type(self.example)
    if example_type == FeatureVector:
        zero_sample = FeatureVector.replace_data(
            self.example, numpy.zeros(self.example.shape))
        self.offset = numpy.longdouble(self._execute(zero_sample))
        self.trafo = FeatureVector.replace_data(
            self.example, numpy.zeros(self.example.shape))
        feature_count = len(self.example.feature_names)
        for j in range(feature_count):
            unit_sample = FeatureVector.replace_data(
                self.example, numpy.zeros(self.example.shape))
            unit_sample[0][j] = 1.0
            difference = self._execute(unit_sample) - self.offset
            self.trafo[0][j] = numpy.longdouble(difference)
    elif example_type == TimeSeries:
        zero_sample = TimeSeries.replace_data(
            self.example, numpy.zeros(self.example.shape))
        zero_response = numpy.squeeze(self._execute(zero_sample))
        self.offset = numpy.longdouble(zero_response)
        self.trafo = TimeSeries.replace_data(
            self.example, numpy.zeros(self.example.shape))
        for i in range(self.example.shape[0]):
            for j in range(self.example.shape[1]):
                unit_sample = TimeSeries.replace_data(
                    self.example, numpy.zeros_like(self.example))
                unit_sample[i][j] = 1.0
                difference = \
                    numpy.squeeze(self._execute(unit_sample)) - self.offset
                self.trafo[i][j] = numpy.longdouble(difference)
def setUp(self):
    """ Create the feature vectors shared by the tests """
    six_values = [1, 2, 3, 4, 5, 6]
    six_names = ['a', 'b', 'c', 'd', 'e', 'f']
    # six named features, no tag
    self.f1 = FeatureVector(list(six_values), list(six_names))
    # same content as f1, but with an explicit tag
    self.f2 = FeatureVector(list(six_values), list(six_names),
                            tag='Tag of f2')
    # two named features, no tag
    self.f3 = FeatureVector([1, 2], ['a', 'b'])
    # no feature names given
    self.f4 = FeatureVector([1, 2])
def test_replace_data(self):
    # Replacing only the data keeps feature names and tag of the template
    data = FeatureVector.replace_data(self.f2, [10, 20, 30, 40, 50, 60])
    deviation = data.view(numpy.ndarray) - [10, 20, 30, 40, 50, 60]
    self.assertFalse(deviation.any())
    self.assertEqual(data.feature_names, ['a', 'b', 'c', 'd', 'e', 'f'])
    self.assertEqual(data.tag, 'Tag of f2')

    # Explicitly passed feature names override the ones of the template
    new_names = ['m', 'n', 'o', 'p', 'q', 'r']
    data2 = FeatureVector.replace_data(self.f1, [4, 5, 6, 7, 8, 9],
                                       feature_names=new_names)
    deviation2 = data2.view(numpy.ndarray) - [4, 5, 6, 7, 8, 9]
    self.assertFalse(deviation2.any())
    self.assertEqual(data2.feature_names, ['m', 'n', 'o', 'p', 'q', 'r'])
    self.assertEqual(data2.tag, None)
def test_replace_data(self):
    # Case 1: only the values are exchanged, meta data comes from f2
    replaced = FeatureVector.replace_data(self.f2,
                                          [10, 20, 30, 40, 50, 60])
    residual = replaced.view(numpy.ndarray) - [10, 20, 30, 40, 50, 60]
    self.assertFalse(residual.any())
    self.assertEqual(replaced.feature_names,
                     ['a', 'b', 'c', 'd', 'e', 'f'])
    self.assertEqual(replaced.tag, 'Tag of f2')

    # Case 2: values and feature names are exchanged, f1 has no tag
    renamed = FeatureVector.replace_data(
        self.f1, [4, 5, 6, 7, 8, 9],
        feature_names=['m', 'n', 'o', 'p', 'q', 'r'])
    residual = renamed.view(numpy.ndarray) - [4, 5, 6, 7, 8, 9]
    self.assertFalse(residual.any())
    self.assertEqual(renamed.feature_names,
                     ['m', 'n', 'o', 'p', 'q', 'r'])
    self.assertEqual(renamed.tag, None)
def _execute(self, data):
    """ Normalizes the samples vector to norm one

    The first row of *data* is divided by its Euclidean norm. If
    ``self.dimension_scale`` is set, the result is additionally multiplied
    by the number of features.

    On the first call (``self.feature_names == []``) the feature names of
    *data* are remembered, and ``self.dim`` is set if it is still None.

    :param data: feature vector to normalize
    :returns: new FeatureVector carrying ``self.feature_names``
    :raises InconsistentFeatureVectorsException:
        if the feature names of *data* differ from the remembered ones
    """
    if self.feature_names == []:
        self.feature_names = data.feature_names
    elif self.feature_names != data.feature_names:
        raise InconsistentFeatureVectorsException("Two feature vectors used during training do not contain the same features!")
    x = data.view(numpy.ndarray)
    a = x[0, :]
    if self.dim is None:
        self.dim = len(a)
    norm = numpy.linalg.norm(a)
    if norm == 0:
        # a zero vector cannot be scaled to norm one; leave it unchanged
        # instead of producing NaN entries
        norm = 1
    # numpy.longdouble is the portable spelling of the extended precision
    # type (numpy.float128 does not exist on every platform)
    a = a * numpy.longdouble(1) / norm
    if self.dimension_scale:
        return FeatureVector([len(a) * a], self.feature_names)
    return FeatureVector([a], self.feature_names)
def get_sensor_ranking(self):
    """ Transform the classification vector to a sensor ranking

    The absolute values of the first row of ``self.features`` are summed
    up per channel, where the channel of a feature is the part of its name
    between the first and the second underscore.

    If the instance has no ``features`` attribute, it is created from
    ``self.w`` and ``self.feature_names`` and an error is logged.

    :returns: list of ``(channel name, summed weight)`` tuples sorted by
        ascending weight
    """
    if "features" not in self.__dict__:
        self.features = FeatureVector(
            numpy.atleast_2d(self.w).astype(numpy.float64),
            self.feature_names)
        self._log("No features variable existing to create generic sensor "
                  "ranking in %s." % self.__class__.__name__,
                  level=logging.ERROR)
    # channel name is what comes after the first underscore
    channel_of_feature = []
    for feature_name in self.features.feature_names:
        channel_of_feature.append(feature_name.split('_')[1])
    from collections import defaultdict
    summed_weights = defaultdict(float)
    for position, weight in enumerate(self.features[0]):
        summed_weights[channel_of_feature[position]] += abs(weight)
    return sorted(summed_weights.items(), key=lambda entry: entry[1])
def normalization(self, sample):
    """ rescales ``self.trafo`` to the norm of the given sample

    **Principle**

    The Euclidean norm of the first row of *sample* and of the first row
    of ``self.trafo`` are computed (a norm of zero is replaced by one).
    ``self.trafo`` is then replaced by its first row multiplied with the
    ratio of both norms, so that it ends up on the scale of the input.

    .. note:: An IOError is raised if either ``self.trafo`` or the
              *sample* is None.
    """
    if self.trafo is None:
        raise IOError("The derivative has not be computed. Cannot perform normalization.")
    if sample is None:
        raise IOError("The initial sample has not been given. Cannot perform normalization.")

    def _nonzero_norm(row):
        # norm of the row; 1 is used instead of 0 to keep ratios defined
        length = numpy.linalg.norm(row)
        return 1 if length == 0 else length

    norm_a = _nonzero_norm(sample.view(numpy.ndarray)[0, :])
    b = self.trafo.view(numpy.ndarray)[0, :]
    norm_b = _nonzero_norm(b)
    self.trafo = FeatureVector.replace_data(self.trafo, b*norm_a/norm_b)
def _execute(self, x):
    """ Compute coherence features for every pair of channels of *x*

    For each unordered channel pair ``mlab.cohere`` is evaluated and every
    coherence value whose frequency lies in
    ``[self.min_frequency, self.max_frequency]`` becomes one feature named
    ``Coherence_<channel1>_<channel2>_<freq>Hz``.

    ``self.NFFT`` and ``self.noverlap`` are set on the first call.

    :param x: time series providing ``channel_names`` and
        ``sampling_frequency``
    :returns: FeatureVector with one row
    """
    # Lazy computation of NFFT and noverlap
    if not hasattr(self, "NFFT"):
        # Choose NFFT such that the desired frequency resolution is
        # obtained (if possible); doubling the rounded half keeps it even
        half_length = round(0.5 * x.sampling_frequency /
                            self.frequency_resolution)
        self.NFFT = int(half_length * 2)
        self.noverlap = 0

    features = []
    feature_names = []
    channel_count = len(x.channel_names)
    for first in range(channel_count):
        channel_name1 = x.channel_names[first]
        for second in range(first + 1, channel_count):
            channel_name2 = x.channel_names[second]
            (Cxy, freqs) = mlab.cohere(x[:, first], x[:, second],
                                       Fs=x.sampling_frequency,
                                       NFFT=self.NFFT,
                                       noverlap=self.noverlap)
            # TODO: This would be more efficient without the explicit loop
            for index1, freq in enumerate(freqs):
                if self.min_frequency <= freq <= self.max_frequency:
                    # Append as feature
                    features.append(Cxy[index1])
                    feature_names.append(
                        "Coherence_%s_%s_%.2fHz" % (channel_name1,
                                                    channel_name2, freq))
    return FeatureVector(
        numpy.atleast_2d(features).astype(numpy.float64), feature_names)
def _execute(self, x):
    """ Extract the TD features from the given data x

    For every channel and every entry of ``self.datapoints`` the mean over
    a window of ``self.moving_window_length`` samples is computed. From
    these per-window means two kinds of difference features are derived:

    * ``Df2_<channel>_<time>sec``: difference of two consecutive means
    * ``Df5_<channel>_<time>sec``: five point stencil
      ``2*(f(n+1)-f(n-1)) - (f(n+2)-f(n-2))``

    If ``self.datapoints`` is None, all sample indices of *x* are used.

    :param x: time series providing ``channel_names``,
        ``sampling_frequency``, ``start_time`` and ``end_time``
    :returns: FeatureVector with one row
    """
    if self.datapoints is None:
        self.datapoints = range(x.shape[0])
    y = x.view(numpy.ndarray)
    # From each selected channel we extract the specified data points.
    # Floor division keeps the window borders integral.
    half_before = self.moving_window_length // 2
    half_after = (self.moving_window_length + 1) // 2
    indices = [range(max(0, datapoint - half_before),
                     min(x.shape[0], datapoint + half_after))
               for datapoint in self.datapoints]
    channel_features = dict()
    for channel_name in x.channel_names:
        channel_index = x.channel_names.index(channel_name)
        for number, index_range in enumerate(indices):
            channel_features[(channel_name, number)] = \
                numpy.mean(y[index_range, channel_index])

    # Mapping from datapoint index to relative time to onset
    def indexToTime(index):
        if index >= 0:
            return float(index) / x.sampling_frequency
        else:
            return (x.end_time - x.start_time) / 1000.0 \
                + float(index) / x.sampling_frequency

    # NOTE(review): the times in the feature names are computed from the
    # window number, not from the entry of self.datapoints - confirm that
    # this is intended for non-default datapoints.
    features = []
    feature_names = []
    for channel1, number1 in channel_features:
        # intuitive derivative quotient
        number2 = number1 + 1
        # membership is tested on the dict itself (constant time) instead
        # of scanning all keys
        if (channel1, number2) in channel_features:
            features.append(channel_features[(channel1, number2)] -
                            channel_features[(channel1, number1)])
            feature_names.append("Df2_%s_%.3fsec" %
                                 (channel1, indexToTime(number1)))
        # Method taken from http://www.holoborodko.com/pavel/?page_id=245
        # f'(x)=\\frac{2(f(x+h)-f(x-h))-(f(x+2h)-f(x-2h))}{8h}
        # Further smoothing functions are available, but seemingly not
        # necessary, because we have already a smoothing of the signal
        # when doing the subsampling.
        number3 = number1 + 4
        number = number1 + 2
        if (channel1, number3) in channel_features:
            features.append(
                2.0 * (channel_features[(channel1, number + 1)]
                       - channel_features[(channel1, number - 1)])
                - (channel_features[(channel1, number + 2)]
                   - channel_features[(channel1, number - 2)]))
            feature_names.append("Df5_%s_%.3fsec" %
                                 (channel1, indexToTime(number)))
    return FeatureVector(
        numpy.atleast_2d(features).astype(numpy.float64), feature_names)
def test_no_change(self):
    """ checks what the node does to already Gaussian data

    **Principle**

    1) draw data points from a standard normal distribution
    2) train the node with one FeatureVector per data point
    3) stop the training, so that the multiplication and the translation
       variable of the node are computed

    Since the data is already standard normal, the expectation is

        - a multiplication factor of 1
        - a translation of 0
    """
    f_names = ["TD_S1_0sec", "TD_S2_1sec", "TD_S3_1sec"]
    samples = numpy.random.normal(loc=0., scale=1.0, size=(10000, 3))
    for sample in samples:
        self.node.train(FeatureVector(sample, feature_names=f_names))
    self.node.stop_training()
    # The data is drawn randomly (no fixed seed), so the learned values
    # fluctuate; an absolute tolerance of 3e-2 is allowed for both checks
    expected_mult = [1., 1., 1.]
    self.assertTrue(
        numpy.allclose(self.node.mult, expected_mult, atol=3.e-2))
    expected_translation = [0., 0., 0.]
    self.assertTrue(
        numpy.allclose(self.node.translation, expected_translation,
                       atol=3.e-2))
def _execute(self, x):
    """ Extract the TD features from the given data x

    The rows of *x* listed in ``self.datapoints`` are selected (all rows if
    ``self.datapoints`` is None, the string "None" or 0), optionally
    replaced by their absolute values (``self.absolute``) and flattened
    channel by channel into a single row.

    On the first call (``self.feature_names == []``) the names
    ``TD_<channel>_<time>sec`` are generated and kept for later calls.

    :param x: time series providing ``channel_names`` and
        ``sampling_frequency``
    :returns: FeatureVector with one row
    """
    samples = x.view(numpy.ndarray)
    if self.datapoints == "None":
        self.datapoints = None
    if self.datapoints is None or self.datapoints == 0:
        self.datapoints = range(samples.shape[0])

    def indexToTime(index):
        # relative time to onset; negative indices count from the end
        position = float(index)
        if index < 0:
            position = x.shape[0] + position
        return position / x.sampling_frequency

    # Keep only the requested data points, one column per channel
    selected = samples[self.datapoints, :]
    if self.absolute:
        selected = numpy.fabs(selected)
    selected = selected.T
    # All remaining values become features
    features = selected.reshape((1, selected.shape[0] * selected.shape[1]))
    # The feature names are determined only once
    if self.feature_names == []:
        self.feature_names.extend(
            "TD_%s_%.3fsec" % (channel_name, indexToTime(index))
            for channel_name in x.channel_names
            for index in self.datapoints)
    return FeatureVector(
        numpy.atleast_2d(features).astype(numpy.float64),
        self.feature_names)
def normalization(self, sample):
    """ brings ``self.trafo`` to the same norm as the input sample

    **Principle**

    First the Euclidean norm of the first row of *sample* is determined,
    then the one of the first row of ``self.trafo``. A vanishing norm is
    treated as one. Finally ``self.trafo`` is replaced by its first row
    scaled with ``norm(sample) / norm(trafo)``.

    .. note:: If ``self.trafo`` or *sample* is None, an IOError is raised.
    """
    if self.trafo is None:
        raise IOError(
            "The derivative has not be computed. Cannot perform normalization."
        )
    if sample is None:
        raise IOError(
            "The initial sample has not been given. Cannot perform normalization."
        )
    sample_row = sample.view(numpy.ndarray)[0, :]
    sample_norm = numpy.linalg.norm(sample_row)
    if sample_norm == 0:
        sample_norm = 1
    trafo_row = self.trafo.view(numpy.ndarray)[0, :]
    trafo_norm = numpy.linalg.norm(trafo_row)
    if trafo_norm == 0:
        trafo_norm = 1
    rescaled = trafo_row * sample_norm / trafo_norm
    self.trafo = FeatureVector.replace_data(self.trafo, rescaled)
def store_state(self, result_dir, index=None):
    """ Stores this node in the given directory *result_dir*

    Nothing is written unless ``self.store`` is set and
    ``self.kernel_type`` is ``'LINEAR'``. Otherwise the learned features
    are written twice into ``<result_dir>/<class name>/``: pickled
    (``features_sp<split>.pickle``) and as plain string
    (``features_sp<split>.yaml``). If the node has no ``features``
    attribute yet, it is derived from ``self.w``.

    After writing, the ``features`` attribute is deleted.

    :param result_dir: directory in which the node directory is created
    :param index: not used by this method
    """
    if not (self.store and self.kernel_type == 'LINEAR'):
        return
    node_dir = os.path.join(result_dir, self.__class__.__name__)
    from pySPACE.tools.filesystem import create_directory
    create_directory(node_dir)
    try:
        self.features
    except AttributeError:
        # no features available yet: fall back to the weight vector
        if type(self.w) == FeatureVector:
            self.features = self.w
        elif self.w is not None:
            self.features = FeatureVector(self.w.T, self.feature_names)
        else:
            self.features = None
    if self.features is None:
        return
    # This node stores the learned features. The context managers close
    # the files even if writing fails.
    name = "%s_sp%s.pickle" % ("features", self.current_split)
    with open(os.path.join(node_dir, name), "wb") as result_file:
        result_file.write(cPickle.dumps(self.features, protocol=2))
    name = "%s_sp%s.yaml" % ("features", self.current_split)
    with open(os.path.join(node_dir, name), "wb") as result_file:
        result_file.write(str(self.features))
    del self.features
def _execute(self, x):
    """ Extract the TD features from the given data x

    For every channel and every entry of ``self.datapoints`` the mean over
    a window of ``self.moving_window_length`` samples is computed. The
    features are differences of these means:

    * within one channel between a later and an earlier window
      (``TDIntraChannel_<channel>_<time1>sec_<time2>sec``)
    * between two different channels for the same window
      (``TDInterChannel_<channel1>-<channel2>_<time>sec``)

    The feature names are generated on the first call only
    (``self.feature_names == []``).

    :param x: time series providing ``channel_names``,
        ``sampling_frequency``, ``start_time`` and ``end_time``
    :returns: FeatureVector with one row
    """
    # TODO: Shorten maybe this code
    if self.datapoints == "None":
        self.datapoints = None
    if self.datapoints is None:
        self.datapoints = range(x.shape[0])
    y = x.view(numpy.ndarray)
    # From each selected channel we extract the specified datapoints.
    # Floor division keeps the window borders integral.
    half_before = self.moving_window_length // 2
    half_after = (self.moving_window_length + 1) // 2
    indices = [range(max(0, datapoint - half_before),
                     min(x.shape[0], datapoint + half_after))
               for datapoint in self.datapoints]
    channel_features = dict()
    for channel_name in x.channel_names:
        channel_index = x.channel_names.index(channel_name)
        for number, index_range in enumerate(indices):
            channel_features[(channel_name, number)] = \
                numpy.mean(y[index_range, channel_index])

    # Mapping from datapoint index to relative time to onset
    def indexToTime(index):
        if index >= 0:
            return float(index) / x.sampling_frequency
        else:
            return (x.end_time - x.start_time) / 1000.0 \
                + float(index) / x.sampling_frequency

    # Values and (on the first call) names are collected in one pass over
    # all key pairs, which keeps both lists in the same order.
    collect_names = self.feature_names == []
    features = []
    for channel1, number1 in channel_features:
        for channel2, number2 in channel_features:
            if channel1 == channel2 and number1 > number2:
                features.append(channel_features[(channel1, number1)] -
                                channel_features[(channel2, number2)])
                if collect_names:
                    self.feature_names.append(
                        "TDIntraChannel_%s_%.3fsec_%.3fsec" %
                        (channel1, indexToTime(number1),
                         indexToTime(number2)))
            elif number1 == number2 and channel1 != channel2:
                features.append(channel_features[(channel1, number1)] -
                                channel_features[(channel2, number2)])
                if collect_names:
                    self.feature_names.append(
                        "TDInterChannel_%s-%s_%.3fsec" %
                        (channel1, channel2, indexToTime(number1)))
    return FeatureVector(
        numpy.atleast_2d(features).astype(numpy.float64),
        self.feature_names)
def _execute(self, data):
    """ Fit a straight line to segments of every channel of *data*

    Each channel is cut into segments of ``self.segment_width``
    milliseconds which are shifted by ``self.stepsize`` milliseconds. A
    polynomial of degree one is fitted to every segment with
    ``numpy.polyfit`` and the coefficients selected by
    ``self.coefficients_used`` become the features, named
    ``LSFOffset_...`` and ``LSFSlope_...`` with start and end time of the
    segment.

    :param data: time series providing ``channel_names`` and
        ``sampling_frequency``
    :returns: FeatureVector with one row
    """
    # Convert window_width and step size from milliseconds to data points
    segment_width = self.segment_width / 1000.0 * data.sampling_frequency
    segment_width = int(round(segment_width))
    stepsize = self.stepsize / 1000.0 * data.sampling_frequency
    stepsize = int(round(stepsize))
    if stepsize <= 0:
        stepsize = 1000
        self._log("Too small stepsize used! Changed to 1000.",
                  level=logging.ERROR)
    sample_width = int(1000 / data.sampling_frequency)
    # The subwindows of the time series to which a straight line is fitted.
    # Floor division keeps the window count an integer, which is required
    # for the array shape below.
    num_windows = \
        data.shape[1] * ((data.shape[0] - segment_width) // stepsize + 1)
    windows = numpy.zeros((segment_width, num_windows))
    feature_names = []
    counter = 0
    data_array = data.view(numpy.ndarray)
    for channel_index, channel_name in enumerate(data.channel_names):
        start = 0  # Start of segment (index)
        while start + segment_width <= data.shape[0]:
            end = start + segment_width
            # calculate sub-windows
            windows[:, counter] = data_array[start:end, channel_index]
            # feature name consists of start and end time in seconds
            start_time = float(start * sample_width) / 1000.0
            end_time = float(end * sample_width) / 1000.0
            # coefficients_used is inverted (see __init__)
            if 0 in self.coefficients_used:
                feature_names.append(
                    "LSFOffset_%s_%.3fsec_%.3fsec" %
                    (channel_name, start_time, end_time))
            if 1 in self.coefficients_used:
                feature_names.append(
                    "LSFSlope_%s_%.3fsec_%.3fsec" %
                    (channel_name, start_time, end_time))
            # Move to next segment
            start = start + stepsize
            counter += 1
    assert counter == windows.shape[1]
    # Compute the local straight line features
    coeffs = numpy.polyfit(range(windows.shape[0]), windows, 1)
    coeffs = coeffs[self.coefficients_used].flatten('F')
    return FeatureVector(
        numpy.atleast_2d(coeffs).astype(numpy.float64), feature_names)
def setUp(self):
    """ Create the TimeSeries and FeatureVector fixtures of the tests """
    import uuid

    # small factories: every object gets its own lists and dicts
    def six_values():
        return [1, 2, 3, 4, 5, 6]

    def six_names():
        return ['a', 'b', 'c', 'd', 'e', 'f']

    def series_specs():
        return {'Nice_Parameter': 1, 'Less_Nice_Param': '2'}

    def vector_specs():
        return {'NiceParam': 1, 'LessNiceParam': 2}

    # ---- TimeSeries ----
    # x1: named series, key and tag are generated automatically
    self.x1 = TimeSeries(six_values(), six_names(), 12,
                         marker_name='S4',
                         name='Name_text ending with Standard',
                         start_time=1000.0, end_time=1004.0)
    self.x1.specs = series_specs()
    self.x1.generate_meta()

    # x2: named series, key and tag are set by hand
    self.x2 = TimeSeries(six_values(), six_names(), 12,
                         marker_name='S4',
                         start_time=2000.0, end_time=2004.0,
                         name='Name_text ending with Standard')
    self.x2_key = uuid.uuid4()
    self.x2.key = self.x2_key
    self.x2.tag = 'Tag of x2'
    self.x2.specs = series_specs()

    # x3: unnamed series with generated meta data
    self.x3 = TimeSeries(six_values(), six_names(), 12,
                         marker_name='S4',
                         start_time=3000.0, end_time=3004.0)
    self.x3.specs = series_specs()
    self.x3.generate_meta()

    # x4: only marker name and specs
    self.x4 = TimeSeries(six_values(), six_names(), 12, marker_name='S4')
    self.x4.specs = series_specs()

    # x5: short series inheriting the meta data of x2
    self.x5 = TimeSeries([1, 2], ['a', 'b'], 12)
    self.x5.inherit_meta_from(self.x2)

    # x6: own meta data first, then overwritten by inheriting from x2
    self.x6 = TimeSeries(six_values(), six_names(), 12)
    self.x6.specs = {'Nice_Parameter': 11, 'Less_Nice_Param': '21'}
    self.x6.generate_meta()
    # remember the generated key before inheriting
    self.x6_key = self.x6.key
    self.x6.inherit_meta_from(self.x2)

    self.some_nice_dict = {'guido': 4127, 'irv': 4127, 'jack': 4098}
    self.x6.add_to_history(self.x5, self.some_nice_dict)

    # ---- FeatureVectors ----
    self.f1 = FeatureVector(six_values(), six_names())
    self.f1.specs = vector_specs()

    self.f2 = FeatureVector(six_values(), six_names(), tag='Tag of f2')
    self.f2.specs = vector_specs()

    self.f3 = FeatureVector([1, 2], ['a', 'b'])
    self.f3.inherit_meta_from(self.x2)
    self.f3.add_to_history(self.x5)
def _execute(self, data):
    """ Apply ``self.scikits_alg.transform`` to the first row of *data*

    If ``self.feature_names`` is None, names of the form
    ``<class name>_<column index>`` are generated from the number of
    columns of the transformed data and kept for later calls.

    :param data: feature vector to transform
    :returns: FeatureVector with the transformed data
    """
    first_row = data.view(numpy.ndarray)[0]
    out = self.scikits_alg.transform(first_row)
    if self.feature_names is None:
        prefix = self.__class__.__name__
        self.feature_names = ["%s_%s" % (prefix, column)
                              for column in range(out.shape[1])]
    return FeatureVector(out, self.feature_names)
def test_fda(self):
    """ Train FDA and test on training data

    The node is trained with the samples of ``self.x_b`` (label 'b') and
    ``self.x_a`` (label 'a') and must then assign every training sample
    to its own class.

    Note: ``self.x_a`` and ``self.x_b`` are replaced by lists of
    FeatureVectors.
    """
    # (generalized) Fisher Discriminant Analysis
    fda_node = FDAClassifierNode()
    for x in self.x_b:
        fda_node.train(FeatureVector(x), 'b')
    for x in self.x_a:
        fda_node.train(FeatureVector(x), 'a')
    fda_node.stop_training()

    # for calling execute we need FeatureVectors since meta data is
    # handled there
    self.x_a = [FeatureVector(numpy.atleast_2d(elem)) for elem in self.x_a]
    self.x_b = [FeatureVector(numpy.atleast_2d(elem)) for elem in self.x_b]

    classification_a = [fda_node.execute(fv).label for fv in self.x_a]
    classification_b = [fda_node.execute(fv).label for fv in self.x_b]

    # The builtin all() really inspects every label. The former
    # numpy.alltrue(map(...)) is vacuously true when map returns an
    # iterator, and both assert_ and numpy.alltrue are deprecated aliases.
    self.assertTrue(all(label == 'a' for label in classification_a))
    self.assertTrue(all(label == 'b' for label in classification_b))
def _invert(self, data):
    """ Turn a single-row TimeSeries back into a FeatureVector

    The invert function is needed for the inverse node. The channel names
    of *data* become the feature names of the result.

    :param data: TimeSeries whose first dimension has length one
    :returns: FeatureVector with the data of the time series
    """
    received_type = type(data)
    assert received_type == TimeSeries, (
        "Feature2MonoTimeSeries inversion requires TimeSeries inputs "
        "not %s" % received_type)
    row_count = data.shape[0]
    assert row_count == 1, "Wrong array shape: %s." % row_count
    return FeatureVector(data.view(numpy.ndarray),
                         feature_names=data.channel_names)
def _execute(self, data):
    """ Turn the prediction of a PredictionVector into features

    A list-valued prediction yields one feature per entry, named
    ``<self.name>prediction_<i>``; any other prediction yields the single
    feature ``<self.name>prediction``.

    .. todo:: Give the possibility to give the new feature names to the
              transformation manually. Especially useful for ensemble
              approaches.

    :param data: PredictionVector (checked by an assertion)
    :returns: FeatureVector with one row
    """
    received_type = type(data)
    assert received_type == PredictionVector, (
        "Prediction2FeaturesNode requires PredictionVector inputs "
        "not %s" % received_type)
    if type(data.prediction) == list:
        f_names = []
        for position in range(len(data.prediction)):
            f_names.append(self.name + "prediction_" + str(position))
        return FeatureVector(numpy.array([data.prediction]), f_names)
    # scalar prediction: exactly one feature
    f_name = self.name + "prediction"
    return FeatureVector(numpy.array([[data.prediction]]), [f_name])
def _execute(self, data):
    """ Scale the first row of *data* to maximum norm one

    The row is converted to double precision and divided by its largest
    absolute value; a row of zeros is left unchanged.

    :param data: feature vector to normalize
    :returns: FeatureVector with the feature names of *data*
    """
    # the conversion yields a float copy, so integer input can be divided
    # in place
    values = data.view(numpy.ndarray)[0, :].astype(numpy.double)
    largest = numpy.max(numpy.abs(values))
    divisor = 1 if largest == 0 else largest
    values /= divisor
    return FeatureVector([values], data.feature_names)
def _execute(self, x):
    """ Compute Pearson correlations between segments of channel pairs

    The time axis is divided by ``self.segments + 2`` equally spaced
    borders; segment ``i`` reaches from border ``i`` to border ``i + 2``.
    For every combination of a segment of the first channel and a segment
    of the second channel (the second segment index runs from 0 up to the
    first index plus ``self.max_segment_shift``) and for every unordered
    channel pair the correlation coefficient is computed.

    Feature names have the form
    ``Correlation_<ch1>_<ch2>_<start>sec_<end>sec_<segment index 2>``.

    :param x: time series providing ``channel_names`` and
        ``sampling_frequency``
    :returns: FeatureVector with one row
    """
    # Compute the indices of the segment borders lazily when the data is
    # known
    if self.segment_border_indices is None:
        datapoints = x.shape[0]
        # floor division: the borders are used as slice indices
        borders = [k * datapoints // (self.segments + 1)
                   for k in range(0, self.segments + 2)]
        self.segment_border_indices = [(borders[i], borders[i + 2])
                                       for i in range(self.segments)]
    data = x.view(numpy.ndarray)
    features = []
    feature_names = []
    # Iterate over all segment combinations:
    for segment_index_channel1 in range(self.segments):
        segment_borders1 = \
            self.segment_border_indices[segment_index_channel1]
        last_shifted = min(self.segments,
                           segment_index_channel1 +
                           self.max_segment_shift + 1)
        for segment_index_channel2 in range(0, last_shifted):
            segment_borders2 = \
                self.segment_border_indices[segment_index_channel2]
            # Iterate over all channel pairs
            for i, channel1_name in enumerate(x.channel_names):
                channel1_index = x.channel_names.index(channel1_name)
                for channel2_name in x.channel_names[i + 1:]:
                    channel2_index = x.channel_names.index(channel2_name)
                    # Get segments whose correlation should be computed
                    segment1 = data[segment_borders1[0]:segment_borders1[1],
                                    channel1_index]
                    segment2 = data[segment_borders2[0]:segment_borders2[1],
                                    channel2_index]
                    # Bring segments to the same shape
                    if segment1.shape[0] != segment2.shape[0]:
                        min_shape = min(segment1.shape[0],
                                        segment2.shape[0])
                        segment1 = segment1[0:min_shape]
                        segment2 = segment2[0:min_shape]
                    # Pearson correlation of the two segments;
                    # numpy.corrcoef is the function behind the old
                    # scipy.corrcoef alias
                    correlation = numpy.corrcoef(segment1, segment2)[0, 1]
                    features.append(correlation)
                    feature_names.append(
                        "Correlation_%s_%s_%ssec_%ssec_%s" %
                        (channel1_name, channel2_name,
                         segment_borders1[0] / x.sampling_frequency,
                         segment_borders1[1] / x.sampling_frequency,
                         segment_index_channel2))
    return FeatureVector(
        numpy.atleast_2d(features).astype(numpy.float64), feature_names)
def _execute(self, data):
    """ Keep only the retained features of *data*

    On the first call ``self.build_feature_selector`` is invoked to
    determine ``self.retained_indices`` and the matching feature names are
    stored in ``self.feature_names``.

    :param data: feature vector to filter
    :returns: FeatureVector restricted to the retained columns
    """
    if self.retained_indices is None:
        self.build_feature_selector(data=data)
    if self.feature_names is None:
        kept_names = []
        for index in self.retained_indices:
            kept_names.append(data.feature_names[index])
        self.feature_names = kept_names
    values = data.view(numpy.ndarray)
    selected = values[:, self.retained_indices]
    return FeatureVector(selected, self.feature_names)
def setUp(self):
    """ Build the four feature vectors used throughout the tests """
    values = (1, 2, 3, 4, 5, 6)
    names = ('a', 'b', 'c', 'd', 'e', 'f')
    # f1: names given, no tag
    self.f1 = FeatureVector(list(values), list(names))
    # f2: like f1, with tag
    self.f2 = FeatureVector(list(values), list(names), tag='Tag of f2')
    # f3: short vector, no tag
    self.f3 = FeatureVector([1, 2], ['a', 'b'])
    # f4: short vector without feature names
    self.f4 = FeatureVector([1, 2])
def _execute(self, x):
    """ Correlate every channel segment of *x* with the class averages

    The time axis is divided by ``self.segments + 2`` equally spaced
    borders; segment ``i`` reaches from border ``i`` to border ``i + 2``.
    For every segment, channel and label in ``self.class_averages`` the
    Pearson correlation coefficient between the segment of the class
    average and the segment of *x* becomes one feature named
    ``Pearson_<channel>_Class<label>_<start>sec_<end>sec``.

    :param x: time series providing ``channel_names`` and
        ``sampling_frequency``
    :returns: FeatureVector with one row
    """
    # Compute the indices of the segment borders lazily when the data is
    # known
    if self.segment_border_indices is None:
        datapoints = x.shape[0]
        # floor division: the borders are used as slice indices
        borders = [k * datapoints // (self.segments + 1)
                   for k in range(0, self.segments + 2)]
        self.segment_border_indices = [(borders[i], borders[i + 2])
                                       for i in range(self.segments)]
    features = []
    feature_names = []
    # Iterate over all segments:
    for segment_borders in self.segment_border_indices:
        # Iterate over all channels
        for channel_name in x.channel_names:
            channel_index = x.channel_names.index(channel_name)
            # Correlation of the channel to the class average
            for label in self.class_averages:
                channel_seg_avg = self.class_averages[label][
                    segment_borders[0]:segment_borders[1], channel_index]
                sample_seq = \
                    x[segment_borders[0]:segment_borders[1], channel_index]
                # corrcoef returns the 2x2 correlation matrix; one scalar
                # per feature name is needed, i.e. an off-diagonal entry
                # ([0, 1] or [1, 0] doesn't matter)
                correlation = numpy.corrcoef(channel_seg_avg,
                                             sample_seq)[0, 1]
                features.append(correlation)
                feature_names.append(
                    "Pearson_%s_Class%s_%ssec_%ssec" %
                    (channel_name, label,
                     segment_borders[0] / x.sampling_frequency,
                     segment_borders[1] / x.sampling_frequency))
    return FeatureVector(
        numpy.atleast_2d(features).astype(numpy.float64), feature_names)
def _execute(self, feature_vector):
    """ Restrict the feature vector to the retained features

    Columns and feature names are selected with
    ``self.retained_feature_indices``.

    :param feature_vector: vector to project
    :returns: new FeatureVector containing only the retained features
    """
    kept = self.retained_feature_indices
    # names of the retained columns, in the order of the indices
    kept_names = []
    for index in kept:
        kept_names.append(feature_vector.feature_names[index])
    return FeatureVector(feature_vector[:, kept], kept_names)
def _execute(self, data):
    """ Scale the first row of *data* to norm one

    The norm is computed with ``numpy.linalg.norm`` using ``self.ord``; a
    norm of zero is replaced by one, so a zero vector stays unchanged.
    The feature names of the first processed vector are remembered.

    :param data: feature vector to normalize
    :returns: FeatureVector carrying ``self.feature_names``
    :raises InconsistentFeatureVectorsException:
        if the feature names of *data* differ from the remembered ones
    """
    known_names = self.feature_names
    if known_names is None:
        self.feature_names = data.feature_names
    elif known_names != data.feature_names:
        raise InconsistentFeatureVectorsException(
            "Two feature vectors do not contain the same features!")
    row = data.view(numpy.ndarray)[0, :]
    length = numpy.linalg.norm(row, self.ord)
    if length == 0:
        length = 1
    scaled = row * numpy.longdouble(1) / length
    return FeatureVector([scaled], self.feature_names)
def _execute(self, data):
    """ Normalizes the feature vector data.

    Normalizes the feature vector data by subtracting the
    *translation* variable and scaling it with *mult*.

    If ``self.load_path`` is set, translation, mult and feature names are
    loaded from that pickle file on the first call. The positions of the
    features of *data* within ``self.feature_names`` are determined once
    and stored in ``self.feature_indices``. NaN and infinite results are
    replaced by zero. If no translation is available, all values are set
    to zero.

    .. todo:: check if problems in data transformation still occur

    :param data: feature vector to normalize
    :returns: new FeatureVector with the feature names of *data*
    :raises InconsistentFeatureVectorsException:
        if *data* contains a feature that is not in ``self.feature_names``
    """
    if not (self.load_path is None or self.load_path == "already_loaded"):
        self.replace_keywords_in_load_path()
        # NOTE(security): unpickling executes arbitrary code; load_path
        # must only point to trusted files.
        # Binary mode is required for pickles; the context manager closes
        # the file even if loading fails.
        with open(self.load_path, 'rb') as load_file:
            self.translation, self.mult, self.feature_names = \
                cPickle.load(load_file)
        self.load_path = "already_loaded"
    self.extract_feature_names(data)
    # mapping of feature names if current features are a subset
    # of loaded feature normalization in the training
    if self.feature_indices is None:
        try:
            if type(self.feature_names) is numpy.ndarray:
                self.feature_names = self.feature_names.tolist()
            self.feature_indices = [
                self.feature_names.index(feature_name)
                for feature_name in data.feature_names
            ]
        except ValueError:
            raise InconsistentFeatureVectorsException(
                "Cannot normalize a feature vector "
                "with an unknown feature dimension!")
    # The data reference is not changed or deleted but here it is
    # temporarily replaced.
    if self.translation is not None:
        data = (data - self.translation[self.feature_indices]) \
            * self.mult[self.feature_indices]
    else:
        data = data * 0
    # Handle cases where lower and upper bound are identical
    # This is for example the case, when
    # one feature generating measurement device is off or out of order
    # TODO check if still needed
    data[numpy.isnan(data)] = 0.0
    data[numpy.isinf(data)] = 0.0
    return FeatureVector(data, data.feature_names)
class FeatureVectorTestCase(unittest.TestCase):
    """Test for FeatureVector data type"""

    def setUp(self):
        """ Create the feature vectors shared by the tests """
        six_values = [1, 2, 3, 4, 5, 6]
        six_names = ['a', 'b', 'c', 'd', 'e', 'f']
        # six named features, no tag
        self.f1 = FeatureVector(list(six_values), list(six_names))
        # same content as f1, but with an explicit tag
        self.f2 = FeatureVector(list(six_values), list(six_names),
                                tag='Tag of f2')
        # two named features, no tag
        self.f3 = FeatureVector([1, 2], ['a', 'b'])
        # no feature names given
        self.f4 = FeatureVector([1, 2])

    def test_get_feature_names(self):
        # The getter has to agree with the attribute ...
        for vector in (self.f1, self.f2, self.f3):
            self.assertEqual(vector.feature_names,
                             vector.get_feature_names())
        # ... and deliver these names for a vector created without names
        expected_defaults = ["feature_0_0.000sec", "feature_1_0.000sec"]
        self.assertEqual(self.f4.get_feature_names(), expected_defaults)

    def test_set_feature_names(self):
        # Names of a vector that already had names are replaced
        self.f1.set_feature_names(['m', 'n', 'o', 'p', 'q'])
        self.assertEqual(self.f1.feature_names, ['m', 'n', 'o', 'p', 'q'])
        # Names can be set on a vector created without names
        self.f4.set_feature_names(['a', 'b'])
        self.assertEqual(self.f4.feature_names, ['a', 'b'])

    def test_replace_data(self):
        # Replacing only the data keeps names and tag of the template
        data = FeatureVector.replace_data(self.f2,
                                          [10, 20, 30, 40, 50, 60])
        deviation = data.view(numpy.ndarray) - [10, 20, 30, 40, 50, 60]
        self.assertFalse(deviation.any())
        self.assertEqual(data.feature_names,
                         ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertEqual(data.tag, 'Tag of f2')

        # Explicitly passed feature names override the template's names
        data2 = FeatureVector.replace_data(
            self.f1, [4, 5, 6, 7, 8, 9],
            feature_names=['m', 'n', 'o', 'p', 'q', 'r'])
        deviation2 = data2.view(numpy.ndarray) - [4, 5, 6, 7, 8, 9]
        self.assertFalse(deviation2.any())
        self.assertEqual(data2.feature_names,
                         ['m', 'n', 'o', 'p', 'q', 'r'])
        self.assertEqual(data2.tag, None)
def _execute(self, x):
    """ Apply the custom feature function to every channel of *x*

    The rows listed in ``self.datapoints`` are selected (all rows if it is
    None, the string "None" or 0). The expression given as string in
    ``self.feature_function`` is turned once into a function of the
    channel data ``x`` and evaluated per channel. If the evaluation fails
    for a channel, zeros are written for it and a warning is issued.

    Feature names have the form ``CustomFeature1_<channel>_<i>`` and are
    generated on the first call only (``self.feature_names == []``).

    :param x: time series providing ``channel_names`` and ``tag``
    :returns: FeatureVector with one row
    """
    y = x.view(numpy.ndarray)
    if self.datapoints == "None":
        self.datapoints = None
    if self.datapoints is None or self.datapoints == 0:
        self.datapoints = range(y.shape[0])
    # We project onto the data points that should be used as features
    y = y[self.datapoints, :]
    # generate feat_func from string representation if not done yet
    if self.feat_func is None:
        # NOTE(security): eval executes arbitrary code; the
        # feature_function string must come from a trusted source.
        self.feat_func = eval("lambda x: numpy.atleast_1d(" +
                              self.feature_function + ").flatten()")
    # initialize 2D array for transformation results
    nr_feats_per_channel = len(self.feat_func(y[:, 0]))
    res = numpy.zeros((nr_feats_per_channel, y.shape[1]))
    # eval feat_func for each channel
    for curr_chan in range(y.shape[1]):
        try:
            res[:, curr_chan] = self.feat_func(y[:, curr_chan])
        except Exception:
            # Deliberate best effort: pass zeros as features. Only
            # ordinary errors are caught, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            res[:, curr_chan] = numpy.zeros_like(res[:, curr_chan])
            warnings.warn("Feature Function failed or delivered wrong " +
                          "dimensions for channel %s in window: %s. " %
                          (x.channel_names[curr_chan], x.tag) +
                          "Wrote zeros in the feature vector instead.")
    # flatten, such that feats from one channel stay grouped together
    features = res.flatten('F')
    # Feature names
    if self.feature_names == []:
        for channel_name in x.channel_names:
            for i in range(nr_feats_per_channel):
                self.feature_names.append("CustomFeature1_%s_%d" %
                                          (channel_name, i))
    # Create and return the feature vector
    return FeatureVector(
        numpy.atleast_2d(features).astype(numpy.float64),
        self.feature_names)
def _execute(self, feature_vector):
    """ Projects the feature vector onto the retained features

    During the first call (as long as no indices are stored),
    ``num_retained_features`` column indices are drawn at random and
    kept for all following calls.

    **Parameters**

        :feature_vector:
            two dimensional ``FeatureVector`` whose columns are selected

    :returns: ``FeatureVector`` with the retained columns and their names
    """
    # Identity check: a comparison with ``==`` is evaluated element-wise
    # if the indices are stored in a numpy array.
    if self.retained_feature_indices is None:
        # The indices of the features that will be retained
        self.retained_feature_indices = random.sample(
            range(feature_vector.shape[1]), self.num_retained_features)
        self.feature_names = feature_vector.feature_names
    # Project the features onto the selected subspace
    proj_features = feature_vector[:, self.retained_feature_indices]
    # Update the feature_names list
    feature_names = [feature_vector.feature_names[index]
                     for index in self.retained_feature_indices]
    # Create feature vector instance
    projected_feature_vector = FeatureVector(proj_features, feature_names)
    return projected_feature_vector
class FeatureVectorTestCase(unittest.TestCase):
    """Checks name and data handling of the FeatureVector data type"""

    def setUp(self):
        """ Prepare four feature vectors as fixtures """
        # f1: six named features, created without a tag
        self.f1 = FeatureVector([1, 2, 3, 4, 5, 6],
                                ['a', 'b', 'c', 'd', 'e', 'f'])
        # f2: like f1, but created with a tag
        self.f2 = FeatureVector([1, 2, 3, 4, 5, 6],
                                ['a', 'b', 'c', 'd', 'e', 'f'],
                                tag='Tag of f2')
        # f3: two named features, created without a tag
        self.f3 = FeatureVector([1, 2], ['a', 'b'])
        # f4: created without feature names
        self.f4 = FeatureVector([1, 2])

    def test_get_feature_names(self):
        """ Getter and attribute have to deliver the same names """
        for named_vector in (self.f1, self.f2, self.f3):
            self.assertEqual(named_vector.feature_names,
                             named_vector.get_feature_names())
        self.assertEqual(
            self.f4.get_feature_names(),
            ["feature_0_0.000sec", "feature_1_0.000sec"])

    def test_set_feature_names(self):
        """ The setter has to store the names it receives """
        cases = ((self.f1, ['m', 'n', 'o', 'p', 'q']),
                 (self.f4, ['a', 'b']))
        for vector, new_names in cases:
            # compare against a separate copy of the names
            vector.set_feature_names(list(new_names))
            self.assertEqual(vector.feature_names, new_names)

    def test_replace_data(self):
        """ Replacing data keeps the meta data unless names are given """
        new_values = [10, 20, 30, 40, 50, 60]
        data = FeatureVector.replace_data(self.f2, list(new_values))
        difference = data.view(numpy.ndarray) - new_values
        self.assertFalse(difference.any())
        self.assertEqual(data.feature_names, ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertEqual(data.tag, 'Tag of f2')

        other_values = [4, 5, 6, 7, 8, 9]
        other_names = ['m', 'n', 'o', 'p', 'q', 'r']
        data2 = FeatureVector.replace_data(self.f1, list(other_values),
                                           feature_names=list(other_names))
        difference = data2.view(numpy.ndarray) - other_values
        self.assertFalse(difference.any())
        self.assertEqual(data2.feature_names, other_names)
        self.assertEqual(data2.tag, None)
def test_normalization(self):
    """ compares the FeatureVector result with a manually computed one

    **Principle**

    For integer and floating point samples, the output of the node has to
    match the sample divided by its Euclidean norm.
    """
    data_points = [numpy.arange(start=1, stop=1000, dtype=data_type)
                   for data_type in (numpy.int64, numpy.longdouble)]
    for point in data_points:
        euclidean_norm = numpy.sqrt(numpy.sum(point**2))
        theoretical = numpy.divide(point, euclidean_norm)
        feature_vector = FeatureVector(point,
                                       feature_names=point.astype(str))
        result = self.node.execute(feature_vector)
        # atol=0. leaves only the relative tolerance of allclose
        matches = numpy.allclose(result.view(numpy.ndarray)[0, :],
                                 theoretical, atol=0.)
        self.assertTrue(matches)
        # rebuild the fixtures before the next data point
        self.setUp()
def _execute(self, x):
    """ General description of algorithm maybe followed by further details

    E.g. log "Hello" during first call and if P2 is set to True,
    always multiply data with P1 and in the other case forward the data.

    Logging is done using
    :func:`~pySPACE.missions.nodes.base_node.BaseNode._log`:

    .. code-block:: python

        self._log(self.P3, level=logging.DEBUG)

    To access only the data array and not the attached meta data, use
    `data = x.view(numpy.ndarray)` for preparation.
    """
    # The message is logged once and then disabled
    if self.P3:
        self._log(self.P3, level=logging.DEBUG)
        self.P3 = False
    data = x.view(numpy.ndarray)
    if not self.P2:
        # nothing to scale: forward the input
        return x
    scaled = self.P1 * data
    return FeatureVector.replace_data(x, scaled)
def feature_vector_conversion(self, data):
    """ Return *data* with its values passed through ``numpy.nan_to_num``

    The meta data of *data* is taken over by ``FeatureVector.replace_data``.
    """
    return FeatureVector.replace_data(data,
                                      numpy.nan_to_num(data.get_data()))
class BaseDataTestCase(unittest.TestCase):
    """Test the meta data handling (tag, key, specs, history) of BaseData

    The fixtures are ``TimeSeries`` and ``FeatureVector`` objects, which
    are used here to exercise tag, key, specs and history.
    """
    def setUp(self):
        """Create some example data

        The order of the statements matters: keys and tags are generated,
        set manually or inherited step by step.
        """
        # Create some TimeSeries:
        # x1: complete meta data, key and tag generated from it
        self.x1 = TimeSeries([1,2,3,4,5,6], ['a','b','c','d','e','f'], 12,
                        marker_name='S4', name='Name_text ending with Standard',
                        start_time=1000.0, end_time=1004.0)
        self.x1.specs={'Nice_Parameter': 1, 'Less_Nice_Param': '2'}
        self.x1.generate_meta() # automatically generate key and tag
        # x2: key and tag are assigned by hand instead of being generated
        self.x2 = TimeSeries([1,2,3,4,5,6], ['a','b','c','d','e','f'], 12,
                        marker_name='S4', start_time=2000.0, end_time=2004.0,
                        name='Name_text ending with Standard')
        # manually generate key and tag
        import uuid
        self.x2_key=uuid.uuid4()
        self.x2.key=self.x2_key
        self.x2.tag='Tag of x2'
        self.x2.specs={'Nice_Parameter': 1, 'Less_Nice_Param': '2'}
        # x3: no name passed, meta data generated from the remaining values
        self.x3 = TimeSeries([1,2,3,4,5,6], ['a','b','c','d','e','f'], 12,
                        marker_name='S4', start_time=3000.0, end_time=3004.0)
        self.x3.specs={'Nice_Parameter': 1, 'Less_Nice_Param': '2'}
        self.x3.generate_meta()
        # x4: generate_meta is never called
        self.x4 = TimeSeries([1,2,3,4,5,6], ['a','b','c','d','e','f'], 12,marker_name='S4')
        self.x4.specs={'Nice_Parameter': 1, 'Less_Nice_Param': '2'}
        # x5: takes its meta data from x2
        self.x5 = TimeSeries([1,2], ['a','b'], 12)
        self.x5.inherit_meta_from(self.x2)
        # x6: generates its own meta data first and inherits afterwards
        self.x6 = TimeSeries([1,2,3,4,5,6], ['a','b','c','d','e','f'], 12)
        self.x6.specs={'Nice_Parameter': 11, 'Less_Nice_Param': '21'}
        self.x6.generate_meta()
        # save the generated key for the comparison in the tests
        self.x6_key=self.x6.key
        self.x6.inherit_meta_from(self.x2)
        self.some_nice_dict = {'guido': 4127, 'irv': 4127, 'jack': 4098}
        self.x6.add_to_history(self.x5, self.some_nice_dict)
        # Create some FeatureVectors:
        self.f1 = FeatureVector([1,2,3,4,5,6],['a','b','c','d','e','f'])
        self.f1.specs={'NiceParam':1,'LessNiceParam':2}
        self.f2 = FeatureVector([1,2,3,4,5,6],['a','b','c','d','e','f'], tag = 'Tag of f2')
        self.f2.specs={'NiceParam':1,'LessNiceParam':2}
        # f3: feature vector that inherits from a time series
        self.f3 = FeatureVector([1,2], ['a','b'])
        self.f3.inherit_meta_from(self.x2)
        self.f3.add_to_history(self.x5)

    def testTag(self):
        """Test tag behavior"""
        # Generate from Meta Data
        self.assertEqual(self.x1.tag,
            'Epoch Start: 1000ms; End: 1004ms; Class: Standard')
        # Tag passed, use that!
        self.assertEqual(self.x2.tag, 'Tag of x2')
        self.assertEqual(self.f2.tag, 'Tag of f2')
        # No tag and only partial meta passed
        self.assertEqual(self.x3.tag,
            'Epoch Start: 3000ms; End: 3004ms; Class: na')
        # No Tag and no meta passed, Tag remains None
        self.assertEqual(self.x4.tag, None)
        self.assertEqual(self.f1.tag, None)

    def testKey(self):
        """Test key behavior"""
        import uuid
        # A generated key is a UUID
        self.assertEqual(type(self.x1.key),uuid.UUID)
        # If Key passed, use that!
        self.assertEqual(self.x2.key, self.x2_key)

    def testInheritAndAddStuff(self):
        """test inheritance of meta data from other objects"""
        # Inherit
        self.assertEqual(self.x5.tag, self.x2.tag)
        self.assertEqual(self.x5.key, self.x2.key)
        self.assertEqual(self.f3.tag, self.x2.tag)
        self.assertEqual(self.f3.key, self.x2.key)
        # Inherit
        # suppress warning of BaseData type and cast data back to numpy
        hist_x6=self.x6.history[0].view(numpy.ndarray)
        data_x5=self.x5.view(numpy.ndarray)
        # history: the stored entry has to carry data, key and tag of x5
        # and the dictionary passed to add_to_history as node specs
        self.assertEqual((hist_x6==data_x5).all(),True)
        self.assertEqual(self.x6.history[0].key,self.x5.key)
        self.assertEqual(self.x6.history[0].tag,self.x5.tag)
        self.assertEqual(self.x6.history[0].specs['node_specs'],self.some_nice_dict)
        hist_f3=self.f3.history[0].view(numpy.ndarray)
        self.assertEqual((hist_f3==data_x5).all(),True)
        self.assertEqual(self.f3.history[0].key,self.x5.key)
        self.assertEqual(self.f3.history[0].tag,self.x5.tag)
        # if key (and tag) were already set, these original values
        # have to be kept
        #
        self.assertEqual(self.x6.key, self.x6_key)
        self.assertEqual(self.x6.tag, self.x2.tag)
        self.x6.inherit_meta_from(self.f3) # should not change tag and key
        self.assertEqual(self.x6.key, self.x6_key)
        self.assertEqual(self.x6.tag, self.x2.tag)
        # testing multiple histories
        x7 = TimeSeries([1,2,3,4,5,6], ['a','b','c','d','e','f'], 12,marker_name='S4')
        x7.add_to_history(self.x1)
        x7.add_to_history(self.x2)
        x7.add_to_history(self.x3)
        x7.add_to_history(self.x4)
        x7.add_to_history(self.x5)
        x7.add_to_history(self.x6)
        x7.add_to_history(self.x1)
        # x1 was added first and last, so both entries share one key
        self.assertEqual(len(x7.history),7)
        self.assertEqual(x7.history[0].key,x7.history[6].key)
        # the entry stored for x6 is expected to have an empty history,
        # although x6 itself got one in setUp
        self.assertEqual(x7.history[5].history,[])

    def testSpecs(self):
        """Test specs behavior"""
        # so far, there's not much going on with specs...
        # same problem as in testKey
        # timeseries doesn't set spec
        self.assertEqual(self.x1.specs,
                         {'Nice_Parameter': 1, 'Less_Nice_Param': '2'})
        # Inherit
        self.assertEqual(self.x5.specs,self.x2.specs)
def feature_vector_conversion(self, data):
    """ Return *data* with its values cast to ``self.type``

    The meta data of *data* is taken over by ``FeatureVector.replace_data``.
    """
    return FeatureVector.replace_data(
        data, data.get_data().astype(self.type))
def forward_difference_method(self, sample):
    """ implementation of the forward difference method

    **Principle**

    The principle applied by this method of numerical differentiation is

    .. math::

        f'(x)=\\frac{f(x+h)-f(x)}{h}

    where :math:`h` is the step of the differentiation that is computed
    as :math:`h(x)=\\sqrt{\\varepsilon} \\cdot x` for :math:`x \\neq 0` and
    :math:`h(0)=\\sqrt{\\varepsilon}` for :math:`x=0`.

    The differentiation method distinguishes between ``FeatureVector`` and
    ``TimeSeries`` inputs and applies the derivative according to the
    input type.
    The result is stored in ``self.trafo``; nothing is returned.
    For any other input type ``self.trafo`` is left untouched.

    **Parameters**

        :sample:
            the initial value used for the derivation

    .. note::
        Out of the three numerical differentiation methods, this one has
        the least overhead. Nonetheless, this method is less accurate than
        the half step method.
    """
    initial_value = self._execute(sample)
    if type(sample) == FeatureVector:
        self.trafo = FeatureVector.replace_data(
            self.example, numpy.zeros(self.example.shape))
        for j in range(len(sample.feature_names)):
            data_with_offset = copy.deepcopy(sample)
            orig = data_with_offset[0][j]
            if orig == 0.:
                diff = numpy.sqrt(self.eps)
            else:
                diff = numpy.sqrt(self.eps)*orig
            data_with_offset[0][j] += diff
            # divide by the step that was really applied to the stored value
            diff = data_with_offset[0][j] - orig
            new_feature_vector = FeatureVector.replace_data(
                sample, data_with_offset)
            self.trafo[0][j] = \
                numpy.longdouble((self._execute(new_feature_vector) -
                                  initial_value)/diff)
    elif type(sample) == TimeSeries:
        self.trafo = TimeSeries.replace_data(
            self.example, numpy.zeros(self.example.shape))
        reference = numpy.squeeze(initial_value)
        for i in range(sample.shape[0]):
            for j in range(sample.shape[1]):
                data_with_offset = copy.deepcopy(sample)
                orig = data_with_offset[i][j]
                if orig == 0.:
                    diff = numpy.sqrt(self.eps)
                else:
                    # the step scales with the entry that is varied,
                    # i.e. row i and not always row 0
                    diff = numpy.sqrt(self.eps)*orig
                data_with_offset[i][j] += diff
                # same correction as for feature vectors
                diff = data_with_offset[i][j] - orig
                new_time_series = TimeSeries.replace_data(
                    sample, data_with_offset)
                self.trafo[i][j] = \
                    numpy.longdouble(
                        (numpy.squeeze(self._execute(new_time_series)) -
                         reference)/diff)
def central_difference_with_halfstep_method(self, sample):
    """ implementation of the central difference method with a half step

    **Principle**

    The principle applied by the central difference method with a
    half step is

    .. math::

        f'(x)=\\frac{f(x-h)-8f(x-\\frac{h}{2})+8f(x+\\frac{h}{2})-f(x+h)}{6h}

    where :math:`h` is the step of the differentiation that is computed
    as :math:`h(x)=\\sqrt{\\varepsilon} \\cdot x` for :math:`x \\neq 0` and
    :math:`h(0)=\\sqrt{\\varepsilon}` for :math:`x=0`.

    The result is stored in ``self.trafo``; nothing is returned.
    Inputs that are neither ``FeatureVector`` nor ``TimeSeries`` leave
    ``self.trafo`` untouched.

    **Parameters**

        :sample:
            the initial value used for the derivation

    .. note::
        This method is the most accurate differentiation method but also
        has the greatest overhead.
    """
    if type(sample) == FeatureVector:
        self.trafo = FeatureVector.replace_data(
            self.example, numpy.zeros(self.example.shape))
        for j in range(len(sample.feature_names)):
            positive_offset = copy.deepcopy(sample)
            negative_offset = copy.deepcopy(sample)
            half_positive_offset = copy.deepcopy(sample)
            half_negative_offset = copy.deepcopy(sample)
            if positive_offset[0][j] == 0.:
                diff = numpy.sqrt(self.eps)
            else:
                diff = numpy.sqrt(self.eps)*positive_offset[0][j]
            positive_offset[0][j] += diff
            negative_offset[0][j] -= diff
            half_positive_offset[0][j] += diff/2.
            half_negative_offset[0][j] -= diff/2.
            # use the step that was really applied to the stored values
            diff = (positive_offset[0][j]-negative_offset[0][j])/2.
            positive_vector = FeatureVector.replace_data(
                sample, positive_offset)
            negative_vector = FeatureVector.replace_data(
                sample, negative_offset)
            half_positive_vector = FeatureVector.replace_data(
                sample, half_positive_offset)
            half_negative_vector = FeatureVector.replace_data(
                sample, half_negative_offset)
            self.trafo[0][j] = \
                numpy.longdouble((self._execute(negative_vector) -
                                  8*self._execute(half_negative_vector) +
                                  8*self._execute(half_positive_vector) -
                                  self._execute(positive_vector))/(6.*diff))
    elif type(sample) == TimeSeries:
        self.trafo = TimeSeries.replace_data(
            self.example, numpy.zeros(self.example.shape))
        for i in range(sample.shape[0]):
            for j in range(sample.shape[1]):
                positive_offset = copy.deepcopy(sample)
                negative_offset = copy.deepcopy(sample)
                half_positive_offset = copy.deepcopy(sample)
                half_negative_offset = copy.deepcopy(sample)
                if positive_offset[i][j] == 0.:
                    diff = numpy.sqrt(self.eps)
                else:
                    diff = numpy.sqrt(self.eps)*positive_offset[i][j]
                positive_offset[i][j] += diff
                negative_offset[i][j] -= diff
                half_positive_offset[i][j] += diff/2.
                half_negative_offset[i][j] -= diff/2.
                # use the step that was really applied to the stored values
                diff = (positive_offset[i][j]-negative_offset[i][j])/2.
                positive_series = TimeSeries.replace_data(
                    sample, positive_offset)
                negative_series = TimeSeries.replace_data(
                    sample, negative_offset)
                half_positive_series = TimeSeries.replace_data(
                    sample, half_positive_offset)
                half_negative_series = TimeSeries.replace_data(
                    sample, half_negative_offset)
                self.trafo[i][j] = \
                    numpy.longdouble((self._execute(negative_series) -
                                      8*self._execute(half_negative_series) +
                                      8*self._execute(half_positive_series) -
                                      self._execute(positive_series))/(6.*diff))
def central_difference_method(self, sample):
    """ implementation of the central difference method

    **Principle**

    Every entry of the sample is shifted up and down by a step :math:`h`
    and the derivative is approximated by

    .. math::

        f'(x)=\\frac{f(x+h)-f(x-h)}{2h}

    with :math:`h(x)=\\sqrt{\\varepsilon} \\cdot x` for :math:`x \\neq 0`
    and :math:`h(0)=\\sqrt{\\varepsilon}` for :math:`x=0`.

    The result is written to ``self.trafo``; nothing is returned.

    **Parameters**

        :sample:
            the initial value used for the derivation
    """
    if type(sample) == FeatureVector:
        self.trafo = FeatureVector.replace_data(
            sample, numpy.zeros(sample.shape))
        for j in range(len(sample.feature_names)):
            raised = copy.deepcopy(sample)
            lowered = copy.deepcopy(sample)
            step = (numpy.sqrt(self.eps) if raised[0][j] == 0.
                    else numpy.sqrt(self.eps)*raised[0][j])
            raised[0][j] += step
            lowered[0][j] -= step
            # half of the distance that really lies between both values
            step = (raised[0][j]-lowered[0][j])/2.
            raised_vector = FeatureVector.replace_data(sample, raised)
            lowered_vector = FeatureVector.replace_data(sample, lowered)
            upper_result = self._execute(raised_vector)
            lower_result = self._execute(lowered_vector)
            self.trafo[0][j] = numpy.longdouble(
                (upper_result - lower_result)/(2.*step))
    elif type(sample) == TimeSeries:
        self.trafo = TimeSeries.replace_data(
            self.example, numpy.zeros(self.example.shape))
        for i in range(sample.shape[0]):
            for j in range(sample.shape[1]):
                raised = copy.deepcopy(sample)
                lowered = copy.deepcopy(sample)
                step = (numpy.sqrt(self.eps) if raised[i][j] == 0.
                        else numpy.sqrt(self.eps)*raised[i][j])
                raised[i][j] += step
                lowered[i][j] -= step
                # half of the distance that really lies between both values
                step = (raised[i][j]-lowered[i][j])/2.
                raised_series = TimeSeries.replace_data(sample, raised)
                lowered_series = TimeSeries.replace_data(sample, lowered)
                upper_result = self._execute(raised_series)
                lower_result = self._execute(lowered_series)
                self.trafo[i][j] = numpy.longdouble(
                    (upper_result - lower_result)/(2.*step))