def test_create_complete(self):
    """A Feature built from the minimum and additional fixtures exposes
    the project, the name and every 30-interval statistic as attributes.
    """
    built = Feature(self.minimum_data)
    built.add_custom_data(self.additional_data)

    # (expected value, attribute name), listed in the order they are checked.
    expectations = (
        ('alias/project', 'project'),
        ('status_200', 'name'),
        (0.0, 'min30'),
        (0.0453, 'mean30'),
        (1119.0, 'max30'),
        (28.5136, 'var30'),
        (5.3398, 'std30'),
        (0.0, 'median30'),
    )
    for expected, attribute in expectations:
        eq_(expected, getattr(built, attribute))
def execute(self, *args, **kwargs):
    """
    Compute summary statistics for every feature of a project/interval.

    All examples whose ``p_interval`` matches this project and interval
    are loaded into a matrix. Max, min, mean, std, var and median are
    reduced column-wise over the whole matrix in one pass each; the
    10-bin histogram and the 25/50/75 percentiles are computed one column
    at a time. One ``Feature`` per feature/project is then added to the
    session, carrying one entry per stat/interval.
    """
    project_name = self.project.full_name
    interval = self.interval
    examples = self.session.query(Example).filter_by(
        p_interval="{0}_{1}".format(project_name, interval))
    labels, X = self.create_ndarray(examples)

    # The first three columns are left out of every statistic
    # (presumably non-feature columns -- confirm against create_ndarray).
    feature_matrix = X[:, 3:]
    reducers = (
        ('max', np.amax),
        ('min', np.amin),
        ('mean', np.mean),
        ('std', np.std),
        ('var', np.var),
        ('median', np.median),
    )
    column_stats = {
        stat: reduce_fn(feature_matrix, 0) for stat, reduce_fn in reducers
    }

    # Regroup the per-stat arrays into one record per feature.
    for position, label in enumerate(labels[3:]):
        if label == 'grouptime':
            continue

        record = Feature({'project': project_name, 'name': label})

        # ``position`` indexes the sliced matrix; the full matrix is
        # offset by the three skipped columns.
        values = X[:, position + 3]
        counts, edges = np.histogram(values, bins=10)

        # Arrays are turned into lists because the persistence layer does
        # not support ndarray values.
        custom_data = {
            'histogram{0}'.format(interval): [list(counts), list(edges)],
            'percentile{0}'.format(interval): list(
                np.percentile(values, [25, 50, 75])),
        }
        for stat, per_column in column_stats.items():
            # float() cast is needed before the value is stored.
            custom_data['{0}{1}'.format(stat, interval)] = float(
                per_column[position])

        record.add_custom_data(custom_data)
        self.session.add(record)
def execute(self, *args, **kwargs):
    """
    Compute summary statistics for every feature of a project/interval.

    All examples whose ``p_interval`` matches this project and interval
    are loaded into a matrix. Max, min, mean, std, var and median are
    reduced column-wise over the whole matrix in one pass each; the
    10-bin histogram and the 25/50/75 percentiles are computed one column
    at a time. One ``Feature`` per feature/project is then added to the
    session, carrying one entry per stat/interval.
    """
    project_name = self.project.full_name
    interval = self.interval
    examples = self.session.query(Example).filter_by(
        p_interval="{0}_{1}".format(project_name, interval))
    labels, X = self.create_ndarray(examples)

    # The first three columns are left out of every statistic
    # (presumably non-feature columns -- confirm against create_ndarray).
    feature_matrix = X[:, 3:]
    reducers = (
        ('max', np.amax),
        ('min', np.amin),
        ('mean', np.mean),
        ('std', np.std),
        ('var', np.var),
        ('median', np.median),
    )
    column_stats = {
        stat: reduce_fn(feature_matrix, 0) for stat, reduce_fn in reducers
    }

    # Regroup the per-stat arrays into one record per feature.
    for position, label in enumerate(labels[3:]):
        if label == 'grouptime':
            continue

        record = Feature({'project': project_name, 'name': label})

        # ``position`` indexes the sliced matrix; the full matrix is
        # offset by the three skipped columns.
        values = X[:, position + 3]
        counts, edges = np.histogram(values, bins=10)

        # Arrays are turned into lists because the persistence layer does
        # not support ndarray values.
        custom_data = {
            'histogram{0}'.format(interval): [list(counts), list(edges)],
            'percentile{0}'.format(interval): list(
                np.percentile(values, [25, 50, 75])),
        }
        for stat, per_column in column_stats.items():
            # float() cast is needed before the value is stored.
            custom_data['{0}{1}'.format(stat, interval)] = float(
                per_column[position])

        record.add_custom_data(custom_data)
        self.session.add(record)
def test_to_primitive(self):
    """Smoke test: a fully populated Feature can be converted with
    ``to_primitive()``.
    """
    populated = Feature(self.minimum_data)
    populated.add_custom_data(self.additional_data)

    # No assertion on the result: the test passes as long as the
    # conversion itself does not raise.
    populated.to_primitive()
def test_validate_complete_data(self):
    """Smoke test: ``validate()`` runs on a Feature holding both the
    minimum and the additional fixture data.
    """
    populated = Feature(self.minimum_data)
    populated.add_custom_data(self.additional_data)

    # The test passes as long as validate() does not raise.
    populated.validate()