def test_asobjarray(self):
    # note: ('asd') is just the string 'asd', not a 1-tuple
    for i in ([1, 2, 3], ['a', 2, '3'], ('asd')):
        i_con = asobjarray(i)
        self.failUnless(i_con.dtype is np.dtype('object'))
        self.failUnlessEqual(len(i), len(i_con))
        self.failUnless(np.all(i == i_con))
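# For reference, a minimal sketch of the behaviour the test above exercises,
# assuming only numpy; `asobjarray_sketch` is a hypothetical stand-in, not
# the library's implementation of asobjarray().
import numpy as np

def asobjarray_sketch(x):
    # Build a 1-D array of dtype object whose elements are taken from x,
    # so that tuples survive as single elements instead of being unpacked
    # into a 2-D array.
    res = np.empty(len(x), dtype=object)
    for i, v in enumerate(x):
        res[i] = v
    return res

arr = asobjarray_sketch([(1.0, -1.0), (2.0, -1.0)])
assert arr.dtype is np.dtype('object')
assert arr[0] == (1.0, -1.0)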
def _call(self, dataset):
    # XXX Hm... it might make sense to unify access functions
    # naming across our swig libsvm wrapper and sg access
    # functions for svm
    clf = self.clf
    sgsvm = clf.svm
    sens_labels = None
    if isinstance(sgsvm, shogun.Classifier.MultiClassSVM):
        sens, biases = [], []
        nsvms = sgsvm.get_num_svms()
        clabels = sorted(clf._attrmap.values())
        nclabels = len(clabels)
        sens_labels = []
        isvm = 0                        # index for svm among known
        for i in xrange(nclabels):
            for j in xrange(i + 1, nclabels):
                sgsvmi = sgsvm.get_svm(isvm)
                labels_tuple = (clabels[i], clabels[j])
                # Since we gave the labels in incremental order,
                # we should always be right -- but it does not
                # hurt to check that the set of labels is the same
                if __debug__ and _shogun_exposes_slavesvm_labels:
                    if not sgsvmi.get_labels():
                        # We need to call classify() so labels get assigned
                        # to the multiclass SVM
                        sgsvm.classify()
                    assert(set([sgsvmi.get_label(int(x))
                                for x in sgsvmi.get_support_vectors()])
                           == set(labels_tuple))
                sens1, bias = self.__sg_helper(sgsvmi)
                sens.append(sens1)
                biases.append(bias)
                sens_labels += [labels_tuple[::-1]]     # ??? positive first
                isvm += 1
        assert(len(sens) == nsvms)      # we should have covered all
    else:
        sens1, bias = self.__sg_helper(sgsvm)
        biases = np.atleast_1d(bias)
        sens = np.atleast_2d(sens1)
        if not clf.__is_regression__:
            assert(set(clf._attrmap.values()) == set([-1.0, 1.0]))
            assert(sens.shape[0] == 1)
            sens_labels = [(-1.0, 1.0)]

    ds = Dataset(np.atleast_2d(sens))
    if sens_labels is not None:
        if isinstance(sens_labels[0], tuple):
            # Need to have them in array of dtype object
            sens_labels = asobjarray(sens_labels)
        if len(clf._attrmap):
            sens_labels = clf._attrmap.to_literal(sens_labels, recurse=True)
        ds.sa[clf.params.targets_attr] = sens_labels
    self.ca.biases = biases

    return ds
def _call(self, dataset):
    sens = super(RegressionAsClassifierSensitivityAnalyzer,
                 self)._call(dataset)
    # We can have only a single sensitivity out of regression
    assert(sens.shape[0] == 1)
    if 'targets' not in sens.sa:
        clf = self.clf
        # We just assign a tuple of all labels sorted
        labels = tuple(sorted(clf._trained_attrmap.values()))
        if len(clf._trained_attrmap):
            labels = clf._trained_attrmap.to_literal(labels, recurse=True)
        sens.sa['targets'] = asobjarray([labels])
    return sens
def _call(self, dataset):
    sens = super(RegressionAsClassifierSensitivityAnalyzer,
                 self)._call(dataset)
    # We can have only a single sensitivity out of regression
    assert(sens.shape[0] == 1)
    clf = self.clf
    targets_attr = clf.params.targets_attr
    if targets_attr not in sens.sa:
        # We just assign a tuple of all labels sorted
        labels = tuple(sorted(clf._trained_attrmap.values()))
        if len(clf._trained_attrmap):
            labels = clf._trained_attrmap.to_literal(labels, recurse=True)
        sens.sa[targets_attr] = asobjarray([labels])
    return sens
def _call(self, dataset, callables=[]):
    # local bindings
    clf = self.clf
    model = clf.model

    # Labels for sensitivities to be returned
    sens_labels = None

    if clf.__is_regression__:
        nr_class = None
        # shouldn't bother to provide "targets" for regressions
        svm_labels = None
    else:
        nr_class = model.nr_class
        svm_labels = model.labels

    # No need to warn since by default we now do not do anything
    # evil and just provide labels -- so it is up to the user to
    # decide whether he wants to do something silly
    #if nr_class != 2:
    #    warning("You are estimating sensitivity for SVM %s trained on %d" %
    #            (str(clf), nr_class) +
    #            " classes. Make sure that it is what you intended to do" )

    svcoef = np.matrix(model.get_sv_coef())
    svs = np.matrix(model.get_sv())
    rhos = np.asarray(model.get_rho())

    self.ca.biases = rhos
    if self.params.split_weights:
        if nr_class != 2:
            raise NotImplementedError, \
                  "Cannot compute per-class weights for" \
                  " non-binary classification task"
        # libsvm might have a different idea on the ordering
        # of labels, so we would need to map them back explicitly
        ds_labels = list(dataset.sa[clf.get_space()].unique) # labels in the dataset
        senses = [None for i in ds_labels]
        # first label is given positive value
        for i, (c, l) in enumerate([(svcoef > 0, lambda x: x),
                                    (svcoef < 0, lambda x: x * -1)]):
            # convert to array, and just take the meaningful dimension
            c_ = c.A[0]
            # NOTE svm_labels are numerical; ds_labels are literal
            senses[ds_labels.index(
                        clf._attrmap.to_literal(svm_labels[i]))] = \
                            (l(svcoef[:, c_] * svs[c_, :])).A[0]
        weights = np.array(senses)
        sens_labels = svm_labels
    else:
        # XXX yoh: .mean() effectively averages across "sensitivities"
        # of all paired classifiers (I think). See more info on this
        # topic in svm.py on how sv_coefs are stored
        #
        # First multiply SV coefficients with the actual SVs to get
        # weighted impact of SVs on decision, then for each feature
        # take mean across SVs to get a single weight value
        # per feature
        if nr_class is None or nr_class <= 2:
            # as simple as this
            weights = (svcoef * svs).A
            # and only in case of classification
            if nr_class:
                # ??? First label seems to correspond to positive
                sens_labels = [tuple(svm_labels[::-1])]
        else:
            # we need to compose correctly for each pair of classifiers.
            # See docstring for get_sv_coef for more details on internal
            # structure of bloody storage

            # total # of pairs
            npairs = nr_class * (nr_class - 1) / 2
            # # of SVs in each class
            NSVs_perclass = model.get_n_sv()
            # indices where each class starts in each row of SVs;
            # name is after similar variable in libsvm internals
            nz_start = np.cumsum([0] + NSVs_perclass[:-1])
            nz_end = nz_start + NSVs_perclass
            # reserve storage
            weights = np.zeros((npairs, svs.shape[1]))
            ipair = 0               # index of the pair
            """
            // classifier (i,j): coefficients with
            // i are in sv_coef[j-1][nz_start[i]...],
            // j are in sv_coef[i][nz_start[j]...]
            """
            sens_labels = []
            for i in xrange(nr_class):
                for j in xrange(i + 1, nr_class):
                    weights[ipair, :] = np.asarray(
                        svcoef[j - 1, nz_start[i]:nz_end[i]]
                        * svs[nz_start[i]:nz_end[i]]
                        +
                        svcoef[i, nz_start[j]:nz_end[j]]
                        * svs[nz_start[j]:nz_end[j]])
                    # ??? First label corresponds to positive,
                    # that is why [j], [i]
                    sens_labels += [(svm_labels[j], svm_labels[i])]
                    ipair += 1      # go to the next pair
            assert(ipair == npairs)

    if __debug__ and 'SVM' in debug.active:
        if nr_class:
            nsvs = model.get_n_sv()
        else:
            nsvs = model.get_total_n_sv()
        if clf.__is_regression__:
            svm_type = clf._svm_impl    # type of regression
        else:
            svm_type = '%d-class SVM(%s)' % (nr_class, clf._svm_impl)
        debug('SVM',
              "Extracting weights for %s: #SVs=%s, " % (svm_type, nsvs) +
              " SVcoefshape=%s SVs.shape=%s Rhos=%s." %
              (svcoef.shape, svs.shape, rhos) +
              " Result: min=%f max=%f" % (np.min(weights), np.max(weights)))

    ds_kwargs = {}
    if nr_class:          # for classification only
        # and we should have prepared the labels
        assert(sens_labels is not None)

        if len(clf._attrmap):
            if isinstance(sens_labels[0], tuple):
                sens_labels = asobjarray(sens_labels)
            sens_labels = clf._attrmap.to_literal(sens_labels, recurse=True)

        # NOTE: `weights` is already and always 2D
        ds_kwargs = dict(sa={clf.get_space(): sens_labels})

    weights_ds = Dataset(weights, **ds_kwargs)
    return weights_ds
def _call(self, dataset, callables=[]):
    # local bindings
    clf = self.clf
    model = clf.model

    # Labels for sensitivities to be returned
    sens_labels = None

    if clf.__is_regression__:
        nr_class = None
        # shouldn't bother to provide "targets" for regressions
        svm_labels = None
    else:
        nr_class = model.nr_class
        svm_labels = model.labels

    # No need to warn since by default we now do not do anything
    # evil and just provide labels -- so it is up to the user to
    # decide whether he wants to do something silly
    #if nr_class != 2:
    #    warning("You are estimating sensitivity for SVM %s trained on %d" %
    #            (str(clf), nr_class) +
    #            " classes. Make sure that it is what you intended to do" )

    svcoef = np.matrix(model.get_sv_coef())
    svs = np.matrix(model.get_sv())
    rhos = np.asarray(model.get_rho())

    self.ca.biases = rhos
    if self.params.split_weights:
        if nr_class != 2:
            raise NotImplementedError, \
                  "Cannot compute per-class weights for" \
                  " non-binary classification task"
        # libsvm might have a different idea on the ordering
        # of labels, so we would need to map them back explicitly
        ds_labels = list(dataset.sa[
            clf.params.targets_attr].unique)        # labels in the dataset
        senses = [None for i in ds_labels]
        # first label is given positive value
        for i, (c, l) in enumerate([(svcoef > 0, lambda x: x),
                                    (svcoef < 0, lambda x: x * -1)]):
            # convert to array, and just take the meaningful dimension
            c_ = c.A[0]
            # NOTE svm_labels are numerical; ds_labels are literal
            senses[ds_labels.index(
                        clf._attrmap.to_literal(svm_labels[i]))] = \
                            (l(svcoef[:, c_] * svs[c_, :])).A[0]
        weights = np.array(senses)
        sens_labels = svm_labels
    else:
        # XXX yoh: .mean() effectively averages across "sensitivities"
        # of all paired classifiers (I think). See more info on this
        # topic in svm.py on how sv_coefs are stored
        #
        # First multiply SV coefficients with the actual SVs to get
        # weighted impact of SVs on decision, then for each feature
        # take mean across SVs to get a single weight value
        # per feature
        if nr_class is None or nr_class <= 2:
            # as simple as this
            weights = (svcoef * svs).A
            # and only in case of classification
            if nr_class:
                # ??? First label seems to correspond to positive
                sens_labels = [tuple(svm_labels[::-1])]
        else:
            # we need to compose correctly for each pair of classifiers.
            # See docstring for get_sv_coef for more details on internal
            # structure of bloody storage

            # total # of pairs
            npairs = nr_class * (nr_class - 1) / 2
            # # of SVs in each class
            NSVs_perclass = model.get_n_sv()
            # indices where each class starts in each row of SVs;
            # name is after similar variable in libsvm internals
            nz_start = np.cumsum([0] + NSVs_perclass[:-1])
            nz_end = nz_start + NSVs_perclass
            # reserve storage
            weights = np.zeros((npairs, svs.shape[1]))
            ipair = 0               # index of the pair
            """
            // classifier (i,j): coefficients with
            // i are in sv_coef[j-1][nz_start[i]...],
            // j are in sv_coef[i][nz_start[j]...]
            """
            sens_labels = []
            for i in xrange(nr_class):
                for j in xrange(i + 1, nr_class):
                    weights[ipair, :] = np.asarray(
                        svcoef[j - 1, nz_start[i]:nz_end[i]]
                        * svs[nz_start[i]:nz_end[i]]
                        +
                        svcoef[i, nz_start[j]:nz_end[j]]
                        * svs[nz_start[j]:nz_end[j]])
                    # ??? First label corresponds to positive,
                    # that is why [j], [i]
                    sens_labels += [(svm_labels[j], svm_labels[i])]
                    ipair += 1      # go to the next pair
            assert(ipair == npairs)

    if __debug__ and 'SVM' in debug.active:
        if nr_class:
            nsvs = model.get_n_sv()
        else:
            nsvs = model.get_total_n_sv()
        debug('SVM',
              "Extracting weights for %s-class SVM: #SVs=%s, " %
              (nr_class, nsvs) +
              " SVcoefshape=%s SVs.shape=%s Rhos=%s." %
              (svcoef.shape, svs.shape, rhos) +
              " Result: min=%f max=%f" % (np.min(weights), np.max(weights)))

    ds_kwargs = {}
    if nr_class:          # for classification only
        # and we should have prepared the labels
        assert(sens_labels is not None)

        if len(clf._attrmap):
            if isinstance(sens_labels[0], tuple):
                sens_labels = asobjarray(sens_labels)
            sens_labels = clf._attrmap.to_literal(sens_labels, recurse=True)

        # NOTE: `weights` is already and always 2D
        ds_kwargs = dict(sa={clf.params.targets_attr: sens_labels})

    weights_ds = Dataset(weights, **ds_kwargs)
    return weights_ds
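# Illustration of the one-vs-one pair enumeration used above (pure Python,
# no libsvm involved): for nr_class classes the nested i < j loop visits
# each unordered pair of classes exactly once, matching npairs.
nr_class = 4
pairs = [(i, j) for i in range(nr_class) for j in range(i + 1, nr_class)]
assert len(pairs) == nr_class * (nr_class - 1) // 2     # == npairs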
def hdf2obj(hdf, memo=None):
    """Convert an HDF5 group definition into an object instance.

    Obviously, this function assumes the conventions implemented in the
    `obj2hdf()` function. Those conventions will eventually be documented
    in the module docstring, whenever they are sufficiently stable.

    Parameters
    ----------
    hdf : HDF5 group instance
      HDF5 group instance. This could also be an HDF5 file instance.
    memo : dict
      Dictionary tracking reconstructed objects to prevent recursions
      (analogous to deepcopy).

    Notes
    -----
    Although this function uses a way to reconstruct object instances that
    is similar to unpickling, it should be *relatively* safe to open HDF
    files from untrusted sources. Only basic datatypes are stored in HDF
    files, and there is no foreign code that is executed during
    reconstruction. For that reason, any type that shall be reconstructed
    needs to be importable (importing is done by fully-qualified module
    names).

    Returns
    -------
    object instance
    """
    if memo is None:
        # init object tracker
        memo = {}
    # note, older file formats did not store objrefs
    if 'objref' in hdf.attrs:
        objref = hdf.attrs['objref']
    else:
        objref = None

    # if this HDF group has an objref that points to an already
    # reconstructed object, simply return this object again
    if not objref is None and objref in memo:
        obj = memo[objref]
        if __debug__:
            debug('HDF5', "Use tracked object %s (%i)" % (type(obj), objref))
        return obj

    #
    # Actual data
    #
    if isinstance(hdf, h5py.Dataset):
        if __debug__:
            debug('HDF5', "Load from HDF5 dataset [%s]" % hdf.name)
        if 'is_scalar' in hdf.attrs:
            # extract the scalar from the 0D array
            obj = hdf[()]
        else:
            # read array-dataset into an array
            obj = np.empty(hdf.shape, hdf.dtype)
            hdf.read_direct(obj)
    else:
        # check if we have a class instance definition here
        if not ('class' in hdf.attrs or 'recon' in hdf.attrs):
            raise LookupError("Found hdf group without class instance "
                    "information (group: %s). Cannot convert it into an "
                    "object (content: '%s', attributes: '%s')."
                    % (hdf.name, hdf.keys(), hdf.attrs.keys()))

        mod_name = hdf.attrs['module']

        if 'recon' in hdf.attrs:
            # Custom objects with custom reconstructor
            obj = _recon_customobj_customrecon(hdf, memo)
        elif mod_name != '__builtin__':
            # Custom objects with default reconstructor
            cls_name = hdf.attrs['class']
            if cls_name in ('function', 'type'):
                # Functions and types
                obj = _recon_functype(hdf)
            else:
                # Other custom objects
                obj = _recon_customobj_defaultrecon(hdf, memo)
        else:
            # Built-in objects
            cls_name = hdf.attrs['class']
            if __debug__:
                debug('HDF5',
                      "Reconstructing built-in object '%s'." % cls_name)
            # built-in type (there should be only 'list', 'dict' and 'None'
            # that would not be in a Dataset)
            if cls_name == 'NoneType':
                obj = None
            elif cls_name == 'tuple':
                obj = _hdf_tupleitems_to_obj(hdf, memo)
            elif cls_name == 'list':
                obj = _hdf_list_to_obj(hdf, memo)
            elif cls_name == 'dict':
                obj = _hdf_dict_to_obj(hdf, memo)
            elif cls_name == 'function':
                raise RuntimeError("Unhandled reconstruction of built-in "
                        "function (at '%s')." % hdf.name)
            else:
                raise RuntimeError("Found hdf group with a builtin type "
                        "that is not handled by the parser (group: %s). "
                        "This is a conceptual bug in the parser. Please "
                        "report." % hdf.name)
    #
    # Final post-processing
    #
    if 'is_objarray' in hdf.attrs:
        # need to handle special case of arrays of objects
        if np.isscalar(obj):
            obj = np.array(obj, dtype=np.object)
        else:
            obj = asobjarray(obj)

    # track if desired
    if objref:
        memo[objref] = obj

    if __debug__:
        debug('HDF5', "Done loading %s [%s]" % (type(obj), hdf.name))

    return obj
def hdf2obj(hdf):
    """Convert an HDF5 group definition into an object instance.

    Obviously, this function assumes the conventions implemented in the
    `obj2hdf()` function. Those conventions will eventually be documented
    in the module docstring, whenever they are sufficiently stable.

    Parameters
    ----------
    hdf : HDF5 group instance
      HDF5 group instance. This could also be an HDF5 file instance.

    Notes
    -----
    Although this function uses a way to reconstruct object instances that
    is similar to unpickling, it should be *relatively* safe to open HDF
    files from untrusted sources. Only basic datatypes are stored in HDF
    files, and there is no foreign code that is executed during
    reconstruction. For that reason, any type that shall be reconstructed
    needs to be importable (importing is done by fully-qualified module
    names).

    Returns
    -------
    object instance
    """
    # already at the level of real data
    if isinstance(hdf, h5py.Dataset):
        if __debug__:
            debug('HDF5', "Load HDF5 dataset '%s'." % hdf.name)
        if not len(hdf.shape):
            # extract the scalar from the 0D array
            return hdf[()]
        else:
            # read array-dataset into an array
            value = np.empty(hdf.shape, hdf.dtype)
            hdf.read_direct(value)
            return value
    else:
        # check if we have a class instance definition here
        if not ('class' in hdf.attrs or 'recon' in hdf.attrs):
            raise LookupError("Found hdf group without class instance "
                    "information (group: %s). Cannot convert it into an "
                    "object (attributes: '%s')."
                    % (hdf.name, hdf.attrs.keys()))

        if __debug__:
            debug('HDF5', "Parsing HDF5 group (attributes: '%s')."
                          % (hdf.attrs.keys()))
        if 'recon' in hdf.attrs:
            # we found something that has some special idea about how it
            # wants to be reconstructed;
            # look for arguments for that reconstructor
            recon = hdf.attrs['recon']
            mod = hdf.attrs['module']
            if mod == '__builtin__':
                raise NotImplementedError(
                        "Built-in reconstructors are not supported (yet). "
                        "Got: '%s'." % recon)

            # turn names into definitions
            mod = __import__(mod, fromlist=[recon])
            recon = mod.__dict__[recon]

            if 'rcargs' in hdf:
                recon_args = _hdf_tupleitems_to_obj(hdf['rcargs'])
            else:
                recon_args = ()

            if __debug__:
                debug('HDF5',
                      "Reconstructing object with '%s' (%i arguments)."
                      % (recon, len(recon_args)))
            # reconstruct
            obj = recon(*recon_args)
            # TODO Handle potentially available state settings
            return obj

        cls = hdf.attrs['class']
        mod = hdf.attrs['module']
        if not mod == '__builtin__':
            # some custom class is desired
            # import the module and the class
            if __debug__:
                debug('HDF5', "Importing '%s' from '%s'." % (cls, mod))
            mod = __import__(mod, fromlist=[cls])

            if cls == 'function':
                fxname = hdf.attrs['name']
                # special case of non-built-in functions
                if __debug__:
                    debug('HDF5', "Loaded function '%s' from '%s'."
                                  % (fxname, mod))
                return mod.__dict__[fxname]

            # get the class definition from the module dict
            cls = mod.__dict__[cls]

            if __debug__:
                debug('HDF5', "Reconstructing class '%s' instance." % cls)
            # create the object
            if issubclass(cls, dict):
                # use specialized __new__ if necessary or beneficial
                obj = dict.__new__(cls)
            else:
                obj = object.__new__(cls)

            if 'state' in hdf:
                # insert the state of the object
                if __debug__:
                    debug('HDF5', "Populating instance state.")
                state = _hdf_dictitems_to_obj(hdf['state'])
                obj.__dict__.update(state)
                if __debug__:
                    debug('HDF5', "Updated %i state items." % len(state))

            # do we process a container?
            if 'items' in hdf:
                if issubclass(cls, dict):
                    # charge a dict itself
                    if __debug__:
                        debug('HDF5', "Populating dictionary object.")
                    obj.update(_hdf_dictitems_to_obj(hdf['items']))
                    if __debug__:
                        debug('HDF5', "Loaded %i items." % len(obj))
                else:
                    raise NotImplementedError(
                            "Unhandled container type (got: '%s')." % cls)

            return obj
        else:
            if __debug__:
                debug('HDF5', "Reconstructing built-in object '%s'." % cls)
            # built-in type (there should be only 'list', 'dict' and 'None'
            # that would not be in a Dataset)
            if cls == 'NoneType':
                return None
            elif cls == 'tuple':
                return _hdf_tupleitems_to_obj(hdf['items'])
            elif cls == 'list':
                l = _hdf_listitems_to_obj(hdf['items'])
                if 'is_objarray' in hdf.attrs:
                    # need to handle special case of arrays of objects
                    return asobjarray(l)
                else:
                    return l
            elif cls == 'dict':
                return _hdf_dictitems_to_obj(hdf['items'])
            elif cls == 'function':
                raise RuntimeError("Unhandled reconstruction of built-in "
                        "function (at '%s')." % hdf.name)
            else:
                raise RuntimeError("Found hdf group with a builtin type "
                        "that is not handled by the parser (group: %s). "
                        "This is a conceptual bug in the parser. Please "
                        "report." % hdf.name)
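# A minimal usage sketch for the reader side, assuming h5py is installed;
# the file name 'mydata.hdf5' and group key 'stored_obj' are hypothetical,
# and the group is presumed to have been written by obj2hdf() following
# the conventions described above.
import h5py

hdffile = h5py.File('mydata.hdf5', 'r')
try:
    obj = hdf2obj(hdffile['stored_obj'])
finally:
    hdffile.close()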