def train(self, dataset):
    """'Train' `CachedQueryEngine`.

    On the first call the wrapped query engine is trained on `dataset`
    and the lookup caches are (re)initialized.  Later calls with a
    dataset whose feature-attribute hash matches the stored one do
    nothing.

    Parameters
    ----------
    dataset
      Dataset to train on.  Its `.fa` collection (including
      `.fa._uniform_length`) and `.nfeatures` are read.

    Raises
    ------
    ValueError
      If `dataset`'s .fa were changed -- it would raise an exception
      telling to `untrain` explicitly, since the idea is to reuse
      CachedQueryEngine with the same engine and same dataset (up to
      variation of .sa, such as labels permutation).
    """
    ds_fa_hash = idhash_(dataset.fa) + ':%d' % dataset.fa._uniform_length
    if self._trained_ds_fa_hash is None:
        # First time is called
        self._trained_ds_fa_hash = ds_fa_hash
        self._queryengine.train(dataset)    # train the queryengine
        # lookup for query_byid
        self._lookup_ids = [None] * dataset.nfeatures
        self._lookup = {}                   # generic lookup
    elif self._trained_ds_fa_hash != ds_fa_hash:
        # Call form of raise: valid on both Python 2 and Python 3
        raise ValueError(
            "Feature attributes of %s (idhash=%r) were changed from "
            "what this %s was trained on (idhash=%r). Untrain it "
            "explicitly if you like to reuse it on some other data."
            % (dataset, ds_fa_hash, self, self._trained_ds_fa_hash))
    # Otherwise: already trained on matching feature attributes --
    # nothing to do.
def train(self, dataset):
    """'Train' `CachedQueryEngine`.

    On the first call the wrapped query engine (`self._qe`) is trained
    on `dataset` and the lookup caches are (re)initialized.  Later calls
    with a dataset whose feature-attribute hash matches the stored one
    do nothing.

    Parameters
    ----------
    dataset
      Dataset to train on.  Its `.fa` collection (including
      `.fa._uniform_length`) and `.nfeatures` are read.

    Raises
    ------
    ValueError
      If `dataset`'s .fa were changed -- it would raise an exception
      telling to `untrain` explicitly, since the idea is to reuse
      CachedQueryEngine with the same engine and same dataset (up to
      variation of .sa, such as labels permutation).
    """
    ds_fa_hash = idhash_(dataset.fa) + ':%d' % dataset.fa._uniform_length
    if self._trained_ds_fa_hash is None:
        # First time is called
        self._trained_ds_fa_hash = ds_fa_hash
        self._qe.train(dataset)             # train the qe
        # lookup for query_byid
        self._lookup_ids = [None] * dataset.nfeatures
        self._lookup = {}                   # generic lookup
    elif self._trained_ds_fa_hash != ds_fa_hash:
        # Call form of raise: valid on both Python 2 and Python 3
        raise ValueError(
            "Feature attributes of %s (idhash=%r) were changed from "
            "what this %s was trained on (idhash=%r). Untrain it "
            "explicitly if you like to reuse it on some other data."
            % (dataset, ds_fa_hash, self, self._trained_ds_fa_hash))
    # Otherwise: already trained on matching feature attributes --
    # nothing to do.
def idhash(self):
    """To verify if dataset is in the same state as when smth else was done

    Like if classifier was trained on the same dataset as in question.

    Returns
    -------
    str
      Space-separated ``name@hash`` tokens: first for the object itself
      and its samples, then for every item of the `.a`, `.sa` and `.fa`
      collections (in that order), with keys sorted inside each
      collection.
    """
    parts = ['self@%s samples@%s' % (idhash_(self), idhash_(self.samples))]
    for col in (self.a, self.sa, self.fa):
        # We cannot count on the order in which the collection hands out
        # its keys, and since the resulting idhash is order-dependent we
        # have to make it deterministic.  sorted() is used instead of
        # keys().sort() because keys() need not return a list (it is a
        # view on Python 3).
        for k in sorted(col.keys()):
            parts.append(' %s@%s' % (k, idhash_(col[k].value)))
    return ''.join(parts)
def query(self, **kwargs):
    """Return the query result for `kwargs`, consulting the lookup first.

    The lookup key is `idhash_` applied to ``kwargs.items()``.  On a miss
    the call is forwarded to the wrapped query engine and its result is
    stored under that key before being returned.

    Note that a stored value of None is treated the same as a miss, so
    such a query is forwarded to the engine again.
    """
    cache_key = idhash_(kwargs.items())
    cached = self._lookup.get(cache_key, None)
    if cached is not None:
        return cached
    # Miss: ask the underlying engine and remember the answer
    result = self._queryengine.query(**kwargs)
    self._lookup[cache_key] = result
    return result
def query(self, **kwargs):
    """Return the query result for `kwargs`, consulting the lookup first.

    The lookup key is `idhash_` applied to ``kwargs.items()``.  On a miss
    the call is forwarded to the wrapped query engine (`self._qe`) and
    its result is stored under that key before being returned.

    Note that a stored value of None is treated the same as a miss, so
    such a query is forwarded to the engine again.
    """
    cache_key = idhash_(kwargs.items())
    cached = self._lookup.get(cache_key, None)
    if cached is not None:
        return cached
    # Miss: ask the underlying engine and remember the answer
    result = self._qe.query(**kwargs)
    self._lookup[cache_key] = result
    return result