def extract(self, X):
    """
    Code every part-sized window of every image in `X`.

    A window of shape `self._part_shape` is slid over the two spatial axes
    of `X` with stride 1, and each window is coded by `self.__extract`.

    Parameters
    ----------
    X : ndarray
        Batch of images. Axis 0 indexes images, axes 1 and 2 are the
        spatial axes and the last axis is the channel axis (its length is
        used as the channel count in ``'separate'`` mode).

    Returns
    -------
    (feature_map, num_features) : tuple
        For ``coding == 'hard'``, `feature_map` is an int64 array of shape
        ``(N, H', W', C)`` initialised to -1 and `num_features` is
        ``num_parts * C`` (plus one when ``code_bkg`` is set). For the
        other codings the trailing axes are flattened so that the result
        has four axes, and `num_features` is ``num_parts * C``.

    Raises
    ------
    AssertionError
        If the layer has not been trained.
    ValueError
        If ``channel_mode`` is not ``'together'``/``'separate'`` or
        ``coding`` is not ``'hard'``/``'triangle'``/``'soft'``.
    """
    assert self.trained, "Must be trained before calling extract"

    channel_mode = self._settings['channel_mode']
    coding = self._settings['coding']
    ps = self._part_shape

    # Validate the two mode switches up front; previously an unknown value
    # left `C` or `feature_map` unbound and surfaced as an
    # UnboundLocalError further down.
    if channel_mode == 'together':
        C = 1
    elif channel_mode == 'separate':
        C = X.shape[-1]
    else:
        raise ValueError('Unknown channel_mode: {}'.format(channel_mode))

    if coding not in ('hard', 'triangle', 'soft'):
        raise ValueError('Unknown coding: {}'.format(coding))

    # Number of valid window positions along each spatial axis
    dim = (X.shape[1] - ps[0] + 1, X.shape[2] - ps[1] + 1, C)

    covar = self._prepare_covariance()

    # One standard deviation per image, passed through to the coder
    img_stds = ag.apply_once(np.std, X, [1, 2, 3], keepdims=False)

    n_images = X.shape[0]
    if coding == 'hard':
        feature_map = -np.ones((n_images,) + dim, dtype=np.int64)
    elif coding == 'triangle':
        feature_map = np.zeros((n_images,) + dim + (self.num_parts,),
                               dtype=np.float32)
    else:  # 'soft'
        feature_map = np.empty((n_images,) + dim + (self.num_parts,),
                               dtype=np.float32)
        feature_map[:] = self._min_log_prob

    for i, j in itr.product(range(dim[0]), range(dim[1])):
        Xij_patch = X[:, i:i + ps[0], j:j + ps[1]]
        if channel_mode == 'together':
            feature_map[:, i, j, 0] = self.__extract(Xij_patch, covar,
                                                     img_stds)
        else:  # 'separate'
            for c in range(C):
                f = self.__extract(Xij_patch[..., c], covar, img_stds)
                assert f.dtype in [np.int64, np.int32]
                # Give each channel its own block of part indices; -1
                # entries are left untouched.
                f[f != -1] += c * self.num_parts
                feature_map[:, i, j, c] = f

    if coding == 'hard':
        num_features = self.num_parts * C
        if self._settings['code_bkg']:
            num_features += 1
        return (feature_map, num_features)

    flat_map = feature_map.reshape(feature_map.shape[:3] + (-1,))
    return (flat_map, self.num_parts * C)
def _train(self, phi, data, y=None):
    """
    Train the layer on patches sampled from ``phi(data)``.

    Steps: sample patches with `self._get_patches`, optionally standardize
    each patch, estimate the second-moment matrix of the flattened
    patches, build a whitening matrix from it, whiten the patches and hand
    them to `self.train_from_samples`; finally call `self.preprocess`.

    Parameters
    ----------
    phi : callable
        Applied to `data` to obtain the input of this layer.
    data : object
        Passed to `phi` unchanged.
    y : object, optional
        Unused.

    Side effects
    ------------
    Sets ``self._train_info['example_patches2']`` (first ten sampled
    patches), ``self._extra['sigma']`` and ``self._whitening_matrix``.
    """
    X = phi(data)
    raw_originals, the_rest = self._get_patches(X)

    self._train_info = {}
    self._train_info['example_patches2'] = raw_originals[:10]

    # Standardize each sample over all of its non-leading axes
    if self._settings['standardize']:
        mu = ag.apply_once(np.mean, raw_originals, [1, 2, 3, 4])
        variances = ag.apply_once(np.var, raw_originals, [1, 2, 3, 4])
        epsilon = self._settings['standardization_epsilon']
        raw_originals = (raw_originals - mu) / np.sqrt(variances + epsilon)

    # Merge the first two axes so that every row of `pp` is one patch
    n_rows = np.prod(raw_originals.shape[:2])
    pp = raw_originals.reshape((n_rows, -1))

    sigma = np.dot(pp.T, pp) / len(pp)
    self._extra['sigma'] = sigma

    if self.w_epsilon is not None:
        # W = U diag(1 / sqrt(S + eps)) U^T
        U, S, _ = np.linalg.svd(sigma)
        shrinker = np.diag(1 / np.sqrt(S + self.w_epsilon))
        self._whitening_matrix = np.dot(U, np.dot(shrinker, U.T))
    else:
        # No whitening requested: identity keeps whiten_patches a no-op
        # as far as this matrix is concerned.
        self._whitening_matrix = np.eye(sigma.shape[0])

    pp = self.whiten_patches(pp)
    raw_originals = pp.reshape(raw_originals.shape)

    self.train_from_samples(raw_originals, the_rest)
    self.preprocess()
def __extract_helper(self, X, covar, img_stds):
    """
    Code one batch of patches against the trained mixture.

    Parameters
    ----------
    X : ndarray
        Batch of patches; axis 0 indexes the patches, the remaining axes
        are flattened.
    covar : object
        Covariance argument forwarded to `score_samples`.
    img_stds : ndarray or scalar
        Per-image standard deviation; only used when
        ``normalize_globally`` is set, as the divisor of each patch's
        standard deviation.

    Returns
    -------
    ndarray
        * ``'hard'``: index of the most responsible component per patch;
          low-contrast patches get ``self.num_parts`` when ``code_bkg`` is
          set and -1 otherwise.
        * ``'triangle'``: ``max(mean(d) - d, 0)`` with ``d = -log_resp``;
          low-contrast patches are zeroed.
        * ``'soft'``: log-responsibilities floored at
          ``self._min_log_prob``; low-contrast patches are set to the
          floor.
        * ``'raw'``: ``logprob[:, None] + log_resp`` with no low-contrast
          masking.

    Raises
    ------
    ValueError
        If ``coding`` is none of the above (previously this silently
        returned None).
    """
    flat = X.reshape((X.shape[0], -1))

    # A patch is rejected when its (optionally image-normalised) standard
    # deviation does not exceed the threshold. This is computed before
    # standardization/whitening alters the patch.
    patch_std = flat.std(-1)
    if self._settings['normalize_globally']:
        patch_std = patch_std / img_stds
    not_ok = (patch_std <= self._settings['std_thresh'])

    if self._settings['standardize']:
        flat = self._standardize_patches(flat)

    if self._settings['whitening_epsilon'] is not None:
        flat = self.whiten_patches(flat)

    K = self._means.shape[0]
    logprob, log_resp = score_samples(flat,
                                      self._means.reshape((K, -1)),
                                      self._weights.ravel(),
                                      covar,
                                      self.gmm_cov_type,
                                      )

    coding = self._settings['coding']
    if coding == 'hard':
        parts = log_resp.argmax(-1)
        parts[not_ok] = self.num_parts if self._settings['code_bkg'] else -1
        return parts

    if coding == 'triangle':
        dist = -log_resp
        means = ag.apply_once(np.mean, dist, [1])
        f = np.maximum(means - dist, 0)
        f[not_ok] = 0.0
        return f

    if coding == 'soft':
        f = np.maximum(self._min_log_prob, log_resp)
        f[not_ok] = self._min_log_prob
        return f

    if coding == 'raw':
        return logprob[:, np.newaxis] + log_resp

    raise ValueError('Unknown coding: {}'.format(coding))
def _extract(self, phi, data):
    """
    Pool the output of the previous layer.

    ``phi(data)`` may return either

    * a tuple whose first two items are ``(X, F)``: pooling is delegated
      to the index-map pooling routines in ``pnet.cyfuncs`` (``'max'`` or
      ``'sum'``), using `self.calc_shape` / `self.calc_strides` on the
      spatial shape of `X`; or
    * a plain array `X`: its two spatial axes (1 and 2) are divided into a
      ``self._final_shape`` grid and each cell is reduced with max, sum or
      mean (``'max'``, ``'sum'``, ``'avg'``).

    Parameters
    ----------
    phi : callable
        Applied to `data` to obtain the input of this layer.
    data : object
        Passed to `phi` unchanged.

    Returns
    -------
    ndarray
        The pooled feature map. In the tuple case it is cast to the
        ``output_dtype`` setting when that is given; in the array case the
        output buffer is allocated with that dtype.

    Raises
    ------
    ValueError
        If ``self._operation`` is not supported by the selected path.
    NotImplementedError
        If the input is a plain array and ``self._final_shape`` is None.

    Side effects
    ------------
    In the tuple case, stores the mean of the feature map over axes
    0, 1 and 2 in ``self._extract_info['concentration']``.
    """
    X_F = phi(data)
    output_dtype = self._settings.get('output_dtype')

    if isinstance(X_F, tuple):
        X = X_F[0]
        F = X_F[1]

        shape = self.calc_shape(X.shape[1:3])
        strides = self.calc_strides(X.shape[1:3])

        if self._operation == 'max':
            from pnet.cyfuncs import index_map_pooling_multi as poolf
        elif self._operation == 'sum':
            from pnet.cyfuncs import index_map_sum_pooling_multi as poolf
        else:
            raise ValueError('Unknown pooling operation: {}'.format(
                             self._operation))
        feature_map = poolf(X, F, shape, strides)

        c = ag.apply_once(np.mean, feature_map, [0, 1, 2], keepdims=False)
        self._extract_info['concentration'] = c

        if output_dtype is not None:
            return feature_map.astype(output_dtype)
        return feature_map

    X = X_F
    reducers = {'max': np.max, 'sum': np.sum, 'avg': np.mean}
    if self._operation not in reducers:
        raise ValueError('Unknown pooling operation: {}'.format(
                         self._operation))
    op = reducers[self._operation]

    if self._final_shape is None:
        raise NotImplementedError('Not yet')

    # tuple() so that a list-valued final shape can be concatenated with
    # the other shape tuples below.
    fs = tuple(self._final_shape)

    # Cell boundaries: fs[k] + 1 evenly spaced cut points along each
    # spatial axis, rounded to the nearest pixel.
    x_steps = np.arange(fs[0] + 1) * X.shape[1] / fs[0]
    x_bounds = np.round(x_steps).astype(np.int_)
    y_steps = np.arange(fs[1] + 1) * X.shape[2] / fs[1]
    y_bounds = np.round(y_steps).astype(np.int_)

    xs = enumerate(zip(x_bounds[:-1], x_bounds[1:]))
    ys = enumerate(zip(y_bounds[:-1], y_bounds[1:]))

    N = X.shape[0]
    F = X.shape[-1]

    feature_map = np.zeros((N,) + fs + (F,), dtype=output_dtype)

    for (i, (x0, x1)), (j, (y0, y1)) in itr.product(xs, ys):
        patch = X[:, x0:x1, y0:y1]
        feature_map[:, i, j] = ag.apply_once(op, patch, [1, 2],
                                             keepdims=False)

    return feature_map
def _standardize_patches(self, flat_patches):
    """
    Standardize each row of `flat_patches`.

    The mean and variance are taken over axis 1 with `ag.apply_once`;
    `self.epsilon` is added to the variance before the square root so the
    division stays finite for constant rows.

    Returns ``(flat_patches - mean) / sqrt(var + self.epsilon)``.
    """
    reduce_axes = [1]
    row_mean = ag.apply_once(np.mean, flat_patches, reduce_axes)
    row_var = ag.apply_once(np.var, flat_patches, reduce_axes)
    centered = flat_patches - row_mean
    scale = np.sqrt(row_var + self.epsilon)
    return centered / scale
def fit(self, X):
    """
    Estimate model parameters with the expectation-maximization algorithm.

    Parameters are set when constructing the estimator class.

    Two single-component cases (``n_components == 1`` with one
    permutation and covariance type ``'diag'`` or ``'tied'``) are solved
    directly from the sample mean and (co)variance, without EM. In those
    cases the regularisation strength is either ``min_covar`` (when
    ``_target_entropy`` is None) or found by bisection so that the entropy
    of the regularised covariance approaches ``_target_entropy``.

    Otherwise ``n_init`` EM runs of at most ``n_iter`` iterations are
    performed and the parameters of the run with the highest final
    log-likelihood are kept.

    Parameters
    ----------
    X : array_like, shape (n, n_permutations, n_features)
        Array of samples, where each sample has been transformed
        `n_permutations` times.

    Raises
    ------
    AssertionError
        If `X` is not 3-dimensional or its second axis does not match
        ``len(self.permutations)``.
    RuntimeError
        If no EM trial produced a usable log-likelihood.

    Side effects
    ------------
    Sets ``weights_``, ``means_``, ``covars_`` and ``converged_``. The two
    direct cases also set ``_entropy`` (and the ``'diag'`` case sets
    ``_target_entropy`` when it was None).
    """

    def full_entropy(cov):
        # NOTE(review): the textbook Gaussian differential entropy carries
        # a factor 0.5 on the log-determinant as well. The expression is
        # left as it was because `_target_entropy` values are compared
        # against exactly this quantity - confirm before changing.
        _, logdet = np.linalg.slogdet(cov)
        return 0.5 * cov.shape[0] * np.log(2 * np.pi * np.e) + logdet

    def bisect_reg(make_cov, entropy, target, rel_lo, rel_hi, n_steps):
        # Bisection on the regularisation value inside
        # min_covar * [1 + rel_lo, 1 + rel_hi]. Returns the midpoint of
        # the final bracket and the entropy seen at the last probe.
        lo, hi = self.min_covar * (1 + np.array([rel_lo, rel_hi]))
        ent = None
        for _ in range(n_steps):
            mid = np.mean([lo, hi])
            ent = entropy(make_cov(mid))
            print('ent', ent)
            if ent > target:
                hi = mid
            else:
                lo = mid
        return np.mean([lo, hi]), ent

    def reg_covar(cov0, mcov, target_entropy):
        # Blend cov0 with the identity so its entropy approaches
        # `target_entropy`; `mcov` is only used for the diagnostic print.
        def regularize_cov(reg_val):
            return cov0 * (1 - reg_val) + np.eye(cov0.shape[0]) * reg_val

        mcov1, _ = bisect_reg(regularize_cov, full_entropy, target_entropy,
                              -0.95, 2.95, 15)
        print('mcov multiple', mcov1 / mcov)
        return regularize_cov(mcov1)

    assert X.ndim == 3
    N, P, F = X.shape

    assert P == len(self.permutations)
    K = self.n_components

    # ------------------------------------------------------------------
    # Direct solution: one diagonal-covariance component
    # ------------------------------------------------------------------
    if K == 1 and P == 1 and self._covtype == 'diag':
        def diag_entropy(cov):
            return (0.5 * cov.shape[2] * np.log(2 * np.pi * np.e) +
                    np.sum(np.log(np.fabs(cov))))

        self.weights_ = np.ones(1)
        self.means_ = ag.apply_once(np.mean, X, [0])
        c0 = ag.apply_once(np.var, X, [0])
        self.covars_ = c0 + self.min_covar
        self.converged_ = True

        if self._target_entropy is None:
            ent = diag_entropy(self.covars_)
            self._target_entropy = ent
            self._entropy = ent
        else:
            mcov, ent = bisect_reg(lambda reg_val: c0 + reg_val,
                                   diag_entropy, self._target_entropy,
                                   -0.25, 0.25, 10)
            print('mcov', mcov)
            print('target_entropy', self._target_entropy)
            print('diff', np.fabs(ent - self._target_entropy))

            self.covars_ = c0 + mcov
            self._entropy = diag_entropy(self.covars_)
        return

    # ------------------------------------------------------------------
    # Direct solution: one full-covariance ('tied') component
    # ------------------------------------------------------------------
    if K == 1 and P == 1 and self._covtype == 'tied':
        cov0 = np.cov(X[:, 0].T)

        def regularize_cov(reg_val):
            return cov0 * (1 - reg_val) + np.eye(cov0.shape[0]) * reg_val

        self.weights_ = np.ones(1)
        self.means_ = ag.apply_once(np.mean, X, [0])
        self.converged_ = True

        if self._target_entropy is None:
            self.covars_ = regularize_cov(self.min_covar)
        else:
            mcov, ent = bisect_reg(regularize_cov, full_entropy,
                                   self._target_entropy, -0.95, 1.95, 15)
            print('mcov', mcov)
            print('target_entropy', self._target_entropy)
            print('diff', np.fabs(ent - self._target_entropy))
            self.covars_ = regularize_cov(mcov)

        self._entropy = full_entropy(self.covars_)
        return

    # ------------------------------------------------------------------
    # General case: EM with random restarts
    # ------------------------------------------------------------------
    print('HERE')

    # time.clock() was removed in Python 3.8; fall back to it only on
    # interpreters that lack perf_counter. Used for log output only.
    timer = getattr(time, 'perf_counter', None) or time.clock

    max_log_prob = -np.inf
    best_params = None
    for trial in range(self.n_init):
        loglikelihoods = []
        self.weights_ = np.ones((K, P)) / (K * P)

        # Initial covariance: covariance of all samples, blended with the
        # identity. The 'diag' type does not use it.
        if self._covtype != 'diag':
            flatX = X.reshape((-1, F))
            print('cov')
            cv = ((1 - self.min_covar) * np.cov(flatX.T) +
                  self.min_covar * np.eye(F))
            print('cov done')

        # Initialize the means by picking K samples at random
        self.means_ = X[self.random_state.choice(N, K, replace=False)]

        if self._covtype == 'ones':
            self.covars_ = np.ones(cv.shape[0])
        elif self._covtype == 'tied':
            self.covars_ = cv
        elif self._covtype == 'diag':
            self.covars_ = np.tile(np.ones(F), (K, P, 1))
        elif self._covtype == 'diag-perm':
            self.covars_ = np.tile(np.diag(cv).copy(), (P, 1))
        elif self._covtype == 'full':
            self.covars_ = np.tile(cv, (K, 1, 1))
        elif self._covtype == 'full-perm':
            self.covars_ = np.tile(cv, (P, 1, 1))
        elif self._covtype == 'full-full':
            self.covars_ = np.tile(cv, (K, P, 1, 1))

        self.converged_ = False
        for loop in range(self.n_iter):
            start = timer()

            # E-step
            # log_resp is indexed as [sample, component, permutation]
            # below - presumably shape (N, K, P); confirm against
            # score_block_samples.
            logprob, log_resp = self.score_block_samples(X)

            resp = np.exp(log_resp)
            lresp = log_resp.transpose((0, 2, 1)).reshape(
                (-1, log_resp.shape[1]))

            # Total responsibility mass per component
            dens = np.exp(logsumexp(lresp, axis=0))

            # M-step
            if 'm' in self._params:
                for p in range(P):
                    v = 0.0
                    for shift in range(P):
                        p0 = self.permutations[shift, p]
                        v += np.dot(resp[:, :, shift].T, X[:, p0])

                    self.means_[:, p, :] = v
                self.means_ /= dens[:, np.newaxis, np.newaxis]

            if 'w' in self._params:
                ww = (ag.apply_once(np.sum, resp, [0], keepdims=False) / N)
                # Keep the weights strictly inside (0, 1)
                self.weights_[:] = ww.clip(0.0001, 1 - 0.0001)

            if 'c' in self._params:
                if self._covtype == 'ones':
                    # Do not update
                    pass
                elif self._covtype == 'tied':
                    from pnet.cyfuncs import calc_new_covar
                    self.covars_[:] = calc_new_covar(
                        X[:self._covar_limit], self.means_, resp,
                        self.permutations)

                    # Now make sure the diagonal is not overfit
                    D = self.covars_.shape[0]
                    self.covars_ = (self.covars_ * (1 - self.min_covar) +
                                    np.eye(D) * self.min_covar)
                elif self._covtype == 'diag':
                    from pnet.cyfuncs import calc_new_covar_diag as calc
                    self.covars_[:] = calc(X[:self._covar_limit],
                                           self.means_, resp,
                                           self.permutations)
                    self.covars_[:] += self.min_covar
                elif self._covtype == 'diag-perm':
                    from pnet.cyfuncs import calc_new_covar_diagperm as calc
                    self.covars_[:] = calc(X[:self._covar_limit],
                                           self.means_, resp,
                                           self.permutations)
                    self.covars_[:] = self.covars_.clip(min=self.min_covar)
                elif self._covtype == 'full':
                    from pnet.cyfuncs import calc_new_covar_full as calc
                    self.covars_[:] = calc(X[:self._covar_limit],
                                           self.means_, resp,
                                           self.permutations)
                    for k in range(K):
                        # NOTE(review): the target entropy is hard-coded
                        # here rather than taken from _target_entropy.
                        self.covars_[k] = reg_covar(self.covars_[k],
                                                    self.min_covar,
                                                    -9000.0)
                elif self._covtype == 'full-perm':
                    from pnet.cyfuncs import calc_new_covar_fullperm as calc
                    self.covars_[:] = calc(X[:self._covar_limit],
                                           self.means_, resp,
                                           self.permutations)
                    for p in range(P):
                        # Raise diagonal entries below min_covar up to it
                        dd = np.diag(self.covars_[p])
                        clipped_dd = dd.clip(min=self.min_covar)
                        self.covars_[p] += np.diag(clipped_dd - dd)
                elif self._covtype == 'full-full':
                    from pnet.cyfuncs import calc_new_covar_fullfull as calc
                    self.covars_[:] = calc(X[:self._covar_limit],
                                           self.means_, resp,
                                           self.permutations)
                    D = self.covars_.shape[2]
                    for k, p in itr.product(range(K), range(P)):
                        self.covars_[k, p] += np.eye(D) * self.min_covar

            # Calculate log likelihood
            loglikelihoods.append(logprob.sum())

            ag.info("Trial {trial}/{n_trials} Iteration {iter} "
                    "Time {time:.2f}s Log-likelihood {llh:.2f} "
                    "".format(
                        trial=trial+1,
                        n_trials=self.n_init,
                        iter=loop+1,
                        time=timer() - start,
                        llh=loglikelihoods[-1] / N,
                    ))

            # Stop when the relative change in log-likelihood is small
            if loop > 0:
                absdiff = abs(loglikelihoods[-1] - loglikelihoods[-2])
                if absdiff/abs(loglikelihoods[-2]) < self.thresh:
                    self.converged_ = True
                    break

        if loglikelihoods[-1] > max_log_prob:
            ag.info("Updated best log likelihood to {}"
                    .format(loglikelihoods[-1]))
            max_log_prob = loglikelihoods[-1]
            # Every trial rebinds weights_/means_/covars_ to fresh arrays,
            # so holding references (not copies) here is safe.
            best_params = {'weights': self.weights_,
                           'means': self.means_,
                           'covars': self.covars_,
                           'converged': self.converged_}
        else:
            ag.info("Did not updated best")

    if best_params is None:
        # Previously this surfaced as an UnboundLocalError.
        raise RuntimeError('No EM trial produced a usable log-likelihood '
                           '(n_init={})'.format(self.n_init))

    self.weights_ = best_params['weights']
    self.means_ = best_params['means']
    self.covars_ = best_params['covars']
    self.converged_ = best_params['converged']
# Train one model for this round (no warm start) and collect its outputs on
# the training and test sets.
warmstart_fn = None
net, info = train_model(name, solver_conf_fn, conf_fn, bare_conf_fn, steps,
                        seed=g_seed, logfile=logfile,
                        device_id=DEVICE_ID, warmstart=warmstart_fn)

# Take the first output of the network and drop axes 2 and 3.
# next(iter(...)) instead of .values()[0]: dict views cannot be indexed on
# Python 3, while this form gives the same element on Python 2.
all_fmj = next(iter(net.forward_all(data=X).values())).squeeze(axis=(2, 3))
all_te_fmj = next(
    iter(net.forward_all(data=te_X).values())).squeeze(axis=(2, 3))
all_fmj *= zstd
all_te_fmj *= zstd

# Use a different seed for the next model
g_seed += 1

# Centre the outputs across axis 1 and scale by (K - 1) / K
scale = (K - 1) / K
fs = scale * (all_fmj - ag.apply_once(np.mean, all_fmj, [1]))
te_fs = scale * (all_te_fmj - ag.apply_once(np.mean, all_te_fmj, [1]))

# Accumulate the damped contribution of this model into the running scores
T = 200.0
FXs += fs / T
te_FXs += te_fs / T

# Refresh p in place: exp(FXs) normalised over axis 1
exp_FXs = np.exp(FXs)
p[:] = exp_FXs / ag.apply_once(np.sum, exp_FXs, [1])

print(loop+1, 'test rate:', (te_FXs.argmax(-1) == te_y).mean(),
      'train rate:', (FXs.argmax(-1) == y).mean())
all_losses.append(info)
losses_fn = 'info/info_{}.h5'.format(rnd)
print('Saving info to ', losses_fn)
def temperature(y, T):
    """
    Raise `y` to the power ``1 / T`` and renormalise over axis 1.

    The normaliser is the sum of the tempered values over axis 1, computed
    with `ag.apply_once`.
    """
    exponent = 1 / T
    tempered = y ** exponent
    normalizer = ag.apply_once(np.sum, tempered, [1])
    return tempered / normalizer
def _get_patches(self, X):
    """
    Sample oriented patches from every image in `X`.

    Each image is padded to a square, rotated to ``self._n_orientations``
    evenly spaced angles (and optionally duplicated with inverted
    polarity, ``1 - image``). Patch centres are then drawn in a shuffled
    order; for every centre the corresponding patch is cut out of each
    rotated image, so one sample holds the same location seen at all
    orientations/polarities.

    A sample is accepted when the median, over orientations, of the
    standard deviation of its inner region (the patch shrunk by
    ``std_thresh_frame`` pixels per side) exceeds ``std_thresh``; with
    ``normalize_globally`` the median is first divided by the standard
    deviation of the padded image.

    Parameters
    ----------
    X : ndarray
        Batch of images; axis 0 indexes images, axes 1 and 2 are spatial
        and any further axes are treated as channels.

    Returns
    -------
    (originals, rest) : tuple of ndarray
        Accepted and rejected samples. Each sample has shape
        ``(ORI * POL,) + part_shape + channels`` (a single channel in
        ``'separate'`` mode). Returns early once ``max_samples`` accepted
        samples have been collected.

    Raises
    ------
    AssertionError
        If the ``polarities`` setting is not 1 or 2.
    ValueError
        If, ten times without an intervening accepted sample, 10000
        candidate positions were exhausted and the last one was rejected.
    """
    from skimage import transform

    samples_per_image = self._settings['samples_per_image']
    the_originals = []
    the_rest = []
    ag.info("Extracting patches from")
    # tuple() so the part shape can be concatenated with other shape tuples
    ps = tuple(self._part_shape)

    channel_mode = self._settings['channel_mode']

    ORI = self._n_orientations
    POL = self._settings['polarities']
    assert POL in (1, 2), "Polarities must be 1 or 2"

    # Make it square, to accommodate all types of rotations. The extra
    # `% 2` term makes the padding split evenly on both sides.
    size = X.shape[1:3]
    new_side = np.max(size)
    new_size = [new_side + (new_side - X.shape[1]) % 2,
                new_side + (new_side - X.shape[2]) % 2]
    padded_shape = (new_size[0], new_size[1]) + X.shape[3:]
    pad = [(new_size[i] - size[i]) // 2 for i in range(2)]

    # Exactly ORI angles. np.arange(0, 360, 360 / ORI) can produce one
    # extra element through rounding of the step, which would misalign
    # `matrices` and the polarity halves below.
    angles = np.arange(ORI) * (360 / ORI)
    radians = angles * np.pi / 180

    # Set up matrices that will translate a position in the canonical
    # image to the rotated images. This way, we're not rotating each
    # patch on demand, which will end up slower.
    center_adjusts = [ps[0] % 2, ps[1] % 2]
    offset = (np.asarray(new_size) - center_adjusts) / 2
    matrices = [pnet.matrix.translation(offset[0], offset[1]) *
                pnet.matrix.rotation(a) *
                pnet.matrix.translation(-offset[0], -offset[1])
                for a in radians]

    # Add matrices for the polarity flips too, if applicable
    matrices = matrices * POL

    # This avoids hitting the outside of patches, even after rotating:
    # keep centres at least one patch diagonal (plus one) from the border.
    avoid_edge = int(1 + np.max(ps) * np.sqrt(2))

    # Patch extent relative to its centre
    minus_ps = [-(ps[i] // 2) for i in range(2)]
    plus_ps = [minus_ps[i] + ps[i] for i in range(2)]

    max_samples = self._settings['max_samples']
    std_thresh = self._settings['std_thresh']
    fr = self._settings['std_thresh_frame']
    TRIES = 10000

    for n, img in enumerate(X):
        img_padded = ag.util.pad_to_size(img, padded_shape)

        all_img = np.asarray([
            transform.rotate(img_padded,
                             angle,
                             resize=False,
                             mode='nearest')
            for angle in angles])

        # Add inverted polarity too
        if POL == 2:
            all_img = np.concatenate([all_img, 1 - all_img])

        # Re-seeded for every image, so each image sees the same sequence
        rs = np.random.RandomState(0)

        # These indices represent the center of patches
        range_x = range(pad[0] + avoid_edge,
                        pad[0] + img.shape[0] - avoid_edge)
        range_y = range(pad[1] + avoid_edge,
                        pad[1] + img.shape[1] - avoid_edge)

        indices = list(itr.product(range_x, range_y))
        rs.shuffle(indices)
        i_iter = itr.cycle(iter(indices))

        consecutive_failures = 0

        img_std = np.std(img_padded)

        ag.info('Image #{}, collected {} patches and rejected {} (std={}'.format(
            n, len(the_originals), len(the_rest), img_std))

        for sample in range(samples_per_image):
            for tries in range(TRIES):
                x, y = next(i_iter)

                # Inner region of the patch, across all orientations
                inner_sel = [slice(None),
                             slice(x + minus_ps[0] + fr, x + plus_ps[0] - fr),
                             slice(y + minus_ps[1] + fr, y + plus_ps[1] - fr)]
                if channel_mode == 'separate':
                    ii = rs.randint(X.shape[3])
                    inner_sel.append(ii)

                XY = np.array([x, y, 1])[:, np.newaxis]

                # Now, let's explore all orientations
                if channel_mode == 'together':
                    vispatch = np.zeros((ORI * POL,) + ps + X.shape[3:])
                elif channel_mode == 'separate':
                    vispatch = np.zeros((ORI * POL,) + ps + (1,))

                br = False
                for ori in range(ORI * POL):
                    # Position of this centre in the rotated image
                    p = np.dot(matrices[ori], XY)
                    ip = [int(round(float(p[i]))) for i in range(2)]

                    selection = [ori,
                                 slice(ip[0] + minus_ps[0],
                                       ip[0] + plus_ps[0]),
                                 slice(ip[1] + minus_ps[1],
                                       ip[1] + plus_ps[1])]
                    if channel_mode == 'separate':
                        selection.append(slice(ii, ii + 1))

                    # Index with a tuple: NumPy no longer accepts a list
                    # of slices as a multidimensional index.
                    orig = all_img[tuple(selection)]
                    try:
                        vispatch[ori] = orig
                    except ValueError:
                        # Cut-out fell partly outside the image
                        br = True
                        break

                if br:
                    continue

                # Randomly rotate this patch, so that we don't bias
                # the unrotated (and possibly unblurred) image
                shift = rs.randint(ORI)

                vispatch[:ORI] = np.roll(vispatch[:ORI], shift, axis=0)

                if POL == 2:
                    vispatch[ORI:] = np.roll(vispatch[ORI:], shift, axis=0)

                all_stds = ag.apply_once(np.std, all_img[tuple(inner_sel)],
                                         [1, 2], keepdims=False)
                if self._settings['normalize_globally']:
                    ok = np.median(all_stds) / img_std > std_thresh
                else:
                    ok = np.median(all_stds) > std_thresh

                if ok:
                    the_originals.append(vispatch)
                    if len(the_originals) % 500 == 0:
                        ag.info('Samples {}/{}'.format(len(the_originals),
                                                       max_samples))

                    if len(the_originals) >= max_samples:
                        return (np.asarray(the_originals),
                                np.asarray(the_rest))

                    consecutive_failures = 0
                    break
                else:
                    the_rest.append(vispatch)

                if tries == TRIES - 1:
                    ag.info('WARNING: {} tries'.format(TRIES))
                    ag.info('cons', consecutive_failures)
                    consecutive_failures += 1

                if consecutive_failures >= 10:
                    # Just give up.
                    raise ValueError('FATAL ERROR: Threshold is '
                                     'probably too high (in {})'
                                     .format(self.__class__.__name__))

    return np.asarray(the_originals), np.asarray(the_rest)