def compute_pca(data_path=os.path.join(BASE_DIR, 'data/memmap/'), out_path=os.path.join(BASE_DIR, 'data/'), batch_size=500, image_size=3*300*300): ipca = IncrementalPCA(n_components=3, batch_size=batch_size) path = os.path.join(data_path, 'tn_x.dat') train = np.memmap(path, dtype=theano.config.floatX, mode='r+', shape=(4044,image_size)) n_samples, _ = train.shape for batch_num, batch in enumerate(gen_batches(n_samples, batch_size)): X = train[batch,:] X = np.reshape(X, (X.shape[0], 3, int(image_size/3))) X = X.transpose(0, 2, 1) X = np.reshape(X, (reduce(np.multiply, X.shape[:2]), 3)) ipca.partial_fit(X) path = os.path.join(data_path, 'v_x.dat') valid = np.memmap(path, dtype=theano.config.floatX, mode='r+', shape=(500,image_size)) n_samples, _ = valid.shape for batch_num, batch in enumerate(gen_batches(n_samples, batch_size)): X = valid[batch,:] X = np.reshape(X, (X.shape[0], 3, int(image_size/3))) X = X.transpose(0, 2, 1) X = np.reshape(X, (reduce(np.multiply, X.shape[:2]), 3)) ipca.partial_fit(X) eigenvalues, eigenvectors = np.linalg.eig(ipca.get_covariance()) eigenvalues.astype('float32').dump(os.path.join(out_path, 'eigenvalues.dat')) eigenvectors.astype('float32').dump(os.path.join(out_path, 'eigenvectors.dat'))
def parallel_classify(modelLoad, X_test, y_test, n_jobs=1, probability=False, pca=False): from joblib import Parallel, delayed from sklearn.utils import gen_batches n_samples, n_features = X_test.shape batch_size = n_samples // n_jobs # fastest (might be unsafe) def _predict(method, X, sl): return method(X[sl]) if probability == False: y_pred_list = Parallel(n_jobs)( delayed(_predict)(modelLoad.predict, X_test, sl) for sl in gen_batches(n_samples, batch_size)) else: y_pred_list = Parallel(n_jobs)( delayed(_predict)(modelLoad.predict_proba, X_test, sl) for sl in gen_batches(n_samples, batch_size)) y_pred = np.asarray(list(chain.from_iterable( y_pred_list))) # 9D list of arrays to a 1D numpy array return y_pred
def test_gen_batches(): # Make sure gen_batches errors on invalid batch_size assert_array_equal(list(gen_batches(4, 2)), [slice(0, 2, None), slice(2, 4, None)]) msg_zero = "gen_batches got batch_size=0, must be positive" with pytest.raises(ValueError, match=msg_zero): next(gen_batches(4, 0)) msg_float = "gen_batches got batch_size=0.5, must be an integer" with pytest.raises(TypeError, match=msg_float): next(gen_batches(4, 0.5))
def testMinMaxScalerPartialFit(self): # Test if partial_fit run over many batches of size 1 and 50 # gives the same results as fit X = self.X_2d n = X.shape[0] for chunk_size in [50, n, n + 42]: # Test mean at the end of the process scaler_batch = MinMaxScaler().fit(X) scaler_incr = MinMaxScaler() for batch in gen_batches(self.n_samples, chunk_size): scaler_incr = scaler_incr.partial_fit(X[batch]) assert_array_almost_equal(scaler_batch.data_min_, scaler_incr.data_min_) assert_array_almost_equal(scaler_batch.data_max_, scaler_incr.data_max_) assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ assert_array_almost_equal(scaler_batch.data_range_, scaler_incr.data_range_) assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_) assert_array_almost_equal(scaler_batch.min_, scaler_incr.min_) # Test std after 1 step batch0 = slice(0, chunk_size) scaler_batch = MinMaxScaler().fit(X[batch0]) scaler_incr = MinMaxScaler().partial_fit(X[batch0]) assert_array_almost_equal(scaler_batch.data_min_, scaler_incr.data_min_) assert_array_almost_equal(scaler_batch.data_max_, scaler_incr.data_max_) assert scaler_batch.n_samples_seen_ == scaler_incr.n_samples_seen_ assert_array_almost_equal(scaler_batch.data_range_, scaler_incr.data_range_) assert_array_almost_equal(scaler_batch.scale_, scaler_incr.scale_) assert_array_almost_equal(scaler_batch.min_, scaler_incr.min_) # Test std until the end of partial fits, and _ = MinMaxScaler().fit(X) scaler_incr = MinMaxScaler() # Clean estimator for i, batch in enumerate(gen_batches(self.n_samples, chunk_size)): scaler_incr = scaler_incr.partial_fit(X[batch]) assert_correct_incr(i, batch_start=batch.start, batch_stop=batch.stop, n=n, chunk_size=chunk_size, n_samples_seen=scaler_incr.n_samples_seen_)
def random_feature_subsets(array, batch_size, random_state=1234): """ Generate K subsets of the features in X """ random_state = check_random_state(random_state) features = range(array.shape[1]) random_state.shuffle(features) for batch in gen_batches(len(features), batch_size): yield features[batch]
def partial_fit(self, X, sample_indices=None): """ Update the factorization using rows from X Parameters ---------- X: ndarray, shape (n_samples, n_features) Input data sample_indices: Indices for each row of X. If None, consider that row i index is i (useful when providing the whole data to the function) Returns ------- self """ X = check_array(X, dtype=[np.float32, np.float64], order='C') n_samples, n_features = X.shape batches = gen_batches(n_samples, self.batch_size) for batch in batches: this_X = X[batch] these_sample_indices = get_sub_slice(sample_indices, batch) self._single_batch_fit(this_X, these_sample_indices) return self
def fit(self, X, y=None): """Fit the model with X, using minibatches of size batch_size. Parameters ---------- X: array-like, shape (n_samples, n_features) Training data, where n_samples is the number of samples and n_features is the number of features. y: Passthrough for ``Pipeline`` compatibility. Returns ------- self: object Returns the instance itself. """ if isinstance(X, Data): X = X[:] X = check_array(X, copy=self.copy, dtype=[np.float64, np.float32]) n_samples, n_features = X.shape if self.batch_size is None: batch_size = 12 * n_features else: batch_size = self.batch_size for batch in gen_batches(n_samples, batch_size): x = X[batch] self.partial_fit(x, check_input=False) return self
def test_sparse_matrices(): """Test that sparse and dense input matrices yield equal output.""" X = Xdigits_binary[:50] y = ydigits_binary[:50] X = csr_matrix(X) n_hidden = 15 batch_size = 10 # Standard ELM elm = ELMClassifier(random_state=1, n_hidden=n_hidden) # Batch based elm_batch_based = ELMClassifier(random_state=1, n_hidden=n_hidden, batch_size=10) # ELM for partial fitting elm_parital = ELMClassifier(random_state=1, n_hidden=n_hidden) # Train classifiers elm.fit(X, y) elm_batch_based.fit(X, y) for batch_slice in gen_batches(X.shape[0], batch_size): elm_parital.partial_fit(X[batch_slice], y[batch_slice]) # Get decision scores y_pred = elm.decision_function(X) y_pred_batch_based = elm_batch_based.decision_function(X) y_pred_partial = elm_parital.decision_function(X) # The prediction values should be the same assert_almost_equal(y_pred, y_pred_batch_based) assert_almost_equal(y_pred_batch_based, y_pred_partial)
def partial_fit(self, X, y=None): """ Online Learning with Min-Batch update Parameters ---------- X: sparse matrix, shape = [n_docs, n_vocabs] Data matrix to be decomposed Returns ------- self """ X = self._to_csr(X) n_docs, n_vocabs = X.shape batch_size = self.batch_size # initialize parameters or check if not hasattr(self, 'components_'): self._init_latent_vars(n_vocabs) if n_vocabs != self.n_vocabs: raise ValueError( "feature dimension(vocabulary size) doesn't match.") for idx_slice in gen_batches(n_docs, batch_size): self._em_step(X[idx_slice, :], batch_update=False) return self
def fit(self, X, y=None): """Fit the model with X, using minibatches of size batch_size. Parameters ---------- X: array-like, shape (n_samples, n_features) Training data, where n_samples is the number of samples and n_features is the number of features. y: Passthrough for ``Pipeline`` compatibility. Returns ------- self: object Returns the instance itself. """ self.components_ = None self.mean_ = None self.singular_values_ = None self.explained_variance_ = None self.explained_variance_ratio_ = None self.noise_variance_ = None self.var_ = None self.n_samples_seen_ = 0 #X = check_array(X, dtype=np.float) # --- ADJUSTED n_samples, _ = X.shape # --- ADJUSTED self.batch_size_ = self.batch_size # --- ADJUSTED iteration = 0 # --- ADJUSTED for batch in gen_batches(n_samples, self.batch_size_): print "Iteration " + str(iteration) # --- ADJUSTED self.partial_fit(X[batch].todense()[:, 0::4]) # --- ADJUSTED iteration += 1 # --- ADJUSTED return self
def reduce_contrasts( components: str = 'components_453_gm', studies: Union[str, List[str]] = 'all', masked_dir='unmasked', output_dir='reduced', n_jobs=1, lstsq=False, ): batch_size = 200 if not os.path.exists(output_dir): os.makedirs(output_dir) if studies == 'all': studies = STUDY_LIST modl_atlas = fetch_atlas_modl() mask = fetch_mask() dictionary = modl_atlas[components] masker = NiftiMasker(mask_img=mask).fit() components = masker.transform(dictionary) for study in studies: this_data, targets = load(join(masked_dir, 'data_%s.pt' % study)) n_samples = this_data.shape[0] batches = list(gen_batches(n_samples, batch_size)) this_data = Parallel(n_jobs=n_jobs, verbose=10, backend='multiprocessing', mmap_mode='r')(delayed(single_reduce)( components, this_data[batch], lstsq=lstsq) for batch in batches) this_data = np.concatenate(this_data, axis=0) dump((this_data, targets), join(output_dir, 'data_%s.pt' % study))
def mask_all(output_dir: str or None, n_jobs: int = 1, mask: str = 'hcp'): batch_size = 10 if not os.path.exists(output_dir): os.makedirs(output_dir) data = fetch_all() mask = fetch_mask()[mask] masker = NiftiMasker(smoothing_fwhm=4, mask_img=mask, verbose=0, memory_level=1, memory=None).fit() for study, this_data in data.groupby('study'): imgs = this_data['z_map'].values targets = this_data.reset_index()[['study', 'subject', 'contrast']] n_samples = this_data.shape[0] batches = list(gen_batches(n_samples, batch_size)) this_data = Parallel(n_jobs=n_jobs, verbose=10, backend='multiprocessing', mmap_mode='r')( delayed(single_mask)(masker, imgs[batch]) for batch in batches) this_data = np.concatenate(this_data, axis=0) dump((this_data, targets), join(output_dir, 'data_%s.pt' % study))
def _calc_raw(self): """ Returns ------- """ mem_per_pix = self.n_sho_bins * self.h5_sho_fit.dtype.itemsize + self.n_spec_bins * self.h5_raw.dtype.itemsize free_mem = self.max_ram batch_size = int(free_mem / mem_per_pix) batches = gen_batches(self.n_pixels, batch_size) w_vec = self.h5_spec_vals[get_attr(self.h5_spec_vals, 'Frequency')].squeeze() w_vec = w_vec[:self.n_bins] for pix_batch in batches: sho_chunk = self.h5_sho_fit[pix_batch, :].flatten() raw_data = np.zeros([sho_chunk.shape[0], self.n_bins], dtype=np.complex64) for iparm, sho_parms in enumerate(sho_chunk): raw_data[iparm, :] = SHOfunc(sho_parms, w_vec) self.h5_raw[pix_batch, :] = raw_data.reshape([-1, self.n_spec_bins]) self.h5_file.flush() return
def _find_impostors(self, Lx, margin_radii): n = Lx.shape[0] impostors = sparse.csr_matrix((n, n), dtype=np.int8) for class_ in self.classes_[:-1]: imp1, imp2 = [], [] ind_in, = np.where(np.equal(self.y_, class_)) ind_out, = np.where(np.greater(self.y_, class_)) # Subdivide idx_out x idx_in to chunks of a size that is # fitting in memory ii, jj = self._find_impostors_batch(Lx[ind_out], Lx[ind_in], margin_radii[ind_out], margin_radii[ind_in]) if len(ii): imp1.extend(ind_out[ii]) imp2.extend(ind_in[jj]) new_imps = sparse.csr_matrix(([1] * len(imp1), (imp1, imp2)), shape=(n, n), dtype=np.int8) impostors = impostors + new_imps imp1, imp2 = impostors.nonzero() if impostors.nnz > self.maxCst: # subsample constraints if too many randomState = check_random_state(self.randomState) ind_subsample = randomState.choice(impostors.nnz, self.maxCst, replace=False) imp1, imp2 = imp1[ind_subsample], imp2[ind_subsample] dist = np.zeros(len(imp1)) for chunk in gen_batches(len(imp1), 500): dist[chunk] = np.sum(np.square(Lx[imp1[chunk]] - Lx[imp2[chunk]]), axis=1) return imp1, imp2, dist
def sample_from_finite(x, m, random_state=None, full_after_one=False, replacement=False): random_state = check_random_state(random_state) n = x.shape[0] first_iter = True while True: if not first_iter and full_after_one: yield np.arange(x.shape[0]), torch.full((n, ), fill_value=-math.log(n)) else: if replacement: if n == m: indices = np.arange(n) else: indices = random_state.permutation(n)[:m] loga = torch.full((m, ), fill_value=-math.log(m)) yield indices, loga else: indices = random_state.permutation(n) for batches in gen_batches(x.shape[0], m): these_indices = indices[batches] this_m = len(these_indices) loga = torch.full((this_m, ), fill_value=-math.log(this_m)) yield these_indices, loga first_iter = False
def _decision_scores(self, X): """Predict using the ELM model Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data. Returns ------- y_pred : array-like, shape (n_samples,) or (n_samples, n_outputs) The predicted values. """ X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) if self.batch_size is None: hidden_activations = self._compute_hidden_activations(X) y_pred = safe_sparse_dot(hidden_activations, self.coef_output_) else: n_samples = X.shape[0] batches = gen_batches(n_samples, self.batch_size) y_pred = np.zeros((n_samples, self.n_outputs_)) for batch in batches: h_batch = self._compute_hidden_activations(X[batch]) y_pred[batch] = safe_sparse_dot(h_batch, self.coef_output_) return y_pred
def reduce_all(masked_dir, output_dir, n_jobs=1, lstsq=False, mask: str = 'icbm_gm'): batch_size = 200 if not os.path.exists(output_dir): os.makedirs(output_dir) modl_atlas = fetch_atlas_modl() mask = fetch_mask()[mask] dictionary = modl_atlas['components512'] masker = NiftiMasker(mask_img=mask).fit() components = masker.transform(dictionary) expr = re.compile("data_(.*).pt") for file in os.listdir(masked_dir): match = re.match(expr, file) if match: study = match.group(1) this_data, targets = load(join(masked_dir, file)) n_samples = this_data.shape[0] batches = list(gen_batches(n_samples, batch_size)) this_data = Parallel(n_jobs=n_jobs, verbose=10, backend='multiprocessing', mmap_mode='r')(delayed(single_reduce)( components, this_data[batch], lstsq=lstsq) for batch in batches) this_data = np.concatenate(this_data, axis=0) dump((this_data, targets), join(output_dir, 'data_%s.pt' % study))
def mask_contrasts(studies: Union[str, List[str]] = 'all', output_dir: str = 'masked', use_raw=False, n_jobs: int = 1): batch_size = 10 if not os.path.exists(output_dir): os.makedirs(output_dir) if use_raw and studies == 'all': data = fetch_all() else: data = fetch_contrasts(studies) mask = fetch_mask() masker = NiftiMasker(smoothing_fwhm=4, mask_img=mask, verbose=0, memory_level=1, memory=None).fit() for study, this_data in data.groupby('study'): imgs = this_data['z_map'].values targets = this_data.reset_index() n_samples = this_data.shape[0] batches = list(gen_batches(n_samples, batch_size)) this_data = Parallel(n_jobs=n_jobs, verbose=10, backend='multiprocessing', mmap_mode='r')( delayed(single_mask)(masker, imgs[batch]) for batch in batches) this_data = np.concatenate(this_data, axis=0) dump((this_data, targets), join(output_dir, 'data_%s.pt' % study))
def _compute_core_distances_(X, neighbors, min_samples, working_memory): """Compute the k-th nearest neighbor of each sample Equivalent to neighbors.kneighbors(X, self.min_samples)[0][:, -1] but with more memory efficiency. Parameters ---------- X : array, shape (n_samples, n_features) The data. neighbors : NearestNeighbors instance The fitted nearest neighbors estimator. working_memory : int, optional The sought maximum memory for temporary distance matrix chunks. When None (default), the value of ``sklearn.get_config()['working_memory']`` is used. Returns ------- core_distances : array, shape (n_samples,) Distance at which each sample becomes a core point. Points which will never be core have a distance of inf. """ n_samples = X.shape[0] core_distances = np.empty(n_samples) core_distances.fill(np.nan) chunk_n_rows = get_chunk_n_rows(row_bytes=16 * min_samples, max_n_rows=n_samples, working_memory=working_memory) slices = gen_batches(n_samples, chunk_n_rows) for sl in slices: core_distances[sl] = neighbors.kneighbors(X[sl], min_samples)[0][:, -1] return core_distances
def get_inception_output(inps, compute_fid=False): n_samples = len(inps) batches = list(gen_batches(n_samples, BATCH_SIZE)) n_batches = len(batches) # Thhis should be corrected to 1000 but everybody uses 1008 preds = np.zeros([len(inps), 1000], dtype=np.float32) if compute_fid: activations = np.zeros([len(inps), 2048], dtype=np.float32) with tf.Session(config=config) as sess: for i, batch in enumerate(batches): inp = inps[batch] / 255. * 2 - 1 if compute_fid: these_logits, these_activations = sess.run( [logits_, activations_], feed_dict={input_images_: inp}) activations[batch] = these_activations else: these_logits = sess.run(logits_, feed_dict={input_images_: inp}) preds[batch] = these_logits[:, :1000] if i % 100 == 0: print(f'inception network {i}/{n_batches}') preds = softmax(preds, axis=1) if compute_fid: return preds, activations else: return preds
def test_partial_fit_classification(): """Test partial_fit for classification. It should output the same results as 'fit' for binary and multi-class classification. """ for X, y in classification_datasets.values(): batch_size = 100 n_samples = X.shape[0] elm_fit = ELMClassifier(random_state=random_state, batch_size=batch_size) elm_partial_fit = ELMClassifier(random_state=random_state) elm_fit.fit(X, y) for batch_slice in gen_batches(n_samples, batch_size): elm_partial_fit.partial_fit(X[batch_slice], y[batch_slice], classes=np.unique(y)) pred1 = elm_fit.predict(X) pred2 = elm_partial_fit.predict(X) assert_array_equal(pred1, pred2) assert_greater(elm_fit.score(X, y), 0.95) assert_greater(elm_partial_fit.score(X, y), 0.95)
def test_partial_fit_regression(): """Test partial_fit for regression. It should output the same results as 'fit' for regression on different activations functions. """ X = Xboston y = yboston batch_size = 100 n_samples = X.shape[0] for activation in ACTIVATION_TYPES: elm_fit = ELMRegressor(random_state=random_state, C=100, activation=activation, batch_size=batch_size) elm_partial_fit = ELMRegressor(activation=activation, C=100, random_state=random_state, batch_size=batch_size) elm_fit.fit(X, y) for batch_slice in gen_batches(n_samples, batch_size): elm_partial_fit.partial_fit(X[batch_slice], y[batch_slice]) pred1 = elm_fit.predict(X) pred2 = elm_partial_fit.predict(X) assert_almost_equal(pred1, pred2, decimal=2) assert_greater(elm_fit.score(X, y), 0.85) assert_greater(elm_partial_fit.score(X, y), 0.85)
def _compute_chunked_score_samples(self, X): n_samples = _num_samples(X) if self._max_features == X.shape[1]: subsample_features = False else: subsample_features = True # We get as many rows as possible within our working_memory budget # (defined by sklearn.get_config()['working_memory']) to store # self._max_features in each row during computation. # # Note: # - this will get at least 1 row, even if 1 row of score will # exceed working_memory. # - this does only account for temporary memory usage while loading # the data needed to compute the scores -- the returned scores # themselves are 1D. chunk_n_rows = get_chunk_n_rows(row_bytes=16 * self._max_features, max_n_rows=n_samples) slices = gen_batches(n_samples, chunk_n_rows) scores = np.zeros(n_samples, order="f") for sl in slices: # compute score on the slices of test samples: scores[sl] = self._compute_score_samples(X[sl], subsample_features) return scores
def run_step(self, run_number, step_size, howlong): dfslot = self.get_input_slot('df') dfslot.update(run_number) if dfslot.has_deleted() or dfslot.has_updated(): logger.debug('has deleted or updated, reseting') self.reset() dfslot.update(run_number) print('dfslot has buffered %d elements'% dfslot.created_length()) if dfslot.created_length() < self.mbk.n_clusters: # Should add more than k items per loop return self._return_run_step(self.state_blocked, steps_run=0) indices = dfslot.next_created(step_size) # returns a slice steps = indices_len(indices) if steps==0: return self._return_run_step(self.state_blocked, steps_run=0) input_df = dfslot.data() X = self.filter_columns(input_df, fix_loc(indices)).values batch_size = self.mbk.batch_size or 100 for batch in gen_batches(steps, batch_size): self.mbk.partial_fit(X[batch]) if self._buffer is not None: df = pd.DataFrame({'labels': self.mbk.labels_}) df[self.UPDATE_COLUMN] = run_number self._buffer.append(df) with self.lock: self._df = pd.DataFrame(self.mbk.cluster_centers_, columns=self.columns) self._df[self.UPDATE_COLUMN] = run_number if self._buffer is not None: logger.debug('Setting the labels') self._labels = self._buffer.df() return self._return_run_step(dfslot.next_state(), steps_run=steps)
def fit(self, X, y = None): self.components_ = None self.n_samples_seen_ = 0 self.mean_ = .0 self.var_ = .0 self.singular_values_ = None self.explained_variance_ = None self.explained_variance_ratio_ = None self.singular_values_ = None self.noise_variance_ = None X = check_array(X, accept_sparse = ["csr", "csc"], copy = self.copy, dtype = [numpy.float64, numpy.float32]) n_samples, n_features = X.shape if self.batch_size is None: self.batch_size_ = 5 * n_features else: self.batch_size_ = self.batch_size for batch in gen_batches(n_samples, self.batch_size_): self.partial_fit(X[batch], check_input = False) return self
def _calc_raw(self): """ Returns ------- """ mem_per_pix = self.n_sho_bins * self.h5_sho_fit.dtype.itemsize + self.n_spec_bins * self.h5_raw.dtype.itemsize free_mem = get_available_memory() batch_size = int(free_mem / mem_per_pix) batches = gen_batches(self.n_pixels, batch_size) w_vec = self.h5_spec_vals[self.h5_raw.spec_dim_labels.index( 'Frequency')].squeeze() w_vec = w_vec[:self.n_bins] for pix_batch in batches: sho_chunk = self.h5_sho_fit[pix_batch, :].flatten() raw_data = np.zeros([sho_chunk.shape[0], self.n_bins], dtype=np.complex64) for iparm, sho_parms in enumerate(sho_chunk): raw_data[iparm, :] = SHOfunc(sho_parms, w_vec) self.h5_raw[pix_batch, :] = raw_data.reshape( [-1, self.n_spec_bins]) self.h5_file.flush() return
def fit(self, X, y): # Test if single fit or multiple fit X, y = check_X_y(X, y, estimator=self, ensure_min_samples=1) self.classes_ = np.sort(unique_labels(y)) if self.priors is None: # estimate priors from sample _, y_t = np.unique(y, return_inverse=True) # non-negative ints self.priors_ = np.bincount(y_t) / float(len(y)) else: self.priors_ = np.asarray(self.priors) if (self.priors_ < 0).any(): raise ValueError("priors must be non-negative") if not np.isclose(self.priors_.sum(), 1.0): warnings.warn("The priors do not sum to 1. Renormalizing", UserWarning) self.priors_ = self.priors_ / self.priors_.sum() # Get the maximum number of components if self.n_components is None: self._max_components = len(self.classes_) - 1 else: self._max_components = min( len(self.classes_) - 1, self.n_components) # LDA Logic begins here n_samples, n_features = X.shape if self.batch_size is None: self.batch_size = 5 * n_features for batch in gen_batches(n_samples, self.batch_size): self.partial_fit(X[batch], y[batch]) return self
def generator(Xs, padded_ys, masks, dataset_weights, batch_size, random_state=None): if batch_size is None: batch_sizes = [X.shape[0] for X in Xs] else: batch_sizes = [batch_size] * len(Xs) batchers = [iter([]) for _ in Xs] while True: for i, (X, y, mask, dataset_weight, batcher, batch_size) in enumerate( zip(Xs, padded_ys, masks, dataset_weights, batchers, batch_sizes)): try: batch = next(batcher) except StopIteration: permutation = random_state.permutation(X.shape[0]) X[:] = X[permutation] y[:] = y[permutation] mask[:] = mask[permutation] batcher = gen_batches(X.shape[0], batch_size) batchers[i] = batcher batch = next(batcher) batch_dataset_weight = np.ones(batch.stop - batch.start) * dataset_weight yield [X[batch], mask[batch]], y[batch], batch_dataset_weight
def _decision_scores(self, X): """Predict using the RandomNN model Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data. Returns ------- y_pred : array-like, shape (n_samples,) or (n_samples, n_outputs) The predicted values. """ X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) if self.batch_size is None: hidden_activations = self._compute_hidden_activations(X) y_pred = safe_sparse_dot(hidden_activations, self.coef_output_) else: n_samples = X.shape[0] batches = gen_batches(n_samples, self.batch_size) y_pred = np.zeros((n_samples, self.n_outputs_)) for batch in batches: h_batch = self._compute_hidden_activations(X[batch]) y_pred[batch] = safe_sparse_dot(h_batch, self.coef_output_) return y_pred
def create_raw_contrast_data(imgs, mask, raw_dir, memory=Memory(cachedir=None), n_jobs=1, batch_size=100): if not os.path.exists(raw_dir): os.makedirs(raw_dir) # Selection of contrasts masker = MultiNiftiMasker(smoothing_fwhm=0, mask_img=mask, memory=memory, memory_level=1, n_jobs=n_jobs).fit() mask_img_file = os.path.join(raw_dir, 'mask_img.nii.gz') masker.mask_img_.to_filename(mask_img_file) batches = gen_batches(len(imgs), batch_size) data = np.empty((len(imgs), masker.mask_img_.get_data().sum()), dtype=np.float32) for i, batch in enumerate(batches): print('Batch %i' % i) data[batch] = masker.transform(imgs['z_map'].values[batch]) imgs = pd.DataFrame(data=data, index=imgs.index, dtype=np.float32) imgs.to_pickle(join(raw_dir, 'imgs.pkl'))
def pairs_distances_batch(X, ind_a, ind_b, batch_size=500): """Equivalent to np.sum(np.square(x[ind_a] - x[ind_b]), axis=1) Parameters ---------- X : array_like An array of data samples with shape (n_samples, n_features_in). ind_a : array_like An array of samples indices with shape (m,). ind_b : array_like Another array of samples indices with shape (m,). batch_size : Size of each chunk of X to compute distances for (default: 500) Returns ------- array-like An array of pairwise distances with shape (m,). """ n = len(ind_a) res = np.zeros(n) for chunk in gen_batches(n, batch_size): res[chunk] = np.sum(np.square(X[ind_a[chunk]] - X[ind_b[chunk]]), axis=1) return res
def predict_numpy(self, X, y, batch_size): if batch_size is None: input_ = torch.from_numpy(X).float() if type( X) == np.ndarray else X target = torch.from_numpy(y) if type(y) == np.ndarray else y out = self.net.forward(input_, cluster=None, training=False) pred = out.max(1, keepdim=True)[1] correct = pred.eq(target.view_as(pred)).sum().item() accuracy = correct / len(target) iteration_test_loss = self.criterion(out, target) else: # this is needed for CNNs because they require more memory so we need # to feed test data as batches n_samples = X.shape[0] weighted_loss = weighted_correct = 0.0 for batch_slice in gen_batches(n_samples, batch_size): X_batch, y_batch = X[batch_slice, :], y[batch_slice] input_ = torch.from_numpy(X_batch).float() if type( X_batch) == np.ndarray else X_batch target = torch.from_numpy(y_batch) if type( y_batch) == np.ndarray else y_batch out = self.net.forward(input_, cluster=None, training=False) pred = out.max( 1, keepdim=True)[1] # TODO: pred here is just the last batch correct = pred.eq(target.view_as(pred)).sum().item() weighted_correct += correct weighted_loss += self.criterion( out, target) * (batch_slice.stop - batch_slice.start) accuracy = weighted_correct / n_samples iteration_test_loss = weighted_loss / n_samples return pred, accuracy, float(iteration_test_loss.data)
def score(self, X): ''' Returns the Kullback-Leibler divergence. Parameters ---------- X : array-like (str), shape [n_samples,] The data to encode. Returns ------- kl_divergence : float. Transformed input. ''' unq_X, lookup = np.unique(X, return_inverse=True) unq_V = self.ngrams_count.transform(unq_X) if self.add_words: unq_V2 = self.word_count.transform(unq_X) unq_V = sparse.hstack((unq_V, unq_V2), format='csr') self._add_unseen_keys_to_H_dict(unq_X) unq_H = self._get_H(unq_X) for slice in gen_batches(n=unq_H.shape[0], batch_size=self.batch_size): unq_H[slice] = _multiplicative_update_h( unq_V[slice], self.W_, unq_H[slice], epsilon=1e-3, max_iter=self.max_iter_e_step, rescale_W=self.rescale_W, gamma_shape_prior=self.gamma_shape_prior, gamma_scale_prior=self.gamma_scale_prior) kl_divergence = _beta_divergence( unq_V[lookup], unq_H[lookup], self.W_, 'kullback-leibler', square_root=False) return kl_divergence
def transform(self, X): """Transform X using the trained matrix W. Parameters ---------- X : array-like (str), shape [n_samples,] The data to encode. Returns ------- X_new : 2-d array, shape [n_samples, n_topics] Transformed input. """ unq_X = np.unique(X) unq_V = self.ngrams_count.transform(unq_X) if self.add_words: unq_V2 = self.word_count.transform(unq_X) unq_V = sparse.hstack((unq_V, unq_V2), format='csr') self._add_unseen_keys_to_H_dict(unq_X) unq_H = self._get_H(unq_X) for slice in gen_batches(n=unq_H.shape[0], batch_size=self.batch_size): unq_H[slice] = _multiplicative_update_h( unq_V[slice], self.W_, unq_H[slice], epsilon=1e-3, max_iter=100, rescale_W=self.rescale_W, gamma_shape_prior=self.gamma_shape_prior, gamma_scale_prior=self.gamma_scale_prior) self._update_H_dict(unq_X, unq_H) return self._get_H(X)
def do_pca(S, output_filename): model = sklearn.decomposition.IncrementalPCA(n_components=400, batch_size=300) model.fit(S.matrix) res = [] for batch in gen_batches(S.matrix.shape[0], 400): res.append(S.matrix[batch].todense()[:, 0::4]) output = np.vstack(res) f = open(output_filename, 'wb') np.savez(f, output) f.close()
def fit(self, X, y=None): self.mean_ = None self.covar_ = None self.n_samples_seen_ = 0. n_samples, n_features = X.shape if self.batch_size is None: self.batch_size_ = 5 * n_features else: self.batch_size_ = self.batch_size for batch in gen_batches(n_samples, self.batch_size_): self.partial_fit(X[batch]) return self
def scalable_frobenius_norm_discrepancy(X, U, s, V): # if the input is not too big, just call scipy if X.shape[0] * X.shape[1] < MAX_MEMORY: A = X - U.dot(np.diag(s).dot(V)) return norm_diff(A, norm='fro') print("... computing fro norm by batches...") batch_size = 1000 Vhat = np.diag(s).dot(V) cum_norm = .0 for batch in gen_batches(X.shape[0], batch_size): M = X[batch, :] - U[batch, :].dot(Vhat) cum_norm += norm_diff(M, norm='fro', msg=False) return np.sqrt(cum_norm)
def _online_dl_slow(X, alpha, learning_rate, A, B, counter, G, T, P, Q, fit_intercept, n_epochs, batch_size, random_state, verbose, impute, callback): row_nnz = X.getnnz(axis=1) max_idx_size = row_nnz.max() * batch_size row_range = row_nnz.nonzero()[0] n_rows, n_cols = X.shape n_components = P.shape[0] Q_idx = np.zeros((n_components, max_idx_size), order='F') last_call = 0 norm = np.zeros(n_components) if not fit_intercept: components_range = np.arange(n_components) else: components_range = np.arange(1, n_components) for e in range(n_epochs): random_state.shuffle(row_range) batches = gen_batches(len(row_range), batch_size) for batch in batches: row_batch = row_range[batch] idx = _update_code_slow(X, alpha, learning_rate, A, B, G, T, counter, P, Q, row_batch, impute=impute) random_state.shuffle(components_range) _update_dict_slow(X, A, B, G, Q, Q_idx, idx, fit_intercept, components_range, norm, impute=impute) # assert_array_almost_equal(Q.dot(Q.T), G) if verbose and counter[0] // (n_rows // verbose) == last_call + 1: print("Iteration %i" % (counter[0])) last_call += 1 callback()
def _reduced_transform(self, X): n_rows, n_cols = X.shape G = self.G_.copy() G.flat[::self.n_components + 1] += 2 * self.alpha len_subset = int(floor(n_cols / self.reduction)) batches = gen_batches(len(X), self.batch_size) row_range = self.random_state_.permutation(n_rows) code = np.zeros((n_rows, self.n_components), order='C') subset_range = np.arange(n_cols, dtype='i4') for batch in batches: sample_subset = row_range[batch] self.random_state_.shuffle(subset_range) subset = subset_range[:len_subset] self.row_counter_[sample_subset] += 1 this_X = X[sample_subset][:, subset] * self.reduction Dx = self.D_[:, subset].dot(this_X.T) code[batch] = linalg.solve(G, Dx, sym_pos=True, overwrite_a=True, check_finite=False).T return code
def rebuild_svd(h5_main, components=None, cores=None, max_RAM_mb=1024): """ Rebuild the Image from the SVD results on the windows Optionally, only use components less than n_comp. Parameters ---------- h5_main : hdf5 Dataset dataset which SVD was performed on components : {int, iterable of int, slice} optional Defines which components to keep Default - None, all components kept Input Types integer : Components less than the input will be kept length 2 iterable of integers : Integers define start and stop of component slice to retain other iterable of integers or slice : Selection of component indices to retain cores : int, optional How many cores should be used to rebuild Default - None, all but 2 cores will be used, min 1 max_RAM_mb : int, optional Maximum ammount of memory to use when rebuilding, in Mb. Default - 1024Mb Returns ------- rebuilt_data : HDF5 Dataset the rebuilt dataset """ comp_slice, num_comps = get_component_slice(components, total_components=h5_main.shape[1]) if isinstance(comp_slice, np.ndarray): comp_slice = list(comp_slice) dset_name = h5_main.name.split('/')[-1] # Ensuring that at least one core is available for use / 2 cores are available for other use max_cores = max(1, cpu_count() - 2) # print('max_cores',max_cores) if cores is not None: cores = min(round(abs(cores)), max_cores) else: cores = max_cores max_memory = min(max_RAM_mb * 1024 ** 2, 0.75 * get_available_memory()) if cores != 1: max_memory = int(max_memory / 2) ''' Get the handles for the SVD results ''' try: h5_svd_group = find_results_groups(h5_main, 'SVD')[-1] h5_S = h5_svd_group['S'] h5_U = h5_svd_group['U'] h5_V = h5_svd_group['V'] except KeyError: raise KeyError('SVD Results for {dset} were not found.'.format(dset=dset_name)) except: raise func, is_complex, is_compound, n_features, type_mult = check_dtype(h5_V) ''' Calculate the size of a single batch that will fit in the available memory ''' n_comps = h5_S[comp_slice].size mem_per_pix = (h5_U.dtype.itemsize + h5_V.dtype.itemsize * h5_V.shape[1]) * n_comps fixed_mem = h5_main.size * h5_main.dtype.itemsize if cores is None: free_mem = max_memory - fixed_mem else: free_mem = max_memory * 2 - fixed_mem batch_size = int(round(float(free_mem) / mem_per_pix)) batch_slices = gen_batches(h5_U.shape[0], batch_size) print('Reconstructing in batches of {} positions.'.format(batch_size)) print('Batchs should be {} Mb each.'.format(mem_per_pix * batch_size / 1024.0 ** 2)) ''' Loop over all batches. ''' ds_V = np.dot(np.diag(h5_S[comp_slice]), func(h5_V[comp_slice, :])) rebuild = np.zeros((h5_main.shape[0], ds_V.shape[1])) for ibatch, batch in enumerate(batch_slices): rebuild[batch, :] += np.dot(h5_U[batch, comp_slice], ds_V) rebuild = stack_real_to_target_dtype(rebuild, h5_V.dtype) print('Completed reconstruction of data from SVD results. 
Writing to file.') ''' Create the Group and dataset to hold the rebuild data ''' rebuilt_grp = create_indexed_group(h5_svd_group, 'Rebuilt_Data') h5_rebuilt = write_main_dataset(rebuilt_grp, rebuild, 'Rebuilt_Data', get_attr(h5_main, 'quantity'), get_attr(h5_main, 'units'), None, None, h5_pos_inds=h5_main.h5_pos_inds, h5_pos_vals=h5_main.h5_pos_vals, h5_spec_inds=h5_main.h5_spec_inds, h5_spec_vals=h5_main.h5_spec_vals, chunks=h5_main.chunks, compression=h5_main.compression) if isinstance(comp_slice, slice): rebuilt_grp.attrs['components_used'] = '{}-{}'.format(comp_slice.start, comp_slice.stop) else: rebuilt_grp.attrs['components_used'] = components copy_attributes(h5_main, h5_rebuilt, skip_refs=False) h5_main.file.flush() print('Done writing reconstructed data to file.') return h5_rebuilt
def _fit(self, X, y, sample_weight=None, incremental=False): """Fit the model to the data X and target y.""" # Validate input params if self.n_hidden <= 0: raise ValueError("n_hidden must be > 0, got %s." % self.n_hidden) if self.C <= 0.0: raise ValueError("C must be > 0, got %s." % self.C) if self.activation not in ACTIVATIONS: raise ValueError("The activation %s is not supported. Supported " "activation are %s." % (self.activation, ACTIVATIONS)) # Initialize public attributes if not hasattr(self, 'classes_'): self.classes_ = None if not hasattr(self, 'coef_hidden_'): self.coef_hidden_ = None # Initialize private attributes if not hasattr(self, '_HT_H_accumulated'): self._HT_H_accumulated = None X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], dtype=np.float64, order="C", multi_output=True) # This outputs a warning when a 1d array is expected if y.ndim == 2 and y.shape[1] == 1: y = column_or_1d(y, warn=True) # Classification if isinstance(self, ClassifierMixin): self.label_binarizer_.fit(y) if self.classes_ is None or not incremental: self.classes_ = self.label_binarizer_.classes_ if sample_weight is None: sample_weight = compute_sample_weight(self.class_weight, self.classes_, y) else: classes = self.label_binarizer_.classes_ if not np.all(np.in1d(classes, self.classes_)): raise ValueError("`y` has classes not in `self.classes_`." " `self.classes_` has %s. 'y' has %s." % (self.classes_, classes)) y = self.label_binarizer_.transform(y) # Ensure y is 2D if y.ndim == 1: y = np.reshape(y, (-1, 1)) n_samples, n_features = X.shape self.n_outputs_ = y.shape[1] # Step (1/2): Compute the hidden layer coefficients if (self.coef_hidden_ is None or (not incremental and not self.warm_start)): # Randomize and scale the input-to-hidden coefficients self._init_weights(n_features) # Step (2/2): Compute hidden-to-output coefficients if self.batch_size is None: # Run the least-square algorithm on the whole dataset batch_size = n_samples else: # Run the recursive least-square algorithm on mini-batches batch_size = self.batch_size batches = gen_batches(n_samples, batch_size) # (First time call) Run the least-square algorithm on batch 0 if not incremental or self._HT_H_accumulated is None: batch_slice = next(batches) H_batch = self._compute_hidden_activations(X[batch_slice]) # Get sample weights for the batch if sample_weight is None: sw = None else: sw = sample_weight[batch_slice] # beta_{0} = inv(H_{0}^T H_{0} + (1. / C) * I) * H_{0}.T y_{0} self.coef_output_ = ridge_regression(H_batch, y[batch_slice], 1. 
/ self.C, sample_weight=sw).T # Initialize K if this is batch based or partial_fit if self.batch_size is not None or incremental: # K_{0} = H_{0}^T * W * H_{0} weighted_H_batch = _multiply_weights(H_batch, sw) self._HT_H_accumulated = safe_sparse_dot(H_batch.T, weighted_H_batch) if self.verbose: y_scores = self._decision_scores(X[batch_slice]) if self.batch_size is None: verbose_string = "Training mean squared error =" else: verbose_string = "Batch 0, Training mean squared error =" print("%s %f" % (verbose_string, mean_squared_error(y[batch_slice], y_scores, sample_weight=sw))) # Run the least-square algorithm on batch 1, 2, ..., n for batch, batch_slice in enumerate(batches): # Compute hidden activations H_{i} for batch i H_batch = self._compute_hidden_activations(X[batch_slice]) # Get sample weights (sw) for the batch if sample_weight is None: sw = None else: sw = sample_weight[batch_slice] weighted_H_batch = _multiply_weights(H_batch, sw) # Update K_{i+1} by H_{i}^T * W * H_{i} self._HT_H_accumulated += safe_sparse_dot(H_batch.T, weighted_H_batch) # Update beta_{i+1} by # K_{i+1}^{-1} * H_{i+1}^T * W * (y_{i+1} - H_{i+1} * beta_{i}) y_batch = y[batch_slice] - safe_sparse_dot(H_batch, self.coef_output_) weighted_y_batch = _multiply_weights(y_batch, sw) Hy_batch = safe_sparse_dot(H_batch.T, weighted_y_batch) # Update hidden-to-output coefficients regularized_HT_H = self._HT_H_accumulated.copy() regularized_HT_H.flat[::self.n_hidden + 1] += 1. / self.C # It is safe to use linalg.solve (instead of linalg.lstsq # which is slow) since it is highly unlikely that # regularized_HT_H is singular due to the random # projection of the first layer and 'C' regularization being # not dangerously large. self.coef_output_ += linalg.solve(regularized_HT_H, Hy_batch, sym_pos=True, overwrite_a=True, overwrite_b=True) if self.verbose: y_scores = self._decision_scores(X[batch_slice]) print("Batch %d, Training mean squared error = %f" % (batch + 1, mean_squared_error(y[batch_slice], y_scores, sample_weight=sw))) return self
def fit(self, X, y, **dump_kwargs): if self.debug_folder is not None: self.dump_init() X_ref = self.fm_decoder.fm_to_csr(X, y) n_iter = X_ref.shape[0] * self.n_epochs // self.batch_size random_state = check_random_state(self.random_state) dict_init = random_state.randn(self.n_components, X_ref.shape[1]) dict_learning = MiniBatchDictionaryLearning( n_components=self.n_components, alpha=self.alpha, transform_alpha=self.alpha, fit_algorithm=self.algorithm, transform_algorithm=self.algorithm, dict_init=dict_init, l1_ratio=self.l1_ratio, batch_size=self.batch_size, shuffle=True, fit_intercept=self.fit_intercept, n_iter=n_iter, missing_values=0, learning_rate=self.learning_rate, learning_rate_offset=self.learning_rate_offset, verbose=3, debug_info=self.debug_folder is not None, random_state=self.random_state) if self.fit_intercept: self.dictionary_ = np.r_[np.ones((1, dict_init.shape[1])), dict_init] self.code_ = np.zeros((X.shape[0], self.n_components + 1)) else: self.dictionary_ = dict_init self.code_ = np.zeros((X.shape[0], self.n_components)) if self.debug_folder is None: (X_csr, self.global_mean_, self.sample_mean_, self.feature_mean_) = csr_center_data(X_ref) for i in range(self.n_epochs): dict_learning.partial_fit(X_csr, deprecated=False) if self.decreasing_batch_size: dict_learning.set_params(batch_size= dict_learning.batch_size // 2) self.n_iter_ = dict_learning.n_iter_ self.dictionary_ = dict_learning.components_ self.code_ = dict_learning.transform(X_csr) if self.debug_folder is not None: (X_csr, self.global_mean_, self.sample_mean_, self.feature_mean_) = csr_center_data(X_ref) self.dump_inter(**dump_kwargs) for i in range(self.n_epochs): permutation = random_state.permutation(X_csr.shape[0]) batches = gen_batches(X_csr.shape[0], X_csr.shape[0] // 5 + 1) last_seen = 0 for batch in batches: last_seen = max(batch.stop, last_seen) dict_learning.partial_fit(X_csr[permutation[batch]], deprecated=False) self.dictionary_ = dict_learning.components_ self.code_[permutation[:last_seen]] = dict_learning.\ transform(X_csr[permutation[:last_seen]]) self.n_iter_ = dict_learning.n_iter_ self.dump_inter(debug_dict=dict_learning.debug_info_, **dump_kwargs) if self.decreasing_batch_size: dict_learning.set_params(batch_size= dict_learning.batch_size // 2) self.dictionary_ = dict_learning.components_ self.code_ = dict_learning.transform(X_csr) return self
def _calc_sho(self, coef_OF_mat, coef_IF_mat, amp_noise=0.1, phase_noise=0.1, q_noise=0.2, resp_noise=0.01): """ Build the SHO dataset from the coefficient matrices Parameters ---------- coef_OF_mat : numpy.ndarray Out-of-field coefficients coef_IF_mat : numpy.ndarray In-field coefficients amp_noise : float Noise factor for amplitude parameter phase_noise : float Noise factor for phase parameter q_noise : float Noise factor for Q-value parameter resp_noise : float Noide factor for w0 parameter Returns ------- None """ # TODO: Fix sho parameter generation vdc_vec = self.h5_sho_spec_vals[self.h5_sho_spec_vals.attrs['DC_Offset']].squeeze() sho_field = self.h5_sho_spec_vals[self.h5_sho_spec_vals.attrs['Field']].squeeze() sho_of_inds = sho_field == 0 sho_if_inds = sho_field == 1 # determine how many pixels can be read at once mem_per_pix = vdc_vec.size * np.float32(0).itemsize free_mem = self.max_ram - vdc_vec.size * vdc_vec.dtype.itemsize * 6 batch_size = int(free_mem / mem_per_pix) batches = gen_batches(self.n_pixels, batch_size) for pix_batch in batches: R_OF = np.array([loop_fit_function(vdc_vec[sho_of_inds], coef) for coef in coef_OF_mat[pix_batch]]) R_IF = np.array([loop_fit_function(vdc_vec[sho_if_inds], coef) for coef in coef_IF_mat[pix_batch]]) R_mat = np.hstack([R_IF[:, np.newaxis, :], R_OF[:, np.newaxis, :]]) R_mat = np.rollaxis(R_mat, 1, R_mat.ndim).reshape(R_mat.shape[0], -1) del R_OF, R_IF amp = np.abs(R_mat) resp = coef_OF_mat[pix_batch, 9, None] * np.ones_like(R_mat) q_val = coef_OF_mat[pix_batch, 10, None] * np.ones_like(R_mat) * 10 phase = np.sign(R_mat) * np.pi / 2 self.h5_sho_fit[pix_batch, :] = stack_real_to_compound(np.hstack([amp, resp, q_val, phase, np.ones_like(R_mat)]), sho32) self.h5_sho_guess[pix_batch, :] = stack_real_to_compound(np.hstack([amp * get_noise_vec(self.n_sho_bins, amp_noise), resp * get_noise_vec(self.n_sho_bins, resp_noise), q_val * get_noise_vec(self.n_sho_bins, q_noise), phase * get_noise_vec(self.n_sho_bins, phase_noise), np.ones_like(R_mat)]), sho32) self.h5_file.flush() return
def online_dl(X, Q, P=None, alpha=1., learning_rate=1., offset=0., batch_size=1, reduction=1, l1_ratio=1., stat=None, impute=False, max_n_iter=0, freeze_first_col=False, random_state=None, verbose=0, debug=False, callback=None, backend='c'): """Matrix factorization estimation based on masked online dictionary learning. Parameters ---------- alpha: float, Regularization of the code (ridge penalty) learning_rate: float in [0.5, 1], Controls the sequence of weights in the update of the surrogate function batch_size: int, Number of samples to consider between each dictionary update offset: float, Offset in the sequence of weights in the update of the surrogate function reduction: float, Sets how much the data is masked during the algorithm freeze_first_col: boolean, Fixes the first dictionary atom Q: ndarray (n_components, n_features), Initial dictionary P: ndarray (n_components, n_samples), optional Array where the rolling code is kept (for matrix completion) l1_ratio: float in [0, 1]: Controls the sparsity of the dictionary impute: boolean, Updates the Gram matrix online (Experimental, non tested) max_n_iter: int, Number of samples to visit before stopping. If None, fit performs a single epoch on data random_state: int or RandomState Pseudo number generator state used for random sampling. verbose: boolean, Degree of output the procedure will print. backend: str in {'c', 'python'}, 'c' is faster, but 'python' is easier to hack debug: boolean, Keep tracks of the surrogate loss during the procedure callback: callable, Function to be called when printing information """ n_rows, n_cols = X.shape n_components = Q.shape[0] X = check_array(X, accept_sparse='csr', dtype='float', order='F') if Q.shape[1] != n_cols: Q = check_array(Q, order='F', dtype='float') raise ValueError('X and Q shape mismatch: %r != %r' % (n_cols, Q.shape[1])) if P is not None: P = check_array(P, order='C', dtype='float') if P.shape != (n_rows, Q.shape[0]): raise ValueError('Bad P shape: expected %r, got %r' % ((n_rows, Q.shape[0]), P.shape)) if debug and backend == 'c': raise NotImplementedError("Recording objective loss is only available" "with backend == 'python'") random_state = check_random_state(random_state) if stat is None: stat = _init_stats(Q, impute=impute, reduction=reduction, max_n_iter=max_n_iter, random_state=random_state) old_n_iter = stat.n_iter n_verbose_call = 0 if sp.isspmatrix_csr(X): row_range = X.getnnz(axis=1).nonzero()[0] max_subset_size = min(n_cols, batch_size * X.getnnz(axis=1).max()) else: row_range = np.arange(n_rows) max_subset_size = stat.subset_stop - stat.subset_start random_state.shuffle(row_range) batches = gen_batches(len(row_range), batch_size) if backend == 'c': R = np.empty((n_components, n_cols), order='F') Q_subset = np.empty((n_components, max_subset_size), order='F') norm = np.zeros(n_components) buffer = np.zeros(max_subset_size) old_sub_G = np.empty((n_components, n_components), order='F') G_temp = np.empty((n_components, n_components), order='F') if sp.isspmatrix_csr(X): P_temp = np.empty((n_components, batch_size), order='F') else: P_temp = np.empty((n_components, batch_size), order='F') if freeze_first_col: components_range = np.arange(1, n_components) else: components_range = np.arange(n_components) weights = np.zeros(max_subset_size + 1) subset_mask = np.zeros(n_cols, dtype='i1') dict_subset = np.zeros(max_subset_size, dtype='i4') dict_subset_lim = np.zeros(1, dtype='i4') this_X = np.zeros((1, max_subset_size), order='F') P_dummy = np.zeros((1, 1), order='C') for batch in 
batches: row_batch = row_range[batch] if sp.isspmatrix_csr(X): if backend == 'c': stat.n_iter = _update_code_sparse_batch(X.data, X.indices, X.indptr, n_rows, n_cols, row_batch, alpha, learning_rate, offset, Q, P if P is not None else P_dummy, stat.A, stat.B, stat.counter, stat.G, stat.T, impute, Q_subset, P_temp, G_temp, this_X, subset_mask, dict_subset, dict_subset_lim, weights, stat.n_iter, max_n_iter, P is not None ) # This is hackish, but np.where becomes a # bottleneck for low batch size otherwise dict_subset = dict_subset[:dict_subset_lim[0]] else: for j in row_batch: if 0 < max_n_iter <= stat.n_iter: return P, Q subset = X.indices[X.indptr[j]:X.indptr[j + 1]] reg = alpha * subset.shape[0] / n_cols this_X = np.empty((1, subset.shape[0]), order='F') this_X[:] = X.data[X.indptr[j]:X.indptr[j + 1]] this_P = _update_code_slow(this_X, subset, reg, learning_rate, offset, Q, stat, impute, debug) if P is not None: P[j] = this_P stat.n_iter += 1 dict_subset = np.concatenate([X.indices[ X.indptr[j]:X.indptr[j + 1]] for j in row_batch]) dict_subset = np.unique(dict_subset) else: # X is a dense matrix : we force masks if 0 < max_n_iter <= stat.n_iter + len(row_batch) - 1: return P, Q subset = stat.subset_array[stat.subset_start:stat.subset_stop] reg = alpha * subset.shape[0] / n_cols this_X = X[row_batch][:, subset] if backend == 'python': this_P = _update_code_slow(this_X, subset, reg, learning_rate, offset, Q, stat, impute, debug) else: _update_code(this_X, subset, reg, learning_rate, offset, Q, stat.A, stat.B, stat.counter, stat.G, stat.T, impute, Q_subset, P_temp, G_temp, subset_mask, weights) this_P = P_temp.T dict_subset = subset if P is not None: P[row_batch] = this_P[:len(row_batch)] stat.n_iter += len(row_batch) _update_subset_stat(stat, random_state) # Dictionary update if backend == 'python': _update_dict_slow(Q, dict_subset, freeze_first_col, l1_ratio, stat, impute, random_state) else: random_state.shuffle(components_range) _update_dict(Q, dict_subset, freeze_first_col, l1_ratio, stat.A, stat.B, stat.G, impute, R, Q_subset, old_sub_G, norm, buffer, components_range) if verbose and (stat.n_iter - old_n_iter) // ceil( int(n_rows / verbose)) == n_verbose_call: print("Iteration %i" % stat.n_iter) n_verbose_call += 1 if callback is not None: callback() return P, Q
def _fit_stochastic(self, X, y, activations, deltas, coef_grads, intercept_grads, layer_units, incremental): rng = check_random_state(self.random_state) if not incremental or not hasattr(self, '_optimizer'): params = self.coefs_ + self.intercepts_ if self.algorithm == 'sgd': self._optimizer = SGDOptimizer( params, self.learning_rate_init, self.learning_rate, self.momentum, self.nesterovs_momentum, self.power_t) elif self.algorithm == 'adam': self._optimizer = AdamOptimizer( params, self.learning_rate_init, self.beta_1, self.beta_2, self.epsilon) # early_stopping in partial_fit doesn't make sense early_stopping = self.early_stopping and not incremental if early_stopping: X, X_val, y, y_val = train_test_split( X, y, random_state=self.random_state, test_size=self.validation_fraction) if isinstance(self, ClassifierMixin): y_val = self.label_binarizer_.inverse_transform(y_val) else: X_val = None y_val = None n_samples = X.shape[0] if self.batch_size == 'auto': batch_size = min(200, n_samples) else: batch_size = np.clip(self.batch_size, 1, n_samples) try: for it in range(self.max_iter): X, y = shuffle(X, y, random_state=rng) accumulated_loss = 0.0 for batch_slice in gen_batches(n_samples, batch_size): activations[0] = X[batch_slice] batch_loss, coef_grads, intercept_grads = self._backprop( X[batch_slice], y[batch_slice], activations, deltas, coef_grads, intercept_grads) accumulated_loss += batch_loss * (batch_slice.stop - batch_slice.start) # update weights grads = coef_grads + intercept_grads self._optimizer.update_params(grads) self.n_iter_ += 1 self.loss_ = accumulated_loss / X.shape[0] self.t_ += n_samples self.loss_curve_.append(self.loss_) if self.verbose: print("Iteration %d, loss = %.8f" % (self.n_iter_, self.loss_)) # update no_improvement_count based on training loss or # validation score according to early_stopping self._update_no_improvement_count(early_stopping, X_val, y_val) # for learning rate that needs to be updated at iteration end self._optimizer.iteration_ends(self.t_) if self._no_improvement_count > 2: # not better than last two iterations by tol. # stop or decrease learning rate if early_stopping: msg = ("Validation score did not improve more than " "tol=%f for two consecutive epochs." % self.tol) else: msg = ("Training loss did not improve more than tol=%f" " for two consecutive epochs." % self.tol) is_stopping = self._optimizer.trigger_stopping( msg, self.verbose) if is_stopping: break else: self._no_improvement_count = 0 if incremental: break if self.n_iter_ == self.max_iter: # warnings.warn('Stochastic Optimizer: Maximum iterations' # ' reached and the optimization hasn\'t ' # 'converged yet.' # % (), 1) print "convergence warning" except KeyboardInterrupt: pass if early_stopping: # restore best weights self.coefs_ = self._best_coefs self.intercepts_ = self._best_intercepts
def _fit_sgd(self, X, y, activations, deltas, coef_grads, intercept_grads, layer_units, incremental): rng = check_random_state(self.random_state) ######## Initialize learning rate classes from base import learning_rate_class as lr coef_lr_classes = [lr(learning_rate=self.learning_rate, learning_rate_init=self.learning_rate_, momentum=self.momentum, nesterovs_momentum=self.nesterovs_momentum) for i in range(self.n_layers_ )] intercept_lr_classes = [lr(learning_rate=self.learning_rate, learning_rate_init=self.learning_rate_, momentum=self.momentum, nesterovs_momentum=self.nesterovs_momentum) for i in range(self.n_layers_ )] # early_stopping in partial_fit doesn't make sense early_stopping = self.early_stopping and not incremental if early_stopping: X, X_val, y, y_val = train_test_split(X, y, random_state=self.random_state, test_size=.1) y_val = self.label_binarizer_.inverse_transform(y_val) n_samples = X.shape[0] batch_size = np.clip(self.batch_size, 1, n_samples) # shorthands coef_velocity = self._coef_velocity intercept_velocity = self._intercept_velocity try: for it in range(self.max_iter): X, y = shuffle(X, y, random_state=rng) for batch_slice in gen_batches(n_samples, batch_size): activations[0] = X[batch_slice] self.loss_, coef_grads, intercept_grads = self._backprop( X[batch_slice], y[batch_slice], activations, deltas, coef_grads, intercept_grads) # update weights for i in range(self.n_layers_ - 1): self.coefs_[i] -= coef_lr_classes[i].get_update(coef_grads[i], self.coefs_[i]) self.intercepts_[i] -= intercept_lr_classes[i].get_update(intercept_grads[i], self.intercepts_[i]) self.n_iter_ += 1 self.t_ += n_samples self.loss_curve_.append(self.loss_) if self.verbose: print("Iteration %d, loss = %.8f" % (self.n_iter_, self.loss_)) # validation set evaluation if early_stopping: # compute validation score, use that for stopping self.validation_scores_.append(self.score(X_val, y_val)) if self.verbose: print("Validation score: %f" % (self.validation_scores_[-1])) # update best parameters # use validation_scores_, not loss_curve_ # let's hope no-one overloads .score with mse if self.validation_scores_[-1] > self.best_validation_score_: self.best_validation_score_ = self.validation_scores_[-1] self._best_coefs = [c for c in self.coefs_] self._best_intercepts = [i for i in self.intercepts_] if self.validation_scores_[-1] < self.best_validation_score_ + self.tol: self._no_improvement_count += 1 else: self._no_improvement_count = 0 else: if self.loss_curve_[-1] < self.best_loss_: self.best_loss_ = self.loss_curve_[-1] if self.loss_curve_[-1] > self.best_loss_ - self.tol: self._no_improvement_count += 1 else: self._no_improvement_count = 0 # stopping criteria if self._no_improvement_count > 20: # not better than last two iterations by tol. # stop or decreate learning rate msg = ("Training loss did not improve more than tol for two" " consecutive epochs.") if self.learning_rate == 'adaptive': if self.learning_rate_ > 1e-6: self.learning_rate_ /= 5 self._no_improvement_count = 0 if self.verbose: print(msg + " Setting learning rate to %f" % self.learning_rate_) else: if self.verbose: print(msg + " Learning rate too small. Stopping.") break else: # non-adaptive learning rates if self.verbose: print(msg + " Stopping.") break if incremental: break if self.n_iter_ == self.max_iter: warnings.warn('SGD: Maximum iterations have reached and' ' the optimization hasn\'t converged yet.' 
% (), ConvergenceWarning) except KeyboardInterrupt: pass if early_stopping: # restore best weights self.coefs_ = self._best_coefs self.intercepts_ = self._best_intercepts
def SGD(self, train_X, train_y, val_X=None, val_y=None):
    """Use stochastic gradient descent to estimate the weights and biases
    of the network.

    Parameters
    ----------
    train_X: (n_samples, n_features)-array of training data.
    train_y: (n_samples, 1)-array of training labels/values.
    val_X: (n_val_samples, n_features)-array of validation data.
    val_y: (n_val_samples, 1)-array of validation labels/values.
    """
    # Initialize parameters; the convergence trackers must live outside
    # the epoch loop, otherwise they are reset before ever being compared
    n_samples, n_features = train_X.shape
    prev_cost = np.inf
    cost_increase = 0

    # Start iterating over the network
    for i in range(self.epochs):
        # Shuffle the data and generate mini batches
        train_X, train_y = shuffle(train_X, train_y,
                                   random_state=self.rng_state)
        mini_batches = gen_batches(n_samples, self.mini_batch_size)
        for mini_batch in mini_batches:
            # Back-propagate the mini batch through the network
            cost, nabla_b, nabla_w = self.back_propagate(
                train_X[mini_batch], train_y[mini_batch])

            # Update the biases and weights
            for idx, layer in enumerate(self.layers):
                if self.adaptive_learning_rate is True:
                    # Update learning rate cache for rmsprop
                    layer.update_learning_rate_cache(
                        self.adaptive_learning_rate_decay,
                        nabla_b[idx], nabla_w[idx])
                    # Update learning rate
                    learning_rate_b = (self.learning_rate
                                       / np.sqrt(layer.cache_b + 1e-8))
                    learning_rate_w = (self.learning_rate
                                       / np.sqrt(layer.cache_w + 1e-8))
                else:
                    # If the learning rate is not adapted, just use the
                    # original learning rate
                    learning_rate_b = self.learning_rate
                    learning_rate_w = self.learning_rate

                # Perform the momentum gradient update step
                layer.velocity_b = (self.momentum * layer.velocity_b
                                    - learning_rate_b * nabla_b[idx])
                layer.biases += layer.velocity_b
                layer.velocity_w = (self.momentum * layer.velocity_w
                                    - learning_rate_w * nabla_w[idx])
                layer.weights += layer.velocity_w

        if self.verbose:
            if val_X is not None and val_y is not None:
                print("Epoch {0}: Training cost: {1}. Validation accuracy:"
                      " {2} / {3}".format(i + 1, cost,
                                          self.evaluate(val_X, val_y),
                                          len(val_y)))
            else:
                print("Epoch {0}: training cost = {1}".format(i + 1, cost))

        if cost > prev_cost:
            cost_increase += 1
            if cost_increase >= 0.2 * self.epochs:
                warnings.warn('Cost is increasing for more than 20% of the'
                              ' epochs. Consider reducing learning_rate and'
                              ' preprocessing your data with StandardScaler'
                              ' or MinMaxScaler.', ConvergenceWarning)
        elif np.abs(cost - prev_cost) < self.tol:
            print("Epoch {0}: Algorithm has converged.".format(i + 1))
            break

        prev_cost = cost
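# The SGD method above divides the learning rate by sqrt(layer.cache_b +
# 1e-8), where the caches are maintained by update_learning_rate_cache.
# A plausible sketch of what that cache update computes, assuming a
# standard RMSProp moving average of squared gradients (the internals of
# update_learning_rate_cache are not shown in the source, so this is an
# assumption; `decay` is a hypothetical value):
import numpy as np

def rmsprop_cache_update(cache, grad, decay=0.9):
    # Exponential moving average of the squared gradient; the effective
    # per-parameter step size then becomes
    # learning_rate / sqrt(cache + 1e-8), matching the division above.
    return decay * cache + (1 - decay) * grad ** 2

cache = np.zeros(3)
for _ in range(5):
    cache = rmsprop_cache_update(cache, np.array([0.5, -1.0, 2.0]))
step = 0.01 / np.sqrt(cache + 1e-8)  # per-parameter learning rate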
def partial_fit(self, X, y=None, sample_subset=None, check_input=True):
    """Stream data X to update the estimator dictionary.

    Parameters
    ----------
    X: ndarray (n_samples, n_features)
        Dataset to learn the code from
    """
    if self.backend not in ['python', 'c']:
        raise ValueError("Invalid backend %s" % self.backend)

    if self.debug and self.backend == 'c':
        raise NotImplementedError("Recording objective loss is only"
                                  " available with backend == 'python'")

    if not self._is_initialized():
        self._init(X)
        self._init_arrays(X)

    if check_input:
        X = check_array(X, dtype='float', order='C',
                        accept_sparse=self.sparse_)

    n_rows, n_cols = X.shape

    # Sample related variables
    if sample_subset is None:
        sample_subset = np.arange(n_rows, dtype='int')

    row_range = np.arange(n_rows)
    self.random_state_.shuffle(row_range)

    if self.backend == 'c':
        random_seed = self.random_state_.randint(np.iinfo(np.uint32).max)
        if self.sparse_:
            dict_learning_sparse(
                X.data, X.indices, X.indptr, n_rows, n_cols,
                row_range, sample_subset,
                self.batch_size, self.alpha, self.learning_rate,
                self.offset, self.fit_intercept, self.l1_ratio,
                self._get_var_red(), self._get_projection(),
                self.D_, self.code_, self.A_, self.B_, self.G_,
                self.beta_, self.multiplier_,
                self.counter_, self.row_counter_,
                self._D_subset, self._code_temp, self._G_temp,
                self._this_X, self._w_temp,
                self._subset_mask, self._dict_subset,
                self._dict_subset_lim, self._this_sample_subset,
                self._dummy_2d_float, self._R, self._D_range,
                self._norm_temp, self._proj_temp,
                random_seed, self.verbose, self.n_iter_,
                self._callback)
        else:
            dict_learning_dense(
                X, row_range, sample_subset,
                self.batch_size, self.alpha, self.learning_rate,
                self.offset, self.fit_intercept, self.l1_ratio,
                self._get_var_red(), self._get_projection(),
                self.replacement,
                self.D_, self.code_, self.A_, self.B_, self.G_,
                self.beta_, self.multiplier_,
                self.counter_, self.row_counter_,
                self._D_subset, self._code_temp, self._G_temp,
                self._this_X, self._full_X, self._w_temp,
                self._len_subset, self._subset_range,
                self._temp_subset, self._subset_lim,
                self._this_sample_subset, self._R, self._D_range,
                self._norm_temp, self._proj_temp,
                random_seed, self.verbose, self.n_iter_,
                self._callback)
    else:
        new_verbose_iter_ = 0
        old_n_iter = self.n_iter_[0]

        batches = gen_batches(len(row_range), self.batch_size)
        for batch in batches:
            if self.verbose:
                if self.n_iter_[0] - old_n_iter >= new_verbose_iter_:
                    print("Iteration %i" % self.n_iter_[0])
                    new_verbose_iter_ += n_rows // self.verbose
                    self._callback()

            row_batch = row_range[batch]
            len_batch = row_batch.shape[0]
            self._this_sample_subset[:len_batch] = sample_subset[row_batch]

            if 0 < self.max_n_iter <= self.n_iter_[0] + len_batch - 1:
                return

            if self.sparse_:
                for j in row_batch:
                    subset = X.indices[X.indptr[j]:X.indptr[j + 1]]
                    self._this_X[0, :subset.shape[0]] = \
                        X.data[X.indptr[j]:X.indptr[j + 1]]
                    self._update_code_slow(
                        self._this_X[:, :subset.shape[0]],
                        subset, sample_subset[j:j + 1])
                dict_subset = np.concatenate(
                    [X.indices[X.indptr[j]:X.indptr[j + 1]]
                     for j in row_batch])
                dict_subset = np.unique(dict_subset)
            # End if self.sparse_
            else:
                random_seed = self.random_state_.randint(
                    np.iinfo(np.uint32).max)
                _update_subset(self.replacement,
                               self._len_subset,
                               self._subset_range,
                               self._subset_lim,
                               self._temp_subset,
                               random_seed)
                subset = self._subset_range[
                    self._subset_lim[0]:self._subset_lim[1]]
                self._full_X[:len_batch] = X[row_batch]
                self._this_X[:len_batch] = self._full_X[:len_batch, subset]
                self._update_code_slow(self._this_X, subset,
                                       sample_subset[row_batch],
                                       full_X=self._full_X)
                dict_subset = subset
            # End else

            self._reset_stat()
            self.random_state_.shuffle(self._D_range)
            # Dictionary update
            self._update_dict_slow(dict_subset, self._D_range)
            self.n_iter_[0] += len(row_batch)
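# The streaming pattern above (shuffle the row indices, slice them with
# gen_batches, feed each slice to an incremental update) is the same one
# used to drive any partial_fit-style estimator. A minimal sketch using
# scikit-learn's MiniBatchDictionaryLearning on synthetic data (the
# shapes and batch size are arbitrary choices for illustration):
import numpy as np
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.utils import gen_batches

X = np.random.RandomState(0).randn(1000, 64)
dico = MiniBatchDictionaryLearning(n_components=16, random_state=0)

row_range = np.arange(X.shape[0])
np.random.RandomState(0).shuffle(row_range)
for sl in gen_batches(len(row_range), 100):
    # Each call updates the dictionary with one shuffled mini-batch
    dico.partial_fit(X[row_range[sl]])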
def _fit(self, X, y, incremental=False):
    # Make sure self.hidden_layer_sizes is a list
    hidden_layer_sizes = self.hidden_layer_sizes
    if not hasattr(hidden_layer_sizes, "__iter__"):
        hidden_layer_sizes = [hidden_layer_sizes]
    hidden_layer_sizes = list(hidden_layer_sizes)

    # Validate input parameters.
    if np.any(np.array(hidden_layer_sizes) <= 0):
        raise ValueError("hidden_layer_sizes must be > 0, got %s." %
                         hidden_layer_sizes)
    if not isinstance(self.shuffle, bool):
        raise ValueError("shuffle must be either True or False, got %s." %
                         self.shuffle)
    if self.max_iter <= 0:
        raise ValueError("max_iter must be > 0, got %s." % self.max_iter)
    if self.alpha < 0.0:
        raise ValueError("alpha must be >= 0, got %s." % self.alpha)
    if (self.learning_rate in ["constant", "invscaling"] and
            self.learning_rate_init <= 0.0):
        raise ValueError("learning_rate_init must be > 0, got %s." %
                         self.learning_rate_init)

    # raise ValueError if not registered
    if self.activation not in ACTIVATIONS:
        raise ValueError("The activation %s is not supported. Supported "
                         "activations are %s." % (self.activation,
                                                  ACTIVATIONS))
    if self.learning_rate not in ["constant", "invscaling"]:
        raise ValueError("learning rate %s is not supported. " %
                         self.learning_rate)
    if self.algorithm not in ["sgd", "l-bfgs"]:
        raise ValueError("The algorithm %s is not supported. " %
                         self.algorithm)

    # X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
    #                  multi_output=True)
    # This outputs a warning when a 1d array is expected
    # if y.ndim == 2 and y.shape[1] == 1:
    #     y = column_or_1d(y, warn=True)

    n_samples, n_features = X.shape

    # Classification
    if isinstance(self, ClassifierMixin):
        self.label_binarizer_.fit(y)

        if self.classes_ is None or not incremental:
            self.classes_ = self.label_binarizer_.classes_
        else:
            classes = self.label_binarizer_.classes_
            if not np.all(np.in1d(classes, self.classes_)):
                raise ValueError("`y` has classes not in `self.classes_`."
                                 " `self.classes_` has %s. 'y' has %s." %
                                 (self.classes_, classes))

        y = self.label_binarizer_.transform(y)

    # Ensure y is 2D
    if y.ndim == 1:
        y = y.reshape((-1, 1))

    self.n_outputs_ = y.shape[1]

    layer_units = ([n_features] + hidden_layer_sizes + [self.n_outputs_])

    # First time training the model
    if self.layers_coef_ is None or (not self.warm_start and
                                     not incremental):
        # Initialize parameters
        self.n_iter_ = 0
        self.t_ = 0
        self.learning_rate_ = self.learning_rate_init
        self.n_outputs_ = y.shape[1]

        # Compute the number of layers
        self.n_layers_ = len(layer_units)

        # Output for regression
        if not isinstance(self, ClassifierMixin):
            self.out_activation_ = 'identity'
        # Output for multi class
        elif self.label_binarizer_.y_type_ == 'multiclass':
            self.out_activation_ = 'softmax'
        # Output for binary class and multi-label
        else:
            self.out_activation_ = 'logistic'

        # Initialize coefficient and intercept layers
        self.layers_coef_ = []
        self.layers_intercept_ = []

        for i in range(self.n_layers_ - 1):
            rng = check_random_state(self.random_state)
            n_fan_in = layer_units[i]
            n_fan_out = layer_units[i + 1]

            # Use the initialization method recommended by Glorot et al.
            weight_init_bound = np.sqrt(6. / (n_fan_in + n_fan_out))

            self.layers_coef_.append(rng.uniform(-weight_init_bound,
                                                 weight_init_bound,
                                                 (n_fan_in, n_fan_out)))
            rng = check_random_state(self.random_state)
            self.layers_intercept_.append(rng.uniform(-weight_init_bound,
                                                      weight_init_bound,
                                                      n_fan_out))

    if self.shuffle:
        X, y = shuffle(X, y, random_state=self.random_state)

    # l-bfgs does not support mini-batches
    if self.algorithm == 'l-bfgs':
        batch_size = n_samples
    else:
        batch_size = np.clip(self.batch_size, 1, n_samples)

    # Initialize lists
    activations = [X]
    activations.extend(np.empty((batch_size, n_fan_out))
                       for n_fan_out in layer_units[1:])
    deltas = [np.empty_like(a_layer) for a_layer in activations]

    coef_grads = [np.empty((n_fan_in_, n_fan_out_))
                  for n_fan_in_, n_fan_out_ in zip(layer_units[:-1],
                                                   layer_units[1:])]
    intercept_grads = [np.empty(n_fan_out_)
                       for n_fan_out_ in layer_units[1:]]

    # Run the Stochastic Gradient Descent algorithm
    if self.algorithm == 'sgd':
        prev_cost = np.inf
        cost_increase_count = 0

        for i in range(self.max_iter):
            for batch_slice in gen_batches(n_samples, batch_size):
                activations[0] = X[batch_slice]
                self.cost_, coef_grads, intercept_grads = self._backprop(
                    X[batch_slice], y[batch_slice], activations, deltas,
                    coef_grads, intercept_grads)

                # update weights (use `j` so the epoch counter `i` is
                # not shadowed)
                for j in range(self.n_layers_ - 1):
                    self.layers_coef_[j] -= (self.learning_rate_ *
                                             coef_grads[j])
                    self.layers_intercept_[j] -= (self.learning_rate_ *
                                                  intercept_grads[j])

                if self.learning_rate == 'invscaling':
                    self.learning_rate_ = self.learning_rate_init / \
                        (self.t_ + 1) ** self.power_t

            if incremental is False:
                self.n_iter_ += 1

            self.t_ += n_samples

            if self.verbose:
                print("Iteration %d, cost = %.8f" % (self.n_iter_,
                                                     self.cost_))

            if self.cost_ > prev_cost:
                cost_increase_count += 1
                if cost_increase_count >= 0.2 * self.max_iter:
                    warnings.warn('Cost is increasing for more than 20% of'
                                  ' the iterations. Consider reducing'
                                  ' learning_rate_init and preprocessing'
                                  ' your data with StandardScaler or'
                                  ' MinMaxScaler.', ConvergenceWarning)
            elif prev_cost - self.cost_ < self.tol or incremental:
                break

            prev_cost = self.cost_

        if self.n_iter_ == self.max_iter:
            warnings.warn("SGD: Maximum iterations have been reached and"
                          " the optimization hasn't converged yet.",
                          ConvergenceWarning)

    # Run the LBFGS algorithm
    elif self.algorithm == 'l-bfgs':
        # Store meta information for the parameters
        self._coef_indptr = []
        self._intercept_indptr = []
        start = 0

        # Save sizes and indices of coefficients for faster unpacking
        for i in range(self.n_layers_ - 1):
            n_fan_in, n_fan_out = layer_units[i], layer_units[i + 1]

            end = start + (n_fan_in * n_fan_out)
            self._coef_indptr.append((start, end, (n_fan_in, n_fan_out)))
            start = end

        # Save sizes and indices of intercepts for faster unpacking
        for i in range(self.n_layers_ - 1):
            end = start + layer_units[i + 1]
            self._intercept_indptr.append((start, end))
            start = end

        # Run LBFGS
        packed_coef_inter = _pack(self.layers_coef_,
                                  self.layers_intercept_)

        if self.verbose is True or self.verbose >= 1:
            iprint = 1
        else:
            iprint = -1

        optimal_parameters, self.cost_, d = fmin_l_bfgs_b(
            x0=packed_coef_inter,
            func=self._cost_grad_lbfgs,
            maxfun=self.max_iter,
            iprint=iprint,
            pgtol=self.tol,
            args=(X, y, activations, deltas, coef_grads, intercept_grads))

        self._unpack(optimal_parameters)

    return self
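# _fit initializes each weight matrix with the Glorot/Xavier uniform
# scheme referenced in the comment above: bounds of
# +/- sqrt(6 / (fan_in + fan_out)). A standalone sketch of that
# initializer (the 64 -> 32 layer sizes are arbitrary):
import numpy as np

def glorot_uniform(n_fan_in, n_fan_out, seed=None):
    rng = np.random.RandomState(seed)
    bound = np.sqrt(6. / (n_fan_in + n_fan_out))
    return rng.uniform(-bound, bound, (n_fan_in, n_fan_out))

W = glorot_uniform(64, 32, seed=0)  # hidden layer 64 -> 32
assert np.abs(W).max() <= np.sqrt(6. / 96)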
def partial_fit(self, X, y=None, sample_subset=None, check_input=True):
    """Stream data X to update the estimator dictionary.

    Parameters
    ----------
    X: ndarray (n_samples, n_features)
        Dataset to learn the code from
    """
    X = self._prefit(X, check_input=check_input)
    n_rows, n_cols = X.shape

    # Sample related variables
    if sample_subset is None:
        sample_subset = np.arange(n_rows, dtype="int")

    row_range = np.arange(n_rows)
    self.random_state_.shuffle(row_range)

    if self.backend == "c":
        random_seed = self.random_state_.randint(np.iinfo(np.uint32).max)
        if self.sparse_:
            dict_learning_sparse(
                X.data, X.indices, X.indptr, n_rows, n_cols,
                row_range, sample_subset,
                self.batch_size, self.alpha, self.learning_rate,
                self.offset, self.fit_intercept, self.l1_ratio,
                self._get_projection(),
                self.D_, self.code_, self.A_, self.B_,
                self.counter_,
                self._D_subset, self._code_temp, self._G_temp,
                self._this_X, self._w_temp,
                self._subset_mask, self._dict_subset,
                self._dict_subset_lim, self._this_sample_subset,
                self._R, self._D_range,
                self._norm_temp, self._proj_temp,
                random_seed, self.verbose, self.n_iter_,
                self._callback,
            )
        else:
            dict_learning_dense(
                X, row_range, sample_subset,
                self.batch_size, self.alpha, self.learning_rate,
                self.offset, self.fit_intercept, self.l1_ratio,
                self._get_projection(),
                self.D_, self.code_, self.A_, self.B_,
                self.counter_,
                self._D_subset, self._code_temp, self._G_temp,
                self._this_X, self._w_temp,
                self._len_subset, self._subset_range,
                self._temp_subset, self._subset_lim,
                self._this_sample_subset,
                self._R, self._D_range,
                self._norm_temp, self._proj_temp,
                random_seed, self.verbose, self.n_iter_,
                self._callback,
            )
    else:
        new_verbose_iter_ = 0
        old_n_iter = self.n_iter_[0]

        batches = gen_batches(len(row_range), self.batch_size)
        for batch in batches:
            if self.verbose:
                if self.n_iter_[0] - old_n_iter >= new_verbose_iter_:
                    print("Iteration %i" % self.n_iter_[0])
                    new_verbose_iter_ += n_rows // self.verbose
                    self._callback()

            row_batch = row_range[batch]
            len_batch = row_batch.shape[0]
            self._this_sample_subset[:len_batch] = sample_subset[row_batch]

            if 0 < self.max_n_iter <= self.n_iter_[0] + len_batch - 1:
                return

            if self.sparse_:
                for j in row_batch:
                    subset = X.indices[X.indptr[j]:X.indptr[j + 1]]
                    if len(subset) == 0:
                        continue
                    self._this_X[0, :subset.shape[0]] = \
                        X.data[X.indptr[j]:X.indptr[j + 1]]
                    self._update_code_slow(
                        self._this_X[:, :subset.shape[0]],
                        subset, sample_subset[j:j + 1])
                dict_subset = np.concatenate(
                    [X.indices[X.indptr[j]:X.indptr[j + 1]]
                     for j in row_batch])
                dict_subset = np.unique(dict_subset)
            # End if self.sparse_
            else:
                random_seed = self.random_state_.randint(
                    np.iinfo(np.uint32).max)
                _update_subset(False,
                               self._len_subset,
                               self._subset_range,
                               self._subset_lim,
                               self._temp_subset,
                               random_seed)
                subset = self._subset_range[
                    self._subset_lim[0]:self._subset_lim[1]]
                self._this_X[:len_batch] = X[row_batch][:, subset]
                self._update_code_slow(self._this_X, subset,
                                       sample_subset[row_batch])
                dict_subset = subset
            # End else

            self.random_state_.shuffle(self._D_range)
            # Dictionary update
            self._update_dict_slow(dict_subset, self._D_range)
            self.n_iter_[0] += len(row_batch)
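# Both partial_fit variants iterate gen_batches(len(row_range),
# batch_size), which yields contiguous slices covering the whole range,
# with a possibly shorter final slice — which is why len_batch is
# re-read from the sliced index array on every iteration. A quick demo:
from sklearn.utils import gen_batches

print(list(gen_batches(7, 3)))
# [slice(0, 3, None), slice(3, 6, None), slice(6, 7, None)]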