# NOTE(review): whitespace-collapsed fragment. The enclosing `def`, `for` and
# `if` headers are not visible here, so the original nesting cannot be
# recovered from this line alone -- restore it from the source file.
# Visible steps, in order:
#   * cast `zeros` to X_mb's dtype, build `mask_zeros` with new_len rows and
#     X_mb_mask's dtype, and append both along axis 0 to X_mb / X_mb_mask;
#   * assert that the [start:stop] slice of X_mb and X_mb_mask has cut_len rows;
#   * call `function` on that slice together with c_mb, c_mb_mask and the
#     carried state (prev_h1, prev_h2, prev_h3, prev_kappa, prev_w);
#   * take rval[0] as the cost, keep the last entry ([-1]) of rval[1:4],
#     rval[4] and rval[5] as the state for the next call;
#   * append the cost to partial_costs and return that list.
# The trailing run_loop(...) call passes _loop with the train/valid functions
# and iterators, plus checkpoint_every_n and skip_minimums=True.
zeros = zeros.astype(X_mb.dtype) mask_zeros = np.zeros((new_len, X_mb_mask.shape[1])) mask_zeros = mask_zeros.astype(X_mb_mask.dtype) X_mb = np.concatenate((X_mb, zeros), axis=0) X_mb_mask = np.concatenate((X_mb_mask, mask_zeros), axis=0) assert len(X_mb[start:stop]) == cut_len assert len(X_mb_mask[start:stop]) == cut_len rval = function(X_mb[start:stop], X_mb_mask[start:stop], c_mb, c_mb_mask, prev_h1, prev_h2, prev_h3, prev_kappa, prev_w) current_cost = rval[0] prev_h1, prev_h2, prev_h3 = rval[1:4] prev_h1 = prev_h1[-1] prev_h2 = prev_h2[-1] prev_h3 = prev_h3[-1] prev_kappa = rval[4][-1] prev_w = rval[5][-1] partial_costs.append(current_cost) return partial_costs run_loop(_loop, train_function, train_itr, cost_function, valid_itr, n_epochs=n_epochs, checkpoint_dict=checkpoint_dict, checkpoint_every_n=checkpoint_every_n, skip_minimums=True)
# NOTE(review): whitespace-collapsed fragment. The enclosing `def` header and
# the statements that define X_mb, n_cuts, cut_len and the prev_* state are
# not visible here, so the original indentation cannot be recovered from this
# line alone -- restore it from the source file.
# Visible steps, in order:
#   * start an empty partial_costs list and iterate n over range(n_cuts),
#     with start = n * cut_len and stop = (n + 1) * cut_len;
#   * when the [start:stop] slice of X_mb is shorter than cut_len, append
#     cut_len - len(X_mb) % cut_len zero rows (axis 0) to X_mb and X_mb_mask,
#     cast to the respective dtypes;
#   * assert that the slice of X_mb and X_mb_mask has cut_len rows;
#   * call `function` on that slice together with c_mb, c_mb_mask and the
#     carried state (prev_h1, prev_h2, prev_kappa, prev_w);
#   * take rval[0] as the cost, keep the last entry ([-1]) of rval[1:3],
#     rval[3] and rval[4] as the state for the next call;
#   * append the cost to partial_costs and return that list.
# The trailing run_loop(...) call passes _loop with the train/valid functions
# and iterators, n_epochs and checkpoint_dict.
partial_costs = [] for n in range(n_cuts): start = n * cut_len stop = (n + 1) * cut_len if len(X_mb[start:stop]) < cut_len: new_len = cut_len - len(X_mb) % cut_len zeros = np.zeros((new_len, X_mb.shape[1], X_mb.shape[2])) zeros = zeros.astype(X_mb.dtype) mask_zeros = np.zeros((new_len, X_mb_mask.shape[1])) mask_zeros = mask_zeros.astype(X_mb_mask.dtype) X_mb = np.concatenate((X_mb, zeros), axis=0) X_mb_mask = np.concatenate((X_mb_mask, mask_zeros), axis=0) assert len(X_mb[start:stop]) == cut_len assert len(X_mb_mask[start:stop]) == cut_len rval = function(X_mb[start:stop], X_mb_mask[start:stop], c_mb, c_mb_mask, prev_h1, prev_h2, prev_kappa, prev_w) current_cost = rval[0] prev_h1, prev_h2 = rval[1:3] prev_h1 = prev_h1[-1] prev_h2 = prev_h2[-1] prev_kappa = rval[3][-1] prev_w = rval[4][-1] partial_costs.append(current_cost) return partial_costs run_loop(_loop, train_function, train_itr, cost_function, valid_itr, n_epochs=n_epochs, checkpoint_dict=checkpoint_dict)
# NOTE(review): whitespace-collapsed fragment. The enclosing `def` and `for`
# headers are not visible here, so the original nesting cannot be recovered
# from this line alone -- restore it from the source file.
# Visible steps, in order:
#   * compute the window bounds start = n * cut_len, stop = (n + 1) * cut_len;
#   * when the [start:stop] slice of X_mb is shorter than cut_len, append
#     cut_len - len(X_mb) % cut_len zero rows (axis 0) to X_mb and X_mb_mask,
#     cast to the respective dtypes;
#   * assert that the slice of X_mb and X_mb_mask has cut_len rows;
#   * call `function` on that slice together with c_mb, c_mb_mask and the
#     carried state (prev_h1, prev_h2, prev_h3, prev_kappa, prev_w);
#   * take rval[0] as the cost, keep the last entry ([-1]) of rval[1:4],
#     rval[4] and rval[5] as the state for the next call;
#   * append the cost to partial_costs and return that list.
# The trailing run_loop(...) call passes _loop with the train/valid functions
# and iterators, plus checkpoint_every_n and skip_minimums=True.
start = n * cut_len stop = (n + 1) * cut_len if len(X_mb[start:stop]) < cut_len: new_len = cut_len - len(X_mb) % cut_len zeros = np.zeros((new_len, X_mb.shape[1], X_mb.shape[2])) zeros = zeros.astype(X_mb.dtype) mask_zeros = np.zeros((new_len, X_mb_mask.shape[1])) mask_zeros = mask_zeros.astype(X_mb_mask.dtype) X_mb = np.concatenate((X_mb, zeros), axis=0) X_mb_mask = np.concatenate((X_mb_mask, mask_zeros), axis=0) assert len(X_mb[start:stop]) == cut_len assert len(X_mb_mask[start:stop]) == cut_len rval = function(X_mb[start:stop], X_mb_mask[start:stop], c_mb, c_mb_mask, prev_h1, prev_h2, prev_h3, prev_kappa, prev_w) current_cost = rval[0] prev_h1, prev_h2, prev_h3 = rval[1:4] prev_h1 = prev_h1[-1] prev_h2 = prev_h2[-1] prev_h3 = prev_h3[-1] prev_kappa = rval[4][-1] prev_w = rval[5][-1] partial_costs.append(current_cost) return partial_costs run_loop(_loop, train_function, train_itr, cost_function, valid_itr, n_epochs=n_epochs, checkpoint_dict=checkpoint_dict, checkpoint_every_n=checkpoint_every_n, skip_minimums=True)
# NOTE(review): whitespace-collapsed fragment. The enclosing `def` and `for`
# headers (and the assignment of `start`) are not visible here, so the
# original nesting cannot be recovered from this line alone -- restore it
# from the source file.
# Visible steps, in order:
#   * compute the window end stop = (n + 1) * cut_len;
#   * when the [start:stop] slice of X_mb is shorter than cut_len, append
#     cut_len - len(X_mb) % cut_len zero rows (axis 0) to X_mb and X_mb_mask,
#     cast to the respective dtypes;
#   * assert that the slice of X_mb and X_mb_mask has cut_len rows;
#   * call `function` on that slice together with c_mb, c_mb_mask and the
#     carried state (prev_h1, prev_h2, prev_kappa, prev_w);
#   * take rval[0] as the cost, keep the last entry ([-1]) of rval[1:3],
#     rval[3] and rval[4] as the state for the next call;
#   * append the cost to partial_costs and return that list.
# The trailing run_loop(...) call passes _loop with the train/valid functions
# and iterators, n_epochs and checkpoint_dict.
stop = (n + 1) * cut_len if len(X_mb[start:stop]) < cut_len: new_len = cut_len - len(X_mb) % cut_len zeros = np.zeros((new_len, X_mb.shape[1], X_mb.shape[2])) zeros = zeros.astype(X_mb.dtype) mask_zeros = np.zeros((new_len, X_mb_mask.shape[1])) mask_zeros = mask_zeros.astype(X_mb_mask.dtype) X_mb = np.concatenate((X_mb, zeros), axis=0) X_mb_mask = np.concatenate((X_mb_mask, mask_zeros), axis=0) assert len(X_mb[start:stop]) == cut_len assert len(X_mb_mask[start:stop]) == cut_len rval = function(X_mb[start:stop], X_mb_mask[start:stop], c_mb, c_mb_mask, prev_h1, prev_h2, prev_kappa, prev_w) current_cost = rval[0] prev_h1, prev_h2 = rval[1:3] prev_h1 = prev_h1[-1] prev_h2 = prev_h2[-1] prev_kappa = rval[3][-1] prev_w = rval[4][-1] partial_costs.append(current_cost) return partial_costs run_loop(_loop, train_function, train_itr, cost_function, valid_itr, n_epochs=n_epochs, checkpoint_dict=checkpoint_dict)
# Reduce the training loss expression (built earlier, out of view) to its mean.
train_loss = train_loss.mean()

# Validation output is requested with deterministic=True, and its third and
# fourth axes are sliced down to `width` and `height` before being compared
# against the target.
valid_prediction = get_output(
    l_out, deterministic=True)[:, :, :width, :height]
valid_loss = squared_error(valid_prediction, target_var).mean()

# adam is the optimizer that is updating everything
params = get_all_params(l_out, trainable=True)
updates = adam(train_loss, params, learning_rate=1e-4)

# Compiled Theano functions: training applies `updates`, validation and
# prediction do not.
train_function = theano.function(
    [input_var, target_var], train_loss, updates=updates)
valid_function = theano.function([input_var, target_var], valid_loss)
predict_function = theano.function([input_var], prediction)

# Functions handed to run_loop through checkpoint_dict, keyed by role.
checkpoint_dict = {
    "train_function": train_function,
    "valid_function": valid_function,
    "predict_function": predict_function,
}


def _loop(function, itr):
    """Pull one (inputs, targets) pair from `itr` and evaluate `function` on it.

    Returns a single-element list holding the value `function` returned.
    """
    # NOTE(review): `.next()` is the Python 2 iterator spelling; this relies
    # on `itr` exposing a `next` method.
    inputs, targets = itr.next()
    return [function(inputs, targets)]


run_loop(
    _loop,
    train_function,
    train_itr,
    valid_function,
    valid_itr,
    n_epochs=n_epochs,
    checkpoint_dict=checkpoint_dict,
    checkpoint_every_n=100,
)
# adam is the optimizer that is updating everything
params = get_all_params(l_out, trainable=True)
updates = adam(train_loss, params, learning_rate=1e-4)

# Compile the Theano functions; only the training function applies `updates`.
# `train_loss`, `valid_loss` and `prediction` are built earlier, out of view.
loss_inputs = [input_var, target_var]
train_function = theano.function(loss_inputs, train_loss, updates=updates)
valid_function = theano.function(loss_inputs, valid_loss)
predict_function = theano.function([input_var], prediction)

# Functions handed to run_loop through checkpoint_dict, keyed by role.
checkpoint_dict = dict(
    train_function=train_function,
    valid_function=valid_function,
    predict_function=predict_function,
)


def _loop(function, itr):
    """Evaluate `function` on the next (inputs, targets) pair drawn from `itr`.

    The result is wrapped in a one-element list before being returned.
    """
    # NOTE(review): `.next()` is the Python 2 iterator spelling; this relies
    # on `itr` exposing a `next` method.
    batch_x, batch_y = itr.next()
    result = function(batch_x, batch_y)
    return [result]


run_loop(_loop, train_function, train_itr, valid_function, valid_itr,
         n_epochs=n_epochs, checkpoint_dict=checkpoint_dict,
         checkpoint_every_n=100)