def gate_layer(tparams, X_word, X_char, options, prefix, pretrain_mode,
               activ='lambda x: x', **kwargs):
    """ compute the forward pass for a gate layer

    Parameters
    ----------
    tparams        : OrderedDict of theano shared variables, {parameter name: value}
    X_word         : theano 3d tensor, word input, dimensions: (num of time steps, batch size, dim of vector)
    X_char         : theano 3d tensor, char input, dimensions: (num of time steps, batch size, dim of vector)
    options        : dictionary, {hyperparameter: value}
    prefix         : string, layer name
    pretrain_mode  : theano shared scalar, 0. = word only, 1. = char only, 2. = word & char
    activ          : string, activation function: 'linear', 'tanh', or 'rectifier'

    Returns
    -------
    X              : theano 3d tensor, final vector, dimensions: (num of time steps, batch size, dim of vector)
    """
    # compute gating values, Eq.(3)
    G = tensor.nnet.sigmoid(tensor.dot(X_word, tparams[p_name(prefix, 'v')])
                            + tparams[p_name(prefix, 'b')][0])
    X = ifelse(tensor.le(pretrain_mode, numpy.float32(1.)),
               ifelse(tensor.eq(pretrain_mode, numpy.float32(0.)), X_word, X_char),
               G[:, :, None] * X_char + (1. - G)[:, :, None] * X_word)
    return eval(activ)(X)

def get_aggregator(self):
    initialized = shared_like(0.)
    numerator_acc = shared_like(self.numerator)
    denominator_acc = shared_like(self.denominator)

    # Dummy default expression to use as the previously-aggregated
    # value, that has the same shape as the new result
    numerator_zeros = tensor.as_tensor(self.numerator).zeros_like()
    denominator_zeros = tensor.as_tensor(self.denominator).zeros_like()

    conditional_update_num = self.numerator + ifelse(initialized,
                                                     numerator_acc,
                                                     numerator_zeros)
    conditional_update_den = self.denominator + ifelse(initialized,
                                                       denominator_acc,
                                                       denominator_zeros)

    initialization_updates = [(numerator_acc, tensor.zeros_like(numerator_acc)),
                              (denominator_acc, tensor.zeros_like(denominator_acc)),
                              (initialized, 0.)]
    accumulation_updates = [(numerator_acc, conditional_update_num),
                            (denominator_acc, conditional_update_den),
                            (initialized, 1.)]
    aggregator = Aggregator(aggregation_scheme=self,
                            initialization_updates=initialization_updates,
                            accumulation_updates=accumulation_updates,
                            readout_variable=(numerator_acc / denominator_acc))
    return aggregator

def _recursive_step(self, i, regs, tokens, seqs, back_routes, back_lens):
    seq = seqs[i]
    # Encoding
    left, right, target = seq[0], seq[1], seq[2]
    left_rep = ifelse(T.lt(left, 0), tokens[-left], regs[left])
    right_rep = ifelse(T.lt(right, 0), tokens[-right], regs[right])
    rep = self._encode_computation(left_rep, right_rep)
    if self.deep:
        inter_rep = rep
        rep = self._deep_encode(inter_rep)
    else:
        inter_rep = T.constant(0)
    new_regs = T.set_subtensor(regs[target], rep)

    back_len = back_lens[i]
    back_reps, lefts, rights = self._unfold(back_routes[i], new_regs, back_len)
    gf_W_d1, gf_W_d2, gf_B_d1, gf_B_d2, distance, rep_gradient = self._unfold_gradients(
        back_reps, lefts, rights, back_routes[i], tokens, back_len)

    return ([rep, inter_rep, left_rep, right_rep, new_regs, rep_gradient, distance],
            self.decode_optimizer.setup(
                [self.W_d1, self.W_d2, self.B_d1, self.B_d2],
                [gf_W_d1, gf_W_d2, gf_B_d1, gf_B_d2],
                method=self.optimization, beta=self.beta))

def beta_div(X, W, H, beta):
    """Compute beta divergence D(X|WH)

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar

    Returns
    -------
    div : Theano scalar
        beta divergence D(X|WH)"""
    div = ifelse(
        T.eq(beta, 2),
        T.sum(1. / 2 * T.power(X - T.dot(H, W), 2)),
        ifelse(
            T.eq(beta, 0),
            T.sum(X / T.dot(H, W) - T.log(X / T.dot(H, W)) - 1),
            ifelse(
                T.eq(beta, 1),
                T.sum(T.mul(X, (T.log(X) - T.log(T.dot(H, W)))) + T.dot(H, W) - X),
                T.sum(1. / (beta * (beta - 1.)) *
                      (T.power(X, beta) +
                       (beta - 1.) * T.power(T.dot(H, W), beta) -
                       beta * T.power(T.mul(X, T.dot(H, W)), (beta - 1)))))))
    return div

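A minimal usage sketch for the snippet above (not part of the original source; the imports and toy values are assumptions): it compiles beta_div into a callable and evaluates the Euclidean case beta=2.

import numpy as np
import theano
import theano.tensor as T
from theano.ifelse import ifelse

X = T.matrix('X')
W = T.matrix('W')
H = T.matrix('H')
beta = T.scalar('beta')

# D(X|WH) compiled once; beta is passed at call time and ifelse picks the branch lazily.
divergence = theano.function([X, W, H, beta], beta_div(X, W, H, beta),
                             allow_input_downcast=True)

x = np.random.rand(4, 3)
h = np.random.rand(4, 2)
w = np.random.rand(2, 3)
print(divergence(x, w, h, 2.0))  # 0.5 * squared Frobenius error between x and h.dot(w)
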
def momentum_normscaled(loss, all_params, lr, mom, batch_size, max_norm=np.inf,
                        weight_decay=0.0, verbose=False):
    updates = []
    #all_grads = [theano.grad(loss, param) for param in all_params]
    all_grads = theano.grad(gradient_clipper(loss), all_params)

    grad_lst = [T.sum((grad / float(batch_size))**2) for grad in all_grads]
    grad_norm = T.sqrt(T.sum(grad_lst))

    if verbose:
        grad_norm = theano.printing.Print('MOMENTUM GRAD NORM1:')(grad_norm)

    all_grads = ifelse(T.gt(grad_norm, max_norm),
                       [grads * (max_norm / grad_norm) for grads in all_grads],
                       all_grads)

    if verbose:
        grad_lst = [T.sum((grad / float(batch_size))**2) for grad in all_grads]
        grad_norm = T.sqrt(T.sum(grad_lst))
        grad_norm = theano.printing.Print('MOMENTUM GRAD NORM2:')(grad_norm)
        all_grads = ifelse(T.gt(grad_norm, np.inf),
                           [grads * (max_norm / grad_norm) for grads in all_grads],
                           all_grads)

    for param_i, grad_i in zip(all_params, all_grads):
        mparam_i = theano.shared(np.zeros(param_i.get_value().shape,
                                          dtype=theano.config.floatX))
        v = mom * mparam_i - lr * (weight_decay * param_i + grad_i)
        updates.append((mparam_i, v))
        updates.append((param_i, param_i + v))
    return updates

def _forward(self):
    eps = self.eps
    param_size = (1, 1, self.n_output, 1, 1)
    self.gamma = self.declare(param_size)
    self.beta = self.declare(param_size)

    mean = self.inpt.mean(axis=[0, 1, 3, 4], keepdims=False)
    std = self.inpt.std(axis=[0, 1, 3, 4], keepdims=False)

    self._setup_running_metrics(self.n_output)
    self.running_mean.default_update = ifelse(
        self.training,
        (1.0 - self.alpha) * self.running_mean + self.alpha * mean,
        self.running_mean
    )
    self.running_std.default_update = ifelse(
        self.training,
        (1.0 - self.alpha) * self.running_std + self.alpha * std,
        self.running_std
    )

    # This will be optimized away, but ensures the running mean and the
    # running std get updated.
    # Reference: https://gist.github.com/f0k/f1a6bd3c8585c400c190#file-batch_norm-py-L86
    mean += 0 * self.running_mean
    std += 0 * self.running_std

    use_mean = ifelse(self.training, mean, self.running_mean)
    use_std = ifelse(self.training, std, self.running_std)

    use_mean = use_mean.dimshuffle('x', 'x', 0, 'x', 'x')
    use_std = use_std.dimshuffle('x', 'x', 0, 'x', 'x')
    norm_inpt = (self.inpt - use_mean) / (use_std + eps)
    self.output = self.gamma * norm_inpt + self.beta

def more_complex_test():
    notimpl = NotImplementedOp()
    ifelseifelseif = IfElseIfElseIf()

    x1 = T.scalar('x1')
    x2 = T.scalar('x2')
    c1 = T.scalar('c1')
    c2 = T.scalar('c2')
    t1 = ifelse(c1, x1, notimpl(x2))
    t1.name = 't1'
    t2 = t1 * 10
    t2.name = 't2'
    t3 = ifelse(c2, t2, x1 + t1)
    t3.name = 't3'
    t4 = ifelseifelseif(T.eq(x1, x2), x1, T.eq(x1, 5), x2, c2, t3, t3 + 0.5)
    t4.name = 't4'

    f = function([c1, c2, x1, x2], t4, mode=Mode(linker='vm',
                                                 optimizer='fast_run'))
    if theano.config.vm.lazy is False:
        try:
            f(1, 0, numpy.array(10, dtype=x1.dtype), 0)
            assert False
        except NotImplementedOp.E:
            pass
    else:
        print(f(1, 0, numpy.array(10, dtype=x1.dtype), 0))
        assert f(1, 0, numpy.array(10, dtype=x1.dtype), 0) == 20.5
    print('... passed')

def build_model(self):
    print '\n... building the model with unroll=%d, backroll=%d' \
        % (self.source.unroll, self.source.backroll)
    x = T.imatrix('x')
    y = T.imatrix('y')
    reset = T.scalar('reset')
    hiddens = [h['init'] for h in self.hiddens.values()]
    outputs_info = [None] * 3 + hiddens
    [losses, probs, errors, hids], updates = \
        theano.scan(self.step, sequences=[x, y], outputs_info=outputs_info)
    loss = losses.sum()
    error = errors.sum() / T.cast((T.neq(y, 255).sum()), floatX)

    hidden_updates_train = []
    hidden_updates_test = []
    for h in self.hiddens.values():
        h_train = ifelse(T.eq(reset, 0),
                         hids[-1 - self.source.backroll, :], T.ones_like(h['init']))
        h_test = ifelse(T.eq(reset, 0),
                        hids[-1, :], T.ones_like(h['init']))
        hidden_updates_train.append((h['init'], h_train))
        hidden_updates_test.append((h['init'], h_test))
    updates = self.source.get_updates(loss, self.sgd_params)
    updates += hidden_updates_train

    rets = [loss, probs[-1, :], error]
    mode = theano.Mode(linker='cvm')
    train_model = theano.function([x, y, reset, self.lr], rets,
                                  updates=updates, mode=mode)
    test_model = theano.function([x, y, reset], rets,
                                 updates=hidden_updates_test, mode=mode)
    return train_model, test_model

def build_model(shared_params, options, other_params):
    """
    Build the complete neural network model and return the symbolic variables
    """
    # symbolic variables
    x = tensor.matrix(name="x", dtype=floatX)
    y1 = tensor.iscalar(name="y1")
    y2 = tensor.iscalar(name="y2")

    # lstm cell
    (ht, ct) = lstm_cell(x, shared_params, options, other_params)  # gets the ht, ct

    # softmax 1 i.e. frame type prediction
    activation = tensor.dot(shared_params['softmax1_W'], ht).transpose() + shared_params['softmax1_b']
    frame_pred = tensor.nnet.softmax(activation)  # .transpose()

    # softmax 2 i.e. gesture class prediction
    #

    # predicted probability for frame type
    f_pred_prob = theano.function([x], frame_pred, name="f_pred_prob")
    # predicted frame type
    f_pred = theano.function([x], frame_pred.argmax(), name="f_pred")

    # cost
    cost = ifelse(tensor.eq(y1, 1),
                  -tensor.log(frame_pred[0, 0] + options['log_offset']) * other_params['begin_cost_factor'],
                  ifelse(tensor.eq(y1, 2),
                         -tensor.log(frame_pred[0, 1] + options['log_offset']) * other_params['end_cost_factor'],
                         ifelse(tensor.eq(y1, 3),
                                -tensor.log(frame_pred[0, 2] + options['log_offset']),
                                tensor.abs_(tensor.log(y1)))),
                  name='ifelse_cost')

    # function for output of the current lstm cell and softmax prediction
    f_model_cell_output = theano.function([x], (ht, ct, frame_pred), name="f_model_cell_output")
    # return the model symbolic variables and theano functions
    return x, y1, y2, f_pred_prob, f_pred, cost, f_model_cell_output

def norm_col(w, h):
    """Normalize the column vector w (Theano function).
    Apply the inverse normalization on h so that the product w.h does not change.

    Parameters
    ----------
    w: Theano vector
        vector to be normalised
    h: Theano vector
        vector to be rescaled by the inverse normalisation

    Returns
    -------
    w : Theano vector with the same shape as w
        normalised vector (w/norm)
    h : Theano vector with the same shape as h
        h*norm
    """
    norm = w.norm(2, 0)
    eps = 1e-12
    size_norm = (T.ones_like(w)).norm(2, 0)
    w = ifelse(T.gt(norm, eps),
               w / norm,
               (w + eps) / (eps * size_norm).astype(theano.config.floatX))
    h = ifelse(T.gt(norm, eps),
               h * norm,
               (h * eps * size_norm).astype(theano.config.floatX))
    return w, h

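A small usage sketch (assumed imports and test vectors, not from the original source): w is rescaled to unit L2 norm and h absorbs the norm, so the product w.h is unchanged.

import theano
import theano.tensor as T
from theano.ifelse import ifelse

w = T.vector('w')
h = T.vector('h')
normalise = theano.function([w, h], norm_col(w, h), allow_input_downcast=True)

w_n, h_n = normalise([3., 4.], [1., 2.])
print(w_n)  # [0.6, 0.8]  -> unit L2 norm
print(h_n)  # [5., 10.]   -> scaled by the original norm of w
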
def AdaMaxAvg2(ws, objective, alpha=.01, beta1=.1, beta2=.001, beta3=0.01, n_accum=1):
    if n_accum == 1:
        return AdaMaxAvg(ws, objective, alpha, beta1, beta2, beta3)
    print 'AdaMax_Avg2', 'alpha:', alpha, 'beta1:', beta1, 'beta2:', beta2, 'beta3:', beta3, 'n_accum:', n_accum

    gs = G.ndict.T_grad(objective.sum(), ws, disconnected_inputs='raise')

    new = OrderedDict()

    from theano.ifelse import ifelse
    it = G.sharedf(0.)
    new[it] = it + 1
    reset = T.eq(T.mod(it, n_accum), 0)
    update = T.eq(T.mod(it, n_accum), n_accum - 1)

    ws_avg = []
    for j in range(len(ws)):
        w_avg = {}
        for i in ws[j]:
            _w = ws[j][i]
            _g = gs[j][i]
            #_g = T.switch(T.isnan(_g),T.zeros_like(_g),_g) #remove NaN's
            mom1 = G.sharedf(_w.get_value() * 0.)
            _max = G.sharedf(_w.get_value() * 0.)
            w_avg[i] = G.sharedf(_w.get_value())
            g_sum = G.sharedf(_w.get_value() * 0.)

            new[g_sum] = ifelse(reset, _g, g_sum + _g)
            new[mom1] = ifelse(update, (1 - beta1) * mom1 + beta1 * new[g_sum], mom1)
            new[_max] = ifelse(update, T.maximum((1 - beta2) * _max, abs(new[g_sum]) + 1e-8), _max)
            new[_w] = ifelse(update, _w + alpha * new[mom1] / new[_max], _w)
            new[w_avg[i]] = ifelse(update, beta3 * new[_w] + (1. - beta3) * w_avg[i], w_avg[i])
        ws_avg += [w_avg]
    return new, ws_avg

def get_aggregator(self):
    initialized = shared_like(0.)
    numerator_acc = shared_like(self.numerator)
    denominator_acc = shared_like(self.denominator)

    conditional_update_num = ifelse(initialized,
                                    self.numerator + numerator_acc,
                                    self.numerator)
    conditional_update_den = ifelse(initialized,
                                    self.denominator + denominator_acc,
                                    self.denominator)

    initialization_updates = [(numerator_acc, tensor.zeros_like(numerator_acc)),
                              (denominator_acc, tensor.zeros_like(denominator_acc)),
                              (initialized, 0.)]
    accumulation_updates = [(numerator_acc, conditional_update_num),
                            (denominator_acc, conditional_update_den),
                            (initialized, 1.)]
    aggregator = Aggregator(aggregation_scheme=self,
                            initialization_updates=initialization_updates,
                            accumulation_updates=accumulation_updates,
                            readout_variable=(numerator_acc / denominator_acc))
    return aggregator

def build(self, output, tparams=None, BNparams=None):
    if self.BN_mode:
        self.BN_eps = npt(self.BN_eps)
        if not hasattr(self, 'BN_mean'):
            self.BN_mean = T.mean(output)
        if not hasattr(self, 'BN_std'):
            m2 = (1 + 1 / (T.prod(output.shape) - 1)).astype(floatX)
            self.BN_std = T.sqrt(m2 * T.var(output) + self.BN_eps)

        if self.BN_mode == 2:
            t_mean = T.mean(output, axis=[0, 2, 3], keepdims=True)
            t_var = T.var(output, axis=[0, 2, 3], keepdims=True)
            BN_mean = BNparams[p_(self.prefix, 'mean')].dimshuffle('x', 0, 'x', 'x')
            BN_std = BNparams[p_(self.prefix, 'std')].dimshuffle('x', 0, 'x', 'x')
            output = ifelse(
                self.training,
                (output - t_mean) / T.sqrt(t_var + self.BN_eps),
                (output - BN_mean) / BN_std)
            output *= tparams[p_(self.prefix, 'BN_scale')].dimshuffle('x', 0, 'x', 'x')
            output += tparams[p_(self.prefix, 'BN_shift')].dimshuffle('x', 0, 'x', 'x')

        elif self.BN_mode == 1:
            t_mean = T.mean(output)
            t_var = T.var(output)
            output = ifelse(
                self.training,
                (output - t_mean) / T.sqrt(t_var + self.BN_eps),
                ((output - BNparams[p_(self.prefix, 'mean')])
                 / BNparams[p_(self.prefix, 'std')]))
            output *= tparams[p_(self.prefix, 'BN_scale')]
            output += tparams[p_(self.prefix, 'BN_shift')]

    self.output = self.activation(output)

def call(self, vals, mask=None):
    block_out = vals[0]
    prev_out = vals[1]
    test_out = self.zi * block_out
    return ifelse(self.test, test_out, ifelse(self.zi, block_out, prev_out))

def get_sensi_speci(y_hat, y):
    # y_hat = T.concatenate(T.sum(input=y_hat[:, 0:2], axis=1), T.sum(input=y_hat[:, 2:], axis=1))
    y_hat = T.stacklists([y_hat[:, 0] + y_hat[:, 1],
                          y_hat[:, 2] + y_hat[:, 3] + y_hat[:, 4]]).T
    y_hat = T.argmax(y_hat, axis=1)  # per-sample predicted class (0 or 1)
    tag = 10 * y_hat + y

    tneg = T.cast((T.shape(tag[(T.eq(tag, 0.)).nonzero()]))[0], config.floatX)
    fneg = T.cast((T.shape(tag[(T.eq(tag, 1.)).nonzero()]))[0], config.floatX)
    fpos = T.cast((T.shape(tag[(T.eq(tag, 10.)).nonzero()]))[0], config.floatX)
    tpos = T.cast((T.shape(tag[(T.eq(tag, 11.)).nonzero()]))[0], config.floatX)
    # assert fneg + fneg + fpos + tpos == 1380

    # tneg.astype(config.floatX)
    # fneg.astype(config.floatX)
    # fpos.astype(config.floatX)
    # tpos.astype(config.floatX)

    speci = ifelse(T.eq((tneg + fpos), 0), np.float64(float('inf')), tneg / (tneg + fpos))
    sensi = ifelse(T.eq((tpos + fneg), 0), np.float64(float('inf')), tpos / (tpos + fneg))

    # gotcha: a symbolic comparison cannot drive a Python if/else
    # if T.eq((tneg + fpos), 0):
    #     speci = float('inf')
    # else:
    #     speci = tneg // (tneg + fpos)
    # if T.eq((tpos + fneg), 0.):
    #     sensi = float('inf')
    # else:
    #     sensi = tpos // (tpos + fneg)

    # speci.astype(config.floatX)
    # sensi.astype(config.floatX)
    return [sensi, speci]

def test_merge_ifs_true_false(self):
    raise SkipTest("Optimization temporarily disabled")
    x1 = tensor.scalar('x1')
    x2 = tensor.scalar('x2')
    y1 = tensor.scalar('y1')
    y2 = tensor.scalar('y2')
    w1 = tensor.scalar('w1')
    w2 = tensor.scalar('w2')
    c = tensor.iscalar('c')

    out = ifelse(c,
                 ifelse(c, x1, x2) + ifelse(c, y1, y2) + w1,
                 ifelse(c, x1, x2) + ifelse(c, y1, y2) + w2)
    f = theano.function([x1, x2, y1, y2, w1, w2, c], out,
                        allow_input_downcast=True)
    assert len([x for x in f.maker.env.toposort()
                if isinstance(x.op, IfElse)]) == 1

    rng = numpy.random.RandomState(utt.fetch_seed())
    vx1 = rng.uniform()
    vx2 = rng.uniform()
    vy1 = rng.uniform()
    vy2 = rng.uniform()
    vw1 = rng.uniform()
    vw2 = rng.uniform()
    assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 1),
                          vx1 + vy1 + vw1)
    assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 0),
                          vx2 + vy2 + vw2)

def __init__(self, factor=numpy.sqrt(2), decay=1.0, min_factor=None, padding=False, **kwargs):
    super(ConvFMPLayer, self).__init__(**kwargs)
    if min_factor is None:
        min_factor = factor
    factor = T.maximum(factor * (decay ** self.network.epoch), numpy.float32(min_factor))
    sizes_raw = self.source.output_sizes

    # handle size problems
    if not padding:
        padding = T.min(self.source.output_sizes / factor) <= 0
        padding = theano.printing.Print(global_fn=maybe_print_pad_warning)(padding)

    fixed_sizes = T.maximum(sizes_raw, T.cast(T.as_tensor(
        [factor + self.filter_height - 1, factor + self.filter_width - 1]), 'float32'))
    sizes = ifelse(padding, fixed_sizes, sizes_raw)
    X_size = T.cast(T.max(sizes, axis=0), "int32")

    def pad_fn(x_t, s):
        x = T.alloc(numpy.cast["float32"](0), X_size[0], X_size[1], self.X.shape[3])
        x = T.set_subtensor(x[:s[0], :s[1]], x_t[:s[0], :s[1]])
        return x

    fixed_X, _ = theano.scan(pad_fn, [self.X.dimshuffle(2, 0, 1, 3),
                                      T.cast(sizes_raw, "int32")])
    fixed_X = fixed_X.dimshuffle(1, 2, 0, 3)
    self.X = ifelse(padding, T.unbroadcast(fixed_X, 3), self.X)

    conv_out = CuDNNConvHWBCOpValidInstance(self.X, self.W, self.b)
    conv_out_sizes = self.conv_output_size_from_input_size(sizes)
    self.output, self.output_sizes = fmp(conv_out, conv_out_sizes, T.cast(factor, 'float32'))

def test_pushout1(self):
    raise SkipTest("Optimization temporarily disabled")
    x1 = tensor.scalar('x1')
    x2 = tensor.scalar('x2')
    y1 = tensor.scalar('y1')
    y2 = tensor.scalar('y2')
    w1 = tensor.scalar('w1')
    w2 = tensor.scalar('w2')
    c = tensor.iscalar('c')
    x, y = ifelse(c, (x1, y1), (x2, y2), name='f1')
    z = ifelse(c, w1, w2, name='f2')
    out = x * z * y
    f = theano.function([x1, x2, y1, y2, w1, w2, c], out,
                        allow_input_downcast=True)
    assert isinstance(f.maker.env.toposort()[-1].op, IfElse)

    rng = numpy.random.RandomState(utt.fetch_seed())
    vx1 = rng.uniform()
    vx2 = rng.uniform()
    vy1 = rng.uniform()
    vy2 = rng.uniform()
    vw1 = rng.uniform()
    vw2 = rng.uniform()
    assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 1),
                          vx1 * vy1 * vw1)
    assert numpy.allclose(f(vx1, vx2, vy1, vy2, vw1, vw2, 0),
                          vx2 * vy2 * vw2)

def decay(self):
    updates = []
    new_batch = ifelse(T.gt(self.batch, self.decay_batch), sharedX(0), self.batch + 1)
    new_lr = ifelse(T.gt(self.batch, self.decay_batch), self.lr * self.lr_decay_factor, self.lr)
    updates.append((self.batch, new_batch))
    updates.append((self.lr, new_lr))
    return updates

def gradients(cost, parameters, lr=0.001):
    updates = []
    c = 0
    for param in parameters:
        update = param - lr * theano.grad(cost, param)
        if c == 1 or c == 3:
            # update = t.minimum(t.abs_(update), np.pi) * (update / abs(update))
            # # update = t.maximum(update, 0)
            # update = t.minimum(update, np.pi)
            update = ifelse(t.lt(update, 0), np.pi * 2 - 0.001, update)
            update = ifelse(t.gt(update, np.pi * 2), 0.001, update)
        if c == 2:
            update = ifelse(t.lt(update, 2), float(20), update)
        elif c == 5 or c == 6:
            update = t.maximum(update, -5)
            update = t.minimum(update, 5)
        updates.append((param, update))
        c += 1
    return updates

def group_div(X, W, H, beta, params):
    """Compute beta divergence D(X|WH), intra-class distance
    and intra-session distance for a particular (class, session) couple [1]_.

    Parameters
    ----------
    X : Theano tensor
        data
    W : Theano tensor
        Bases
    H : Theano tensor
        activation matrix
    beta : Theano scalar
    params : Theano tensor
        Matrix of parameter related to class/session.
        :params[0][0]: index for the (class, session) couple
        :params[1][0]: number of vector basis related to class
        :params[1][1]: number of vector basis related to session
        :params[2]: weight on the class/session similarity constraints
        :params[3]: sessions in which class c appears
        :params[4]: classes present in session s

    Returns
    -------
    cost : Theano scalar
        total cost
    div : Theano scalar
        beta divergence D(X|WH)
    sum_cls : Theano scalar
        intra-class distance
    sum_ses : Theano scalar
        intra-session distance"""
    ind = params[0][0]
    k_cls = params[1][0]
    k_ses = params[1][1]
    lambdas = params[2]
    Sc = params[3]
    Cs = params[4]
    res_ses, up = theano.scan(
        fn=lambda Cs, prior_result: prior_result + eucl_dist(
            W[ind, :, k_cls:k_cls + k_ses],
            W[Cs, :, k_cls:k_cls + k_ses]),
        outputs_info=T.zeros_like(beta),
        sequences=Cs)
    sum_ses = ifelse(T.gt(Cs[0], 0), res_ses[-1], T.zeros_like(beta))
    res_cls, up = theano.scan(
        fn=lambda Sc, prior_result: prior_result + eucl_dist(
            W[ind, :, 0:k_cls],
            W[Sc, :, 0:k_cls]),
        outputs_info=T.zeros_like(beta),
        sequences=Sc)
    sum_cls = ifelse(T.gt(Sc[0], 0), res_cls[-1], T.zeros_like(beta))
    betaDiv = beta_div(X, W[ind].T, H, beta)

    cost = lambdas[0] * sum_cls + lambdas[1] * sum_ses + betaDiv
    return cost, betaDiv, sum_cls, sum_ses

def one_run(my_x, my_y, my_z,
            my_u, my_v, my_w,
            my_weight,
            my_heat, my_albedo, my_microns_per_shell):
    # move
    random = rng.uniform(low=0.00003, high=1.)
    t = -T.log(random)

    x_moved = my_x + my_u * t
    y_moved = my_y + my_v * t
    z_moved = my_z + my_w * t

    # absorb
    shell = T.cast(T.sqrt(T.sqr(x_moved) + T.sqr(y_moved) + T.sqr(z_moved))
                   * my_microns_per_shell, 'int32')
    shell = T.clip(shell, 0, SHELL_MAX - 1)

    new_weight = my_weight * my_albedo

    # new direction
    xi1 = rng.uniform(low=-1., high=1.)
    xi2 = rng.uniform(low=-1., high=1.)
    xi_norm = T.sqrt(T.sqr(xi1) + T.sqr(xi2))

    t_xi = rng.uniform(low=0.000000001, high=1.)

    # rescale xi12 to fit t_xi as norm
    xi1 = xi1 / xi_norm * T.sqr(t_xi)
    xi2 = xi2 / xi_norm * T.sqr(t_xi)

    u_new_direction = 2. * t_xi - 1.
    v_new_direction = xi1 * T.sqrt((1. - T.sqr(u_new_direction)) / t_xi)
    w_new_direction = xi2 * T.sqrt((1. - T.sqr(u_new_direction)) / t_xi)

    # roulette
    weight_for_starting_roulette = 0.001
    CHANCE = 0.1
    partakes_roulette = T.switch(T.lt(new_weight, weight_for_starting_roulette), 1, 0)
    roulette = rng.uniform(low=0., high=1.)
    loses_roulette = T.gt(roulette, CHANCE)
    # if roulette decides to terminate the photon: set weight to 0
    weight_after_roulette = ifelse(T.and_(partakes_roulette, loses_roulette),
                                   0., new_weight)
    # if partakes in roulette but does not get terminated
    weight_after_roulette = ifelse(T.and_(partakes_roulette, T.invert(loses_roulette)),
                                   weight_after_roulette / CHANCE,
                                   weight_after_roulette)

    new_heat = (1.0 - my_albedo) * my_weight
    heat_i = my_heat[shell]

    return (x_moved, y_moved, z_moved,
            u_new_direction, v_new_direction, w_new_direction,
            weight_after_roulette), \
        OrderedDict({my_heat: T.inc_subtensor(heat_i, new_heat)})

def scalar_armijo_search(phi, phi0, derphi0, c1=constant(1e-4),
                         n_iters=10, profile=0):
    """
    .. todo::

        WRITEME
    """
    alpha0 = one
    phi_a0 = phi(alpha0)
    alpha1 = -(derphi0) * alpha0 ** 2 / 2.0 /\
        (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = phi(alpha1)

    csol1 = phi_a0 <= phi0 + c1 * derphi0
    csol2 = phi_a1 <= phi0 + c1 * alpha1 * derphi0

    def armijo(alpha0, alpha1, phi_a0, phi_a1):
        factor = alpha0 ** 2 * alpha1 ** 2 * (alpha1 - alpha0)
        a = alpha0 ** 2 * (phi_a1 - phi0 - derphi0 * alpha1) - \
            alpha1 ** 2 * (phi_a0 - phi0 - derphi0 * alpha0)
        a = a / factor
        b = -alpha0 ** 3 * (phi_a1 - phi0 - derphi0 * alpha1) + \
            alpha1 ** 3 * (phi_a0 - phi0 - derphi0 * alpha0)
        b = b / factor

        alpha2 = (-b + TT.sqrt(abs(b ** 2 - 3 * a * derphi0))) / (3.0 * a)
        phi_a2 = phi(alpha2)

        end_condition = phi_a2 <= phi0 + c1 * alpha2 * derphi0
        end_condition = TT.bitwise_or(
            TT.isnan(alpha2), end_condition)
        end_condition = TT.bitwise_or(
            TT.isinf(alpha2), end_condition)
        alpha2 = TT.switch(
            TT.bitwise_or(alpha1 - alpha2 > alpha1 / constant(2.),
                          one - alpha2 / alpha1 < 0.96),
            alpha1 / constant(2.),
            alpha2)
        return [alpha1, alpha2, phi_a1, phi_a2], \
            theano.scan_module.until(end_condition)

    states = [alpha0, alpha1, phi_a0, phi_a1]
    # print 'armijo'
    rvals, _ = scan(armijo,
                    outputs_info=states,
                    n_steps=n_iters,
                    name='armijo',
                    mode=theano.Mode(linker='cvm'),
                    profile=profile)

    sol_scan = rvals[1][-1]
    a_opt = ifelse(csol1, one,
                   ifelse(csol2, alpha1, sol_scan))
    score = ifelse(csol1, phi_a0,
                   ifelse(csol2, phi_a1, rvals[2][-1]))
    return a_opt, score

def _FindB_best(lPLcl, lPprev, dVLcl):
    srtLcl = tensor.argsort(-lPLcl)
    srtLcl = srtLcl[:beam_size]
    deltaVec = tensor.fill(lPLcl[srtLcl], numpy_floatX(-10000.))
    deltaVec = tensor.set_subtensor(deltaVec[0], lPprev)
    lProbBest = ifelse(tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                       lPLcl[srtLcl] + lPprev, deltaVec)
    xWIdxBest = ifelse(tensor.eq(dVLcl, tensor.zeros_like(dVLcl)),
                       srtLcl, tensor.zeros_like(srtLcl))
    return lProbBest, xWIdxBest

def __call__(self, input):
    mean = input.mean(self.axes, keepdims=True)
    std = input.std(self.axes, keepdims=True) + self.epsilon

    # Don't batch-normalise a single data point
    mean = ifelse(T.gt(input.shape[0], 1), mean, T.zeros(mean.shape, dtype=mean.dtype))
    std = ifelse(T.gt(input.shape[0], 1), std, T.ones(std.shape, dtype=std.dtype))
    return (input - mean) * T.addbroadcast((self.gamma / std) + self.beta, *self.axes)

def viterbi(self, tokScore, prevScore):
    transition = self.A[:-1]
    candidates = (prevScore + transition.T).T
    bestIndex = T.argmax(candidates, axis=0)
    scoreNew = T.max(candidates, axis=0) + tokScore

    scoreSum = T.sum(prevScore)
    scoreNew = ifelse(T.eq(scoreSum, 0), tokScore + self.A[-1], scoreNew)
    bestIndex = ifelse(T.eq(scoreSum, 0),
                       T.arange(self.n_tags).astype('int64'),
                       bestIndex.astype('int64'))
    return scoreNew, bestIndex

def compute_y(idx, p, q, S, D):
    yi = ifelse(T.eq(idx, 0),
                T.dot(D[0], p[-1]),
                ifelse(T.eq(idx, nT - 1),
                       T.dot(D[-1], p[0]) + q[-1],
                       T.dot(D[idx], p[-idx - 1]) + q[idx - 1]))
    return yi

def scaled_cost(x, t):
    sq_error = (x - t) ** 2
    above_thresh_sq_error = sq_error[(t > THRESHOLD).nonzero()]
    below_thresh_sq_error = sq_error[(t <= THRESHOLD).nonzero()]
    above_thresh_mean = above_thresh_sq_error.mean()
    below_thresh_mean = below_thresh_sq_error.mean()
    above_thresh_mean = ifelse(T.isnan(above_thresh_mean), 0.0, above_thresh_mean)
    below_thresh_mean = ifelse(T.isnan(below_thresh_mean), 0.0, below_thresh_mean)
    return (above_thresh_mean + below_thresh_mean) / 2.0

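A hedged usage sketch (THRESHOLD, the imports and the toy targets are assumptions, not from the original source): the cost averages the MSE computed separately over targets above and below the threshold, and the ifelse guards replace a NaN mean (empty group) with 0.

import theano
import theano.tensor as T
from theano.ifelse import ifelse

THRESHOLD = 0.5  # assumed module-level constant used by scaled_cost

x = T.vector('x')  # predictions
t = T.vector('t')  # targets
cost = theano.function([x, t], scaled_cost(x, t), allow_input_downcast=True)

print(cost([0.1, 0.9], [0.0, 1.0]))  # both groups have squared error 0.01 -> 0.01
print(cost([0.1, 0.2], [0.0, 0.0]))  # above-threshold group is empty -> its mean treated as 0
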
def test_merge(self):
    raise SkipTest("Optimization temporarily disabled")
    x = tensor.vector('x')
    y = tensor.vector('y')
    c = tensor.iscalar('c')
    z1 = ifelse(c, x + 1, y + 1)
    z2 = ifelse(c, x + 2, y + 2)
    z = z1 + z2
    f = theano.function([c, x, y], z)
    assert len([x for x in f.maker.env.toposort()
                if isinstance(x.op, IfElse)]) == 1

def scan_y(cur_step):
    # Compute pairwise affinities
    sum_y = tensor.sum(tensor.square(y_arg), 1)
    num = 1 / (1 + tensor.add(tensor.add(-2 * tensor.dot(y_arg, y_arg.T), sum_y).T, sum_y))
    num = tensor.set_subtensor(num[range(n), range(n)], 0)
    Q = num / tensor.sum(num)
    Q = tensor.maximum(Q, 1e-12)
    PQ = p_arg - Q

    def inner(pq_i, num_i, y_arg_i):
        return tensor.sum(tensor.tile(pq_i * num_i, (no_dims, 1)).T * (y_arg_i - y_arg), 0)

    dy_arg, _ = theano.scan(inner, outputs_info=None, sequences=[PQ, num, y_arg])
    dy_arg = tensor.cast(dy_arg, FLOATX)
    # dy_arg = y_arg

    momentum = ifelse(tensor.lt(cur_step, 20), initial_momentum_f, final_momentum_f)

    indexsa = tensor.neq((dy_arg > 0), (iy_arg > 0)).nonzero()
    indexsb = tensor.eq((dy_arg > 0), (iy_arg > 0)).nonzero()
    resulta = tensor.set_subtensor(gains_arg[indexsa], gains_arg[indexsa] + 0.2)
    resultb = tensor.set_subtensor(resulta[indexsb], resulta[indexsb] * 0.8)
    indexs_min = (resultb < min_gain_f).nonzero()
    new_gains_arg = tensor.set_subtensor(resultb[indexs_min], min_gain_f)

    # last step in simple version of SNE
    new_iy_arg = momentum * iy_arg - eta * (new_gains_arg * dy_arg)
    new_y_arg = y_arg + new_iy_arg
    new_y_arg = new_y_arg - tensor.tile(tensor.mean(new_y_arg, 0), (n, 1))

    # # Compute current value of cost function
    # if (cur_step + 1) % 10 == 0:
    #     C = tensor.sum(p_arg * tensor.log(p_arg / Q))
    #     print "Iteration ", (cur_step + 1), ": error is ", C

    # Stop lying about P-values
    # new_p_arg = p_arg
    # if cur_step == 2:
    #     new_p_arg = p_arg / 4
    #     p_arg = p_arg / 4
    #     p_arg.set_value(p_arg.get_value / 4)
    new_p_arg = ifelse(tensor.eq(cur_step, 100), p_arg / 4, p_arg)

    return [(y_arg, new_y_arg), (iy_arg, new_iy_arg),
            (gains_arg, new_gains_arg), (p_arg, new_p_arg)]

def sr1(inverse_hessian, weight_delta, gradient_delta, epsilon=1e-8):
    epsilon = asfloat(epsilon)
    param = weight_delta - inverse_hessian.dot(gradient_delta)
    denominator = T.dot(param, gradient_delta)

    return ifelse(
        T.lt(
            T.abs_(denominator),
            epsilon * param.norm(L=2) * gradient_delta.norm(L=2)
        ),
        inverse_hessian,
        inverse_hessian + T.outer(param, param) / denominator
    )

def iftrain(self, then_branch, else_branch):
    """
    Execute `then_branch` when training.
    """
    return ifelse(self._training_flag, then_branch, else_branch, name="iftrain")

def do_step(i, x_, h_, c_):
    """
    i: The step number (int)
    x_: An input vector
    h_: A hidden state vector
    c_: A memory cell vector
    """
    y_prob, h, c = self.step(x_, h_, c_)
    y_candidate = ifelse(
        int(stochastic),
        rng.multinomial(n=1, pvals=y_prob[None, :])[0].astype(theano.config.floatX),
        y_prob)
    # y_candidate = ifelse(int(stochastic), rng.multinomial(n=1, pvals=y_prob.dimshuffle('x', 1))[0].astype(theano.config.floatX), y_prob)
    y = ifelse(i < n_primer_steps, primer[i], y_candidate)
    # Note: If you get an error here, you just need to prime with something on the first call.
    return y, h, c

def value_single(self, x, y, f):
    ret = T.mean([
        T.min([1. - (1 - y) + f[2], 1.]),
        T.min([1. - f[2] + (1 - y), 1.])
    ])
    ret = T.cast(ret, dtype=theano.config.floatX)
    return T.cast(ifelse(T.eq(self.condition_single(x, f), 1.), ret, 1.),
                  dtype=theano.config.floatX)

def __init__(self, layers, cost_y, cost_z, alpha=0.5, updater='Adam',
             size_y=128, verbose=2, interpolated=True, zero_shot=False):
    self.settings = locals()
    del self.settings['self']
    self.layers = layers
    self.cost_y = cost_y
    self.cost_z = cost_z
    if isinstance(updater, basestring):
        self.updater = case_insensitive_import(passage.updates, updater)()
    else:
        self.updater = updater
    self.iterator = SortedPaddedXYZ(size_y=size_y, shuffle=False)
    self.size_y = size_y
    self.verbose = verbose
    self.interpolated = interpolated
    self.zero_shot = zero_shot

    for i in range(1, len(self.layers)):
        self.layers[i].connect(self.layers[i - 1])
    self.params = flatten([l.params for l in layers])
    self.alpha = alpha

    self.X = self.layers[0].input
    self.y_tr = self.layers[-1].output_left(dropout_active=True)
    self.y_te = self.layers[-1].output_left(dropout_active=False)
    self.Y = T.tensor3()
    self.z_tr = self.layers[-1].output_right(dropout_active=True)
    self.z_te = self.layers[-1].output_right(dropout_active=False)
    self.Z = T.matrix()

    cost_y = self.cost_y(self.Y, self.y_tr)
    if self.zero_shot:
        # In zero-shot setting, we disable z-loss for examples with zero z-targets
        cost_z = ifelse(T.gt(self.Z.norm(2), 0.0), self.cost_z(self.Z, self.z_tr), 0.0)
    else:
        cost_z = self.cost_z(self.Z, self.z_tr)

    if self.interpolated:
        cost = self.alpha * cost_y + (1.0 - self.alpha) * cost_z
    else:
        cost = self.alpha * cost_y + cost_z

    cost_valid_y = self.cost_y(self.Y, self.y_te)
    cost_valid_z = self.cost_z(self.Z, self.z_te)
    cost_valid = self.alpha * cost_valid_y + (1.0 - self.alpha) * cost_valid_z

    self.updates = self.updater.get_updates(self.params, cost)

    #grads = theano.tensor.grad(cost, self.params)
    #norm = theano.tensor.sqrt(sum([theano.tensor.sum(g**2) for g in grads]))

    self._train = theano.function([self.X, self.Y, self.Z], cost, updates=self.updates)
    self._params = theano.function([], self.params[0])
    self._cost = theano.function([self.X, self.Y, self.Z], cost)
    self._cost_valid = theano.function([self.X, self.Y, self.Z],
                                       [cost_valid_y, cost_valid_z, cost_valid])
    self._predict_y = theano.function([self.X], self.y_te)
    self._predict_z = theano.function([self.X], self.z_te)
    self._predict = theano.function([self.X], [self.y_te, self.z_te])

def init_train_updates(self):
    network_input = self.variables.network_input
    network_output = self.variables.network_output
    inv_hessian = self.variables.inv_hessian
    prev_params = self.variables.prev_params
    prev_full_gradient = self.variables.prev_full_gradient

    params = list(iter_parameters(self))
    param_vector = parameters2vector(self)

    gradients = T.grad(self.variables.error_func, wrt=params)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    new_inv_hessian = ifelse(
        T.eq(self.variables.epoch, 1),
        inv_hessian,
        self.update_function(inv_hessian,
                             param_vector - prev_params,
                             full_gradient - prev_full_gradient)
    )
    param_delta = -new_inv_hessian.dot(full_gradient)

    def prediction(step):
        # TODO: I need to update this ugly solution later
        updated_params = param_vector + step * param_delta

        layer_input = network_input
        start_pos = 0
        for layer in self.layers:
            for param in layer.parameters:
                end_pos = start_pos + param.size
                parameter_name, parameter_id = param.name.split('_')
                setattr(layer, parameter_name, T.reshape(
                    updated_params[start_pos:end_pos],
                    param.shape
                ))
                start_pos = end_pos
            layer_input = layer.output(layer_input)
        return layer_input

    def phi(step):
        return self.error(network_output, prediction(step))

    def derphi(step):
        error_func = self.error(network_output, prediction(step))
        return T.grad(error_func, wrt=step)

    step = asfloat(line_search(phi, derphi))
    updated_params = param_vector + step * param_delta

    updates = setup_parameter_updates(params, updated_params)
    updates.extend([
        (inv_hessian, new_inv_hessian),
        (prev_params, param_vector),
        (prev_full_gradient, full_gradient),
    ])
    return updates

def in_transit(self, t, r=0.0, texp=None):
    """Get a list of timestamps that are in transit

    Args:
        t (vector): A vector of timestamps to be evaluated.
        r (Optional): The radii of the planets.
        texp (Optional[float]): The exposure time.

    Returns:
        The indices of the timestamps that are in transit.
    """
    z = tt.zeros_like(self.a)
    r = tt.as_tensor_variable(r) + z
    R = self.r_star + z

    # Wrap the times into time since transit
    hp = 0.5 * self.period
    dt = tt.mod(self._warp_times(t) - self.t0 + hp, self.period) - hp

    if self.ecc is None:
        # Equation 14 from Winn (2010)
        k = r / R
        arg = tt.square(1 + k) - tt.square(self.b)
        factor = R / (self.a * self.sin_incl)
        hdur = hp * tt.arcsin(factor * tt.sqrt(arg)) / np.pi
        t_start = -hdur
        t_end = hdur
        flag = z
    else:
        M_contact = self.contact_points_op(
            self.a, self.ecc, self.cos_omega, self.sin_omega,
            self.cos_incl + z, self.sin_incl + z, R + r)
        flag = M_contact[2]

        t_start = (M_contact[0] - self.M0) / self.n
        t_start = tt.mod(t_start + hp, self.period) - hp

        t_end = (M_contact[1] - self.M0) / self.n
        t_end = tt.mod(t_end + hp, self.period) - hp

        t_start = tt.switch(tt.gt(t_start, 0.0), t_start - self.period, t_start)
        t_end = tt.switch(tt.lt(t_end, 0.0), t_end + self.period, t_end)

    if texp is not None:
        t_start -= 0.5 * texp
        t_end += 0.5 * texp

    mask = tt.any(tt.and_(dt >= t_start, dt <= t_end), axis=-1)
    result = ifelse(tt.all(tt.eq(flag, 0)),
                    tt.arange(t.size)[mask],
                    tt.arange(t.size))
    return result

def _unfold_gradients_func(self, rep, dec, g_dec, target_tok, tok, w, b, unfold_idx=0):
    distance = T.sum((target_tok - dec)**2)
    g_cost_dec = T.grad(distance, dec)
    tok_is_token = T.lt(tok, 0)
    g_dec_switcher = ifelse(tok_is_token, g_cost_dec, g_dec)
    output_distance = ifelse(tok_is_token, distance, T.constant(0.0, dtype=FLOATX))

    _rep, = make_float_vectors("_rep")
    _dec = self._decode_computation(_rep)[unfold_idx]
    node_map = {_rep: rep, _dec: dec}

    g_dec_rep = SRG(T.grad(T.sum(_dec), _rep), node_map) * g_dec_switcher
    g_dec_w = SRG(T.grad(T.sum(_dec), w), node_map) * g_dec_switcher
    g_dec_b = SRG(T.grad(T.sum(_dec), b), node_map) * g_dec_switcher

    return g_dec_rep, g_dec_w, g_dec_b, output_distance

def bfgs(inverse_hessian, weight_delta, gradient_delta, maxrho=1e4):
    ident_matrix = cast_float(T.eye(inverse_hessian.shape[0]))
    maxrho = cast_float(maxrho)

    rho = cast_float(1.) / gradient_delta.dot(weight_delta)
    rho = ifelse(T.isinf(rho), maxrho * T.sgn(rho), rho)

    param1 = ident_matrix - T.outer(weight_delta, gradient_delta) * rho
    param2 = ident_matrix - T.outer(gradient_delta, weight_delta) * rho
    param3 = rho * T.outer(weight_delta, weight_delta)

    return param1.dot(inverse_hessian).dot(param2) + param3

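A usage sketch under assumptions (the cast_float stand-in, imports and toy deltas are not from the original source): it compiles the rank-two BFGS update of the inverse Hessian and applies it to an identity starting matrix.

import numpy as np
import theano
import theano.tensor as T
from theano.ifelse import ifelse

def cast_float(value):
    # assumed helper: cast a value to the configured float type
    return T.cast(value, theano.config.floatX)

H = T.matrix('H')   # current inverse Hessian approximation
s = T.vector('s')   # weight_delta
y = T.vector('y')   # gradient_delta
update = theano.function([H, s, y], bfgs(H, s, y), allow_input_downcast=True)

H0 = np.eye(2)
print(update(H0, [1., 0.], [0.5, 0.]))  # updated inverse Hessian approximation
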
def dropout(tensor, apply_dropout, keep_prob):
    mask = RND_STREAM.binomial(n=1, p=keep_prob, size=tensor.shape, dtype='float32')
    keep_prob = T.cast(keep_prob, 'float32')  # todo: weirdity around shared.set_value
    tensor_dropped = tensor * (1.0 / keep_prob) * mask
    return ifelse(apply_dropout, tensor_dropped, tensor)

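A usage sketch under assumptions (the RND_STREAM global, the scalar switch and the toy data are not part of the original snippet): the same compiled function applies dropout when the switch is 1 and passes the input through unchanged when it is 0.

import numpy as np
import theano
import theano.tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams
from theano.ifelse import ifelse

RND_STREAM = MRG_RandomStreams(seed=42)  # assumed module-level stream used by dropout()

x = T.matrix('x', dtype='float32')
apply_dropout = T.iscalar('apply_dropout')  # 1 during training, 0 at test time
f = theano.function([x, apply_dropout], dropout(x, apply_dropout, keep_prob=0.5))

data = np.ones((2, 3), dtype='float32')
print(f(data, 1))  # entries randomly zeroed, survivors scaled by 1/keep_prob
print(f(data, 0))  # unchanged input
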
def apply_me(args):
    if len(args) == 1:
        return args[0]
    else:
        rval = ifelse(args[0], true, apply_me(args[1:]),
                      name=name + str(len(args)))
        return rval

def apply_me(args):
    if len(args) == 1:
        return args[0]
    else:
        rval = ifelse(TT.eq(args[0], zero), false,
                      apply_me(args[1:]), name=name + str(len(args)))
        return rval

def learning_updates(self):
    batch_counter = theano.shared(np.array(0, dtype="int32"), "batch_counter")
    batch_size = self.batch_size
    to_update = batch_counter >= batch_size
    for param in self.network.parameters:
        # delta = self.learning_rate * T.grad(self.J, param)
        gsum = theano.shared(np.zeros(param.get_value().shape, dtype=FLOATX),
                             "batch_gsum_%s" % param.name)
        yield gsum, ifelse(to_update, T.zeros_like(gsum), gsum + T.grad(self.cost, param))
        delta = self.learning_rate * gsum / batch_size
        yield param, ifelse(to_update, param - delta, param)
    yield batch_counter, ifelse(to_update, T.constant(0, dtype="int32"), batch_counter + 1)

def compute_S(idx, Sp1, zAA, zBB):
    Sm = ifelse(
        T.eq(idx, nT - 2),
        T.dot(zBB[iib[-1]], Tla.matrix_inverse(zAA[iia[-1]])),
        T.dot(
            zBB[iib[idx]],
            Tla.matrix_inverse(
                zAA[iia[T.min([idx + 1, nT - 2])]]
                - T.dot(Sp1, T.transpose(zBB[iib[T.min([idx + 1, nT - 2])]])))))
    return Sm

def __init__(self, input, p, drop_switch):
    self.input = input
    self.srng = RandomStreams(seed=234)
    self.rv_n = self.srng.normal(self.input.shape)
    # first dropout mask, scaled with /p so we do not have to perform
    # test time scaling (source: cs231n)
    self.mask = T.cast(self.rv_n < p, dtype=theano.config.floatX) / p
    # only drop if drop_switch == 1.0
    self.output = ifelse(drop_switch > 0.5, self.input * self.mask, self.input)

def find_right_bound(prev_func_output, step, maxstep):
    func_output = f(step)

    is_output_decrease = T.gt(prev_func_output, func_output)
    step = ifelse(is_output_decrease, T.minimum(2. * step, maxstep), step)

    is_output_increse = T.lt(prev_func_output, func_output)
    stoprule = theano.scan_module.until(
        T.or_(is_output_increse, step > maxstep))

    return [func_output, step], stoprule

def test_callback_with_ifelse(self):
    a, b, c = tensor.scalars('abc')
    f = function([a, b, c], ifelse(a, 2 * b, 2 * c),
                 mode=Mode(optimizer=None,
                           linker=vm.VM_Linker(callback=self.callback)))

    f(1, 2, 3)
    assert self.n_callbacks['IfElse'] == 2

def norm_clip(dW, max_l2_norm=10.0):
    """
    Clip theano symbolic var dW to have some max l2 norm.
    """
    dW_l2_norm = T.sqrt(T.sum(dW**2.0))
    norm_ratio = (max_l2_norm / dW_l2_norm)
    clip_factor = ifelse(T.lt(norm_ratio, 1.0), norm_ratio, 1.0)
    dW_clipped = dW * clip_factor
    return dW_clipped

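A short usage sketch (imports and the toy gradient are assumptions, not from the original source): gradients whose L2 norm exceeds max_l2_norm are rescaled onto the ball, smaller ones pass through untouched.

import numpy as np
import theano
import theano.tensor as T
from theano.ifelse import ifelse

dW = T.matrix('dW')
clip = theano.function([dW], norm_clip(dW, max_l2_norm=1.0),
                       allow_input_downcast=True)

g = np.full((2, 2), 10.0)          # L2 norm is 20, well above the limit
print(np.linalg.norm(clip(g)))     # ~1.0 after clipping
print(clip(np.eye(2) * 0.1))       # norm < 1, returned unchanged
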
def __init__(self, rng, input, input_sh, n_out, W_0=None, b_0=None,
             activation=T.nnet.sigmoid, name='FullyConnectedLayer',
             p=.5, fit_intercept=True):
    super().__init__(rng, input, input_sh, name)
    # print('FullyConnected input shape: ' + repr(input.size))
    self.output_sh = (input_sh[0], n_out)
    self.W_sh = (input_sh[1], n_out)
    self.n_out = n_out
    self.activation = activation
    self.fit_intercept = int(fit_intercept)
    # dropout
    self.default_p = p
    self.p = theano.shared(p)
    # self.p.set_value(p)
    self.drop_input = theano.shared(self._dropout(), name='drop_input' + name, borrow=True)
    # weights
    if W_0 is None:
        W_0 = self._default_W()
    self.W = theano.shared(W_0, name='W' + name, borrow=True)
    # bias
    if b_0 is None:
        b_0 = self._default_b()
    self.b = theano.shared(b_0, name='b' + name, borrow=True)
    # param list
    self.params = [self.W] + self.fit_intercept * [self.b]
    self.speeds = [np.sqrt(input_sh[1])] * 2
    # output
    input_ = ifelse(T.gt(self.p.eval(), 0),
                    input * self.drop_input / (1 - self.p),
                    input)
    lin_out = ifelse(
        self.fit_intercept,
        T.dot(input_, self.W) + self.b.repeat(repeats=input_sh[0], axis=0),
        T.dot(input_, self.W))
    self.output = lin_out if (activation is None) else activation(lin_out)

def _per_roi_pooling(coord, x):
    #x = tt.tensor3()      # 512x7x7 float tensor
    #coord = tt.fvector()  # [ xmin, ymin, xmax, ymax ] in [0,1], x-width, y-height

    # step 1: float coord to int
    nb_rows = x.shape[1]  # height, y
    nb_cols = x.shape[2]  # width, x
    # xmin,xmax multiply nb_cols; ymin,ymax multiply nb_rows
    icoords = tt.iround(coord * [nb_cols, nb_rows, nb_cols, nb_rows])

    # 0 <= xmin < nb_cols
    xmin = tt.clip(icoords[0], 0, nb_cols - 1)
    # 0 <= ymin < nb_rows
    ymin = tt.clip(icoords[1], 0, nb_rows - 1)
    xmax = tt.clip(icoords[2], 1 + xmin, nb_cols)  # min(xmax) = 1+xmin, max(xmax) = nb_cols
    ymax = tt.clip(icoords[3], 1 + ymin, nb_rows)  # min(ymax) = 1+ymin, max(ymax) = nb_rows

    # if xmin == xmax == nb_cols
    xmin = ifelse(tt.eq(xmax, xmin), xmax - 1, xmin)
    # if ymin == ymax == nb_rows
    ymin = ifelse(tt.eq(ymax, ymin), ymax - 1, ymin)

    # step 2: extract raw sub-tensor
    roi = x[:, ymin:ymax, xmin:xmax]

    # step 3: resize raw to target_h x target_w
    '''
    # method 1 (slow): upsampling -> downsampling
    subtensor_h = ymax - ymin
    subtensor_w = xmax - xmin
    # upsample by (target_h, target_w) -> (subtensor_h * target_h, subtensor_w * target_w)
    kernel = tt.ones((target_h, target_w))  # create ones filter
    roi_up, _ = scan(fn=lambda r2d, kernel: kron(r2d, kernel),
                     sequences=roi, non_sequences=kernel)
    # downsample to (target_h, target_w)
    #target = roi_up[:, ::subtensor_h, ::subtensor_w]
    target = max_pooling(roi_up, subtensor_h, subtensor_w)
    '''
    # method 2
    if cfg.NET.POOL_METHOD == 'slicepool':
        target = slice_pooling(roi, target_h, target_w)
    else:
        target = float_max_pooling(roi, target_h, target_w)
    return K.flatten(target)

def __init__(self, collapse='mean', maxout=False, transpose=False, **kwargs):
    super(TwoDToOneDLayer, self).__init__(1, **kwargs)
    self.set_attr('collapse', collapse)
    self.set_attr('transpose', transpose)
    Y = self.sources[0].output
    if transpose:
        Y = Y.dimshuffle(1, 0, 2, 3)

    # index handling
    def index_fn(index, size):
        return T.set_subtensor(index[:size], numpy.cast['int8'](1))
    index_init = T.zeros((Y.shape[2], Y.shape[1]), dtype='int8')
    self.index, _ = theano.scan(index_fn,
                                [index_init, T.cast(self.sources[0].output_sizes[:, 1], "int32")])
    self.index = self.index.dimshuffle(1, 0)
    n_out = self.sources[0].attrs['n_out']

    if maxout:
        Y = Y.max(axis=3).dimshuffle(0, 1, 2, 'x')

    if collapse == 'sum' or collapse == True:
        Y = Y.sum(axis=0)
    elif collapse == 'mean':
        Y = Y.mean(axis=0)
    elif collapse == 'conv':
        from returnn.theano.util import circular_convolution
        Y, _ = theano.scan(lambda x_i, x_p: circular_convolution(x_i, x_p), Y, Y[0])
        Y = Y[-1]
    elif collapse == 'flatten':
        self.index = T.ones((Y.shape[0] * Y.shape[1], Y.shape[2]), dtype='int8')
        Y = Y.reshape((Y.shape[0] * Y.shape[1], Y.shape[2], Y.shape[3]))
    elif str(collapse).startswith('pad_'):
        pad = numpy.int32(collapse.split('_')[-1])
        Y = ifelse(T.lt(Y.shape[0], pad),
                   T.concatenate([Y, T.zeros((pad - Y.shape[0], Y.shape[1], Y.shape[2], Y.shape[3]), 'float32')], axis=0),
                   ifelse(T.gt(Y.shape[0], pad), Y[:pad], Y))
        Y = Y.dimshuffle(1, 2, 3, 0).reshape((Y.shape[1], Y.shape[2], Y.shape[3] * Y.shape[0]))
        n_out *= pad
    elif collapse != False:
        assert False, "invalid collapse mode"

    if self.attrs['batch_norm']:
        Y = self.batch_norm(Y, n_out, force_sample=False)
    self.output = Y
    self.act = [Y, Y]
    self.set_attr('n_out', n_out)

def search_iteration_step(x_previous, x_current, y_previous, y_current,
                          y_deriv_previous, is_first_iteration, x_star):
    y_deriv_current = f_deriv(x_current)
    x_new = x_current * asfloat(2)
    y_new = f(x_new)

    condition1 = T.or_(
        y_current > (y0 + c1 * x_current * y_deriv_0),
        T.and_(
            y_current >= y_previous,
            T.bitwise_not(is_first_iteration)
        )
    )
    condition2 = T.abs_(y_deriv_current) <= -c2 * y_deriv_0
    condition3 = y_deriv_current >= zero

    x_star = ifelse(
        condition1,
        zoom(x_previous, x_current, y_previous, y_current,
             y_deriv_previous, f, f_deriv, y0, y_deriv_0, c1, c2),
        ifelse(
            condition2,
            x_current,
            ifelse(
                condition3,
                zoom(x_current, x_previous, y_current, y_previous,
                     y_deriv_current, f, f_deriv, y0, y_deriv_0, c1, c2),
                x_new,
            ),
        ),
    )
    y_deriv_previous_new = ifelse(condition1, y_deriv_previous, y_deriv_current)
    is_any_condition_satisfied = sequential_or(condition1, condition2, condition3)
    y_current_new = ifelse(is_any_condition_satisfied, y_current, y_new)

    return (
        [x_current, x_new, y_current, y_current_new,
         y_deriv_previous_new, theano_false, x_star],
        theano.scan_module.scan_utils.until(
            sequential_or(
                T.eq(x_new, zero),
                is_any_condition_satisfied,
            )
        )
    )

def apply_mean_stress_theory(m_s_th, sm, rng, sn_0, r_m, r_y):
    rng = ifelse(
        tt.eq(1, m_s_th),
        ifelse(tt.lt(0, sm),
               rng / (1 - (sm / r_m)),
               ifelse(tt.le(r_m, tt.abs_(sm)), 1.01 * sn_0, rng)),
        ifelse(
            tt.eq(2, m_s_th),
            ifelse(tt.lt(tt.abs_(sm), r_m),
                   rng / (1 - (sm / r_m)**2),
                   ifelse(tt.le(r_m, sm), 1.01 * sn_0, rng)),
            ifelse(
                tt.eq(3, m_s_th),
                ifelse(tt.lt(0, sm) & tt.lt(sm, r_y),
                       rng / 1 - (sm / r_y),
                       ifelse(tt.le(r_y, tt.abs_(sm)), 1.01 * sn_0, rng)),
                rng)))
    return rng

def interval_reduction(a, b, c, d, tol):
    fc = f(c)
    fd = f(d)

    a, b, c, d = ifelse(
        T.lt(fc, fd),
        [a, d, d - golden_ratio * (d - a), c],
        [c, b, d, c + golden_ratio * (b - c)]
    )

    stoprule = theano.scan_module.until(T.lt(T.abs_(c - d), tol))
    return [a, b, c, d], stoprule

def init_train_updates(self):
    updates = super(ErrDiffStepUpdate, self).init_train_updates()

    step = self.variables.step
    last_error = self.variables.last_error
    previous_error = self.variables.previous_error

    step_update_condition = ifelse(
        last_error < previous_error,
        self.update_for_smaller_error * step,
        ifelse(
            last_error > self.update_for_bigger_error * previous_error,
            self.update_for_bigger_error * step,
            step
        )
    )
    updates.append((step, step_update_condition))
    return updates

def compute_D(idx, Dm1, zS, zAA, zBB):
    D = ifelse(
        T.eq(idx, nT - 1),
        T.dot(
            Tla.matrix_inverse(zAA[iia[-1]]),
            III + T.dot(T.transpose(zBB[iib[idx - 1]]), T.dot(Dm1, S[0]))),
        ifelse(
            T.eq(idx, 0),
            Tla.matrix_inverse(zAA[iia[0]] - T.dot(zBB[iib[0]], T.transpose(S[-1]))),
            T.dot(
                Tla.matrix_inverse(
                    zAA[iia[idx]]
                    - T.dot(zBB[iib[T.min([idx, nT - 2])]],
                            T.transpose(S[T.max([-idx - 1, -nT + 1])]))),
                III + T.dot(T.transpose(zBB[iib[T.min([idx - 1, nT - 2])]]),
                            T.dot(Dm1, S[-idx])))))
    return D

def test_callback_with_ifelse(self):
    a, b, c = tensor.scalars("abc")
    f = function(
        [a, b, c],
        ifelse(a, 2 * b, 2 * c),
        mode=Mode(optimizer=None, linker=VMLinker(callback=self.callback)),
    )
    f(1, 2, 3)
    assert self.n_callbacks["IfElse"] == 2

def distribution_helper(self, w, X, F, conds):
    nx = X.shape[0]
    distr = T.alloc(1.0, nx, self.K)
    distr, _ = theano.scan(
        lambda c, x, f, d: ifelse(T.eq(c, 1.), self.distribution_helper_helper(x, f), d),
        sequences=[conds, X, F, distr])
    distr, _ = theano.scan(
        lambda d: -w * (T.min(d, keepdims=True) - d),  # relative value w.r.t. the minimum
        sequences=distr)
    return distr

def output_index(self):
    from theano.ifelse import ifelse
    index = self.index
    if self.sources:
        # In some cases, e.g. forwarding, the target index (for "classes") might have shape[0]==0.
        # Or shape[0]==1 with index[0]==0. See Dataset.shapes_for_batches().
        # Use source index in that case.
        have_zero = T.le(index.shape[0], 1) * T.eq(T.sum(index[0]), 0)
        index = ifelse(have_zero, self.sources[0].index, index)
    return index