def get_output(self, go_backwards=False, train=False):
    """Run ``self.step`` over the leading scan axis and return its outputs.

    Args:
        go_backwards: Forwarded to ``theano.scan``.
        train: Symbolic input tensor. Despite the boolean default it must
            provide ``.shape`` and ``.dimshuffle``; it is permuted with
            ``(1, 0, 2)``, so it has to be 3-D.
            # assumes layout (samples, time, features) -- TODO confirm

    Returns:
        The first scan output, squeezed and permuted with ``(1, 0, 2)``.
    """
    self.reset_states(train.shape[0])
    scan_input = train.dimshuffle((1, 0, 2))
    scan_results, _ = theano.scan(
        self.step,
        sequences=scan_input,
        outputs_info=[self.states[0], self.states[1]],
        go_backwards=go_backwards,
    )
    # Two entries are declared in ``outputs_info``; only the first result
    # is used, the second one is discarded.
    first = T.squeeze(scan_results[0])
    return first.dimshuffle((1, 0, 2))
def sym_entropy(self, S, mapping):
    """Build the symbolic soft-entropy expression.

    Args:
        S: Symbolic samples that are compared against ``self.C``.
        mapping: Symbolic matrix whose transpose is multiplied with the
            soft memberships.
            # presumably one row of label indicators per sample -- verify

    Returns:
        The squeezed symbolic entropy value.
    """
    if self.distance == 'euclidean':
        dist = euclidean_distance(S, self.C)
    else:
        dist = cosine_distance(S, self.C)

    # Soft membership of each sample, sharpened by the scale ``self.m``.
    membership = T.nnet.softmax(-dist / self.m)
    mass = T.sum(membership, axis=0)
    ratio = T.dot(mapping.T, membership) / mass

    # Terms where ``P * log2(P)`` evaluates to NaN are replaced by zero.
    plogp = ratio * T.log2(ratio)
    plogp = T.switch(T.isnan(plogp), 0, plogp)
    per_cluster = plogp.sum(axis=0)

    weight = mass / mass.sum()
    return T.squeeze(-(per_cluster * weight).sum())
def _comp_modes(self):
    """Return the modes of the mixture components as one tensor.

    ``self.comp_dists`` is first treated as a single object exposing
    ``mode``. If that raises ``AttributeError`` it is iterated instead and
    the per-component modes are stacked on the last axis, then squeezed.
    """
    try:
        return tt.as_tensor_variable(self.comp_dists.mode)
    except AttributeError:
        modes = [dist.mode for dist in self.comp_dists]
        return tt.squeeze(tt.stack(modes, axis=-1))
def __rnn(self, peaks, initial_states, indication=None):
    """Scan ``self.__step`` over ``self.max_time_steps`` time indices.

    Args:
        peaks: Handed to the step function through ``non_sequences``.
        initial_states: List of initial state tensors. Its first element is
            replaced in place by ``T.unbroadcast(..., 1)``.
        indication: Optional second scan sequence. When it is ``None`` an
            all-zero tensor shaped like the last initial state seeds the
            first recurrent output instead.

    Returns:
        The first scan output, squeezed, with its first two axes swapped.
    """
    time_index = T.arange(self.max_time_steps)
    if indication is None:
        seed = T.unbroadcast(K.zeros_like(initial_states[-1]), 1)
        scan_sequences = [time_index]
    else:
        # No recurrent seed for the first output in this mode.
        seed = None
        scan_sequences = [time_index, indication]

    if len(initial_states) > 0:
        initial_states[0] = T.unbroadcast(initial_states[0], 1)

    # Original author's warning: ``peaks`` must be passed bare, NOT wrapped
    # in a list, when calling theano.scan.
    outputs, _ = theano.scan(
        self.__step,
        sequences=scan_sequences,
        outputs_info=[seed] + initial_states,
        non_sequences=peaks,
        go_backwards=False,
    )

    # scan returns either a single variable or a list of them.
    if isinstance(outputs, list):
        outputs = outputs[0]
    outputs = T.squeeze(outputs)
    swapped = [1, 0] + list(range(2, outputs.ndim))
    return outputs.dimshuffle(swapped)
def getTrainingFunc2(self):
    """Compile a Theano training step with dropout and momentum.

    ``self.params`` is read in groups of three: index ``3*i`` is used as
    the weight matrix and ``3*i + 1`` as the bias of layer ``i``; every
    third entry (``3*i + 2``) is excluded from the gradient and updates.
    ``self.moments`` is filtered the same way and provides one momentum
    buffer per trained parameter.

    Returns:
        A compiled function ``f(inputs, target, learning_rate)`` that
        returns ``[L, y]`` (summed squared error and the network output)
        and applies the momentum updates.
    """
    # Local import keeps the block self-contained; theano.function asks
    # for an ordered mapping (or list of pairs) for ``updates``.
    from collections import OrderedDict

    net_input = T.dmatrix()
    target = T.dvector()
    learning_rate = T.scalar()

    y = net_input
    for layer in range(self.n_layers - 1):
        weights = self.params[layer * 3]
        bias = self.params[layer * 3 + 1]
        y = T.maximum(0.0, T.dot(y, weights) + bias)
        # Multiplicative binary mask with keep-probability 0.5.
        y = y * self.theano_rng.binomial(y.shape, 1, 0.5)

    last = (self.n_layers - 1) * 3
    y = T.maximum(0, T.dot(y, self.params[last]) + self.params[last + 1])
    y = T.squeeze(y.T)

    diff = y - target
    L = T.sum(diff * diff)

    trained = [p for idx, p in enumerate(self.params) if idx % 3 != 2]
    momenta = [m for idx, m in enumerate(self.moments) if idx % 3 != 2]
    grads = T.grad(L, trained)

    updates = OrderedDict()
    for idx, (p, g, m) in enumerate(zip(trained, grads, momenta)):
        if idx % 2 == 0:
            # Even positions are the weight matrices: add the decay term.
            updates[m] = 0.9 * m - learning_rate * 0.0005 * p - learning_rate * g
        else:
            updates[m] = 0.9 * m - learning_rate * g
        # All updates are applied simultaneously, so ``m`` here is the
        # momentum value from before this step.
        updates[p] = p + m

    train_func = theano.function(
        inputs=[net_input, target, learning_rate],
        outputs=[L, y],
        updates=updates,
    )
    return train_func
def _comp_means(self):
    """Return the means of the mixture components as one tensor.

    ``self.comp_dists`` is first treated as a single object exposing
    ``mean``. If that raises ``AttributeError`` it is iterated instead and
    the per-component means are stacked on axis 1, then squeezed.
    """
    try:
        return tt.as_tensor_variable(self.comp_dists.mean)
    except AttributeError:
        means = [dist.mean for dist in self.comp_dists]
        return tt.squeeze(tt.stack(means, axis=1))
def splittings(omega, x, l):
    """Build one symbolic value per ``n`` in ``1..n2`` for degree ``l``.

    For every ``n`` a kernel table is read from
    ``kerns/l.<l>_n.<n>`` (first row skipped), a weighted sum over the
    grid ``x`` is accumulated, and the result is scaled by the matching
    entries of the module-level ``beta`` table.

    Args:
        omega: Symbolic profile indexed by grid position.
        x: Grid; must provide ``.size`` and integer indexing.
        l: Degree used in the file name and in the ``beta`` lookup.

    Returns:
        The squeezed tensor of all per-``n`` values. The test value is
        printed as a debugging aid.
    """
    vals = []
    for n in range(1, n2 + 1):
        kern = np.loadtxt("kerns/l.{l:.0f}_n.{n:.0f}".format(l=l, n=n), skiprows=1)
        # Original author's note: "This is bad" -- the kernel is thinned
        # with a fixed stride when the grid is short.
        if x.size < 4800:
            stride = int(x.size / n2)
            kern = kern[0::stride]

        # Original author's note: this accumulation could probably be a
        # single dot product.
        area = 0
        for j in range(1, x.size):
            step = x[j] - x[j - 1]
            area = tt.add(area, step * tt.dot(omega[j], kern[j]))

        row_mask = (beta[:, 0] == l) * (beta[:, 1] == n)
        vals.append(tt.dot(beta[row_mask, 2], area))

    vals = tt.squeeze(tt.as_tensor_variable(vals))
    print("vals")
    print(vals.tag.test_value)
    return vals
def adjust_estimate(estimate_r_t, risk_pref_t):
    """Rescale estimates around 0.5 and limit them to the range [0, 1].

    Args:
        estimate_r_t: Symbolic 2-D estimates (the code reshapes using its
            first two shape entries).
            # assumes (trials, subjects) -- TODO confirm
        risk_pref_t: Symbolic factor multiplied with the centred
            estimates; relies on broadcasting against ``estimate_r_t``.

    Returns:
        The squeezed, limited estimates.
    """
    centred = (estimate_r_t - 0.5) * risk_pref_t + 0.5

    # Append a trailing axis of length one to both the values and a
    # matching block of zeros.
    column_shape = [centred.shape[0], centred.shape[1], 1]
    values = T.reshape(centred, newshape=column_shape)
    zeros = T.reshape(T.zeros_like(centred), newshape=column_shape)

    # Lower limit: element-wise maximum with zero via stack + max.
    floored = T.max(T.stack([values, zeros], axis=2), axis=2)

    # Upper limit: element-wise minimum with one via stack + min.
    capped = T.min(T.stack([floored, T.ones_like(floored)], axis=2), axis=2)
    return T.squeeze(capped)
def get_relative_position(self, t):
    """The planets' positions relative to the star

    Args:
        t: The times where the position should be evaluated.

    Returns:
        The components ``(x, y, z)`` of the position vector at ``t``.
        ``x`` is ``speed * dt``, ``y`` is the constant ``_b_norm`` and
        ``z`` is always ``-1``.
        # original docs state units of ``R_sun`` -- not verifiable here

    """
    # Time since the reference, wrapped into one period and centred.
    phase = tt.mod(tt.shape_padright(t) - self._ref_time, self.period)
    phase = phase - self._half_period

    x = tt.squeeze(self.speed * phase)
    y = tt.squeeze(self._b_norm + tt.zeros_like(phase))
    z = -tt.ones_like(x)
    return x, y, z
def wrapped_generator(u, consts):
    """Call ``generator`` on a batched ``u`` and flatten each output.

    A 1-D ``u`` is promoted to a single-row batch first. Every output of
    ``generator(u, consts)`` is reshaped to ``(n_batch, -1)`` and squeezed.

    Returns:
        List with one squeezed tensor per generator output.
    """
    batch = u[None, :] if u.ndim == 1 else u
    n_batch = batch.shape[0]
    flattened = []
    for output in generator(batch, consts):
        flattened.append(tt.squeeze(output.reshape((n_batch, -1))))
    return flattened
def make_node(self, *xi):
    """Create the Apply node for this op.

    Every input is converted to a tensor variable and squeezed; all of
    them must then be 1-D.

    Raises:
        TypeError: If any squeezed input is not 1-D.

    Returns:
        A ``gof.Apply`` with the squeezed inputs and two outputs of type
        ``self.output_type(inputs)``.
    """
    vectors = [T.squeeze(T.as_tensor_variable(x)) for x in xi]
    not_1d = [v.ndim != 1 for v in vectors]
    if any(not_1d):
        raise TypeError('%s: input must be 1D' % self.__class__.__name__)
    first_out = self.output_type(vectors)()
    second_out = self.output_type(vectors)()
    return gof.Apply(self, vectors, [first_out, second_out])
def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".

    Args:
        x: Symbolic tensor.
        axis: Index of the length-1 dimension to drop. Negative values
            count from the last dimension.

    Returns:
        ``x`` without dimension ``axis``; the broadcast flags of the
        remaining dimensions are the same as before the call.
    '''
    ndim = x.type.ndim
    # Negative indices would produce a wrong-length broadcast pattern in
    # the slicing below and never match ``i == axis``; normalise first.
    if axis < 0:
        axis += ndim
    kept_pattern = x.broadcastable[:axis] + x.broadcastable[axis + 1:]
    # Flag only ``axis`` as broadcastable so that T.squeeze drops exactly
    # that dimension and nothing else.
    x = T.patternbroadcast(x, [i == axis for i in range(ndim)])
    x = T.squeeze(x)
    # Restore the original flags of the surviving dimensions.
    x = T.patternbroadcast(x, kept_pattern)
    return x
def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".

    Args:
        x: Symbolic tensor.
        axis: Index of the length-1 dimension to drop. Negative values
            count from the last dimension.

    Returns:
        ``x`` without dimension ``axis``; the broadcast flags of the
        remaining dimensions are the same as before the call.
    '''
    ndim = x.type.ndim
    # Negative indices would produce a wrong-length broadcast pattern in
    # the slicing below and never match ``i == axis``; normalise first.
    if axis < 0:
        axis += ndim
    kept_pattern = x.broadcastable[:axis] + x.broadcastable[axis + 1:]
    # Flag only ``axis`` as broadcastable so that T.squeeze drops exactly
    # that dimension and nothing else.
    x = T.patternbroadcast(x, [i == axis for i in range(ndim)])
    x = T.squeeze(x)
    # Restore the original flags of the surviving dimensions.
    x = T.patternbroadcast(x, kept_pattern)
    return x
def get_radial_velocity(self, t, K=None, output_units=None):
    """Get the radial velocity of the star

    .. note:: Per the original documentation, positive ``z`` points
        *towards* the observer in this package, while this method returns
        values where a positive radial velocity is a redshift. The sign
        flip is the ``-conv * v[2]`` in the full-orbit branch.

    Args:
        t: The times where the radial velocity should be evaluated.
        K (Optional): The semi-amplitudes of the orbits. If provided, the
            full orbit solution is skipped and this amplitude is used
            instead.
        output_units (Optional): An AstroPy velocity unit. Defaults to
            ``m/s``. Ignored if a value is given for ``K``.

    Returns:
        The reflex radial velocity evaluated at ``t``.

    """
    if K is None:
        # Full orbit solution: convert from R_sun/day to the output unit.
        if output_units is None:
            output_units = u.m / u.s
        conv = (1 * u.R_sun / u.day).to(output_units).value
        velocity = self.get_star_velocity(t)
        return -conv * velocity[2]

    # Shortcut when the semi-amplitude is supplied directly.
    sinf, cosf = self._get_true_anomaly(t)
    if self.ecc is None:
        return tt.squeeze(K * cosf)

    # cos(w + f) + e * cos(w) from Lovis & Fischer
    shape_term = (
        self.cos_omega * cosf
        - self.sin_omega * sinf
        + self.ecc * self.cos_omega
    )
    return tt.squeeze(K * shape_term)
def __init__(self, f, D):
    """Pick the dictionary column that best matches ``f``.

    Args:
        f: Symbolic signal.
        D: Symbolic dictionary; its columns are projected onto ``f``.

    Sets ``proj`` (all projections), ``amax`` (index of the largest
    absolute projection), ``dist`` (that projection), ``residual``
    (``f`` minus the selected scaled column) and ``coeffs`` (a zero
    column vector with ``dist`` written at ``amax``).
    """
    self.f = f
    self.D = D

    projections = T.dot(D.T, f)
    best = T.argmax(T.abs_(projections))
    strength = projections[best]

    self.proj = projections
    self.amax = best
    self.dist = strength
    self.residual = f - T.squeeze(D[:, best] * strength)

    empty = T.zeros([D.shape[1], 1])
    self.coeffs = T.set_subtensor(empty[best], strength)
def sample_v_given_h(self, h0_sample):
    """Sample the visible units given a hidden sample.

    ``self.propdown`` provides the pre-activation and the mean of the
    visible layer. A multinomial draw is then made per scan step, pairing
    each entry of the mean with the matching entry of ``self.D`` as the
    number of trials.

    Returns:
        ``[pre_sigmoid_v1, v1_mean, squeezed sample, scan updates]``.
    """
    pre_sigmoid_v1, v1_mean = self.propdown(h0_sample)

    def _draw(pvals, trials):
        return self.theano_rng.multinomial(
            size=(1, 1), n=trials, pvals=pvals, dtype=theano.config.floatX
        )

    v1_sample, updates = theano.scan(
        fn=_draw,
        outputs_info=None,
        sequences=[v1_mean, self.D],
    )
    return [pre_sigmoid_v1, v1_mean, T.squeeze(v1_sample), updates]
def manual_fitting(tic_id, sector, days, flux, t0_guess, period_guess, star_radius, star_mass, x_fold):
    """Build a single-planet transit model and plot its initial guess.

    Args:
        tic_id: Identifier forwarded to ``plotting_folded``.
        sector: Sector forwarded to ``plotting_folded``.
        days: Observation times passed to the light-curve model.
        flux: Observed flux; also sets the noise and mean-flux priors.
        t0_guess: Centre of the uniform prior on ``t0`` (+/- 0.2).
        period_guess: Centre of the uniform prior on ``period`` (+/- 10%).
        star_radius: Forwarded as ``r_star`` to the Keplerian orbit.
        star_mass: Forwarded as ``m_star`` to the Keplerian orbit.
        x_fold: Folded time axis used for plotting.

    Returns:
        ``(model, light_curve)``: the PyMC3 model and the symbolic light
        curve (scaled by 1e2 and offset by ``mean_flux``).
    """
    flux_std = np.std(flux)

    with pm.Model() as model:
        lower_log = np.log(flux_std) - 1
        logs = pm.Uniform("logs", lower=lower_log, upper=0,
                          testval=np.log(flux_std))
        mean_flux = pm.Normal("mean_flux", mu=0, sd=flux_std)
        u = xo.distributions.QuadLimbDark("u")
        period = pm.Uniform("period", lower=period_guess * 0.9,
                            upper=period_guess * 1.1, testval=period_guess)
        t0 = pm.Uniform("t0", lower=t0_guess - 0.2, upper=t0_guess + 0.2)
        r, b = xo.distributions.get_joint_radius_impact(
            min_radius=0.0005, max_radius=0.5, testval_r=0.015)
        orbit = xo.orbits.KeplerianOrbit(period=period, t0=t0, b=b,
                                         r_star=star_radius, m_star=star_mass)

        # The light curve model is computed using "starry". It is built
        # once and reused (it used to be computed twice, with the first
        # result thrown away).
        star = xo.StarryLightCurve(u)
        raw_curve = star.get_light_curve(orbit=orbit, r=r, t=days)

        # Per the original author: the returned light curve has shape
        # (ntime, nplanet); with a single planet it can be squeezed.
        # The factor 1e2 expresses it as a percentage.
        light_curve = tt.squeeze(raw_curve) * 1e2 + mean_flux

        # Finally, this is the likelihood for the observations
        pm.Normal("obs", mu=light_curve, sd=tt.exp(logs), observed=flux)

    with model:
        transit_model = xo.utils.eval_in_model(light_curve)

    inds = np.argsort(x_fold)
    p = plotting_folded(days, flux, tic_id, sector, x_fold)
    p.line(x_fold[inds], transit_model[inds], legend="initial model",
           line_width=3, line_alpha=0.6, line_color="black")
    show(p)
    return model, light_curve
def squeeze(x, axis):
    """Remove a 1-dimension from the tensor at index "axis".

    ``axis`` is wrapped with ``axis % x.ndim`` so negative values are
    accepted. When ``get_shape(x)`` yields a tuple or list, the same shape
    minus entry ``axis`` is attached to the result through ``add_shape``.
    """
    known_shape = get_shape(x)
    axis %= x.ndim
    squeezed = T.squeeze(T.addbroadcast(x, axis))
    if isinstance(known_shape, (tuple, list)):
        remaining = tuple(
            dim for position, dim in enumerate(known_shape) if position != axis
        )
        add_shape(squeezed, remaining)
    return squeezed
def get_output_for(self, incoming, **kwargs):
    """Apply ``exp(-Lambda * (r - re))`` to the first three input columns.

    Args:
        incoming: Symbolic 2-D input; columns 0, 1 and 2 are read.
        **kwargs: Accepted and ignored.

    Returns:
        The three transformed columns, joined on axis 0, squeezed and
        transposed.
    """
    columns = [
        T.exp(-self.Lambda * (incoming[:, k] - self.re)) for k in range(3)
    ]
    joined = T.concatenate([[column] for column in columns], axis=0)
    return T.squeeze(joined).T
def predict_with_masks(vals, masks):
    """Sample posterior predictions with masked inputs replaced by a prior.

    Adds two variables to the module-level ``model``, named with the
    current value of the global counter ``mask_num`` (which is then
    incremented): a Normal ``X_mask<N>`` and a Bernoulli ``y<N>``.

    Args:
        vals: Input values; ``vals.shape`` and ``len(vals)`` are used.
        masks: Multiplied with the latent values; ``1 - masks`` is
            multiplied with ``vals``.

    Returns:
        The posterior-predictive samples (300 draws) of the new ``y``.
    """
    global mask_num
    with model:
        X_mask = pm.Normal(f'X_mask{mask_num}', mu=Xmu, sd=1., shape=vals.shape)
        Xpred = tt.squeeze(X_mask) * masks + vals * (1 - masks)
        weighted = sum(
            coefs[i] * Xpred[:, i] for i in range(len(train_columns))
        )
        y_prob = pm.math.sigmoid(intercept + weighted)
        y = pm.Bernoulli(f'y{mask_num}', y_prob, shape=(len(vals)))
    mask_num += 1
    ppc = pm.sample_posterior_predictive(trace, model=model, vars=[y], samples=300)
    # The counter was already advanced, hence the ``- 1``.
    return ppc[f'y{mask_num-1}']
def _comp_logp(self, value):
    """Return the log-probability of ``value`` under every component.

    ``self.comp_dists`` is first treated as a single object with a
    ``logp`` method; a ``value`` with at most one dimension is padded on
    the right for that call. If an ``AttributeError`` is raised, the
    components are iterated instead and their ``logp(value)`` results are
    stacked on axis 1 and squeezed.
    """
    comp_dists = self.comp_dists
    try:
        if value.ndim > 1:
            padded = value
        else:
            padded = tt.shape_padright(value)
        return comp_dists.logp(padded)
    except AttributeError:
        per_component = [dist.logp(value) for dist in comp_dists]
        return tt.squeeze(tt.stack(per_component, axis=1))
def _comp_logp(self, value):
    """Return the log-probability of ``value`` under every component.

    ``self.comp_dists`` is first treated as a single object with a
    ``logp`` method; a ``value`` with at most one dimension is padded on
    the right for that call. If an ``AttributeError`` is raised, the
    components are iterated instead and their ``logp(value)`` results are
    stacked on axis 1 and squeezed.
    """
    comp_dists = self.comp_dists
    try:
        if value.ndim > 1:
            padded = value
        else:
            padded = tt.shape_padright(value)
        return comp_dists.logp(padded)
    except AttributeError:
        per_component = [dist.logp(value) for dist in comp_dists]
        return tt.squeeze(tt.stack(per_component, axis=1))
def get_output_for(self, incoming, **kwargs):
    """Evaluate a fixed pair-energy expression on three input columns.

    For each of the columns 0, 1 and 2 of ``incoming`` the result is the
    sum of a damped ``c6``/``c8`` term, an eight-term Gaussian expansion
    and one third of ``VRef``, multiplied by a constant factor.

    Args:
        incoming: Symbolic 2-D input; columns 0, 1 and 2 are read.
            # presumably distances in Bohr -- verify
        **kwargs: Accepted and ignored.

    Returns:
        The three per-column results, joined on axis 0, squeezed and
        transposed.
    """
    B_To_Ang = 0.52917721067
    Kcm_To_Hartree = 0.159360144e-2  # defined but not referenced below
    VRef = 0.1915103559
    a = numpy.array([
        -1.488979427684798e3, 1.881435846488955e4, -1.053475425838226e5,
        2.755135591229064e5, -4.277588997761775e5, 4.404104009614092e5,
        -2.946204062950765e5, 1.176861219078620e5
    ])
    alpha = 9.439784362354936e-1
    beta = 1.262242998506810
    r2r4Scalar = 2.59361680
    rs6 = 0.5299
    rs8 = 2.20
    c6 = 12.8
    c8Step = 3.0 * c6 * r2r4Scalar**2
    tmp = math.sqrt(c8Step / c6)
    tmp6 = (rs6 * tmp + rs8)**6
    tmp8 = (rs6 * tmp + rs8)**8

    def _column_energy(R):
        # Damped c6 / c8 contribution.
        eS = c6 / (R**6 + tmp6)
        eE = c8Step / (R**8 + tmp8)
        VDisp = (-eS - 2.0 * eE)
        # Gaussian expansion with geometrically growing exponents.
        V = T.zeros_like(R)
        for k in range(8):
            V = V + a[k] * T.exp(-alpha * beta**k * (R * B_To_Ang)**2)
        V = V * 1.e-3
        # NOTE(review): 27.21138... looks like Hartree -> eV -- confirm.
        return (VDisp + V + VRef / 3.0) * 27.2113839712790

    energies = [_column_energy(incoming[:, column]) for column in range(3)]
    p = T.concatenate([[energy] for energy in energies], axis=0)
    return T.squeeze(p).T
def __call__(self, X):
    """Build the profile ``a * exp(-x . b) + c`` and return its splittings.

    Args:
        X: Only its length is printed.
            # NOTE(review): the profile below is built from the lowercase
            # name ``x`` resolved outside this method, not from ``X`` --
            # confirm this is intended.

    Returns:
        The result of ``splittings(profile, 1)``. Test values are printed
        as a debugging aid.
    """
    print(len(X))
    profile = tt.squeeze(self.a * tt.exp(tt.dot(-x, self.b)) + self.c)

    # Debugging
    print("rot_prof: ", profile.tag.test_value)

    result = splittings(profile, 1)
    print("outer vals", result.tag.test_value)
    return result
def __call__(self, X):
    """Build the profile ``a * exp(-X . b) + c`` and return its splittings.

    Args:
        X: Symbolic inputs used in the exponent.

    Returns:
        The result of ``splittings(profile, grid, 1)``, where ``grid`` is
        a column of ``n2`` evenly spaced points in [0.1, 0.9]. Test values
        are printed as a debugging aid.
    """
    profile = tt.squeeze(self.a * tt.exp(tt.dot(-X, self.b)) + self.c)

    # Debugging
    print("rot_prof: ", profile.tag.test_value)

    # A one dimensional column vector of inputs.
    grid = np.linspace(0.1, 0.9, n2)[:, None]
    result = splittings(profile, grid, 1)
    print("outer vals", result.tag.test_value)
    return result
def logsumexp(x, axis=None, keepdims=False):
    """Compute ``log(sum(exp(x)))`` with the maximum subtracted first.

    Args:
        x: Symbolic tensor.
        axis: Axis to reduce; ``None`` reduces over everything.
        keepdims: Keep the reduced axis in the result when true.

    Returns:
        The reduced tensor. Without ``keepdims`` the result is squeezed
        (``axis is None``) or indexed at position 0 of ``axis``.
    """
    peak = T.max(x, axis=axis, keepdims=True)
    total = peak + T.log(T.sum(T.exp(x - peak), axis=axis, keepdims=True))

    if keepdims:
        return total
    if axis is None:
        return T.squeeze(total)

    # Drop the reduced axis by selecting its single entry.
    index = [slice(None, None, None)] * total.ndim
    index[axis] = 0
    return total[tuple(index)]
def __get_cost_value(self, Y_hat, Y):
    """Gets the cost value of the training (binary cross-entropy form).

    Args:
        Y_hat: The observed output of the network; its second shape entry
            is used as the normalising count.
            # the computation uses Theano ops, so symbolic tensors are
            # presumably expected -- verify against caller
        Y: The correct output.

    Returns:
        The squeezed cost.
    """
    m = Y_hat.shape[1]
    positive_part = T.dot(Y, T.log(Y_hat).T)
    negative_part = T.dot(T.sub(1, Y), T.log(1 - Y_hat).T)
    cost = -1 / m * (positive_part + negative_part)
    return T.squeeze(cost)
def get_radial_velocity(self, t, K=None, output_units=None):
    """Get the radial velocity of the star

    .. note:: Per the original documentation, positive ``z`` points
        *towards* the observer in this package, while this method returns
        values where a positive radial velocity is a redshift. The sign
        flip is the ``-conv * v[2]`` in the full-orbit branch.

    Args:
        t: The times where the radial velocity should be evaluated.
        K (Optional): The semi-amplitudes of the orbits. If provided, the
            full orbit solution is skipped and this amplitude is used
            instead.
        output_units (Optional): An AstroPy velocity unit. Defaults to
            ``m/s``. Ignored if a value is given for ``K``.

    Returns:
        The reflex radial velocity evaluated at ``t``.

    """
    if K is None:
        # Full orbit solution: convert from R_sun/day to the output unit.
        if output_units is None:
            output_units = u.m / u.s
        conv = (1 * u.R_sun / u.day).to(output_units).value
        velocity = self.get_star_velocity(t)
        return -conv * velocity[2]

    # Shortcut when the semi-amplitude is supplied directly.
    f = self._get_true_anomaly(t)
    if self.ecc is None:
        return tt.squeeze(K * tt.cos(f))

    # cos(w + f) + e * cos(w) from Lovis & Fischer
    shape_term = (self.cos_omega*tt.cos(f) - self.sin_omega*tt.sin(f) +
                  self.ecc * self.cos_omega)
    return tt.squeeze(K * shape_term)
def splittings(omega, l):
    """Build one symbolic value per ``n`` listed in ``frequ`` for degree ``l``.

    For every ``n`` in the rows of the module-level ``frequ`` table where
    column ``'l'`` equals ``l``, the product ``omega * kernels[l, n, :]``
    is contracted with ``x_diffs`` and scaled by the matching entries of
    the module-level ``beta`` table.

    Returns:
        The squeezed tensor of all values. The test value is printed as a
        debugging aid.
    """
    def _delta(n):
        area = tt.dot(x_diffs, omega * kernels[l, n, :])
        row_mask = (beta[:, 0] == l) * (beta[:, 1] == n)
        return beta[row_mask, 2] * area

    orders = frequ.loc[frequ['l'] == l]['n']
    vals = [_delta(n) for n in orders]
    vals = tt.squeeze(tt.as_tensor_variable(vals))
    print("vals")
    print(vals.tag.test_value)
    return vals
def get_planet_velocity(self, t):
    """Get the planets' velocity vector

    Args:
        t: The times where the velocity should be evaluated.

    Returns:
        The squeezed components of the velocity vector at ``t``, as
        returned by ``_get_velocity(-m_star, t)``.
        # NOTE(review): presumably in ``R_sun/day``; earlier documentation
        # said ``M_sun/day``, which is not a velocity unit -- confirm.

    """
    components = self._get_velocity(-self.m_star, t)
    return tuple(map(tt.squeeze, components))
def get_planet_position(self, t):
    """The planets' positions in the barycentric frame

    Args:
        t: The times where the position should be evaluated.

    Returns:
        The squeezed components of the position vector at ``t``, as
        returned by ``_get_position(a_planet, t)``.
        # original docs state units of ``R_sun`` -- not verifiable here

    """
    components = self._get_position(self.a_planet, t)
    return tuple(map(tt.squeeze, components))
def get_planet_velocity(self, t):
    """Get the planets' velocity vector

    Args:
        t: The times where the velocity should be evaluated.

    Returns:
        The squeezed components of the velocity vector at ``t``, as
        returned by ``_get_velocity(-m_star, t)``.
        # NOTE(review): presumably in ``R_sun/day``; earlier documentation
        # said ``M_sun/day``, which is not a velocity unit -- confirm.

    """
    squeezed = []
    for component in self._get_velocity(-self.m_star, t):
        squeezed.append(tt.squeeze(component))
    return tuple(squeezed)
def get_planet_position(self, t):
    """The planets' positions in the barycentric frame

    Args:
        t: The times where the position should be evaluated.

    Returns:
        The squeezed components of the position vector at ``t``, as
        returned by ``_get_position(a_planet, t)``.
        # original docs state units of ``R_sun`` -- not verifiable here

    """
    squeezed = []
    for component in self._get_position(self.a_planet, t):
        squeezed.append(tt.squeeze(component))
    return tuple(squeezed)
def get_relative_position(self, t, light_delay=False):
    """The planets' positions relative to the star

    Args:
        t: The times where the position should be evaluated.
        light_delay: Not supported here; any true value raises.

    Raises:
        NotImplementedError: If ``light_delay`` is true.

    Returns:
        The components ``(x, y, z)`` of the position vector at ``t``.
        ``x`` is ``speed * dt``, ``y`` is the constant ``_b_norm`` and
        ``z`` is ``+1`` where ``|dt| < duration / 2`` and ``-1`` elsewhere.
        # original docs state units of ``R_sun`` -- not verifiable here

    """
    if light_delay:
        raise NotImplementedError(
            "Light travel time delay is not implemented for simple orbits"
        )

    # Time since the reference, wrapped into one period and centred.
    phase = tt.mod(tt.shape_padright(t) - self._ref_time, self.period)
    phase = phase - self._half_period

    x = tt.squeeze(self.speed * phase)
    y = tt.squeeze(self._b_norm + tt.zeros_like(phase))

    inside = tt.abs_(phase) < 0.5 * self.duration
    z = tt.squeeze(inside * 1.0 - (~inside) * 1.0)
    return x, y, z
def __pow__(self, other):
    """Raise this covariance function to a scalar power.

    Accepted exponents, checked in this order: a Theano shared variable
    whose value squeezes to a scalar, a ``Number``, or anything that
    NumPy converts to an array squeezing to a scalar.

    Raises:
        ValueError: If ``other`` is none of the above.

    Returns:
        An ``Exponentiated`` wrapping ``self`` and the (squeezed) exponent.
    """
    shared = isinstance(other, theano.compile.SharedVariable)
    if shared and other.get_value().squeeze().shape == ():
        return Exponentiated(self, tt.squeeze(other))

    if isinstance(other, Number):
        return Exponentiated(self, other)

    if np.asarray(other).squeeze().shape == ():
        return Exponentiated(self, np.squeeze(other))

    raise ValueError(
        "A covariance function can only be exponentiated by a scalar value"
    )
def __init__(self, p, *args, **kwargs):
    """Store the probabilities ``p`` and derive ``k`` and ``mode``.

    Args:
        p: Probabilities; the size of the last axis becomes ``self.k``.
        *args, **kwargs: Forwarded to the parent initialiser.

    ``self.k`` is the test value of the last shape entry when one is
    available, otherwise the symbolic shape entry itself. ``self.mode`` is
    the argmax over the last axis (squeezed when it is 1-D).
    """
    super().__init__(*args, **kwargs)
    try:
        k = tt.shape(p)[-1].tag.test_value
    except AttributeError:
        # No test value attached: fall back to the symbolic size.
        k = tt.shape(p)[-1]
    self.k = k

    # Original author's note (issue #2082): automatically rescaling ``p``
    # here may be dangerous without checking for positiveness, so it is
    # stored as given.
    self.p = p = tt.as_tensor_variable(floatX(p))

    mode = tt.argmax(p, axis=-1)
    if mode.ndim == 1:
        mode = tt.squeeze(mode)
    self.mode = mode
def get_output_for(self, incoming, **kwargs):
    """Apply ``(1 - exp(-Lambda * (r - re)))**2`` to three input columns.

    Args:
        incoming: Symbolic 2-D input; columns 0, 1 and 2 are read.
        **kwargs: Accepted and ignored.

    Returns:
        The three transformed columns, joined on axis 0, squeezed and
        transposed.
    """
    # An earlier variant (kept as a comment by the original author) used
    # ``Lambda[0]`` / ``re[0]`` and an extra ``Lambda[1]`` prefactor.
    def _transform(column):
        return (1.0 - T.exp(-self.Lambda * (column - self.re)))**2

    columns = [_transform(incoming[:, k]) for k in range(3)]
    joined = T.concatenate([[column] for column in columns], axis=0)
    return T.squeeze(joined).T
def get_relative_angles(self, t, parallax=None):
    """The planets' relative position to the star in the sky plane, in
    separation, position angle coordinates.

    .. note:: Per the original documentation, each planet is treated
        independently of the others, which is fine as long as the planet
        masses are small.

    Args:
        t: The times where the position should be evaluated.
        parallax: Forwarded to ``_get_position``.

    Returns:
        ``(rho, theta)``: the separation ``sqrt(X**2 + Y**2)`` and the
        angle ``arctan2(Y, X)`` in radians within [-pi, pi], both squeezed.
        # original docs give rho in arcseconds and theta measured east of
        # north -- not verifiable here

    """
    X, Y, _ = self._get_position(-self.a, t, parallax)
    separation = tt.squeeze(tt.sqrt(X**2 + Y**2))
    angle = tt.squeeze(tt.arctan2(Y, X))
    return (separation, angle)
def get_star_velocity(self, t):
    """Get the star's velocity vector

    .. note:: Per the original documentation, a system with multiple
        planets yields one column per planet with that planet's
        contribution; summing along the last axis gives the total.

    Args:
        t: The times where the velocity should be evaluated.

    Returns:
        The squeezed components of the velocity vector at ``t``, as
        returned by ``_get_velocity(m_planet, t)``.
        # NOTE(review): presumably in ``R_sun/day``; earlier documentation
        # said ``M_sun/day``, which is not a velocity unit -- confirm.

    """
    components = self._get_velocity(self.m_planet, t)
    return tuple(map(tt.squeeze, components))
def get_relative_velocity(self, t):
    """The planets' velocity relative to the star

    .. note:: Per the original documentation, each planet is treated
        independently of the others, which is fine as long as the planet
        masses are small.

    Args:
        t: The times where the velocity should be evaluated.

    Returns:
        The squeezed components of the velocity vector at ``t``, as
        returned by ``_get_velocity(-m_total, t)``.
        # original docs state units of ``R_sun/day`` -- not verifiable here

    """
    components = self._get_velocity(-self.m_total, t)
    return tuple(map(tt.squeeze, components))
def get_star_position(self, t):
    """The star's position in the barycentric frame

    .. note:: Per the original documentation, a system with multiple
        planets yields one column per planet with that planet's
        contribution; summing over the last axis gives the full position.

    Args:
        t: The times where the position should be evaluated.

    Returns:
        The squeezed components of the position vector at ``t``, as
        returned by ``_get_position(a_star, t)``.
        # original docs state units of ``R_sun`` -- not verifiable here

    """
    components = self._get_position(self.a_star, t)
    return tuple(map(tt.squeeze, components))
def _compute_losses(self, model_output):
    """Compute the masked mean L2 loss per sequence.

    Stores ``self.samples``, ``self.loss_per_time_step`` and
    ``self.loss_per_seq`` as side effects.

    Shapes below are the original author's annotations, not checked here:
    ``model_output`` is (batch_size, seq_len, K, M, target_size) and
    ``symb_targets`` is (batch_size, seq_len+K-1, target_dims).

    Returns:
        ``self.loss_per_seq``: time-step losses weighted by the mask,
        summed over axis 1 and divided by the mask sum.
    """
    # Drop the trailing ``k - 1`` steps; ``or None`` keeps everything
    # when ``k == 1`` (the stop index would otherwise be 0).
    stop = -self.model.k + 1 or None
    targets = self.dataset.symb_targets[:, :stop, :]

    mask = self.dataset.symb_mask

    # Original author's note: the squeeze is meant to remove the K=1 and
    # M=1 dimensions.
    squeezed_output = T.squeeze(model_output)
    self.samples = self.model.get_max_component_samples(squeezed_output)

    self.loss_per_time_step = l2distance(self.samples, targets)

    masked_total = T.sum(self.loss_per_time_step * mask, axis=1)
    self.loss_per_seq = masked_total / T.sum(mask, axis=1)
    return self.loss_per_seq
def prederrrate(output, target_output, mask,db='Y_PRED_ERRORS:',verbose=False):
    """
    Calculates the misclassification rate, counting only unmasked samples.

    Per the original documentation all matrices have shape
    (sequences x sequence_length x nclasses).

    :param output: Network output; the class is taken as the argmax over
                   the last axis.
    :param target_output: One-hot-encoded targets; the label is taken as
                   the argmax over the last axis.
    :param mask: Binary mask, 1 for included entries and 0 for excluded
                   ones. It is squeezed before being multiplied with the
                   flattened matches.
    :param db: Prefix of the label used for verbose printing.
    :param verbose: If true, the accuracy is printed when evaluated.
    :return: ``1.0 - accuracy``.
    """
    labels = T.argmax(target_output, axis=-1).flatten()
    predictions = T.argmax(output, axis=-1).flatten()
    matches = T.eq(labels, predictions)

    included = T.sum(mask)
    accuracy = T.sum(matches * T.squeeze(mask)) / included
    if verbose:
        accuracy = theano.printing.Print(db+' ACC')(accuracy)
    return 1.0 - accuracy
def fwd_old(self, x, V, A, L):
    """Forward pass of the older spectral-window layer.

    Args:
        x: signal
        V: eigenvectors (only the first ``self.K`` columns are used)
        A: area
        L: eigenvalues (only the first ``self.K`` entries are used)

    Returns:
        ``self.activation`` applied to the flattened 2-D convolution of
        the descriptors with ``self.W``, plus ``self.b``.

    Shape remarks in the comments below are the original author's
    annotations and are not checked by this code.
    """
    eigvecs = V[:, :self.K]
    eigvals = L[:self.K]

    # Evaluate the cubic B-spline basis at the normalised eigenvalues.
    offsets = eigvals.dimshuffle(0, 'x') - self.evalSamples.dimshuffle('x', 0)
    basis = self.cubicBSpline(offsets / self.dEval).dimshuffle('x', 0, 1)

    rho = T.sqrt(T.sum(A))

    # Weight the basis columns for each input function to generate a
    # window per input function (Q x K). The author noted that the
    # explicit ``[:, :, 0]`` was needed because squeeze alone did not work.
    tiled_basis = T.tile(basis, [self.nin, 1, 1])
    ghat = T.squeeze(T.batched_dot(tiled_basis, self.beta)[:, :, 0])

    # Q x K x N
    tiled_vecs = T.tile(eigvecs.dimshuffle('x', 1, 0), [self.nin, 1, 1])
    windowed = (ghat.dimshuffle(0, 'x', 1) * eigvecs).dimshuffle(0, 2, 1)

    # Q x N x N
    transl = rho * T.batched_dot(tiled_vecs.dimshuffle(0, 2, 1), windowed)
    transl = A.dimshuffle('x', 0, 'x') * transl

    # Q x K x N
    projected = (
        eigvecs.dimshuffle(0, 'x', 1) * x.dimshuffle(0, 1, 'x')
    ).dimshuffle(1, 2, 0)
    desc = T.abs_(rho * T.batched_dot(projected, transl))

    # BC01 format : N x Q x 1 x K
    desc = desc.dimshuffle(2, 0, 'x', 1)
    convolved = theano.tensor.nnet.conv.conv2d(desc, self.W).flatten(2)
    return self.activation(convolved + self.b)
def rnn(step_function, inputs, initial_states,
        go_backwards=False, masking=True):
    '''Iterates over the time dimension of a tensor.

    Parameters
    ----------
    step_function:
        Called as ``step_function(input, states)`` and must return
        ``(output, new_states)`` where ``new_states`` is a list.
    inputs: tensor of temporal data; it is permuted with ``(1, 0, 2)``
        before scanning, so it has to be 3-D.
        # original docs: shape (samples, time, ...)
    initial_states: list of tensors holding the initial values of the
        states used by the step function.
    go_backwards: boolean, forwarded to ``theano.scan``.
    masking: boolean. If true, a time step whose input is all zeros
        produces an all-zero output and leaves the states unchanged.

    Returns
    -------
    A tuple (last_output, outputs, new_states).
        last_output: the final entry of the squeezed outputs.
        outputs: the squeezed outputs permuted with ``(1, 0, 2)``.
        new_states: list with the squeezed final value of every state.
    '''
    inputs = inputs.dimshuffle((1, 0, 2))

    def _step(*args):
        x_t, states = args[0], args[1:]
        output, new_states = step_function(x_t, states)
        if not masking:
            return [output] + new_states

        # If the whole input step is zero, emit zeros and carry the
        # previous states forward.
        nonzero = T.any(x_t)
        output = T.switch(nonzero, output, 0. * output)
        carried = [
            T.switch(nonzero, new_state, state)
            for state, new_state in zip(states, new_states)
        ]
        return [output] + carried

    results, _ = theano.scan(
        _step,
        sequences=inputs,
        outputs_info=[None] + initial_states,
        go_backwards=go_backwards)

    # deal with Theano API inconsistency: scan returns a bare variable
    # when there is a single output.
    if type(results) is list:
        outputs, states = results[0], results[1:]
    else:
        outputs, states = results, []

    outputs = T.squeeze(outputs)
    last_output = outputs[-1]
    outputs = outputs.dimshuffle((1, 0, 2))
    final_states = [T.squeeze(state[-1]) for state in states]
    return last_output, outputs, final_states
def get_star_acceleration(self, t):
    """Get the star's acceleration vector

    Args:
        t: The times where the acceleration should be evaluated.

    Returns:
        The squeezed components returned by
        ``_get_acceleration(a_star, m_planet, t)``.

    """
    components = self._get_acceleration(self.a_star, self.m_planet, t)
    return tuple(map(tt.squeeze, components))
def rnn(step_function, inputs, initial_states,
        go_backwards=False, mask=None, constants=None,
        unroll=False, input_length=None):
    '''Iterates over the time dimension of a tensor.

    # Arguments
        step_function: Called with ``(input, states)`` and must return
            ``(output, new_states)`` where ``new_states`` is a list.
            In the unrolled paths ``states`` is ``states + constants``;
            in the scan paths the constants arrive as trailing entries
            of ``states`` (scan ``non_sequences``).
        inputs: tensor of temporal data, at least 3D. The first two axes
            are swapped before iterating.
            # original docs: shape (samples, time, ...)
        initial_states: list of tensors with the initial state values.
        go_backwards: boolean. If True, iterate in reverse order.
        mask: optional tensor; where it is zero the previous output and
            states are carried over. A mask with one dimension fewer
            than ``inputs`` is expanded first.
        constants: a list of constant values passed at each step.
        unroll: whether to unroll the RNN or to use a symbolic loop
            (`scan`).
        input_length: must be specified if using `unroll`.

    # Returns
        A tuple (last_output, outputs, new_states).
            last_output: the final entry of the squeezed outputs.
            outputs: the squeezed outputs with the first two axes swapped
                back.
            new_states: list with the squeezed final value of every state.

    # Raises
        Exception: if ``unroll`` is set without ``input_length``.
    '''
    ndim = inputs.ndim
    assert ndim >= 3, 'Input should be at least 3D.'

    if unroll and input_length is None:
        raise Exception('When specifying `unroll=True`, an `input_length` '
                        'must be provided to `rnn`.')

    time_major = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(time_major)

    if constants is None:
        constants = []

    def _unrolled_order():
        # Explicit list of time indices for the unrolled paths.
        order = list(range(input_length))
        return order[::-1] if go_backwards else order

    def _stack_history(history):
        # One stacked tensor per state, built from all recorded steps.
        return [
            T.stack(*[step_states[k] for step_states in history])
            for k in range(len(history[-1]))
        ]

    def _split_scan(results):
        # deal with Theano API inconsistency: scan returns a bare
        # variable when there is a single output.
        if type(results) is list:
            return results[0], results[1:]
        return results, []

    if mask is not None:
        if mask.ndim == ndim-1:
            mask = expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(time_major)

        if unroll:
            emitted = []
            history = []
            states = initial_states
            for i in _unrolled_order():
                output, new_states = step_function(inputs[i], states + constants)

                # A masked first step falls back to zeros, later ones to
                # the previously emitted output.
                if emitted:
                    prev_output = emitted[-1]
                else:
                    prev_output = zeros_like(output)
                output = T.switch(mask[i], output, prev_output)

                states = [
                    T.switch(mask[i], new_state, state)
                    for state, new_state in zip(states, new_states)
                ]
                emitted.append(output)
                history.append(states)

            outputs = T.stack(*emitted)
            states = _stack_history(history)
        else:
            # build an all-zero tensor shaped like one step's output
            initial_output = step_function(inputs[0], initial_states + constants)[0] * 0
            # Theano gets confused by broadcasting patterns in the scan op
            initial_output = T.unbroadcast(initial_output, 0, 1)

            def _step(x_t, mask_t, output_tm1, *states):
                output, new_states = step_function(x_t, states)
                # output previous output if masked.
                output = T.switch(mask_t, output, output_tm1)
                carried = [
                    T.switch(mask_t, new_state, state)
                    for state, new_state in zip(states, new_states)
                ]
                return [output] + carried

            results, _ = theano.scan(
                _step,
                sequences=[inputs, mask],
                outputs_info=[initial_output] + initial_states,
                non_sequences=constants,
                go_backwards=go_backwards)
            outputs, states = _split_scan(results)
    else:
        if unroll:
            emitted = []
            history = []
            states = initial_states
            for i in _unrolled_order():
                output, states = step_function(inputs[i], states + constants)
                emitted.append(output)
                history.append(states)
            outputs = T.stack(*emitted)
            states = _stack_history(history)
        else:
            def _step(x_t, *states):
                output, new_states = step_function(x_t, states)
                return [output] + new_states

            results, _ = theano.scan(
                _step,
                sequences=inputs,
                outputs_info=[None] + initial_states,
                non_sequences=constants,
                go_backwards=go_backwards)
            outputs, states = _split_scan(results)

    outputs = T.squeeze(outputs)
    last_output = outputs[-1]

    batch_major = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(batch_major)
    final_states = [T.squeeze(state[-1]) for state in states]
    return last_output, outputs, final_states
def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".

    Only the requested axis is dropped. `T.squeeze` is deliberately not used
    here because it removes *every* broadcastable dimension of its argument,
    which would contradict the single-axis contract stated above.

    # Arguments
        x: tensor whose dimension `axis` has length 1.
        axis: integer index of the dimension to remove. Negative values
            count from the last dimension.

    # Returns
        A tensor with one dimension fewer than `x`.
    '''
    if axis < 0:
        # Normalise first: the broadcast flag set below and the dimshuffle
        # pattern must refer to the same (non-negative) dimension.
        axis += x.ndim
    # dimshuffle may only drop a dimension that is flagged as broadcastable.
    x = T.addbroadcast(x, axis)
    return x.dimshuffle([dim for dim in range(x.ndim) if dim != axis])
def get_relative_acceleration(self, t):
    """Return the squeezed components produced by ``_get_acceleration``.

    ``self._get_acceleration`` is called with the negated ``self.a``, the
    negated ``self.m_total`` and ``t``; every item it yields is passed
    through ``tt.squeeze`` and the results are collected into a tuple.
    """
    components = self._get_acceleration(-self.a, -self.m_total, t)
    return tuple(map(tt.squeeze, components))
def _comp_logp(self, value):
    """Evaluate the log-probability of ``value`` under every component.

    When ``self.comp_dists`` is a collection of separate distributions, each
    one's ``logp(value)`` is stacked along a new last axis and squeezed.

    When it is a single distribution object, ``value`` is right-padded with
    broadcastable dimensions (if needed) before calling ``comp_dists.logp``.
    The number of padded dimensions is inferred by comparing the shape of
    ``value`` with ``self.shape`` and ``comp_dists.shape``.
    """
    comp_dists = self.comp_dists

    if not self.comp_is_distribution:
        per_component = [dist.logp(value) for dist in comp_dists]
        return tt.squeeze(tt.stack(per_component, axis=-1))

    # ``value`` may be the self tensor, the mode test point or observed
    # data. Observed data can look like (repetitions,) + dist_shape, i.e.
    # without the trailing mixture axis, so try to evaluate its shape and
    # compare it against the known shapes below.
    try:
        value_shape = tuple(value.shape.eval())
    except AttributeError:
        value_shape = value.shape
    except theano.gof.MissingInputError:
        value_shape = None

    try:
        own_shape = tuple(self.shape)
    except AttributeError:
        # Happens in __init__ when computing self.logp(comp_modes)
        own_shape = None

    components_shape = tuple(comp_dists.shape)
    ndim = value.ndim

    # True when the shape is known and matches neither the self tensor nor
    # the components: value then most likely holds observed data.
    holds_observed = value_shape is not None and not (
        (own_shape is not None and value_shape == own_shape)
        or value_shape == components_shape
    )

    if (holds_observed and own_shape
            and value_shape[-len(own_shape):] == own_shape):
        # Observed values for the mixture itself.
        ndim = len(own_shape)
    elif (holds_observed and components_shape
            and value_shape[-len(components_shape):] == components_shape):
        # Observed values for the mixture components.
        ndim = len(components_shape)
    elif ndim <= 1:
        # Either value is not observed data, or nothing could be inferred:
        # fall back to the legacy rule of always padding when ndim <= 1.
        ndim = len(comp_dists.shape) - 1

    missing_dims = len(comp_dists.shape) - ndim
    if missing_dims > 0:
        return comp_dists.logp(tt.shape_padright(value, missing_dims))
    return comp_dists.logp(value)
def stack_rnn(step_function, inputs, initial_states, stack_indices,
              go_backwards=False, mask=None, constants=None, unroll=False,
              input_length=None):
    '''Iterate over time, taking each step's previous state from a
    caller-selected row of a per-step state buffer.

    Step `t` does not simply continue from step `t - 1`: `stack_indices`
    selects which row of the state buffers is read as the previous state,
    and the newly computed state is written into row `t` of the buffers.

    # Arguments
        step_function: callable `(input_t, states) -> (output, new_states)`.
            `states` is the list of gathered previous states followed by
            `constants`; `new_states` must contain exactly two tensors.
        inputs: tensor with at least 3 dimensions. Axes 0 and 1 are swapped
            before iterating (presumably (samples, time, ...) -- confirm
            against callers).
        initial_states: list of exactly two 3-D state buffers; row 0 of each
            is used as the starting state (presumably (time, samples, dim)
            -- confirm against callers). The list is not modified.
        stack_indices: 2-D integer tensor, transposed before use (presumably
            (samples, time) -- confirm against callers).
        go_backwards: boolean. If True, iterate in reverse order.
        mask: optional tensor with the same rank as `inputs`, or one less
            (a trailing axis is then added with `expand_dims`). Where it is
            zero the previous output and the parent state are kept.
        constants: optional list of tensors appended to `states` on every
            call of `step_function`.
        unroll: boolean. If True, build the loop in Python instead of using
            `theano.scan`; `input_length` must then be given.
        input_length: number of time steps, only used when `unroll=True`.

    # Returns
        A tuple `(last_output, outputs, [h, c])` where `outputs` has axes 0
        and 1 swapped back and `h`, `c` are the squeezed final state buffers.

    NOTE(review): in the unrolled path the first processed step starts from
    row 0 of the buffers, whereas the scan path gathers by `stack_indices`
    on every step, including the first. This difference is preserved from
    the original code.
    '''
    ndim = inputs.ndim
    assert ndim >= 3, 'Input should be at least 3D.'

    # Put the iterated axis first.
    axes = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)
    stack_indices = stack_indices.dimshuffle([1, 0])

    if constants is None:
        constants = []

    if mask is not None:
        if mask.ndim == ndim - 1:
            mask = expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)

    if unroll:
        indices = list(range(input_length))
        if go_backwards:
            indices = indices[::-1]

        successive_outputs = []
        # Work on a copy so the caller's list is not overwritten in place.
        state_tensors = list(initial_states)
        initial = [state0[0, :, :] for state0 in state_tensors]
        batch_index = T.arange(state_tensors[0].shape[1])

        for step, x_ind in enumerate(indices):
            if step == 0:
                parent_state = initial
            else:
                # Per-sample gather: sample b reads row p_ind[b] of column b.
                # Used for both the masked and the unmasked case so that the
                # gathered state has the same shape as the scan path's.
                p_ind = stack_indices[x_ind]
                parent_state = [state_tensor[p_ind, batch_index]
                                for state_tensor in state_tensors]

            output, new_states = step_function(inputs[x_ind],
                                               parent_state + constants)

            if mask is not None:
                if successive_outputs:
                    prev_output = successive_outputs[-1]
                else:
                    prev_output = zeros_like(output)
                output = T.switch(mask[x_ind], output, prev_output)
                # Keep the *new* state where the mask is set and fall back to
                # the parent state otherwise, exactly like the scan path.
                new_states = [T.switch(mask[x_ind], new_state, p_state)
                              for p_state, new_state
                              in zip(parent_state, new_states)]

            for i, state in enumerate(new_states):
                state_tensors[i] = T.set_subtensor(state_tensors[i][x_ind],
                                                   state)
            successive_outputs.append(output)

        outputs = T.stack(*successive_outputs)
        htensor, ctensor = state_tensors

    elif mask is not None:
        # build an all-zero tensor of shape (samples, output_dim)
        init_states = [s[0, :, :] for s in initial_states]
        initial_output = step_function(inputs[0],
                                       init_states + constants)[0] * 0
        # Theano gets confused by broadcasting patterns in the scan op
        initial_output = T.unbroadcast(initial_output, 0, 1)

        def _step(x_t, mask_t, stack_index, iter_index, output_tm1,
                  h_tensor, c_tensor, *step_constants):
            batch_index = T.arange(stack_index.shape[0])
            hm1 = colgather(h_tensor, batch_index, stack_index)
            cm1 = colgather(c_tensor, batch_index, stack_index)
            output, [h, c] = step_function(
                x_t, [hm1, cm1] + list(step_constants))
            # Output the previous output if masked.
            output = T.switch(mask_t, output, output_tm1)
            assert mask_t.ndim == h.ndim == c.ndim == hm1.ndim == cm1.ndim
            h = T.switch(mask_t, h, hm1)
            c = T.switch(mask_t, c, cm1)
            return [output,
                    T.set_subtensor(h_tensor[iter_index], h),
                    T.set_subtensor(c_tensor[iter_index], c)]

        (outputs, htensor, ctensor), _ = theano.scan(
            _step,
            sequences=[inputs, mask, stack_indices,
                       T.arange(inputs.shape[0])],
            outputs_info=[initial_output] + initial_states,
            non_sequences=constants,
            go_backwards=go_backwards)
        # scan stacks one full buffer per step; only the last one is needed.
        htensor = htensor[-1]
        ctensor = ctensor[-1]

    else:
        def _step(x_t, stack_index, iter_index, h_tensor, c_tensor,
                  *step_constants):
            batch_index = T.arange(stack_index.shape[0])
            hm1 = colgather(h_tensor, batch_index, stack_index)
            cm1 = colgather(c_tensor, batch_index, stack_index)
            output, [h, c] = step_function(
                x_t, [hm1, cm1] + list(step_constants))
            return [output,
                    T.set_subtensor(h_tensor[iter_index], h),
                    T.set_subtensor(c_tensor[iter_index], c)]

        (outputs, htensor, ctensor), _ = theano.scan(
            _step,
            sequences=[inputs, stack_indices, T.arange(inputs.shape[0])],
            outputs_info=[None] + initial_states,
            non_sequences=constants,
            go_backwards=go_backwards)
        # scan stacks one full buffer per step; only the last one is needed.
        htensor = htensor[-1]
        ctensor = ctensor[-1]

    outputs = T.squeeze(outputs)
    last_output = outputs[-1]

    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)
    return last_output, outputs, [T.squeeze(htensor), T.squeeze(ctensor)]
def __call__(self, X):
    """Return ``dot(X, self.A) + self.b`` with broadcastable axes squeezed out."""
    projected = tt.dot(X, self.A)
    shifted = projected + self.b
    return tt.squeeze(shifted)
def rnn(step_function, inputs, initial_states,
        go_backwards=False, mask=None):
    '''Run `step_function` along the time axis of `inputs` with `theano.scan`.

    Parameters
    ----------
    step_function: callable `(input, states) -> (output, new_states)`
        `input` is one time slice of `inputs` (time axis removed), `states`
        is the tuple of current states and `new_states` is a list with one
        entry per state.
    inputs: tensor with at least 3 dimensions, laid out as
        (samples, time, ...).
    initial_states: list of tensors holding the initial value of each state.
    go_backwards: boolean. If True, iterate over time in reverse order.
    mask: optional tensor with the same rank as `inputs` (it is permuted
        with the same axes). Where it is zero the previous output and the
        previous states are carried over. When omitted, an all-ones mask
        is built from `inputs`.

    Returns
    -------
    A tuple (last_output, outputs, new_states).
        last_output: the output of the last iteration.
        outputs: all outputs with the sample axis first again,
            (samples, time, ...).
        new_states: list with the squeezed last value of every state.
    '''
    ndim = inputs.ndim
    assert ndim >= 3, "Input should be at least 3D."

    # Time-major layout, because scan iterates over the leading axis.
    time_major = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(time_major)
    mask = (expand_dims(ones_like(T.sum(inputs, axis=-1)))
            if mask is None else mask.dimshuffle(time_major))

    def _step(x_t, mask_t, prev_output, *prev_states):
        out_t, candidates = step_function(x_t, prev_states)
        # A masked step repeats the previous output and keeps its states.
        carried = [T.switch(mask_t, fresh, old)
                   for old, fresh in zip(prev_states, candidates)]
        return [T.switch(mask_t, out_t, prev_output)] + carried

    # All-zero tensor of shape (samples, output_dim) to seed the recurrence;
    # Theano gets confused by broadcasting patterns inside the scan op, so
    # the broadcast flags of its first two axes are cleared.
    zero_output = step_function(inputs[0], initial_states)[0] * 0
    zero_output = T.unbroadcast(zero_output, 0, 1)

    results, _ = theano.scan(
        _step,
        sequences=[inputs, mask],
        outputs_info=[zero_output] + initial_states,
        go_backwards=go_backwards)

    # scan returns a bare variable instead of a list for a single output.
    if type(results) is list:
        outputs, states = results[0], results[1:]
    else:
        outputs, states = results, []

    outputs = T.squeeze(outputs)
    last_output = outputs[-1]

    sample_major = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(sample_major)
    final_states = [T.squeeze(state[-1]) for state in states]
    return last_output, outputs, final_states
def rttn(step_function, inputs, initial_states, tree_topology, action_types,
         horizon, shape_key, context_matrix, mask=None, constants=None,
         **kwargs):
    '''Scan a tree-structured recurrence and score the horizon at each step.

    At every step the parent state is gathered from `branch_tensor`
    according to `tree_topology`, `step_function` produces a child state
    and a new traversal state, the child state is stored in row
    `iter_index` of `branch_tensor`, and a softmax over the horizon entries
    is computed from the traversal state projected by `context_matrix`.

    # Arguments
        step_function: callable `(x_input, states) -> (h_child, h_vplus)`
            where `states` is the tuple
            `(h_parent, h_traverse, x_type) + constants`.
        inputs: tensor with at least 3 dimensions; axes 0 and 1 are swapped
            before scanning.
        initial_states: list used as the recurrent `outputs_info`
            (presumably `[h_traverse_0, branch_tensor_0]` -- confirm
            against callers).
        tree_topology, action_types: tensors scanned alongside `inputs`
            (axes 0 and 1 swapped first).
        horizon: pair `(horizon_words, horizon_indices)`, both scanned
            alongside `inputs`.
        shape_key: mapping with the keys 'batch', 'rnn' and 'word'.
        context_matrix: projection applied to the traversal state. The
            original author's notes give its shape as (R, RW) in one place
            and (4, R, RW) in another -- confirm.
        mask: optional tensor with the rank of `inputs`, or one less (a
            trailing axis is then added). Where it is zero the parent state
            and the previous traversal state are kept.
        constants: optional list of extra tensors handed to `step_function`.
        **kwargs: accepted and ignored.

    # Returns
        A tuple `(branch_tensor, h_v, horizon_states, p_horizons)`, each
        squeezed and with axes 0 and 1 swapped back.
    '''
    assert inputs.ndim >= 3, 'Input should be at least 3D.'

    horizon_words, horizon_indices = horizon

    def _shuffle(tensor):
        return tensor.dimshuffle([1, 0] + list(range(2, tensor.ndim)))

    inputs = _shuffle(inputs)
    tree_topology = _shuffle(tree_topology)
    action_types = _shuffle(action_types)
    horizon_words = _shuffle(horizon_words)      # all words on horizon
    horizon_indices = _shuffle(horizon_indices)  # their branch indices

    if constants is None:
        constants = []

    def _advance(iter_index, x_input, x_mask, x_type, x_topology,
                 words_t, indices_t, h_traverse, branch_tensor, W_ctx,
                 step_constants):
        # Shared body of both step functions; x_mask is None when unmasked.
        ### topology
        batch_index = T.arange(x_topology.shape[0])
        h_parent = colgather(branch_tensor, batch_index, x_topology)
        states = (h_parent, h_traverse, x_type) + step_constants
        h_child, h_vplus = step_function(x_input, states)
        if x_mask is not None:
            h_child = T.switch(x_mask, h_child, h_parent)
            h_vplus = T.switch(x_mask, h_vplus, h_traverse)
        branch_tensor = T.set_subtensor(branch_tensor[iter_index], h_child)

        ### shape sizes
        s_batch = shape_key['batch']
        s_rnn_word = shape_key['rnn'] + shape_key['word']

        # ctx is used as an attentional vector over the horizon states.
        # The result of addbroadcast has to be kept: the call returns a new
        # variable and does not change its argument.
        ctx = T.dot(h_vplus, W_ctx)
        ctx = T.addbroadcast(T.reshape(ctx, (s_batch, 1, s_rnn_word)), 1)

        # Rows of branch_tensor are selected by indices_t and columns by the
        # sample index, so the first index must be indices_t; the column
        # index is reshaped to (batch, 1) to broadcast against it.
        batch_column = T.arange(s_batch).reshape((s_batch, 1))
        branch_owners = branch_tensor[indices_t, batch_column]
        horizon_state = T.concatenate([branch_owners, words_t], axis=-1)

        # Elementwise product followed by a sum over the last axis: a
        # per-row dot product, then a softmax distribution per row.
        p_horizon = T.nnet.softmax(T.sum(horizon_state * ctx, axis=-1))
        return h_vplus, branch_tensor, horizon_state, p_horizon

    if mask is not None:
        if mask.ndim == inputs.ndim - 1:
            mask = expand_dims(mask)
        assert mask.ndim == inputs.ndim
        mask = _shuffle(mask)

        # W_ctx arrives first among the non-sequences and is separated from
        # the remaining constants here.
        def _step(iter_index, x_input, x_mask, x_type, x_topology,
                  words_t, indices_t, h_traverse, branch_tensor,
                  W_ctx, *step_constants):
            return _advance(iter_index, x_input, x_mask, x_type, x_topology,
                            words_t, indices_t, h_traverse, branch_tensor,
                            W_ctx, step_constants)

        sequences = [T.arange(inputs.shape[0]), inputs, mask, action_types,
                     tree_topology, horizon_words, horizon_indices]
    else:
        def _step(iter_index, x_input, x_type, x_topology,
                  words_t, indices_t, h_traverse, branch_tensor,
                  W_ctx, *step_constants):
            return _advance(iter_index, x_input, None, x_type, x_topology,
                            words_t, indices_t, h_traverse, branch_tensor,
                            W_ctx, step_constants)

        sequences = [T.arange(inputs.shape[0]), inputs, action_types,
                     tree_topology, horizon_words, horizon_indices]

    output_info = initial_states + [None, None]
    (h_v, branch_tensor, horizon_states, p_horizons), _ = theano.scan(
        _step,
        sequences=sequences,
        outputs_info=output_info,
        non_sequences=[context_matrix] + constants)
    # scan stacks one full branch tensor per step; keep the final one.
    branch_tensor = branch_tensor[-1]

    def _unshuffle(tensor):
        # Build the pattern from the rank *after* squeezing, otherwise it is
        # invalid whenever squeeze actually removes a dimension.
        squeezed = T.squeeze(tensor)
        return squeezed.dimshuffle([1, 0] + list(range(2, squeezed.ndim)))

    h_v = _unshuffle(h_v)
    branch_tensor = _unshuffle(branch_tensor)
    horizon_states = _unshuffle(horizon_states)
    p_horizons = _unshuffle(p_horizons)
    return branch_tensor, h_v, horizon_states, p_horizons
def rnn(step_function, inputs, nb_other_outputs, initial_states, contexts,
        parameters, truncate_gradient=-1, go_backwards=False, masking=True):
    '''Iterate over the time dimension of a tensor.

    Parameters
    ----------
    step_function: callable `(input, args)` returning
        `(output, other_outputs, new_states)`.
        input: one time slice of `inputs` (time axis removed).
        args: tuple holding the current states followed by `contexts` and
            `parameters`.
        other_outputs: list of `nb_other_outputs` extra tensors.
        new_states: list of tensors, one per entry of `initial_states`.
    inputs: tensor of temporal data of shape (samples, time, ...)
        (at least 3D).
    nb_other_outputs: number of extra outputs produced by `step_function`
        besides `output`.
    initial_states: list of tensors with the initial values of the states.
    contexts: list of contexts passed to `step_function` at each step.
    parameters: list of parameters passed to `step_function` at each step.
    truncate_gradient: number of steps used in truncated BPTT; forwarded to
        `theano.scan`. -1 selects classical BPTT.
    go_backwards: boolean. If True, iterate over time in reverse order.
    masking: boolean. If True, for any time step whose input is all zeros
        along the last axis the states are passed on unchanged and
        `output` and all `other_outputs` are zeroed. The zero test has one
        trailing axis of length 1, so it must broadcast against each extra
        output.

    Returns
    -------
    A tuple (last_output, outputs, other_outputs, new_states).
        last_output: the output of the last iteration.
        outputs: all outputs with the sample axis first again,
            (samples, time, ...).
        other_outputs: list of the extra outputs exactly as returned by
            `theano.scan`, i.e. with the iterated axis first.
        new_states: list with the squeezed last value of every state.
    '''
    # Time-major layout for scan; works for any rank >= 2.
    axes = [1, 0] + list(range(2, inputs.ndim))
    inputs = inputs.dimshuffle(axes)
    nb_states = len(initial_states)

    def _step(input, *args):
        # scan passes the recurrent states first, then the non-sequences.
        states = args[0:nb_states]
        output, other_outputs, new_states = step_function(input, args)
        if not masking:
            return [output] + list(other_outputs) + list(new_states)

        # All-zero input time step: zero outputs, unchanged states.
        switch = T.any(input, axis=-1, keepdims=True)
        output = T.switch(switch, output, 0. * output)
        # Build a new list: rebinding a loop variable would leave the
        # extra outputs unmasked.
        other_outputs = [T.switch(switch, other, 0. * other)
                         for other in other_outputs]
        return_states = [T.switch(switch, new_state, state)
                         for state, new_state in zip(states, new_states)]
        return [output] + other_outputs + return_states

    results, _ = theano.scan(
        _step,
        sequences=inputs,
        outputs_info=[None] * (1 + nb_other_outputs) + initial_states,
        # The keyword is `non_sequences`; scan has no `no_sequence` argument.
        non_sequences=contexts + parameters,
        truncate_gradient=truncate_gradient,
        go_backwards=go_backwards)

    # deal with Theano API inconsistency: a single output is not wrapped
    if isinstance(results, list):
        outputs = results[0]
        other_outputs = results[1:1 + nb_other_outputs]
        states = results[1 + nb_other_outputs:]
    else:
        outputs = results
        other_outputs = []
        states = []

    outputs = T.squeeze(outputs)
    last_output = outputs[-1]

    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)
    states = [T.squeeze(state[-1]) for state in states]
    return last_output, outputs, other_outputs, states
def sampled_rnn(step_function, inputs, initial_states,
                go_backwards=False, mask=None, constants=None):
    '''Iterates over the time dimension of a tensor.

    # Arguments
        inputs: tensor of temporal data of shape (samples, time, ...)
            (at least 3D).
        step_function: callable `(input, states) -> (output, new_states)`.
            input: one time slice of `inputs` (time axis removed).
            states: tuple of the current states, followed by `constants`
                when those are given.
            new_states: list of tensors, one per entry of `initial_states`.
        initial_states: list of tensors with the initial values of the
            states.
        go_backwards: boolean. If True, do the iteration over
            the time dimension in reverse order.
        mask: currently ignored; a notice is printed when it is given.
        constants: list of constant values passed at each step, or None.

    # Returns
        A tuple (last_output, outputs, new_states, updates).
            last_output: `outputs[-1][-1]` of the squeezed scan output.
            outputs: the last slice along the second axis of the squeezed
                scan output, with its first two axes swapped afterwards
                (the extra `-1` selects the sampled output).
            new_states: list with the squeezed last value of every state.
            updates: the updates dictionary returned by `theano.scan`;
                presumably needed for the random streams used while
                sampling -- confirm against callers.
    '''
    ndim = inputs.ndim
    assert ndim >= 3, 'Input should be at least 3D.'

    axes = [1, 0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    if mask is not None:
        # Function-call form: valid on Python 3 and, for a single string
        # argument, prints the same text on Python 2.
        print("""Mask is not doing anything right now :( This NEEDS to be fixed""")

    def _step(h, *states):
        output, new_states = step_function(h, states)
        return [output] + new_states

    results, updates = theano.scan(_step,
                                   sequences=inputs,
                                   outputs_info=[None] + initial_states,
                                   non_sequences=constants,
                                   go_backwards=go_backwards)

    # deal with Theano API inconsistency: a single output is not wrapped
    if isinstance(results, list):
        outputs = results[0]
        states = results[1:]
    else:
        outputs = results
        states = []

    outputs = T.squeeze(outputs)
    last_output = outputs[-1][-1]  # -1 for sampled output

    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)[-1]  # -1 for sampled output
    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)

    states = [T.squeeze(state[-1]) for state in states]
    return last_output, outputs, states, updates
def rnn(step_function, input, initial_states, non_sequences=None,
        go_backwards=False, masking=False):
    '''Iterates over the leading dimension of a tensor with `theano.scan`.

    Parameters
    ----------
    step_function: callable
        `(in_seq, states, *non_seqs) -> (output, new_states)`.
        in_seq: one slice of `input` along its leading axis.
        states: tuple with the current value of every state.
        non_seqs: the entries of `non_sequences`, passed positionally.
        new_states: sequence of tensors, one per entry of `initial_states`.
    input: tensor to iterate over. It is handed to scan unchanged, so the
        iterated (time) axis must already be the leading one.
    initial_states: list of tensors with the initial values of the states.
    non_sequences: optional list of tensors passed unchanged to every step.
        Defaults to no extra arguments.
    go_backwards: boolean. If True, iterate in reverse order.
    masking: accepted for interface compatibility; it is not used by this
        implementation.

    Returns
    -------
    A tuple (outputs, new_states).
        outputs: the stacked step outputs exactly as returned by scan
            (iterated axis first).
        new_states: list with the squeezed last value of every state.
    '''
    # Avoid a shared mutable default argument.
    if non_sequences is None:
        non_sequences = []
    num_states = len(initial_states)

    def _step(*args):
        # scan passes: the sequence slice, the recurrent states, and then
        # the non-sequences.
        in_seq = args[0]
        states = args[1:1 + num_states]
        non_seqs = args[1 + num_states:]
        output, new_states = step_function(in_seq, states, *non_seqs)
        return [output] + list(new_states)

    results, _ = theano.scan(
        _step,
        sequences=[input],
        non_sequences=non_sequences,
        outputs_info=[None] + initial_states,
        go_backwards=go_backwards)

    # deal with Theano API inconsistency: a single output is not wrapped
    if isinstance(results, list):
        outputs = results[0]
        states = results[1:]
    else:
        outputs = results
        states = []

    states = [T.squeeze(state[-1]) for state in states]
    return outputs, states