def __init__(self, input_dim, output_dim, width, height, N, **kwargs):
    """Build the attention writer and its two linear projections.

    Parameters
    ----------
    input_dim : int
        Input size shared by both linear projections.
    output_dim : int
        Size of the written output; asserted to equal ``width * height``.
    width : int
        Image width, forwarded to the attention window.
    height : int
        Image height, forwarded to the attention window.
    N : int
        Window size parameter handed to the attention window; the window
        projection emits ``N * N`` values.
    **kwargs
        Forwarded unchanged to the parent constructor.
    """
    super(AttentionWriter, self).__init__(name="writer", **kwargs)

    self.img_width = width
    self.img_height = height
    self.N = N
    self.input_dim = input_dim
    self.output_dim = output_dim

    assert output_dim == width * height

    self.zoomer = ZoomableAttentionWindow(height, width, N)

    # Both projections share the input size and this brick's initializers.
    projection_args = dict(
        input_dim=input_dim,
        weights_init=self.weights_init,
        biases_init=self.biases_init,
        use_bias=True,
    )
    # 5-unit projection (presumably the raw attention parameters).
    self.z_trafo = Linear(
        name=self.name + '_ztrafo', output_dim=5, **projection_args)
    # N*N-unit projection holding the window contents.
    self.w_trafo = Linear(
        name=self.name + '_wtrafo', output_dim=N * N, **projection_args)

    self.children = [self.z_trafo, self.w_trafo]
class AttentionReader(Initializable):
    """Brick that reads attention windows from ``x`` and ``x_hat``.

    A single-layer identity MLP (``readout``) maps ``h_dec`` to 5 values per
    row, which are turned into the window parameters inside ``apply``.
    """

    def __init__(self, x_dim, dec_dim, width, height, N, **kwargs):
        """Store the dimensions and create the window helper and readout MLP.

        ``**kwargs`` is forwarded both to the parent constructor and to the
        readout ``MLP``.
        """
        super(AttentionReader, self).__init__(name="reader", **kwargs)

        self.width = width
        self.height = height
        self.N = N
        self.x_dim = x_dim
        self.dec_dim = dec_dim
        # Two N x N windows are concatenated in apply().
        self.output_dim = 2 * N * N

        self.zoomer = ZoomableAttentionWindow(height, width, N, normalize=True)
        self.readout = MLP(activations=[Identity()], dims=[dec_dim, 5], **kwargs)

        self.children = [self.readout]

    @application(inputs=['x', 'x_hat', 'h_dec'], outputs=['r'])
    def apply(self, x, x_hat, h_dec):
        """Return ``gamma * concat(read(x), read(x_hat))`` along axis 1."""
        raw = self.readout.apply(h_dec)

        # Columns 0 and 1 go through the affine map (v + 1) / 2.
        center_y = (raw[:, 0] + 1.) / 2.
        center_x = (raw[:, 1] + 1.) / 2.
        # Remaining columns are log-parameters; column 3 is halved first.
        log_delta = raw[:, 2]
        log_sigma = raw[:, 3] / 2.
        log_gamma = raw[:, 4]

        # Read the same window from both images, x first and then x_hat.
        w, w_hat = [
            self.zoomer.read(image, center_y, center_x,
                             T.exp(log_delta), T.exp(log_sigma))
            for image in (x, x_hat)
        ]

        # Turn gamma into a column so it broadcasts over the window axis.
        gamma = T.exp(log_gamma).dimshuffle(0, 'x')

        return gamma * T.concatenate([w, w_hat], axis=1)
class AttentionWriter(Initializable):
    """Brick that writes a multi-channel attention patch computed from ``h``.

    ``w_trafo`` projects the input to ``channels * N * N`` window values and
    ``z_trafo`` projects it to the 5 raw values that the window helper's
    ``nn2att`` converts to ``(center_y, center_x, delta, sigma, gamma)``.
    """

    def __init__(self, input_dim, output_dim, channels, width, height, N, **kwargs):
        """Store the dimensions and create the window helper and projections.

        ``output_dim`` is asserted to equal ``channels * width * height``;
        ``**kwargs`` is forwarded to the parent constructor.
        """
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.channels = channels
        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == channels * width * height

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)

        # Settings common to both linear projections.
        projection_args = dict(
            input_dim=input_dim,
            weights_init=self.weights_init,
            biases_init=self.biases_init,
            use_bias=True,
        )
        self.z_trafo = Linear(
            name=self.name + '_ztrafo', output_dim=5, **projection_args)
        self.w_trafo = Linear(
            name=self.name + '_wtrafo', output_dim=channels * N * N,
            **projection_args)

        self.children = [self.z_trafo, self.w_trafo]

    @application(inputs=['h'], outputs=['c_update'])
    def apply(self, h):
        """Return the written update, scaled by ``1 / gamma``."""
        window = self.w_trafo.apply(h)
        raw_att = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(raw_att)

        inv_gamma = 1. / gamma
        return inv_gamma * self.zoomer.write(
            window, center_y, center_x, delta, sigma)

    @application(inputs=['h'], outputs=['c_update', 'center_y', 'center_x', 'delta'])
    def apply_detailed(self, h):
        """Like ``apply`` but also return ``center_y``, ``center_x``, ``delta``."""
        window = self.w_trafo.apply(h)
        raw_att = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(raw_att)

        inv_gamma = 1. / gamma
        c_update = inv_gamma * self.zoomer.write(
            window, center_y, center_x, delta, sigma)

        return c_update, center_y, center_x, delta
class AttentionWriter(Initializable):
    """Brick that writes an attention patch computed from ``h``.

    ``w_trafo`` projects the input to ``N * N`` window values and ``z_trafo``
    projects it to the 5 raw values that the window helper's ``nn2att``
    converts to ``(center_y, center_x, delta, sigma, gamma)``.
    """

    def __init__(self, input_dim, output_dim, width, height, N, **kwargs):
        """Store the dimensions and create the window helper and projections.

        ``output_dim`` is asserted to equal ``width * height``; ``**kwargs``
        is forwarded to the parent constructor.
        """
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == width * height

        self.zoomer = ZoomableAttentionWindow(height, width, N)

        # Settings common to both linear projections.
        projection_args = dict(
            input_dim=input_dim,
            weights_init=self.weights_init,
            biases_init=self.biases_init,
            use_bias=True,
        )
        self.z_trafo = Linear(
            name=self.name + "_ztrafo", output_dim=5, **projection_args)
        self.w_trafo = Linear(
            name=self.name + "_wtrafo", output_dim=N * N, **projection_args)

        self.children = [self.z_trafo, self.w_trafo]

    @application(inputs=["h"], outputs=["c_update"])
    def apply(self, h):
        """Return the written update, scaled by ``1 / gamma``."""
        window = self.w_trafo.apply(h)
        raw_att = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(raw_att)

        inv_gamma = 1.0 / gamma
        return inv_gamma * self.zoomer.write(
            window, center_y, center_x, delta, sigma)

    @application(inputs=["h"], outputs=["c_update", "center_y", "center_x", "delta"])
    def apply_detailed(self, h):
        """Like ``apply`` but also return ``center_y``, ``center_x``, ``delta``."""
        window = self.w_trafo.apply(h)
        raw_att = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(raw_att)

        inv_gamma = 1.0 / gamma
        c_update = inv_gamma * self.zoomer.write(
            window, center_y, center_x, delta, sigma)

        return c_update, center_y, center_x, delta
def __init__(self, x_dim, dec_dim, height, width, N, **kwargs):
    """Build the attention reader: window helper plus readout MLP.

    Parameters
    ----------
    x_dim : int
        Stored as ``self.x_dim``; not otherwise used here.
    dec_dim : int
        Input size of the readout MLP.
    height : int
        Image height, forwarded to the attention window.
    width : int
        Image width, forwarded to the attention window.
    N : int
        Window size parameter; the brick's ``output_dim`` is ``2 * N * N``.
    **kwargs
        Forwarded both to the parent constructor and to the readout ``MLP``.
    """
    super(AttentionReader, self).__init__(name="reader", **kwargs)

    self.img_height = height
    self.img_width = width
    self.N = N
    self.x_dim = x_dim
    self.dec_dim = dec_dim
    self.output_dim = 2 * N * N

    self.zoomer = ZoomableAttentionWindow(height, width, N)

    # Single identity layer mapping dec_dim inputs to 5 outputs.
    readout_dims = [dec_dim, 5]
    self.readout = MLP(activations=[Identity()], dims=readout_dims, **kwargs)

    self.children = [self.readout]
class AttentionWriter(Initializable):
    """Multi-channel attention writer brick.

    The input ``h`` is projected twice: ``w_trafo`` yields the
    ``channels * N * N`` window contents, ``z_trafo`` yields 5 raw values
    that ``self.zoomer.nn2att`` unpacks into
    ``(center_y, center_x, delta, sigma, gamma)``.
    """

    def __init__(self, input_dim, output_dim, channels, width, height, N, **kwargs):
        """Record the sizes and instantiate the child bricks.

        Asserts ``output_dim == channels * width * height``. ``**kwargs`` goes
        to the parent constructor.
        """
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.channels = channels
        self.img_width = width
        self.img_height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == channels * width * height

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)

        # Keyword arguments shared by the two Linear children.
        common = dict(
            input_dim=input_dim,
            weights_init=self.weights_init,
            biases_init=self.biases_init,
            use_bias=True,
        )
        self.z_trafo = Linear(name=self.name + '_ztrafo', output_dim=5, **common)
        self.w_trafo = Linear(
            name=self.name + '_wtrafo', output_dim=channels * N * N, **common)

        self.children = [self.z_trafo, self.w_trafo]

    @application(inputs=['h'], outputs=['c_update'])
    def apply(self, h):
        """Return ``(1 / gamma) * write(w, ...)`` for the projected input."""
        patch = self.w_trafo.apply(h)
        att = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(att)

        scale = 1. / gamma
        return scale * self.zoomer.write(patch, center_y, center_x, delta, sigma)

    @application(inputs=['h'], outputs=['c_update', 'center_y', 'center_x', 'delta'])
    def apply_detailed(self, h):
        """Same update as ``apply`` plus the window centre and ``delta``."""
        patch = self.w_trafo.apply(h)
        att = self.z_trafo.apply(h)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(att)

        scale = 1. / gamma
        c_update = scale * self.zoomer.write(
            patch, center_y, center_x, delta, sigma)

        return c_update, center_y, center_x, delta
class AttentionReader(Initializable):
    """Multi-channel attention reader brick.

    ``readout`` (a single identity-activation MLP layer) maps ``h_dec`` to 5
    raw values, which ``self.zoomer.nn2att`` unpacks into
    ``(center_y, center_x, delta, sigma, gamma)``. The same window is then
    read from both ``x`` and ``x_hat``.
    """

    def __init__(self, x_dim, dec_dim, channels, height, width, N, **kwargs):
        """Record the sizes and create the window helper and readout MLP.

        ``**kwargs`` is forwarded both to the parent constructor and to the
        readout ``MLP``.
        """
        super(AttentionReader, self).__init__(name="reader", **kwargs)

        self.img_height = height
        self.img_width = width
        self.N = N
        self.x_dim = x_dim
        self.dec_dim = dec_dim
        # Two windows (from x and x_hat) are concatenated by apply().
        self.output_dim = 2 * channels * N * N

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.readout = MLP(activations=[Identity()], dims=[dec_dim, 5], **kwargs)

        self.children = [self.readout]

    def get_dim(self, name):
        """Return the dimension registered under ``name``.

        Recognised names are ``'input'`` (``dec_dim``), ``'x_dim'`` and
        ``'output'``.

        Raises
        ------
        ValueError
            If ``name`` is not one of the recognised names.
        """
        if name == 'input':
            return self.dec_dim
        if name == 'x_dim':
            return self.x_dim
        if name == 'output':
            return self.output_dim
        # Same exception type as before, but now it names the offending key.
        raise ValueError(
            "unknown dimension name {!r}; expected 'input', 'x_dim' or "
            "'output'".format(name))

    @application(inputs=['x', 'x_hat', 'h_dec'], outputs=['r'])
    def apply(self, x, x_hat, h_dec):
        """Return the gamma-scaled reads of ``x`` and ``x_hat``, concatenated on axis 1."""
        l = self.readout.apply(h_dec)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        w = gamma * self.zoomer.read(x, center_y, center_x, delta, sigma)
        w_hat = gamma * self.zoomer.read(x_hat, center_y, center_x, delta, sigma)

        return T.concatenate([w, w_hat], axis=1)

    @application(inputs=['x', 'x_hat', 'h_dec'], outputs=['r', 'center_y', 'center_x', 'delta'])
    def apply_detailed(self, x, x_hat, h_dec):
        """Like ``apply`` but also return ``center_y``, ``center_x``, ``delta``."""
        l = self.readout.apply(h_dec)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        w = gamma * self.zoomer.read(x, center_y, center_x, delta, sigma)
        w_hat = gamma * self.zoomer.read(x_hat, center_y, center_x, delta, sigma)

        r = T.concatenate([w, w_hat], axis=1)

        return r, center_y, center_x, delta
def __init__(self, input_dim, output_dim, channels, width, height, N, **kwargs):
    """Build the multi-channel attention writer and its linear projections.

    Parameters
    ----------
    input_dim : int
        Input size shared by both linear projections.
    output_dim : int
        Size of the written output; asserted to equal
        ``channels * width * height``.
    channels : int
        Number of image channels, forwarded to the attention window.
    width : int
        Image width, forwarded to the attention window.
    height : int
        Image height, forwarded to the attention window.
    N : int
        Window size parameter; the window projection emits
        ``channels * N * N`` values.
    **kwargs
        Forwarded unchanged to the parent constructor.
    """
    super(AttentionWriter, self).__init__(name="writer", **kwargs)

    self.channels = channels
    self.img_width = width
    self.img_height = height
    self.N = N
    self.input_dim = input_dim
    self.output_dim = output_dim

    assert output_dim == channels * width * height

    self.zoomer = ZoomableAttentionWindow(channels, height, width, N)

    # Keyword arguments shared by the two Linear children.
    common = dict(
        input_dim=input_dim,
        weights_init=self.weights_init,
        biases_init=self.biases_init,
        use_bias=True,
    )
    self.z_trafo = Linear(name=self.name + '_ztrafo', output_dim=5, **common)
    self.w_trafo = Linear(
        name=self.name + '_wtrafo', output_dim=channels * N * N, **common)

    self.children = [self.z_trafo, self.w_trafo]
class LocatorReader(Initializable):
    """Reader brick whose attention window has separate Y and X parameters.

    ``readout`` (a single identity-activation MLP layer) maps ``h_dec`` to 7
    raw values, which ``self.zoomer.nn2att`` unpacks into
    ``(center_y, center_x, deltaY, deltaX, sigmaY, sigmaX, gamma)``.
    """

    def __init__(self, x_dim, dec_dim, channels, height, width, N, **kwargs):
        """Record the sizes and create the window helper and readout MLP.

        ``**kwargs`` is forwarded both to the parent constructor and to the
        readout ``MLP``.
        """
        super(LocatorReader, self).__init__(name="reader", **kwargs)

        self.img_height = height
        self.img_width = width
        self.N = N
        self.x_dim = x_dim
        self.dec_dim = dec_dim
        self.output_dim = channels * N * N

        self.zoomer = ZoomableAttentionWindow(channels, height, width, N)
        self.readout = MLP(activations=[Identity()], dims=[dec_dim, 7], **kwargs)

        self.children = [self.readout]

    def get_dim(self, name):
        """Return the dimension registered under ``name``.

        Recognised names are ``'input'`` (``dec_dim``), ``'x_dim'`` and
        ``'output'``.

        Raises
        ------
        ValueError
            If ``name`` is not one of the recognised names.
        """
        if name == 'input':
            return self.dec_dim
        if name == 'x_dim':
            return self.x_dim
        if name == 'output':
            return self.output_dim
        # Same exception type as before, but now it names the offending key.
        raise ValueError(
            "unknown dimension name {!r}; expected 'input', 'x_dim' or "
            "'output'".format(name))

    @application(inputs=['x', 'h_dec'], outputs=['r', 'l'])
    def apply(self, x, h_dec):
        """Return the gamma-scaled read of ``x`` and the raw readout ``l``."""
        l = self.readout.apply(h_dec)

        (center_y, center_x, deltaY, deltaX,
         sigmaY, sigmaX, gamma) = self.zoomer.nn2att(l)

        w = gamma * self.zoomer.read(
            x, center_y, center_x, deltaY, deltaX, sigmaY, sigmaX)

        return w, l

    @application(inputs=['h_dec'], outputs=['center_y', 'center_x', 'deltaY', 'deltaX'])
    def apply_l(self, h_dec):
        """Return only the window location values from ``nn2att_wn``."""
        l = self.readout.apply(h_dec)

        center_y, center_x, deltaY, deltaX = self.zoomer.nn2att_wn(l)

        return center_y, center_x, deltaY, deltaX
class AttentionReader(Initializable):
    """Attention reader brick.

    ``readout`` (a single identity-activation MLP layer) maps ``h_dec`` to 5
    raw values, which ``self.zoomer.nn2att`` unpacks into
    ``(center_y, center_x, delta, sigma, gamma)``. The same window is then
    read from both ``x`` and ``x_hat``.
    """

    def __init__(self, x_dim, dec_dim, height, width, N, **kwargs):
        """Record the sizes and create the window helper and readout MLP.

        ``**kwargs`` is forwarded both to the parent constructor and to the
        readout ``MLP``.
        """
        super(AttentionReader, self).__init__(name="reader", **kwargs)

        self.img_height = height
        self.img_width = width
        self.N = N
        self.x_dim = x_dim
        self.dec_dim = dec_dim
        # Two N x N windows (from x and x_hat) are concatenated by apply().
        self.output_dim = 2 * N * N

        self.zoomer = ZoomableAttentionWindow(height, width, N)
        self.readout = MLP(activations=[Identity()], dims=[dec_dim, 5], **kwargs)

        self.children = [self.readout]

    def get_dim(self, name):
        """Return the dimension registered under ``name``.

        Recognised names are ``'input'`` (``dec_dim``), ``'x_dim'`` and
        ``'output'``.

        Raises
        ------
        ValueError
            If ``name`` is not one of the recognised names.
        """
        if name == 'input':
            return self.dec_dim
        if name == 'x_dim':
            return self.x_dim
        if name == 'output':
            return self.output_dim
        # Same exception type as before, but now it names the offending key.
        raise ValueError(
            "unknown dimension name {!r}; expected 'input', 'x_dim' or "
            "'output'".format(name))

    @application(inputs=['x', 'x_hat', 'h_dec'], outputs=['r'])
    def apply(self, x, x_hat, h_dec):
        """Return the gamma-scaled reads of ``x`` and ``x_hat``, concatenated on axis 1."""
        l = self.readout.apply(h_dec)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        w = gamma * self.zoomer.read(x, center_y, center_x, delta, sigma)
        w_hat = gamma * self.zoomer.read(x_hat, center_y, center_x, delta, sigma)

        return T.concatenate([w, w_hat], axis=1)
def __init__(self, x_dim, dec_dim, channels, height, width, N, **kwargs):
    """Build the multi-channel attention reader: window helper plus readout MLP.

    Parameters
    ----------
    x_dim : int
        Stored as ``self.x_dim``; not otherwise used here.
    dec_dim : int
        Input size of the readout MLP.
    channels : int
        Number of image channels, forwarded to the attention window.
    height : int
        Image height, forwarded to the attention window.
    width : int
        Image width, forwarded to the attention window.
    N : int
        Window size parameter; the brick's ``output_dim`` is
        ``2 * channels * N * N``.
    **kwargs
        Forwarded both to the parent constructor and to the readout ``MLP``.
    """
    super(AttentionReader, self).__init__(name="reader", **kwargs)

    self.img_height = height
    self.img_width = width
    self.N = N
    self.x_dim = x_dim
    self.dec_dim = dec_dim
    self.output_dim = 2 * channels * N * N

    self.zoomer = ZoomableAttentionWindow(channels, height, width, N)

    # Single identity layer mapping dec_dim inputs to 5 outputs.
    readout_dims = [dec_dim, 5]
    self.readout = MLP(activations=[Identity()], dims=readout_dims, **kwargs)

    self.children = [self.readout]
class AttentionReader(Initializable):
    """Attention reader brick.

    ``readout`` (a single identity-activation MLP layer) maps ``h_dec`` to 5
    raw values, which ``self.zoomer.nn2att`` unpacks into
    ``(center_y, center_x, delta, sigma, gamma)``. The same window is then
    read from both ``x`` and ``x_hat``.
    """

    def __init__(self, x_dim, dec_dim, height, width, N, **kwargs):
        """Record the sizes and create the window helper and readout MLP.

        ``**kwargs`` is forwarded both to the parent constructor and to the
        readout ``MLP``.
        """
        super(AttentionReader, self).__init__(name="reader", **kwargs)

        self.img_height = height
        self.img_width = width
        self.N = N
        self.x_dim = x_dim
        self.dec_dim = dec_dim
        # Two N x N windows (from x and x_hat) are concatenated by apply().
        self.output_dim = 2 * N * N

        self.zoomer = ZoomableAttentionWindow(height, width, N)
        self.readout = MLP(activations=[Identity()], dims=[dec_dim, 5], **kwargs)

        self.children = [self.readout]

    def get_dim(self, name):
        """Return the dimension registered under ``name``.

        Recognised names are ``"input"`` (``dec_dim``), ``"x_dim"`` and
        ``"output"``.

        Raises
        ------
        ValueError
            If ``name`` is not one of the recognised names.
        """
        if name == "input":
            return self.dec_dim
        if name == "x_dim":
            return self.x_dim
        if name == "output":
            return self.output_dim
        # Same exception type as before, but now it names the offending key.
        raise ValueError(
            "unknown dimension name {!r}; expected 'input', 'x_dim' or "
            "'output'".format(name))

    @application(inputs=["x", "x_hat", "h_dec"], outputs=["r"])
    def apply(self, x, x_hat, h_dec):
        """Return the gamma-scaled reads of ``x`` and ``x_hat``, concatenated on axis 1."""
        l = self.readout.apply(h_dec)

        center_y, center_x, delta, sigma, gamma = self.zoomer.nn2att(l)

        w = gamma * self.zoomer.read(x, center_y, center_x, delta, sigma)
        w_hat = gamma * self.zoomer.read(x_hat, center_y, center_x, delta, sigma)

        return T.concatenate([w, w_hat], axis=1)
class AttentionWriter(Initializable):
    """Brick that writes an attention patch computed from ``h``.

    ``w_trafo`` projects the input to ``N * N`` window values and ``z_trafo``
    projects it to 5 raw values that ``apply`` converts into the window
    parameters itself.
    """

    def __init__(self, input_dim, output_dim, width, height, N, **kwargs):
        """Store the dimensions and create the window helper and projections.

        ``output_dim`` is asserted to equal ``width * height``; ``**kwargs``
        is forwarded to the parent constructor.
        """
        super(AttentionWriter, self).__init__(name="writer", **kwargs)

        self.width = width
        self.height = height
        self.N = N
        self.input_dim = input_dim
        self.output_dim = output_dim

        assert output_dim == width * height

        self.zoomer = ZoomableAttentionWindow(height, width, N, normalize=True)

        # Settings common to both linear projections.
        projection_args = dict(
            input_dim=input_dim,
            weights_init=self.weights_init,
            biases_init=self.biases_init,
            use_bias=True,
        )
        self.z_trafo = Linear(
            name=self.name + '_ztrafo', output_dim=5, **projection_args)
        self.w_trafo = Linear(
            name=self.name + '_wtrafo', output_dim=N * N, **projection_args)

        self.children = [self.z_trafo, self.w_trafo]

    @application(inputs=['h'], outputs=['c_update'])
    def apply(self, h):
        """Return the written window divided by ``gamma``."""
        window = self.w_trafo.apply(h)
        raw = self.z_trafo.apply(h)

        # Columns 0 and 1 go through the affine map (v + 1) / 2.
        center_y = (raw[:, 0] + 1.) / 2.
        center_x = (raw[:, 1] + 1.) / 2.
        # Remaining columns are log-parameters; column 3 is halved first.
        log_delta = raw[:, 2]
        log_sigma = raw[:, 3] / 2.
        log_gamma = raw[:, 4]

        # Turn gamma into a column so it broadcasts over the output axis.
        gamma = T.exp(log_gamma).dimshuffle(0, 'x')

        written = self.zoomer.write(
            window, center_y, center_x, T.exp(log_delta), T.exp(log_sigma))

        return written / gamma
def main(name, epochs, batch_size, learning_rate):
    """Train a read -> MLP -> write attention model on binarized MNIST.

    The input is read through one attention window, passed through an MLP,
    written back through a second window, squashed with a sigmoid and scored
    against the input with binary cross-entropy.

    Parameters
    ----------
    name : str or None
        Experiment name; ``None`` selects ``"att-rw"``. Also used as the
        stem of the serialized main-loop file (``name + ".pkl"``).
    epochs : int
        Number of epochs after which training finishes.
    batch_size : int
        Batch size of the training and test streams.
    learning_rate : float
        Learning rate handed to ``Adam``.
    """
    if name is None:
        name = "att-rw"

    print("\nRunning experiment %s" % name)
    print(" learning rate: %5.3f" % learning_rate)
    print()

    # ---- Model hyper-parameters -------------------------------------------
    img_height, img_width = 28, 28
    read_N = 12
    write_N = 14
    x_dim = img_height * img_width

    inits = {
        'weights_init': IsotropicGaussian(0.001),
        'biases_init': Constant(0.),
    }

    read_window = ZoomableAttentionWindow(img_height, img_width, read_N)
    write_window = ZoomableAttentionWindow(img_height, img_width, write_N)

    # MLPs producing the 5 attention parameters for the reader and writer.
    att_dims = [x_dim, 50, 5]
    mlpr = MLP(activations=[Tanh(), Identity()], dims=att_dims,
               name="RMLP", **inits)
    mlpw = MLP(activations=[Tanh(), Identity()], dims=att_dims,
               name="WMLP", **inits)

    # MLP between the reader and writer.
    mlp = MLP(activations=[Tanh(), Identity()],
              dims=[read_N**2, 300, write_N**2], name="MLP", **inits)

    for brick in (mlpr, mlpw, mlp):
        brick.allocate()
        brick.initialize()

    # ---- Computation graph -------------------------------------------------
    x = tensor.matrix('features')

    read_params = mlpr.apply(x)
    write_params = mlpw.apply(x)

    center_y, center_x, delta, sigma, gamma = read_window.nn2att(read_params)
    glimpse = read_window.read(x, center_y, center_x, delta, sigma)

    hidden = mlp.apply(glimpse)

    center_y, center_x, delta, sigma, gamma = write_window.nn2att(write_params)
    canvas = write_window.write(hidden, center_y, center_x, delta, sigma) / gamma

    x_recons = T.nnet.sigmoid(canvas)

    cost = BinaryCrossEntropy().apply(x, x_recons)
    cost.name = "cost"

    # ---- Optimisation ------------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    step_rule = CompositeRule([
        RemoveNotFinite(),
        Adam(learning_rate),
        StepClipping(3.),
    ])
    algorithm = GradientDescent(cost=cost, params=params, step_rule=step_rule)

    # ---- Monitoring --------------------------------------------------------
    monitors = [cost]
    # Training additionally tracks the gradient and step norms.
    train_monitors = monitors + [
        aggregation.mean(algorithm.total_gradient_norm),
        aggregation.mean(algorithm.total_step_norm),
    ]

    # Channel layout for live plotting; unused while no Plot extension is
    # enabled below.
    plot_channels = [
        ["cost"],
    ]

    # ---- Data --------------------------------------------------------------
    mnist_train = BinarizedMNIST("train", sources=['features'])
    mnist_test = BinarizedMNIST("test", sources=['features'])

    def make_stream(dataset):
        # Sequential pass over the whole dataset, wrapped in ForceFloatX.
        scheme = SequentialScheme(dataset.num_examples, batch_size)
        return ForceFloatX(DataStream(dataset, iteration_scheme=scheme))

    main_loop = MainLoop(
        model=Model(cost),
        data_stream=make_stream(mnist_train),
        algorithm=algorithm,
        extensions=[
            Timing(),
            FinishAfter(after_n_epochs=epochs),
            DataStreamMonitoring(
                monitors, make_stream(mnist_test), prefix="test"),
            TrainingDataMonitoring(
                train_monitors, prefix="train", after_every_epoch=True),
            SerializeMainLoop(name + ".pkl"),
            ProgressBar(),
            Printing(),
        ])
    main_loop.run()
def main(name, epochs, batch_size, learning_rate):
    """Run the attention read/write experiment on binarized MNIST.

    The pipeline reads the input through one attention window, feeds the
    result to an MLP, writes the MLP output through a second window, applies
    a sigmoid and minimises binary cross-entropy against the input.

    Parameters
    ----------
    name : str or None
        Experiment name; falls back to ``"att-rw"`` when ``None``. The main
        loop is serialized to ``name + ".pkl"``.
    epochs : int
        Training stops after this many epochs.
    batch_size : int
        Batch size used for both the training and the test stream.
    learning_rate : float
        Learning rate passed to ``Adam``.
    """
    if name is None:
        name = "att-rw"

    print("\nRunning experiment %s" % name)
    print(" learning rate: %5.3f" % learning_rate)
    print()

    # ---- Sizes and initialisation -------------------------------------------
    img_height, img_width = 28, 28
    read_N = 12
    write_N = 14
    x_dim = img_height * img_width

    inits = {
        'weights_init': IsotropicGaussian(0.001),
        'biases_init': Constant(0.),
    }

    reader = ZoomableAttentionWindow(img_height, img_width, read_N)
    writer = ZoomableAttentionWindow(img_height, img_width, write_N)

    # Parameterize the attention reader and writer.
    mlpr = MLP(
        activations=[Tanh(), Identity()],
        dims=[x_dim, 50, 5],
        name="RMLP",
        **inits)
    mlpw = MLP(
        activations=[Tanh(), Identity()],
        dims=[x_dim, 50, 5],
        name="WMLP",
        **inits)

    # MLP between the reader and writer.
    mlp = MLP(
        activations=[Tanh(), Identity()],
        dims=[read_N**2, 300, write_N**2],
        name="MLP",
        **inits)

    for brick in (mlpr, mlpw, mlp):
        brick.allocate()
        brick.initialize()

    # ---- Symbolic graph ------------------------------------------------------
    x = tensor.matrix('features')

    hr = mlpr.apply(x)
    hw = mlpw.apply(x)

    read_att = reader.nn2att(hr)
    center_y, center_x, delta, sigma, gamma = read_att
    r = reader.read(x, center_y, center_x, delta, sigma)

    h = mlp.apply(r)

    write_att = writer.nn2att(hw)
    center_y, center_x, delta, sigma, gamma = write_att
    c = writer.write(h, center_y, center_x, delta, sigma) / gamma

    x_recons = T.nnet.sigmoid(c)

    cost = BinaryCrossEntropy().apply(x, x_recons)
    cost.name = "cost"

    # ---- Training algorithm --------------------------------------------------
    cg = ComputationGraph([cost])
    params = VariableFilter(roles=[PARAMETER])(cg.variables)

    algorithm = GradientDescent(
        cost=cost,
        params=params,
        step_rule=CompositeRule([
            RemoveNotFinite(),
            Adam(learning_rate),
            StepClipping(3.),
        ]))

    # ---- Monitors ------------------------------------------------------------
    monitors = [cost]

    # The training monitors extend the shared ones with optimiser norms.
    train_monitors = list(monitors)
    train_monitors.append(aggregation.mean(algorithm.total_gradient_norm))
    train_monitors.append(aggregation.mean(algorithm.total_step_norm))

    # Channel layout for live plotting; unused while no Plot extension is
    # enabled below.
    plot_channels = [
        ["cost"],
    ]

    # ---- Datasets and streams ------------------------------------------------
    mnist_train = BinarizedMNIST("train", sources=['features'])
    mnist_test = BinarizedMNIST("test", sources=['features'])

    model = Model(cost)

    train_scheme = SequentialScheme(mnist_train.num_examples, batch_size)
    train_stream = ForceFloatX(
        DataStream(mnist_train, iteration_scheme=train_scheme))

    timing = Timing()
    finish = FinishAfter(after_n_epochs=epochs)

    test_scheme = SequentialScheme(mnist_test.num_examples, batch_size)
    test_stream = ForceFloatX(
        DataStream(mnist_test, iteration_scheme=test_scheme))

    extensions = [
        timing,
        finish,
        DataStreamMonitoring(monitors, test_stream, prefix="test"),
        TrainingDataMonitoring(
            train_monitors, prefix="train", after_every_epoch=True),
        SerializeMainLoop(name + ".pkl"),
        ProgressBar(),
        Printing(),
    ]

    main_loop = MainLoop(
        model=model,
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=extensions)
    main_loop.run()