import numpy as np
import theano
import theano.tensor as T
import lasagne


def create_NIPS_Sprag_init(inp_shape, output_num, stride=None, untie_biases=False, input_var=None):
    import theano.tensor.signal.conv
    from theano.sandbox.cuda import dnn

    # if no dnn support use default conv
    if not theano.config.device.startswith("gpu") or not dnn.dnn_available():
        # code stolen from lasagne dnn.py
        import lasagne.layers.conv
        conv = lasagne.layers.conv.Conv2DLayer
    else:
        import lasagne.layers.dnn
        conv = lasagne.layers.dnn.Conv2DDNNLayer

    # setup network layout: two conv layers, a dense hidden layer, and a linear output head
    l_in = lasagne.layers.InputLayer(inp_shape, input_var=input_var)
    l_hid1 = conv(l_in, 16, (8, 8), stride=stride[0], untie_biases=untie_biases,
                  W=lasagne.init.Normal(.01), b=lasagne.init.Constant(.1))
    l_hid2 = conv(l_hid1, 32, (4, 4), stride=stride[1], untie_biases=untie_biases,
                  W=lasagne.init.Normal(.01), b=lasagne.init.Constant(.1))
    l_hid3 = lasagne.layers.DenseLayer(l_hid2, 256,
                                       W=lasagne.init.Normal(.01), b=lasagne.init.Constant(.1))
    l_out = lasagne.layers.DenseLayer(l_hid3, output_num,
                                      nonlinearity=lasagne.nonlinearities.linear,
                                      W=lasagne.init.Normal(.01), b=lasagne.init.Constant(.1))

    return {'l_in': l_in, 'l_hid1': l_hid1, 'l_hid2': l_hid2, 'l_hid3': l_hid3, 'l_out': l_out}
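# The parameter-dict factories below call validate_parms() and get_lasagne_conv_layer(),
# neither of which appears in this section. A minimal sketch of get_lasagne_conv_layer is
# given here, assuming it simply factors out the cuDNN-availability check used above;
# the real helper (and validate_parms) may differ.
def get_lasagne_conv_layer():
    from theano.sandbox.cuda import dnn
    # fall back to the default Lasagne conv layer when cuDNN is unavailable
    if not theano.config.device.startswith("gpu") or not dnn.dnn_available():
        import lasagne.layers.conv
        return lasagne.layers.conv.Conv2DLayer
    else:
        import lasagne.layers.dnn
        return lasagne.layers.dnn.Conv2DDNNLayer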
def create_A3C(network_parms):
    validate_parms(network_parms)
    conv = get_lasagne_conv_layer()

    # setup network layout
    l_in = lasagne.layers.InputLayer(network_parms.get('input_shape'))
    l_hid1 = conv(l_in, 16, (8, 8), stride=network_parms.get('stride')[0],
                  untie_biases=network_parms.get('untie_biases'))
    l_hid2 = conv(l_hid1, 32, (4, 4), stride=network_parms.get('stride')[1],
                  untie_biases=network_parms.get('untie_biases'))
    l_hid3 = lasagne.layers.DenseLayer(l_hid2, 256)

    # two heads: a scalar state-value output and a softmax policy over actions
    l_value = lasagne.layers.DenseLayer(l_hid3, 1, nonlinearity=lasagne.nonlinearities.linear)
    l_policy = lasagne.layers.DenseLayer(l_hid3, network_parms.get('output_num'),
                                         nonlinearity=lasagne.nonlinearities.softmax)

    return {'l_in': l_in, 'l_hid1': l_hid1, 'l_hid2': l_hid2, 'l_hid3': l_hid3,
            'l_value': l_value, 'l_policy': l_policy}
def create_NIPS(network_parms):
    validate_parms(network_parms)
    conv = get_lasagne_conv_layer()

    # setup network layout
    l_in = lasagne.layers.InputLayer([None] + network_parms.get('input_shape'))
    l_hid1 = conv(l_in, 16, (8, 8), stride=network_parms.get('stride')[0])
    l_hid2 = conv(l_hid1, 32, (4, 4), stride=network_parms.get('stride')[1])
    l_hid3 = lasagne.layers.DenseLayer(l_hid2, 256)
    l_out = lasagne.layers.DenseLayer(l_hid3, network_parms.get('output_num'),
                                      nonlinearity=lasagne.nonlinearities.linear)

    return {'l_in': l_in, 'l_hid1': l_hid1, 'l_hid2': l_hid2, 'l_hid3': l_hid3, 'l_out': l_out}
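# Hedged usage sketch for the parameter-dict factories above. network_parms only needs a
# .get() method, so a plain dict is assumed here; the shape, strides and action count are
# illustrative values, not ones taken from this codebase.
# example_parms = {'input_shape': [4, 84, 84], 'stride': [4, 2], 'output_num': 6}
# layers = create_NIPS(example_parms)
# q_values = lasagne.layers.get_output(layers['l_out'])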
def create_A3C(inp_shape, output_num, stride=None, untie_biases=False, input_var=None):
    import theano.tensor.signal.conv
    from theano.sandbox.cuda import dnn

    # if no dnn support use default conv
    if not theano.config.device.startswith("gpu") or not dnn.dnn_available():
        # code stolen from lasagne dnn.py
        import lasagne.layers.conv
        conv = lasagne.layers.conv.Conv2DLayer
    else:
        import lasagne.layers.dnn
        conv = lasagne.layers.dnn.Conv2DDNNLayer

    # setup network layout
    l_in = lasagne.layers.InputLayer(inp_shape, input_var=input_var)
    l_hid1 = conv(l_in, 16, (8, 8), stride=stride[0], untie_biases=untie_biases)
    l_hid2 = conv(l_hid1, 32, (4, 4), stride=stride[1], untie_biases=untie_biases)
    l_hid3 = lasagne.layers.DenseLayer(l_hid2, 256)
    l_value = lasagne.layers.DenseLayer(l_hid3, 1, nonlinearity=lasagne.nonlinearities.linear)
    l_policy = lasagne.layers.DenseLayer(l_hid3, output_num,
                                         nonlinearity=lasagne.nonlinearities.softmax)

    return {'l_in': l_in, 'l_hid1': l_hid1, 'l_hid2': l_hid2, 'l_hid3': l_hid3,
            'l_value': l_value, 'l_policy': l_policy}
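# Hedged sketch of reading out the two A3C heads built above; the input shape, action count
# and variable names are illustrative, not taken from this codebase.
# state_var = T.tensor4('states')
# net = create_A3C((None, 4, 84, 84), 6, stride=(4, 2), input_var=state_var)
# policy_out, value_out = lasagne.layers.get_output([net['l_policy'], net['l_value']])
# get_policy_value = theano.function([state_var], [policy_out, value_out])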
def create_async_muupan_init(network_parms):
    validate_parms(network_parms)
    conv = get_lasagne_conv_layer()

    # setup network layout
    input_shape = network_parms.get('input_shape')
    l_in = lasagne.layers.InputLayer(input_shape)
    l_hid1 = conv(l_in, 16, (8, 8), stride=network_parms.get('stride')[0],
                  W=TorchInit((input_shape[1], 8, 8)), b=TorchInit((input_shape[1], 8, 8)))
    l_hid2 = conv(l_hid1, 32, (4, 4), stride=network_parms.get('stride')[1],
                  W=TorchInit((16, 4, 4)), b=TorchInit((16, 4, 4)))
    l_hid3 = lasagne.layers.DenseLayer(l_hid2, 256,
                                       W=TorchInit((32, 4, 4)), b=TorchInit((32, 4, 4)))
    l_out = lasagne.layers.DenseLayer(l_hid3, network_parms.get('output_num'),
                                      nonlinearity=lasagne.nonlinearities.linear,
                                      W=TorchInit(256), b=TorchInit(256))

    return {'l_in': l_in, 'l_hid1': l_hid1, 'l_hid2': l_hid2, 'l_hid3': l_hid3, 'l_out': l_out}
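# TorchInit is used above but not defined in this section. A minimal sketch, assuming it
# mimics Torch's default layer initialization (uniform in [-1/sqrt(fan_in), 1/sqrt(fan_in)],
# with fan_in computed from the shape passed at construction time); the real initializer
# in this codebase may differ.
class TorchInitSketch(lasagne.init.Initializer):
    def __init__(self, fan_in_shape):
        # fan_in_shape: e.g. (in_channels, filter_rows, filter_cols) for conv layers,
        # or the number of incoming units for dense layers
        self.fan_in = int(np.prod(fan_in_shape))

    def sample(self, shape):
        bound = 1.0 / np.sqrt(self.fan_in)
        return lasagne.utils.floatX(np.random.uniform(low=-bound, high=bound, size=shape))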
def __init__(self, inpShape, outputNum, clip=None, stride=(4, 2), untie_biases=False):
    import theano.tensor.signal.conv
    from theano.sandbox.cuda import dnn

    # if no dnn support use default conv
    if not theano.config.device.startswith("gpu") or not dnn.dnn_available():
        # code stolen from lasagne dnn.py
        import lasagne.layers.conv
        conv = lasagne.layers.conv.Conv2DLayer
    else:
        import lasagne.layers.dnn
        conv = lasagne.layers.dnn.Conv2DDNNLayer

    # setup shared vars (minibatch size is fixed at 32)
    self.states_for_training = theano.shared(
        np.zeros((32, inpShape[1], inpShape[2], inpShape[3]), dtype=theano.config.floatX))
    self.states_tp1 = theano.shared(
        np.zeros((32, inpShape[1], inpShape[2], inpShape[3]), dtype=theano.config.floatX))
    self.states_for_output = theano.shared(
        np.zeros((1, inpShape[1], inpShape[2], inpShape[3]), dtype=theano.config.floatX))
    self.truths = theano.shared(np.zeros((32, outputNum), dtype=theano.config.floatX))
    self.terminals = theano.shared(np.zeros(32, dtype=int))
    self.rewards = theano.shared(np.zeros(32, dtype=theano.config.floatX))
    self.actions = theano.shared(np.zeros(32, dtype=int))

    # setup network layout
    self.l_in = lasagne.layers.InputLayer(inpShape)
    if stride is None:
        self.l_hid1 = conv(self.l_in, 16, (8, 8), untie_biases=untie_biases,
                           W=lasagne.init.Normal(.01), b=lasagne.init.Constant(.1))
    else:
        self.l_hid1 = conv(self.l_in, 16, (8, 8), stride=stride[0], untie_biases=untie_biases,
                           W=lasagne.init.Normal(.01), b=lasagne.init.Constant(.1))
    if stride is None:
        self.l_hid2 = conv(self.l_hid1, 32, (4, 4), untie_biases=untie_biases,
                           W=lasagne.init.Normal(.01), b=lasagne.init.Constant(.1))
    else:
        self.l_hid2 = conv(self.l_hid1, 32, (4, 4), stride=stride[1], untie_biases=untie_biases,
                           W=lasagne.init.Normal(.01), b=lasagne.init.Constant(.1))
    self.l_hid3 = lasagne.layers.DenseLayer(self.l_hid2, 256,
                                            W=lasagne.init.Normal(.01),
                                            b=lasagne.init.Constant(.1))
    self.l_out = lasagne.layers.DenseLayer(self.l_hid3, outputNum,
                                           nonlinearity=lasagne.nonlinearities.linear,
                                           W=lasagne.init.Normal(.01),
                                           b=lasagne.init.Constant(.1))

    # network output vars (pixel inputs scaled from [0, 255] into [0, 1])
    net_output = lasagne.layers.get_output(self.l_out, self.states_for_output / 255.0)
    net_output_statetp1 = lasagne.layers.get_output(self.l_out, self.states_tp1 / 255.0)
    net_output_statetp1 = theano.gradient.disconnected_grad(net_output_statetp1)
    net_output_training = lasagne.layers.get_output(self.l_out, self.states_for_training / 255.0)

    # setup qlearning values and loss
    est_rew_tp1 = (1 - self.terminals) * 0.95 * T.max(net_output_statetp1, axis=1)
    rewards = self.rewards + est_rew_tp1
    diff = rewards - net_output_training[T.arange(32), self.actions]
    loss = T.mean(0.5 * diff**2)
    # loss = T.mean(diff**2)

    # get layer parms
    params = lasagne.layers.get_all_params(self.l_out)
    rms_update = lasagne.updates.rmsprop(loss, params, 0.0002, 0.99)

    self._train_optimized = theano.function([], loss, updates=rms_update)
    self._get_output = theano.function([], outputs=net_output)
    self.get_hid1_act = theano.function([self.l_in.input_var],
                                        outputs=lasagne.layers.get_output(self.l_hid1))
    self.get_hid2_act = theano.function([self.l_in.input_var],
                                        outputs=lasagne.layers.get_output(self.l_hid2))
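# Hedged usage sketch for the constructor above. The enclosing class is not shown in this
# section, so "DQN" is a hypothetical name; the batch contents are illustrative zeros.
# net = DQN((32, 4, 84, 84), outputNum=6, stride=(4, 2))
# net.states_for_training.set_value(np.zeros((32, 4, 84, 84), dtype=theano.config.floatX))
# net.states_tp1.set_value(np.zeros((32, 4, 84, 84), dtype=theano.config.floatX))
# net.rewards.set_value(np.zeros(32, dtype=theano.config.floatX))
# net.actions.set_value(np.zeros(32, dtype=int))
# net.terminals.set_value(np.zeros(32, dtype=int))
# batch_loss = net._train_optimized()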
from theano.tensor.signal import conv


def conv2d(x, kernel, conv=conv.conv2d, *args, **kwargs):
    # Apply a single 2D kernel to every channel of a (batch, channels, rows, cols) tensor
    # independently by folding the channel axis into the batch axis, convolving, and
    # restoring the original layout.
    b, c, d0, d1 = x.shape
    y = conv(x.reshape((b * c, d0, d1)), kernel, *args, **kwargs)
    d0, d1 = y.shape[1:]
    return y.reshape((b, c, d0, d1))
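# Hedged usage sketch for the channel-wise conv2d wrapper above; the tensor sizes and
# kernel values are illustrative.
# x = T.tensor4('x')                                   # (batch, channels, rows, cols)
# kernel = T.matrix('kernel')                          # a single 2D filter
# f = theano.function([x, kernel], conv2d(x, kernel))
# out = f(np.ones((2, 3, 8, 8), dtype=theano.config.floatX),
#         np.ones((3, 3), dtype=theano.config.floatX))  # out.shape == (2, 3, 6, 6)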