Example #1
def create_NIPS_Sprag_init(inp_shape, output_num, stride=None, untie_biases=False, input_var=None):
    import theano.tensor.signal.conv  # also binds `theano` for the config check below
    from theano.sandbox.cuda import dnn
    if stride is None:  # stride[0]/stride[1] are indexed below; default to the NIPS strides
        stride = (4, 2)
    # if no dnn support use default conv
    if not theano.config.device.startswith("gpu") or not dnn.dnn_available():  # code stolen from lasagne dnn.py
        import lasagne.layers.conv
        conv = lasagne.layers.conv.Conv2DLayer
    else:
        import lasagne.layers.dnn
        conv = lasagne.layers.dnn.Conv2DDNNLayer

    # setup network layout
    l_in = lasagne.layers.InputLayer(inp_shape, input_var=input_var)
    l_hid1 = conv(l_in, 16, (8, 8), stride=stride[0], untie_biases=untie_biases,
                        W=lasagne.init.Normal(.01),
                        b=lasagne.init.Constant(.1))

    l_hid2 = conv(l_hid1, 32, (4, 4), stride=stride[1], untie_biases=untie_biases,
                        W=lasagne.init.Normal(.01),
                        b=lasagne.init.Constant(.1))

    l_hid3 = lasagne.layers.DenseLayer(l_hid2, 256,
                        W=lasagne.init.Normal(.01),
                        b=lasagne.init.Constant(.1))

    l_out = lasagne.layers.DenseLayer(l_hid3, output_num, nonlinearity=lasagne.nonlinearities.linear,
                        W=lasagne.init.Normal(.01),
                        b=lasagne.init.Constant(.1))

    return {'l_in': l_in, 'l_hid1': l_hid1, 'l_hid2': l_hid2, 'l_hid3': l_hid3, 'l_out': l_out}
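
For reference, a minimal usage sketch: the 84x84 Atari-style input shape, the action count, and the (4, 2) strides are assumptions, and lasagne/theano are imported at module level in the source.

# hypothetical usage: build the network and compile a forward pass
import theano
import theano.tensor as T
import lasagne

layers = create_NIPS_Sprag_init((None, 4, 84, 84), output_num=6, stride=(4, 2))
states = T.tensor4('states')
q_values = lasagne.layers.get_output(layers['l_out'], states)
q_fn = theano.function([states], q_values)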
Example #2
def create_A3C(network_parms):
    validate_parms(network_parms)
    conv = get_lasagne_conv_layer()

    # setup network layout
    l_in = lasagne.layers.InputLayer(network_parms.get('input_shape'))
    l_hid1 = conv(l_in,
                  16, (8, 8),
                  stride=network_parms.get('stride')[0],
                  untie_biases=network_parms.get('untie_biases'))

    l_hid2 = conv(l_hid1,
                  32, (4, 4),
                  stride=network_parms.get('stride')[1],
                  untie_biases=network_parms.get('untie_biases'))

    l_hid3 = lasagne.layers.DenseLayer(l_hid2, 256)

    l_value = lasagne.layers.DenseLayer(
        l_hid3, 1, nonlinearity=lasagne.nonlinearities.linear)
    l_policy = lasagne.layers.DenseLayer(
        l_hid3,
        network_parms.get('output_num'),
        nonlinearity=lasagne.nonlinearities.softmax)

    return {
        'l_in': l_in,
        'l_hid1': l_hid1,
        'l_hid2': l_hid2,
        'l_hid3': l_hid3,
        'l_value': l_value,
        'l_policy': l_policy
    }
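
The shape of network_parms is not shown on this page; below is a sketch of a dict that would satisfy the .get(...) calls above (the exact keys validate_parms enforces are an assumption). Note that create_A3C passes 'input_shape' straight to InputLayer, so it should include the batch dimension, whereas create_NIPS below prepends [None] itself.

# hypothetical parameter dict, keys inferred from the .get() calls above
network_parms = {
    'input_shape': (None, 4, 84, 84),  # assumed Atari-style stacked frames
    'stride': [4, 2],
    'untie_biases': False,
    'output_num': 6,
}
layers = create_A3C(network_parms)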
Example #3
def create_NIPS(network_parms):
    validate_parms(network_parms)

    conv = get_lasagne_conv_layer()

    # setup network layout
    l_in = lasagne.layers.InputLayer([None] + network_parms.get('input_shape'))
    l_hid1 = conv(l_in, 16, (8, 8), stride=network_parms.get('stride')[0])

    l_hid2 = conv(l_hid1, 32, (4, 4), stride=network_parms.get('stride')[1])

    l_hid3 = lasagne.layers.DenseLayer(l_hid2, 256)

    l_out = lasagne.layers.DenseLayer(
        l_hid3,
        network_parms.get('output_num'),
        nonlinearity=lasagne.nonlinearities.linear)

    return {
        'l_in': l_in,
        'l_hid1': l_hid1,
        'l_hid2': l_hid2,
        'l_hid3': l_hid3,
        'l_out': l_out
    }
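
get_lasagne_conv_layer is defined elsewhere in the source; below is a plausible reconstruction, assuming it simply factors out the cuDNN availability check that Example #1 performs inline.

def get_lasagne_conv_layer():
    # sketch: prefer the cuDNN conv layer when available, else the default
    import theano
    from theano.sandbox.cuda import dnn
    if not theano.config.device.startswith("gpu") or not dnn.dnn_available():
        import lasagne.layers.conv
        return lasagne.layers.conv.Conv2DLayer
    import lasagne.layers.dnn
    return lasagne.layers.dnn.Conv2DDNNLayer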
Example #4
def create_A3C(inp_shape,
               output_num,
               stride=None,
               untie_biases=False,
               input_var=None):
    import theano.tensor.signal.conv  # also binds `theano` for the config check below
    from theano.sandbox.cuda import dnn
    if stride is None:  # stride[0]/stride[1] are indexed below; default to the NIPS strides
        stride = (4, 2)
    # if no dnn support use default conv
    if not theano.config.device.startswith("gpu") or not dnn.dnn_available():  # code stolen from lasagne dnn.py
        import lasagne.layers.conv
        conv = lasagne.layers.conv.Conv2DLayer
    else:
        import lasagne.layers.dnn
        conv = lasagne.layers.dnn.Conv2DDNNLayer

    # setup network layout
    l_in = lasagne.layers.InputLayer(inp_shape, input_var=input_var)
    l_hid1 = conv(l_in,
                  16, (8, 8),
                  stride=stride[0],
                  untie_biases=untie_biases)

    l_hid2 = conv(l_hid1,
                  32, (4, 4),
                  stride=stride[1],
                  untie_biases=untie_biases)

    l_hid3 = lasagne.layers.DenseLayer(l_hid2, 256)

    l_value = lasagne.layers.DenseLayer(
        l_hid3, 1, nonlinearity=lasagne.nonlinearities.linear)
    l_policy = lasagne.layers.DenseLayer(
        l_hid3, output_num, nonlinearity=lasagne.nonlinearities.softmax)

    return {
        'l_in': l_in,
        'l_hid1': l_hid1,
        'l_hid2': l_hid2,
        'l_hid3': l_hid3,
        'l_value': l_value,
        'l_policy': l_policy
    }
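
A minimal sketch of wiring the two A3C heads into a callable; the shape and action count are illustrative.

# hypothetical usage: compile policy and value outputs together
import theano
import theano.tensor as T
import lasagne

layers = create_A3C((None, 4, 84, 84), output_num=6, stride=(4, 2))
states = T.tensor4('states')
policy, value = lasagne.layers.get_output(
    [layers['l_policy'], layers['l_value']], states)
act_fn = theano.function([states], [policy, value])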
Example #5
def create_async_muupan_init(network_parms):
    validate_parms(network_parms)
    conv = get_lasagne_conv_layer()

    # setup network layout
    input_shape = network_parms.get('input_shape')
    l_in = lasagne.layers.InputLayer(input_shape)
    l_hid1 = conv(l_in,
                  16, (8, 8),
                  stride=network_parms.get('stride')[0],
                  W=TorchInit((input_shape[1], 8, 8)),
                  b=TorchInit((input_shape[1], 8, 8)))

    l_hid2 = conv(l_hid1,
                  32, (4, 4),
                  stride=network_parms.get('stride')[1],
                  W=TorchInit((16, 4, 4)),
                  b=TorchInit((16, 4, 4)))

    l_hid3 = lasagne.layers.DenseLayer(l_hid2,
                                       256,
                                       W=TorchInit((32, 4, 4)),
                                       b=TorchInit((32, 4, 4)))

    l_out = lasagne.layers.DenseLayer(
        l_hid3,
        network_parms.get('output_num'),
        nonlinearity=lasagne.nonlinearities.linear,
        W=TorchInit(256),
        b=TorchInit(256))

    return {
        'l_in': l_in,
        'l_hid1': l_hid1,
        'l_hid2': l_hid2,
        'l_hid3': l_hid3,
        'l_out': l_out
    }
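
TorchInit is defined elsewhere in the source; below is a minimal sketch of what such an initializer could look like in Lasagne, assuming it mimics Torch7's default uniform initialization over the fan-in of the incoming layer (the constructor argument above is the incoming filter shape or unit count).

import numpy as np
import lasagne

class TorchInit(lasagne.init.Initializer):
    # assumed behavior: sample U(-1/sqrt(fan_in), 1/sqrt(fan_in)),
    # Torch7's historical default for linear and conv layers
    def __init__(self, fan_in_shape):
        self.fan_in = int(np.prod(fan_in_shape))

    def sample(self, shape):
        bound = 1.0 / np.sqrt(self.fan_in)
        return lasagne.utils.floatX(
            np.random.uniform(low=-bound, high=bound, size=shape))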
Example #6
    def __init__(self,
                 inpShape,
                 outputNum,
                 clip=None,
                 stride=(4, 2),
                 untie_biases=False):
        import theano.tensor.signal.conv
        from theano.sandbox.cuda import dnn
        # if no dnn support use default conv
        if not theano.config.device.startswith("gpu") or not dnn.dnn_available():  # code stolen from lasagne dnn.py
            import lasagne.layers.conv
            conv = lasagne.layers.conv.Conv2DLayer
        else:
            import lasagne.layers.dnn
            conv = lasagne.layers.dnn.Conv2DDNNLayer

        # setup shared vars
        self.states_for_training = theano.shared(
            np.zeros((32, inpShape[1], inpShape[2], inpShape[3]),
                     dtype=theano.config.floatX))
        self.states_tp1 = theano.shared(
            np.zeros((32, inpShape[1], inpShape[2], inpShape[3]),
                     dtype=theano.config.floatX))
        self.states_for_output = theano.shared(
            np.zeros((1, inpShape[1], inpShape[2], inpShape[3]),
                     dtype=theano.config.floatX))
        self.truths = theano.shared(
            np.zeros((32, outputNum), dtype=theano.config.floatX))
        self.terminals = theano.shared(np.zeros(32, dtype=int))
        self.rewards = theano.shared(np.zeros(32, dtype=theano.config.floatX))
        self.actions = theano.shared(np.zeros(32, dtype=int))

        # setup network layout
        self.l_in = lasagne.layers.InputLayer(inpShape)
        if stride is None:
            self.l_hid1 = conv(self.l_in,
                               16, (8, 8),
                               untie_biases=untie_biases,
                               W=lasagne.init.Normal(.01),
                               b=lasagne.init.Constant(.1))
        else:
            self.l_hid1 = conv(self.l_in,
                               16, (8, 8),
                               stride=stride[0],
                               untie_biases=untie_biases,
                               W=lasagne.init.Normal(.01),
                               b=lasagne.init.Constant(.1))

        if stride is None:
            self.l_hid2 = conv(self.l_hid1,
                               32, (4, 4),
                               untie_biases=untie_biases,
                               W=lasagne.init.Normal(.01),
                               b=lasagne.init.Constant(.1))
        else:
            self.l_hid2 = conv(self.l_hid1,
                               32, (4, 4),
                               stride=stride[1],
                               untie_biases=untie_biases,
                               W=lasagne.init.Normal(.01),
                               b=lasagne.init.Constant(.1))

        self.l_hid3 = lasagne.layers.DenseLayer(self.l_hid2,
                                                256,
                                                W=lasagne.init.Normal(.01),
                                                b=lasagne.init.Constant(.1))
        self.l_out = lasagne.layers.DenseLayer(
            self.l_hid3,
            outputNum,
            nonlinearity=lasagne.nonlinearities.linear,
            W=lasagne.init.Normal(.01),
            b=lasagne.init.Constant(.1))

        # network output vars
        net_output = lasagne.layers.get_output(self.l_out,
                                               self.states_for_output / 255.0)
        net_output_statetp1 = lasagne.layers.get_output(
            self.l_out, self.states_tp1 / 255.0)
        net_output_statetp1 = theano.gradient.disconnected_grad(
            net_output_statetp1)
        net_output_training = lasagne.layers.get_output(
            self.l_out, self.states_for_training / 255.0)

        # setup Q-learning targets and loss (discount factor 0.95, batch size 32)
        est_rew_tp1 = (1 - self.terminals) * 0.95 * T.max(net_output_statetp1,
                                                          axis=1)
        rewards = self.rewards + est_rew_tp1
        diff = rewards - net_output_training[T.arange(32), self.actions]
        loss = T.mean(0.5 * diff**2)  # mean squared TD error (halved)
        # loss = T.mean(diff**2)
        # get layer params
        params = lasagne.layers.get_all_params(self.l_out)
        rms_update = lasagne.updates.rmsprop(loss, params, 0.0002, 0.99)

        self._train_optimized = theano.function([], loss, updates=rms_update)
        self._get_output = theano.function([], outputs=net_output)
        self.get_hid1_act = theano.function([self.l_in.input_var],
                                            outputs=lasagne.layers.get_output(
                                                self.l_hid1))
        self.get_hid2_act = theano.function([self.l_in.input_var],
                                            outputs=lasagne.layers.get_output(
                                                self.l_hid2))
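
A sketch of a training step against this class: load a minibatch into the shared variables with set_value, then call the precompiled update. The class name and the batch arrays are assumptions; the attribute names match the constructor above.

# hypothetical training step
net = DQN((32, 4, 84, 84), 6)                # class name assumed
net.states_for_training.set_value(states_t)  # (32, 4, 84, 84) float32
net.states_tp1.set_value(states_tp1)         # next states, same shape
net.rewards.set_value(rewards)               # (32,) float32
net.actions.set_value(actions)               # (32,) int action indices
net.terminals.set_value(terminals)           # (32,) int, 1 where episode ended
loss = net._train_optimized()                # one RMSProp step, returns the loss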
Example #7
def conv2d(x, kernel, conv=conv.conv2d, *args, **kwargs):
    # apply one 2D kernel to every channel of a 4D batch: fold the batch and
    # channel axes together, run the 3D-input signal conv, then unfold
    b, c, d0, d1 = x.shape
    y = conv(x.reshape((b * c, d0, d1)), kernel, *args, **kwargs)
    d0, d1 = y.shape[1:]  # spatial dims shrink for 'valid' border mode
    return y.reshape((b, c, d0, d1))
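
A usage sketch, assuming conv was imported at module level as "from theano.tensor.signal import conv" (which the default argument requires at definition time).

import numpy as np
import theano
import theano.tensor as T

x = T.tensor4('x')  # (batch, channels, height, width)
k = T.matrix('k')   # one 2D kernel, shared across batch and channels
f = theano.function([x, k], conv2d(x, k))
out = f(np.random.rand(2, 3, 8, 8).astype('float32'),
        np.ones((3, 3), dtype='float32'))  # out.shape == (2, 3, 6, 6)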