Example #1
import numpy as np
from layers.dataset import cifar100
from layers import (ConvLayer, FullLayer, FlattenLayer, MaxPoolLayer,
                    ReluLayer, SoftMaxLayer, CrossEntropyLayer, Sequential)


class simple_cnn_model(object):
    def __init__(self, epochs, batch_size, lr):
        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr

    def load_data(self):
        # load data from the cifar100 folder
        (x_train, y_train), (x_test, y_test) = cifar100(1211506319)
        return x_train, y_train, x_test, y_test

    def train_model(self, layers, loss_metrics, x_train, y_train):
        # build the model
        self.model = Sequential(layers, loss_metrics)
        # train the model
        loss = self.model.fit(x_train,
                              y_train,
                              self.epochs,
                              self.lr,
                              self.batch_size,
                              print_output=True)
        avg_loss = np.mean(np.reshape(loss, (self.epochs, -1)), axis=1)
        return avg_loss

    def test_model(self, x_test, y_test):
        # make a prediction
        pred_result = self.model.predict(x_test)
        accuracy = np.mean(pred_result == y_test)
        return accuracy


if __name__ == '__main__':
    # define model parameters
    epochs = 15
    batch_size = 128
    lr = 0.1

    # define layers
    layers = (ConvLayer(3, 16, 3), ReluLayer(), MaxPoolLayer(),
              ConvLayer(16, 32, 3), ReluLayer(), MaxPoolLayer(),
              FlattenLayer(), FullLayer(2048, 4), SoftMaxLayer())

    loss_metrics = CrossEntropyLayer()

    # build and train the model
    model = simple_cnn_model(epochs, batch_size, lr)
    x_train, y_train, x_test, y_test = model.load_data()
    loss = model.train_model(layers, loss_metrics, x_train, y_train)
    accuracy = model.test_model(x_test, y_test)
    print("loss: %s" % loss)
    print("The accuracy of the model is %s" % accuracy)
Example #2
    def __init__(self,
                 outputs,
                 inputs,
                 layer_type='mcmf_lrt',
                 activation_type='softplus'):
        super(BBBAlexNet, self).__init__()

        self.num_classes = outputs
        self.layer_type = layer_type

        if layer_type == 'mcmf_lrt':
            BBBLinear = BBB_MCMF_LRT_Linear
            BBBConv2d = BBB_MCMF_LRT_Conv2d
        elif layer_type == 'lrt':
            BBBLinear = BBB_LRT_Linear
            BBBConv2d = BBB_LRT_Conv2d
        else:
            raise ValueError("Undefined layer_type")

        if activation_type == 'softplus':
            self.act = nn.Softplus
        elif activation_type == 'relu':
            self.act = nn.ReLU
        else:
            raise ValueError("Only softplus or relu supported")

        self.conv1 = BBBConv2d(inputs, 64, 11, stride=4, padding=5, bias=True)
        self.act1 = self.act()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = BBBConv2d(64, 192, 5, padding=2, bias=True)
        self.act2 = self.act()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv3 = BBBConv2d(192, 384, 3, padding=1, bias=True)
        self.act3 = self.act()

        self.conv4 = BBBConv2d(384, 256, 3, padding=1, bias=True)
        self.act4 = self.act()

        self.conv5 = BBBConv2d(256, 128, 3, padding=1, bias=True)
        self.act5 = self.act()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.flatten = FlattenLayer(1 * 1 * 128)
        self.classifier = BBBLinear(1 * 1 * 128, outputs, bias=True)
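Note that self.act stores the activation class itself (nn.Softplus, not nn.Softplus()), so every self.act() call above creates a fresh module instance. A minimal sketch of the same dispatch pattern:

import torch.nn as nn

def make_activation(activation_type='softplus'):
    # map a config string to an activation class, then instantiate per use
    if activation_type == 'softplus':
        act = nn.Softplus
    elif activation_type == 'relu':
        act = nn.ReLU
    else:
        raise ValueError("Only softplus or relu supported")
    return act()  # a new, independent module on each call

act1 = make_activation('relu')
act2 = make_activation('softplus')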
Example #3
    def __init__(self,
                 outputs,
                 inputs,
                 layer_type='mcmf_lrt',
                 activation_type='softplus'):
        super(BBB3Conv3FC, self).__init__()

        self.num_classes = outputs
        self.layer_type = layer_type

        if layer_type == 'mcmf_lrt':
            BBBLinear = BBB_MCMF_LRT_Linear
            BBBConv2d = BBB_MCMF_LRT_Conv2d
        elif layer_type == 'lrt':
            BBBLinear = BBB_LRT_Linear
            BBBConv2d = BBB_LRT_Conv2d
        else:
            raise ValueError("Undefined layer_type")

        if activation_type == 'softplus':
            self.act = nn.Softplus
        elif activation_type == 'relu':
            self.act = nn.ReLU
        else:
            raise ValueError("Only softplus or relu supported")

        self.conv1 = BBBConv2d(inputs, 32, 5, padding=2, bias=True)
        self.act1 = self.act()
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.conv2 = BBBConv2d(32, 64, 5, padding=2, bias=True)
        self.act2 = self.act()
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.conv3 = BBBConv2d(64, 128, 5, padding=1, bias=True)
        self.act3 = self.act()
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)

        self.flatten = FlattenLayer(2 * 2 * 128)
        self.fc1 = BBBLinear(2 * 2 * 128, 1000, bias=True)
        self.act5 = self.act()

        self.fc2 = BBBLinear(1000, 1000, bias=True)
        self.act6 = self.act()

        self.fc3 = BBBLinear(1000, outputs, bias=True)
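The hard-coded FlattenLayer(2 * 2 * 128) size is tied to one input resolution. A quick way to check such a size is to push a dummy batch through the convolutional stack; a sketch with plain nn.Conv2d/nn.MaxPool2d standing in for the Bayesian layers, assuming 32x32 RGB inputs:

import torch
import torch.nn as nn

# plain-layer stand-in for the conv part of BBB3Conv3FC
features = nn.Sequential(
    nn.Conv2d(3, 32, 5, padding=2), nn.ReLU(), nn.MaxPool2d(3, stride=2),
    nn.Conv2d(32, 64, 5, padding=2), nn.ReLU(), nn.MaxPool2d(3, stride=2),
    nn.Conv2d(64, 128, 5, padding=1), nn.ReLU(), nn.MaxPool2d(3, stride=2),
)
x = torch.zeros(1, 3, 32, 32)  # assumed CIFAR-like input
print(features(x).shape)       # torch.Size([1, 128, 2, 2]) -> 2 * 2 * 128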
Example #4
import collections


def get_layers_dict(json_list):
    layers_list = []
    for layer_info in json_list:
        if 'dense' in layer_info['layer_type']:
            layer = DenseLayer(layer_info)
        elif 'conv2d' in layer_info['layer_type']:
            layer = Conv2dLayer(layer_info)
        elif 'flatten' in layer_info['layer_type']:
            layer = FlattenLayer(layer_info)
        else:
            raise ValueError("Unknown layer_type: %s" %
                             layer_info['layer_type'])
        layer_tup, activation_tup = layer.get_torch_layer()
        layers_list.append(layer_tup)
        if activation_tup[1] is not None:
            layers_list.append(activation_tup)
    # preserve insertion order so downstream consumers see layers in json order
    return collections.OrderedDict(layers_list)
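The (name, module) OrderedDict built here matches what torch.nn.Sequential accepts, which is presumably how it is consumed. A minimal sketch with plain torch layers:

import collections
import torch
import torch.nn as nn

layers = collections.OrderedDict([
    ('conv1', nn.Conv2d(1, 8, 3)),
    ('relu1', nn.ReLU()),
    ('flatten1', nn.Flatten()),
    ('dense1', nn.Linear(8 * 26 * 26, 10)),
])
model = nn.Sequential(layers)                  # sub-modules addressable by name
print(model(torch.zeros(1, 1, 28, 28)).shape)  # torch.Size([1, 10])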
Example #5
    def build_model(self):
        layers = []
        input_shape = np.array(
            [self.batch_size, self.x_dim, self.x_dim, self.c_dim])
        # layer_1: input_layer ==> [n, 28, 28, 1]
        x = InputLayer(input_shape)
        layers.append(x)
        # layer_2: conv_layer [n, 28, 28, 1] ==> [n, 28, 28, 20]
        x = ConvLayer(x,
                      output_nums=20,
                      kernel=5,
                      strides=1,
                      padding='SAME',
                      name='conv1')
        layers.append(x)
        # layer_3: maxpool_layer [n, 28, 28, 20] ==> [n, 14, 14, 20]
        x = MaxPoolLayer(x, kernel=2, strides=2, padding='SAME', name='pool1')
        layers.append(x)
        # layer_4: conv_layer [n, 14, 14, 20] ==> [n, 14, 14, 50]
        x = ConvLayer(x,
                      output_nums=50,
                      kernel=5,
                      strides=1,
                      padding='SAME',
                      name='conv2')
        layers.append(x)
        # layer_5: maxpool_layer [n, 14, 14, 50] ==> [n, 7, 7, 50]
        x = MaxPoolLayer(x, kernel=2, strides=2, padding='SAME', name='pool2')
        layers.append(x)
        # layer_6: flatten_layer [n, 7, 7, 50] ==> [n, 7*7*50]
        x = FlattenLayer(x, name='flatten')
        layers.append(x)
        # layer_7: fullconnected_layer [n, 2450] ==> [n, 500]
        x = DenseLayer(x, output_nums=500, name='dense1')
        layers.append(x)
        # layer_8: relu_layer [n, 500] ==> [n, 500]
        x = ReLULayer(x, name='relu1')
        layers.append(x)
        # layer_9: fullconnected_layer [n, 500] ==> [n, 10]
        x = DenseLayer(x, output_nums=10, name='dense2')
        layers.append(x)
        # layer_10: softmax_layer [n, 10] ==> [n, 10]
        x = SoftMaxLayer(x, name='softmax')
        layers.append(x)

        self.layers = layers
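With 'SAME' padding the spatial size depends only on the stride: out = ceil(in / stride). A quick check of the shape annotations above:

import math

def same_out(size, stride):
    # output spatial size under 'SAME' padding
    return math.ceil(size / stride)

size = 28
for name, stride in [('conv1', 1), ('pool1', 2), ('conv2', 1), ('pool2', 2)]:
    size = same_out(size, stride)
    print(name, size)  # conv1 28, pool1 14, conv2 14, pool2 7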
Example #6
    def __init__(self, outputs, inputs, priors, layer_type='lrt', activation_type='softplus'):
        super(BBBLeNet, self).__init__()

        self.num_classes = outputs
        self.layer_type = layer_type
        self.priors = priors

        if layer_type == 'lrt':
            BBBLinear = BBB_LRT_Linear
            BBBConv2d = BBB_LRT_Conv2d
        elif layer_type == 'bbb':
            BBBLinear = BBB_Linear
            BBBConv2d = BBB_Conv2d
        else:
            raise ValueError("Undefined layer_type")

        if activation_type == 'softplus':
            self.act = nn.Softplus
        elif activation_type == 'relu':
            self.act = nn.ReLU
        else:
            raise ValueError("Only softplus or relu supported")

        self.conv1 = BBBConv2d(inputs, 6, 5, padding=0, bias=True, priors=self.priors)
        self.act1 = self.act()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = BBBConv2d(6, 16, 5, padding=0, bias=True, priors=self.priors)
        self.act2 = self.act()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.flatten = FlattenLayer(5 * 5 * 16)
        self.fc1 = BBBLinear(5 * 5 * 16, 120, bias=True, priors=self.priors)
        self.act3 = self.act()

        self.fc2 = BBBLinear(120, 84, bias=True, priors=self.priors)
        self.act4 = self.act()

        self.fc3 = BBBLinear(84, outputs, bias=True, priors=self.priors)
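For the unpadded 5x5 convolutions and 2x2 pooling here, the usual formula out = (in + 2p - k) // s + 1 reproduces the 5 * 5 * 16 flatten size, assuming the classic 32x32 LeNet input:

def conv_out(size, kernel, stride=1, padding=0):
    # standard convolution/pooling output-size formula
    return (size + 2 * padding - kernel) // stride + 1

size = 32                    # assumed input resolution
size = conv_out(size, 5)     # conv1 -> 28
size = conv_out(size, 2, 2)  # pool1 -> 14
size = conv_out(size, 5)     # conv2 -> 10
size = conv_out(size, 2, 2)  # pool2 -> 5
print(size * size * 16)      # 400 = 5 * 5 * 16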
Example #7
def build_discriminator(input_var=None, use_batch_norm=True):
    import lasagne  # used below for nonlinearities and count_params
    from lasagne.layers import InputLayer, batch_norm
    from layers import (Lipshitz_Layer, LipConvLayer, Subpixel_Layer,
                        ReshapeLayer, FlattenLayer)

    layer = InputLayer(shape=(None, 784), input_var=input_var)
    if use_batch_norm:
        raise NotImplementedError
    else:
        layer = ReshapeLayer(layer, (-1, 1, 28, 28))
        layer = LipConvLayer(layer, 16, (5, 5), init=1)
        layer = LipConvLayer(layer, 32, (5, 5), init=1)
        layer = LipConvLayer(layer, 64, (5, 5), init=1)
        layer = LipConvLayer(layer, 128, (5, 5), init=1)
        layer = FlattenLayer(layer)
        layer = Lipshitz_Layer(layer, 512, init=1)
        layer = Lipshitz_Layer(layer,
                               1 + 10,
                               init=1,
                               nonlinearity=lasagne.nonlinearities.sigmoid)

    print("Discriminator output:", layer.output_shape)
    print("Number of parameters:", lasagne.layers.count_params(layer))
    return layer
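lasagne.layers.count_params sums the sizes of all parameter arrays in the network. For comparison, the equivalent one-liner for a PyTorch module (a side note, not part of the Lasagne code):

import torch.nn as nn

model = nn.Linear(784, 512)
n_params = sum(p.numel() for p in model.parameters())
print(n_params)  # 784 * 512 + 512 = 401920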
"""
Created on Sun Mar 25 19:52:43 2018

@author: kaushik
"""
import time
import numpy as np
import matplotlib.pyplot as plt
from layers.dataset import cifar100
from layers import (ConvLayer, FullLayer, FlattenLayer, MaxPoolLayer,
                    ReluLayer, SoftMaxLayer, CrossEntropyLayer, Sequential)

(x_train, y_train), (x_test, y_test) = cifar100(1337)
model = Sequential(layers=(ConvLayer(3, 16, 3), ReluLayer(), MaxPoolLayer(),
                           ConvLayer(16, 32, 3), ReluLayer(), MaxPoolLayer(),
                           FlattenLayer(), FullLayer(8 * 8 * 32,
                                                     4), SoftMaxLayer()),
                   loss=CrossEntropyLayer())
start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
lr_vals = [0.1]
losses_train = list()
losses_test = list()
test_acc = np.zeros(len(lr_vals))
for j in range(len(lr_vals)):
    train_loss, test_loss = model.fit(x_train,
                                      y_train,
                                      x_test,
                                      y_test,
                                      epochs=8,
                                      lr=lr_vals[j],
                                      batch_size=128)
    losses_train.append(train_loss)
    losses_test.append(test_loss)
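matplotlib is imported above but the plotting code is cut off. A minimal sketch of how the collected losses might be plotted, assuming model.fit returns one loss value per epoch for each set:

for j, lr in enumerate(lr_vals):
    plt.plot(losses_train[j], label='train, lr=%s' % lr)
    plt.plot(losses_test[j], label='test, lr=%s' % lr)
plt.xlabel('epoch')
plt.ylabel('cross-entropy loss')
plt.legend()
plt.show()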
Example #9
def experiment(variant):
    from traffic.make_env import make_env
    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim

    from graph_builder_multi import MultiTrafficGraphBuilder
    policy_gb = MultiTrafficGraphBuilder(
        input_dim=4 + label_dim,
        node_num=expl_env.max_veh_num + 1,
        ego_init=torch.tensor([0., 1.]),
        other_init=torch.tensor([1., 0.]),
    )
    if variant['gnn_kwargs']['attention']:
        from gnn_attention_net import GNNAttentionNet
        gnn_class = GNNAttentionNet
    else:
        from gnn_net import GNNNet
        gnn_class = GNNNet
    policy_gnn = gnn_class(
        pre_graph_builder=policy_gb,
        node_dim=variant['gnn_kwargs']['node'],
        num_conv_layers=variant['gnn_kwargs']['layer'],
        hidden_activation=variant['gnn_kwargs']['activation'],
    )
    from layers import FlattenLayer, SelectLayer
    policy = nn.Sequential(
        policy_gnn, SelectLayer(1, 0), FlattenLayer(), nn.ReLU(),
        nn.Linear(variant['gnn_kwargs']['node'], action_dim))

    sup_gb = MultiTrafficGraphBuilder(
        input_dim=4,
        node_num=expl_env.max_veh_num + 1,
        ego_init=torch.tensor([0., 1.]),
        other_init=torch.tensor([1., 0.]),
    )
    sup_attentioner = None
    from layers import ReshapeLayer
    from gnn_net import GNNNet
    sup_gnn = GNNNet(
        pre_graph_builder=sup_gb,
        node_dim=variant['gnn_kwargs']['node'],
        num_conv_layers=variant['gnn_kwargs']['layer'],
        hidden_activation=variant['gnn_kwargs']['activation'],
    )
    sup_learner = nn.Sequential(
        sup_gnn,
        SelectLayer(1, np.arange(1, expl_env.max_veh_num + 1)),
        nn.ReLU(),
        nn.Linear(variant['gnn_kwargs']['node'], label_dim),
    )
    from sup_sep_softmax_policy import SupSepSoftmaxPolicy
    policy = SupSepSoftmaxPolicy(policy, sup_learner, label_num, label_dim)
    print('parameters: ',
          np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

    vf = Mlp(
        hidden_sizes=[32, 32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    from sup_sep_rollout import sup_sep_rollout
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
        rollout_fn=sup_sep_rollout,
    )
    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        label_dim=label_num,
        max_replay_buffer_size=int(1e6),
    )

    from rlkit.torch.vpg.trpo_sup_sep import TRPOSupSepTrainer
    trainer = TRPOSupSepTrainer(policy=policy,
                                value_function=vf,
                                vf_criterion=vf_criterion,
                                replay_buffer=replay_buffer,
                                **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
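The local layers module is not shown; judging from their usage, SelectLayer(dim, index) picks a slice along one dimension (here the ego node) and FlattenLayer flattens everything after the batch dimension. A plausible minimal implementation, offered purely as an assumption:

import torch
import torch.nn as nn

class SelectLayer(nn.Module):
    # hypothetical: pick the given index (or indices) along one dimension
    def __init__(self, dim, index):
        super().__init__()
        self.dim = dim
        self.index = index

    def forward(self, x):
        idx = torch.atleast_1d(torch.as_tensor(self.index, dtype=torch.long))
        return x.index_select(self.dim, idx)

class FlattenLayer(nn.Module):
    # hypothetical: flatten all dimensions after start_dim
    def __init__(self, start_dim=1):
        super().__init__()
        self.start_dim = start_dim

    def forward(self, x):
        return torch.flatten(x, self.start_dim)

x = torch.zeros(4, 7, 16)                 # [batch, nodes, features]
y = FlattenLayer()(SelectLayer(1, 0)(x))  # keep node 0, then flatten
print(y.shape)                            # torch.Size([4, 16])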
Example #10
def experiment(variant):
    from simple_sup_lstm import SimpleSupLSTMEnv
    expl_env = SimpleSupLSTMEnv(**variant['env_kwargs'])
    eval_env = SimpleSupLSTMEnv(**variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim

    if variant['load_kwargs']['load']:
        load_dir = variant['load_kwargs']['load_dir']
        load_data = torch.load(load_dir + '/params.pkl', map_location='cpu')
        policy = load_data['trainer/policy']
        vf = load_data['trainer/value_function']
    else:
        hidden_dim = variant['lstm_kwargs']['hidden_dim']
        num_lstm_layers = variant['lstm_kwargs']['num_layers']
        node_dim = variant['gnn_kwargs']['node_dim']

        node_num = expl_env.node_num
        input_node_dim = int(obs_dim / node_num)
        a_0 = np.zeros(action_dim)
        h1_0 = np.zeros((node_num, hidden_dim * num_lstm_layers))
        c1_0 = np.zeros((node_num, hidden_dim * num_lstm_layers))
        h2_0 = np.zeros((node_num, hidden_dim * num_lstm_layers))
        c2_0 = np.zeros((node_num, hidden_dim * num_lstm_layers))
        latent_0 = (h1_0, c1_0, h2_0, c2_0)
        from lstm_net import LSTMNet
        lstm1_ego = LSTMNet(input_node_dim, action_dim, hidden_dim,
                            num_lstm_layers)
        lstm1_other = LSTMNet(input_node_dim, 0, hidden_dim, num_lstm_layers)
        lstm2_ego = LSTMNet(node_dim, 0, hidden_dim, num_lstm_layers)
        lstm2_other = LSTMNet(node_dim, 0, hidden_dim, num_lstm_layers)
        from graph_builder import TrafficGraphBuilder
        gb = TrafficGraphBuilder(
            input_dim=hidden_dim,
            node_num=node_num,
            ego_init=torch.tensor([0., 1.]),
            other_init=torch.tensor([1., 0.]),
        )
        from gnn_net import GNNNet
        gnn = GNNNet(
            pre_graph_builder=gb,
            node_dim=variant['gnn_kwargs']['node_dim'],
            conv_type=variant['gnn_kwargs']['conv_type'],
            num_conv_layers=variant['gnn_kwargs']['num_layers'],
            hidden_activation=variant['gnn_kwargs']['activation'],
        )
        from gnn_lstm2_net import GNNLSTM2Net
        policy_net = GNNLSTM2Net(node_num, gnn, lstm1_ego, lstm1_other,
                                 lstm2_ego, lstm2_other)
        from layers import FlattenLayer, SelectLayer
        post_net = nn.Sequential(SelectLayer(-2, 0), FlattenLayer(2),
                                 nn.ReLU(), nn.Linear(hidden_dim, action_dim))
        from softmax_lstm_policy import SoftmaxLSTMPolicy
        policy = SoftmaxLSTMPolicy(
            a_0=a_0,
            latent_0=latent_0,
            obs_dim=obs_dim,
            action_dim=action_dim,
            lstm_net=policy_net,
            post_net=post_net,
        )
        print('parameters: ',
              np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

        vf = Mlp(
            hidden_sizes=[32, 32],
            input_size=obs_dim,
            output_size=1,
        )  # TODO: id is also an input

    vf_criterion = nn.MSELoss()
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    eval_policy = MakeDeterministic(policy)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    trainer = PPOTrainer(policy=policy,
                         value_function=vf,
                         vf_criterion=vf_criterion,
                         recurrent=True,
                         **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Example #11
def experiment(variant):
    from traffic.make_env import make_env
    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim

    encoder = nn.Sequential(
        nn.Linear(obs_dim, 32),
        nn.ReLU(),
    )
    from layers import ReshapeLayer, FlattenLayer, ConcatLayer
    sup_learner = nn.Sequential(
        nn.Linear(32, int(label_num * label_dim)),
        ReshapeLayer(shape=(label_num, label_dim)),
    )
    decoder = nn.Sequential(
        ConcatLayer([
            nn.Sequential(nn.Linear(32, 16), nn.ReLU()),
            nn.Sequential(sup_learner, nn.Softmax(dim=-1), FlattenLayer()),
        ],
                    need_gradients=True),
        nn.Linear(16 + int(label_num * label_dim), action_dim),
    )
    from sup_softmax_policy import SupSoftmaxPolicy
    policy = SupSoftmaxPolicy(encoder, decoder, sup_learner)
    print('parameters: ',
          np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

    vf = Mlp(
        hidden_sizes=[32, 32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        label_dim=label_num,
        max_replay_buffer_size=int(1e6),
    )

    from rlkit.torch.vpg.trpo_sup import TRPOSupTrainer
    trainer = TRPOSupTrainer(policy=policy,
                             value_function=vf,
                             vf_criterion=vf_criterion,
                             replay_buffer=replay_buffer,
                             **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
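ConcatLayer also comes from the local layers module; from its usage it presumably runs each branch on the same input and concatenates the outputs along the last dimension. A plausible minimal sketch (the need_gradients flag is ignored here):

import torch
import torch.nn as nn

class ConcatLayer(nn.Module):
    # hypothetical: apply every branch to the same input, concat results
    def __init__(self, branches):
        super().__init__()
        self.branches = nn.ModuleList(branches)

    def forward(self, x):
        return torch.cat([branch(x) for branch in self.branches], dim=-1)

branch = ConcatLayer([nn.Linear(32, 16), nn.Linear(32, 8)])
print(branch(torch.zeros(4, 32)).shape)  # torch.Size([4, 24])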
Example #12
    (x, y), (x2, y2) = mnist.load_data()
    y = np.array([[i] for i in y])
    #  map to 0 - 1
    X = x / 255
    # One Hot Encoding
    dataset = np.array([np.zeros(10) for i in range(len(y))])
    for i in range(len(y)):
        dataset[i][y[i]] = 1
    X = [[X[i]] for i in range(len(X))]
    net = NeuralNetwork()
    net.add(ConvolutionalLayer(2, (3, 3), stride=1, input_shape=(28, 28)))  # 6
    net.add(ConvolutionalLayer(3, (3, 3), stride=1))  # 6
    #net.add(PoolingLayer((2,2),2))
    #net.add(ConvolutionalLayer(16,(3,3),stride=1))
    #net.add(PoolingLayer((2,2),2))
    net.add(FlattenLayer())
    #net.add(Dense(120))
    net.add(Dense(84))
    net.add(Dense(10))  #,"softmax"))
    net.set_training_set(X[:2000], dataset[:2000])
    # NOTE: Only Increase epochs after successfully designed
    try:
        net.train(epochs=50, batch_size=45, resolution=1)
    except KeyboardInterrupt as e:
        net.save("CNN.failed.model")
    finally:
        net.save("CNN.model")
else:
    net = NeuralNetwork().load("CNN.model")
print(net.get_recall())
#print(net.get_acc(X[30000:31000],dataset[30000:31000]))
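The one-hot loop above can be replaced by a single indexing expression; a small sketch:

import numpy as np

y = np.array([3, 0, 9, 1])
one_hot = np.eye(10)[y]  # row i is the one-hot vector for label y[i]
print(one_hot.shape)     # (4, 10)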
Example #13
             ConvLayer(64, [7, 1])]),
        BranchedLayer([None, ConvLayer(64, [1, 7])]),
        BranchedLayer([None, ConvLayer(96, 3, padding='valid')]),
        MergeLayer(axis=3),
        BranchedLayer([
            ConvLayer(192, 3, strides=2, padding='valid'),
            MaxPoolLayer(3, strides=2, padding='valid')
        ]),
        MergeLayer(axis=3),
        *([inception_a] * args.na),  # x4
        ConvLayer(1024, 3, strides=2),  # reduction_a
        *([inception_b] * args.nb),  # x7
        ConvLayer(1536, 3, strides=2),  # reduction_b
        *([inception_c] * args.nc),  # x3
        GlobalAvgPoolLayer(),
        FlattenLayer(),
        DropoutLayer(rate=args.drop_prob)
    ]

    data_params = {
        'na': args.na,
        'nb': args.nb,
        'nc': args.nc,
        'batch_norm': batch_norm,
        'drop_prob': args.drop_prob,
        'augmentation': True
    }

    cnn = CNN(layers,
              n_classes=n_classes,
              batch_size=128,
Example #14
def experiment(variant):
    from simple_sup import SimpleSupEnv
    expl_env = SimpleSupEnv(**variant['env_kwargs'])
    eval_env = SimpleSupEnv(**variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n

    encoder = nn.Sequential(
        nn.Linear(obs_dim, 16),
        nn.ReLU(),
    )
    from layers import ReshapeLayer, FlattenLayer
    sup_learner = nn.Sequential(
        nn.Linear(16, action_dim),
        ReshapeLayer(shape=(1, action_dim)),
    )
    decoder = nn.Sequential(
        sup_learner,
        FlattenLayer(),
        nn.Linear(action_dim, action_dim),
    )
    from sup_softmax_policy import SupSoftmaxPolicy
    policy = SupSoftmaxPolicy(encoder, decoder, sup_learner)

    vf = Mlp(
        hidden_sizes=[32],
        input_size=obs_dim,
        output_size=1,
    )
    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        label_dim=1,
        max_replay_buffer_size=int(1e6),
    )

    from rlkit.torch.vpg.trpo_sup import TRPOSupTrainer
    trainer = TRPOSupTrainer(policy=policy,
                             value_function=vf,
                             vf_criterion=vf_criterion,
                             replay_buffer=replay_buffer,
                             **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
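Note that sup_learner is nested inside decoder, so the same module instance (and its parameters) is reachable both through the policy's action path and directly as the supervised head. A tiny sketch of this sharing behavior:

import torch.nn as nn

shared = nn.Linear(16, 4)
head_a = nn.Sequential(shared, nn.ReLU())
head_b = nn.Sequential(shared, nn.Softmax(dim=-1))

# both heads hold the *same* parameter tensors
print(head_a[0].weight is head_b[0].weight)  # True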
Example #15
    def __init__(self,
                 outputs,
                 inputs,
                 priors,
                 layer_type='lrt',
                 activation_type='softplus'):
        super(BBBConv6, self).__init__()

        self.num_classes = outputs
        self.layer_type = layer_type
        self.priors = priors

        if layer_type == 'lrt':
            BBBLinear = BBB_LRT_Linear
            BBBConv2d = BBB_LRT_Conv2d
        elif layer_type == 'bbb':
            BBBLinear = BBB_Linear
            BBBConv2d = BBB_Conv2d
        elif layer_type == 'mgp':
            BBBLinear = BBB_MGP_Linear
            BBBConv2d = BBB_MGP_Conv2d
        else:
            raise ValueError("Undefined layer_type")

        if activation_type == 'softplus':
            self.act = nn.Softplus
        elif activation_type == 'relu':
            self.act = nn.ReLU
        else:
            raise ValueError("Only softplus or relu supported")

        self.conv1 = BBBConv2d(inputs,
                               64,
                               3,
                               padding=1,
                               bias=True,
                               priors=self.priors)
        self.act1 = self.act()
        self.conv2 = BBBConv2d(64, 64, 3, 1, bias=True, priors=self.priors)
        self.act2 = self.act()
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.conv3 = BBBConv2d(64,
                               128,
                               3,
                               padding=1,
                               bias=True,
                               priors=self.priors)
        self.act3 = self.act()
        self.conv4 = BBBConv2d(128,
                               128,
                               3,
                               padding=1,
                               bias=True,
                               priors=self.priors)
        self.act4 = self.act()
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.conv5 = BBBConv2d(128,
                               256,
                               3,
                               padding=1,
                               bias=True,
                               priors=self.priors)
        self.act5 = self.act()
        self.conv6 = BBBConv2d(256,
                               256,
                               3,
                               padding=1,
                               bias=True,
                               priors=self.priors)
        self.act6 = self.act()

        self.pool3 = nn.AdaptiveAvgPool2d((3, 3))

        self.flatten = FlattenLayer(3 * 3 * 256)
        self.fc1 = BBBLinear(3 * 3 * 256, 256, bias=True, priors=self.priors)
        self.act7 = self.act()

        self.fc2 = BBBLinear(256, 256, bias=True, priors=self.priors)
        self.act8 = self.act()

        self.fc3 = BBBLinear(256, outputs, bias=True, priors=self.priors)
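Because pool3 is an AdaptiveAvgPool2d((3, 3)), the 3 * 3 * 256 flatten size holds regardless of the input resolution. A quick demonstration:

import torch
import torch.nn as nn

pool = nn.AdaptiveAvgPool2d((3, 3))
for size in (32, 64, 100):
    x = torch.zeros(1, 256, size, size)
    print(pool(x).shape)  # always torch.Size([1, 256, 3, 3])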
Example #16
    def __init__(self, x, input_shape):
        n_convfilter = [16, 32, 64, 64, 64, 64]
        n_fc_filters = [1024]
        n_deconvfilter = [64, 64, 64, 16, 8, 2]

        self.x = x
        # To define weights, define the network structure first
        x_ = InputLayer(input_shape)
        conv1a = ConvLayer(x_, (n_convfilter[0], 7, 7))
        conv1b = ConvLayer(conv1a, (n_convfilter[0], 3, 3))
        pool1 = PoolLayer(conv1b)

        print(
            'Conv1a = ConvLayer(x, (%s, 7, 7) => input_shape %s,  output_shape %s)'
            % (n_convfilter[0], conv1a._input_shape, conv1a._output_shape))
        print(
            'Conv1b = ConvLayer(x, (%s, 3, 3) => input_shape %s,  output_shape %s)'
            % (n_convfilter[0], conv1b._input_shape, conv1b._output_shape))
        print('pool1 => input_shape %s,  output_shape %s)' %
              (pool1._input_shape, pool1._output_shape))

        conv2a = ConvLayer(pool1, (n_convfilter[1], 3, 3))
        conv2b = ConvLayer(conv2a, (n_convfilter[1], 3, 3))
        conv2c = ConvLayer(pool1, (n_convfilter[1], 1, 1))
        pool2 = PoolLayer(conv2c)

        print(
            'Conv2a = ConvLayer(x, (%s, 3, 3) => input_shape %s,  output_shape %s)'
            % (n_convfilter[1], conv2a._input_shape, conv2a._output_shape))
        print(
            'Conv2b = ConvLayer(x, (%s, 3, 3) => input_shape %s,  output_shape %s)'
            % (n_convfilter[1], conv2b._input_shape, conv2b._output_shape))
        conv3a = ConvLayer(pool2, (n_convfilter[2], 3, 3))
        conv3b = ConvLayer(conv3a, (n_convfilter[2], 3, 3))
        conv3c = ConvLayer(pool2, (n_convfilter[2], 1, 1))
        pool3 = PoolLayer(conv3b)

        print(
            'Conv3a = ConvLayer(x, (%s, 3, 3) => input_shape %s,  output_shape %s)'
            % (n_convfilter[2], conv3a._input_shape, conv3a._output_shape))
        print(
            'Conv3b = ConvLayer(x, (%s, 3, 3) => input_shape %s,  output_shape %s)'
            % (n_convfilter[2], conv3b._input_shape, conv3b._output_shape))
        print(
            'Conv3c = ConvLayer(x, (%s, 1, 1) => input_shape %s,  output_shape %s)'
            % (n_convfilter[2], conv3c._input_shape, conv3c._output_shape))
        print('pool3 => input_shape %s,  output_shape %s)' %
              (pool3._input_shape, pool3._output_shape))

        conv4a = ConvLayer(pool3, (n_convfilter[3], 3, 3))
        conv4b = ConvLayer(conv4a, (n_convfilter[3], 3, 3))
        pool4 = PoolLayer(conv4b)

        conv5a = ConvLayer(pool4, (n_convfilter[4], 3, 3))
        conv5b = ConvLayer(conv5a, (n_convfilter[4], 3, 3))
        conv5c = ConvLayer(pool4, (n_convfilter[4], 1, 1))
        pool5 = PoolLayer(conv5b)

        conv6a = ConvLayer(pool5, (n_convfilter[5], 3, 3))
        conv6b = ConvLayer(conv6a, (n_convfilter[5], 3, 3))
        pool6 = PoolLayer(conv6b)

        print(
            'Conv6a = ConvLayer(x, (%s, 3, 3) => input_shape %s,  output_shape %s)'
            % (n_convfilter[5], conv6a._input_shape, conv6a._output_shape))
        print(
            'Conv6b = ConvLayer(x, (%s, 3, 3) => input_shape %s,  output_shape %s)'
            % (n_convfilter[5], conv6b._input_shape, conv6b._output_shape))
        print('pool6 => input_shape %s,  output_shape %s)' %
              (pool6._input_shape, pool6._output_shape))

        flat6 = FlattenLayer(pool6)
        print('flat6 => input_shape %s,  output_shape %s)' %
              (flat6._input_shape, flat6._output_shape))

        fc7 = TensorProductLayer(flat6, n_fc_filters[0])
        print('fc7 => input_shape %s,  output_shape %s)' %
              (fc7._input_shape, fc7._output_shape))

        # Set the size to be 64x4x4x4
        #s_shape_1d = (cfg.batch, n_deconvfilter[0])
        s_shape_1d = (cfg.batch, n_fc_filters[0])
        self.prev_s = InputLayer(s_shape_1d)
        #view_features_shape = (cfg.batch, n_fc_filters[0], cfg.CONST.N_VIEWS)

        self.t_x_s_update = FCConv1DLayer(self.prev_s,
                                          fc7,
                                          n_fc_filters[0],
                                          isTrainable=True)

        self.t_x_s_reset = FCConv1DLayer(self.prev_s,
                                         fc7,
                                         n_fc_filters[0],
                                         isTrainable=True)

        self.reset_gate = SigmoidLayer(self.t_x_s_reset)

        self.rs = EltwiseMultiplyLayer(self.reset_gate, self.prev_s)
        self.t_x_rs = FCConv1DLayer(self.rs,
                                    fc7,
                                    n_fc_filters[0],
                                    isTrainable=True)

        def recurrence(x_curr, prev_s_tensor, prev_in_gate_tensor):
            # Scan function cannot use compiled function.
            input_ = InputLayer(input_shape, x_curr)
            conv1a_ = ConvLayer(input_, (n_convfilter[0], 7, 7),
                                params=conv1a.params)
            rect1a_ = LeakyReLU(conv1a_)
            conv1b_ = ConvLayer(rect1a_, (n_convfilter[0], 3, 3),
                                params=conv1b.params)
            rect1_ = LeakyReLU(conv1b_)
            pool1_ = PoolLayer(rect1_)

            conv2a_ = ConvLayer(pool1_, (n_convfilter[1], 3, 3),
                                params=conv2a.params)
            rect2a_ = LeakyReLU(conv2a_)
            conv2b_ = ConvLayer(rect2a_, (n_convfilter[1], 3, 3),
                                params=conv2b.params)
            rect2_ = LeakyReLU(conv2b_)
            conv2c_ = ConvLayer(pool1_, (n_convfilter[1], 1, 1),
                                params=conv2c.params)
            res2_ = AddLayer(conv2c_, rect2_)
            pool2_ = PoolLayer(res2_)

            conv3a_ = ConvLayer(pool2_, (n_convfilter[2], 3, 3),
                                params=conv3a.params)
            rect3a_ = LeakyReLU(conv3a_)
            conv3b_ = ConvLayer(rect3a_, (n_convfilter[2], 3, 3),
                                params=conv3b.params)
            rect3_ = LeakyReLU(conv3b_)
            conv3c_ = ConvLayer(pool2_, (n_convfilter[2], 1, 1),
                                params=conv3c.params)
            res3_ = AddLayer(conv3c_, rect3_)
            pool3_ = PoolLayer(res3_)

            conv4a_ = ConvLayer(pool3_, (n_convfilter[3], 3, 3),
                                params=conv4a.params)
            rect4a_ = LeakyReLU(conv4a_)
            conv4b_ = ConvLayer(rect4a_, (n_convfilter[3], 3, 3),
                                params=conv4b.params)
            rect4_ = LeakyReLU(conv4b_)
            pool4_ = PoolLayer(rect4_)

            conv5a_ = ConvLayer(pool4_, (n_convfilter[4], 3, 3),
                                params=conv5a.params)
            rect5a_ = LeakyReLU(conv5a_)
            conv5b_ = ConvLayer(rect5a_, (n_convfilter[4], 3, 3),
                                params=conv5b.params)
            rect5_ = LeakyReLU(conv5b_)
            conv5c_ = ConvLayer(pool4_, (n_convfilter[4], 1, 1),
                                params=conv5c.params)
            res5_ = AddLayer(conv5c_, rect5_)
            pool5_ = PoolLayer(res5_)

            conv6a_ = ConvLayer(pool5_, (n_convfilter[5], 3, 3),
                                params=conv6a.params)
            rect6a_ = LeakyReLU(conv6a_)
            conv6b_ = ConvLayer(rect6a_, (n_convfilter[5], 3, 3),
                                params=conv6b.params)
            rect6_ = LeakyReLU(conv6b_)
            res6_ = AddLayer(pool5_, rect6_)
            pool6_ = PoolLayer(res6_)

            flat6_ = FlattenLayer(pool6_)
            fc7_ = TensorProductLayer(flat6_,
                                      n_fc_filters[0],
                                      params=fc7.params)
            rect7_ = LeakyReLU(fc7_)

            prev_s_ = InputLayer(s_shape_1d, prev_s_tensor)
            #print(self.prev_s_._output_shape)

            t_x_s_update_ = FCConv1DLayer(prev_s_,
                                          rect7_,
                                          n_fc_filters[0],
                                          params=self.t_x_s_update.params,
                                          isTrainable=True)

            t_x_s_reset_ = FCConv1DLayer(prev_s_,
                                         rect7_,
                                         n_fc_filters[0],
                                         params=self.t_x_s_reset.params,
                                         isTrainable=True)

            update_gate_ = SigmoidLayer(t_x_s_update_)
            comp_update_gate_ = ComplementLayer(update_gate_)
            reset_gate_ = SigmoidLayer(t_x_s_reset_)

            rs_ = EltwiseMultiplyLayer(reset_gate_, prev_s_)
            t_x_rs_ = FCConv1DLayer(rs_,
                                    rect7_,
                                    n_fc_filters[0],
                                    params=self.t_x_rs.params,
                                    isTrainable=True)

            tanh_t_x_rs_ = TanhLayer(t_x_rs_)

            gru_out_ = AddLayer(
                EltwiseMultiplyLayer(update_gate_, prev_s_),
                EltwiseMultiplyLayer(comp_update_gate_, tanh_t_x_rs_))

            return gru_out_.output, update_gate_.output

        time_features, _ = theano.scan(
            recurrence,
            sequences=[
                self.x
            ],  # along with images, feed in the index of the current frame
            outputs_info=[
                tensor.zeros_like(np.zeros(s_shape_1d),
                                  dtype=theano.config.floatX),
                tensor.zeros_like(np.zeros(s_shape_1d),
                                  dtype=theano.config.floatX)
            ])
        time_all = time_features[0]
        time_last = time_all[-1]

        self.features = time_last
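The recurrence implements a GRU-style gated update: with update gate u, reset gate r, previous state s, and frame features x, the new state is u * s + (1 - u) * tanh(f(x, r * s)). A compact numpy sketch of that update (the weights are illustrative; FCConv1DLayer mixes both inputs with learned weights, simplified here):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

rng = np.random.default_rng(0)
dim = 4
x = rng.standard_normal(dim)  # frame features (rect7_ above)
s = rng.standard_normal(dim)  # previous hidden state (prev_s_)
W_u, W_r, W_h = (rng.standard_normal((dim, dim)) for _ in range(3))

u = sigmoid(W_u @ x + s)        # update gate: SigmoidLayer(t_x_s_update_)
r = sigmoid(W_r @ x + s)        # reset gate:  SigmoidLayer(t_x_s_reset_)
h = np.tanh(W_h @ x + r * s)    # candidate:   TanhLayer(t_x_rs_)
s_next = u * s + (1.0 - u) * h  # AddLayer of the two gated terms
print(s_next.shape)             # (4,)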
Example #18
def experiment(variant):
    from traffic.make_env import make_env
    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim

    if variant['load_kwargs']['load']:
        load_dir = variant['load_kwargs']['load_dir']
        load_data = torch.load(load_dir + '/params.pkl', map_location='cpu')
        policy = load_data['trainer/policy']
        vf = load_data['trainer/value_function']
    else:
        from graph_builder_multi import MultiTrafficGraphBuilder
        gb = MultiTrafficGraphBuilder(
            input_dim=4,
            node_num=expl_env.max_veh_num + 1,
            ego_init=torch.tensor([0., 1.]),
            other_init=torch.tensor([1., 0.]),
        )
        if variant['gnn_kwargs']['attention']:
            from gnn_attention_net import GNNAttentionNet
            gnn_class = GNNAttentionNet
        else:
            from gnn_net import GNNNet
            gnn_class = GNNNet
        gnn = gnn_class(
            pre_graph_builder=gb,
            node_dim=variant['gnn_kwargs']['node'],
            conv_type=variant['gnn_kwargs']['conv_type'],
            num_conv_layers=variant['gnn_kwargs']['layer'],
            hidden_activation=variant['gnn_kwargs']['activation'],
        )
        from layers import FlattenLayer, SelectLayer
        policy = nn.Sequential(
            gnn, SelectLayer(1, 0), FlattenLayer(), nn.ReLU(),
            nn.Linear(variant['gnn_kwargs']['node'], action_dim))
        policy = SoftmaxPolicy(policy, learn_temperature=False)
        print('parameters: ',
              np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

        vf = Mlp(
            hidden_sizes=[32, 32],
            input_size=obs_dim,
            output_size=1,
        )

    vf_criterion = nn.MSELoss()
    eval_policy = ArgmaxDiscretePolicy(policy, use_preactivation=True)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )
    trainer = PPOTrainer(policy=policy,
                         value_function=vf,
                         vf_criterion=vf_criterion,
                         **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Example #19
def experiment(variant):
    from traffic.make_env import make_env
    expl_env = make_env(args.exp_name, **variant['env_kwargs'])
    eval_env = make_env(args.exp_name, **variant['env_kwargs'])
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.n
    label_num = expl_env.label_num
    label_dim = expl_env.label_dim
    max_path_length = variant['trainer_kwargs']['max_path_length']

    if variant['load_kwargs']['load']:
        load_dir = variant['load_kwargs']['load_dir']
        load_data = torch.load(load_dir + '/params.pkl', map_location='cpu')
        policy = load_data['trainer/policy']
        vf = load_data['trainer/value_function']
    else:
        hidden_dim = variant['lstm_kwargs']['hidden_dim']
        num_lstm_layers = variant['lstm_kwargs']['num_layers']
        node_dim = variant['gnn_kwargs']['node_dim']

        node_num = expl_env.max_veh_num + 1
        input_node_dim = int(obs_dim / node_num)
        a_0 = np.zeros(action_dim)
        h1_0 = np.zeros((node_num, hidden_dim * num_lstm_layers))
        c1_0 = np.zeros((node_num, hidden_dim * num_lstm_layers))
        h2_0 = np.zeros((node_num, hidden_dim * num_lstm_layers))
        c2_0 = np.zeros((node_num, hidden_dim * num_lstm_layers))
        latent_0 = (h1_0, c1_0, h2_0, c2_0)
        from lstm_net import LSTMNet
        lstm1_ego = LSTMNet(input_node_dim, action_dim, hidden_dim,
                            num_lstm_layers)
        lstm1_other = LSTMNet(input_node_dim, 0, hidden_dim, num_lstm_layers)
        lstm2_ego = LSTMNet(node_dim, 0, hidden_dim, num_lstm_layers)
        lstm2_other = LSTMNet(node_dim, 0, hidden_dim, num_lstm_layers)
        from graph_builder import TrafficGraphBuilder
        gb = TrafficGraphBuilder(
            input_dim=hidden_dim,
            node_num=node_num,
            ego_init=torch.tensor([0., 1.]),
            other_init=torch.tensor([1., 0.]),
        )
        from gnn_net import GNNNet
        gnn = GNNNet(
            pre_graph_builder=gb,
            node_dim=variant['gnn_kwargs']['node_dim'],
            conv_type=variant['gnn_kwargs']['conv_type'],
            num_conv_layers=variant['gnn_kwargs']['num_layers'],
            hidden_activation=variant['gnn_kwargs']['activation'],
        )
        from gnn_lstm2_net import GNNLSTM2Net
        policy_net = GNNLSTM2Net(node_num, gnn, lstm1_ego, lstm1_other,
                                 lstm2_ego, lstm2_other)
        from layers import FlattenLayer, SelectLayer
        decoder = nn.Sequential(SelectLayer(-2, 0), FlattenLayer(2), nn.ReLU(),
                                nn.Linear(hidden_dim, action_dim))
        from layers import ReshapeLayer
        sup_learner = nn.Sequential(
            SelectLayer(-2, np.arange(1, node_num)),
            nn.ReLU(),
            nn.Linear(hidden_dim, label_dim),
        )
        from sup_softmax_lstm_policy import SupSoftmaxLSTMPolicy
        policy = SupSoftmaxLSTMPolicy(
            a_0=a_0,
            latent_0=latent_0,
            obs_dim=obs_dim,
            action_dim=action_dim,
            lstm_net=policy_net,
            decoder=decoder,
            sup_learner=sup_learner,
        )
        print('parameters: ',
              np.sum([p.view(-1).shape[0] for p in policy.parameters()]))

        vf = Mlp(
            hidden_sizes=[32, 32],
            input_size=obs_dim,
            output_size=1,
        )

    vf_criterion = nn.MSELoss()
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    eval_policy = MakeDeterministic(policy)
    expl_policy = policy

    eval_path_collector = MdpPathCollector(
        eval_env,
        eval_policy,
    )
    expl_path_collector = MdpPathCollector(
        expl_env,
        expl_policy,
    )

    from sup_replay_buffer import SupReplayBuffer
    replay_buffer = SupReplayBuffer(
        observation_dim=obs_dim,
        action_dim=action_dim,
        label_dim=label_num,
        max_replay_buffer_size=int(1e6),
        max_path_length=max_path_length,
        recurrent=True,
    )

    from rlkit.torch.vpg.ppo_sup_vanilla import PPOSupVanillaTrainer
    trainer = PPOSupVanillaTrainer(policy=policy,
                                   value_function=vf,
                                   vf_criterion=vf_criterion,
                                   replay_buffer=replay_buffer,
                                   recurrent=True,
                                   **variant['trainer_kwargs'])
    algorithm = TorchOnlineRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        log_path_function=get_traffic_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Example #20
    def __init__(
        self,
        outputs,
        inputs,
        priors,
        pre_pruned_model,
        layer_type='lrt',
        activation_type='softplus',
    ):
        super(BBBCustomConv6, self).__init__()

        self.num_classes = outputs
        self.layer_type = layer_type
        self.priors = priors

        if layer_type == 'lrt':
            BBBLinear = BBB_LRT_Linear
            BBBConv2d = BBB_LRT_Conv2d
        elif layer_type == 'bbb':
            BBBLinear = BBB_Linear
            BBBConv2d = BBB_Conv2d
        elif layer_type == 'mgp':
            BBBLinear = BBB_MGP_Linear
            BBBConv2d = BBB_MGP_Conv2d
        else:
            raise ValueError("Undefined layer_type")

        if activation_type == 'softplus':
            self.act = nn.Softplus
        elif activation_type == 'relu':
            self.act = nn.ReLU
        else:
            raise ValueError("Only softplus or relu supported")

        modules = [
            module for module in pre_pruned_model.modules()
            if ('Conv2d' in str(module.__class__)
                or 'Linear' in str(module.__class__))
        ]
        idx = 0

        self.conv1 = BBBConv2d(inputs,
                               int(modules[idx].out_channels),
                               3,
                               padding=1,
                               bias=True,
                               priors=self.priors)
        idx += 1
        self.act1 = self.act()
        self.conv2 = BBBConv2d(int(modules[idx].in_channels),
                               int(modules[idx].out_channels),
                               3,
                               1,
                               bias=True,
                               priors=self.priors)
        idx += 1
        self.act2 = self.act()
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        self.conv3 = BBBConv2d(int(modules[idx].in_channels),
                               int(modules[idx].out_channels),
                               3,
                               padding=1,
                               bias=True,
                               priors=self.priors)
        idx += 1
        self.act3 = self.act()
        self.conv4 = BBBConv2d(int(modules[idx].in_channels),
                               int(modules[idx].out_channels),
                               3,
                               padding=1,
                               bias=True,
                               priors=self.priors)
        idx += 1
        self.act4 = self.act()
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        self.conv5 = BBBConv2d(int(modules[idx].in_channels),
                               int(modules[idx].out_channels),
                               3,
                               padding=1,
                               bias=True,
                               priors=self.priors)
        idx += 1
        self.act5 = self.act()
        self.conv6 = BBBConv2d(int(modules[idx].in_channels),
                               int(modules[idx].out_channels),
                               3,
                               padding=1,
                               bias=True,
                               priors=self.priors)
        idx += 1
        self.act6 = self.act()

        self.pool3 = nn.AdaptiveAvgPool2d((3, 3))

        self.flatten = FlattenLayer(3 * 3 * int(modules[idx - 1].out_channels))
        self.fc1 = BBBLinear(3 * 3 * int(modules[idx - 1].out_channels),
                             int(modules[idx].out_features),
                             bias=True,
                             priors=self.priors)
        idx += 1
        self.act7 = self.act()

        self.fc2 = BBBLinear(int(modules[idx].in_features),
                             int(modules[idx].out_features),
                             bias=True,
                             priors=self.priors)
        idx += 1
        self.act8 = self.act()

        self.fc3 = BBBLinear(int(modules[idx].in_features),
                             outputs,
                             bias=True,
                             priors=self.priors)
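The string test 'Conv2d' in str(module.__class__) works for the custom pruned layers here, but when the layers are standard torch modules the conventional form uses isinstance. A sketch of the same module scan:

import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Flatten(),
                      nn.Linear(8 * 26 * 26, 10))
modules = [m for m in model.modules()
           if isinstance(m, (nn.Conv2d, nn.Linear))]
print([type(m).__name__ for m in modules])  # ['Conv2d', 'Linear']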