def __init__(self, num_inputs=1, num_outputs=6, S_LEN=8):
    """Build a conv -> LSTM -> two-layer MLP trunk with actor/critic heads.

    Args:
        num_inputs: Input channel count of the first convolution.
        num_outputs: Output width of ``actor_linear``.
        S_LEN: Only used to size the LSTM input, ``128 * (S_LEN - 5)``.
    """
    super(agentNET, self).__init__()

    self.conv1 = nn.Conv2d(num_inputs, 32, (6, 3), stride=1)
    self.conv2 = nn.Conv2d(32, 64, (1, 3), stride=1)
    self.conv3 = nn.Conv2d(64, 128, (1, 2), stride=1)

    # Kernel widths 3, 3 and 2 (stride 1, no padding) trim the last axis
    # by 2, 2 and 1 respectively.
    lstm_in = 128 * (S_LEN - 2 - 2 - 1)
    self.lstm = nn.LSTMCell(lstm_in, 96)

    self.fc1 = nn.Linear(96, 48)
    self.fc2 = nn.Linear(48, 24)
    self.critic_linear = nn.Linear(24, 1)
    self.actor_linear = nn.Linear(24, num_outputs)

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init)

    scaled_layers = (
        (self.actor_linear, 0.01),
        (self.critic_linear, 1.0),
        (self.fc1, 1.0),
        (self.fc2, 1.0),
    )
    for layer, scale in scaled_layers:
        layer.weight.data = norm_col_init(layer.weight.data, scale)
        layer.bias.data.fill_(0)

    for bias in (self.lstm.bias_ih, self.lstm.bias_hh):
        bias.data.fill_(0)

    self.train()
def __init__(self, num_inputs, action_space):
    """Build four conv/max-pool stages, an LSTM cell and actor/critic heads.

    Args:
        num_inputs: Input channel count of the first convolution.
        action_space: Object whose ``n`` attribute sizes the actor head.
    """
    super(A3Clstm, self).__init__()

    self.conv1 = nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2)
    self.maxp1 = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(32, 32, 5, stride=1, padding=1)
    self.maxp2 = nn.MaxPool2d(2, 2)
    self.conv3 = nn.Conv2d(32, 64, 4, stride=1, padding=1)
    self.maxp3 = nn.MaxPool2d(2, 2)
    self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
    self.maxp4 = nn.MaxPool2d(2, 2)

    self.lstm = nn.LSTMCell(1024, 512)

    n_actions = action_space.n
    self.critic_linear = nn.Linear(512, 1)
    self.actor_linear = nn.Linear(512, n_actions)

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init)

    # Scale the conv weights by the ReLU gain.
    gain = nn.init.calculate_gain('relu')
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
        conv.weight.data.mul_(gain)

    for head, scale in ((self.actor_linear, 0.01), (self.critic_linear, 1.0)):
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.fill_(0)

    for bias in (self.lstm.bias_ih, self.lstm.bias_hh):
        bias.data.fill_(0)

    self.train()
def __init__(self, num_inputs, action_space):
    """Build a sequential conv feature extractor, an LSTM cell and heads.

    Args:
        num_inputs: Input channel count of the first convolution.
        action_space: Object whose ``n`` attribute sizes the actor head.
    """
    super(A3Clstm, self).__init__()

    # Each stage is conv -> 2x2 max-pool -> ReLU.
    stages = [
        nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2),
        nn.MaxPool2d(2, 2),
        nn.ReLU(),
        nn.Conv2d(32, 32, 5, stride=1, padding=1),
        nn.MaxPool2d(2, 2),
        nn.ReLU(),
        nn.Conv2d(32, 64, 4, stride=1, padding=1),
        nn.MaxPool2d(2, 2),
        nn.ReLU(),
        nn.Conv2d(64, 64, 3, stride=1, padding=1),
        nn.MaxPool2d(2, 2),
        nn.ReLU(),
    ]
    self.basic = nn.Sequential(*stages)

    self.lstm = nn.LSTMCell(1024, 512)

    n_actions = action_space.n
    self.critic_linear = nn.Linear(512, 1)
    self.actor_linear = nn.Linear(512, n_actions)

    # Generic pass over every submodule first, then per-head overrides.
    self.apply(weights_init)

    for head, scale in ((self.actor_linear, 0.01), (self.critic_linear, 1.0)):
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.fill_(0)

    for bias in (self.lstm.bias_ih, self.lstm.bias_hh):
        bias.data.fill_(0)

    self.train()
def __init__(self, num_inputs=1, num_outputs=5):
    """Build three unpadded convolutions, an LSTM cell, one FC layer and heads.

    Args:
        num_inputs: Input channel count of the first convolution.
        num_outputs: Output width of ``actor_linear``.
    """
    super(agentNET, self).__init__()

    self.conv1 = nn.Conv2d(num_inputs, 16, 4, stride=1, padding=0)
    self.conv2 = nn.Conv2d(16, 16, 3, stride=1, padding=0)
    self.conv3 = nn.Conv2d(16, 8, 3, stride=1, padding=0)

    self.lstm = nn.LSTMCell(312, 78)

    self.fc1 = nn.Linear(78, 20)
    self.critic_linear = nn.Linear(20, 1)
    self.actor_linear = nn.Linear(20, num_outputs)

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init)

    scaled_layers = (
        (self.actor_linear, 0.01),
        (self.critic_linear, 1.0),
        (self.fc1, 1.0),
    )
    for layer, scale in scaled_layers:
        layer.weight.data = norm_col_init(layer.weight.data, scale)
        layer.bias.data.fill_(0)

    for bias in (self.lstm.bias_ih, self.lstm.bias_hh):
        bias.data.fill_(0)

    self.train()
def __init__(self, embedding_length, classes=2):
    """Build a conv stack, an LSTM cell and a three-layer FC head.

    Args:
        embedding_length: Width of the first conv kernel and the LSTM input
            size.
        classes: Accepted but not used by this constructor.
    """
    super(SA_NET, self).__init__()

    # One 2-D convolution spanning the full embedding width, followed by
    # three 1-D convolutions.
    self.conv1 = nn.Conv2d(
        1, 256, (7, embedding_length), stride=1, padding=(3, 0))
    self.conv2 = nn.Conv1d(256, 64, 5, stride=1, padding=2)
    self.conv3 = nn.Conv1d(64, 256, 3, stride=1, padding=1)
    self.conv4 = nn.Conv1d(256, 16, 1, stride=1, padding=0)

    self.lstm = nn.LSTMCell(embedding_length, LSTM_Hidden_Size)

    # fc1 is sized for LSTM_Hidden_Size + CNN_Feature_Size inputs.
    fused_width = LSTM_Hidden_Size + CNN_Feature_Size
    self.fc1 = nn.Linear(fused_width, 128)
    self.fc2 = nn.Linear(128, 32)
    self.fc3 = nn.Linear(32, 1)

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init)

    for fc in (self.fc1, self.fc2, self.fc3):
        fc.weight.data = norm_col_init(fc.weight.data, 1.0)
        fc.bias.data.fill_(0)

    self.train()
def __init__(self, num_inputs, action_space, terminal_prediction,
             reward_prediction):
    """Build a 1-D conv trunk, an LSTM cell, heads and optional aux heads.

    Args:
        num_inputs: Input channel count of the first convolution.
        action_space: Object whose ``shape[0]`` sizes both actor heads.
        terminal_prediction: When truthy, create ``terminal_aux_head``;
            otherwise the attribute is ``None``.
        reward_prediction: When truthy, create ``reward_aux_head``;
            otherwise the attribute is ``None``.
    """
    super(A3C_CONV, self).__init__()

    self.conv1 = nn.Conv1d(num_inputs, 32, 3, stride=1, padding=1)
    self.lrelu1 = nn.LeakyReLU(0.1)
    self.conv2 = nn.Conv1d(32, 32, 3, stride=1, padding=1)
    self.lrelu2 = nn.LeakyReLU(0.1)
    self.conv3 = nn.Conv1d(32, 64, 2, stride=1, padding=1)
    self.lrelu3 = nn.LeakyReLU(0.1)
    self.conv4 = nn.Conv1d(64, 64, 1, stride=1)
    self.lrelu4 = nn.LeakyReLU(0.1)

    self.lstm = nn.LSTMCell(1600, 128)

    n_actions = action_space.shape[0]
    self.critic_linear = nn.Linear(128, 1)
    self.actor_linear = nn.Linear(128, n_actions)
    self.actor_linear2 = nn.Linear(128, n_actions)

    # Auxiliary heads each output a single value; both flags come from the
    # argument parser according to the original comment.
    self.terminal_aux_head = nn.Linear(128, 1) if terminal_prediction else None
    self.reward_aux_head = nn.Linear(128, 1) if reward_prediction else None

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init)

    gain = nn.init.calculate_gain('leaky_relu')
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
        conv.weight.data.mul_(gain)

    heads = [
        (self.actor_linear, 0.01),
        (self.actor_linear2, 0.01),
        (self.critic_linear, 1.0),
    ]
    if terminal_prediction:
        heads.append((self.terminal_aux_head, 1.0))
    if reward_prediction:
        heads.append((self.reward_aux_head, 1.0))

    for head, scale in heads:
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.fill_(0)

    for bias in (self.lstm.bias_ih, self.lstm.bias_hh):
        bias.data.fill_(0)

    self.train()
def __init__(self, num_inputs, action_space, pre_rnet='None'):
    """Build two LSTM cells, actor/critic heads and a representation net.

    Args:
        num_inputs: Accepted but not used by this constructor.
        action_space: Object whose ``n`` attribute sizes the actor head.
        pre_rnet: The string ``'None'`` creates a fresh ``RepresentNet``;
            any other value is formatted into the checkpoint path that is
            loaded with ``torch.load``.
    """
    super(A3Clstm, self).__init__()

    self.lstm_1 = nn.LSTMCell(1024, 512)
    self.lstm_2 = nn.LSTMCell(1024, 512)

    n_actions = action_space.n
    self.critic_linear = nn.Linear(512, 1)
    self.actor_linear = nn.Linear(512, n_actions)

    # r_net is attached after this call, so the apply pass does not reach it.
    self.apply(weights_init)

    for head, scale in ((self.actor_linear, 0.01), (self.critic_linear, 1.0)):
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.fill_(0)

    for cell in (self.lstm_1, self.lstm_2):
        cell.bias_ih.data.fill_(0)
        cell.bias_hh.data.fill_(0)

    if pre_rnet != 'None':
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        self.r_net = torch.load("pre_model/r_net_{}.pkl".format(pre_rnet))
        # self.c_net = torch.load("pre_model/c_net_{}.pkl".format(pre_rnet))
    else:
        self.r_net = RepresentNet()
        # self.c_net = TDClass()

    self.train()
def __init__(self, num_inputs, action_space, terminal_prediction,
             reward_prediction):
    """Build the conv/pool trunk, a 128-unit LSTM, heads and aux heads.

    Args:
        num_inputs: Input channel count of the first convolution.
        action_space: Object whose ``n`` attribute sizes the actor head.
        terminal_prediction: When truthy, create ``terminal_aux_head``;
            otherwise the attribute is ``None``.
        reward_prediction: When truthy, create ``reward_aux_head``;
            otherwise the attribute is ``None``.
    """
    super(A3Clstm, self).__init__()

    self.conv1 = nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2)
    self.maxp1 = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(32, 32, 5, stride=1, padding=1)
    self.maxp2 = nn.MaxPool2d(2, 2)
    self.conv3 = nn.Conv2d(32, 64, 4, stride=1, padding=1)
    self.maxp3 = nn.MaxPool2d(2, 2)
    self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
    self.maxp4 = nn.MaxPool2d(2, 2)

    # Hidden size was previously 512 (original author's note).
    self.lstm = nn.LSTMCell(1024, 128)

    n_actions = action_space.n
    self.critic_linear = nn.Linear(128, 1)  # previously 512 x 1
    self.actor_linear = nn.Linear(128, n_actions)

    # Auxiliary heads each output a single value; both flags come from the
    # argument parser according to the original comment.
    self.terminal_aux_head = nn.Linear(128, 1) if terminal_prediction else None
    self.reward_aux_head = nn.Linear(128, 1) if reward_prediction else None

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init)

    gain = nn.init.calculate_gain('relu')
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
        conv.weight.data.mul_(gain)

    heads = [(self.actor_linear, 0.01), (self.critic_linear, 1.0)]
    if terminal_prediction:
        heads.append((self.terminal_aux_head, 1.0))
    if reward_prediction:
        heads.append((self.reward_aux_head, 1.0))

    for head, scale in heads:
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.fill_(0)

    for bias in (self.lstm.bias_ih, self.lstm.bias_hh):
        bias.data.fill_(0)

    self.train()
def __init__(self, num_inputs, action_space, num_options, nnWidth):
    """Build linear critic, option-policy and per-option policy/termination layers.

    Every layer maps directly from ``num_inputs``; there is no hidden trunk.

    Args:
        num_inputs: Input feature count of every linear layer.
        action_space: Its ``n`` attribute sizes the per-option policy
            layers; if ``n`` is missing, ``len(action_space.sample())`` is
            used instead.
        num_options: Number of options; sizes the critic and option policy
            and the number of per-option policy/termination layers.
        nnWidth: Accepted for interface compatibility; not used here.
    """
    super(OCPGModel, self).__init__()
    self.numbInputs = num_inputs
    self.module_list = nn.ModuleList()

    try:
        num_outputs = action_space.n
    except AttributeError:
        num_outputs = len(action_space.sample())

    self.critic_linear = nn.Linear(num_inputs, num_options)
    self.module_list += [self.critic_linear]
    self.optionpolicy = nn.Linear(num_inputs, num_options)
    self.module_list += [self.optionpolicy]

    self.apply(weights_init)

    # The option-policy override must run AFTER the generic apply pass
    # (as the other heads do); previously it ran before and could be
    # overwritten by weights_init.
    self.optionpolicy.weight.data = norm_col_init(
        self.optionpolicy.weight.data, 0.01)
    self.optionpolicy.bias.data.fill_(0)

    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 1.0)
    self.critic_linear.bias.data.fill_(0)

    # Plain dicts index the per-option layers; module_list is what
    # registers them as submodules. They are created after the apply pass,
    # so only the explicit initialisation below touches them.
    self.policylayer = {}
    self.termlayer = {}
    for i in range(num_options):
        self.policylayer[i] = nn.Linear(num_inputs, num_outputs)
        self.module_list += [self.policylayer[i]]
        self.termlayer[i] = nn.Linear(num_inputs, 1)
        self.module_list += [self.termlayer[i]]

        self.policylayer[i].weight.data = norm_col_init(
            self.policylayer[i].weight.data, 0.01)
        self.policylayer[i].bias.data.fill_(0)
        self.termlayer[i].weight.data = norm_col_init(
            self.termlayer[i].weight.data, 0.01)
        self.termlayer[i].bias.data.fill_(0)

    self.train()
def __init__(self, input_dim, action_dim):
    """Build a three-layer fully connected policy network.

    Args:
        input_dim: Input feature count of ``fc1``.
        action_dim: Output width of ``fc3``.
    """
    super(PolicyNet, self).__init__()

    def _linear(n_in, n_out):
        # Create a layer and immediately replace its weights via
        # norm_col_init; biases keep nn.Linear's default initialisation.
        layer = nn.Linear(n_in, n_out)
        layer.weight.data = norm_col_init(layer.weight.data, 1)
        return layer

    self.fc1 = _linear(input_dim, 512)
    self.fc2 = _linear(512, 256)
    self.fc3 = _linear(256, action_dim)
def __init__(self, num_inputs, action_space):
    """Build the conv/pool trunk, a 100-unit bottleneck, three LSTM cells and heads.

    Args:
        num_inputs: Input channel count of the first convolution.
        action_space: Object whose ``n`` attribute sizes the actor head.
    """
    super(A3Clstm, self).__init__()

    self.conv1 = nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2)
    self.maxp1 = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(32, 32, 5, stride=1, padding=1)
    self.maxp2 = nn.MaxPool2d(2, 2)
    self.conv3 = nn.Conv2d(32, 64, 4, stride=1, padding=1)
    self.maxp3 = nn.MaxPool2d(2, 2)
    self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
    self.maxp4 = nn.MaxPool2d(2, 2)

    # Original author's note: 1024 = 64 * 64 / 4.
    self.flatten = nn.Linear(1024, 100)
    self.lstm = nn.LSTMCell(100, 100)

    n_actions = action_space.n
    # Critic: state -> value.
    self.critic_linear = nn.Linear(100, 1)
    # Actor: state -> action probabilities.
    self.actor_linear = nn.Linear(100, n_actions)

    # Encoder LSTM (state -> language for the actor) and decoder LSTM,
    # per the original comments.
    self.lstm_enc = nn.LSTMCell(100, 100)
    self.lstm_dec = nn.LSTMCell(100, 100)

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init)

    gain = nn.init.calculate_gain('relu')
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
        conv.weight.data.mul_(gain)

    for head, scale in ((self.actor_linear, 0.01), (self.critic_linear, 1.0)):
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.fill_(0)

    for cell in (self.lstm, self.lstm_enc, self.lstm_dec):
        cell.bias_ih.data.fill_(0)
        cell.bias_hh.data.fill_(0)

    self.train()
def __init__(self, num_inputs, action_space, num_options, nnWidth):
    """Build a three-layer MLP trunk, an LSTM cell, a critic and a policy head.

    Args:
        num_inputs: Input feature count of ``lin1``.
        action_space: Its ``n`` attribute is read but the value is not used
            by this constructor.
        num_options: Output width of both ``critic_linear`` and
            ``actionpolicy``.
        nnWidth: Hidden width of the trunk and the LSTM hidden size.
    """
    super(AClstm, self).__init__()
    self.module_list = nn.ModuleList()

    self.lin1 = nn.Linear(num_inputs, nnWidth)
    self.module_list += [self.lin1]
    self.lin2 = nn.Linear(nnWidth, nnWidth)
    self.module_list += [self.lin2]
    self.lin3 = nn.Linear(nnWidth, 2 * nnWidth)
    self.module_list += [self.lin3]

    self.lstm = nn.LSTMCell(2 * nnWidth, nnWidth)
    self.module_list += [self.lstm]

    # Kept so a missing ``n`` still raises as before.
    # NOTE(review): actionpolicy is sized by num_options, not by this
    # value -- confirm that is intended.
    num_outputs = action_space.n

    self.critic_linear = nn.Linear(nnWidth, num_options)
    self.module_list += [self.critic_linear]
    self.actionpolicy = nn.Linear(nnWidth, num_options)
    self.module_list += [self.actionpolicy]

    self.apply(weights_init)

    # The policy-head override must run AFTER the generic apply pass
    # (as the other layers do); previously it ran before and could be
    # overwritten by weights_init.
    self.actionpolicy.weight.data = norm_col_init(
        self.actionpolicy.weight.data, 0.01)
    self.actionpolicy.bias.data.fill_(0)

    for layer in (self.lin1, self.lin2, self.lin3, self.critic_linear):
        layer.weight.data = norm_col_init(layer.weight.data, 1.0)
        layer.bias.data.fill_(0)

    self.lstm.bias_ih.data.fill_(0)
    self.lstm.bias_hh.data.fill_(0)

    self.train()
def __init__(self, input_dim, action_dim):
    """Build a four-layer fully connected value network.

    Args:
        input_dim: Added to ``action_dim`` to size the input of ``fc1``.
        action_dim: Added to ``input_dim`` to size the input of ``fc1``.
    """
    super(ValueNet, self).__init__()

    def _linear(n_in, n_out):
        # Create a layer and immediately replace its weights via
        # norm_col_init; biases keep nn.Linear's default initialisation.
        layer = nn.Linear(n_in, n_out)
        layer.weight.data = norm_col_init(layer.weight.data, 1)
        return layer

    self.fc1 = _linear(input_dim + action_dim, 512)
    self.fc2 = _linear(512, 512)
    self.fc3 = _linear(512, 256)

    # The output layer's weights are drawn uniformly from [-EPS, EPS].
    self.fc4 = nn.Linear(256, 1)
    self.fc4.weight.data.uniform_(-EPS, EPS)
def __init__(self, input_dim, num=1):
    """Build a single linear critic layer.

    Args:
        input_dim: Input feature count.
        num: Output width of the critic layer.
    """
    super(ValueNet, self).__init__()

    critic = nn.Linear(input_dim, num)
    critic.weight.data = norm_col_init(critic.weight.data, 0.1)
    critic.bias.data.fill_(0)
    self.critic_linear = critic
def __init__(self, head_dim, args):
    """Build a three-layer fully connected gate with a two-way output.

    Args:
        head_dim: Input feature count of the first layer.
        args: Accepted but not used by this constructor.
    """
    super(Gate, self).__init__()

    gate_input_dim = head_dim
    self.feature_dim = 256

    def _linear(n_in, n_out):
        # Create a layer, replace its weights via norm_col_init and zero
        # its bias before it is attached to the module.
        layer = nn.Linear(n_in, n_out)
        layer.weight.data = norm_col_init(layer.weight.data, 0.1)
        layer.bias.data.fill_(0)
        return layer

    self.gate_fc1 = _linear(gate_input_dim, self.feature_dim)
    self.gate_fc2 = _linear(self.feature_dim, self.feature_dim)
    self.gate_fc3 = _linear(self.feature_dim, 2)
def __init__(self, observation_space, action_space, n_frames):
    """Build a four-layer MLP with a critic head and two actor heads.

    Args:
        observation_space: Object whose ``shape[0]`` sizes the input of
            ``fc1``.
        action_space: Stored on the instance; its ``shape[0]`` sizes both
            actor heads.
        n_frames: Accepted but not used by this constructor.
    """
    super(A3C_MLP, self).__init__()
    self.action_space = action_space

    # A frozen 1x1 linear layer; its weight and bias are overwritten with
    # zero tensors below.
    self.training_steps = nn.Linear(1, 1)
    for param in (self.training_steps.weight, self.training_steps.bias):
        param.requires_grad = False

    self.fc1 = nn.Linear(observation_space.shape[0], 256)
    self.lrelu1 = nn.LeakyReLU(0.1)
    self.fc2 = nn.Linear(256, 256)
    self.lrelu2 = nn.LeakyReLU(0.1)
    self.fc3 = nn.Linear(256, 128)
    self.lrelu3 = nn.LeakyReLU(0.1)
    self.fc4 = nn.Linear(128, 128)
    self.lrelu4 = nn.LeakyReLU(0.1)

    n_actions = action_space.shape[0]
    self.critic_linear = nn.Linear(128, 1)
    self.actor_linear = nn.Linear(128, n_actions)
    self.actor_linear2 = nn.Linear(128, action_space.shape[0])

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init_mlp)

    self.training_steps.weight.data = torch.Tensor([0])
    self.training_steps.bias.data = torch.Tensor([0])

    gain = nn.init.calculate_gain('leaky_relu')
    for fc in (self.fc1, self.fc2, self.fc3, self.fc4):
        fc.weight.data.mul_(gain)

    heads = (
        (self.actor_linear, 0.01),
        (self.actor_linear2, 0.01),
        (self.critic_linear, 1.0),
    )
    for head, scale in heads:
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.fill_(0)

    self.train()

    # Counters start at zero.
    self.success_num = 0
    self.done_nums = 0
def __init__(self, pca_dim=PCA[str(PCA_PERCENTAGE)], classes=CLASSES):
    """Build a single linear layer mapping ``pca_dim`` inputs to ``classes``.

    Args:
        pca_dim: Input feature count; the default is looked up in ``PCA``
            when the function is defined.
        classes: Output width; defaults to ``CLASSES``.
    """
    super(SVM, self).__init__()

    self.fc1 = nn.Linear(pca_dim, classes)

    # Generic pass first, then the explicit override for fc1.
    self.apply(weights_init)

    fc = self.fc1
    fc.weight.data = norm_col_init(fc.weight.data, 1.0)
    fc.bias.data.fill_(0)

    self.train()
def __init__(self, input_dim, head_name, num=1):
    """Build a critic layer, noisy when ``'ns'`` appears in ``head_name``.

    Args:
        input_dim: Input feature count.
        head_name: If it contains ``'ns'`` a ``NoisyLinear`` layer is used,
            otherwise a plain ``nn.Linear``.
        num: Output width of the critic layer.
    """
    super(ValueNet, self).__init__()

    self.noise = 'ns' in head_name
    if self.noise:
        self.critic_linear = NoisyLinear(input_dim, num, sigma_init=0.017)
    else:
        # NOTE(review): the original indentation was lost; the explicit
        # initialisation is assumed to belong to this branch only -- confirm.
        critic = nn.Linear(input_dim, num)
        critic.weight.data = norm_col_init(critic.weight.data, 0.1)
        critic.bias.data.fill_(0)
        self.critic_linear = critic
def __init__(self, input_dim, action_space, head_name):
    """Build two parallel actor heads for a discrete or continuous action space.

    Args:
        input_dim: Input feature count of both heads.
        action_space: ``n`` is read when ``head_name`` contains
            ``'discrete'``; otherwise ``shape[0]`` is read.
        head_name: Stored on the instance; selects the action-space kind.
    """
    super(PolicyNet, self).__init__()
    self.head_name = head_name

    is_discrete = 'discrete' in head_name
    self.continuous = not is_discrete
    num_outputs = action_space.n if is_discrete else action_space.shape[0]

    self.actor_linear = nn.Linear(input_dim, num_outputs)
    self.actor_linear2 = nn.Linear(input_dim, num_outputs)

    # Both heads get the same explicit initialisation.
    for head in (self.actor_linear, self.actor_linear2):
        head.weight.data = norm_col_init(head.weight.data, 0.1)
        head.bias.data.fill_(0)
def __init__(self, num_inputs, num_outputs):
    """Build a conv trunk, attention projections, an LSTM cell and heads.

    Args:
        num_inputs: Accepted but not used; the first convolution is fixed
            to one input channel.
        num_outputs: Output width of ``actor_linear``.
    """
    super(A3Clstm, self).__init__()

    self.conv1 = nn.Conv2d(1, 32, 3, stride=1, padding=1)
    self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
    self.conv3 = nn.Conv2d(32, 64, 3, stride=2, padding=1)
    self.conv4 = nn.Conv2d(64, 512, 3, stride=1, padding=1)

    # Attention layers are all sized by ctx_dim[1]; Wai and Wh carry no bias.
    ctx_width = ctx_dim[1]
    self.Wai = nn.Linear(ctx_width, ctx_width, bias=False)
    self.Wh = nn.Linear(512, ctx_width, bias=False)
    self.att = nn.Linear(ctx_width, 1)
    # self.fc = nn.Linear(ctx_dim[1] * 4 * 4, 256)

    self.lstm = nn.LSTMCell(ctx_width, 512)
    self.critic_linear = nn.Linear(512, 1)
    self.actor_linear = nn.Linear(512, num_outputs)

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init)

    for proj in (self.Wai, self.Wh, self.att):
        proj.weight.data = norm_col_init(proj.weight.data, 1.0)
    self.att.bias.data.fill_(0)

    for head, scale in ((self.actor_linear, 0.01), (self.critic_linear, 1.0)):
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.fill_(0)

    for bias in (self.lstm.bias_ih, self.lstm.bias_hh):
        bias.data.fill_(0)

    self.train()
def __init__(self, num_inputs, action_space, n_frames):
    """Build a four-layer MLP with heads and per-parameter learning rates.

    Args:
        num_inputs: Input feature count of ``fc1``.
        action_space: Object whose ``shape[0]`` sizes both actor heads.
        n_frames: Multiplied by 128 and stored as ``m1``.
    """
    super(A3C_MLP, self).__init__()

    self.fc1 = nn.Linear(num_inputs, 256)
    self.lrelu1 = nn.LeakyReLU(0.1)
    self.fc2 = nn.Linear(256, 256)
    self.lrelu2 = nn.LeakyReLU(0.1)
    self.fc3 = nn.Linear(256, 128)
    self.lrelu3 = nn.LeakyReLU(0.1)
    self.fc4 = nn.Linear(128, 128)
    self.lrelu4 = nn.LeakyReLU(0.1)

    self.m1 = n_frames * 128

    n_actions = action_space.shape[0]
    self.critic_linear = nn.Linear(128, 1)
    self.actor_linear = nn.Linear(128, n_actions)
    self.actor_linear2 = nn.Linear(128, n_actions)

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init_mlp)

    gain = nn.init.calculate_gain('leaky_relu')
    for fc in (self.fc1, self.fc2, self.fc3, self.fc4):
        fc.weight.data.mul_(gain)

    # Head biases are drawn from a zero-mean normal whose std equals the
    # weight scale, rather than being zero-filled.
    heads = (
        (self.actor_linear, 0.01),
        (self.actor_linear2, 0.01),
        (self.critic_linear, 1.0),
    )
    for head, scale in heads:
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.normal_(0, scale)

    self.train()

    # One random learning rate in [0, 0.001) per parameter tensor.
    param_count = sum(1 for _ in self.parameters())
    self.learning_rates = torch.rand(param_count) * 0.001
def __init__(self, input_dim, head_name, num=1, device=torch.device('cpu')):
    """Build a critic layer and an attention layer.

    Args:
        input_dim: Feature width; stored as ``feature_dim``.
        head_name: Stored on the instance. ``'onlyJ'`` selects a critic
            input of ``input_dim``; otherwise it is ``2 * input_dim``.
        num: Output width of the critic layer.
        device: Stored on the instance and passed to ``AttentionLayer``.
    """
    super(AMCValueNet, self).__init__()
    self.head_name = head_name
    self.device = device

    if 'ns' in head_name:
        self.noise = True
        self.critic_linear = NoisyLinear(input_dim, num, sigma_init=0.017)

    # NOTE(review): the source has a plain ``if`` (not ``elif``) here, so
    # the noisy layer above is always replaced and ``noise`` always ends up
    # False -- confirm this is intended. Original indentation was lost, so
    # the flat if / if-else reading is assumed.
    critic_in = input_dim if 'onlyJ' in head_name else 2 * input_dim
    self.noise = False
    critic = nn.Linear(critic_in, num)
    critic.weight.data = norm_col_init(critic.weight.data, 0.1)
    critic.bias.data.fill_(0)
    self.critic_linear = critic

    self.attention = AttentionLayer(input_dim, input_dim, device)
    self.feature_dim = input_dim
def __init__(self, num_inputs, action_space, quantile_embedding_dim=64,
             num_quantiles=32):
    """Build the conv/pool trunk plus quantile, middle and critic layers.

    Shape comments are the original author's annotations for a
    ``[1, 1, 80, 80]`` input.

    Args:
        num_inputs: Input channel count of the first convolution.
        action_space: Object whose ``n`` attribute sizes the critic output.
        quantile_embedding_dim: Stored on the instance.
        num_quantiles: Stored on the instance.
    """
    super(A3Clstm, self).__init__()
    self.quantile_embedding_dim = quantile_embedding_dim
    self.num_quantiles = num_quantiles

    self.conv1 = nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2)  # [1,32,80,80]
    self.maxp1 = nn.MaxPool2d(2, 2)  # [1,32,40,40]
    self.conv2 = nn.Conv2d(32, 32, 5, stride=1, padding=1)  # [1,32,38,38]
    self.maxp2 = nn.MaxPool2d(2, 2)  # [1,32,19,19]
    self.conv3 = nn.Conv2d(32, 64, 4, stride=1, padding=1)
    self.maxp3 = nn.MaxPool2d(2, 2)  # [1,64,9,9]
    self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
    self.maxp4 = nn.MaxPool2d(2, 2)  # [1,64,4,4]

    # The LSTM cell and actor head of the sibling variants are disabled here.
    n_actions = action_space.n
    self.critic_linear = nn.Linear(512, n_actions)

    # NOTE(review): the input width is the literal 64, not
    # quantile_embedding_dim -- confirm non-default values are supported.
    self.quantile_linear = nn.Linear(64, 1024)
    self.middle_linear = nn.Linear(1024, 512)

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init)

    gain = nn.init.calculate_gain('relu')
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
        conv.weight.data.mul_(gain)

    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 0.01)
    self.critic_linear.bias.data.fill_(0)

    for layer in (self.quantile_linear, self.middle_linear):
        layer.weight.data = unif_col_init(
            layer.weight.data, std=1.0 / np.sqrt(3.0))
        layer.bias.data.fill_(0)

    self.train()
def process_output(rnn_out, outputs, a_size, num_units):
    """Create the policy and value output layers on top of the RNN output.

    Args:
        rnn_out: Tensor fed to both fully connected output layers.
        outputs: Accepted but not used by this function.
        a_size: Number of actions; sizes the one-hot encoding and the
            policy layer.
        num_units: Accepted but not used by this function.

    Returns:
        Tuple ``(actions, actions_onehot, policy, value)``: the int32
        action placeholder, its one-hot encoding, the softmax policy layer
        and the linear value layer.
    """
    # Action placeholder and its one-hot encoding.
    actions = tf.placeholder(shape=[None], dtype=tf.int32)
    actions_onehot = tf.one_hot(actions, a_size, dtype=tf.float32)

    def _head(width, activation, init_scale):
        # Bias-free fully connected layer on top of rnn_out.
        return slim.fully_connected(
            rnn_out,
            width,
            activation_fn=activation,
            weights_initializer=ut.norm_col_init(init_scale),
            biases_initializer=None)

    # Policy is a softmax over actions; value is a single linear output.
    policy = _head(a_size, tf.nn.softmax, 0.01)
    value = _head(1, None, 1.0)

    return actions, actions_onehot, policy, value
def __init__(self, num_inputs, action_space, n_frames):
    """Build a four-layer MLP, an LSTM cell, a critic and two actor heads.

    Args:
        num_inputs: Input feature count of ``fc1``.
        action_space: Object whose ``shape[0]`` sizes both actor heads.
        n_frames: Multiplied by 128 to give the LSTM input size ``m1``.
    """
    super(A3C_MLP, self).__init__()

    self.fc1 = nn.Linear(num_inputs, 256)
    self.lrelu1 = nn.LeakyReLU(0.1)
    self.fc2 = nn.Linear(256, 256)
    self.lrelu2 = nn.LeakyReLU(0.1)
    self.fc3 = nn.Linear(256, 128)
    self.lrelu3 = nn.LeakyReLU(0.1)
    self.fc4 = nn.Linear(128, 128)
    self.lrelu4 = nn.LeakyReLU(0.1)

    self.m1 = n_frames * 128
    self.lstm = nn.LSTMCell(self.m1, 128)

    n_actions = action_space.shape[0]
    self.critic_linear = nn.Linear(128, 1)
    self.actor_linear = nn.Linear(128, n_actions)
    self.actor_linear2 = nn.Linear(128, n_actions)

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init_mlp)

    gain = nn.init.calculate_gain('leaky_relu')
    for fc in (self.fc1, self.fc2, self.fc3, self.fc4):
        fc.weight.data.mul_(gain)

    heads = (
        (self.actor_linear, 0.01),
        (self.actor_linear2, 0.01),
        (self.critic_linear, 1.0),
    )
    for head, scale in heads:
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.fill_(0)

    for bias in (self.lstm.bias_ih, self.lstm.bias_hh):
        bias.data.fill_(0)

    self.train()
def __init__(self, num_inputs, action_space):
    """Build a 1-D conv trunk, an LSTM cell, a critic and two actor heads.

    Args:
        num_inputs: Input channel count of the first convolution.
        action_space: Object whose ``shape[0]`` sizes both actor heads.
    """
    super(A3C_CONV, self).__init__()

    self.conv1 = nn.Conv1d(num_inputs, 32, 3, stride=1, padding=1)
    self.lrelu1 = nn.LeakyReLU(0.1)
    self.conv2 = nn.Conv1d(32, 32, 3, stride=1, padding=1)
    self.lrelu2 = nn.LeakyReLU(0.1)
    self.conv3 = nn.Conv1d(32, 64, 2, stride=1, padding=1)
    self.lrelu3 = nn.LeakyReLU(0.1)
    self.conv4 = nn.Conv1d(64, 64, 1, stride=1)
    self.lrelu4 = nn.LeakyReLU(0.1)

    self.lstm = nn.LSTMCell(1600, 128)

    n_actions = action_space.shape[0]
    self.critic_linear = nn.Linear(128, 1)
    self.actor_linear = nn.Linear(128, n_actions)
    self.actor_linear2 = nn.Linear(128, n_actions)

    # Generic pass over every submodule first, then per-layer overrides.
    self.apply(weights_init)

    gain = nn.init.calculate_gain('leaky_relu')
    for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
        conv.weight.data.mul_(gain)

    heads = (
        (self.actor_linear, 0.01),
        (self.actor_linear2, 0.01),
        (self.critic_linear, 1.0),
    )
    for head, scale in heads:
        head.weight.data = norm_col_init(head.weight.data, scale)
        head.bias.data.fill_(0)

    for bias in (self.lstm.bias_ih, self.lstm.bias_hh):
        bias.data.fill_(0)

    self.train()
def __init__(self, pca_dim=453, classes=76):
    """Build a five-layer fully connected classifier.

    Args:
        pca_dim: Input feature count of ``fc1``.
        classes: Output width of ``fc5``.
    """
    super(NET, self).__init__()

    self.fc1 = nn.Linear(pca_dim, 128)
    self.fc2 = nn.Linear(128, 256)
    self.fc3 = nn.Linear(256, 256)
    self.fc4 = nn.Linear(256, 128)
    self.fc5 = nn.Linear(128, classes)

    # Generic pass first, then the same explicit override for every layer.
    self.apply(weights_init)

    for fc in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
        fc.weight.data = norm_col_init(fc.weight.data, 1.0)
        fc.bias.data.fill_(0)

    self.train()
def __init__(self, outdim, action_space, lstm_out=128, head_name='cnn_lstm',
             stack_frames=1):
    """Build two actor heads and a critic head on a shared feature width.

    Args:
        outdim: Feature width used when ``head_name`` has no ``'lstm'``.
        action_space: ``n`` is read when ``head_name`` contains
            ``'discrete'``; otherwise ``shape[0]`` is read.
        lstm_out: Feature width used when ``head_name`` contains ``'lstm'``.
        head_name: Stored on the instance; selects feature width, action
            kind and (via ``'mc'``) the critic width.
        stack_frames: Accepted but not used by this constructor.
    """
    super(Policy, self).__init__()
    self.head_name = head_name

    feature_dim = lstm_out if 'lstm' in self.head_name else outdim

    # Actor heads.
    if 'discrete' in head_name:
        num_outputs = action_space.n
    else:
        num_outputs = action_space.shape[0]

    self.actor_linear = nn.Linear(feature_dim, num_outputs)
    self.actor_linear2 = nn.Linear(feature_dim, num_outputs)
    for head in (self.actor_linear, self.actor_linear2):
        head.weight.data = norm_col_init(head.weight.data, 0.1)
        head.bias.data.fill_(0)

    # Critic head: one output per action when 'mc' is in head_name,
    # otherwise a single output.
    critic_width = num_outputs if 'mc' in head_name else 1
    self.critic_linear = nn.Linear(feature_dim, critic_width)
    self.critic_linear.weight.data = norm_col_init(
        self.critic_linear.weight.data, 0.1)
    self.critic_linear.bias.data.fill_(0)
def __init__(self, obs_space, action_space, rnn_out=128, head_name='cnn_lstm',
             stack_frames=1, dim_action_tracker=-1, device=None):
    """Build encoder, optional recurrent cell, actor, critic and aux layers.

    Args:
        obs_space: Passed to the selected ``perception`` encoder.
        action_space: Passed to ``PolicyNet``.
        rnn_out: Hidden size of the LSTM/GRU cell when one is created.
        head_name: Stored on the instance; substrings ``'cnn'``, ``'icml'``,
            ``'maze'``, ``'lstm'`` and ``'gru'`` select components.
        stack_frames: Passed to the selected encoder.
        dim_action_tracker: Input width of ``fc_action_tracker``; a value
            greater than zero also enables the reward sub-task.
        device: Passed to ``PolicyNet``.
    """
    super(TAT, self).__init__()
    self.sub_task = bool(dim_action_tracker > 0)
    self.head_name = head_name

    # Independent checks: when several substrings match, the last wins.
    # NOTE(review): if none match, self.encoder is never set and the next
    # attribute access fails -- confirm callers always pass a valid name.
    if 'cnn' in head_name:
        self.encoder = perception.CNN_simple(obs_space, stack_frames)
    if 'icml' in head_name:
        self.encoder = perception.ICML(obs_space, stack_frames)
    if 'maze' in head_name:
        self.encoder = perception.CNN_maze(obs_space, stack_frames)
    feature_dim = self.encoder.outdim

    # The recurrent cell is stored as ``lstm`` for both LSTM and GRU.
    if 'lstm' in head_name:
        cell = nn.LSTMCell(feature_dim, rnn_out)
        cell.bias_ih.data.fill_(0)
        cell.bias_hh.data.fill_(0)
        self.lstm = cell
        feature_dim = rnn_out
    if 'gru' in head_name:
        cell = nn.GRUCell(feature_dim, rnn_out)
        cell.bias_ih.data.fill_(0)
        cell.bias_hh.data.fill_(0)
        self.lstm = cell
        feature_dim = rnn_out

    # Actor and critic heads.
    self.actor = PolicyNet(feature_dim, action_space, head_name, device)
    self.critic = ValueNet(feature_dim)

    # NOTE(review): built even when dim_action_tracker keeps its default
    # of -1 -- confirm nn.Linear accepts that in the targeted torch version.
    self.fc_action_tracker = nn.Linear(
        dim_action_tracker, self.encoder.outdim)
    weights_init_mlp(self.fc_action_tracker)

    # Reward-prediction sub-task head.
    if self.sub_task:
        aux = nn.Linear(feature_dim, 1)
        aux.weight.data = norm_col_init(aux.weight.data, 0.01)
        aux.bias.data.fill_(0)
        self.reward_aux = aux

    # NOTE(review): this generic pass runs last, after the explicit
    # initialisations above, and may overwrite them -- confirm ordering.
    self.apply(weights_init)
    self.train()
def __init__(self, input_dim, action_space, head_name, device):
    """Build an actor layer, noisy when ``'ns'`` appears in ``head_name``.

    Args:
        input_dim: Input feature count.
        action_space: Object whose ``n`` attribute sizes the actor output.
        head_name: Stored on the instance; if it contains ``'ns'`` a
            ``NoisyLinear`` layer is used, otherwise a plain ``nn.Linear``.
        device: Stored on the instance.
    """
    super(PolicyNet, self).__init__()
    self.head_name = head_name
    self.device = device

    n_actions = action_space.n

    self.noise = 'ns' in head_name
    if self.noise:
        self.actor_linear = NoisyLinear(
            input_dim, n_actions, sigma_init=0.017)
    else:
        # NOTE(review): the original indentation was lost; the explicit
        # initialisation is assumed to belong to this branch only -- confirm.
        actor = nn.Linear(input_dim, n_actions)
        actor.weight.data = norm_col_init(actor.weight.data, 0.1)
        actor.bias.data.fill_(0)
        self.actor_linear = actor