Ejemplo n.º 1
0
    def __init__(self, num_inputs, action_space):
        """Build a two-tower (actor/critic) MLP policy.

        num_inputs: size of the observation vector fed to both towers.
        action_space: forwarded to get_distribution for the policy head.
        """
        super(MLPPolicy, self).__init__()

        self.hook = False
        self.action_space = action_space
        self.nNode = 128

        def build_tower(in_dim, width):
            # Two Tanh-activated hidden layers of equal width.
            return nn.Sequential(
                nn.Linear(in_dim, width),
                nn.Tanh(),
                nn.Linear(width, width),
                nn.Tanh(),
            )

        # Actor and critic share a topology but not their weights.
        self.actor = build_tower(num_inputs, self.nNode)
        self.critic = build_tower(num_inputs, self.nNode)

        # Scalar state-value head on the critic tower.
        self.critic_linear = nn.Linear(self.nNode, 1)
        # Action-distribution head on the actor tower.
        self.dist = get_distribution(self.nNode, action_space)

        self.train()
        self.reset_parameters()
Ejemplo n.º 2
0
    def __init__(self, num_inputs, action_space):
        """Embedding-lookup actor/critic base.

        Each tower is a single nn.Embedding (deeper layers are left
        commented out). After reset_parameters(), both embedding tables are
        overwritten with the identity matrix, making each tower a one-hot
        encoder of its integer input.

        num_inputs: vocabulary size of the embedding tables.
        action_space: forwarded to get_distribution for the policy head.
        """
        super(EmbBase, self).__init__()
        emb_dim = 500
        self.action_space = action_space
        emb0 = nn.Embedding(num_inputs, emb_dim)
        self.actor = nn.Sequential(
            emb0,
            # nn.Sigmoid(),
            # nn.Linear(emb_dim, emb_dim),
            # nn.Tanh()
        )
        emb1 = nn.Embedding(num_inputs, emb_dim)
        self.critic = nn.Sequential(
            emb1,
            # nn.Sigmoid(),
            # nn.Linear(emb_dim, emb_dim),
            # nn.Tanh()
        )

        # Scalar value head and action-distribution head over the embeddings.
        self.critic_linear = nn.Linear(emb_dim, 1)
        self.dist = get_distribution(emb_dim, action_space)

        self.train()
        self.reset_parameters()
        # NOTE(review): torch.eye(emb_dim) is (emb_dim, emb_dim); assigning it
        # to a (num_inputs, emb_dim) weight only fits when
        # num_inputs == emb_dim (== 500) — confirm callers guarantee this.
        # Assigned AFTER reset_parameters() so the identity survives it.
        emb0.weight.data = torch.eye(emb_dim)
        emb1.weight.data = torch.eye(emb_dim)
Ejemplo n.º 3
0
    def __init__(self, num_inputs, action_space, use_gru):
        """Convolutional base network with an optional GRU recurrent core.

        num_inputs: number of input image channels.
        action_space: forwarded to get_distribution for the policy head.
        use_gru: when true, adds a 512-unit GRUCell with orthogonal
            weights and zero biases.
        """
        super(CNNBase, self).__init__()

        relu_gain = nn.init.calculate_gain('relu')
        zero_bias = lambda x: nn.init.constant_(x, 0)

        # Orthogonal init with ReLU gain for the convolutional trunk.
        init_ = lambda m: init(m, nn.init.orthogonal_, zero_bias, relu_gain)

        self.main = nn.Sequential(
            init_(nn.Conv2d(num_inputs, 32, 8, stride=4)),
            nn.ReLU(),
            init_(nn.Conv2d(32, 64, 4, stride=2)),
            nn.ReLU(),
            init_(nn.Conv2d(64, 32, 3, stride=1)),
            nn.ReLU(),
            Flatten(),
            init_(nn.Linear(32 * 7 * 7, 512)),
            nn.ReLU())

        if use_gru:
            self.gru = nn.GRUCell(512, 512)
            # Orthogonal recurrent weights, zeroed biases.
            nn.init.orthogonal_(self.gru.weight_ih.data)
            nn.init.orthogonal_(self.gru.weight_hh.data)
            self.gru.bias_ih.data.fill_(0)
            self.gru.bias_hh.data.fill_(0)

        # Output head uses orthogonal init WITHOUT the ReLU gain.
        init_ = lambda m: init(m, nn.init.orthogonal_, zero_bias)

        self.critic_linear = init_(nn.Linear(512, 1))

        self.dist = get_distribution(512, action_space)

        self.train()
Ejemplo n.º 4
0
    def __init__(self, input_size, hidden_size, action_space, num_layers=1):
        """Feed-forward encoder followed by a GRU cell, with A2C heads.

        input_size: size of each observation vector.
        hidden_size: width of the recurrent hidden state.
        action_space: forwarded to get_distribution for the policy head.
        num_layers: stored on the instance; not used in construction here.
        """
        super(DirectRLModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.input_size = input_size
        self.action_space = action_space

        # Feed-forward feature extractor (no dropout): 512 -> 512 -> 20,
        # each layer followed by ReLU.
        encoder_layers = [
            nn.Linear(input_size, 512), nn.ReLU(),
            nn.Linear(512, 512), nn.ReLU(),
            nn.Linear(512, 20), nn.ReLU(),
        ]
        self.seq_no_dropout = nn.Sequential(*encoder_layers)
        self.apply(self.weights_init)

        # Recurrent core of the network.
        self.rnn = nn.GRUCell(20, hidden_size)

        # Critic head: state value v(s) for A2C.
        self.critic_linear = nn.Linear(hidden_size, 1)

        # Actor head deciding between Short (0), Neutral (1) or Buy (2) in A2C.
        self.dist = get_distribution(hidden_size, self.action_space)
Ejemplo n.º 5
0
def train(encoder, decoder, discriminator, opt):
    """Run one pass over the data training an adversarial autoencoder.

    Per batch, three sequential updates are performed (order matters):
      1. reconstruction: encoder+decoder minimise binary cross-entropy
         between the input image and its reconstruction;
      2. discriminator: learns to separate prior samples z_real from
         encoded z_fake (encoder output detached via no_grad);
      3. encoder/generator: learns to make its codes fool the discriminator.

    Returns the (reconstruct_loss, e_loss, d_loss) of the LAST batch only.

    NOTE(review): if the zipped loaders yield no batches, the return
    statement raises UnboundLocalError — confirm callers guarantee at
    least one batch.
    """
    # Fresh Adam optimisers are created on every call to train().
    encode_optimizer = Adam(encoder.parameters(), lr=0.001)
    decode_optimizer = Adam(decoder.parameters(), lr=0.001)
    discriminate_optimizer = Adam(discriminator.parameters(), lr=0.001)

    image_dataset = get_dataset(opt.data_name, opt.data_root, opt.image_size, train=True)
    image_loader = DataLoader(image_dataset, batch_size=opt.batch_size, shuffle=True)

    # Prior-distribution samples, one per image; reseed numpy in each worker
    # so workers don't all draw the same sequence.
    dist_dataset = get_distribution(opt.distribution, len(image_dataset), opt.num_classes)
    dist_loader = DataLoader(dist_dataset, batch_size=opt.batch_size, shuffle=True, worker_init_fn=lambda x: np.random.seed())

    encoder.train()
    decoder.train()
    discriminator.train()
    # zip() truncates to the shorter loader if their lengths differ.
    for i, ((image, image_label), (z_real, z_label)) in enumerate(zip(image_loader, dist_loader)):
        image = image.to(opt.device)
        image_label = image_label.to(opt.device)

        z_real = z_real.to(opt.device)
        z_label = z_label.to(opt.device)

        # reconstruct: pixel-wise binary cross-entropy, written out manually;
        # 1e-8 guards the logs against exact 0/1 outputs.
        encode_optimizer.zero_grad()
        decode_optimizer.zero_grad()
        output = decoder(encoder(image))
        reconstruct_loss = -torch.mean(image * torch.log(output + 1e-8) + (1 - image) * torch.log(1 - output + 1e-8))

        reconstruct_loss.backward()
        encode_optimizer.step()
        decode_optimizer.step()

        # discriminator: real codes from the prior vs. fake codes from the
        # encoder. no_grad() detaches the encoder so only the discriminator
        # is updated in this phase.
        encode_optimizer.zero_grad()
        discriminate_optimizer.zero_grad()

        with torch.no_grad():
            z_fake = encoder(image)

        d_real = discriminator(z_real, z_label)
        d_fake = discriminator(z_fake, image_label)
        # 0.02 scales the adversarial losses relative to reconstruction —
        # presumably a tuned weighting; confirm before changing.
        d_loss = -0.02 * torch.mean(torch.log(d_real + 1e-8) + torch.log(1 - d_fake + 1e-8))

        d_loss.backward()
        discriminate_optimizer.step()

        # encoder (generator) phase: re-encode WITH grad and push the
        # discriminator's output toward "real". Discriminator grads are
        # zeroed but its optimizer is never stepped here.
        encode_optimizer.zero_grad()
        discriminate_optimizer.zero_grad()

        z_fake = encoder(image)
        e_fake = discriminator(z_fake, image_label)
        e_loss = -0.02 * torch.mean(torch.log(e_fake + 1e-8))

        e_loss.backward()
        encode_optimizer.step()

    return reconstruct_loss, e_loss, d_loss
Ejemplo n.º 6
0
    def __init__(self,
                 num_actor_inputs,
                 num_critic_inputs,
                 action_space,
                 symm_policy=True,
                 use_seq=False,
                 cuda_use=False):
        """Two-tower MLP policy with separate actor/critic input sizes.

        num_actor_inputs: observation size fed to the actor tower.
        num_critic_inputs: observation size fed to the critic tower.
        action_space: forwarded to get_distribution for the policy head.
        symm_policy: stored on the instance (consumed elsewhere).
        use_seq: when true, sets a placeholder `seq` attribute (flag only).
        cuda_use: stored on the instance (consumed elsewhere).
        """
        super(MLPPolicy, self).__init__()

        self.action_space = action_space
        self.nNode = 512  # 64, 128
        self.hidden_dim = 512  # 64, 128
        self.cuda_use = cuda_use
        self.symm_policy = symm_policy

        if use_seq:
            self.seq = 0

        def _tower(num_inputs):
            # Three ReLU-activated layers. FIX: the 2nd/3rd layers now take
            # the PREVIOUS layer's output width (hidden_dim) as input where
            # the original hard-coded self.nNode — identical while
            # nNode == hidden_dim (both 512), and correct if they diverge.
            return nn.Sequential(
                nn.Linear(num_inputs, self.nNode),
                nn.ReLU(),
                nn.Linear(self.nNode, self.hidden_dim),
                nn.ReLU(),
                nn.Linear(self.hidden_dim, self.hidden_dim),
                nn.ReLU(),
            )

        # as input, (N, Cin, L), N: batch size, Cin: input size, L: length of signal seq
        self.actor = _tower(num_actor_inputs)
        self.critic = _tower(num_critic_inputs)

        # Scalar value head and action-distribution head.
        self.critic_linear = nn.Linear(self.hidden_dim, 1)
        self.dist = get_distribution(self.hidden_dim, action_space)

        self.train()
        self.reset_parameters()
Ejemplo n.º 7
0
    def __init__(self, num_inputs, action_space, use_gru, use_icm):
        """CNN policy built on a NatureHead trunk with optional extras.

        num_inputs: number of input image channels.
        action_space: forwarded to get_distribution for the policy head.
        use_gru: when true, adds a 512-unit GRU cell recurrent core.
        use_icm: when true, attaches an intrinsic-curiosity module.
        """
        super(CNNPolicy, self).__init__()
        feat_dim = 512

        self.head = NatureHead(num_inputs)

        # Optional recurrent core.
        if use_gru:
            self.gru = nn.GRUCell(feat_dim, feat_dim)
        # Optional intrinsic-curiosity module.
        if use_icm:
            self.icm = ICM(action_space, feat_dim, num_inputs)

        # Value and policy heads over the 512-d features.
        self.critic_linear = nn.Linear(feat_dim, 1)
        self.dist = get_distribution(feat_dim, action_space)

        self.train()
        self.reset_parameters()
Ejemplo n.º 8
0
Archivo: model.py Proyecto: gp1702/temp
    def __init__(self, num_inputs, action_space):
        """MLP policy with per-layer attributes for actor and critic.

        num_inputs: size of the observation vector for both branches.
        action_space: forwarded to get_distribution for the policy head.
        """
        super(MLPPolicy_linear, self).__init__()

        self.action_space = action_space
        width = 64

        # Actor branch: two fully connected layers.
        self.a_fc1 = nn.Linear(num_inputs, width)
        self.a_fc2 = nn.Linear(width, width)

        # Critic branch: two fully connected layers.
        self.v_fc1 = nn.Linear(num_inputs, width)
        self.v_fc2 = nn.Linear(width, width)

        # Value and policy heads.
        self.critic_linear = nn.Linear(width, 1)
        self.dist = get_distribution(width, action_space)

        self.train()
        self.reset_parameters()
Ejemplo n.º 9
0
Archivo: model.py Proyecto: gp1702/temp
    def __init__(self, num_inputs, action_space, use_gru):
        """Convolutional policy with per-layer attributes.

        num_inputs: number of input image channels.
        action_space: forwarded to get_distribution for the policy head.
        use_gru: when true, adds a 512-unit GRU cell recurrent core.
        """
        super(CNNPolicy, self).__init__()
        feat_dim = 512

        # Convolutional trunk (32 -> 64 -> 32 filters).
        self.conv1 = nn.Conv2d(num_inputs, 32, 8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, 4, stride=2)
        self.conv3 = nn.Conv2d(64, 32, 3, stride=1)

        # Projects the flattened conv features to feat_dim.
        # Assumes the final feature map is 32 x 7 x 7 — TODO confirm the
        # expected input resolution.
        self.linear1 = nn.Linear(32 * 7 * 7, feat_dim)

        if use_gru:
            self.gru = nn.GRUCell(feat_dim, feat_dim)

        # Value and policy heads.
        self.critic_linear = nn.Linear(feat_dim, 1)
        self.dist = get_distribution(feat_dim, action_space)

        self.train()
        self.reset_parameters()
Ejemplo n.º 10
0
    def __init__(self, num_inputs, action_space):
        """Four-conv-layer policy network.

        num_inputs: number of input image channels.
        action_space: forwarded to get_distribution for the policy head.
        """
        super(CONVPolicy, self).__init__()
        feat_dim = 512

        # image size: (180, 180)
        self.main = nn.Sequential(
            nn.Conv2d(num_inputs, 32, 8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, 5, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 32, 4, stride=2),
            nn.ReLU(),
            nn.Conv2d(32, 16, 3, stride=1),
            nn.ReLU(),
            Flatten(),
            nn.Linear(16 * 7 * 7, feat_dim),
            nn.ReLU())

        # Value and policy heads over the 512-d features.
        self.critic_linear = nn.Linear(feat_dim, 1)
        self.dist = get_distribution(feat_dim, action_space)

        self.train()
        self.reset_parameters()
Ejemplo n.º 11
0
    def __init__(self, num_inputs, action_space, use_gru):
        """Convolutional policy with an optional GRU recurrent core.

        num_inputs: number of input image channels.
        action_space: forwarded to get_distribution for the policy head.
        use_gru: when true, adds a 512-unit GRU cell.
        """
        super(CNNPolicy, self).__init__()
        feat_dim = 512

        # Convolutional trunk followed by a flatten and a linear projection.
        self.main = nn.Sequential(
            nn.Conv2d(num_inputs, 32, 8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, 4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 32, 3, stride=1),
            nn.ReLU(),
            Flatten(),
            nn.Linear(32 * 7 * 7, feat_dim),
            nn.ReLU())

        if use_gru:
            self.gru = nn.GRUCell(feat_dim, feat_dim)

        # Value and policy heads over the 512-d features.
        self.critic_linear = nn.Linear(feat_dim, 1)
        self.dist = get_distribution(feat_dim, action_space)

        self.train()
        self.reset_parameters()
Ejemplo n.º 12
0
    def __init__(self, num_inputs, action_space):
        """Two-tower MLP base with init_normc_ weight initialisation.

        num_inputs: size of the observation vector for both towers.
        action_space: forwarded to get_distribution for the policy head.
        """
        super(MLPBase, self).__init__()

        self.action_space = action_space
        width = 64

        def init_(m):
            # normc-style weights, zero bias.
            return init(m, init_normc_, lambda x: nn.init.constant_(x, 0))

        self.actor = nn.Sequential(
            init_(nn.Linear(num_inputs, width)),
            nn.Tanh(),
            init_(nn.Linear(width, width)),
            nn.Tanh())

        self.critic = nn.Sequential(
            init_(nn.Linear(num_inputs, width)),
            nn.Tanh(),
            init_(nn.Linear(width, width)),
            nn.Tanh())

        # Value and policy heads.
        self.critic_linear = init_(nn.Linear(width, 1))
        self.dist = get_distribution(width, action_space)

        self.train()
Ejemplo n.º 13
0
    def __init__(self, num_inputs, action_space, use_icm):
        """MLP policy with an optional intrinsic-curiosity module.

        num_inputs: size of the observation vector for both branches.
        action_space: forwarded to get_distribution for the policy head.
        use_icm: when true, attaches an ICM on raw (non-CNN) observations.
        """
        super(MLPPolicy, self).__init__()

        self.action_space = action_space
        width = 64

        if use_icm:
            self.icm = ICM(action_space, num_inputs, cnn_head=False)

        # Actor branch.
        self.a_fc1 = nn.Linear(num_inputs, width)
        self.a_fc2 = nn.Linear(width, width)

        # Critic branch.
        self.v_fc1 = nn.Linear(num_inputs, width)
        self.v_fc2 = nn.Linear(width, width)

        # Value and policy heads.
        self.critic_linear = nn.Linear(width, 1)
        self.dist = get_distribution(width, action_space)

        self.train()
        self.reset_parameters()
Ejemplo n.º 14
0
    def __init__(self,
                 num_actor_inputs,
                 num_critic_inputs,
                 action_space,
                 use_gru=False,
                 cuda_use=False):
        """LSTM actor/critic policy with separate input encoders.

        num_actor_inputs / num_critic_inputs: input sizes of the two towers.
        action_space: forwarded to get_distribution for the policy head.
        use_gru: when true, sets a placeholder `gru` attribute (flag only;
            no GRU module is constructed here).
        cuda_use: stored for use by init_hidden / callers.
        """
        super(RNNPolicy, self).__init__()

        self.action_space = action_space
        self.nNode = 64
        self.hidden_dim = 64
        self.cuda_use = cuda_use

        if use_gru == True:
            self.gru = 0  # placeholder flag, not an actual module

        # Actor: Tanh-activated encoder feeding a single-layer LSTM.
        self.actor = nn.Sequential(
            nn.Linear(num_actor_inputs, self.nNode),
            nn.Tanh())

        self.actor_lstm = nn.LSTM(self.nNode, self.hidden_dim, num_layers=1)
        self.a_lstm_hidden = self.init_hidden()

        # Critic: Tanh-activated encoder feeding a single-layer LSTM.
        self.critic = nn.Sequential(
            nn.Linear(num_critic_inputs, self.nNode),
            nn.Tanh())

        self.critic_lstm = nn.LSTM(self.nNode, self.hidden_dim, num_layers=1)
        self.c_lstm_hidden = self.init_hidden()

        # Value and policy heads over the LSTM hidden state.
        self.critic_linear = nn.Linear(self.hidden_dim, 1)
        self.dist = get_distribution(self.hidden_dim, action_space)

        self.train()
        self.reset_parameters()