def __init__(self, gamma: float):
    """Create the CartPole-v1 environment and a softmax policy network.

    Builds a fully connected network (three Linear(16)+ReLU stages, a
    Linear layer sized to the action space, then Softmax over dim 1),
    wraps it in ``PolicyGradientNN`` and hands it to the parent
    initialiser.

    Args:
        gamma: Forwarded unchanged to the parent initialiser as ``gamma``.
    """
    # Load the CartPole environment.
    self.env = gym.make('CartPole-v1')

    # Actions available to the agent.
    self.list_action = np.array([0, 1])

    # Network input/output sizes are read from the wrapped environment.
    n_inputs = self.env.env.observation_space.shape[0]
    n_outputs = self.env.env.action_space.n

    # Hidden stack: fc1/relu1 .. fc3/relu3, each Linear layer of size 16.
    layers = []
    for idx in (1, 2, 3):
        layers.append(Layer(f"fc{idx}", torch.nn.Linear, 16, None, (), {}))
        layers.append(Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
    layers.append(Layer("fc4", torch.nn.Linear, n_outputs, None, (), {}))
    layers.append(Layer("output", torch.nn.Softmax, None, None, (), {"dim": 1}))
    network = TorchNN(n_inputs, *layers)

    agent_policy = PolicyGradientNN(
        network,
        self.list_action,
        batch_size=-1,
        lr=0.001,
    )
    # NOTE(review): to_cuda() is called unconditionally; presumably this
    # requires a CUDA device -- confirm against PolicyGradientNN.
    agent_policy.to_cuda()
    super().__init__(
        policy=agent_policy,
        list_action=self.list_action,
        gamma=gamma,
    )

    # Others
    self.done = False
def __init__(self, gamma: float, epsilon: float):
    """Create the CartPole-v0 environment and a DQN value function.

    Builds a fully connected network (three Linear(16)+ReLU stages and a
    final Linear layer sized to the action space), wraps it in ``DQN``
    and passes it to ``QLearn.__init__``.

    Args:
        gamma: Forwarded to ``DQN`` as ``gamma``.
        epsilon: Forwarded to ``QLearn.__init__`` as ``epsilon``.
    """
    # Load the CartPole environment.
    self.env = gym.make('CartPole-v0')

    # Actions available to the agent.
    self.list_action = np.array([0, 1])

    # Network input/output sizes are read from the wrapped environment.
    wrapped_env = self.env.env
    n_inputs = wrapped_env.observation_space.shape[0]
    n_outputs = wrapped_env.action_space.n

    # Value function network: fc1/relu1 .. fc3/relu3, then fc4.
    hidden_stack = [
        layer
        for idx in (1, 2, 3)
        for layer in (
            Layer(f"fc{idx}", torch.nn.Linear, 16, None, (), {}),
            Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}),
        )
    ]
    head = Layer("fc4", torch.nn.Linear, n_outputs, None, (), {})
    network = TorchNN(n_inputs, *hidden_stack, head)

    value_func = DQN(
        network,
        self.list_action,
        gamma=gamma,
        batch_size=128,
        capacity=10000,
        lr=0.0001,
    )
    # NOTE(review): to_cuda() is called unconditionally; presumably this
    # requires a CUDA device -- confirm against DQN.
    value_func.to_cuda()
    QLearn.__init__(self, qfunc=value_func, epsilon=epsilon)

    # Others
    self.done = False
def __init__(self, epsilon: float, gamma: float,
             file_csv: str = "../data/s59h30megacities_utf8.csv",
             n_capital: int = None):
    """Set up the TSP model with a fully connected DQN.

    Args:
        epsilon: Forwarded to ``QLearn.__init__`` as ``epsilon``.
        gamma: Forwarded to ``DQN`` as ``gamma``.
        file_csv: Forwarded to ``TSPModelBase.__init__``.
        n_capital: Forwarded to ``TSPModelBase.__init__``.
    """
    # Initialise the base class first; per the original note this is what
    # loads self.df.
    TSPModelBase.__init__(self, file_csv=file_csv, n_capital=n_capital)

    # Actions: the unique "capital_en" values in a random order.
    capitals = self.df["capital_en"].unique()
    self.list_action = np.random.permutation(capitals)

    # State definition: both entries are registered over the action list.
    self.state_mng = StateManager()
    for state_name, encoding in (
        ("country", "onehot"),
        ("history", "onehot_binary"),
    ):
        self.state_mng.set_state(
            state_name, state_type=encoding, state_list=self.list_action
        )

    # DQN definition: fcN / normN / reluN for N = 1..3, then the fc4 head.
    layers = []
    for idx, width in enumerate((128, 256, 128), start=1):
        layers.extend([
            Layer(f"fc{idx}", torch.nn.Linear, width, None, (), {}),
            Layer(f"norm{idx}", torch.nn.BatchNorm1d, 0, None, (), {}),
            Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}),
        ])
    layers.append(
        Layer("fc4", torch.nn.Linear, len(self.list_action), None, (), {})
    )
    network = TorchNN(len(self.state_mng), *layers)

    value_func = DQN(
        network,
        self.list_action,
        gamma=gamma,
        batch_size=128,
        capacity=1000,
    )
    # NOTE(review): to_cuda() is called unconditionally; presumably this
    # requires a CUDA device -- confirm against DQN.
    value_func.to_cuda()
    QLearn.__init__(self, qfunc=value_func, epsilon=epsilon)
def __init__(self, gamma: float,
             file_csv: str = "../data/s59h30megacities_utf8.csv",
             n_capital: int = None):
    """Set up the TSP model with a softmax policy-gradient network.

    Args:
        gamma: Forwarded to ``PolicyGradient.__init__`` as ``gamma``.
        file_csv: Forwarded to ``TSPModelBase.__init__``.
        n_capital: Forwarded to ``TSPModelBase.__init__``.
    """
    # Initialise the base class first; per the original note this is what
    # loads self.df.
    TSPModelBase.__init__(self, file_csv=file_csv, n_capital=n_capital)

    # Actions: the unique "capital_en" values in a random order.
    capitals = self.df["capital_en"].unique()
    self.list_action = np.random.permutation(capitals)

    # State definition. "history" folds the record of visited countries
    # into the state (per the original comment).
    self.state_mng = StateManager()
    for state_name, encoding in (
        ("country", "onehot"),
        ("history", "onehot_binary"),
    ):
        self.state_mng.set_state(
            state_name, state_type=encoding, state_list=self.list_action
        )
    self.state_mng.set_state("is_last", state_type="binary")

    # Network definition: fc1/norm1, fc2/norm2 (no activation in between),
    # then fc3 sized to the action list and a softmax over dim 1.
    layers = []
    for idx in (1, 2):
        layers.append(Layer(f"fc{idx}", torch.nn.Linear, 64, None, (), {}))
        layers.append(Layer(f"norm{idx}", torch.nn.BatchNorm1d, 0, None, (), {}))
    layers.append(
        Layer("fc3", torch.nn.Linear, len(self.list_action), None, (), {})
    )
    layers.append(Layer("soft", torch.nn.Softmax, None, None, (), {"dim": 1}))
    network = TorchNN(len(self.state_mng), *layers)
    # NOTE: a call to network.set_weight(0.01) was commented out in the
    # original and remains disabled here.

    agent_policy = PolicyGradientNN(
        network,
        self.list_action,
        unit_memory=None,
        lr=0.001,
    )
    # NOTE(review): to_cuda() is called unconditionally; presumably this
    # requires a CUDA device -- confirm against PolicyGradientNN.
    agent_policy.to_cuda()
    PolicyGradient.__init__(
        self,
        gamma=gamma,
        policy=agent_policy,
        list_action=self.list_action,
    )

    self.action_pprev = None

    # Parameters that allow the tour to loop back (per the original
    # comment).
    self.is_back = False
    self.first_country = None
def __init__(self, epsilon: float, gamma: float,
             file_csv: str = "../data/s59h30megacities_utf8.csv",
             n_capital: int = None):
    """Set up the TSP model with a convolutional DQN.

    Args:
        epsilon: Forwarded to ``QLearn.__init__`` as ``epsilon``.
        gamma: Forwarded to ``DQN`` as ``gamma``.
        file_csv: Forwarded to ``TSPModelBase.__init__``.
        n_capital: Forwarded to ``TSPModelBase.__init__``.
    """
    # Initialise the base class first; per the original note this is what
    # loads self.df.
    TSPModelBase.__init__(self, file_csv=file_csv, n_capital=n_capital)

    # Actions: the unique "capital_en" values in a random order.
    capitals = self.df["capital_en"].unique()
    self.list_action = np.random.permutation(capitals)

    # DQN definition.
    # Convolutional front end: (conv size, conv kernel, pool kernel) for
    # stages 1..3. In every stage the stride equals the kernel size.
    conv_stages = ((128, 5, 2), (128, 3, 2), (256, 2, 3))
    layers = []
    for idx, (size, conv_k, pool_k) in enumerate(conv_stages, start=1):
        layers.append(Layer(f"conv{idx}", torch.nn.Conv2d, size, None, (), {
            "kernel_size": conv_k,
            "stride": conv_k,
        }))
        layers.append(Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
        layers.append(Layer(f"pool{idx}", torch.nn.MaxPool2d, None, None, (), {
            "kernel_size": pool_k,
            "stride": pool_k,
        }))

    # Identity layer tagged "reshape(x,-1)" with size 256 * 1 * 2;
    # presumably flattens the conv output -- confirm against TorchNN.
    layers.append(
        Layer("view6", torch.nn.Identity, 256 * 1 * 2, "reshape(x,-1)", (), {})
    )

    # Fully connected tail: fcN / normN / reluN for N = 7..9.
    for idx, width in zip((7, 8, 9), (256, 128, 64)):
        layers.append(Layer(f"fc{idx}", torch.nn.Linear, width, None, (), {}))
        layers.append(Layer(f"norm{idx}", torch.nn.BatchNorm1d, 0, None, (), {}))
        layers.append(Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
    layers.append(
        Layer("output", torch.nn.Linear, len(self.list_action), None, (), {})
    )

    # First TorchNN argument is 3 here (presumably the number of input
    # channels -- confirm against TorchNN).
    network = TorchNN(3, *layers)

    value_func = DQN(
        network,
        self.list_action,
        gamma=gamma,
        batch_size=128,
        capacity=1000,
        lr=0.001,
    )
    # NOTE(review): to_cuda() is called unconditionally; presumably this
    # requires a CUDA device -- confirm against DQN.
    value_func.to_cuda()
    QLearn.__init__(self, qfunc=value_func, epsilon=epsilon)
def __init__(self, epsilon: float, gamma: float,
             file_csv: str = "../data/s59h30megacities_utf8.csv",
             n_capital: int = None):
    """Initialise via the parent, then install an LSTM-based DQN.

    The parent initialiser runs first; this method relies on it having set
    ``self.state_mng`` and ``self.list_action``. ``self.qfunc`` is then
    replaced with a DQN built around an LSTM network.

    Args:
        epsilon: Forwarded positionally to the parent initialiser.
        gamma: Forwarded positionally to the parent initialiser and to
            ``DQN`` as ``gamma``.
        file_csv: Forwarded to the parent initialiser.
        n_capital: Forwarded to the parent initialiser.
    """
    super(TSPModel5, self).__init__(
        epsilon, gamma, file_csv=file_csv, n_capital=n_capital
    )

    # DQN definition.
    # Recurrent front end: the LSTM followed by three Identity layers that
    # carry the tags "rnn_outonly", "call_options" and "rnn_all" (their
    # handling lives in TorchNN and is not visible here).
    layers = [Layer("lstm", torch.nn.LSTM, 128, None, (), {})]
    for calc_name, calc_tag in (
        ("calc1", "rnn_outonly"),
        ("calc2", "call_options"),
        ("calc3", "rnn_all"),
    ):
        layers.append(Layer(calc_name, torch.nn.Identity, None, calc_tag, (), {}))
    layers.append(Layer("norm1", torch.nn.BatchNorm1d, 0, None, (), {}))
    layers.append(Layer("relu1", torch.nn.ReLU, None, None, (), {}))

    # Fully connected tail: fcN / normN / reluN for N = 2, 3, then fc4.
    for idx, width in ((2, 128), (3, 64)):
        layers.append(Layer(f"fc{idx}", torch.nn.Linear, width, None, (), {}))
        layers.append(Layer(f"norm{idx}", torch.nn.BatchNorm1d, 0, None, (), {}))
        layers.append(Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
    layers.append(
        Layer("fc4", torch.nn.Linear, len(self.list_action), None, (), {})
    )
    network = TorchNN(len(self.state_mng), *layers)

    # Replace whatever qfunc the parent initialiser installed.
    self.qfunc = DQN(
        network,
        self.list_action,
        gamma=gamma,
        batch_size=20,
        capacity=100,
        unit_memory="episode",
        lr=0.001,
    )
    # NOTE(review): to_cuda() is called unconditionally; presumably this
    # requires a CUDA device -- confirm against DQN.
    self.qfunc.to_cuda()

    # Because the network is an LSTM, remember the states seen during an
    # episode.
    self.state_history = []
def __init__(self, gamma: float, epsilon: float):
    """Create the CartPole-v0 environment and a convolutional DQN.

    Args:
        gamma: Forwarded to ``DQN`` as ``gamma``.
        epsilon: Forwarded to ``QLearn.__init__`` as ``epsilon``.
    """
    # Load the CartPole environment.
    self.env = gym.make('CartPole-v0')

    # Actions available to the agent.
    self.list_action = np.array([0, 1])

    # Value function network. self.insize is both the first TorchNN
    # argument and the length of self.history below.
    self.insize = 20

    # Convolutional front end: (name index, size, kernel_size, stride).
    layers = []
    for idx, size, kernel, stride in ((1, 64, 20, 10), (2, 64, 5, 5)):
        layers.append(Layer(f"conv{idx}", torch.nn.Conv2d, size, None, (), {
            "kernel_size": kernel,
            "stride": stride,
        }))
        layers.append(Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))

    # Identity layer tagged "reshape(x,-1)" with size 64 * 3 * 11;
    # presumably flattens the conv output -- confirm against TorchNN.
    layers.append(
        Layer("view7", torch.nn.Identity, 64 * 3 * 11, "reshape(x,-1)", (), {})
    )

    # Fully connected tail: fcN / normN / reluN for N = 7..9.
    for idx, width in zip((7, 8, 9), (512, 256, 64)):
        layers.append(Layer(f"fc{idx}", torch.nn.Linear, width, None, (), {}))
        layers.append(Layer(f"norm{idx}", torch.nn.BatchNorm1d, 0, None, (), {}))
        layers.append(Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
    layers.append(
        Layer("output", torch.nn.Linear, self.env.env.action_space.n, None, (), {})
    )
    network = TorchNN(self.insize, *layers)

    value_func = DQN(
        network,
        self.list_action,
        gamma=gamma,
        batch_size=192,
        capacity=1000,
        lr=0.001,
    )
    # NOTE(review): to_cuda() is called unconditionally; presumably this
    # requires a CUDA device -- confirm against DQN.
    value_func.to_cuda()
    QLearn.__init__(self, qfunc=value_func, epsilon=epsilon)

    # Others
    self.done = False
    # One empty slot per input position.
    self.history = [None] * self.insize
def __init__(self, gamma: float, epsilon: float):
    """Create the CartPole-v0 environment and a three-branch CNN DQN.

    The network splits its input (Identity layer tagged "split_numpy_3"),
    runs three structurally identical convolutional branches, merges them
    (Identity layer tagged "combine") and finishes with a fully connected
    head sized to the action space.

    The three branches were previously written out as three verbatim
    copies of the same 15-layer definition; they are now produced by one
    local builder so they cannot drift apart. Each call still creates
    fresh ``Layer`` and ``TorchNN`` objects, so the branches do not share
    layer definitions.

    Args:
        gamma: Forwarded to ``DQN`` as ``gamma``.
        epsilon: Forwarded to ``QLearn.__init__`` as ``epsilon``.
    """
    # Load the CartPole environment.
    self.env = gym.make('CartPole-v0')

    # Actions available to the agent.
    self.list_action = np.array([0, 1])

    def _build_branch():
        """Return a new conv/ReLU/max-pool branch (stages 1..5)."""
        # (size, kernel_size, stride) of conv1..conv5. Every stage is
        # followed by a ReLU and a 2x2 max-pool with stride 2.
        conv_specs = (
            (128, 3, 1),
            (128, 2, 1),
            (64, 2, 2),
            (64, 2, 2),
            (32, 2, 1),
        )
        stages = []
        for idx, (size, kernel, stride) in enumerate(conv_specs, start=1):
            stages.append(Layer(f"conv{idx}", torch.nn.Conv2d, size, None, (), {
                "kernel_size": kernel,
                "stride": stride,
            }))
            stages.append(Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
            stages.append(Layer(f"pool{idx}", torch.nn.MaxPool2d, None, None, (), {
                "kernel_size": 2,
                "stride": 2,
            }))
        # First TorchNN argument is 3 (presumably the number of input
        # channels -- confirm against TorchNN).
        return TorchNN(3, *stages)

    # Value function network: three independent branches.
    cnn1 = _build_branch()
    cnn2 = _build_branch()
    cnn3 = _build_branch()

    nn = TorchNN(
        None,
        Layer("split", torch.nn.Identity, None, "split_numpy_3", (), {}),
        Layer("cnn1", cnn1, None, None, (), {}),
        Layer("cnn2", cnn2, None, None, (), {}),
        Layer("cnn3", cnn3, None, None, (), {}),
        # Size 3 * 32 * 1 * 4: three branches, 32 being the conv5 size;
        # the trailing 1 * 4 is presumably the remaining spatial extent
        # -- confirm against the input shape.
        Layer("view6", torch.nn.Identity, 3 * 32 * 1 * 4, "combine", (), {}),
        Layer("fc6", torch.nn.Linear, 128, None, (), {}),
        Layer("norm6", torch.nn.BatchNorm1d, 0, None, (), {}),
        Layer("relu6", torch.nn.ReLU, None, None, (), {}),
        Layer("fc7", torch.nn.Linear, 64, None, (), {}),
        Layer("norm7", torch.nn.BatchNorm1d, 0, None, (), {}),
        Layer("relu7", torch.nn.ReLU, None, None, (), {}),
        Layer("output", torch.nn.Linear, self.env.env.action_space.n, None, (), {}),
    )

    qfunc = DQN(
        nn,
        self.list_action,
        gamma=gamma,
        batch_size=8,
        capacity=256,
        lr=0.0001,
    )
    # NOTE(review): to_cuda() is called unconditionally; presumably this
    # requires a CUDA device -- confirm against DQN.
    qfunc.to_cuda()
    QLearn.__init__(self, qfunc=qfunc, epsilon=epsilon)

    # Others
    self.done = False
    # Five empty history slots.
    self.history = [None] * 5