def __init__(self, data_set='mnist', batch_size=None, version=1):
    """Build the optimization environment around a classification task.

    Args:
        data_set: Name of the dataset passed to ``load_data`` (default 'mnist').
        batch_size: Mini-batch size forwarded to ``load_data``; ``None`` lets
            the loader pick its default.
        version: Selects the observation layout (0-3) and therefore the size
            of the observation space.

    Raises:
        ValueError: If ``version`` is not one of 0, 1, 2, 3.
    """
    super().__init__()
    self.sequence = load_data(data_set, batch_size)
    num_of_labels = self.sequence.label_shape[0]
    feature_size = self.sequence.feature_shape[0]
    self.model = Model(feature_size, num_of_labels)

    # Rolling buffers holding the last `past_history` losses, gradients,
    # and weight snapshots.
    past_history = 5
    self.loss_hist = np.zeros((past_history, 1))
    self.grad_hist = np.zeros((past_history, feature_size, num_of_labels))
    self.wght_hist = np.zeros(self.grad_hist.shape)

    # Observation size depends on which state representation is requested.
    if version == 0:
        state_size = self.model.size
    elif version == 1:
        state_size = self.model.size + 1
    elif version == 2:
        state_size = 2 * self.model.size + 1
    elif version == 3:
        state_size = (self.model.size + 1) * past_history
    else:
        # Fail fast with a clear message instead of hitting an
        # UnboundLocalError on `state_size` below.
        raise ValueError(f"unsupported version: {version!r}")

    self.observation_space = Box(low=-1e3, high=1e3, dtype=np.float32,
                                 shape=(state_size, ))
    self.action_space = Box(low=-1e3, high=1e3, dtype=np.float32,
                            shape=(self.model.size, ))
    self.seed()

    # "Adjusted" copies of the raw histories — presumably normalized
    # versions filled in during stepping; confirm against step()/reset().
    self.adjusted_loss_hist = np.zeros(self.loss_hist.shape)
    self.adjusted_grad_hist = np.zeros(self.grad_hist.shape)
    self.adjusted_wght_hist = np.zeros(self.wght_hist.shape)
    self.version = version
def __init__(self, log_file=None):
    """Initialize the environment from the full dataset returned by load_data.

    Args:
        log_file: NOTE(review): currently unused — kept only for interface
            compatibility with existing callers; confirm before removing.
    """
    # The last column of `data` holds the labels; everything before it is
    # the feature matrix.
    data = load_data()
    features, labels = np.hsplit(data, [-1])
    self.features = features
    self.labels, num_of_labels = to_onehot(labels)
    self.feature_size = features.shape[-1]
    self.model = Model(self.feature_size, num_of_labels)

    # History window length for objectives, gradients, and weights.
    self.H = 3
    self.obj_list = np.zeros(self.H)
    self.grad_list = np.zeros((self.H, self.feature_size, num_of_labels))
    self.W_list = np.zeros_like(self.grad_list)

    # Observation length: weights + gradients (model.size each) plus one
    # scalar objective value.
    self.N = 2 * self.model.size + 1
    self.observation_space = spaces.Box(low=-1e8, high=1e8,
                                        shape=(self.N, ), dtype=np.float32)
    self.action_space = spaces.Box(low=-1e8, high=1e8,
                                   shape=(1, ), dtype=np.float32)

    self.steps = 1
    self.seed()

    # Bookkeeping for training diagnostics.
    self.rewards = []
    self.running_rewards = []
    self.running_acc = []
    self.num_of_labels = num_of_labels
    self.global_steps = 0
    self.seed_no = 0
def __init__(self, data_set='mnist', batch_size=None, n_of_steps=None):
    """Set up the environment: data sequence, model, history buffers, spaces."""
    super().__init__()

    self.sequence = load_data(data_set, batch_size)
    n_labels = self.sequence.label_shape[0]
    n_features = self.sequence.feature_shape[0]
    self.model = Model(n_features, n_labels)

    # Keep the last `window` losses, gradients, and weight snapshots.
    window = 3
    self.loss_hist = np.zeros((window, 1))
    self.grad_hist = np.zeros((window, n_features, n_labels))
    self.wght_hist = np.zeros_like(self.grad_hist)

    # One slot per weight and per gradient entry, plus a scalar loss.
    obs_len = 2 * self.model.size + 1
    box_kwargs = dict(low=-1e3, high=1e3, dtype=np.float32)
    self.observation_space = Box(shape=(obs_len,), **box_kwargs)
    self.action_space = Box(shape=(self.model.size,), **box_kwargs)

    self.seed()
def __init__(self, data_set='iris', batch_size=None, version=1,
             max_batches=400, max_history=5, observation_version=0,
             action_version=0, reward_version=0):
    """Build the per-parameter multi-agent optimization environment.

    Each model parameter gets its own observation/action entry in the
    Dict spaces, keyed by ``MultiOptimize.AGENT_FMT``.

    Args:
        data_set: Dataset name passed to ``load_data``.
        batch_size: Mini-batch size forwarded to ``load_data``.
        version: Observation layout selector forwarded to
            ``utils_env.get_obs_version`` and recorded in ``VersionType``.
        max_batches: Episode length cap (stored; used elsewhere).
        max_history: Length of the loss/gradient/weight history window.
        observation_version, action_version, reward_version: Sub-version
            selectors recorded in ``VersionType``.

    Raises:
        RuntimeError: If ``action_version`` is not 0 or 1.
    """
    super().__init__()
    self.model = get_problem(data_set=load_data(data_set, batch_size))
    model_size = (self.model.size, )

    # BUG FIX: the history window length was hard-coded to 5, silently
    # ignoring the `max_history` parameter (harmless only for the default).
    self.history = History(max_history, losses=(), gradients=model_size,
                           weights=model_size)

    result = utils_env.get_obs_version(model_size, max_history, version)
    obs_space, self.adjusted_history = result

    # Action bounds per version: 0 = tight, 1 = effectively unbounded.
    if action_version == 0:
        action_low = -4.
        action_high = 4.
    elif action_version == 1:
        action_low = -1e8
        action_high = 1e8
    else:
        raise RuntimeError(f"unsupported action_version: {action_version!r}")

    # One agent (observation + scalar action) per model parameter.
    self.observation_space = Dict({
        MultiOptimize.AGENT_FMT.format(i): obs_space
        for i in range(self.model.size)
    })
    self.action_space = Dict({
        MultiOptimize.AGENT_FMT.format(i): Box(low=action_low,
                                               high=action_high,
                                               dtype=np.float32,
                                               shape=(1, ))
        for i in range(self.model.size)
    })

    self.seed()
    self.max_history = max_history
    self.max_batches = max_batches
    self.version = VersionType(version, observation_version,
                               action_version, reward_version)