def main():
    # Build a PC from parts: the motherboard aggregates the CPU and RAM,
    # the case aggregates the power supply and the motherboard.
    pc = Computer_case(comp_id=1, manufacturer="BeQuiet!", price=299)
    plyta1 = Motherboard(comp_id=2, manufacturer="Gigabyte", price=399)
    procesor1 = Processor(comp_id=3, manufacturer="intel", model="i5-8600", price=999)
    # procesor2 is created but never mounted anywhere.
    procesor2 = Processor(comp_id=4, manufacturer="AMD", model="Ryzen 5 1600", price=999)
    ram1 = Memory(comp_id=5, manufacturer="Corsair", price=390, memory_type="DDR4", capacity=16)
    ram2 = Memory(comp_id=6, manufacturer="GoodRam", price=300, memory_type="DDR4", capacity=16)
    powersupply1 = Powersupply(comp_id=7, manufacturer="BeQuiet!", price=290, ps_size=600)

    plyta1.add_component(procesor1)
    plyta1.add_component(ram1)
    plyta1.add_component(ram2)

    pc.add_component(powersupply1)
    pc.add_component(plyta1)
    pc.do_operation(plyta1.mb_components_price())
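The component classes are not shown in this excerpt; below is a minimal sketch of what main() appears to assume, inferred entirely from the call sites above. The composite structure and the pricing logic are guesses, not the original definitions.

class Component:
    # Common base: identity, price, and a list of child components.
    def __init__(self, comp_id, manufacturer, price, **extra):
        self.comp_id = comp_id
        self.manufacturer = manufacturer
        self.price = price
        self.extra = extra        # model, memory_type, capacity, ps_size, ...
        self.components = []

    def add_component(self, component):
        self.components.append(component)


class Motherboard(Component):
    def mb_components_price(self):
        # The board's own price plus everything mounted on it.
        return self.price + sum(c.price for c in self.components)


class Processor(Component):
    pass


class Memory(Component):
    pass


class Powersupply(Component):
    pass


class Computer_case(Component):
    def do_operation(self, total_price):
        print("Motherboard with components costs:", total_price)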
def prioritized_experience_replay(self, memory, batch_size, gamma, targetQN, memory_TDerror):
    # Assumes numpy imported as np and the Memory class at module scope.
    # Sample transitions with probability proportional to |TD error|.
    sum_absolute_TDerror = memory_TDerror.get_sum_absolute_TDerror()
    generatedrand_list = np.random.uniform(0, sum_absolute_TDerror, batch_size)
    generatedrand_list = np.sort(generatedrand_list)

    batch_memory = Memory(max_size=batch_size)
    idx = 0
    tmp_sum_absolute_TDerror = 0
    for (i, randnum) in enumerate(generatedrand_list):
        while tmp_sum_absolute_TDerror < randnum:
            # The small constant keeps zero-error transitions samplable.
            tmp_sum_absolute_TDerror += abs(memory_TDerror.buffer[idx]) + 0.0001
            idx += 1
        batch_memory.add(memory.buffer[idx])

    inputs = np.zeros((batch_size, self.state_size))
    targets = np.zeros((batch_size, self.actions_size))
    for i, (state_b, action_b, reward_b, next_state_b) in enumerate(batch_memory.buffer):
        inputs[i:i + 1] = state_b
        target = reward_b
        if not (next_state_b == np.zeros(state_b.shape)).all(axis=1):
            # Double DQN target: pick the action with the online model,
            # evaluate it with the target network.
            retmainQs = self.model.predict(next_state_b)[0]
            next_action = np.argmax(retmainQs)
            target = reward_b + gamma * targetQN.model.predict(next_state_b)[0][next_action]
        targets[i] = self.model.predict(state_b)
        targets[i][action_b] = target
    return self.model.train_on_batch(inputs, targets)
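The Memory and MemoryTDerror buffers used above are not shown; here is a minimal sketch consistent with the calls (add, buffer, get_sum_absolute_TDerror), backed by a deque. The +0.0001 offset mirrors the one in the sampling loop so the cumulative sums line up with the random thresholds.

from collections import deque


class Memory:
    # Plain FIFO replay buffer.
    def __init__(self, max_size=1000):
        self.buffer = deque(maxlen=max_size)

    def add(self, experience):
        self.buffer.append(experience)

    def __len__(self):
        return len(self.buffer)


class MemoryTDerror(Memory):
    # One TD error per stored transition, kept in insertion order.
    def get_sum_absolute_TDerror(self):
        return sum(abs(e) + 0.0001 for e in self.buffer)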
@classmethod
def poll(cls) -> "Hardware":
    # Snapshot the machine: each subsystem polls itself; disks and
    # networks come from the class's own helpers.
    return Hardware(
        cpu=CPU.poll(),
        memory=Memory.poll(),
        disks=cls._disks(),
        networks=cls._networks(),
    )
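Hardware itself is not defined in this excerpt; a plausible container matching the constructor call, with placeholder field types (only the field names are taken from the code):

from dataclasses import dataclass
from typing import List


@dataclass
class Hardware:
    cpu: "CPU"
    memory: "Memory"
    disks: List["Disk"]
    networks: List["Network"]

poll() presumably lives on this class, next to the _disks() and _networks() helpers it calls.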
def solve_uniform_online(self, planner, nn_model):
    iteration = 1
    number_solved = 0
    total_expanded = 0
    total_generated = 0

    budget = self._initial_budget
    memory = Memory()
    start = time.time()

    current_solved_puzzles = set()

    while len(current_solved_puzzles) < self._number_problems:
        number_solved = 0
        batch_problems = {}
        for name, state in self._states.items():
            # if name in current_solved_puzzles:
            #     continue
            batch_problems[name] = state

            # Wait until a full batch accumulates, unless too few
            # unsolved problems remain to fill one.
            if len(batch_problems) < self._batch_size and \
                    self._number_problems - len(current_solved_puzzles) > self._batch_size:
                continue

            with ProcessPoolExecutor(max_workers=self._ncpus) as executor:
                args = ((state, name, budget, nn_model)
                        for name, state in batch_problems.items())
                results = executor.map(planner.search_for_learning, args)
                for result in results:
                    has_found_solution = result[0]
                    trajectory = result[1]
                    total_expanded += result[2]
                    total_generated += result[3]
                    puzzle_name = result[4]

                    if has_found_solution:
                        memory.add_trajectory(trajectory)

                    if has_found_solution and puzzle_name not in current_solved_puzzles:
                        number_solved += 1
                        current_solved_puzzles.add(puzzle_name)

            if memory.number_trajectories() > 0:
                for _ in range(self._gradient_steps):
                    loss = nn_model.train_with_memory(memory)
                    print('Loss: ', loss)
                memory.clear()
                nn_model.save_weights(join(self._models_folder, 'model_weights'))

            batch_problems.clear()

        end = time.time()
        with open(join(self._log_folder, 'training_bootstrap_' + self._model_name),
                  'a') as results_file:
            results_file.write(("{:d}, {:d}, {:d}, {:d}, {:d}, {:d}, {:f} ".format(
                iteration, number_solved,
                self._number_problems - len(current_solved_puzzles),
                budget, total_expanded, total_generated, end - start)))
            results_file.write('\n')

        print('Number solved: ', number_solved)
        if number_solved == 0:
            # Double the search budget when nothing new was solved this pass.
            budget *= 2
            print('Budget: ', budget)
            continue

        iteration += 1
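The Memory here is a trajectory store, a different class from the replay buffer sketched earlier despite the shared name. A minimal stand-in matching the three calls above (add_trajectory, number_trajectories, clear); a guess, not the original implementation:

class Memory:
    # Collects solution trajectories between gradient updates.
    def __init__(self):
        self._trajectories = []

    def add_trajectory(self, trajectory):
        self._trajectories.append(trajectory)

    def number_trajectories(self):
        return len(self._trajectories)

    def clear(self):
        self._trajectories.clear()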
# Assumed imports: tf.Summary below implies the TF1-era Keras API.
import os

import numpy as np
import tensorflow as tf
from keras.callbacks import TensorBoard

# QNetWork, Memory and MemoryTDerror come from the surrounding project.


class Trainer_priority(object):
    def __init__(self, env, agent, mount_agent, model_dir="", data_end_index: int = 98):
        self.env = env
        self.agent = agent
        self.mount_agent = mount_agent
        self.experience = []
        self.model_dir = model_dir
        if not self.model_dir:
            self.model_dir = os.path.join(os.path.dirname(__file__), "model")
            if not os.path.isdir(self.model_dir):
                os.mkdir(self.model_dir)
        self.agent.model = QNetWork()
        self.mount_agent.model = QNetWork()
        self._target_model = QNetWork()
        self._target_mount_model = QNetWork()
        self.callback = TensorBoard(self.model_dir)
        self.callback.set_model(self.agent.model)
        self.mount_base = 100
        self.data_end_index = data_end_index
        self.name_action = {0: "buy", 1: "sell", 2: "stay"}
        self.memory = Memory()
        self.memory_TDerror = MemoryTDerror()
        self.memory_mount_TDerror = MemoryTDerror()

    def get_batch(self, batch_size: int = 32, gamma=0.99, agent=None, _target_model=None):
        batch_indices = np.random.randint(low=0, high=len(self.experience), size=batch_size)
        X = np.zeros((batch_size, agent.input_shape[0]))
        y = np.zeros((batch_size, agent.num_actions))
        for i, b_i in enumerate(batch_indices):
            s, a, r, next_s, game_over = self.experience[b_i]
            X[i] = s
            y[i] = agent.evaluate(s)
            # Evaluate the next state with the (frozen) target network.
            Q_sa = np.max(self.agent.evaluate(next_s, model=_target_model))
            if game_over:
                y[i, a] = r
            else:
                y[i, a] = r + gamma * Q_sa
        return X, y

    def write_log(self, index, loss, score):
        for name, value in zip(("loss", "score"), (loss, score)):
            summary = tf.Summary()
            summary_value = summary.value.add()
            summary_value.simple_value = value
            summary_value.tag = name
            self.callback.writer.add_summary(summary, index)
            self.callback.writer.flush()

    def train(
        self,
        gamma: float = 0.99,
        initial_epsilon: float = 0.1,
        final_epsilon: float = 0.0001,
        memory_size: int = 50000,
        observation_epochs: int = 100,
        train_epochs: int = 2000,
        batch_size: int = 32,
        ddqn_flag: bool = True,
    ):
        epochs = observation_epochs + train_epochs
        epsilon = initial_epsilon
        model_path = os.path.join(self.model_dir, "agent_network.h5")
        fmt = "Epoch {:04d}/{:d} | Loss {:.5f} | Score: {} e={:.4f} train={}"

        for e in range(epochs):
            loss = 0.0
            rewards = []
            self.env.reset()
            state = (
                self.env.balance,
                self.env.stock_balance,
                self.env.fx_time_data_buy[self.env.state],
                self.env.fx_time_data_sell[self.env.state],
                self.env.closeAsk_data[self.env.state],
                self.env.closeBid_data[self.env.state],
                self.env.lowAsk_data[self.env.state],
                self.env.lowBid_data[self.env.state],
                self.env.openAsk_data[self.env.state],
                self.env.openBid_data[self.env.state],
            )
            game_over = False
            is_training = e > observation_epochs

            while not game_over:
                if not is_training:
                    # Observation phase: act fully at random (epsilon=1).
                    action = self.agent.act(state, epsilon=1)
                    mount = self.mount_agent.act(state, epsilon=1) + 1
                else:
                    action = self.agent.act(state, epsilon)
                    # Note: the mount agent still acts with epsilon=1 here.
                    mount = self.mount_agent.act(state, epsilon=1) + 1

                reward = self.env.step(action=self.name_action[action],
                                       mount=mount * self.mount_base)
                if "success" in reward:
                    reward = reward["success"]
                elif "fail" in reward:
                    # print("******** fail process *************")
                    reward = reward["fail"]

                next_state = (
                    self.env.balance,
                    self.env.stock_balance,
                    self.env.fx_time_data_buy[self.env.state],
                    self.env.fx_time_data_sell[self.env.state],
                    self.env.closeAsk_data[self.env.state],
                    self.env.closeBid_data[self.env.state],
                    self.env.lowAsk_data[self.env.state],
                    self.env.lowBid_data[self.env.state],
                    self.env.openAsk_data[self.env.state],
                    self.env.openBid_data[self.env.state],
                )
                next_state = np.reshape(next_state, [1, 10])
                state = np.reshape(state, [1, 10])

                if self.env.balance == 0 or self.env.state > self.data_end_index:
                    game_over = True

                self.memory.add((state, action, reward, next_state))
                TDError = self.memory_TDerror.get_TDerror(
                    self.memory, gamma, self.agent.model, self._target_model)
                self.memory_TDerror.add(TDError)
                TDError_mount = self.memory_mount_TDerror.get_TDerror(
                    self.memory, gamma, self.mount_agent.model, self._target_mount_model)
                self.memory_mount_TDerror.add(TDError_mount)
                # self.experience.append(
                #     (state, action, reward, next_state, game_over))

                rewards.append(reward)
                # print("mount {}".format(mount))
                # print("reward {}".format(reward))

                if is_training:
                    # Uniform replay until the running average reward reaches
                    # 20, then switch to prioritized replay.
                    if sum(rewards) / len(rewards) < 20:
                        loss += self.agent.model.replay(
                            self.memory, batch_size, gamma, self._target_model)
                        loss += self.mount_agent.model.replay(
                            self.memory, batch_size, gamma, self._target_mount_model)
                    else:
                        loss += self.agent.model.prioritized_experience_replay(
                            self.memory, batch_size, gamma, self._target_model)
                        loss += self.mount_agent.model.prioritized_experience_replay(
                            self.memory, batch_size, gamma, self._target_mount_model)

                state = next_state

            self.memory_TDerror.update_TDerror(self.memory, gamma,
                                               self.agent.model, self._target_model)
            self.memory_mount_TDerror.update_TDerror(
                self.memory, gamma, self.mount_agent.model, self._target_mount_model)

            loss = loss / len(rewards)
            score = sum(rewards)

            if is_training:
                self.write_log(e - observation_epochs, loss, score)
                # Sync the target networks with the online networks.
                self._target_model.model.set_weights(
                    self.agent.model.model.get_weights())
                self._target_mount_model.model.set_weights(
                    self.mount_agent.model.model.get_weights())

            if epsilon > final_epsilon:
                epsilon -= (initial_epsilon - final_epsilon) / epochs

            print(fmt.format(e + 1, epochs, loss, score, epsilon, is_training))
            stock_value = self.env.fx_time_data_sell[self.env.state] * self.env.stock_balance
            print("balance {}, stock_value {} total_balance {}".format(
                self.env.balance, stock_value, self.env.balance + stock_value))

            if e % 100 == 0:
                self.agent.model.model.save(model_path, overwrite=True)

        # Final save once training finishes.
        self.agent.model.model.save(model_path, overwrite=True)
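train() also calls a plain replay() on the Q-network when the running average reward is below 20; that method is not shown here. A sketch of what it plausibly looks like, mirroring prioritized_experience_replay but with uniform sampling (memory.sample is an assumed helper, not confirmed by the source):

import numpy as np


def replay(self, memory, batch_size, gamma, targetQN):
    # Uniform-sampling counterpart to prioritized_experience_replay.
    inputs = np.zeros((batch_size, self.state_size))
    targets = np.zeros((batch_size, self.actions_size))
    mini_batch = memory.sample(batch_size)  # assumed helper: random transitions

    for i, (state_b, action_b, reward_b, next_state_b) in enumerate(mini_batch):
        inputs[i:i + 1] = state_b
        target = reward_b
        if not (next_state_b == np.zeros(state_b.shape)).all(axis=1):
            # Double DQN: choose with the online net, evaluate with the target net.
            retmainQs = self.model.predict(next_state_b)[0]
            next_action = np.argmax(retmainQs)
            target = reward_b + gamma * targetQN.model.predict(next_state_b)[0][next_action]
        targets[i] = self.model.predict(state_b)
        targets[i][action_b] = target
    return self.model.train_on_batch(inputs, targets)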
def memory_map(self) -> Memory:
    # Pickle this object (protocol 3) and expose the bytes as an
    # in-memory stream wrapped in Memory.
    return Memory(io.BytesIO(pickle.dumps(self, 3)))
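A round-trip check, assuming nothing about Memory beyond holding the stream it is given; the Memory and Thing classes below are stand-ins, not the originals:

import io
import pickle


class Memory:
    # Stand-in wrapper: just keeps a reference to the byte stream.
    def __init__(self, stream):
        self.stream = stream


class Thing:
    def __init__(self, value):
        self.value = value

    def memory_map(self) -> Memory:
        return Memory(io.BytesIO(pickle.dumps(self, 3)))


snapshot = Thing(42).memory_map()
restored = pickle.load(snapshot.stream)  # deserialize straight from the buffer
print(restored.value)  # 42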