class AgentNTD(AgentNeural):
    def __init__(self, state_class, load_knowledge=False):
        super(AgentNTD, self).__init__(state_class, NTD_NUM_OUTPUTS,
                                       init_weights=NTD_NETWORK_INIT_WEIGHTS)

        # Predicting separate state-action values for white and black only
        # makes sense when training against self.
        if NTD_NUM_OUTPUTS == 2:
            assert TRAIN_BUDDY == TRAIN_BUDDY_SELF

        self.trainer = BackpropTrainer(self.network,
                                       learningrate=NTD_LEARNING_RATE,
                                       momentum=0.0,
                                       verbose=False)

        self.epsilon = NTD_EPSILON
        self.lamda = NTD_LAMBDA
        self.alpha = NTD_ALPHA
        self.gamma = NTD_GAMMA

        self.last_state_str = None
        self.last_action = None
        # Since we apply updates with a one-step delay, we need to remember
        # whether the action in the previous time step was exploratory.
        self.was_last_action_random = False
        self.processed_final_reward = False
        self.episode_traj = ''
        self.is_learning = True

        self.e = {}
        self.updates = {}
        # astar_value[s'] = argmax_b Q(s', b) for an undetermined roll.
        self.astar_value = {}
        # Used for alpha annealing. Note that the roll value that is recorded
        # reflects the roll chosen by the agent, not the original random roll.
        # Including the roll makes sense for calculating the updates, which
        # are based on the action chosen by the agent. But it doesn't make
        # sense for epsilon annealing, which is calculated before the agent
        # is asked to take an action.
        self.visit_count = {}  # Example key: (w-5-1, action)
        # Used for epsilon annealing.
        self.visit_count_no_roll = {}  # Example key: w-5
        self.visited_in_episode = {}
        self.network_inputs = {}
        self.network_outputs = {}

        # Used for recording the value of interesting states over time.
        self.num_training_games = 0
        self.value_tracker_file = None

        # network_predictions are gathered at the end of each iteration to
        # produce reports.
        self.network_predictions = {}

        # TODO: Merge this functionality with COLLECT_STATS logic.
        self.traj_count = {}

        if load_knowledge:
            raise ValueError('AgentNTD does not support load_knowledge.')

    def begin_episode(self):
        self.e = {}
        self.astar_value = {}
        self.updates = {}
        if self.is_learning:
            self.network_outputs = {}
        self.visited_in_episode = {}
        self.last_state_str = None
        self.last_action = None
        self.processed_final_reward = False
        self.episode_traj = ''

    def end_episode(self, reward):
        if self.is_learning and not self.processed_final_reward:
            if TRAIN_BUDDY == TRAIN_BUDDY_SELF:
                # Ignore the reward parameter and construct our own reward
                # signal corresponding to the probability of white winning.
                rewards = self.compute_values_for_final_state(self.state)
            else:
                rewards = np.array([reward])
            self.ntd_step(action=None, is_action_random=False,
                          rewards=rewards)
            if PRINT_GAME_RESULTS:
                print 'Episode traj: %s' % self.episode_traj
            self.traj_count[self.episode_traj] = self.traj_count.get(
                self.episode_traj, 0) + 1
            self.apply_updates()
            self.processed_final_reward = True

    def update_values(self, delta):
        # The number of elements in delta depends on NTD_NUM_OUTPUTS.
        if all(v == 0 for v in delta):
            # Nothing to update if all elements of delta are zero.
            return
        alpha = self.alpha
        for (si, ai) in self.e.iterkeys():
            if NTD_USE_ALPHA_ANNEALING:
                alpha = 1.0 / self.visit_count.get((si, ai), 1)
                alpha = max(alpha, NTD_ALPHA)
            if self.e[(si, ai)] != 0.0:
                change = [alpha * x * self.e[(si, ai)] for x in delta]
                current_update = self.updates.get((si, ai),
                                                  [0.0] * self.outputdim)
                self.updates[(si, ai)] = [
                    a + b for a, b in zip(current_update, change)
                ]

    def apply_updates(self):
        dataset = SupervisedDataSet(self.inputdim, self.outputdim)
        for (si, ai) in self.updates.iterkeys():
            si_ai = '%s-%s' % (si, ai)
            network_in = self.network_inputs[si_ai]
            current_value = self.get_network_value(None, None, si_ai)
            new_value = [
                a + b
                for a, b in zip(current_value, self.updates[(si, ai)])
            ]
            dataset.addSample(network_in, new_value)
            if PRINT_GAME_RESULTS:
                print 'updating (%s, %s) from %s to %s' % (
                    si, ai, map(PrettyFloat, current_value),
                    map(PrettyFloat, new_value))
        if dataset:  # len(dataset) > 0
            self.trainer.setData(dataset)
            self.trainer.trainEpochs(NTD_TRAIN_EPOCHS)

    def compute_values_for_final_state(self, state):
        if state.has_player_won(PLAYER_WHITE):
            values = np.array([REWARD_WIN, REWARD_LOSE])
        else:
            values = np.array([REWARD_LOSE, REWARD_WIN])
        if self.outputdim == 1:
            values = values[:1]
        return values

    def get_Q_value(self, state, action):
        """Returns the state-action value.

        Args:
            state: State for which the value is requested.
            action: Action for which the value is requested.

        Returns:
            List containing NTD_NUM_OUTPUTS elements. When
            NTD_NUM_OUTPUTS == 1, the one-dimensional return value can be
            interpreted as [p_w], the probability of white winning. When
            NTD_NUM_OUTPUTS == 2, the two-dimensional return value can be
            interpreted as [p_w, p_b], the probabilities of white and black
            winning.
        """
        if state.is_final():
            # The algorithm never trains the network on final states, so it
            # cannot know their values. We need to compute the values of
            # final states directly.
            values = self.compute_values_for_final_state(state)
        else:
            values = self.get_network_value(state, action)
        return values

    # This method needs to receive the actual state object, because it needs
    # to calculate the corresponding network inputs for it.
    def get_network_value(self, state, action, state_action_str=None):
        if state_action_str:
            assert state is None
            assert action is None
            assert state_action_str in self.network_outputs
            return self.network_outputs[state_action_str]
        state_action_str = '%s-%s' % (state, action)
        if state_action_str in self.network_outputs:
            return self.network_outputs[state_action_str]
        if state_action_str not in self.network_inputs:
            self.network_inputs[state_action_str] = \
                state.encode_network_input(action)
        network_in = self.network_inputs[state_action_str]
        self.network_outputs[state_action_str] = self.network.activate(
            network_in)
        return self.network_outputs[state_action_str]

    def ntd_step(self, action, is_action_random, rewards=None):
        """Updates the underlying model after every transition.

        This method is called in self.select_action() and self.end_episode().

        Args:
            action: Action taken by the agent.
            is_action_random: Whether the action was exploratory.
            rewards: List of reward components received from the environment.

        Returns:
            None.
        """
        if rewards is None:
            rewards = [0.0] * self.outputdim
        assert len(rewards) == self.outputdim

        s = self.last_state_str
        a = self.last_action
        sp = self.state
        ap = action

        if action is None:
            self.episode_traj += ' -> %s.' % str(self.state)
        else:
            self.episode_traj += ' -> %s, %s' % (str(self.state), action)

        if s is not None:
            # Update the eligibility traces.
            if ALGO == ALGO_Q_LEARNING and self.was_last_action_random:
                # Q(lambda): set all traces to zero after an exploratory
                # action.
                self.e = {}
            else:
                for key in self.e.iterkeys():
                    self.e[key] *= (self.gamma * self.lamda)

            # Replacing traces.
            self.e[(s, a)] = 1.0
            # Set the traces for the other actions to 0.
            for other_action in self.state.action_object.get_all_actions():
                if other_action != a and (s, other_action) in self.e:
                    self.e[(s, other_action)] = 0

            s_a = '%s-%s' % (s, a)
            if self.state.is_final():
                delta = rewards - self.get_network_value(None, None, s_a)
            else:
                # In our domains, only the very last state transition
                # receives a reward.
                assert all(v == 0 for v in rewards)
                if ALGO == ALGO_SARSA:
                    # Just consider the action we took in sp.
                    next_state_v = self.get_network_value(sp, ap)
                elif ALGO == ALGO_Q_LEARNING:
                    # Consider the best we could do from sp. The key is the
                    # state string without the roll.
                    next_state_v = self.astar_value[str(sp)[:-2]]
                delta = (rewards + self.gamma * next_state_v -
                         self.get_network_value(None, None, s_a))
            self.update_values(delta)
        else:
            # Just cache the value of the current state-action, so we can
            # access it on the next call to this method, when it's requested
            # as s_a.
            self.get_network_value(sp, ap)

        # Save the visited state and the chosen action.
        self.last_state_str = str(self.state)
        self.last_action = action
        self.was_last_action_random = is_action_random

        if action is not None:  # end_episode calls this with action=None.
            key = (self.last_state_str, self.last_action)
            if key not in self.visited_in_episode:
                self.visit_count[key] = self.visit_count.get(key, 0) + 1
                self.visited_in_episode[key] = True

    def select_action(self):
        if self.is_learning:
            if NTD_USE_EPSILON_ANNEALING:
                # Since under some conditions the current roll can be
                # entirely ignored (--chooseroll=1.0), it makes sense to
                # exclude the current roll from visit counts.
                state_str_no_roll = str(self.state)[:-2]
                self.visit_count_no_roll[state_str_no_roll] = (
                    self.visit_count_no_roll.get(state_str_no_roll, 0) + 1)
                # Example: anneal_time = 100, visit_count = 5.
                time_to_end = max(
                    0, NTD_EPSILON_ANNEAL_TIME -
                    self.visit_count_no_roll.get(state_str_no_roll, 0))
                ratio = float(time_to_end) / NTD_EPSILON_ANNEAL_TIME  # 0.95
                epsilon = NTD_EPSILON_END + (
                    NTD_EPSILON_START - NTD_EPSILON_END) * ratio
            else:
                epsilon = self.epsilon
        else:
            epsilon = 0
        choose_random_action = random.random() < epsilon

        # Select the best action.
        action, _ = self.select_action_with_search(
            state=self.state,
            choose_random_action=choose_random_action,
            plies=NTD_SEARCH_PLIES)

        # Update values.
        if self.is_learning:
            self.ntd_step(action, is_action_random=choose_random_action)
        return action

    def pause_learning(self):
        self.is_learning = False

    def resume_learning(self):
        self.is_learning = True

    def print_e(self):
        print "e:"
        for key in sorted(self.e.keys()):
            print "e%s -> %.10f" % (key, self.e[key])

    def print_visit_count(self):
        print "Visit Counts:"
        for key, value in sorted(self.visit_count.iteritems(),
                                 key=lambda (k, v): (v, k)):
            print "%s: %s" % (key, value)

    def probe_network(self):
        exp_params = ExpParams.get_exp_params_from_command_line_args()
        graph = exp_params.state_class.GAME_GRAPH
        print "Network predictions:"
        self.network_predictions = {}  # Network predictions.
        # True values obtained from the graph using value iteration.
        true_values = {}
        for state_roll_action_str in sorted(self.network_inputs.iterkeys()):
            state_roll_action_value = self.network.activate(
                self.network_inputs[state_roll_action_str])
            self.network_predictions[
                state_roll_action_str] = state_roll_action_value
            node_id = graph.get_node_id(
                state_roll_action_str[:-4])  # Removes the roll and action.
            true_value = graph.get_attr(node_id, VAL_ATTR)
            true_values[state_roll_action_str] = true_value
        for (si, ai), _ in sorted(self.visit_count.iteritems(),
                                  key=lambda (k, v): (v, k)):
            state_roll_action_str = '%s-%s' % (si, ai)
            true_value = true_values[state_roll_action_str]
            # The reward for a white win is [1, 0]; for a black win it is
            # [0, 1]. state_value[0] - state_value[1] ranges from -1 to +1,
            # although it can exceed those bounds when the network outputs
            # fall outside the range [0, 1].
            print "(%s, %s): opt. val. for white: %+.2f prediction: %s visited: %d" % (
                si, ai, true_value,
                map(PrettyFloat,
                    self.network_predictions[state_roll_action_str]),
                self.visit_count.get((si, ai), 0))
        print ('Note: optimal values for white are based on the board '
               'positions only and ignore the current roll.')

    def track_interesting_states(self):
        interesting_states = self.state.interesting_states()
        if interesting_states:
            if not self.value_tracker_file:
                value_tracker_filename = (
                    self.state.exp_params.get_value_tracker_filename(
                        FILE_PREFIX_NTD))
                self.value_tracker_file = open(value_tracker_filename, 'w')
            self.num_training_games += NTD_NUM_TRAINING_GAMES
            self.value_tracker_file.write('%d' % self.num_training_games)
            for s in interesting_states:
                s_val = (self.network_predictions[s][0]
                         if s in self.network_predictions else 0.5)
                self.value_tracker_file.write(' %f' % s_val)
            self.value_tracker_file.write('\n')
            self.value_tracker_file.flush()

    def print_traj_counts(self):
        print "Trajectories in training:"
        import operator
        sorted_traj_count = sorted(self.traj_count.items(),
                                   key=operator.itemgetter(1),
                                   reverse=True)
        for traj, cnt in sorted_traj_count:
            print "%s: %d" % (traj, cnt)
        # Reset after each query.
        self.traj_count = {}

    def print_learner_state(self):
        self.print_visit_count()
        self.print_e()
        self.probe_network()
        self.print_traj_counts()
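A hedged, self-contained sketch of the update scheme AgentNTD implements: replacing eligibility traces with TD(lambda), where per-step deltas are accumulated and then applied in one batch at episode end (mirroring update_values() and apply_updates() above). The function name, the constants, and the toy trajectory are illustrative assumptions, not part of the original code.

# Tabular analogue of AgentNTD's trace bookkeeping (illustrative only).
GAMMA, LAMBDA, ALPHA = 1.0, 0.7, 0.1

def accumulate_td_lambda(trajectory, q_values):
    """trajectory: list of ((state, action), reward); q_values: dict."""
    e = {}        # eligibility traces, keyed by (state, action)
    updates = {}  # accumulated changes, applied in one batch at the end
    prev = None
    for sa, reward in trajectory:
        if prev is not None:
            for key in e:            # decay all traces ...
                e[key] *= GAMMA * LAMBDA
            e[prev] = 1.0            # ... then set a replacing trace for prev
            delta = (reward + GAMMA * q_values.get(sa, 0.0)
                     - q_values.get(prev, 0.0))
            for key, trace in e.items():
                updates[key] = updates.get(key, 0.0) + ALPHA * delta * trace
        prev = sa
    return updates

# A two-step toy episode that ends with reward 1: the final delta propagates
# back to the first state-action pair, scaled by gamma * lambda.
print(accumulate_td_lambda(
    [(('s0', 'a0'), 0.0), (('s1', 'a1'), 0.0), (('final', None), 1.0)],
    q_values={}))
# -> {('s1', 'a1'): 0.1, ('s0', 'a0'): 0.07}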
class NeuralNetwork(object):
    """
    The neural network class does all the heavy lifting to incorporate
    pybrain neural networks into the NowTrade ecosystem.
    """
    def __init__(self, train_data, prediction_data,
                 network_type=FEED_FORWARD_NETWORK,
                 network_dataset_type=SUPERVISED_DATASET,
                 trainer_type=BACKPROP_TRAINER):
        self.train_data = train_data
        self.prediction_data = prediction_data
        self.network_type = network_type
        self.network_dataset_type = network_dataset_type
        self.trainer_type = trainer_type
        self.network = None
        self.network_dataset = None
        self.dataset = None
        self.trainer = None
        self.trained_iterations = 0
        self.momentum = None
        self.learning_rate = None
        self.hidden_layers = None
        self.prediction_window = None
        self.logger = logger.Logger(self.__class__.__name__)
        self.logger.info('train_data: %s prediction_data: %s, '
                         'network_type: %s, network_dataset_type: %s, '
                         'trainer_type: %s'
                         % (train_data, prediction_data, network_type,
                            network_dataset_type, trainer_type))

    def save(self):
        """
        Returns the pickled trained/tested neural network as a string.
        """
        return cPickle.dumps(self)

    def save_to_file(self, filename):
        """
        Saves a neural network to file for later use.
        Look into pybrain.datasets.supervised.SupervisedDataSet.saveToFile()
        http://pybrain.org/docs/api/datasets/superviseddataset.html
        """
        file_handler = open(filename, 'wb')
        cPickle.dump(self, file_handler)
        file_handler.close()

    def build_network(self, dataset, new=True, **kwargs):
        """
        Builds a neural network using the dataset provided.
        Expected keyword args:
        - 'hidden_layers'
        - 'prediction_window'
        - 'learning_rate'
        - 'momentum'
        """
        self.hidden_layers = kwargs.get('hidden_layers', 3)
        self.prediction_window = kwargs.get('prediction_window', 1)
        self.learning_rate = kwargs.get('learning_rate', 0.1)
        self.momentum = kwargs.get('momentum', 0.01)
        if not new:
            # Reuse the existing network after re-sorting its modules.
            self.network.sorted = False
            self.network.sortModules()
        elif self.network_type == FEED_FORWARD_NETWORK:
            self.network = buildNetwork(len(self.train_data),
                                        self.hidden_layers, 1)
        else:
            raise InvalidNetworkType()
        if self.network_dataset_type == SUPERVISED_DATASET:
            self.ready_supervised_dataset(dataset)
        else:
            raise InvalidNetworkDatasetType()
        if self.trainer_type == BACKPROP_TRAINER:
            self.trainer = BackpropTrainer(self.network,
                                           learningrate=self.learning_rate,
                                           momentum=self.momentum,
                                           verbose=True)
            self.trainer.setData(self.network_dataset)
        else:
            raise InvalidTrainerType()

    def ready_supervised_dataset(self, dataset):
        """
        Ready the supervised dataset for training.
        @TODO: Need to randomize the data being fed to the network.
        See randomBatches() here:
        http://pybrain.org/docs/api/datasets/superviseddataset.html
        """
        self.network_dataset = SupervisedDataSet(len(self.train_data), 1)
        # Currently only supports the log function for normalizing data.
        training_values = np.log(dataset.data_frame[self.train_data])
        results = np.log(dataset.data_frame[self.prediction_data].shift(
            -self.prediction_window))
        training_values['PREDICTION_%s' % self.prediction_data[0]] = results
        training_values = training_values.dropna()
        for _, row_data in enumerate(training_values.iterrows()):
            _, data = row_data
            sample = list(data[:-1])
            result = [data[-1]]
            self.network_dataset.addSample(sample, result)

    def train(self, cycles=1):
        """
        Trains the network for the number of iterations specified by the
        cycles parameter.
        """
        for _ in range(cycles):
            res = self.trainer.train()
            self.trained_iterations += 1
        return res

    def train_until_convergence(self, max_cycles=1000, continue_cycles=10,
                                validation_proportion=0.25):
        """
        Wrapper around the pybrain BackpropTrainer trainUntilConvergence
        method.
        @see: http://pybrain.org/docs/api/supervised/trainers.html
        """
        # trainUntilConvergence returns the training and validation error
        # lists; return them rather than overwriting self.trainer with them.
        return self.trainer.trainUntilConvergence(
            maxEpochs=max_cycles,
            continueEpochs=continue_cycles,
            validationProportion=validation_proportion)

    def _activate(self, data):
        """
        Activates the network using the data specified.
        Returns the network's prediction.
        """
        return self.network.activate(data)[0]

    def activate_all(self, data_frame):
        """
        Activates the network for all values in the dataframe specified.
        """
        dataframe = np.log(data_frame[self.train_data])
        res = []
        for _, row_data in enumerate(dataframe.iterrows()):
            _, data = row_data
            sample = list(data)
            res.append(self._activate(sample))
        return np.exp(res)
class StrategyANN(Strategy):
    def __init__(self, features_num, hidden_neurons_num):
        super().__init__()
        self.is_learning = True
        self.features_num = features_num
        # Separate value networks (and trainers) for attack and defence.
        self.net_attack = buildNetwork(features_num, hidden_neurons_num,
                                       hidden_neurons_num, 1, bias=True)
        self.net_defence = buildNetwork(features_num, hidden_neurons_num,
                                        hidden_neurons_num, 1, bias=True)
        self.trainer_attack = BackpropTrainer(self.net_attack)
        self.trainer_defence = BackpropTrainer(self.net_defence)
        self.gamma = 0.9
        self.errors = []
        self.buf = np.zeros(200)
        self.buf_index = 0
        self.setup()

    def update_at_end(self, old, new):
        if not self.needs_update():
            return
        if new.winner == Board.STONE_EMPTY:
            reward = 0
        else:
            reward = 2 if self.stand_for == new.winner else -2
        if old is None:
            if self.prev_state is not None:
                self._update_impl(self.prev_state, new, reward)
        else:
            self._update_impl(old, new, reward)

    def update(self, old, new):
        if not self.needs_update():
            return
        if self.prev_state is None:
            self.prev_state = old
            return
        if new is None:
            self._update_impl(self.prev_state, old, 0)
        self.prev_state = old

    def _update_impl(self, old, new, reward):
        old_input = self.get_input_values(old)
        v1_a = self.net_attack.activate(self.get_input_values(new))
        target = self.gamma * v1_a

        # The attack net learns from positive rewards, the defence net from
        # negative ones.
        ds_a = SupervisedDataSet(self.features_num, 1)
        ds_a.addSample(old_input, target + max(0, reward))
        ds_d = SupervisedDataSet(self.features_num, 1)
        ds_d.addSample(old_input, target + min(0, reward))

        self.trainer_attack.setData(ds_a)
        self.trainer_attack.train()
        self.trainer_defence.setData(ds_d)
        self.trainer_defence.train()

    def board_value(self, board, context):
        iv = self.get_input_values(board)
        return self.net_attack.activate(iv), self.net_defence.activate(iv)

    def _decide_move(self, moves):
        best_move_a, best_av = None, None
        best_move_d, best_dv = None, None
        for m in moves:
            iv = self.get_input_values(m)
            av = self.net_attack.activate(iv)
            dv = self.net_defence.activate(iv)
            if best_av is None or best_av < av:
                best_move_a, best_av = m, av
            if best_dv is None or best_dv < dv:
                best_move_d, best_dv = m, dv
        return best_move_a if best_av >= best_dv else best_move_d

    def preferred_board(self, old, moves, context):
        if not moves:
            return old
        if len(moves) == 1:
            return moves[0]
        if np.random.rand() < self.epsilon:  # exploration
            the_board = random.choice(moves)
            the_board.exploration = True
            return the_board
        return self._decide_move(moves)

    def get_input_values(self, board):
        '''
        Returns
        -------
        vector : numpy.1darray
            The input vector: one-hot planes for black and white stones,
            followed by two side-to-move flags.
        '''
        v = board.stones
        iv = np.zeros(v.shape[0] * 2 + 2)
        iv[0:v.shape[0]] = (v == Board.STONE_BLACK).astype(int)
        iv[v.shape[0]:v.shape[0] * 2] = (v == Board.STONE_WHITE).astype(int)
        who = board.whose_turn_now()
        iv[-2] = 1 if who == Board.STONE_BLACK else 0  # black to move
        iv[-1] = 1 if who == Board.STONE_WHITE else 0  # white to move
        return iv

    def save(self, file):
        pass

    def load(self, file):
        pass

    def setup(self):
        self.prev_state = None

    def mind_clone(self):
        pass
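A hedged illustration of the input encoding that get_input_values() builds: a black-stone plane, a white-stone plane, and two side-to-move flags. FakeBoard is a hypothetical stand-in for the real Board class, included only to make the example self-contained and runnable.

# Illustrative only: FakeBoard mimics the small slice of Board that
# get_input_values() touches (stones array and whose_turn_now()).
import numpy as np

class FakeBoard(object):
    STONE_EMPTY, STONE_BLACK, STONE_WHITE = 0, 1, 2

    def __init__(self, stones, turn):
        self.stones = stones
        self._turn = turn

    def whose_turn_now(self):
        return self._turn

board = FakeBoard(np.array([1, 0, 2, 0]), turn=FakeBoard.STONE_WHITE)
v = board.stones
iv = np.zeros(v.shape[0] * 2 + 2)
iv[0:v.shape[0]] = (v == FakeBoard.STONE_BLACK).astype(int)
iv[v.shape[0]:v.shape[0] * 2] = (v == FakeBoard.STONE_WHITE).astype(int)
who = board.whose_turn_now()
iv[-2] = 1 if who == FakeBoard.STONE_BLACK else 0
iv[-1] = 1 if who == FakeBoard.STONE_WHITE else 0
# Black plane, white plane, then the two turn flags (white to move here):
print(iv)  # [1. 0. 0. 0. 0. 0. 1. 0. 0. 1.]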