def get_q_value(self, state, action):
    """Return the approximate Q-value of taking ``action`` in ``state``.

    The intended result is Q(state, action) = w * feature_vector, where
    ``*`` denotes the dot product of the weights with the feature vector.

    Exercise placeholder: until it is filled in, the body only calls
    ``util.raise_not_defined()``.
    """
    "*** YOUR CODE HERE ***"
    util.raise_not_defined()
def classify(self, data):
    """Predict a label for every datum in ``data``.

    Each datum is scored against every label in ``self.legal_labels`` as
    ``self.weights[label] * datum`` and the highest-scoring label is the
    prediction.  When several labels tie for the best score, the one that
    appears first in ``self.legal_labels`` wins.

    The scoring works on the datums directly, so no numpy conversion of
    ``data`` is performed here.

    Args:
        data: Iterable of datums.  Per the project description each datum
            is a util.Counter; the only requirement of this method is that
            ``self.weights[label] * datum`` yields a comparable score.

    Returns:
        A list with one predicted label per datum, in input order.  An
        entry is ``None`` if ``self.legal_labels`` is empty.

    Raises:
        Exception: If ``self.weights`` is ``None`` (classifier untrained).
    """
    if self.weights is None:
        raise Exception(
            "the perceptron must be trained before data can be classified")

    def best_label(datum):
        # max() keeps the first of several equal scores, so ties resolve
        # in ``legal_labels`` order.
        return max(
            self.legal_labels,
            key=lambda label: self.weights[label] * datum,
            default=None,
        )

    return [best_label(datum) for datum in data]
def choose_action(self, game_state):
    """Pick the action this agent takes in ``game_state``.

    Override this method to make a good agent.  The returned action should
    be legal and produced within the time limit; otherwise a random legal
    action will be chosen on the agent's behalf.

    This version is left unimplemented and only calls
    ``util.raise_not_defined()``.
    """
    util.raise_not_defined()
def is_goal_state(self, state):
    """Report whether ``state`` is a valid goal state.

    state: Search state

    Should return True if and only if the state is a valid goal state.
    This version is left unimplemented and only calls
    ``util.raise_not_defined()``.
    """
    util.raise_not_defined()
def get_features(self, state, action):
    """Return a dict mapping features to counts for ``(state, action)``.

    Usually the count will just be 1.0, for indicator functions.

    This version is left unimplemented and only calls
    ``util.raise_not_defined()``.
    """
    util.raise_not_defined()
def compute_q_value_from_values(self, state, action):
    """Compute the Q-value of ``action`` in ``state``.

    The Q-value is to be derived from the value function stored in
    ``self.values``.

    Exercise placeholder: until it is filled in, the body only calls
    ``util.raise_not_defined()``.
    """
    "*** YOUR CODE HERE ***"
    util.raise_not_defined()
def train(self, training_data, training_labels, validation_data,
          validation_labels):
    """Training loop for the softmax classifier (exercise scaffold).

    The loop is meant to pass through the training data several times and
    update the weight vector for each label based on the cross entropy
    loss.

    You will need to set up TensorFlow variables, the computation graph and
    the optimization procedure, then run the training step
    ``self.max_iterations`` times.  This should be very similar to what is
    shown at https://www.tensorflow.org/get_started/mnist/beginners except
    for where the data is coming from.

    Important note: this should operate in batch mode, using all of
    ``training_data`` for each batch.

    Only the set-up below is provided; the rest is a placeholder that calls
    ``util.raise_not_defined()``.
    """
    first_datum = training_data[0]
    self.features = list(first_datum.keys())  # could be useful later
    learning_rate = self.learning_rates[0]

    # Note: features should come into the tf.placeholder self.x and the
    # output should be in self.y to make the classify method work
    # correctly.  If you use different variable names, then you will need
    # to change that method accordingly.
    "*** YOUR CODE HERE ***"
    util.raise_not_defined()
def compute_action_from_q_values(self, state):
    """Compute the best action to take in ``state``.

    If there are no legal actions, which is the case at the terminal
    state, the result should be None.

    Exercise placeholder: until it is filled in, the body only calls
    ``util.raise_not_defined()``.
    """
    "*** YOUR CODE HERE ***"
    util.raise_not_defined()
def get_value(self, state):
    """Return the value of ``state`` under the best action.

    Concretely, this is given by

        V(s) = max_{a in actions} Q(s,a)

    This version is left unimplemented and only calls
    ``util.raise_not_defined()``.
    """
    util.raise_not_defined()
def get_q_value(self, state, action):
    """Return Q(state, action).

    Should return 0.0 if we have never seen a state, or the Q node value
    otherwise.

    Exercise placeholder: until it is filled in, the body only calls
    ``util.raise_not_defined()``.
    """
    "*** YOUR CODE HERE ***"
    util.raise_not_defined()
def get_cost_of_actions(self, actions):
    """Return the total cost of a particular sequence of actions.

    actions: A list of actions to take

    The sequence must be composed of legal moves.

    This version is left unimplemented and only calls
    ``util.raise_not_defined()``.
    """
    util.raise_not_defined()
def compute_value_from_q_values(self, state):
    """Return max_action Q(state, action) over the legal actions.

    If there are no legal actions, which is the case at the terminal
    state, the result should be a value of 0.0.

    Exercise placeholder: until it is filled in, the body only calls
    ``util.raise_not_defined()``.
    """
    "*** YOUR CODE HERE ***"
    util.raise_not_defined()
def find_high_weight_features(self, label, num=100):
    """Return a list of the ``num`` features with the greatest weight for
    ``label``.

    Implementing this is optional for this classifier; it is only needed
    if you want to visualize the classifier's weights.  Until then the
    body only calls ``util.raise_not_defined()``.
    """
    util.raise_not_defined()
def get_successors(self, state):
    """Return the successors of ``state``.

    state: Search state

    For a given state, this should return a list of triples
    ``(successor, action, step_cost)``, where 'successor' is a successor
    to the current state, 'action' is the action required to get there,
    and 'step_cost' is the incremental cost of expanding to that
    successor.

    This version is left unimplemented and only calls
    ``util.raise_not_defined()``.
    """
    util.raise_not_defined()
def compute_action_from_values(self, state):
    """Return the policy's action for ``state``.

    The policy is the best action in the given state according to the
    values currently stored in ``self.values``.  Ties may be broken any
    way you see fit.

    If there are no legal actions, which is the case at the terminal
    state, the result should be None.

    Exercise placeholder: until it is filled in, the body only calls
    ``util.raise_not_defined()``.
    """
    "*** YOUR CODE HERE ***"
    util.raise_not_defined()
def update(self, state, action, next_state, reward):
    """Observe one transition and perform the Q-value update.

    The parent class calls this to report a
    ``state = action => next_state`` transition together with its
    ``reward``.  The Q-value update belongs here.

    NOTE: You should never call this function yourself; it will be called
    on your behalf.

    Exercise placeholder: until it is filled in, the body only calls
    ``util.raise_not_defined()``.
    """
    "*** YOUR CODE HERE ***"
    util.raise_not_defined()
def enhanced_feature_extractor_digit(datum):
    """Feature extraction playground for digit data.

    Should return a util.Counter() of features for this datum (datum is of
    type samples.Datum).  The features start from the output of
    ``basic_feature_extractor_digit(datum)``; the enhanced additions are
    still a placeholder that calls ``util.raise_not_defined()``.

    ## DESCRIBE YOUR ENHANCED FEATURES HERE...

    ##
    """
    features = basic_feature_extractor_digit(datum)

    "*** YOUR CODE HERE ***"
    util.raise_not_defined()

    return features
def find_high_weight_features(self, label, num=100):
    """Return a list of ``num`` (default 100) features with the greatest
    weight for ``label``.

    Hint: ``self.features`` stores the list of feature names.  Here you
    will have to find which rows contain the largest values in the column
    of ``self.weights`` corresponding to the given label, and then return
    the feature names for those rows.

    Hint: to get the keys of a dictionary sorted by their value you can do

        sorted([key for key in dictionary.keys()],
               key=lambda k: dictionary[k])

    You can also set some other function or lambda expression as the sort
    key.

    Exercise placeholder: until it is filled in, the body only calls
    ``util.raise_not_defined()``.
    """
    "*** YOUR CODE HERE ***"
    util.raise_not_defined()
def get_action(self, state):
    """Compute the action to take in the current state.

    With probability ``self.epsilon`` a random action should be taken, and
    the best policy action otherwise.  If there are no legal actions,
    which is the case at the terminal state, None should be chosen as the
    action.

    HINT: You might want to use util.flip_coin(prob)
    HINT: To pick randomly from a list, use random.choice(list)

    Only the set-up below is provided; the selection itself is a
    placeholder that calls ``util.raise_not_defined()``.
    """
    # Pick Action
    legal_actions = self.get_legal_actions(state)
    action = None

    "*** YOUR CODE HERE ***"
    util.raise_not_defined()

    return action
def classify(self, data):
    """Classify each datum in ``data`` (exercise scaffold).

    Each datum is to be given the label that most closely matches the
    prototype vector for that label; see the project description for
    details.

    Recall that a datum is a util.counter.  The data list is first
    converted to a numpy array (``data_matrix``), and a prediction must
    then be returned for each entry.

    Raises an Exception if ``self.weights`` is None, i.e. the perceptron
    has not been trained.  The prediction step itself is a placeholder
    that calls ``util.raise_not_defined()``.
    """
    if self.weights is None:
        raise Exception(
            "the perceptron must be trained before data can be classified")

    # convert to numpy matrix
    rows = [datum.values_as_numpy_array() for datum in data]
    data_matrix = np.asarray(rows)

    "*** YOUR CODE HERE ***"
    util.raise_not_defined()
def train(self, training_data, training_labels, validation_data,
          validation_labels):
    """Training loop for the perceptron (exercise scaffold).

    The loop is meant to pass through the training data several times and
    update the weight vector for each label based on classification
    errors.  See the assignment description for details.

    The data still comes in with each data point as a counter from
    features to values for those features (and thus represents a vector
    of values), so it should first be converted to a numpy array using the
    counter.values_as_numpy_array method.

    Only the weight initialisation below is provided; the loop itself is a
    placeholder that calls ``util.raise_not_defined()``.
    """
    # now we can initialize the weights
    if self.weights is None:
        feature_names = list(training_data[0].keys())
        self.features = feature_names  # could be useful later
        # One row per feature, one column per legal label, all zeros.
        weight_shape = (len(feature_names), len(self.legal_labels))
        self.weights = np.zeros(weight_shape)

    "*** YOUR CODE HERE ***"
    util.raise_not_defined()