def _transition_q_learning(self):
    '''Updates self.state according to an epsilon-greedy strategy.

    Side effects: mutates self.state and self.bucketed_state, then calls
    self._post_transition_updates().
    '''
    # The Q-table is keyed by *bucketed* state tuples (see the lookup just
    # below), so the state handed to enumerate_state must be the bucketed
    # one; the original passed self.state, which would populate the table
    # under a different key and still KeyError on the next line.
    if self.bucketed_state.as_tuple() not in self.qstore.q:
        self.enum.enumerate_state(self.bucketed_state, self.qstore.q)

    action_values = self.qstore.q[self.bucketed_state.as_tuple()]

    # epsilon greedy choice
    if np.random.random() < self.epsilon:
        # Explore: uniformly random action.
        action = se.State(state_list=action_values['actions'][
            np.random.randint(len(action_values['actions']))])
    else:
        # Exploit: choose uniformly among actions tied for the max utility.
        max_q_value = max(action_values['utilities'])
        max_actions = [
            a for a, u in zip(action_values['actions'],
                              action_values['utilities'])
            if u == max_q_value
        ]
        action = se.State(
            state_list=max_actions[np.random.randint(len(max_actions))])

    self.state = self.enum.state_action_transition(self.state, action)
    self.bucketed_state = self.enum.bucket_state(self.state)
    self._post_transition_updates()
def save_to_csv(self, q_csv_path):
    """Flatten the Q-table into a CSV of (start state, end state, utility) rows.

    Parameters
    ----------
    q_csv_path : str
        Destination path; the file is written without an index column.
    """
    # The eight per-state attributes, in the column order the CSV uses.
    fields = ['layer_type', 'layer_depth', 'filter_depth', 'filter_size',
              'stride', 'image_size', 'fc_size', 'terminate']
    rows = []
    for start_state_list in self.q.keys():
        # (removed leftover debug print of start_state_list)
        start_state = se.State(state_list=start_state_list)
        entry = self.q[start_state_list]
        for to_state_list, utility in zip(entry['actions'],
                                          entry['utilities']):
            to_state = se.State(state_list=to_state_list)
            row = {'start_' + f: getattr(start_state, f) for f in fields}
            row.update({'end_' + f: getattr(to_state, f) for f in fields})
            row['utility'] = utility
            rows.append(row)
    # Explicit column order keeps the output identical to the original
    # 17-parallel-list construction, including when the table is empty.
    columns = (['start_' + f for f in fields]
               + ['end_' + f for f in fields]
               + ['utility'])
    q_csv = pd.DataFrame(rows, columns=columns)
    q_csv.to_csv(q_csv_path, index=False)
def test_bucket_state_tuple(self):
    """Check bucket_state_tuple maps the image_size slot (index 5) to its
    expected bucket, and that the bucketed value differs from the raw one."""
    expectations = [
        (se.State('start', 0, 1, 0, 0, 30, 0, 0, 0, 0).as_tuple(), 8),
        (se.State('start', 0, 1, 0, 0, 7, 0, 0, 0, 0).as_tuple(), 4),
        (se.State('start', 0, 1, 0, 0, 3, 0, 0, 0, 0).as_tuple(), 1),
    ]
    return all(
        self.se.bucket_state_tuple(raw)[5] == bucketed and raw[5] != bucketed
        for raw, bucketed in expectations)
def test_bucket_state(self):
    """Check bucket_state maps State.image_size to its expected bucket,
    and that the bucketed value differs from the raw one."""
    expectations = [
        (se.State('start', 0, 1, 0, 0, 30, 0, 0, 0, 0), 8),
        (se.State('start', 0, 1, 0, 0, 7, 0, 0, 0, 0), 4),
        (se.State('start', 0, 1, 0, 0, 3, 0, 0, 0, 0), 1),
    ]
    return all(
        self.se.bucket_state(raw).image_size == bucketed
        and raw.image_size != bucketed
        for raw, bucketed in expectations)
def __init__(self,
             state_space_parameters,
             epsilon,
             state=None,
             qstore=None,
             replay_dictionary=None):
    """Q-learning agent over the architecture state space.

    Parameters
    ----------
    state_space_parameters : object
        Configuration object; must expose at least `image_size`.
    epsilon : float
        Probability of taking a random action (epsilon-greedy strategy).
    state : se.State, optional
        Initial state; defaults to the canonical 'start' state.
    qstore : QValues, optional
        Pre-populated Q-value store; a fresh one is created when omitted.
    replay_dictionary : pd.DataFrame, optional
        Replay buffer of evaluated networks. The original used a DataFrame
        literal as the default argument — the mutable-default pitfall: the
        frame is built once at definition time and shared by every instance.
        A None sentinel with per-call construction is backward-compatible.
    """
    if replay_dictionary is None:
        replay_dictionary = pd.DataFrame(
            columns=['net', 'accuracy_best_val', 'accuracy_last_val',
                     'accuracy_best_test', 'accuracy_last_test',
                     'ix_q_value_update', 'epsilon'])

    self.state_list = []
    self.state_space_parameters = state_space_parameters

    # Class that will expand states for us
    self.enum = se.StateEnumerator(state_space_parameters)
    self.stringutils = StateStringUtils(state_space_parameters)
    self.model = self._build_model()

    # Starting State
    self.state = se.State('start', 0, 1, 0, 0,
                          state_space_parameters.image_size,
                          0, 0) if not state else state
    self.bucketed_state = self.enum.bucket_state(self.state)

    # Cached Q-Values -- used for q learning update and transition
    self.qstore = QValues() if not qstore else qstore
    self.replay_dictionary = replay_dictionary

    self.epsilon = epsilon  # parameter for epsilon greedy strategy
def test_epsilon0_generation(self):
    """With epsilon=0 the agent must follow the greedy (max-Q) path.

    Builds an 'optimal' trajectory by repeatedly boosting one random
    action's utility to a huge value, then checks that a QLearner run with
    epsilon=0.0 reproduces exactly that trajectory.
    """
    test = True
    qstore = QValues()
    qstore.load_q_values(self.q_path)
    optimal_states = [self.start_state.copy()]
    bucketed_state = self.se.bucket_state(self.start_state)
    # Walk forward while the current bucketed state has stored actions,
    # force-marking one random action per step as the unambiguous argmax.
    while bucketed_state.as_tuple() in qstore.q and len(
            qstore.q[bucketed_state.as_tuple()]['utilities']):
        next_action_index = np.random.randint(
            len(qstore.q[bucketed_state.as_tuple()]['utilities']))
        # Huge utility guarantees this action wins every greedy comparison.
        qstore.q[bucketed_state.as_tuple()]['utilities'][
            next_action_index] = 100000000000.0
        next_state = self.se.state_action_transition(
            optimal_states[-1],
            se.State(state_list=qstore.q[bucketed_state.as_tuple()]
                     ['actions'][next_action_index]))
        optimal_states.append(next_state)
        bucketed_state = self.se.bucket_state(optimal_states[-1])
    # epsilon=0.0 -> a purely greedy agent over the doctored Q-table.
    ql = QLearner(ssp, 0.0, qstore=qstore)
    states = ql._run_agent()
    states = [state.as_tuple() for state in states]
    # Drop the start state and compare in bucketed-tuple space.
    optimal_states = [
        self.se.bucket_state(state).as_tuple()
        for state in optimal_states[1:]
    ]
    if len(states) != len(optimal_states):
        print(states)
        print(optimal_states)
        print('Wrong Length')
        return False
    for i in range(len(states)):
        test = test and states[i] == optimal_states[i]
    return test
def _reset_for_new_walk(self):
    '''Reset the state for a new random walk'''
    # Drop the architecture accumulated during the previous walk.
    self.state_list = []
    # Rebuild the canonical 'start' state and its bucketed counterpart.
    image_size = self.state_space_parameters.image_size
    self.state = se.State('start', 0, 1, 0, 0, image_size, 0, 0)
    self.bucketed_state = self.enum.bucket_state(self.state)
def test_transition_to_action(self):
    """Check transition_to_action yields the expected image_size for a few
    representative (source state, destination state) pairs."""
    cases = (
        (se.State('start', 0, 1, 0, 0, 30, 0, 0, 0, 0),
         se.State('conv', 1, 1, 0, 0, 7, 0, 0, 0, 0), 30),
        (se.State('conv', 0, 1, 0, 0, 7, 0, 0, 0, 0),
         se.State('fc', 1, 1, 0, 0, 0, 0, 0, 512, 0), 0),
        (se.State('conv', 0, 1, 0, 0, 7, 0, 0, 0, 0),
         se.State('gap', 1, 1, 0, 0, 0, 0, 0, 0, 0), 0),
        (se.State('start', 0, 1, 0, 0, 30, 0, 0, 0, 0),
         se.State('pool', 1, 1, 0, 0, 7, 0, 0, 0, 0), 30),
    )
    return all(
        self.se.transition_to_action(src, dst).image_size == expected
        for src, dst, expected in cases)
def load_q_values(self, q_csv_path):
    """Rebuild self.q from a CSV previously written by save_to_csv.

    self.q maps a bucketed start-state tuple to
    {'actions': [end-state tuples], 'utilities': [floats]}.
    """
    self.q = {}
    q_csv = pd.read_csv(q_csv_path)
    # The eight per-state attributes, in CSV column order.
    field_names = ['layer_type', 'layer_depth', 'filter_depth',
                   'filter_size', 'stride', 'image_size', 'fc_size',
                   'terminate']
    columns = (['start_' + f for f in field_names]
               + ['end_' + f for f in field_names]
               + ['utility'])
    for row in zip(*[q_csv[col].values.tolist() for col in columns]):
        start_state = se.State(**dict(zip(field_names, row[:8]))).as_tuple()
        end_state = se.State(**dict(zip(field_names, row[8:16]))).as_tuple()
        utility = row[16]
        entry = self.q.setdefault(start_state,
                                  {'actions': [], 'utilities': []})
        entry['actions'].append(end_state)
        entry['utilities'].append(utility)
def _transition_q_learning(self, i):
    '''Updates self.state according to an epsilon-greedy strategy, scoring
    candidate actions with the model's predicted Q-values.

    Parameters
    ----------
    i : int
        Step index supplied by the caller; kept for interface compatibility
        (the selection logic itself does not use it).
    '''
    if self.bucketed_state.as_tuple() not in self.qstore.q:
        self.enum.enumerate_state(self.bucketed_state, self.qstore.q)

    # Matrix encoding of the architecture so far, handed to
    # _post_transition_updates together with the chosen action's utility.
    input_matrix_state = self.convert(self.state_list)

    action_values = self.qstore.q[self.bucketed_state.as_tuple()]
    # Q entries are keyed 'actions' (the original read 'action' on the
    # greedy path, which raised KeyError).
    candidate_actions = [
        se.State(state_list=a) for a in action_values['actions']
    ]

    if np.random.random() < self.epsilon:
        # Explore: random action; still query the model so the caller
        # receives the utility associated with the chosen action.
        action = candidate_actions[
            np.random.randint(len(candidate_actions))]
        best_utility = self.model.predict(
            self.convert(self.state_list + [action]))
    else:
        # Exploit: score each candidate on a *copy* of the state list.
        # (The original aliased self.state_list, appended every candidate
        # AND every prediction into the same list, then took argmax over
        # that mixed list — states and utilities are never comparable.)
        utilities = [
            self.model.predict(self.convert(self.state_list + [candidate]))
            for candidate in candidate_actions
        ]
        best_index = int(np.argmax(utilities))
        best_utility = utilities[best_index]
        action = candidate_actions[best_index]

    self.state = self.enum.state_action_transition(self.state, action)
    self.bucketed_state = self.enum.bucket_state(self.state)
    self._post_transition_updates(input_matrix_state, best_utility)
def __init__(self):
    """Shared test fixtures: a StateEnumerator over the test state-space
    parameters, the path of the stored Q-value CSV, and a fresh 'start'
    state sized from ssp.image_size."""
    self.se = se.StateEnumerator(ssp)
    self.q_path = 'needed_for_testing/q_values.csv'
    self.start_state = se.State('start', 0, 1, 0, 0,
                                ssp.image_size, 0, 0, 0, 0)
def convert_model_string_to_states(self, parsed_list, start_state=None):
    '''Takes a parsed model string and returns a recursive list of states.

    Each element of parsed_list is a layer tuple whose first item names the
    layer kind ('conv', 'gap', 'pool', 'fc', 'dropout', 'concat',
    'softmax'); index 4 carries the branch id (-1 = main trunk).  After
    building the flat state sequence, each run of consecutive branched
    states is folded into one se.Statecomplex grouping branches 0, 1 and 2.
    '''
    states = [start_state] if start_state else [
        se.State('start', 0, 1, 0, 0, self.image_size, 0, 0)
    ]
    for layer in parsed_list:
        if layer[0] == 'conv':
            states.append(
                se.State(layer_type='conv',
                         layer_depth=states[-1].layer_depth + 1,
                         filter_depth=layer[1],
                         filter_size=layer[2],
                         stride=layer[3],
                         branch=layer[4],
                         image_size=states[-1].image_size,
                         fc_size=0,
                         terminate=0))
        elif layer[0] == 'gap':
            # Global average pooling collapses the spatial size to 1.
            states.append(
                se.State(layer_type='gap',
                         layer_depth=states[-1].layer_depth + 1,
                         filter_depth=0,
                         filter_size=0,
                         stride=0,
                         branch=layer[4],
                         image_size=1,
                         fc_size=0,
                         terminate=0))
        elif layer[0] == 'pool':
            states.append(
                se.State(layer_type='pool',
                         layer_depth=states[-1].layer_depth + 1,
                         filter_depth=0,
                         filter_size=layer[1],
                         stride=layer[2],
                         image_size=self.enum._calc_new_image_size(
                             states[-1].image_size, layer[1], layer[2]),
                         fc_size=0,
                         branch=layer[4],
                         terminate=0))
        elif layer[0] == 'fc':
            states.append(
                se.State(layer_type='fc',
                         layer_depth=states[-1].layer_depth + 1,
                         # filter_depth counts fc layers already emitted
                         filter_depth=len([
                             state for state in states
                             if state.layer_type == 'fc'
                         ]),
                         filter_size=0,
                         stride=0,
                         branch=layer[4],
                         image_size=0,
                         fc_size=layer[1],
                         terminate=0))
        elif layer[0] == 'dropout':
            # Dropout shares the depth of the layer it wraps.
            states.append(
                se.State(layer_type='dropout',
                         layer_depth=states[-1].layer_depth,
                         filter_depth=layer[1],
                         filter_size=0,
                         stride=0,
                         branch=layer[4],
                         image_size=states[-1].image_size,
                         fc_size=layer[2],
                         terminate=0))
        elif layer[0] == 'concat':
            states.append(
                se.State(layer_type='concat',
                         layer_depth=states[-1].layer_depth,
                         filter_depth=layer[1],
                         filter_size=0,
                         stride=0,
                         branch=layer[4],
                         # NOTE(review): passing `self` (a StateStringUtils)
                         # as the first argument looks suspect — confirm
                         # _calc_new_image_concat's expected signature.
                         image_size=self.enum._calc_new_image_concat(
                             self, states[-1]),
                         fc_size=0,
                         terminate=0))
        elif layer[0] == 'softmax':
            # Terminate on the last non-dropout state.
            termination_state = (states[-1].copy()
                                 if states[-1].layer_type != 'dropout'
                                 else states[-2].copy())
            termination_state.terminate = 1
            termination_state.layer_depth += 1
            states.append(termination_state)

    # Fold runs of branched states (branch != -1) into Statecomplex entries.
    # The original used `for j in range(...)` and reassigned j/i, which does
    # not skip in Python, so every state of a branched run was re-processed;
    # it also referenced the undefined names `lis2` and `terminate`
    # (NameError) and could index past the end of `states`.  A while-loop
    # with an explicit cursor fixes all of these.
    list_new = []
    j = 0
    while j < len(states):
        if states[j].branch == -1:
            list_new.append(states[j])
            j += 1
            continue
        run_end = j
        branch_run = []
        while run_end < len(states) and states[run_end].branch != -1:
            branch_run.append(states[run_end])
            run_end += 1
        list0 = [s for s in branch_run if s.branch == 0]
        list1 = [s for s in branch_run if s.branch == 1]
        list2 = [s for s in branch_run if s.branch == 2]
        # TODO(review): confirm Statecomplex's 4th argument is a terminate
        # flag; 0 (non-terminal) matches the apparent intent of the
        # original's `terminate == 0`.
        list_new.append(se.Statecomplex(list0, list1, list2, 0))
        j = run_end
    return list_new
def _transition_q_learning(self):
    ''' Updates self.state according to an epsilon-greedy strategy'''
    # NOTE(review): leftover debug prints.
    print('hhh', self.bucketed_state.as_tuple())
    print('mmm', self.qstore.q)
    # Case 1: currently in a simple (non-complex) state.
    if self.bucketed_complexstate == None and self.bucketed_state != None:
        # Lazily enumerate the actions reachable from this bucketed state.
        if self.bucketed_state.as_tuple() not in self.qstore.q:
            self.enum.enumerate_state(self.bucketed_state,
                                      self.bucketed_complexstate,
                                      self.qstore.q,
                                      branch=-1)
        action_values = self.qstore.q[self.bucketed_state.as_tuple()]
        # epsilon greedy choice
        if np.random.random() < self.epsilon:
            # NOTE(review): np.random.randint(1, 2) ALWAYS returns 1 (the
            # upper bound is exclusive), so the x == 2 complex-action path
            # below is dead code — presumably randint(1, 3) was intended.
            x = np.random.randint(1, 2)
            if x == 1:
                action = se.State(state_list=action_values['actions'][
                    np.random.randint(len(action_values['actions']))])
            if x == 2:
                actioncomplex = se.Statecomplex(
                    state_list=action_values['actioncomplex'][
                        np.random.randint(
                            len(action_values['actioncomplex']))])
            # NOTE(review): no state_action_transition is applied on this
            # exploration path before the re-bucketing at the bottom of the
            # method — confirm this is intentional.
        else:
            # Compare the best simple-action utility against the best
            # complex-action utility and follow the stronger one.
            n = max(action_values['utilities'])
            m = max(action_values['utilitiesC'])
            if n > m:
                max_q_value = max(action_values['utilities'])
                # Tie-break uniformly among max-utility simple actions.
                max_q_indexes = [
                    i for i in range(len(action_values['actions']))
                    if action_values['utilities'][i] == max_q_value
                ]
                max_actions = [
                    action_values['actions'][i] for i in max_q_indexes
                ]
                action = se.State(state_list=max_actions[np.random.randint(
                    len(max_actions))])
                self.state = self.enum.state_action_transition(
                    self.state, action)
            else:
                max_q_valuee = max(action_values['utilitiesC'])
                max_q_indexes = [
                    i for i in range(len(action_values['actioncomplex']))
                    if action_values['utilitiesC'][i] == max_q_valuee
                ]
                max_actions = [
                    action_values['actioncomplex'][i] for i in max_q_indexes
                ]
                actioncomplex = max_actions[np.random.randint(
                    len(max_actions))]
                self.statecomplex = self.enum.state_action_transitioncomplex(
                    self.state, actioncomplex)
    else:
        # Case 2: currently inside a complex (branched) state.
        if self.bucketed_complexstate != None and self.bucketed_state == None:
            if self.bucketed_complexstate.as_tupleC() not in self.qstore.q:
                self.enum.enumerate_state(self.bucketed_state,
                                          self.bucketed_complexstate,
                                          self.qstore.q,
                                          branch=-1)
            action_values = self.qstore.q[
                self.bucketed_complexstate.as_tupleC()]
            # epsilon greedy choice
            if np.random.random() < self.epsilon:
                action = se.State(state_list=action_values['actions'][
                    np.random.randint(len(action_values['actions']))])
            else:
                max_q_value = max(action_values['utilities'])
                max_q_indexes = [
                    i for i in range(len(action_values['actions']))
                    if action_values['utilities'][i] == max_q_value
                ]
                max_actions = [
                    action_values['actions'][i] for i in max_q_indexes
                ]
                action = se.State(state_list=max_actions[
                    np.random.randint(len(max_actions))])
            # Record the chosen action under its branch of the complex state.
            branch = action.branch
            if branch == 0:
                self.bucketed_complexstate.liststate_branch0.append(
                    action)
            if branch == 1:
                self.bucketed_complexstate.liststate_branch1.append(
                    action)
            if branch == 2:
                self.bucketed_complexstate.liststate_branch2.append(
                    action)
            self.state = self.enum.state_action_transition(
                self.state, action)
    # NOTE(review): self.statecomplex is only assigned on the complex greedy
    # path above, so bucket_complexstate may receive a stale or missing
    # value here; `action` can also be unbound on some paths. Verify.
    self.bucketed_state = self.enum.bucket_state(self.state)
    self.bucketed_complexstate = self.enum.bucket_complexstate(
        self.statecomplex)  # has to be integrated in self.enum
    self._post_transition_updates()