Example #1
    def _transition_q_learning(self):
        '''Updates self.state according to an epsilon-greedy strategy.'''
        if self.bucketed_state.as_tuple() not in self.qstore.q:
            self.enum.enumerate_state(self.bucketed_state, self.qstore.q)

        action_values = self.qstore.q[self.bucketed_state.as_tuple()]
        # epsilon greedy choice
        if np.random.random() < self.epsilon:
            action = se.State(state_list=action_values['actions'][
                np.random.randint(len(action_values['actions']))])
        else:
            max_q_value = max(action_values['utilities'])
            max_q_indexes = [
                i for i in range(len(action_values['actions']))
                if action_values['utilities'][i] == max_q_value
            ]
            max_actions = [action_values['actions'][i] for i in max_q_indexes]
            action = se.State(
                state_list=max_actions[np.random.randint(len(max_actions))])

        #print("Transitioning from "+str(self.state.as_tuple())+" ("+str(self.bucketed_state.as_tuple())+") with action "+str(action.as_tuple()))
        self.state = self.enum.state_action_transition(self.state, action)
        self.bucketed_state = self.enum.bucket_state(self.state)
        #print("New state "+str(self.bucketed_state.as_tuple()))

        self._post_transition_updates()
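
The selection rule above, stripped of the class machinery, is ordinary epsilon-greedy with random tie-breaking over the argmax set. A minimal, self-contained sketch (the `q_entry` layout mirrors the `actions`/`utilities` dictionaries used throughout; the function name is illustrative):

    import numpy as np

    def epsilon_greedy(q_entry, epsilon):
        '''Pick an action index from {'actions': [...], 'utilities': [...]}.'''
        utilities = q_entry['utilities']
        if np.random.random() < epsilon:
            # Explore: uniform over all actions.
            return np.random.randint(len(utilities))
        # Exploit: uniform over the argmax set, so ties break randomly.
        best = max(utilities)
        best_ixs = [i for i, u in enumerate(utilities) if u == best]
        return best_ixs[np.random.randint(len(best_ixs))]

    q_entry = {'actions': ['a', 'b', 'c'], 'utilities': [0.1, 0.9, 0.9]}
    print(q_entry['actions'][epsilon_greedy(q_entry, epsilon=0.1)])  # usually 'b' or 'c'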
Example #2
    def save_to_csv(self, q_csv_path):
        '''Flattens the Q-table into one CSV row per (state, action) pair.'''
        start_layer_type = []
        start_layer_depth = []
        start_filter_depth = []
        start_filter_size = []
        start_stride = []
        start_image_size = []
        start_fc_size = []
        start_terminate = []
        end_layer_type = []
        end_layer_depth = []
        end_filter_depth = []
        end_filter_size = []
        end_stride = []
        end_image_size = []
        end_fc_size = []
        end_terminate = []
        utility = []
        for start_state_list in self.q.keys():
            start_state = se.State(state_list=start_state_list)
            for to_state_ix in range(len(self.q[start_state_list]['actions'])):
                to_state = se.State(state_list=self.q[start_state_list]['actions'][to_state_ix])
                utility.append(self.q[start_state_list]['utilities'][to_state_ix])
                start_layer_type.append(start_state.layer_type)
                start_layer_depth.append(start_state.layer_depth)
                start_filter_depth.append(start_state.filter_depth)
                start_filter_size.append(start_state.filter_size)
                start_stride.append(start_state.stride)
                start_image_size.append(start_state.image_size)
                start_fc_size.append(start_state.fc_size)
                start_terminate.append(start_state.terminate)
                end_layer_type.append(to_state.layer_type)
                end_layer_depth.append(to_state.layer_depth)
                end_filter_depth.append(to_state.filter_depth)
                end_filter_size.append(to_state.filter_size)
                end_stride.append(to_state.stride)
                end_image_size.append(to_state.image_size)
                end_fc_size.append(to_state.fc_size)
                end_terminate.append(to_state.terminate)

        q_csv = pd.DataFrame({'start_layer_type': start_layer_type,
                              'start_layer_depth': start_layer_depth,
                              'start_filter_depth': start_filter_depth,
                              'start_filter_size': start_filter_size,
                              'start_stride': start_stride,
                              'start_image_size': start_image_size,
                              'start_fc_size': start_fc_size,
                              'start_terminate': start_terminate,
                              'end_layer_type': end_layer_type,
                              'end_layer_depth': end_layer_depth,
                              'end_filter_depth': end_filter_depth,
                              'end_filter_size': end_filter_size,
                              'end_stride': end_stride,
                              'end_image_size': end_image_size,
                              'end_fc_size': end_fc_size,
                              'end_terminate': end_terminate,
                              'utility': utility})
        q_csv.to_csv(q_csv_path, index=False)
Example #3
    def test_bucket_state_tuple(self):
        '''Checks that bucket_state_tuple rewrites the image size (index 5)
        to its bucketed value.'''
        test = True

        cases = [(se.State('start', 0, 1, 0, 0, 30, 0, 0, 0, 0).as_tuple(), 8),
                 (se.State('start', 0, 1, 0, 0, 7, 0, 0, 0, 0).as_tuple(), 4),
                 (se.State('start', 0, 1, 0, 0, 3, 0, 0, 0, 0).as_tuple(), 1)
                ]
        for case in cases:
            test = test and self.se.bucket_state_tuple(case[0])[5] == case[1]
            test = test and case[0][5] != case[1]

        return test
Example #4
    def test_bucket_state(self):
        '''Checks that bucket_state rewrites image_size to its bucketed
        value.'''
        test = True

        cases = [(se.State('start', 0, 1, 0, 0, 30, 0, 0, 0, 0), 8),
                 (se.State('start', 0, 1, 0, 0, 7, 0, 0, 0, 0), 4),
                 (se.State('start', 0, 1, 0, 0, 3, 0, 0, 0, 0), 1)
                ]
        for case in cases:
            test = test and self.se.bucket_state(case[0]).image_size == case[1]
            test = test and case[0].image_size != case[1]

        return test
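
Both bucketing tests pin the same behaviour at different call sites (tuple vs. State). The exact bucket boundaries live in StateEnumerator, but the test cases (30→8, 7→4, 3→1) are consistent with mapping a true image size down to the largest allowed bucket not exceeding it. A hypothetical sketch of that reading (the `allowed` boundaries are an assumption, not the repo's configuration):

    def bucket_image_size(size, allowed=(1, 4, 8)):
        '''Map a true image size to the largest allowed bucket <= size.'''
        # NOTE: the allowed sizes here are inferred from the test cases only.
        return max(b for b in allowed if b <= size)

    assert bucket_image_size(30) == 8
    assert bucket_image_size(7) == 4
    assert bucket_image_size(3) == 1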
Example #5
    def __init__(self,
                 state_space_parameters,
                 epsilon,
                 state=None,
                 qstore=None,
                 replay_dictionary=None):
        # Build the default replay table here rather than in the signature:
        # Python evaluates default arguments once, so a DataFrame default
        # would be shared (and mutated) across all instances.
        if replay_dictionary is None:
            replay_dictionary = pd.DataFrame(columns=['net',
                                                      'accuracy_best_val',
                                                      'accuracy_last_val',
                                                      'accuracy_best_test',
                                                      'accuracy_last_test',
                                                      'ix_q_value_update',
                                                      'epsilon'])
        self.state_list = []

        self.state_space_parameters = state_space_parameters

        # Class that will expand states for us
        self.enum = se.StateEnumerator(state_space_parameters)
        self.stringutils = StateStringUtils(state_space_parameters)
        self.model = self._build_model()

        # Starting State
        self.state = se.State('start', 0, 1, 0, 0, state_space_parameters.image_size, 0, 0) if not state else state
        self.bucketed_state = self.enum.bucket_state(self.state)

        # Cached Q-Values -- used for q learning update and transition
        self.qstore = QValues() if not qstore else qstore
        self.replay_dictionary = replay_dictionary

        self.epsilon = epsilon  # epsilon: parameter for epsilon greedy strategy
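
The replay table is constructed inside the body because Python evaluates default arguments once, at function definition; a mutable default would be shared across every call. A two-function illustration of the pitfall (generic Python, not repo code):

    def bad(rows=[]):       # one shared list, created at definition time
        rows.append(1)
        return rows

    def good(rows=None):    # fresh list per call
        rows = [] if rows is None else rows
        rows.append(1)
        return rows

    print(bad(), bad())     # [1, 1] [1, 1] -- state leaked across calls
    print(good(), good())   # [1] [1]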
Example #6
    def test_epsilon0_generation(self):
        test = True

        qstore = QValues()
        qstore.load_q_values(self.q_path)
        optimal_states = [self.start_state.copy()]
        bucketed_state = self.se.bucket_state(self.start_state)
        # Force a known optimal path: give one random action from each state
        # an overwhelmingly large utility, so a pure-greedy (epsilon = 0)
        # agent must follow exactly this sequence.
        while bucketed_state.as_tuple() in qstore.q and len(qstore.q[bucketed_state.as_tuple()]['utilities']):
            next_action_index = np.random.randint(len(qstore.q[bucketed_state.as_tuple()]['utilities']))
            qstore.q[bucketed_state.as_tuple()]['utilities'][next_action_index] = 100000000000.0
            next_state = self.se.state_action_transition(optimal_states[-1], 
                                                         se.State(state_list=qstore.q[bucketed_state.as_tuple()]['actions'][next_action_index]))
            optimal_states.append(next_state)
            bucketed_state = self.se.bucket_state(optimal_states[-1])

        ql = QLearner(ssp, 0.0, qstore=qstore)
        states = ql._run_agent()

        states = [state.as_tuple() for state in states]
        optimal_states = [self.se.bucket_state(state).as_tuple() for state in optimal_states[1:]]

        if len(states) != len(optimal_states):
            print(states)
            print(optimal_states)
            print('Wrong Length')
            return False

        for i in range(len(states)):
            test = test and states[i] == optimal_states[i]

        return test
Example #7
    def _reset_for_new_walk(self):
        '''Reset the state for a new random walk'''
        # Architecture String
        self.state_list = []

        # Starting State
        self.state = se.State('start', 0, 1, 0, 0, self.state_space_parameters.image_size, 0, 0)
        self.bucketed_state = self.enum.bucket_state(self.state)
Example #8
    def test_transition_to_action(self):
        test = True
        cases = [(se.State('start', 0, 1, 0, 0, 30, 0, 0, 0, 0), se.State('conv', 1, 1, 0, 0, 7, 0, 0, 0, 0), 30),
                 (se.State('conv', 0, 1, 0, 0, 7, 0, 0, 0, 0), se.State('fc', 1, 1, 0, 0, 0, 0, 0, 512, 0), 0),
                 (se.State('conv', 0, 1, 0, 0, 7, 0, 0, 0, 0), se.State('gap', 1, 1, 0, 0, 0, 0, 0, 0, 0), 0),
                 (se.State('start', 0, 1, 0, 0, 30, 0, 0, 0, 0), se.State('pool', 1, 1, 0, 0, 7, 0, 0, 0, 0), 30)
                ]

        for case in cases:
            test = test and self.se.transition_to_action(case[0], case[1]).image_size == case[2]

        return test
Example #9
    def load_q_values(self, q_csv_path):
        '''Rebuilds the in-memory Q-table from a CSV written by save_to_csv.'''
        self.q = {}
        q_csv = pd.read_csv(q_csv_path)
        for row in zip(*[q_csv[col].values.tolist() for col in ['start_layer_type',
                                                                'start_layer_depth',
                                                                'start_filter_depth',
                                                                'start_filter_size',
                                                                'start_stride',
                                                                'start_image_size',
                                                                'start_fc_size',
                                                                'start_terminate',
                                                                'end_layer_type',
                                                                'end_layer_depth',
                                                                'end_filter_depth',
                                                                'end_filter_size',
                                                                'end_stride',
                                                                'end_image_size',
                                                                'end_fc_size',
                                                                'end_terminate',
                                                                'utility']]):
            start_state = se.State(layer_type=row[0],
                                   layer_depth=row[1],
                                   filter_depth=row[2],
                                   filter_size=row[3],
                                   stride=row[4],
                                   image_size=row[5],
                                   fc_size=row[6],
                                   terminate=row[7]).as_tuple()
            end_state = se.State(layer_type=row[8],
                                 layer_depth=row[9],
                                 filter_depth=row[10],
                                 filter_size=row[11],
                                 stride=row[12],
                                 image_size=row[13],
                                 fc_size=row[14],
                                 terminate=row[15]).as_tuple()
            utility = row[16]

            if start_state not in self.q:
                self.q[start_state] = {'actions': [end_state], 'utilities': [utility]}
            else:
                self.q[start_state]['actions'].append(end_state)
                self.q[start_state]['utilities'].append(utility)
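
save_to_csv (Example #2) and load_q_values are intended as inverses. A quick round-trip sanity check, sketched under the assumption that QValues and se.State behave as in the examples above (the concrete state values are illustrative):

    import os
    import tempfile

    # Hypothetical states; the field values are placeholders.
    start = se.State('start', 0, 1, 0, 0, 8, 0, 0).as_tuple()
    end = se.State('conv', 1, 64, 3, 1, 8, 0, 0).as_tuple()

    qv = QValues()
    qv.q = {start: {'actions': [end], 'utilities': [0.5]}}

    path = os.path.join(tempfile.mkdtemp(), 'q.csv')
    qv.save_to_csv(path)

    qv2 = QValues()
    qv2.load_q_values(path)
    assert list(qv2.q) == [start]
    assert qv2.q[start]['utilities'] == [0.5]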
Example #10
    def _transition_q_learning(self, i):
        '''Updates self.state according to an epsilon-greedy strategy,
        using the model to predict the Q-value of every candidate action
        a' from the current state and taking the best one.'''
        if self.bucketed_state.as_tuple() not in self.qstore.q:
            self.enum.enumerate_state(self.bucketed_state, self.qstore.q)
        input_matrix_state = self.convert(self.state_list)
        action_values = self.qstore.q[self.bucketed_state.as_tuple()]
        # epsilon greedy choice
        if np.random.random() < self.epsilon:
            action = se.State(state_list=action_values['actions'][
                np.random.randint(len(action_values['actions']))])
            # Score the randomly chosen action without mutating state_list.
            newmatrix = self.convert(self.state_list + [action])
            best_utility = self.model.predict(newmatrix)
        else:
            actions = action_values['actions']
            # Predict a Q-value for each candidate action appended to the
            # current trajectory, then keep the argmax.
            utility_predict = []
            for act in actions:
                candidate = self.state_list + [se.State(state_list=act)]
                utility_predict.append(self.model.predict(self.convert(candidate)))
            best_index = int(np.argmax(utility_predict))
            best_utility = utility_predict[best_index]
            action = se.State(state_list=actions[best_index])

        self.state = self.enum.state_action_transition(self.state, action)
        self.bucketed_state = self.enum.bucket_state(self.state)

        self._post_transition_updates(input_matrix_state, best_utility)
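
The greedy branch above is "score every candidate with the model, take the argmax". The same idea in isolation, with toy stand-ins for convert and the model (all names here are illustrative):

    import numpy as np

    def greedy_by_model(state_list, actions, convert, model):
        '''Return (best_action, best_score) under the model's Q estimates.'''
        scores = [model(convert(state_list + [a])) for a in actions]
        best = int(np.argmax(scores))
        return actions[best], scores[best]

    # Toy stand-ins: states are numbers, convert sums the trajectory, and
    # the "model" prefers small sums.
    action, score = greedy_by_model([1, 2], [3, 0, 5],
                                    convert=sum, model=lambda x: -x)
    print(action, score)  # 0 -3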
Example #11
    def __init__(self):
        self.start_state = se.State('start', 0, 1, 0, 0, ssp.image_size, 0, 0,
                                    0, 0)
        self.q_path = 'needed_for_testing/q_values.csv'
        self.se = se.StateEnumerator(ssp)
Example #12
    def convert_model_string_to_states(self, parsed_list, start_state=None):
        '''Takes a parsed model string and returns the corresponding list of
        states, grouping consecutive branched layers into a Statecomplex.'''

        states = [start_state] if start_state else [
            se.State('start', 0, 1, 0, 0, self.image_size, 0, 0)
        ]

        for layer in parsed_list:
            if layer[0] == 'conv':
                states.append(
                    se.State(layer_type='conv',
                             layer_depth=states[-1].layer_depth + 1,
                             filter_depth=layer[1],
                             filter_size=layer[2],
                             stride=layer[3],
                             branch=layer[4],
                             image_size=states[-1].image_size,
                             fc_size=0,
                             terminate=0))
            elif layer[0] == 'gap':
                states.append(
                    se.State(layer_type='gap',
                             layer_depth=states[-1].layer_depth + 1,
                             filter_depth=0,
                             filter_size=0,
                             stride=0,
                             branch=layer[4],
                             image_size=1,
                             fc_size=0,
                             terminate=0))
            elif layer[0] == 'pool':
                states.append(
                    se.State(layer_type='pool',
                             layer_depth=states[-1].layer_depth + 1,
                             filter_depth=0,
                             filter_size=layer[1],
                             stride=layer[2],
                             image_size=self.enum._calc_new_image_size(
                                 states[-1].image_size, layer[1], layer[2]),
                             fc_size=0,
                             branch=layer[4],
                             terminate=0))
            elif layer[0] == 'fc':
                states.append(
                    se.State(layer_type='fc',
                             layer_depth=states[-1].layer_depth + 1,
                             filter_depth=len([
                                 state for state in states
                                 if state.layer_type == 'fc'
                             ]),
                             filter_size=0,
                             stride=0,
                             branch=layer[4],
                             image_size=0,
                             fc_size=layer[1],
                             terminate=0))
            elif layer[0] == 'dropout':
                states.append(
                    se.State(layer_type='dropout',
                             layer_depth=states[-1].layer_depth,
                             filter_depth=layer[1],
                             filter_size=0,
                             stride=0,
                             branch=layer[4],
                             image_size=states[-1].image_size,
                             fc_size=layer[2],
                             terminate=0))
            elif layer[0] == 'concat':
                states.append(
                    se.State(layer_type='concat',
                             layer_depth=states[-1].layer_depth,
                             filter_depth=layer[1],
                             filter_size=0,
                             stride=0,
                             branch=layer[4],
                             image_size=self.enum._calc_new_image_concat(
                                 states[-1]),
                             fc_size=0,
                             terminate=0))
            elif layer[0] == 'softmax':
                termination_state = states[-1].copy(
                ) if states[-1].layer_type != 'dropout' else states[-2].copy()
                termination_state.terminate = 1
                termination_state.layer_depth += 1
                states.append(termination_state)
        # Group consecutive branched states (branch != -1) into a single
        # Statecomplex; unbranched states pass through unchanged.
        list_new = []
        j = 0
        while j < len(states):
            if states[j].branch == -1:
                list_new.append(states[j])
                j += 1
                continue
            # Collect the full run of branched states starting at j.
            i = j
            branched = []
            while i < len(states) and states[i].branch != -1:
                branched.append(states[i])
                i += 1
            list0 = [com for com in branched if com.branch == 0]
            list1 = [com for com in branched if com.branch == 1]
            list2 = [com for com in branched if com.branch == 2]
            list_new.append(se.Statecomplex(list0, list1, list2, terminate=0))
            j = i

        return list_new
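
The grouping pass at the end is run-length grouping on the branch field. The same idea in isolation, with (name, branch) tuples standing in for State objects:

    def group_branch_runs(items):
        '''items: list of (name, branch); branch == -1 means unbranched.'''
        grouped, j = [], 0
        while j < len(items):
            if items[j][1] == -1:
                grouped.append(items[j])
                j += 1
                continue
            run = []
            while j < len(items) and items[j][1] != -1:
                run.append(items[j])
                j += 1
            # One block per run, split by branch id, mirroring Statecomplex.
            grouped.append(tuple([x for x in run if x[1] == b]
                                 for b in (0, 1, 2)))
        return grouped

    print(group_branch_runs([('a', -1), ('b', 0), ('c', 1), ('d', -1)]))
    # [('a', -1), ([('b', 0)], [('c', 1)], []), ('d', -1)]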
Example #13
    def _transition_q_learning(self):
        '''Updates self.state according to an epsilon-greedy strategy.'''
        if self.bucketed_complexstate is None and self.bucketed_state is not None:
            if self.bucketed_state.as_tuple() not in self.qstore.q:
                self.enum.enumerate_state(self.bucketed_state,
                                          self.bucketed_complexstate,
                                          self.qstore.q,
                                          branch=-1)

            action_values = self.qstore.q[self.bucketed_state.as_tuple()]
            # epsilon greedy choice
            if np.random.random() < self.epsilon:
                # Flip a fair coin between a simple and a complex action.
                # randint's upper bound is exclusive: (1, 3) yields 1 or 2,
                # whereas (1, 2) would always yield 1.
                x = np.random.randint(1, 3)
                if x == 1:
                    action = se.State(state_list=action_values['actions'][
                        np.random.randint(len(action_values['actions']))])
                    self.state = self.enum.state_action_transition(
                        self.state, action)
                else:
                    actioncomplex = se.Statecomplex(
                        state_list=action_values['actioncomplex'][
                            np.random.randint(
                                len(action_values['actioncomplex']))])
                    self.statecomplex = self.enum.state_action_transitioncomplex(
                        self.state, actioncomplex)
            else:
                # Compare the best simple action against the best complex one.
                n = max(action_values['utilities'])
                m = max(action_values['utilitiesC'])
                if n > m:
                    max_q_value = n
                    max_q_indexes = [
                        i for i in range(len(action_values['actions']))
                        if action_values['utilities'][i] == max_q_value
                    ]
                    max_actions = [
                        action_values['actions'][i] for i in max_q_indexes
                    ]
                    action = se.State(state_list=max_actions[np.random.randint(
                        len(max_actions))])
                    self.state = self.enum.state_action_transition(
                        self.state, action)
                else:
                    max_q_value = m
                    max_q_indexes = [
                        i for i in range(len(action_values['actioncomplex']))
                        if action_values['utilitiesC'][i] == max_q_value
                    ]
                    max_actions = [
                        action_values['actioncomplex'][i]
                        for i in max_q_indexes
                    ]
                    actioncomplex = max_actions[np.random.randint(
                        len(max_actions))]
                    self.statecomplex = self.enum.state_action_transitioncomplex(
                        self.state, actioncomplex)
        elif self.bucketed_complexstate is not None and self.bucketed_state is None:
            if self.bucketed_complexstate.as_tupleC() not in self.qstore.q:
                self.enum.enumerate_state(self.bucketed_state,
                                          self.bucketed_complexstate,
                                          self.qstore.q,
                                          branch=-1)

            action_values = self.qstore.q[
                self.bucketed_complexstate.as_tupleC()]
            # epsilon greedy choice
            if np.random.random() < self.epsilon:
                action = se.State(state_list=action_values['actions'][
                    np.random.randint(len(action_values['actions']))])
            else:
                max_q_value = max(action_values['utilities'])
                max_q_indexes = [
                    i for i in range(len(action_values['actions']))
                    if action_values['utilities'][i] == max_q_value
                ]
                max_actions = [
                    action_values['actions'][i] for i in max_q_indexes
                ]
                action = se.State(state_list=max_actions[
                    np.random.randint(len(max_actions))])
            # Route the chosen action into the branch it belongs to.
            if action.branch == 0:
                self.bucketed_complexstate.liststate_branch0.append(action)
            elif action.branch == 1:
                self.bucketed_complexstate.liststate_branch1.append(action)
            elif action.branch == 2:
                self.bucketed_complexstate.liststate_branch2.append(action)
            self.state = self.enum.state_action_transition(self.state, action)

        self.bucketed_state = self.enum.bucket_state(self.state)
        self.bucketed_complexstate = self.enum.bucket_complexstate(
            self.statecomplex)  # has to be integrated in self.enum

        self._post_transition_updates()