class LearningModule:
    """ROS service frontend for layered blocks-world MDP learning.

    Maintains layers of MDPs (one layer per block set), learns Q-values
    on-policy from executed/failed actions, and distils the combined MDPs
    into decision-tree rules served over ROS.

    NOTE(review): this file defines LearningModule twice; this first
    definition is shadowed by the later one at import time -- confirm
    which version is intended to be live.
    """

    def __init__(self):
        # List of "layers"; each layer is a list of MDPs over one block set.
        self.mdp_list = []
        # Per-layer list of known-good (success) State objects.
        self.success_config = []
        self.decision_tree = None
        # Flat training examples: (shape, colour, size[, dest attrs], q-value).
        self.StateActionPairs = []
        # Advertise the module's ROS services (rospy keeps them registered).
        src_error = rospy.Service('LMErrorHasOccured', LMErrorHasOccured, self.errorHandle)
        src_rules = rospy.Service('LMGenerateRules', LMGenerateRules, self.generateRules)
        srv_init = rospy.Service('LMInitialise', LMInitialise, self.initialise_mdp)
        # BUG FIX: this handle previously reused the name srv_state and
        # silently overwrote the LMInitialise handle above.
        srv_blocks = rospy.Service('LMNewBlocks', LMNewBlocks, self.newBlocks)
        srv_action = rospy.Service('LMStateActionTaken', LMStateActionTaken, self.onPolicyLearning)
        # Start with a single empty layer.
        self.mdp_list.append([])

    def initialise_lists(self):
        # Open a fresh success-configuration bucket for the current layer.
        self.success_config.append([])

    def initialise_mdp(self, state):
        """LMInitialise callback: build a fresh MDP for the reported start state.

        Returns True on success, False on any failure (service contract).
        """
        try:
            blocks = [Block(prop.label, prop.shape, prop.colour, prop.size)
                      for prop in state.initial_state.block_properties]
            start_config = state.initial_state.configuration.config
            startingState = State(0, start_config)
            self.initialise_lists()
            # The start configuration doubles as a known-good (success) state.
            self.success_config[-1].append(startingState)
            label = len(self.mdp_list[-1])
            print("")
            print(label)
            print("")
            mdp = MDP(label, blocks)
            mdp.statelist.append(startingState)
            mdp.initMDP(startingState)
            self.mdp_list[-1].append(mdp)
            print("MDP initialised")
            return True
        except Exception:
            # Narrowed from a bare except: report failure to the caller
            # instead of raising into rospy.
            return False

    def newBlocks(self, blockSet):
        """LMNewBlocks callback: fold the current layer's MDPs into one,
        harvest its state/action training examples, then open a new layer."""
        try:
            # BUG FIX: the combined MDP was previously assigned directly to
            # self.mdp_list[-1] (an MDP, not a list), so the [-1] index on
            # the next line failed and the bare except masked it. Wrap it in
            # a list, consistent with generateRules().
            self.mdp_list[-1] = [self.combineIdenticalMDPs(self.mdp_list[-1])]
            # Add the combined MDP's state/action pairs to the training list.
            self.writeToList(self.mdp_list[-1][-1])
            self.new_layer()
            return True
        except Exception:
            return False

    def new_layer(self):
        # Append a fresh, empty layer of MDPs.
        try:
            self.mdp_list.append([])
            return True
        except Exception:
            return False

    def combineIdenticalMDPs(self, mdp_list):
        """Merge MDPs over identical state spaces into a single MDP.

        Q-values are averaged with weights proportional to 1/distance, so
        entries closer to the goal count more. Returns a deep copy of the
        first MDP carrying the blended Q-matrix.
        """
        print("combining")
        n = len(mdp_list[0].getStateList())
        sum_distance = [[0.0 for i in range(0, n)] for j in range(0, n)]
        weighted_average = [[0.0 for i in range(0, n)] for j in range(0, n)]
        # First pass: accumulate the normalising constant sum(1/distance).
        for mdp in mdp_list:
            for i, row in enumerate(mdp.getDistanceMatrix()):
                for j, distance in enumerate(row):
                    if distance > 0:
                        sum_distance[i][j] += 1/distance
        # Second pass: blend each MDP's Q-value by its normalised weight.
        for mdp in mdp_list:
            for i, row in enumerate(mdp.getDistanceMatrix()):
                for j, distance in enumerate(row):
                    if distance > 0.0 and sum_distance[i][j] > 0.0:
                        weight = (1/distance)/(sum_distance[i][j])
                        weighted_average[i][j] += weight*mdp.getQMatrix()[i][j]
        newMDP = deepcopy(mdp_list[0])
        newMDP.setQMatrix(weighted_average)
        return newMDP

    def findState(self, config, mdp):
        """Return the State in mdp whose configuration equals config,
        or None when no state matches."""
        for state in mdp.getStateList():
            if state.getConfiguration() == config:
                return state
        return None

    def errorHandle(self, action_chosen):
        """LMErrorHasOccured callback: learn from the action that caused an
        error, then simulate recovery from the current error state."""
        print("errr")
        request = action_chosen.action_chosen
        # Block names end in their numeric index, e.g. 'block3' -> 3.
        # NOTE(review): unlike onPolicyLearning, a 'table' destination is
        # not handled here and would raise IndexError -- confirm callers
        # never report table moves through this service.
        action_block = int(re.findall(r'\d+$', request.actionableBlock)[0])
        dest_block = int(re.findall(r'\d+$', request.destinationBlock)[0])
        chosen = None
        for candidate in self.mdp_list[-1][-1].getErrorState().getActions():
            if candidate.getActionableBlock() == action_block:
                if candidate.getDestinationBlock() == dest_block:
                    chosen = candidate
        self.mdp_list[-1][-1].onPolicyLearning(chosen)
        error_config = self.mdp_list[-1][-1].getErrorState()
        print(self.success_config[-1])
        self.mdp_list[-1][-1].simulation(error_config, self.success_config[-1])
        return True

    def onPolicyLearning(self, action):
        """LMStateActionTaken callback: feed the executed action back into
        the current MDP and record the resulting state as a success."""
        actionableBlock = int(re.findall(r'\d+$', action.action_chosen.actionableBlock)[0])
        if re.findall('tab', action.action_chosen.destinationBlock):
            # A table destination has no block index.
            print("###############TABLE################")
            destinationBlock = None
        else:
            destinationBlock = int(re.findall(r'\d+$', action.action_chosen.destinationBlock)[0])
        # BUG FIX: the lookup loop used to reuse the name 'action',
        # shadowing the service request parameter.
        action_chosen = None
        for candidate in self.mdp_list[-1][-1].errorstate.actions:
            print(candidate.actionableBlock)
            print(candidate.destinationBlock)
            if (actionableBlock == candidate.actionableBlock) and (destinationBlock == candidate.destinationBlock):
                action_chosen = candidate
        self.mdp_list[-1][-1].onPolicyLearning(action_chosen)
        # The post-action error state is recorded as a new success config.
        config = self.mdp_list[-1][-1].getErrorState()
        self.success_config[-1].append(config)
        return True

    def writeToList(self, mdp):
        """Append one training example per (state, action) of mdp.

        Each example is the moved block's attributes (plus the destination
        block's, when the move is not to the table) and the Q-value of the
        transition.
        """
        blocks = mdp.getBlocks()
        for state in mdp.getStateList():
            for action in state.getActions():
                action_block = action.getActionableBlock()
                dest_block = action.getDestinationBlock()
                q_value = mdp.getQMatrix()[state.getLabel()][action.getNextStateAddr()]
                if dest_block is None:
                    # Move-to-table: no destination-block attributes.
                    example = (blocks[action_block].getShape(),
                               blocks[action_block].getColour(),
                               blocks[action_block].getSize(),
                               q_value)
                else:
                    example = (blocks[action_block].getShape(),
                               blocks[action_block].getColour(),
                               blocks[action_block].getSize(),
                               blocks[dest_block].getShape(),
                               blocks[dest_block].getColour(),
                               blocks[dest_block].getSize(),
                               q_value)
                self.StateActionPairs.append(example)
        return

    def generateRules(self, randomCharacterBeingSentSomehow):
        """LMGenerateRules callback: train a decision tree on the collected
        state/action examples and return the best extracted rules."""
        print("generateRules")
        attributes = []
        self.mdp_list[-1] = [self.combineIdenticalMDPs(self.mdp_list[-1])]
        self.writeToList(self.mdp_list[-1][-1])
        training_set = self.StateActionPairs
        attr_shape = ("cube", "prism", "cuboid")
        attr_colour = ("red", "blue", "green")
        attr_size = ("small", "medium", "large")
        # A = actionable block, D = destination block. String literals are
        # kept exactly as the downstream rule consumer expects them
        # (including the uneven trailing spaces).
        attribute_dict = OrderedDict([("has_shape(A,", attr_shape),
                                      ("has_colour(A, ", attr_colour),
                                      ("has_size(A, ", attr_size),
                                      ("has_shape(D, ", attr_shape),
                                      ("has_colour(D, ", attr_colour),
                                      ("has_size(D, ", attr_size)])
        for index, (name, vals) in enumerate(attribute_dict.items()):
            attributes.append(Attribute(name, index, vals))
        self.decision_tree = DecisionTree(attributes, training_set)
        rules = self.decision_tree.getRules()
        rules = self.selectRules(rules)
        print("")
        print(rules)
        return rules

    def selectRules(self, rules):
        """Keep only the rules in the highest-Q k-means cluster.

        Rules are sorted by their trailing Q-value, clustered into three
        groups on (rank, Q), and the cluster containing the top rule is
        returned, parsed into sentences.
        """
        rules = sorted(rules, key=operator.itemgetter(-1))
        q_val = [[index, rule[-1]] for index, rule in enumerate(rules)]
        whitened = whiten(q_val)
        centroids, _ = kmeans(whitened, 3, thresh=1, iter=100)
        ids, _ = vq(whitened, centroids)
        # The last (highest-Q) rule identifies the "best" cluster.
        key = ids[-1]
        indices = [index for index, keys in enumerate(ids) if key == keys]
        valid_rules = [rules[index][0] for index in indices]
        return self.parseRules(valid_rules)

    def parseRules(self, rules):
        """Join each rule's segments into a comma-separated sentence and
        wrap them in a Rules message."""
        valid_rules = []
        for rule in rules:
            sentence = ", ".join(rule)
            valid_rules.append(sentence)
        return Rules(rule=valid_rules)

    def reduceMDP(self, errorconfig, stack_config, start_config, blocks):
        """Build one MDP per error configuration, simulate recovery towards
        the stack configurations, and combine the results into one MDP."""
        mdp_list = []
        for i in range(0, len(errorconfig)):
            mdp_list.append(MDP(i, blocks))
            startingState = State(0, start_config)
            mdp_list[i].statelist.append(startingState)
            mdp_list[i].initMDP(startingState)
            errorstate = self.findState(errorconfig[i], mdp_list[i])
            stackstate = []
            for j in range(0, len(stack_config)):
                stackstate.append(self.findState(stack_config[j], mdp_list[i]))
            mdp_list[i].simulation(errorstate, stackstate)
            mdp_list[i].updateDistanceMatrix(errorstate)
        reduced_mdp = self.combineIdenticalMDPs(mdp_list)
        return reduced_mdp
class LearningModule:
    """Offline variant of the blocks-world learning module (no ROS services).

    Maintains layers of MDPs, blends their Q-matrices, and extracts
    decision-tree rules from the accumulated state/action examples.

    NOTE(review): this second definition shadows the earlier
    LearningModule in this file -- confirm this is the intended live one.
    """

    def __init__(self):
        # List of "layers"; each layer is a list of MDPs over one block set.
        self.mdp_list = []
        # Per-layer list of known-good (success) State objects.
        self.success_config = []
        self.decision_tree = None
        # Flat training examples: (shape, colour, size[, dest attrs], q-value).
        self.StateActionPairs = []
        # Start with a single empty layer.
        self.mdp_list.append([])

    def initialiseAttributes(self):
        """Build the binary Attribute set: ground on/shape/colour/size/move
        predicates for three blocks b0..b2, each with values true/false.

        (Predicate strings are preserved byte-for-byte, including the
        uneven spacing, for downstream consumers.)
        """
        binary_values = ("true", "false")
        predicates = ["on(b0, table)", "on(b0, b1)", "on(b0,b2)",
                      "on(b1, table)", "on(b1, b0)", "on(b1, b2)",
                      "on(b2, table)", "on(b2, b0)", "on(b2,b1)",
                      "has_shape(b0, prism)", "has_shape(b1, prism)", "has_shape(b2, prism)",
                      "has_shape(b0, cube)", "has_shape(b1, cube)", "has_shape(b2, cube)",
                      "has_shape(b0, cuboid)", "has_shape(b1, cuboid)", "has_shape(b2, cuboid)",
                      "has_colour(b0, red)", "has_colour(b1, red)", "has_colour(b2, red)",
                      "has_colour(b0, blue)", "has_colour(b1, blue)", "has_colour(b2, blue)",
                      "has_colour(b0, green)", "has_colour(b1, green)", "has_colour(b2, green)",
                      "has_size(b0, small)", "has_size(b1, small)", "has_size(b2, small)",
                      "has_size(b0, medium)", "has_size(b1, medium)", "has_size(b2, medium)",
                      "has_size(b0, large)", "has_size(b1, large)", "has_size(b2, large)",
                      "move(b0, table)", "move(b0, b1)", "move(b0, b2)",
                      "move(b1, table)", "move(b1, b0)", "move(b1, b2)",
                      "move(b2, table)", "move(b2, b0)", "move(b2, b1)"]
        # The original routed these through an OrderedDict only to iterate
        # it back in order; a direct enumerate is equivalent.
        attributes = []
        for index, name in enumerate(predicates):
            attributes.append(Attribute(name, index, binary_values))
        return attributes

    def initialise_mdp(self, blocks):
        """Create a fresh MDP for the given blocks, starting from the
        all-on-table configuration."""
        # -1 per block -- presumably "on the table"; confirm against State.
        start_config = [-1, -1, -1]
        startingState = State(0, blocks, start_config)
        self.initialise_lists()
        # The start configuration doubles as a known-good (success) state.
        self.success_config[-1].append(startingState)
        label = len(self.mdp_list[-1])
        mdp = MDP(label, blocks)
        mdp.statelist.append(startingState)
        mdp.initMDP(startingState)
        self.mdp_list[-1].append(mdp)

    def new_layer(self):
        # Append a fresh, empty layer of MDPs.
        try:
            self.mdp_list.append([])
            return True
        except Exception:
            # Narrowed from a bare except; keeps the True/False contract.
            return False

    def initialise_lists(self):
        # Open a fresh success-configuration bucket for the current layer.
        self.success_config.append([])

    def combineIdenticalMDPs(self, mdp_list):
        """Merge MDPs over identical state spaces into a single MDP.

        Q-values are averaged with weights proportional to 1/distance, so
        entries closer to the goal count more. Returns a deep copy of the
        first MDP carrying the blended Q-matrix.
        """
        print("combining")
        n = len(mdp_list[0].getStateList())
        sum_distance = [[0.0 for i in range(0, n)] for j in range(0, n)]
        weighted_average = [[0.0 for i in range(0, n)] for j in range(0, n)]
        # First pass: accumulate the normalising constant sum(1/distance).
        for mdp in mdp_list:
            for i, row in enumerate(mdp.getDistanceMatrix()):
                for j, distance in enumerate(row):
                    if distance > 0:
                        sum_distance[i][j] += 1/distance
        # Second pass: blend each MDP's Q-value by its normalised weight.
        for mdp in mdp_list:
            for i, row in enumerate(mdp.getDistanceMatrix()):
                for j, distance in enumerate(row):
                    if distance > 0.0 and sum_distance[i][j] > 0.0:
                        weight = (1/distance)/(sum_distance[i][j])
                        weighted_average[i][j] += weight*mdp.getQMatrix()[i][j]
        newMDP = deepcopy(mdp_list[0])
        newMDP.setQMatrix(weighted_average)
        return newMDP

    def findState(self, config, mdp):
        """Return the State in mdp whose configuration equals config,
        or None when no state matches."""
        for state in mdp.getStateList():
            if state.getConfiguration() == config:
                return state
        return None

    def errorHandle(self, error_config, success_config, attributes):
        """Resolve the error/success configurations to State objects and run
        the recovery simulation on the current MDP."""
        success_states = []
        for config in success_config:
            success_states.append(self.findState(config, self.mdp_list[-1][-1]))
        error_state = self.findState(error_config, self.mdp_list[-1][-1])
        # BUG FIX: success_states was computed and then discarded -- the
        # stale layer-level self.success_config[-1] was passed instead,
        # making the success_config parameter dead. Pass the freshly
        # resolved states.
        self.mdp_list[-1][-1].simulation(error_state, success_states, attributes)

    def writeToList(self, mdp):
        """Append one training example per (state, action) of mdp.

        Each example is the moved block's attributes (plus the destination
        block's, when the move is not to the table) and the Q-value of the
        transition.
        """
        blocks = mdp.getBlocks()
        for state in mdp.getStateList():
            for action in state.getActions():
                action_block = action.getActionableBlock()
                dest_block = action.getDestinationBlock()
                q_value = mdp.getQMatrix()[state.getLabel()][action.getNextStateAddr()]
                if dest_block is None:
                    # Move-to-table: no destination-block attributes.
                    example = (blocks[action_block].getShape(),
                               blocks[action_block].getColour(),
                               blocks[action_block].getSize(),
                               q_value)
                else:
                    example = (blocks[action_block].getShape(),
                               blocks[action_block].getColour(),
                               blocks[action_block].getSize(),
                               blocks[dest_block].getShape(),
                               blocks[dest_block].getColour(),
                               blocks[dest_block].getSize(),
                               q_value)
                self.StateActionPairs.append(example)
        return

    def generateRules(self):
        """Train a decision tree on the collected state/action examples and
        print the extracted rules (selection is currently disabled)."""
        attributes = []
        self.mdp_list[-1] = [self.combineIdenticalMDPs(self.mdp_list[-1])]
        self.writeToList(self.mdp_list[-1][-1])
        training_set = self.StateActionPairs
        attr_shape = ("cube", "prism", "cuboid")
        attr_colour = ("red", "blue", "green")
        attr_size = ("small", "medium", "large")
        # A = actionable block, D = destination block. String literals are
        # kept exactly as the downstream rule consumer expects them.
        attribute_dict = OrderedDict([("has_shape(A,", attr_shape),
                                      ("has_colour(A,", attr_colour),
                                      ("has_size(A,", attr_size),
                                      ("has_shape(D,", attr_shape),
                                      ("has_colour(D,", attr_colour),
                                      ("has_size(D,", attr_size)])
        for index, (name, vals) in enumerate(attribute_dict.items()):
            attributes.append(Attribute(name, index, vals))
        self.decision_tree = DecisionTree(attributes, training_set)
        rules = self.decision_tree.getRules()
        for rule in rules:
            print(rule)
        # rules = self.selectRules(rules)

    def selectRules(self, rules):
        """Keep only the rules in the highest-Q k-means cluster.

        Rules are sorted by their trailing Q-value, clustered into three
        groups on (rank, Q), and the cluster containing the top rule is
        returned, parsed into sentences.
        """
        print(rules)
        for rule in rules:
            print(rule)
            print("\n")
        rules = sorted(rules, key=operator.itemgetter(-1))
        q_val = [[index, rule[-1]] for index, rule in enumerate(rules)]
        whitened = whiten(q_val)
        centroids, _ = kmeans(whitened, 3, thresh=1, iter=100)
        ids, _ = vq(whitened, centroids)
        # The last (highest-Q) rule identifies the "best" cluster.
        key = ids[-1]
        indices = [index for index, keys in enumerate(ids) if key == keys]
        valid_rules = [rules[index][0] for index in indices]
        return self.parseRules(valid_rules)

    def parseRules(self, rules):
        """Join each rule's segments into comma-separated sentences."""
        valid_rules = []
        for rule in rules:
            sentence = ", ".join(rule)
            valid_rules.append(sentence)
        return valid_rules

    def reduceMDP(self, errorconfig, stack_config, start_config, blocks):
        """Build one MDP per error configuration, simulate recovery towards
        the stack configurations, and combine the results into one MDP."""
        mdp_list = []
        attributes = self.initialiseAttributes()
        for i in range(0, len(errorconfig)):
            mdp_list.append(MDP(i, blocks))
            startingState = State(0, start_config)
            mdp_list[i].statelist.append(startingState)
            mdp_list[i].initMDP(startingState)
            errorstate = self.findState(errorconfig[i], mdp_list[i])
            stackstate = []
            for j in range(0, len(stack_config)):
                stackstate.append(self.findState(stack_config[j], mdp_list[i]))
            mdp_list[i].simulation(errorstate, stackstate, attributes)
            mdp_list[i].updateDistanceMatrix(errorstate)
        reduced_mdp = self.combineIdenticalMDPs(mdp_list)
        return reduced_mdp