def push(self, t, current_state, action, next_state, reward, done):
    try:
        if (current_state.period != t - 1) or (action.period != t) or (next_state.period != t):
            raise Exception
        self.buffers[t - 1].append(
            (current_state, action, next_state, reward, done))
    except:
        utils.printErrorAndExit('push')
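# A minimal sketch of the surrounding replay-buffer class, assuming one bounded
# FIFO buffer per decision period and a `buffer_size` capacity; the class name,
# constructor arguments and field layout are assumptions made only to show how
# push() above and the len()/sample() calls in train() below fit together.
import random
from collections import deque

class ReplayBufferSketch:
    def __init__(self, buffer_size, horizon):
        # one buffer per period t = 1..horizon, indexed as buffers[t - 1]
        self.buffers = [deque(maxlen=buffer_size) for _ in range(horizon)]

    def len(self, t):
        return len(self.buffers[t - 1])

    def sample(self, t, batch_size):
        # uniform sampling without replacement; unzip into per-field tuples
        batch = random.sample(list(self.buffers[t - 1]), batch_size)
        return zip(*batch)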
def actionValue(nets, t, current_state, action):
    try:
        if (current_state.period != t - 1) or (action.period != t):
            raise Exception
        # network input: current inventory concatenated with the order quantities
        features = list(current_state.inventory)
        features.extend(action.order)
        features = torch.Tensor(features)
        return nets[t - 1](features)
    except:
        utils.printErrorAndExit('actionValue')
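# A minimal sketch of one per-period Q-network as consumed by actionValue()
# above, assuming a plain fully connected net whose input is the N inventory
# levels concatenated with the N order quantities and whose output is a single
# Q-value; the hidden width and depth are illustrative assumptions, not the
# original architecture.
import torch.nn as nn

def makeNetSketch(N, hidden=64):
    return nn.Sequential(
        nn.Linear(2 * N, hidden),  # input: inventory (N) + order (N)
        nn.ReLU(),
        nn.Linear(hidden, 1))      # output: scalar Q(s, a)

# one net per period t = 1..T+1, accessed as nets[t - 1]
# nets = [makeNetSketch(N) for _ in range(T + 1)]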
def main(argv):
    try:
        if argv[1] == 'DP':
            value = DP.DPMethod(argv[2])
        elif argv[1] == 'DQN':
            # InFile, batch_size, buffer_size, episodes_train, episodes_test, startTime
            value = DQN.DQNMethod(argv[2], int(argv[3]), int(argv[4]),
                                  int(argv[5]), int(argv[6]),
                                  time.process_time())
        else:
            raise Exception
        print('value = {}'.format(value))
    except:
        utils.printErrorAndExit('main')
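# Hedged usage sketch: how main() is typically driven from the command line.
# The script name and the numeric values are illustrative assumptions; the DQN
# arguments follow the comment inside main() above (InFile, batch_size,
# buffer_size, episodes_train, episodes_test), with startTime supplied
# internally via time.process_time().
#
#   python main.py DP instance.txt
#   python main.py DQN instance.txt 32 10000 5000 1000
#
# if __name__ == '__main__':
#     main(sys.argv)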
def setofstate(t, pdf, current_state, action, demand):
    '''Given the current state current_state and the action, return the set of possible next states.'''
    try:
        if pdf.name == 'general':
            inventory = current_state.inventory + action.order - demand.demand[t - 1]
            inventory = utils.maxElementwise(inventory, np.zeros(len(inventory)))
            result = []
            result.append(utils.State(t, inventory))
            return result
        else:
            raise Exception
    except:
        utils.printErrorAndExit('setofstate')
def stateValue(env, nets, t, current_state):
    try:
        if current_state.period != t - 1:
            raise Exception
        if t == T + 1:
            action = utils.Action(T + 1, np.zeros(N))
            return actionValue(nets, t, current_state, action)
        actions = env.setofaction(t, current_state)
        values = []
        for action in actions:
            values.append(actionValue(nets, t, current_state, action))
        idx = values.index(max(values))
        return actionValue(nets, t, current_state, actions[idx])
    except:
        utils.printErrorAndExit('stateValue')
def act(env, nets, t, current_state, epsilon):
    '''Given the current state, return an action chosen epsilon-greedily.'''
    try:
        if current_state.period != t - 1:
            raise Exception
        if t == T + 1:
            return utils.Action(T + 1, np.zeros(N))
        actions = env.setofaction(t, current_state)
        if random.random() < epsilon:
            # explore: pick a random feasible action
            return actions[random.randrange(len(actions))]
        else:
            # exploit: pick the action with the highest estimated Q-value
            values = []
            for action in actions:
                values.append(actionValue(nets, t, current_state, action))
            pos = values.index(max(values))
            return actions[pos]
    except:
        utils.printErrorAndExit('act')
def setofaction(t, parameterDP, current_state):
    '''Return the set of feasible actions in period t, bounded by the order capacity and the remaining inventory capacity given current_state.'''
    try:
        if current_state.period != t - 1:
            raise Exception
    except:
        utils.printErrorAndExit('setofaction')
    temp = np.array(parameterDP.MaxInventory) - np.array(current_state.inventory)
    limit = utils.minElementwise(parameterDP.MaxOrder, temp.tolist())
    order = []
    partial = np.zeros(len(limit))
    utils.enumerate(order, limit, partial, 0)
    result = []
    for elem in order:
        result.append(utils.Action(t, elem))
    return result
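# A hedged sketch of the recursion that utils.enumerate() is assumed to perform
# in setofaction() above: fill `partial` position by position with every order
# quantity from 0 up to limit[pos], appending each completed vector to `order`.
# The real helper may differ; this only illustrates the call signature used here.
import numpy as np

def enumerateSketch(order, limit, partial, pos):
    if pos == len(limit):
        order.append(partial.copy())
        return
    for q in range(int(limit[pos]) + 1):
        partial[pos] = q
        enumerateSketch(order, limit, partial, pos + 1)

# e.g. limit = [1, 2] yields the six orders [0,0], [0,1], [0,2], [1,0], [1,1], [1,2]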
def step(self, t, current_state, action):
    try:
        if (current_state.period != t - 1) or (action.period != t) or (t > T + 1):
            raise Exception
        if t == T + 1:
            # terminal period: no transition, only the final reward is returned
            next_state = utils.State(T + 1, np.zeros(N))
            return self.rf.rewardfunction(t, self.parameterDP, current_state,
                                          action, next_state)
        inventory = current_state.inventory + action.order - self.demand.demand[t - 1]
        inventory = utils.maxElementwise(inventory, np.zeros(len(inventory)))
        next_state = utils.State(t, inventory)
        reward = self.rf.rewardfunction(t, self.parameterDP, current_state,
                                        action, next_state)
        done = (t == T + 1)  # always False on this path; the terminal case returned above
        return next_state, reward, done
    except:
        utils.printErrorAndExit('step')
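# Worked example of the transition in step() above (values assumed purely for
# illustration): with current inventory [3, 1], order [2, 0] and period-t demand
# [4, 3], the next inventory is max([3, 1] + [2, 0] - [4, 3], 0) = [1, 0].
# The elementwise max with zero means unmet demand is lost, not backlogged.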
elif arg == "--testing={}".format(c.TEXT_ENABLE):  # assumed enabling branch, inferred from the matching disable branch below
    testing = True
elif arg == "--testing={}".format(c.TEXT_DISABLE):
    testing = False
elif arg == "--gui={}".format(c.TEXT_ENABLE):
    useChatGui = True
elif arg == "--gui={}".format(c.TEXT_DISABLE):
    useChatGui = False
elif re.compile(r"^--dataLimit=\d+$").match(arg):
    dataLimit = int(arg[12:])
    if dataLimit < c.MIN_DATA_SIZE:
        printErrorAndExit(c.ERROR_MIN_DATA_SIZE)
elif re.compile(r"^--testDataLimit=\d+$").match(arg):
    testDataLimit = int(arg[16:])
    if testDataLimit < c.MIN_TEST_DATA_SIZE:
        printErrorAndExit(c.ERROR_MIN_TEST_DATA_SIZE)
elif re.compile(r"^--model=\d+$").match(arg):
    modelNumber = int(arg[8:])
    if modelNumber > c.NUMBER_OF_MODELS:
        printErrorAndExit(c.ERROR_ARGUMENTS)
else:
    printErrorAndExit(c.ERROR_ARGUMENTS)
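# Example command-line flags handled by the branches above (the script name and
# concrete values are illustrative assumptions; c.TEXT_ENABLE / c.TEXT_DISABLE
# are assumed to be strings along the lines of "enable" / "disable"):
#
#   python chatbot.py --testing=enable --gui=disable --dataLimit=50000 --model=2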
def train(env, nets, replayBuffer, batch_size, episodes_train, episodes_test, startTime):
    try:
        profits = []
        epsilon = 1.0
        decayEpsilon = 0.99
        optimizers = []
        for net in nets:
            optimizers.append(optim.Adam(net.parameters()))
        for episode in range(episodes_train + 1):
            epsilon *= decayEpsilon
            profit = 0
            episode_loss = 0
            current_state = utils.State(0, np.zeros(N))
            criterion = torch.nn.MSELoss()
            for t in range(1, T + 2):
                if t == T + 1:
                    # terminal period: fit the last network to the final reward
                    qOld = stateValue(env, nets, t, current_state)
                    action = utils.Action(T + 1, np.zeros(N))
                    reward = env.step(t, current_state, action)
                    profit += reward
                    qNew = torch.Tensor([reward])
                    loss = criterion(qOld, qNew)
                    episode_loss += loss
                    optimizers[t - 1].zero_grad()
                    loss.backward()
                    optimizers[t - 1].step()
                else:
                    # earlier online-update variant (no replay buffer), kept for reference:
                    '''
                    action = act(env, nets, t, current_state, epsilon)
                    next_state, reward, done = env.step(t, current_state, action)
                    profit += reward
                    qOld = actionValue(nets, t, current_state, action)
                    qNew = torch.Tensor([reward]) + stateValue(env, nets, t + 1, next_state)
                    loss = criterion(qOld, qNew)
                    episode_loss += loss
                    optimizers[t - 1].zero_grad()
                    loss.backward()
                    optimizers[t - 1].step()
                    current_state = next_state
                    '''
                    action = act(env, nets, t, current_state, epsilon)
                    next_state, reward, done = env.step(t, current_state, action)
                    profit += reward
                    replayBuffer.push(t, current_state, action, next_state, reward, done)
                    current_state = next_state
                    if replayBuffer.len(t) > batch_size:
                        current_states, actions, next_states, rewards, dones = replayBuffer.sample(t, batch_size)
                        qOld = []
                        qNew = []
                        for i in range(batch_size):
                            qOld.append(actionValue(nets, t, current_states[i], actions[i]))
                            qNew.append(torch.Tensor([rewards[i]]) +
                                        stateValue(env, nets, t + 1, next_states[i]))
                        qOld = torch.stack(qOld, 0)
                        qNew = torch.stack(qNew, 0)
                        loss = criterion(qOld, qNew)
                        episode_loss += loss
                        optimizers[t - 1].zero_grad()
                        loss.backward()
                        optimizers[t - 1].step()
            profits.append(profit)
            if episode % 100 == 0:
                print('episode = {} \t time = {:.2f} \t loss = {:.2f} \t average training profit = {} \t average testing profit = {}'
                      .format(episode, utils.runTime(startTime), episode_loss,
                              np.mean(profits), test(env, nets, episodes_test, 0)),
                      flush=True)
                profits = []
                epsilon = 1.0
    except:
        utils.printErrorAndExit('train')
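# A hedged sketch of how DQNMethod() (dispatched from main) could wire these
# pieces together before calling train(); the Environment constructor, the
# makeNetSketch/ReplayBufferSketch helpers and the final test() call mirror
# only the calls visible above and are assumptions, not the original code.
def DQNMethodSketch(inFile, batch_size, buffer_size, episodes_train, episodes_test, startTime):
    env = Environment(inFile)                           # assumed constructor
    nets = [makeNetSketch(N) for _ in range(T + 1)]     # one Q-net per period, see sketch above
    replayBuffer = ReplayBufferSketch(buffer_size, T)   # per-period buffers, see sketch above
    train(env, nets, replayBuffer, batch_size, episodes_train, episodes_test, startTime)
    return test(env, nets, episodes_test, 0)            # greedy evaluation, as called in train()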
def chat(modelNumber, useChatGui):
    """Chats with user.

    Args:
        modelNumber: number of model
        useChatGui: True if chatting GUI should be used, False if chatting in console
    """
    print("Chatting")
    savedWeightsFile, _, _ = getFileWithLastSavedWeights(modelNumber, None)
    if savedWeightsFile is None:
        printErrorAndExit(c.ERROR_WEIGHTS_FILE_NOT_FOUND)
    vocabulary, wordToIndexVocabulary = loadVocabulary()
    model = createModel(modelNumber, vocabulary, None, savedWeightsFile)
    chatManager = ChatManager(modelNumber, model, vocabulary, wordToIndexVocabulary)

    if useChatGui:
        gui = ChatGUI(chatManager)
        gui.start()
    else:
        if c.CLEAR_CONSOLE_BEFORE_CHAT:
            clearConsole()
        else:
            printDivider()

        if c.PRINT_CONSOLE_GREETING:
            welcomeMessage = greetings[randint(0, len(greetings) - 1)]
            if welcomeMessage[-1] in sentenceEndCharacters:
                introduction = introductions[randint(0, len(introductions) - 1)]
                welcomeMessage += " {}{}".format(introduction[0].upper(), introduction[1:])
            else:
                welcomeMessage += ", {}".format(introductions[randint(0, len(introductions) - 1)])
            welcomeMessage += " {}.{}".format(
                c.CHATBOT_NAME,
                " {}".format(infoMessage) if c.PRINT_CONSOLE_INFO_MESSAGE else "")
            print(welcomeMessage)
        elif c.PRINT_CONSOLE_INFO_MESSAGE:
            print(infoMessage)

        while True:
            oneLinePrint(">")
            for line in stdin:
                context = line
                break
            if context[-1] == "\n":
                context = context[:-1]
            if context == "":
                break
            answer = chatManager.getAnswer(context)
            print(answer)

        if c.PRINT_CONSOLE_GOODBYE:
            print(goodbyes[randint(0, len(goodbyes) - 1)])