Example #1
def push(self, t, current_state, action, next_state, reward, done):
    try:
        if ((current_state.period != t - 1) or (action.period != t)
                or (next_state.period != t)):
            raise Exception
        self.buffers[t - 1].append(
            (current_state, action, next_state, reward, done))
    except:
        utils.printErrorAndExit('push')
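
For context, push stores one transition per decision period after checking that the state and action periods line up. Below is a minimal sketch of a buffer class it could sit in; the constructor, buffer_size, and the sample internals are assumptions (only push, len(t), and the call shape of sample appear in these examples):

import random
from collections import deque

class ReplayBuffer:
    def __init__(self, T, buffer_size):
        # one bounded deque per decision period t = 1 .. T
        self.buffers = [deque(maxlen=buffer_size) for _ in range(T)]

    def len(self, t):
        # number of transitions stored for period t
        return len(self.buffers[t - 1])

    def sample(self, t, batch_size):
        # unzip a random batch into five parallel tuples, matching how
        # train() in Example #10 unpacks it
        return zip(*random.sample(self.buffers[t - 1], batch_size))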
Example #2
def actionValue(nets, t, current_state, action):
    try:
        if (current_state.period != t - 1) or (action.period != t):
            raise Exception
        features = list(current_state.inventory)  # avoid shadowing the builtin input
        features.extend(action.order)
        features = torch.Tensor(features)
        return nets[t - 1](features)
    except:
        utils.printErrorAndExit('actionValue')
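
The feature vector fed to the period-t network is simply the inventory concatenated with the order. An illustrative call, using hypothetical namedtuple stand-ins for utils.State and utils.Action (the real classes are not shown in these examples):

from collections import namedtuple

State = namedtuple('State', ['period', 'inventory'])
Action = namedtuple('Action', ['period', 'order'])

s = State(period=0, inventory=[3.0, 5.0])
a = Action(period=1, order=[2.0, 0.0])
# actionValue(nets, 1, s, a) feeds torch.Tensor([3., 5., 2., 0.])
# to nets[0], the Q-network for period t = 1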
Example #3
def main(argv):
    try:
        if argv[1] == 'DP':
            value = DP.DPMethod(argv[2])
        elif argv[1] == 'DQN':
            # InFile, batch_size, buffer_size, episodes_train, episodes_test, startTime
            value = DQN.DQNMethod(argv[2], int(argv[3]), int(argv[4]),
                                  int(argv[5]), int(argv[6]),
                                  time.process_time())
        else:
            raise Exception
        print('value = {}'.format(value))
    except:
        utils.printErrorAndExit('main')
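
Hypothetical command lines for this dispatch (the script and instance file names are made up; the DQN arguments are InFile, batch_size, buffer_size, episodes_train and episodes_test, with startTime supplied internally):

import sys

# python run.py DP instance.txt
# python run.py DQN instance.txt 32 10000 5000 1000
if __name__ == '__main__':
    main(sys.argv)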
Example #4
def setofstate(t, pdf, current_state, action, demand):
    '''given the current state and the action, return the set of
    possible next states'''

    try:
        if pdf.name == 'general':
            inventory = (current_state.inventory + action.order
                         - demand.demand[t - 1])
            inventory = utils.maxElementwise(inventory,
                                             np.zeros(len(inventory)))
            return [utils.State(t, inventory)]
        else:
            raise Exception
    except:
        utils.printErrorAndExit('setofstate')
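
The clipping step implements a lost-sales update: unmet demand is dropped rather than backlogged. The same arithmetic with plain numpy, on illustrative numbers:

import numpy as np

# inventory + order - demand, clipped at zero element-wise
inventory = np.array([3.0, 1.0]) + np.array([2.0, 0.0]) - np.array([4.0, 3.0])
inventory = np.maximum(inventory, np.zeros(len(inventory)))  # -> [1., 0.]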
Example #5
def stateValue(env, nets, t, current_state):
    try:
        if current_state.period != t - 1:
            raise Exception
        if t == T + 1:
            action = utils.Action(T + 1, np.zeros(N))
            return actionValue(nets, t, current_state, action)
        actions = env.setofaction(t, current_state)

        values = []
        for action in actions:
            values.append(actionValue(nets, t, current_state, action))
        # return the best value directly instead of recomputing it
        idx = values.index(max(values))
        return values[idx]
    except:
        utils.printErrorAndExit('stateValue')
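
In effect, stateValue performs the greedy Bellman evaluation V_t(s) = max_a Q_t(s, a) over the feasible actions, with a dummy zero order at t = T + 1 so the terminal network is queried with a consistent input shape.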
Example #6
def act(env, nets, t, current_state, epsilon):
    '''given the current state, return an epsilon-greedy action'''

    try:
        if current_state.period != t - 1:
            raise Exception
        if t == T + 1:
            return utils.Action(T + 1, np.zeros(N))
        actions = env.setofaction(t, current_state)
        if random.random() < epsilon:
            return actions[random.randrange(len(actions))]
        else:
            values = []
            for action in actions:
                values.append(actionValue(nets, t, current_state, action))
            pos = values.index(max(values))
            return actions[pos]
    except:
        utils.printErrorAndExit('act')
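
For context, train in Example #10 drives act with a multiplicative epsilon decay; a minimal sketch of that schedule, with the 0.99 factor taken from the training loop:

# start fully exploratory and decay epsilon by 1% per episode
epsilon = 1.0
decayEpsilon = 0.99
for episode in range(300):
    epsilon *= decayEpsilon
# after 300 episodes, epsilon = 0.99 ** 300, roughly 0.049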
Example #7
def setofaction(t, parameterDP, current_state):
    '''return the set of actions that are feasible given the order and
    inventory capacities and the current state'''

    try:
        if current_state.period != t - 1:
            raise Exception
    except:
        utils.printErrorAndExit('setofaction')

    temp = np.array(parameterDP.MaxInventory) - np.array(
        current_state.inventory)
    limit = utils.minElementwise(parameterDP.MaxOrder, temp.tolist())
    order = []
    partial = np.zeros(len(limit))
    utils.enumerate(order, limit, partial, 0)

    result = []
    for elem in order:
        result.append(utils.Action(t, elem))
    return result
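
utils.enumerate is not shown in these examples; a plausible reconstruction (an assumption, not the real helper) that collects every integer order vector v with 0 <= v <= limit element-wise:

import numpy as np

def enumerate_orders(order, limit, partial, i):
    # depth-first enumeration, one dimension of the limit at a time
    if i == len(limit):
        order.append(partial.copy())
        return
    for v in range(int(limit[i]) + 1):
        partial[i] = v
        enumerate_orders(order, limit, partial, i + 1)

orders = []
enumerate_orders(orders, [1, 1], np.zeros(2), 0)
# orders -> [array([0., 0.]), array([0., 1.]), array([1., 0.]), array([1., 1.])]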
Example #8
def step(self, t, current_state, action):
    try:
        if ((current_state.period != t - 1) or (action.period != t)
                or (t > T + 1)):
            raise Exception
        if t == T + 1:
            # terminal step: only the final reward is returned
            next_state = utils.State(T + 1, np.zeros(N))
            return self.rf.rewardfunction(t, self.parameterDP,
                                          current_state, action,
                                          next_state)
        inventory = (current_state.inventory + action.order
                     - self.demand.demand[t - 1])
        inventory = utils.maxElementwise(inventory,
                                         np.zeros(len(inventory)))
        next_state = utils.State(t, inventory)
        reward = self.rf.rewardfunction(t, self.parameterDP, current_state,
                                        action, next_state)
        done = False  # the t == T + 1 case already returned above
        return next_state, reward, done
    except:
        utils.printErrorAndExit('step')
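
Note the asymmetric return: a lone reward at t = T + 1 and a (next_state, reward, done) triple otherwise, so callers must branch on t exactly as the training loop in Example #10 does. A hypothetical single-episode rollout under that convention:

current_state = utils.State(0, np.zeros(N))
for t in range(1, T + 2):
    action = act(env, nets, t, current_state, epsilon=0.1)
    if t == T + 1:
        reward = env.step(t, current_state, action)  # reward only
    else:
        current_state, reward, done = env.step(t, current_state, action)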
Example #9
        if arg == "--testing={}".format(c.TEXT_ENABLE):
            testing = True

        elif arg == "--testing={}".format(c.TEXT_DISABLE):
            testing = False

        elif arg == "--gui={}".format(c.TEXT_ENABLE):
            useChatGui = True

        elif arg == "--gui={}".format(c.TEXT_DISABLE):
            useChatGui = False

        elif re.compile(r"^--dataLimit=\d+$").match(arg):
            dataLimit = int(arg[12:])

            if dataLimit < c.MIN_DATA_SIZE:
                printErrorAndExit(c.ERROR_MIN_DATA_SIZE)

        elif re.compile(r"^--testDataLimit=\d+$").match(arg):
            testDataLimit = int(arg[16:])

            if testDataLimit < c.MIN_TEST_DATA_SIZE:
                printErrorAndExit(c.ERROR_MIN_TEST_DATA_SIZE)

        elif re.compile(r"^--model=\d+$").match(arg):
            modelNumber = int(arg[8:])

            if modelNumber > c.NUMBER_OF_MODELS:
                printErrorAndExit(c.ERROR_ARGUMENTS)

        else:
            printErrorAndExit(c.ERROR_ARGUMENTS)
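
The fixed slice offsets in the parser match the lengths of the flag prefixes; a quick check with an illustrative value:

arg = "--dataLimit=1000"
assert len("--dataLimit=") == 12 and len("--testDataLimit=") == 16
assert len("--model=") == 8
dataLimit = int(arg[12:])  # -> 1000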
Example #10
def train(env, nets, replayBuffer, batch_size, episodes_train, episodes_test,
          startTime):
    try:
        profits = []
        epsilon = 1.0
        decayEpsilon = 0.99

        optimizers = []
        for net in nets:
            optimizers.append(optim.Adam(net.parameters()))

        for episode in range(episodes_train + 1):
            epsilon *= decayEpsilon
            profit = 0
            episode_loss = 0
            current_state = utils.State(0, np.zeros(N))
            criterion = torch.nn.MSELoss()

            for t in range(1, T + 2):
                if t == T + 1:
                    qOld = stateValue(env, nets, t, current_state)
                    action = utils.Action(T + 1, np.zeros(N))
                    reward = env.step(t, current_state, action)
                    profit += reward
                    qNew = torch.Tensor([reward])
                    loss = criterion(qOld, qNew)
                    episode_loss += loss.item()  # detach from the graph for logging
                    optimizers[t - 1].zero_grad()
                    loss.backward()
                    optimizers[t - 1].step()
                else:
                    '''
                    action = act(env, nets, t, current_state, epsilon)
                    next_state, reward, done = env.step(t, current_state, action)
                    profit += reward
                    qOld = actionValue(nets, t, current_state, action)
                    qNew = torch.Tensor([reward]) + stateValue(env, nets, t + 1, next_state)
                    loss = criterion(qOld, qNew)
                    episode_loss += loss
                    optimizers[t - 1].zero_grad()
                    loss.backward()
                    optimizers[t - 1].step()

                    current_state = next_state
                    '''
                    action = act(env, nets, t, current_state, epsilon)
                    next_state, reward, done = env.step(
                        t, current_state, action)
                    profit += reward
                    replayBuffer.push(t, current_state, action, next_state,
                                      reward, done)
                    current_state = next_state

                    if replayBuffer.len(t) > batch_size:
                        current_states, actions, next_states, rewards, dones = replayBuffer.sample(
                            t, batch_size)
                        qOld = []
                        qNew = []
                        for i in range(batch_size):
                            qOld.append(
                                actionValue(nets, t, current_states[i],
                                            actions[i]))
                            qNew.append(
                                torch.Tensor([rewards[i]]) +
                                stateValue(env, nets, t + 1, next_states[i]))
                        qOld = torch.stack(qOld, 0)
                        qNew = torch.stack(qNew, 0)
                        loss = criterion(qOld, qNew)
                        episode_loss += loss.item()  # detach from the graph for logging
                        optimizers[t - 1].zero_grad()
                        loss.backward()
                        optimizers[t - 1].step()
            profits.append(profit)
            if episode % 100 == 0:
                print('episode = {} \t time = {:.2f} \t loss = {:.2f} \t '
                      'average training profit = {} \t average testing profit = {}'
                      .format(episode, utils.runTime(startTime), episode_loss,
                              np.mean(profits),
                              test(env, nets, episodes_test, 0)),
                      flush=True)
                profits = []
                epsilon = 1.0
    except:
        utils.printErrorAndExit('train')
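
Hypothetical wiring for train(); the layer sizes, buffer size and episode counts below are assumptions, not taken from these examples. One small Q-network is built per period t = 1 .. T + 1, each mapping the concatenated (inventory, order) vector of length 2 * N to a scalar value:

import time
import torch.nn as nn

nets = [nn.Sequential(nn.Linear(2 * N, 32), nn.ReLU(), nn.Linear(32, 1))
        for _ in range(T + 1)]
replayBuffer = ReplayBuffer(T, buffer_size=10000)  # sketch from Example #1
train(env, nets, replayBuffer, batch_size=32, episodes_train=10000,
      episodes_test=1000, startTime=time.process_time())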
Example #11
def chat(modelNumber, useChatGui):
    """Chats with user.

    Args:
        modelNumber: number of model
        useChatGui: True if chatting GUI should be used, False if chatting in console
    """

    print("Chatting")

    savedWeightsFile, _, _ = getFileWithLastSavedWeights(modelNumber, None)

    if savedWeightsFile is None:
        printErrorAndExit(c.ERROR_WEIGHTS_FILE_NOT_FOUND)

    vocabulary, wordToIndexVocabulary = loadVocabulary()

    model = createModel(modelNumber, vocabulary, None, savedWeightsFile)

    chatManager = ChatManager(modelNumber, model, vocabulary, wordToIndexVocabulary)

    if useChatGui:
        gui = ChatGUI(chatManager)
        gui.start()

    else:
        if c.CLEAR_CONSOLE_BEFORE_CHAT:
            clearConsole()
        else:
            printDivider()

        if c.PRINT_CONSOLE_GREETING:
            welcomeMessage = greetings[randint(0, len(greetings) - 1)]

            if welcomeMessage[-1] in sentenceEndCharacters:
                introduction = introductions[randint(
                    0, len(introductions) - 1)]
                welcomeMessage += " {}{}".format(
                    introduction[0].upper(), introduction[1:])
            else:
                welcomeMessage += ", {}".format(
                    introductions[randint(0, len(introductions) - 1)])

            welcomeMessage += " {}.{}".format(c.CHATBOT_NAME, " {}".format(
                infoMessage) if c.PRINT_CONSOLE_INFO_MESSAGE else "")

            print(welcomeMessage)

        elif c.PRINT_CONSOLE_INFO_MESSAGE:
            print(infoMessage)

        while True:
            oneLinePrint(">")

            # readline() returns "" at EOF, which also ends the chat
            context = stdin.readline()
            if context.endswith("\n"):
                context = context[:-1]

            if context == "":
                break

            answer = chatManager.getAnswer(context)

            print(answer)

        if c.PRINT_CONSOLE_GOODBYE:
            print(goodbyes[randint(0, len(goodbyes) - 1)])
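
The console loop exits when the user submits an empty line. A hypothetical entry point (valid model numbers depend on c.NUMBER_OF_MODELS):

chat(modelNumber=1, useChatGui=False)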