Example #1
# Common imports assumed by the snippets in this section.
import random
import timeit
import carmunk
import numpy as np


def play(model):
    # lastState and lastaction must be initialized by the caller before play() runs.
    global lastState
    global lastaction
    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    _, state = game_state.frame_step((2))

    # Stack the previous readings, the current readings and the last action into one model input.
    train_state = np.append(lastState, state[0])
    train_state = np.append(train_state, lastaction)
    train_state = np.expand_dims(train_state, axis=0)
    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = (np.argmax(model.predict(train_state, batch_size=1)))
        print(action)

        # Take action.
        _, state = game_state.frame_step(action)
        train_state = np.append(lastState, state[0])

        train_state = np.append(train_state, action)
        train_state = np.expand_dims(train_state, axis=0)
        lastState = state[0]
        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)
Example #2
def play(model):

    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    _, state = game_state.frame_step((2))

    # Move.
    while True:
        car_distance += 1

        # Choose action: explore randomly 40% of the time, otherwise act greedily.
        if random.random() < 0.4:
            action = np.random.randint(0, 3)  # random
        else:
            # Get Q values for each action.
            qval = model.predict(state, batch_size=1)
            action = (np.argmax(qval))  # best

        # Take action.
        _, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)
Example #3
def play(model):
    """
    DOCSTRING
    """
    car_distance = 0
    game_state = carmunk.GameState()
    _, state = game_state.frame_step((2))
    while True:
        car_distance += 1
        action = (np.argmax(model.predict(state, batch_size=1)))
        _, state = game_state.frame_step(action)
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)
def play(model):

    car_distance = 0
    game_state = carmunk.GameState()

    # Do nothing to get initial.
    _, state = game_state.frame_step((2))

    # Move.
    while True:
        car_distance += 1

        # Choose action.
        action = (np.argmax(model.predict(state, batch_size=1)))

        # Take action.
        _, state = game_state.frame_step(action)

        # Tell us something.
        if car_distance % 1000 == 0:
            print("Current distance: %d frames." % car_distance)
Example #5
def train_net(model, params):
    """
    DOCSTRING
    """
    filename = params_to_filename(params)
    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = 1000000  # Number of frames to play.
    batchSize = params['batchSize']
    buffer = params['buffer']
    max_car_distance = 0
    car_distance = 0
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S').
    loss_log = []
    game_state = carmunk.GameState()
    _, state = game_state.frame_step((2))
    start_time = timeit.default_timer()
    while t < train_frames:
        t += 1
        car_distance += 1
        if random.random() < epsilon or t < observe:
            action = np.random.randint(0, 3)
        else:
            qval = model.predict(state, batch_size=1)
            action = (np.argmax(qval))  # best
        reward, new_state = game_state.frame_step(action)
        replay.append((state, action, reward, new_state))
        if t > observe:
            if len(replay) > buffer:
                replay.pop(0)
            minibatch = random.sample(replay, batchSize)
            X_train, y_train = process_minibatch(minibatch, model)
            history = nn.LossHistory()
            model.fit(X_train,
                      y_train,
                      batch_size=batchSize,
                      nb_epoch=1,
                      verbose=0,
                      callbacks=[history])
            loss_log.append(history.losses)
        state = new_state
        if epsilon > 0.1 and t > observe:
            epsilon -= (1 / train_frames)
        if reward == -500:
            data_collect.append([t, car_distance])
            if car_distance > max_car_distance:
                max_car_distance = car_distance
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time
            print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
                  (max_car_distance, t, epsilon, car_distance, fps))
            car_distance = 0
            start_time = timeit.default_timer()
        if t % 25000 == 0:
            model.save_weights('saved-models/' + filename + '-' + str(t) +
                               '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))
    log_results(filename, data_collect, loss_log)
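The training loop above records per-batch losses through nn.LossHistory(); the commented variant below does the same under the bare name LossHistory. Neither class is shown in this section, but the usual way to write such a Keras callback is a small Callback subclass like this (a sketch, not necessarily the project's exact definition):

from keras.callbacks import Callback

class LossHistory(Callback):
    """Collect the training loss reported by Keras after every batch."""
    def on_train_begin(self, logs={}):
        self.losses = []

    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))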
def train_net(model, params):

    filename = params_to_filename(params)

    observe = 1000  # Number of frames to observe before training.
    epsilon = 1
    train_frames = 100000  # Number of frames to play.
    batchSize = params['batchSize']
    buffer = params['buffer']

    # Just stuff used below.
    max_car_distance = 0
    car_distance = 0
    t = 0
    data_collect = []
    replay = []  # stores tuples of (S, A, R, S').

    loss_log = []

    # Create a new game instance.
    game_state = carmunk.GameState()

    # Get initial state by doing nothing and getting the state.
    _, state = game_state.frame_step((2))

    # Let's time it.
    start_time = timeit.default_timer()

    # Run the frames.
    while t < train_frames:

        t += 1
        car_distance += 1

        # Choose an action.
        if random.random() < epsilon or t < observe:
            action = np.random.randint(0, 3)  # random
        else:
            # Get Q values for each action.
            qval = model.predict(state, batch_size=1)
            action = (np.argmax(qval))  # best

        # Take action, observe new state and get our treat.
        reward, new_state = game_state.frame_step(action)

        # Experience replay storage.
        replay.append((state, action, reward, new_state))

        # If we're done observing, start training.
        if t > observe:

            # If we've stored enough in our buffer, pop the oldest.
            if len(replay) > buffer:
                replay.pop(0)

            # Randomly sample our experience replay memory
            minibatch = random.sample(replay, batchSize)

            # Get training values.
            X_train, y_train = process_minibatch2(minibatch, model)

            # Train the model on this batch.
            history = LossHistory()
            model.fit(X_train,
                      y_train,
                      batch_size=batchSize,
                      nb_epoch=1,
                      verbose=0,
                      callbacks=[history])
            loss_log.append(history.losses)

        # Update the starting state with S'.
        state = new_state

        # Decrement epsilon over time.
        if epsilon > 0.1 and t > observe:
            epsilon -= (1.0 / train_frames)

        # We died, so update stuff.
        if reward == -500:
            # Log the car's distance at this T.
            data_collect.append([t, car_distance])

            # Update max.
            if car_distance > max_car_distance:
                max_car_distance = car_distance

            # Time it.
            tot_time = timeit.default_timer() - start_time
            fps = car_distance / tot_time

            # Output some stuff so we can watch.
            print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
                  (max_car_distance, t, epsilon, car_distance, fps))

            # Reset.
            car_distance = 0
            start_time = timeit.default_timer()

        # Save the model every 25,000 frames.
        if t % 25000 == 0:
            model.save_weights('saved-models/' + filename + '-' + str(t) +
                               '.h5',
                               overwrite=True)
            print("Saving model %s - %d" % (filename, t))

    # Log results after we're done all frames.
    log_results(filename, data_collect, loss_log)
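Both train_net() variants delegate the Q-learning target computation to process_minibatch / process_minibatch2, which are not shown here. A sketch of what such a helper could look like, assuming three actions, a discount factor GAMMA, and that a reward of -500 marks a terminal frame (matching the crash check in the loop above); the project's actual helpers may differ:

GAMMA = 0.9  # assumed discount factor

def process_minibatch_sketch(minibatch, model):
    """Build (X, y) pairs: y is the predicted Q-vector with the taken action's
    entry replaced by the Q-learning target r + GAMMA * max_a' Q(S', a')."""
    X_train, y_train = [], []
    for old_state, action, reward, new_state in minibatch:
        old_q = model.predict(old_state, batch_size=1)
        new_q = model.predict(new_state, batch_size=1)
        y = np.copy(old_q)
        if reward != -500:                      # non-terminal: bootstrap from S'
            y[0][action] = reward + GAMMA * np.max(new_q)
        else:                                   # terminal: no future reward
            y[0][action] = reward
        X_train.append(old_state.reshape(old_state.shape[-1],))
        y_train.append(y.reshape(y.shape[-1],))
    return np.array(X_train), np.array(y_train)

As for the params argument, the loops above only read params['batchSize'] and params['buffer'], so a call could look like train_net(model, {'batchSize': 100, 'buffer': 50000}), though params_to_filename() may expect additional keys.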