Ejemplo n.º 1
0
def test6():
    """Now with memory: evolve a recurrent net on the cheese maze with SNES.

    Builds the polar-maze game on the ``cheese_maze`` level, creates a
    network via ``buildNet(..., recurrent=True)`` and optimises it with SNES.
    After every ``learn`` call the best candidate found so far is loaded
    into the network and its trajectories are drawn into the next cell of a
    2x3 subplot grid.  The loop stops early as soon as the optimiser's best
    evaluation reaches its desired evaluation (0.85).
    """
    from numpy import ndarray
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(cheese_maze)
    game_env = GameEnvironment(game)
    net = buildNet(game_env.outdim, 10, 4, temperature=0.1, recurrent=True)

    def fitness(candidate):
        # Score a candidate by running episodes in the shared environment.
        return someEpisodes(game_env, candidate, avgOver=6, maxSteps=30, exploretoo=False)

    algo = SNES(fitness, net, verbose=True, desiredEvaluation=0.85)
    print(algo.batchSize)

    rows, cols = 2, 3
    episodesPerStep = 5
    for panel in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, panel)
        algo.learn(episodesPerStep)

        # The optimiser may hand back either a raw parameter vector or a
        # complete evaluable; handle both.
        champion = algo.bestEvaluable
        if isinstance(champion, ndarray):
            net._setParameters(champion)
        else:
            net = champion

        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str(panel * episodesPerStep))

        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
        # Blank line between panels in the console output.
        print("")
    pylab.show()
Ejemplo n.º 2
0
def test6():
    """Now with memory: evolve a recurrent net on the cheese maze with SNES.

    Builds the polar-maze game on the ``cheese_maze`` level, creates a
    network via ``buildNet(..., recurrent=True)`` and optimises it with SNES.
    After every ``learn`` call the best candidate found so far is loaded
    into the network and its trajectories are drawn into the next cell of a
    2x3 subplot grid.  The loop stops early as soon as the optimiser's best
    evaluation reaches its desired evaluation (0.85).
    """
    from numpy import ndarray
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(cheese_maze)
    game_env = GameEnvironment(game)
    net = buildNet(game_env.outdim, 10, 4, temperature=0.1, recurrent=True)

    def fitness(candidate):
        # Score a candidate by running episodes in the shared environment.
        return someEpisodes(game_env, candidate, avgOver=6, maxSteps=30, exploretoo=False)

    algo = SNES(fitness, net, verbose=True, desiredEvaluation=0.85)
    print(algo.batchSize)

    rows, cols = 2, 3
    episodesPerStep = 5
    for panel in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, panel)
        algo.learn(episodesPerStep)

        # The optimiser may hand back either a raw parameter vector or a
        # complete evaluable; handle both.
        champion = algo.bestEvaluable
        if isinstance(champion, ndarray):
            net._setParameters(champion)
        else:
            net = champion

        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str(panel * episodesPerStep))

        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
        # Blank line between panels in the console output.
        print("")
    pylab.show()
Ejemplo n.º 3
0
def test4():
    """Evolve a non-recurrent net on the ``labyrinth2`` level with SNES.

    The polar-maze game is built on ``labyrinth2`` and a network created by
    ``buildNet(..., recurrent=False)`` is optimised with SNES (fitness is
    ``someEpisodes`` with ``avgOver=3``).  After each ``learn`` call the
    best candidate is loaded into the network and its trajectories are
    plotted into the next cell of a 2x2 grid.  The loop ends early once the
    best evaluation reaches the desired evaluation (0.75).
    """
    from numpy import ndarray
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES, WeightGuessing

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(labyrinth2)
    game_env = GameEnvironment(game)
    net = buildNet(game_env.outdim, 5, 4, temperature=0.1, recurrent=False)

    def fitness(candidate):
        # Score a candidate by running episodes in the shared environment.
        return someEpisodes(game_env, candidate, avgOver=3)

    # WeightGuessing (imported above) was tried as an alternative optimiser
    # with desiredEvaluation=0.78; SNES is the one in use.
    algo = SNES(fitness, net, verbose=True, desiredEvaluation=0.75)

    rows, cols = 2, 2
    episodesPerStep = 4
    for panel in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, panel)
        algo.learn(episodesPerStep)

        # The optimiser may hand back either a raw parameter vector or a
        # complete evaluable; handle both.
        champion = algo.bestEvaluable
        if isinstance(champion, ndarray):
            net._setParameters(champion)
        else:
            net = champion

        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str(panel * episodesPerStep))

        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
        # Blank line between panels in the console output.
        print("")
    pylab.show()
Ejemplo n.º 4
0
def test4():
    """Evolve a non-recurrent net on the ``labyrinth2`` level with SNES.

    The polar-maze game is built on ``labyrinth2`` and a network created by
    ``buildNet(..., recurrent=False)`` is optimised with SNES (fitness is
    ``someEpisodes`` with ``avgOver=3``).  After each ``learn`` call the
    best candidate is loaded into the network and its trajectories are
    plotted into the next cell of a 2x2 grid.  The loop ends early once the
    best evaluation reaches the desired evaluation (0.75).
    """
    from numpy import ndarray
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES, WeightGuessing

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(labyrinth2)
    game_env = GameEnvironment(game)
    net = buildNet(game_env.outdim, 5, 4, temperature=0.1, recurrent=False)

    def fitness(candidate):
        # Score a candidate by running episodes in the shared environment.
        return someEpisodes(game_env, candidate, avgOver=3)

    # WeightGuessing (imported above) was tried as an alternative optimiser
    # with desiredEvaluation=0.78; SNES is the one in use.
    algo = SNES(fitness, net, verbose=True, desiredEvaluation=0.75)

    rows, cols = 2, 2
    episodesPerStep = 4
    for panel in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, panel)
        algo.learn(episodesPerStep)

        # The optimiser may hand back either a raw parameter vector or a
        # complete evaluable; handle both.
        champion = algo.bestEvaluable
        if isinstance(champion, ndarray):
            net._setParameters(champion)
        else:
            net = champion

        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str(panel * episodesPerStep))

        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
        # Blank line between panels in the console output.
        print("")
    pylab.show()
Ejemplo n.º 5
0
def test3():
    """Evolve a net on the ``consistent_corridor`` level with SNES and plot it.

    The polar-maze game is built on ``consistent_corridor`` and a network
    from ``buildNet(..., recurrent=False)`` is optimised with SNES, using
    ``someEpisodes`` as fitness.  After each ``learn`` call the optimiser's
    best evaluable is written into the network as its parameters and the
    resulting trajectories are drawn into the next cell of a 2x2 grid.  The
    loop ends early once the best evaluation reaches the desired
    evaluation (0.78).
    """
    from examples.gridphysics.mazes.simple import consistent_corridor
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(consistent_corridor)
    game_env = GameEnvironment(game)
    net = buildNet(game_env.outdim, 4, 4, temperature=0.05, recurrent=False)

    def fitness(candidate):
        # Score a candidate by running episodes in the shared environment.
        return someEpisodes(game_env, candidate)

    algo = SNES(fitness, net, verbose=True, desiredEvaluation=0.78)

    rows, cols = 2, 2
    episodesPerStep = 3
    for panel in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, panel)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str(panel * episodesPerStep))
        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
        # Blank line between panels in the console output.
        print("")
    pylab.show()
Ejemplo n.º 6
0
def test2():
    """Evolve a net on ``maze_level_1`` with SNES and plot its progress.

    The polar-maze game is built on ``maze_level_1`` inside a
    ``GameEnvironment`` created with ``actionDelay=100`` and
    ``recordingEnabled=True``.  A network from ``buildNet`` is optimised
    with SNES using ``someEpisodes`` as fitness.  After each ``learn`` call
    the best evaluable is written into the network as its parameters and
    the trajectories are drawn into the next cell of a 3x3 grid.  The loop
    ends early once the best evaluation reaches the desired evaluation
    (0.43).
    """
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.optimization import SNES

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1)
    game_env = GameEnvironment(game, actionDelay=100, recordingEnabled=True)
    net = buildNet(game_env.outdim, 6, 2)

    def fitness(candidate):
        # Score a candidate by running episodes in the shared environment.
        return someEpisodes(game_env, candidate)

    algo = SNES(fitness, net, verbose=True, desiredEvaluation=0.43)

    rows, cols = 3, 3
    episodesPerStep = 2
    for panel in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, panel)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str(panel * episodesPerStep))
        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
        # Blank line between panels in the console output.
        print("")
    pylab.show()
Ejemplo n.º 7
0
def evaulateGame():
    """Score the current game: first with random play, then with SNES.

    Phase 1 rewrites ``rules[3]`` from ``random=0`` to ``random=1``, applies
    the rules and sums ten ``evaluate(net)`` results.  If their mean exceeds
    0.4 the function returns ``(-1, net)`` immediately.

    Phase 2 restores the original ``rules[3]``, applies the rules again and
    runs SNES for five rounds of ``learn(5)``, tracking the highest
    ``bestEvaluation`` seen (starting from 0).

    Returns:
        A ``(best, net)`` tuple, or ``(-1, net)`` on the early exit.
    """
    global zelda_game, now_zelda_game, rules

    net = buildNetwork(336, 10, 8, hiddenclass=SigmoidLayer)

    # --- Phase 1: random play -------------------------------------------
    oldlink = rules[3]
    rules[3] = rules[3].replace('random=0', 'random=1')
    print(rules[3])
    setRule(rules)
    print("randomPlay")
    random_total = sum(evaluate(net) for _ in range(10))
    # (Two further random "ShootNNSprite" rule variants, sword and bullet,
    # were evaluated here at one time; they are disabled.)

    # NOTE(review): on this early exit rules[3] keeps its random=1 form and
    # setRule is not called again -- confirm callers expect that.
    if random_total / 10.0 > 0.4:
        return -1, net

    from pybrain.optimization import SNES
    from pybrain.optimization import OriginalNES
    from pybrain.optimization import GA
    from numpy import ndarray

    # --- Phase 2: SNES on the original rule -----------------------------
    rules[3] = oldlink
    print("oldlink........." + oldlink)
    setRule(rules)

    best = 0
    print("SNES starting......")

    def fitness(candidate):
        return evaluate(candidate)

    # GA and OriginalNES (imported above) are alternative optimisers that
    # were tried in place of SNES.
    algo = SNES(fitness, net, verbose=True)
    episodesPerStep = 5
    for _ in range(5):
        algo.learn(episodesPerStep)
        # Printed before the newest best candidate is loaded into the net.
        print(net.params)

        champion = algo.bestEvaluable
        if isinstance(champion, ndarray):
            net._setParameters(champion)
        else:
            net = champion

        score = algo.bestEvaluation
        if score > best:
            best = score
            # (A disabled "too hard" early exit for best < 0.1 lived here.)

    print(now_zelda_game)
    return best, net
Ejemplo n.º 8
0
def trainNetwork(data, n_classes, buildNet, file, seed, max_evaluations,
                 num_samples):
    """Train a classifier network with SNES on randomly sampled mini-batches.

    The fitness that SNES minimises is a multi-class hinge loss: for every
    sampled example, ``max(0, out - out[correct] + 1)`` is summed over all
    outputs whose index differs from the label, and these per-example sums
    are added up over the batch.

    Args:
        data: mapping with at least ``"X_train"`` and ``"y_train"``.
        n_classes: forwarded to ``buildNet`` and ``testNetwork``.
        buildNet: callable ``buildNet(n_inputs, n_classes)`` returning a
            network exposing ``params``, ``_setParameters``, ``activate``
            and ``reset``.
        file, seed: forwarded unchanged to ``testNetwork``.
        max_evaluations: evaluation budget; also sets how many learning
            steps are run (``max_evaluations / batchSize``).
        num_samples: number of training rows drawn per objective call.

    Returns:
        The parameters returned by the last ``learn`` call, or the initial
        parameters if no learning step was run.
    """
    X_train, y_train = data["X_train"], data["y_train"]

    def objF(params):
        nn = buildNet(X_train.shape[1], n_classes)
        nn._setParameters(np.array(params))

        # Draw the batch from a generator seeded with the current learning
        # step, so every candidate of one generation sees the same batch,
        # then put the global NumPy random state back as it was.
        saved_state = np.random.get_state()
        np.random.seed(optimizer.numLearningSteps)
        picked = np.random.choice(len(X_train), num_samples, replace=False)
        np.random.set_state(saved_state)

        total_loss = 0
        for example, cor in zip(X_train[picked], y_train[picked]):
            result = nn.activate(example)
            # Hinge loss of this example over all non-target outputs.
            total_loss += sum(max(0, out - result[int(cor)] + 1)
                              for q, out in enumerate(result)
                              if q != cor)
            nn.reset()
        return total_loss

    # A freshly built net supplies the initial (random) parameters.
    learned = buildNet(X_train.shape[1], n_classes).params

    testNetwork(data, n_classes, learned, buildNet, 0, file, seed)

    optimizer = SNES(objF, learned, verbose=False)
    batch_size = optimizer.batchSize
    optimizer.maxEvaluations = max_evaluations
    optimizer.minimize = True

    for step in xrange(max_evaluations / batch_size):
        learned = optimizer.learn(additionalLearningSteps=1)[0]
        testNetwork(data, n_classes, learned, buildNet,
                    num_samples * (step + 1) * batch_size, file, seed)

    return learned
Ejemplo n.º 9
0
def evaulateGame(without=0, iteration=20):
    """Run SNES on the current game and report the best evaluation reached.

    A fresh network is built and optimised with SNES for four rounds of
    ``learn(5)``; the fitness is ``evaluate(x, iteration=..., without=...)``.
    The highest ``bestEvaluation`` seen (starting from 0) is tracked.

    Args:
        without: forwarded to ``evaluate`` as ``without``.
        iteration: forwarded to ``evaluate`` as ``iteration``.

    Returns:
        A ``(best, net)`` tuple.
    """
    global zelda_game, now_zelda_game, rules

    net = buildNetwork(336, 10, 8, hiddenclass=SigmoidLayer)

    from pybrain.optimization import SNES
    from pybrain.optimization import OriginalNES
    from pybrain.optimization import GA
    from numpy import ndarray

    best = 0

    def fitness(candidate):
        return evaluate(candidate, iteration=iteration, without=without)

    algo = SNES(fitness, net, verbose=True)
    episodesPerStep = 5
    for _ in range(4):
        algo.learn(episodesPerStep)
        # Printed before the newest best candidate is loaded into the net.
        print(net.params)

        champion = algo.bestEvaluable
        if isinstance(champion, ndarray):
            net._setParameters(champion)
        else:
            net = champion

        score = algo.bestEvaluation
        if score > best:
            best = score
            # (A disabled "too hard" early exit for best < 0.1 lived here.)

    # (A second, disabled pass using OriginalNES on a 108-input network,
    # appending its results to a per-thread stats file, used to follow.)
    print(now_zelda_game)
    return best, net
Ejemplo n.º 10
0
def get_population_size(learned, cmaes):
    """Return the ``batchSize`` of a throwaway optimiser built for ``learned``.

    Args:
        learned: initial evaluable handed to the optimiser constructor.
        cmaes: if truthy a ``CMAES`` optimiser is built, otherwise ``SNES``.
    """
    optimizer_class = CMAES if cmaes else SNES
    # The evaluator is a dummy; the optimiser is only built to read batchSize.
    probe = optimizer_class(lambda x: None, learned, verbose=False)
    return probe.batchSize
Ejemplo n.º 11
0
def configure_snes(objF, start_params, minimize=False):
    """Build an SNES optimiser and a generator function that steps it.

    Args:
        objF: objective function passed to ``SNES``.
        start_params: initial evaluable passed to ``SNES``.
        minimize: value assigned to the optimiser's ``minimize`` attribute.

    Returns:
        ``(run_snes, batch_size)`` where ``run_snes`` is a generator
        function and ``batch_size`` is the optimiser's ``batchSize``.
    """
    optimizer = SNES(objF, start_params, verbose=False)
    optimizer.minimize = minimize

    def run_snes():
        # The counter is a module-level global: it is reset when the
        # generator first runs and incremented before every learning step.
        global generation
        generation = 0

        while True:
            generation += 1
            # One learning step per item; element 0 of the result is yielded.
            yield optimizer.learn(additionalLearningSteps=1)[0]

    return run_snes, optimizer.batchSize
Ejemplo n.º 12
0
def test3():
    """Evolve a net on the ``consistent_corridor`` level with SNES and plot it.

    The polar-maze game is built on ``consistent_corridor`` and a network
    from ``buildNet(..., recurrent=False)`` is optimised with SNES, using
    ``someEpisodes`` as fitness.  After each ``learn`` call the optimiser's
    best evaluable is written into the network as its parameters and the
    resulting trajectories are drawn into the next cell of a 2x2 grid.  The
    loop ends early once the best evaluation reaches the desired
    evaluation (0.78).
    """
    from examples.gridphysics.mazes.simple import office_layout_2, consistent_corridor
    from examples.gridphysics.mazes import polarmaze_game
    from pybrain.optimization import SNES

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(consistent_corridor)
    game_env = GameEnvironment(game)
    net = buildNet(game_env.outdim, 4, 4, temperature=0.05, recurrent=False)

    def fitness(candidate):
        # Score a candidate by running episodes in the shared environment.
        return someEpisodes(game_env, candidate)

    algo = SNES(fitness, net, verbose=True, desiredEvaluation=0.78)

    rows, cols = 2, 2
    episodesPerStep = 3
    for panel in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, panel)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str(panel * episodesPerStep))
        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
        # Blank line between panels in the console output.
        print("")
    pylab.show()
Ejemplo n.º 13
0
def test2():
    """Evolve a net on ``maze_level_1`` with SNES and plot its progress.

    The polar-maze game is built on ``maze_level_1`` inside a
    ``GameEnvironment`` created with ``actionDelay=100`` and
    ``recordingEnabled=True``.  A network from ``buildNet`` is optimised
    with SNES using ``someEpisodes`` as fitness.  After each ``learn`` call
    the best evaluable is written into the network as its parameters and
    the trajectories are drawn into the next cell of a 3x3 grid.  The loop
    ends early once the best evaluation reaches the desired evaluation
    (0.43).
    """
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.optimization import SNES

    game = VGDLParser().parseGame(polarmaze_game)
    game.buildLevel(maze_level_1)
    game_env = GameEnvironment(game, actionDelay=100, recordingEnabled=True)
    net = buildNet(game_env.outdim, 6, 2)

    def fitness(candidate):
        # Score a candidate by running episodes in the shared environment.
        return someEpisodes(game_env, candidate)

    algo = SNES(fitness, net, verbose=True, desiredEvaluation=0.43)

    rows, cols = 3, 3
    episodesPerStep = 2
    for panel in range(1, rows * cols + 1):
        pylab.subplot(rows, cols, panel)
        algo.learn(episodesPerStep)
        net._setParameters(algo.bestEvaluable)
        plotBackground(game_env)
        plotTrajectories(game_env, net)
        pylab.title(str(panel * episodesPerStep))
        if algo.bestEvaluation >= algo.desiredEvaluation:
            break
        # Blank line between panels in the console output.
        print("")
    pylab.show()
Ejemplo n.º 14
0
    def train_network(X_train,
                      y_train,
                      X_validate,
                      y_validate,
                      X_test,
                      y_test,
                      test_split=0,
                      validate_split=0):
        """Train a network with SNES or CMA-ES on stratified mini-batches.

        One stratified batch of ``batch_size`` training rows is drawn per
        generation up front.  The objective rebuilds a network from the
        candidate parameters, activates it on the batch belonging to the
        optimiser's current learning step and returns ``log_loss`` of the
        outputs, which the optimiser minimises.  Before training and after
        every generation the current parameters are passed to
        ``test_network`` for the validation and test sets (and, after each
        generation, for that generation's training batch).

        ``seed``, ``n_classes``, ``buildNet``, ``cmaes``, ``batch_size``,
        ``max_evaluations``, ``f``, ``log_loss``, ``test_network`` and
        ``get_population_size`` are taken from the enclosing scope.

        Args:
            X_train, y_train: training inputs and labels (indexable by an
                array of row indices).
            X_validate, y_validate: validation set passed to ``test_network``.
            X_test, y_test: test set passed to ``test_network``.
            test_split: split number written into the log-line prefix.
            validate_split: split number written into the log-line prefix.
        """
        file_start = "%d\t%d\t%d" % (seed, test_split, validate_split)

        n = buildNet(X_train.shape[1], n_classes)
        learned = n.params
        population_size = get_population_size(learned, cmaes)

        evaluations_per_generation = population_size * batch_size
        # Explicit floor division keeps the generation count an integer
        # regardless of the active division semantics.
        num_generations = max_evaluations // evaluations_per_generation + 1

        # Used to sample a batch with same class ratios.
        # NOTE(review): sklearn.cross_validation is the legacy API and is
        # absent from newer scikit-learn releases -- confirm the pinned version.
        from sklearn.cross_validation import StratifiedShuffleSplit
        sss = StratifiedShuffleSplit(y_train.reshape(-1),
                                     num_generations,
                                     train_size=batch_size,
                                     random_state=seed)
        train_indices = [batch_index for (batch_index, _) in sss]

        def objF(params):
            nn = buildNet(X_train.shape[1], n_classes)
            nn._setParameters(np.array(params))

            # Every candidate evaluated during one learning step is scored
            # on the same pre-drawn batch.
            batch = train_indices[optimizer.numLearningSteps]
            cur_data = X_train[batch]
            cur_label = y_train[batch]

            results = []
            for example in cur_data:
                results.append(nn.activate(example))
                # Clear network state between independent examples.
                nn.reset()

            return log_loss(cur_label, results)

        test_network(X_validate, y_validate, learned, 0, file_start, "val")
        test_network(X_test, y_test, learned, 0, file_start, "test")

        # Build only the optimiser that will actually be used, mirroring
        # the selection in get_population_size.
        optimizer_class = CMAES if cmaes else SNES
        optimizer = optimizer_class(objF, learned, verbose=False)
        optimizer.minimize = True
        optimizer.maxEvaluations = num_generations * population_size

        for generation in xrange(num_generations):
            result = optimizer.learn(additionalLearningSteps=1)
            learned = result[0]

            train_evaluations = (generation + 1) * evaluations_per_generation

            batch = train_indices[generation]
            test_network(X_train[batch], y_train[batch], learned,
                         train_evaluations, file_start, "train")
            test_network(X_validate, y_validate, learned, train_evaluations,
                         file_start, "val")
            test_network(X_test, y_test, learned, train_evaluations,
                         file_start, "test")

            # Flush the output file from the enclosing scope periodically.
            if generation % 100 == 0:
                f.flush()
Ejemplo n.º 15
0
    rules[57] = 0.8
    setRule(rules)
    #certainGame


# Script entry point: initialise the rule set, then optimise a controller
# network with SNES in a (practically) endless loop.
certainInitial()
net = buildNetwork(336, 10, 8, hiddenclass=SigmoidLayer)

from pybrain.optimization import SNES
from pybrain.optimization import OriginalNES
from pybrain.optimization import GA
from numpy import ndarray
# Highest bestEvaluation reported by the optimiser so far.
best = 0
print "SNES starting......"
algo = SNES(lambda x: evaluate(x), net, verbose=True)
# Alternative optimisers that were tried in place of SNES:
#algo = GA(lambda x: evaluate(x), net, verbose=True)
#algo = OriginalNES(lambda x: evaluate(x), net, verbose=True, desiredEvaluation=0.85)
episodesPerStep = 10
for i in range(99999):
    algo.learn(episodesPerStep)
    # Printed before the newest best candidate is loaded into the net.
    print net.params
    # bestEvaluable may be a raw parameter vector or a complete evaluable.
    if isinstance(algo.bestEvaluable, ndarray):
        net._setParameters(algo.bestEvaluable)
    else:
        net = algo.bestEvaluable
    if algo.bestEvaluation > best:
        best = algo.bestEvaluation
    # Fill the {red} and {blue} placeholders of the game template
    # (template, red and blue are defined outside this excerpt).
    nowgame = template
    nowgame = nowgame.replace('{red}', str(red))
    nowgame = nowgame.replace('{blue}', str(blue))