Example #1
0
        def fitness(individual):
            """Evaluate one individual's tweaks over several full episodes.

            Runs ``trials_per_individual`` episodes with the individual's
            tweaks active, accumulating the reward, and returns the negated
            mean episode reward (the genetic optimizer minimizes fitness).

            :param individual: tweaks dict passed to ``hg.ExecutionContext``.
            :return: ``-total_reward / trials`` (lower is better).
            """
            env = self.env
            adapters = self.adapters

            total_reward = 0
            trials = self.trials_per_individual
            action_prob = self.action_prob
            for _ in range(trials):
                observation = env.reset()
                # Fix: reset the cached action for every episode. Previously
                # these flags were set once before the loop, so a stale action
                # from the *previous* episode could be replayed on the first
                # step after env.reset(). This also matches the per-episode
                # initialization used by the test() method.
                action_valid = False
                action = None
                ctx = hg.ExecutionContext(tweaks=individual)
                with ctx.as_default():
                    for t in range(self.max_steps):
                        # Recompute the action on the first step, always when
                        # action_prob == 1, and otherwise with probability
                        # action_prob; else repeat the cached action
                        # (action-repeat / frame-skip style behavior).
                        if (not action_valid
                            ) or action_prob == 1 or np.random.uniform(
                            ) <= action_prob:
                            action = graph(
                                input=adapters[0].from_gym(observation))
                            action = adapters[1].to_gym(action)
                            action_valid = True
                        observation, reward, done, info = env.step(action)
                        total_reward += reward
                        if done:
                            break
            return -total_reward / trials
Example #2
0
    def test(self,
             graph: hg.Graph,
             individual,
             *,
             speed=1.0,
             single_render_invocation=False):
        """Run a single rendered episode with the individual's tweaks active.

        :param graph: the hypergraph graph that maps observations to actions.
        :param individual: tweaks dict passed to ``hg.ExecutionContext``.
        :param speed: playback speed multiplier for the render delay.
        :param single_render_invocation: if True, call ``env.render()`` once
            up front instead of once per step.
        """
        environment = self.env
        obs_adapter, act_adapter = self.adapters[0], self.adapters[1]

        frames_per_second = environment.metadata['video.frames_per_second']
        # Per-frame delay so playback runs at `speed` times real time.
        delay = 1.0 / (frames_per_second * speed)

        if single_render_invocation:
            environment.render()

        episode_reward = 0
        observation = environment.reset()
        context = hg.ExecutionContext(tweaks=individual)
        prob = self.action_prob
        have_action = False
        current_action = None
        with context.as_default():
            while True:
                if not single_render_invocation:
                    environment.render()
                    time.sleep(delay)

                # Recompute on the first step, always when prob == 1, and
                # otherwise with probability `prob`; else repeat the cached
                # action.
                recompute = ((not have_action) or prob == 1
                             or np.random.uniform() <= prob)
                if recompute:
                    raw_action = graph(
                        input=obs_adapter.from_gym(observation))
                    current_action = act_adapter.to_gym(raw_action)
                    have_action = True
                observation, reward, done, info = environment.step(
                    current_action)
                episode_reward += reward
                if done:
                    break
        print('Test episode concluded, total_reward={}'.format(episode_reward))
Example #3
0
import hypergraph as hg
import numpy as np

# Build a small demo graph with two named ("marked") nodes and an output that
# randomly selects one of them on each execution.
graph1 = hg.Graph()
with graph1.as_default():
    # `<<` wires the right-hand operand as the input of the left-hand node
    # (hypergraph's declaration DSL). NOTE(review): the precise semantics of
    # mark()/dump() come from the hypergraph package — presumably dump() prints
    # its input when executed and mark() names the node; confirm against the
    # library.
    hg.mark("abc1") << (hg.dump() << "** abc1 **")
    n = hg.mark("abc2") << (hg.dump() << "** abc2 **")

    # idx evaluates to 0 or 1 at execution time (np.random.randint upper bound
    # is exclusive), so select() routes one of the two marked nodes to output.
    idx = hg.node(lambda _: np.random.randint(0, 2))
    hg.output() << (hg.select(idx) << ["abc1", "abc2"])

# Execute the graph three times, each in a fresh ExecutionContext so no
# per-run state is shared between executions.
for _ in range(3):
    ctx = hg.ExecutionContext()
    with ctx.as_default():
        print(graph1())
    print("*** end of execution ***")
Example #4
0
import hypergraph as hg
from hypergraph.genetic import GeneticOperators


@hg.decl_tweaks(y=hg.tweaks.Uniform())
@hg.decl_tweaks(z=hg.tweaks.Normal(mean=10))
def test1(x, y, z):
    """Return the sum of ``x`` with the tweak-supplied ``y`` and ``z``."""
    partial = x + y
    return partial + z


# Build a one-node graph that calls test1; the tweak-declared parameters
# (y, z) are filled in by the genetic machinery at execution time.
graph1 = hg.Graph(name='g1')
with graph1.as_default():
    # Note: the dictionary items are automatically mapped to the arguments
    hg.output() << (hg.call(test1) << {'x': 2})

# Wrap the graph with genetic operators so its declared tweaks become genes.
genetic = GeneticOperators(graph1)
# NOTE(review): phenotype presumably describes the tweak/gene layout — confirm
# against the hypergraph.genetic documentation.
print(genetic.phenotype)
# Sample a population of size 1 and take its only individual (a tweaks dict).
tweaks = genetic.create_population(1)[0]
print(tweaks)

# Execute the graph with the sampled tweaks bound via the context.
ctx = hg.ExecutionContext(tweaks=tweaks)
with ctx.as_default():
    print(graph1())