
Causal Graphs in Reinforcement Learning

A bachelor's project on constructing causal graphs in reinforcement learning.

Requirements

Install the dependencies with pip:

python -m pip install torch torchvision pygame matplotlib pynput
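
If the installation succeeded, the dependencies should import cleanly. A quick sanity check (not part of the project code):

import torch, torchvision, pygame, matplotlib, pynput
print("torch", torch.__version__)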

Import classes

All of the loops below rely on these wildcard imports:

from game import *
from agent import *
from collector import *
from auxillaries import *
from helper import *
from replaybuffer import *

Set up the simple loop

A baseline loop in which the mover acts directly on the raw board:

def simple(defaults):
    collector = Collector(**defaults)
    env = Game(**defaults)
    mover = Mover(env, **defaults)

    with Save(env, collector, mover, **defaults) as save:
        for frame in loop(env, collector, save):
            actions = mover(env.board)                               # choose actions from the current board
            observations, rewards, dones, info = env.step(actions)   # advance the environment
            mover.learn(observations, actions, rewards, dones)       # train the mover on the transition
            collector.collect([rewards], [dones])                    # log rewards and episode terminations
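
Each loop takes its settings as a single dict. A direct invocation (hypothetical; the run(Defaults) call at the bottom is the intended entry point) could flatten the Defaults class defined below; teleport and CFagent are called the same way:

defaults = {k: v for k, v in vars(Defaults).items() if not k.startswith("__")}
simple(defaults)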

Set up the teleport loop

Adds a Teleporter that intervenes on the board before the mover acts, and a ReplayBuffer for training it:

def teleport(defaults):
    collector = Collector(**defaults)
    env = Game(**defaults)
    mover = Mover(env, _extra_dim=1, **defaults)
    teleporter = Teleporter(env, **defaults)
    buffer = ReplayBuffer(**defaults)

    with Save(env, collector, mover, teleporter, **defaults) as save:
        intervention_idx, modified_board = teleporter.pre_process(env)
        for frame in loop(env, collector, save, teleporter):
            # Let the teleporter intervene on the board before the mover acts
            modified_board = teleporter.interveen(env.board, intervention_idx, modified_board)
            actions = mover(modified_board)
            observations, rewards, dones, info = env.step(actions)
            # Recompute the intervened board, rewards and dones from the new observations
            modified_board, modified_rewards, modified_dones, teleport_rewards, intervention_idx = teleporter.modify(observations, rewards, dones, info)
            buffer.teleporter_save_data(teleporter.boards, observations, teleporter.interventions, teleport_rewards, dones, intervention_idx)
            # Train the mover on the modified transition, then the teleporter on a replayed batch
            mover.learn(modified_board, actions, modified_rewards, modified_dones)
            board_before, board_after, intervention, tele_rewards, tele_dones = buffer.sample_data()
            teleporter.learn(board_after, intervention, tele_rewards, tele_dones, board_before)
            collector.collect([rewards, modified_rewards, teleport_rewards], [dones, modified_dones])

Set up the counterfactuals loop

Extends the teleport loop with a counterfactual agent (CFAgent) and its own replay buffer (CFReplayBuffer):

def CFagent(defaults):
    env = Game(**defaults)
    mover = Mover(env, _extra_dim=1, **defaults)
    teleporter = Teleporter(env, **defaults)
    buffer = ReplayBuffer(**defaults)
    CFagent = CFAgent(env, **defaults)  # note: this local shadows the enclosing function's name
    CFbuffer = CFReplayBuffer(**defaults)
    collector = Collector(**defaults)

    with Save(env, collector, mover, teleporter, CFagent, **defaults) as save:
        intervention_idx, modified_board = teleporter.pre_process(env)
        dones = CFagent.pre_process(env)
        CF_dones, cfs = None, None
        for frame in loop(env, collector, save, teleporter):
            # Propose counterfactuals before the teleporter intervenes
            CFagent.counterfact(env, dones, teleporter, CF_dones, cfs)
            modified_board = teleporter.interveen(env.board, intervention_idx, modified_board)
            actions = mover(modified_board)
            observations, rewards, dones, info = env.step(actions)
            modified_board, modified_rewards, modified_dones, teleport_rewards, intervention_idx = teleporter.modify(observations, rewards, dones, info)
            buffer.teleporter_save_data(teleporter.boards, observations, teleporter.interventions, teleport_rewards, dones, intervention_idx)
            mover.learn(modified_board, actions, modified_rewards, modified_dones)
            board_before, board_after, intervention, tele_rewards, tele_dones = buffer.sample_data()
            teleporter.learn(board_after, intervention, tele_rewards, tele_dones, board_before)
            collector.collect([rewards, modified_rewards, teleport_rewards], [dones, modified_dones])
            # Check the counterfactuals against the new state, store them, and train the CF agent on a replayed batch
            CF_dones, cfs = CFagent.counterfact_check(dones, env, **defaults)
            CFbuffer.CF_save_data(CFagent.boards, observations, CFagent.counterfactuals, rewards, dones, CF_dones)
            CFboard, CFobs, cf, CFrewards, CFdones1 = CFbuffer.sample_data()
            CFagent.learn(CFobs, cf, CFrewards, CFdones1, CFboard)

Set up the default variables

All settings used by the loops above are collected in this class:

class Defaults:
    name: str = "Agent"
    main: function = graphTrain  # which loop to run (graphTrain is defined in the repository)
    level: Levels = Levels.Causal2
    failed_actions_chance: float = 0
    use_model: bool = True
    depth: int = 1
    model_explore: int = 100000
    samples: int = 5
    hours: float = 12
    batch: int = 100
    width: int = 9
    height: int = 9

    graphMode: GraphMode = GraphMode.UCB1

    network1: Networks = Networks.Teleporter
    K1: float = 5000000
    learner1: Learners = Learners.Qlearn
    exploration1: Explorations = Explorations.softmaxer
    gamma1: float = 0.98

    network2: Networks = Networks.Mini
    K2: float = 1000000
    learner2: Learners = Learners.Qlearn
    exploration2: Explorations = Explorations.epsilonGreedy
    gamma2: float = 0.95

    layer_Blocks: bool = True
    layer_Goal: bool = True
    layer_Gold: bool = True
    layer_Keys: bool = True
    layer_Door: bool = True
    layer_Holder: bool = True
    layer_Putter: bool = True

    layer_Rock: bool = True
    layer_Dirt: bool = True

    layer_Diamond1: bool = True
    layer_Diamond2: bool = True
    layer_Diamond3: bool = True
    layer_Diamond4: bool = True

    layer_Reddoor: bool = True
    layer_Redkeys: bool = True
    layer_Bluedoor: bool = True
    layer_Bluekeys: bool = True

    layer_Pink1: bool = True
    layer_Pink2: bool = True
    layer_Pink3: bool = True
    layer_Brown1: bool = True
    layer_Brown2: bool = True
    layer_Brown3: bool = True

    layer_Greendown: bool = True
    layer_Greenup: bool = True
    layer_Greenstar: bool = True
    layer_Yellowstar: bool = True
    layer_Bluestar: bool = True

    layer_Coconut: bool = True

    layer_Monster: bool = True

    layer_Greencross: bool = True
    layer_Bluecross: bool = True
    layer_Redcross: bool = True
    layer_Purplecross: bool = True
    layer_Super1: bool = True
    layer_Super2: bool = True
    layer_Super3: bool = True
    layer_Super4: bool = True
    layer_Super5: bool = True
    layer_Super6: bool = True
    layer_Super7: bool = True

    epsilon_cap: float = 0.2
    softmax_cap: float = 0.02
    update: int = 10000
    reset_chance: float = 0.002
    modified_done_chance: float = 0.05
    miss_intervention_cost: float = -0.15
    intervention_cost: float = -0.05
    replay_size: int = 100000
    sample_size: int = 50
    CF_convert: int = 3
    Counterfacts: int = 1
    TopN: int = 6
    Random_counterfacts: bool = False
    num: int = 0
    load_name: str = "Causal4_Conver4_3counterfacts"

Run the selected loop

Finally, start the loop selected by Defaults.main:

run(Defaults)
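
To try different settings, one option (an illustrative pattern, not prescribed by the repository) is to subclass Defaults and override individual fields before running:

class MyDefaults(Defaults):
    name: str = "Teleport-Agent"  # hypothetical run name
    hours: float = 1.0            # shorter run for a quick test

run(MyDefaults)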
