else:
                                audio = 1
                            video = 0
                            camera = 0
                            checkagain = 1
                            counter_silence = 0
                ##############################################################
                elif info == 1:
                    data = ast.literal_eval(data.decode('utf-8'))
                    if menuvariable == 1:
                        machinelearningtext = data
                        conn.sendall(b"FinishLearning.endmes")
                    else:

                        learnpepper = Learning(data)
                        pas, law, saving, swerve = learnpepper.learn()

                        check = checklearning(pas, law, saving, swerve)

                        if check != 'ok':
                            mystring = "LearnMore.endmes" + check
                            string = mystring.encode('utf-8')
                            conn.sendall(string)
                            learn = 2
                            if interactionvariable == 1:
                                audio = 5
                            else:
                                audio = 1
                            info = 0
                        else:
                            print("------------------------------")
Beispiel #2
0
import math
import time
import torch
import torch.nn as nn
import torchvision.transforms as t
import torch.nn.functional as F
import torch.optim as optim
import random
import numpy as np
import matplotlib.pyplot as plt
from mario_q import MarioManager
from helpers import Transition
from helpers import ReplayMemory
from helpers import DQN
from learning import Learning

# Training entry script: builds the environment manager, replay memory,
# two DQN instances and an optimizer, then hands them to ``Learning``.

# Use CUDA when torch reports it as available; otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
em = MarioManager(device)


def _build_dqn():
    """Construct a DQN from the manager's screen height/width on ``device``."""
    return DQN(em.screen_height(), em.screen_width()).to(device)


memory = ReplayMemory(1_000_000)

# Both networks are created the same way, policy first and target second.
policy = _build_dqn()
target = _build_dqn()

# Copy the policy network's state into ``target`` and put ``target`` in
# evaluation mode.
target.load_state_dict(policy.state_dict())
target.eval()

# The optimizer is given only the policy network's parameters.
optimizer = optim.Adam(params=policy.parameters(), lr=1e-3)

# Run the learning procedure, then plot its results.
learning_agent = Learning(policy, target, em, memory, optimizer)
learning_agent.learn()
learning_agent.plot_on_figure()