Esempio n. 1
0
import framework
import utils

DEBUG = False

GAMMA = 0.5  # discounted factor
TRAINING_EP = 0.5  # epsilon-greedy parameter for training
TESTING_EP = 0.05  # epsilon-greedy parameter for testing
NUM_RUNS = 10
NUM_EPOCHS = 600
NUM_EPIS_TRAIN = 25  # number of episodes for training at each epoch
NUM_EPIS_TEST = 50  # number of episodes for testing
ALPHA = 0.001  # learning rate for training

ACTIONS = framework.get_actions()
OBJECTS = framework.get_objects()
NUM_ACTIONS = len(ACTIONS)
NUM_OBJECTS = len(OBJECTS)


def tuple2index(action_index, object_index):
    """Converts a tuple (a,b) to an index c"""
    return action_index * NUM_OBJECTS + object_index


def index2tuple(index):
    """Converts an index c to a tuple (a,b)"""
    return index // NUM_OBJECTS, index % NUM_OBJECTS


# pragma: coderesponse template name="linear_epsilon_greedy"
import matplotlib.pyplot as plt
from tqdm import tqdm

DEBUG = False

GAMMA = 0.5 # discounted factor
TRAINING_EP = 0.5 # epsilon-greedy parameter for training
TESTING_EP = 0.05 # epsilon-greedy parameter for testing
NUM_RUNS = 10
NUM_EPOCHS = 600
NUM_EPIS_TRAIN = 25 # number of episodes for training at each epoch
NUM_EPIS_TEST = 50 #number of episodes for testing
ALPHA = 0.001 # learning rate for training

actions = framework.get_actions()
objects = framework.get_objects()
NUM_ACTIONS = len(actions)
NUM_OBJECTS = len(objects)

model = None
optimizer = None

def tuple2index(action_index, object_index):
    return action_index * NUM_OBJECTS + object_index

def index2tuple(index):
    return index // NUM_OBJECTS, index % NUM_OBJECTS


# bag-of-words embedding
def extract_words(input_string):