import framework
import utils

DEBUG = False

# Hyperparameters.
GAMMA = 0.5  # discount factor
TRAINING_EP = 0.5  # epsilon-greedy parameter for training
TESTING_EP = 0.05  # epsilon-greedy parameter for testing
NUM_RUNS = 10
NUM_EPOCHS = 600
NUM_EPIS_TRAIN = 25  # number of episodes for training at each epoch
NUM_EPIS_TEST = 50  # number of episodes for testing
ALPHA = 0.001  # learning rate for training

# Action / object vocabularies supplied by the framework module, and their sizes.
ACTIONS = framework.get_actions()
OBJECTS = framework.get_objects()
NUM_ACTIONS = len(ACTIONS)
NUM_OBJECTS = len(OBJECTS)


def tuple2index(action_index, object_index):
    """Flatten an (action_index, object_index) pair into a single index.

    The pair is laid out row-major with NUM_OBJECTS entries per action, so
    the result is ``action_index * NUM_OBJECTS + object_index``.
    """
    return NUM_OBJECTS * action_index + object_index


def index2tuple(index):
    """Split a flat index back into an (action_index, object_index) pair.

    This undoes ``tuple2index`` for any object_index in
    ``range(NUM_OBJECTS)``.
    """
    action_index = index // NUM_OBJECTS
    object_index = index % NUM_OBJECTS
    return action_index, object_index


# pragma: coderesponse template name="linear_epsilon_greedy"
# NOTE(review): the line below appears to have lost its original line breaks.
# As written, everything after the first "#" is parsed by Python as a comment,
# so none of the later assignments or function definitions take effect.
# TODO: restore one statement per line.
#
# Contents, in order of appearance:
#   * imports of matplotlib.pyplot (as plt) and tqdm;
#   * the DEBUG flag and the hyperparameters GAMMA, TRAINING_EP, TESTING_EP,
#     NUM_RUNS, NUM_EPOCHS, NUM_EPIS_TRAIN, NUM_EPIS_TEST and ALPHA;
#   * the action and object lists fetched from `framework`, and their lengths
#     NUM_ACTIONS and NUM_OBJECTS;
#   * module-level `model` and `optimizer`, both initialised to None;
#   * tuple2index(action_index, object_index), which returns
#     action_index * NUM_OBJECTS + object_index, and index2tuple(index), which
#     returns (index // NUM_OBJECTS, index % NUM_OBJECTS);
#   * the header of extract_words(input_string); its body is not on this line.
#
# NOTE(review): `framework` is referenced here but not imported on this line;
# presumably it is imported elsewhere in the file. Confirm.
import matplotlib.pyplot as plt from tqdm import tqdm DEBUG = False GAMMA = 0.5 # discounted factor TRAINING_EP = 0.5 # epsilon-greedy parameter for training TESTING_EP = 0.05 # epsilon-greedy parameter for testing NUM_RUNS = 10 NUM_EPOCHS = 600 NUM_EPIS_TRAIN = 25 # number of episodes for training at each epoch NUM_EPIS_TEST = 50 #number of episodes for testing ALPHA = 0.001 # learning rate for training actions = framework.get_actions() objects = framework.get_objects() NUM_ACTIONS = len(actions) NUM_OBJECTS = len(objects) model = None optimizer = None def tuple2index(action_index, object_index): return action_index * NUM_OBJECTS + object_index def index2tuple(index): return index // NUM_OBJECTS, index % NUM_OBJECTS # bag-of-words embedding def extract_words(input_string):