def __init__(self, config, db, neural, weboutput=None):
    self._config = config
    self._database = db
    self._neural = neural
    # Avoid a mutable default argument: fall back to a fresh list per instance.
    self._weboutput = weboutput if weboutput is not None else []

    # Hyperparameters and trading settings from the config dict.
    self._batchSize = int(self._config["batchSize"])
    self._windowSize = int(self._config["windowSize"])
    self._numCoins = len(self._config["coins"])
    self._learningRate = self._config["learningRate"]
    self._decayRate = self._config["decayRate"]
    self._decaySteps = self._config["decaySteps"]
    self._trainTestSplit = self._config["trainTestSplit"]
    self._interval = self._config["tradeInterval"]

    # Clamp the data range to the configured start and preload all rows.
    self._startutc, self._endutc = self._database.rangeUtcstamp()
    self._startutc = max(self._config["startUtc"], self._startutc)
    self._allX = self._database.readAll(self._startutc, self._endutc, False)
    self._replayMemory = replay.ReplayMemory(config, fromutc=self._startutc, toutc=self._endutc)
    self._commission = self._config["commission"]

    # Train/test split: the first trainTestSplit fraction of the range is for
    # training; the boundary is snapped down to a whole trade interval.
    self._startTrainUtc = self._startutc
    self._endTrainUtc = self._startutc + int(self._trainTestSplit * (self._endutc - self._startutc))
    self._endTrainUtc = self._endTrainUtc - (self._endTrainUtc % self._interval)
    self._startTestUtc = self._endTrainUtc
    self._endTestUtc = self._endutc

    self.initTensors()
    self._saver = tf.train.Saver(max_to_keep=5)  # TF 1.x checkpoint saver
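# A minimal sketch of the split arithmetic above, with made-up numbers (the
# range, split, and interval here are hypothetical, not from the source): the
# raw split point is snapped down so the test window starts exactly on a
# trade-interval boundary.
start_utc, end_utc = 0, 1_000_000        # hypothetical data range, in seconds
train_test_split, interval = 0.75, 3600  # 75% train, hourly trade interval

end_train = start_utc + int(train_test_split * (end_utc - start_utc))  # 750000
end_train -= end_train % interval        # 750000 % 3600 == 1200 -> 748800
assert end_train % interval == 0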
help="tensorflow model checkpoint file to initialize from") parser.add_argument("rom", help="rom file to run") args = parser.parse_args() print 'Arguments: %s' % (args) baseOutputDir = 'game-out-' + time.strftime("%Y-%m-%d-%H-%M-%S") os.makedirs(baseOutputDir) State.setup(args) environment = AtariEnvironment(args, baseOutputDir) dqn = dqn.DeepQNetwork(environment.getNumActions(), baseOutputDir, args) replayMemory = replay.ReplayMemory(args) def runEpoch(minEpochSteps, evalWithEpsilon=None): stepStart = environment.getStepNumber() isTraining = True if evalWithEpsilon is None else False startGameNumber = environment.getGameNumber() epochTotalScore = 0 while environment.getStepNumber() - stepStart < minEpochSteps: startTime = lastLogTime = time.time() stateReward = 0 state = None while not environment.isGameOver():
import datetime
import os
import time

import tensorflow as tf  # TF 2.x summary API

# State, CarEnv, dqn, and replay are project-local modules.

#################################
# setup
#################################
base_output_dir = 'run-out-' + time.strftime("%Y-%m-%d-%H-%M-%S")
os.makedirs(base_output_dir)

tensorboard_dir = base_output_dir + "/tensorboard/"
os.makedirs(tensorboard_dir)
summary_writer = tf.summary.create_file_writer(tensorboard_dir)
with summary_writer.as_default():
    tf.summary.text('params', str(args), step=0)  # log the run arguments once

State.setup(args)

environment = CarEnv(args)
replay_memory = replay.ReplayMemory(base_output_dir, args)
dqn = dqn.DeepQNetwork(environment.get_num_actions(), environment.get_state_size(),
                       replay_memory, base_output_dir, tensorboard_dir, args)

train_epsilon = args.epsilon  # don't reset epsilon between epochs

start_time = datetime.datetime.now()
train_episodes = 0
eval_episodes = 0
episode_train_reward_list = []
episode_eval_reward_list = []

#################################
# stop handler
#################################
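# Hedged sketch of how the counters and reward lists above would typically be
# surfaced in TensorBoard via the TF 2.x summary API; this helper and its tag
# names are illustrative, not from the source.
def log_episode_reward(reward, training):
    global train_episodes, eval_episodes
    with summary_writer.as_default():
        if training:
            train_episodes += 1
            episode_train_reward_list.append(reward)
            tf.summary.scalar('train/episode_reward', reward, step=train_episodes)
        else:
            eval_episodes += 1
            episode_eval_reward_list.append(reward)
            tf.summary.scalar('eval/episode_reward', reward, step=eval_episodes)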
def __init__(self, model, optimizer, criterion):
    self.model = model
    self.optimizer = optimizer
    self.criterion = criterion
    self.memory = replay.ReplayMemory()
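# This fragment only wires the trainer together. A minimal method sketch of
# the update step such a trainer might run, assuming a PyTorch-style model
# and a hypothetical memory.sample() returning (state, action, reward,
# next_state) tensors; the real ReplayMemory interface is not shown here.
import torch

def train_step(self, batch_size=32, gamma=0.99):
    states, actions, rewards, next_states = self.memory.sample(batch_size)

    # Q(s, a) for the actions actually taken (actions: int64, shape [B]).
    q_values = self.model(states).gather(1, actions.unsqueeze(1)).squeeze(1)

    # One-step TD target: r + gamma * max_a' Q(s', a'), held out of autograd.
    with torch.no_grad():
        targets = rewards + gamma * self.model(next_states).max(dim=1).values

    loss = self.criterion(q_values, targets)
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    return loss.item()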