def __init__(self, config, db, neural, weboutput=None):

        self._config = config
        self._database = db
        self._neural = neural
        # avoid a shared mutable default argument
        self._weboutput = weboutput if weboutput is not None else []

        self._batchSize = int(self._config["batchSize"])
        self._windowSize = int(self._config["windowSize"])
        self._numCoins = len(self._config["coins"])
        self._learningRate = self._config["learningRate"]
        self._decayRate = self._config["decayRate"]
        self._decaySteps = self._config["decaySteps"]
        self._trainTestSplit = self._config["trainTestSplit"]

        self._interval = self._config["tradeInterval"]
        self._startutc, self._endutc = self._database.rangeUtcstamp()
        self._startutc = max(self._config["startUtc"], self._startutc)

        self._allX = self._database.readAll(self._startutc, self._endutc, False)

        self._replayMemory = replay.ReplayMemory(config, fromutc=self._startutc, toutc=self._endutc)

        self._commission = self._config["commission"]

        self._startTrainUtc = self._startutc
        # train/test boundary, snapped down to a whole trade interval
        self._endTrainUtc = self._startutc + int(self._trainTestSplit * (self._endutc - self._startutc))
        self._endTrainUtc = self._endTrainUtc - (self._endTrainUtc % self._interval)

        self._startTestUtc = self._endTrainUtc
        self._endTestUtc = self._endutc

        self.initTensors()

        self._saver = tf.train.Saver(max_to_keep=5)
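
The boundary between the training and test ranges above is snapped down to a whole trade interval. A minimal sketch of that arithmetic, with hypothetical numbers standing in for the config values:

start_utc, end_utc = 1_500_000_000, 1_600_000_000  # range reported by the database
train_test_split = 0.8                             # fraction of the range used for training
interval = 1800                                    # trade interval in seconds

end_train_utc = start_utc + int(train_test_split * (end_utc - start_utc))
end_train_utc -= end_train_utc % interval          # snap down to an interval boundary

assert end_train_utc % interval == 0

Snapping down guarantees the split falls on a bar edge, so no sample straddles the train/test boundary.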
Example #2
                    help="tensorflow model checkpoint file to initialize from")
parser.add_argument("rom", help="rom file to run")
args = parser.parse_args()

print('Arguments: %s' % args)

baseOutputDir = 'game-out-' + time.strftime("%Y-%m-%d-%H-%M-%S")
os.makedirs(baseOutputDir)

State.setup(args)

environment = AtariEnvironment(args, baseOutputDir)

dqn = dqn.DeepQNetwork(environment.getNumActions(), baseOutputDir, args)

replayMemory = replay.ReplayMemory(args)


def runEpoch(minEpochSteps, evalWithEpsilon=None):
    stepStart = environment.getStepNumber()
    isTraining = evalWithEpsilon is None
    startGameNumber = environment.getGameNumber()
    epochTotalScore = 0

    while environment.getStepNumber() - stepStart < minEpochSteps:

        startTime = lastLogTime = time.time()
        stateReward = 0
        state = None

        while not environment.isGameOver():
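
The evalWithEpsilon parameter above toggles between training and evaluation, which suggests the inner loop selects actions epsilon-greedily, with a fixed epsilon during evaluation. A minimal sketch of such a selection step; the names chooseAction, bestAction, and numActions are assumptions, not from the original source:

import random

def chooseAction(bestAction, numActions, epsilon):
    # With probability epsilon explore a uniformly random action,
    # otherwise exploit the network's current best action.
    if random.random() < epsilon:
        return random.randrange(numActions)
    return bestAction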
Example #3
#################################
# setup
#################################

base_output_dir = 'run-out-' + time.strftime("%Y-%m-%d-%H-%M-%S")
os.makedirs(base_output_dir)

tensorboard_dir = base_output_dir + "/tensorboard/"
os.makedirs(tensorboard_dir)
summary_writer = tf.summary.create_file_writer(tensorboard_dir)
with summary_writer.as_default():
    tf.summary.text('params', str(args), step=0)

State.setup(args)

environment = CarEnv(args)
replay_memory = replay.ReplayMemory(base_output_dir, args)
dqn = dqn.DeepQNetwork(environment.get_num_actions(),
                       environment.get_state_size(), replay_memory,
                       base_output_dir, tensorboard_dir, args)

train_epsilon = args.epsilon  # don't reset epsilon between epochs
start_time = datetime.datetime.now()
train_episodes = 0
eval_episodes = 0
episode_train_reward_list = []
episode_eval_reward_list = []

#################################
# stop handler
#################################
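
The snippet cuts off at the stop-handler banner. What typically follows in scripts like this, shown here as an assumption rather than the original code, is a SIGINT handler that sets a flag the training loop polls so it can shut down cleanly:

import signal

stop = False

def stop_handler(signum, frame):
    # Ask the training loop to exit cleanly at its next check.
    global stop
    print('Stop requested')
    stop = True

signal.signal(signal.SIGINT, stop_handler)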
Example #4
    def __init__(self, model, optimizer, criterion):

        self.model = model
        self.optimizer = optimizer
        self.criterion = criterion
        self.memory = replay.ReplayMemory()
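
Example #4 only wires up the model, optimizer, criterion, and replay memory. A hedged sketch of the learning step such a class usually performs, assuming a PyTorch-style API and a hypothetical memory.sample(batch_size) that returns already-batched input and target tensors (the original ReplayMemory interface is not shown):

    def learn_step(self, batch_size=32):
        # Hypothetical sample() returning (inputs, targets) tensors.
        inputs, targets = self.memory.sample(batch_size)
        self.optimizer.zero_grad()
        prediction = self.model(inputs)
        loss = self.criterion(prediction, targets)
        loss.backward()
        self.optimizer.step()
        return loss.item()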