def recommend(
    datestrs,
    name='departure_tree',
    verbose=False,
):
    """
    Estimate a departure time for each requested date.

    Parameters
    ----------
    datestrs: list of strings
        Datestrings of the format YYYY-MM-DD
    name: string
        The stem of the filename where the model is stored.
    verbose: boolean

    Returns
    -------
    departures: dict of int
        Dictionary keys are datestrings and values are departure times.
    """
    model_name = name + '.pickle'
    try:
        # Try to load a saved tree.
        tree = tools.restore(model_name)
    except Exception:
        # If unsuccessful, create a new one and save it.
        tree = create_tree(verbose=verbose)
        tools.store(tree, model_name)

    features_df = create_features(datestrs)
    departures = {}
    for datestr in datestrs:
        estimated_departure = tree.estimate(features_df.loc[datestr, :])
        departures[datestr] = estimated_departure
    return departures
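# A usage sketch of recommend(). The datestrings below are made-up
# placeholders, and the actual departure values depend on the tree that
# create_tree() builds from the project's data.
#
#     departures = recommend(['2023-05-01', '2023-05-02'])
#     for datestr, departure in departures.items():
#         print(datestr, departure)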
def get_trips():
    """
    Attempt to restore a saved copy.
    If unsuccessful, download a new one.

    Returns
    -------
    trips: list of dictionaries
    """
    trips_filename = 'trips.pickle'
    try:
        trips = tools.restore(trips_filename)
    except Exception:
        trips = download_data()
        tools.store(trips, trips_filename)
    return trips
def get_arrival_times(trips_df):
    """
    Attempt to restore a saved copy.
    If unsuccessful, calculate a new one from the trips data.

    Parameters
    ----------
    trips_df: DataFrame

    Returns
    -------
    arrival_times_df: DataFrame
    """
    arrival_times_filename = 'arrival_times.pickle'
    try:
        arrival_times_df = tools.restore(arrival_times_filename)
    except Exception:
        arrival_times_df = None
    if arrival_times_df is None:
        arrival_times_df = calculate_arrival_times(trips_df)
        tools.store(arrival_times_df, arrival_times_filename)
    return arrival_times_df
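# The tools module is not shown here. A minimal sketch of what its
# store()/restore() helpers might look like, assuming they are plain
# pickle wrappers (an assumption, not the project's actual code):
#
#     import pickle
#
#     def store(obj, filename):
#         # Serialize the object to a pickle file on disk.
#         with open(filename, 'wb') as f:
#             pickle.dump(obj, f)
#
#     def restore(filename):
#         # Load a previously pickled object; raises if the file is absent,
#         # which the callers above catch and treat as a cache miss.
#         with open(filename, 'rb') as f:
#             return pickle.load(f)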
def trainer(env, opt):
    agent = tools.qAgent(opt)
    sess = tf.InteractiveSession()

    # Online and target value networks built from the agent's architecture.
    obs, brd, Q1 = agent.valueNet()
    nobs, nbrd, Q2 = agent.valueNet()

    act = tf.placeholder(tf.float32, [None, opt.GAME_SIZE, opt.GAME_SIZE])
    rwd = tf.placeholder(tf.float32, [None])

    # Q-value of the chosen action and the one-step TD target.
    val1 = tf.reduce_sum(tf.reduce_sum(tf.multiply(Q1, act), -1), -1)
    val2 = rwd + opt.GAMMA * tf.reduce_max(tf.reduce_max(Q2, -1), 1)
    loss = tf.reduce_mean(tf.square(val1 - val2))
    trainStep = tf.train.AdamOptimizer(opt.LR).minimize(loss)

    sess.run(tf.global_variables_initializer())
    saver = tools.restore(sess)

    globalStep = 0
    for i_e in range(1, opt.MAX_EPISODE + 1):
        state = env.reset()
        done = False
        sumLoss = 0
        step = 0
        score = 0
        while not done:
            globalStep += 1
            step += 1
            # Anneal the exploration rate toward its final value.
            if not (globalStep + 1) % opt.EPS_STEP and agent.eps > opt.FIN_EPS:
                agent.eps *= opt.EPS_DECAY
            agent.obsMem.push(state)
            agent.brdMem.push(env.board)
            action, actBrd = agent.smpAct(
                Q1, {obs: [state], brd: [env.board]}, env.board, step)
            state, nstate, done, reward = env.step(action)
            score += reward
            agent.nobsMem.push(nstate)
            agent.actMem.push(actBrd)
            agent.rwdMem.push(reward)
            agent.nbrdMem.push(env.board)

            # Once the replay memory is full, train on a random minibatch.
            if globalStep >= opt.MEM_SIZE:
                randIdx = np.random.choice(opt.MEM_SIZE, opt.BATCH_SIZE)
                lossVal, _ = sess.run(
                    [loss, trainStep],
                    feed_dict={
                        obs: agent.obsMem.mem[randIdx],
                        act: agent.actMem.mem[randIdx],
                        rwd: agent.rwdMem.mem[randIdx],
                        nobs: agent.nobsMem.mem[randIdx],
                        brd: agent.brdMem.mem[randIdx],
                        nbrd: agent.nbrdMem.mem[randIdx],
                    })
                sumLoss += lossVal

        print("====== Episode %d ended with score = %f, avg_loss = %f ======"
              % (i_e, score, sumLoss / step))
        if i_e > opt.MEM_SIZE and not i_e % 100:
            saver.save(sess, 'checkpoints/omok-dqn', global_step=globalStep)