def etl_people_survey_result(people_survey, pid, project_id, survey_id,
                             enterprise_info, orgid, t):
    # Build the people_survey_info row for the people_survey_info table.
    # Format the answer timestamps only when both are present.
    if people_survey.begin_answer_time is None or people_survey.finish_time is None:
        begin_time = people_survey.begin_answer_time
        finish_time = people_survey.finish_time
    else:
        begin_time = time_format(people_survey.begin_answer_time)
        finish_time = time_format(people_survey.finish_time)
    people_survey_info = [
        people_survey.id, pid, survey_id, project_id, enterprise_info.id, orgid,
        begin_time, finish_time,
        people_survey.model_score, people_survey.dimension_score,
        people_survey.substandard_score, people_survey.facet_score,
        people_survey.happy_score, people_survey.happy_ability_score,
        people_survey.happy_efficacy_score, people_survey.praise_score,
        people_survey.uniformity_score, t,
    ]
    return people_survey_info
def update_list(self):
    que = self.w1.update()
    rows = self.cur.execute(que).fetchmany(self.count_rows_spin.value())
    self.list_order.clear()
    self.list_order_2.clear()
    for i in rows:
        dish_time_s = self.cur.execute(
            f'''select cooktime from dish
                where id = (select dishid from orderdish where id = ({i[0]}))'''
        ).fetchone()[0]
        date_time_s = self.cur.execute(
            f'''select datetime from orderclient where id = {i[1]}'''
        ).fetchone()[0]
        dish_count = int(i[4])
        date_time = QDateTime.fromString(date_time_s, date_time_format())
        dish_time = QTime.fromString(dish_time_s, time_format())
        dish_time_minutes = dish_time.hour() * 60 + dish_time.minute() * dish_count
        dish_time = QTime(dish_time_minutes // 60, dish_time_minutes % 60)
        secs_passed = date_time.secsTo(QDateTime.currentDateTime())
        hms = [dish_time.hour() - secs_passed // 3600,
               dish_time.minute() - secs_passed // 60 % 60,
               59 - secs_passed % 60]
        time_last = QTime(*hms)
        if time_last.isValid():
            order = [time_last.toString(time_format() + ':ss'), *i[2:]]
        else:
            order = ['Done', *i[2:]]
        item = QListWidgetItem(' - '.join(map(str, order)))
        if not time_last.isValid():
            item.setBackground(QColor(255, 220, 220))
            self.list_order_2.addItem(item)
        else:
            self.list_order.addItem(item)
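# The Qt snippets here (update_list, add, __init__) call time_format() /
# date_time_format() with no arguments and feed the result to
# QTime.fromString / QDateTime.fromString, so they presumably return Qt
# format strings. A minimal sketch under that assumption; the exact patterns
# below are guesses, not confirmed by the source:
def time_format():
    """Qt format string for times stored in the database (assumed pattern)."""
    return 'hh:mm'

def date_time_format():
    """Qt format string for datetimes stored in the database (assumed pattern)."""
    return 'yyyy-MM-dd hh:mm'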
def etl_project_info(assess, enterpriseinfo, t):
    # Build the project row for the assess_info table.
    assess_info = [[
        assess.id, assess.name, enterpriseinfo.id,
        time_format(assess.begin_time), time_format(assess.end_time),
        assess.assess_type, assess.user_count, t,
    ]]
    return assess_info
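# Both ETL snippets above pass a datetime to a time_format helper and store
# the result in a warehouse row, so that variant presumably renders a
# datetime as a string. A minimal sketch under that assumption; the exact
# format string is a guess:
def time_format(dt):
    """Render a datetime as 'YYYY-MM-DD HH:MM:SS' (assumed layout)."""
    return dt.strftime('%Y-%m-%d %H:%M:%S')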
def get_string_repr_duration_and_place(
        current_datetime,  # DateTime
        place,             # Str
        spent_time         # Float
):
    # DateTime after the time spent
    end_datetime = get_datetime_after_spent(current_datetime, spent_time)
    # Format the start and end times
    start_time_format = time_format(current_datetime.hour, current_datetime.minute)
    end_time_format = time_format(end_datetime.hour, end_datetime.minute)
    return f'{start_time_format} - {end_time_format} - {place}'
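# Here time_format takes an hour and a minute rather than a datetime, so it
# is a different helper from the ETL one above. A plausible sketch; the
# zero-padded 'HH:MM' layout is an assumption:
def time_format(hour, minute):
    """Render an hour/minute pair as zero-padded 'HH:MM' (assumed layout)."""
    return f'{hour:02d}:{minute:02d}'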
def train_agent(self):
    average_reward = 0
    scores_window = deque(maxlen=100)
    t0 = time.time()
    for i_episode in range(1, self.episodes):
        episode_reward = 0
        state = self.env.reset()
        t = 0
        while True:
            t += 1
            action = self.act(state)
            next_state, reward, done, _ = self.env.step(action)
            episode_reward += reward
            if i_episode > 10:
                self.learn()
            self.memory.add(state, reward, action, next_state, done)
            state = next_state
            if done:
                scores_window.append(episode_reward)
                break
        if i_episode % self.eval == 0:
            self.eval_policy()
        ave_reward = np.mean(scores_window)
        print("Episode {} Steps {} Reward {} Average reward {:.2f} Time {}".format(
            i_episode, t, episode_reward, np.mean(scores_window),
            time_format(time.time() - t0)))
        self.writer.add_scalar('Aver_reward', ave_reward, self.steps)
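# The training loops in this collection pass an elapsed-seconds float
# (time.time() - t0) to time_format, so that variant presumably renders a
# duration. A minimal sketch under that assumption; the 'HH:MM:SS' layout is
# a guess:
def time_format(secs):
    """Render elapsed seconds as 'HH:MM:SS' (assumed layout)."""
    m, s = divmod(int(secs), 60)
    h, m = divmod(m, 60)
    return f'{h:02d}:{m:02d}:{s:02d}'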
def train(env, config):
    """Train the predicter on transitions from a pre-filled replay buffer."""
    t0 = time.time()
    save_models_path = str(config["locexp"])
    memory = ReplayBuffer((8, ), (1, ), config["buffer_size"],
                          config["seed"], config["device"])
    memory.load_memory(config["buffer_path"])
    agent = Agent(state_size=8, action_size=4, config=config)
    if config["idx"] < memory.idx:
        memory.idx = config["idx"]
    print("memory idx ", memory.idx)
    for t in range(config["predicter_time_steps"]):
        text = "Train Predicter {} / {} time {} \r".format(
            t, config["predicter_time_steps"], time_format(time.time() - t0))
        print(text, end='')
        agent.learn(memory)
        if t % int(config["eval"]) == 0:
            print(text)
            agent.save(save_models_path + "/models/{}-".format(t))
            # agent.test_predicter(memory)
            agent.test_q_value(memory)
            agent.eval_policy()
            agent.eval_policy(True, 1)
def _ult_charge_append(self, players, time):
    ult_charge_row = [utils.time_format(time)]
    for player in players:
        chara = player.chara
        ult_charge = player.ult_charge
        ult_charge_row += [chara, ult_charge]
    self.sheet.append(ult_charge_row)
def add(self, res):
    # Add the new order's cooking time to the cook's work minutes.
    cook_minutes = self.cur.execute(
        f'select cooktime from dish where id = {res[2]}').fetchone()[0]
    cook_minutes = QTime.fromString(cook_minutes, time_format())
    cook_minutes = cook_minutes.hour() * 60 + cook_minutes.minute() * int(res[3])
    self.cur.execute(f'''update cook set workminute = workminute + {cook_minutes}
                         where id = {res[1]}''')
    return super().add(res)
def output_result(self, test, dev):
    resultf = self.__logdir + '/result.pkl'
    elapsed_time = time.time() - self.__start_time
    d = get_arg('all')
    d['test'] = test
    d['dev'] = dev
    d['composition'] = d['composition'].name
    d['time'] = utils.time_format(elapsed_time)
    pickle.dump(d, open(resultf, 'wb'))
    script = ("import pickle\n"
              "for k, v in pickle.load(open('result.pkl', 'rb')).items():\n"
              "    print('{}: {}'.format(k, v))")
    print(script, file=open(self.__logdir + '/print_result.py', 'w'))
def _format(data):
    """
    Format a record for output.
    :param data: {"field name": value, ...}
    :return: [value, ...]
    """
    data['time'] = utils.time_format(data['time'])
    format_spec = Config.format
    result = [''] * (len(Config.title) + 1)
    for k, v in format_spec.items():
        if k in ['object hero', 'subject hero'] and k in data:
            data[k] = utils.chara_capitalize(data[k])
        if k in data:
            result[v - 1] = data[k]
    for i, s in enumerate(result):
        if s == 'empty' or s == 'Empty':
            result[i] = ''
    return result
def main_loop(self, datasets, epochs=100, batchsize=100, reporter=[],
              validation=False):
    '''
    Main learning loop
    '''
    # Start training
    print('\n\n--- START TRAINING ---\n\n')
    num_data = len(datasets)
    self.on_train_begin()
    for e in range(epochs):
        perm = np.random.permutation(num_data)
        start_time = time.time()
        for b in range(0, num_data, batchsize):
            bsize = min(batchsize, num_data - b)
            indx = perm[b:b + bsize]

            # Print current status
            ratio = 100.0 * (b + bsize) / num_data
            print(chr(27) + "[2K", end='')  # ANSI escape: clear the current line
            print('\rEpoch #%d | %d / %d (%6.2f %%) ' %
                  (e + 1, b + bsize, num_data, ratio), end='')

            # Get batch and train on it
            x_batch = self.make_batch(datasets, indx)
            losses = self.train_on_batch(x_batch)
            self.report_logs(reporter, losses)

            # Compute ETA from the average time per sample so far
            elapsed_time = time.time() - start_time
            eta = elapsed_time / (b + bsize) * (num_data - (b + bsize))
            print('| ETA: %s ' % utils.time_format(eta), end='')
            sys.stdout.flush()

        train_logs = self.train_on_batch(datasets.get_training())
        val_logs = []
        if validation:
            val_logs = self.train_on_batch(datasets.get_validation())
            print('\tvalidation ', end='')
            self.report_logs(reporter, val_logs)
        print('\n')
        _logs = (train_logs, val_logs)
        self.on_epoch_end(e, _logs)
        # Fill the remaining grid cells with predictions and ground truth
        plt.imshow(np.squeeze(final_lstm_pred_vals[j, 0, :, :, :]))
        fig.add_subplot(rows, columns, j + (2 * columns) + 1)
        plt.imshow(np.squeeze(final_ground_truth_vals[j + 1, 0, :, :, :]))
        fig.add_subplot(rows, columns, j + 1 + (3 * columns))
        plt.imshow(np.squeeze(final_pred_vals[j + columns + 1, 0, :, :, :]))
        fig.add_subplot(rows, columns, j + columns + 1 + (3 * columns))
        plt.imshow(np.squeeze(final_lstm_pred_vals[j + columns, 0, :, :, :]))
        fig.add_subplot(rows, columns, j + (2 * columns) + 1 + (3 * columns))
        plt.imshow(np.squeeze(final_ground_truth_vals[j + columns + 1, 0, :, :, :]))
    plt.savefig('images/final_pred.png')
    if not ARGS.test:
        print(saver.save(
            sess,
            '/home/louis/MSc_Thesis/saved_models/saved_model_weights.ckpt'))
    print("Setup:", utils.time_format(setup_time - start_time))
    print("Training: {} over {} epochs of size {}; {} per batch".format(
        utils.time_format(time.time() - setup_time), ARGS.num_epochs, batch_num,
        utils.time_format((time.time() - setup_time) / (ARGS.num_epochs * batch_num))))
    print("Total:", utils.time_format(time.time() - start_time))
def train_agent(env, args, config):
    """Train a Rainbow-style agent on the given environment."""
    # Create a CNN that converts the [1, 3, 84, 84] observation to [1, 200]
    now = datetime.now()
    dt_string = now.strftime("%d_%m_%Y_%H:%M:%S")
    torch.manual_seed(config["seed"])
    np.random.seed(config["seed"])
    if torch.cuda.is_available() and not args.disable_cuda:
        args.device = torch.device('cuda')
        torch.cuda.manual_seed(np.random.randint(1, 10000))
        torch.backends.cudnn.enabled = args.enable_cudnn
    pathname = dt_string + "_seed" + str(config["seed"])
    print("save tensorboard {}".format(config["locexp"]))
    tensorboard_name = str(config["locexp"]) + '/runs/' + pathname
    agent = Agent(args, env)
    # agent.load(str(args.locexp), "1/checkpoint-52038.pth")
    memory = ReplayMemory(args, args.memory_capacity)
    # memory.load_memory("memory_pacman")
    # memory = ReplayBuffer((3, config["size"], config["size"]), (1,),
    #                       config["expert_buffer_size"], int(config["image_pad"]),
    #                       config["device"])
    priority_weight_increase = (1 - args.priority_weight) / (args.T_max - args.learn_start)
    writer = SummaryWriter(tensorboard_name)
    results_dir = os.path.join(str(config["locexp"]), args.id)
    mkdir("", results_dir)
    scores_window = deque(maxlen=100)
    steps_window = deque(maxlen=100)
    scores = []
    t0 = time.time()
    # Training loop
    agent.train()
    T, done = 0, True
    print("result dir ", results_dir)
    agent.save(results_dir, 'checkpoint-{}.pth'.format(T))
    # eval_policy(env, agent, writer, T, config)
    episode = -1
    steps = 0
    score = 0
    print("save policy ", args.checkpoint_interval)
    # eval_policy(env, agent, writer, 0, config)
    for T in range(1, args.T_max + 1):
        # print("\r {} of {}".format(T, args.T_max), end='')
        if done:
            episode += 1
            # Checkpoint the network
            if episode % 100 == 0:
                memory.save_memory("memory_pacman")
                print("Eval policy")
                # eval_policy(env, agent, writer, T, config)
                agent.save(results_dir, 'checkpoint-{}.pth'.format(T))
            scores_window.append(score)  # save most recent score
            scores.append(score)
            steps_window.append(steps)
            ave_steps = np.mean(steps_window)
            print('\rTime steps {} episode {} score {} Average Score: {:.2f} '
                  'steps {} ave steps {:.2f} time: {}'.format(
                      T, episode, score, np.mean(scores_window), steps,
                      ave_steps, time_format(time.time() - t0)), end="")
            writer.add_scalar('Episode_reward ', score, T)
            average_reward = np.mean(scores_window)
            writer.add_scalar('Average_reward ', average_reward, T)
            state, done = env.reset("mediumClassic"), False
            steps = 0
            score = 0
        if T % args.replay_frequency == 0:
            agent.reset_noise()  # Draw a new set of noisy weights
        action = agent.act(state)  # Choose an action greedily (with noisy weights)
        next_state, reward, done, _ = env.step(action)  # Step
        score += reward
        steps += 1
        if steps == 125:
            done = True
        memory.append(state, action, reward, done)  # Append transition to memory
        # Train and test
        if T >= args.learn_start:
            # Anneal importance sampling weight β to 1
            memory.priority_weight = min(
                memory.priority_weight + priority_weight_increase, 1)
            if T % args.replay_frequency == 0:
                agent.learn(memory)  # Train with n-step distributional double-Q learning
            # Update target network
            if T % args.target_update == 0:
                agent.update_target_net()
        state = next_state
def __init__(self, name, correct=None, enabled=True, default=None):
    super().__init__(name, QTimeEdit, correct, enabled, default)
    self.format = time_format()
def train_agent(self):
    env = gym.make("LunarLanderContinuous-v2")
    average_reward = 0
    scores_window = deque(maxlen=100)
    s = 0
    t0 = time.time()
    for i_episode in range(self.episodes):
        episode_reward = 0
        state = env.reset()
        for t in range(self.max_timesteps):
            s += 1
            action = self.act(state)
            next_state, reward, done, _ = env.step(action)
            episode_reward += reward
            if i_episode > 3:
                self.learn()
            self.memory.add(state, reward, action, next_state, done)
            state = next_state
            if done:
                scores_window.append(episode_reward)
                break
        if i_episode % self.eval == 0:
            self.eval_policy()
        ave_reward = np.mean(scores_window)
        print("Episode {} Steps {} Reward {} Average reward {} Time {}".format(
            i_episode, t, episode_reward, np.mean(scores_window),
            time_format(time.time() - t0)))
        self.writer.add_scalar('Aver_reward', ave_reward, self.steps)
os.chmod(param_file_path, 0o777)

if ARGS.tfdbg:
    chosen_session = tf_debug.LocalCLIDebugWrapperSession(tf.Session())
else:
    chosen_session = tf.Session()

with chosen_session as sess:
    if ARGS.restore is None:
        sess.run(tf.global_variables_initializer())
    else:
        # restore_path = saver.restore(sess, os.path.join(checkpoint_dir, ARGS.restore))
        saver.restore(sess, ARGS.restore)  # Saver.restore returns None
        print("Restoring from checkpoint at", ARGS.restore)

    train_start_time = time.time()
    print("Graph-build time: ", utils.time_format(train_start_time - start_time))

    dataset_length = len(input_d_vecs)
    num_batches = dataset_length // ARGS.batch_size
    dataset_length_validation = len(input_d_vecs_validation)
    num_batches_validation = dataset_length_validation // ARGS.batch_size
    best_em_score = 0.0
    best_avg_f1 = 0.0
    global_batch_num = 0
    new_avg_f1 = 0
    new_em_score = 0
    best_em_score_outer_prod = 0.0
    best_avg_f1_outer_prod = 0.0
def main():
    data = utils.read_file('./data/new.csv')
    print("There are %d samples in raw data set" % len(data))
    print("Raw input data set information")
    utils.data_info(data)
    utils.missing_info(data, "raw_missing")

    # Handle format and garbled-text issues in the raw data
    data = utils.time_format(data)
    data = utils.garbled_drawing(data)
    data = utils.garbled_floor(data)
    data = utils.garbled_living(data)
    data = utils.garbled_bath(data)
    data = utils.garbled_construct(data)
    data = utils.strange_building(data)

    # Drop columns that provide no help
    data = utils.drop_columns(data, ['url', 'id', 'price', 'DOM'])
    print("Raw data set information after transferring format and dropping columns")
    utils.data_info(data)
    utils.missing_info(data, "raw_missing_2")

    # The raw data contains more than 300000 data points; use 10% of it in this project
    x_raw, y_raw, data, y = utils.data_splitting(data, data['totalPrice'], 0.1)
    data.to_csv('small.csv')
    print("smaller data set", np.shape(data), np.shape(y))
    print("y_info")
    print(y.describe())
    plt.hist(y)
    plt.xlabel("totalPrice")
    plt.ylabel("counts")
    plt.savefig('y.png')
    plt.close()

    # Split D into D'' and D_test
    x_doubleprime, y_doubleprime, x_test, y_test = utils.data_splitting(
        data, data['totalPrice'], 0.2)
    print("D'' shape", np.shape(x_doubleprime), np.shape(y_doubleprime))
    print("D_test shape", np.shape(x_test), np.shape(y_test))

    # Split D'' into D' and D_pt
    x_prime, y_prime, x_pt, y_pt = utils.data_splitting(
        x_doubleprime, x_doubleprime['totalPrice'], 0.1)
    print("D_pt shape", np.shape(x_pt), np.shape(y_pt))
    print("D_prime shape", np.shape(x_prime), np.shape(y_prime))

    # Use the pre-training set to look at the data and run an initial test
    print("Pre-training data set preprocessing:")
    utils.pre_training(x_pt)

    # D' data set preprocessing
    print("D' preprocessing:")
    x_train, y_train, x_val, y_val, cols_keep, imputation = utils.preprocessing(x_prime)
    print("D' after preprocessing:")
    print("D_train shape after preprocessing", np.shape(x_train), np.shape(y_train))
    print("D_val shape after preprocessing", np.shape(x_val), np.shape(y_val))

    # Linear Regression
    lin_reg = model.linear_regression(x_train, y_train, x_val, y_val, cols_keep)
    # Ridge Regression
    rid_reg = model.ridge_regression(x_train, y_train, x_val, y_val, cols_keep)
    # Lasso Regression
    las_reg = model.lasso_regression(x_train, y_train, x_val, y_val, cols_keep)
    # Random Forest
    rf = model.random_forest(x_train, y_train, x_val, y_val, cols_keep)

    # Model tuning
    x, y, rid_reg_cv = model.ridge_cv(x_train, y_train, x_val, y_val, cols_keep)
    las_reg_cv = model.lasso_cv(x_train, y_train, x_val, y_val, cols_keep)
    rf_cv = model.random_forest_cv(x_train, y_train, x_val, y_val, cols_keep)

    # Final result
    x_cv = pd.concat([x, y], axis=1)
    with_missing_cv = utils.missing_info(x_cv, "cv_missing")
    with_missing_test = utils.missing_info(x_test, "test_missing")
    continuous = ["Lng", "Lat", "square", "ladderRatio", "communityAverage"]
    discrete = [
        "Cid", "tradeTime", "followers", "livingRoom", "drawingRoom", "kitchen",
        "bathRoom", "floor", "buildingType", "constructionTime",
        "renovationCondition", "buildingStructure", "elevator",
        "fiveYearsProperty", "subway", "district"
    ]
    x_cv, y_cv, x_test, y_test = utils.method_2_prime(
        x_cv, with_missing_cv, x_test, with_missing_test, continuous, discrete)
    x_test = pd.DataFrame(x_test, index=x_test.index, columns=cols_keep)

    predict_lin_red = lin_reg.predict(x_test)
    print("Linear Regression:")
    print("RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_lin_red)))

    predict_rid_reg = rid_reg.predict(x_test)
    predict_rid_reg_cv = rid_reg_cv.predict(x_test)
    print("Ridge Regression:")
    print("RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_rid_reg)))
    print("After cross validation, RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_rid_reg_cv)))

    predict_las_reg = las_reg.predict(x_test)
    predict_las_reg_cv = las_reg_cv.predict(x_test)
    print("Lasso Regression:")
    print("RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_las_reg)))
    print("After cross validation, RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_las_reg_cv)))

    predict_rf = rf.predict(x_test)
    predict_rf_cv = rf_cv.predict(x_test)
    print("Random Forest:")
    print("RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_rf)))
    print("After cross validation, RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_rf_cv)))

    # Plots
    plt.scatter(x_test['square'], y_test, c='blue', marker='o', label='real test')
    plt.scatter(x_test['square'], predict_rf_cv, c='red', marker='x', label='predict test')
    plt.xlabel('square')
    plt.legend(loc='upper right')
    plt.savefig("feature_square.png")
    plt.close()

    plt.scatter(x_test['livingRoom'], y_test, c='blue', marker='o', label='real test')
    plt.scatter(x_test['livingRoom'], predict_rf_cv, c='red', marker='x', label='predict test')
    plt.xlabel('livingRoom')
    plt.legend(loc='upper right')
    plt.savefig("feature_living.png")
    plt.close()
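# In this pipeline utils.time_format receives the whole DataFrame, so here it
# is a column cleaner rather than a string formatter. A minimal sketch,
# assuming the 'tradeTime' column (named in the discrete-feature list above)
# is what needs normalizing; the column choice and parsing are assumptions:
import pandas as pd

def time_format(data):
    """Parse the 'tradeTime' column to datetime (assumed behavior)."""
    data['tradeTime'] = pd.to_datetime(data['tradeTime'], errors='coerce')
    return data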
def train_agent(env, config):
    """Train a DQN agent, optionally seeding the replay buffer with expert data."""
    # Create a CNN that converts the [1, 3, 84, 84] observation to [1, 200]
    now = datetime.now()
    dt_string = now.strftime("%d_%m_%Y_%H:%M:%S")
    torch.manual_seed(config["seed"])
    np.random.seed(config["seed"])
    # pathname = str(args.locexp) + "/" + str(args.env_name) + '_agent_' + str(args.policy)
    # pathname += "_batch_size_" + str(args.batch_size) + "_lr_act_" + str(args.lr_actor)
    # pathname += "_lr_critc_" + str(args.lr_critic) + "_lr_decoder_"
    pathname = dt_string
    tensorboard_name = str(config["locexp"]) + '/runs/' + pathname
    agent = DQNAgent(state_size=200, action_size=env.action_space.n, config=config)
    writer = SummaryWriter(tensorboard_name)
    print("action_size {}".format(env.action_space.n))
    # eval_policy(env, agent, writer, 0, config)
    memory = ReplayBuffer((3, config["size"], config["size"]), (1, ),
                          config["expert_buffer_size"], int(config["image_pad"]),
                          config["device"])
    if config["create_buffer"]:
        create_buffer(env, memory, config)
        memory.load_memory("/export/leiningc/" + config["buffer_path"])
    else:
        print("load Buffer")
        memory.load_memory("/export/leiningc/" + config["buffer_path"])
    print("Buffer size {}".format(memory.idx))
    eps = config["eps_start"]
    eps_end = config["eps_end"]
    eps_decay = config["eps_decay"]
    scores_window = deque(maxlen=100)
    scores = []
    t0 = time.time()
    for i_episode in range(config["train_episodes"]):
        obs = env.reset()
        score = 0
        for t in range(config["max_t"]):
            action = agent.act(obs, eps)
            # action = env.action_space.sample()
            next_obs, reward, done_no_max, _ = env.step(action)
            done = done_no_max
            if t + 1 == config["max_t"]:
                print("t ", t)
                done = 0
            memory.add(obs, action, reward, next_obs, done, done_no_max)
            agent.step(memory, writer)
            obs = next_obs
            eps = max(eps_end, eps_decay * eps)  # decrease epsilon
            score += reward
            if done:
                break
        scores_window.append(score)  # save most recent score
        scores.append(score)
        ave_score = np.mean(scores_window)
        writer.add_scalar("ave_score", ave_score, i_episode)
        writer.add_scalar("episode_score", score, i_episode)
        print('\rEpisode {} score {} \tAverage Score: {:.2f} eps: {:.2f} time: {}'.format(
            i_episode, score, np.mean(scores_window), eps,
            time_format(time.time() - t0)), end="")
        if i_episode % config["eval"] == 0:
            eval_policy(env, agent, writer, i_episode, config)
            agent.save(str(config["locexp"]) + "/models/eval-{}/".format(i_episode))
            print('Episode {} Average Score: {:.2f} eps: {:.2f} time: {}'.format(
                i_episode, np.mean(scores_window), eps,
                time_format(time.time() - t0)))
with tf.Session() as sess:
    # Add additional options to trace the session execution
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(tf.global_variables_initializer(),
             options=options, run_metadata=run_metadata)

    # Create the Timeline object and write it to a json file
    fetched_timeline = timeline.Timeline(run_metadata.step_stats)
    chrome_trace = fetched_timeline.generate_chrome_trace_format()
    with open('timeline_01.json', 'w') as f:
        f.write(chrome_trace)
    # sess.run(tf.global_variables_initializer())

    train_start_time = time.time()
    print("Time elapsed from beginning until right before starting train is: ",
          utils.time_format(train_start_time - start_time))

    for i in range(ARGS.num_epochs):
        sess.run(iter_.initializer)
        while True:
            try:
                summary, _, loss_val = sess.run([merged, train_step, mean_loss])
            except tf.errors.OutOfRangeError:
                # Dataset exhausted: log the last summary, then start the next epoch
                writer.add_summary(summary, i)
                break

    train_end_time = time.time()
    print("Total training time (without data reading): ",
          utils.time_format(train_end_time - train_start_time))
    # save_path = saver.save(sess, "/tmp/model.ckpt")
saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=20)
config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())

# TensorBoard
merged = tf.summary.merge_all()
writer = tf.summary.FileWriter(log_path + '/', sess.graph)
logging.info('Model built, %s used\n' % time_format(time.time() - tic0))

test_feed_dicts = []
for i in range((len(dataset_u.test_x) + args.batch_size_u - 1) // args.batch_size_u):
    x, y, l, msl = data.pack(
        dataset_u.test_x[i * args.batch_size_u:(i + 1) * args.batch_size_u],
        dataset_u.test_y[i * args.batch_size_u:(i + 1) * args.batch_size_u])
    test_feed_dicts.append({
        x_u_: x,
        seq_len_u_: l,
        y_u_: y,
        msl_u_: msl,
        keep_prob_: 1
    })
def train(self):
    scores_window = deque(maxlen=100)
    step_window = deque(maxlen=100)
    eps = 1
    t0 = time.time()
    total_timesteps = 0
    i_episode = 0
    while total_timesteps < self.total_frames:
        state = self.env.reset()
        env_score = 0
        steps = 0
        while True:
            total_timesteps += 1
            steps += 1
            action = self.act(state, eps)
            next_state, reward, done, _ = self.env.step(action)
            eps = max(self.eps_end, self.eps_decay * eps)  # decrease epsilon
            if self.start_timesteps < total_timesteps:
                self.step()
            env_score += reward
            self.replay_buffer.add(state, action, reward, next_state, done, done)
            state = next_state
            if done:
                i_episode += 1
                break
        scores_window.append(env_score)  # save most recent score
        step_window.append(steps)        # save most recent episode length
        mean_reward = np.mean(scores_window)
        mean_steps = np.mean(step_window)
        self.writer.add_scalar('env_reward', env_score, total_timesteps)
        self.writer.add_scalar('mean_reward', mean_reward, total_timesteps)
        self.writer.add_scalar('mean_steps', mean_steps, total_timesteps)
        self.writer.add_scalar('steps', steps, total_timesteps)
        print(' Totalsteps {} Episode {} Step {} Reward {} Average Score: {:.2f} '
              'epsilon {:.2f} time {}'.format(
                  total_timesteps, i_episode, steps, env_score,
                  np.mean(scores_window), eps, time_format(time.time() - t0)))
        if i_episode % self.eval == 0:
            print('\rEpisode {}\tAverage Score: {:.2f} Time: {}'.format(
                i_episode, np.mean(scores_window), time_format(time.time() - t0)))