# Training loop (visible portion): one environment episode per epoch. The
# model's predicted action can be overridden by two cut-offs (reward floor and
# time limit) and, while suc_count is below half of the epochs to run, by the
# heuristic controller.
suc_count = 0
for epoch in range(current_epoch, epochs_count):
    state = env.reset()
    episode_reward = 0
    epoche_observations = []
    epoche_actions = []
    epoche_rewards = []
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # monotonic replacement for measuring elapsed time.
    # NOTE(review): perf_counter() measures wall-clock time, whereas
    # time.clock() measured CPU time on Unix — confirm wall-clock is intended.
    time_begin = time.perf_counter()
    # NOTE(review): no loop exit is visible in this part of the loop body —
    # presumably handled by the statements that follow env.step().
    while True:
        if is_render:
            env.render()
        action = model.predict(state)

        # Force action 0 once the accumulated reward drops below min_reward ...
        episode_rewards_sum = sum(epoche_rewards)
        if episode_rewards_sum < min_reward:
            action = 0
        # ... or once the episode has been running longer than time_limit.
        if time.perf_counter() - time_begin > time_limit:
            action = 0

        if suc_count < (epochs_count - current_epoch) / 2:
            # Replace the neural network with the heuristic algorithm on a low
            # vertical coordinate (state[1] < 0.5), on roughly half of those
            # steps (random.random() > 0.5).
            if state[1] < 0.5 and random.random() > 0.5:
                action = heuristic(env, state)

        state_, reward, done, info = env.step(action)
# Evaluation loop: runs 9 episodes (i = 1..9), rendering every step and
# appending each rendered RGB frame to `frames` as a PIL image. Actions come
# from the heuristic controller when is_heuristic is set, otherwise from the
# model.
for i in range(1, 10):
    total_reward = 0
    steps = 0
    s = env.reset()
    epoche_rewards = []
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # monotonic replacement for measuring elapsed time.
    # NOTE(review): perf_counter() measures wall-clock time, whereas
    # time.clock() measured CPU time on Unix — confirm wall-clock is intended.
    start = time.perf_counter()
    print("iteration: ", i)
    while True:
        env.render()
        frames.append(Image.fromarray(env.render(mode='rgb_array')))

        if is_heuristic:
            a = heuristic(env, s)
        else:
            a = model.predict(s)

        # Disabled experiment: replace the neural network with the heuristic
        # algorithm on a low vertical coordinate.
        #if s[1] < 0.1:
        #    a = heuristic(env, s)

        # NOTE(review): `s` is not updated from `state_` in the visible code,
        # so every step predicts from the reset state unless code that follows
        # reassigns it — verify.
        state_, reward, done, info = env.step(a)
        epoche_rewards.append(reward)
        print("reward ", reward, "action ", a)

        # Mark the episode as finished once the accumulated reward falls
        # below -200.
        # NOTE(review): `done` is not read in the visible code; presumably it
        # is checked by statements that follow.
        episode_rewards_sum = sum(epoche_rewards)
        if episode_rewards_sum < -200:
            done = True

        # Hard stop after 40 seconds per episode.
        if time.perf_counter() - start > 40:
            break
import tensorflow as tf os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu) model = TFFMRegressor( order=dim, rank=args.embedding_dim, optimizer=tf.train.AdagradOptimizer(learning_rate=args.lr), n_epochs=args.train_epochs, # batch_size=1076946, batch_size=4096, init_std=0.001, reg=args.weight_decay, input_type='sparse', log_dir=os.path.join(args.save, save_name), ) model.fit(train_queue[0], train_queue[1], show_progress=True) inferences = model.predict(test_queue[0]) mse = mean_squared_error(test_queue[1], inferences) rmse = np.sqrt(mse) logging.info('rmse: %.4f[%.4f]' % (rmse, time() - start)) elif args.mode == 'autoneural': start = time() if dim == 2: model = AutoNeural(num_users, num_items, args.embedding_dim, args.weight_decay).cuda() elif dim == 3: model = AutoNeural_Triple(num_ps, num_qs, num_rs, args.embedding_dim, args.weight_decay).cuda() embedding_oprimizer = torch.optim.Adagrad(model.embedding_parameters(), args.lr)