Example #1
0
    suc_count = 0

    for epoch in range(current_epoch, epochs_count):

        state = env.reset()
        episode_reward = 0
        epoche_observations = []
        epoche_actions = []
        epoche_rewards = []
        time_begin = time.clock()

        while True:
            if is_render:
                env.render()

            action = model.predict(state)

            episode_rewards_sum = sum(epoche_rewards)
            if episode_rewards_sum < min_reward:
                action = 0

            if time.clock() - time_begin > time_limit:
                action = 0

            if suc_count < (epochs_count - current_epoch) / 2:
                # replace the neural network with the heuristic algorithm at a low vertical coordinate
                if state[1] < 0.5 and random.random() > 0.5:
                    action = heuristic(env, state)

            state_, reward, done, info = env.step(action)
Example #2
0
    for i in range(1, 10):
        total_reward = 0
        steps = 0
        s = env.reset()
        epoche_rewards = []
        start = time.clock()
        print("iteration: ", i)

        while True:
            env.render()
            frames.append(Image.fromarray(env.render(mode='rgb_array')))

            if is_heuristic:
                a = heuristic(env, s)
            else:
                a = model.predict(s)

            # replace the neural network with the heuristic algorithm at a low vertical coordinate
            #if s[1] < 0.1:
            #    a = heuristic(env, s)

            state_, reward, done, info = env.step(a)
            epoche_rewards.append(reward)

            print("reward ", reward, "action ", a)
            episode_rewards_sum = sum(epoche_rewards)
            if episode_rewards_sum < -200:
                done = True

            if time.clock() - start > 40:
                break
Example #3
0
        import tensorflow as tf
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
        model = TFFMRegressor(
            order=dim,
            rank=args.embedding_dim,
            optimizer=tf.train.AdagradOptimizer(learning_rate=args.lr),
            n_epochs=args.train_epochs,
            # batch_size=1076946,
            batch_size=4096,
            init_std=0.001,
            reg=args.weight_decay,
            input_type='sparse',
            log_dir=os.path.join(args.save, save_name),
        )
        model.fit(train_queue[0], train_queue[1], show_progress=True)
        inferences = model.predict(test_queue[0])
        mse = mean_squared_error(test_queue[1], inferences)
        rmse = np.sqrt(mse)
        logging.info('rmse: %.4f[%.4f]' % (rmse, time() - start))

    elif args.mode == 'autoneural':
        start = time()
        if dim == 2:
            model = AutoNeural(num_users, num_items, args.embedding_dim,
                               args.weight_decay).cuda()
        elif dim == 3:
            model = AutoNeural_Triple(num_ps, num_qs, num_rs,
                                      args.embedding_dim,
                                      args.weight_decay).cuda()
        embedding_oprimizer = torch.optim.Adagrad(model.embedding_parameters(),
                                                  args.lr)