def main():
    """Train (or load) an MLP model and optionally run it under MPC.

    A model saved at ``./out/{EXP_NAME}_model.pkl`` is loaded and reused
    unless ``OVERWRITE_EXISTING`` is set.  Otherwise a fresh ``MlpModel`` is
    trained on the episodes stored in ``../data/push_sphere_v0_details.pkl``,
    checkpointed and evaluated whenever the epoch number is a multiple of
    10, and finally saved.  When ``VISUAL_TEST`` is set, the model is then
    used by an ``MPC`` controller in the rendered environment and the
    seconds spent on each control step are printed.
    """
    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)

    # The unwrapped env's random generator is handed to both the model and
    # the controller.
    np_random = env.unwrapped.np_random
    observation_space = get_observation_space(env)
    # Input width is observation + action; output width is one observation.
    n_inputs = observation_space.low.size + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = MlpModel(n_inputs,
                     n_outputs,
                     hidden_units=(64, 32),
                     np_random=np_random,
                     device=device)

    model_path = Path(f'./out/{EXP_NAME}_model.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = MlpModel.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:
        df = pd.read_pickle('../data/push_sphere_v0_details.pkl')
        episode_length = df['step'].max() + 1
        n_episodes = df['episode'].max() + 1

        # One (states, actions) pair per recorded episode.
        episodes = []
        for i in range(n_episodes):
            ep_df = df[df['episode'] == i]
            actions = np.array(ep_df['raw_action'].tolist())
            states = np.array(ep_df['raw_obs'].tolist())
            # The first recorded action is dropped.
            # NOTE(review): presumably raw_action[t] is the action that led
            # to raw_obs[t] -- confirm against the code that recorded the data.
            episodes.append((states, actions[1:]))

        def epoch_callback(epoch, loss):
            """Print the loss; checkpoint and evaluate when epoch % 10 == 0."""
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)
                evaluate(MlpModel,
                         path,
                         'FetchPushSphereDense-v1',
                         strategy=push_strategy,
                         strategy_period=episode_length)

        print('Training...')
        model.train(episodes,
                    epochs=TRAINING_EPOCHS,
                    batch_size=BATCH_SIZE,
                    epoch_callback=epoch_callback,
                    scale_data=True,
                    shuffle_data=True)

        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')

        reward_fn = RewardFunction.simplified_push_reward(env)
        controller = MPC(env,
                         model,
                         MPC_HORIZON,
                         MPC_SEQUENCES,
                         np_random,
                         reward_function=reward_fn)

        for _episode in range(2000):
            env.reset()
            controller.forget_history()

            for _step in range(100):
                env.render()

                # Time one control step: observe, plan, act.
                tic = datetime.now()

                obs = get_observations(env)
                action = controller.get_action(obs)
                env.step(action)

                toc = datetime.now()
                print((toc - tic).total_seconds())
def main():
    """Train (or load) an MDN model and optionally run it under MPC.

    A model saved at ``./out/{EXP_NAME}_model.pkl`` is loaded and reused
    unless ``OVERWRITE_EXISTING`` is set.  Otherwise training data is loaded
    from ``./out/{EXP_NAME}_data.pkl`` -- or generated with ``STRATEGY`` and
    saved there when the file is missing or its episode count / episode
    length differ from ``N_EPISODES`` / ``EPISODE_LENGTH`` -- and a fresh
    ``MDN_Model`` is trained on it, checkpointed and evaluated whenever the
    epoch number is a multiple of 10, and finally saved.  When
    ``VISUAL_TEST`` is set, the model is then used by an ``MPC`` controller
    in the rendered environment and the seconds spent on each control step
    are printed.
    """
    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)

    # The unwrapped env's random generator is handed to both the model and
    # the controller.
    np_random = env.unwrapped.np_random
    observation_space = get_observation_space(env)
    # Input width is observation + action; output width is one observation.
    n_inputs = observation_space.low.size + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = MDN_Model(n_inputs,
                      n_outputs,
                      MDN_COMPONENTS,
                      hidden_units=(20, ),
                      np_random=np_random,
                      device=device)

    model_path = Path(f'./out/{EXP_NAME}_model.pkl')
    data_path = Path(f'./out/{EXP_NAME}_data.pkl')

    do_train = True
    if model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            model = MDN_Model.load(model_path, device)
            do_train = False
    else:
        print('Existing model not found.')

    if do_train:
        dataset = EnvDataset(env)

        # Regenerate when there is no cached data or the cached data was
        # produced with different size parameters.
        do_generate = not data_path.exists()
        if not do_generate:
            print('Loading data...')
            dataset.load(data_path)
            if (dataset.episodes != N_EPISODES
                    or dataset.episode_length != EPISODE_LENGTH):
                print(
                    'Existing data is not compatible with the desired parameters.'
                )
                do_generate = True

        if do_generate:
            print('Generating data...')
            dataset.generate(N_EPISODES, EPISODE_LENGTH, strategy=STRATEGY)
            dataset.save(data_path)
        episodes = dataset.data

        def epoch_callback(epoch, loss):
            """Print the loss; checkpoint and evaluate when epoch % 10 == 0."""
            print(epoch, loss)
            if epoch % 10 == 0:
                path = Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl')
                model.save(path)
                evaluate_mdn(path, ENV_ID, strategy=STRATEGY)

        print('Training...')
        model.train(episodes,
                    TRAINING_EPOCHS,
                    batch_size=BATCH_SIZE,
                    epoch_callback=epoch_callback,
                    scale_data=True,
                    shuffle_data=True)

        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')
        controller = MPC(env, model, MPC_HORIZON, MPC_SEQUENCES, np_random)

        for _episode in range(2000):
            env.reset()
            # NOTE(review): other entry points in this file call
            # controller.forget_history() after each reset; this one does
            # not -- confirm that is intentional for the MDN model.

            for _step in range(100):
                env.render()

                # Time one control step: observe, plan, act.
                tic = datetime.now()

                obs = get_observations(env)
                action = controller.get_action(obs)
                env.step(action)

                toc = datetime.now()
                print((toc - tic).total_seconds())
def main():
    """Train and save the two MLPs of the latent-space model.

    ``mlp1`` (an ``MlpModel``) is trained on per-episode pairs of latent
    vectors loaded from ``Z_PATH`` and the actions recorded in ``DATA_PATH``;
    its input width is ``Z_SIZE`` plus the action size and its output width
    is ``Z_SIZE``.  ``mlp2`` (a ``SimpleMlpModel``) is trained to map a
    latent vector to the recorded raw observation.  Both are saved under
    ``./out/``.

    If the first model's file already exists and ``OVERWRITE_EXISTING`` is
    not set, both model paths are printed and the process exits with status
    0 without training.  Only the first model's path is checked.

    Raises:
        SystemExit: with code 0 in the "model exists, do not overwrite" case.
    """
    if torch.cuda.is_available():
        print("CUDA available, proceeding with GPU...")
        device = torch.device("cuda")
    else:
        print("No GPU found, proceeding with CPU...")
        device = torch.device("cpu")

    env = gym.make(ENV_ID)

    np_random = env.unwrapped.np_random
    observation_space = get_observation_space(env)

    mlp1_n_inputs = Z_SIZE + env.action_space.low.size
    mlp1_n_outputs = Z_SIZE

    mlp2_n_inputs = Z_SIZE
    mlp2_n_outputs = observation_space.low.size

    mlp1 = MlpModel(mlp1_n_inputs,
                    mlp1_n_outputs,
                    hidden_units=(128, 128, 128, 64),
                    np_random=np_random,
                    device=device)
    mlp2 = SimpleMlpModel(mlp2_n_inputs,
                          mlp2_n_outputs,
                          hidden_units=(128, 128, 32),
                          device=device)

    mlp1_model_path = Path(f'./out/{EXP_NAME}1_model.pkl')
    mlp2_model_path = Path(f'./out/{EXP_NAME}2_model.pkl')

    if mlp1_model_path.exists():
        print('Found existing model.')
        if OVERWRITE_EXISTING:
            print('Overwriting...')
        else:
            print(mlp1_model_path.as_posix())
            print(mlp2_model_path.as_posix())
            # Raise SystemExit directly: the bare ``exit`` helper is injected
            # by the ``site`` module and is not guaranteed to be available.
            raise SystemExit(0)
    else:
        print('Existing model not found.')

    df = pd.read_pickle(DATA_PATH)
    n_episodes = df['episode'].max() + 1
    episode_length = df['step'].max() + 1
    all_z = np.load(Z_PATH)['arr_0']

    # Training data for mlp1: one (latents, actions) pair per episode.
    episodes = []
    for i in range(n_episodes):
        ep_actions = np.array(
            df.loc[df['episode'] == i, 'raw_action'].tolist())
        # The first recorded action is dropped.
        # NOTE(review): presumably raw_action[t] is the action that led to
        # step t -- confirm against the code that recorded the data.
        episodes.append((all_z[i], ep_actions[1:]))

    # Training data for mlp2: every latent vector paired with a raw
    # observation row.
    # NOTE(review): assumes the rows of df are ordered by (episode, step) so
    # that they line up with the flattened latents -- TODO confirm.
    mlp2_x = all_z.reshape(n_episodes * episode_length, -1)
    mlp2_y = np.array(df['raw_obs'].tolist())

    def epoch_callback(epoch, loss):
        """Print the epoch number and its loss."""
        print(epoch, loss)

    print('Training...')
    mlp1.train(episodes,
               epochs=TRAINING_EPOCHS1,
               batch_size=BATCH_SIZE,
               epoch_callback=epoch_callback,
               scale_data=True,
               shuffle_data=True)
    mlp2.train(mlp2_x,
               mlp2_y,
               epochs=TRAINING_EPOCHS2,
               batch_size=BATCH_SIZE,
               epoch_callback=epoch_callback,
               scale_data=True,
               shuffle_data=True)

    print('Saving models...')
    mlp1.save(mlp1_model_path)
    mlp2.save(mlp2_model_path)
def main():
    """Train (or load) an LSTM model and optionally run it under MPC.

    A model saved at ``./out/{EXP_NAME}_model.pkl`` is loaded and reused
    unless ``OVERWRITE_EXISTING`` is set.  Otherwise a fresh ``LSTM_Model``
    is trained on the episodes in ``DATA_PATH``: with ``Z_TO_OBS`` set, the
    per-episode states come from ``Z_PATH`` and the raw observations (minus
    the first one) are passed as targets; without it, the raw observations
    are the states and no targets are passed.  A checkpoint is written
    whenever the epoch number is a multiple of 10.  When ``VISUAL_TEST`` is
    set, the model is then used by an ``MPC`` controller in the rendered
    environment and the seconds spent on each control step are printed.
    """
    has_cuda = torch.cuda.is_available()
    print("CUDA available, proceeding with GPU..."
          if has_cuda else "No GPU found, proceeding with CPU...")
    device = torch.device("cuda" if has_cuda else "cpu")

    env = gym.make(ENV_ID)
    np_random = env.unwrapped.np_random

    observation_space = get_observation_space(env)
    # NOTE(review): 16 is presumably the width of the vectors stored in
    # Z_PATH; when Z_TO_OBS is false the states fed to the model are raw
    # observations instead -- confirm the input width is right in that case.
    n_inputs = 16 + env.action_space.low.size
    n_outputs = observation_space.low.size

    model = LSTM_Model(
        n_inputs,
        32,
        n_outputs,
        n_layers=2,
        np_random=np_random,
        device=device,
        window_size=5,
    )

    model_path = Path(f'./out/{EXP_NAME}_model.pkl')

    needs_training = True
    if not model_path.exists():
        print('Existing model not found.')
    else:
        print('Found existing model.')
        if not OVERWRITE_EXISTING:
            model = LSTM_Model.load(model_path, device)
            needs_training = False
        else:
            print('Overwriting...')

    if needs_training:
        df = pd.read_pickle(DATA_PATH)
        n_episodes = df['episode'].max() + 1

        episodes = []
        latents = np.load(Z_PATH)['arr_0'] if Z_TO_OBS else None
        targets = [] if Z_TO_OBS else None

        for episode_idx in range(n_episodes):
            rows = df[df['episode'] == episode_idx]
            actions = np.array(rows['raw_action'].tolist())
            observations = np.array(rows['raw_obs'].tolist())

            if not Z_TO_OBS:
                # Raw observations are the model's states; no targets.
                episodes.append((observations, actions[1:]))
                continue

            # States come from the latent file; observations are targets.
            targets.append(observations[1:])
            episodes.append((latents[episode_idx], actions[1:]))

        if targets is not None:
            targets = np.array(targets)

        def on_epoch_end(epoch, loss):
            """Print the loss and write a checkpoint when epoch % 10 == 0."""
            print(epoch, loss)
            if epoch % 10 != 0:
                return
            model.save(Path(f'./out/{EXP_NAME}_model_e{epoch}.pkl'))

        print('Training...')
        loss_history = model.train(
            episodes,
            targets=targets,
            epochs=TRAINING_EPOCHS,
            batch_size=BATCH_SIZE,
            epoch_callback=on_epoch_end,
            scale_data=True,
            scale_targets=True,
        )

        print('Saving model...')
        model.save(model_path)

    if VISUAL_TEST:
        print('Testing model...')

        controller = MPC(env, model, MPC_HORIZON, MPC_SEQUENCES, np_random)

        for _episode in range(2000):
            env.reset()
            controller.forget_history()

            for _step in range(100):
                env.render()

                # Time one control step: observe, plan, act.
                started = datetime.now()

                current_obs = get_observations(env)
                chosen_action = controller.get_action(current_obs)
                _obs, _reward, _done, _info = env.step(chosen_action)

                elapsed = datetime.now() - started
                print(elapsed.total_seconds())