Example 1
import gym
import numpy as np
import matplotlib.pyplot as plt
# A2CAgent and utils are defined elsewhere in the surrounding project.


def run_A2C():
    episodes = 500
    seed = 1
    results = []
    game = 'CartPole-v0'

    env = gym.make(game)

    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n

    agent = A2CAgent(state_size, action_size)

    for e in range(episodes):
        done = False
        score = 0
        state = env.reset()
        state = np.reshape(state, [1, state_size])

        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])
            agent.train_model(state, action, reward, next_state, done)

            score += reward
            state = next_state

        results.append(score)

    utils.save_trained_model(game, seed, 'A2C', agent.actor)

    plt.plot(results)
    plt.show()
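The loop above assumes an A2CAgent class defined elsewhere in the project. A minimal stub of the interface it relies on (the method bodies here are illustrative placeholders, not the original implementation):

import numpy as np

class A2CAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.actor = None  # policy network; this is what gets saved above

    def get_action(self, state):
        # Placeholder: a real agent samples an action from the actor's
        # policy distribution for the given state.
        return np.random.randint(self.action_size)

    def train_model(self, state, action, reward, next_state, done):
        # Placeholder: a real agent performs one actor-critic update
        # from this single transition.
        pass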
Example 2
import numpy as np
# UpsideDownAgent and utils are defined elsewhere in the surrounding project.


def run_experiment():

    environment = 'CartPole-v0'
    seed = 1
    episodes = 500

    returns = []

    agent = UpsideDownAgent(environment)

    for e in range(episodes):
        for i in range(100):
            agent.train_behaviour_function()

        # tmp_r collects the returns of all 15 evaluation episodes;
        # initialising it inside the loop would average only the last one.
        tmp_r = []
        for i in range(15):
            exploratory_commands = agent.sample_exploratory_commands()  # line 5 of Algorithm 1
            desired_return = exploratory_commands[0]
            desired_horizon = exploratory_commands[1]
            r = agent.generate_episode(environment, e, desired_return,
                                       desired_horizon, False)
            tmp_r.append(r)

        print(np.mean(tmp_r))
        returns.append(np.mean(tmp_r))

    agent.generate_episode(environment, 1, 200, 200, True)

    utils.save_results(environment, 'upside_down_agent', seed, returns)
    utils.save_trained_model(environment, seed, agent.behaviour_function)
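The "line 5 of Algorithm 1" comment refers to the command-sampling step of Upside-Down RL (Srivastava et al., 2019). A minimal sketch of what sample_exploratory_commands could look like, assuming a replay buffer stored as a list of dicts with 'return' and 'length' keys (the buffer layout is an assumption, not the original code):

import numpy as np

def sample_exploratory_commands(replay_buffer, last_few=50):
    # Take the `last_few` highest-return episodes from the buffer.
    best = sorted(replay_buffer, key=lambda ep: ep['return'])[-last_few:]
    # Desired horizon: mean length of the selected episodes.
    desired_horizon = int(np.mean([ep['length'] for ep in best]))
    # Desired return: sampled from U[M, M + S], where M and S are the
    # mean and standard deviation of the selected episodes' returns.
    rets = [ep['return'] for ep in best]
    m, s = np.mean(rets), np.std(rets)
    desired_return = np.random.uniform(m, m + s)
    return [desired_return, desired_horizon]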
Example 3
import argparse

import numpy as np
import matplotlib.pyplot as plt
# UpsideDownAgent and utils are defined elsewhere in the surrounding project.


def run_experiment():

    parser = argparse.ArgumentParser()

    parser.add_argument('--approximator', type=str, default='neural_network')
    parser.add_argument('--environment',
                        type=str,
                        default='PongDeterministic-v4')
    parser.add_argument('--seed', type=int, default=1)

    args = parser.parse_args()

    approximator = args.approximator
    environment = args.environment
    seed = args.seed

    episodes = 1500
    returns = []

    agent = UpsideDownAgent(environment, approximator)

    for e in range(episodes):

        print("Episode {}".format(e))

        for i in range(100):
            agent.train_behaviour_function()

        print("Finished training B!")

        # As in the previous example, tmp_r is initialised before the loop
        # so the mean covers all 15 evaluation episodes.
        tmp_r = []
        for i in range(15):
            exploratory_commands = agent.sample_exploratory_commands()  # line 5 of Algorithm 1
            desired_return = exploratory_commands[0]
            desired_horizon = exploratory_commands[1]
            r = agent.generate_episode(environment, e, desired_return,
                                       desired_horizon, False)
            tmp_r.append(r)

        print(np.mean(tmp_r))
        returns.append(np.mean(tmp_r))

        #agent.generate_episode(environment, 1, 200, 200, True)

        utils.save_results(environment, approximator, seed, returns)

    if approximator == 'neural_network':
        utils.save_trained_model(environment, seed, agent.behaviour_function)

    plt.plot(returns)
    plt.show()
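With the argparse flags above, the experiment is configurable from the shell. Assuming the function lives in a script named run.py (a hypothetical name), an invocation looks like:

python run.py --approximator neural_network --environment PongDeterministic-v4 --seed 1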
Example 4
import logging

from sklearn.linear_model import LinearRegression
# get_training_data, get_training_labels, full_pipeline, TRAINING__DATA_PATH
# and save_trained_model are defined elsewhere in the surrounding project.


def train_model():
    """
    Train the job application model and save it to disk.

    :return: None
    """
    jobs_training = get_training_data(TRAINING__DATA_PATH)
    jobs_labels = get_training_labels("applications")
    logging.info("Transforming data")
    jobs_prepared = full_pipeline.fit_transform(jobs_training)
    logging.info("Training the model")
    lin_reg = LinearRegression(copy_X=True,
                               fit_intercept=True,
                               n_jobs=1,
                               normalize=False)  # removed in scikit-learn >= 1.2; drop it on newer versions
    lin_reg.fit(jobs_prepared, jobs_labels)
    logging.info("Saving the model")
    save_trained_model(lin_reg)
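save_trained_model is not shown on this page. For a scikit-learn estimator, one plausible implementation (the path here is illustrative) persists the fitted model with joblib:

import os
import joblib

def save_trained_model(model, path='models/job_application_model.joblib'):
    # Persist the fitted estimator; reload it later with joblib.load(path).
    os.makedirs(os.path.dirname(path), exist_ok=True)
    joblib.dump(model, path)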
Example 5
    # Method of a recommender class. PARAMS, MODELS_DIR, MODEL_EXT,
    # WEIGHTS_EXT, DEFAULT_TRAINED_MODELS_FILE and utils come from the
    # surrounding module, as do the imports it relies on (os, gc, json,
    # numpy as np, pandas as pd, keras.callbacks.EarlyStopping,
    # sklearn.metrics.r2_score).
    def recommend(self):
        '''Recommend track IDs using artist biographies and audio files.'''

        # Load data
        print('Loading Data...')
        X_train, Y_train, X_val, Y_val, X_test, Y_test = self.load_data(
            PARAMS, PARAMS['dataset']['dataset_ab'],
            PARAMS['dataset']['dataset_as'],
            self.__config.training_params["validation"],
            self.__config.training_params["test"],
            self.__config.dataset_settings["nsamples"],
            PARAMS['dataset']['meta-suffix'],
            PARAMS['dataset']['meta-suffix2'])

        # Set model parameters
        model_dir = os.path.join(MODELS_DIR, self.__config.model_id)
        utils.ensure_dir(MODELS_DIR)
        utils.ensure_dir(model_dir)
        model_file = os.path.join(model_dir,
                                  self.__config.model_id + MODEL_EXT)
        trained_model = self.__config.get_dict()

        if not os.path.exists(model_file):
            # Construct and save model
            print('Building Network...')
            model = self.build_model(self.__config)
            utils.save_model(model, model_file)
            model.summary()  # summary() prints directly and returns None

            # Training and validation
            print('\nTraining...')
            early_stopping = EarlyStopping(monitor='val_loss', patience=5)
            model.fit(X_train,
                      Y_train,
                      batch_size=self.__config.training_params['n_minibatch'],
                      nb_epoch=self.__config.training_params['n_epochs'],  # Keras 1 name; Keras 2 renamed it to `epochs`
                      verbose=1,
                      validation_data=(X_val, Y_val),
                      callbacks=[early_stopping])

            # Save trained model
            model.save_weights(
                os.path.join(model_dir, self.__config.model_id + WEIGHTS_EXT))
            utils.save_trained_model(DEFAULT_TRAINED_MODELS_FILE,
                                     trained_model)
            print('\nSaving trained model %s in %s...' %
                  (trained_model['model_id'], DEFAULT_TRAINED_MODELS_FILE))
        else:
            model = utils.load_model(model_file)
            model.load_weights(
                os.path.join(model_dir, self.__config.model_id + WEIGHTS_EXT))
            trained_model = self.__config.get_dict()

        # Predict and evaluate the model for split test data
        print('\nPredicting for split test data...')
        preds = model.predict(X_test)

        r2s = []
        for i, pred in enumerate(preds):
            r2 = r2_score(Y_test[i], pred)
            r2s.append(r2)
        r2 = np.asarray(r2s).mean()
        print('R2 avg: ', r2)

        # Delete used variables
        del X_train, Y_train, X_val, Y_val, X_test, Y_test
        gc.collect()

        # Load trained model and model config
        trained_models = pd.read_csv(DEFAULT_TRAINED_MODELS_FILE, sep='\t')
        model_config = trained_models[
            trained_models['model_id'] == trained_model['model_id']
        ].to_dict(orient="list")

        # Predict for whole test data
        print('\nPredicting for whole test data...')
        predicted_matrix_map, predictions_index = self.predict(
            model_config,
            trained_model['model_id'],
            trim_coeff=self.__config.predicting_params['trim_coeff'],
            model=model,
            fact=PARAMS['dataset']['fact'],
            dim=PARAMS['dataset']['dim'],
            num_users=PARAMS['dataset']['num_users'],
            dataset_as=PARAMS['dataset']['dataset_as'],
            meta_source_ab=PARAMS['dataset']['meta-suffix'],
            meta_source_as=PARAMS['dataset']['meta-suffix2'])
        print('Prediction is completed.\n')

        # Evaluation
        model_config = trained_models[trained_models["model_id"] ==
                                      trained_model["model_id"]].to_dict(
                                          orient="list")
        model_settings = eval(model_config['dataset_settings'][0])
        model_arch = eval(model_config['model_arch'][0])
        model_training = eval(model_config['training_params'][0])
        str_config = json.dumps(model_settings) + "\n" + json.dumps(
            model_arch) + "\n" + json.dumps(model_training) + "\n"
        model_settings["loss"] = model_training['loss_func']

        self.evaluate(trained_model['model_id'], model_settings, str_config,
                      predicted_matrix_map, predictions_index)
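The three eval() calls in this example parse dicts that were written to the CSV as strings. Assuming the stored strings are plain Python literals, ast.literal_eval is a safer drop-in, since it only parses literal syntax and cannot execute arbitrary code:

import ast

model_settings = ast.literal_eval(model_config['dataset_settings'][0])
model_arch = ast.literal_eval(model_config['model_arch'][0])
model_training = ast.literal_eval(model_config['training_params'][0])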