import gym
import numpy as np
import matplotlib.pyplot as plt


def run_A2C():
    """Train an A2C agent on CartPole-v0 and plot the per-episode scores."""
    # A2CAgent and utils are assumed to be defined elsewhere in the project.
    episodes = 500
    seed = 1
    results = []
    game = 'CartPole-v0'

    env = gym.make(game)
    state_size = env.observation_space.shape[0]
    action_size = env.action_space.n
    agent = A2CAgent(state_size, action_size)

    for e in range(episodes):
        done = False
        score = 0
        state = env.reset()
        state = np.reshape(state, [1, state_size])

        while not done:
            action = agent.get_action(state)
            next_state, reward, done, info = env.step(action)
            next_state = np.reshape(next_state, [1, state_size])
            # A2C is trained online, on every single transition.
            agent.train_model(state, action, reward, next_state, done)
            score += reward
            state = next_state

        results.append(score)

    utils.save_trained_model(game, seed, 'A2C', agent.actor)
    plt.plot(results)
    plt.show()
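# A2CAgent itself is not shown above. As a rough sketch only (an assumption,
# not the project's code: the runner appears to use a Keras agent, judging by
# `agent.actor`), the per-transition update it performs is the classic
# one-step advantage actor-critic rule, illustrated here in PyTorch with
# placeholder network sizes and hyperparameters:
import torch
import torch.nn as nn
import torch.nn.functional as F


class TinyA2C:
    def __init__(self, state_size, action_size, gamma=0.99):
        self.gamma = gamma
        self.actor = nn.Sequential(nn.Linear(state_size, 24), nn.ReLU(),
                                   nn.Linear(24, action_size))
        self.critic = nn.Sequential(nn.Linear(state_size, 24), nn.ReLU(),
                                    nn.Linear(24, 1))
        self.opt = torch.optim.Adam(
            list(self.actor.parameters()) + list(self.critic.parameters()),
            lr=1e-3)

    def get_action(self, state):
        # Sample from the softmax policy defined by the actor's logits.
        logits = self.actor(torch.as_tensor(state, dtype=torch.float32))
        return torch.distributions.Categorical(logits=logits).sample().item()

    def train_model(self, state, action, reward, next_state, done):
        s = torch.as_tensor(state, dtype=torch.float32)
        s2 = torch.as_tensor(next_state, dtype=torch.float32)
        v = self.critic(s)
        v2 = self.critic(s2).detach()
        # One-step TD target; bootstrapping is cut off at terminal states.
        target = reward + self.gamma * v2 * (0.0 if done else 1.0)
        advantage = (target - v).detach()
        dist = torch.distributions.Categorical(logits=self.actor(s))
        actor_loss = -dist.log_prob(torch.as_tensor(action)) * advantage
        critic_loss = F.mse_loss(v, target)
        self.opt.zero_grad()
        (actor_loss.mean() + critic_loss).backward()
        self.opt.step()

# TinyA2C exposes the same get_action/train_model interface that run_A2C
# expects, so it could stand in for A2CAgent in a quick experiment.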
def run_experiment():
    """Train an upside-down RL agent on CartPole-v0 and save the results."""
    # UpsideDownAgent and utils are assumed to be defined elsewhere in the
    # project.
    environment = 'CartPole-v0'
    seed = 1
    episodes = 500
    returns = []

    agent = UpsideDownAgent(environment)

    for e in range(episodes):
        # Improve the behaviour function on the replay buffer.
        for i in range(100):
            agent.train_behaviour_function()

        # Collect 15 exploratory episodes and record their mean return.
        tmp_r = []
        for i in range(15):
            exploratory_commands = agent.sample_exploratory_commands()  # Line 5 Algorithm 1
            desired_return = exploratory_commands[0]
            desired_horizon = exploratory_commands[1]
            r = agent.generate_episode(environment, e, desired_return,
                                       desired_horizon, False)
            tmp_r.append(r)

        print(np.mean(tmp_r))
        returns.append(np.mean(tmp_r))

    # Final evaluation episode with the maximal CartPole command
    # (desired return 200, desired horizon 200).
    exploratory_commands = agent.sample_exploratory_commands()
    agent.generate_episode(environment, 1, 200, 200, True)

    utils.save_results(environment, 'upside_down_agent', seed, returns)
    utils.save_trained_model(environment, seed, agent.behaviour_function)
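# The `Line 5 Algorithm 1` comment above refers to the upside-down RL paper
# (Srivastava et al., 2019): exploratory commands are derived from the
# highest-return episodes in the replay buffer -- the desired horizon is their
# mean length, and the desired return is drawn from U(M, M + S), where M and S
# are the mean and standard deviation of their returns. A minimal sketch under
# that reading (`replay_buffer` and its episode format are hypothetical):
import numpy as np


def sample_exploratory_commands(replay_buffer, n_best=10):
    # Episodes sorted by total return, best last.
    best = sorted(replay_buffer, key=lambda ep: ep['return'])[-n_best:]
    lengths = [len(ep['states']) for ep in best]
    rets = [ep['return'] for ep in best]
    desired_horizon = float(np.mean(lengths))
    desired_return = float(np.random.uniform(np.mean(rets),
                                             np.mean(rets) + np.std(rets)))
    return [desired_return, desired_horizon]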
def run_experiment():
    """Train an upside-down RL agent on the environment given on the command line."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--approximator', type=str, default='neural_network')
    parser.add_argument('--environment', type=str,
                        default='PongDeterministic-v4')
    parser.add_argument('--seed', type=int, default=1)
    args = parser.parse_args()

    approximator = args.approximator
    environment = args.environment
    seed = args.seed
    episodes = 1500
    returns = []

    agent = UpsideDownAgent(environment, approximator)

    for e in range(episodes):
        print("Episode {}".format(e))

        for i in range(100):
            agent.train_behaviour_function()
        print("Finished training B!")

        tmp_r = []
        for i in range(15):
            exploratory_commands = agent.sample_exploratory_commands()  # Line 5 Algorithm 1
            desired_return = exploratory_commands[0]
            desired_horizon = exploratory_commands[1]
            r = agent.generate_episode(environment, e, desired_return,
                                       desired_horizon, False)
            tmp_r.append(r)

        print(np.mean(tmp_r))
        returns.append(np.mean(tmp_r))

    exploratory_commands = agent.sample_exploratory_commands()
    #agent.generate_episode(environment, 1, 200, 200, True)

    utils.save_results(environment, approximator, seed, returns)

    if approximator == 'neural_network':
        utils.save_trained_model(environment, seed, agent.behaviour_function)

    plt.plot(returns)
    plt.show()
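# A hypothetical entry point for the script above; the file name in the usage
# line is an assumption:
#
#   $ python run_experiment.py --approximator neural_network \
#         --environment PongDeterministic-v4 --seed 1
if __name__ == '__main__':
    run_experiment()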
import logging

from sklearn.linear_model import LinearRegression


def train_model():
    """Train the job application model and save the model."""
    # get_training_data, get_training_labels, full_pipeline,
    # save_trained_model and TRAINING__DATA_PATH are assumed to be defined
    # elsewhere in the project.
    jobs_training = get_training_data(TRAINING__DATA_PATH)
    jobs_labels = get_training_labels("applications")

    logging.info("Transforming data")
    jobs_prepared = full_pipeline.fit_transform(jobs_training)

    logging.info("Training the model")
    lin_reg = LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1,
                               normalize=False)
    lin_reg.fit(jobs_prepared, jobs_labels)

    logging.info("Saving the model")
    save_trained_model(lin_reg)
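# `full_pipeline` is defined elsewhere. As an assumption about its shape only,
# a typical scikit-learn preprocessing pipeline for mixed numeric/categorical
# job features might look like this (the column names are placeholders, not
# the project's schema):
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

num_attribs = ['salary', 'years_experience']  # hypothetical numeric columns
cat_attribs = ['contract_type']               # hypothetical categorical column

full_pipeline = ColumnTransformer([
    # Impute missing numeric values, then standardize.
    ('num', Pipeline([('imputer', SimpleImputer(strategy='median')),
                      ('scaler', StandardScaler())]), num_attribs),
    # One-hot encode categories, tolerating unseen values at predict time.
    ('cat', OneHotEncoder(handle_unknown='ignore'), cat_attribs),
])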
import gc
import json
import os

import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping
from sklearn.metrics import r2_score


def recommend(self):
    '''Recommend track IDs using artist biographies and audio files.'''
    # This is a method of a recommender class; PARAMS, MODELS_DIR, MODEL_EXT,
    # WEIGHTS_EXT, DEFAULT_TRAINED_MODELS_FILE and utils are assumed to be
    # defined at module level.
    # Load data
    print('Loading Data...')
    X_train, Y_train, X_val, Y_val, X_test, Y_test = self.load_data(
        PARAMS, PARAMS['dataset']['dataset_ab'],
        PARAMS['dataset']['dataset_as'],
        self.__config.training_params["validation"],
        self.__config.training_params["test"],
        self.__config.dataset_settings["nsamples"],
        PARAMS['dataset']['meta-suffix'], PARAMS['dataset']['meta-suffix2'])

    # Set model parameters
    model_dir = os.path.join(MODELS_DIR, self.__config.model_id)
    utils.ensure_dir(MODELS_DIR)
    utils.ensure_dir(model_dir)
    model_file = os.path.join(model_dir, self.__config.model_id + MODEL_EXT)
    trained_model = self.__config.get_dict()

    if not os.path.exists(model_file):
        # Construct and save model
        print('Building Network...')
        model = self.build_model(self.__config)
        utils.save_model(model, model_file)
        print(model.summary())

        # Training and validation
        print('\nTraining...')
        early_stopping = EarlyStopping(monitor='val_loss', patience=5)
        model.fit(X_train, Y_train,
                  batch_size=self.__config.training_params['n_minibatch'],
                  nb_epoch=self.__config.training_params['n_epochs'],
                  verbose=1, validation_data=(X_val, Y_val),
                  callbacks=[early_stopping])

        # Save trained model
        model.save_weights(
            os.path.join(model_dir, self.__config.model_id + WEIGHTS_EXT))
        utils.save_trained_model(DEFAULT_TRAINED_MODELS_FILE, trained_model)
        print('\nSaving trained model %s in %s...' %
              (trained_model['model_id'], DEFAULT_TRAINED_MODELS_FILE))
    else:
        # Reuse the previously saved architecture and weights.
        model = utils.load_model(model_file)
        model.load_weights(
            os.path.join(model_dir, self.__config.model_id + WEIGHTS_EXT))
        trained_model = self.__config.get_dict()

    # Predict and evaluate the model for the split test data
    print('\nPredicting for split test data...')
    preds = model.predict(X_test)
    r2s = []
    for i, pred in enumerate(preds):
        # R2 is computed per test example and then averaged.
        r2 = r2_score(Y_test[i], pred)
        r2s.append(r2)
    r2 = np.asarray(r2s).mean()
    print('R2 avg: ', r2)

    # Delete used variables
    del X_train, Y_train, X_val, Y_val, X_test, Y_test
    gc.collect()

    # Load trained model and model config
    trained_models = pd.read_csv(DEFAULT_TRAINED_MODELS_FILE, sep='\t')
    model_config = trained_models[
        trained_models['model_id'] == trained_model['model_id']].to_dict(
            orient="list")

    # Predict for the whole test data
    print('\nPredicting for whole test data...')
    predicted_matrix_map, predictions_index = self.predict(
        model_config, trained_model['model_id'],
        trim_coeff=self.__config.predicting_params['trim_coeff'],
        model=model,
        fact=PARAMS['dataset']['fact'],
        dim=PARAMS['dataset']['dim'],
        num_users=PARAMS['dataset']['num_users'],
        dataset_as=PARAMS['dataset']['dataset_as'],
        meta_source_ab=PARAMS['dataset']['meta-suffix'],
        meta_source_as=PARAMS['dataset']['meta-suffix2'])
    print('Prediction is completed.\n')

    # Evaluation
    model_config = trained_models[
        trained_models["model_id"] == trained_model["model_id"]].to_dict(
            orient="list")
    # The config columns are stored as dict strings in the TSV.
    model_settings = eval(model_config['dataset_settings'][0])
    model_arch = eval(model_config['model_arch'][0])
    model_training = eval(model_config['training_params'][0])
    str_config = (json.dumps(model_settings) + "\n" +
                  json.dumps(model_arch) + "\n" +
                  json.dumps(model_training) + "\n")
    model_settings["loss"] = model_training['loss_func']

    self.evaluate(trained_model['model_id'], model_settings, str_config,
                  predicted_matrix_map, predictions_index)
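# The evaluation block above rebuilds config dicts by calling eval() on
# strings read from the models TSV. For values that are plain Python literals,
# ast.literal_eval is a safer drop-in; this is a suggestion, not the project's
# code:
import ast


def parse_config_column(value):
    # Parses a stored dict string such as "{'n_epochs': 100}" into a dict
    # without executing arbitrary code.
    return ast.literal_eval(value)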