def test_build_linear_auto_architecture(self):
    input = Input(shape=[64, 64, 3],
                  tensor=K.variable(np.random.random([1, 64, 64, 3])))
    depth = 6
    skip_distance = 3
    blocks_per_rescale = 2
    stride_difference_per_rescale = 1.5
    filter_increase_factor_per_rescale = 1.2
    regularizer = keras.regularizers.l1_l2(l1=.5, l2=.75)

    layer = Learning.build_linear_auto_architecture(
        input, depth, blocks_per_rescale, skip_distance,
        stride_difference_per_rescale, filter_increase_factor_per_rescale,
        regularizer)
    layer_output = layer.eval(session=K.get_session())
    self.assertTrue(layer.shape.as_list()[1:] == [32, 32, 6])
    self.assertTrue(layer_output.shape == (1, 32, 32, 6))

    stride_difference_per_rescale = 2
    layer = Learning.build_linear_auto_architecture(
        input, depth, blocks_per_rescale, skip_distance,
        stride_difference_per_rescale, filter_increase_factor_per_rescale,
        regularizer)
    layer_output = layer.eval(session=K.get_session())
    self.assertTrue(layer.shape.as_list()[1:] == [8, 8, 6])
    self.assertTrue(layer_output.shape == (1, 8, 8, 6))

    stride_difference_per_rescale = 1.67
    filter_increase_factor_per_rescale = 3
    layer = Learning.build_linear_auto_architecture(
        input, depth, blocks_per_rescale, skip_distance,
        stride_difference_per_rescale, filter_increase_factor_per_rescale,
        regularizer)
    layer_output = layer.eval(session=K.get_session())
    self.assertTrue(layer.shape.as_list()[1:] == [16, 16, 81])
    self.assertTrue(layer_output.shape == (1, 16, 16, 81))
def __init__(self, Infer):
    """ GPR.Learning(Infer) """
    Learning.__init__(self, Infer)
    self.logP = None
def policyIteration(env):
    ''' Simple test for policy iteration '''
    polIter = Learning(0.9, env, augmentActionSet=False)
    V, pi = polIter.solvePolicyIteration()

    # I'll assign the goal as the termination action
    pi[env.getGoalState()] = 4

    # Now we just plot the learned value function and the obtained policy
    plot = Plotter(outputPath, env)
    plot.plotValueFunction(V[0:numStates], 'goal_')
    plot.plotPolicy(pi[0:numStates], 'goal_')
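# Hedged sketch of the tabular policy iteration that solvePolicyIteration
# presumably performs; the transition tensor P, reward vector R, and the
# discount default below are illustrative assumptions, not the Learning
# class's actual API.
import numpy as np

def policy_iteration_sketch(P, R, gamma=0.9):
    # P: (S, A, S) transition probabilities, R: (S,) rewards
    S, A, _ = P.shape
    pi = np.zeros(S, dtype=int)
    while True:
        # Policy evaluation: solve (I - gamma * P_pi) V = R
        P_pi = P[np.arange(S), pi]
        V = np.linalg.solve(np.eye(S) - gamma * P_pi, R)
        # Policy improvement: act greedily w.r.t. the one-step lookahead
        Q = R[:, None] + gamma * np.einsum('sat,t->sa', P, V)
        new_pi = Q.argmax(axis=1)
        if np.array_equal(new_pi, pi):
            return V, pi
        pi = new_pi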
def learn_train(src, dir_name):
    dict_type = 'train'
    docs = 'train'
    learning = Learning(src, dir_name, dict_type)
    learning.learn_tp(docs, grid_search=True)
    learning.learn_tt(docs, grid_search=True)
    learning.learn_tc(docs, grid_search=True)
    learning.learn_t2(docs, grid_search=True)
def main():
    number_of_actions = 12
    input_dimension = 90
    episodes = int(sys.argv[2])
    load = int(sys.argv[1])

    # Read the hyperparameter configuration from config.csv
    with open("config.csv", 'r') as csv_config:
        csv_reader = csv.reader(csv_config, delimiter=',')
        file = {}
        for row in csv_reader:
            file[row[0]] = row[1]

    max_steps = int(file.get('max_steps'))
    epsilon = float(file.get('epsilon'))
    batch_size = int(file.get('batch_size'))
    epochs = int(file.get('epochs'))
    epsilon_decay = float(file.get('epsilon_decay'))
    episodes_decay = int(file.get('episodes_decay'))
    alpha = float(file.get('alpha'))
    gamma = float(file.get('gamma'))

    dqn = Learning(number_of_actions, input_dimension, load, batch_size,
                   episodes, max_steps, epsilon, gamma, alpha, epsilon_decay,
                   episodes_decay, epochs)
    dqn.agent.cummulative_reward = float(file.get('cummulative_reward'))
    dqn.run()

    os.chdir("..")

    # Write the (possibly updated) hyperparameters back to config.csv
    file = {
        'epsilon': dqn.epsilon,
        'cummulative_reward': dqn.agent.cummulative_reward,
        'max_steps': dqn.max_steps,
        'batch_size': dqn.agent.batch_size,
        'epochs': dqn.epochs,
        'epsilon_decay': dqn.epsilon_decay,
        'episodes_decay': dqn.episodes_decay,
        'alpha': dqn.alpha,
        'gamma': dqn.gamma
    }
    with open("config.csv", 'w') as csv_config:
        csv_writer = csv.writer(csv_config, delimiter=',')
        for item in file:
            csv_writer.writerow([str(item), file.get(item)])
def setUpClass(cls):
    cls.unstandardized_photos_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'resources', 'runner', 'unstandardized_photos')
    cls.photos_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'resources', 'runner', 'photos')
    cls.test_photos_csv_file_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'resources', 'runner', 'test_photos.csv')

    cls.dataImporter = DataImporter()
    cls.dataImporter.convert_to_standard_resolution(
        cls.unstandardized_photos_path,
        cls.photos_path,
        [152, 114],
        {
            "multi_core_count": 2,
            "timeout_secs": 100,
            "chunk_size": 2
        })
    cls.data_set = cls.dataImporter.import_all_data(
        cls.test_photos_csv_file_path, cls.photos_path)
    cls.mock_results = Learning.simple_binary_classification(
        cls.data_set, epochs=1, batch_size=2)
def main():
    # Load the data
    log_data = read('log')               # user action logs
    event_data = read('events')          # event attributes
    user_data = read('users')            # user demographics
    user_test = read('test')             # users to be evaluated
    sample_data = read('sample_submit')  # sample submission file

    # Build the explanatory variables (features) and the target variable
    X_v_train, X_t_train, X_valid, X_test, y_v_train, y_t_train, y_valid = \
        Preprocessing(log_data, event_data, user_data, user_test).preprocessing()

    # Fit the best model
    model = Ridge(fit_intercept=False, normalize=True)
    parameters = {'alpha': [5.0, 6.0, 7.0, 8.0, 9.0]}
    best_model = Learning(X_v_train, X_valid, y_v_train, y_valid,
                          model, parameters, cv=3).scoring()

    # Generate the predictions
    submit_data = Predicting(X_t_train, y_t_train, X_test, best_model).predicting()

    # Write out the submission file
    submit_data.to_csv('submit.tsv', sep='\t', index=False, header=False)
    print('All Done')
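# Hedged sketch of the hyperparameter search that Learning(...).scoring()
# presumably wraps: a cross-validated grid search over the Ridge alphas above.
# The use of scikit-learn's GridSearchCV and the random data are assumptions
# for illustration only.
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

X, y = np.random.random((100, 5)), np.random.random(100)
search = GridSearchCV(Ridge(fit_intercept=False),
                      {'alpha': [5.0, 6.0, 7.0, 8.0, 9.0]}, cv=3)
search.fit(X, y)
best_model = search.best_estimator_  # analogous to best_model above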
def policyEvaluation(env):
    ''' Simple test for policy evaluation '''
    pi = numStates * [[0.25, 0.25, 0.25, 0.25]]
    actionSet = env.getActionSet()

    # This solution is slower and it does not work for gamma = 1:
    # polEval = Learning(0.9999, env, augmentActionSet=False)
    # expectation = polEval.solvePolicyEvaluation(pi)

    bellman = Learning(1, env, augmentActionSet=False)
    expectation = bellman.solveBellmanEquations(pi, actionSet, None)

    for i in xrange(len(expectation) - 1):
        sys.stdout.write(str(expectation[i]) + '\t')
        if (i + 1) % env.numCols == 0:
            print
    print
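# Hedged sketch of policy evaluation as a linear system, which is what
# solveBellmanEquations presumably solves: V = R_pi + gamma * P_pi V, i.e.
# (I - gamma * P_pi) V = R_pi. P, R, and pi below are illustrative shapes,
# not the Learning class's API. With gamma = 1, as above, this only works
# when (I - P_pi) stays non-singular (e.g. an absorbing goal state).
import numpy as np

def evaluate_policy_sketch(P, R, pi, gamma):
    # P: (S, A, S) transitions, R: (S,) rewards, pi: (S, A) action probabilities
    P_pi = np.einsum('sa,sat->st', pi, P)  # state-to-state transitions under pi
    return np.linalg.solve(np.eye(len(R)) - gamma * P_pi, R)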
def test_run_experiment_float(self):
    run_data_import([None, self.parameter_space_float_file_path])
    config = ParameterSpace.load(
        self.parameter_space_float_file_path).get_configuration_grid()[0]
    raw_performance, stats = Learning.run_experiment(
        {**config, "tensorboard_path": self.tensorboard_path})
    self.assertTrue(stats["stats"]["meta"] ==
                    {'type': 'float',
                     'labels': {0: "A", 1: "B", 2: "C", 3: "D"}})
    self.assertTrue(isinstance(stats["model"], keras.models.Model))
def test_run_experiment_bool(self):
    run_data_import([None, self.parameter_space_bool_file_path])
    config = ParameterSpace.load(
        self.parameter_space_bool_file_path).get_configuration_grid()[0]
    raw_performance, stats = Learning.run_experiment(
        {**config, "tensorboard_path": self.tensorboard_path})
    self.assertTrue(stats["stats"]["meta"] ==
                    {'type': 'bool', 'labels': ["dirty", "clean"]})
    self.assertTrue(isinstance(stats["model"], keras.models.Model))
def test_simple_crow_score_regression(self):
    data_set = self.dataImporter.import_all_data(
        self.test_photos_csv_file_path, self.photos_path)
    results = Learning.simple_crow_score_regression(
        data_set, epochs=1, batch_size=2)
    Analysis.store_raw_result(self.result_path, results)
    Analysis.process_result(self.result_path)
class Helper(object):

    def __init__(self, session):
        self.session = session
        self.NO_SUCH_ANSWER = -1
        self.learn = Learning()
        self.learn.fit()
        self.ntf = NotificationsDB()
        self.SORRY = rp.SORRY_NO_ANSWER

    def helper(self, bot, update):
        ''' The actual helper. '''
        text = update.message.text
        text = unicode(text.encode("utf-8"), "utf-8").encode("utf-8").lower()
        answer = self.learn.predict(text)
        if answer == self.NO_SUCH_ANSWER:
            notification = self.create_notification(update.message)
            self.ntf.send_notification(notification)
            update.message.reply_text(self.SORRY)
        else:
            update.message.reply_text(answer[0])
            update.message.reply_text(answer[1])
        update.message.reply_text(rp.RT_SERVICES_LIST,
                                  reply_markup=kb.RT_SERVICES_KEYBOARD)
        return st.START_CONVERSATION

    @staticmethod
    def create_notification(message):
        '''
        Build a notification dict from a standard message object so the
        request can be stored in the database.
        '''
        notification = {}
        notification["user_id"] = message.chat.id
        notification["notification"] = message.text
        notification["date"] = message.date
        return notification
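# Hedged sketch of how Helper might be wired into a bot: python-telegram-bot's
# pre-v12 API matches the (bot, update) handler signature above. The token and
# the session argument are placeholders; the original code likely registers
# helper inside a ConversationHandler, given the state it returns.
from telegram.ext import Updater, MessageHandler, Filters

updater = Updater(token='BOT_TOKEN')  # placeholder token
helper = Helper(session=None)         # session object assumed
updater.dispatcher.add_handler(MessageHandler(Filters.text, helper.helper))
updater.start_polling()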
def test_get_data_sets(self):
    # is data loaded?
    # is it in the right directory?
    run_data_import([None, self.parameter_space_file_path])
    config = ParameterSpace.load(
        self.parameter_space_file_path).get_configuration_grid()[0]
    ds = Learning.get_data_sets(config)
    self.assertTrue(ds['training'][0].shape == tuple(
        [8] + config['data_import']['output_image_dimensions'] + [3]))
    self.assertTrue(ds['validation'][0].shape == tuple(
        [4] + config['data_import']['output_image_dimensions'] + [3]))
    self.assertTrue(ds['test'][0].shape == tuple(
        [4] + config['data_import']['output_image_dimensions'] + [3]))
    self.assertTrue(np.all(
        ds['from_network'](ds['to_network'](ds['training'][1])) ==
        ds['training'][1]))
    self.assertTrue(ds['output_type'] ==
                    config['learning']['target']['output_type'])
    self.assertTrue(ds['value_names'] == {0: 'w', 2: 'y', 3: 'z', 1: 'x'})

    config = ParameterSpace.load(
        self.parameter_space_float_file_path).get_configuration_grid()[0]
    ds = Learning.get_data_sets(config)
    self.assertTrue(np.all(
        ds['from_network'](ds['to_network'](ds['training'][1])) ==
        ds['training'][1]))
    self.assertTrue(ds['output_type'] ==
                    config['learning']['target']['output_type'])
    self.assertTrue(ds['value_names'] == {3: 'D', 2: 'C', 0: 'A', 1: 'B'})

    config = ParameterSpace.load(
        self.parameter_space_bool_file_path).get_configuration_grid()[0]
    ds = Learning.get_data_sets(config)
    self.assertTrue(np.all(
        ds['from_network'](ds['to_network'](ds['training'][1])) ==
        ds['training'][1]))
    self.assertTrue(ds['output_type'] ==
                    config['learning']['target']['output_type'])
    self.assertTrue(ds['value_names'] == ['dirty', 'clean'])
def learn(self, tp=True, tt=True, tc=True, t2=True):
    learning_docs = 'train'

    # delete the temporary model dir if it exists
    path = self.src + '/model/' + self.dir_name
    self.delete_dir(path)

    learning = Learning(self.src, self.dir_name, self.dict_type)
    if tp:
        learning.learn_tp(learning_docs, grid_search=True)
    if tt:
        learning.learn_tt(learning_docs, grid_search=True)
    if tc:
        learning.learn_tc(learning_docs, grid_search=True)
    if t2:
        learning.learn_t2(learning_docs, grid_search=True)
def test_resnet_output(self):
    # are arguments passed correctly?
    input = Input(shape=[5, 6, 7],
                  tensor=K.variable(np.random.random([1, 5, 6, 7])))
    weight_regularizer = keras.regularizers.l1_l2(l1=.5, l2=.75)
    layer = Learning.resnet_output(input, 10, weight_regularizer)
    self.assertTrue(layer.shape.as_list()[1:] == [10])
    layer_output = layer.eval(session=K.get_session())
    self.assertTrue(layer_output.shape == (1, 10))
def test_run_experiment_categorical_int(self):
    # is the experiment actually run?
    # are results stored in the appropriate directory?
    # is data loaded from the appropriate directory?
    # are parameters assigned correctly?
    run_data_import([None, self.parameter_space_file_path])
    config = ParameterSpace.load(
        self.parameter_space_file_path).get_configuration_grid()[0]
    raw_performance, stats = Learning.run_experiment(
        {**config, "tensorboard_path": self.tensorboard_path})
    self.assertTrue(stats["stats"]["meta"] ==
                    {'type': 'categorical_int',
                     'labels': {1: 'x', 3: 'z', 2: 'y', 0: 'w'}})
    self.assertTrue(isinstance(stats["model"], keras.models.Model))
def test_initial_convolution(self):
    # is the output shape correct?
    # are arguments passed?
    input = Input(shape=[5, 6, 7],
                  tensor=K.variable(np.random.random([1, 5, 6, 7])))
    weight_regularizer = keras.regularizers.l1_l2(l1=.5, l2=.75)
    layer = Learning.initial_convolution(input, 2, 40, 7,
                                         regularizer=weight_regularizer)
    self.assertTrue(layer.shape[1:].as_list() == [3, 3, 40])
    layer_output = layer.eval(session=K.get_session())
    self.assertTrue(layer_output.shape == (1, 3, 3, 40))
def test_resnet_skip_block(self):
    # is the output shape correct?
    # are arguments passed? (test through last layer)
    input = Input(shape=[5, 6, 7],
                  tensor=K.variable(np.random.random([1, 5, 6, 7])))
    weight_regularizer = keras.regularizers.l1_l2(l1=.5, l2=.75)
    layer = Learning.resnet_skip_block(input,
                                       2,  # stride
                                       2,  # filter increase
                                       3,  # skip distance
                                       weight_regularizer)
    self.assertTrue(layer.shape.as_list() == [1, 3, 3, 14])
    layer_output = layer.eval(session=K.get_session())
    self.assertTrue(layer_output.shape == (1, 3, 3, 14))
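# Hedged sketch of what a resnet-style skip block with a strided projection
# shortcut generally looks like in the Keras functional API. The exact layer
# layout inside Learning.resnet_skip_block is not shown in the test above, so
# this is an illustration of the technique, not the tested implementation.
import keras
from keras import backend as K
from keras.layers import Conv2D, Add, Activation

def resnet_skip_block_sketch(x, stride, filter_increase, skip_distance,
                             regularizer=None):
    filters = K.int_shape(x)[-1] * filter_increase
    y = x
    for i in range(skip_distance):  # conv layers spanned by the skip connection
        y = Conv2D(filters, 3, strides=stride if i == 0 else 1, padding='same',
                   kernel_regularizer=regularizer)(y)
        if i < skip_distance - 1:
            y = Activation('relu')(y)
    # 1x1 projection so the shortcut matches the strided, widened main path
    shortcut = Conv2D(filters, 1, strides=stride, padding='same',
                      kernel_regularizer=regularizer)(x)
    return Activation('relu')(Add()([y, shortcut]))

# Consistent with the shapes asserted above:
# (1, 5, 6, 7) with stride 2 and filter increase 2 -> (1, 3, 3, 14)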
def test_build_model(self):
    input = Input(shape=[64, 64, 3])
    block_depth = 2
    output_type = 'categorical_int'
    skip_distance = 3
    blocks_per_rescale = 2
    stride_difference_per_rescale = 1.5
    filter_increase_factor_per_rescale = 1.2
    initial_stride = 2
    initial_filter_count = 64
    initial_kernel_size = 7
    l1_factor = .1
    l2_factor = .1
    model = Learning.build_model(input, 4, block_depth, output_type,
                                 blocks_per_rescale,
                                 stride_difference_per_rescale,
                                 filter_increase_factor_per_rescale,
                                 skip_distance, initial_stride,
                                 initial_filter_count, initial_kernel_size,
                                 l1_factor, l2_factor)

    # does the compiled model actually produce values?
    model.compile(optimizer='adam', loss='mean_squared_error')
    result = model.predict(np.random.random([30] + input.shape.as_list()[1:]))
    self.assertTrue(result.shape == (30, 4))
    self.assertTrue(np.all(np.isfinite(result)))
from Evaluation import Evaluation
from GridWorld import GridWorld
from Learning import Learning

# Size of the grid world
row = 5
column = 5

LearningAgentSpan = 10    # lifespan of the learning agent
LearningTimes = 100       # number of learning episodes
P = 5                     # reward
T = 10                    # number of steps to trace back
EvaluationAgentSpan = 10  # lifespan of the evaluation agent
EvaluationTimes = 100     # number of evaluation trials

grid_world = GridWorld(row, column)
grid_world.make_grid_world()

learning = Learning(grid_world.get_grid_world(), row, column)
learning.do_learning(LearningAgentSpan, LearningTimes, P, T)

evaluation = Evaluation(learning.get_grid_world(), row, column)
evaluation.evaluation(EvaluationAgentSpan, EvaluationTimes)
def train_fold(train_config, experiment_folder, pipeline_name, log_dir,
               fold_id, train_dataloader, valid_dataloader,
               binarizer_fn, eval_fn):
    fold_logger = init_logger(log_dir, 'train_fold_{}.log'.format(fold_id))

    best_checkpoint_folder = Path(experiment_folder,
                                  train_config['CHECKPOINTS']['BEST_FOLDER'])
    best_checkpoint_folder.mkdir(exist_ok=True, parents=True)
    checkpoints_history_folder = Path(
        experiment_folder,
        train_config['CHECKPOINTS']['FULL_FOLDER'],
        'fold{}'.format(fold_id))
    checkpoints_history_folder.mkdir(exist_ok=True, parents=True)
    checkpoints_topk = train_config['CHECKPOINTS']['TOPK']
    calculation_name = '{}_fold{}'.format(pipeline_name, fold_id)
    device = train_config['DEVICE']

    # Resolve the model class from the config and instantiate it
    module = importlib.import_module(train_config['MODEL']['PY'])
    model_class = getattr(module, train_config['MODEL']['CLASS'])
    model = model_class(**train_config['MODEL']['ARGS'])

    # Optionally warm-start from a pretrained checkpoint
    pretrained_model_config = train_config['MODEL'].get('PRETRAINED', False)
    if pretrained_model_config:
        loaded_pipeline_name = pretrained_model_config['PIPELINE_NAME']
        pretrained_model_path = Path(
            pretrained_model_config['PIPELINE_PATH'],
            pretrained_model_config['CHECKPOINTS_FOLDER'],
            '{}_fold{}.pth'.format(loaded_pipeline_name, fold_id))
        if pretrained_model_path.is_file():
            model.load_state_dict(torch.load(pretrained_model_path))
            fold_logger.info(
                'load model from {}'.format(pretrained_model_path))

    if len(train_config['DEVICE_LIST']) > 1:
        model = torch.nn.DataParallel(model)

    # Loss, optimizer and scheduler are resolved from the config the same way
    module = importlib.import_module(train_config['CRITERION']['PY'])
    loss_class = getattr(module, train_config['CRITERION']['CLASS'])
    loss_fn = loss_class(**train_config['CRITERION']['ARGS'])

    optimizer_class = getattr(torch.optim, train_config['OPTIMIZER']['CLASS'])
    optimizer = optimizer_class(model.parameters(),
                                **train_config['OPTIMIZER']['ARGS'])
    scheduler_class = getattr(torch.optim.lr_scheduler,
                              train_config['SCHEDULER']['CLASS'])
    scheduler = scheduler_class(optimizer, **train_config['SCHEDULER']['ARGS'])

    n_epoches = train_config['EPOCHES']
    grad_clip = train_config['GRADIENT_CLIPPING']
    grad_accum = train_config['GRADIENT_ACCUMULATION_STEPS']
    early_stopping = train_config['EARLY_STOPPING']
    validation_frequency = train_config.get('VALIDATION_FREQUENCY', 1)
    freeze_model = train_config['MODEL']['FREEZE']

    Learning(optimizer, binarizer_fn, loss_fn, eval_fn, device, n_epoches,
             scheduler, freeze_model, grad_clip, grad_accum, early_stopping,
             validation_frequency, calculation_name, best_checkpoint_folder,
             checkpoints_history_folder, checkpoints_topk,
             fold_logger).run_train(model, train_dataloader, valid_dataloader)
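# Hedged sketch of the config-driven class loading pattern used above:
# importlib.import_module plus getattr turns "module path + class name +
# kwargs" entries from a config file into live objects. The torch.optim
# usage below is illustrative; model is a placeholder.
import importlib

def build_from_config(module_path, class_name, **kwargs):
    cls = getattr(importlib.import_module(module_path), class_name)
    return cls(**kwargs)

# e.g. the OPTIMIZER section could resolve like this:
# optimizer = build_from_config('torch.optim', 'Adam',
#                               params=model.parameters(), lr=1e-3)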
def test_train(self):
    # does the code execute?
    # does performance improve for very simply learned input?
    batch_size = 8
    epochs = 200
    output_type = 'bool'
    d = keras.layers.Dense(32, input_shape=(784,))
    d2 = keras.layers.Dense(1)
    model = keras.models.Sequential([
        d,
        keras.layers.Activation('relu'),
        d2,
        keras.layers.Activation('sigmoid'),
    ])
    w = d.get_weights()
    w2 = d2.get_weights()

    input = np.random.random([100, 784])
    output = np.sum(input, axis=1, keepdims=True) > .7
    training_set = (input, output)
    validation_set = (input, output)

    results = Learning.train(model, output_type, training_set, validation_set,
                             epochs, batch_size, self.tensorboard_path)
    self.assertTrue(os.path.isdir(self.tensorboard_path))
    tw = d.get_weights()
    tw2 = d2.get_weights()
    self.assertTrue(np.sum(w[0] == tw[0]) < 5000)
    self.assertTrue(np.sum(w2[0] == tw2[0]) < 10)
    self.assertTrue(model == results.model)
    self.assertTrue('acc' in results.history)
    self.assertTrue('lr' in results.history)
    self.assertTrue(len(results.epoch) < 190)

    results = Learning.train(model, 'float', training_set, validation_set,
                             epochs, batch_size, self.tensorboard_path)
    self.assertTrue('mean_squared_error' in results.history)
    self.assertTrue('acc' not in results.history)

    d = keras.layers.Dense(32, input_shape=(784,))
    d2 = keras.layers.Dense(3)
    model = keras.models.Sequential([
        d,
        keras.layers.Activation('relu'),
        d2,
        keras.layers.Activation('sigmoid'),
    ])
    input = np.random.random([100, 784])
    t = np.random.random([784, 3])
    output = np.matmul(input, t)
    output = (output == np.max(output, axis=1, keepdims=True)).astype('int')
    training_set = (input, output)
    validation_set = (input, output)

    results = Learning.train(model, 'categorical_int', training_set,
                             validation_set, epochs, batch_size,
                             self.tensorboard_path)
    self.assertTrue('mean_squared_error' not in results.history)
    self.assertTrue('acc' in results.history)
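# Hedged sketch of how Learning.train presumably gets 'lr' into the history
# and stops before the epoch limit: Keras's ReduceLROnPlateau logs the
# learning rate each epoch, and EarlyStopping ends training early. The
# monitored metric, patience values, and log path are assumptions.
import keras

callbacks = [
    keras.callbacks.ReduceLROnPlateau(monitor='loss', patience=5),  # adds 'lr' to history
    keras.callbacks.EarlyStopping(monitor='loss', patience=20),     # len(epoch) < epochs
    keras.callbacks.TensorBoard(log_dir='/tmp/tb'),                 # placeholder path
]
# history = model.fit(x, y, epochs=200, batch_size=8, callbacks=callbacks)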
#! /usr/bin/env python
# encoding: utf-8
"""
@author: DYS
@file: main.py
@time: 2018/4/16 14:03
"""
import sys

from PyQt5.QtWidgets import QApplication

from Learning import Learning

if __name__ == '__main__':
    app = QApplication(sys.argv)
    py_learning = Learning()
    py_learning.show()
    sys.exit(app.exec_())
import logging

from Learning import Learning
from GeneticSpec.GeneticPopulation import GeneticPopulation

# Make the learning object
variables = ["x", "y", "v", "z"]
lower = [0, 0, 0, 0]      # lower bounds
upper = [80, 45, 80, 45]  # upper bounds
l = Learning(logging.INFO, "Data/values", "Data/labels", "Data/times",
             variables, lower, upper)

# Start learning
generation = l.run()

# Save rules and their scores to file
ruleScores = generation.finalFormulaScoresToString(100)
with open("testScores.txt", 'w') as filehandle:
    for r in ruleScores:
        filehandle.write('%s\n' % r)

# Save the rules themselves
rules = generation.finalFormulasToString(100)
with open("testRules.txt", 'w') as filehandle:
    for r in rules:
        filehandle.write('%s\n' % r)
# re = KM.getResult()
# for i in range(len(PL.getFreeOrd(time))):
#     if re[i] < self.DriNum:
#         temp[re[i]] = i
# if self.flag == 1:
#     temp = np.zeros(self.DriNum, dtype=int)
#     re = KM.getResult()
#     for i in range(1, len(PL.getFreeOrd(time))):
#         temp[re[i-1]] = i
# self.finalresult.append(temp)

if __name__ == '__main__':
    time = 0
    totalSum = 0

    RL = Learning()
    RL.getData()   # get the training data
    RL.learning()  # training

    PL = planning()
    PL.DriverProductor()  # create the drivers

    while time < 9:
        # RL.OrdersProductor()
        PL.OrdersProductor(time)  # current timestep
        fo = PL.getFreeOrd(time)
        fd = PL.getFreeDri(time)
        print(str(time) + ' has drivers:')
        print(fd)
    0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
    -83., -90., -59., -40., -40., -4.5, -150., -108., -113.
]  # lower bounds
upper = [
    9.900e+01, 7.070e+02, 5.330e+02, 8.500e+01, 1.480e+02, 8.000e+00,
    1.390e+02, 1.000e+01, 1.037e+03, 1.480e+01, 8.200e+00, 2.400e+00,
    2.700e+02, 1.000e+01, 3.000e+00, 2.500e+00, 5.000e+01, 1.250e+02,
    1.000e+00, 1.000e+00, 1.000e+00, 2.420e+03, 2.750e+01, 8.500e+01,
    9.000e+01, 5.900e+01, 5.300e+01, 4.900e+01, 4.810e+00, 1.680e+02,
    1.250e+02, 1.250e+02, 2.000e+01, 5.800e+01, 2.600e+01, 3.200e+01,
    2.600e+01, 3.100e+00, 1.100e+02, 6.800e+01, 9.400e+01
]  # upper bounds

l = Learning(logging.INFO,
             "../Data/Card/cardDataChanges.txt",
             "../Data/Card/cardRehospLabels.txt",
             "../Data/Card/cardTimeChanges.txt",
             variables, lower, upper)

# Start learning
generation = l.run()

# Save rules and their scores to file
ruleScores = generation.finalFormulaScoresToString(500)
with open("CardChangesRuleScores.txt", 'w') as filehandle:
    for r in ruleScores:
        filehandle.write('%s\n' % r)

# Save the rules themselves
rules = generation.finalFormulasToString(500)
with open("CardChangesRules.txt", 'w') as filehandle:
    for r in rules:
        filehandle.write('%s\n' % r)
def discoverOptions(env, epsilon, verbose, discoverNegation, plotGraphs=False):
    # I'll need this when computing the expected number of steps:
    options = []
    actionSetPerOption = []

    # Computing the Combinatorial Laplacian
    W = env.getAdjacencyMatrix()
    D = np.zeros((numStates, numStates))

    # Obtaining the valency (degree) matrix
    for i in xrange(numStates):
        D[i][i] = np.sum(W[i])

    # Making sure our final matrix will be full rank
    for i in xrange(numStates):
        if D[i][i] == 0.0:
            D[i][i] = 1.0

    # Normalized Laplacian
    L = D - W
    expD = Utils.exponentiate(D, -0.5)
    normalizedL = expD.dot(L).dot(expD)

    # Eigendecomposition
    # IMPORTANT: The eigenvectors are in columns
    eigenvalues, eigenvectors = np.linalg.eig(normalizedL)

    # I need to sort the eigenvalues and eigenvectors
    idx = eigenvalues.argsort()[::-1]
    eigenvalues = eigenvalues[idx]
    eigenvectors = eigenvectors[:, idx]

    # If I decide to use both directions of the eigenvector, I do it here.
    # It is easier to just change the list of eigenvectors, even though it may
    # not be the most efficient solution. The rest of the code remains the same.
    if discoverNegation:
        oldEigenvalues = eigenvalues
        oldEigenvectors = eigenvectors.T
        eigenvalues = []
        eigenvectors = []
        for i in xrange(len(oldEigenvectors)):
            eigenvalues.append(oldEigenvalues[i])
            eigenvalues.append(oldEigenvalues[i])
            eigenvectors.append(oldEigenvectors[i])
            eigenvectors.append(-1 * oldEigenvectors[i])
        eigenvalues = np.asarray(eigenvalues)
        eigenvectors = np.asarray(eigenvectors).T

    if plotGraphs:
        # Plotting all the basis functions
        plot = Plotter(outputPath, env)
        plot.plotBasisFunctions(eigenvalues, eigenvectors)

    # Now I will define a reward function and solve the MDP for it.
    # I iterate over the columns, not rows. I can index by 0 here.
    guard = len(eigenvectors[0])
    for i in xrange(guard):
        idx = guard - i - 1
        if verbose:
            print 'Solving for eigenvector #' + str(idx)
        polIter = Learning(0.9, env, augmentActionSet=True)
        env.defineRewardFunction(eigenvectors[:, idx])
        V, pi = polIter.solvePolicyIteration()

        # Now I will eliminate any actions that may give us a small
        # improvement. This is where the epsilon parameter is important.
        # If it is not set at all, it will never be considered, since I
        # set it to a very small value.
        for j in xrange(len(V)):
            if V[j] < epsilon:
                pi[j] = len(env.getActionSet())

        if plotGraphs:
            plot.plotValueFunction(V[0:numStates], str(idx) + '_')
            plot.plotPolicy(pi[0:numStates], str(idx) + '_')

        options.append(pi[0:numStates])
        optionsActionSet = env.getActionSet()
        optionsActionSet.append('terminate')
        actionSetPerOption.append(optionsActionSet)

    # I need to do this after I'm done with the PVFs:
    env.defineRewardFunction(None)
    env.reset()

    return options, actionSetPerOption
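# Hedged sketch of the spectral machinery above on a tiny graph: build the
# degree matrix D from the adjacency matrix W, form the normalized Laplacian
# D^{-1/2} (D - W) D^{-1/2}, and take its eigenvectors (the proto-value
# functions used as reward functions above). The 4-node path graph is
# illustrative only.
import numpy as np

W = np.array([[0, 1, 0, 0],
              [1, 0, 1, 0],
              [0, 1, 0, 1],
              [0, 0, 1, 0]], dtype=float)
D = np.diag(W.sum(axis=1))
expD = np.diag(np.diag(D) ** -0.5)       # D^{-1/2}; assumes no isolated nodes
normalizedL = expD.dot(D - W).dot(expD)
# eigh is appropriate here since the normalized Laplacian is symmetric
eigenvalues, eigenvectors = np.linalg.eigh(normalizedL)  # eigenvectors in columns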
    1.00000000e+00, 1.50000000e+01, 7.00000000e+00, 4.90000000e+01,
    1.00000000e+02, 2.60000000e+02, 3.82000000e+02, 4.05000000e+01,
    5.00000000e+00, 3.86000000e+02, 3.56000000e+02, 2.42000000e+02,
    1.11000000e+02, 1.10000000e+01, 1.32000000e+02, 4.00000000e+01,
    1.00000000e+01, 4.60000000e+02, 2.02000000e+01, 3.40000000e+00,
    3.40000000e+00, 9.98000000e+01, 1.40000000e+02, 6.76000000e+01,
    8.00000000e+00, 6.62000000e+02, 8.30000000e+00, 5.00000000e+00,
    1.51000000e+02, 9.00000000e+00, 9.70000000e+00, 2.35000000e+01,
    2.60000000e+01, 3.00000000e+02, 6.86283741e-02, 6.86283741e-02,
    6.86283741e-02, 1.00000000e+00, 5.43110090e+01, 1.69365926e-02,
    1.69365926e-02, 1.69365926e-02, 6.05001092e-01, 2.22452357e+00,
    8.51720869e-01, 4.49814800e+00, 1.00000000e+00, 1.00000000e+00
]  # upper bounds

l = Learning(logging.INFO,
             "Data/ICUData/133.txt",
             "Data/ICUData/133labels.txt",
             "Data/ICUData/time.txt",
             variables, lower, upper)

# Start learning
generation = l.run()

# Save rules and their scores to file
ruleScores = generation.finalFormulaScoresToString(100)
with open("1ruleScores.txt", 'w') as filehandle:
    for r in ruleScores:
        filehandle.write('%s\n' % r)

# Save the rules themselves
rules = generation.finalFormulasToString(100)
with open("1rules.txt", 'w') as filehandle:
    for r in rules:
        filehandle.write('%s\n' % r)
def getAvgNumStepsBetweenEveryPoint(self, fullActionSet, optionsActionSet,
                                    verbose, initOption=0,
                                    numOptionsToConsider=0):
    '''
    Compute the average number of steps between every pair of states under
    the equiprobable random policy over primitive actions plus a growing
    number of options.
    '''
    toPlot = []
    numPrimitiveActions = 4

    actionSetToUse = fullActionSet[:numPrimitiveActions]

    for i in xrange(numOptionsToConsider + 1):
        avgs = []

        # I'm going to use a matrix encoding the random policy. For each
        # state I encode the equiprobable policy for primitive actions and
        # options. However, I need to add the condition that, if the
        # option's policy says terminate, it should have probability zero
        # for the equiprobable policy.
        pi = []
        for j in xrange(self.numStates - 1):
            pi.append([])
            for k in xrange(numPrimitiveActions):
                pi[j].append(1.0)

            if i > 0:
                for k in xrange(i):  # current number of options to consider
                    idx1 = i + initOption - 1
                    idx2 = numPrimitiveActions + k + initOption
                    nAction = optionsActionSet[idx1][fullActionSet[idx2][j]]
                    if nAction == "terminate":
                        pi[j].append(0.0)
                    else:
                        pi[j].append(1.0)

            denominator = sum(pi[j])
            for k in xrange(len(pi[j])):
                pi[j][k] = pi[j][k] / denominator

        if i > 0:
            actionSetToUse.append(
                fullActionSet[numPrimitiveActions + i - 1 + initOption])

        if verbose:
            print 'Obtaining shortest paths for ' + str(numPrimitiveActions) \
                + ' primitive actions and ' + str(i) + ' options.'

        for s in xrange(self.environment.getNumStates()):
            goalChanged = self.environment.defineGoalState(s)
            if goalChanged:
                bellman = Learning(self.gamma, self.environment,
                                   augmentActionSet=False)
                expectation = bellman.solveBellmanEquations(
                    pi, actionSetToUse, optionsActionSet)
                avgs.append(self._computeAvgOnMDP(-1.0 * expectation))

        toPlot.append(sum(avgs) / float(len(avgs)))

    if numOptionsToConsider > 0:
        plt = Plotter(self.outputPath, self.environment)
        plt.plotLine(xrange(len(toPlot)), toPlot, '# options',
                     'Avg. # steps', 'Avg. # steps between any two points',
                     'avg_num_steps.pdf')

    return toPlot