def __init__(self, dbHelp, moods):
    '''
    Create an empty playlist.

    dbHelp is kept as the playlist's database helper; moods is handed
    unchanged to the Learner that acts as the playlist generator.
    '''
    # Nothing queued yet and no mood classifiers selected.
    self._list = []
    self._currentI = 0
    self._moods = []
    generator = Learner(moods)
    self._generator = generator
    self._db = dbHelp
def test_learn_single_starting(self):
    """Teach the learner a won game in which it moved first, then replay it.

    The learner observes the board after every scripted round, is rewarded
    for the top-row win, and must then win a fresh game with the same row
    while the opponent repeats its scripted moves.
    """
    self.learner = Learner()
    self.other_player = Player()
    self.tictactoe = TicTacToe()
    fields = [(0, 0), (0, 1), (0, 2)]
    other_fields = [(1, 0), (1, 1)]

    # Scripted game: the opponent has one move fewer than the learner, so
    # the lookup for its last reply is allowed to fail.
    turn = 0
    while turn < len(fields):
        self.tictactoe.play(*fields[turn], self.learner)
        try:
            self.tictactoe.play(*other_fields[turn], self.other_player)
        except IndexError:
            pass
        self.learner.look(self.tictactoe)
        turn += 1

    self.assertEqual(fields, self.tictactoe.is_winner(self.learner))
    self.learner.learn(self.tictactoe, 100, self.learner)
    print(self.learner.rewards)

    # Replay on a fresh board, letting the learner pick its own moves.
    self.tictactoe = TicTacToe()
    while (self.tictactoe.available_moves()
           and not (self.tictactoe.is_winner(self.learner)
                    or self.tictactoe.is_winner(self.other_player))):
        chosen = self.learner.next_move(self.tictactoe)
        self.tictactoe.play(*chosen, self.learner)
        if other_fields:
            reply = other_fields.pop(0)
            self.tictactoe.play(*reply, self.other_player)
    print(self.tictactoe.board)

    self.assertFalse(self.tictactoe.is_winner(self.other_player))
    self.assertEqual(fields, self.tictactoe.is_winner(self.learner))
def initializeTeam(self):
    """Build a Team of two Learners that have distinct atomic actions.

    Both Learners are appended to the Learner population and the Team is
    appended to the Team population.
    """
    # Draw two actions; re-draw the second until it differs from the first.
    first_action = randint(0, Trainer.ATOMIC_ACTION_RANGE)
    second_action = randint(0, Trainer.ATOMIC_ACTION_RANGE)
    while second_action == first_action:
        second_action = randint(0, Trainer.ATOMIC_ACTION_RANGE)

    new_learners = [Learner(action = first_action),
                    Learner(action = second_action)]

    team = Team()
    for learner in new_learners:
        team.addLearner(learner)

    # Register the Learners, then the Team. Every new Team is, by
    # definition, a root team.
    for learner in new_learners:
        self.learner_pop.append(learner)
    self.team_pop.append(team)
def test_learn_single(self):
    """Teach the learner an unopposed top-row win, then let it replay it."""
    self.learner = Learner('X')
    self.other_player = Player('O')
    self.tictactoe = TicTacToe()
    fields = [(0, 0), (0, 1), (0, 2)]

    # Scripted game: only the learner moves.
    for row_col in fields:
        self.tictactoe.play(*row_col, self.learner)
    self.assertEqual(fields, self.tictactoe.is_winner(self.learner))

    self.learner.learn(self.tictactoe, 100, self.learner)
    print(self.learner.rewards)

    # Fresh board: the learner chooses its own moves until the game ends.
    self.tictactoe = TicTacToe()
    while (self.tictactoe.available_moves()
           and not (self.tictactoe.is_winner(self.learner)
                    or self.tictactoe.is_winner(self.other_player))):
        chosen = self.learner.next_move(self.tictactoe)
        self.tictactoe.play(*chosen, self.learner)
    print(self.tictactoe.board)

    self.assertFalse(self.tictactoe.is_winner(self.other_player))
    self.assertEqual(fields, self.tictactoe.is_winner(self.learner))
def feature_tab(base_dir):
    """Print one LaTeX-style table row per (model, dataset) combination.

    For every model prefix and dataset, the pickled matrix
    ``<prefix>X.pkl`` and the list ``<prefix>feature_names_sel.pkl`` are
    loaded from ``base_dir + dataset`` via ``Learner.obj_from_file``.  Each
    row shows the model label (only on the 'Neris' row), the dataset name,
    the column count of X, the constant 500, and the feature names at
    positions 1-4 truncated to eight characters.

    Parameters
    ----------
    base_dir : str
        Prefix that is concatenated directly with the dataset name, so it
        must already end with a path separator.
    """
    display_names = {
        'bag': 'Bag-of-word',
        'tf': 'Tf-idf',
        'bag-ngram': 'Bag-of-word-NGram',
        'tf-ngram': 'Tf-idf-NGram',
    }
    for model_name in ['bag', 'bag-ngram', 'tf', 'tf-ngram']:
        model_n = display_names[model_name]
        # File-name prefix; kept in its own variable so the loop variable
        # is not rebound.
        file_prefix = model_name + '_'
        for dataset in ['Neris', 'Murlo', 'Virut', 'Sogou']:
            # The model label is printed only on the first dataset row.
            if dataset != 'Neris':
                model_n = ''
            line = model_n + ' & ' + dataset + '& '
            output_dir = base_dir + dataset
            X = Learner.obj_from_file(
                os.path.join(output_dir, file_prefix + "X.pkl"))
            line += str(X.shape[1]) + ' & 500 & '
            feature_names = Learner.obj_from_file(
                os.path.join(output_dir, file_prefix + "feature_names_sel.pkl"))
            # NOTE(review): starts at index 1, so the first feature name is
            # never shown - confirm this is intended.
            for i in range(1, 5):
                feature_name = feature_names[i]
                if len(feature_name) > 8:
                    feature_name = str(feature_name)[0:8]
                line += feature_name + ', '
            line += ' ...\\\\ '
            # Parenthesised form works on Python 2 and Python 3 alike; the
            # original `print line` statement is a SyntaxError on Python 3.
            print(line)
class LearnActing(Learn):
    '''
    Wraps a Learner that chooses among the actions "turn", "go_away"
    and "come".
    '''

    def __init__(self):
        '''
        Initialise the base class and create the action learner.
        '''
        super(LearnActing, self).__init__()
        available_actions = ["turn", "go_away", "come"]
        self.actionLearner = Learner(available_actions)

    def learnedAct(self):
        '''Pick an action, log it with the learner's context, return it.'''
        learner = self.actionLearner
        chosen = learner.chooseAction()
        self.learnerLogger("A", learner.getContext(), chosen)
        return chosen

    def setContext(self, context):
        '''Forward the context to the action learner and remember it.'''
        self.actionLearner.setContext(context)
        self.context = context

    def reward(self, reward):
        '''Apply the reward to the action learner and log it.'''
        learner = self.actionLearner
        learner.reward(reward)
        self.rewardLogger("A",
                          learner.getContext(),
                          learner.getLastAction(),
                          reward)

    def acted(self, action):
        '''Tell the learner to skip when the performed action is empty.'''
        nothing_done = action == ""
        if nothing_done:
            self.actionLearner.skip()
def test_set_reward(self):
    """A reward stored for an (action, board) pair is read back unchanged."""
    expected = 100
    self.learner = Learner()
    game = TicTacToe()
    first_free = game.available_moves()[0]

    self.learner.set_reward(expected, first_free, game.board)

    stored = self.learner.get_reward(first_free, game.board)
    self.assertEqual(expected, stored)
def test_look_other_player(self):
    """After look(), the opponent's single move is in the learner's history."""
    self.learner = Learner()
    self.other_player = RandomPlayer()
    game = TicTacToe()
    move = game.available_moves()[0]
    game.play(move[0], move[1], self.other_player)

    self.learner.look(game)

    opponent = self.other_player.name
    self.assertEqual(1, len(self.learner.history[opponent]))
    self.assertEqual(move, self.learner.history[opponent][0])
def __init__(self, config, method = 'classification'):
    """Initialise through the Learner base constructor.

    Both arguments are forwarded unchanged; method defaults to
    'classification'.
    """
    # A logging / result-path setup that used to run here is disabled;
    # construction is delegated entirely to Learner.__init__.
    Learner.__init__(self, config, method)
def __init__(self, config, method='classification'):
    """Initialise through the Learner base constructor.

    Both arguments are forwarded unchanged; method defaults to
    'classification'.
    """
    # A logging / result-path setup that used to run here is disabled;
    # construction is delegated entirely to Learner.__init__.
    Learner.__init__(self, config, method)
def mutateLearners(self, team):
    """Probabilistically swap Learners of ``team`` for mutated copies.

    Each selected Learner is copied, the copy replaces the original inside
    the Team, the copy's program is mutated and (with a further coin flip)
    its action too; the copy is then appended to the Learner population.
    """
    # Walk a snapshot: the Team's own list changes while we go.
    for original in team.learners.copy():
        # Most Learners are left untouched.
        if not weightedCoinFlip(Trainer.P_MUT_LEARNER):
            continue

        # Sanity check before mutating.
        if team.countAtomicActions() == 0:
            print(
                "WARNING - Trainer::mutateLearners - No atomic actions in Team!"
            )

        # When this Learner holds the Team's only atomic action, force the
        # mutated action to stay atomic (never a Team pointer).
        is_last_atomic = (original.isActionAtomic()
                          and team.countAtomicActions() == 1)
        p_atomic = 1.0 if is_last_atomic else Trainer.P_ATOMIC

        # Mutate a copy so other Teams that share the original Learner keep
        # pointing at the unmodified one.
        mutant = Learner(learner=original)
        team.removeLearner(original)
        team.addLearner(mutant)

        mutant.mutateProgram()
        if weightedCoinFlip(Trainer.P_MUT_LEARNER_ACTION):
            self.mutateLearnerAction(mutant, p_atomic)

        # Sanity check after mutating.
        if team.countAtomicActions() == 0:
            print(
                "WARNING - Trainer::mutateLearners - No atomic actions in Team after mutation!"
            )

        self.learner_pop.append(mutant)
def test_human_learner_players(self):
    """Play one match, human versus learner, through the KtinterGui front end."""
    players = [HumanPlayer(), Learner()]
    game = TicTacToe()
    gui = KtinterGui(game)
    Match(gui, game, players=players).play()
def test_learner_random_players(self):
    """Play one match, learner versus random player, through KtinterGui."""
    players = [Learner(), RandomPlayer()]
    game = TicTacToe()
    gui = KtinterGui(game)
    Match(gui, game, players=players).play()
def test_human_learner_players_cl(self):
    """Play one match, human versus learner, through the command-line GUI."""
    players = [HumanPlayer(), Learner()]
    game = TicTacToe()
    gui = CommandlineGui(game)
    Match(gui, game, players=players).play()
def find_bias_points(self, ydata, predict, index):
    """
    Find the bias points.

    A position counts as biased when the absolute difference between its
    true and predicted value exceeds ``rmse * (1 + self.rmse_percent)``.

    Parameters
    ----------
    ydata: np.array
    predict: np.array
    index: array
        Indexes of the random values.

    Returns
    ----------
    bias : array
        the indexes of the bias points.
    """
    # Only the RMSE part of the evaluation is needed here.
    rmse, _ = Learner.evaluate_predicated(ydata, predict)
    rmse_threshold = rmse * (1 + self.rmse_percent)
    return [
        index[pos]
        for pos in range(len(index))
        if math.fabs(ydata[pos] - predict[pos]) > rmse_threshold
    ]
def main():
    """Start one Learner thread and two Player threads, then wait for all."""
    # import selenium.webdriver as webdriver
    thread_id = 1
    input_shape = (3, 128, 128)
    action_space = 2
    capacity = 100000

    # Browser options shared by every Player.
    options = webdriver.ChromeOptions()
    for flag in ('headless', '--mute-audio', 'window-size=400x600'):
        options.add_argument(flag)

    learner = Learner(
        thread_id,
        input_shape,
        action_space,
        capacity,
        use_cuda=False,
        num_frames=1400000,
        batch_size=32,
        gamma=0.99,
        epsilon_start=1.0,
        epsilon_final=0.01,
        epsilon_decay=30000,
        network_update_rate=30,
    )

    logger.warning('Starting threads.')
    learner.start()
    threads = [learner]
    for _ in range(2):
        player = Player(2, learner, options, save=False)
        player.start()
        threads.append(player)
    logger.warning('All threads started.')

    # Block until every thread has finished.
    for worker in threads:
        worker.join()
    print("Exiting Main Thread")
def __init__(self, env):
    """Store the environment and fill the population with fresh Learners.

    Trainer.POPULATION_SIZE Learners are created with default arguments.
    """
    self.learner_pop = []
    self.env = env
    remaining = Trainer.POPULATION_SIZE
    while remaining > 0:
        self.learner_pop.append(Learner())
        remaining -= 1
def __init__(self, f, rid, skip, fail, replicas, clients, log_file, loss_rate=0, debug=True):
    """Set up protocol state for one replica and bind its UDP socket.

    The replica owns an Acceptor, a Proposer and a Learner.  With ``f`` as
    given, the group size is ``2 * f + 1`` and a majority is ``f + 1``.
    The receiving socket is bound to the host/port found at
    ``replicas[rid]``.  ``debug`` is accepted but not used here.
    """
    # Identity and configuration as passed in.
    self.f = f
    self.rid = rid
    self.skip = skip
    self.replica_list = replicas
    self.fail = fail  # num of forced crash replicas, for test 2, 3

    # Quorum arithmetic.
    self.total_p = 2 * f + 1
    self.majority = f + 1

    # Protocol roles.
    self.acceptor = Acceptor(self.rid, 0)
    self.proposer = Proposer(self.f, self.rid, self.skip)
    self.learner = Learner(self.f, self.rid, loss_rate)

    # Mutable protocol state.
    self.view = 0  # current view num
    self.processed_request = {}  # request is added when replied to client
    self.viwechange_log = {}
    self.client_list = clients  # client hosts ports
    self.slot_num = 0
    self.loss_rate = loss_rate
    self.isLeader = False
    self.is_live = True
    self.log_file = log_file  # log_file path
    self.setLeader()

    # Receiving socket.
    print("Setting up socket")
    own_address = replicas[rid]
    self.host = own_address.host
    self.port = replicas[rid].port
    self.timeout = PROCESS_TIMEOUT
    self.s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    self.s.bind((self.host, self.port))
    self.s.settimeout(self.timeout)
    print("Socket set")
def run_trial_with_objective(objective_type_data):
    """Train a Learner for one (objective type, dataset) pair and evaluate it.

    Returns the pair (testing result, training result) produced by the
    Evaluator's error_fairness / error_fairness_training methods.
    """
    objective_type, data = objective_type_data
    print("Learning with squared error + fairness_{}".format(objective_type))

    # 'Over+Under' stands for both fairness objectives at once.
    types = set([objective_type])
    if objective_type == 'Over+Under':
        types = {'Underestimation', 'Overestimation'}

    learner = Learner(data, d, lam=1e-3)
    learner.learn(types, epochs=epoch, display=False)

    # Evaluating
    evaluator = Evaluator(data, learner)
    return evaluator.error_fairness(), evaluator.error_fairness_training()
def train_and_save(X, y, model_name, classifier_dir): outfile = os.path.join(classifier_dir, model_name + 'cv_res_sel.json') cv_res = dict() results = dict() thread1 = Thread(target=Learner.train_classifier, args=(Learner.train_tree, X, y, True, results, 'tree')) thread2 = Thread(target=Learner.train_classifier, args=(Learner.train_bayes, X, y, True, results, 'bayes')) thread3 = Thread(target=Learner.train_classifier, args=(Learner.train_logistic, X, y, True, results, 'logistic')) thread4 = Thread(target=Learner.train_classifier, args=(Learner.train_SVM, X, y, True, results, 'svm')) thread5 = Thread(target=Learner.train_classifier, args=(Learner.ocsvm, X, y, True, results, 'ocsvm')) thread1.start() thread2.start() thread3.start() thread4.start() thread5.start() thread1.join() thread2.join() thread3.join() thread4.join() thread5.join() clf_tree, cv_res['tree'] = results['tree'] clf_bayes, cv_res['bayes'] = results['bayes'] clf_logistic, cv_res['logistic'] = results['logistic'] clf_svm, cv_res['svm'] = results['svm'] clf_ocsvm, cv_res['ocsvm'] = results['ocsvm'] Learner.save2file(clf_tree, os.path.join(classifier_dir, model_name + 'tree_sel.pkl')) Learner.save2file(clf_bayes, os.path.join(classifier_dir, model_name + 'bayes_sel.pkl')) Learner.save2file(clf_logistic, os.path.join(classifier_dir, model_name + 'logistic_sel.pkl')) Learner.save2file(clf_svm, os.path.join(classifier_dir, model_name + 'svm_sel.pkl')) Learner.save2file(clf_ocsvm, os.path.join(classifier_dir, model_name + 'ocsvm_sel.pkl')) CtuCCAnalyzer.logger.info('Threads Done! Saving cv_res...') json.dump(cv_res, codecs.open(outfile, 'w', encoding='utf-8')) """
def __init__(self, name):
    '''
    Create the subject and verb lemma learners.

    A truthy name gives the subject learner four lemmas ("S1".."S4"),
    otherwise only "S1" and "S2".
    '''
    super(LearnUttering, self).__init__()
    subject_lemmas = ["S1", "S2", "S3", "S4"] if name else ["S1", "S2"]
    self.subjectLemmaLearner = Learner(subject_lemmas)
    verb_lemmas = ["V1", "V2", "V3"]
    self.verbLemmaLearner = Learner(verb_lemmas)
def zero_day_helper(base_dir, src_name, model_name, algorithm, target_name, normal_dir=None):
    """Apply a classifier trained on ``src_name`` to instances of ``target_name``.

    Instances come from ``base_dir/target_name`` (passed as the second
    argument of Learner.gen_instances) or, when ``normal_dir`` is given,
    from ``normal_dir/target_name`` (passed as the first argument).  The
    vectorizer, selector and model are loaded from ``base_dir/src_name``.
    Returns whatever Learner.predict returns.
    """
    vec_dir = os.path.join(base_dir, src_name)
    model_path = os.path.join(vec_dir, model_name + algorithm + '_sel.pkl')
    target_path = os.path.join(base_dir, target_name)

    if normal_dir is not None:
        normal_path = os.path.join(normal_dir, target_name)
        data, labels = Learner.gen_instances(normal_path, '')
    else:
        data, labels = Learner.gen_instances('', target_path)

    vec = Learner.obj_from_file(os.path.join(vec_dir, model_name + 'vec.pkl'))
    vec_sel = Learner.obj_from_file(
        os.path.join(vec_dir, model_name + 'vec_sel.pkl'))

    # Only the transformed data is needed from here on.
    data, _, _ = Learner.gen_X_matrix(data, vec=vec)

    model = Learner.obj_from_file(model_path)
    return Learner.predict(model,
                           vec_sel,
                           data,
                           labels=labels,
                           src_name=src_name,
                           model_name=model_name)
def __init__(self, env, test_env=False):
    """Store the environments and create the initial Learner population.

    A warning is printed when a test environment is combined with
    Trainer.FITNESS_SHARING; a CSV header is written through
    write_output when Trainer.VERBOSE is set.
    """
    self.learner_pop = []

    if test_env and Trainer.FITNESS_SHARING:
        print(
            "WARNING: Test environment not used in conjunction with fitness sharing"
        )

    self.env = env
    self.test_env = test_env

    if Trainer.VERBOSE:
        self.write_output("Generation,Average Score,Top Score,Successes\n")

    remaining = Trainer.POPULATION_SIZE
    while remaining > 0:
        self.learner_pop.append(Learner())
        remaining -= 1
def get_y(self, point):
    """
    Use KNN algorithm to find the y value of the point.

    Every stored sample is pushed into a MaxHeap created with ``self.k``;
    the result is the mean of the y values the heap returns.

    Parameters
    ----------
    point: array

    Returns
    ----------
    y: float
    """
    heap = MaxHeap(self.k)
    for i in range(len(self.x)):
        sample = self.x[i]
        distance = Learner.euclidean_distance(sample, point)
        # The position i sits second in the tuple so that it breaks ties
        # between equal distances.
        heap.add((distance, i, sample, self.y[i]))

    values = [entry[3] for entry in heap.heapsort()]
    total = np.array(values).sum()
    return total/len(values)
class Playlist:
    '''
    Playlist built from songs in the database, either by mood or by
    similarity to a given song, using a Learner to categorise songs.
    '''

    # Row keys that are not numeric song / mood data.
    _excludeSongs = [commonHash, commonId, commonPath,
                     commonTitle, commonArtist]
    _excludeMoods = [commonHash, commonId]

    def __init__(self, dbHelp, moods):
        '''
        construct playlist object

        dbHelp is the database helper; moods is passed to the Learner
        that generates the playlist.
        '''
        self._list = []
        self._currentI = 0
        self._moods = []
        self._generator = Learner(moods)
        self._db = dbHelp

    def add_mood(self, mood):
        ''' add mood classifier to playlist '''
        self._moods.append(mood)

    def add_moods(self, moods):
        ''' add a batch of mood classifiers to playlist '''
        self._moods += moods

    def reset_mood(self):
        ''' reset mood classifiers at work here '''
        self._moods = []

    def _get_songs_and_moods(self, songs=None, moods=None):
        '''
        retrieve (if necessary) songs and attribute data, match to moods
        from mood table and pull out hashes

        Returns (hashes, songs, moods); all three are empty lists when
        there are no songs or no mood rows.
        '''
        # Identity test: `== None` would compare elementwise if an array
        # were passed in.
        if songs is None:
            songs = self._db.all_songs()
        if moods is None:
            moods = self._db.all_song_moods()
        if len(moods) == 0 or len(songs) == 0:
            return [], [], []
        songs = np.array(self._prune(songs, Playlist._excludeSongs))
        moods = self._prune(moods, Playlist._excludeMoods)
        moods = self._get_mood_list(songs, moods)
        # Column 0 is the hash put there by _prune; the rest is data.
        hashes = songs[:, 0]
        songs = songs[:, 1:].astype(float)
        return hashes, songs, moods

    def _prune(self, stuff, exclude):
        '''
        take out entries that do not represent data about the songs

        Each output row starts with the row's hash, followed by every
        value whose key is not in exclude.
        '''
        newStuff = []
        for row in stuff:
            newRow = [row[commonHash]]
            # keys() is kept on purpose: the rows come from the database
            # helper and may not iterate over their keys.
            for item in row.keys():
                if item not in exclude:
                    newRow.append(row[item])
            newStuff.append(newRow)
        return newStuff

    def _compute_model(self):
        '''
        compute the model of given songs with their ratings

        Returns (hashes, songs, moods), or three empty lists (after
        printing a message) when there is nothing to model.
        '''
        hashes, songs, moods = self._get_songs_and_moods()
        if len(hashes) == 0 or all(x == -1 for x in moods):
            print ("Must have songs in database and assign at "
                   "least one song to a mood.")
            return [], [], []
        self._generator.model_songs(songs, moods)
        return hashes, songs, moods

    def generate_list_mood(self):
        '''
        generate a playlist after computing a model of the songs and mood
        assignments currently in the db

        A song is kept when its summed score over the selected moods
        exceeds .7.
        '''
        hashes, songs, moods = self._compute_model()
        if len(hashes) == 0:
            return
        header, categs = self._generator.categorize_songs_probab(songs)
        moodsIndices = [header.index(m) for m in self._moods]
        mergeCats = []
        for j, x in enumerate(categs):
            # Compute the combined score once per song.
            score = sum([x[i] for i in moodsIndices])
            if score > .7:
                mergeCats.append((hashes[j], score))
        # NOTE(review): the tuples are (hash, score), so this orders by
        # hash first - confirm whether ordering by score was intended.
        self._to_list([pair[0] for pair in sorted(mergeCats, reverse=True)])

    def generate_list_song(self, kernelsong):
        ''' generates playlist based on a given song '''
        hashes, songs, moods = self._compute_model()
        if len(hashes) == 0:
            return
        header, categs = self._generator.categorize_songs_probab(songs)
        kernelIndex = list(hashes).index(str(kernelsong))
        kernelish = categs[kernelIndex]
        self._to_list([hashes[i] for i, x in enumerate(categs)
                       if self._similar(x, kernelish)])

    def _similar(self, one, two):
        '''
        decides if two same-length lists are similar

        Similar means the mean absolute element difference is below .15.
        '''
        avgDiff = sum([abs(two[i] - x) for i, x in enumerate(one)])
        avgDiff = avgDiff/len(one)
        return avgDiff < .15

    def _to_list(self, songs):
        ''' store the given list of hashes as the playlist '''
        # shuffle(files)
        self._list = songs

    def _get_mood_list(self, songs, moods):
        '''
        match mood table data to song data

        Returns one entry per song: the mood of the last mood row with
        the same hash, or -1 when there is none.
        '''
        ms = []
        for i, x in enumerate(songs):
            ms.append(-1)
            for m in moods:
                if x[0] == m[0]:
                    ms[i] = m[1]
        return ms

    def get_next_song(self):
        '''
        return next song in list

        Advances the position first; check has_next_song() beforehand.
        '''
        self._currentI += 1
        return self.convert_hash_to_song(self._list[self._currentI])

    def has_next_song(self):
        ''' ask if the playlist has a next song '''
        return self._currentI < len(self._list) - 1

    def get_current_song(self):
        ''' return currently playing song '''
        return self.convert_hash_to_song(self._list[self._currentI])

    def convert_hash_to_song(self, has):
        ''' return a song object associated with the given hash '''
        return Song.song_from_filepath(self._db.hash_to_file(has))

    def get_list(self, length=50):
        '''
        return playlist of given length

        The first `length` entries are converted to songs and shuffled.
        '''
        songs = [self.convert_hash_to_song(x) for x in self._list[:length]]
        shuffle(songs)
        return songs
def __init__(self):
    '''
    Initialise the base class and create the action learner, which
    chooses among "turn", "go_away" and "come".
    '''
    super(LearnActing, self).__init__()
    available_actions = ["turn", "go_away", "come"]
    self.actionLearner = Learner(available_actions)
# Hyperparameter sweep: train one Learner per combination of the candidate
# value lists and record the best maximum score.
update_everys = [2]
configs = []
params = [batch_sizes, taus, lr_actors, lr_critics, update_everys]
Hyperparams = namedtuple(
    'Hyperparams',
    ['batch_size', 'tau', 'lr_actor', 'lr_critic', 'update_every'])
# One Hyperparams record per element of the Cartesian product.  Note that
# the loop variable rebinds the name `params`.
for params in itertools.product(*params):
    configs.append(Hyperparams(*params))
random_seed = 2
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
best_max = 0.0
# NOTE(review): no close() for this file is visible in this part of the
# script - confirm it is closed further down.
fout = open("results.txt", "w")
for config in configs:
    learner = Learner(state_size, action_size, random_seed, num_agents,
                      device, config)
    max_score, episode = train(learner, n_episodes=10000)
    # Track the best configuration seen so far.
    if max_score > best_max:
        best_max = max_score
        best_config = config
    print("******************************************")
    # NOTE(review): unlike the other writes, this one has no trailing
    # newline.
    fout.write("******************************************")
    print("Hyperparameters : {}".format(config))
    fout.write("Hyperparameters : {}\n".format(config))
    print(
        "Reached a maximum average of scores (over last 100 episodes) = {:.2f} after {:d} episodes"
        .format(max_score, episode))
    fout.write(
        "Reached a maximum average of scores (over last 100 episodes) = {:.2f} after {:d} episodes\n"
        .format(max_score, episode))
    print("\t\t --------------- \n\n")
def __init__(self, config, type = 'LDA', method = 'classification'):
    """Initialise through the Learner base constructor and record the type.

    config and method are forwarded to Learner.__init__; type (default
    'LDA') is stored on the instance as ``self.type``.
    """
    Learner.__init__(self, config, method)
    # `type` shadows the builtin only inside this constructor; the name is
    # part of the public signature.
    self.type = type
def __init__(self, config, method='classification'):
    """Initialise through the Learner base constructor.

    Both arguments are forwarded unchanged; method defaults to
    'classification'.
    """
    Learner.__init__(self, config, method)
Created on Sat Apr 01 13:20:39 2017 @author: Carl """
from Stocks import Stocks
from Learner import Learner
from Market import Market
from Twitter import Twitter
import datetime

# Date range covered by the stock data.
start = datetime.date(2010, 1, 1)
end = datetime.date(2017, 3, 31)

stocks = Stocks(start, end, ['GOOG', 'AAPL', 'SPY'])
learner = Learner(stocks)
market = Market(stocks, 1000000, 1)
twitter = Twitter('./PickledTweets')

# Work with the first ticker reported by the Stocks object.
ticker = stocks.get_tickers()[0]
# Use every available feature except the four listed price columns.
features = [
    feature for feature in stocks.get_features()
    if feature not in ['Open', 'High', 'Low', 'Close']
]
target = 'Daily Log Return Direction'
test_size = 0.1
scale_data = True

learner.prepare_data(ticker, features, target, test_size, scale_data)
print("---Classifiers---")
learner.train_classifiers()
class TestLearner(unittest.TestCase):
    """Tests for Learner rewards, history tracking and learning on TicTacToe."""

    def test_next_move_init(self):
        """A fresh learner reports reward 0 for a move on a fresh board."""
        self.learner = Learner()
        game = TicTacToe()
        reward = self.learner.get_reward((0, 0), game.board)
        self.assertEqual(0, reward)

    def test_set_reward(self):
        """A stored reward is read back unchanged."""
        expected = 100
        self.learner = Learner()
        game = TicTacToe()
        first_free = game.available_moves()[0]
        self.learner.set_reward(expected, first_free, game.board)
        stored = self.learner.get_reward(first_free, game.board)
        self.assertEqual(expected, stored)

    def test_look_player(self):
        """Looking twice at the same board records the learner's move once."""
        self.learner = Learner()
        game = TicTacToe()
        move = game.available_moves()[0]
        game.play(move[0], move[1], self.learner)
        for _ in range(2):
            self.learner.look(game)
            me = self.learner.name
            self.assertEqual(1, len(self.learner.history[me]))
            self.assertEqual(move, self.learner.history[me][0])

    def test_look_other_player(self):
        """After look(), the opponent's single move is in the history."""
        self.learner = Learner()
        self.other_player = RandomPlayer()
        game = TicTacToe()
        move = game.available_moves()[0]
        game.play(move[0], move[1], self.other_player)
        self.learner.look(game)
        opponent = self.other_player.name
        self.assertEqual(1, len(self.learner.history[opponent]))
        self.assertEqual(move, self.learner.history[opponent][0])

    def test_look_both_players(self):
        """Moves of both players end up in their own history lists."""
        self.learner = Learner()
        self.other_player = RandomPlayer()
        game = TicTacToe()

        own_move = game.available_moves()[0]
        game.play(own_move[0], own_move[1], self.learner)
        self.learner.look(game)
        me = self.learner.name
        self.assertEqual(1, len(self.learner.history[me]))
        self.assertEqual(own_move, self.learner.history[me][0])

        reply = game.available_moves()[0]
        game.play(reply[0], reply[1], self.other_player)
        self.learner.look(game)
        opponent = self.other_player.name
        self.assertEqual(1, len(self.learner.history[opponent]))
        self.assertEqual(reply, self.learner.history[opponent][0])

    def test_play(self):
        """The learner can place a mark on the first free field."""
        self.learner = Learner()
        self.tictactoe = TicTacToe()
        move = self.tictactoe.available_moves()[0]
        self.tictactoe.play(move[0], move[1], self.learner)

    def test_learn_single(self):
        """Teach an unopposed top-row win, then let the learner replay it."""
        self.learner = Learner('X')
        self.other_player = Player('O')
        self.tictactoe = TicTacToe()
        fields = [(0, 0), (0, 1), (0, 2)]

        for row_col in fields:
            self.tictactoe.play(*row_col, self.learner)
        self.assertEqual(fields, self.tictactoe.is_winner(self.learner))

        self.learner.learn(self.tictactoe, 100, self.learner)
        print(self.learner.rewards)

        self.tictactoe = TicTacToe()
        while (self.tictactoe.available_moves()
               and not (self.tictactoe.is_winner(self.learner)
                        or self.tictactoe.is_winner(self.other_player))):
            chosen = self.learner.next_move(self.tictactoe)
            self.tictactoe.play(*chosen, self.learner)
        print(self.tictactoe.board)

        self.assertFalse(self.tictactoe.is_winner(self.other_player))
        self.assertEqual(fields, self.tictactoe.is_winner(self.learner))

    def test_learn_single_starting(self):
        """Teach a won game in which the learner moved first, then replay it."""
        self.learner = Learner()
        self.other_player = Player()
        self.tictactoe = TicTacToe()
        fields = [(0, 0), (0, 1), (0, 2)]
        other_fields = [(1, 0), (1, 1)]

        # The opponent has one scripted move fewer than the learner, so the
        # lookup for its last reply is allowed to fail.
        turn = 0
        while turn < len(fields):
            self.tictactoe.play(*fields[turn], self.learner)
            try:
                self.tictactoe.play(*other_fields[turn], self.other_player)
            except IndexError:
                pass
            self.learner.look(self.tictactoe)
            turn += 1

        self.assertEqual(fields, self.tictactoe.is_winner(self.learner))
        self.learner.learn(self.tictactoe, 100, self.learner)
        print(self.learner.rewards)

        self.tictactoe = TicTacToe()
        while (self.tictactoe.available_moves()
               and not (self.tictactoe.is_winner(self.learner)
                        or self.tictactoe.is_winner(self.other_player))):
            chosen = self.learner.next_move(self.tictactoe)
            self.tictactoe.play(*chosen, self.learner)
            if other_fields:
                reply = other_fields.pop(0)
                self.tictactoe.play(*reply, self.other_player)
        print(self.tictactoe.board)

        self.assertFalse(self.tictactoe.is_winner(self.other_player))
        self.assertEqual(fields, self.tictactoe.is_winner(self.learner))

    def test_learn_single_otherstarting(self):
        """Teach a won game, then replay it with the opponent moving first."""
        self.learner = Learner()
        self.other_player = Player()
        self.tictactoe = TicTacToe()
        fields = [(0, 0), (0, 1), (0, 2)]
        other_fields = [(1, 0), (1, 1), (2, 2)]

        turn = 0
        while turn < len(fields):
            self.tictactoe.play(*fields[turn], self.learner)
            try:
                self.tictactoe.play(*other_fields[turn], self.other_player)
            except IndexError:
                pass
            turn += 1

        self.assertEqual(fields, self.tictactoe.is_winner(self.learner))
        self.learner.learn(self.tictactoe, 100, self.learner)
        print(self.learner.rewards)

        # Replay: the opponent moves before the learner in every round.
        self.tictactoe = TicTacToe()
        while (self.tictactoe.available_moves()
               and not (self.tictactoe.is_winner(self.learner)
                        or self.tictactoe.is_winner(self.other_player))):
            opening = other_fields.pop(0)
            self.tictactoe.play(*opening, self.other_player)
            chosen = self.learner.next_move(self.tictactoe)
            self.tictactoe.play(*chosen, self.learner)
        print(self.tictactoe.board)

        self.assertFalse(self.tictactoe.is_winner(self.other_player))
        self.assertEqual(fields, self.tictactoe.is_winner(self.learner))
class Replica:
    # One replica of the replicated service.  It owns an Acceptor, a
    # Proposer and a Learner and dispatches every message received on its
    # UDP socket to them according to the message's 'type' field.  The
    # leader of a view is the replica whose rid equals view % total_p.

    def __init__(self, f, rid, skip, fail, replicas, clients, log_file, loss_rate=0, debug=True):
        # Set up protocol state and bind the receiving socket to the
        # host/port found at replicas[rid].  `debug` is accepted but not
        # used in this constructor.
        self.f = f
        self.rid = rid
        self.skip = skip
        self.replica_list = replicas
        self.fail = fail  # num of forced crash replicas, for test 2, 3
        self.total_p = 2 * f + 1
        self.majority = f + 1
        self.acceptor = Acceptor(self.rid, 0)
        self.proposer = Proposer(self.f, self.rid, self.skip)
        self.learner = Learner(self.f, self.rid, loss_rate)
        self.view = 0  # current view num
        self.processed_request = {}  # request is added when replied to client
        # view number -> {replica id: True} votes for that view change
        self.viwechange_log = {}
        self.client_list = clients  # client hosts ports
        self.slot_num = 0
        self.loss_rate = loss_rate
        self.isLeader = False
        self.is_live = True
        self.log_file = log_file  # log_file path
        self.setLeader()
        print("Setting up socket")
        # set up receiving socket
        self.host = replicas[rid].host
        self.port = replicas[rid].port
        self.timeout = PROCESS_TIMEOUT
        self.s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.s.bind((self.host, self.port))
        self.s.settimeout(self.timeout)
        print("Socket set")

    def setLeader(self):
        # This replica leads exactly when its rid equals view % total_p.
        if self.view % self.total_p == self.rid:
            self.isLeader = True
        else:
            self.isLeader = False

    def run(self):
        # Receive loop: hand every message to handle_msg until interrupted
        # from the keyboard; a socket timeout only prints a notice.
        while True:
            try:
                receive(self.s, self.handle_msg)
            except KeyboardInterrupt:
                return
            except socket.timeout:
                print("waiting for message")

    def handle_msg(self, msg):
        # Dispatch one message by msg['type'].  The early returns in the
        # VIEWCHANGE and PREPARE branches skip the final "Finish message"
        # print.
        print("Handling message", msg)
        # if self.debug:
        #     self.debug_log.write("Processing: " + str(msg))
        if msg['type'] == ACCEPT:
            # learner gets ACCEPT message from acceptors
            if self.learner.process_accept(msg):
                # if self.debug:
                #     self.debug_log.write
                # print("Learner decided: " + str(msg))
                # decided this slot and check execution
                self.learner.decide(msg['slot_num'], msg['proposal_id'])
                if not self.learner.execute(self.log_file):
                    self.learner.query_others(self.replica_list, self.loss_rate)
                    # self.propose_viewchange()
        elif msg['type'] == PROPOSE:
            # acceptor gets PROPOSE message from proposer
            print("Get Decree:", msg)
            if self.view < msg['proposal_id']:
                # I am in an older view, so I must have lost a view change
                # message; update the view now
                # if self.debug:
                #     self.debug_log.write
                print("View changed by: " + str(msg))
                self.view = msg['proposal_id']
                if self.isLeader:
                    self.isLeader = False
                self.acceptor.current_proposal_id = self.view
                self.acceptor.current_proposer_id = self.view % self.total_p
            self.acceptor.process_proposal(msg, self.replica_list, self.loss_rate)
        elif msg['type'] == REQUEST:
            if self.isLeader:
                # testcase 2 and 3
                if self.fail > -1 and self.rid < self.fail:
                    logging.info("force the primary %s to crash" % (str(self.rid)))
                    logging.info("server id %s crashes" % (str(self.rid)))
                    # NOTE(review): exit() is the interactive helper;
                    # sys.exit() is the usual call in programs.
                    exit()
                # leader proposes the value
                # print("Proposing for this request: " + str(msg))
                self.proposer.propose_request(msg, self.replica_list, self.loss_rate)
            elif msg['resend_id'] > 0:
                # every non-leader replica checks resend_id; a value > 0
                # triggers a view change
                print("Viewchanging for this timeout request: " + str(msg))
                self.propose_viewchange()
        elif msg['type'] == VIEWCHANGE:
            if msg['new_view'] < self.view:
                return
            # another replica wants me to become leader
            if self.process_viewchange(msg) and not self.isLeader:
                # a majority of replicas want me to become leader
                self.view = msg['new_view']
                self.acceptor.current_proposal_id = self.view
                self.acceptor.current_proposer_id = self.view % self.total_p
                print("I'm the new leader by majority: " + str(msg))
                self.proposer.msg_log = {}
                self.proposer.acc_counter = {}
                self.proposer.prepare(msg['new_view'], self.replica_list, self.loss_rate)
        elif msg['type'] == PREPARE:
            if msg['proposal_id'] < self.view:
                return
            print(u'New leader {} want to prepare: {}'.format(
                msg['proposal_id'], str(msg)))
            # I received a view change confirmation; help the new primary
            # to prepare
            self.view = msg['proposal_id']
            self.acceptor.current_proposal_id = self.view
            self.acceptor.current_proposer_id = self.view % self.total_p
            if self.isLeader and self.view % self.total_p != self.rid:
                self.isLeader = False
            self.acceptor.promise(msg, self.replica_list, self.loss_rate)
        elif msg['type'] == PROMISE:
            # I'm going to become the new leader, preparing to propose
            if msg['proposal_id'] % self.total_p == self.rid:
                self.proposer.addAcceptence(msg)
                """ getting a majority of promise change isLeader to True after accepted by majority process the received acceptance logs to build up the profile re-propose everything up to highest slot_num send to client: I'm new leader """
                if not self.isLeader and self.proposer.isAcceptedByQuorum():
                    print(u'{} becomes the new leader, proposing now'.format(
                        msg['proposal_id']))
                    self.isLeader = True
                    # notify clients of the view change
                    for c in self.client_list:
                        send(c.host, c.port, {'type': VIEWCHANGE}, self.loss_rate)
                    proposal_list = self.proposer.getProposalList()
                    for slot_idx, proposal in proposal_list.items():
                        logging.info('leader %s propose %s', str(self.view),
                                     str(slot_idx))
                        self.proposer.propose(slot_idx, proposal,
                                              self.replica_list, self.loss_rate)
        elif msg['type'] == QUERY:
            # another learner asks about a slot_num it does not have
            if not self.learner.process_query(msg, self.replica_list, self.loss_rate):
                print("I dont know about this query, proposing view change: " +
                      str(msg))
                # I also don't have it
                self.propose_viewchange()
        elif msg['type'] == RESPOND:
            if msg['slot_num'] not in self.learner.decide_log:
                print("Others tell me about this learn: " + str(msg))
                if not self.learner.learn(msg, self.log_file):
                    # if stuck on some slots, query others first
                    self.learner.query_others(self.replica_list, self.loss_rate)
        print("Finish message", msg)

    def propose_viewchange(self):
        # Send a VIEWCHANGE vote for view + 1 to the replica that would
        # lead that view (index new_view % total_p in replica_list).
        new_view = self.view + 1
        msg = {
            'type': VIEWCHANGE,
            'new_view': new_view,
            'replica_id': self.rid
        }
        host, port = self.replica_list[
            new_view % self.total_p].host, self.replica_list[new_view % self.total_p].port
        send(host, port, msg, self.loss_rate)

    def process_viewchange(self, msg):
        # Record a VIEWCHANGE vote addressed to this replica.  Returns True
        # once at least `majority` distinct replicas have voted for the new
        # view, otherwise False (also when the vote is not for this
        # replica or is for a view older than the current one).
        new_view = msg['new_view']
        proposer_rid = msg['replica_id']
        if new_view % self.total_p != self.rid:
            return False
        if new_view < self.view:
            print("My current view is", self.view, "but giving me view change")
            return False
        if new_view in self.viwechange_log:
            self.viwechange_log[new_view][proposer_rid] = True
        else:
            self.viwechange_log[new_view] = {}
            self.viwechange_log[new_view][proposer_rid] = True
        print("Getting", len(self.viwechange_log[new_view]), "votes")
        if len(self.viwechange_log[new_view]) >= self.majority:
            return True
        return False
class LearnUttering(Learn):
    '''
    Learns which subject and which verb to utter, using one Learner for
    subject lemmas and one for verb lemmas.
    '''

    # Surface forms of the subject lemmas.
    _SUBJECT_WORDS = {"S1": "Io", "S2": "Tu", "S3": "Luca", "S4": "Mario"}
    # Surface forms of the verb lemmas; any other verb lemma yields "vade".
    _VERB_WORDS = {"V1": "gira", "V2": "veni"}

    def __init__(self, name):
        '''
        Create the lemma learners.

        A truthy name gives the subject learner four lemmas ("S1".."S4"),
        otherwise only "S1" and "S2".
        '''
        super(LearnUttering, self).__init__()
        if name:
            self.subjectLemmaLearner = Learner(["S1", "S2", "S3", "S4"])
        else:
            self.subjectLemmaLearner = Learner(["S1", "S2"])
        self.verbLemmaLearner = Learner(["V1", "V2", "V3"])

    def learnedUtterance(self):
        '''
        Choose a subject and a verb, log both choices, and return the
        sentence "<subject> <verb>.".

        Raises ValueError when the subject learner returns a lemma that has
        no surface form (previously this surfaced as an UnboundLocalError).
        '''
        subjLemma = self.subjectLemmaLearner.chooseAction()
        self.learnerLogger("S", self.subjectLemmaLearner.getContext(),
                           subjLemma)
        if subjLemma not in self._SUBJECT_WORDS:
            raise ValueError("Unknown subject lemma: %r" % (subjLemma,))
        subj = self._SUBJECT_WORDS[subjLemma]

        verbLemma = self.verbLemmaLearner.chooseAction()
        self.learnerLogger("V", self.verbLemmaLearner.getContext(),
                           verbLemma)
        verb = self._VERB_WORDS.get(verbLemma, "vade")

        return subj + " " + verb + "."

    def setContext(self, context):
        '''Forward the context to both learners and remember it.'''
        self.subjectLemmaLearner.setContext(context)
        self.verbLemmaLearner.setContext(context)
        self.context = context

    def reward(self, reward):
        '''Apply the reward to both learners, then log it for each.'''
        self.subjectLemmaLearner.reward(reward)
        self.verbLemmaLearner.reward(reward)
        self.rewardLogger("S", self.subjectLemmaLearner.getContext(),
                          self.subjectLemmaLearner.getLastAction(), reward)
        self.rewardLogger("V", self.verbLemmaLearner.getContext(),
                          self.verbLemmaLearner.getLastAction(), reward)

    def uttered(self, utterance):
        '''Tell both learners to skip when the utterance is empty.'''
        if utterance == "":
            self.subjectLemmaLearner.skip()
            self.verbLemmaLearner.skip()
n_features=5, learning_rate=0.005, reward_decay=0.9, e_greedy=0.99, replace_target_iter=20, memory_size=10000, batch_size=20, e_greedy_increment=0.005, output_graph=False, prioritized=True, sess=None, ) env = Learner( log_dir=log_dir, i_data=data[1]['train'], j_data=data[0]['train'], tst_data=data[1]['tst'], learning_steps=500, learning_steps_max=50000, ) with open(file_name, 'a') as file: file.write('\n\n===NEW DATA LOADING====\n\n') file.write('\n\ni_data: ' + str(1)) file.write('\n\nj_data: ' + str(3)) for i in range(5): train_DDQN(RL=RL_prio, env=env, file_name=file_name, penalty=0.02) # double_DQN.plot_cost() j = np.random.randint(4) env.i_data = data[j]['train'] env.reward_tst_data = data[j]['tst'] k = np.random.randint(4)
def test_next_move_init(self):
    """A fresh learner reports reward 0 for a move on a fresh board."""
    self.learner = Learner()
    game = TicTacToe()
    reward = self.learner.get_reward((0, 0), game.board)
    self.assertEqual(0, reward)
def test_play(self):
    """The learner can place a mark on the first free field."""
    self.learner = Learner()
    self.tictactoe = TicTacToe()
    move = self.tictactoe.available_moves()[0]
    self.tictactoe.play(move[0], move[1], self.learner)
def __init__(self, config, method = 'classification'):
    """Initialise through the Learner base constructor and set defaults.

    After delegation the kernel is set to 'rbf' and no grid dictionary is
    configured.
    """
    Learner.__init__(self, config, method)
    self.grid_dictionary = None
    self.kernel = 'rbf'