def _add_to_cluster(self, cluster, _doc):
    super(WavgNetCONFIRM, self)._add_to_cluster(cluster, _doc)

    # competitive stage
    similarities = self._cluster_sim_scores(_doc)
    idx = utils.argmax(similarities)
    del similarities[idx]
    if similarities:
        idx2 = utils.argmax(similarities)
        if idx2 <= idx:
            idx2 += 1
        sim_vec2 = self.clusters[idx2].center.similarity_vector(_doc)
        self.clusters[idx2].network.learn(sim_vec2, 0.2)

def best_policy(mdp, U): """Given an MDP and a utility function U, determine the best policy, as a mapping from state to action. (Equation 17.4)""" pi = {} for s in mdp.states: pi[s] = argmax(mdp.actions(s), key=lambda a: expected_utility(a, s, U, mdp)) return pi
def genetic_algorithm(problem, population, fitness_fn, ngen=1000, pmut=0.1):
    "[Fig. 4.8]"
    #MAX = 0
    for i in range(ngen):
        new_population = []
        '''
        print i, '------------'
        print ' ', MAX
        for p in population:
            print problem.value(p)
            if problem.value(p) > MAX:
                MAX = problem.value(p)
        '''
        for p in population:
            fitnesses = map(fitness_fn, population)
            s1, s2 = weighted_sample_with_replacement(population, fitnesses, 2)
            p1 = copy.copy(problem)
            p1.set_state(s1)
            p2 = copy.copy(problem)
            p2.set_state(s2)
            child = p1.mate(p2)
            child.mutate(pmut)
            new_population.append(child.initial)
        population = new_population
    return utils.argmax(population, fitness_fn)

def update(self,x,y): """ updates the ORT - x : list of k covariates (k x 1) - y : response (scalar) usage: ort.update(x,y) """ k = self.__poisson(1) if k == 0: self.__updateOOBE(x,y) else: for u in xrange(k): self.__age += 1 (j,depth) = self.__findLeaf(x,self.tree) j.elem.update(x,y) #if j.elem.numSamplesSeen > self.minSamples and depth < self.maxDepth: # FIXME: which is the correct approach? if j.elem.stats.n > self.minSamples and depth < self.maxDepth: g = self.__gains(j.elem) if any([ gg >= self.minGain for gg in g ]): bestTest = j.elem.tests[argmax(g)] j.elem.updateSplit(bestTest.dim,bestTest.loc) j.updateChildren( Tree(Elem(self.param)), Tree(Elem(self.param)) ) j.left.elem.stats = bestTest.statsL j.right.elem.stats = bestTest.statsR j.elem.reset()
def genetic_algorithm_stepwise(population):
    root.title('Genetic Algorithm')
    for generation in range(ngen):
        # generate the new population after selecting, recombining and mutating the existing population
        population = [search.mutate(search.recombine(*search.select(2, population, fitness_fn)), gene_pool, mutation_rate)
                      for i in range(len(population))]
        # genome with the highest fitness in the current generation
        current_best = ''.join(argmax(population, key=fitness_fn))
        # collect the first few examples from the current population
        members = [''.join(x) for x in population][:48]

        # clear the canvas
        canvas.delete('all')
        # display the current best on top of the screen
        canvas.create_text(canvas_width / 2, 40, fill=p_blue, font='Consolas 46 bold', text=current_best)

        # display a part of the population on the screen
        for i in range(len(members) // 3):
            canvas.create_text((canvas_width * .175), (canvas_height * .25 + (25 * i)),
                               fill=lp_blue, font='Consolas 16', text=members[3 * i])
            canvas.create_text((canvas_width * .500), (canvas_height * .25 + (25 * i)),
                               fill=lp_blue, font='Consolas 16', text=members[3 * i + 1])
            canvas.create_text((canvas_width * .825), (canvas_height * .25 + (25 * i)),
                               fill=lp_blue, font='Consolas 16', text=members[3 * i + 2])

        # display the current generation number
        canvas.create_text((canvas_width * .5), (canvas_height * 0.95), fill=p_blue,
                           font='Consolas 18 bold', text=f'Generation {generation}')

        # display a blue bar that indicates the current maximum fitness compared to the maximum possible fitness
        scaling_factor = fitness_fn(current_best) / len(target)
        canvas.create_rectangle(canvas_width * 0.1, 90, canvas_width * 0.9, 100, outline=p_blue)
        canvas.create_rectangle(canvas_width * 0.1, 90, canvas_width * 0.1 + scaling_factor * canvas_width * 0.8, 100, fill=lp_blue)
        canvas.update()

        # check for completion
        fittest_individual = search.fitness_threshold(fitness_fn, f_thres, population)
        if fittest_individual:
            break

def WalkSAT(clauses, p=0.5, max_flips=10000):
    """Checks for satisfiability of all clauses by randomly flipping values of variables."""
    # Set of all symbols in all clauses
    symbols = set(sym for clause in clauses for sym in prop_symbols(clause))
    # model is a random assignment of true/false to the symbols in clauses
    model = {s: random.choice([True, False]) for s in symbols}
    for i in range(max_flips):
        satisfied, unsatisfied = [], []
        for clause in clauses:
            (satisfied if pl_true(clause, model) else unsatisfied).append(clause)
        if not unsatisfied:  # if model satisfies all the clauses
            return model
        clause = random.choice(unsatisfied)
        if probability(p):
            sym = random.choice(prop_symbols(clause))
        else:
            # Flip the symbol in clause that maximizes number of sat. clauses
            def sat_count(sym):
                # Return the number of clauses satisfied after flipping the symbol.
                model[sym] = not model[sym]
                count = len([clause for clause in clauses if pl_true(clause, model)])
                model[sym] = not model[sym]
                return count
            sym = argmax(prop_symbols(clause), key=sat_count)
        model[sym] = not model[sym]
    # If no solution is found within the flip limit, we return failure
    return None

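# Hypothetical usage sketch for WalkSAT above, assuming AIMA-style logic helpers
# (expr, prop_symbols, pl_true, probability) are available in the same module:
clauses = [expr('A | B'), expr('~A | C'), expr('B | ~C')]
model = WalkSAT(clauses, p=0.5, max_flips=1000)
if model is not None:
    print(model)  # e.g. a symbol-to-bool dict such as {A: True, B: True, C: True}
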
def determine_optimum_variants(unit1, unit2):
    """Determines the optimum variants between two units."""
    # TODO - improve performance by considering variants (1,1) (1, 2) and (2,1)
    # as equivalent.
    outcomes = defaultdict(dict)
    for v1 in MeleeRangedStrategy.VARIANTS:
        if not MeleeRangedStrategy.is_compatible(unit1, v1):
            continue
        unit1.strategy = MeleeRangedStrategy(unit1, v1)
        for v2 in MeleeRangedStrategy.VARIANTS:
            if not MeleeRangedStrategy.is_compatible(unit2, v2):
                continue
            unit2.strategy = MeleeRangedStrategy(unit2, v2)
            turn_order = (unit1, unit2)
            game_state = AveragingVersusGameState(turn_order, verbosity=0)
            game_state.run_combat()
            outcomes[v1][v2] = game_state.hp_delta

    # What's your best strategy?
    unit_1_strategies = {v1: min(outcomes[v1].values()) for v1 in outcomes}
    unit1_strategy = utils.argmax(unit_1_strategies)
    unit2_strategy = utils.argmin(outcomes[unit1_strategy])

    # for v1 in outcomes:
    #     for v2, hp_delta in sorted(outcomes[v1].items()):
    #         print '(%d, %d) => %+.2f' % (v1, v2, hp_delta)
    # print '%s\'s strategy: %s' % (unit1, unit1_strategy)
    # print '%s\'s strategy: %s' % (unit2, unit2_strategy)
    return (unit1_strategy, unit2_strategy)

def alphabeta_full_search(state, game):
    """Search game to determine best action; use alpha-beta pruning.
    As in [Fig. 6.7], this version searches all the way to the leaves."""
    player = game.to_move(state)

    def max_value(state, alpha, beta):
        if game.terminal_test(state):
            return game.utility(state, player)
        v = -infinity
        for (a, s) in game.successors(state):
            v = max(v, min_value(s, alpha, beta))
            if v >= beta:
                return v
            alpha = max(alpha, v)
        return v

    def min_value(state, alpha, beta):
        if game.terminal_test(state):
            return game.utility(state, player)
        v = infinity
        for (a, s) in game.successors(state):
            v = min(v, max_value(s, alpha, beta))
            if v <= alpha:
                return v
            beta = min(beta, v)
        return v

    # Body of alphabeta_search starts here:
    action, state = argmax(game.successors(state),
                           lambda ((a, s)): min_value(s, -infinity, infinity))
    return action

def minimax_decision(state, game): """Given a state in a game, calculate the best move by searching forward all the way to the terminal states. [Fig. 6.4]""" player = game.to_move(state) def max_value(state): if game.terminal_test(state): return game.utility(state, player) v = -infinity for (_, s) in game.successors(state): v = max(v, min_value(s)) return v def min_value(state): if game.terminal_test(state): return game.utility(state, player) v = infinity for (_, s) in game.successors(state): v = min(v, max_value(s)) return v # Body of minimax_decision starts here: action, state = argmax(game.successors(state), lambda ((a, s)): min_value(s)) return action
def minimax_decision(state, game): """Given a state in a game, calculate the best move by searching forward all the way to the terminal states. [Figure 5.3]""" player = game.to_move(state) def max_value(state): if game.terminal_test(state): return game.utility(state, player) v = -infinity for a in game.actions(state): v = max(v, min_value(game.result(state, a))) return v def min_value(state): if game.terminal_test(state): return game.utility(state, player) v = infinity for a in game.actions(state): v = min(v, max_value(game.result(state, a))) return v # Body of minimax_decision: return argmax(game.actions(state), key=lambda a: min_value(game.result(state, a)))
def actions(self, state):
    search_list = [c for c in self.decoder.chardomain if c not in state]
    target_list = [c for c in alphabet if c not in state.values()]
    # Find the best character to replace
    plainchar = argmax(search_list, key=lambda c: self.decoder.P1[c])
    for cipherchar in target_list:
        yield (plainchar, cipherchar)

def _add_to_cluster(self, cluster, _doc):
    super(PushAwayCONFIRM, self)._add_to_cluster(cluster, _doc)
    sim_score = self._cached_most_similar_val
    margins = map(lambda x: sim_score - x if sim_score != x else 0,
                  self._get_cached_sim_scores(_doc))
    most_similar_cluster = self.clusters[utils.argmax(margins)]
    cluster.center.push_away(most_similar_cluster.center)

def generate(self, in_str):
    dy.renew_cg()
    in_seq = u.preprocess_seq(in_str, self.char2int)
    output_str = ""
    for probs in self._probs(in_seq):
        char = u.argmax(probs)
        output_str += self.int2char[char]
    return output_str

def predict(example): """Predict the target value for example. Consider each possible value, and pick the most likely by looking at each attribute independently.""" def class_probability(targetval): return (target_dist[targetval] * product(attr_dists[targetval, attr][example[attr]] for attr in dataset.inputs)) return argmax(target_vals, key=class_probability)
def renderResults(name, results, test_x, activations_0, activations_1):
    image = Image.new('RGB', (28 * 3 + 280, len(results) * 280))
    draw = ImageDraw.Draw(image)
    for index, result in enumerate(results):
        renderImage(image, "MNIST_data/" + str(argmax(test_x[index][0])) + ".png", 0 * 28, index * 280 + 0 * 32)
        renderImage(image, "MNIST_data/" + str(argmax(result[0])) + ".png", 2 * 28, index * 280 + 0 * 32)
        renderActivations(draw, activations_1[index][0], 3 * 28, index * 280 + 0 * 32)
        renderActivations(draw, activations_0[index][0], 3 * 28, index * 280 + 0 * 32 + 14)

        renderImage(image, "MNIST_data/" + str(argmax(test_x[index][1])) + ".png", 0 * 28, index * 280 + 1 * 32)
        renderImage(image, "MNIST_data/" + str(argmax(result[1])) + ".png", 2 * 28, index * 280 + 1 * 32)
        renderActivations(draw, activations_1[index][1], 3 * 28, index * 280 + 1 * 32)
        renderActivations(draw, activations_0[index][1], 3 * 28, index * 280 + 1 * 32 + 14)

        renderImage(image, "MNIST_data/" + str(argmax(test_x[index][2])) + ".png", 0 * 28, index * 280 + 2 * 32)
        renderImage(image, "MNIST_data/" + str(argmax(result[2])) + ".png", 2 * 28, index * 280 + 2 * 32)
        renderActivations(draw, activations_1[index][2], 3 * 28, index * 280 + 2 * 32)
        renderActivations(draw, activations_0[index][2], 3 * 28, index * 280 + 2 * 32 + 14)

        renderImage(image, "MNIST_data/" + "plus.png", 0 * 28, index * 280 + 3 * 32)
        renderImage(image, "MNIST_data/" + str(argmax(result[3])) + ".png", 2 * 28, index * 280 + 3 * 32)
        renderActivations(draw, activations_1[index][3], 3 * 28, index * 280 + 3 * 32)
        renderActivations(draw, activations_0[index][3], 3 * 28, index * 280 + 3 * 32 + 14)

        renderImage(image, "MNIST_data/" + "equals.png", 0 * 28, index * 280 + 4 * 32)
        renderImage(image, "MNIST_data/" + str(argmax(result[4])) + ".png", 2 * 28, index * 280 + 4 * 32)
        renderActivations(draw, activations_1[index][4], 3 * 28, index * 280 + 4 * 32)
        renderActivations(draw, activations_0[index][4], 3 * 28, index * 280 + 4 * 32 + 14)
    image.save(name + ".png")

def renderResults200(name, results, test_x, activations_0, activations_1):
    image = Image.new('RGB', (28 * 3 + 800, len(results) * 180))
    draw = ImageDraw.Draw(image)
    for index, result in enumerate(results):
        renderDigit(draw, test_x[index][0], 0 * 28, index * 180 + 0 * 36)
        renderImage(image, "MNIST_data/" + str(argmax(result[0][10:])) + ".png", 2 * 28, index * 180 + 0 * 36)
        renderActivations200(draw, activations_0[index][0], 3 * 28, index * 180 + 0 * 36)
        renderActivations200(draw, activations_1[index][0], 3 * 28, index * 180 + 0 * 36 + 16)

        renderDigit(draw, test_x[index][1], 0 * 28, index * 180 + 1 * 36)
        renderImage(image, "MNIST_data/" + str(argmax(result[1][10:])) + ".png", 2 * 28, index * 180 + 1 * 36)
        renderActivations200(draw, activations_0[index][1], 3 * 28, index * 180 + 1 * 36)
        renderActivations200(draw, activations_1[index][1], 3 * 28, index * 180 + 1 * 36 + 16)

        renderDigit(draw, test_x[index][2], 0 * 28, index * 180 + 2 * 36)
        renderImage(image, "MNIST_data/" + getOperator(result[2][:4]) + ".png", 2 * 28, index * 180 + 2 * 36)
        renderActivations200(draw, activations_0[index][2], 3 * 28, index * 180 + 2 * 36)
        renderActivations200(draw, activations_1[index][2], 3 * 28, index * 180 + 2 * 36 + 16)

        renderDigit(draw, test_x[index][3], 0 * 28, index * 180 + 3 * 36)
        renderImage(image, "MNIST_data/" + str(argmax(result[3][:10])) + ".png", 1 * 28, index * 180 + 3 * 36)
        renderImage(image, "MNIST_data/" + str(argmax(result[3][10:20])) + ".png", 2 * 28, index * 180 + 3 * 36)
        renderActivations200(draw, activations_0[index][3], 3 * 28, index * 180 + 3 * 36)
        renderActivations200(draw, activations_1[index][3], 3 * 28, index * 180 + 3 * 36 + 16)
    image.save(name + ".png")

def best_policy(mdp, U): """Dado um MDP e uma função de utilidade U, determinar a melhor política, como um mapeamento do estado para a ação.""" pi = {} for s in mdp.states: pi[s] = argmax(mdp.actions(s), key=lambda a: expected_utility(a, s, U, mdp)) return pi
def predict(example): """Predict the target value for example. Calculate probabilities for each class and pick the max.""" def class_probability(targetval): attr_dist = attr_dists[targetval] return target_dist[targetval] * product(attr_dist[a] for a in example) return argmax(target_dist.keys(), key=class_probability)
def predict(example): """Predict the target value for example. Consider each possible value, and pick the most likely by looking at each attribute independently.""" def class_probability(targetval): return (target_dist[targetval] * product(attr_dists[targetval, attr][example[attr]] for attr in dataset.inputs)) return argmax(targetvals, key=class_probability)
def genetic_algorithm(self, problem, map_canvas):
    """ Genetic Algorithm modified for the given problem """

    def init_population(pop_number, gene_pool, state_length):
        """ initialize population """
        population = []
        for i in range(pop_number):
            population.append(utils.shuffled(gene_pool))
        return population

    def recombine(state_a, state_b):
        """ recombine two problem states """
        start = random.randint(0, len(state_a) - 1)
        end = random.randint(start + 1, len(state_a))
        new_state = state_a[start:end]
        for city in state_b:
            if city not in new_state:
                new_state.append(city)
        return new_state

    def mutate(state, mutation_rate):
        """ mutate problem states """
        if random.uniform(0, 1) < mutation_rate:
            sample = random.sample(range(len(state)), 2)
            state[sample[0]], state[sample[1]] = state[sample[1]], state[sample[0]]
        return state

    def fitness_fn(state):
        """ calculate fitness of a particular state """
        fitness = problem.value(state)
        return int((5600 + fitness) ** 2)

    current = Node(problem.initial)
    population = init_population(100, current.state, len(current.state))
    all_time_best = current.state
    while True:
        population = [mutate(recombine(*select(2, population, fitness_fn)), self.mutation_rate.get())
                      for i in range(len(population))]
        current_best = utils.argmax(population, key=fitness_fn)
        if fitness_fn(current_best) > fitness_fn(all_time_best):
            all_time_best = current_best
            self.cost.set("Cost = " + str('%0.3f' % (-1 * problem.value(all_time_best))))
        map_canvas.delete('poly')

        points = []
        for city in current_best:
            points.append(self.frame_locations[city][0])
            points.append(self.frame_locations[city][1])
        map_canvas.create_polygon(points, outline='red', width=1, fill='', tag='poly')

        best_points = []
        for city in all_time_best:
            best_points.append(self.frame_locations[city][0])
            best_points.append(self.frame_locations[city][1])
        map_canvas.create_polygon(best_points, outline='red', width=3, fill='', tag='poly')
        map_canvas.update()
        map_canvas.after(self.speed.get())

def value_iteration(mdp, gamma, epsilon=0.001):
    """
    Perform value iteration of Markov Decision Process MDP.

    Parameters
    ----------
    mdp : mdp.MDP object
        Markov Decision Process
    gamma : float > 0
        Discount factor
    epsilon : float > 0
        Algorithm sensitivity.

    Returns
    -------
    dict of state : float
        Map from state to value, where state is in mdp.states.
    """
    # -------- TASK 2.2 -----------------------------------------------------
    # TASK: Implement the value iteration algorithm.
    # This corresponds to steps 1 - 3 of the algorithm as described on the
    # MyCourses pages.
    # The initial value for each state is 0.
    # The return value, v, should be a dictionary mapping from states
    # to values. This corresponds to v_{n+1} once the change is small
    # enough in step 3 and the algorithm terminates.
    termination = epsilon * (1 - gamma) / (2 * gamma)
    v = {}
    for s in mdp.states():
        v[s] = 0
    nv = v.copy()
    f = False
    while not f:
        f = True
        for s in mdp.states():
            va = {a: value_of(mdp, s, a, v, gamma) for a in mdp.applicable_actions(s)}
            best_action = argmax(va)
            nv[s] = va[best_action]
        if all(abs(nv[state] - v[state]) < termination for state in v):
            return nv
        v = nv.copy()
        f = False
    return v

def fitness_threshold(fitness_fn, f_thres, population):
    if not f_thres:
        return None
    fittest_individual = argmax(population, key=fitness_fn)
    if fitness_fn(fittest_individual) >= f_thres:
        return fittest_individual
    return None

def _policy(self, current_state):
    """Select an action epsilon-greedily."""
    if random.uniform(0, 1) < self.eps:
        action = self.env.action_space.sample()
    else:
        action = argmax(range(self.env.action_space.n),
                        lambda a: self.Q[self.current_state][a])
    return action

def epsilon_greedy(self, q_values, epsilon):
    sampled = random.uniform(0.0, 1.0)
    action_set = list(self.env.get_action_set(self.env.get_current_state()))
    if sampled < epsilon:
        return random.choice(action_set)
    else:
        return utils.argmax(q_values[action_set])  # I'm breaking ties randomly

def _update(self, action, reward):
    """Update Q according to reward received."""
    Q = self.Q
    maxaction = argmax(range(self.env.action_space.n),
                       lambda a: Q[self.current_state][a] - Q[self.previous_state][action])
    maxactiondiff = Q[self.current_state][maxaction] - Q[self.previous_state][action]
    Q[self.previous_state][action] += self.alpha * (self.A * reward + self.B * self.gamma * maxactiondiff)

def call(self, inputs, decoder_type='argmax', k=25, p=0.9, temperature=0.8):
    """
    Args:
        inputs: a list of the previous token, memorized keys and values, and
            size of the previous tokens.
            token shape = (1, 1, 2 (ID and position))
            mem_k shape = (number of layers, 1, number of heads, attn hidden size, context size)
            mem_v shape = (number of layers, 1, number of heads, context size, attn hidden size)
            length = an integer

    Returns:
        next_token: shape = (1, 1, 2 (ID and position))
        mem_k: shape = (number of layers, 1, number of heads, attn hidden size, context size)
        mem_v: shape = (number of layers, 1, number of heads, context size, attn hidden size)
    """
    token = inputs[0]
    mem_k = inputs[1]
    mem_v = inputs[2]
    length = inputs[3]
    new_mem_k = []
    new_mem_v = []
    hidden = self.embed(token)
    for i, block in enumerate(self.transformer_stack):
        # Use distinct names for the per-block key/value tensors so they do not
        # shadow the top-k parameter `k` used by the 'top-k' decoder below.
        hidden, block_k, block_v = block(hidden, mem_k[i], mem_v[i], length)
        new_mem_k.append(block_k)
        new_mem_v.append(block_v)
    mem_k = tf.stack(new_mem_k)
    mem_v = tf.stack(new_mem_v)
    logit = tf.reshape(
        tf.matmul(hidden[0, :, :], self.embed.we[:self.n_vocab, :], transpose_b=True),
        [self.n_vocab])
    if decoder_type == 'argmax':
        next_token = argmax(logit)
    elif decoder_type == 'top-k':
        next_token = top_k_sampling(logit, k, temperature)
    elif decoder_type == 'nucleus':
        next_token = nucleus_sampling(logit, p)
    else:
        next_token = sampling(logit, temperature)
    return next_token, mem_k, mem_v

def genetic_algorithm(population, fitness_fn, gene_pool=[0, 1], f_thres=None, ngen=1000, pmut=0.1): """[Figure 4.8]""" for i in range(ngen): population = [ mutate(recombine(*select(2, population, fitness_fn)), gene_pool, pmut) for i in range(len(population)) ] if f_thres: fittest_individual = argmax(population, key=fitness_fn) if fitness_fn(fittest_individual) >= f_thres: return fittest_individual return argmax(population, key=fitness_fn)
def __init__(self, col, Y):
    self.col = col
    self.children = {}
    self.prob = Counter(Y)
    s = sum(self.prob.values())
    for y in self.prob:
        self.prob[y] /= s
    label_ind, self.label_prob = argmax(self.prob.keys(), key=self.prob.__getitem__)
    self.label = Y[label_ind]

def predict(example): """Predict the target value for example. Consider each possible value, and pick the most likely by looking at each attribute independently.""" def class_probability(targetval): prob = target_dist[targetval] for attr in dataset.inputs: prob *= gaussian(means[targetval][attr], deviations[targetval][attr], example[attr]) return prob return argmax(target_vals, key=class_probability)
def _viterbi_decode(self, feats):
    backpointers = []

    # Initialize the viterbi variables in log space
    init_vvars = torch.Tensor(1, self.tagset_size).fill_(-10000.)
    init_vvars[0][self.tag_to_ix[START_TAG]] = 0

    # forward_var at step i holds the viterbi variables for step i-1
    forward_var = autograd.Variable(init_vvars)
    for feat in feats:
        bptrs_t = []  # holds the backpointers for this step
        viterbivars_t = []  # holds the viterbi variables for this step

        for next_tag in range(self.tagset_size):
            # next_tag_var[i] holds the viterbi variable for tag i at the
            # previous step, plus the score of transitioning
            # from tag i to next_tag.
            # We don't include the emission scores here because the max
            # does not depend on them (we add them in below)
            next_tag_var = forward_var + self.transitions[next_tag]
            best_tag_id = argmax(next_tag_var)
            bptrs_t.append(best_tag_id)
            viterbivars_t.append(next_tag_var[0][best_tag_id])
        # Now add in the emission scores, and assign forward_var to the set
        # of viterbi variables we just computed
        forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1)
        backpointers.append(bptrs_t)

    # Transition to STOP_TAG
    terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]
    best_tag_id = argmax(terminal_var)
    path_score = terminal_var[0][best_tag_id]

    # Follow the back pointers to decode the best path.
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = bptrs_t[best_tag_id]
        best_path.append(best_tag_id)
    # Pop off the start tag (we dont want to return that to the caller)
    start = best_path.pop()
    assert start == self.tag_to_ix[START_TAG]  # Sanity check
    best_path.reverse()
    return path_score, best_path

def most_open_move(board, order):
    "Find the move that has the most open floor filled around it."
    p1, p2, t, touching = dfs_count_around(board)
    wall_move = follow_wall_move(board, order)
    open_move = utils.argmax(p1.keys(), lambda k: p1[k])
    if p1[wall_move] == p1[open_move]:
        best_move = wall_move
    else:
        best_move = open_move
    logging.debug("most open move is: %s (%d) %s", best_move, p1[best_move], p1)
    return best_move

def best_policy(mdp, U): """与えられたMDPと価値関数から最適な方策を決定する。 具体的には、全ての状態に関して{状態s:するべき行動a}の辞書を追加して返す。""" pi = {} for s in mdp.states: # keyで比較を行なっている。つまり、sにおいて全ての(s,a)の価値を計算して比較。 # 最大のaを、{s:a}として登録。 pi[s] = argmax(mdp.actions(s), key=lambda a: expected_utility(a, s, U, mdp)) return pi
def EpsilonGreedy(self, Q, s, epsilon, episode):
    actions_in_state = self.actions_in_state
    A = actions_in_state(s, episode)
    a = argmax(A, key=lambda a1: Q[s, a1])
    rand_num = random.uniform(0, 1)
    if rand_num < epsilon:
        a = random.choice(A)
    return a

def best_policy(mdp, U): """Given an MDP and a utility function U, determine the best policy, as a mapping from state to action.""" pi = {} f = lambda a: expected_utility(a, s, U, mdp) for count, s in enumerate(mdp.states): if count % 1 == 0: #for s in mdp.states: pi[s] = argmax(mdp.actions(s), key=f) return pi
def execute(self, percept):
    """Execute the information gathering algorithm"""
    self.observation = self.integrate_percept(percept)
    vpis = self.vpi_cost_ratio(self.variables)
    j = argmax(vpis)
    variable = self.variables[j]

    if self.vpi(variable) > self.cost(variable):
        return self.request(variable)

    return self.decnet.best_action()

def __init__(self, col, Y):
    self.col = col
    self.children = {}
    # Count each label
    self.prob = Counter(Y)
    s = sum(self.prob.values())
    for y in self.prob:
        # Normalize to probabilities
        self.prob[y] /= s
    # Pick the most probable label
    label_ind, self.label_prob = argmax(self.prob.keys(), key=self.prob.__getitem__)
    self.labels = Y[label_ind]

def genetic_algorithm(population, fitness_fn, gene_pool=[0, 1], f_thres=None, ngen=1000, pmut=0.1):  # noqa
    """[Figure 4.8]"""
    for i in range(ngen):
        new_population = []
        random_selection = selection_chances(fitness_fn, population)
        for j in range(len(population)):
            x = random_selection()
            y = random_selection()
            child = reproduce(x, y)
            if random.uniform(0, 1) < pmut:
                child = mutate(child, gene_pool)
            new_population.append(child)
        population = new_population

        if f_thres:
            fittest_individual = argmax(population, key=fitness_fn)
            if fitness_fn(fittest_individual) >= f_thres:
                return fittest_individual

    return argmax(population, key=fitness_fn)

def generate(self, in_str, max_len=2, **kwargs):
    dy.renew_cg()
    in_seq = u.preprocess_seq(in_str, self.char2int)
    out_str, pred, EOSint = "", None, self.char2int[u.EOS]
    decoder = self.make_decoder(in_seq, **kwargs)
    probs = next(decoder)
    while pred != EOSint and len(out_str) < (max_len * len(in_seq)):
        pred = u.argmax(probs)  # greedy search (take best prediction)
        next(decoder)
        probs = decoder.send(pred)
        out_str += self.int2char[pred]
    return out_str

def viterbi_decode(self, feats):
    '''
    In this function, we implement the viterbi algorithm explained above.
    A dynamic programming based approach to find the best tag sequence.
    '''
    backpointers = []  # analogous to forward

    # Initialize the viterbi variables in log space
    init_vvars = torch.Tensor(1, self.tagset_size).fill_(-10000.)
    init_vvars[0][self.tag_to_ix[START_TAG]] = 0

    # forward_var at step i holds the viterbi variables for step i-1
    forward_var = Variable(init_vvars)
    if self.use_gpu:
        forward_var = forward_var.cuda()
    for feat in feats:
        next_tag_var = forward_var.view(1, -1).expand(self.tagset_size, self.tagset_size) + self.transitions
        _, bptrs_t = torch.max(next_tag_var, dim=1)
        bptrs_t = bptrs_t.squeeze().data.cpu().numpy()  # holds the backpointers for this step
        next_tag_var = next_tag_var.data.cpu().numpy()
        viterbivars_t = next_tag_var[range(len(bptrs_t)), bptrs_t]  # holds the viterbi variables for this step
        viterbivars_t = Variable(torch.FloatTensor(viterbivars_t))
        if self.use_gpu:
            viterbivars_t = viterbivars_t.cuda()
        # Now add in the emission scores, and assign forward_var to the set
        # of viterbi variables we just computed
        forward_var = viterbivars_t + feat
        backpointers.append(bptrs_t)

    # Transition to STOP_TAG
    terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]
    terminal_var.data[self.tag_to_ix[STOP_TAG]] = -10000.
    terminal_var.data[self.tag_to_ix[START_TAG]] = -10000.
    best_tag_id = argmax(terminal_var.unsqueeze(0))
    path_score = terminal_var[best_tag_id]

    # Follow the back pointers to decode the best path.
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = bptrs_t[best_tag_id]
        best_path.append(best_tag_id)
    # Pop off the start tag (we dont want to return that to the caller)
    start = best_path.pop()
    assert start == self.tag_to_ix[START_TAG]  # Sanity check
    best_path.reverse()
    return path_score, best_path

def play(self, game):
    """
    Returns the "best" move to play in the current <game>-state,
    after some deliberation (<check_abort>).
    """
    def display(depth, label, parent_label, i):
        for _ in range(depth):
            print(' ', end='')
        if parent_label is not None:
            print(parent_label.moves[i], parent_label.q[i].n, parent_label.q[i].avg, end=': ')
        print(label.n)
        return

    def visits(q, i):
        return q[i].n

    self.reset()
    if self.tree is None:
        self.tree = tree.Tree()
    in_advanced_mode = self._params.get('advanced')
    max_num_simulations = self._params.get('simulations')
    if max_num_simulations is None:
        max_num_simulations = 0
    num_simulations = 0
    #while not check_abort.do_abort() and (max_num_simulations == 0 or num_simulations <= max_num_simulations):
    while (max_num_simulations == 0 or num_simulations <= max_num_simulations):
        mcts.simulate(self, game, self.tree, in_advanced_mode)
        # tree.depth_first_traversal(self.tree, self.tree.root(), 0, display)
        num_simulations += 1

    node_id = self.tree.root()
    node_label = self.tree.node_label(node_id)
    max_i = utils.argmax(node_label.q, len(node_label.q), visits)

    e = self._params.get('explore')
    if e is not None and e > game.get_move_no() and random.randint(1, 10) <= 8:
        # Choose a random move with an 80% chance for the first e moves, if requested.
        max_i = random.randint(0, node_label.len - 1)

    policy = [node_label.q[i].n for i in range(node_label.len)]
    total = 0
    for p in policy:
        total += p
    if total > 0:
        policy = [p / total for p in policy]
    # tree.depth_first_traversal(self.tree, self.tree.root(), 0, display)
    return node_label.moves[max_i], node_label.q[max_i].avg, max_i, node_label.moves, policy, node_label.q

def _viterbi_decode(self, feats):
    backpointers = []

    # Initialize the viterbi variables in log space
    init_vvars = torch.full((1, self.tagset_size), -10000.)
    init_vvars[0][self.tag_to_ix[START_TAG]] = 0

    # forward_var at step i holds the viterbi variables for step i-1
    forward_var = init_vvars
    for feat in feats:
        bptrs_t = []  # holds the backpointers for this step
        viterbivars_t = []  # holds the viterbi variables for this step

        for next_tag in range(self.tagset_size):
            # next_tag_var[i] holds the viterbi variable for tag i at the
            # previous step, plus the score of transitioning
            # from tag i to next_tag.
            next_tag_var = forward_var + self.transitions[next_tag]
            best_tag_id = argmax(next_tag_var)
            bptrs_t.append(best_tag_id)
            viterbivars_t.append(next_tag_var[0][best_tag_id].view(1))
        forward_var = (torch.cat(viterbivars_t) + feat).view(1, -1)
        backpointers.append(bptrs_t)

    # Transition to STOP_TAG
    terminal_var = forward_var + self.transitions[self.tag_to_ix[STOP_TAG]]
    best_tag_id = argmax(terminal_var)
    path_score = terminal_var[0][best_tag_id]

    # Follow the back pointers to decode the best path.
    best_path = [best_tag_id]
    for bptrs_t in reversed(backpointers):
        best_tag_id = bptrs_t[best_tag_id]
        best_path.append(best_tag_id)
    # Pop off the start tag (we dont want to return that to the caller)
    start = best_path.pop()
    assert start == self.tag_to_ix[START_TAG]  # Sanity check
    best_path.reverse()
    return path_score, best_path

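# The viterbi decoders above assume the small tensor helper from the PyTorch
# BiLSTM-CRF tutorial (an argmax over a 1 x N score tensor), roughly:
import torch


def argmax(vec):
    """Return the index of the max element of a 1 x N tensor as a plain Python int."""
    _, idx = torch.max(vec, 1)
    return idx.item()
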
def genetic_algorithm(population, fitness_fn, ngen=1000, pmut=0.1): "[Fig. 4.8]" for i in range(ngen): new_population = [] for i in len(population): fitnesses = map(fitness_fn, population) p1, p2 = weighted_sample_with_replacement(population, fitnesses, 2) child = p1.mate(p2) if random.uniform(0, 1) < pmut: child.mutate() new_population.append(child) population = new_population return argmax(population, fitness_fn)
def sample_site_cftp_dep(matrix, mu, Ne):
    L = len(matrix)

    def log_phat(s):
        ep = score_seq(matrix, s)
        nu = Ne - 1
        return -nu*log(1 + exp(ep - mu))

    first_site = "A"*L
    last_site = "T"*L
    best_site = "".join(["ACGT"[argmin(row)] for row in matrix])
    worst_site = "".join(["ACGT"[argmax(row)] for row in matrix])
    trajs = [[best_site], [random_site(L)], [random_site(L)], [random_site(L)], [worst_site]]

    def mutate_site(site, (ri, rb)):
        return subst(site, "ACGT"[rb], ri)

def genetic_algorithm(population, fitness_fn, ngen=1000, pmut=0.1): "[Figure 4.8]" for i in range(ngen): new_population = [] for i in range(len(population)): fitnesses = map(fitness_fn, population) p1, p2 = weighted_sample_with_replacement(population, fitnesses, 2) child = p1.mate(p2) if random.uniform(0, 1) < pmut: child.mutate() new_population.append(child) population = new_population return argmax(population, key=fitness_fn)
def policy_iteration(mdp):
    "Solve an MDP by policy iteration [Figure 17.7]"
    U = {s: 0 for s in mdp.states}
    pi = {s: random.choice(mdp.actions(s)) for s in mdp.states}
    while True:
        U = policy_evaluation(pi, U, mdp)
        unchanged = True
        for s in mdp.states:
            a = argmax(mdp.actions(s), key=lambda a: expected_utility(a, s, U, mdp))
            if a != pi[s]:
                pi[s] = a
                unchanged = False
        if unchanged:
            return pi

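# policy_iteration above also needs policy_evaluation; a minimal sketch in the same
# AIMA style (approximate evaluation with k sweeps), assuming mdp.R(s), mdp.T(s, a)
# and mdp.gamma:
def policy_evaluation(pi, U, mdp, k=20):
    """Return an updated utility table U for the fixed policy pi."""
    R, T, gamma = mdp.R, mdp.T, mdp.gamma
    for _ in range(k):
        for s in mdp.states:
            U[s] = R(s) + gamma * sum(p * U[s1] for (p, s1) in T(s, pi[s]))
    return U
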
def max_value(node):
    if game.terminal_test(node):
        return game.utility(node, player)
    self.change_list.append(('a', node))
    self.change_list.append(('h',))
    max_a = argmax(game.actions(node), key=lambda x: min_value(game.result(node, x)))
    max_node = game.result(node, max_a)
    self.utils[node] = self.utils[max_node]
    x1, y1 = self.node_pos[node]
    x2, y2 = self.node_pos[max_node]
    self.change_list.append(('l', (node, max_node - 3*node - 1)))
    self.change_list.append(('e', node))
    self.change_list.append(('p',))
    self.change_list.append(('h',))
    return self.utils[node]

def program(self, percept):
    s1, r1 = percept
    Q, Nsa, s, a, r = self.Q, self.Nsa, self.s, self.a, self.r
    alpha, gamma, f = self.alpha, self.gamma, self.f
    if s1 in self.terminals:
        Q[s1][None] = r1
    if s is not None:
        Nsa[s][a] += 1
        Q[s][a] += alpha(Nsa[s][a]) * (r + gamma * max(Q[s1].values()) - Q[s][a])
    if s1 in self.terminals:
        self.s = self.a = self.r = None
    else:
        self.s, self.r = s1, r1
        self.a = argmax(Q[s1].keys(), lambda a1: f(Q[s1][a1], Nsa[s1][a1]))
    return self.a

def __call__(self, percept):
    s1, r1 = self.update_state(percept)
    Q, Nsa, s, a, r = self.Q, self.Nsa, self.s, self.a, self.r
    alpha, gamma, terminals, actions_in_state = self.alpha, self.gamma, self.terminals, self.actions_in_state
    if s1 in terminals:
        Q[(s1, None)] = r1
    if s is not None:
        Nsa[(s, a)] += 1
        Q[(s, a)] += alpha(Nsa[(s, a)]) * (r + gamma * max([Q[(s1, a1)] for a1 in actions_in_state(s1)]) - Q[(s, a)])
    if s1 in terminals:
        self.s = self.a = self.r = None
    else:
        self.s, self.r = s1, r1
        self.a = argmax(actions_in_state(s1), key=lambda a1: self.f(Q[(s1, a1)], Nsa[(s1, a1)]))
    return self.a

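# Both Q-learning agents above consult an exploration function f(u, n); a minimal
# sketch of the optimistic AIMA-style version, assuming the agent carries Rplus
# (an optimistic reward estimate) and Ne (a visit-count threshold) attributes:
def f(self, u, n):
    """Exploration function: be optimistic about actions tried fewer than Ne times."""
    if n < self.Ne:
        return self.Rplus
    else:
        return u
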
def _init_clusters(self):
    sub_docs = self.docs[:self.num_instances]
    sim_mat = utils.pairwise(sub_docs,
                             lambda x, y: max(self.doc_similarity(x, y), self.doc_similarity(y, x)))
    edges = utils.minimum_spanning_tree(sim_mat)
    ccs = utils.get_ccs(range(self.num_instances), edges)
    biggest_cc = max(map(len, ccs))
    while biggest_cc > self.num_init:
        edge_to_remove = random.sample(edges, 1)[0]
        edges.remove(edge_to_remove)
        ccs = utils.get_ccs(range(self.num_instances), edges)
        biggest_cc = max(map(len, ccs))
    cc = ccs[utils.argmax(map(len, ccs))]
    for idx in cc:
        self._add_cluster(self.docs[idx], member=False)

def sample_site_spec(matrix, mu, Ne):
    nu = Ne - 1
    L = len(matrix)
    best_site = "".join(["ACGT"[argmin(col)] for col in matrix])
    worst_site = "".join(["ACGT"[argmax(col)] for col in matrix])

    def phat(s):
        assert len(s) == L
        ep = score_seq(matrix, s)
        return (1 + exp(ep - mu))**(-nu)

    chosen_site = ""

    def best_completion(s):
        l = len(s)
        return phat(s + best_site[l:])

    def worst_completion(s):
        l = len(s)
        return s + worst_site[l:]

    return chosen_site

def sample_site_cftp(matrix, mu, Ne):
    L = len(matrix)
    f = seq_scorer(matrix)

    def log_phat(s):
        ep = f(s)
        nu = Ne - 1
        return -nu*log(1 + exp(ep - mu))

    first_site = "A"*L
    last_site = "T"*L
    best_site = "".join(["ACGT"[argmin(row)] for row in matrix])
    worst_site = "".join(["ACGT"[argmax(row)] for row in matrix])
    #middle_sites = [[random_site(L)] for i in range(10)]
    #trajs = [[best_site]] + middle_sites + [[worst_site]]
    trajs = [[best_site], [worst_site]]
    ords = [rslice("ACGT", sorted_indices(row)) for row in matrix]

    def mutate_site(site, (ri, direction)):
        b = (site[ri])
        idx = ords[ri].index(b)
        idxp = min(max(idx + direction, 0), 3)
        bp = ords[ri][idxp]
        return subst(site, bp, ri)

def find_mean_field_approximation(ks, q, ps=None):
    eps = eps_from_ks(ks)
    if ps is None:
        ps = occupancies_ref(ks, q)
    mu_min = -10
    mu_max = 10
    mu_steps = 1000
    mus = interpolate(mu_min, mu_max, mu_steps)
    coeffs = map(lambda mu: polyfit(ps, probs(eps, mu), 1)[0], mus)
    max_coeff_idx = argmax(coeffs)
    # find mu corresponding to best fit.  Cutoff at peak, since curve
    # has two intersections with y = 1.
    mu_idx = argmin(map(lambda coeff: (1-coeff)**2, coeffs[:max_coeff_idx]))
    best_mu = mus[mu_idx]
    qs = probs(eps, best_mu)
    print "best_mu:", best_mu
    print "mean copy number: %s (sd: %s) vs. sum(ps) %s" % (mean_occ(eps, best_mu), sd_occ(eps, best_mu), sum(ps))
    print "pearson correlation:", pearsonr(ps, qs)
    print "best linear fit: p = %s*q + %s" % tuple(polyfit(qs, ps, 1))
    return best_mu

def expectiminimax(state, game):
    """Return the best move for a player after dice are thrown. The game tree
    includes chance nodes along with min and max nodes. [Figure 5.11]"""
    player = game.to_move(state)

    def max_value(state, dice_roll):
        v = -infinity
        for a in game.actions(state):
            v = max(v, chance_node(state, a))
            game.dice_roll = dice_roll
        return v

    def min_value(state, dice_roll):
        v = infinity
        for a in game.actions(state):
            v = min(v, chance_node(state, a))
            game.dice_roll = dice_roll
        return v

    def chance_node(state, action):
        res_state = game.result(state, action)
        if game.terminal_test(res_state):
            return game.utility(res_state, player)
        sum_chances = 0
        num_chances = 21
        dice_rolls = list(itertools.combinations_with_replacement([1, 2, 3, 4, 5, 6], 2))
        if res_state.to_move == 'W':
            for val in dice_rolls:
                game.dice_roll = (-val[0], -val[1])
                sum_chances += max_value(res_state, (-val[0], -val[1])) * (1/36 if val[0] == val[1] else 1/18)
        elif res_state.to_move == 'B':
            for val in dice_rolls:
                game.dice_roll = val
                sum_chances += min_value(res_state, val) * (1/36 if val[0] == val[1] else 1/18)
        return sum_chances / num_chances

    # Body of expectiminimax:
    return argmax(game.actions(state), key=lambda a: chance_node(state, a))
