def check_probability_matching_other_agent(real_teacher, uncertainty=1.):
    """
    Much like agent_agent_test, but with sampling instead of map and the
    possibility of simulating the teacher.

    With a real teacher, the average distance between the two agents'
    production probabilities shrinks simply because the learner drifts
    towards 0.5, which is where the average random teacher starts from.
    To check whether the learner is really probability matching (rather than
    becoming maximally uncertain whenever it receives contradictory data),
    use a simulated teacher, preferably with a low level of uncertainty
    (i.e. a confident teacher).
    """
    max_model_size = 7
    all_models = generate_list_models(max_model_size)
    agent2 = pop.NetworkAgent(max_model_size)
    if real_teacher:
        agent1 = pop.NetworkAgent(max_model_size)
    else:
        agent1 = pop.ConfidenceTeacher(max_model_size, uncertainty)
    distances = []
    for i in range(3000):
        # models are randomly picked rows of all_models
        random_indices = np.random.randint(0, len(all_models),
                                           int(0.9 * len(all_models)))
        models = all_models[random_indices]
        production = agent1.sample(models)
        agent2.learn(models, production)
        distances.append(check_agents_similarity(agent1, agent2, all_models))
    plt.scatter(range(len(distances)), distances)
    plt.show()
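# Usage sketch (illustrative calls, not part of the original test suite): run
# the check above with a simulated, confident teacher, which is the condition
# its docstring recommends for diagnosing genuine probability matching, and
# compare it against a run with a real (network) teacher.
#
#     check_probability_matching_other_agent(real_teacher=False,
#                                            uncertainty=0.1)
#     check_probability_matching_other_agent(real_teacher=True)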
def test_order_importance():
    """
    Is there more variability across the quantifiers guessed from the same
    underlying true quantifier when the order of the observations is
    shuffled across learners? In other words, does the order of the
    observations matter for learning? Check whether agents learn a
    quantifier from the same observations more consistently (even if
    wrongly) when those observations always come in the same order rather
    than in a shuffled order.
    """
    max_model_size = 7
    all_models = generate_list_models(max_model_size)
    n_tests = 1000

    # check for different quantifiers (currently a single iteration)
    for i in range(1):
        quantifier = produce_random_quants(max_model_size, all_models)
        models, truth_values = shuffle_learning_model(
            all_models, quantifier, restrict=0.7)

        # unshuffled condition: every learner sees the observations in the
        # same order (map() is lazy in Python 3, so use an explicit loop)
        learners = [pop.NetworkAgent(max_model_size) for _ in range(n_tests)]
        for agent in learners:
            agent.learn(models, truth_values, shuffle_by_epoch=False)
        # unshuffled_test is the array of the languages learned from the
        # quantifier without shuffling the models
        unshuffled_test = create_languages_array(learners, all_models)

        # shuffled condition: every learner sees the observations in a
        # different random order
        learners = [pop.NetworkAgent(max_model_size) for _ in range(n_tests)]
        for agent in learners:
            agent.learn(*shuffle_learning_model(models, truth_values))
        # shuffled_test is the array of the languages learned from the
        # quantifier when shuffling the models
        shuffled_test = create_languages_array(learners, all_models)

        # standard deviation of what the agents learned for every model
        unshuffled_std = np.std(unshuffled_test, axis=1)
        shuffled_std = np.std(shuffled_test, axis=1)

        # differences in standard deviations between the shuffled and the
        # unshuffled groups; if shuffling has an effect, the differences
        # should be mostly positive
        differences_std = shuffled_std - unshuffled_std

        plt.hist(differences_std, bins=100)
        plt.show()
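# A minimal numeric illustration of the statistic used in
# test_order_importance (illustrative only; the rows-are-models /
# columns-are-learners convention is assumed from the axis=1 calls above):
# the per-model standard deviation across learners measures how much the
# learners disagree on that model, so positive values of
# shuffled_std - unshuffled_std mean that shuffling the observation order
# made the learners less consistent.
def example_per_model_disagreement():
    # rows = models, columns = learners
    judgments = np.array([[1, 1, 1, 1],    # full agreement -> std 0.0
                          [1, 0, 1, 0]])   # maximal disagreement -> std 0.5
    return np.std(judgments, axis=1)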
def agent_agent_test():
    """
    Shows how the similarity between two agents evolves as the second agent
    sees more and more of the first agent's output.
    """
    max_model_size = 10
    all_models = generate_list_models(max_model_size)
    agent1, agent2 = pop.NetworkAgent(max_model_size), pop.NetworkAgent(
        max_model_size)
    distances = []
    for i in range(1000):
        # models are randomly picked rows of all_models
        random_indices = np.random.randint(0, len(all_models),
                                           int(0.7 * len(all_models)))
        models = all_models[random_indices]
        production = agent1.map(models)
        if i == 0:
            seaborn.distplot(agent1.produce(all_models), label='initial')
            plt.show()
        agent2.learn(models, production)
        distances.append(check_agents_similarity(agent1, agent2, all_models))
    plt.scatter(range(len(distances)), distances)
    plt.show()
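# check_agents_similarity is defined elsewhere in this repository; the sketch
# below shows one plausible distance of the kind tracked above (an assumption
# for illustration, not the repository's actual implementation): the mean
# absolute difference between the two agents' production probabilities.
def example_mean_production_distance(agent1, agent2, all_models):
    """Sketch of a production-probability distance between two agents."""
    return np.mean(np.abs(agent1.produce(all_models) -
                          agent2.produce(all_models)))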
def check_probability_matching_few_models():
    """
    Train a neural net on conflicting, hand-selected models to check whether
    it does probability matching.
    """
    repetitions_per_model = 10000
    prob_models = [0.1, 0.9]
    models = [[0, 1, 1], [1, 1, 0]]
    # a single agent sees both models, each repeated many times with
    # judgments sampled at that model's probability
    agent = pop.NetworkAgent(3)
    for model, p_model in zip(models, prob_models):
        repeated_model = np.tile(model, (repetitions_per_model, 1))
        judgs = np.random.binomial(n=1, p=p_model,
                                   size=(repetitions_per_model, 1))
        agent.learn(repeated_model, judgs)
    # a probability-matching agent should produce probabilities close to
    # prob_models for the corresponding models
    print(np.column_stack((prob_models, agent.produce(np.array(models)))))
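# A quick sanity check of the target behaviour (illustrative only, pure
# numpy): with 10000 binomial judgments per model the empirical frequency of
# positive judgments is already very close to the generating probability, so
# a probability-matching learner should output roughly 0.1 and 0.9 for the
# two models above instead of collapsing both towards 0.5.
def example_empirical_judgment_rates(repetitions_per_model=10000,
                                     prob_models=(0.1, 0.9)):
    return [np.random.binomial(n=1, p=p, size=repetitions_per_model).mean()
            for p in prob_models]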
def produce_random_quants(max_model_size, all_models, n_quants=1, qtype="random"): """ Produces a random quantifier with a given length and optional type. Possible types: "random", "mon", "network", "uniform" # TODO: implement more quantifier types """ if n_quants > 1: return np.column_stack( tuple( produce_random_quants(max_model_size, all_models, qtype=qtype) for _ in range(n_quants))) if n_quants == 1: if qtype == "random": return np.random.randint(2, size=(len(all_models), 1)) if qtype == "mon": # create random monotone quantifier bound_position = np.random.randint(max_model_size) direction = np.random.randint(2) sizes = np.sum(all_models, axis=1) return np.where(((direction == 1) & (sizes >= bound_position)) | ((direction == 0) & (sizes <= bound_position)), 1, 0).reshape(-1, 1) elif qtype == "conv": # create random convex (possible monotone) quantifier bounds_position = np.sort( np.random.choice(max_model_size, size=2, replace=False)) direction = np.random.randint(2) counts = np.sum(all_models, axis=1) quant = (counts <= bounds_position[0]) | ( counts >= bounds_position[1]) == direction return quant.reshape(-1, 1).astype(np.int) elif qtype == "network": return pop.NetworkAgent(max_model_size).map(all_models).astype( np.int) elif qtype == "uniform": return pop.UniformRandomAgent(max_model_size).map( all_models).astype(np.int) else: raise ValueError( ("Value of quantifier type not recognized. " "Acceptable types: random, mon, conv, network or uniform"))
def agent_quantifier_test(max_model_size=None, quant=None, train_split=0.75):
    """
    Plots how the distance between a random agent and a quantifier (random
    if not passed) evolves as the agent observes data produced from the
    quantifier. Returns the train and test distance histories.
    """
    if not max_model_size:
        max_model_size = 10
    all_models = generate_list_models(max_model_size)
    # `quant or ...` would be ambiguous for a numpy array, so test for None
    quantifier = (quant if quant is not None else
                  np.random.randint(0, 2, size=(len(all_models), 1)))
    all_models, quantifier = shuffle_learning_model(all_models, quantifier)
    train_models, test_models = train_test_split(all_models, train_split)
    train_quant, test_quant = train_test_split(quantifier, train_split)
    agent = pop.NetworkAgent(max_model_size)
    train_distances, test_distances = [], []
    for i in range(1000):
        random_indices = np.random.randint(0, len(train_models),
                                           int(0.7 * len(train_models)))
        models = train_models[random_indices]
        production = train_quant[random_indices]
        # if i == 0:
        #     seaborn.distplot(agent.produce(all_models), label='initial')
        agent.learn(models, production)
        # if i == 0:
        #     seaborn.distplot(agent.produce(all_models), label='after one')
        #     plt.legend()
        #     plt.show()
        train_distances.append(
            check_agent_quantifier_similarity(agent, train_quant,
                                              train_models))
        test_distances.append(
            check_agent_quantifier_similarity(agent, test_quant, test_models))
    plt.scatter(range(len(train_distances)), train_distances)
    plt.scatter(range(len(test_distances)), test_distances)
    plt.show()
    return train_distances, test_distances
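# Usage sketch (illustrative, assuming generate_list_models enumerates models
# in a deterministic order so the hand-built quantifier lines up with the
# models generated inside agent_quantifier_test): train an agent on a
# "more than half of the positions" quantifier and return the train/test
# distance curves.
def example_learn_more_than_half(max_model_size=7):
    all_models = generate_list_models(max_model_size)
    quant = (np.sum(all_models, axis=1) >
             max_model_size / 2).astype(int).reshape(-1, 1)
    return agent_quantifier_test(max_model_size=max_model_size, quant=quant)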