def _save_visual_report(self, path):
    """Save one features image per value / prediction-error outlier.

    Images are written through ``self.helper.save_features_image`` for the
    entries of ``mean_val_state_action_outliers``,
    ``pred_error_state_action_outliers``, ``mean_val_feature_outliers`` and
    ``pred_error_feature_outliers``, in that order.

    :param path: first argument given to ``join`` when building each image
        file name.
    """
    bins = self.helper.get_features_bins()

    def save(obs_vec, file_name):
        self.helper.save_features_image(obs_vec, join(path, file_name))

    def state_obs(state):
        # feature vector obtained from the state index
        return get_features_from_index(state, bins)

    def single_feature_obs(feat, value):
        # every position holds ANY_FEATURE_IDX except the feature of interest
        obs_vec = [ANY_FEATURE_IDX] * len(bins)
        obs_vec[feat] = value
        return obs_vec

    for state, *_rest in self.mean_val_state_action_outliers:
        save(state_obs(state), 'mean-val-outlier-s-{}.png'.format(state))

    for state, action, *_rest in self.pred_error_state_action_outliers:
        save(state_obs(state),
             'pred-error-outlier-s-{}-a-{}.png'.format(state, action))

    for feat, value, *_rest in self.mean_val_feature_outliers:
        save(single_feature_obs(feat, value),
             'mean-val-outlier-f-{}-v-{}.png'.format(feat, value))

    for feat, value, action, *_rest in self.pred_error_feature_outliers:
        save(single_feature_obs(feat, value),
             'pred-error-outlier-f-{}-v-{}-a-{}.png'.format(
                 feat, value, action))
def _save_visual_report(self, path):
    """Save one features image per (in)frequent state and feature-set.

    Handles, in order, ``freq_states``, ``infreq_states``,
    ``freq_feature_sets`` (entries whose first item is the feature-set) and
    ``infreq_feature_sets`` (entries that are the feature-sets themselves).

    :param path: first argument given to ``join`` when building each image
        file name.
    """
    bins = self.helper.get_features_bins()

    def save(obs_vec, file_name):
        self.helper.save_features_image(obs_vec, join(path, file_name))

    def save_states(entries, name_fmt):
        for state, *_rest in entries:
            obs_vec = get_features_from_index(state, bins)
            save(obs_vec, name_fmt.format(state))

    def save_feature_set(feat_set, name_fmt):
        # only the features in the set get a concrete value, the remaining
        # positions keep ANY_FEATURE_IDX
        obs_vec = [ANY_FEATURE_IDX] * len(bins)
        name_parts = []
        for feat, value in feat_set:
            obs_vec[feat] = value
            name_parts.append('-{}-{}'.format(feat, value))
        save(obs_vec, name_fmt.format(''.join(name_parts)))

    save_states(self.freq_states, 'freq-s-{}.png')
    save_states(self.infreq_states, 'infreq-s-{}.png')

    for feat_set, *_rest in self.freq_feature_sets:
        save_feature_set(feat_set, 'freq-fv{}.png')

    for feat_set in self.infreq_feature_sets:
        save_feature_set(feat_set, 'infreq-fv{}.png')
def _save_visual_report(self, path):
    """Save one features image per (un)certain-execution state and feature.

    Handles, in order, ``certain_states``, ``uncertain_states``,
    ``certain_feats`` and ``uncertain_feats``.

    :param path: first argument given to ``join`` when building each image
        file name.
    """
    bins = self.helper.get_features_bins()

    def save_states(entries, name_fmt):
        for state, *_rest in entries:
            obs_vec = get_features_from_index(state, bins)
            self.helper.save_features_image(
                obs_vec, join(path, name_fmt.format(state)))

    def save_features(entries, name_fmt):
        for feat, value, *_rest in entries:
            # only the feature of interest gets a concrete value
            obs_vec = [ANY_FEATURE_IDX] * len(bins)
            obs_vec[feat] = value
            self.helper.save_features_image(
                obs_vec, join(path, name_fmt.format(feat, value)))

    save_states(self.certain_states, 'certain-exec-s-{}.png')
    save_states(self.uncertain_states, 'uncertain-exec-s-{}.png')
    save_features(self.certain_feats, 'certain-exec-f-{}-v-{}.png')
    save_features(self.uncertain_feats, 'uncertain-exec-f-{}-v-{}.png')
def _save_visual_report(self, path):
    """Save one features image per earlier state and earlier state-action.

    :param path: first argument given to ``join`` when building each image
        file name.
    """
    bins = self.helper.get_features_bins()

    def save_state_image(state, file_name):
        obs_vec = get_features_from_index(state, bins)
        self.helper.save_features_image(obs_vec, join(path, file_name))

    for state, *_rest in self.earlier_states:
        save_state_image(state, 'earlier-s-{}.png'.format(state))

    for state, action, *_rest in self.earlier_actions:
        save_state_image(
            state, 'earlier-s-{}-a-{}.png'.format(state, action))
def get_sample_interesting_aspects(self, s, a, r, ns):
    """Name the execution-certainty aspects matched by a sample.

    At most one aspect is reported for each of ``certain_states``,
    ``uncertain_states``, ``certain_feats`` and ``uncertain_feats``: the
    first matching entry of each collection.

    :param s: state index of the sample; compared with the stored states and
        converted to a feature vector.
    :param a: not used by this implementation.
    :param r: not used by this implementation.
    :param ns: not used by this implementation.
    :return: list with the names of the matched aspects (possibly empty).
    """
    bins = self.helper.get_features_bins()
    found = []

    if any(state == s for state, *_rest in self.certain_states):
        found.append('certain-exec-s-{}'.format(s))
    if any(state == s for state, *_rest in self.uncertain_states):
        found.append('uncertain-exec-s-{}'.format(s))

    obs_vec = get_features_from_index(s, bins)

    def first_feature_match(entries):
        # first (feature, value) pair that the sample's feature vector has
        return next(
            ((feat, value) for feat, value, *_rest in entries
             if obs_vec[feat] == value),
            None)

    match = first_feature_match(self.certain_feats)
    if match is not None:
        found.append('certain-exec-f-{}-v-{}'.format(*match))

    match = first_feature_match(self.uncertain_feats)
    if match is not None:
        found.append('uncertain-exec-f-{}-v-{}'.format(*match))

    return found
def get_sample_interesting_aspects(self, s, a, r, ns):
    """Name the transition-certainty aspects matched by a sample.

    At most one aspect is reported for each of ``certain_trans``,
    ``uncertain_trans``, ``certain_feats`` and ``uncertain_feats``: the
    first entry of each collection that matches the sample.

    :param s: state index of the sample.
    :param a: action of the sample; entries must match it as well.
    :param r: not used by this implementation.
    :param ns: not used by this implementation.
    :return: list with the names of the matched aspects (possibly empty).
    """
    bins = self.helper.get_features_bins()
    found = []

    def has_transition(entries):
        return any(
            state == s and action == a for state, action, *_rest in entries)

    if has_transition(self.certain_trans):
        found.append('certain-trans-s-{}-a-{}'.format(s, a))
    if has_transition(self.uncertain_trans):
        found.append('uncertain-trans-s-{}-a-{}'.format(s, a))

    obs_vec = get_features_from_index(s, bins)

    def first_feature_match(entries):
        # first (feature, value) pair present in the sample's feature vector
        # and stored together with the sample's action
        return next(
            ((feat, value) for feat, value, action, *_rest in entries
             if obs_vec[feat] == value and action == a),
            None)

    match = first_feature_match(self.certain_feats)
    if match is not None:
        feat, value = match
        found.append('certain-feats-f-{}-v-{}-a-{}'.format(feat, value, a))

    match = first_feature_match(self.uncertain_feats)
    if match is not None:
        feat, value = match
        found.append(
            'uncertain-feats-f-{}-v-{}-a-{}'.format(feat, value, a))

    return found
def get_sample_interesting_aspects(self, s, a, r, ns):
    """Name the contradiction aspects matched by a sample.

    At most one aspect is reported for each of
    ``contradictory_value_states``, ``contradictory_count_states``,
    ``contradictory_goal_states`` and ``contradictory_feature_actions``.

    :param s: state index of the sample.
    :param a: action of the sample; only used for the feature-action check.
    :param r: not used by this implementation.
    :param ns: not used by this implementation.
    :return: list with the names of the matched aspects (possibly empty).
    """
    bins = self.helper.get_features_bins()
    found = []

    def has_state(entries):
        return any(state == s for state, *_rest in entries)

    if has_state(self.contradictory_value_states):
        found.append('contradictory-value-s-{}'.format(s))
    if has_state(self.contradictory_count_states):
        found.append('contradictory-count-s-{}'.format(s))
    if has_state(self.contradictory_goal_states):
        found.append('contradictory-goal-s-{}'.format(s))

    obs_vec = get_features_from_index(s, bins)

    # entries here are exactly (feature, value, action) triples
    match = next(
        ((feat, value)
         for feat, value, action in self.contradictory_feature_actions
         if obs_vec[feat] == value and action == a),
        None)
    if match is not None:
        feat, value = match
        found.append('contradictory-f-{}-v-{}-a-{}'.format(feat, value, a))

    return found
def get_sample_interesting_aspects(self, s, a, r, ns):
    """Name the frequency aspects matched by a sample.

    At most one aspect is reported for each of ``freq_states``,
    ``infreq_states``, ``freq_feature_sets`` and ``infreq_feature_sets``.
    A feature-set matches when every (feature, value) pair in it is present
    in the sample's feature vector.

    :param s: state index of the sample.
    :param a: not used by this implementation.
    :param r: not used by this implementation.
    :param ns: not used by this implementation.
    :return: list with the names of the matched aspects (possibly empty).
    """
    bins = self.helper.get_features_bins()
    found = []

    if any(state == s for state, *_rest in self.freq_states):
        found.append('frequent-s-{}'.format(s))
    if any(state == s for state, *_rest in self.infreq_states):
        found.append('infrequent-s-{}'.format(s))

    obs_vec = get_features_from_index(s, bins)

    def contains(feat_set):
        return all(obs_vec[feat] == value for feat, value in feat_set)

    # frequent entries carry extra items after the feature-set ...
    if any(contains(feat_set) for feat_set, *_rest in self.freq_feature_sets):
        found.append('frequent-feature-set-s-{}'.format(s))
    # ... while infrequent entries are the feature-sets themselves
    if any(contains(feat_set) for feat_set in self.infreq_feature_sets):
        found.append('infrequent-feature-set-s-{}'.format(s))

    return found
def get_sample_interesting_aspects(self, s, a, r, ns):
    """Name the value / prediction-error aspects matched by a sample.

    At most one aspect is reported for each of
    ``mean_val_state_action_outliers``, ``pred_error_state_action_outliers``,
    ``mean_val_feature_outliers`` and ``pred_error_feature_outliers``: the
    first entry of each collection that matches the sample.

    :param s: state index of the sample.
    :param a: action of the sample; the prediction-error checks require the
        stored action to be equal to it.
    :param r: not used by this implementation.
    :param ns: not used by this implementation.
    :return: list with the names of the matched aspects (possibly empty).
    """
    feats_nbins = self.helper.get_features_bins()
    aspects = []

    for st, *_ in self.mean_val_state_action_outliers:
        if st == s:
            aspects.append('mean-val-outlier-s-{}'.format(s))
            break

    # the stored action is bound to `ac`, never to `a`: rebinding `a` here
    # would discard the sample's action and corrupt the feature check below
    for st, ac, *_ in self.pred_error_state_action_outliers:
        if st == s and ac == a:
            aspects.append('pred-error-outlier-s-{}-a-{}'.format(s, a))
            break

    obs_vec = get_features_from_index(s, feats_nbins)

    for f, v, *_ in self.mean_val_feature_outliers:
        if obs_vec[f] == v:
            aspects.append('mean-val-outlier-f-{}-v-{}'.format(f, v))
            break

    for f, v, ac, *_ in self.pred_error_feature_outliers:
        if obs_vec[f] == v and ac == a:
            aspects.append(
                'pred-error-outlier-f-{}-v-{}-a-{}'.format(f, v, a))
            break

    return aspects
def _print_sequence(self, s, n, seq, prob, feats_nbins, action_names, file,
                    write_console):
    """Print a starting state followed by each step of a sequence.

    The first line shows the starting state with all its feature labels;
    every following line shows the action taken, the state reached and only
    the feature labels at the positions returned by ``self._vec_diff`` for
    the reached state and the previous one. A line of underscores closes
    the sequence.

    :param s: index of the starting state.
    :param n: count printed next to the starting state.
    :param seq: iterable of (action, next state index) pairs.
    :param prob: probability printed next to the starting state.
    :param feats_nbins: feature bins given to ``get_features_from_index``.
    :param action_names: indexable collection of action names.
    :param file: forwarded to ``print_line``.
    :param write_console: forwarded to ``print_line``.
    """
    prev_obs = get_features_from_index(s, feats_nbins)
    start_labels = self.helper.get_features_labels(prev_obs, True)
    header = '\t{}-{} (count: {}, prob. reaching target: {:.3e})'.format(
        s, start_labels, n, prob)
    print_line(header, file, write_console)

    for action, next_state in seq:
        next_obs = get_features_from_index(next_state, feats_nbins)
        all_labels = self.helper.get_features_labels(next_obs, True)
        changed_idxs = self._vec_diff(next_obs, prev_obs)
        shown_labels = [all_labels[idx] for idx in changed_idxs]
        # the reached state is the reference for the next step
        prev_obs = next_obs
        step = '\t\t{} -> {}-{}'.format(
            action_names[action], next_state, shown_labels)
        print_line(step, file, write_console)

    print_line('____________________________________', file, write_console)
def _save_visual_report(self, path):
    """Save one features image per contradictory state.

    Handles, in order, ``contradictory_value_states``,
    ``contradictory_count_states`` and ``contradictory_goal_states``.

    :param path: first argument given to ``join`` when building each image
        file name.
    """
    bins = self.helper.get_features_bins()

    def save_states(entries, name_fmt):
        for state, *_rest in entries:
            obs_vec = get_features_from_index(state, bins)
            self.helper.save_features_image(
                obs_vec, join(path, name_fmt.format(state)))

    save_states(self.contradictory_value_states,
                'contradictory-value-s-{}.png')
    save_states(self.contradictory_count_states,
                'contradictory-count-s-{}.png')
    save_states(self.contradictory_goal_states,
                'contradictory-goal-s-{}.png')
def _save_visual_report(self, path, clear=True):
    """Save one features image per value extremum and value-diff outlier.

    Handles, in order, ``local_minima_states``, ``absolute_minima_states``,
    ``local_maxima_states``, ``absolute_maxima_states``,
    ``val_diff_mean_action_outliers`` and
    ``val_diff_variance_state_outliers``.

    :param path: first argument given to ``join`` when building each image
        file name.
    :param clear: not used by this method body.
    """
    bins = self.helper.get_features_bins()

    def save_state_image(state, file_name):
        obs_vec = get_features_from_index(state, bins)
        self.helper.save_features_image(obs_vec, join(path, file_name))

    def save_states(entries, name_fmt):
        for state, *_rest in entries:
            save_state_image(state, name_fmt.format(state))

    save_states(self.local_minima_states, 'local-minimum-s-{}.png')
    save_states(self.absolute_minima_states, 'absolute-minimum-s-{}.png')
    save_states(self.local_maxima_states, 'local-maximum-s-{}.png')
    save_states(self.absolute_maxima_states, 'absolute-maximum-s-{}.png')

    # these entries also carry the action, which is part of the file name
    for state, action, *_rest in self.val_diff_mean_action_outliers:
        save_state_image(
            state,
            'val-diff-mean-outlier-s-{}-a-{}.png'.format(state, action))

    save_states(self.val_diff_variance_state_outliers,
                'val-diff-variance-outlier-s-{}.png')
def _save_report(self, file, write_console):
    """Print the earlier states and earlier state-action pairs report.

    Every line goes through ``print_line``.

    :param file: forwarded to ``print_line``.
    :param write_console: forwarded to ``print_line``.
    """
    bins = self.helper.get_features_bins()
    action_names = self.config.get_action_names()
    separator = '===================================='

    def out(text):
        print_line(text, file, write_console)

    def state_labels(state):
        obs_vec = get_features_from_index(state, bins)
        return self.helper.get_features_labels(obs_vec, True)

    out(separator)
    out('{} total time-steps'.format(self.total_time_steps))

    out(separator)
    out('{} earlier states found (min. support: {}, max time-step: {}):'.
        format(len(self.earlier_states), self.min_state_count,
               int(self.state_max_time_step * self.total_time_steps)))
    for state, time_step, count in self.earlier_states:
        out('\t{}-{} (count: {}, last time-step: {})'.format(
            state, state_labels(state), count, time_step))

    out(separator)
    out('{} earlier state-action pairs found (min. support: {}, max time-step: {}):'
        .format(len(self.earlier_actions), self.min_state_count,
                int(self.action_max_time_step * self.total_time_steps)))
    for state, action, time_step, count in self.earlier_actions:
        labels = state_labels(state)
        out('\t{}-{} - {} (count: {}, last time-step: {})'.format(
            state, labels, action_names[action], count, time_step))
def _save_visual_report(self, path):
    """Save images for the sequences to sub-goals and uncertain-future states.

    For every entry of ``certain_seqs_to_subgoal`` one image is saved for
    the starting state (step 0) and one for each state reached along the
    sequence (steps 1, 2, ...); then one image is saved per entry of
    ``uncertain_future_states``.

    :param path: first argument given to ``join`` when building each image
        file name.
    """
    bins = self.helper.get_features_bins()

    def save_state_image(state, file_name):
        obs_vec = get_features_from_index(state, bins)
        self.helper.save_features_image(obs_vec, join(path, file_name))

    for seq_idx, entry in enumerate(self.certain_seqs_to_subgoal):
        # entries have exactly four items; only the start and steps are used
        start, _, steps, _ = entry
        save_state_image(
            start,
            'certain-seq-subgoal-#{}-0-s-{}.png'.format(seq_idx, start))
        for step_idx, (action, next_state) in enumerate(steps, start=1):
            save_state_image(
                next_state,
                'certain-seq-subgoal-#{}-{}-a-{}-s-{}.png'.format(
                    seq_idx, step_idx, action, next_state))

    for state, *_rest in self.uncertain_future_states:
        save_state_image(state, 'uncertain-future-s-{}.png'.format(state))
def _save_report(self, file, write_console):
    """Print the reward report.

    Sections, in order: overall average reward, state-action outliers,
    per-action average rewards and feature-action outliers. Every line goes
    through ``print_line``.

    :param file: forwarded to ``print_line``.
    :param write_console: forwarded to ``print_line``.
    """
    bins = self.helper.get_features_bins()
    action_names = self.config.get_action_names()
    overall_avg, overall_std, overall_count = self.avg_reward
    separator = '===================================='

    def out(text):
        print_line(text, file, write_console)

    out(separator)
    out('Average overall reward: {:.3f} ± {:.3f} (count: {})'.format(
        overall_avg, overall_std, overall_count))

    out(separator)
    out('{} state-action outliers found (outlier threshold: {}):'.format(
        len(self.state_action_outliers), self.state_outlier_stds))
    for state, action, reward, count in self.state_action_outliers:
        obs_vec = get_features_from_index(state, bins)
        labels = self.helper.get_features_labels(obs_vec)
        out('\t{}-{} - {} (mean reward: {:.3f}, count: {})'.format(
            state, labels, action_names[action], reward, count))

    out(separator)
    out('Actions\' average rewards:')
    for action, (avg, std, count) in enumerate(self.action_rwds_avg):
        out('\t{}: {:.3f} ± {:.3f} (count: {})'.format(
            action_names[action], avg, std, count))

    out(separator)
    out('{} feature-action outliers found (outlier threshold: {}):'.format(
        len(self.feature_action_outliers), self.feature_outlier_stds))
    for feat, value, action, avg, std, count in self.feature_action_outliers:
        # zero-filled vector with only the feature of interest set, used to
        # look up that feature's label
        obs_vec = np.zeros(len(bins), np.uint32)
        obs_vec[feat] = value
        label = self.helper.get_features_labels(obs_vec)[feat]
        out('\t{} - {} (avg. reward: {:.3f} ± {:.3f}, count: {})'.format(
            label, action_names[action], avg, std, count))
def get_sample_interesting_aspects(self, s, a, r, ns):
    """Name the reward-outlier aspects matched by a sample.

    At most one aspect is reported for each of ``state_action_outliers``
    and ``feature_action_outliers``.

    :param s: state index of the sample.
    :param a: action of the sample; entries must match it as well.
    :param r: not used by this implementation.
    :param ns: not used by this implementation.
    :return: list with the names of the matched aspects (possibly empty).
    """
    bins = self.helper.get_features_bins()
    found = []

    if any(state == s and action == a
           for state, action, *_rest in self.state_action_outliers):
        found.append('mean-reward-outlier-s-{}-a-{}'.format(s, a))

    obs_vec = get_features_from_index(s, bins)

    # first (feature, value) pair present in the sample's feature vector and
    # stored together with the sample's action
    match = next(
        ((feat, value)
         for feat, value, action, *_rest in self.feature_action_outliers
         if obs_vec[feat] == value and action == a),
        None)
    if match is not None:
        feat, value = match
        found.append(
            'mean-reward-outlier-f-{}-v-{}-a-{}'.format(feat, value, a))

    return found
def _save_report(self, file, write_console):
    """Print the certain sequences to sub-goals and uncertain-future states.

    Sequences are printed by ``self._print_sequence``; every other line goes
    through ``print_line``.

    :param file: forwarded to ``print_line`` and ``_print_sequence``.
    :param write_console: forwarded to ``print_line`` and
        ``_print_sequence``.
    """
    bins = self.helper.get_features_bins()
    action_names = self.config.get_action_names()
    separator = '===================================='

    def out(text):
        print_line(text, file, write_console)

    out(separator)
    out('{} certain sequences to sub-goals found (min. prob > 0):'.format(
        len(self.certain_seqs_to_subgoal)))
    for start, count, steps, prob in self.certain_seqs_to_subgoal:
        self._print_sequence(start, count, steps, prob, bins, action_names,
                             file, write_console)

    out(separator)
    out('{} states with an uncertain future found:'.format(
        len(self.uncertain_future_states)))
    for state, count in self.uncertain_future_states:
        obs_vec = get_features_from_index(state, bins)
        labels = self.helper.get_features_labels(obs_vec, True)
        out('\t{} (count: {})'.format(labels, count))
def analyze(self):
    """Compute reward statistics and reward outliers from the agent's tables.

    Sets the following attributes:

    - ``state_action_outliers``: (state, action, reward, count) tuples for
      the supported pairs selected by ``get_outliers_dist_mean``, sorted by
      decreasing reward;
    - ``action_rwds_avg``: one (mean, std, number of pairs) tuple per action;
    - ``feature_action_outliers``: (feature, value, action, mean, std,
      number of rewards) tuples selected by ``get_outliers_dist_mean``,
      sorted by decreasing mean;
    - ``avg_reward``: (mean, std, number of pairs) over all supported pairs.

    A state-action pair is "supported" when its entry in ``agent.c_sa`` is
    at least ``min_state_count``.
    """
    # supported state-action pairs and their rewards
    supported = np.where(self.agent.c_sa >= self.min_state_count)
    sup_states = supported[0]
    sup_actions = supported[1]
    sup_rewards = self.agent.r_sa[supported]

    # state-action pairs with outlier rewards
    outlier_idxs = get_outliers_dist_mean(sup_rewards,
                                          self.state_outlier_stds)
    self.state_action_outliers = list(zip(
        sup_states[outlier_idxs].tolist(),
        sup_actions[outlier_idxs].tolist(),
        sup_rewards[outlier_idxs].tolist(),
        self.agent.c_sa[supported][outlier_idxs].tolist()))

    # reward statistics per action
    num_actions = self.config.num_actions
    rewards_by_action = [
        sup_rewards[sup_actions == action] for action in range(num_actions)
    ]
    self.action_rwds_avg = list(zip(
        [float(np.mean(rwds)) for rwds in rewards_by_action],
        [float(np.std(rwds)) for rwds in rewards_by_action],
        [len(rwds) for rwds in rewards_by_action]))

    # one reward bucket per (feature, feature value, action)
    feats_nbins = self.helper.get_features_bins()
    buckets = [
        [[[] for _ in range(num_actions)] for _ in range(nbins)]
        for nbins in feats_nbins
    ]
    for state, action, reward in zip(sup_states, sup_actions, sup_rewards):
        obs_vec = get_features_from_index(state, feats_nbins)
        # the reward counts for every feature value present in the state
        for feat, value in enumerate(obs_vec):
            buckets[feat][value][action].append(reward)

    # statistics of the non-empty buckets, then their outliers
    bucket_stats = []
    for feat, per_value in enumerate(buckets):
        for value, per_action in enumerate(per_value):
            for action, rwds in enumerate(per_action):
                if rwds:
                    bucket_stats.append(
                        (feat, value, action, float(np.mean(rwds)),
                         float(np.std(rwds)), len(rwds)))
    fva_outlier_idxs = get_outliers_dist_mean(
        [stats[3] for stats in bucket_stats], self.feature_outlier_stds)
    self.feature_action_outliers = [
        bucket_stats[idx] for idx in fva_outlier_idxs
    ]

    # overall reward statistics
    self.avg_reward = (float(np.mean(sup_rewards)),
                       float(np.std(sup_rewards)), len(sup_rewards))

    # highest rewards first
    self.state_action_outliers.sort(key=lambda entry: -entry[2])
    self.feature_action_outliers.sort(key=lambda entry: -entry[3])
def analyze(self):
    """Find certain / uncertain transitions, actions and feature-action pairs.

    Dispersion values come from ``get_distribution_evenness`` applied to the
    non-zero transition counts. Sets the following attributes:

    - ``certain_trans`` / ``uncertain_trans``: (state, action, support,
      dispersion) tuples for visited state-action pairs whose support is at
      least ``min_state_count``;
    - ``certain_actions`` / ``uncertain_actions``: (action, mean dispersion)
      tuples, the mean being taken over the supported pairs of the action;
    - ``certain_feats`` / ``uncertain_feats``: (feature, value, action,
      dispersion) tuples computed from the transition counts accumulated
      over the supported pairs whose state has that feature value.

    "Certain" lists are sorted by increasing dispersion and "uncertain"
    lists by decreasing dispersion.
    """
    self.certain_trans = []
    self.uncertain_trans = []

    num_actions = self.config.num_actions

    # gets visited states and their state-action counts
    visited_s = np.nonzero(self.agent.c_s)[0]
    visited_sa_counts = self.agent.c_sa[visited_s]
    actions_disps = np.full((len(visited_s), num_actions), np.nan)

    # initializes feature-action transition counts, one independent array
    # of shape (feature values, actions, states) per feature
    feats_nbins = self.helper.get_features_bins()
    num_feats = len(feats_nbins)
    feats_counts = [
        np.zeros((nbins, num_actions, self.config.num_states), np.uint)
        for nbins in feats_nbins
    ]

    # calculates state-action dispersions
    for i, s in enumerate(visited_s.tolist()):
        obs_vec = get_features_from_index(s, feats_nbins)
        for a in range(num_actions):

            # ignores state-action visits with insufficient support
            state_action_supp = visited_sa_counts[i][a].item()
            if state_action_supp < self.min_state_count:
                continue

            trans_counts = self.agent.c_sas[s][a]

            # updates feature-action transitions with this action's counts
            # only: adding the counts of all the state's actions here would
            # repeat them once per supported action and would also bring in
            # the actions skipped above
            for f in range(num_feats):
                feats_counts[f][obs_vec[f]][a] += trans_counts

            # calculates dispersion of possible transitions (only considers
            # non-zero transitions)
            dist = trans_counts[np.nonzero(trans_counts)]
            disp = float(get_distribution_evenness(dist))
            actions_disps[i][a] = disp
            num_next_states = len(dist)

            # checks for certain and uncertain transitions
            if disp <= self.certain_trans_max_disp:
                self.certain_trans.append((s, a, state_action_supp, disp))
            elif (num_next_states >= self.trans_min_states
                  and disp >= self.uncertain_trans_min_disp):
                self.uncertain_trans.append((s, a, state_action_supp, disp))

    # analyzes un/certain actions
    self.certain_actions = []
    self.uncertain_actions = []
    for a in range(num_actions):
        action_disps = actions_disps[:, a]
        valid_disps = action_disps[~np.isnan(action_disps)]

        # an action without any supported visit has no mean dispersion and
        # is neither certain nor uncertain
        if valid_disps.size == 0:
            continue

        mean_action_disp = float(np.mean(valid_disps))
        if mean_action_disp >= self.uncertain_action_min_disp:
            self.uncertain_actions.append((a, mean_action_disp))
        elif mean_action_disp <= self.certain_action_max_disp:
            self.certain_actions.append((a, mean_action_disp))

    # analyzes un/certain features
    self.certain_feats = []
    self.uncertain_feats = []
    for f in range(num_feats):
        for v in range(feats_nbins[f]):
            for a in range(num_actions):

                # gets dispersion (only considers non-zero transitions)
                counts = feats_counts[f][v][a]
                dist = counts[np.nonzero(counts)]

                # ignores all zero transitions
                if len(dist) == 0:
                    continue

                disp = float(get_distribution_evenness(dist))
                if disp >= self.uncertain_feat_min_disp:
                    self.uncertain_feats.append((f, v, a, disp))
                elif disp <= self.certain_feat_max_disp:
                    self.certain_feats.append((f, v, a, disp))

    # sorts lists
    self.certain_trans.sort(key=lambda e: e[3])
    self.uncertain_trans.sort(key=lambda e: -e[3])
    self.certain_actions.sort(key=lambda e: e[1])
    self.uncertain_actions.sort(key=lambda e: -e[1])
    self.certain_feats.sort(key=lambda e: e[3])
    self.uncertain_feats.sort(key=lambda e: -e[3])
def _save_report(self, file, write_console):
    """Print the transition-certainty report.

    Sections, in order: certain / uncertain state transitions, certain /
    uncertain actions and certain / uncertain state feature-action pairs.
    Every line goes through ``print_line``.

    :param file: forwarded to ``print_line``.
    :param write_console: forwarded to ``print_line``.
    """
    bins = self.helper.get_features_bins()
    action_names = self.config.get_action_names()
    separator = '===================================='

    def out(text):
        print_line(text, file, write_console)

    def print_transitions(entries):
        for state, action, supp, disp in entries:
            labels = self.helper.get_features_labels(
                get_features_from_index(state, bins), True)
            out('\t({}-{}, {}) (supp: {}, disp: {:.3f})'.format(
                state, labels, action_names[action], supp, disp))

    def print_actions(entries):
        for action, disp in entries:
            out('\t{} (disp: {:.3f})'.format(action_names[action], disp))

    def print_feature_actions(entries):
        for feat, value, action, disp in entries:
            # zero-filled vector with only the feature of interest set, used
            # to look up that feature's label
            obs_vec = np.zeros(len(bins), np.uint32)
            obs_vec[feat] = value
            label = self.helper.get_features_labels(obs_vec)[feat]
            out('\t{} - {} (disp: {:.3f})'.format(
                label, action_names[action], disp))

    out('Min. state-action support: {}'.format(self.min_state_count))

    out(separator)
    out('{} certain state transitions found (max. dispersion: {}):'.format(
        len(self.certain_trans), self.certain_trans_max_disp))
    print_transitions(self.certain_trans)

    out(separator)
    out('{} uncertain state transitions found (min. dispersion: {}):'.
        format(len(self.uncertain_trans), self.uncertain_trans_min_disp))
    print_transitions(self.uncertain_trans)

    out(separator)
    out('{} certain actions found (max. dispersion: {}):'.format(
        len(self.certain_actions), self.certain_action_max_disp))
    print_actions(self.certain_actions)

    out(separator)
    out('{} uncertain actions found (min. dispersion: {}):'.format(
        len(self.uncertain_actions), self.uncertain_action_min_disp))
    print_actions(self.uncertain_actions)

    out(separator)
    out('{} certain state feature-action pairs found (max disp: {}):'.
        format(len(self.certain_feats), self.certain_feat_max_disp))
    print_feature_actions(self.certain_feats)

    out(separator)
    out('{} uncertain state feature-action pairs found (min. disp: {}):'.
        format(len(self.uncertain_feats), self.uncertain_feat_min_disp))
    print_feature_actions(self.uncertain_feats)
def analyze(self):
    """Compute value and prediction-error statistics and their outliers.

    Only state-action pairs whose entry in ``agent.c_sa`` is at least
    ``min_state_count`` ("supported" pairs) are considered. Sets the
    following attributes:

    - ``avg_value``: (mean, std, number of pairs) of ``agent.q``;
    - ``mean_val_state_action_outliers``: (state, actions, value, count)
      tuples, sorted by decreasing value;
    - ``avg_pred_error``: (mean, std, number of pairs) of ``agent.dq``;
    - ``pred_error_state_action_outliers``: (state, action, error, count)
      tuples, sorted by decreasing error;
    - ``action_vals_avg``: (action, mean, std) tuples;
    - ``mean_val_feature_outliers``: (feature, value, mean, std) tuples,
      sorted by decreasing mean;
    - ``pred_error_feature_outliers``: (feature, value, action, mean, std)
      tuples, sorted by decreasing mean.

    Outliers are the entries selected by ``get_outliers_dist_mean``.
    """
    # supported state-action pairs
    supported = np.where(self.agent.c_sa >= self.min_state_count)
    sup_states = supported[0]
    sup_actions = supported[1]

    # overall value statistics
    sup_q = self.agent.q[supported]
    self.avg_value = (float(np.mean(sup_q)), float(np.std(sup_q)),
                      len(sup_q))

    # value of each state: the mean of the q-values of its supported pairs
    # NOTE(review): the value is a mean, yet the action lookup below tests
    # q-values for equality with it, which reads like code written for a
    # max -- confirm which one is intended
    q_by_state = group_by_key(list(zip(sup_states.tolist(), sup_q)))
    states = np.array([state for state, _ in q_by_state])
    state_values = np.array(
        [np.mean(values).item() for _, values in q_by_state])

    # states with outlier values and the actions whose q equals that value
    outlier_idxs = get_outliers_dist_mean(state_values,
                                          self.state_value_outlier_stds)
    outlier_states = states[outlier_idxs].tolist()
    outlier_actions = [
        np.where(self.agent.q[states[idx]] == state_values[idx])[0].tolist()
        for idx in outlier_idxs
    ]
    outlier_values = state_values[outlier_idxs].tolist()
    outlier_counts = self.agent.c_s[outlier_states].tolist()
    self.mean_val_state_action_outliers = list(
        zip(outlier_states, outlier_actions, outlier_values,
            outlier_counts))

    # overall prediction-error statistics
    sup_dq = self.agent.dq[supported]
    self.avg_pred_error = (float(np.mean(sup_dq)), float(np.std(sup_dq)),
                           len(sup_dq))

    # state-action pairs with outlier prediction errors (delta-q)
    outlier_idxs = get_outliers_dist_mean(sup_dq,
                                          self.pred_error_outlier_stds)
    self.pred_error_state_action_outliers = list(zip(
        sup_states[outlier_idxs].tolist(),
        sup_actions[outlier_idxs].tolist(),
        sup_dq[outlier_idxs].tolist(),
        self.agent.c_sa[supported][outlier_idxs].tolist()))

    # value statistics per action
    q_by_action = group_by_key(list(zip(sup_actions.tolist(), sup_q)))
    self.action_vals_avg = [
        (action, np.mean(values).item(), np.std(values).item())
        for action, values in q_by_action
    ]

    # one value bucket per (feature, feature value) and one prediction-error
    # bucket per (feature, feature value, action)
    feats_nbins = self.helper.get_features_bins()
    value_buckets = [[[] for _ in range(nbins)] for nbins in feats_nbins]
    dq_buckets = [
        [[[] for _ in range(self.config.num_actions)] for _ in range(nbins)]
        for nbins in feats_nbins
    ]

    for state, value in zip(states, state_values):
        obs_vec = get_features_from_index(state, feats_nbins)
        for feat, feat_value in enumerate(obs_vec):
            value_buckets[feat][feat_value].append(value)

    for state, action, dq in zip(sup_states, sup_actions, sup_dq):
        obs_vec = get_features_from_index(state, feats_nbins)
        for feat, feat_value in enumerate(obs_vec):
            dq_buckets[feat][feat_value][action].append(dq)

    # statistics of the non-empty buckets
    value_stats = []
    dq_stats = []
    for feat, nbins in enumerate(feats_nbins):
        for feat_value in range(nbins):
            values = value_buckets[feat][feat_value]
            if values:
                value_stats.append((feat, feat_value,
                                    float(np.mean(values)),
                                    float(np.std(values))))
            for action in range(self.config.num_actions):
                dqs = dq_buckets[feat][feat_value][action]
                if dqs:
                    dq_stats.append((feat, feat_value, action,
                                     float(np.mean(dqs)),
                                     float(np.std(dqs))))

    # feature outliers for both statistics
    outlier_idxs = get_outliers_dist_mean(
        [stats[2] for stats in value_stats], self.feature_outlier_stds)
    self.mean_val_feature_outliers = [
        value_stats[idx] for idx in outlier_idxs
    ]
    outlier_idxs = get_outliers_dist_mean(
        [stats[3] for stats in dq_stats], self.feature_outlier_stds)
    self.pred_error_feature_outliers = [
        dq_stats[idx] for idx in outlier_idxs
    ]

    # highest values first
    self.mean_val_state_action_outliers.sort(key=lambda entry: -entry[2])
    self.pred_error_state_action_outliers.sort(key=lambda entry: -entry[2])
    self.mean_val_feature_outliers.sort(key=lambda entry: -entry[2])
    self.pred_error_feature_outliers.sort(key=lambda entry: -entry[3])
def _save_report(self, file, write_console):
    """Print the contradictions report.

    Sections, in order: contradictory value states, contradictory count
    states, contradictory goal states and contradictory feature-action
    associations. Action distributions are printed by
    ``self._print_actions``; every other line goes through ``print_line``.

    :param file: forwarded to ``print_line`` and ``_print_actions``.
    :param write_console: forwarded to ``print_line`` and
        ``_print_actions``.
    """
    bins = self.helper.get_features_bins()
    action_names = self.config.get_action_names()
    separator = '===================================='

    def out(text):
        print_line(text, file, write_console)

    def state_labels(state):
        obs_vec = get_features_from_index(state, bins)
        return self.helper.get_features_labels(obs_vec, True)

    def print_divergent_state(state, count, jsd, diff_actions):
        # the two lines shared by the value and count sections
        labels = state_labels(state)
        diff_names = [action_names[action] for action in diff_actions]
        out('\t{}-{} (jsd: {:.3f}, count: {})'.format(
            state, labels, jsd, count))
        out('\t\tDivergent actions: {}'.format(diff_names))

    out(separator)
    out('Action JS divergence threshold: {} per action:'.format(
        self.action_dist_div_threshold / self.config.num_actions))

    out(separator)
    out('{} contradictory values states found):'.format(
        len(self.contradictory_value_states)))
    for state, count, jsd, diff_actions in self.contradictory_value_states:
        print_divergent_state(state, count, jsd, diff_actions)
        self._print_actions('Rwd. dist', diff_actions,
                            self.state_rewards[state], file, write_console)
        self._print_actions('Val. dist', diff_actions,
                            self.state_values[state], file, write_console)

    out(separator)
    out('{} contradictory count states found):'.format(
        len(self.contradictory_count_states)))
    for state, count, jsd, diff_actions in self.contradictory_count_states:
        print_divergent_state(state, count, jsd, diff_actions)
        self._print_actions('Val. dist', diff_actions,
                            self.state_values[state], file, write_console)
        self._print_actions('Count dist', diff_actions,
                            self.state_counts[state], file, write_console)

    out(separator)
    out('{} contradictory goal states found):'.format(
        len(self.contradictory_goal_states)))
    for state, count in self.contradictory_goal_states:
        out('\t{}-{} (count: {})'.format(state, state_labels(state), count))

    out(separator)
    out('{} contradictory feature-action associations found):'.format(
        len(self.contradictory_feature_actions)))
    for feat, value, action in self.contradictory_feature_actions:
        # zero-filled vector with only the feature of interest set, used to
        # look up that feature's label
        obs_vec = np.zeros(len(bins), np.uint32)
        obs_vec[feat] = value
        label = self.helper.get_features_labels(obs_vec)[feat]
        out('\t{} - {}'.format(label, action_names[action]))
def _save_report(self, file, write_console):
    """Print the value extrema and value-difference outliers report.

    Sections, in order: local minima, absolute minima, local maxima and
    absolute maxima states, value-difference mean outliers (state-action
    pairs) and value-difference variance outliers (states). Every line goes
    through ``print_line``.

    :param file: forwarded to ``print_line``.
    :param write_console: forwarded to ``print_line``.
    """
    feats_nbins = self.helper.get_features_bins()
    separator = '===================================='

    def out(text):
        print_line(text, file, write_console)

    def state_labels(s):
        obs_vec = get_features_from_index(s, feats_nbins)
        return self.helper.get_features_labels(obs_vec, True)

    def print_extrema(states, header_fmt, entry_fmt):
        # the four extrema sections only differ in their texts
        out(separator)
        out(header_fmt.format(len(states), self.min_transition_count))
        for s, val, ns_val_avg, n in states:
            out(entry_fmt.format(s, state_labels(s), n, val, ns_val_avg))

    print_extrema(
        self.local_minima_states,
        '{} local minima states found (min. transition support: {}):',
        '\t{}-{} (count: {}, value: {:.3f} <= avg next values: {:.3f})')
    print_extrema(
        self.absolute_minima_states,
        '{} absolute minima states found (min. transition support: {}):',
        '\t{}-{} (count: {}, value: {:.3f} <= avg next values: {:.3f})')
    print_extrema(
        self.local_maxima_states,
        '{} local maxima states found (min. transition support: {}):',
        '\t{}-{} (count: {}, value: {:.3f} >= avg next values: {:.3f})')
    print_extrema(
        self.absolute_maxima_states,
        '{} absolute maxima states found (min. transition support: {}):',
        '\t{}-{} (count: {}, value: {:.3f} >= avg next values: {:.3f})')

    # these entries are state-action pairs, so the header says so
    out(separator)
    out('{} value difference mean state-action outliers found (min. transition support: {}, outlier stds: {}):'
        .format(len(self.val_diff_mean_action_outliers),
                self.min_transition_count, self.state_diff_outlier_stds))
    action_names = self.config.get_action_names()
    for s, a, diff, n in self.val_diff_mean_action_outliers:
        out('\t{}-{} - {} (trans. count: {}, mean value diff.: {:.3f})'.
            format(s, state_labels(s), action_names[a], n, diff))

    # these entries are states only, so the header says so
    out(separator)
    out('{} value difference variance state outliers found (min. transition support: {}, outlier stds: {}):'
        .format(len(self.val_diff_variance_state_outliers),
                self.min_transition_count, self.state_diff_outlier_stds))
    for s, diff, n in self.val_diff_variance_state_outliers:
        out('\t{}-{} (count: {}, value diff. variance: {:.3f})'.format(
            s, state_labels(s), n, diff))
def _save_report(self, file, write_console):
    """
    Writes the textual report of the state-frequency analysis: overall visit statistics, (in)frequent states,
    frequent feature-sets, feature association rules and infrequent feature-sets.
    :param file: the report destination, forwarded unchanged to print_line.
    :param write_console: forwarded unchanged to print_line.
    :return:
    """
    divider = '===================================='
    bins = self.helper.get_features_bins()

    def emit(text):
        # every report line goes through print_line with the same destination arguments
        print_line(text, file, write_console)

    def describe_state(state):
        # labels of the features encoded by a state index
        return self.helper.get_features_labels(
            get_features_from_index(state, bins), True)

    # overall statistics
    emit(divider)
    emit('Total states visited: {}'.format(self.total_count))
    emit('Coverage of the state-space: {:.2f}%'.format(
        self.state_coverage * 100.))
    emit('Dispersion of the visits to the state-space: {:.3f}'.format(
        self.state_dispersion))

    # frequent states, with their frequency relative to the total visits
    emit(divider)
    emit('{} frequent states found (min. support: {}):'.format(
        len(self.freq_states), self.min_state_count))
    for state, visits in self.freq_states:
        rel_freq = visits / float(self.total_count)
        emit('\t{}-{} (count: {}, freq.: {:.3f})'.format(
            state, describe_state(state), visits, rel_freq))

    # infrequent states
    emit(divider)
    emit('{} infrequent states found (max. support: {}):'.format(
        len(self.infreq_states), self.max_state_count))
    for state, visits in self.infreq_states:
        emit('\t{}-{} (count: {})'.format(
            state, describe_state(state), visits))

    # frequent feature-sets
    emit(divider)
    emit('{} frequent feature-sets found (min. support: {}, min. assoc. strength: {}):'.format(
        len(self.freq_feature_sets), self.min_feat_set_count,
        self.min_feat_set_assoc))
    total_feats = len(bins)
    for items, strength in self.freq_feature_sets:
        emit('\t{} (jacc: {:.3f})'.format(
            self._get_feats_labels(items, total_feats), strength))

    # feature rules, whose count is reported as a fraction of the total visits
    emit(divider)
    emit('{} interesting feature-rules found (min. confidence: {}):'.format(
        len(self.freq_feature_rules), self.min_feat_rule_conf))
    for antecedent, consequent, support, confidence in self.freq_feature_rules:
        rel_support = support / float(self.total_count)
        ant_labels = self._get_feats_labels(antecedent, total_feats)
        cons_labels = self._get_feats_labels(consequent, total_feats)
        emit('\t{} => {} (freq: {:.3f}, conf: ({:.3f}))'.format(
            ant_labels, cons_labels, rel_support, confidence))

    # infrequent feature-sets
    emit(divider)
    emit('{} infrequent feature-sets found (max. assoc. strength: {}):'.format(
        len(self.infreq_feature_sets), self.max_feat_set_assoc))
    for items in self.infreq_feature_sets:
        emit('\t{}'.format(self._get_feats_labels(items, total_feats)))
def analyze(self):
    """
    Performs an analysis of the behavior of the agent during its interaction with the environment.
    Namely, it calculates the state-action coverage (over visited states only), the mean dispersion of action
    executions, and the (un)certain states and state features.
    Results are stored in `state_action_coverage`, `mean_action_dispersion` (a `(mean, std, count)` tuple),
    `certain_states`, `uncertain_states`, `certain_feats` and `uncertain_feats`.
    If no state was visited, the coverage is `0.0` and the mean dispersion is `(0.0, 0.0, 0)`.
    :return:
    """
    num_actions = self.config.num_actions

    # gets visited state-action pairs
    visited_s = np.nonzero(self.agent.c_s)[0]
    visited_sa_counts = self.agent.c_sa[visited_s]

    # calculates coverage, guarding against a division by zero when nothing was visited
    num_visited_sa = np.count_nonzero(visited_sa_counts)
    total_sa = len(visited_sa_counts) * num_actions
    self.state_action_coverage = \
        float(num_visited_sa) / total_sa if total_sa > 0 else 0.

    # calculates mean action execution dispersion
    state_dispersions = [
        get_distribution_evenness(c_a) for c_a in visited_sa_counts
    ]
    if state_dispersions:
        self.mean_action_dispersion = (np.mean(state_dispersions).item(),
                                       np.std(state_dispersions).item(),
                                       len(state_dispersions))
    else:
        # np.mean / np.std of an empty list would yield NaN and a runtime warning
        self.mean_action_dispersion = (0., 0., 0)

    # analyzes un/certain states
    self.certain_states = []
    self.uncertain_states = []
    for i, disp in enumerate(state_dispersions):

        # check state visit ratio
        s = visited_s[i].item()
        if self.agent.c_s[s] < self.min_state_count:
            continue

        # checks dispersions
        if disp >= self.uncertain_state_min_disp:
            self.uncertain_states.append((s, float(disp)))
        elif disp <= self.certain_state_max_disp:
            # if certain state, get the action(s) usually selected
            max_actions = get_outliers_dist_mean(
                visited_sa_counts[i], 1.5, below=False)
            self.certain_states.append((s, float(disp), max_actions))

    # gets action counts for each feature: one independent (num. bins x num. actions) table per feature
    feats_nbins = self.helper.get_features_bins()
    num_feats = len(feats_nbins)
    feats_counts = [
        np.zeros((feats_nbins[f], num_actions), np.uint)
        for f in range(num_feats)
    ]
    for s, c_sa in zip(visited_s, visited_sa_counts):
        obs_vec = get_features_from_index(s, feats_nbins)
        for f in range(num_feats):
            feats_counts[f][obs_vec[f]] += c_sa

    # analyzes un/certain state-features
    self.certain_feats = []
    self.uncertain_feats = []
    for f in range(num_feats):
        for v in range(feats_nbins[f]):

            # checks dispersions
            disp = float(get_distribution_evenness(feats_counts[f][v]))
            if disp >= self.uncertain_feat_min_disp:
                self.uncertain_feats.append((f, v, disp))
            elif disp <= self.certain_feat_max_disp:
                # if certain feature, get the action(s) usually selected
                max_actions = get_outliers_dist_mean(
                    feats_counts[f][v], 1.5, below=False)
                self.certain_feats.append((f, v, disp, max_actions))

    # sorts lists: certain ones by ascending dispersion, uncertain ones by descending dispersion
    self.certain_states.sort(key=lambda e: e[1])
    self.uncertain_states.sort(key=lambda e: -e[1])
    self.certain_feats.sort(key=lambda e: e[2])
    self.uncertain_feats.sort(key=lambda e: -e[2])
def _save_report(self, file, write_console):
    """
    Writes the textual report of the value analysis: overall average value and prediction error, state-action
    value and prediction-error outliers, per-action average values, and feature value / prediction-error
    outliers.
    :param file: the report destination, forwarded unchanged to print_line.
    :param write_console: forwarded unchanged to print_line.
    :return:
    """
    divider = '===================================='
    bins = self.helper.get_features_bins()
    act_names = self.config.get_action_names()

    def emit(text):
        # every report line goes through print_line with the same destination arguments
        print_line(text, file, write_console)

    def describe_state(state):
        # labels of the features encoded by a state index
        return self.helper.get_features_labels(
            get_features_from_index(state, bins), True)

    def describe_feature(feat, value):
        # builds an all-zeros observation with only the given feature set and picks that feature's label
        obs = np.zeros(len(bins), np.uint32)
        obs[feat] = value
        return self.helper.get_features_labels(obs)[feat]

    # overall value
    avg, dev, count = self.avg_value
    emit(divider)
    emit('Average overall value: {:.3f} ± {:.3f} (count: {})'.format(
        avg, dev, count))

    # state-action value outliers
    emit(divider)
    emit('{} state-action value outliers found (min. support: {}, outlier threshold: {}):'.format(
        len(self.mean_val_state_action_outliers), self.min_state_count,
        self.state_value_outlier_stds))
    for state, actions, value, visits in self.mean_val_state_action_outliers:
        state_labels = describe_state(state)
        max_act_labels = [act_names[act] for act in actions]
        emit('\t{}-{} (value: {:.3f}, count: {})\n\t\tMax actions: {}'.format(
            state, state_labels, value, visits, max_act_labels))

    # overall prediction error
    avg, dev, count = self.avg_pred_error
    emit(divider)
    emit('Average overall prediction error: {:.3f} ± {:.3f} (count: {})'.format(
        avg, dev, count))

    # state-action prediction error outliers
    emit(divider)
    emit('{} state-action prediction error outliers found (min. support: {}, outlier threshold: {}):'.format(
        len(self.pred_error_state_action_outliers), self.min_state_count,
        self.pred_error_outlier_stds))
    for state, act, error, visits in self.pred_error_state_action_outliers:
        emit('\t{}-{} - {} (avg. prediction error: {:.3f}, count: {})'.format(
            state, describe_state(state), act_names[act], error, visits))

    # per-action average values
    emit(divider)
    emit('Actions\' average values:')
    for act, act_avg, act_dev in self.action_vals_avg:
        emit('\t{}: {:.3f} ± {:.3f}'.format(act_names[act], act_avg, act_dev))

    # feature value outliers
    emit(divider)
    emit('{} feature value outliers found (outlier threshold: {}):'.format(
        len(self.mean_val_feature_outliers), self.feature_outlier_stds))
    for feat, value, feat_avg, feat_dev in self.mean_val_feature_outliers:
        emit('\t{} (avg. value: {:.3f} ± {:.3f})'.format(
            describe_feature(feat, value), feat_avg, feat_dev))

    # feature prediction error outliers
    emit(divider)
    emit('{} feature pred. error outliers found (outlier threshold: {}):'.format(
        len(self.pred_error_feature_outliers), self.feature_outlier_stds))
    for feat, value, act, feat_avg, feat_dev in self.pred_error_feature_outliers:
        emit('\t{} - {} (avg. pred. error: {:.3f} ± {:.3f})'.format(
            describe_feature(feat, value), act_names[act], feat_avg, feat_dev))
raise ValueError( 'Could not find any recorded episodes in: {}.'.format(agent_dir)) print('{} episodes were recorded'.format(len(recorded_episodes))) # checks output dir output_dir = sys.argv[2] if len( sys.argv) > 2 else get_observations_output_dir(agent_dir) if not exists(output_dir): makedirs(output_dir) # no window mode, to create the images os.environ['SDL_VIDEODRIVER'] = 'dummy' # for each sequence feats_nbins = helper.get_features_bins() for e in recorded_episodes: # gets state sequence, convert to observation sequence obs_vec_seq = [] s_seq = behavior_tracker.s_s[e] for t in range(len(s_seq)): obs_vec = get_features_from_index(s_seq[t], feats_nbins) obs_vec_seq.append(obs_vec) # saves obs sequence to video file file_name = 'episode {}.mp4'.format(e) print('Creating \'{}\' from sequence with {} time-steps...'.format( file_name, len(obs_vec_seq))) helper.save_features_video(obs_vec_seq, join(output_dir, file_name), FPS)
def _save_report(self, file, write_console):
    """
    Writes the textual report of the execution-certainty analysis: state-action coverage, mean dispersion of
    action executions, (un)certain states and (un)certain state features.
    :param file: the report destination, forwarded unchanged to print_line.
    :param write_console: forwarded unchanged to print_line.
    :return:
    """
    divider = '===================================='
    bins = self.helper.get_features_bins()
    act_names = self.config.get_action_names()

    def emit(text):
        # every report line goes through print_line with the same destination arguments
        print_line(text, file, write_console)

    def describe_state(state):
        # labels of the features encoded by a state index
        return self.helper.get_features_labels(
            get_features_from_index(state, bins), True)

    # coverage and mean dispersion
    emit(divider)
    emit('Coverage of the state-action space (for visited states): {:.2f}%'.format(
        self.state_action_coverage * 100.))
    avg, dev, count = self.mean_action_dispersion
    emit('Mean dispersion of the execution of actions in visited states: {:.3f} ± {:.3f} (count: {})'.format(
        avg, dev, count))

    # certain states, along with the actions usually selected in them
    emit(divider)
    emit('{} certain states found (max. dispersion: {}):'.format(
        len(self.certain_states), self.certain_state_max_disp))
    for state, dispersion, top_actions in self.certain_states:
        top_labels = [act_names[act] for act in top_actions]
        emit('\t{}-{} (mean disp.: {:.3f}, max actions {})'.format(
            state, describe_state(state), dispersion, top_labels))

    # uncertain states
    emit(divider)
    emit('{} uncertain states found (min. dispersion: {}):'.format(
        len(self.uncertain_states), self.uncertain_state_min_disp))
    for state, dispersion in self.uncertain_states:
        emit('\t{}-{} (mean disp.: {:.3f})'.format(
            state, describe_state(state), dispersion))

    # certain state features
    emit(divider)
    emit('{} certain state features found (max. dispersion: {}):'.format(
        len(self.certain_feats), self.certain_feat_max_disp))
    total_feats = len(bins)

    def describe_feature(feat, value):
        # builds an all-zeros observation with only the given feature set and picks that feature's label
        obs = np.zeros(total_feats, np.uint32)
        obs[feat] = value
        return self.helper.get_features_labels(obs)[feat]

    for feat, value, dispersion, top_actions in self.certain_feats:
        top_labels = [act_names[act] for act in top_actions]
        emit('\t{} (mean disp.: {:.3f}, max actions: {})'.format(
            describe_feature(feat, value), dispersion, top_labels))

    # uncertain state features
    emit(divider)
    emit('{} uncertain state features found (min. dispersion: {}):'.format(
        len(self.uncertain_feats), self.uncertain_feat_min_disp))
    for feat, value, dispersion in self.uncertain_feats:
        emit('\t{} (mean disp.: {:.3f})'.format(
            describe_feature(feat, value), dispersion))
# checks possible synchronization errors if done and t != seq_len - 1: raise ValueError( 'Environment ended at {}, before tracked behavior which ended at: {}' .format(t, seq_len - 1)) if t == seq_len - 1 and not done: raise ValueError( 'Environment did not end at {} like it was supposed to'. format(t)) if not done and s != s_seq[t + 1]: raise ValueError( 'Environment state {} does not match tracked state {}'. format(s, s_seq[t + 1])) if not advance_episode: obs_vec = get_features_from_index(s, feats_nbins) # prints information clean_console() print('Episode: {}, time-step: {}'.format(e, t)) print('Action: {}, reward: {}'.format(action_names[a], r)) print( '------------------------------------------------------------' ) helper.print_features(obs_vec, True) print( '------------------------------------------------------------' ) for act_name in action_names: print(act_name.upper().ljust(15), end='\t') print()
def analyze(self):
    """
    Performs the state-frequency analysis. Computes the coverage and dispersion of the visited states, the
    total visit count, the (in)frequent states, the frequent feature-sets and association rules mined from
    the visited states' features, and the infrequent feature-sets. All result lists are sorted at the end.
    :return:
    """
    # coverage is the fraction of states with a non-zero count, dispersion is measured over those counts
    state_counts = self.agent.c_s
    visited_states_counts = state_counts[np.nonzero(state_counts)]
    self.state_coverage = \
        float(len(visited_states_counts)) / self.config.num_states
    self.state_dispersion = float(
        get_distribution_evenness(visited_states_counts))

    feats_nbins = self.helper.get_features_bins()

    # each state contributes its (feature, value) pairs as a transaction, once per visit
    state_transactions = []
    self.total_count = 0
    for state in range(self.agent.num_states):
        visits = self.agent.c_s[state].item()
        self.total_count += visits
        obs_vec = get_features_from_index(state, feats_nbins)
        transaction = [(feat, value.item())
                       for feat, value in enumerate(obs_vec)]
        state_transactions.extend([transaction] * visits)

    # splits states into frequent ones and (visited but) infrequent ones
    self.freq_states = []
    self.infreq_states = []
    for state in range(self.agent.num_states):
        visits = self.agent.c_s[state].item()
        if visits >= self.min_state_count:
            self.freq_states.append((state, visits))
            continue
        if 0 < visits <= self.max_state_count:
            self.infreq_states.append((state, visits))

    # mines frequent feature-sets
    freq_tree = FPTree(
        state_transactions, self.min_feat_set_count, None, None)
    patterns, _, counts = find_patterns_above(
        freq_tree, self.min_feat_set_assoc)

    # keeps only maximal patterns having more than one item
    filter_maximal(patterns)
    self.freq_feature_sets = [
        (items, strength) for items, strength in patterns.items()
        if len(items) > 1
    ]

    # derives the feature association rules
    self.freq_feature_rules = generate_association_rules(
        patterns, counts, self.min_feat_rule_conf)

    # mines infrequent feature-sets from a tree built with a minimum count of 1
    infreq_tree = FPTree(state_transactions, 1, None, None)
    self.infreq_feature_sets = find_patterns_below(
        infreq_tree, self.max_feat_set_assoc)
    self._filter_invalid_infreq_feat_sets()
    self._filter_maximal_infreq_feat_sets()

    # sorts lists
    self.freq_states.sort(key=lambda entry: -entry[1])
    self.infreq_states.sort(key=lambda entry: entry[1])
    self.freq_feature_sets.sort(key=lambda entry: -entry[1])
    self.freq_feature_rules.sort(key=lambda entry: -entry[3])
    self.infreq_feature_sets.sort(key=len)