def get_distribs(all_states, all_actions):
    """Compute one distribution per individual.

    Both inputs are split along their first axis into ``len(all_states)``
    equally sized pieces, and every (states, actions) pair is handed to
    ``pe.get_distrib``.

    Args:
        all_states: NumPy array whose first axis indexes individuals.
        all_actions: NumPy array split into the same number of pieces as
            ``all_states``.

    Returns:
        A list holding the result of ``pe.get_distrib`` for each individual,
        or an empty list when ``all_states`` is empty.
    """
    n_individuals = len(all_states)
    if n_individuals == 0:
        # np.split cannot divide an array into zero sections.
        return []

    # Each piece keeps a leading axis of length one.
    state_chunks = np.split(all_states, n_individuals)
    action_chunks = np.split(all_actions, n_individuals)
    return [
        pe.get_distrib(states, actions)
        for states, actions in zip(state_chunks, action_chunks)
    ]
def save_data(path, sample_length, n_new_customers, compare_features):
    """Evaluate the checkpoints stored under ``path`` and save the result.

    Reads ``args.txt`` from ``path``, samples one trajectory per customer
    (``args['n_experts']`` experts followed by ``n_new_customers`` new
    customers) and evaluates every directory below ``path`` whose name ends
    with ``'checkpoint'`` and whose step count exceeds 1,000,000.  The
    collected values are wrapped in ``Result`` and passed to ``save_result``.

    Args:
        path: Run directory containing ``args.txt`` and the checkpoints.
        sample_length: Passed to ``pe.get_env_and_model`` and used as
            ``n_expert_time_steps`` when generating customer trajectories.
        n_new_customers: Number of customers sampled on top of the experts.
        compare_features: If true, customers are summarised with
            ``get_features``; otherwise with ``pe.get_distrib``.
    """
    # Use a context manager so the args file is closed deterministically.
    with open(join(path, 'args.txt'), 'r') as args_file:
        args = json.load(args_file)

    n_experts = args['n_experts']
    n_customers = n_experts + n_new_customers
    uses_adam_basket = args['state_rep'] == 71

    # Sample customer data
    if uses_adam_basket:
        env = pe.get_env_and_model(args, '', sample_length, only_env=True,
                                   n_experts_in_adam_basket=n_customers)
    else:
        env = pe.get_env_and_model(args, '', sample_length, only_env=True)

    customer_trajectories = env.generate_expert_trajectories(
        out_dir=None,
        n_demos_per_expert=1,
        n_experts=n_customers,
        n_expert_time_steps=sample_length
    )
    # The first n_experts trajectories are the experts, the rest are the
    # new customers.
    expert_states = np.array(customer_trajectories['states'][:n_experts])
    expert_actions = np.array(customer_trajectories['actions'][:n_experts])
    new_states = np.array(customer_trajectories['states'][n_experts:])
    new_actions = np.array(customer_trajectories['actions'][n_experts:])

    if compare_features:
        avg_expert = get_features(expert_actions, average=True)
        experts = get_features(expert_actions)
        new_customers = get_features(new_actions)
    else:
        avg_expert = pe.get_distrib(expert_states, expert_actions)
        experts = get_distribs(expert_states, expert_actions)
        new_customers = get_distribs(new_states, new_actions)

    model_paths = [d for d, _, _ in os.walk(path) if d.endswith('checkpoint')]
    model_paths.sort(key=get_key_from_path)

    models = {}
    for mp in model_paths:
        n_train_steps = get_key_from_path(mp)
        if int(n_train_steps) <= 1000000:
            continue

        print('Collecting data from model saved after %s steps' % n_train_steps)

        # NOTE(review): the two evaluation helpers are not given
        # ``sample_length``; they appear to read a module-level value of the
        # same name -- confirm it matches the argument passed in here.
        if uses_adam_basket:
            agent, abs_diffs, errors = evaluate_on_new_customers(
                args, mp, experts, new_customers, compare_features,
                n_new_customers)
        else:
            agent, abs_diffs, errors = evaluate_on_new_customers(
                args, mp, experts, new_customers, compare_features)
        avg_dist = evaluate_on_pop_level(args, mp, avg_expert, compare_features)

        models[n_train_steps] = (agent, abs_diffs, errors, avg_dist)

    result = Result(models)
    save_result(result, path)
def evaluate_on_pop_level(args, model_path, avg_expert, compare_features):
    """Return the distance between the average agent and ``avg_expert``.

    For each of the ``args['n_experts']`` experts, a customer is spawned, a
    random window of ``args['n_historical_events']`` columns is cut from a
    fresh sample and used to build an initial state, and the policy stored at
    ``model_path`` is rolled out from it.  All rollouts are then summarised
    into one average agent, which is compared with ``avg_expert``.

    Args:
        args: Run configuration dictionary.
        model_path: Path of the model to load.
        avg_expert: Summary of the experts to compare against.
        compare_features: If true, use ``get_features`` and the ``ed``
            metric; otherwise ``pe.get_distrib`` and the ``wd`` metric.

    Returns:
        The value of the chosen metric between average agent and
        ``avg_expert``.
    """
    n_experts = args['n_experts']
    # ``sample_length`` is not a parameter; it is read from module scope.
    env, model, obs_normalizer = pe.get_env_and_model(
        args, model_path, sample_length, only_env=False)

    if compare_features:
        metric = ed
    else:
        metric = wd

    rollout_states = []
    rollout_actions = []
    for expert_idx in range(n_experts):
        # Initialize agent with data from this expert
        env.model.spawn_new_customer(expert_idx)
        sample = env.case.get_sample(
            n_demos_per_expert=1,
            n_historical_events=args['n_historical_events'],
            n_time_steps=1000
        )
        all_data = np.hstack(sample[0])  # history, data = sample[0]

        # Pick a random window of historical events as the starting history.
        start = np.random.randint(
            0, all_data.shape[1] - args['n_historical_events'])
        stop = start + args['n_historical_events']
        history = all_data[:, start:stop]

        if args['state_rep'] == 71:
            shuffled_basket = np.random.permutation(
                env.case.adam_baskets[expert_idx])
            env.case.i_expert = expert_idx
            state_key = shuffled_basket[0]
        else:
            state_key = expert_idx
        initial_state = env.case.get_initial_state(history, state_key)

        states, actions = pe.sample_from_policy(
            env, model, obs_normalizer, initial_state=initial_state)
        rollout_states.append(states)
        rollout_actions.append(actions)

    rollout_states = np.array(rollout_states)
    rollout_actions = np.array(rollout_actions)

    if compare_features:
        avg_agent = get_features(rollout_actions, average=True)
    else:
        avg_agent = pe.get_distrib(rollout_states, rollout_actions)

    return metric(avg_agent, avg_expert)
def plot(data):
    """Plot Wasserstein distance against training episodes.

    Customer trajectories are sampled using the ``args.txt`` of the first
    directory in ``dir_path`` whose name starts with ``'2020'``.  For each
    parameter value in ``data`` and each stored model with more than
    1,000,000 training steps, the agent distributions are compared with the
    sampled customers.  Two line plots are shown: one for the experts (the
    first ``args['n_experts']`` customers) and one for the new customers.

    Args:
        data: Mapping from parameter value to a list of results; every
            result exposes a ``models`` mapping from training steps to the
            per-customer agent distributions.
    """
    import pandas as pd
    import seaborn as sns

    # Sample customer data
    first_run_dir = [
        join(dir_path, x) for x in os.listdir(dir_path) if x.startswith('2020')
    ][0]
    # Use a context manager so the args file is closed deterministically.
    with open(join(first_run_dir, 'args.txt'), 'r') as args_file:
        args = json.load(args_file)

    n_experts = args['n_experts']

    if args['state_rep'] == 71:
        env = pe.get_env_and_model(
            args, '', sample_length, only_env=True,
            n_experts_in_adam_basket=n_experts + n_new_customers)
    else:
        env = pe.get_env_and_model(args, '', sample_length, only_env=True)

    customer_trajectories = env.generate_expert_trajectories(
        out_dir=None,
        n_demos_per_expert=1,
        n_experts=n_experts + n_new_customers,
        n_expert_time_steps=sample_length)
    customer_states = np.array(customer_trajectories['states'])
    customer_actions = np.array(customer_trajectories['actions'])
    customers = get_distribs(customer_states, customer_actions)

    columns = [
        'int_value', 'Parameter value', 'Number of training episodes',
        'Wasserstein distance'
    ]
    # Collect plain rows and build each DataFrame once; growing a frame one
    # row at a time copies it on every insertion.
    expert_rows = []
    new_customer_rows = []

    for param_value, results in data.items():
        print('Processing parameter value {}'.format(param_value))
        label = get_label_from_param_value(param_value, param)
        for result in results:
            for n_train_steps, agent in result.models.items():
                if int(n_train_steps) <= 1000000:
                    continue
                n_train_episodes = int(n_train_steps) / args['episode_length']
                for i, (a, c) in enumerate(zip(agent, customers)):
                    assert len(a) == 1
                    row = [param_value, label, n_train_episodes, wd(a[0], c)]
                    if i < n_experts:
                        expert_rows.append(row)
                    else:
                        new_customer_rows.append(row)

    # sort_values returns a new frame, so the result has to be kept.
    df_experts = pd.DataFrame(
        expert_rows, columns=columns).sort_values(by=['int_value'])
    df_new_customers = pd.DataFrame(
        new_customer_rows, columns=columns).sort_values(by=['int_value'])

    sns.set(style='darkgrid')

    g1 = sns.relplot(x='Number of training episodes',
                     y='Wasserstein distance',
                     hue='Parameter value',
                     ci=95,
                     kind='line',
                     data=df_experts,
                     facet_kws={'legend_out': False})
    g1.fig.subplots_adjust(top=0.95)
    ax1 = g1.axes[0][0]
    ax1.set_title('Comparison with experts')

    g2 = sns.relplot(x='Number of training episodes',
                     y='Wasserstein distance',
                     hue='Parameter value',
                     ci=95,
                     kind='line',
                     data=df_new_customers,
                     facet_kws={'legend_out': False})
    g2.fig.subplots_adjust(top=0.95)
    ax2 = g2.axes[0][0]
    ax2.set_title('Comparison with new customers')

    for ax in (ax1, ax2):
        handles, labels = ax.get_legend_handles_labels()
        # The first legend entry is kept as the heading; the remaining
        # entries are ordered by the integer that starts each label.
        labels2, handles2 = zip(*sorted(
            zip(labels[1:], handles[1:]),
            key=lambda t: int(t[0].split(' ')[0])))
        labels2 = list(labels2)
        handles2 = list(handles2)
        labels2.insert(0, get_label_from_param(param))
        handles2.insert(0, handles[0])
        ax.legend(handles2, labels2)

    plt.show()
def evaluate(args, model_path, n_new_customers, sample_length, N,
             customer_states):
    """Roll out the model ``N`` times for every customer.

    Args:
        args: Run configuration dictionary.
        model_path: Path of the model to load.
        n_new_customers: Number of customers beyond ``args['n_experts']``.
        sample_length: Passed on to ``pe.get_env_and_model``.
        N: Number of rollouts per customer.
        customer_states: Indexable by customer; one entry of
            ``customer_states[i]`` is drawn at random as the initial state
            of each rollout for customer ``i``.

    Returns:
        A list with one entry per customer, each a list of ``N`` results of
        ``pe.get_distrib``.

    Raises:
        NotImplementedError: For ``state_rep`` 22 or 221, and for
            ``state_rep`` 23 once the customer index reaches
            ``args['n_experts']``.
    """
    n_experts = args['n_experts']
    n_customers = n_experts + n_new_customers

    if args['state_rep'] == 71:
        env, model, obs_normalizer = pe.get_env_and_model(
            args,
            model_path,
            sample_length,
            only_env=False,
            n_experts_in_adam_basket=n_customers)
    else:
        env, model, obs_normalizer = pe.get_env_and_model(
            args, model_path, sample_length, only_env=False)

    agents = []
    for customer_idx in range(n_customers):
        rollouts = []
        for _ in range(N):
            # ``and`` binds tighter than ``or``: the index restriction only
            # applies to state_rep 23.
            # NOTE(review): confirm this grouping is the intended one.
            unsupported = (
                args['state_rep'] == 22
                or args['state_rep'] == 221
                or (args['state_rep'] == 23 and customer_idx >= n_experts)
            )
            if unsupported:
                raise NotImplementedError

            initial_state = random.choice(customer_states[customer_idx])
            states, actions = pe.sample_from_policy(
                env, model, obs_normalizer, initial_state=initial_state)
            rollouts.append(
                pe.get_distrib(np.array(states), np.array(actions)))
        agents.append(rollouts)

    return agents
def evaluate_on_new_customers(args, model_path, experts, new_customers,
                              compare_features, n_new_customers=None):
    """Roll out the model for every new customer and score the rollouts.

    For each new customer, the ``k`` nearest experts are found with the
    chosen metric.  The model is then rolled out ``N`` times from random
    history windows; each rollout is compared with the customer, and counted
    as an error when its nearest expert is not among those ``k`` experts.
    ``k``, ``N`` and ``sample_length`` are read from module scope.

    Args:
        args: Run configuration dictionary.
        model_path: Path of the model to load.
        experts: Per-expert summaries (features or distributions).
        new_customers: Per-customer summaries of the new customers.
        compare_features: If true, use ``get_features`` and the ``ed``
            metric; otherwise ``pe.get_distrib`` and the ``wd`` metric.
        n_new_customers: When given, ``n_experts_in_adam_basket`` is set to
            ``args['n_experts'] + n_new_customers`` when loading the model.

    Returns:
        A tuple ``(agents, abs_diffs, errors)`` with one entry per new
        customer: the rollout summaries, their distances to the customer,
        and the fraction of rollouts counted as errors.
    """
    n_experts = args['n_experts']
    n_hist = args['n_historical_events']

    if n_new_customers is None:
        env, model, obs_normalizer = pe.get_env_and_model(
            args, model_path, sample_length, only_env=False)
    else:
        env, model, obs_normalizer = pe.get_env_and_model(
            args, model_path, sample_length, only_env=False,
            n_experts_in_adam_basket=n_experts + n_new_customers)

    agents, abs_diffs, errors = [], [], []

    metric = ed if compare_features else wd

    for offset, customer in enumerate(new_customers):
        expert_distances = [metric(customer, e) for e in experts]
        closest_experts = np.argsort(expert_distances)[:k]
        nearest_expert = closest_experts[0]

        customer_agents = []
        customer_abs_diffs = []
        error_count = 0

        # New customers are numbered after the experts.
        seed = n_experts + offset

        if args['state_rep'] == 71:
            adam_basket = np.random.permutation(env.case.adam_baskets[seed])
            env.case.i_expert = seed

        env.model.spawn_new_customer(seed)
        sample = env.case.get_sample(
            n_demos_per_expert=1,
            n_historical_events=n_hist,
            n_time_steps=1000
        )
        all_data = np.hstack(sample[0])  # history, data = sample[0]

        for rollout_idx in range(N):
            start = np.random.randint(0, all_data.shape[1] - n_hist)
            history = all_data[:, start:start + n_hist]

            if args['state_rep'] == 71:
                state_key = adam_basket[rollout_idx]
            else:
                # The closest expert stands in for the unseen customer.
                state_key = nearest_expert
            initial_state = env.case.get_initial_state(history, state_key)

            states, actions = pe.sample_from_policy(
                env, model, obs_normalizer, initial_state=initial_state)
            states = np.array(states)
            actions = np.array(actions)

            if compare_features:
                summary = get_features([actions])
            else:
                summary = pe.get_distrib(states, actions)

            customer_agents.append(summary)
            customer_abs_diffs.append(metric(summary, customer))

            rollout_distances = [metric(summary, e) for e in experts]
            if np.argmin(rollout_distances) not in closest_experts:
                error_count += 1

        agents.append(customer_agents)
        abs_diffs.append(customer_abs_diffs)
        errors.append(error_count / N)

    return agents, abs_diffs, errors
def load_data():
    """Load or compute a result for every run folder and group by parameter.

    Every directory in ``dir_path`` whose name starts with ``'2020'`` is
    processed in sorted order.  A saved ``result.pkl`` is loaded unless
    ``resample`` is set; otherwise the checkpoints in the folder are
    evaluated and the result is saved (folders without a usable saved result
    are skipped when ``plot_only`` is set).  With ``update_classification``
    the error entry of each loaded model is recomputed.  Behaviour is
    controlled by the module-level settings ``dir_path``, ``plot_only``,
    ``update_classification``, ``resample``, ``compare_features``, ``param``,
    ``k``, ``sample_length`` and ``n_new_customers``.

    Returns:
        A dict mapping ``args[param]`` of each folder to the list of results
        collected for that value.
    """
    data = {}

    # Load data
    data_paths = sorted(
        join(dir_path, x) for x in os.listdir(dir_path)
        if x.startswith('2020'))

    for i, path in enumerate(data_paths):
        print('Processing folder {} of {}'.format(i + 1, len(data_paths)))

        content = os.listdir(path)

        # Use a context manager so the args file is closed deterministically.
        with open(join(path, 'args.txt'), 'r') as args_file:
            args = json.load(args_file)

        # Customer data is sampled once, from the first folder's args, and
        # reused for all later folders.
        if (not plot_only or update_classification) and i == 0:
            env = pe.get_env_and_model(args, '', sample_length, only_env=True)

            # Sample customer data
            n_experts = args['n_experts']
            customer_trajectories = env.generate_expert_trajectories(
                out_dir=None,
                n_demos_per_expert=1,
                n_experts=n_experts + n_new_customers,
                n_expert_time_steps=sample_length
            )
            expert_states = np.array(
                customer_trajectories['states'][:n_experts])
            expert_actions = np.array(
                customer_trajectories['actions'][:n_experts])
            new_states = np.array(customer_trajectories['states'][n_experts:])
            new_actions = np.array(
                customer_trajectories['actions'][n_experts:])

            if compare_features:
                avg_expert = get_features(expert_actions, average=True)
                experts = get_features(expert_actions)
                new_customers = get_features(new_actions)
            else:
                avg_expert = pe.get_distrib(expert_states, expert_actions)
                experts = get_distribs(expert_states, expert_actions)
                new_customers = get_distribs(new_states, new_actions)

        if 'result.pkl' in content and not resample:
            print('Loading saved result')
            result = load_result(path)

            if update_classification:
                print('Updating classification with k = %d' % k)

                for n_train_steps, entry in result.models.items():
                    agent = entry[0]
                    errors = compare_with_new_customers(
                        agent, experts, new_customers, compare_features)

                    # Replace only the error entry (index 2).  Assigning to
                    # an existing key keeps the dict size unchanged, which is
                    # safe while iterating.
                    updated = list(entry)
                    updated[2] = errors
                    result.models[n_train_steps] = tuple(updated)
        else:
            if plot_only:
                print('Ignoring')
                continue

            print('Collecting result by sampling from model')

            models = {}

            model_paths = [
                d for d, _, _ in os.walk(path) if d.endswith('checkpoint')
            ]
            model_paths.sort(key=get_key_from_path)

            for mp in model_paths:
                n_train_steps = get_key_from_path(mp)
                # Cast like the other threshold checks in this file so a
                # string key cannot break the comparison.
                # NOTE(review): the other checks skip with ``<=``; this one
                # keeps the original ``<`` -- confirm which is intended.
                if int(n_train_steps) < 1000000:
                    continue

                agent, abs_diffs, errors = evaluate_on_new_customers(
                    args, mp, experts, new_customers, compare_features)
                avg_dist = evaluate_on_pop_level(
                    args, mp, avg_expert, compare_features)

                models[n_train_steps] = (agent, abs_diffs, errors, avg_dist)

            result = Result(models)
            save_result(result, path)

        data.setdefault(args[param], []).append(result)

    return data
def save_df(dir_path, folder_name, sample_length=10000, n_new_customers=50):
    """Write Wasserstein distances for every checkpoint to a CSV file.

    Customers are sampled with the environment loaded from the directory
    ending in ``'finish'``.  For each directory ending in ``'checkpoint'``
    with more than 1,000,000 steps, agent data is sampled and compared with
    the customers, and the rows are written to
    ``report/<folder_name>/df_<folder_name><n>.csv``, where ``n`` is one more
    than the number of CSV files already in that directory.

    Args:
        dir_path: Run directory containing ``args.txt`` and the saved models.
        folder_name: Name of the sub-directory of ``report`` to write to;
            also part of the CSV file name.
        sample_length: Passed to ``pe.get_env_and_model`` and
            ``sample_customer_data``.
        n_new_customers: Number of customers sampled on top of the experts.
    """
    # Use a context manager so the args file is closed deterministically.
    with open(join(dir_path, 'args.txt'), 'r') as args_file:
        args = json.load(args_file)

    n_experts = args['n_experts']
    n_customers = n_experts + n_new_customers

    # These state representations need the adam-basket size on every load.
    env_kwargs = {}
    if args['state_rep'] in (71, 81):
        env_kwargs['n_experts_in_adam_basket'] = n_customers

    # None when no directory ends with 'finish'; it is passed on unchanged.
    final_model_dir_path = next(
        (d for d, _, _ in os.walk(dir_path) if d.endswith('finish')), None)

    # Only the environment of the final model is needed for sampling.
    env, _, _ = pe.get_env_and_model(
        args, final_model_dir_path, sample_length, **env_kwargs)

    customer_states, customer_actions = sample_customer_data(
        env, n_experts, sample_length, n_new_customers)
    customers = res.get_distribs(customer_states, customer_actions)
    expert_states = customer_states[:n_experts]
    expert_actions = customer_actions[:n_experts]
    experts = customers[:n_experts]
    avg_expert = pe.get_distrib(expert_states, expert_actions)

    model_dir_paths = [
        d for d, _, _ in os.walk(dir_path) if d.endswith('checkpoint')
    ]
    model_dir_paths.sort(key=res.get_key_from_path)

    data = []
    for mdp in model_dir_paths:
        n_steps = res.get_key_from_path(mdp)

        print('Processing model saved after %s' % n_steps)

        if int(n_steps) <= 1000000:
            continue

        env, model, obs_normalizer = pe.get_env_and_model(
            args, mdp, sample_length, **env_kwargs)

        agent_states, agent_actions, closest_expert = sample_agent_data(
            n_customers, args, env, model, obs_normalizer, customers,
            customer_states)

        agents = res.get_distribs(agent_states, agent_actions)
        avg_agent = pe.get_distrib(agent_states[:n_experts],
                                   agent_actions[:n_experts])

        for i, (a, c) in enumerate(zip(agents, customers)):
            if i < n_experts:
                data.append([n_steps, wd(a, c), 'Experts'])
            else:
                data.append([n_steps, wd(a, c), 'New customers'])
                # NOTE(review): the nesting of this row is assumed to be the
                # new-customer branch -- confirm against the intended
                # behaviour of sample_agent_data's closest_expert output.
                data.append([
                    n_steps,
                    wd(a, experts[closest_expert[i]]), 'Closest expert'
                ])

        data.append([n_steps, wd(avg_agent, avg_expert), 'Average expert'])

    df = pd.DataFrame(data,
                      columns=[
                          'Number of training steps', 'Wasserstein distance',
                          'Comparison with'
                      ])

    report_dir = join('report', folder_name)
    os.makedirs(report_dir, exist_ok=True)
    counter = sum(1 for x in os.listdir(report_dir) if x.endswith('.csv'))
    df.to_csv(join(report_dir,
                   'df_' + folder_name + str(counter + 1) + '.csv'),
              index=False)
def main():
    """Print the mean distance between generated customers and experts.

    Trajectories are sampled for 10 experts and 50 further customers.  For
    each of the 50 customers, the closest expert (by Wasserstein distance
    between distributions) is found, purchase days are generated from that
    expert's action sequence, and the distribution of the generated sequence
    is compared with the expert's.  The mean of these distances is printed.
    """
    # Kept even though unused here.
    # NOTE(review): presumably imported for registration side effects --
    # confirm before removing.
    import custom_gym
    import gym
    import numpy as np
    import os, sys
    from os.path import join

    tools_path = join(os.getcwd(), 'customer_behaviour/tools')
    sys.path.insert(1, tools_path)
    import policy_evaluation as pe
    import results2 as res
    from scipy.stats import wasserstein_distance as wd

    sample_length = 10000
    n_experts = 10
    n_customers = 50
    tot_customers = n_experts + n_customers
    # Length of the history window; used both when sampling trajectories and
    # when building states for the generated sequence.
    n_historical_events = 10

    model = DGM()
    case = Case22(model=model, n_experts=tot_customers)

    states = []
    actions = []
    for expert in range(tot_customers):
        state, action = get_states_actions(
            case=case,
            model=model,
            seed=expert,
            episode_length=sample_length,
            n_historical_events=n_historical_events,
            save_for_visualisation=False)
        states.append(state)
        actions.append(action)
    states = np.array(states)
    actions = np.array(actions)

    customers = res.get_distribs(states, actions)
    experts_ts = actions[:n_experts]
    new_customers_dis = customers[-n_customers:]
    experts_dis = customers[:n_experts]

    length_subsample = int(sample_length / 10)

    final_dist = []
    for c in new_customers_dis:
        distances = [wd(c, e) for e in experts_dis]
        dummy = np.argsort(distances)[0]  # index of the closest expert

        expert_ts = experts_ts[dummy, :].tolist()[0]
        samples = [
            expert_ts[x:x + length_subsample]
            for x in range(0, len(expert_ts), length_subsample)
        ]
        ratios = [get_purchase_ratio(sample) for sample in samples]
        freq_probs, bins = get_freqs_probs(ratios, length_subsample)

        purchase_days = []
        for sample in samples:
            # ``sample`` is a Python list, so ``sample * 10`` repeats it ten
            # times rather than scaling its values.
            # NOTE(review): confirm the repetition is intended.
            purchase_days.extend(np.nonzero(sample * 10)[0].tolist())
        purchase_histo = get_purchase_probs(purchase_days, sample_length)

        purchase_days = generate_purchase_days(sample_length, bins,
                                               freq_probs, purchase_histo)

        # Binary action sequence with a 1 on every generated purchase day.
        c_actions = np.zeros((sample_length, )).astype(int)
        for day in purchase_days:
            c_actions[day] = 1

        temp = c_actions.tolist()
        # Sliding windows over the generated action sequence.
        c_states = np.asarray([
            temp[x:x + n_historical_events]
            for x in range(len(temp) - n_historical_events)
        ])
        n_states = c_states.shape[0]
        c_actions = c_actions[:n_states]

        c_dis = pe.get_distrib(c_states, c_actions)
        final_dist.append(wd(c_dis, experts_dis[dummy]))

    print(np.mean(final_dist))