def sample_agent_data(N, args, env, model, obs_normalizer, customers, customer_states):
    """Roll out the policy once for each of the first ``N`` customers.

    For every customer index ``i`` a start state is drawn at random from
    ``customer_states[i]`` and handed to ``pe.sample_from_policy``.  When the
    state representation calls for it, the closest expert is looked up
    (smallest Wasserstein distance between ``customers[i]`` and each entry of
    ``experts``) and the start state's entry at that expert's index is set
    to 1 before sampling.

    NOTE: ``experts`` and ``n_experts`` are not parameters; they are resolved
    from the enclosing module scope when the function runs.

    Args:
        N: Number of customers to sample for.
        args: Run configuration; only ``args['state_rep']`` is read here.
        env: Forwarded unchanged to ``pe.sample_from_policy``.
        model: Forwarded unchanged to ``pe.sample_from_policy``.
        obs_normalizer: Forwarded unchanged to ``pe.sample_from_policy``.
        customers: Per-customer distributions, indexed by customer.
        customer_states: Per-customer collections of candidate start states.
            This function no longer modifies them.

    Returns:
        ``(agent_states, agent_actions, closest_expert)`` where the first two
        are numpy arrays built from the sampled trajectories and
        ``closest_expert`` is a list of length ``N`` holding the index of the
        closest expert (0 wherever the lookup was skipped).
    """
    agent_states = []
    agent_actions = []
    closest_expert = [0] * N

    for i in range(N):
        # Initialize agent with data from ith expert
        initial_state = random.choice(customer_states[i])

        # The parentheses only make explicit how Python already parses the
        # original ``a or b or c and d`` expression (``and`` binds tighter).
        # NOTE(review): this means the ``i >= n_experts`` restriction applies
        # to state_rep 23 only -- confirm that is the intended grouping.
        needs_closest_expert = (
            args['state_rep'] == 22
            or args['state_rep'] == 221
            or (args['state_rep'] == 23 and i >= n_experts)
        )
        if needs_closest_expert:
            # Find closest expert
            c = customers[i]
            distances = [wd(c, e) for e in experts]
            nearest = np.argsort(distances)[0]
            closest_expert[i] = nearest
            # ``random.choice`` returns the stored element itself, so writing
            # to it would silently alter the caller's ``customer_states``.
            # Copy first and modify only the copy.
            initial_state = initial_state.copy()
            initial_state[nearest] = 1

        states, actions = pe.sample_from_policy(
            env, model, obs_normalizer, initial_state=initial_state)
        agent_states.append(states)
        agent_actions.append(actions)

    agent_states = np.array(agent_states)
    agent_actions = np.array(agent_actions)
    return agent_states, agent_actions, closest_expert
def earthmover_distance_categorical(p1, p2):
    """Earth mover's distance between two ``Categorical`` distributions.

    Each distribution's support is taken to be the integer positions
    ``0 .. len(probs) - 1`` and its ``_probs`` attribute supplies the weights
    passed to ``wd``.

    Args:
        p1: First distribution; must be a ``Categorical`` instance.
        p2: Second distribution; must be a ``Categorical`` instance.

    Returns:
        Whatever ``wd`` returns for the two weighted supports.

    Raises:
        ValueError: If either argument is not a ``Categorical``.
    """
    for candidate in (p1, p2):
        if not isinstance(candidate, Categorical):
            raise ValueError('Input distributions must be Categoricals')

    weights_a, weights_b = p1._probs, p2._probs
    support_a = range(len(weights_a))
    support_b = range(len(weights_b))
    return wd(support_a, support_b, weights_a, weights_b)
def find_distance(*argv):
    """Sum the pairwise Wasserstein distances between groups of samples.

    Only the first positional argument is used.  It is treated as an indexable
    collection of sample groups; every unordered pair of groups contributes
    ``wd(group_a, group_b)`` to the total.  If either group of a pair is
    empty, a fixed penalty of 100 is added instead.

    Returns:
        The accumulated distance (0 when there are fewer than two groups).
    """
    groups = argv[0]
    empty_penalty = 100  # finite stand-in for an infinite distance (np.inf)

    def pair_cost(first, second):
        # Both groups need at least one sample for the distance to be defined.
        if len(first) != 0 and len(second) != 0:
            return wd(first, second)
        return empty_penalty

    index_pairs = combinations(np.arange(len(groups)), 2)
    return sum(pair_cost(groups[a], groups[b]) for a, b in index_pairs)
def Distance(A, trueW, plot=False):
    """Wasserstein distance between two groups of ``trueW`` selected by ``A``.

    ``trueW[i]`` goes into the first group when ``A[i][0] == 1`` and into the
    second group otherwise.

    Args:
        A: Indexable collection of rows; only the first entry of each row is
            inspected.
        trueW: Values to split, indexed in step with ``A``.
        plot: When true, draw a density curve for each group with
            ``sns.distplot(..., hist=False)``.

    Returns:
        ``wd(first_group, second_group)``.
    """
    first_group, second_group = [], []
    for idx in range(len(A)):
        bucket = first_group if A[idx][0] == 1 else second_group
        bucket.append(trueW[idx])

    if plot:
        # Both curves are drawn onto the current axes, first group first.
        for values in (first_group, second_group):
            sns.distplot(values, hist=False)

    return wd(first_group, second_group)
def plot(data):
    """Plot agent-vs-customer Wasserstein distances against training length.

    Fresh customer trajectories are generated from the environment described
    by the ``args.txt`` of the first entry in ``dir_path`` whose name starts
    with ``'2020'``.  Every stored agent in ``data`` is then compared with the
    matching customer distribution, and two seaborn line plots are shown: one
    for the first ``n_experts`` customers and one for the remaining ones.

    The function relies on names from the enclosing module scope:
    ``dir_path``, ``sample_length``, ``n_new_customers``, ``param``, ``pe``,
    ``get_distribs``, ``get_label_from_param_value``,
    ``get_label_from_param``, ``wd`` and ``plt``.

    Args:
        data: Mapping from parameter value to an iterable of results.  Each
            result exposes ``models``, a mapping from the number of training
            steps to the per-customer agent distributions.
    """
    import seaborn as sns
    import pandas as pd

    # Sample customer data
    run_dirs = [
        join(dir_path, x) for x in os.listdir(dir_path) if x.startswith('2020')
    ]
    # A context manager guarantees the file handle is released.
    with open(join(run_dirs[0], 'args.txt'), 'r') as args_file:
        args = json.load(args_file)

    n_experts = args['n_experts']

    if args['state_rep'] == 71:
        env = pe.get_env_and_model(
            args, '', sample_length, only_env=True,
            n_experts_in_adam_basket=n_experts + n_new_customers)
    else:
        env = pe.get_env_and_model(args, '', sample_length, only_env=True)

    customer_trajectories = env.generate_expert_trajectories(
        out_dir=None,
        n_demos_per_expert=1,
        n_experts=n_experts + n_new_customers,
        n_expert_time_steps=sample_length)
    customer_states = np.array(customer_trajectories['states'])
    customer_actions = np.array(customer_trajectories['actions'])
    customers = get_distribs(customer_states, customer_actions)

    expert_states = np.array(customer_trajectories['states'][:n_experts])
    expert_actions = np.array(customer_trajectories['actions'][:n_experts])
    # NOTE(review): avg_expert is computed but not used further in this
    # function; kept because pe.get_distrib is not visible from here.
    avg_expert = pe.get_distrib(expert_states, expert_actions)

    columns = [
        'int_value', 'Parameter value', 'Number of training episodes',
        'Wasserstein distance'
    ]
    # Rows are gathered in plain lists and turned into DataFrames once,
    # instead of growing a DataFrame one ``.loc`` assignment at a time.
    expert_rows = []
    new_customer_rows = []

    for param_value, results in data.items():
        print('Processing parameter value {}'.format(param_value))
        for result in results:
            for n_train_steps, agent in result.models.items():
                # Models saved at or before one million steps are skipped.
                if int(n_train_steps) <= 1000000:
                    continue
                n_train_episodes = int(n_train_steps) / args['episode_length']
                for i, (a, c) in enumerate(zip(agent, customers)):
                    assert len(a) == 1
                    row = [
                        param_value,
                        get_label_from_param_value(param_value, param),
                        n_train_episodes,
                        wd(a[0], c),
                    ]
                    if i < n_experts:
                        expert_rows.append(row)
                    else:
                        new_customer_rows.append(row)

    # sort_values returns a new frame; the result has to be kept for the
    # sort to have any effect.
    df_experts = pd.DataFrame(
        expert_rows, columns=columns).sort_values(by=['int_value'])
    df_new_customers = pd.DataFrame(
        new_customer_rows, columns=columns).sort_values(by=['int_value'])

    sns.set(style='darkgrid')

    axes = []
    for frame, title in ((df_experts, 'Comparison with experts'),
                         (df_new_customers, 'Comparison with new customers')):
        grid = sns.relplot(x='Number of training episodes',
                           y='Wasserstein distance',
                           hue='Parameter value',
                           ci=95,
                           kind='line',
                           data=frame,
                           facet_kws={'legend_out': False})
        grid.fig.subplots_adjust(top=0.95)
        ax = grid.axes[0][0]
        ax.set_title(title)
        axes.append(ax)

    for ax in axes:
        handles, labels = ax.get_legend_handles_labels()
        # The first legend entry is kept in front as the heading; the others
        # are ordered by the integer that starts each label.
        labels2, handles2 = zip(*sorted(zip(labels[1:], handles[1:]),
                                        key=lambda t: int(t[0].split(' ')[0])))
        labels2 = list(labels2)
        handles2 = list(handles2)
        labels2.insert(0, get_label_from_param(param))
        handles2.insert(0, handles[0])
        ax.legend(handles2, labels2)

    plt.show()
def _load_env_and_model(args, model_dir, sample_length, n_total_customers):
    """Load ``(env, model, obs_normalizer)`` for the model stored in ``model_dir``.

    State representations 71 and 81 additionally receive the total number of
    customers through ``n_experts_in_adam_basket``.
    """
    if args['state_rep'] in (71, 81):
        return pe.get_env_and_model(
            args, model_dir, sample_length,
            n_experts_in_adam_basket=n_total_customers)
    return pe.get_env_and_model(args, model_dir, sample_length)


def save_df(dir_path, folder_name, sample_length=10000, n_new_customers=50):
    """Evaluate every checkpoint under ``dir_path`` and write the result as CSV.

    Customer data is sampled once using the environment of the final model
    (the first directory below ``dir_path`` whose path ends with ``'finish'``,
    or ``None`` if there is none).  Each directory ending with
    ``'checkpoint'`` is then loaded in the order given by
    ``res.get_key_from_path``; checkpoints at or below 1,000,000 steps are
    skipped.  For each remaining checkpoint the agents are sampled and their
    Wasserstein distances to the customers are recorded.

    The table is written to
    ``report/<folder_name>/df_<folder_name><k>.csv``, where ``k`` is one more
    than the number of ``.csv`` files already in that folder.

    Args:
        dir_path: Directory holding ``args.txt`` and the model directories.
        folder_name: Sub-folder of ``report`` that receives the CSV file.
        sample_length: Forwarded to the environment/model loader and to
            ``sample_customer_data``.
        n_new_customers: Number of customers sampled in addition to the
            ``args['n_experts']`` experts.
    """
    # A context manager guarantees the file handle is released.
    with open(join(dir_path, 'args.txt'), 'r') as args_file:
        args = json.load(args_file)

    n_experts = args['n_experts']
    n_total_customers = n_experts + n_new_customers

    # Walk the tree once and reuse the listing for both lookups below.
    all_dirs = [entry[0] for entry in os.walk(dir_path)]
    final_model_dir_path = next(
        (d for d in all_dirs if d.endswith('finish')), None)

    env, model, obs_normalizer = _load_env_and_model(
        args, final_model_dir_path, sample_length, n_total_customers)

    customer_states, customer_actions = sample_customer_data(
        env, n_experts, sample_length, n_new_customers)
    customers = res.get_distribs(customer_states, customer_actions)

    expert_states = customer_states[:n_experts]
    expert_actions = customer_actions[:n_experts]
    experts = customers[:n_experts]
    avg_expert = pe.get_distrib(expert_states, expert_actions)

    model_dir_paths = [d for d in all_dirs if d.endswith('checkpoint')]
    model_dir_paths.sort(key=res.get_key_from_path)

    data = []
    for mdp in model_dir_paths:
        n_steps = res.get_key_from_path(mdp)
        print('Processing model saved after %s' % n_steps)
        if int(n_steps) <= 1000000:
            continue

        env, model, obs_normalizer = _load_env_and_model(
            args, mdp, sample_length, n_total_customers)

        agent_states, agent_actions, closest_expert = sample_agent_data(
            n_total_customers, args, env, model, obs_normalizer, customers,
            customer_states)

        agents = res.get_distribs(agent_states, agent_actions)
        avg_agent = pe.get_distrib(agent_states[:n_experts],
                                   agent_actions[:n_experts])

        for i, (a, c) in enumerate(zip(agents, customers)):
            if i < n_experts:
                data.append([n_steps, wd(a, c), 'Experts'])
            else:
                data.append([n_steps, wd(a, c), 'New customers'])
                # NOTE(review): the closest-expert row is recorded for new
                # customers only -- confirm against the intended report.
                data.append([
                    n_steps,
                    wd(a, experts[closest_expert[i]]), 'Closest expert'
                ])

        data.append([n_steps, wd(avg_agent, avg_expert), 'Average expert'])

    df = pd.DataFrame(data,
                      columns=[
                          'Number of training steps', 'Wasserstein distance',
                          'Comparison with'
                      ])

    report_dir = join('report', folder_name)
    os.makedirs(report_dir, exist_ok=True)
    # Number the new file after the CSV files that already exist.
    counter = len([x for x in os.listdir(report_dir) if x.endswith('.csv')])
    df.to_csv(join(report_dir,
                   'df_' + folder_name + str(counter + 1) + '.csv'),
              index=False)
def main():
    """Plot how the distance to the closest expert varies with expert count.

    Trajectories are generated for ``max(expert_ls) + n_customers`` customers.
    The last ``n_customers`` of them act as new customers.  For each expert
    count in ``expert_ls`` and each of ``n_runs`` repetitions, a random subset
    of the remaining customers is drawn without replacement as experts, and
    the mean over new customers of the smallest Wasserstein distance to any
    drawn expert is recorded.  The result is shown as a seaborn line plot.

    ``Case22``, ``get_states_actions`` and ``plt`` are taken from the
    enclosing module scope.
    """
    import custom_gym
    import gym
    from customer_behaviour.tools.dgm import DGM
    import numpy as np
    import itertools
    import os
    import sys
    from os.path import join
    import pandas as pd

    # The tools directory must be on sys.path before the imports that follow.
    sys.path.insert(1, join(os.getcwd(), 'customer_behaviour/tools'))
    import policy_evaluation as pe
    import results2 as res
    from scipy.stats import wasserstein_distance as wd
    import seaborn as sns

    sample_length = 10000
    expert_ls = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 50]
    n_customers = 20
    n_runs = 3
    tot_customers = max(expert_ls) + n_customers

    model = DGM()
    case = Case22(model=model, n_experts=tot_customers)

    # One trajectory per customer; the customer index doubles as the seed.
    states, actions = [], []
    for seed in range(tot_customers):
        state, action = get_states_actions(case=case,
                                           model=model,
                                           seed=seed,
                                           episode_length=sample_length,
                                           n_historical_events=1000,
                                           save_for_visualisation=False)
        states.append(state)
        actions.append(action)

    customers = res.get_distribs(np.array(states), np.array(actions))
    new_customers = customers[-n_customers:]

    rows = []
    for n_experts in expert_ls:
        for _ in range(n_runs):
            picked = np.random.choice(range(len(customers) - n_customers),
                                      n_experts,
                                      replace=False)
            experts = [customers[k] for k in picked]
            closest = [
                min([wd(new_customers[k], e) for e in experts])
                for k in range(n_customers)
            ]
            rows.append([n_experts, np.mean(closest)])

    df = pd.DataFrame(
        rows,
        columns=['Number of experts', 'Average distance to closest customer'])

    sns.set(style='darkgrid')
    g = sns.relplot(x='Number of experts',
                    y='Average distance to closest customer',
                    hue=None,
                    ci=95,
                    kind='line',
                    data=df,
                    facet_kws={'legend_out': False})
    plt.show()
def main():
    """Print the mean distance between generated customers and their closest expert.

    Trajectories are generated for ``n_experts + n_customers`` customers.  For
    each of the last ``n_customers`` customers the closest expert (smallest
    Wasserstein distance) is found, purchase statistics are derived from that
    expert's action sequence, a new purchase sequence is generated from them,
    and the distribution of that sequence is compared with the closest
    expert's distribution.  The mean of these distances is printed.

    ``DGM``, ``Case22``, ``get_states_actions``, ``get_purchase_ratio``,
    ``get_freqs_probs``, ``get_purchase_probs`` and
    ``generate_purchase_days`` are taken from the enclosing module scope.
    """
    import custom_gym
    import gym
    import numpy as np
    import itertools
    import os
    import sys
    from os.path import join
    import pandas as pd

    # The tools directory must be on sys.path before the imports that follow.
    sys.path.insert(1, join(os.getcwd(), 'customer_behaviour/tools'))
    import policy_evaluation as pe
    import results2 as res
    from scipy.stats import wasserstein_distance as wd

    sample_length = 10000
    n_experts = 10
    n_customers = 50
    tot_customers = n_experts + n_customers
    history_length = 10  # width of the sliding window used to build states

    model = DGM()
    case = Case22(model=model, n_experts=tot_customers)

    # One trajectory per customer; the customer index doubles as the seed.
    states, actions = [], []
    for seed in range(tot_customers):
        state, action = get_states_actions(case=case,
                                           model=model,
                                           seed=seed,
                                           episode_length=sample_length,
                                           n_historical_events=10,
                                           save_for_visualisation=False)
        states.append(state)
        actions.append(action)
    states = np.array(states)
    actions = np.array(actions)

    customers = res.get_distribs(states, actions)
    experts_ts = actions[:n_experts]
    new_customers_dis = customers[-n_customers:]
    experts_dis = customers[:n_experts]

    length_subsample = int(sample_length / 10)

    dist = []  # smallest distance per new customer; collected but not reported
    final_dist = []
    for k in range(n_customers):
        customer = new_customers_dis[k]
        distances = [wd(customer, e) for e in experts_dis]
        dist.append(min(distances))
        nearest = np.argsort(distances)[0]

        # Action sequence of the closest expert as a plain Python list.
        expert_ts = experts_ts[nearest, :].tolist()[0]
        chunks = [
            expert_ts[start:start + length_subsample]
            for start in range(0, len(expert_ts), length_subsample)
        ]

        ratios = [get_purchase_ratio(chunk) for chunk in chunks]
        freq_probs, bins = get_freqs_probs(ratios, length_subsample)

        observed_days = []
        for chunk in chunks:
            # NOTE(review): ``chunk`` is a list slice, so ``chunk * 10``
            # repeats the list ten times rather than scaling its values --
            # confirm this tiling is intended.
            observed_days.extend(np.nonzero(chunk * 10)[0].tolist())

        purchase_histo = get_purchase_probs(observed_days, sample_length)
        generated_days = generate_purchase_days(sample_length, bins,
                                                freq_probs, purchase_histo)

        # Binary action vector with a 1 on every generated purchase day.
        c_actions = np.zeros((sample_length, )).astype(int)
        for day in generated_days:
            c_actions[day] = 1

        # Each state is a window of ``history_length`` consecutive actions.
        flat_actions = c_actions.tolist()
        c_states = np.asarray([
            flat_actions[start:start + history_length]
            for start in range(len(flat_actions) - history_length)
        ])

        # Trim the actions so there is exactly one per state.
        c_actions = c_actions[:c_states.shape[0]]

        c_dis = pe.get_distrib(c_states, c_actions)
        final_dist.append(wd(c_dis, experts_dis[nearest]))

    print(np.mean(final_dist))