def plot_margin_histograms(self):
    """Save one histogram of win margins per winner group.

    For each of ``self.buyer_wins`` and ``self.seller_wins``, collect the
    margin of every example that has a winner and whose margin lies in
    ``[0, MAX_MARGIN]``, bin the margins with ``np.histogram`` and write a
    bar chart as a PNG under ``self.stats_path``.

    The bin edges are printed to stdout for each group.  The output file is
    named ``'<label>_wins_margins_histogram.png'``; since the labels already
    end in ``_wins``, that suffix appears twice in the file name.
    """
    group_names = ['buyer_wins', 'seller_wins']
    groups = [self.buyer_wins, self.seller_wins]

    for lbl, group in zip(group_names, groups):
        kept = []
        for ex in group:
            # Only examples with a decided winner contribute a margin.
            if utils.get_winner(ex) is not None:
                value = utils.get_margin(ex)
                if 0 <= value <= MAX_MARGIN:
                    kept.append(value)

        requested_edges = np.linspace(
            0, MAX_MARGIN, num=int(MAX_MARGIN / 0.2) + 2)
        print(requested_edges)

        counts, edges = np.histogram(kept, bins=requested_edges)
        bar_widths = np.diff(edges)
        bar_centers = (edges[1:] + edges[:-1]) / 2

        fig, ax = plt.subplots(figsize=(8, 3))
        ax.bar(bar_centers, counts, align='center', width=bar_widths)
        ax.set_xticks(edges)

        file_name = '{:s}_wins_margins_histogram.png'.format(lbl)
        plt.savefig(os.path.join(self.stats_path, file_name))
def plot_length_vs_margin(self, out_name='turns_vs_margin.png'):
    """Plot mean win margin against dialogue length for both winner groups.

    Examples from ``self.buyer_wins`` and ``self.seller_wins`` are grouped
    by the total number of turns (sum of the two per-agent counts returned
    by ``utils.get_turns_per_agent``).  Examples whose margin is negative
    or above ``MAX_MARGIN`` are ignored.  For every turn count with at
    least ``THRESHOLD`` examples, the mean margin is drawn with its
    standard error as the error bar.

    Args:
        out_name: File name of the figure, created inside
            ``self.stats_path``.
    """
    plt.figure(figsize=(10, 6))

    series = zip([self.buyer_wins, self.seller_wins],
                 ['buyer wins', 'seller wins'])
    for chats, lbl in series:
        margins_by_length = defaultdict(list)
        for ex in chats:
            per_agent = utils.get_turns_per_agent(ex)
            dialogue_length = per_agent[0] + per_agent[1]
            margin = utils.get_margin(ex)
            if margin < 0. or margin > MAX_MARGIN:
                continue
            margins_by_length[dialogue_length].append(margin)

        lengths, means, errors = [], [], []
        for length in sorted(margins_by_length):
            samples = margins_by_length[length]
            # Skip lengths with too few dialogues to give a stable mean.
            if len(samples) < THRESHOLD:
                continue
            lengths.append(length)
            means.append(np.mean(samples))
            errors.append(stats.sem(samples))

        plt.errorbar(lengths, means, yerr=errors, label=lbl, fmt='--o')

    plt.legend()
    plt.xlabel('# of turns in dialogue')
    plt.ylabel('Margin of victory')
    plt.savefig(os.path.join(self.stats_path, out_name))
def plot_speech_acts_old(self):
    """Plot speech-act frequency against win margin for each winner group.

    For each of ``self.buyer_wins`` and ``self.seller_wins`` a new figure
    is created.  Chats without a winner, or whose margin is negative or
    above ``MAX_MARGIN``, are skipped.  For every remaining chat the
    winning agent's speech acts are turned into per-act fractions and
    bucketed by the rounded margin.  A winner value of ``-1`` is counted
    for both agents (presumably a tie -- confirm against
    ``utils.get_winner``).

    Only margin buckets with more than ``THRESHOLD`` samples are plotted;
    the mean fraction is drawn with its standard error.  The average size
    of the plotted buckets is printed per act.  The figure is saved as
    ``'<label>_speech_acts.png'`` under ``self.stats_path``.
    """
    labels = ['buyer_wins', 'seller_wins']
    for group, lbl in zip([self.buyer_wins, self.seller_wins], labels):
        plt.figure(figsize=(10, 6))
        speech_act_counts = {
            act: defaultdict(list) for act in SpeechActs.ACTS
        }

        for chat in group:
            winner = utils.get_winner(chat)
            # Check the winner first so a chat without an outcome is never
            # compared against the margin bounds.
            if winner is None:
                continue
            margin = utils.get_margin(chat)
            if margin > MAX_MARGIN or margin < 0.:
                continue
            # round the margin to the nearest 0.1 to reduce noise
            margin = round_partial(margin)

            for agent in (0, 1):
                if winner != -1 and winner != agent:
                    continue
                speech_acts = self.get_speech_acts(chat, agent=agent)
                n_acts = len(speech_acts)
                if n_acts == 0:
                    # No speech acts for this agent: a fraction is
                    # undefined, so skip instead of dividing by zero.
                    continue
                for act in SpeechActs.ACTS:
                    frac = float(speech_acts.count(act)) / float(n_acts)
                    speech_act_counts[act][margin].append(frac)

        for act in SpeechActs.ACTS:
            counts = speech_act_counts[act]
            margins = []
            fracs = []
            errors = []
            bin_totals = 0.
            for m in sorted(counts):
                samples = counts[m]
                if len(samples) > THRESHOLD:
                    bin_totals += len(samples)
                    margins.append(m)
                    fracs.append(np.mean(samples))
                    errors.append(stats.sem(samples))

            # Average bucket size; undefined when no bucket passed the
            # threshold, so only report it when there is something to plot.
            if margins:
                print(bin_totals / float(len(margins)))
            plt.errorbar(margins, fracs, yerr=errors, label=act, fmt='--o')

        plt.xlabel('Margin of victory')
        plt.ylabel('Fraction of speech act occurences')
        plt.title('Speech act frequency vs. margin of victory')
        plt.legend()

        save_path = os.path.join(self.stats_path,
                                 '{:s}_speech_acts.png'.format(lbl))
        plt.savefig(save_path)
def plot_price_trends(self, top_n=10):
    """Plot the price trends of the highest-margin chats in each group.

    For each of ``self.buyer_wins`` and ``self.seller_wins`` a new figure
    is created.  Chats without a winner, or whose margin is outside
    ``[0, 1.0]``, are skipped.  The winning agent's price trend (from
    ``self.get_price_trend``) is kept when it has more than one entry; a
    winner value of ``-1`` contributes the trends of both agents.

    The ``top_n`` trends with the largest margin are printed (chat uuid,
    margin, trend and scenario KBs) and plotted, and the figure is saved
    as ``'<label>_trend.png'`` under ``self.stats_path``.

    Args:
        top_n: Maximum number of trends to print and plot per group.
    """
    groups = [self.buyer_wins, self.seller_wins]
    for group, lbl in zip(groups, ['buyer_wins', 'seller_wins']):
        plt.figure(figsize=(10, 6))

        candidates = []
        for chat in group:
            winner = utils.get_winner(chat)
            margin = utils.get_margin(chat)
            if margin > 1.0 or margin < 0. or winner is None:
                continue
            for agent in (0, 1):
                if winner == -1 or winner == agent:
                    trend = self.get_price_trend(
                        self.price_tracker, chat, agent=agent)
                    if len(trend) > 1:
                        candidates.append((margin, chat, trend))

        # Largest margins first; the sort is stable, so ties keep their
        # insertion order.
        candidates.sort(key=lambda entry: entry[0], reverse=True)

        for margin, chat, trend in candidates[:top_n]:
            print('{:s}: Chat {:s}\tMargin: {:.2f}'.format(
                lbl, chat['uuid'], margin))
            # Joined with a single space to match the output of the
            # two-item print statement this replaces.
            print(' '.join(('Trend: ', str(trend))))
            print(chat['scenario']['kbs'])
            print("")
            plt.plot(trend, label='Margin={:.2f}'.format(margin))

        plt.legend()
        plt.xlabel('N-th price mentioned in chat')
        plt.ylabel('Value of mentioned price')
        plt.savefig(
            os.path.join(self.stats_path, '{:s}_trend.png'.format(lbl)))
def plot_speech_acts_by_role(self):
    """Plot speech-act frequency against margin, one figure per role.

    For every role in ``utils.ROLES`` a new figure is created.  Each chat
    in ``self.dataset`` that has a winner contributes the fraction of each
    speech act used by the agent playing that role, bucketed by that
    agent's rounded margin.  Chats whose margin exceeds ``MAX_MARGIN`` are
    skipped, as are chats in which the role produced no speech acts.

    Only margin buckets with more than ``THRESHOLD`` samples are plotted;
    the mean fraction is drawn with its standard error.  The figure is
    saved as ``'<role>_speech_acts.png'`` under ``self.stats_path``.
    """
    for lbl in utils.ROLES:
        plt.figure(figsize=(10, 6))
        speech_act_counts = {
            act: defaultdict(list) for act in SpeechActs.ACTS
        }

        for chat in self.dataset:
            if utils.get_winner(chat) is None:
                # skip chats with no outcomes
                continue

            speech_acts = self.get_speech_acts(chat, role=lbl)
            # The role is played by agent 1 if that agent's KB says so,
            # otherwise by agent 0.
            kbs = chat['scenario']['kbs']
            agent = 1 if kbs[1]['personal']['Role'] == lbl else 0
            margin = utils.get_margin(chat, agent=agent)
            if margin > MAX_MARGIN:
                continue
            margin = round_partial(margin)

            n_acts = len(speech_acts)
            if n_acts == 0:
                # No speech acts for this role: a fraction is undefined,
                # so skip instead of dividing by zero.
                continue
            for act in SpeechActs.ACTS:
                frac = float(speech_acts.count(act)) / float(n_acts)
                speech_act_counts[act][margin].append(frac)

        for act in SpeechActs.ACTS:
            counts = speech_act_counts[act]
            margins = []
            fracs = []
            errors = []
            for m in sorted(counts):
                samples = counts[m]
                if len(samples) > THRESHOLD:
                    margins.append(m)
                    fracs.append(np.mean(samples))
                    errors.append(stats.sem(samples))
            plt.errorbar(margins, fracs, yerr=errors, label=act, fmt='--o')

        plt.xlabel('Margin of victory')
        plt.ylabel('Fraction of speech act occurences')
        plt.title('Speech act frequency vs. margin of victory')
        plt.legend()

        save_path = os.path.join(self.stats_path,
                                 '{:s}_speech_acts.png'.format(lbl))
        plt.savefig(save_path)
def __init__(self, transcripts, featurizer, split=0.8):
    """Build a labelled dataset from transcripts and create the model.

    Every transcript is featurized and its buyer margin computed.
    Transcripts with a margin ``>= self.POSITIVE_THRESHOLD`` are labelled
    ``1``, those with a margin ``< self.NEGATIVE_THRESHOLD`` are labelled
    ``0``, and the rest are dropped.  The kept feature vectors and labels
    are shuffled together with ``self.shuffle_data`` and handed to
    ``self.split``.

    Args:
        transcripts: Iterable of chat transcripts accepted by
            ``featurizer.create_feature_vector`` and ``utils.get_margin``.
        featurizer: Object providing ``create_feature_vector(transcript)``.
        split: Passed through to ``self.split`` (presumably the training
            fraction -- confirm against ``self.split``).
    """
    self.featurizer = featurizer

    features = [featurizer.create_feature_vector(t) for t in transcripts]
    margins = [utils.get_margin(t, role=utils.BUYER) for t in transcripts]

    # Keep features and labels aligned: only transcripts that fall clearly
    # on one side of the thresholds are kept, and each kept feature vector
    # is paired with a binary label rather than the raw margin.
    X = []
    y = []
    for vector, margin in zip(features, margins):
        if margin >= self.POSITIVE_THRESHOLD:
            label = 1
        elif margin < self.NEGATIVE_THRESHOLD:
            label = 0
        else:
            continue
        X.append(vector)
        y.append(label)

    print("# of chats with buyer margin >= {:.1f}: {:d}".format(
        self.POSITIVE_THRESHOLD, y.count(1)))
    print("# of chats with buyer margin < {:.1f}: {:d}".format(
        self.NEGATIVE_THRESHOLD, y.count(0)))

    X, y = self.shuffle_data(X, y)

    # self.split is expected to fill these in; they are read right after.
    self.train_X = None
    self.train_y = None
    self.test_X = None
    self.test_y = None
    self.split(X, y, split=split)

    print("# of training examples: {:d}".format(self.train_X.shape[0]))
    print("# of test examples: {:d}".format(self.test_X.shape[0]))

    self.model = LogisticRegression(C=1.5)