Exemple #1
0
    def plot_margin_histograms(self):
        """Save one histogram of victory margins per outcome group.

        For each of ``self.buyer_wins`` and ``self.seller_wins``, collects the
        margin (``utils.get_margin``) of every chat that has a winner
        (``utils.get_winner`` is not ``None``) and whose margin lies in
        ``[0, MAX_MARGIN]``, bins those margins and writes a bar chart to
        ``self.stats_path``.

        The output file name is ``'<label>_wins_margins_histogram.png'`` where
        the label itself already ends in ``_wins``; the doubled suffix is kept
        so existing output paths do not change.
        """
        groups = zip(['buyer_wins', 'seller_wins'],
                     [self.buyer_wins, self.seller_wins])
        for (lbl, group) in groups:
            margins = []
            for ex in group:
                # Chats without a winner are left out of the histogram.
                if utils.get_winner(ex) is None:
                    continue
                margin = utils.get_margin(ex)
                if 0 <= margin <= MAX_MARGIN:
                    margins.append(margin)

            # NOTE(review): linspace with int(MAX_MARGIN / 0.2) + 2 edges
            # produces int(MAX_MARGIN / 0.2) + 1 bins, i.e. bins slightly
            # narrower than 0.2 -- confirm whether 0.2-wide bins were intended.
            b = np.linspace(0, MAX_MARGIN, num=int(MAX_MARGIN / 0.2) + 2)
            print(b)  # debug output of the bin edges
            hist, bins = np.histogram(margins, bins=b)

            width = np.diff(bins)
            center = (bins[:-1] + bins[1:]) / 2

            fig, ax = plt.subplots(figsize=(8, 3))
            try:
                ax.bar(center, hist, align='center', width=width)
                ax.set_xticks(bins)

                save_path = os.path.join(
                    self.stats_path,
                    '{:s}_wins_margins_histogram.png'.format(lbl))
                fig.savefig(save_path)
            finally:
                # pyplot keeps every figure alive until it is closed; release
                # it here so repeated calls do not accumulate open figures.
                plt.close(fig)
Exemple #2
0
    def plot_length_vs_margin(self, out_name='turns_vs_margin.png'):
        """Plot mean margin of victory against dialogue length.

        One error-bar series is drawn for ``self.buyer_wins`` and one for
        ``self.seller_wins``. Within a series, chats are grouped by their
        total number of turns (the sum of the two per-agent counts returned by
        ``utils.get_turns_per_agent``); chats whose margin falls outside
        ``[0, MAX_MARGIN]`` are ignored. Only lengths backed by at least
        ``THRESHOLD`` chats are plotted, using the mean margin with
        ``stats.sem`` as the error bar.

        :param out_name: file name of the saved figure, relative to
            ``self.stats_path``.
        """
        plt.figure(figsize=(10, 6))

        series = zip([self.buyer_wins, self.seller_wins],
                     ['buyer wins', 'seller wins'])
        for (chats, lbl) in series:
            # total turns in the dialogue -> margins of the chats of that length
            margins_by_length = defaultdict(list)
            for ex in chats:
                per_agent = utils.get_turns_per_agent(ex)
                n_turns = per_agent[0] + per_agent[1]
                margin = utils.get_margin(ex)
                if margin < 0. or margin > MAX_MARGIN:
                    continue
                margins_by_length[n_turns].append(margin)

            # Keep only the lengths with enough samples, in ascending order.
            lengths = [n for n in sorted(margins_by_length)
                       if len(margins_by_length[n]) >= THRESHOLD]
            means = [np.mean(margins_by_length[n]) for n in lengths]
            errors = [stats.sem(margins_by_length[n]) for n in lengths]

            plt.errorbar(lengths, means, yerr=errors, label=lbl, fmt='--o')

        plt.legend()
        plt.xlabel('# of turns in dialogue')
        plt.ylabel('Margin of victory')

        plt.savefig(os.path.join(self.stats_path, out_name))
Exemple #3
0
    def plot_speech_acts_old(self):
        """Plot speech-act frequency against margin of victory per outcome group.

        For each of ``self.buyer_wins`` and ``self.seller_wins`` a figure is
        written to ``<self.stats_path>/<label>_speech_acts.png``. Chats with
        no winner or with a margin outside ``[0, MAX_MARGIN]`` are skipped.
        For every remaining chat, the fraction of each act in
        ``SpeechActs.ACTS`` among the winning agent's speech acts is recorded
        under the chat's rounded margin; a winner value of ``-1`` records both
        agents (presumably a tie -- confirm against ``utils.get_winner``).
        Only margin bins holding more than ``THRESHOLD`` samples are plotted.

        Agents with no speech acts are skipped rather than raising
        ``ZeroDivisionError``.
        """
        labels = ['buyer_wins', 'seller_wins']
        for (group, lbl) in zip([self.buyer_wins, self.seller_wins], labels):
            plt.figure(figsize=(10, 6))
            speech_act_counts = dict(
                (act, defaultdict(list)) for act in SpeechActs.ACTS)
            for chat in group:
                winner = utils.get_winner(chat)
                margin = utils.get_margin(chat)
                if margin > MAX_MARGIN or margin < 0.:
                    continue
                if winner is None:
                    continue

                # Bucket the margin to reduce noise (the original comment
                # states round_partial rounds to the nearest 0.1).
                margin = round_partial(margin)

                for agent in (0, 1):
                    # Only the winner is counted; -1 counts both agents.
                    if winner != -1 and winner != agent:
                        continue
                    speech_acts = self.get_speech_acts(chat, agent=agent)
                    n_acts = len(speech_acts)
                    if n_acts == 0:
                        # No acts means no fractions can be computed.
                        continue
                    for act in SpeechActs.ACTS:
                        frac = float(speech_acts.count(act)) / float(n_acts)
                        speech_act_counts[act][margin].append(frac)

            for act in SpeechActs.ACTS:
                counts = speech_act_counts[act]
                margins = []
                fracs = []
                errors = []
                bin_totals = 0.
                for m in sorted(counts.keys()):
                    if len(counts[m]) > THRESHOLD:
                        bin_totals += len(counts[m])
                        margins.append(m)
                        fracs.append(np.mean(counts[m]))
                        errors.append(stats.sem(counts[m]))
                # Debug output: average sample count per plotted bin. Guarded
                # because no bin may have passed the threshold.
                if margins:
                    print(bin_totals / float(len(margins)))

                plt.errorbar(margins, fracs, yerr=errors, label=act, fmt='--o')

            plt.xlabel('Margin of victory')
            plt.ylabel('Fraction of speech act occurences')
            plt.title('Speech act frequency vs. margin of victory')
            plt.legend()
            save_path = os.path.join(self.stats_path,
                                     '{:s}_speech_acts.png'.format(lbl))
            plt.savefig(save_path)
            # pyplot retains figures until closed; release this one before
            # the next group opens its own.
            plt.close()
Exemple #4
0
    def plot_price_trends(self, top_n=10):
        """Plot the price trends of the most decisive chats per outcome group.

        For each of ``self.buyer_wins`` and ``self.seller_wins``, gathers the
        price trend (``self.get_price_trend``) of the winning agent for every
        chat that has a winner and a margin within ``[0, 1.0]``; a winner
        value of ``-1`` contributes both agents' trends. Trends with fewer
        than two entries are dropped. The ``top_n`` entries with the largest
        margin are printed and plotted, and the figure is saved as
        ``<self.stats_path>/<label>_trend.png``.

        :param top_n: number of highest-margin trends to print and plot.
        """
        outcome_groups = zip([self.buyer_wins, self.seller_wins],
                             ['buyer_wins', 'seller_wins'])
        for (group, lbl) in outcome_groups:
            plt.figure(figsize=(10, 6))

            # (margin, chat, trend) triples for every qualifying agent.
            candidates = []
            for chat in group:
                winner = utils.get_winner(chat)
                margin = utils.get_margin(chat)
                if margin > 1.0 or margin < 0. or winner is None:
                    continue

                for agent in (0, 1):
                    if not (winner == -1 or winner == agent):
                        continue
                    trend = self.get_price_trend(self.price_tracker,
                                                 chat,
                                                 agent=agent)
                    if len(trend) > 1:
                        candidates.append((margin, chat, trend))

            # Largest margins first; ties keep their collection order.
            candidates.sort(key=lambda entry: entry[0], reverse=True)
            for (margin, chat, trend) in candidates[:top_n]:
                print('{:s}: Chat {:s}\tMargin: {:.2f}'.format(
                    lbl, chat['uuid'], margin))
                print(' '.join(['Trend: ', str(trend)]))
                print(chat['scenario']['kbs'])
                print("")
                plt.plot(trend, label='Margin={:.2f}'.format(margin))

            plt.legend()
            plt.xlabel('N-th price mentioned in chat')
            plt.ylabel('Value of mentioned price')
            plt.savefig(
                os.path.join(self.stats_path, '{:s}_trend.png'.format(lbl)))
Exemple #5
0
    def plot_speech_acts_by_role(self):
        """Plot speech-act frequency against margin for each role.

        For every role in ``utils.ROLES`` a figure is written to
        ``<self.stats_path>/<role>_speech_acts.png``. Each chat in
        ``self.dataset`` that has an outcome contributes the fraction of each
        act in ``SpeechActs.ACTS`` among that role's speech acts, recorded
        under the rounded margin of the agent playing the role. Chats whose
        margin exceeds ``MAX_MARGIN`` are skipped; no lower bound is applied
        here. Only margin bins holding more than ``THRESHOLD`` samples are
        plotted.

        Chats in which the role produced no speech acts are skipped rather
        than raising ``ZeroDivisionError``.
        """
        labels = utils.ROLES
        for lbl in labels:
            plt.figure(figsize=(10, 6))
            speech_act_counts = dict(
                (act, defaultdict(list)) for act in SpeechActs.ACTS)
            for chat in self.dataset:
                if utils.get_winner(chat) is None:
                    # skip chats with no outcomes
                    continue
                speech_acts = self.get_speech_acts(chat, role=lbl)
                # The agent index is 1 when the second KB carries this role,
                # otherwise 0.
                agent = 1 if chat['scenario']['kbs'][1]['personal'][
                    'Role'] == lbl else 0
                margin = utils.get_margin(chat, agent=agent)
                if margin > MAX_MARGIN:
                    continue
                n_acts = len(speech_acts)
                if n_acts == 0:
                    # No acts means no fractions can be computed.
                    continue
                margin = round_partial(margin)
                for act in SpeechActs.ACTS:
                    frac = float(speech_acts.count(act)) / float(n_acts)
                    speech_act_counts[act][margin].append(frac)

            for act in SpeechActs.ACTS:
                counts = speech_act_counts[act]
                margins = []
                fracs = []
                errors = []
                for m in sorted(counts.keys()):
                    if len(counts[m]) > THRESHOLD:
                        margins.append(m)
                        fracs.append(np.mean(counts[m]))
                        errors.append(stats.sem(counts[m]))

                plt.errorbar(margins, fracs, yerr=errors, label=act, fmt='--o')

            plt.xlabel('Margin of victory')
            plt.ylabel('Fraction of speech act occurences')
            plt.title('Speech act frequency vs. margin of victory')
            plt.legend()
            save_path = os.path.join(self.stats_path,
                                     '{:s}_speech_acts.png'.format(lbl))
            plt.savefig(save_path)
            # pyplot retains figures until closed; release this one before
            # the next role opens its own.
            plt.close()
Exemple #6
0
    def __init__(self, transcripts, featurizer, split=0.8):
        """Build a labelled train/test set from transcripts and create the model.

        Every transcript is featurized with
        ``featurizer.create_feature_vector`` and its buyer margin is read with
        ``utils.get_margin(t, role=utils.BUYER)``. Transcripts whose margin is
        ``>= self.POSITIVE_THRESHOLD`` are labelled ``1``, those
        ``< self.NEGATIVE_THRESHOLD`` are labelled ``0``, and everything in
        between is discarded. The kept examples are shuffled with
        ``self.shuffle_data`` and handed to ``self.split``, which is expected
        to populate ``train_X``/``train_y``/``test_X``/``test_y``.

        :param transcripts: iterable of chat transcripts.
        :param featurizer: object exposing ``create_feature_vector(transcript)``.
        :param split: forwarded to ``self.split`` (presumably the training
            fraction -- confirm against ``split``'s implementation).
        """
        self.featurizer = featurizer
        # Featurize everything up front, as before, in case the featurizer
        # keeps state across calls.
        all_X = [featurizer.create_feature_vector(t) for t in transcripts]
        margins = [utils.get_margin(t, role=utils.BUYER) for t in transcripts]

        # Keep features and labels aligned: only examples that receive a
        # label are retained, and the label is the binary class rather than
        # the raw margin (the counts printed below rely on 1/0 labels).
        X = []
        y = []
        for (features, margin) in zip(all_X, margins):
            if margin >= self.POSITIVE_THRESHOLD:
                label = 1
            elif margin < self.NEGATIVE_THRESHOLD:
                label = 0
            else:
                continue
            X.append(features)
            y.append(label)
        print("# of chats with buyer margin >= {:.1f}: {:d}".format(
            self.POSITIVE_THRESHOLD, y.count(1)))
        print("# of chats with buyer margin < {:.1f}: {:d}".format(
            self.NEGATIVE_THRESHOLD, y.count(0)))

        X, y = self.shuffle_data(X, y)
        self.train_X = None
        self.train_y = None
        self.test_X = None
        self.test_y = None
        self.split(X, y, split=split)

        print("# of training examples: {:d}".format(self.train_X.shape[0]))
        print("# of test examples: {:d}".format(self.test_X.shape[0]))

        self.model = LogisticRegression(C=1.5)