コード例 #1
0
    def analysis_hour(self):
        """Plot the hour-of-day distribution of comments and purchases.

        Counts the rows of ``self.data_frame`` per hour (0-23) for the
        ``creation_time`` column (plotted as the comment series) and the
        ``reference_time`` column (plotted as the purchase series), draws both
        as line plots on the same axes, saves the figure as
        ``<product_id>_creation_time.png`` in the directory returned by
        ``utils.get_save_image_path()`` and records the image through
        ``self.record_result``.
        """
        hours = np.arange(0, 24)

        def hourly_counts(column):
            # Number of rows per hour of day for the given datetime column.
            counts = self.data_frame[column].dt.hour.value_counts()
            # Hours without any row must read as 0.  Forward-filling here
            # would copy the previous hour's count into the empty hours and
            # distort the distribution.
            return counts.reindex(hours, fill_value=0)

        ax = hourly_counts('creation_time').plot(
            xticks=hours, kind='line', style='o-', label='评论数量')
        # Draw on the same axes explicitly instead of relying on pyplot's
        # notion of the current axes.
        hourly_counts('reference_time').plot(
            ax=ax, xticks=hours, kind='line', style='o-', label='购买数量')

        ax.set_ylabel('数量')
        ax.set_xlabel('24 小时分布')
        plt.title('购买/评论该商品的 24 小时时间分布图')

        plt.tight_layout()
        plt.legend()
        filename = '%s_creation_time.png' % self.product_id
        plt.savefig('%s/%s' % (utils.get_save_image_path(), filename))
        # Clear the shared pyplot figure so the next chart starts empty.
        plt.clf()

        result = utils.get_image_src(filename=filename)
        self.record_result(result, type='image')
コード例 #2
0
    def analysis_buy_days(self):
        """Plot how many days after purchase users wrote their comment.

        Counts the values of the ``days`` column of ``self.data_frame`` and
        draws them as a line plot.  When any value lies beyond 20 days, every
        count from day 20 onwards is merged into a single final bucket.  The
        last tick is labelled with a trailing ``+``.  The first six points and
        the last point are annotated with their percentage share.  The figure
        is saved as ``<product_id>_days.png`` and recorded through
        ``self.record_result``.

        Nothing is drawn or recorded when the column yields no counts.
        """
        merge_from = 20  # first day that belongs to the merged tail bucket

        counts = self.data_frame['days'].value_counts().sort_index()
        if len(counts.index) == 0:
            # No data: there is no last bucket to label and nothing to plot.
            return

        # Merge only when some comment really was written after `merge_from`
        # days.  The tail is summed once and stored under the `merge_from`
        # key, so day 20 is neither counted twice nor is the tail added to an
        # unrelated earlier day when day 20 itself has no entry.
        if counts.index[-1] > merge_from:
            tail_total = counts[counts.index >= merge_from].sum()
            counts = counts[counts.index < merge_from].copy()
            counts.loc[merge_from] = tail_total

        days = list(counts.index)
        ax = counts.plot(kind='line', style='ro-', xticks=counts.index)

        tick_labels = [str(day) for day in days]
        tick_labels[-1] += '+'
        ax.set_xticklabels(labels=tick_labels, rotation=0)

        total = counts.sum()
        last = len(days) - 1
        for i, (day, val) in enumerate(zip(days, counts.values)):
            # Annotate the first six points and the final bucket.  The x
            # coordinate is the day value itself, because the line plot uses
            # the index values as x positions.
            if i <= 5 or i == last:
                ax.text(day - 0.4, val, '%.3f%%' % (val * 1.0 / total * 100))

        plt.title('该商品用户购买后写下评论的时间关系图')
        ax.set_xlabel('写评论时间(天)')
        ax.set_ylabel('数量')

        plt.tight_layout()
        filename = '%s_days.png' % self.product_id
        plt.savefig('%s/%s' % (utils.get_save_image_path(), filename))
        # Clear the shared pyplot figure so the next chart starts empty.
        plt.clf()

        result = utils.get_image_src(filename=filename)
        self.record_result(result, type='image')
コード例 #3
0
    def analysis_user_level(self):
        """Render a bar chart of the buyers' user-level distribution.

        Counts the values of the ``user_level_name`` column of
        ``self.data_frame``, draws one bar per level annotated with its
        percentage share, saves the figure as ``<product_id>_user_level.png``
        and records the resulting image through ``self.record_result``.
        """
        level_counts = self.data_frame['user_level_name'].value_counts()
        levels = level_counts.index

        axes = level_counts.plot(kind='bar',
                                 alpha=self.opacity,
                                 color=self.color)
        # Tilt the tick labels once there are more than six levels.
        tilt = 45 if len(levels) > 6 else 0
        axes.set_xticklabels(levels, rotation=tilt)

        # Write each bar's share of the total just above the bar.
        total = level_counts.sum()
        for position, amount in enumerate(level_counts.values):
            share = '%.2f%%' % (amount * 1.0 / total * 100)
            axes.text(position - 0.25, amount, share)

        axes.set_xlabel('用户等级')
        axes.set_ylabel('数量')
        plt.title('购买该商品的用户等级分布图')
        plt.tight_layout()

        filename = '%s_user_level.png' % self.product_id
        target = '%s/%s' % (utils.get_save_image_path(), filename)
        plt.savefig(target)
        # Clear the shared pyplot figure so the next chart starts empty.
        plt.clf()

        self.record_result(utils.get_image_src(filename=filename),
                           type='image')
コード例 #4
0
    def analysis_mobile(self):
        """Draw a pie chart of the ``is_mobile`` value shares.

        Counts the values of the ``is_mobile`` column of ``self.data_frame``,
        labels the value 1 as '移动端' and 0 as 'PC', draws the shares as a
        pie chart on a dedicated 2.4 x 2.4 inch figure, saves it as
        ``<product_id>_mobile.png`` and records the image through
        ``self.record_result``.
        """
        counts = self.data_frame['is_mobile'].value_counts()
        counts = counts.rename({1: '移动端', 0: 'PC'})

        # The pie chart gets its own small figure.  Keep the handle so it can
        # be closed afterwards; creating figures without closing them leaves
        # them registered with pyplot for the lifetime of the process.
        fig = plt.figure(figsize=(2.4, 2.4))
        try:
            plt.pie(x=counts.values,
                    autopct='%.0f%%',
                    radius=0.7,
                    labels=counts.index,
                    startangle=180)

            plt.title('该商品移动/ PC 购买比例')

            plt.tight_layout()
            filename = '%s_mobile.png' % self.product_id
            plt.savefig('%s/%s' % (utils.get_save_image_path(), filename))
        finally:
            # Closing the small figure means later charts are not drawn at
            # this reduced size; pyplot falls back to another open figure or
            # creates a new one with the default figure size.
            plt.close(fig)

        result = utils.get_image_src(filename=filename)
        self.record_result(result, type='image')
コード例 #5
0
    def analysis_buy_channel(self):
        """Render a bar chart of purchase counts per client.

        Counts the values of the ``user_client_show`` column of
        ``self.data_frame`` (the empty string is relabelled as
        '其他,网页端'), draws one bar per client annotated with its
        percentage share, saves the figure as ``<product_id>_channel.png``
        and records the resulting image through ``self.record_result``.
        """
        client_counts = self.data_frame['user_client_show'].value_counts()
        client_counts = client_counts.rename({u'': u'其他,网页端'})
        clients = client_counts.index

        axes = client_counts.plot(kind='bar',
                                  alpha=self.opacity,
                                  color=self.color)
        # Tilt the tick labels when there are more than three clients.
        tilt = 0 if len(clients) <= 3 else 45
        axes.set_xticklabels(clients, rotation=tilt)

        # Show each bar's percentage of the total above the bar.
        total = client_counts.sum()
        for position, amount in enumerate(client_counts.values):
            share = '%.3f%%' % (amount * 1.0 / total * 100)
            axes.text(position - 0.25, amount, share)

        axes.set_xlabel('客户端')
        axes.set_ylabel('数量')
        plt.title('该商品不同客户端的购买数量关系图')
        plt.tight_layout()

        filename = '%s_channel.png' % self.product_id
        target = '%s/%s' % (utils.get_save_image_path(), filename)
        plt.savefig(target)
        # Clear the shared pyplot figure so the next chart starts empty.
        plt.clf()

        self.record_result(utils.get_image_src(filename=filename),
                           type='image')
コード例 #6
0
    def analysis_content(self, contents, type):
        """Build a word cloud from review text and record its top keywords.

        Args:
            contents: The review text to analyse, as a single string.
            type: Review category.  'good', 'general' and 'poor' select the
                heading prefix of the keyword line; any other value records
                the keywords without a prefix.  The value is also used in the
                saved image's file name.

        Returns:
            None.  When ``contents`` is empty or whitespace-only after
            cleaning, nothing is recorded.  Otherwise a keyword line and the
            word-cloud image ``<product_id>_<type>.png`` are recorded through
            ``self.record_result``.
        """
        # JD reviews can contain a literal '&hellip'; strip it.
        contents = contents.replace('&hellip', '')
        # Whitespace-only text carries no words either, so skip it as well
        # instead of handing it to the word-cloud generator.
        if not contents.strip():
            return

        mask_path = '%s/media/mask.png' % settings.BASE_DIR
        # Copy the pixels into an array inside the context manager so the
        # image file handle is released as soon as the mask is loaded.
        with Image.open(mask_path) as mask_image:
            mask = np.array(mask_image)
        wordcloud = WordCloud(font_path=self.font_path,
                              mask=mask).generate(contents)

        # Each layout entry starts with a (word, count) pair; take the words
        # of the first four entries.  Joining avoids a dangling separator
        # after the last keyword.
        keywords = '、'.join(entry[0][0] for entry in wordcloud.layout_[:4])

        headings = {
            'good': '好评数据 关键字:%s',
            'general': '中评数据 关键字:%s',
            'poor': '差评数据 关键字:%s',
        }
        result = headings.get(type, '%s') % keywords

        self.record_result(result, strong=True, color='black', font_size=24)

        filename = '%s_%s.png' % (self.product_id, type)
        wordcloud.to_image().save('%s/%s' %
                                  (utils.get_save_image_path(), filename))

        result = utils.get_image_src(filename=filename)
        self.record_result(result, type='image')
コード例 #7
0
    def analysis_color(self):
        """Render a bar chart of purchase counts per product SKU value.

        Counts the values of the ``product_sku`` column of ``self.data_frame``
        (the empty string is relabelled as '其他'), draws one bar per value
        annotated with its percentage share, saves the figure as
        ``<user_id>_color.png`` and records the image through
        ``self.record_result``.  Intermediate series are logged at WARNING
        level.
        """
        obj = self.data_frame['product_sku']
        # logging.warn is a deprecated alias; logging.warning is the
        # supported spelling.  Messages and level are unchanged.
        logging.warning("obj 111")
        logging.warning(obj)
        obj = obj.value_counts()
        logging.warning("obj 222")
        logging.warning(obj)

        obj = obj.rename({'': u'其他'})
        logging.warning("obj 333")
        logging.warning(obj)
        ax = obj.plot(kind='bar', alpha=self.opacity, color=self.color)

        # Title and axis labels are applied after plotting so that the
        # plotting call's own axis decoration cannot override them.
        plt.title('该商品不同颜色购买数量关系图')
        ax.set_xlabel('颜色')
        ax.set_ylabel('数量')

        # Decide whether to tilt the x tick labels.
        if ax.containers:
            crowded = len(obj.index) > 5
            # Crowded charts anchor the right-aligned, rotated label at the
            # bar's right edge; otherwise the label sits at the bar's centre.
            offset = 1 if crowded else 0.5
            xticks_pos = [
                offset * patch.get_width() + patch.get_xy()[0]
                for patch in ax.containers[0]
            ]
            if crowded:
                plt.xticks(xticks_pos, obj.index, rotation=45, ha='right')
            else:
                plt.xticks(xticks_pos, obj.index, rotation=0)

        # Show each bar's percentage of the total above the bar.
        count = obj.sum()
        for i, val in enumerate(obj.values):
            ax.text(i - 0.25, val, '%.3f%%' % (val * 1.0 / count * 100))

        plt.tight_layout()
        # NOTE(review): the file name is keyed by self.user_id - confirm this
        # is the intended identifier for the saved image.
        filename = '%s_color.png' % self.user_id
        plt.savefig('%s/%s' % (utils.get_save_image_path(), filename))
        # Clear the shared pyplot figure so the next chart starts empty.
        plt.clf()
        result = utils.get_image_src(filename=filename)
        logging.warning(result)
        self.record_result(result, type='image')
コード例 #8
0
    def analysis_sell_time(self):
        """Plot purchase and comment counts over time on one chart.

        Builds one series per timestamp column of ``self.data_frame``
        (``creation_time`` for the comment line, ``reference_time`` for the
        purchase line), resamples each with rule 'M' and sums the rows per
        bin, then draws both as line plots.  Tick labels are thinned
        depending on how many bins the purchase series has.  The figure is
        saved as ``<product_id>_time.png`` and recorded through
        ``self.record_result``.
        """
        def binned_counts(column):
            # Weight every row with 1 so that summing a bin counts its rows.
            events = Series(index=self.data_frame[column], data=1)
            return events.resample(rule='M').sum().fillna(0)

        comments = binned_counts('creation_time')
        comments.plot(style='o-', xticks=comments.index, label='评论数量')

        purchases = binned_counts('reference_time')
        stamps = purchases.index
        ax = purchases.plot(style='o-', xticks=stamps, label='购买数量')

        # Thin out the tick labels: every tick up to 5 bins, every 2nd tick
        # up to 10 bins, otherwise every 4th tick plus the very last one.
        total = len(stamps)
        if total <= 5:
            stride = 1
        elif total <= 10:
            stride = 2
        else:
            stride = 4
        final = total - 1

        labels = []
        for pos, stamp in enumerate(stamps):
            shown = pos % stride == 0 or (stride == 4 and pos == final)
            labels.append(stamp.strftime('\n%d\n%m\n%Y') if shown else '')
        ax.set_xticklabels(labels)

        ax.set_xlabel('时间')
        ax.set_ylabel('购买/评论数量')
        plt.title('该商品购买时间、评论时间关系图')

        plt.tight_layout()
        plt.legend()
        filename = '%s_time.png' % self.product_id
        target = '%s/%s' % (utils.get_save_image_path(), filename)
        plt.savefig(target)
        # Clear the shared pyplot figure so the next chart starts empty.
        plt.clf()

        self.record_result(utils.get_image_src(filename=filename),
                           type='image')