def analysis_hour(self):
    """Plot the hour-of-day distribution of two timestamp columns.

    Counts rows of ``self.data_frame`` per hour (0-23) for the
    ``creation_time`` column (legend label '评论数量') and the
    ``reference_time`` column (legend label '购买数量'), draws both as line
    charts on the same axes, saves the figure as
    ``<product_id>_creation_time.png`` and records the image through
    ``self.record_result``.
    """
    hours = np.arange(0, 24)

    def hourly_counts(column):
        # Hours without any event must show up as 0.  The previous code
        # passed method='ffill' to reindex, which copied the preceding
        # hour's count into every empty hour instead.
        per_hour = self.data_frame[column].dt.hour.value_counts()
        return per_hour.reindex(hours, fill_value=0)

    ax = hourly_counts('creation_time').plot(
        xticks=hours, kind='line', style='o-', label='评论数量')
    hourly_counts('reference_time').plot(
        xticks=hours, kind='line', style='o-', label='购买数量')

    ax.set_ylabel('数量')
    ax.set_xlabel('24 小时分布')
    plt.title('购买/评论该商品的 24 小时时间分布图')
    plt.tight_layout()
    plt.legend()

    filename = '%s_creation_time.png' % self.product_id
    plt.savefig('%s/%s' % (utils.get_save_image_path(), filename))
    # Clear the shared pyplot figure so the next chart starts empty.
    plt.clf()

    result = utils.get_image_src(filename=filename)
    self.record_result(result, type='image')
def analysis_buy_days(self):
    """Plot the distribution of the ``days`` column as a line chart.

    The chart title describes this as the delay between purchase and
    review.  When the column has more than 20 distinct values, every value
    greater than 20 is folded into a single trailing bucket at 20 whose
    tick label gets a '+' suffix.  The first six points and the last point
    are annotated with their percentage of the total.  The figure is saved
    as ``<product_id>_days.png`` and recorded through
    ``self.record_result``.
    """
    counts = self.data_frame['days'].value_counts().sort_index()

    # Merge the long tail into one bucket so the x axis stays readable.
    merged = len(counts.index) > 20
    if merged:
        # Only days strictly after 20 form the overflow; day 20 itself is
        # kept as-is (the old code summed ">= 20" and then added that sum
        # onto the day-20 bucket, counting day 20 twice).
        overflow = counts[counts.index > 20].sum()
        counts = counts[counts.index <= 20].copy()
        # Assign through .loc rather than mutating ``.values`` in place,
        # and create the bucket when no review fell exactly on day 20.
        if 20 in counts.index:
            counts.loc[20] += overflow
        else:
            counts.loc[20] = overflow

    ax = counts.plot(kind='line', style='ro-', xticks=counts.index)

    positions = list(counts.index)
    labels = [str(day) for day in positions]
    if merged:
        # The '+' suffix is only meaningful on an actual overflow bucket.
        labels[-1] += '+'
    ax.set_xticklabels(labels=labels, rotation=0)

    total = counts.sum()
    last = len(positions) - 1
    for i, (day, val) in enumerate(zip(positions, counts.values)):
        if i <= 5 or i == last:
            # The line plot's x coordinates are the day values, not the
            # positional index, so anchor the text at ``day``.
            ax.text(day - 0.4, val, '%.3f%%' % (val * 1.0 / total * 100))

    plt.title('该商品用户购买后写下评论的时间关系图')
    ax.set_xlabel('写评论时间(天)')
    ax.set_ylabel('数量')
    plt.tight_layout()

    filename = '%s_days.png' % self.product_id
    plt.savefig('%s/%s' % (utils.get_save_image_path(), filename))
    # Clear the shared pyplot figure so the next chart starts empty.
    plt.clf()

    result = utils.get_image_src(filename=filename)
    self.record_result(result, type='image')
def analysis_user_level(self):
    """Draw a bar chart of how often each ``user_level_name`` occurs.

    Each bar is annotated with its share of the total as a percentage.
    Tick labels are rotated by 45 degrees once there are more than six
    bars.  The figure is saved as ``<product_id>_user_level.png`` and the
    image is recorded through ``self.record_result``.
    """
    level_counts = self.data_frame['user_level_name'].value_counts()
    axes = level_counts.plot(kind='bar', alpha=self.opacity, color=self.color)

    if len(level_counts.index) <= 6:
        tilt = 0
    else:
        tilt = 45
    axes.set_xticklabels(level_counts.index, rotation=tilt)

    # Percentage label on top of every bar.
    total = level_counts.sum()
    for position, amount in enumerate(level_counts.values):
        share = amount * 1.0 / total * 100
        axes.text(position - 0.25, amount, '%.2f%%' % share)

    plt.title('购买该商品的用户等级分布图')
    axes.set_xlabel('用户等级')
    axes.set_ylabel('数量')
    plt.tight_layout()

    image_name = '%s_user_level.png' % self.product_id
    target = '%s/%s' % (utils.get_save_image_path(), image_name)
    plt.savefig(target)
    # Reset the shared pyplot figure for the next chart.
    plt.clf()

    self.record_result(utils.get_image_src(filename=image_name), type='image')
def analysis_mobile(self):
    """Draw a small pie chart of the ``is_mobile`` column.

    Values 1 and 0 are relabelled '移动端' and 'PC'.  The chart is drawn
    on its own 2.4 x 2.4 inch figure, saved as ``<product_id>_mobile.png``
    and recorded through ``self.record_result``.
    """
    # Use a dedicated small figure and close it afterwards.  The previous
    # code opened this figure plus a second default-sized one on every
    # call and never closed either, so figures piled up in pyplot.
    fig = plt.figure(figsize=(2.4, 2.4))
    try:
        counts = self.data_frame['is_mobile'].value_counts()
        counts = counts.rename({1: '移动端', 0: 'PC'})

        plt.pie(x=counts.values, autopct='%.0f%%', radius=0.7,
                labels=counts.index, startangle=180)
        plt.title('该商品移动/ PC 购买比例')
        plt.tight_layout()

        filename = '%s_mobile.png' % self.product_id
        plt.savefig('%s/%s' % (utils.get_save_image_path(), filename))
    finally:
        # Closing hands "current figure" back to whatever existed before
        # (or to none, in which case pyplot creates a default-sized figure
        # on the next plotting call).
        plt.close(fig)

    result = utils.get_image_src(filename=filename)
    self.record_result(result, type='image')
def analysis_buy_channel(self):
    """Draw a bar chart of how often each ``user_client_show`` value occurs.

    Rows whose client is the empty string are shown as '其他,网页端'.
    Every bar carries its percentage of the total; tick labels are rotated
    by 45 degrees when there are more than three bars.  The figure is
    saved as ``<product_id>_channel.png`` and recorded through
    ``self.record_result``.
    """
    client_counts = self.data_frame['user_client_show'].value_counts()
    client_counts = client_counts.rename({u'': u'其他,网页端'})

    axes = client_counts.plot(kind='bar', alpha=self.opacity, color=self.color)

    if len(client_counts.index) > 3:
        tilt = 45
    else:
        tilt = 0
    axes.set_xticklabels(client_counts.index, rotation=tilt)

    # Percentage label on top of every bar.
    total = client_counts.sum()
    for position, amount in enumerate(client_counts.values):
        share = amount * 1.0 / total * 100
        axes.text(position - 0.25, amount, '%.3f%%' % share)

    plt.title('该商品不同客户端的购买数量关系图')
    axes.set_xlabel('客户端')
    axes.set_ylabel('数量')

    image_name = '%s_channel.png' % self.product_id
    plt.tight_layout()
    target = '%s/%s' % (utils.get_save_image_path(), image_name)
    plt.savefig(target)
    # Reset the shared pyplot figure for the next chart.
    plt.clf()

    self.record_result(utils.get_image_src(filename=image_name), type='image')
def analysis_content(self, contents, type):
    """Build a word cloud from review text and record its top keywords.

    Args:
        contents: Review text to analyse.  ``None``, empty or
            whitespace-only text is skipped silently.
        type: Review category.  'good', 'general' and 'poor' select the
            heading prefix; any other value records the bare keyword list.

    The first four entries of the word cloud layout are recorded as a
    keyword heading, then the cloud is saved as
    ``<product_id>_<type>.png`` and recorded as an image.
    """
    if not contents:
        return

    # JD review text contains stray '&hellip' fragments; drop them.
    contents = contents.replace('&hellip', '')
    # WordCloud raises when there is not a single word to lay out, so
    # treat whitespace-only text like empty text.
    if not contents.strip():
        return

    mask_path = '%s/media/mask.png' % settings.BASE_DIR
    # Release the image file as soon as the pixel data has been copied.
    with Image.open(mask_path) as mask_image:
        mask = np.array(mask_image)

    wordcloud = WordCloud(font_path=self.font_path, mask=mask).generate(contents)

    # Each layout entry starts with a (word, count) pair; keep the first
    # four words, each followed by the separator (as before).
    result = ''.join(entry[0][0] + '、' for entry in wordcloud.layout_[:4])

    headings = {
        'good': '好评数据 关键字:%s',
        'general': '中评数据 关键字:%s',
        'poor': '差评数据 关键字:%s',
    }
    heading = headings.get(type)
    if heading is not None:
        result = heading % result

    self.record_result(result, strong=True, color='black', font_size=24)

    filename = '%s_%s.png' % (self.product_id, type)
    wordcloud.to_image().save('%s/%s' % (utils.get_save_image_path(), filename))

    result = utils.get_image_src(filename=filename)
    self.record_result(result, type='image')
def analysis_color(self):
    """Draw a bar chart of how often each ``product_sku`` value occurs.

    Rows with an empty SKU are shown as '其他'.  Tick labels are rotated
    by 45 degrees and right-aligned when there are more than five bars.
    Every bar carries its percentage of the total.  The figure is saved
    as ``<user_id>_color.png`` and recorded through ``self.record_result``.
    """
    obj = self.data_frame['product_sku']
    # logging.warn is a deprecated alias that newer Python versions no
    # longer provide; logging.warning emits the same records.
    logging.warning("obj 111")
    logging.warning(obj)

    obj = obj.value_counts()
    logging.warning("obj 222")
    logging.warning(obj)

    obj = obj.rename({'': u'其他'})
    logging.warning("obj 333")
    logging.warning(obj)

    ax = obj.plot(kind='bar', alpha=self.opacity, color=self.color)

    # Set title and axis labels after plotting, like the sibling charts:
    # pandas may overwrite an x label that was set before the plot call.
    plt.title('该商品不同颜色购买数量关系图')
    ax.set_xlabel('颜色')
    ax.set_ylabel('数量')

    # Decide whether the x tick labels need to be slanted.
    if len(ax.containers) > 0:
        if len(obj.index) > 5:
            # Anchor at the right edge of each bar to match ha='right'.
            xticks_pos = [
                1 * patch.get_width() + patch.get_xy()[0]
                for patch in ax.containers[0]
            ]
            plt.xticks(xticks_pos, obj.index, rotation=45, ha='right')
        else:
            # Anchor at the centre of each bar.
            xticks_pos = [
                0.5 * patch.get_width() + patch.get_xy()[0]
                for patch in ax.containers[0]
            ]
            plt.xticks(xticks_pos, obj.index, rotation=0)

    # Percentage label on top of every bar.
    count = obj.sum()
    for i, val in enumerate(obj.values):
        ax.text(i - 0.25, val, '%.3f%%' % (val * 1.0 / count * 100))

    plt.tight_layout()

    # NOTE(review): every other chart names its file after
    # self.product_id; confirm that self.user_id is intended here.
    filename = '%s_color.png' % self.user_id
    plt.savefig('%s/%s' % (utils.get_save_image_path(), filename))
    # Clear the shared pyplot figure so the next chart starts empty.
    plt.clf()

    result = utils.get_image_src(filename=filename)
    logging.warning(result)
    self.record_result(result, type='image')
def analysis_sell_time(self):
    """Plot monthly event totals for two timestamp columns on one chart.

    Rows are counted per month (resample rule 'M') for ``creation_time``
    (legend label '评论数量') and ``reference_time`` (legend label
    '购买数量').  Tick labels are thinned out as the number of months
    grows: every month up to 5, every second month up to 10, otherwise
    every fourth month plus the final one.  The figure is saved as
    ``<product_id>_time.png`` and recorded through ``self.record_result``.
    """
    def monthly_totals(column):
        # One unit per row, indexed by timestamp, summed per month.
        events = Series(index=self.data_frame[column], data=1)
        return events.resample(rule='M').sum().fillna(0)

    reviews = monthly_totals('creation_time')
    reviews.plot(style='o-', xticks=reviews.index, label='评论数量')

    purchases = monthly_totals('reference_time')
    axes = purchases.plot(style='o-', xticks=purchases.index, label='购买数量')

    # Pick how densely the month ticks get labelled.
    months = len(purchases.index)
    if months <= 5:
        stride, keep_last = 1, False
    elif months <= 10:
        stride, keep_last = 2, False
    else:
        stride, keep_last = 4, True

    tick_labels = []
    for pos, stamp in enumerate(purchases.index):
        labelled = pos % stride == 0 or (keep_last and pos == months - 1)
        tick_labels.append(stamp.strftime('\n%d\n%m\n%Y') if labelled else '')
    axes.set_xticklabels(tick_labels)

    plt.title('该商品购买时间、评论时间关系图')
    axes.set_xlabel('时间')
    axes.set_ylabel('购买/评论数量')
    plt.tight_layout()
    plt.legend()

    image_name = '%s_time.png' % self.product_id
    target = '%s/%s' % (utils.get_save_image_path(), image_name)
    plt.savefig(target)
    # Reset the shared pyplot figure for the next chart.
    plt.clf()

    self.record_result(utils.get_image_src(filename=image_name), type='image')