def get_comment_date_count():
    """Tally comments per creation month into ``CommentDateCount``.

    The month key is the first seven characters of ``str(create_time)``.
    Counts are ADDED to any row that already exists for a month, so running
    this twice without clearing the table doubles the totals. After the
    tally, every ``CommentDateCount`` row gets its ``percentage`` refreshed
    from ``calculate_percentage(<number of comments>, <row total>)``.
    """
    from collections import Counter

    # Tally in memory first so the table is touched once per distinct month
    # instead of once per comment.
    month_counts = Counter(
        str(comment.create_time)[0:7] for comment in Comment.select()
    )

    for year_month, count in month_counts.items():
        try:
            cdc = CommentDateCount.get_by_id(year_month)
        except CommentDateCount.DoesNotExist:
            CommentDateCount.create(year_month=year_month, total=count)
        else:
            cdc.total += count
            cdc.save()

    comments_total = Comment.select().count()
    for cdc in CommentDateCount.select():
        cdc.percentage = calculate_percentage(comments_total, cdc.total)
        cdc.save()
def get_order_days_count():
    """Tally comments per ``order_days`` value into ``OrderDaysCount``.

    Only comments whose ``order_days`` is not NULL are considered. Counts are
    ADDED to any row that already exists for a value, so running this twice
    without clearing the table doubles the totals. After the tally, every
    ``OrderDaysCount`` row gets its ``percentage`` refreshed from
    ``calculate_percentage(<number of such comments>, <row total>)``.
    """
    from collections import Counter

    with_order_days = Comment.select().where(Comment.order_days.is_null(False))

    # Tally in memory first so the table is touched once per distinct value
    # instead of once per comment.
    days_counts = Counter(comment.order_days for comment in with_order_days)

    for order_days, count in days_counts.items():
        try:
            odc = OrderDaysCount.get_by_id(order_days)
        except OrderDaysCount.DoesNotExist:
            OrderDaysCount.create(order_days=order_days, total=count)
        else:
            odc.total += count
            odc.save()

    order_total = Comment.select().where(
        Comment.order_days.is_null(False)).count()
    for odc in OrderDaysCount.select():
        odc.percentage = calculate_percentage(order_total, odc.total)
        odc.save()
def get_order_date_count():
    """Tally comments per order month into ``OrderDateCount``.

    Only comments whose ``order_time`` is not NULL are considered; the month
    key is the first seven characters of ``str(order_time)``. Counts are
    ADDED to any row that already exists for a month, so running this twice
    without clearing the table doubles the totals. After the tally, every
    ``OrderDateCount`` row gets its ``percentage`` refreshed from
    ``calculate_percentage(<number of such comments>, <row total>)``.
    """
    from collections import Counter

    with_order_time = Comment.select().where(Comment.order_time.is_null(False))

    # Tally in memory first so the table is touched once per distinct month
    # instead of once per comment.
    month_counts = Counter(
        str(comment.order_time)[0:7] for comment in with_order_time
    )

    for year_month, count in month_counts.items():
        try:
            odc = OrderDateCount.get_by_id(year_month)
        except OrderDateCount.DoesNotExist:
            OrderDateCount.create(year_month=year_month, total=count)
        else:
            odc.total += count
            odc.save()

    order_total = Comment.select().where(
        Comment.order_time.is_null(False)).count()
    for odc in OrderDateCount.select():
        odc.percentage = calculate_percentage(order_total, odc.total)
        odc.save()
def get_after_days_count():
    """Tally comments per ``after_days`` value into ``AfterDaysCount``.

    Only comments whose ``after_days`` is not NULL are considered. Counts are
    ADDED to any row that already exists for a value, so running this twice
    without clearing the table doubles the totals. After the tally, every
    ``AfterDaysCount`` row gets its ``percentage`` refreshed from
    ``calculate_percentage(<number of such comments>, <row total>)``.
    """
    from collections import Counter

    with_after_days = Comment.select().where(Comment.after_days.is_null(False))

    # Tally in memory first so the table is touched once per distinct value
    # instead of once per comment.
    days_counts = Counter(comment.after_days for comment in with_after_days)

    for after_days, count in days_counts.items():
        try:
            adc = AfterDaysCount.get_by_id(after_days)
        except AfterDaysCount.DoesNotExist:
            AfterDaysCount.create(after_days=after_days, total=count)
        else:
            adc.total += count
            adc.save()

    after_total = Comment.select().where(
        Comment.after_days.is_null(False)).count()
    for adc in AfterDaysCount.select():
        adc.percentage = calculate_percentage(after_total, adc.total)
        adc.save()
def insert_jd_comments(comment_list: list, jd_ss: Union[Shop, JDSku]):
    """Store JD comments as ``Comment`` rows, skipping ones already stored.

    Args:
        comment_list: Comment dicts. Each is read for the keys ``id``,
            ``productColor``, ``productSize``, ``creationTime``, ``content``,
            ``score``, ``referenceTime``, ``days`` and ``userClient``, plus
            ``afterUserComment`` / ``afterDays`` when a follow-up comment is
            present.
        jd_ss: Shop or SKU the comments were fetched for; only its
            ``is_self`` attribute is read.

    Raises:
        KeyError: If a comment dict lacks one of the required keys.
    """
    for comment in comment_list:
        color, rom = parse_iPhone11_product_info(comment['productColor'], comment['productSize'])
        # Identify an existing row by comment_id alone. Matching on every
        # column (content, star, timestamps, ...) would fail to recognise a
        # re-fetched comment whose payload differs slightly and then try to
        # insert a second row for the same comment_id.
        # NOTE(review): assumes comment_id uniquely identifies a comment -
        # confirm against the Comment model definition.
        new_comment, created = Comment.get_or_create(
            comment_id='JD' + str(comment['id']),
            defaults={
                'source': '京东',
                'is_self': jd_ss.is_self,
                'create_time': comment['creationTime'],
                'content': comment['content'],
                'star': comment['score'],
                'order_time': comment['referenceTime'],
                'order_days': comment['days'],
                'color': color,
                'rom': rom,
            },
        )
        if not created:
            # Already stored on an earlier run; leave the row untouched.
            continue

        if 'afterUserComment' in comment:
            after_comment = comment['afterUserComment']
            new_comment.after_time = after_comment['created']
            new_comment.after_content = after_comment['content']
            new_comment.after_days = comment['afterDays']

        # userClient codes: 4 is stored as Android, 2 as iOS, anything else
        # as 'other'.
        if comment['userClient'] == 4:
            new_comment.user_device = 'Android'
        elif comment['userClient'] == 2:
            new_comment.user_device = 'iOS'
        else:
            new_comment.user_device = 'other'
        new_comment.save()
def get_user_device_count():
    """Insert one ``UserDeviceCount`` row summarising comments by device.

    ``total`` counts comments whose ``user_device`` is not NULL; ``android``,
    ``ios`` and ``other`` count comments whose ``user_device`` equals
    ``'Android'``, ``'iOS'`` and ``'other'`` respectively. Each percentage is
    ``calculate_percentage(total, <device count>)``.
    """
    def count_where(condition):
        # Number of Comment rows matching the given peewee expression.
        return Comment.select().where(condition).count()

    device = Comment.user_device
    total = count_where(device.is_null(False))
    android = count_where(device == 'Android')
    ios = count_where(device == 'iOS')
    other = count_where(device == 'other')

    UserDeviceCount.create(
        total=total,
        android=android,
        ios=ios,
        other=other,
        android_percentage=calculate_percentage(total, android),
        ios_percentage=calculate_percentage(total, ios),
        other_percentage=calculate_percentage(total, other),
    )
def get_all_comments_words():
    """Extract up to 200 keywords from all comment bodies.

    Every comment's ``content`` is joined (one per line) into a single text,
    keywords are extracted with jieba using the custom stop-word file, and
    each keyword is stored as an ``AllCommentsWords`` row.
    """
    corpus = ''.join(row.content + '\n' for row in Comment.select())

    # Keyword extraction based on the TF-IDF algorithm.
    jieba.analyse.set_stop_words(DATA_ANALYZE_DIR + '/custom_cn_stopwords.txt')
    for keyword in jieba.analyse.extract_tags(corpus, topK=200):
        AllCommentsWords.create(word=keyword)
def get_non_five_star_comments_words():
    """Extract up to 200 keywords from comments rated 1 to 4 stars.

    For each matching comment the ``content`` and, when present, the
    ``after_content`` are joined (one per line) into a single text. Keywords
    are extracted with jieba using the custom stop-word file and each one is
    stored as a ``NonFiveStarCommentsWords`` row.
    """
    low_star_comments = Comment.select().where(Comment.star.in_([1, 2, 3, 4]))

    pieces = []
    for row in low_star_comments:
        pieces.append(row.content + '\n')
        if row.after_content is not None:
            pieces.append(row.after_content + '\n')
    corpus = ''.join(pieces)

    # Keyword extraction based on the TF-IDF algorithm.
    jieba.analyse.set_stop_words(DATA_ANALYZE_DIR + '/custom_cn_stopwords.txt')
    for keyword in jieba.analyse.extract_tags(corpus, topK=200):
        NonFiveStarCommentsWords.create(word=keyword)
def get_ios_comments_words():
    """Extract up to 200 keywords from comments whose device is ``'iOS'``.

    For each matching comment the ``content`` and, when present, the
    ``after_content`` are joined (one per line) into a single text. Keywords
    are extracted with jieba using the custom stop-word file and each one is
    stored as an ``IosCommentsWords`` row.
    """
    ios_comments = Comment.select().where(Comment.user_device == 'iOS')

    pieces = []
    for row in ios_comments:
        pieces.append(row.content + '\n')
        if row.after_content is not None:
            pieces.append(row.after_content + '\n')
    corpus = ''.join(pieces)

    # Keyword extraction based on the TF-IDF algorithm.
    jieba.analyse.set_stop_words(DATA_ANALYZE_DIR + '/custom_cn_stopwords.txt')
    for keyword in jieba.analyse.extract_tags(corpus, topK=200):
        IosCommentsWords.create(word=keyword)
def get_all_comments_wordcloud():
    """Render a word cloud image from the ``content`` of every comment.

    The joined text is segmented with jieba, fed to a 7200x2400
    white-background ``WordCloud`` and written to
    ``iPhone11_all_comments_wordcloud.png`` under ``IMAGE_DIR``.
    """
    corpus = ''.join(row.content + '\n' for row in Comment.select())

    cloud = WordCloud(
        font_path=FONT_DIR + '/NotoSansCJKsc-Regular.otf',
        width=7200,
        height=2400,
        stopwords=get_stopwords_set(),
        background_color='white',
        collocations=False,
    )
    # Use jieba's default (accurate) segmentation mode.
    tokens = jieba.lcut(corpus)
    cloud.generate(' '.join(tokens))
    cloud.to_file(IMAGE_DIR + '/iPhone11_all_comments_wordcloud.png')
def get_non_five_star_comments_wordcloud():
    """Render a word cloud image from comments rated 1 to 4 stars.

    For each matching comment the ``content`` and, when present, the
    ``after_content`` are joined (one per line). The text is segmented with
    jieba, fed to a 4000x2400 white-background ``WordCloud`` and written to
    ``iPhone11_non_five_star_comments_wordcloud.png`` under ``IMAGE_DIR``.
    """
    low_star_comments = Comment.select().where(Comment.star.in_([1, 2, 3, 4]))

    pieces = []
    for row in low_star_comments:
        pieces.append(row.content + '\n')
        if row.after_content is not None:
            pieces.append(row.after_content + '\n')
    corpus = ''.join(pieces)

    cloud = WordCloud(
        font_path=FONT_DIR + '/NotoSansCJKsc-Regular.otf',
        width=4000,
        height=2400,
        stopwords=get_stopwords_set(),
        background_color='white',
        collocations=False,
    )
    # Use jieba's default (accurate) segmentation mode.
    tokens = jieba.lcut(corpus)
    cloud.generate(' '.join(tokens))
    cloud.to_file(IMAGE_DIR + '/iPhone11_non_five_star_comments_wordcloud.png')
def insert_sn_comments(comment_list: list, sn_ss: Union[Shop, SNSku]):
    """Store Suning comments as ``Comment`` rows, skipping ones already stored.

    A comment whose product info cannot be read (``KeyError`` or
    ``AttributeError`` while extracting colour/ROM) is reported on stdout and
    skipped.

    Args:
        comment_list: Comment dicts. Each is read for the keys
            ``commodityInfo``, ``commodityReviewId``, ``publishTime``,
            ``content``, ``qualityStar``, ``againFlag`` and ``sourceSystem``,
            plus ``againReview`` when ``againFlag`` is ``True``.
        sn_ss: Shop or SKU the comments were fetched for; only its
            ``is_self`` attribute is read.

    Raises:
        KeyError: If a comment dict lacks a required key outside the
            product-info section.
    """
    for comment in comment_list:
        try:
            commodity_info = comment['commodityInfo']
            # The colour descriptor may sit in slot 1 or slot 2; pass
            # (colour, other) in that order to the parser.
            if commodity_info['charaterId1'] == '颜色':
                color, rom = parse_iPhone11_product_info(commodity_info['charaterDesc1'],
                                                         commodity_info['charaterDesc2'])
            elif commodity_info['charaterId2'] == '颜色':
                color, rom = parse_iPhone11_product_info(commodity_info['charaterDesc2'],
                                                         commodity_info['charaterDesc1'])
            else:
                # NOTE(review): charaterDesc1 is passed for both arguments
                # here - confirm this is intentional.
                color, rom = parse_iPhone11_product_info(commodity_info['charaterDesc1'],
                                                         commodity_info['charaterDesc1'])
        except (AttributeError, KeyError):
            print('---有一条评论对应的产品信息不规范, 跳过此条评论---')
            continue

        # Identify an existing row by comment_id alone. Matching on every
        # column (content, star, timestamps, ...) would fail to recognise a
        # re-fetched comment whose payload differs slightly and then try to
        # insert a second row for the same comment_id.
        # NOTE(review): assumes comment_id uniquely identifies a comment -
        # confirm against the Comment model definition.
        new_comment, created = Comment.get_or_create(
            comment_id='SN' + str(comment['commodityReviewId']),
            defaults={
                'source': '苏宁',
                'is_self': sn_ss.is_self,
                'create_time': comment['publishTime'],
                'content': comment['content'],
                'star': comment['qualityStar'],
                'color': color,
                'rom': rom,
            },
        )
        if not created:
            # Already stored on an earlier run; leave the row untouched.
            continue

        if comment['againFlag'] is True:
            after_comment = comment['againReview']
            new_comment.after_time = after_comment['publishTime']
            new_comment.after_content = after_comment['againContent']
            after_days_str = after_comment['publishTimeStr']
            if after_days_str == '当天追加':
                # Follow-up posted on the same day as the original comment.
                new_comment.after_days = 0
            else:
                leading_digits = re.match(r'^\d+', after_days_str)
                # Without a leading number, leave after_days unset instead of
                # crashing on None.group() and aborting the whole batch.
                if leading_digits is not None:
                    new_comment.after_days = int(leading_digits.group())

        if comment['sourceSystem'] == 'android':
            new_comment.user_device = 'Android'
        elif comment['sourceSystem'] == 'ios':
            new_comment.user_device = 'iOS'
        else:
            new_comment.user_device = 'other'
        new_comment.save()