def _render_daily_bar(title, daily_values, filename):
    """Render one per-date bar chart into the ``picture`` directory.

    Args:
        title: Chart title passed to ``Bar``.
        daily_values: pandas Series; its index supplies the x-axis labels
            and its values supply the bar heights.
        filename: Name of the HTML file written inside ``picture``.
    """
    import os

    bar = Bar(title, '数据来源:不正经程序员-采集自猫眼', title_pos='center',
              width=1500, height=600)
    bar.add('', daily_values.index, daily_values.values,
            is_visualmap=False, visual_range=[0, 3500],
            visual_text_color='#fff', is_more_utils=True,
            is_label_show=True, xaxis_interval=0, xaxis_rotate=30,
            mark_line=["average"])

    overlap = Overlap()
    overlap.add(bar)
    overlap.show_config()

    # The path used to be spelled 'picture\<name>', which relies on an invalid
    # escape sequence and only names a directory on Windows.  Joining the
    # parts yields the same path there and a real sub-directory elsewhere.
    os.makedirs('picture', exist_ok=True)
    overlap.render(os.path.join('picture', filename))


def avg():
    """Chart the per-date mean of ``score`` and ``positive_prob``.

    Reads ``maoyan2.csv`` from the working directory, groups the rows by the
    ``date`` column and writes two bar charts into ``picture``:

    * ``评分走势图.html`` - mean ``score`` per date, rounded to two decimals
      and then doubled.
    * ``评论情感指数走势图.html`` - mean ``positive_prob`` per date, rounded
      to two decimals.

    Raises:
        KeyError: if the CSV lacks a ``date``, ``score`` or ``positive_prob``
            column (raised by pandas).
    """
    # ``error_bad_lines`` was removed in pandas 2.0; ``on_bad_lines='warn'``
    # is its replacement (skip malformed rows, emit a warning).  pandas older
    # than 1.3 rejects the new keyword with TypeError, so fall back for it.
    try:
        df = pd.read_csv('maoyan2.csv', on_bad_lines='warn')
    except TypeError:
        df = pd.read_csv('maoyan2.csv', error_bad_lines=False)

    # NOTE: the former bare ``df.round(2)`` call was dropped - it returned a
    # new frame that was thrown away, so it never affected the charts.

    date_score_avg = df.groupby('date')['score'].mean().round(2) * 2
    _render_daily_bar('评分走势图', date_score_avg, '评分走势图.html')

    date_positive_prob_avg = (
        df.groupby('date')['positive_prob'].mean().round(2)
    )
    _render_daily_bar('评论情感指数走势图', date_positive_prob_avg,
                      '评论情感指数走势图.html')
def test_overlap_1():
    """Stack two EffectScatter charts and one Scatter chart in an Overlap.

    All three series share the same x values and each uses a constant y
    value.  The combined chart's configuration is printed via
    ``show_config()`` and the chart is rendered with ``render()`` called
    without arguments.
    """
    x_values = [10, 20, 30, 40, 50, 60]
    y_thirty = [30] * 6
    y_fifty = [50] * 6
    y_ten = [10] * 6

    first_effect = EffectScatter("Scatter - EffectScatter 示例")
    first_effect.add("es", x_values, y_thirty)

    plain_scatter = Scatter()
    plain_scatter.add("scatter", x_values, y_fifty)

    second_effect = EffectScatter()
    second_effect.add("es_1", x_values, y_ten, symbol='pin', effect_scale=5)

    combined = Overlap()
    # Layers are added in the same order as before: es, scatter, es_1.
    for layer in (first_effect, plain_scatter, second_effect):
        combined.add(layer)
    combined.show_config()
    combined.render()
def render():
    """Chart score, city and gender counts read from the Maoyan comment CSV.

    Reads the CSV at a fixed absolute path, skips the first line and every
    line that does not contain exactly seven commas, and appends each
    remaining line's doubled score, city and gender to ``scores``,
    ``cities`` and ``genders``.  Those lists are not created in this
    function, so they must already exist in an enclosing scope, and every
    call appends to them again.

    Three HTML files are then written under
    ``C:\\Users\\think\\Desktop\\情感分析\\picture``: a bar chart of score
    counts, a geo chart of city counts and a bar chart of the 20 most
    frequent cities.  A gender pie chart is built last.

    NOTE(review): the pie chart is populated but never rendered in the code
    visible here - confirm whether a ``pie.render(...)`` call is missing.
    """
    csv_path = r'C:\Users\think\Desktop\情感分析\doc\maoyan.csv'
    with open(csv_path, mode='r', encoding='utf_8_sig') as csv_file:
        lines = csv_file.readlines()

    # lines[0] is skipped (presumably a header row - verify against the file).
    for line in lines[1:]:
        fields = line.split(',')
        # Eight fields is the same condition as "exactly seven commas".
        if len(fields) != 8:
            continue
        gender, city, score = fields[3], fields[4], fields[6]
        if score:
            scores.append(float(score) * 2)
        if city:  # drop rows whose city is empty
            cities.append(city)
        if gender:
            genders.append(gender)

    # Sorting the (score, count) pairs orders them by ascending score.
    score_data = sorted(Counter(scores).most_common())
    gender_data = Counter(genders).most_common()
    print(gender_data)

    # Shared style, used by the geo chart below.
    style = Style(title_color='#fff', title_pos='center', width=800,
                  height=600, background_color='#404a59')

    # Bar chart: number of comments per score.
    score_bar = Bar('《海上钢琴师》各评分数量', '数据来源:采集自猫眼',
                    title_pos='center', width=900, height=600)
    score_labels, score_counts = score_bar.cast(score_data)
    score_bar.add('', score_labels, score_counts, is_visualmap=True,
                  visual_range=[0, 3500], visual_text_color='#fff',
                  is_more_utils=True, is_label_show=True)
    score_overlap = Overlap()
    score_overlap.add(score_bar)
    score_overlap.show_config()
    score_overlap.render(
        r'C:\Users\think\Desktop\情感分析\picture\评分数量-柱状图.html')

    # handle() is defined elsewhere; per the original note it reconciles the
    # city names with the place names in the coordinate file.
    handle(cities)
    city_counts = Counter(cities).most_common()

    # Geo chart: audience distribution by city.
    geo = Geo('观众地理分布', '数据来源:采集自猫眼', **style.init_style)
    geo_labels, geo_values = geo.cast(city_counts)
    geo.add('', geo_labels, geo_values, visual_range=[0, 600],
            maptype='china', visual_text_color='#fff', symbol_size=7,
            is_visualmap=True, is_piecewise=True, visual_split_number=10)
    geo.render(
        r'C:\Users\think\Desktop\情感分析\picture\观众地理分布-地理坐标图.html')

    # Bar chart: the 20 most frequent cities.
    top_cities = Counter(cities).most_common(20)
    city_bar = Bar('观众来源排行TOP20', '数据来源:采集自猫眼',
                   title_pos='center', width=1200, height=600)
    city_labels, city_values = city_bar.cast(top_cities)
    city_bar.add('', city_labels, city_values, is_visualmap=True,
                 visual_range=[0, 3500], visual_text_color='#fff',
                 is_more_utils=True, is_label_show=True)
    city_bar.render(
        r'C:\Users\think\Desktop\情感分析\picture\观众来源排行-柱状图.html')

    # Pie chart: audience gender distribution (title centred, width 900).
    pie = Pie("观众性别分布图", "数据来源:采集自猫眼", title_pos='center',
              width=900)
    _, gender_counts = geo.cast(gender_data)
    print(gender_counts)
    # NOTE(review): the labels are fixed while the counts arrive in
    # most_common() order, so they line up only if the gender values happen
    # to rank in this order - confirm against the data.
    gender_labels = ["其他", "男", "女"]
    pie.add("", gender_labels, gender_counts, is_label_show=True,
            is_more_utils=True)
bar.add('', attr, value, is_visualmap=False, visual_range=[0, 3500], visual_text_color='#fff', is_more_utils=True, is_label_show=True) bar.render('picture\观众来源排行-柱状图.html') # 根据评分数据生成柱状图 bar = Bar('《海上钢琴师》各评分数量', '数据来源:不正经程序员-采集自猫眼', title_pos='center', width=900, height=600) attr, value = bar.cast(score_data) # line = Line() # line.add('', attr, value) bar.add('', attr, value, is_visualmap=False, visual_range=[0, 3500], visual_text_color='#fff', is_more_utils=True, is_label_show=True) overlap = Overlap() overlap.add(bar) # overlap.add(line) overlap.show_config() overlap.render( 'picture\评分数量-柱状图.html') # 根据评分数据生成柱状图 bar = Bar('评价人数走势图', '数据来源:不正经程序员-采集自猫眼', title_pos='center', width=1200, height=600) attr, value = bar.cast(date_data) # line = Line() # line.add('', attr, value) bar.add('', attr, value, is_visualmap=False, visual_range=[0, 3500], visual_text_color='#fff', is_more_utils=True, is_label_show=True) overlap = Overlap() overlap.add(bar) # overlap.add(line) overlap.show_config()
def _picture_path(filename):
    """Return the path of *filename* inside ``picture``, creating the directory if needed."""
    import os

    os.makedirs('picture', exist_ok=True)
    return os.path.join('picture', filename)


def _count_bar(title, counted, width, **add_options):
    """Build a 600-pixel-high bar chart from a list of ``(label, count)`` pairs."""
    bar = Bar(title, '数据来源:不正经程序员-采集自猫眼', title_pos='center',
              width=width, height=600)
    attr, value = bar.cast(counted)
    bar.add('', attr, value, visual_range=[0, 3500],
            visual_text_color='#fff', is_more_utils=True,
            is_label_show=True, **add_options)
    return bar


def _render_in_overlap(chart, filename):
    """Wrap *chart* in an Overlap, print its config and render it into ``picture``."""
    overlap = Overlap()
    overlap.add(chart)
    overlap.show_config()
    overlap.render(_picture_path(filename))


def render():
    """Parse ``shanghai.csv`` and write the comment texts and charts.

    Every line after the first that contains exactly seven commas and whose
    score field passes ``is_float`` is processed:

    * the doubled score goes to ``scores`` (and to ``scores_luhan`` when the
      comment mentions 鹿晗), the gender to ``genders`` and the first field
      to ``dates`` - these lists are not created here and must already exist
      in an enclosing scope;
    * comments with a doubled score of 10 or 1 are appended to the global
      positive / negative text accumulators (overall and 鹿晗-only).

    The four accumulators are written to ``*.txt`` files in the working
    directory, then a geo chart, four bar charts and a pie chart are rendered
    as HTML files inside ``picture``.

    Changes from the previous version: output paths are built with
    ``os.path.join`` instead of ``'picture\\...'`` literals (invalid escape
    sequences, Windows-only), the ``picture`` directory is created when
    missing, and comment text is joined once per call instead of growing the
    global strings inside the loop.
    """
    global positive_text
    global negative_text
    global luhan_positive_text
    global luhan_negative_text

    cities = []
    positive_parts = []
    negative_parts = []
    luhan_positive_parts = []
    luhan_negative_parts = []

    with open('shanghai.csv', mode='r', encoding='utf-8') as f:
        rows = f.readlines()

    for row in rows[1:]:
        if row.count(',') != 7:
            continue
        elements = row.split(',')
        date = elements[0]  # formerly named ``time``; renamed to avoid the module name
        gender = elements[3]
        city = elements[4]
        score = elements[6]
        comment = elements[7]  # last field, keeps the line's trailing newline
        if not is_float(score):
            continue

        if '鹿晗' in comment:
            doubled = float(score) * 2
            scores_luhan.append(doubled)
            if doubled == 10:
                luhan_positive_parts.append(comment)
            elif doubled == 1:
                luhan_negative_parts.append(comment)
        if city != '':  # drop rows whose city is empty
            cities.append(city)
        if score != '':
            doubled = float(score) * 2
            scores.append(doubled)
            if doubled == 10:
                positive_parts.append(comment)
            elif doubled == 1:
                negative_parts.append(comment)
        if gender != '':
            genders.append(gender)
        if date != '':
            dates.append(date)

    # Extend the module-level accumulators once, after the loop.
    positive_text += ''.join(positive_parts)
    negative_text += ''.join(negative_parts)
    luhan_positive_text += ''.join(luhan_positive_parts)
    luhan_negative_text += ''.join(luhan_negative_parts)

    text_outputs = (
        ("positive_text.txt", positive_text),
        ("negative_text.txt", negative_text),
        ("luhan_positive_text.txt", luhan_positive_text),
        ("luhan_negative_text.txt", luhan_negative_text),
    )
    for text_name, text in text_outputs:
        with open(text_name, "w", encoding='utf-8') as f:
            f.write(text)

    # handle() is defined elsewhere; per the original note it reconciles the
    # city names with the place names in the coordinate file.
    handle(cities)

    # Counter(...).most_common() yields (value, count) tuples; sorting them
    # orders the scores and dates ascending.
    data = Counter(cities).most_common()
    score_data = sorted(Counter(scores).most_common())
    score_data_luhan = sorted(Counter(scores_luhan).most_common())
    gender_data = Counter(genders).most_common()
    print(gender_data)
    date_data = sorted(Counter(dates).most_common())

    style = Style(title_color='#fff', title_pos='center', width=800,
                  height=600, background_color='#404a59')

    # Geo chart: audience distribution by city.
    geo = Geo('观众地理分布', '数据来源:不正经程序员-采集自猫眼',
              **style.init_style)
    attr, value = geo.cast(data)
    geo.add('', attr, value, visual_range=[0, 600],
            visual_text_color='#fff', symbol_size=7, is_visualmap=True,
            is_piecewise=True, visual_split_number=10)
    geo.render(_picture_path('观众地理分布-地理坐标图.html'))

    # Bar chart: the 20 most frequent cities (rendered directly, no Overlap).
    data_top20 = Counter(cities).most_common(20)
    top20_bar = _count_bar('观众来源排行TOP20', data_top20, 1200,
                           is_visualmap=False)
    top20_bar.render(_picture_path('观众来源排行-柱状图.html'))

    # Bar chart: number of comments per score.
    _render_in_overlap(
        _count_bar('各段评分数量', score_data, 900, is_visualmap=True),
        '评分数量-柱状图.html')

    # Bar chart: number of comments per score, 鹿晗 comments only.
    _render_in_overlap(
        _count_bar('评论带有鹿晗的各段评分数量', score_data_luhan, 900,
                   is_visualmap=True),
        '评论带有鹿晗的各段评分数量-柱状图.html')

    # Bar chart: number of comments per date.
    _render_in_overlap(
        _count_bar('评价人数走势图', date_data, 1200, is_visualmap=True,
                   xaxis_rotate=30),
        '评价人数走势图.html')

    from pyecharts import Pie

    # Pie chart: audience gender distribution (title centred, width 900).
    pie = Pie("观众性别分布图", "数据来源:不正经程序员-采集自猫眼",
              title_pos='center', width=900)
    attr, value = geo.cast(gender_data)
    # NOTE(review): the labels are fixed while the counts arrive in
    # most_common() order, so they line up only if the gender values happen
    # to rank in this order - confirm against the data.
    pie.add("", ["其他", "男", "女"], value, visual_range=[0, 3500],
            is_legend_show=False, is_label_show=True)
    pie.render(_picture_path('观众性别分布图.html'))