from pyecharts import Bar, Line, Page, Style, WordCloud


def create_charts(data):
    # Format of `data` (with three charts):
    # {'charcloud': [intro HTML for chart 1, trailing HTML for chart 1,
    #                data 1, data 2, ..., data n],
    #  '...': [...]}
    # The page contains:
    # 1. a word cloud of every character, 'charcloud' (two data lists: chars, values)
    html = ''
    page = Page()
    style = Style(width=900, height=600)

    # Chart 1: word cloud
    html_before = data['charcloud'][0]
    html_after = data['charcloud'][1]
    chars = data['charcloud'][2]
    values = data['charcloud'][3]
    wordcloud = WordCloud("唐诗用字云图", **style.init_style)
    wordcloud.add("字云", chars, values, word_size_range=[10, 100], shape='pentagon')
    java_script = wordcloud.render_embed()
    html += html_before + java_script + html_after
    page.add(wordcloud)

    # Chart 2: bar chart of the ten most frequent characters
    html_before = data['chartop10'][0]
    html_after = data['chartop10'][1]
    chars = data['chartop10'][2]
    values = data['chartop10'][3]
    bar = Bar("唐诗高频十字", **style.init_style)
    bar.add("柱状图", chars, values)
    java_script = bar.render_embed()
    html += html_before + java_script + html_after
    page.add(bar)

    # Chart 3: line chart of character frequency vs. character count
    html_before = data['frequency×'][0]
    html_after = data['frequency×'][1]
    keys = data['frequency×'][2]
    values = data['frequency×'][3]
    line = Line("唐诗字频-字数", **style.init_style)
    line.add("字频--字数", keys, values,
             is_smooth=True, is_fill=True, area_opacity=0.2,
             is_datazoom_show=True, datazoom_type="both", datazoom_range=[0, 60],
             xaxis_interval=1, yaxis_formatter="字",
             xaxis_name="频次", yaxis_name="字数",
             xaxis_name_pos="end", yaxis_name_pos="end",
             is_more_utils=True)
    java_script = line.render_embed()
    html += html_before + java_script + html_after
    page.add(line)

    # Collect the JS dependencies of every chart on the page
    script = page.get_js_dependencies()
    return html, script
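# Usage sketch for create_charts(): per the header comment, each dict entry
# holds [intro HTML, trailing HTML, x-data, y-data]. Every value below is a
# made-up placeholder, not real project data.
sample_data = {
    'charcloud': ['<h2>字云</h2>', '<hr>', ['春', '月', '风'], [120, 95, 80]],
    'chartop10': ['<h2>Top 10</h2>', '<hr>', ['春', '月', '风'], [120, 95, 80]],
    'frequency×': ['<h2>字频</h2>', '<hr>', [1, 2, 3], [500, 300, 100]],
}
page_html, page_scripts = create_charts(sample_data)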
import json
import re
from collections import Counter

import jieba
import pymysql as mysql  # assumption: any DB-API MySQL driver aliased as `mysql`
from django.http import HttpResponse
from django.shortcuts import render
from pandas import read_sql
from pyecharts import WordCloud


def stack(request):
    if request.method == 'POST':
        language = request.POST.get('language', '')
        if not language:
            return HttpResponse('error language')
        language = language.capitalize()  # e.g. 'java' -> 'Java'
        engine = mysql.connect(host="localhost", user="******", passwd="root",
                               db="Jobs", charset='utf8')
        # Parameterize the query rather than interpolating `language`,
        # which would be open to SQL injection.
        if language == 'All':
            df = read_sql("SELECT `desc` FROM jobanalysis", engine)
        else:
            df = read_sql("SELECT `desc` FROM jobanalysis WHERE type = %s",
                          engine, params=[language])
        desc = ' '.join(df['desc'].tolist())
        word_lst = jieba.cut(desc)
        # Keep alphanumeric tokens longer than two characters, title-cased.
        words = [
            re.sub(r'\s', '', w).lower().capitalize() for w in word_lst
            if re.sub(r'\s', '', w) and len(w) > 2 and re.findall(r'[0-9a-zA-Z]+', w)
        ]
        dct = dict(Counter(words).most_common(40))
        # Drop the query terms themselves and other noise tokens.
        pop_lst = [
            '211', '985', 'Python', 'Java', 'And', 'Android', 'C++', 'Php',
            'Ios', 'Web', 'Api'
        ]
        for p in pop_lst:
            if p in dct:
                dct.pop(p)
        wordcloud = WordCloud(width=1300, height=620)
        rename = {
            'Mysql': 'MySQL',
            'Sql': 'SQL',
            'Mongodb': 'MongoDB',
            'Nosql': 'NoSQL',
            'Html': 'HTML',
            'Css': 'CSS',
            'Openstack': 'OpenStack'
        }
        keys = [rename.get(k, k) for k in dct]
        wordcloud.add("", keys, list(dct.values()), word_size_range=[25, 80])
        html = wordcloud.render_embed()
        data = {'data': html}
        return HttpResponse(json.dumps(data), content_type="application/json")
    elif request.method == 'GET':
        return render(request, 'backend/stack.html')
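# A minimal sketch of exercising the `stack` view with Django's test client;
# the '/stack/' route is an assumption, since the project's urlconf is not
# shown here.
from django.test import Client

client = Client()
response = client.post('/stack/', {'language': 'python'})
payload = json.loads(response.content)  # payload['data'] is the embeddable word-cloud HTML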
def show_data(request):
    template = loader.get_template('meetings/show_data.html')

    # Pie: active vs. inactive members
    attr = [u"members", u"non-members"]
    value = [
        Members.objects.filter(on_activate=True).count(),
        Members.objects.filter(on_activate=False).count()
    ]
    pie = Pie(u"Members and non-members")
    pie.add("Count", attr, value)

    # Word cloud and bar chart: attendance count per member
    meeting_attendance = Counter(MeetingInfo.objects.values_list('attendance'))
    attr_attendance = OrderedDict()
    attr_attendance.update({
        Members.objects.get(id=key[0]).name: value
        for key, value in meeting_attendance.items()
    })
    wordcloud = WordCloud()
    wordcloud.add("attendance", list(attr_attendance.keys()),
                  list(attr_attendance.values()))
    bar = Bar("attendances")
    bar.add("attendances", list(attr_attendance.keys()),
            list(attr_attendance.values()),
            xaxis_interval=0, xaxis_rotate=-90)

    # Line: attendee count per meeting, keyed by "date#count theme"
    meeting_info = MeetingInfo.objects.values_list(
        'date', 'count', 'theme').annotate(Count('attendance'))
    meeting_info_dict = OrderedDict()
    for m in meeting_info:
        meeting_info_dict[str(m[0]) + '#' + str(m[1]) + str(m[2])] = int(m[3])
    line = Line("Meeting attendance number")
    line.add("ESHTMC", list(meeting_info_dict.keys()),
             list(meeting_info_dict.values()),
             mark_point=["average"], xaxis_interval=0, xaxis_rotate=-45)

    context = dict(
        host=REMOTE_HOST,
        pie=pie.render_embed(),
        pie_script_list=pie.get_js_dependencies(),
        wordcloud=wordcloud.render_embed(),
        wordcloud_script_list=wordcloud.get_js_dependencies(),
        bar=bar.render_embed(),
        bar_script_list=bar.get_js_dependencies(),
        line=line.render_embed(),
        line_script_list=line.get_js_dependencies(),
    )
    return HttpResponse(template.render(context, request))
from pyecharts import WordCloud


def make_wordcloud(comm_data):
    '''
    The ECharts word cloud rendered incorrectly, so draw it with pyecharts instead.
    :param comm_data: mapping of word -> frequency
    :return: embeddable HTML fragment
    '''
    name = list(comm_data.keys())
    value = list(comm_data.values())
    wordcloud = WordCloud(width='100%', height=600)
    wordcloud.add("", name, value, shape="diamond", word_size_range=[15, 120])
    return wordcloud.render_embed()
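# Usage sketch for make_wordcloud(); the comment words and counts are
# placeholder values.
comment_counts = {'精彩': 120, '好看': 85, '一般': 30}
embed_html = make_wordcloud(comment_counts)  # HTML fragment ready to inline in a template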
def _requests_show(self):
    word_cloud = WordCloud("技能要求云图", width=1300, height=620,
                           title_pos='center')
    # cast() turns self.g.requests_data into parallel attribute/value lists
    attr, value = word_cloud.cast(self.g.requests_data)
    word_cloud.add("", attr, value, word_size_range=[20, 100], shape='diamond')
    return word_cloud.render_embed()
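# `cast` is pyecharts 0.5's Base.cast helper, which splits a mapping (or a
# list of dicts) into parallel attribute/value lists. A sketch of the call,
# with a guessed shape for self.g.requests_data:
attr, value = WordCloud.cast({'Python': 30, 'Linux': 18, 'MySQL': 12})
# e.g. attr ~ ['Python', 'Linux', 'MySQL'], value ~ [30, 18, 12]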
def wordcloud():
    name = [
        '余小雄', '雄哥组', '小雄', '昨天还没睡够啊', '不是9点就回去', '道固',
        '讨论阵容、分路', '海林:后羿', '彭涛:悟空'
    ]
    value = [10000, 6181, 4386, 4055, 2467, 2244, 1898, 1484, 1112]
    wordcloud = WordCloud(width=1300, height=620)
    wordcloud.add("", name, value, word_size_range=[20, 100])
    return render_template(
        'wordcloud.html',
        myechart=wordcloud.render_embed(),
        host=REMOTE_HOST,
        script_list=wordcloud.get_js_dependencies(),
    )
from pyecharts import WordCloud


def s_make_wordcloud(comm_data):
    '''
    Draw a word cloud with pyecharts.
    :param comm_data: mapping of word -> frequency
    :return: embeddable HTML fragment
    '''
    # name: the list of words
    name = list(comm_data.keys())
    # value: the list of frequencies
    value = list(comm_data.values())
    # Build the word-cloud object
    wordcloud = WordCloud(width='100%', height=600)
    # add(): series name first, then the word list, the frequency list,
    # and the cloud shape
    wordcloud.add("", name, value, shape="pentagon", word_size_range=[15, 50])
    # render_embed() returns the embeddable fragment
    return wordcloud.render_embed()
import pandas as pd
from flask import render_template, request
from pyecharts import WordCloud


def index_word_2():
    time = request.args.get("time")
    data = pd.read_csv(r"./static/data/3Causes of urban child death.csv")
    data_x = data["死因"].values.tolist()
    data_y = data[time].values.tolist()
    word = WordCloud("{}城市儿童死亡原因".format(time), width="50%")
    word.add("", data_x, data_y)
    return render_template(
        'index.html',
        myechart=word.render_embed(),
        script_list=word.get_js_dependencies(),
        text='''
As the chart shows, injury and poisoning pose the greatest threat to children under five in both urban and rural areas, and this leading cause of death points to neglect of children's living environments. Breaking injury and poisoning down further, the results are startling: drowning accounts for nearly 50%, accidental falls for nearly 20%, and traffic accidents for nearly 15%.

In today's society the risk of accidental injury is present almost everywhere, and the environmental risk factors behind it are varied and differ from place to place; preventing avoidable child deaths requires recognizing this clearly. Road-traffic mortality is higher in rural areas with weaker economies and transport than in developed areas; rural children likely face greater traffic risk because of parents' lower education, larger families, inadequate supervision, and a lack of safety education. In river-network regions traffic deaths are lower, but drowning is more common because protective measures are lacking. Carbon-monoxide poisoning in rural areas mostly comes from coal heating, while in cities it comes from improperly used, poorly ventilated water heaters. With the spread of household electrical appliances, electric-shock injuries are rising, mostly from ignoring safe operation.

Only by analyzing each of these potential risk factors in a targeted way can children's lives, safety, and health truly be protected.
''',
        text1='''
Data collection for this project centered on child mortality in China: roughly ten years of mortality figures for Chinese children under five (overall, urban, and rural), analyses of the main causes of death, and data on available prevention, control, and specialist care.

Overall, China's under-five mortality rate has already fallen substantially thanks to better medical care and national special programs, but the higher mortality of rural children remains the core breakthrough point for child health work. People still pay too little attention to the many risk factors in children's living environments; truly resolving them starts with analyzing each concrete situation rather than overgeneralizing, and with targeted education and remedies for family, regional, and environmental factors. As for the development of disease prevention and control centers and specialist hospitals, the biggest problem is a bottleneck: a temporary illusion of saturation has slowed their growth. Children's disease prevention and specialist care were never problems that "basic coverage by medical institutions" or "basic satisfaction of care needs" could solve; fuller development and the spread of more advanced techniques remain a long road ahead.
'''
    )
from pyecharts import WordCloud


def wordcloud_html(title, key, val):
    wordcloud = WordCloud(title, title_pos="center", width="100%", height=300)
    wordcloud.add("", key, val, word_size_range=[20, 100])
    return wordcloud.render_embed()
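# Usage sketch for wordcloud_html(); the title, words, and weights are
# placeholder values.
snippet = wordcloud_html('高频词', ['数据', '分析', '可视化'], [88, 60, 45])
# The page embedding `snippet` must also load the ECharts JS dependencies,
# e.g. the script lists other views here obtain via get_js_dependencies().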
import json
import sqlite3

import jieba
from django.http import HttpResponse
from pyecharts import Line, WordCloud


def translate_char(request):
    context = ""
    conn = sqlite3.connect("db.sqlite3")
    c = conn.cursor()
    if request.POST["type"] == "Line":
        # Daily lookup counts for the history line chart
        str_sql = ("select date(updated) as d, count(*) as c "
                   "from Mytest_translate group by d")
        cursor = c.execute(str_sql)
        data_arr = []
        data_num_arr = []
        for row in cursor:
            data_arr.append(row[0])
            data_num_arr.append(row[1])
        line = Line("歷史查詢統計", "單位: 次")
        line.add("times", data_arr, data_num_arr,
                 mark_point=["max", "min"], is_label_show=True,
                 is_datazoom_show=True, xaxis_rotate=30, xaxis_interval=2,
                 datazoom_extra_type="both", xaxis_margin=20,
                 is_more_utils=True)
        context = line.render_embed()
    elif request.POST["type"] == "Cloud":
        str_sql = "select word,translate from Mytest_translate"
        cursor = c.execute(str_sql)
        en = ""
        zh_CN = ""
        en_result = {}
        for row in cursor:
            # Strip punctuation; str.replace returns a new string,
            # so the result must be reassigned.
            word = row[0]
            word_tr = row[1]
            for ch in '!"@#$%^&*()_?:;,<>\\/|{}':
                word = word.replace(ch, "")
                word_tr = word_tr.replace(ch, "")
            # Route each pair by which side of it is Chinese
            if u'\u4e00' <= row[0][0] <= u'\u9fff':
                zh_CN += word
                en += word_tr + " "
            else:
                zh_CN += word_tr
                en += word + " "
        # English word counts
        word_arr = en.split()
        for e in word_arr:
            en_result[e] = en_result.get(e, 0) + 1
        en_res = list(en_result.items())
        en_res.sort(key=lambda x: x[1], reverse=True)
        # Chinese word counts
        zh_CN_arr = jieba.cut(zh_CN, cut_all=False)
        zh_CN_res = {}
        for d in zh_CN_arr:
            zh_CN_res[d] = zh_CN_res.get(d, 0) + 1
        zh_CN_res = list(zh_CN_res.items())
        zh_CN_res.sort(key=lambda x: x[1], reverse=True)
        en_name_arr = []
        en_num_arr = []
        zh_CN_name_arr = []
        zh_CN_num_arr = []
        for i in en_res[:100]:
            en_name_arr.append(i[0])
            en_num_arr.append(i[1])
        for d in zh_CN_res[10:110]:
            zh_CN_name_arr.append(d[0])
            zh_CN_num_arr.append(d[1])
        en_char = WordCloud()
        try:
            if request.POST["k"] == u"漢字":
                en_char.add("zh_CN", zh_CN_name_arr, zh_CN_num_arr,
                            shape=request.POST["t"])
            else:
                en_char.add("En", en_name_arr, en_num_arr,
                            shape=request.POST["t"])
        except Exception:
            en_char.add("En", en_name_arr, en_num_arr, shape="diamond")
        context = en_char.render_embed()
    return HttpResponse(json.dumps({"context": context}))
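# A minimal sketch of the POST parameters translate_char() reads; the
# '/translate_char/' route is an assumption, since the urlconf is not shown.
from django.test import Client

client = Client()
client.post('/translate_char/', {'type': 'Line'})  # line chart of daily lookups
client.post('/translate_char/', {'type': 'Cloud', 'k': u'漢字', 't': 'circle'})  # Chinese cloud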
def original_docs():
    form = NameForm()
    docs = []
    wordcloud = ''
    if not form.validate_on_submit():
        # Default query shown on first visit
        form.stkCode.data = '紫光股份'
        form.start_date.data = '20170101'
        form.end_date.data = '20180701'
    if form.validate_on_submit():
        stkName = form.stkCode.data
        start_date = datetime.datetime.strptime(form.start_date.data, '%Y%m%d')
        docs = []
        wordcloud = []
        if form.end_date.data:
            end_date = datetime.datetime.strptime(form.end_date.data, '%Y%m%d')
        else:
            end_date = start_date
        if stkName in INDUSTRY_LIST:
            cursor = collection.find({
                "activityDate": {"$gte": start_date, "$lte": end_date},
                "statementNum": {"$gte": 20},
                "stkIndustry": stkName
            })
        else:
            cursor = collection.find({
                "stkName": stkName,
                "activityDate": {"$gte": start_date, "$lte": end_date}
            })
        for doc in cursor:
            doc['activityDate'] = datetime.datetime.strftime(doc['activityDate'], '%Y-%m-%d')
            doc['announceDate'] = datetime.datetime.strftime(doc['announceDate'], '%Y-%m-%d')
            words, html = sent_analyse(doc['content'])
            wordcloud.extend(words)
            doc['cutWords'] = html
            docs.append(doc)
        # Weight the collected words with the prebuilt TF-IDF model
        stk_vector = tfidf_model[dictionary.doc2bow(wordcloud)]
        words = []
        weights = []
        for idx, weight in stk_vector:
            words.append(dictionary.id2token[idx])
            weights.append(weight)
        wordcloud = WordCloud("云图", width=800, height=400)
        wordcloud.add("", words, weights)
        wordcloud = wordcloud.render_embed()
    return render_template('queryDb.html', form=form, docs=docs,
                           wordcloud=wordcloud, ipv4=IPV4)
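# The view above relies on module-level `dictionary` and `tfidf_model`; a
# minimal sketch, assuming they are gensim objects built from a tokenized
# corpus (the placeholder texts below are made up):
from gensim.corpora import Dictionary
from gensim.models import TfidfModel

texts = [['业绩', '增长'], ['营收', '增长', '毛利']]
dictionary = Dictionary(texts)
tfidf_model = TfidfModel([dictionary.doc2bow(t) for t in texts])
_ = dictionary[0]  # gensim fills dictionary.id2token lazily; touch it once before use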