Example #1
import pandas as pd
import matplotlib.pyplot as plt
from nltk import FreqDist
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer


def pkmpopana():
	# Combine the per-year tweet dumps into one DataFrame
	# (DataFrame.append is removed in recent pandas; pd.concat replaces it).
	frames = [pd.read_csv(f'pkm-{i}-clean.csv') for i in range(19, 27)]
	df = pd.concat(frames, ignore_index=True)

	# Join every tweet into one lowercase string and tokenize on word characters.
	sen = ''.join(df['text']).lower()
	toker = RegexpTokenizer(r'\w+')
	words = toker.tokenize(sen)

	# Drop English stop words before counting word frequencies.
	stop_words = set(stopwords.words('english'))
	filtered_sentence = [w for w in words if w not in stop_words]
	fdist = FreqDist(filtered_sentence)

	# Keep only Gen-I Pokemon (ids 1-151) and count their mentions.
	pk = pd.read_csv('pokemon.csv')
	pk = pk[pk['id'] < 152]
	re = {n: fdist[n] for n in pk['pokemon'] if n in fdist}  # note: `re` shadows the stdlib module

	# Plot the two most-mentioned names as a horizontal bar chart.
	so = sorted(re.items(), key=lambda item: item[1], reverse=True)
	tar = so[:2]
	p = [t[0] for t in tar]
	l = [t[1] for t in tar]
	plt.barh(range(len(tar)), width=l[::-1], align='center')
	plt.xlabel('count')
	plt.ylabel('name')
	plt.yticks(range(len(tar)), p[::-1])
	plt.show()
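
For a quick check of the counting pipeline without the CSV files, here is a minimal self-contained sketch; the sample sentence is invented, and it assumes the NLTK stopwords corpus has been downloaded (nltk.download('stopwords')):

from nltk import FreqDist
from nltk.corpus import stopwords
from nltk.tokenize import RegexpTokenizer

# Invented sample text, standing in for the concatenated tweets.
sample = "Pikachu and Eevee! pikachu was everywhere, but the eevee line won."
words = RegexpTokenizer(r'\w+').tokenize(sample.lower())
filtered = [w for w in words if w not in set(stopwords.words('english'))]
fdist = FreqDist(filtered)
print(fdist['pikachu'], fdist['eevee'])  # -> 2 2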
Example #2
def _expand_requires_extra(re):
    # `re` maps each extra name to its list of requirement strings.
    for extra, reqs in sorted(re.items()):
        for req in reqs:
            if ';' in req:
                # The requirement already carries an environment marker;
                # combine it with the extra marker.
                name, envmark = req.split(';', 1)
                yield '{} ; extra == "{}" and ({})'.format(
                    name, extra, envmark)
            else:
                yield '{} ; extra == "{}"'.format(req, extra)
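
A quick illustration of what the generator yields; the input dict below is invented for the example:

reqs = {'test': ['pytest', 'mock; python_version < "3.6"']}
for line in _expand_requires_extra(reqs):
    print(line)
# pytest ; extra == "test"
# mock ; extra == "test" and ( python_version < "3.6")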
Example #3
def get_url(self, path):
    # Build the full URL list for every case in the file at `path`,
    # prefixing the config's base_url when one is present.
    conf = self.get_config()
    data = self.get_case(path)
    url_li = []
    if "base_url" in conf:
        for i in data:
            re = i["request"]  # note: `re` shadows the stdlib module
            for k, v in re.items():
                if k == "URL":
                    url_li.append(conf["base_url"] + v)
    else:
        for i in data:
            re = i["request"]
            for k, v in re.items():
                if k == "URL":
                    url_li.append(v)
    return url_li
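
Examples 3 through 6 all walk the same case structure, so a plausible (assumed, not confirmed by the source) shape for what self.get_case(path) returns would be:

cases = [
    {"request": {"URL": "/login",
                 "method": "POST",
                 "headers": {"Content-Type": "application/json"},
                 "params": {"user": "demo"}}},
]
# With base_url "http://example.com" in the config, get_url would
# then return ["http://example.com/login"].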
Example #4
def get_params(self, path):
    # Collect the "params" entry from each case's request block.
    data = self.get_case(path)
    params_li = []
    for i in data:
        re = i["request"]
        for k, v in re.items():
            if k == "params":
                params_li.append(v)
    return params_li
Example #5
def get_headers(self, path):
    # Collect the "headers" entry from each case's request block.
    data = self.get_case(path)
    headers_li = []
    for i in data:
        re = i["request"]
        for k, v in re.items():
            if k == "headers":
                headers_li.append(v)
    return headers_li
Example #6
def get_method(self, path):
    # Collect the "method" entry from each case's request block.
    data = self.get_case(path)
    method_li = []
    for i in data:
        re = i["request"]
        for k, v in re.items():
            if k == "method":
                method_li.append(v)
    return method_li
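
get_params, get_headers, and get_method differ only in the key they extract; here is a sketch of one parameterized helper that could replace all three (the name _get_field is invented for this example):

def _get_field(self, path, key):
    # Generic form of get_params / get_headers / get_method:
    # pull the value stored under `key` in each case's "request" block.
    data = self.get_case(path)
    return [i["request"][key] for i in data if key in i["request"]]

# e.g. self._get_field(path, "headers") behaves like get_headers(path)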
Example #7
# 3. Similarity analysis
tfidf[doc_test_vec]  # TF-IDF value of each word in the test document (note: result is unused here)
index = similarities.SparseMatrixSimilarity(tfidf[corpus], num_features=len(dictionary.keys()))  # score the test document against every target document
sims = index[tfidf[doc_test_vec]]

# Organize the output

simss = []
for i in range(len(sims)):
    if sims[i] != 0:
        sims[i] += weighList[i]
    simss.append(sims[i])  # collect each sentence's similarity into a list

print("Final result (text paired with similarity):")
re = dict(zip(kownledge, simss))  # pair each question with its similarity; note: `re` shadows the stdlib module
d_order = sorted(re.items(), key=lambda x: x[1], reverse=True)
for i in range(10):
    print(d_order[i], end="")
    print(kownDict[(d_order[i][0]).encode("utf-8")])

baseKownledge = []
for i in range(3):
    baseKownledge.append(d_order[i][0])

print(baseKownledge)
'''
print("Sorted results")
re2 = sorted(enumerate(sims), key=lambda item: -item[1])   # sort by similarity
for i in range(3):
    print(re2[i])
'''
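
The snippet depends on objects built earlier in the script (dictionary, corpus, doc_test_vec, weighList, kownledge, kownDict). Here is a self-contained sketch of the same gensim TF-IDF similarity pattern, with a toy corpus invented for illustration:

from gensim import corpora, models, similarities

texts = [["cat", "sits", "mat"],
         ["dog", "chases", "cat"],
         ["bird", "sings"]]
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(t) for t in texts]
tfidf = models.TfidfModel(corpus)

query_vec = dictionary.doc2bow(["cat", "mat"])
index = similarities.SparseMatrixSimilarity(tfidf[corpus],
                                            num_features=len(dictionary))
sims = index[tfidf[query_vec]]
print(sorted(enumerate(sims), key=lambda item: -item[1]))  # doc 0 ranks first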