Example #1
0
def ensure_data(data_name, data_url):
    """Return the local path of a test data item, fetching it if absent.

    The item is looked up as ``data_name`` under the directory returned by
    ``test_data_path()``. When it already exists, its path is returned
    without any download. Otherwise ``data_url`` is downloaded; a URL
    ending in ``.zip`` is saved next to the target, extracted into the
    data root, and the archive is removed afterwards.

    :param data_name: file or directory name expected under the data root
    :param data_url: URL to fetch the data from
    :return: the path ``<data root>/<data_name>``
    """
    data_root = test_data_path()
    target = os.path.join(data_root, data_name)
    if os.path.exists(target):
        return target

    # Plain files are downloaded straight to their final location.
    if not data_url.endswith('.zip'):
        download(data_url, target)
        return target

    # Archives are stored beside the target, unpacked into the data root,
    # then deleted.
    archive_path = target + '.zip'
    download(data_url, archive_path)
    with zipfile.ZipFile(archive_path, "r") as bundle:
        bundle.extractall(data_root)
    remove_file(archive_path)
    return target
Example #2
0
def ensure_data(data_name, data_url):
    """Ensure a data item is available locally and return its path.

    If ``data_name`` already exists under the directory returned by
    ``test_data_path()``, its path is returned as is. Otherwise the data
    is downloaded from ``data_url``; a URL ending in ``.zip`` is extracted
    into the data root and the downloaded archive is removed.

    The resolved path is printed before the existence check, and printed
    again after a zip archive has been unpacked.

    :param data_name: file or directory name expected under the data root
    :param data_url: URL to fetch the data from
    :return: the path ``<data root>/<data_name>``
    """
    data_root = test_data_path()
    target = os.path.join(data_root, data_name)
    print("111dest_path:" + target)
    if os.path.exists(target):
        return target

    # Plain files are downloaded straight to their final location.
    if not data_url.endswith('.zip'):
        download(data_url, target)
        return target

    # Archives are stored beside the target, unpacked into the data root,
    # then deleted.
    archive_path = target + '.zip'
    download(data_url, archive_path)
    with zipfile.ZipFile(archive_path, "r") as bundle:
        bundle.extractall(data_root)
    remove_file(archive_path)
    print("dest_path:" + target)
    return target
Example #3
0
def install_jar(name, filepath, url):
    """Return the path of ``name`` inside ``filepath``, downloading if needed.

    :param name: file name of the jar
    :param filepath: directory the jar should live in
    :param url: URL to download the jar from when it is not a regular file yet
    :return: ``os.path.join(filepath, name)``
    """
    jar_path = os.path.join(filepath, name)
    # Only hit the network when the jar is not already on disk.
    if not os.path.isfile(jar_path):
        download(url, jar_path)
    return jar_path
Example #4
0
 def install_jar(name, url):
     """Return the path of ``name`` under ``STATIC_ROOT``, downloading if needed.

     :param name: file name of the jar
     :param url: URL to download the jar from when it is not a regular file yet
     :return: ``os.path.join(STATIC_ROOT, name)``
     """
     jar_path = os.path.join(STATIC_ROOT, name)
     # Only hit the network when the jar is not already on disk.
     if not os.path.isfile(jar_path):
         download(url, jar_path)
     return jar_path
Example #5
0
# -*- coding:utf-8 -*-
# Author:hankcs
# Date: 2018-04-28 10:07
import zipfile

from pyhanlp import *
from pyhanlp.static import download

# The relative paths below only resolve from the project root, so bail out
# early when 'tests/data/' is missing. The printed message asks the user to
# run this script from the project root directory.
# NOTE(review): `os` is not imported in the visible lines — presumably it is
# re-exported by `from pyhanlp import *`; confirm.
if not os.path.isdir('tests/data/'):
    print('请在项目根目录下运行本脚本')
    exit(1)

# Fetch and unpack the word-vector model only when its directory is absent.
# The downloaded zip is left in place after extraction.
if not os.path.isdir('tests/data/hanlp-wiki-vec-zh'):
    model_path = 'tests/data/hanlp-wiki-vec-zh.zip'
    download('http://hanlp.linrunsoft.com/release/model/hanlp-wiki-vec-zh.zip',
             model_path)
    with zipfile.ZipFile(model_path, "r") as archive:
        archive.extractall('tests/data/')

# Look up the two HanLP classes by their fully qualified Java names.
# NOTE(review): JClass presumably comes from the pyhanlp star import — confirm.
WordVectorModel = JClass('com.hankcs.hanlp.mining.word2vec.WordVectorModel')
DocVectorModel = JClass('com.hankcs.hanlp.mining.word2vec.DocVectorModel')
# Build the word-vector model from the extracted text file, then build the
# document-vector model from it.
word2vec = WordVectorModel(
    'tests/data/hanlp-wiki-vec-zh/hanlp-wiki-vec-zh.txt')
doc2vec = DocVectorModel(word2vec)
docs = ["山东苹果丰收", "农民在江苏种水稻", "奥运会女排夺冠", "世界锦标赛胜出", "中国足球失败"]
# Add each sample sentence, passing its list index as the first argument.
for idx, doc in enumerate(docs):
    doc2vec.addDocument(idx, doc)

# Print the result of the model's nearest() query for the word '语言' ("language").
print(word2vec.nearest('语言'))

for res in doc2vec.nearest('我要看比赛'):