예제 #1
0
# Convert a word list into a feature vector via one-hot encoding.
# Input: a 1-D list of words and a predefined word -> index dictionary.


def ConvertWordsToTensor(words, dic):
    """Encode *words* as a multi-hot column vector over the vocabulary *dic*.

    Args:
        words: Iterable of vocabulary items (must be hashable).
        dic: Mapping from vocabulary item to its row index. It must contain
            the key ``'UNKNOWN'`` if any item of *words* may be missing.

    Returns:
        A ``numpy.ndarray`` of shape ``(len(dic), 1)`` and dtype ``float64``
        with ``1`` in the row of every item present in *words* (unknown items
        set the ``'UNKNOWN'`` row) and ``0`` elsewhere. Repeated items do not
        accumulate; the row is simply set to ``1``.

    Raises:
        KeyError: If an item is not in *dic* and *dic* has no ``'UNKNOWN'``
            entry.
    """
    vocab_size = len(dic)
    # ``np.float`` was removed in NumPy 1.24; it was an alias of the builtin
    # ``float``, so ``np.float64`` yields the identical dtype.
    z = np.zeros([vocab_size, 1], dtype=np.float64)
    for w in words:
        # Look up the fallback lazily so a dictionary without 'UNKNOWN' still
        # works when every word is known.
        row = dic[w] if w in dic else dic['UNKNOWN']
        z[row] = 1
    return z


# Build the corpus helper from the annotated text file. The two string
# arguments look like a YYYYMMDD start/end range -- TODO confirm against
# FR.OneHotBuilder.
fr = FR.OneHotBuilder(R'data/1998-01-2003版-带音.txt', "19980101", "19980120")

# Open question from the author: build y[i] as a list of numpy vectors?

# X collects one tensor per entry of fr.linkedWord, in iteration order.
X = []
for w in fr.linkedWord:
    # Encode this entry against fr.oneHotDic, then wrap the resulting numpy
    # array as a torch tensor.
    z = ConvertWordsToTensor(w, fr.oneHotDic)
    X.append(torch.from_numpy(z))

Y = []
for w in fr.linkedWord:
    if w in fr.entityWord:
        # Y.append(1)
        Y.append(torch.tensor([1.]))
    else:
        # Y.append(0)