예제 #1
0
파일: test.py 프로젝트: coder35/active_NER
# For every (result, sentence) pair, rebuild each entity as a 3-tuple: the
# first two entity fields are used as indices into the matching testlocs
# entry, and the third field is carried over unchanged.
wresult2 = []
for result, sentence in zip(wresult1, testlocs):
    mapped = []
    wresult2.append(mapped)
    # Debug output; a parenthesised single argument prints the same value
    # as the bare print statement.
    print(sentence)
    for entity in result:
        print(entity)
        mapped.append((sentence[entity[0]], sentence[entity[1]], entity[2]))

# From here on the remapped entities replace the previous wresult.
wresult = wresult2

# char model
#
# Load the character-level data, build the character dictionary and its
# vectors, register a padding symbol, convert the data to index form and
# split it into train / dev / test portions.

data = preprocess.getcData(datafile)
cdic, cvectors = preprocess.chars2dic2("char_vector_50",chardim)
# The padding symbol gets the next free index (the current length of
# cvectors); the random vector appended below then sits at that position.
padding_id = cdic["<padding>"] = len(cvectors)
pids = [padding_id]
cvectors.append(np.random.randn(chardim))
embedding = cvectors
indexdata = preprocess.raw2num1(data,cdic,tags,0,padding_id)

# Split boundaries are multiples of one twentieth of the data set:
# the first 16 twentieths are training data, the next 2 are development
# data, and everything after that (including the remainder of the division)
# is test data. Floor division keeps the boundaries integral even when "/"
# means true division, because slice indices must be integers.
split_unit = len(indexdata) // 20
train_end = split_unit * 16
dev_end = split_unit * 18

traindata = indexdata[:train_end]
devdata = indexdata[train_end:dev_end]
testdata = indexdata[dev_end:]


# Un-indexed items over the same index range as testdata. The upper bound is
# len(indexdata), not len(data), exactly as in the slice for testdata.
testwdata = data[dev_end:len(indexdata)]

cgold = []
for item in testdata:
예제 #2
0
파일: run.py 프로젝트: mswellhao/active_NER
    # Pickle the indexed data, the word dictionary and the tag dictionary
    # into separate files under the nerstr directory. The with-blocks close
    # each file even if pickle.dump raises.
    with open(nerstr+"/data", 'wb') as af:
        pickle.dump(indexdata, af)

    with open(nerstr+"/worddic", 'wb') as df:
        pickle.dump(wdic, df)

    with open(nerstr+"/tags", 'wb') as tf:
        pickle.dump(tags, tf)

elif mtype == "char":
    # Character-level pipeline: load the data, build the tag dictionary from
    # the second field of every data item, and build the character dictionary
    # together with its vectors.
    data = preprocess.getcData(datafile)
    tags = preprocess.tags2dic(map(lambda x:x[1], data))
    cdic, cvectors = preprocess.chars2dic2("char_vector_50",chardim)
    # The padding symbol gets the next free index (the current length of
    # cvectors); the random vector appended below then sits at that position.
    padding_id = cdic["<padding>"] = len(cvectors)
    pids = [padding_id]
    cvectors.append(np.random.randn(chardim))
    embedding = cvectors
    indexdata = preprocess.raw2num1(data,cdic,tags,0,padding_id)
    indim = chardim
    # The output directory is the first command-line argument. Any existing
    # directory of that name is deleted so the run starts from an empty one.
    nerstr = sys.argv[1]
    if os.path.exists(nerstr):
        shutil.rmtree(nerstr)
    os.mkdir(nerstr)
    # The with-block closes the file even if pickle.dump raises.
    with open(nerstr+"/data", 'wb') as af:
        pickle.dump(indexdata, af)

    df = open(nerstr+"/chardic", 'wb')