예제 #1
0
# Driver fragment of the Vietnamese Semantic Role Labelling toolkit.
# Every stage announces itself on stdout and then delegates to `lib`.
# The progress messages use print(...) with one parenthesised string: that
# prints the same text under Python 2 and is also valid Python 3 syntax,
# whereas the bare `print '...'` statement is a SyntaxError on Python 3.
print('Running Vietnamese Semantic Role Labelling Toolkit')

# --- Stage: load parameters -------------------------------------------------
print('Loading Parameters')

# Four objects are read back from the four files named by the caller.
model, enc, listLE, leLabel = lib.readingParameterFromFile(
    modelFile, encFile, leFeatureFile, leLabelFile)

# Both embedding tables are imported; only one is used, chosen by `embedding`.
wordEmbeddingGlove, wordEmbeddingSkipGram = lib.importWordEmbedding()
# NOTE(review): any value other than 'skipgram' / 'glove' leaves
# `wordEmbedding` unassigned by this block -- confirm `embedding` is
# validated before this point.
if embedding == 'skipgram':
    wordEmbedding = wordEmbeddingSkipGram
elif embedding == 'glove':
    wordEmbedding = wordEmbeddingGlove

# --- Stage: read data -------------------------------------------------------
print('Reading Data')

# corpus -> (tags, words) -> trees -> predicates, each step via `lib`.
corpus = lib.readTestData(corpusFile)
listTag, listWord = lib.convertData(corpus)
listTree = lib.dataToTree(listTag, listWord)
listOfListPredicate = lib.getPredicate(listTree)

listChunkVer = lib.getListChunkVer(listOfListPredicate)

# The four parallel structures are passed through together and rebound to the
# returned values (presumably sentences without a predicate are dropped, per
# the helper's name -- verify against lib).
corpus, listTree, listChunkVer, listOfListPredicate = lib.removeSenNoPredicate(
    corpus, listTree, listChunkVer, listOfListPredicate)

# --- Stage: extract arguments -----------------------------------------------
print('Extracting Arguments')

listTree, listPredicate = lib.chunkingTest(listTree, listOfListPredicate)

# `listWord` is rebound here, replacing the value from lib.convertData above.
listWord = lib.getWord(listTree)

# --- Stage: create features (continues past this fragment) ------------------
print('Creating Features')
예제 #2
0
파일: main.py 프로젝트: khoaipx/SRL
# Configuration and data-preparation fragment of the SRL training script.
# Names such as `lib`, `datetime`, `dataFile` and `listWordNameFile` are
# expected to be defined earlier in the file.
listClusterFile = 'labelList.txt'

# Feature-name lists: the full set, then the subsets named for the
# identification ("Iden") and classification ("Class") steps.
listFeatureName = [
    'voice', 'position', 'phrase type', 'function tag',
    'path tree', 'head word', 'predicate', 'distance',
]
listFeatureIdenName = [
    'phrase type', 'position', 'voice',
    'path tree', 'predicate', 'head word',
]
listFeatureClassName = [
    'phrase type', 'position', 'voice',
    'path tree', 'predicate', 'head word',
]

# Label inventories: the original full list and a reduced list.
# Every entry is a runtime string and is kept exactly as written.
listLabelOriginal = [
    u'ArgM-CAU', u'Arg4', u'ArgM-GOL', u'ArgM-EXT', u'ArgM-ADV',
    u'ArgM-NEG', u'ArgM-LVB', u'ArgM-MNR', u'ArgM-ADJ', 'None',
    u'ArgM-DSP', u'ArgM-COM', u'ArgM-RES', u'ArgM-MOD', u'ArgM-I',
    u'ArgM-REC', u'ArgM-DIS', u'ArgM-DIR', u'ArgM-Partice', u'ArgM-PRD',
    u'Arg1', u'Arg2', u'Arg3', u'ArgM-LOC', u'ArgM-TMP',
    u'Arg0', u'ArgM-PRP',
]
listLabelReduce = [
    u'Arg0', u'Arg1', u'ArgM-ADV', u'ArgM-DIR', u'ArgM-DIS', u'ArgM-EXT',
    u'ArgM-LOC', u'ArgM-MNR', u'ArgM-MOD', u'ArgM-NEG', u'ArgM-PRP',
    u'ArgM-TMP',
]
#listLabelReduce = [u'Arg0', u'Arg1', u'ArgM-TMP']
foldNumber = 10
numberElements = 200

# Timestamp taken before any work starts.
startTime = datetime.now()

# print(...) with one parenthesised string prints the same text under
# Python 2 and is also valid Python 3 syntax (the bare print statement is not).
print('Running Program')
print('Reading Data')
listSentence, listID, listCDATA = lib.readData(dataFile)
listTag, listWord = lib.convertData(listSentence)
# convertData is run a second time so that dataToTree receives its own
# tag/word lists (presumably because dataToTree modifies its arguments --
# verify against lib); `listWord` from the first call is used by readCDATA.
listTagClone, listWordClone = lib.convertData(listSentence)
listTree = lib.dataToTree(listTagClone, listWordClone)
listWordName, listCluster = lib.readWordCluster(
    listWordNameFile, listClusterFile)

listRel, listArg = lib.readCDATA(listCDATA, listWord, listID)

# Two views are derived from the same (ID, tree, rel, arg) data -- one via
# collectTree1Rel, one via extractFromMultiRel -- and then merged.
listID1Rel, listTree1Rel, listRel1Rel, listArg1Rel = lib.collectTree1Rel(
    listID, listTree, listRel, listArg)
(listIDExtractFromMutliRel, listTreeExtractFromMutliRel,
 listRelExtractFromMutliRel, listArgExtractFromMutliRel) = lib.extractFromMultiRel(
    listID, listTree, listRel, listArg)
listIDTotal, listTreeTotal, listRelTotal, listArgTotal = lib.mergeData(
    listID1Rel, listTree1Rel, listRel1Rel, listArg1Rel,
    listIDExtractFromMutliRel, listTreeExtractFromMutliRel,
    listRelExtractFromMutliRel, listArgExtractFromMutliRel)
# Disabled chunking variants, kept for reference:
# listIDAfterChunking, listTreeAfterChunking, listRelAfterChunking, listArgAfterChunking = lib.chunking(listID1Rel, listTree1Rel, listRel1Rel, listArg1Rel)
#listIDAfterChunking, listTreeAfterChunking, listRelAfterChunking, listArgAfterChunking = lib.chunking(listIDTotal, listTreeTotal, listRelTotal, listArgTotal)
listIDTotal, listTreeTotal, listRelTotal, listArgTotal = lib.filterData(
    listIDTotal, listTreeTotal, listRelTotal, listArgTotal)
# listIDAfterChunking, listTreeAfterChunking, listRelAfterChunking, listArgAfterChunking = lib.chunking(listIDTotal, listTreeTotal, listRelTotal, listArgTotal)

# NOTE(review): filterData is applied a second time to the output of the call
# above. Kept as in the original; confirm whether the repeat is intentional
# or a leftover that can be removed.
listIDTotal, listTreeTotal, listRelTotal, listArgTotal = lib.filterData(
    listIDTotal, listTreeTotal, listRelTotal, listArgTotal)
예제 #3
0
파일: vnSRL.py 프로젝트: pth1993/SRL
# vnSRL driver fragment: load parameters, read the test corpus, extract
# arguments, then move on to feature creation. All work is done by `lib`.
# Progress messages are written as print('...') -- a single parenthesised
# string prints identically on Python 2 and, unlike the bare print statement,
# also parses on Python 3.
print('Running Vietnamese Semantic Role Labelling Toolkit')

print('Loading Parameters')

# Read four objects back from the four caller-supplied files.
model, enc, listLE, leLabel = lib.readingParameterFromFile(
    modelFile, encFile, leFeatureFile, leLabelFile)

# Import both embedding tables, then keep the one selected by `embedding`.
wordEmbeddingGlove, wordEmbeddingSkipGram = lib.importWordEmbedding()
# NOTE(review): `wordEmbedding` is only bound for 'skipgram' or 'glove';
# confirm other values of `embedding` are rejected before reaching here.
if embedding == 'skipgram':
    wordEmbedding = wordEmbeddingSkipGram
elif embedding == 'glove':
    wordEmbedding = wordEmbeddingGlove

print('Reading Data')

# Corpus -> tag/word lists -> trees -> predicate lists.
corpus = lib.readTestData(corpusFile)
listTag, listWord = lib.convertData(corpus)
listTree = lib.dataToTree(listTag, listWord)
listOfListPredicate = lib.getPredicate(listTree)

listChunkVer = lib.getListChunkVer(listOfListPredicate)

# All four parallel structures are rebound to the helper's results
# (presumably with predicate-less sentences removed, per the helper's name --
# verify against lib).
corpus, listTree, listChunkVer, listOfListPredicate = lib.removeSenNoPredicate(
    corpus, listTree, listChunkVer, listOfListPredicate)

print('Extracting Arguments')

listTree, listPredicate = lib.chunkingTest(listTree, listOfListPredicate)

# Rebinds `listWord`; the earlier value from lib.convertData is discarded.
listWord = lib.getWord(listTree)

print('Creating Features')