# Labelling pipeline, first half: load parameters and embeddings, read the
# test corpus, find predicates and extract arguments. Every helper comes from
# the project module `lib`; the input paths (modelFile, encFile, ...) and the
# `embedding` selector are defined outside this block.
#
# Single-argument print(...) behaves exactly like the print statement under
# Python 2, so the emitted text is unchanged.
print('Running Vietnamese Semantic Role Labelling Toolkit')

# ---- Parameters -----------------------------------------------------------
print('Loading Parameters')
model, enc, listLE, leLabel = lib.readingParameterFromFile(
    modelFile, encFile, leFeatureFile, leLabelFile)

# Both embedding tables are loaded; `embedding` picks which one is used.
wordEmbeddingGlove, wordEmbeddingSkipGram = lib.importWordEmbedding()
# NOTE(review): for any value other than 'skipgram' or 'glove' this block
# leaves `wordEmbedding` unassigned -- confirm `embedding` is validated
# elsewhere or that a default is set before this point.
if embedding == 'skipgram':
    wordEmbedding = wordEmbeddingSkipGram
elif embedding == 'glove':
    wordEmbedding = wordEmbeddingGlove

# ---- Corpus ---------------------------------------------------------------
print('Reading Data')
corpus = lib.readTestData(corpusFile)
listTag, listWord = lib.convertData(corpus)
listTree = lib.dataToTree(listTag, listWord)
listOfListPredicate = lib.getPredicate(listTree)
listChunkVer = lib.getListChunkVer(listOfListPredicate)
# The four parallel lists are rebound to whatever removeSenNoPredicate
# returns (presumably with predicate-less sentences dropped -- confirm in lib).
corpus, listTree, listChunkVer, listOfListPredicate = lib.removeSenNoPredicate(
    corpus, listTree, listChunkVer, listOfListPredicate)

# ---- Arguments ------------------------------------------------------------
print('Extracting Arguments')
listTree, listPredicate = lib.chunkingTest(listTree, listOfListPredicate)
# listWord is rebuilt from the chunked trees, replacing the value obtained
# from convertData above.
listWord = lib.getWord(listTree)

print('Creating Features')
# Training-side setup: experiment constants, then the pipeline that reads the
# annotated data and builds the per-relation ID / tree / relation / argument
# lists. Helpers come from the project module `lib`; `dataFile` and
# `listWordNameFile` are defined outside this block.

# ---- Configuration --------------------------------------------------------
listClusterFile = 'labelList.txt'

# Feature-name lists. The identification and classification lists hold the
# same six names in the same order.
listFeatureName = [
    'voice', 'position', 'phrase type', 'function tag',
    'path tree', 'head word', 'predicate', 'distance',
]
listFeatureIdenName = [
    'phrase type', 'position', 'voice',
    'path tree', 'predicate', 'head word',
]
listFeatureClassName = [
    'phrase type', 'position', 'voice',
    'path tree', 'predicate', 'head word',
]

# Full label inventory (27 entries; order and spelling kept exactly).
listLabelOriginal = [
    u'ArgM-CAU', u'Arg4', u'ArgM-GOL', u'ArgM-EXT', u'ArgM-ADV',
    u'ArgM-NEG', u'ArgM-LVB', u'ArgM-MNR', u'ArgM-ADJ', 'None',
    u'ArgM-DSP', u'ArgM-COM', u'ArgM-RES', u'ArgM-MOD', u'ArgM-I',
    u'ArgM-REC', u'ArgM-DIS', u'ArgM-DIR', u'ArgM-Partice', u'ArgM-PRD',
    u'Arg1', u'Arg2', u'Arg3', u'ArgM-LOC', u'ArgM-TMP',
    u'Arg0', u'ArgM-PRP',
]

# Reduced label set (12 entries).
listLabelReduce = [
    u'Arg0', u'Arg1', u'ArgM-ADV', u'ArgM-DIR', u'ArgM-DIS', u'ArgM-EXT',
    u'ArgM-LOC', u'ArgM-MNR', u'ArgM-MOD', u'ArgM-NEG', u'ArgM-PRP',
    u'ArgM-TMP',
]
# Disabled alternative kept from the original:
# listLabelReduce = [u'Arg0', u'Arg1', u'ArgM-TMP']

foldNumber = 10        # presumably the number of cross-validation folds -- confirm
numberElements = 200   # meaning not visible in this block -- TODO confirm
startTime = datetime.now()

# Single-argument print(...) behaves exactly like the print statement under
# Python 2, so the emitted text is unchanged.
print('Running Program')

# ---- Read and convert the annotated data ----------------------------------
print('Reading Data')
listSentence, listID, listCDATA = lib.readData(dataFile)
listTag, listWord = lib.convertData(listSentence)
# convertData is called a second time and only the second result is passed to
# dataToTree (presumably because dataToTree modifies its inputs -- confirm).
listTagClone, listWordClone = lib.convertData(listSentence)
listTree = lib.dataToTree(listTagClone, listWordClone)

listWordName, listCluster = lib.readWordCluster(
    listWordNameFile, listClusterFile)
listRel, listArg = lib.readCDATA(listCDATA, listWord, listID)

# ---- Build the per-relation data set ---------------------------------------
# The results of collectTree1Rel and extractFromMultiRel are combined by
# mergeData into the *Total lists.
listID1Rel, listTree1Rel, listRel1Rel, listArg1Rel = lib.collectTree1Rel(
    listID, listTree, listRel, listArg)
(listIDExtractFromMutliRel,
 listTreeExtractFromMutliRel,
 listRelExtractFromMutliRel,
 listArgExtractFromMutliRel) = lib.extractFromMultiRel(
    listID, listTree, listRel, listArg)
listIDTotal, listTreeTotal, listRelTotal, listArgTotal = lib.mergeData(
    listID1Rel, listTree1Rel, listRel1Rel, listArg1Rel,
    listIDExtractFromMutliRel, listTreeExtractFromMutliRel,
    listRelExtractFromMutliRel, listArgExtractFromMutliRel)

# Disabled chunking variants kept from the original:
# listIDAfterChunking, listTreeAfterChunking, listRelAfterChunking, listArgAfterChunking = lib.chunking(listID1Rel, listTree1Rel, listRel1Rel, listArg1Rel)
# listIDAfterChunking, listTreeAfterChunking, listRelAfterChunking, listArgAfterChunking = lib.chunking(listIDTotal, listTreeTotal, listRelTotal, listArgTotal)
listIDTotal, listTreeTotal, listRelTotal, listArgTotal = lib.filterData(
    listIDTotal, listTreeTotal, listRelTotal, listArgTotal)

# listIDAfterChunking, listTreeAfterChunking, listRelAfterChunking, listArgAfterChunking = lib.chunking(listIDTotal, listTreeTotal, listRelTotal, listArgTotal)
# NOTE(review): filterData is applied a second time to its own output, as in
# the original -- confirm the repeat is intentional.
listIDTotal, listTreeTotal, listRelTotal, listArgTotal = lib.filterData(
    listIDTotal, listTreeTotal, listRelTotal, listArgTotal)
# Start of the labelling run. Stages, in order: parameters, embeddings,
# corpus, predicates, arguments. All helpers live in the project module
# `lib`; the file paths and the `embedding` selector are supplied from
# outside this block.
#
# print(...) with one argument prints the same text as the Python 2 print
# statement.
print('Running Vietnamese Semantic Role Labelling Toolkit')

# Stage 1: parameters read from the four parameter files.
print('Loading Parameters')
model, enc, listLE, leLabel = lib.readingParameterFromFile(
    modelFile,
    encFile,
    leFeatureFile,
    leLabelFile,
)

# Stage 2: choose one of the two embedding tables by name.
wordEmbeddingGlove, wordEmbeddingSkipGram = lib.importWordEmbedding()
if embedding == 'skipgram':
    wordEmbedding = wordEmbeddingSkipGram
elif embedding == 'glove':
    wordEmbedding = wordEmbeddingGlove
# NOTE(review): there is no fallback branch, so `wordEmbedding` is not bound
# here when `embedding` has any other value -- verify against the caller.

# Stage 3: corpus -> (tags, words) -> trees -> predicates per sentence.
print('Reading Data')
corpus = lib.readTestData(corpusFile)
listTag, listWord = lib.convertData(corpus)
listTree = lib.dataToTree(listTag, listWord)
listOfListPredicate = lib.getPredicate(listTree)
listChunkVer = lib.getListChunkVer(listOfListPredicate)
# All four lists are replaced together by removeSenNoPredicate's result
# (presumably keeping them aligned after filtering -- confirm in lib).
corpus, listTree, listChunkVer, listOfListPredicate = lib.removeSenNoPredicate(
    corpus,
    listTree,
    listChunkVer,
    listOfListPredicate,
)

# Stage 4: chunk the trees around their predicates, then recompute listWord
# from the chunked trees (this overwrites the listWord from stage 3).
print('Extracting Arguments')
listTree, listPredicate = lib.chunkingTest(listTree, listOfListPredicate)
listWord = lib.getWord(listTree)

print('Creating Features')