# Example #1
# 0
from TestDocument import TestDocument
from UnlabelledDoc import UnlabelledDoc
import pickle


if __name__ == '__main__':
    # Script entry point: parse the train and test corpora named in
    # config.py, pickle the parsed documents/sentences to the working
    # directory, and report the LU setting.
    #
    # NOTE(review): `sys`, `cp` and `TrainDocument` are used below but are not
    # imported in the visible part of this file -- presumably they are
    # imported above (`cp` looks like the ConfigParser module); confirm.

    # Python 2-only hack: reload(sys) restores sys.setdefaultencoding (it is
    # removed from the module at interpreter startup) so the default
    # str/unicode conversion codec can be switched to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf8')

    # Parse the configuration
    config = cp.RawConfigParser()
    config.read('config.py')
    trainfile = config.get('init', 'trainfile')
    # Read the training corpus; the handle is closed as soon as it is read.
    with open(trainfile) as train_handle:
        content = train_handle.read()

    doc = TrainDocument(content)
    sent = doc.get_sentences()[0]
    print(sent.pos_tags)

    # Pickle streams are written in binary mode and through `with` so the
    # data is flushed and the handles are closed deterministically.
    with open('TrainDoc.pickle', 'wb') as pickle_out:
        pickle.dump(doc, pickle_out)
    with open('Sentences.pickle', 'wb') as pickle_out:
        pickle.dump(doc.sentences, pickle_out)

    # Open the test document as well
    testfile = config.get('init', 'testfile')
    with open(testfile) as test_handle:
        testcontent = test_handle.read()
    testdoc = TestDocument(testcontent)
    with open('SentencesTest.pickle', 'wb') as pickle_out:
        pickle.dump(testdoc.sentences, pickle_out)

    # If LU learning is enabled, then read the unlabelled corpus also

    lu = int(config.get('LU', 'lu'))
    print('LU learning configuration : ' + str(lu))
__author__ = 'vignesh'

from TrainDocument import TrainDocument
# Export every label of the annotated corpus below to 'labels2forkappa.txt',
# one label per line.
# NOTE(review): the corpus path is absolute and specific to one machine.
f2 = '/Users/vignesh/Documents/Phd/Courses/fall15/CS521SNLP/ClassProject/corpus/kappa/sahisnukappafull.txt'
# Read the corpus through a context manager so the handle is always closed.
with open(f2) as corpus_file:
    c2 = corpus_file.read()

doc2 = TrainDocument(c2)
sent2 = doc2.get_sentences()

# Flatten the per-sentence labels into one list, in sentence order.
labels2 = []
for sentence in sent2:
    labels2 += sentence.labels

print(len(labels2))

# Write the labels, one per line; `with` closes the file even if a write
# fails part-way through.
with open('labels2forkappa.txt', 'w') as outfile:
    for label in labels2:
        outfile.write(label + '\n')