from pdb import set_trace

from Header import Header

# Script setup: load the Munster, Connacht and Ulster corpora and trim them
# to a common size (by book count or by sentence count, depending on the
# two switches below).

header = Header()

# Word-ending suffixes. Most entries come in with/without-leading-"e" pairs
# (eamar/amar, eabhair/abhair, eadar/adar).
# NOTE(review): presumably dialect verb endings -- confirm with the author.
# The ninth entry used to read 'ead ar' (stray space), which broke the
# pairing with 'adar'; it is corrected to 'eadar' here.
SUFF = (
    'eas', 'is', 'ais', 'as',
    'eamar', 'amar',
    'eabhair', 'abhair',
    'eadar', 'adar',
)

# Balancing mode switches: each enabled mode (re)builds M, C, U and l.
books = False
sentences = True

# Single-argument print(...) emits the same text under the Python 2 print
# statement and is also valid Python 3.
print("Loading Corpora...")

if books:
    print("\tloading munster")
    M = CorpusReader('munster')
    # Munster's book count is used as the limit for the other two corpora;
    # query it once instead of four times.
    book_limit = M.countBooks()
    print("\tloading connacht")
    C = CorpusReader('connacht', book_limit)
    C.truncateBooks(book_limit)
    print("\tloading ulster")
    U = CorpusReader('ulster', book_limit)
    U.truncateBooks(book_limit)
    l = [U, M, C]

#print "Done."

if sentences:
    print("Creating Balanced Set of sentences")
    M = CorpusReader('munster')
    C = CorpusReader('connacht')
    U = CorpusReader('ulster')
    l = [U, M, C]
    # Smallest sentence count among the three corpora; every corpus is
    # truncated to this value.
    MIN_LENG = min(x.countSentences() for x in l)
    for x in l:
        x.truncateSentences(MIN_LENG)
from collections import Counter
from pdb import set_trace

from Header import Header

# Corpus bootstrap: read the three regional corpora ('munster', 'connacht',
# 'ulster') and cut them down to matching sizes, either per book or per
# sentence according to the flags below.

header = Header()

# Suffix table. Entries mostly pair a form starting with "e" with the same
# form without it (eamar/amar, eabhair/abhair, eadar/adar).
# NOTE(review): these look like verb endings -- verify against the caller.
# 'eadar' replaces the former 'ead ar', whose embedded space did not fit
# the pattern of its partner 'adar'.
SUFF = (
    'eas', 'is', 'ais', 'as',
    'eamar', 'amar',
    'eabhair', 'abhair',
    'eadar', 'adar',
)

# Which balancing strategy to run; an enabled strategy assigns M, C, U, l.
books = False
sentences = True

# print(...) with one argument produces identical output on Python 2 and
# keeps the file parseable on Python 3.
print("Loading Corpora...")

if books:
    print("\tloading munster")
    M = CorpusReader('munster')
    # The Munster book count is the cap for both other corpora; fetch once.
    munster_book_count = M.countBooks()
    print("\tloading connacht")
    C = CorpusReader('connacht', munster_book_count)
    C.truncateBooks(munster_book_count)
    print("\tloading ulster")
    U = CorpusReader('ulster', munster_book_count)
    U.truncateBooks(munster_book_count)
    l = [U, M, C]

#print "Done."

if sentences:
    print("Creating Balanced Set of sentences")
    M = CorpusReader('munster')
    C = CorpusReader('connacht')
    U = CorpusReader('ulster')
    l = [U, M, C]
    # Truncate every corpus to the lowest sentence count of the three.
    MIN_LENG = min(x.countSentences() for x in l)
    for x in l:
        x.truncateSentences(MIN_LENG)