# Dump the lemma table as sorted, tab-separated UTF-8 lines (Python 2 script).
# The lemma file to read is given as the first command-line argument.
import sys
import codecs

import onto_utils
import language

# Close the input as soon as it has been read instead of leaving the handle
# open for the rest of the run.
# NOTE(review): assumes read_lemma() consumes the file before returning and
# fills language.lemma as a side effect -- confirm in onto_utils.
with codecs.open(sys.argv[1], encoding="UTF-8") as lemma_file:
    onto_utils.read_lemma(lemma_file)

for k, v in sorted(language.lemma.iteritems()):
    # Each formatted line is encoded to UTF-8 explicitly before printing
    # rather than relying on the encoding of stdout.
    print(("%s\t%s" % (k, v)).encode("utf8"))
# Experiment driver for the expe1_20120910 data set (Python 2 script):
# read lemma, types and the heads to learn, save a dump, then tag a second
# heads file and save another dump.
from onto_utils import read_heads, read_blacklist, read_types, read_lemma

# NOTE(review): `test` is not bound anywhere in this excerpt; confirm it is
# imported earlier in the file.

# Every input file and dump prefix of this run shares these two roots.
_DATA_DIR = '../data/expe1_20120910/'
_DUMP_PREFIX = '../dumps/expe1_20120910/expe1_20120910'

# Each input is opened in a `with` block so its handle is closed right after
# the reader returns.
# NOTE(review): assumes the read_* helpers consume the file before returning
# (their return values are discarded here) -- confirm in onto_utils.

# First (disabled)
#read_blacklist(open('../data/blacklist.txt'))

# LEARNING from the ontology (disabled)
#read_terms(open('../data/mbto.obo'))
#read_heads(open('../data/mbto.heads'), action='learn')

#test.cleaning_helper()
print('#' * 100)
#test.cleaning_helper()

# LEARNING
print('LEARNING')

print("Reading lemma")
with open(_DATA_DIR + 'lemma') as lemma_file:
    read_lemma(lemma_file)

print("Reading types")
with open(_DATA_DIR + 'types') as types_file:
    read_types(types_file)

print("Reading heads")
with open(_DATA_DIR + 'heads_tolearn') as heads_file:
    read_heads(heads_file, action='learn')

print("Saving")
test.save(prefix=_DUMP_PREFIX + '_after_learning')

# TAGGING
print("\nTAGGING")

print("Tagging heads")
with open(_DATA_DIR + 'heads_totag') as heads_file:
    read_heads(heads_file, action='tag')

print("Saving")
test.save(prefix=_DUMP_PREFIX + '_after_tagging')

# Disabled counter initialisation ("trouves" is French for "found").
#trouves = 0
# Run banner: echo the configuration this run was started with.
# BASE_DATA, BASE_DUMPS and the *_OK flags are defined outside this excerpt.
print('-' * 80)
print('Reading folder: %s' % BASE_DATA)
print('Writing folder: %s' % BASE_DUMPS)
print('Expects a blacklist: %s' % BLACKLIST_OK)
print('Expects an ontology: %s' % ONTO_OK)
print('Expects a flat resource: %s' % FLAT_OK)
print('-' * 80)

#############################################################################
# FIRST
#############################################################################
print('FIRST')
print('-' * 80)

# Inputs are decoded as UTF-8 and opened in `with` blocks so each handle is
# closed right after its reader returns.
# NOTE(review): assumes read_lemma()/read_blacklist() consume the file before
# returning (their return values are discarded) -- confirm in onto_utils.
print("Reading lemma")
with codecs.open(BASE_DATA + u'lemma', encoding='UTF-8') as lemma_file:
    read_lemma(lemma_file)
print('%d lemma read.\n' % len(language.lemma))

if BLACKLIST_OK:
    print('Reading blacklist')
    with codecs.open(BASE_DATA + u'blacklist', encoding='UTF-8') as blacklist_file:
        read_blacklist(blacklist_file)
    print('Blacklist content: %s\n' % ', '.join(test.blacklist))

# NOTE(review): the original indentation was lost; this separator is taken to
# close the FIRST section unconditionally rather than belonging to the
# blacklist branch -- confirm against the original file.
print('-' * 80)

if ONTO_OK:
    #########################################################################
    # LEARNING ONTO
    #########################################################################
    print('LEARNING ONTO')
    print('-' * 80)
# Experiment driver for the expe_20120912 data set (Python 2 script):
# read the blacklist, learn from the ontology terms and heads, then read
# lemma, types and the dictionary heads, save a dump, tag a heads file and
# save another dump.
from onto_utils import read_heads, read_blacklist, read_types, read_lemma

# NOTE(review): read_terms() is called below but is not part of the import
# above, and `test` is not bound in this excerpt either; confirm both are
# brought into scope earlier in the file.

# Every input file and dump prefix of this run shares these two roots.
# The data folder is "expe_20120912" while the dump folder is
# "expe1_20120912"; both spellings are kept exactly as they were.
_DATA_DIR = '/bibdev/travail/typage/typage_biotope_task3.4/data/expe_20120912/'
_DUMP_PREFIX = ('/bibdev/travail/typage/typage_biotope_task3.4/dumps/'
                'expe1_20120912/expe1_20120912')

# Each input is opened in a `with` block so its handle is closed right after
# the reader returns.
# NOTE(review): assumes the read_* helpers consume the file before returning
# (their return values are discarded here) -- confirm in onto_utils.

# First
with open(_DATA_DIR + 'blacklist.txt') as blacklist_file:
    read_blacklist(blacklist_file)

# LEARNING from the ontology
with open(_DATA_DIR + 'bacteria_habitat_OntoBiotope-34') as terms_file:
    read_terms(terms_file)
with open(_DATA_DIR + 'heads_tolearn_onto') as heads_file:
    read_heads(heads_file, action='learn')

#test.cleaning_helper()
print('#' * 100)
#test.cleaning_helper()

# LEARNING
print('LEARNING')

print("Reading lemma")
with open(_DATA_DIR + 'lemma') as lemma_file:
    read_lemma(lemma_file)

print("Reading types")
with open(_DATA_DIR + 'types') as types_file:
    read_types(types_file)

print("Reading heads")
with open(_DATA_DIR + 'heads_tolearn_dico') as heads_file:
    read_heads(heads_file, action='learn')

print("Saving")
test.save(prefix=_DUMP_PREFIX + '_after_learning')

# TAGGING
print("\nTAGGING")

print("Tagging heads")
with open(_DATA_DIR + 'heads_totag') as heads_file:
    read_heads(heads_file, action='tag')

print("Saving")
test.save(prefix=_DUMP_PREFIX + '_after_tagging')

# Disabled counter initialisation ("trouves" is French for "found").
#trouves = 0
# Experiment driver for the expe1_20120908 data set (Python 2 script):
# read lemma, types and the heads to learn, save a dump, then tag a second
# heads file and save another dump.
from onto_utils import read_heads, read_blacklist, read_types, read_lemma

# NOTE(review): `test` is not bound anywhere in this excerpt; confirm it is
# imported earlier in the file.

# Every input file and dump prefix of this run shares these two roots.
_DATA_DIR = "../data/expe1_20120908/"
_DUMP_PREFIX = "../dumps/expe1/expe1_20120908"

# Each input is opened in a `with` block so its handle is closed right after
# the reader returns.
# NOTE(review): assumes the read_* helpers consume the file before returning
# (their return values are discarded here) -- confirm in onto_utils.

# First (disabled)
# read_blacklist(open('../data/blacklist.txt'))

# LEARNING from the ontology (disabled)
# read_terms(open('../data/mbto.obo'))
# read_heads(open('../data/mbto.heads'), action='learn')

# test.cleaning_helper()
print("#" * 100)
# test.cleaning_helper()

# LEARNING
print("LEARNING")

print("Reading lemma")
with open(_DATA_DIR + "lemma") as lemma_file:
    read_lemma(lemma_file)

print("Reading types")
with open(_DATA_DIR + "types") as types_file:
    read_types(types_file)

print("Reading heads")
with open(_DATA_DIR + "heads_tolearn") as heads_file:
    read_heads(heads_file, action="learn")

print("Saving")
test.save(prefix=_DUMP_PREFIX + "_after_learning")

# TAGGING
print("\nTAGGING")

print("Tagging heads")
with open(_DATA_DIR + "heads_totag") as heads_file:
    read_heads(heads_file, action="tag")

print("Saving")
test.save(prefix=_DUMP_PREFIX + "_after_tagging")

# Disabled counter initialisation ("trouves" is French for "found").
# trouves = 0