import sys
import codecs

import onto_utils
import language

# Load the lemma table from the UTF-8 file named by the first command-line
# argument, then write it to stdout as sorted, tab-separated, UTF-8 lines.
# The `with` block closes the input stream deterministically instead of
# leaving the handle open until garbage collection.
# NOTE(review): assumes onto_utils.read_lemma consumes the stream before
# returning and populates language.lemma -- confirm against onto_utils.
with codecs.open(sys.argv[1], encoding="UTF-8") as lemma_file:
    onto_utils.read_lemma(lemma_file)

for k, v in sorted(language.lemma.iteritems()):
    # Python 2: `print` is a statement, so the formatted line is encoded
    # first and the resulting byte string is what gets printed. The extra
    # parentheses make that evaluation order explicit.
    print(("%s\t%s" % (k, v)).encode("utf8"))
from onto_utils import read_heads, read_blacklist, read_types, read_lemma

# Driver for experiment expe1_20120910: read the lemma, type and
# "to learn" head files, save a dump, then process the "to tag" head file
# and save a second dump.
# Every input file is opened in a `with` block so its handle is closed as
# soon as the reader returns.
# NOTE(review): assumes the read_* helpers consume the file before
# returning -- confirm against onto_utils.
# NOTE(review): `test` is not imported in the visible part of this file;
# verify where it comes from.

# First
#read_blacklist(open('../data/blacklist.txt'))

# LEARNING
#read_terms(open('../data/mbto.obo'))
#read_heads(open('../data/mbto.heads'), action='learn')
#test.cleaning_helper()
print('#' * 100)
#test.cleaning_helper()

# LEARNING
print('LEARNING')
print("Reading lemma")
with open('../data/expe1_20120910/lemma') as lemma_file:
    read_lemma(lemma_file)
print("Reading types")
with open('../data/expe1_20120910/types') as types_file:
    read_types(types_file)
print("Reading heads")
with open('../data/expe1_20120910/heads_tolearn') as heads_file:
    read_heads(heads_file, action='learn')
print("Saving")
test.save(prefix='../dumps/expe1_20120910/expe1_20120910_after_learning')

# TAGGING
print("\nTAGGING")
print("Tagging heads")
with open('../data/expe1_20120910/heads_totag') as heads_file:
    read_heads(heads_file, action='tag')
print("Saving")
test.save(prefix='../dumps/expe1_20120910/expe1_20120910_after_tagging')

#trouves = 0
# Print the run configuration, then run the FIRST stage: read the lemma
# file and, when enabled, the blacklist, both from BASE_DATA as UTF-8.
# Input files are opened in `with` blocks so their handles are closed as
# soon as the reader returns.
# NOTE(review): BASE_DATA, BASE_DUMPS, BLACKLIST_OK, ONTO_OK, FLAT_OK and
# `test` are defined outside the visible part of this file.
# NOTE(review): assumes the read_* helpers consume the file before
# returning -- confirm against onto_utils.
print('-' * 80)
print('Reading folder: %s' % BASE_DATA)
print('Writing folder: %s' % BASE_DUMPS)
print('Expects a blacklist: %s' % BLACKLIST_OK)
print('Expects an ontology: %s' % ONTO_OK)
print('Expects a flat resource: %s' % FLAT_OK)
print('-' * 80)

#############################################################################
# FIRST
print('FIRST')
print('-' * 80)
#############################################################################

print("Reading lemma")
with codecs.open(BASE_DATA + u'lemma', encoding='UTF-8') as lemma_file:
    read_lemma(lemma_file)
print('%d lemma read.\n' % len(language.lemma))

if BLACKLIST_OK:
    print('Reading blacklist')
    with codecs.open(BASE_DATA + u'blacklist', encoding='UTF-8') as blacklist_file:
        read_blacklist(blacklist_file)
    print('Blacklist content: %s\n' % ', '.join(test.blacklist))

print('-' * 80)

if ONTO_OK:
    #############################################################################
    # LEARNING ONTO
    print('LEARNING ONTO')
    print('-' * 80)
    #############################################################################
# Example #4
# 0
from onto_utils import read_heads, read_blacklist, read_types, read_lemma

# Driver for experiment expe_20120912: read the blacklist, the ontology
# terms and their heads, then the lemma/type/"dico" head files, save a
# dump, and finally process the "to tag" head file and save a second dump.
# Every input file is opened in a `with` block so its handle is closed as
# soon as the reader returns.
# NOTE(review): assumes the read_* helpers consume the file before
# returning -- confirm against onto_utils.
# NOTE(review): `read_terms` is not among the names imported from
# onto_utils just above this script, and `test` is not imported in the
# visible part of this file; verify where both come from.

# First
with open('/bibdev/travail/typage/typage_biotope_task3.4/data/expe_20120912/blacklist.txt') as blacklist_file:
    read_blacklist(blacklist_file)

# LEARNING
with open('/bibdev/travail/typage/typage_biotope_task3.4/data/expe_20120912/bacteria_habitat_OntoBiotope-34') as terms_file:
    read_terms(terms_file)
with open('/bibdev/travail/typage/typage_biotope_task3.4/data/expe_20120912/heads_tolearn_onto') as heads_file:
    read_heads(heads_file, action='learn')
#test.cleaning_helper()
print('#' * 100)
#test.cleaning_helper()

# LEARNING
print('LEARNING')
print("Reading lemma")
with open('/bibdev/travail/typage/typage_biotope_task3.4/data/expe_20120912/lemma') as lemma_file:
    read_lemma(lemma_file)
print("Reading types")
with open('/bibdev/travail/typage/typage_biotope_task3.4/data/expe_20120912/types') as types_file:
    read_types(types_file)
print("Reading heads")
with open('/bibdev/travail/typage/typage_biotope_task3.4/data/expe_20120912/heads_tolearn_dico') as heads_file:
    read_heads(heads_file, action='learn')
print("Saving")
test.save(prefix='/bibdev/travail/typage/typage_biotope_task3.4/dumps/expe1_20120912/expe1_20120912_after_learning')

# TAGGING
print("\nTAGGING")
print("Tagging heads")
with open('/bibdev/travail/typage/typage_biotope_task3.4/data/expe_20120912/heads_totag') as heads_file:
    read_heads(heads_file, action='tag')
print("Saving")
test.save(prefix='/bibdev/travail/typage/typage_biotope_task3.4/dumps/expe1_20120912/expe1_20120912_after_tagging')

#trouves = 0
from onto_utils import read_heads, read_blacklist, read_types, read_lemma

# Driver for experiment expe1_20120908: read the lemma, type and
# "to learn" head files, save a dump, then process the "to tag" head file
# and save a second dump.
# Every input file is opened in a `with` block so its handle is closed as
# soon as the reader returns.
# NOTE(review): assumes the read_* helpers consume the file before
# returning -- confirm against onto_utils.
# NOTE(review): `test` is not imported in the visible part of this file;
# verify where it comes from.

# First
# read_blacklist(open('../data/blacklist.txt'))

# LEARNING
# read_terms(open('../data/mbto.obo'))
# read_heads(open('../data/mbto.heads'), action='learn')
# test.cleaning_helper()
print("#" * 100)
# test.cleaning_helper()

# LEARNING
print("LEARNING")
print("Reading lemma")
with open("../data/expe1_20120908/lemma") as lemma_file:
    read_lemma(lemma_file)
print("Reading types")
with open("../data/expe1_20120908/types") as types_file:
    read_types(types_file)
print("Reading heads")
with open("../data/expe1_20120908/heads_tolearn") as heads_file:
    read_heads(heads_file, action="learn")
print("Saving")
test.save(prefix="../dumps/expe1/expe1_20120908_after_learning")

# TAGGING
print("\nTAGGING")
print("Tagging heads")
with open("../data/expe1_20120908/heads_totag") as heads_file:
    read_heads(heads_file, action="tag")
print("Saving")
test.save(prefix="../dumps/expe1/expe1_20120908_after_tagging")

# trouves = 0