#!/usr/bin/env python import sys, traceback from conceptnet4.models import Assertion, Batch, RawAssertion, Frame,\ Frequency, Relation, SurfaceForm, Concept, Rating import conceptnet.models as cn3 from corpus.models import Sentence, Language, Activity from django.contrib.auth.models import User from django.core.paginator import Paginator from django.db import transaction from corpus.parse.adverbs import map_adverb from itertools import islice import yaml csamoa4_activity = Activity.objects.get(name='csamoa4 self-rating') good_acts = [ 16, 20, 22, 24, 28, 31, 32 ] en = Language.get('en') def process_predicate(pred, batch): frametext = pred.frame.text matches = {1: pred.text1, 2: pred.text2} if pred.polarity < 0: matches['a'] = 'not' relation = pred.relation sentence = pred.sentence lang = pred.language surface_forms = [SurfaceForm.get(matches[i], lang, auto_create=True) for i in (1, 2)] concepts = [s.concept for s in surface_forms] # FIXME: english only so far freq = map_adverb(matches.get('a', ''))
"""Add a language record to the corpus database.

Command-line options:
    -i / --id        language id (2 characters), required
    -l / --language  human-readable language name, required

Exit status: 0 when the language was added, 1 when a language with the
given id already exists, 2 (via ``optparse``) when a required option is
missing.
"""
import optparse
import os
import sys

from corpus.models import Language

# ESMT_PROG_NAME is used for the usage banner when it is set (presumably
# exported by a launcher script -- confirm).  Fall back to the script name so
# that running this file directly does not die with a KeyError before the
# options are even parsed.
programName = os.environ.get("ESMT_PROG_NAME") or os.path.basename(sys.argv[0])

optionParser = optparse.OptionParser(
    usage="%s <options> <document>" % programName,
    add_help_option=False,
)
# The help option is registered by hand (add_help_option=False above) so that
# it can be hidden from the generated option list with SUPPRESS_HELP.
optionParser.add_option(
    "-h", "--help", action="help", help=optparse.SUPPRESS_HELP
)
optionParser.add_option(
    "-i", "--id", dest="id", help="language id (2 characters)", metavar="ID"
)
optionParser.add_option(
    "-l", "--language", dest="language", help="language name", metavar="LANG"
)
(options, args) = optionParser.parse_args()

# Both options are mandatory; OptionParser.error() prints the usage banner
# and terminates the process with status 2.
if not options.id:
    optionParser.error("No language id given")
if not options.language:
    optionParser.error("No language name given")

log = sys.stdout

foundLanguage = Language.objects.filter(id=options.id)
if foundLanguage:
    sys.stderr.write(
        'Error: language "%s" already exists in the database (as %s)\n'
        % (options.id, foundLanguage[0].humanReadable)
    )
    # Report the failure to the caller instead of exiting with status 0.
    sys.exit(1)
else:
    language = Language(id=options.id, humanReadable=options.language)
    language.save()
    log.write(
        "Language %s => %s added to the database\n"
        % (language.id, language.humanReadable)
    )
#!/usr/bin/env python2
"""Add a language record to the corpus database.

Command-line options:
    -i / --id        language id (2 characters), required
    -l / --language  human-readable language name, required

Exit status: 0 when the language was added, 1 when a language with the
given id already exists, 2 (via ``optparse``) when a required option is
missing.
"""
import optparse
import os
import sys

from corpus.models import Language

# ESMT_PROG_NAME is used for the usage banner when it is set (presumably
# exported by a launcher script -- confirm).  Fall back to the script name so
# that running this file directly does not die with a KeyError before the
# options are even parsed.
programName = os.environ.get("ESMT_PROG_NAME") or os.path.basename(sys.argv[0])

optionParser = optparse.OptionParser(
    usage="%s <options> <document>" % programName,
    add_help_option=False,
)
# The help option is registered by hand (add_help_option=False above) so that
# it can be hidden from the generated option list with SUPPRESS_HELP.
optionParser.add_option(
    "-h", "--help", action="help", help=optparse.SUPPRESS_HELP
)
optionParser.add_option(
    "-i", "--id", dest="id", help="language id (2 characters)", metavar="ID"
)
optionParser.add_option(
    "-l", "--language", dest="language", help="language name", metavar="LANG"
)
(options, args) = optionParser.parse_args()

# Both options are mandatory; OptionParser.error() prints the usage banner
# and terminates the process with status 2.
if not options.id:
    optionParser.error("No language id given")
if not options.language:
    optionParser.error("No language name given")

log = sys.stdout

foundLanguage = Language.objects.filter(id=options.id)
if foundLanguage:
    sys.stderr.write(
        "Error: language \"%s\" already exists in the database (as %s)\n"
        % (options.id, foundLanguage[0].humanReadable)
    )
    # Report the failure to the caller instead of exiting with status 0.
    sys.exit(1)
else:
    language = Language(id=options.id, humanReadable=options.language)
    language.save()
    log.write(
        "Language %s => %s added to the database\n"
        % (language.id, language.humanReadable)
    )
'generally': 6, 'typically': 6, 'likely': 6, 'probably': 6, 'often': 6, 'oftentimes': 6, 'frequently': 6, 'usually': 8, 'most': 8, 'mostly': 8, 'almost': 9, 'always': 10, 'every': 10, 'all': 10, } en = Language.get('en') dbfreqs = { -10: Frequency.objects.get(language=en, text=u"never"), -5: Frequency.objects.get(language=en, text=u"not"), -2: Frequency.objects.get(language=en, text=u"rarely"), 2: Frequency.objects.get(language=en, text=u"occasionally"), 4: Frequency.objects.get(language=en, text=u"sometimes"), 5: Frequency.objects.get(language=en, text=u""), 6: Frequency.objects.get(language=en, text=u"generally"), 8: Frequency.objects.get(language=en, text=u"usually"), 9: Frequency.objects.get(language=en, text=u"almost always"), 10: Frequency.objects.get(language=en, text=u"always"), } def map_adverb(adv):
raise e # Process entries page_range = [p for p in paginator.page_range if p >= start_page] for i in page_range: entries = paginator.page(i).object_list # Update progress batch.status = "process_entry_batch " + str(i) + "/" + str(paginator.num_pages) batch.progress_num = i batch.progress_den = paginator.num_pages batch.save() try: do_batch(entries) except ZeroDivisionError, e: batch.status = "process_entry_batch " + str(i) + "/" + str(paginator.num_pages) + " ERROR!" batch.remarks = str(e.entry) + "\n" + str(e) + "\n" + e.tb print "***TRACEBACK***" print batch.remarks batch.save() raise e import migrate_templated if __name__ == '__main__': user = User.objects.get(username='******') lang = Language.get('en') run(user, lang, start_page=214) migrate_templated.run(user, start_page=1)