#!/usr/bin/env python
import sys, traceback
from conceptnet4.models import Assertion, Batch, RawAssertion, Frame,\
  Frequency, Relation, SurfaceForm, Concept, Rating
import conceptnet.models as cn3
from corpus.models import Sentence, Language, Activity
from django.contrib.auth.models import User
from django.core.paginator import Paginator
from django.db import transaction
from corpus.parse.adverbs import map_adverb
from itertools import islice
import yaml

# Module-level setup: these lookups execute when the module is loaded.

# The Activity whose name is 'csamoa4 self-rating'.
csamoa4_activity = Activity.objects.get(name='csamoa4 self-rating')
# NOTE(review): presumably ids of activities considered trustworthy -- the
# visible code does not show how this list is used; confirm against callers.
good_acts = [ 16, 20, 22, 24, 28, 31, 32 ]
# Language object returned by Language.get for the code 'en'.
en = Language.get('en')

def process_predicate(pred, batch):
    frametext = pred.frame.text
    matches = {1: pred.text1, 2: pred.text2}
    if pred.polarity < 0: matches['a'] = 'not'
    relation = pred.relation
    sentence = pred.sentence
    lang = pred.language

    surface_forms = [SurfaceForm.get(matches[i], lang, auto_create=True)
                     for i in (1, 2)]
    concepts = [s.concept for s in surface_forms]
    
    # FIXME: english only so far
    freq = map_adverb(matches.get('a', ''))
Example #2
0
import optparse
import os
import sys

from corpus.models import Language

# Command-line script: add a new Language (id + human-readable name).
#
# Options:
#   -i / --id        language id (2 characters), required
#   -l / --language  language name, required
#
# The program name shown in the usage line comes from the ESMT_PROG_NAME
# environment variable.  When the variable is not set, fall back to the
# script's own file name instead of failing with a KeyError before option
# parsing has even started.
progName = os.environ.get("ESMT_PROG_NAME", os.path.basename(sys.argv[0]))

optionParser = optparse.OptionParser(
    usage="%s <options> <document>" % progName, add_help_option=False
)
# The automatic help option is disabled above and re-added here with
# SUPPRESS_HELP so that -h/--help still works but is not listed in the help.
optionParser.add_option("-h", "--help", action="help", help=optparse.SUPPRESS_HELP)
optionParser.add_option("-i", "--id", dest="id", help="language id (2 characters)", metavar="ID")
optionParser.add_option("-l", "--language", dest="language", help="language name", metavar="LANG")
(options, args) = optionParser.parse_args()

# Both options are mandatory; OptionParser.error() prints the usage message
# and terminates the process.
if not options.id:
    optionParser.error("No language id given")
if not options.language:
    optionParser.error("No language name given")

log = sys.stdout

# Refuse to overwrite: a non-empty result means the id is already taken.
foundLanguage = Language.objects.filter(id=options.id)
if foundLanguage:
    # NOTE(review): no explicit exit call on this path, so the script still
    # finishes with status 0 after reporting the error -- confirm whether a
    # non-zero exit status is wanted before changing it.
    sys.stderr.write(
        'Error: language "%s" already exists in the database (as %s)\n' % (options.id, foundLanguage[0].humanReadable)
    )
else:
    l = Language(id=options.id, humanReadable=options.language)
    l.save()
    log.write("Language %s => %s added to the database\n" % (l.id, l.humanReadable))
Example #3
0
#!/usr/bin/env python2

import optparse
import os
import sys

from corpus.models import Language

# Command-line script: add a new Language (id + human-readable name).
#
# Options:
#   -i / --id        language id (2 characters), required
#   -l / --language  language name, required
#
# The program name shown in the usage line comes from the ESMT_PROG_NAME
# environment variable.  When the variable is not set, fall back to the
# script's own file name instead of failing with a KeyError before option
# parsing has even started.
progName = os.environ.get("ESMT_PROG_NAME", os.path.basename(sys.argv[0]))

optionParser = optparse.OptionParser(usage="%s <options> <document>" % progName, add_help_option=False)
# The automatic help option is disabled above and re-added here with
# SUPPRESS_HELP so that -h/--help still works but is not listed in the help.
optionParser.add_option("-h", "--help", action="help", help=optparse.SUPPRESS_HELP)
optionParser.add_option("-i", "--id", dest="id", help="language id (2 characters)", metavar="ID")
optionParser.add_option("-l", "--language", dest="language", help="language name", metavar="LANG")
(options, args) = optionParser.parse_args()

# Both options are mandatory; OptionParser.error() prints the usage message
# and terminates the process.
if not options.id:
    optionParser.error("No language id given")
if not options.language:
    optionParser.error("No language name given")

log = sys.stdout

# Refuse to overwrite: a non-empty result means the id is already taken.
foundLanguage = Language.objects.filter(id=options.id)
if foundLanguage:
    # NOTE(review): no explicit exit call on this path, so the script still
    # finishes with status 0 after reporting the error -- confirm whether a
    # non-zero exit status is wanted before changing it.
    sys.stderr.write("Error: language \"%s\" already exists in the database (as %s)\n" % (options.id, foundLanguage[0].humanReadable))
else:
    l = Language(id=options.id, humanReadable=options.language)
    l.save()
    log.write("Language %s => %s added to the database\n" % (l.id, l.humanReadable))
Example #4
0
    'generally': 6,
    'typically': 6,
    'likely': 6,
    'probably': 6,
    'often': 6,
    'oftentimes': 6,
    'frequently': 6,
    'usually': 8,
    'most': 8,
    'mostly': 8,
    'almost': 9,
    'always': 10,
    'every': 10,
    'all': 10,
}
# Language object used for every Frequency lookup below.
en = Language.get('en')

# Numeric score -> Frequency object whose text is the paired adverb for `en`.
# The pairs are consumed in the order written, so the lookups are issued in
# that order; the generator expression keeps the loop variables out of the
# module namespace.
dbfreqs = dict(
    (score, Frequency.objects.get(language=en, text=adverb))
    for score, adverb in (
        (-10, u"never"),
        (-5, u"not"),
        (-2, u"rarely"),
        (2, u"occasionally"),
        (4, u"sometimes"),
        (5, u""),
        (6, u"generally"),
        (8, u"usually"),
        (9, u"almost always"),
        (10, u"always"),
    )
)


def map_adverb(adv):
Example #5
0
                raise e

    # Process entries
    page_range = [p for p in paginator.page_range if p >= start_page]
    for i in page_range:
        entries = paginator.page(i).object_list
        
        # Update progress
        batch.status = "process_entry_batch " + str(i) + "/" + str(paginator.num_pages)
        batch.progress_num = i
        batch.progress_den = paginator.num_pages
        batch.save()

        try: do_batch(entries)
        
        except ZeroDivisionError, e:
            batch.status = "process_entry_batch " + str(i) + "/" + str(paginator.num_pages) + " ERROR!"
            batch.remarks = str(e.entry) + "\n" + str(e) + "\n" + e.tb
            print "***TRACEBACK***"
            print batch.remarks
            batch.save()
            raise e

import migrate_templated
# Script entry point: the body runs only when this file is executed directly.
if __name__ == '__main__':
    # NOTE(review): the username literal looks redacted ('******') -- confirm
    # the real account name before running.
    user = User.objects.get(username='******')
    lang = Language.get('en')
    # First call: run() with this user and language, passing start_page=214.
    # NOTE(review): presumably resumes a previously interrupted run at that
    # page -- run()'s definition is not fully visible here; confirm.
    run(user, lang, start_page=214)
    # Second call: migrate_templated.run() with the same user, start_page=1.
    migrate_templated.run(user, start_page=1)