def process_sentence(sentence):
    """Parse a sentence and return its pattern match as a list of dicts.

    Python 2 code.  The sentence text is echoed to stdout as UTF-8 bytes and
    then passed to ``pattern_parse``, whose result is unpacked into four
    values (the first one is ignored).

    Returns an empty list when the parsed relation text is ``None`` or
    ``'junk'``; otherwise a one-element list holding a dict with the keys
    ``id``, ``frametext``, ``reltext`` and ``matches``.
    """
    # A single parenthesised argument keeps Python 2 print-statement output.
    print(sentence.text.encode('utf-8'))

    parsed = pattern_parse(sentence.text)
    _, frametext, reltext, matches = parsed

    # Guard clause: nothing usable was recognised in this sentence.
    if reltext is None or reltext == 'junk':
        return []

    record = {
        'id': sentence.id,
        'frametext': frametext,
        'reltext': reltext,
        'matches': matches,
    }
    return [record]
Example #2
0
def process_sentence(sentence):
    """Run ``pattern_parse`` on a sentence and collect the usable result.

    Python 2 code.  Prints the UTF-8 encoded sentence text, parses it, and
    builds the return value:

    * ``[]`` if the relation text is ``None`` or the string ``'junk'``;
    * otherwise ``[d]`` where ``d`` maps ``id`` to ``sentence.id`` and
      ``frametext``, ``reltext``, ``matches`` to the parsed values.
    """
    # Equivalent to the Python 2 print statement for a single argument.
    print(sentence.text.encode('utf-8'))
    _, frametext, reltext, matches = pattern_parse(sentence.text)

    is_unusable = reltext is None or reltext == 'junk'

    results = []
    if not is_unusable:
        results.append(dict(
            id=sentence.id,
            frametext=frametext,
            reltext=reltext,
            matches=matches,
        ))
    return results
Example #3
0
def process_sentence(sentence, lang, batch):
    """Parse a sentence and store the assertion it expresses.

    Python 2 code.  The sentence text is echoed to stdout and matched with
    ``pattern_parse``.  If no relation is recognised (``None``) or the
    relation is ``'junk'``, nothing is stored.  Otherwise the two concepts,
    their surface forms, the frequency, the frame, the raw assertion and the
    assertion are fetched or created; the assertion's score is incremented by
    one; and two ratings (one on the sentence, one on the raw assertion) are
    fetched or created for ``sentence.creator``.

    Args:
        sentence: object providing ``text`` and ``creator``; it is also used
            as the ``sentence`` of a ``Rating``.
        lang: language object; ``lang.nl.lemma_factor(text)`` must return a
            ``(stem, residue)`` pair.
        batch: used as the default ``batch`` when a ``RawAssertion`` has to
            be created.

    Returns:
        ``[assertion]`` on success, ``[]`` when the sentence is not usable.

    Raises:
        Whatever ``Relation.objects.get`` raises when no relation is named
        ``reltext`` (presumably ``Relation.DoesNotExist`` -- confirm).
    """
    # A single parenthesised argument keeps Python 2 print-statement output.
    print(sentence.text.encode('utf-8'))
    _, frametext, reltext, matches = pattern_parse(sentence.text)

    if reltext is None or reltext == 'junk':
        return []

    relation = Relation.objects.get(name=reltext)

    # ``matches`` is indexed by slot number: 1 and 2 hold the two text slots.
    slot_texts = [matches[1], matches[2]]
    text_factors = [lang.nl.lemma_factor(text) for text in slot_texts]

    # NOTE(review): get_or_create normally persists newly created rows; the
    # explicit save() calls below are kept from the original in case model
    # save() hooks are relied on -- confirm before removing them.
    concepts = [
        Concept.objects.get_or_create(language=lang, text=stem)[0]
        for stem, _residue in text_factors
    ]
    for concept in concepts:
        concept.save()

    surface_forms = [
        SurfaceForm.objects.get_or_create(concept=concept,
                                          text=text,
                                          residue=residue,
                                          language=lang)[0]
        for concept, text, (_stem, residue)
        in zip(concepts, slot_texts, text_factors)
    ]
    for surface_form in surface_forms:
        surface_form.save()

    # The optional 'a' match supplies the frequency text; default is ''.
    freq, _ = Frequency.objects.get_or_create(text=matches.get('a', ''),
                                              language=lang,
                                              defaults=dict(value=50))
    freq.save()

    frame, _ = Frame.objects.get_or_create(relation=relation, language=lang,
                                           text=frametext, frequency=freq,
                                           defaults=dict(goodness=1))
    frame.save()

    # The raw assertion is linked to its assertion further down.
    raw_assertion, _ = RawAssertion.objects.get_or_create(
        surface1=surface_forms[0],
        surface2=surface_forms[1],
        frame=frame,
        language=lang,
        defaults=dict(batch=batch))

    assertion, _ = Assertion.objects.get_or_create(
        relation=relation,
        concept1=concepts[0],
        concept2=concepts[1],
        frequency=freq,
        language=lang,
        defaults=dict(score=0)
    )
    assertion.score += 1
    assertion.save()
    raw_assertion.assertion = assertion
    raw_assertion.save()

    # NOTE(review): score=1 is part of the lookup here, not a default, so an
    # existing rating with a different score is not matched -- confirm this
    # is intended.
    rating1, _ = Rating.objects.get_or_create(
        user=sentence.creator, activity=csamoa4_activity,
        sentence=sentence, score=1
    )
    rating2, _ = Rating.objects.get_or_create(
        user=sentence.creator, activity=csamoa4_activity,
        raw_assertion=raw_assertion, score=1
    )
    rating1.save()
    rating2.save()

    # Format through a unicode template so the text is encoded exactly once.
    # The previous str(assertion).encode('utf-8') called .encode on a byte
    # string, which Python 2 first decodes as ASCII, so it failed on any
    # non-ASCII assertion text.  Assumes Assertion provides __unicode__
    # (usual for Python 2 Django models) -- confirm.
    print((u'=> %s' % (assertion,)).encode('utf-8'))
    return [assertion]
Example #4
0
def process_sentence(sentence, lang, batch):
    """Turn one sentence into a stored assertion, if it matches a pattern.

    Python 2 code.  Steps, in order:

    1. Print the UTF-8 encoded sentence text and run ``pattern_parse`` on it.
    2. Return ``[]`` if the parsed relation text is ``None`` or ``'junk'``.
    3. Look up the relation by name, then fetch or create the two concepts,
       their surface forms, the frequency and the frame.
    4. Fetch or create the raw assertion and the assertion, add one to the
       assertion's score, and attach the assertion to the raw assertion.
    5. Fetch or create one rating on the sentence and one on the raw
       assertion, both for ``sentence.creator``.

    Args:
        sentence: object providing ``text`` and ``creator``; also passed as
            the ``sentence`` of a ``Rating``.
        lang: language object whose ``nl.lemma_factor(text)`` returns a
            ``(stem, residue)`` pair.
        batch: default ``batch`` for a newly created ``RawAssertion``.

    Returns:
        A one-element list containing the assertion, or an empty list when
        the sentence is skipped.

    Raises:
        Whatever ``Relation.objects.get`` raises when no relation is named
        ``reltext`` (presumably ``Relation.DoesNotExist`` -- confirm).
    """
    # A single parenthesised argument keeps Python 2 print-statement output.
    print(sentence.text.encode('utf-8'))
    _, frametext, reltext, matches = pattern_parse(sentence.text)

    if reltext is None or reltext == 'junk':
        return []

    relation = Relation.objects.get(name=reltext)

    # Slots 1 and 2 of ``matches`` carry the two text spans of the pattern.
    slot_texts = [matches[slot] for slot in (1, 2)]
    text_factors = [lang.nl.lemma_factor(text) for text in slot_texts]

    # NOTE(review): get_or_create normally persists newly created rows; the
    # explicit save() calls are retained from the original in case model
    # save() hooks are relied on -- confirm before dropping them.
    concepts = [
        Concept.objects.get_or_create(language=lang, text=stem)[0]
        for stem, _residue in text_factors
    ]
    for concept in concepts:
        concept.save()

    surface_forms = [
        SurfaceForm.objects.get_or_create(concept=concept,
                                          text=text,
                                          residue=residue,
                                          language=lang)[0]
        for concept, text, (_stem, residue)
        in zip(concepts, slot_texts, text_factors)
    ]
    for surface_form in surface_forms:
        surface_form.save()

    # The optional 'a' match supplies the frequency text; default is ''.
    freq, _ = Frequency.objects.get_or_create(text=matches.get('a', ''),
                                              language=lang,
                                              defaults=dict(value=50))
    freq.save()

    frame, _ = Frame.objects.get_or_create(relation=relation,
                                           language=lang,
                                           text=frametext,
                                           frequency=freq,
                                           defaults=dict(goodness=1))
    frame.save()

    # The raw assertion gets its assertion assigned once that exists.
    raw_assertion, _ = RawAssertion.objects.get_or_create(
        surface1=surface_forms[0],
        surface2=surface_forms[1],
        frame=frame,
        language=lang,
        defaults=dict(batch=batch))

    assertion, _ = Assertion.objects.get_or_create(relation=relation,
                                                   concept1=concepts[0],
                                                   concept2=concepts[1],
                                                   frequency=freq,
                                                   language=lang,
                                                   defaults=dict(score=0))
    assertion.score += 1
    assertion.save()
    raw_assertion.assertion = assertion
    raw_assertion.save()

    # NOTE(review): score=1 is a lookup field here rather than a default, so
    # a rating stored with another score is not matched -- confirm intended.
    rating1, _ = Rating.objects.get_or_create(user=sentence.creator,
                                              activity=csamoa4_activity,
                                              sentence=sentence,
                                              score=1)
    rating2, _ = Rating.objects.get_or_create(user=sentence.creator,
                                              activity=csamoa4_activity,
                                              raw_assertion=raw_assertion,
                                              score=1)
    rating1.save()
    rating2.save()

    # Build the message as unicode and encode it once.  The earlier
    # str(assertion).encode('utf-8') encoded a byte string, which Python 2
    # implicitly decodes as ASCII first, so non-ASCII assertion text raised.
    # Assumes Assertion provides __unicode__ (usual for Python 2 Django
    # models) -- confirm.
    print((u'=> %s' % (assertion,)).encode('utf-8'))
    return [assertion]