def process_sentence(sentence):
    """Parse one sentence and return its extracted pattern as a list.

    The UTF-8 encoded sentence text is echoed to stdout, then the text is
    handed to ``pattern_parse``.

    Returns an empty list when the parsed relation text is ``None`` or
    ``'junk'``; otherwise a one-element list holding a dict with the keys
    ``id``, ``frametext``, ``reltext`` and ``matches``.
    """
    # A single parenthesised argument prints exactly what the Python 2
    # print statement would.
    print(sentence.text.encode('utf-8'))

    # The first element of the parse result is not used here.
    _ignored, frame_text, relation_text, slot_matches = pattern_parse(
        sentence.text)

    if relation_text is None or relation_text == 'junk':
        return []

    record = {
        'id': sentence.id,
        'frametext': frame_text,
        'reltext': relation_text,
        'matches': slot_matches,
    }
    return [record]
def process_sentence(sentence):
    """Run ``pattern_parse`` on a sentence and package what it found.

    Prints the sentence text (encoded as UTF-8) before parsing.

    Returns ``[]`` if the relation text from the parse is ``None`` or the
    string ``'junk'``. Otherwise returns a list containing one dict with
    the sentence ``id`` plus the parsed ``frametext``, ``reltext`` and
    ``matches``.
    """
    # print(...) with one argument behaves the same as the Python 2
    # print statement.
    print(sentence.text.encode('utf-8'))

    # Only the last three fields of the parse result are needed.
    _first, parsed_frame, parsed_relation, parsed_matches = pattern_parse(
        sentence.text)

    results = []
    if parsed_relation is None or parsed_relation == 'junk':
        return results

    results.append({
        'id': sentence.id,
        'frametext': parsed_frame,
        'reltext': parsed_relation,
        'matches': parsed_matches,
    })
    return results
def process_sentence(sentence, lang, batch):
    """Parse a sentence and record the assertion it expresses.

    The sentence text is printed (UTF-8 encoded) and passed to
    ``pattern_parse``. If the resulting relation text is ``None`` or
    ``'junk'`` nothing is stored and ``[]`` is returned.

    Otherwise the function looks up the ``Relation`` by name and then
    gets-or-creates, in this order: two ``Concept`` objects, two
    ``SurfaceForm`` objects, a ``Frequency``, a ``Frame``, a
    ``RawAssertion`` (created with ``batch`` as a default), an
    ``Assertion`` whose score is incremented by one, and two ``Rating``
    objects (one for the sentence, one for the raw assertion).

    Returns a one-element list containing the ``Assertion``.

    ``Relation.objects.get`` is called without a fallback, so whatever it
    raises for an unknown relation name propagates to the caller.
    The ``objects`` managers are presumably Django model managers --
    confirm against the model definitions.
    """
    # A single parenthesised argument prints exactly what the Python 2
    # print statement would.
    print(sentence.text.encode('utf-8'))

    _ignored, frame_text, relation_name, slots = pattern_parse(sentence.text)
    if relation_name is None or relation_name == 'junk':
        return []

    relation = Relation.objects.get(name=relation_name)

    # Entries 1 and 2 of the parse matches are each split by lemma_factor
    # into a (stem, residue) pair.
    slot_keys = (1, 2)
    factored = []
    for key in slot_keys:
        factored.append(lang.nl.lemma_factor(slots[key]))

    # Fetch/create both concepts first, then save both, keeping the
    # original ordering of database calls.
    concepts = []
    for stem, _residue in factored:
        concept, _created = Concept.objects.get_or_create(
            language=lang, text=stem)
        concepts.append(concept)
    for concept in concepts:
        concept.save()

    # Each surface form ties the matched text and its residue to the
    # concept built from the same slot.
    surface_forms = []
    for key, concept, factor in zip(slot_keys, concepts, factored):
        surface, _created = SurfaceForm.objects.get_or_create(
            concept=concept,
            text=slots[key],
            residue=factor[1],
            language=lang)
        surface_forms.append(surface)
    for surface in surface_forms:
        surface.save()

    # The 'a' entry of the matches (empty string when absent) is the
    # frequency text.
    frequency_text = slots.get('a', '')
    freq, _created = Frequency.objects.get_or_create(
        text=frequency_text,
        language=lang,
        defaults={'value': 50})
    freq.save()

    frame, _created = Frame.objects.get_or_create(
        relation=relation,
        language=lang,
        text=frame_text,
        frequency=freq,
        defaults={'goodness': 1})
    frame.save()

    # The raw assertion is fetched/created before its assertion exists;
    # the link between them is filled in further down.
    raw_assertion, _created = RawAssertion.objects.get_or_create(
        surface1=surface_forms[0],
        surface2=surface_forms[1],
        frame=frame,
        language=lang,
        defaults={'batch': batch})

    assertion, _created = Assertion.objects.get_or_create(
        relation=relation,
        concept1=concepts[0],
        concept2=concepts[1],
        frequency=freq,
        language=lang,
        defaults={'score': 0})
    assertion.score += 1
    assertion.save()

    raw_assertion.assertion = assertion
    raw_assertion.save()

    # One rating for the sentence and one for the raw assertion, both with
    # score 1; both are fetched/created before either is saved.
    ratings = []
    for rated in ({'sentence': sentence}, {'raw_assertion': raw_assertion}):
        rating, _created = Rating.objects.get_or_create(
            user=sentence.creator,
            activity=csamoa4_activity,
            score=1,
            **rated)
        ratings.append(rating)
    for rating in ratings:
        rating.save()

    # NOTE(review): under Python 2, calling .encode('utf-8') on the byte
    # string from str() implicitly ASCII-decodes it first, which would
    # fail on non-ASCII text -- confirm what Assertion.__str__ returns.
    # The format string reproduces the "=> <text>" output of the original
    # two-item print statement.
    print('%s %s' % ('=>', str(assertion).encode('utf-8')))
    return [assertion]
def process_sentence(sentence, lang, batch):
    """Store the assertion expressed by a sentence, if it parses.

    Prints the UTF-8 encoded sentence text, runs ``pattern_parse`` on it
    and returns ``[]`` immediately when the parsed relation text is
    ``None`` or ``'junk'``.

    For a usable parse, the named ``Relation`` is fetched and the
    following objects are fetched-or-created and saved: a ``Concept`` and
    ``SurfaceForm`` for each of match entries 1 and 2, a ``Frequency``
    from match entry ``'a'`` (default ``''``), a ``Frame``, a
    ``RawAssertion`` (``batch`` is only used as a creation default), an
    ``Assertion`` whose ``score`` is bumped by one, and two ``Rating``
    rows with score 1 credited to ``sentence.creator``.

    Returns a list containing just the ``Assertion``.

    Any exception raised by ``Relation.objects.get`` for a missing
    relation is not handled here. The ``objects`` attributes look like
    Django managers -- verify against the models.
    """
    # print(...) with one argument behaves the same as the Python 2
    # print statement.
    print(sentence.text.encode('utf-8'))

    _first, parsed_frame, parsed_relation, found = pattern_parse(sentence.text)
    if parsed_relation is None or parsed_relation == 'junk':
        return []

    relation = Relation.objects.get(name=parsed_relation)

    # lemma_factor yields a (stem, residue) pair for match entries 1 and 2.
    stem_pairs = []
    for index in (1, 2):
        stem_pairs.append(lang.nl.lemma_factor(found[index]))

    # All concepts are fetched/created before any of them is saved, as in
    # the original call order.
    concept_list = []
    for stem, _rest in stem_pairs:
        lookup = Concept.objects.get_or_create(language=lang, text=stem)
        concept_list.append(lookup[0])
    for item in concept_list:
        item.save()

    # Surface forms pair each concept with the raw matched text and the
    # residue from the same position.
    forms = []
    for offset in (0, 1):
        lookup = SurfaceForm.objects.get_or_create(
            concept=concept_list[offset],
            text=found[offset + 1],
            residue=stem_pairs[offset][1],
            language=lang)
        forms.append(lookup[0])
    for item in forms:
        item.save()

    freq = Frequency.objects.get_or_create(
        text=found.get('a', ''),
        language=lang,
        defaults={'value': 50})[0]
    freq.save()

    frame = Frame.objects.get_or_create(
        relation=relation,
        language=lang,
        text=parsed_frame,
        frequency=freq,
        defaults={'goodness': 1})[0]
    frame.save()

    # The raw assertion's link to its assertion is set after the assertion
    # itself has been fetched/created below.
    raw_assertion = RawAssertion.objects.get_or_create(
        surface1=forms[0],
        surface2=forms[1],
        frame=frame,
        language=lang,
        defaults={'batch': batch})[0]

    assertion = Assertion.objects.get_or_create(
        relation=relation,
        concept1=concept_list[0],
        concept2=concept_list[1],
        frequency=freq,
        language=lang,
        defaults={'score': 0})[0]
    assertion.score += 1
    assertion.save()

    raw_assertion.assertion = assertion
    raw_assertion.save()

    # Both ratings are fetched/created first and saved afterwards.
    sentence_rating = Rating.objects.get_or_create(
        user=sentence.creator,
        activity=csamoa4_activity,
        sentence=sentence,
        score=1)[0]
    raw_rating = Rating.objects.get_or_create(
        user=sentence.creator,
        activity=csamoa4_activity,
        raw_assertion=raw_assertion,
        score=1)[0]
    sentence_rating.save()
    raw_rating.save()

    # NOTE(review): in Python 2, str(...).encode('utf-8') ASCII-decodes the
    # byte string before encoding, so non-ASCII output would raise --
    # confirm what Assertion.__str__ returns.
    # Formatting with '%s %s' yields the same "=> <text>" line as the
    # original two-item print statement.
    encoded = str(assertion).encode('utf-8')
    print('%s %s' % ('=>', encoded))
    return [assertion]