Ejemplo n.º 1
0
def serialize_qs4e(serializer, querysets, stream, **options):
    """Serialize every object of several querysets through ``serializer``.

    The objects are visited with ``queryset_foreach`` instead of plain
    iteration.  ``batch_size``, ``progress_callback`` and ``transaction``
    are removed from ``options`` and forwarded to ``queryset_foreach``
    (defaults: ``transaction=False``, ``batch_size=50``); passing ``None``
    for one of them is the same as not passing it.  Whatever remains in
    ``options`` is stored on ``serializer.options`` together with
    ``stream``.  An optional ``fields`` entry restricts which fields are
    serialized.
    """
    foreach_kwargs = {'transaction': False, 'batch_size': 50}
    for key in ('batch_size', 'progress_callback', 'transaction'):
        value = options.pop(key, None)
        if value is not None:
            foreach_kwargs[key] = value

    serializer.options = options
    serializer.options['stream'] = stream
    serializer.stream = stream
    serializer.selected_fields = options.get("fields")

    def is_selected(name):
        # No explicit selection means every field is wanted.
        chosen = serializer.selected_fields
        return chosen is None or name in chosen

    def serialize_object(obj):
        serializer.start_object(obj)
        for field in obj._meta.local_fields:
            if not field.serialize:
                continue
            if field.rel is None:
                if is_selected(field.attname):
                    serializer.handle_field(obj, field)
            # Relation fields are matched by attname minus its last three
            # characters (presumably the "_id" suffix).
            elif is_selected(field.attname[:-3]):
                serializer.handle_fk_field(obj, field)
        for field in obj._meta.many_to_many:
            if field.serialize and is_selected(field.attname):
                serializer.handle_m2m_field(obj, field)
        serializer.end_object(obj)

    serializer.start_serialization()
    for queryset in querysets:
        queryset_foreach(queryset, serialize_object, **foreach_kwargs)
    serializer.end_serialization()
def run():
    """Run ``process_predicate`` over the predicates of five languages.

    For each of 'it', 'fr', 'nl', 'es' and 'pt', the predicates whose
    language id matches are passed to ``queryset_foreach`` with
    ``batch_size=10``.
    """
    languages = ('it', 'fr', 'nl', 'es', 'pt')
    for lang in languages:
        predicates = cn3.Predicate.objects.filter(language__id=lang)
        queryset_foreach(predicates, process_predicate, batch_size=10)
Ejemplo n.º 3
0
def update_assertion_counts(lang):
    '''Fix the num_assertions count for each concept'''
    status = queryset_foreach(Concept.objects.filter(language=lang),
                              fix_concept)
    print 'Fixed %s of %s concepts (%s with >2 rels).' % (
        concepts_fixed, status.total, significant)
    return status
Ejemplo n.º 4
0
from csc.util import queryset_foreach
from csc.conceptnet4.models import Sentence, Assertion, RawAssertion, Language, Vote

# Language object for 'pt'; process() uses its blacklist check and the
# driver query filters raw assertions by it.
pt = Language.get('pt')
def process(raw):
    """Drop the votes of a blacklisted raw assertion, or record a vote for it.

    If either surface text of ``raw`` is blacklisted for the module-level
    language ``pt``, every vote attached to ``raw`` is deleted.  Otherwise
    a vote of 1 is recorded on ``raw`` on behalf of the creator of its
    sentence.
    """
    is_blacklisted = pt.nl.is_blacklisted
    if is_blacklisted(raw.surface1.text) or is_blacklisted(raw.surface2.text):
        # Django managers do not expose delete(); go through a queryset,
        # as nuke_it-style code elsewhere does with ``votes.all()``.
        # NOTE(review): assumes raw.votes is a standard related manager.
        raw.votes.all().delete()
    else:
        Vote.objects.record_vote(raw, raw.sentence.creator, 1)

# Apply process() to every raw assertion whose language is ``pt``.
queryset_foreach(
    RawAssertion.objects.filter(language=pt),
    process,
    batch_size=100)

Ejemplo n.º 5
0
from csc.util import queryset_foreach
from csc.corpus.models import Sentence

# Call update_consistency() on every sentence with id below 1367900; the
# queryset is ordered by descending id.
queryset_foreach(
    Sentence.objects.filter(id__lt=1367900).order_by('-id'),
    lambda x: x.update_consistency(),
    batch_size=100)
Ejemplo n.º 6
0
from csc.util import queryset_foreach
from csc.conceptnet4.models import Frame
from django.db import connection


def fix_dups(frame):
    dups = Frame.objects.filter(language=frame.language,
                                text=frame.text,
                                relation=frame.relation)
    for dup in dups:
        if dup.id == frame.id:
            continue
        print dup
        cursor = connection.cursor()
        print("UPDATE raw_assertions SET frame_id=%s WHERE frame_id=%s" %
              (frame.id, dup.id))
        cursor.execute(
            "UPDATE raw_assertions SET frame_id=%s WHERE frame_id=%s" %
            (frame.id, dup.id))
        dup.delete()
        print


# Run fix_dups() over all frames, ordered by descending goodness and then
# by id (presumably so the best-rated copy is visited first and kept).
queryset_foreach(
    Frame.objects.all().order_by('-goodness', 'id'),
    fix_dups,
    batch_size=100)
Ejemplo n.º 7
0
from csc.conceptnet4.models import Sentence, Assertion, RawAssertion
from csc.util import queryset_foreach

# Call update_raw_cache() on every assertion.
queryset_foreach(
    Assertion.objects.all(),
    lambda a: a.update_raw_cache(),
    batch_size=100)
Ejemplo n.º 8
0
from csc.util import queryset_foreach
from csc.conceptnet4.models import Frame, Assertion, RawAssertion, SurfaceForm
from django.db import connection

def check_frame(assertion):
    try:
        assertion.best_frame
    except Frame.DoesNotExist:
        print "No frame for:", assertion
        assertion.best_frame = None
        assertion.save()
    
    try:
        assertion.best_raw
        assertion.best_surface1
        assertion.best_surface2
    except (RawAssertion.DoesNotExist, SurfaceForm.DoesNotExist):
        print "No raw assertion for:", assertion
        assertion.best_raw = None
        assertion.best_surface1 = None
        assertion.best_surface2 = None
        assertion.save()

# Run check_frame() over every assertion.
queryset_foreach(
    Assertion.objects.all(),
    check_frame,
    batch_size=100)

Ejemplo n.º 9
0
def update_surfaceform_usecounts(lang):
    '''Fix the num_assertions count for each concept'''
    status = queryset_foreach(SurfaceForm.objects.filter(language=lang), update_count)
    print 'Updated counts on %d of %d surface forms' % (fixed, status.total)
    return status
Ejemplo n.º 10
0
from csc.util import queryset_foreach
from csc.corpus.models import Sentence

# Call update_consistency() on every sentence with id below 1367900; the
# queryset is ordered by descending id.
queryset_foreach(
    Sentence.objects.filter(id__lt=1367900).order_by('-id'),
    lambda x: x.update_consistency(),
    batch_size=100)

Ejemplo n.º 11
0
def update_assertion_counts(lang):
    '''Fix the num_assertions count for each concept'''
    status = queryset_foreach(Concept.objects.filter(language=lang), fix_concept)
    print 'Fixed %s of %s concepts (%s with >2 rels).' % (concepts_fixed, status.total, significant)
    return status
Ejemplo n.º 12
0
from csc.conceptnet4.models import *
from events.models import Event, Activity
from voting.models import Vote
from csc.util import queryset_foreach

def nuke_it(event):
    """Delete the object an event refers to, along with its votes.

    Each vote in ``event.object.votes.all()`` is deleted one by one, then
    the object itself.  Events whose ``object`` is None are ignored.
    """
    target = event.object
    if target is not None:
        for vote in target.votes.all():
            vote.delete()
        target.delete()

# Run nuke_it() over the events of activity 41 for content types 90 and
# 20, passing 50 as the third positional argument of queryset_foreach
# (presumably the batch size).
# The same pass for content_type__id=92 is disabled.
for content_type_id in (90, 20):
    queryset_foreach(
        Event.objects.filter(content_type__id=content_type_id,
                             activity__id=41),
        nuke_it, 50)

Ejemplo n.º 13
0
from csc.util import queryset_foreach
from csc.conceptnet.models import Concept, SurfaceForm, Language, Assertion
from django.db import connection

# Language object for 'en'; fix_surface() uses its lemma splitter and the
# driver query filters surface forms by it.
en = Language.get('en')


def fix_surface(surface):
    norm, residue = en.nl.lemma_split(surface.text)
    if norm != surface.concept.text:
        print
        print "surface:", surface.text.encode('utf-8')
        print "concept:", surface.concept.text.encode('utf-8')
        print "normal:", norm.encode('utf-8')
        surface.update(norm, residue)


# Run fix_surface() over every surface form whose language is ``en``.
queryset_foreach(
    SurfaceForm.objects.filter(language=en),
    fix_surface,
    batch_size=100)

# plan:
#  fix surface form -> concept mapping
#  remove obsolete concepts
Ejemplo n.º 14
0
from csc.util import queryset_foreach
from csc.conceptnet4.models import Frame, Assertion, RawAssertion, SurfaceForm
from django.db import connection


def check_frame(assertion):
    try:
        assertion.best_frame
    except Frame.DoesNotExist:
        print "No frame for:", assertion
        assertion.best_frame = None
        assertion.save()

    try:
        assertion.best_raw
        assertion.best_surface1
        assertion.best_surface2
    except (RawAssertion.DoesNotExist, SurfaceForm.DoesNotExist):
        print "No raw assertion for:", assertion
        assertion.best_raw = None
        assertion.best_surface1 = None
        assertion.best_surface2 = None
        assertion.save()


# Run check_frame() over every assertion.
queryset_foreach(
    Assertion.objects.all(),
    check_frame,
    batch_size=100)
Ejemplo n.º 15
0
from csc.util import queryset_foreach
from csc.conceptnet.models import Sentence, Assertion, RawAssertion

# Call update_score() on every assertion, then on every raw assertion.
for model in (Assertion, RawAssertion):
    queryset_foreach(model.objects.all(),
                     lambda x: x.update_score(),
                     batch_size=100)
# Disabled: the same pass over non-English sentences.
# queryset_foreach(Sentence.objects.exclude(language__id='en'), lambda x: x.update_score(), batch_size=100)

Ejemplo n.º 16
0
def update_surfaceform_usecounts(lang):
    '''Fix the num_assertions count for each concept'''
    status = queryset_foreach(SurfaceForm.objects.filter(language=lang),
                              update_count)
    print 'Updated counts on %d of %d surface forms' % (fixed, status.total)
    return status
Ejemplo n.º 17
0
from csc.conceptnet4.models import *
from events.models import Event, Activity
from voting.models import Vote
from csc.util import queryset_foreach


def nuke_it(event):
    """Delete the object an event refers to, along with its votes.

    Each vote in ``event.object.votes.all()`` is deleted one by one, then
    the object itself.  Events whose ``object`` is None are ignored.
    """
    target = event.object
    if target is not None:
        for vote in target.votes.all():
            vote.delete()
        target.delete()


# Run nuke_it() over the events of activity 41 for content types 90 and
# 20, passing 50 as the third positional argument of queryset_foreach
# (presumably the batch size).
# The same pass for content_type__id=92 is disabled.
for content_type_id in (90, 20):
    queryset_foreach(
        Event.objects.filter(content_type__id=content_type_id,
                             activity__id=41),
        nuke_it, 50)
Ejemplo n.º 18
0
from csc.util import queryset_foreach
from csc.conceptnet.models import Concept, Language

def set_visible(concept):
    """Mark ``concept`` visible and save it, unless its text is blacklisted.

    The blacklist check is the one of the concept's own language.
    Blacklisted concepts are left untouched and not saved.
    """
    if concept.language.nl.is_blacklisted(concept.text):
        return
    concept.visible = True
    concept.save()

def set_invisible(concept):
    """Mark ``concept`` invisible and save it if its text is blacklisted.

    The blacklist check is the one of the concept's own language.
    Concepts that are not blacklisted are left untouched and not saved.
    """
    if not concept.language.nl.is_blacklisted(concept.text):
        return
    concept.visible = False
    concept.save()
        
# Run set_visible() over every concept currently stored with
# visible=False, which unhides those whose text is not blacklisted.
queryset_foreach(
    Concept.objects.filter(visible=False),
    set_visible)

from csc.util import queryset_foreach
from csc.conceptnet.models import Concept, SurfaceForm, Language, Assertion
from django.db import connection

# Language object for 'en'; fix_surface() uses its lemma splitter and the
# driver query filters surface forms by it.
en = Language.get('en')

def fix_surface(surface):
    norm, residue = en.nl.lemma_split(surface.text)
    if norm != surface.concept.text:
        print
        print "surface:", surface.text.encode('utf-8')
        print "concept:", surface.concept.text.encode('utf-8')
        print "normal:", norm.encode('utf-8')
        surface.update(norm, residue)

# Run fix_surface() over every surface form whose language is ``en``.
queryset_foreach(
    SurfaceForm.objects.filter(language=en),
    fix_surface,
    batch_size=100)


# plan:
#  fix surface form -> concept mapping
#  remove obsolete concepts
Ejemplo n.º 20
0
from csc.conceptnet4.models import Sentence, Assertion, RawAssertion
from csc.util import queryset_foreach

# Call update_raw_cache() on every assertion.
queryset_foreach(
    Assertion.objects.all(),
    lambda a: a.update_raw_cache(),
    batch_size=100)