def serialize_qs4e(serializer, querysets, stream, **options):
    '''Serialize the objects of several querysets via queryset_foreach.

    The iteration options (batch_size, progress_callback, transaction) are
    removed from ``options`` and passed to queryset_foreach; everything
    that remains, plus ``stream``, becomes the serializer's options.

    serializer -- object providing start_serialization, start_object,
                  handle_field, handle_fk_field, handle_m2m_field,
                  end_object and end_serialization
    querysets  -- iterable of querysets, processed one after another
    stream     -- stored as serializer.stream and as options['stream']
    options    -- remaining serializer options; an optional ``fields``
                  entry restricts which fields are handled
    '''
    # Iteration defaults; only an explicit non-None option overrides them.
    foreach_kwargs = {'transaction': False, 'batch_size': 50}
    for key in ('batch_size', 'progress_callback', 'transaction'):
        override = options.pop(key, None)
        if override is None:
            continue
        foreach_kwargs[key] = override

    serializer.options = options
    serializer.options['stream'] = stream
    serializer.stream = stream
    serializer.selected_fields = options.get("fields")

    def serialize_object(obj):
        # Emit one object: local fields first, then many-to-many fields.
        serializer.start_object(obj)
        for field in obj._meta.local_fields:
            if not field.serialize:
                continue
            if field.rel is None:
                if (serializer.selected_fields is None
                        or field.attname in serializer.selected_fields):
                    serializer.handle_field(obj, field)
            elif (serializer.selected_fields is None
                    or field.attname[:-3] in serializer.selected_fields):
                # Relation fields are matched with the last three
                # characters of attname removed (presumably the "_id"
                # suffix -- TODO confirm).
                serializer.handle_fk_field(obj, field)
        for field in obj._meta.many_to_many:
            if not field.serialize:
                continue
            if (serializer.selected_fields is None
                    or field.attname in serializer.selected_fields):
                serializer.handle_m2m_field(obj, field)
        serializer.end_object(obj)

    serializer.start_serialization()
    for queryset in querysets:
        queryset_foreach(queryset, serialize_object, **foreach_kwargs)
    serializer.end_serialization()
def run():
    '''Apply process_predicate to the predicates of each listed language.

    The languages are handled in the order given; each queryset is passed
    to queryset_foreach with batch_size=10.
    '''
    #generator = yaml.load_all(open('delayed_test.yaml'))
    #all_entries = list(generator)
    #activity_filter = Q()
    #for actid in good_acts:
    #    activity_filter |= Q(sentence__activity__id=actid)
    language_ids = ('it', 'fr', 'nl', 'es', 'pt')
    for language_id in language_ids:
        predicates = cn3.Predicate.objects.filter(language__id=language_id)
        queryset_foreach(predicates, process_predicate, batch_size=10)
def update_assertion_counts(lang):
    '''Run fix_concept over every concept of a language and report.

    lang -- value matched against Concept.language

    Returns the status object produced by queryset_foreach; its ``total``
    attribute is used in the printed summary.
    '''
    concepts = Concept.objects.filter(language=lang)
    status = queryset_foreach(concepts, fix_concept)
    # concepts_fixed and significant are not set in this function; they are
    # presumably maintained by fix_concept -- TODO confirm.
    summary = 'Fixed %s of %s concepts (%s with >2 rels).' % (
        concepts_fixed, status.total, significant)
    print(summary)
    return status
from csc.util import queryset_foreach
from csc.conceptnet4.models import Sentence, Assertion, RawAssertion, Language, Vote

pt = Language.get('pt')


def process(raw):
    '''Drop the votes of a blacklisted raw assertion, else record an upvote.

    raw -- a RawAssertion; when either of its surface texts is blacklisted
           for the 'pt' language its votes are deleted, otherwise a vote
           of 1 is recorded on behalf of the creator of its sentence.
    '''
    if (pt.nl.is_blacklisted(raw.surface1.text)
            or pt.nl.is_blacklisted(raw.surface2.text)):
        # delete() is a QuerySet operation that Django managers do not
        # expose, so go through all().  Assumes raw.votes is a related
        # manager, as the votes.all() usage elsewhere in this file
        # suggests -- TODO confirm.
        raw.votes.all().delete()
    else:
        Vote.objects.record_vote(raw, raw.sentence.creator, 1)


queryset_foreach(RawAssertion.objects.filter(language=pt), process,
                 batch_size=100)
from csc.util import queryset_foreach
from csc.corpus.models import Sentence


def _update_consistency(sentence):
    '''Call update_consistency() on one sentence and pass its result on.'''
    return sentence.update_consistency()


# Sentences with an id below 1367900, visited from the highest id down.
queryset_foreach(
    Sentence.objects.filter(id__lt=1367900).order_by('-id'),
    _update_consistency,
    batch_size=100)
from csc.util import queryset_foreach
from csc.conceptnet4.models import Frame
from django.db import connection


def fix_dups(frame):
    '''Merge every duplicate of ``frame`` into it.

    A duplicate is another Frame with the same language, text and
    relation.  For each one, the raw_assertions rows that point at it are
    re-pointed to ``frame`` and the duplicate is then deleted.

    frame -- the Frame to keep
    '''
    # A frame deleted as a duplicate by an earlier call can still be handed
    # over if it was loaded before that deletion (NOTE(review): assumes
    # queryset_foreach loads objects ahead of calling this function --
    # confirm).  Merging into it would re-point rows to a frame that no
    # longer exists and delete the survivor, so skip it.
    if Frame.objects.filter(id=frame.id).count() == 0:
        return

    sql = "UPDATE raw_assertions SET frame_id=%s WHERE frame_id=%s"
    dups = Frame.objects.filter(language=frame.language, text=frame.text,
                                relation=frame.relation)
    for dup in dups:
        if dup.id == frame.id:
            continue
        print(dup)
        params = [frame.id, dup.id]
        cursor = connection.cursor()
        # The interpolated statement is printed for the log only; the
        # database driver receives the parameters separately.
        print(sql % tuple(params))
        cursor.execute(sql, params)
        dup.delete()
        print('')


queryset_foreach(Frame.objects.all().order_by('-goodness', 'id'), fix_dups,
                 batch_size=100)
from csc.conceptnet4.models import Sentence, Assertion, RawAssertion
from csc.util import queryset_foreach


def _update_raw_cache(assertion):
    '''Call update_raw_cache() on one assertion and pass its result on.'''
    return assertion.update_raw_cache()


# Visit every assertion, passing batch_size=100 to queryset_foreach.
queryset_foreach(
    Assertion.objects.all(),
    _update_raw_cache,
    batch_size=100)
from csc.util import queryset_foreach
from csc.conceptnet4.models import Frame, Assertion, RawAssertion, SurfaceForm
from django.db import connection


def check_frame(assertion):
    '''Reset the "best" references of an assertion that no longer resolve.

    Accessing best_frame, and then best_raw / best_surface1 /
    best_surface2, may raise DoesNotExist.  When that happens the affected
    attributes are set to None, a message is printed and the assertion is
    saved.
    '''
    try:
        assertion.best_frame
    except Frame.DoesNotExist:
        print("No frame for:" + " " + str(assertion))
        assertion.best_frame = None
        assertion.save()

    # These three are probed together and cleared together.
    raw_attrs = ('best_raw', 'best_surface1', 'best_surface2')
    try:
        for attr in raw_attrs:
            getattr(assertion, attr)
    except (RawAssertion.DoesNotExist, SurfaceForm.DoesNotExist):
        print("No raw assertion for:" + " " + str(assertion))
        for attr in raw_attrs:
            setattr(assertion, attr, None)
        assertion.save()


queryset_foreach(
    Assertion.objects.all(),
    check_frame,
    batch_size=100)
def update_surfaceform_usecounts(lang):
    '''Run update_count over every surface form of a language and report.

    lang -- value matched against SurfaceForm.language

    Returns the status object produced by queryset_foreach; its ``total``
    attribute is used in the printed summary.
    '''
    surface_forms = SurfaceForm.objects.filter(language=lang)
    status = queryset_foreach(surface_forms, update_count)
    # ``fixed`` is not set in this function; it is presumably maintained by
    # update_count -- TODO confirm.
    summary = 'Updated counts on %d of %d surface forms' % (
        fixed, status.total)
    print(summary)
    return status
def update_assertion_counts(lang):
    '''Run fix_concept over every concept of a language and report.

    lang -- value matched against Concept.language

    Returns the status object produced by queryset_foreach; its ``total``
    attribute is used in the printed summary.
    '''
    concepts = Concept.objects.filter(language=lang)
    status = queryset_foreach(concepts, fix_concept)
    # concepts_fixed and significant are not set in this function; they are
    # presumably maintained by fix_concept -- TODO confirm.
    counts = (concepts_fixed, status.total, significant)
    print('Fixed %s of %s concepts (%s with >2 rels).' % counts)
    return status
from csc.conceptnet4.models import *
from events.models import Event, Activity
from voting.models import Vote
from csc.util import queryset_foreach


def nuke_it(event):
    '''Delete the object an event refers to, together with its votes.

    Events whose ``object`` is None are left untouched.
    '''
    target = event.object
    if target is not None:
        # Votes are removed one by one before the object itself.
        for vote in target.votes.all():
            vote.delete()
        target.delete()


# The pass over content type 92 is disabled:
#queryset_foreach(Event.objects.filter(content_type__id=92, activity__id=41),
#nuke_it, 50)
for content_type_id in (90, 20):
    doomed_events = Event.objects.filter(content_type__id=content_type_id,
                                         activity__id=41)
    queryset_foreach(doomed_events, nuke_it, 50)
from csc.util import queryset_foreach
from csc.conceptnet.models import Concept, SurfaceForm, Language, Assertion
from django.db import connection

en = Language.get('en')


def fix_surface(surface):
    '''Re-derive the concept text of a surface form and update it if stale.

    The surface text is split with en.nl.lemma_split; when the normalized
    part differs from the text of the current concept, the mismatch is
    printed and surface.update(norm, residue) is called.
    '''
    norm, residue = en.nl.lemma_split(surface.text)
    if norm == surface.concept.text:
        return

    print('')
    report = (
        ("surface:", surface.text),
        ("concept:", surface.concept.text),
        ("normal:", norm),
    )
    for label, text in report:
        print(label + " " + text.encode('utf-8'))
    surface.update(norm, residue)


queryset_foreach(
    SurfaceForm.objects.filter(language=en),
    fix_surface,
    batch_size=100)

# plan:
# fix surface form -> concept mapping
# remove obsolete concepts
from csc.util import queryset_foreach
from csc.conceptnet.models import Sentence, Assertion, RawAssertion


def _update_score(item):
    '''Call update_score() on one object and pass its result on.'''
    return item.update_score()


# Assertions are rescored first, then raw assertions.
for model in (Assertion, RawAssertion):
    queryset_foreach(model.objects.all(), _update_score, batch_size=100)
# queryset_foreach(Sentence.objects.exclude(language__id='en'), lambda x: x.update_score(), batch_size=100)
from csc.util import queryset_foreach
from csc.conceptnet.models import Concept, Language


def set_visible(concept):
    '''Mark a concept visible unless its text is blacklisted.

    The blacklist of the concept's own language is consulted; blacklisted
    concepts are neither modified nor saved.
    '''
    if concept.language.nl.is_blacklisted(concept.text):
        return
    concept.visible = True
    concept.save()


def set_invisible(concept):
    '''Mark a concept invisible when its text is blacklisted.

    Concepts that are not blacklisted are neither modified nor saved.
    '''
    if not concept.language.nl.is_blacklisted(concept.text):
        return
    concept.visible = False
    concept.save()


# Only set_visible is run here, over the concepts currently not visible.
hidden_concepts = Concept.objects.filter(visible=False)
queryset_foreach(hidden_concepts, set_visible)