def test_batch_create_no_reduce(store0, member):
    """batched_create yields chunks lazily (nothing persisted until the
    generator is consumed); create() persists and returns the count."""
    suggestion_batch = Batch(Suggestion.objects, batch_size=2)
    assert suggestion_batch.target == Suggestion.objects
    assert suggestion_batch.batch_size == 2
    highest_pk = (
        Suggestion.objects.order_by("-pk")
                          .values_list("pk", flat=True)
                          .first())

    def _build_suggestion(unit, source, mtime):
        # kwargs for a Suggestion row mirroring the unit's source text.
        return dict(
            unit_id=unit,
            creation_time=mtime,
            target_f=source,
            user_id=member.id)

    chunks = suggestion_batch.batched_create(
        store0.units.values_list("id", "source_f", "mtime"),
        _build_suggestion,
        reduces=False)
    added = Suggestion.objects.filter(pk__gt=highest_pk)
    # Generator not consumed yet, so nothing has been written.
    assert added.count() == 0
    for chunk in chunks:
        assert len(chunk)
        assert len(chunk) <= 2
        assert all(isinstance(item, Suggestion) for item in chunk)
        assert all(item.target_f == item.unit.source_f for item in chunk)
    assert added.count() == store0.units.count()
    added.delete()
    total = suggestion_batch.create(
        store0.units.values_list("id", "source_f", "mtime"),
        _build_suggestion,
        reduces=False)
    added = Suggestion.objects.filter(pk__gt=highest_pk)
    assert total == added.count() == store0.units.count()
    assert (
        list(added.values_list("unit"))
        == list(store0.units.values_list("id")))
def _cleanup_accept_suggestion_subs(apps):
    """Repair Submission rows produced by suggestion acceptance.

    Associates state-change submissions with their accepted suggestion,
    re-credits the suggester (not the reviewer) as submitter, and
    normalizes the submission type to the new WEB type.
    """
    # state/target changes that happened from suggestion had type=3,
    # and state change subs did not have their suggestion associated
    # They also credited the reviewer not the submitter
    # we need to:
    # - associate any state change subs with relevant suggestions
    # - set the suggester as submitter for all type=3 submissions
    # - update type to WEB for all type=3 subs that have suggestions
    # any remaining submissions of this type are likely dupes and will be deleted
    Submission = apps.get_model("pootle_statistics.Submission")
    subs = Submission.objects.all()
    suggestions = apps.get_model("pootle_store.Suggestion").objects
    # assoc state change subs with suggestions and update the sub
    to_update = []
    # Accepted suggestions as dicts holding exactly the fields needed to
    # match each against its state-change submission.
    accepted_suggestions = suggestions.filter(state__name="accepted").values(
        "review_time", "pk", "unit_id", "reviewer_id", "user_id")
    accept_sugg_state_subs = subs.filter(type=SUGG_ACCEPT_TYPE).filter(
        field=STATE_FIELD)
    accept_subs_for_update = accept_sugg_state_subs.select_related(
        "suggestion").only("id", "type", "submitter_id", "suggestion_id")
    for suggestion in accepted_suggestions.iterator():
        # Rekey the dict so it lines up with Submission fields: the sub
        # was recorded by the reviewer at review time.
        suggestion["submitter_id"] = suggestion["reviewer_id"]
        suggestion["creation_time"] = suggestion["review_time"]
        suggestion_id = suggestion["pk"]
        suggestion_user = suggestion["user_id"]
        del suggestion["review_time"]
        del suggestion["reviewer_id"]
        del suggestion["pk"]
        del suggestion["user_id"]
        # Remaining keys (unit_id, submitter_id, creation_time) act as
        # the lookup filter for the matching submission.
        matching_sub = accept_subs_for_update.filter(**suggestion).first()
        if matching_sub:
            matching_sub.type = NEW_WEB_TYPE
            matching_sub.suggestion_id = suggestion_id
            # Credit the suggester, not the reviewer.
            matching_sub.submitter_id = suggestion_user
            to_update.append(matching_sub)
    if to_update:
        Batch(Submission.objects, batch_size=5000).update(
            to_update,
            update_fields=["type", "suggestion_id", "submitter_id"],
            reduces=False)
    # all remaining valid sugg_accept subs should be field=target and have a suggestion
    to_update = []
    accept_sugg_subs = subs.filter(type=SUGG_ACCEPT_TYPE).filter(
        field=TARGET_FIELD).exclude(suggestion__isnull=True)
    accept_subs_for_update = accept_sugg_subs.select_related(
        "suggestion").only("id", "type", "submitter_id", "suggestion__user_id")
    for sub in accept_subs_for_update.iterator():
        # Again credit the original suggester and normalize the type.
        sub.submitter_id = sub.suggestion.user_id
        sub.type = NEW_WEB_TYPE
        to_update.append(sub)
    if to_update:
        Batch(Submission.objects, batch_size=5000).update(
            to_update, update_fields=["type", "submitter_id"], reduces=False)
def set_unit_source_data(apps, schema_editor):
    """Backfill hash/length/wordcount on UnitSource rows created with a
    zero wordcount, using the project's wordcount counter for Unit."""
    from pootle_store.models import Unit
    UnitSource = apps.get_model("pootle_store.UnitSource")
    counter = wordcount.get(Unit)
    stale_sources = (
        UnitSource.objects
        .select_related('unit')
        .filter(source_wordcount=0)
        .only("unit__source_f", "id")
        .order_by("id"))
    updater = functools.partial(unit_source_update, counter)
    Batch(stale_sources, batch_size=BATCH_SIZE).update(
        stale_sources,
        update_method=updater,
        update_fields=['source_hash', 'source_length', 'source_wordcount'])
def update_sources_with_orm(apps, creators):
    """Stamp created_by on every UnitSource whose unit id appears in
    *creators* (mapping of unit id -> creator user id)."""
    UnitSource = apps.get_model("pootle_store.UnitSource")

    def _assign_creator(source):
        # Look up the creator for this source's unit and set it.
        source.created_by_id = creators.get(source.unit_id)
        return source

    targets = list(UnitSource.objects.filter(unit_id__in=creators.keys()))
    Batch(UnitSource.objects, batch_size=500).update(
        targets,
        update_method=_assign_creator,
        update_fields=["created_by"],
        reduces=False)
def _add_submitted(qs, unit_changes):
    """Create unit-change rows recording who submitted each unit and
    when, tagging system-user submissions as SYSTEM."""
    submission_rows = qs.values_list("id", "submitted_on", "submitted_by")

    def _build_change(unit, timestamp, user):
        if user == get_system_user_id():
            origin = SubmissionTypes.SYSTEM
        else:
            origin = SubmissionTypes.WEB
        return dict(
            unit_id=unit,
            changed_with=origin,
            submitted_by_id=user,
            submitted_on=timestamp)

    Batch(unit_changes).create(submission_rows, _build_change)
def test_batch_update_reduce(store0, member):
    """The source queryset reduces as batches are created"""
    unit_batch = Batch(Unit, batch_size=2)
    first_pk = store0.units.first().pk
    store0.units.exclude(pk=first_pk).update(target_f="FOO")

    def _set_bar(unit):
        unit.target_f = "BAR"
        return unit

    updated = unit_batch.update(
        store0.units.filter(target_f="FOO"), _set_bar)
    assert updated == store0.units.count() - 1
    assert updated == store0.units.filter(target_f="BAR").count()
    store0.units.update(target_f="BAZ")
    retried = unit_batch.update(
        store0.units.filter(target_f="FOO"),
        _set_bar,
        update_fields=["source_f"])
    # units not updated, only source in update_fields
    assert retried == 0
    assert (
        store0.units.count()
        == store0.units.filter(target_f="BAZ").count())
def test_batch_lists(store0, member):
    """Batch.create and Batch.update accept plain lists as input."""
    sugg_batch = Batch(Suggestion.objects, batch_size=2)
    assert sugg_batch.target == Suggestion.objects
    assert sugg_batch.batch_size == 2
    highest_pk = (
        Suggestion.objects.order_by("-pk")
                          .values_list("pk", flat=True)
                          .first())

    def _suggestion_kwargs(unit, source, mtime):
        return dict(
            unit_id=unit,
            creation_time=mtime,
            target_f=source,
            user_id=member.id)

    unit_rows = list(store0.units.values_list("id", "source_f", "mtime"))
    sugg_batch.create(unit_rows, _suggestion_kwargs, reduces=False)
    created = Suggestion.objects.filter(pk__gt=highest_pk)
    assert created.count() == store0.units.count()
    edited = []
    for sugg in created:
        sugg.target_f = "suggestion %s" % sugg.id
        edited.append(sugg)
    sugg_batch.update(edited, update_fields=["target_f"], reduces=False)
    for sugg in Suggestion.objects.filter(pk__gt=highest_pk):
        assert sugg.target_f == "suggestion %s" % sugg.id
def create_changes_with_orm(apps):
    """Create a UnitChange row for each unit missing one, copying the
    comment/review/submission metadata from the unit itself."""
    UnitChange = apps.get_model("pootle_store.UnitChange")
    units_without_change = list(_missing_changes(apps).iterator())

    def _change_kwargs(unit):
        return dict(
            unit_id=unit.pk,
            changed_with=SubmissionTypes.WEB,
            commented_by_id=unit.commented_by_id,
            commented_on=unit.commented_on,
            reviewed_by_id=unit.reviewed_by_id,
            reviewed_on=unit.reviewed_on,
            submitted_by_id=unit.submitted_by_id,
            submitted_on=unit.submitted_on)

    Batch(UnitChange.objects, batch_size=500).create(
        units_without_change, _change_kwargs, reduces=False)
def create_sources_with_orm(apps, creators):
    """Create a UnitSource row per Unit, attributing creation to the
    mapped creator, or the system user when no mapping exists."""
    sysuser = get_system_user_id()
    UnitSource = apps.get_model("pootle_store.UnitSource")
    unit_qs = apps.get_model("pootle_store.Unit").objects.all()
    unit_rows = list(
        unit_qs.values_list(
            "id", "source_hash", "source_length", "source_wordcount"
        ).iterator())

    def _source_kwargs(pk, source_hash, source_length, source_wordcount):
        return dict(
            unit_id=pk,
            source_hash=source_hash,
            source_wordcount=source_wordcount,
            source_length=source_length,
            created_with=SubmissionTypes.WEB,
            created_by_id=creators.get(pk, sysuser))

    Batch(UnitSource.objects, batch_size=500).create(
        unit_rows, _source_kwargs, reduces=False)
def clean_abs_file_paths(apps, schema_editor):
    """Replace wrong absolute store file paths by proper relative paths
    built based on store.pootle_path values.

    Only stores in non-GNU ("nongnu") tree-style projects whose file
    path starts with "/" are touched; the new path is assembled as
    <project>/<language>/<dirs>/<filename> from the pootle_path parts.
    """
    store_model = apps.get_model("pootle_store.Store")
    stores = store_model.objects.filter(
        translation_project__project__treestyle="nongnu")
    stores = stores.filter(file__startswith="/").only("file", "pootle_path")
    to_update = []
    for store in stores.iterator():
        lang, prj, d, fn = split_pootle_path(store.pootle_path)
        store.file = os.path.join(prj, lang, d, fn)
        to_update.append(store)
    if to_update:
        result = Batch(store_model, batch_size=500).update(
            to_update, update_fields=["file"], reduces=False)
        # Lazy %-args: message is only formatted when DEBUG logging is
        # enabled (was an eager "%" interpolation before).
        logger.debug("Cleaned %s store paths", result)