def save_objects(migration_rules, model_name, model_dicts, relic_ids,
                 model_process_fn, instance, message_receiver=None):
    if model_name not in relic_ids:
        relic_ids[model_name] = {}

    model_key_map = relic_ids[model_name]
    # model_key_map is filled from the database each time. Combined
    # with this filter, the command becomes idempotent.
    dicts_to_save = (d for d in model_dicts
                     if d['pk'] not in model_key_map)

    for model_dict in dicts_to_save:
        dependencies = (migration_rules
                        .get(model_name, {})
                        .get('dependencies', {})
                        .items())

        # keep a pristine copy of the fixture dict so process functions
        # can still see the original OTM1 field values
        old_model_dict = model_dict.copy()
        old_model_dict['fields'] = model_dict['fields'].copy()

        # rewrite the fixture so that otm1 pks are replaced by
        # their corresponding otm2 pks
        if dependencies:
            for name, field in dependencies:
                old_id = model_dict['fields'][field]
                if old_id:
                    old_id_to_new_id = relic_ids[name]
                    try:
                        new_id = old_id_to_new_id[old_id]
                    except KeyError:
                        raise MigrationException("Dependency not found. "
                                                 "Have you imported %s yet?"
                                                 % name)
                    model_dict['fields'][field] = new_id

        model = dict_to_model(migration_rules, model_name,
                              model_dict, instance)

        if model == data_util.DO_NOT_PROCESS:
            continue

        model = model_process_fn(model_dict, model, instance,
                                 old_model_dict=old_model_dict)

        if model != data_util.PROCESS_WITHOUT_SAVE and model is not None:
            pk = model.pk
            for fn in migration_rules[model_name].get('postsave_actions',
                                                      []):
                fn(model, model_dict)
            if callable(message_receiver):
                message_receiver("saved model: %s - %s"
                                 % (model_name, model.pk))
        else:
            pk = models.UNBOUND_MODEL_ID

        model_key_map[model_dict['pk']] = pk
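# A sketch of how save_objects is wired up by the management command
# below (names such as `rules`, `plot_dicts`, and `process_fns` are
# hypothetical stand-ins for illustration):
#
#   relic_ids = {model: {} for model in rules}
#   save_objects(rules, 'plot', plot_dicts, relic_ids,
#                process_fns['plot'], instance,
#                message_receiver=print)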
def save_species(migration_rules, migration_event, species_dict,
                 species_obj, instance, **kwargs):
    non_migrated_species = Species.objects.raw("""
        SELECT * FROM treemap_species
        WHERE instance_id=%(instance_id)s
        AND id NOT IN (SELECT otm2_model_id
                       FROM otm1_migrator_otm1modelrelic
                       WHERE otm2_model_name='species'
                       AND instance_id=%(instance_id)s)
    """ % {'instance_id': instance.pk})

    if len(list(non_migrated_species)) > 0:
        raise MigrationException("You cannot migrate species, at all, "
                                 "if any species for this instance are "
                                 "not the result of a migration. This is "
                                 "necessary to avoid record duplication.")

    species_obj.save_with_system_user_bypass_auth()

    OTM1ModelRelic.objects.create(instance=instance,
                                  migration_event=migration_event,
                                  otm1_model_id=species_dict['pk'],
                                  otm2_model_name='species',
                                  otm2_model_id=species_obj.pk)
    return species_obj
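# The raw SQL above amounts to "species rows for this instance that no
# relic points at". A rough ORM equivalent, shown only as a sketch (an
# untested assumption; the raw query is what actually runs):
#
#   Species.objects.filter(instance=instance).exclude(
#       pk__in=OTM1ModelRelic.objects
#                            .filter(instance=instance,
#                                    otm2_model_name='species')
#                            .values_list('otm2_model_id', flat=True))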
def set_boundary_fields(boundary_obj, boundary_dict):
    # The 'city' column in the Treezilla boundary table is a
    # boundary type code
    boundary_type = BOUNDARY_TYPES.get(
        boundary_dict['fields'].get('city', None), None)

    if boundary_type is None:
        raise MigrationException("boundary_dict missing valid city value: "
                                 + str(boundary_dict))

    boundary_obj.category = boundary_type['name']
    boundary_obj.sort_order = boundary_type['sort_order']
    return boundary_obj
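# BOUNDARY_TYPES is assumed to map each city code to a display name and
# sort order; a hypothetical sketch of its shape (codes and values
# invented for illustration):
#
#   BOUNDARY_TYPES = {
#       'W': {'name': 'Ward', 'sort_order': 1},
#       'D': {'name': 'District', 'sort_order': 2},
#   }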
def create_override(species_obj, species_dict):
    itree_code = species_dict['fields'].get('itree_code', None)
    if not itree_code:
        raise MigrationException("species_dict missing itree_code: "
                                 + str(species_dict))

    override = ITreeCodeOverride(
        instance_species_id=species_obj.pk,
        region=ITreeRegion.objects.get(code=TREEZILLA_ITREE_REGION_CODE),
        itree_code=itree_code)
    override.save_with_user(User.system_user())
    return species_obj
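# A minimal species_dict that satisfies create_override (the pk and the
# i-Tree code value here are hypothetical):
#
#   {'pk': 7, 'fields': {'itree_code': 'BDL OTHER'}}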
def _base_process_comment(migration_rules, migration_event, relic_ids,
                          model_dict, comment_obj, instance):
    comment_obj.site_id = 1

    if comment_obj.content_type_id == models.UNBOUND_MODEL_ID:
        try:
            print("Can't import comment %s because "
                  "it is assigned to a ContentType (model) "
                  "that does not exist in OTM2 .. SKIPPING"
                  % comment_obj.comment.encode('utf-8'))
        except Exception:
            # There was a problem encoding the comment string while
            # printing the warning message. Ignore it and move on
            # rather than crash the import.
            print("Can't import comment because "
                  "it is assigned to a ContentType (model) "
                  "that does not exist in OTM2 .. SKIPPING")
        return None

    content_type = ContentType.objects.get(pk=comment_obj.content_type_id)

    # sometimes this field is called object_pk, other times it is
    # called object_id. Try both.
    old_object_id = int(model_dict['fields'].get(
        'object_pk', model_dict['fields'].get('object_id')))

    try:
        new_object_id = relic_ids[content_type.model][old_object_id]
    except KeyError:
        raise MigrationException("threadedcomment dependency not met. "
                                 "did you import %s yet?"
                                 % comment_obj.content_type.model)

    if new_object_id == models.UNBOUND_MODEL_ID:
        print("Can't import comment '%s' because "
              "its model object '%s:%s' does "
              "not exist in OTM2. It probably "
              "was marked as deleted in OTM1. .. SKIPPING"
              % (comment_obj.comment[:10] + '...',
                 content_type.model, old_object_id))
        return None

    # object_id is called object_pk in later versions
    comment_obj.object_pk = new_object_id
    return comment_obj
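# The fixture dict for a comment can spell its target pk either way;
# both of these hypothetical sketches would be handled by
# _base_process_comment:
#
#   {'pk': 3, 'fields': {'object_pk': 812, ...}}   # later versions
#   {'pk': 3, 'fields': {'object_id': 812, ...}}   # earlier versions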
def overwrite_old_pks(migration_rules, model_hash, model_name,
                      dependency_ids):
    dependencies = (migration_rules
                    .get(model_name, {})
                    .get('dependencies', {})
                    .items())

    # rewrite the fixture so that otm1 pks are replaced by
    # their corresponding otm2 pks
    if dependencies:
        for name, field in dependencies:
            old_id = model_hash['fields'][field]
            if old_id:
                old_id_to_new_id = dependency_ids[name]
                try:
                    new_id = old_id_to_new_id[old_id]
                except KeyError:
                    raise MigrationException("Dependency not found. "
                                             "Have you imported %s yet?"
                                             % name)
                model_hash['fields'][field] = new_id
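# The 'dependencies' rule maps a depended-on model name to the fixture
# field holding its pk. A hypothetical sketch of the expected shape:
#
#   MIGRATION_RULES = {
#       'tree': {'dependencies': {'plot': 'plot'}},
#   }
#
# With dependency_ids = {'plot': {812: 44}}, a tree fixture whose
# 'plot' field is 812 would be rewritten to 44.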
def handle(self, *args, **options):
    if settings.DEBUG:
        self.stdout.write('In order to run this command you must '
                          'manually set DEBUG=False in your settings '
                          'file. Unfortunately, django runs out of '
                          'memory when this command is run in DEBUG '
                          'mode.')
        return 1

    if options['config_file']:
        config_data = json.load(open(options['config_file'], 'r'))
        for k, v in config_data.items():
            if not options.get(k, None):
                options[k] = v

    rule_module = (options['rule_module'] or
                   'otm1_migrator.migration_rules.standard_otm1')
    migration_mod = importlib.import_module(rule_module)
    migration_rules = migration_mod.MIGRATION_RULES
    try:
        model_order = migration_mod.MODEL_ORDER
    except AttributeError:
        model_order = ORDER
    try:
        udfs = migration_mod.UDFS
    except AttributeError:
        udfs = {}

    # user photos live on userprofile in otm1
    userphoto_path = options.get('userphoto_path', None)
    user_photo_fixture_specified_but_not_base_path = (
        userphoto_path is None and
        options.get('userphoto_fixture') is not None)

    if user_photo_fixture_specified_but_not_base_path:
        raise MigrationException('Must specify the user photo path to '
                                 'import photos. Please include a %s or '
                                 '%s flag when importing.'
                                 % USERPHOTO_ARGS)

    treephoto_path = options.get('treephoto_path', None)
    treephoto_fixture_with_no_path = (
        treephoto_path is None and
        options.get('treephoto_fixture') is not None)

    if treephoto_fixture_with_no_path:
        raise MigrationException('Must specify the tree photo path to '
                                 'import photos.')

    ################################################
    # BEGIN SIDE EFFECTS
    ################################################

    migration_event = MigrationEvent.objects.create()

    if options['instance']:
        # initialize system_user??
        instance, __ = self.setup_env(*args, **options)
    else:
        migration_event.status = MigrationEvent.FAILURE
        migration_event.save()
        self.stdout.write('Invalid instance provided.')
        return 1

    create_udfs(udfs, instance)
    add_udfs_to_migration_rules(migration_rules, udfs, instance)

    relic_ids = {model: {} for model in migration_rules}

    def default_partial(fn, *args):
        return partial(fn, migration_rules, migration_event, *args)

    # TODO: should this be merged into MIGRATION_RULES?
    process_fns = {
        'boundary': default_partial(save_boundary),
        'user': default_partial(save_user),
        'audit': default_partial(save_audit, relic_ids),
        'species': default_partial(save_species),
        'plot': default_partial(save_plot),
        'tree': default_partial(save_tree),
        'treephoto': default_partial(save_treephoto, treephoto_path),
        'contenttype': default_partial(process_contenttype),
        'reputation': default_partial(process_reputation),
        'registrationprofile': default_partial(save_registrationprofile),
        'userprofile': default_partial(process_userprofile,
                                       userphoto_path),
        'threadedcomment': default_partial(save_threadedcomment,
                                           relic_ids),
        'comment': default_partial(save_comment, relic_ids),
        'treefavorite': default_partial(save_treefavorite),
    }

    user_relics = OTM1UserRelic.objects.filter(instance=instance)
    model_relics = (OTM1ModelRelic
                    .objects
                    .filter(instance=instance)
                    .iterator())
    comment_relics = (OTM1CommentRelic
                      .objects
                      .filter(instance=instance)
                      .iterator())

    def _rpad_string(desired_length, pad_char, string):
        return string + (desired_length - len(string)) * pad_char

    self.stdout.write(_rpad_string(50, ".", "Reading relics into memory"))

    # relic_ids is a cache of old pks to new pks; it is inflated
    # from database records for performance.
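    # A sketch of the cache's shape once inflated (pks hypothetical):
    #
    #   relic_ids = {
    #       'user': {10: 3},     # otm1 pk 10 -> otm2 pk 3
    #       'plot': {812: 44},
    #   }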
    for relic in chain(user_relics, model_relics, comment_relics):
        model = relic.otm2_model_name
        otm1_id = relic.otm1_model_id
        relic_ids[model][otm1_id] = relic.otm2_model_id

    self.stdout.write(
        _rpad_string(50, ".", "Done reading relics into memory"))

    def _get_json_dict(model_name):
        """
        Look for fixtures of the form '<model>_fixture' that were
        passed in as command line args and load them as python objects.
        """
        option_name = model_name + '_fixture'
        if options[option_name] and os.path.exists(options[option_name]):
            self.stdout.write(
                "%sSUCCESS" % _rpad_string(50, ".",
                                           "Loaded fixture '%s'"
                                           % option_name))
            with open(options[option_name], 'r') as model_file:
                json_dict = json.load(model_file)
        else:
            self.stdout.write(
                "%sSKIPPING" % _rpad_string(50, ".",
                                            "No valid '%s' fixture "
                                            % model_name))
            json_dict = None
        return json_dict

    for model in model_order:
        json_dict = _get_json_dict(model)

        if json_dict:
            # dicts must be sorted by pk for the case of models
            # that have foreign keys to themselves
            sorted_dicts = sorted(json_dict,
                                  key=operator.itemgetter('pk'))

            try:
                save_objects(migration_rules,
                             model, sorted_dicts,
                             relic_ids, process_fns[model],
                             instance,
                             message_receiver=print)
            except MigrationException:
                migration_event.status = MigrationEvent.FAILURE
                migration_event.save()
                raise

    migration_event.status = MigrationEvent.SUCCESS
    migration_event.save()
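# A hypothetical invocation of this command (the command name and flag
# spellings are assumptions; check the command's add_arguments for the
# real names):
#
#   python manage.py perform_migration \
#       --instance=myinstance \
#       --config-file=migration_config.json \
#       --rule-module=otm1_migrator.migration_rules.standard_otm1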
def handle(self, *args, **options):
    if settings.DEBUG:
        self.stdout.write('In order to run this command you must '
                          'manually set DEBUG=False in your settings '
                          'file. Unfortunately, django runs out of '
                          'memory when this command is run in DEBUG '
                          'mode.')
        return 1

    if options['instance']:
        # initialize system_user??
        instance, _ = self.setup_env(*args, **options)
    else:
        self.stdout.write('Invalid instance provided.')
        return 1

    rule_module = (options['rule_module'] or
                   'otm1_migrator.migration_rules.standard_otm1')
    migration_mod = importlib.import_module(rule_module)
    migration_rules = migration_mod.MIGRATION_RULES

    # look for fixtures of the form '<model>_fixture' that
    # were passed in as command line args and load them as
    # python objects
    json_hashes = {}
    for model_name in migration_rules:
        option_name = model_name + '_fixture'
        try:
            with open(options[option_name], 'r') as model_file:
                json_hashes[model_name] = json.load(model_file)
        except Exception:
            # the fixture option may be missing, the file unreadable,
            # or its contents invalid JSON
            json_hashes[model_name] = []
            self.stdout.write('No valid %s fixture provided ... SKIPPING'
                              % model_name)

    # user photos live on userprofile in otm1
    userphoto_path = options.get('userphoto_path', None)
    user_photo_fixture_specified_but_not_base_path = (
        'userprofile' in json_hashes and
        json_hashes['userprofile'] and
        userphoto_path is None)

    if user_photo_fixture_specified_but_not_base_path:
        raise MigrationException('Must specify the user photo path to '
                                 'import photos. Please include a %s or '
                                 '%s flag when importing.'
                                 % USERPHOTO_ARGS)

    treephoto_path = options.get('treephoto_path', None)
    treephoto_fixture_with_no_path = (
        'treephoto' in json_hashes and
        json_hashes['treephoto'] and
        treephoto_path is None)

    if treephoto_fixture_with_no_path:
        raise MigrationException('Must specify the tree photo path to '
                                 'import photos.')

    # TODO: don't call this dependency anymore.
    # It's an idempotency checker too.
    dependency_ids = {model: {} for model in migration_rules}

    # TODO: should this be merged into MIGRATION_RULES?
    save_fns = {
        'user': partial(save_user, migration_rules),
        'audit': partial(save_audit, migration_rules, dependency_ids),
        'species': partial(save_species, migration_rules),
        'plot': partial(save_other_with_user, migration_rules, 'plot'),
        'tree': partial(save_other_with_user, migration_rules, 'tree'),
        'treephoto': partial(save_treephoto, migration_rules,
                             treephoto_path),
        'contenttype': make_contenttype_relics,
        'userprofile': partial(process_userprofile, userphoto_path),
        'threadedcomment': partial(save_threadedcomment,
                                   migration_rules, dependency_ids),
        'comment': partial(save_comment, migration_rules, dependency_ids),
    }

    # dependency_ids is a cache of old pks to new pks; it is inflated
    # from database records for performance.
    for relic in OTM1UserRelic.objects.filter(instance=instance):
        dependency_ids['user'][relic.otm1_id] = relic.otm2_user_id

    model_relics = OTM1ModelRelic.objects.filter(instance=instance)
    comment_relics = OTM1CommentRelic.objects.filter(instance=instance)

    for relic in chain(model_relics, comment_relics):
        model_ids = dependency_ids[relic.otm2_model_name]
        model_ids[relic.otm1_model_id] = relic.otm2_model_id

    for model in migration_rules:
        if json_hashes[model]:
            # hashes must be sorted by pk for the case of models
            # that have foreign keys to themselves
            sorted_hashes = sorted(json_hashes[model],
                                   key=operator.itemgetter('pk'))

            hashes_to_saved_objects(migration_rules, model, sorted_hashes,
                                    dependency_ids, save_fns[model],
                                    instance)
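# After the fixture-loading loop, json_hashes maps each model name to a
# list of Django fixture-style dicts (values hypothetical):
#
#   json_hashes = {
#       'user': [{'pk': 1, 'model': 'auth.user', 'fields': {...}}],
#       'plot': [],   # no fixture provided
#   }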