def denorm_instance(payload): logging.info('[map_reduce.denorm_instance] payload %s' % json.dumps(payload)) source_model = util.get_model_by_name(payload['source_model']) target_model = util.get_model_by_name(payload['target_model']) related_field_name = payload['related_field'] fields = payload['fields'] storage = payload['storage'] if storage == 'scalar': related_field_name_filter = related_field_name+'_id' denorm_values = fields else: assert(storage == 'shared_dict') # will look up source primary key in target's list field related_field_name_filter = Inflector().pluralize(related_field_name) denorm_values = { 'denorm_data': { related_field_name_filter: { payload['instance_id']: fields } } } pipeline = MapperPipeline( 'denorm-target-%s-source-%s-instance-%s-at-%s' % (payload['target_model'], payload['source_model'], payload['instance_id'], now().isoformat()), handler_spec = util.convert_func_to_string(denorm_entity_mapper), input_reader_spec = 'djangoappengine.mapreduce.input_readers.DjangoModelInputReader', # FIXME: should be self-contained output_writer_spec = util.convert_func_to_string(NullOutputWriter), params = { # _convert_model_to_string is a different encoding than util.get_model_name, but we have to use because # we use djangoappengine's input reader 'entity_kind': _convert_model_to_string(target_model), 'queue_name': payload['queue_name'], 'filters': [ [related_field_name_filter, '=', payload['instance_id']], ], 'denorm_values': denorm_values, #'storage': storage, }, shards = payload['shards'] ) pipeline.start(queue_name=payload['queue_name']) pipeline_id = pipeline.pipeline_id logging.info('[map_reduce.denorm_instance] pipeline_id = %s' % str(pipeline_id))
def register(target_model, options): logging.info('[denorm.register] %s' % target_model) if not hasattr(target_model, '_meta'): raise AttributeError('The model being registered must derive from Model.') target = util.get_model_name(target_model) target_options = target_model._meta # register signals for target. use dispatch_uid to prevent duplicates. # about signals: https://docs.djangoproject.com/en/1.8/topics/signals/ # built-in signals: https://docs.djangoproject.com/en/1.8/ref/signals/ db_signals.post_init.connect(receivers.target_model_post_init, sender=target_model, dispatch_uid='denorm_target_%s_post_init'%target) db_signals.pre_save.connect(receivers.target_model_pre_save, sender=target_model, dispatch_uid='denorm_target_%s_pre_save'%target) db_signals.post_save.connect(receivers.target_model_post_save, sender=target_model, dispatch_uid='denorm_target_%s_post_save'%target) target_graph = core.TARGET_GRAPH[target_model] = core.TARGET_GRAPH.get(target_model, {}) for source, source_dict in options['sources'].iteritems(): strategy = source_dict.get('strategy', 'cursor') # options are: [cursor, mapreduce]. defaults to cursor. # TODO: support storage options 'list' and 'dict' storage = source_dict.get('storage', 'scalar') # choices: [scalar, shared_dict] if storage == 'scalar': target_foreign_key = target_options.get_field(source) # if field did not exist, then get_field would have raised FieldDoesNotExist if not isinstance(target_foreign_key, ForeignKey): raise AttributeError('The source field %s.%s must be a ForeignKey' % (target, source)) source_model = target_foreign_key.rel.to elif storage == 'shared_dict': target_foreign_key_list = target_options.get_field(Inflector().pluralize(source)) # if field did not exist, then get_field would have raised FieldDoesNotExist if not isinstance(target_foreign_key_list, tb_fields.ListField): raise AttributeError('The target field %s.%s must be a ListField' % (target, source)) # model must be explicitly configured, because target field does not specify it source_model = source_dict.get('model') # create denorm data field try: target_options.get_field('denorm_data') except FieldDoesNotExist: # field should not exist. now let's create it. denorm_data_field = JSONField(name='denorm_data', null=True, blank=True, decoder_kwargs={'cls': json_fields.JSONDecoder, 'parse_float':float}) denorm_data_field.contribute_to_class(target_model, 'denorm_data') else: # field was already created on prior source field # TODO: do at beginning of target model configuration to make sure developer did not define it pass else: logging.error('[denorm.register] invalid storage option %s' % storage) source_options = source_model._meta # register signals for source. use dispatch_uid to prevent duplicates. db_signals.post_init.connect(receivers.source_model_post_init, sender=source_model, dispatch_uid='denorm_source_%s_post_init'%source) db_signals.pre_save.connect(receivers.source_model_pre_save, sender=source_model, dispatch_uid='denorm_source_%s_pre_save'%source) db_signals.post_save.connect(receivers.source_model_post_save, sender=source_model, dispatch_uid='denorm_source_%s_post_save'%source) # FIXME: it's quirky that label and throttles must be configured under each target-source in app's denorm_fields, # FIXME: but it gets applied here for entire source (not target dependent). it probably should be configured once # FIXME: per source, but how do accomplish that in the current configuration design? source_graph = core.SOURCE_GRAPH[source_model] = core.SOURCE_GRAPH.get(source_model, { 'label': source_dict.get('label'), 'throttles': source_dict.get('throttles'), 'fields': {} }) source_graph_fields = source_graph['fields'] # mark model as registered for denormalization source_model._denorm_registered = True # clone list, so that if we add _id below, it doesn't corrupt original list denorm_field_names = list(source_dict['fields']) target_graph[source] = { 'fields': denorm_field_names, 'storage': storage, 'source_model': source_model # important for shared_dict storage, because we don't know source model based on list field } for i, denorm_field_name in enumerate(denorm_field_names): source_field = source_options.get_field(denorm_field_name) # if field did not exist, then get_field would have raised FieldDoesNotExist target_field_name = '%s_%s' % (source, denorm_field_name) if storage == 'scalar': try: target_options.get_field(target_field_name) except FieldDoesNotExist: # field should not exist, so we're good pass else: raise AttributeError('The denorm field %s.%s must not already exist' % (target_model.__name__, target_field_name)) # create target field of same type as source_field target_field = _copy_field(source_field, target_field_name, target) target_field.contribute_to_class(target_model, target_field_name) #print('added field %s with name %s, column %s' % (target_field, target_field_name, target_field.column)) #print('added field %s with name %s' % (target_model._meta.get_field(target_field_name), target_field_name)) else: assert(storage == 'shared_dict') # denorm_data field was already created outside this iteration loop pass # if source field is a foreign key, then we reference its key rather than the actual related field, # because we are not deferencing further than the key, and do not want to do an extra db lookup. if isinstance(source_field, ForeignKey): denorm_field_name += '_id' denorm_field_names[i] = denorm_field_name source_field_graph = source_graph_fields[denorm_field_name] = source_graph_fields.get(denorm_field_name, []) source_field_graph.append({ 'target_model': target_model, 'source': source, 'strategy': strategy, 'storage': storage, 'shards': source_dict.get('shards') and util.convert_func_to_string(source_dict['shards']) })
def register(target_model, options): logging.info('[denorm.register] %s' % target_model) if not hasattr(target_model, '_meta'): raise AttributeError( 'The model being registered must derive from Model.') target = util.get_model_name(target_model) target_options = target_model._meta # register signals for target. use dispatch_uid to prevent duplicates. # about signals: https://docs.djangoproject.com/en/1.8/topics/signals/ # built-in signals: https://docs.djangoproject.com/en/1.8/ref/signals/ db_signals.post_init.connect(receivers.target_model_post_init, sender=target_model, dispatch_uid='denorm_target_%s_post_init' % target) db_signals.pre_save.connect(receivers.target_model_pre_save, sender=target_model, dispatch_uid='denorm_target_%s_pre_save' % target) db_signals.post_save.connect(receivers.target_model_post_save, sender=target_model, dispatch_uid='denorm_target_%s_post_save' % target) target_graph = core.TARGET_GRAPH[target_model] = core.TARGET_GRAPH.get( target_model, {}) for source, source_dict in options['sources'].iteritems(): strategy = source_dict.get( 'strategy', 'cursor') # options are: [cursor, mapreduce]. defaults to cursor. # TODO: support storage options 'list' and 'dict' storage = source_dict.get('storage', 'scalar') # choices: [scalar, shared_dict] if storage == 'scalar': target_foreign_key = target_options.get_field(source) # if field did not exist, then get_field would have raised FieldDoesNotExist if not isinstance(target_foreign_key, ForeignKey): raise AttributeError( 'The source field %s.%s must be a ForeignKey' % (target, source)) source_model = target_foreign_key.rel.to elif storage == 'shared_dict': target_foreign_key_list = target_options.get_field( Inflector().pluralize(source)) # if field did not exist, then get_field would have raised FieldDoesNotExist if not isinstance(target_foreign_key_list, tb_fields.ListField): raise AttributeError( 'The target field %s.%s must be a ListField' % (target, source)) # model must be explicitly configured, because target field does not specify it source_model = source_dict.get('model') # create denorm data field try: target_options.get_field('denorm_data') except FieldDoesNotExist: # field should not exist. now let's create it. denorm_data_field = JSONField(name='denorm_data', null=True, blank=True, decoder_kwargs={ 'cls': json_fields.JSONDecoder, 'parse_float': float }) denorm_data_field.contribute_to_class(target_model, 'denorm_data') else: # field was already created on prior source field # TODO: do at beginning of target model configuration to make sure developer did not define it pass else: logging.error('[denorm.register] invalid storage option %s' % storage) source_options = source_model._meta # register signals for source. use dispatch_uid to prevent duplicates. db_signals.post_init.connect( receivers.source_model_post_init, sender=source_model, dispatch_uid='denorm_source_%s_post_init' % source) db_signals.pre_save.connect(receivers.source_model_pre_save, sender=source_model, dispatch_uid='denorm_source_%s_pre_save' % source) db_signals.post_save.connect( receivers.source_model_post_save, sender=source_model, dispatch_uid='denorm_source_%s_post_save' % source) # FIXME: it's quirky that label and throttles must be configured under each target-source in app's denorm_fields, # FIXME: but it gets applied here for entire source (not target dependent). it probably should be configured once # FIXME: per source, but how do accomplish that in the current configuration design? source_graph = core.SOURCE_GRAPH[source_model] = core.SOURCE_GRAPH.get( source_model, { 'label': source_dict.get('label'), 'throttles': source_dict.get('throttles'), 'fields': {} }) source_graph_fields = source_graph['fields'] # mark model as registered for denormalization source_model._denorm_registered = True # clone list, so that if we add _id below, it doesn't corrupt original list denorm_field_names = list(source_dict['fields']) target_graph[source] = { 'fields': denorm_field_names, 'storage': storage, 'source_model': source_model # important for shared_dict storage, because we don't know source model based on list field } for i, denorm_field_name in enumerate(denorm_field_names): source_field = source_options.get_field(denorm_field_name) # if field did not exist, then get_field would have raised FieldDoesNotExist target_field_name = '%s_%s' % (source, denorm_field_name) if storage == 'scalar': try: target_options.get_field(target_field_name) except FieldDoesNotExist: # field should not exist, so we're good pass else: raise AttributeError( 'The denorm field %s.%s must not already exist' % (target_model.__name__, target_field_name)) # create target field of same type as source_field target_field = _copy_field(source_field, target_field_name, target) target_field.contribute_to_class(target_model, target_field_name) #print('added field %s with name %s, column %s' % (target_field, target_field_name, target_field.column)) #print('added field %s with name %s' % (target_model._meta.get_field(target_field_name), target_field_name)) else: assert (storage == 'shared_dict') # denorm_data field was already created outside this iteration loop pass # if source field is a foreign key, then we reference its key rather than the actual related field, # because we are not deferencing further than the key, and do not want to do an extra db lookup. if isinstance(source_field, ForeignKey): denorm_field_name += '_id' denorm_field_names[i] = denorm_field_name source_field_graph = source_graph_fields[ denorm_field_name] = source_graph_fields.get( denorm_field_name, []) source_field_graph.append({ 'target_model': target_model, 'source': source, 'strategy': strategy, 'storage': storage, 'shards': source_dict.get('shards') and util.convert_func_to_string(source_dict['shards']) })