def _fetch_reverse_dependencies(self, model, qs, result, fetched_pks):
    """
    Collect the objects that reference ``qs`` through the relations listed
    for ``model`` in ``self.reverse_mapping``.

    The forward dependencies of ``qs`` are collected first. Every newly
    found object is appended to ``result[dependency]`` and its primary key
    is recorded in ``fetched_pks[dependency]``. The number of objects taken
    per dependency is capped by ``self.model_settings`` (falling back to
    ``DEFAULT_LIMIT``), minus what has already been fetched for it.
    """
    self._fetch_forward_dependencies(model, qs, result, fetched_pks)

    for dependency, attr in self.reverse_mapping.get(model, []):
        logger.info(
            "fetching dependency %s <- %s",
            model_name(model),
            model_name(dependency),
        )

        seen_pks = fetched_pks[dependency]
        quota = self.model_settings.get(dependency, DEFAULT_LIMIT)
        remaining = max(0, quota - len(seen_pks))

        referencing = dependency._base_manager.filter(**{attr + "__in": qs})
        qs_new = referencing.exclude(pk__in=seen_pks)[:remaining]

        # Nothing new for this dependency: do not record or recurse.
        if not qs_new:
            continue

        result[dependency].extend(qs_new)
        seen_pks.update(map(attrgetter("pk"), qs_new))

        # The reverse walk already starts with a forward walk, so the
        # forward-only branch is needed only when there is no reverse entry.
        if dependency in self.reverse_mapping:
            self._fetch_reverse_dependencies(
                dependency, qs_new, result, fetched_pks)
        elif dependency in self.forward_mapping:
            self._fetch_forward_dependencies(
                dependency, qs_new, result, fetched_pks)
def collect_data(self, model_settings, limit=None):
    """
    Build the list of objects to export.

    The objects returned by ``get_custom_data`` come first; override that
    method to add more data. Models already covered by the custom data are
    dropped from ``model_settings``. For each remaining ``(model, limit)``
    entry (only the first ``limit`` entries when ``limit`` is given) the
    newest rows by primary key are added, and the forward dependencies of
    those rows are appended at the very end of the returned list.
    """
    # first add the data we are manually specifying
    logger.info("loading the custom data first")
    custom_data, fetched_pks = self.get_custom_data()

    requested_models = set(map(itemgetter(0), model_settings))
    for custom_model in custom_data:
        if custom_model not in requested_models:
            continue
        model_settings = [
            entry for entry in model_settings if entry[0] != custom_model
        ]
        logger.info(
            "skipping already collected data for custom model %s",
            model_name(custom_model),
        )

    objects = list(chain.from_iterable(custom_data.values()))
    dependencies = defaultdict(list)

    # A distinct loop name keeps the ``limit`` parameter from being shadowed.
    for model, model_limit in model_settings[:limit]:
        logger.info("getting %s items for model %s", model_limit,
                    model_name(model))
        queryset = model._default_manager.order_by("-pk")[:model_limit]
        objects.extend(queryset)
        self._fetch_forward_dependencies(model, queryset, dependencies,
                                         fetched_pks)

    objects.extend(chain.from_iterable(dependencies.values()))
    return objects
def _fetch_forward_dependencies(self, model, qs, result, fetched_pks):
    """
    Collect the objects that ``qs`` points at through the relations listed
    for ``model`` in ``self.forward_mapping``.

    ``qs`` is an iterable of ``model`` instances (normally a queryset).
    Every newly found object is appended to ``result[dependency]`` and its
    primary key is recorded in ``fetched_pks[dependency]`` so it is not
    collected twice. The walk recurses into each dependency that has its
    own forward mapping, but only while new objects keep turning up, which
    is what ends the recursion on cyclic relations.
    """
    for dependency, attr in self.forward_mapping.get(model, []):
        # ``dependency`` is a model class, so this has to be a subclass
        # check; ``isinstance`` on a class never matches a model class.
        if self.exclude_content_type and issubclass(
                dependency, (ContentType, Permission)):
            continue

        logger.info(
            "fetching dependency %s -> %s",
            model_name(model),
            model_name(dependency),
        )

        if isinstance(model._meta.get_field(attr), ManyToManyField):
            qs_new = dependency._base_manager.none()
            # Best effort only: the prefetch is skipped when the queryset
            # does not support it.
            try:
                qs = qs.prefetch_related(attr)
            except NotSupportedError:
                pass
            for q in qs:
                qs_new = qs_new.union(
                    getattr(q, attr).exclude(
                        pk__in=fetched_pks[dependency]).order_by())
        else:
            # Unset (NULL) foreign keys cannot match any primary key, so
            # leave them out of the lookup.
            target_pks = {
                fk for fk in map(attrgetter(attr + "_id"), qs)
                if fk is not None
            }
            qs_new = dependency._base_manager.filter(
                pk__in=target_pks).exclude(pk__in=fetched_pks[dependency])

        if qs_new:
            result[dependency].extend(qs_new)
            fetched_pks[dependency].update(map(attrgetter("pk"), qs_new))
            if dependency in self.forward_mapping:
                self._fetch_forward_dependencies(dependency, qs_new, result,
                                                 fetched_pks)
def get_model_settings(self):
    """
    Decide how many rows to sample from each model in ``self.models``.

    Returns a list of ``(model, limit)`` tuples in the order of
    ``self.models``. Models whose ``get_max_id`` value exceeds 50 get a
    limit of 10, all others get ``DEFAULT_LIMIT``; models returned by
    ``get_full_required`` are raised to 2000 regardless.
    """
    full_required = self.get_full_required()
    settings = []

    for model in self.models:
        logger.info("getting settings for %s", model_name(model))

        sample_size = 10 if get_max_id(model) > 50 else DEFAULT_LIMIT
        # Models that are required in full override the size-based choice.
        if model in full_required:
            sample_size = 2000

        settings.append((model, sample_size))

    return settings
def get_all_models(self):
    """
    Return a list of ``(model, name)`` pairs, one for every model yielded
    by ``get_models``, where ``name`` is the result of ``model_name``.
    """
    pairs = []
    for model in get_models():
        pairs.append((model, model_name(model)))
    return pairs