def split_input(cls, job_config):
    """Inherit docs.

    Builds one reader per (property range, namespace range) pair.

    When the query filters do not qualify for property-range sharding, the
    work is delegated to the parent class's ``split_input``. Otherwise:

    * If the query names a namespace, the property range is split into
      ``shard_count`` pieces and every piece is paired with a copy of a
      namespace range pinned to that single namespace.
    * If no namespace is named and at most ``MAX_NAMESPACES_FOR_KEY_SHARD``
      namespace keys are found, the property range is still split and each
      piece is paired with a ``NamespaceRange`` built from the app alone.
    * If more namespaces than that are found, the namespace range is split
      instead and each piece is paired with a copy of the unsplit property
      range.

    Returns:
      A list of readers, or None when no namespace keys are found.
    """
    reader_params = job_config.input_reader_params
    num_shards = job_config.shard_count
    spec = cls._get_query_spec(reader_params)

    shard_by_property = property_range.should_shard_by_property_range(
        spec.filters)
    if not shard_by_property:
        return super(ModelDatastoreInputReader, cls).split_input(job_config)

    base_range = property_range.PropertyRange(
        spec.filters, spec.model_class_path)
    prop_ranges = base_range.split(num_shards)

    if spec.ns:
        # The caller asked for one specific namespace.
        pinned = namespace_range.NamespaceRange(
            namespace_start=spec.ns,
            namespace_end=spec.ns,
            _app=spec.app)
        namespace_ranges = [copy.copy(pinned) for _ in prop_ranges]
    else:
        max_for_key_shard = cls.MAX_NAMESPACES_FOR_KEY_SHARD
        # Fetch one key more than the limit so "too many" can be detected.
        found_keys = namespace_range.get_namespace_keys(
            spec.app, max_for_key_shard + 1)
        if not found_keys:
            return

        if len(found_keys) > max_for_key_shard:
            # Lots of namespaces: shard by namespace, reuse the whole
            # property range in every shard.
            namespace_ranges = namespace_range.NamespaceRange.split(
                n=num_shards,
                contiguous=False,
                can_query=lambda: True,
                _app=spec.app)
            prop_ranges = [copy.copy(base_range) for _ in namespace_ranges]
        else:
            # Only a few namespaces: keep sharding by property range.
            namespace_ranges = [
                namespace_range.NamespaceRange(_app=spec.app)
                for _ in prop_ranges
            ]

    assert len(prop_ranges) == len(namespace_ranges)

    range_iterators = [
        db_iters.RangeIteratorFactory.create_property_range_iterator(
            prop, ns, spec)
        for prop, ns in zip(prop_ranges, namespace_ranges)
    ]
    return [cls(range_iterator) for range_iterator in range_iterators]
def split_input(cls, job_config):
    """Inherit docs.

    Shards the query either by property range or by namespace range and
    wraps each resulting (property range, namespace range) pair in a reader.

    Falls back to the parent class's ``split_input`` when the filters are
    not suitable for property-range sharding, and returns None when no
    namespace was requested and no namespace keys are found.
    """
    params = job_config.input_reader_params
    shard_count = job_config.shard_count
    query_spec = cls._get_query_spec(params)

    if property_range.should_shard_by_property_range(query_spec.filters):
        whole_range = property_range.PropertyRange(
            query_spec.filters, query_spec.model_class_path)
        property_ranges = whole_range.split(shard_count)
    else:
        return super(ModelDatastoreInputReader, cls).split_input(job_config)

    if query_spec.ns:
        # A namespace was given explicitly: every shard gets its own copy of
        # a range that starts and ends at that namespace.
        single_ns = namespace_range.NamespaceRange(
            namespace_start=query_spec.ns,
            namespace_end=query_spec.ns,
            _app=query_spec.app)
        ns_shards = []
        for _ in property_ranges:
            ns_shards.append(copy.copy(single_ns))
    else:
        # Ask for one more key than the threshold to tell "few" from "many".
        keys = namespace_range.get_namespace_keys(
            query_spec.app, cls.MAX_NAMESPACES_FOR_KEY_SHARD + 1)
        if not keys:
            return

        if len(keys) <= cls.MAX_NAMESPACES_FOR_KEY_SHARD:
            # Few namespaces: sharding stays on the property range.
            ns_shards = []
            for _ in property_ranges:
                ns_shards.append(
                    namespace_range.NamespaceRange(_app=query_spec.app))
        else:
            # Many namespaces: shard on namespaces and give each shard a
            # copy of the complete property range.
            ns_shards = namespace_range.NamespaceRange.split(
                n=shard_count,
                contiguous=False,
                can_query=lambda: True,
                _app=query_spec.app)
            property_ranges = []
            for _ in ns_shards:
                property_ranges.append(copy.copy(whole_range))

    assert len(property_ranges) == len(ns_shards)

    # Build every iterator first, then wrap them, keeping the two phases
    # separate.
    make_iterator = (
        db_iters.RangeIteratorFactory.create_property_range_iterator)
    iterators = []
    for one_property_range, one_ns_range in zip(property_ranges, ns_shards):
        iterators.append(
            make_iterator(one_property_range, one_ns_range, query_spec))

    readers = []
    for iterator in iterators:
        readers.append(cls(iterator))
    return readers
def testShouldShardByPropertyRange(self):
    """Checks should_shard_by_property_range on "=" and "<=" filters."""
    # A lone equality filter must not trigger property-range sharding.
    equality_only = [("a", "=", 1)]
    decision = property_range.should_shard_by_property_range(equality_only)
    self.assertFalse(decision)

    # An inequality filter must trigger it.
    with_inequality = [("a", "<=", 1)]
    decision = property_range.should_shard_by_property_range(with_inequality)
    self.assertTrue(decision)
def testShouldShardByPropertyRange(self):
    """Verifies the sharding predicate: False for "=", True for "<="."""
    should_shard = property_range.should_shard_by_property_range

    # Equality filter on "a": expected to be rejected.
    self.assertFalse(should_shard([("a", "=", 1)]))
    # Inequality filter on "a": expected to be accepted.
    self.assertTrue(should_shard([("a", "<=", 1)]))