def __init__(self, config, use_shards=True):
    """Prepare the importer's filter, input reader, counters and shard source.

    Args:
        config: object supplying ``importer_batch_size`` and
            ``import_filename``; it is also handed to ``filter_factory``
            and, when sharding is on, to ``StudentShard.init_shards``.
        use_shards: when truthy, shards are initialised and a circular
            shard generator is stored on ``self.shard_gen``; otherwise
            ``self.shard_gen`` is ``None``.
    """
    self.student_filter = filter_factory(config)
    self.batch_size = config.importer_batch_size

    # Input: the file named by the config, opened through gcs and parsed
    # as CSV.
    self.filename = config.import_filename
    self.in_file = gcs.open(self.filename)
    self.reader = csv.reader(self.in_file)

    # Running tallies, both starting at zero.
    self.imported = 0
    self.rejected = 0

    if not use_shards:
        self.shard_gen = None
        return

    LOG.info("Checking student shards")
    StudentShard.init_shards(config)
    self.shard_gen = StudentShard.circular_shard_generator()
def setup_dummy_students(sharded=None):
    """Store one dummy student per dummy id, optionally assigning shards.

    Args:
        sharded: falsy for no sharding (students are built with a ``None``
            shard generator); the string ``"circular"`` to use the circular
            shard generator; any other truthy value (e.g. ``True``) to use
            the linear shard generator.
    """
    shard_source = None
    if sharded:
        # Both sharded modes need the dummy shards to exist first.
        setup_dummy_shards()
        if sharded == "circular":
            shard_source = StudentShard.circular_shard_generator()
        else:
            # Callers that do not care about the sharding flavour just pass
            # True and get the linear generator.
            shard_source = StudentShard.linear_shard_generator()

    for dummy_id in get_dummy_ids():
        get_dummy_student(int(dummy_id), shard_source).put()
def test_student_sharding(self):
    """Check shard creation/deletion and both StudentShard generators.

    Steps, in order: the datastore starts empty; building the dummy config
    leaves exactly one DeploymentConfig; dummy shards can be created and
    deleted; the linear generator yields every shard; the circular
    generator yields every shard before it repeats one.

    Raises:
        ValueError: if the circular generator yields more than the failsafe
            number of shards without repeating a key.
    """
    # Ensure empty starting DB
    for model in (DeploymentConfig, Student, StudentShard):
        self.assertFalse(model().query().fetch(limit=None))

    # Test creation of dummy deployment config. Only the effect of the
    # call is checked, so its return value is not kept.
    get_dummy_config()
    self.assertEqual(1, len(DeploymentConfig().query().fetch(limit=None)))

    # Create/delete shards
    shard_count = len(get_dummy_ids())
    setup_dummy_shards()
    stored = StudentShard().query().fetch(limit=None)
    self.assertEqual(shard_count, len(stored))
    self.assertEqual(shard_count, len(StudentShard.get_all()))
    StudentShard.delete_all()
    self.assertFalse(StudentShard.get_all())

    # Linear shard generator serves each shard once
    setup_dummy_shards()
    seen_keys = set()
    for shard in StudentShard.linear_shard_generator():
        seen_keys.add(shard.key)
    self.assertEqual(shard_count, len(seen_keys))

    # Circular shard generator serves each shard once, then loops.
    # Stop at the first repeated key; the failsafe guards against a
    # generator that never repeats.
    seen_keys = set()
    failsafe = 10000
    served = enumerate(StudentShard.circular_shard_generator(), 1)
    for count, shard in served:
        if count > failsafe:
            raise ValueError("Infinite loop when testing circular_shard_generator")
        if shard.key in seen_keys:
            break
        seen_keys.add(shard.key)
    self.assertEqual(shard_count, len(seen_keys))
def generate(self, multi):
    """Yield stored students, walking the shards in linear order.

    For every shard from ``StudentShard.linear_shard_generator()`` the
    students whose ancestor is that shard's key are fetched.

    Args:
        multi: when truthy, each shard's fetched list is yielded as a
            single item; otherwise students are yielded one at a time.

    Yields:
        Either a list of students per shard or individual students.
        ``self.imported`` is increased just before each yield, by the list
        length in multi mode and by one otherwise.
    """
    for shard in StudentShard.linear_shard_generator():
        students = Student.query(ancestor=shard.key).fetch()
        if not multi:
            for student in students:
                self.imported += 1
                yield student
            continue
        self.imported += len(students)
        yield students
def test_abstract_csv_importer(self):
    """Run StubCsvImporter through row conversion and whole-file import.

    Both are exercised with sharding off and on. Row conversion is only
    invoked (its result is not asserted); file import is verified through
    ``self.check_importer_count`` in multi and single mode, with a fresh
    importer for every check.
    """
    dummy_count = self.config.estimated_student_count
    self.write_stub_student_file(dummy_count)

    # Basic row conversion
    for sharded in (False, True):
        StubCsvImporter(self.config, use_shards=sharded).convert_row("dummy, row")

    # Input file conversion, no shards
    for multi in (True, False):
        importer = StubCsvImporter(self.config, use_shards=False)
        self.check_importer_count(dummy_count, importer, multi=multi)

    # Input file conversion, with shards
    StudentShard.init_shards(self.config)
    for multi in (True, False):
        importer = StubCsvImporter(self.config, use_shards=True)
        self.check_importer_count(dummy_count, importer, multi=multi)
def setup_dummy_shards():
    """Initialise the student shards from the dummy config."""
    StudentShard.init_shards(get_dummy_config())