def main(argv):
  """Parse entities from a local file and upload them to Cloud Datastore."""
  parser = GetParser()
  options = parser.parse_args(argv)
  entities_path = options.entities
  creds_file = options.service_acct_json
  project_id = options.project_id
  namespace = options.namespace
  entities = []

  c, project_id = _GetClient(creds_file, project_id, namespace)

  if options.parent_key:
    # The parent key is given as a python literal, e.g. "('Kind', 'id')".
    upper_parent_key = c.key(*ast.literal_eval(options.parent_key))
  else:
    upper_parent_key = None

  with open(entities_path, 'r') as f:
    entities = GetEntities(project_id, f, upper_parent_key, namespace)

  # Datastore limits the number of entities per commit, so write in chunks.
  for chunk in iter_utils.SplitToChunks(entities, _BATCH_CHUNK_SIZE):
    batch = c.batch()
    for e in chunk:
      batch.put(e)
    batch.commit()
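# GetParser and _GetClient are defined elsewhere in this script. Below is a
# minimal sketch of what GetParser might look like, inferred only from the
# options that main() reads above; the flag names, defaults, and help strings
# are assumptions, not the actual interface.
import argparse

def GetParser():
  """Hypothetical sketch of the argument parser used by main()."""
  parser = argparse.ArgumentParser(
      description='Write entities from a file to Cloud Datastore.')
  parser.add_argument('entities',
                      help='Path to the file containing entities to upload.')
  parser.add_argument('--service_acct_json',
                      help='Path to service account credentials JSON file.')
  parser.add_argument('--project_id',
                      help='Datastore project id to write to.')
  parser.add_argument('--namespace',
                      help='Datastore namespace to write to.')
  parser.add_argument('--parent_key', default=None,
                      help='Optional parent key for all entities, given as a '
                           "python literal tuple, e.g. \"('Kind', 'id')\".")
  return parser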
def ChunkedBatchWrite(entities, client, batch_size=_BATCH_CHUNK_SIZE):
  """Write |entities| to datastore |client| in batches of size |batch_size|.

  Datastore has an entities-per-batch limit of 500. This utility function
  helps write a large number of entities to datastore by splitting them into
  limited-size batch writes.

  Args:
    entities: iterator of datastore entities to write.
    client: datastore.Client instance.
    batch_size: (default: 500) Maximum number of entities per batch.
  """
  for chunk in iter_utils.SplitToChunks(entities, batch_size):
    entities = list(chunk)
    batch = client.batch()
    for entity in entities:
      batch.put(entity)
    try:
      batch.commit()
    except gcloud.exceptions.BadRequest:
      logging.warning('Unexportable entities:\n%s', entities)
      raise
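# A usage sketch for ChunkedBatchWrite, assuming the legacy gcloud-python
# datastore client implied by the gcloud.exceptions handling above. The
# project id, entity kind, and helper name below are placeholders for
# illustration, not part of the real code.
from gcloud import datastore

def ExportMetadataSketch(metadata_dicts):
  """Hypothetical caller: wrap plain dicts as entities and batch-write them."""
  client = datastore.Client(project='example-project')  # placeholder project
  entities = []
  for i, fields in enumerate(metadata_dicts, 1):
    entity = datastore.Entity(key=client.key('ExampleKind', i))  # placeholder kind
    entity.update(fields)
    entities.append(entity)
  # Commits at most _BATCH_CHUNK_SIZE (500) entities per batch.
  ChunkedBatchWrite(entities, client)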
def testExtraFinalChunk(self):
  self.assertEqual(
      list(iter_utils.SplitToChunks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 4)),
      [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10]])
def testEqualChunks(self):
  self.assertEqual(
      list(iter_utils.SplitToChunks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 5)),
      [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
def testNoInput(self):
  self.assertEqual(list(iter_utils.SplitToChunks([], 1)), [])
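# The tests above pin down the expected SplitToChunks behavior: chunks of at
# most the requested size, a shorter final chunk when the input does not
# divide evenly, and no chunks at all for empty input. Below is a minimal
# sketch consistent with these tests; the real iter_utils.SplitToChunks may
# differ in details (e.g. it could yield iterators instead of lists).
import itertools

def SplitToChunks(iterable, chunk_size):
  """Yield successive lists of at most |chunk_size| items from |iterable|."""
  iterator = iter(iterable)
  while True:
    chunk = list(itertools.islice(iterator, chunk_size))
    if not chunk:
      return
    yield chunk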