Exemple #1
0
 def setUp(self):
     """Prepare the mocked client and the ``BatchWriter`` under test.

     The mock's ``batch_write_item`` returns a response with no
     unprocessed items unless a test overrides it.
     """
     table_name = 'tablename'
     flush_amount = 2

     client = mock.Mock()
     # Default response: nothing is left unprocessed.
     client.batch_write_item.return_value = {'UnprocessedItems': {}}

     self.client = client
     self.table_name = table_name
     self.flush_amount = flush_amount
     self.batch_writer = BatchWriter(table_name, client, flush_amount)
    def massive_insert(self):
        """Time bulk inserts through ``BatchWriter`` for increasing item counts.

        For every size in a fixed list this calls
        ``self.create_table_for_massive_test(size)``, writes ``size`` items
        to ``test_earth_input_big_table_<size>`` and prints a
        ``time;nb of writers;inserted items`` header followed by one
        result line.
        """
        sizes = [10, 100, 1000, 10000, 100000, 1000000]

        for size in sizes:
            self.create_table_for_massive_test(size)
            print("time;nb of writers;inserted items")
            table_name = 'test_earth_input_big_table_%s' % size
            table = self.dynamodb.Table(table_name)
            # Use the writer as a context manager: leaving the block flushes
            # whatever is still buffered. Without it a trailing partial batch
            # (e.g. all 10 items of the first size, fewer than flush_amount)
            # would never be sent.
            with BatchWriter(table_name=table_name,
                             client=table.meta.client,
                             flush_amount=25) as batch:
                t0 = time.time()
                for i in range(size):
                    item = {
                        'serie_name': 'MYSERIE',
                        'date': str(i),
                        'value': i,
                    }
                    batch.put_item(Item=item)
            # Stop the clock after the final flush so it is part of the timing.
            total_time = time.time() - t0
            # Report the number of items written, not the last loop index.
            print("%s;%s;%s" % (total_time, self.writers_number, size))
    def batch_writer(self, overwrite_by_pkeys=None):
        """Return a ``BatchWriter`` for this table backed by an ``EncryptedClient``.

        The table's underlying client is wrapped in an ``EncryptedClient``
        configured from this object's materials provider, attribute actions
        and index auto-refresh setting; the writer sends through that wrapper.

        https://boto3.readthedocs.io/en/latest/reference/services/dynamodb.html#DynamoDB.Table.batch_writer

        :type overwrite_by_pkeys: list(string)
        :param overwrite_by_pkeys: De-duplicate request items in buffer if match new request
            item on specified primary keys. i.e ``["partition_key1", "sort_key2", "sort_key3"]``
        """
        client_options = {
            'client': self._table.meta.client,
            'materials_provider': self._materials_provider,
            'attribute_actions': self._attribute_actions,
            'auto_refresh_table_indexes': self._auto_refresh_table_indexes,
            'expect_standard_dictionaries': True,
        }
        wrapped_client = EncryptedClient(**client_options)

        writer = BatchWriter(
            table_name=self._table.name,
            client=wrapped_client,
            overwrite_by_pkeys=overwrite_by_pkeys,
        )
        return writer
Exemple #4
0
    def test_auto_dedup_for_dup_requests(self):
        """Check de-duplication of buffered requests by ``pkey``/``skey``.

        Several put/delete requests that repeat the same key pair are
        queued; the expected ``batch_write_item`` payloads contain only the
        most recent request for each key pair.
        """
        def put_request(pkey, skey, other):
            item = {'pkey': pkey, 'skey': skey, 'other': other}
            return {'PutRequest': {'Item': item}}

        def delete_request(pkey, skey):
            return {'DeleteRequest': {'Key': {'pkey': pkey, 'skey': skey}}}

        def expected_call(requests):
            return {'RequestItems': {self.table_name: requests}}

        # Each entry is (operation, payload), applied in order below.
        operations = [
            # dup 1
            ('put', {'pkey': 'foo1', 'skey': 'bar1', 'other': 'other1'}),
            ('put', {'pkey': 'foo1', 'skey': 'bar1', 'other': 'other2'}),
            # dup 2
            ('delete', {'pkey': 'foo1', 'skey': 'bar2'}),
            ('put', {'pkey': 'foo1', 'skey': 'bar2', 'other': 'other3'}),
            # dup 3
            ('put', {'pkey': 'foo2', 'skey': 'bar2', 'other': 'other3'}),
            ('delete', {'pkey': 'foo2', 'skey': 'bar2'}),
            # dup 4
            ('delete', {'pkey': 'foo2', 'skey': 'bar3'}),
            ('delete', {'pkey': 'foo2', 'skey': 'bar3'}),
            # 5
            ('delete', {'pkey': 'foo3', 'skey': 'bar3'}),
            # 2nd batch
            ('put', {'pkey': 'foo1', 'skey': 'bar1', 'other': 'other1'}),
            ('put', {'pkey': 'foo1', 'skey': 'bar1', 'other': 'other2'}),
        ]

        with BatchWriter(
                self.table_name,
                self.client,
                flush_amount=5,
                overwrite_by_pkeys=["pkey", "skey"],
        ) as writer:
            for operation, payload in operations:
                if operation == 'put':
                    writer.put_item(Item=payload)
                else:
                    writer.delete_item(Key=payload)

        first_batch = expected_call([
            put_request('foo1', 'bar1', 'other2'),
            put_request('foo1', 'bar2', 'other3'),
            delete_request('foo2', 'bar2'),
            delete_request('foo2', 'bar3'),
            delete_request('foo3', 'bar3'),
        ])
        second_batch = expected_call([
            put_request('foo1', 'bar1', 'other2'),
        ])
        self.assert_batch_write_calls_are([first_batch, second_batch])
Exemple #5
0
 def test_repeated_flushing_on_exit(self):
     """Check that leaving the writer keeps re-sending unprocessed items.

     The mocked client reports unprocessed items on its first two
     responses, so three ``batch_write_item`` calls are expected.
     """
     def put_request(hash_value):
         return {'PutRequest': {'Item': {'Hash': hash_value}}}

     def unprocessed(*hash_values):
         # Response shape carrying the given items back as unprocessed.
         leftovers = [put_request(value) for value in hash_values]
         return {'UnprocessedItems': {self.table_name: leftovers}}

     def expected_call(*hash_values):
         requests = [put_request(value) for value in hash_values]
         return {'RequestItems': {self.table_name: requests}}

     # Simulate the client handing back unprocessed items across
     # several successive calls before finally reporting none.
     self.client.batch_write_item.side_effect = [
         unprocessed('foo2', 'foo3'),
         unprocessed('foo3'),
         {'UnprocessedItems': {}},
     ]

     with BatchWriter(self.table_name, self.client, flush_amount=4) as writer:
         for hash_value in ('foo1', 'foo2', 'foo3'):
             writer.put_item(Item={'Hash': hash_value})

     # On exit we expect three calls:
     # 1. the normal batch write with all 3 items,
     # 2. a retry of the two items reported as unprocessed,
     # 3. a final retry of the one item still unprocessed.
     expected_calls = [
         expected_call('foo1', 'foo2', 'foo3'),
         expected_call('foo2', 'foo3'),
         expected_call('foo3'),
     ]
     self.assert_batch_write_calls_are(expected_calls)
Exemple #6
0
    def test_never_send_more_than_max_batch_size(self):
        """Check that no call carries more than ``flush_amount`` items.

        The mocked client first reports every item as unprocessed, then one
        item, then none; each expected call still holds at most two items.
        """
        def put_request(hash_value):
            return {'PutRequest': {'Item': {'Hash': hash_value}}}

        def unprocessed(*hash_values):
            # Response shape carrying the given items back as unprocessed.
            leftovers = [put_request(value) for value in hash_values]
            return {'UnprocessedItems': {self.table_name: leftovers}}

        def expected_call(*hash_values):
            requests = [put_request(value) for value in hash_values]
            return {'RequestItems': {self.table_name: requests}}

        # Suppose the server sends back a response indicating that all
        # the items of the first call were unprocessed.
        self.client.batch_write_item.side_effect = [
            unprocessed('foo1', 'foo2'),
            unprocessed('foo2'),
            {'UnprocessedItems': {}},
        ]

        with BatchWriter(self.table_name, self.client, flush_amount=2) as writer:
            for hash_value in ('foo1', 'foo2', 'foo3'):
                writer.put_item(Item={'Hash': hash_value})

        # Note how we never send more than flush_amount=2 items:
        # 1. the first two items queued,
        # 2. the same two again, since both came back unprocessed
        #    (still only 2 at a time, in order),
        # 3. the remaining new item plus the one item still unprocessed.
        expected_calls = [
            expected_call('foo1', 'foo2'),
            expected_call('foo1', 'foo2'),
            expected_call('foo3', 'foo2'),
        ]
        self.assert_batch_write_calls_are(expected_calls)