コード例 #1
0
 def setUp(self):
     # Fixture: a mocked client plus a BatchWriter bound to it.
     self.table_name = 'tablename'
     self.flush_amount = 2

     # By default every batch_write_item call reports that nothing was
     # left unprocessed; individual tests may override this.
     stub_client = mock.Mock()
     stub_client.batch_write_item.return_value = {'UnprocessedItems': {}}
     self.client = stub_client

     self.batch_writer = BatchWriter(
         self.table_name, self.client, self.flush_amount)
コード例 #2
0
ファイル: test_table.py プロジェクト: Armin-Smailzade/boto3
 def setUp(self):
     # Shared fixture values used by the tests when building expectations.
     self.table_name = 'tablename'
     self.flush_amount = 2

     # Mocked client whose batch_write_item answers "nothing unprocessed"
     # unless a test replaces the canned response.
     fake_client = mock.Mock()
     fake_client.batch_write_item.return_value = {'UnprocessedItems': {}}
     self.client = fake_client

     # Writer under test, wired to the mocked client.
     self.batch_writer = BatchWriter(
         self.table_name, self.client, self.flush_amount)
コード例 #3
0
    def massive_insert(self):
        """Time bulk inserts into freshly created tables of growing size.

        For every size in the list a table is created through
        ``create_table_for_massive_test`` and ``size`` items are written with
        a ``BatchWriter``.  A ``time;nb of writers;inserted items`` header and
        one result row are printed per size.
        """
        sizes = [10, 100, 1000, 10000, 100000, 1000000]

        for size in sizes:
            self.create_table_for_massive_test(size)
            print("time;nb of writers;inserted items")
            table_name = 'test_earth_input_big_table_%s' % size
            table = self.dynamodb.Table(table_name)
            t0 = time.time()
            # Use the writer as a context manager so buffered items are
            # flushed on exit.  Without it a trailing partial batch (for
            # example all 10 items of the smallest run, which never reach
            # flush_amount=25) is never sent, and the final flush would not
            # be part of the measured time.
            with BatchWriter(table_name=table_name,
                             client=table.meta.client,
                             flush_amount=25) as batch:
                for i in range(size):
                    item = {'serie_name': 'MYSERIE', 'date': str(i), 'value': i}
                    batch.put_item(Item=item)
            total_time = time.time() - t0
            # Report how many items were inserted, not the last loop index.
            print("%s;%s;%s" % (total_time, self.writers_number, size))
コード例 #4
0
def import_ticker(batch: BatchWriter, ticker: str, period: str):
    """Queue the daily price history of *ticker* on *batch*.

    :param batch: writer whose ``put_item`` receives one item per history row.
    :param ticker: symbol passed to ``yf.Ticker``; stored under ``Ticker``.
    :param period: forwarded as the ``period`` argument of ``history``.
    """
    stock = yf.Ticker(ticker)
    data = stock.history(
        period=period,
        interval='1d',
    )

    # The display name is identical for every row, so look it up once rather
    # than on each iteration.  The lookup stays inside the guard so an empty
    # history never touches ``stock.info``.
    if len(data.index):
        short_name = stock.info['shortName']
        for timestamp, series in data.iterrows():
            # Prices go through str() before Decimal -- presumably to avoid
            # the long binary expansion Decimal(float) would produce.
            batch.put_item(
                Item={
                    'Ticker': ticker,
                    'Name': short_name,
                    'Date': timestamp.strftime('%Y-%m-%d'),
                    'Open': Decimal(str(series['Open'])),
                    'Close': Decimal(str(series['Close'])),
                    'High': Decimal(str(series['High'])),
                    'Low': Decimal(str(series['Low'])),
                })

    print('Imported ticker: {}'.format(ticker))
コード例 #5
0
    def batch_writer(self, overwrite_by_pkeys=None):
        """Return a ``BatchWriter`` for this table backed by an ``EncryptedClient``.

        https://boto3.readthedocs.io/en/latest/reference/services/dynamodb.html#DynamoDB.Table.batch_writer

        :type overwrite_by_pkeys: list(string)
        :param overwrite_by_pkeys: De-duplicate request items in buffer if match new request
            item on specified primary keys. i.e ``["partition_key1", "sort_key2", "sort_key3"]``
        """
        # Wrap the table's own client, reusing this object's materials
        # provider, attribute actions and index-refresh setting.
        client_settings = {
            'client': self._table.meta.client,
            'materials_provider': self._materials_provider,
            'attribute_actions': self._attribute_actions,
            'auto_refresh_table_indexes': self._auto_refresh_table_indexes,
            'expect_standard_dictionaries': True,
        }
        wrapped_client = EncryptedClient(**client_settings)

        writer = BatchWriter(
            table_name=self._table.name,
            client=wrapped_client,
            overwrite_by_pkeys=overwrite_by_pkeys,
        )
        return writer
コード例 #6
0
    def test_auto_dedup_for_dup_requests(self):
        # With overwrite_by_pkeys set, requests that share the same
        # (pkey, skey) pair within a batch are expected to collapse to the
        # most recent one.
        def attrs(pkey, skey, **extra):
            # Attribute map with the key columns first, then any extras.
            mapping = {'pkey': pkey, 'skey': skey}
            mapping.update(extra)
            return mapping

        def put(pkey, skey, other):
            return {'PutRequest': {'Item': attrs(pkey, skey, other=other)}}

        def delete(pkey, skey):
            return {'DeleteRequest': {'Key': attrs(pkey, skey)}}

        writer = BatchWriter(
            self.table_name,
            self.client,
            flush_amount=5,
            overwrite_by_pkeys=["pkey", "skey"],
        )
        with writer as b:
            # dup 1
            b.put_item(Item=attrs('foo1', 'bar1', other='other1'))
            b.put_item(Item=attrs('foo1', 'bar1', other='other2'))
            # dup 2
            b.delete_item(Key=attrs('foo1', 'bar2'))
            b.put_item(Item=attrs('foo1', 'bar2', other='other3'))
            # dup 3
            b.put_item(Item=attrs('foo2', 'bar2', other='other3'))
            b.delete_item(Key=attrs('foo2', 'bar2'))
            # dup 4
            b.delete_item(Key=attrs('foo2', 'bar3'))
            b.delete_item(Key=attrs('foo2', 'bar3'))
            # 5
            b.delete_item(Key=attrs('foo3', 'bar3'))
            # 2nd batch
            b.put_item(Item=attrs('foo1', 'bar1', other='other1'))
            b.put_item(Item=attrs('foo1', 'bar1', other='other2'))

        # Five distinct keys fill the first batch; the two trailing puts
        # collapse into a single request that is flushed on exit.
        first_batch = {
            'RequestItems': {
                self.table_name: [
                    put('foo1', 'bar1', 'other2'),
                    put('foo1', 'bar2', 'other3'),
                    delete('foo2', 'bar2'),
                    delete('foo2', 'bar3'),
                    delete('foo3', 'bar3'),
                ]
            }
        }
        second_batch = {
            'RequestItems': {
                self.table_name: [put('foo1', 'bar1', 'other2')]
            }
        }
        self.assert_batch_write_calls_are([first_batch, second_batch])
コード例 #7
0
 def test_repeated_flushing_on_exit(self):
     # Leaving the writer should keep re-sending until the mocked service
     # stops reporting unprocessed items.
     def puts(*hash_values):
         # One PutRequest entry per hash value, in the given order.
         return [{'PutRequest': {'Item': {'Hash': value}}}
                 for value in hash_values]

     def leftover(*hash_values):
         return {'UnprocessedItems': {self.table_name: puts(*hash_values)}}

     def sent(*hash_values):
         return {'RequestItems': {self.table_name: puts(*hash_values)}}

     # We're going to simulate unprocessed_items
     # returning multiple unprocessed items across calls.
     self.client.batch_write_item.side_effect = [
         leftover('foo2', 'foo3'),
         leftover('foo3'),
         {'UnprocessedItems': {}},
     ]
     writer = BatchWriter(self.table_name, self.client, flush_amount=4)
     with writer as b:
         for value in ('foo1', 'foo2', 'foo3'):
             b.put_item(Item={'Hash': value})

     # So when we exit, we expect three calls.
     expected_calls = [
         # First we try the normal batch write with 3 items:
         sent('foo1', 'foo2', 'foo3'),
         # Then we see two unprocessed items so we send another batch.
         sent('foo2', 'foo3'),
         # And then we still see one more unprocessed item so
         # we need to send another batch.
         sent('foo3'),
     ]
     self.assert_batch_write_calls_are(expected_calls)
コード例 #8
0
    def test_never_send_more_than_max_batch_size(self):
        # Unprocessed items handed back by the service must not push a
        # later request above flush_amount.
        def puts(*hash_values):
            # One PutRequest entry per hash value, in the given order.
            return [{'PutRequest': {'Item': {'Hash': value}}}
                    for value in hash_values]

        def leftover(*hash_values):
            return {'UnprocessedItems': {self.table_name: puts(*hash_values)}}

        def sent(*hash_values):
            return {'RequestItems': {self.table_name: puts(*hash_values)}}

        # Suppose the server sends backs a response that indicates that
        # all the items were unprocessed.
        self.client.batch_write_item.side_effect = [
            leftover('foo1', 'foo2'),
            leftover('foo2'),
            {'UnprocessedItems': {}},
        ]
        writer = BatchWriter(self.table_name, self.client, flush_amount=2)
        with writer as b:
            for value in ('foo1', 'foo2', 'foo3'):
                b.put_item(Item={'Hash': value})

        expected_calls = [
            # Note how we're never sending more than flush_amount=2.
            sent('foo1', 'foo2'),
            # Even when the server sends us unprocessed items of 2 elements,
            # we'll still only send 2 at a time, in order.
            sent('foo1', 'foo2'),
            # And then we still see one more unprocessed item so
            # we need to send another batch.
            sent('foo3', 'foo2'),
        ]
        self.assert_batch_write_calls_are(expected_calls)
コード例 #9
0
class BaseTransformationTest(unittest.TestCase):

    maxDiff = None

    def setUp(self):
        self.client = mock.Mock()
        self.client.batch_write_item.return_value = {'UnprocessedItems': {}}
        self.table_name = 'tablename'
        self.flush_amount = 2
        self.batch_writer = BatchWriter(self.table_name, self.client,
                                        self.flush_amount)

    def assert_batch_write_calls_are(self, expected_batch_writes):
        assert self.client.batch_write_item.call_count == len(
            expected_batch_writes)
        batch_write_calls = [
            args[1] for args in self.client.batch_write_item.call_args_list
        ]
        assert batch_write_calls == expected_batch_writes

    def test_batch_write_does_not_immediately_write(self):
        self.batch_writer.put_item(Item={'Hash': 'foo'})
        assert not self.client.batch_write_item.called

    def test_batch_write_flushes_at_flush_amount(self):
        self.batch_writer.put_item(Item={'Hash': 'foo1'})
        self.batch_writer.put_item(Item={'Hash': 'foo2'})
        expected = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo1'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                ]
            }
        }
        self.assert_batch_write_calls_are([expected])

    def test_multiple_flushes_reset_items_to_put(self):
        self.batch_writer.put_item(Item={'Hash': 'foo1'})
        self.batch_writer.put_item(Item={'Hash': 'foo2'})
        self.batch_writer.put_item(Item={'Hash': 'foo3'})
        self.batch_writer.put_item(Item={'Hash': 'foo4'})
        # We should have two batch calls, one for foo1,foo2 and
        # one for foo3,foo4.
        first_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo1'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                ]
            }
        }
        second_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo3'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo4'
                            }
                        }
                    },
                ]
            }
        }
        self.assert_batch_write_calls_are([first_batch, second_batch])

    def test_can_handle_puts_and_deletes(self):
        self.batch_writer.put_item(Item={'Hash': 'foo1'})
        self.batch_writer.delete_item(Key={'Hash': 'foo2'})
        expected = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo1'
                            }
                        }
                    },
                    {
                        'DeleteRequest': {
                            'Key': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                ]
            }
        }
        self.assert_batch_write_calls_are([expected])

    def test_multiple_batch_calls_with_mixed_deletes(self):
        self.batch_writer.put_item(Item={'Hash': 'foo1'})
        self.batch_writer.delete_item(Key={'Hash': 'foo2'})
        self.batch_writer.delete_item(Key={'Hash': 'foo3'})
        self.batch_writer.put_item(Item={'Hash': 'foo4'})
        first_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo1'
                            }
                        }
                    },
                    {
                        'DeleteRequest': {
                            'Key': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                ]
            }
        }
        second_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'DeleteRequest': {
                            'Key': {
                                'Hash': 'foo3'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo4'
                            }
                        }
                    },
                ]
            }
        }
        self.assert_batch_write_calls_are([first_batch, second_batch])

    def test_unprocessed_items_added_to_next_batch(self):
        self.client.batch_write_item.side_effect = [
            {
                'UnprocessedItems': {
                    self.table_name: [{
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo2'
                            }
                        }
                    }],
                },
            },
            # Then the last response shows that everything went through
            {
                'UnprocessedItems': {}
            },
        ]
        self.batch_writer.put_item(Item={'Hash': 'foo1'})
        self.batch_writer.put_item(Item={'Hash': 'foo2'})
        self.batch_writer.put_item(Item={'Hash': 'foo3'})

        # We should have sent two batch requests consisting of 2
        # 2 requests.  foo1,foo2 and foo2,foo3.
        # foo2 is sent twice because the first response has it listed
        # as an unprocessed item which means it needs to be part
        # of the next batch.
        first_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo1'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                ]
            }
        }
        second_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo3'
                            }
                        }
                    },
                ]
            }
        }
        self.assert_batch_write_calls_are([first_batch, second_batch])

    def test_all_items_flushed_on_exit(self):
        with self.batch_writer as b:
            b.put_item(Item={'Hash': 'foo1'})
        self.assert_batch_write_calls_are([
            {
                'RequestItems': {
                    self.table_name: [
                        {
                            'PutRequest': {
                                'Item': {
                                    'Hash': 'foo1'
                                }
                            }
                        },
                    ]
                },
            },
        ])

    def test_never_send_more_than_max_batch_size(self):
        # Suppose the server sends backs a response that indicates that
        # all the items were unprocessed.
        self.client.batch_write_item.side_effect = [
            {
                'UnprocessedItems': {
                    self.table_name: [
                        {
                            'PutRequest': {
                                'Item': {
                                    'Hash': 'foo1'
                                }
                            }
                        },
                        {
                            'PutRequest': {
                                'Item': {
                                    'Hash': 'foo2'
                                }
                            }
                        },
                    ],
                },
            },
            {
                'UnprocessedItems': {
                    self.table_name: [
                        {
                            'PutRequest': {
                                'Item': {
                                    'Hash': 'foo2'
                                }
                            }
                        },
                    ],
                },
            },
            {
                'UnprocessedItems': {}
            },
        ]
        with BatchWriter(self.table_name, self.client, flush_amount=2) as b:
            b.put_item(Item={'Hash': 'foo1'})
            b.put_item(Item={'Hash': 'foo2'})
            b.put_item(Item={'Hash': 'foo3'})

        # Note how we're never sending more than flush_amount=2.
        first_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo1'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                ]
            }
        }
        # Even when the server sends us unprocessed items of 2 elements,
        # we'll still only send 2 at a time, in order.
        second_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo1'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                ]
            }
        }
        # And then we still see one more unprocessed item so
        # we need to send another batch.
        third_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo3'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                ]
            }
        }
        self.assert_batch_write_calls_are(
            [first_batch, second_batch, third_batch])

    def test_repeated_flushing_on_exit(self):
        # We're going to simulate unprocessed_items
        # returning multiple unprocessed items across calls.
        self.client.batch_write_item.side_effect = [
            {
                'UnprocessedItems': {
                    self.table_name: [
                        {
                            'PutRequest': {
                                'Item': {
                                    'Hash': 'foo2'
                                }
                            }
                        },
                        {
                            'PutRequest': {
                                'Item': {
                                    'Hash': 'foo3'
                                }
                            }
                        },
                    ],
                },
            },
            {
                'UnprocessedItems': {
                    self.table_name: [
                        {
                            'PutRequest': {
                                'Item': {
                                    'Hash': 'foo3'
                                }
                            }
                        },
                    ],
                },
            },
            {
                'UnprocessedItems': {}
            },
        ]
        with BatchWriter(self.table_name, self.client, flush_amount=4) as b:
            b.put_item(Item={'Hash': 'foo1'})
            b.put_item(Item={'Hash': 'foo2'})
            b.put_item(Item={'Hash': 'foo3'})
        # So when we exit, we expect three calls.
        # First we try the normal batch write with 3 items:
        first_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo1'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo3'
                            }
                        }
                    },
                ]
            }
        }
        # Then we see two unprocessed items so we send another batch.
        second_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo2'
                            }
                        }
                    },
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo3'
                            }
                        }
                    },
                ]
            }
        }
        # And then we still see one more unprocessed item so
        # we need to send another batch.
        third_batch = {
            'RequestItems': {
                self.table_name: [
                    {
                        'PutRequest': {
                            'Item': {
                                'Hash': 'foo3'
                            }
                        }
                    },
                ]
            }
        }
        self.assert_batch_write_calls_are(
            [first_batch, second_batch, third_batch])

    def test_auto_dedup_for_dup_requests(self):
        # With overwrite_by_pkeys=["pkey", "skey"], the expected calls at the
        # bottom contain only the most recent request submitted for each
        # (pkey, skey) pair.
        def key_of(pkey, skey):
            return {'pkey': pkey, 'skey': skey}

        def item_of(pkey, skey, other):
            return {'pkey': pkey, 'skey': skey, 'other': other}

        def put_request(pkey, skey, other):
            return {'PutRequest': {'Item': item_of(pkey, skey, other)}}

        def delete_request(pkey, skey):
            return {'DeleteRequest': {'Key': key_of(pkey, skey)}}

        def batch_of(*requests):
            return {'RequestItems': {self.table_name: list(requests)}}

        writer = BatchWriter(
            self.table_name,
            self.client,
            flush_amount=5,
            overwrite_by_pkeys=["pkey", "skey"],
        )
        with writer as b:
            # dup 1: put followed by put
            b.put_item(Item=item_of('foo1', 'bar1', 'other1'))
            b.put_item(Item=item_of('foo1', 'bar1', 'other2'))
            # dup 2: delete followed by put
            b.delete_item(Key=key_of('foo1', 'bar2'))
            b.put_item(Item=item_of('foo1', 'bar2', 'other3'))
            # dup 3: put followed by delete
            b.put_item(Item=item_of('foo2', 'bar2', 'other3'))
            b.delete_item(Key=key_of('foo2', 'bar2'))
            # dup 4: delete followed by delete
            b.delete_item(Key=key_of('foo2', 'bar3'))
            b.delete_item(Key=key_of('foo2', 'bar3'))
            # 5: a fifth distinct key
            b.delete_item(Key=key_of('foo3', 'bar3'))
            # 2nd batch: the same key written twice again
            b.put_item(Item=item_of('foo1', 'bar1', 'other1'))
            b.put_item(Item=item_of('foo1', 'bar1', 'other2'))

        # One request per distinct key in the first call, and a single put
        # for the repeated key in the second call.
        first_batch = batch_of(
            put_request('foo1', 'bar1', 'other2'),
            put_request('foo1', 'bar2', 'other3'),
            delete_request('foo2', 'bar2'),
            delete_request('foo2', 'bar3'),
            delete_request('foo3', 'bar3'),
        )
        second_batch = batch_of(put_request('foo1', 'bar1', 'other2'))
        self.assert_batch_write_calls_are([first_batch, second_batch])

    def test_added_unsent_request_not_flushed_put(self):
        # Regression scenario: a full batch (flush_amount requests) comes
        # back entirely unprocessed and one more request is added before the
        # retry.  Previously, when the retry then succeeded with an empty
        # UnprocessedItems dict, _items_buffer was emptied before the newer
        # request had been sent.
        def put_request(hash_value):
            return {'PutRequest': {'Item': {'Hash': hash_value}}}

        def batch_of(*hash_values):
            return {
                'RequestItems': {
                    self.table_name: [put_request(h) for h in hash_values],
                },
            }

        self.client.batch_write_item.side_effect = [
            {
                'UnprocessedItems': {
                    self.table_name: [
                        put_request('foo1'),
                        put_request('foo2'),
                    ],
                },
            },
            {'UnprocessedItems': {}},
            {'UnprocessedItems': {}},
        ]
        for hash_value in ('foo1', 'foo2', 'foo3'):
            self.batch_writer.put_item({'Hash': hash_value})
        # foo3 must still be waiting in the buffer.
        self.assertIn(put_request('foo3'), self.batch_writer._items_buffer)

        retried_batch = batch_of('foo1', 'foo2')
        final_batch = batch_of('foo3', 'foo4')
        # The same batch is sent twice because every request in it failed
        # on the first try and flush_amount = 2.
        self.assert_batch_write_calls_are([retried_batch, retried_batch])
        # Adding one more request sends the remaining pair.
        self.batch_writer.put_item({'Hash': 'foo4'})
        self.assert_batch_write_calls_are(
            [retried_batch, retried_batch, final_batch])
        # Nothing may be left in the buffer afterwards.
        self.assertEqual(self.batch_writer._items_buffer, [])

    def test_added_unsent_request_not_flushed_delete(self):
        # Regression scenario: a full batch (flush_amount requests) comes
        # back entirely unprocessed and one more request is added before the
        # retry.  Previously, when the retry then succeeded with an empty
        # UnprocessedItems dict, _items_buffer was emptied before the newer
        # request had been sent.
        def delete_request(hash_value):
            return {'DeleteRequest': {'Key': {'Hash': hash_value}}}

        def batch_of(*hash_values):
            return {
                'RequestItems': {
                    self.table_name: [
                        delete_request(h) for h in hash_values
                    ],
                },
            }

        self.client.batch_write_item.side_effect = [
            {
                'UnprocessedItems': {
                    self.table_name: [
                        delete_request('foo1'),
                        delete_request('foo2'),
                    ],
                },
            },
            {'UnprocessedItems': {}},
            {'UnprocessedItems': {}},
        ]
        for hash_value in ('foo1', 'foo2', 'foo3'):
            self.batch_writer.delete_item({'Hash': hash_value})
        # foo3 must still be waiting in the buffer.
        self.assertIn(delete_request('foo3'),
                      self.batch_writer._items_buffer)

        retried_batch = batch_of('foo1', 'foo2')
        final_batch = batch_of('foo3', 'foo4')
        # The same batch is sent twice because every request in it failed
        # on the first try and flush_amount = 2.
        self.assert_batch_write_calls_are([retried_batch, retried_batch])
        # Adding one more request sends the remaining pair.
        self.batch_writer.delete_item({'Hash': 'foo4'})
        self.assert_batch_write_calls_are(
            [retried_batch, retried_batch, final_batch])
        # Nothing may be left in the buffer afterwards.
        self.assertEqual(self.batch_writer._items_buffer, [])
コード例 #10
0
ファイル: test_table.py プロジェクト: yoichi/boto3
class BaseTransformationTest(unittest.TestCase):
    # Exercises BatchWriter against a mocked client.  Each test feeds
    # put/delete requests to a writer and then compares the keyword
    # arguments recorded on client.batch_write_item with expected payloads.

    maxDiff = None

    def setUp(self):
        # The mocked client reports, by default, that no items were left
        # unprocessed.
        self.table_name = 'tablename'
        self.flush_amount = 2
        self.client = mock.Mock()
        self.client.batch_write_item.return_value = {'UnprocessedItems': {}}
        self.batch_writer = BatchWriter(self.table_name, self.client,
                                        self.flush_amount)

    @staticmethod
    def _put_request(hash_value):
        # Batch entry that puts an item whose 'Hash' is hash_value.
        return {'PutRequest': {'Item': {'Hash': hash_value}}}

    @staticmethod
    def _delete_request(hash_value):
        # Batch entry that deletes the key whose 'Hash' is hash_value.
        return {'DeleteRequest': {'Key': {'Hash': hash_value}}}

    def _batch_of(self, *requests):
        # Keyword arguments expected on a single batch_write_item call.
        return {'RequestItems': {self.table_name: list(requests)}}

    def _unprocessed(self, *requests):
        # Mocked client response listing `requests` as unprocessed.
        return {'UnprocessedItems': {self.table_name: list(requests)}}

    def assert_batch_write_calls_are(self, expected_batch_writes):
        # Verify the number of batch_write_item calls and, in order, the
        # keyword arguments each call received.
        batch_write_item = self.client.batch_write_item
        self.assertEqual(batch_write_item.call_count,
                         len(expected_batch_writes))
        # Index 1 of a recorded call holds its keyword arguments.
        recorded_kwargs = [
            call[1] for call in batch_write_item.call_args_list
        ]
        self.assertEqual(recorded_kwargs, expected_batch_writes)

    def test_batch_write_does_not_immediately_write(self):
        # One item is below flush_amount (2), so the client is not called.
        writer, client = self.batch_writer, self.client
        writer.put_item(Item={'Hash': 'foo'})
        self.assertFalse(client.batch_write_item.called)

    def test_batch_write_flushes_at_flush_amount(self):
        put = self._put_request
        for hash_value in ('foo1', 'foo2'):
            self.batch_writer.put_item(Item={'Hash': hash_value})
        # Reaching flush_amount (2) produces exactly one call.
        self.assert_batch_write_calls_are([
            self._batch_of(put('foo1'), put('foo2')),
        ])

    def test_multiple_flushes_reset_items_to_put(self):
        put = self._put_request
        for hash_value in ('foo1', 'foo2', 'foo3', 'foo4'):
            self.batch_writer.put_item(Item={'Hash': hash_value})
        # Two calls are expected: foo1,foo2 and then foo3,foo4.
        self.assert_batch_write_calls_are([
            self._batch_of(put('foo1'), put('foo2')),
            self._batch_of(put('foo3'), put('foo4')),
        ])

    def test_can_handle_puts_and_deletes(self):
        writer = self.batch_writer
        writer.put_item(Item={'Hash': 'foo1'})
        writer.delete_item(Key={'Hash': 'foo2'})
        # A put and a delete travel together in the same call.
        self.assert_batch_write_calls_are([
            self._batch_of(
                self._put_request('foo1'),
                self._delete_request('foo2'),
            ),
        ])

    def test_multiple_batch_calls_with_mixed_deletes(self):
        put, delete = self._put_request, self._delete_request
        writer = self.batch_writer
        writer.put_item(Item={'Hash': 'foo1'})
        writer.delete_item(Key={'Hash': 'foo2'})
        writer.delete_item(Key={'Hash': 'foo3'})
        writer.put_item(Item={'Hash': 'foo4'})
        # Requests keep their submission order across both calls.
        self.assert_batch_write_calls_are([
            self._batch_of(put('foo1'), delete('foo2')),
            self._batch_of(delete('foo3'), put('foo4')),
        ])

    def test_unprocessed_items_added_to_next_batch(self):
        put = self._put_request
        self.client.batch_write_item.side_effect = [
            self._unprocessed(put('foo2')),
            # Then the last response shows that everything went through.
            {'UnprocessedItems': {}},
        ]
        for hash_value in ('foo1', 'foo2', 'foo3'):
            self.batch_writer.put_item(Item={'Hash': hash_value})

        # Two calls of two requests each are expected: foo1,foo2 and then
        # foo2,foo3.  foo2 is sent twice because the first response listed
        # it as unprocessed, so it has to be part of the next batch.
        self.assert_batch_write_calls_are([
            self._batch_of(put('foo1'), put('foo2')),
            self._batch_of(put('foo2'), put('foo3')),
        ])

    def test_all_items_flushed_on_exit(self):
        # A single item is below flush_amount, yet it is expected to have
        # been sent once the with-block has been left.
        with self.batch_writer as b:
            b.put_item(Item={'Hash': 'foo1'})
        self.assert_batch_write_calls_are([
            self._batch_of(self._put_request('foo1')),
        ])

    def test_repeated_flushing_on_exit(self):
        put = self._put_request
        # Successive responses leave foo2 and foo3, then only foo3, and
        # finally nothing unprocessed.
        self.client.batch_write_item.side_effect = [
            self._unprocessed(put('foo2'), put('foo3')),
            self._unprocessed(put('foo3')),
            {'UnprocessedItems': {}},
        ]
        with BatchWriter(self.table_name, self.client, flush_amount=4) as b:
            for hash_value in ('foo1', 'foo2', 'foo3'):
                b.put_item(Item={'Hash': hash_value})
        # Three calls are expected on exit: the normal write with all three
        # items, then the two reported as unprocessed, then the last one.
        self.assert_batch_write_calls_are([
            self._batch_of(put('foo1'), put('foo2'), put('foo3')),
            self._batch_of(put('foo2'), put('foo3')),
            self._batch_of(put('foo3')),
        ])
コード例 #11
0
ファイル: test_table.py プロジェクト: Armin-Smailzade/boto3
class BaseTransformationTest(unittest.TestCase):

    maxDiff = None

    def setUp(self):
        # Build a BatchWriter around a mocked client whose batch_write_item
        # reports, by default, that no items were left unprocessed.
        self.table_name = 'tablename'
        self.flush_amount = 2
        client = mock.Mock()
        client.batch_write_item.return_value = {'UnprocessedItems': {}}
        self.client = client
        self.batch_writer = BatchWriter(self.table_name, client,
                                        self.flush_amount)

    def assert_batch_write_calls_are(self, expected_batch_writes):
        # Verify the number of batch_write_item calls and, in order, the
        # keyword arguments each call received.
        batch_write_item = self.client.batch_write_item
        self.assertEqual(batch_write_item.call_count,
                         len(expected_batch_writes))
        recorded_kwargs = []
        for call in batch_write_item.call_args_list:
            # Index 1 of a recorded call holds its keyword arguments.
            recorded_kwargs.append(call[1])
        self.assertEqual(recorded_kwargs, expected_batch_writes)

    def test_batch_write_does_not_immediately_write(self):
        # One item is below flush_amount (2), so the client is not called.
        writer, client = self.batch_writer, self.client
        writer.put_item(Item={'Hash': 'foo'})
        self.assertFalse(client.batch_write_item.called)

    def test_batch_write_flushes_at_flush_amount(self):
        hash_values = ('foo1', 'foo2')
        for hash_value in hash_values:
            self.batch_writer.put_item(Item={'Hash': hash_value})
        # Reaching flush_amount (2) produces exactly one call that carries
        # both puts in submission order.
        sent_requests = [
            {'PutRequest': {'Item': {'Hash': hash_value}}}
            for hash_value in hash_values
        ]
        self.assert_batch_write_calls_are(
            [{'RequestItems': {self.table_name: sent_requests}}])

    def test_multiple_flushes_reset_items_to_put(self):
        def puts(*hash_values):
            return {
                'RequestItems': {
                    self.table_name: [
                        {'PutRequest': {'Item': {'Hash': hash_value}}}
                        for hash_value in hash_values
                    ],
                },
            }

        for hash_value in ('foo1', 'foo2', 'foo3', 'foo4'):
            self.batch_writer.put_item(Item={'Hash': hash_value})
        # Two calls are expected: one for foo1,foo2 and one for foo3,foo4.
        self.assert_batch_write_calls_are(
            [puts('foo1', 'foo2'), puts('foo3', 'foo4')])

    def test_can_handle_puts_and_deletes(self):
        writer = self.batch_writer
        writer.put_item(Item={'Hash': 'foo1'})
        writer.delete_item(Key={'Hash': 'foo2'})
        # A put and a delete travel together in the same call.
        sent_requests = [
            {'PutRequest': {'Item': {'Hash': 'foo1'}}},
            {'DeleteRequest': {'Key': {'Hash': 'foo2'}}},
        ]
        self.assert_batch_write_calls_are(
            [{'RequestItems': {self.table_name: sent_requests}}])

    def test_multiple_batch_calls_with_mixed_deletes(self):
        def put(hash_value):
            return {'PutRequest': {'Item': {'Hash': hash_value}}}

        def delete(hash_value):
            return {'DeleteRequest': {'Key': {'Hash': hash_value}}}

        def batch_of(*requests):
            return {'RequestItems': {self.table_name: list(requests)}}

        writer = self.batch_writer
        writer.put_item(Item={'Hash': 'foo1'})
        writer.delete_item(Key={'Hash': 'foo2'})
        writer.delete_item(Key={'Hash': 'foo3'})
        writer.put_item(Item={'Hash': 'foo4'})
        # Requests keep their submission order across both calls.
        self.assert_batch_write_calls_are([
            batch_of(put('foo1'), delete('foo2')),
            batch_of(delete('foo3'), put('foo4')),
        ])

    def test_unprocessed_items_added_to_next_batch(self):
        def put(hash_value):
            return {'PutRequest': {'Item': {'Hash': hash_value}}}

        def batch_of(*hash_values):
            return {
                'RequestItems': {
                    self.table_name: [put(h) for h in hash_values],
                },
            }

        # The first response reports foo2 as unprocessed; the second one
        # shows that everything went through.
        self.client.batch_write_item.side_effect = [
            {'UnprocessedItems': {self.table_name: [put('foo2')]}},
            {'UnprocessedItems': {}},
        ]
        for hash_value in ('foo1', 'foo2', 'foo3'):
            self.batch_writer.put_item(Item={'Hash': hash_value})

        # Two calls of two requests each are expected: foo1,foo2 and then
        # foo2,foo3.  foo2 is sent twice because the first response listed
        # it as unprocessed, so it has to be part of the next batch.
        self.assert_batch_write_calls_are(
            [batch_of('foo1', 'foo2'), batch_of('foo2', 'foo3')])

    def test_all_items_flushed_on_exit(self):
        # A single item is below flush_amount, yet it is expected to have
        # been sent once the with-block has been left.
        with self.batch_writer as b:
            b.put_item(Item={'Hash': 'foo1'})
        only_request = {'PutRequest': {'Item': {'Hash': 'foo1'}}}
        expected_call = {'RequestItems': {self.table_name: [only_request]}}
        self.assert_batch_write_calls_are([expected_call])

    def test_never_send_more_than_max_batch_size(self):
        def put(hash_value):
            return {'PutRequest': {'Item': {'Hash': hash_value}}}

        def batch_of(*hash_values):
            return {
                'RequestItems': {
                    self.table_name: [put(h) for h in hash_values],
                },
            }

        def unprocessed(*hash_values):
            return {
                'UnprocessedItems': {
                    self.table_name: [put(h) for h in hash_values],
                },
            }

        # Suppose the server first answers that every item of the batch was
        # unprocessed, then that only foo2 was, and finally that nothing is
        # left.
        self.client.batch_write_item.side_effect = [
            unprocessed('foo1', 'foo2'),
            unprocessed('foo2'),
            {'UnprocessedItems': {}},
        ]
        with BatchWriter(self.table_name, self.client, flush_amount=2) as b:
            for hash_value in ('foo1', 'foo2', 'foo3'):
                b.put_item(Item={'Hash': hash_value})

        # No call ever carries more than flush_amount=2 requests.  Even with
        # two unprocessed items handed back, the writer still sends only two
        # at a time, in order; the item that is reported unprocessed again
        # goes out in a further call together with foo3.
        self.assert_batch_write_calls_are([
            batch_of('foo1', 'foo2'),
            batch_of('foo1', 'foo2'),
            batch_of('foo3', 'foo2'),
        ])


    def test_repeated_flushing_on_exit(self):
        def put(hash_value):
            return {'PutRequest': {'Item': {'Hash': hash_value}}}

        def batch_of(*hash_values):
            return {
                'RequestItems': {
                    self.table_name: [put(h) for h in hash_values],
                },
            }

        def unprocessed(*hash_values):
            return {
                'UnprocessedItems': {
                    self.table_name: [put(h) for h in hash_values],
                },
            }

        # Simulate unprocessed items being reported across several calls:
        # foo2 and foo3 first, then only foo3, then nothing.
        self.client.batch_write_item.side_effect = [
            unprocessed('foo2', 'foo3'),
            unprocessed('foo3'),
            {'UnprocessedItems': {}},
        ]
        with BatchWriter(self.table_name, self.client, flush_amount=4) as b:
            for hash_value in ('foo1', 'foo2', 'foo3'):
                b.put_item(Item={'Hash': hash_value})
        # Three calls are expected on exit: the normal write with all three
        # items, then the two reported as unprocessed, then the last one.
        self.assert_batch_write_calls_are([
            batch_of('foo1', 'foo2', 'foo3'),
            batch_of('foo2', 'foo3'),
            batch_of('foo3'),
        ])