Example #1
0
    def test_generators(self):
        """Iterators and generators are measured like `sys.getsizeof`.

        The deep size of a list iterator and of a generator object must
        equal the shallow size reported for an equivalent fresh object.
        """
        values = [1, 2]
        measured = util.get_size_of_deep(iter(values))
        expected = sys.getsizeof(iter(values))
        assert measured == expected

        def count_to_ten():
            for n in range(10):
                yield n

        measured = util.get_size_of_deep(count_to_ten())
        expected = sys.getsizeof(count_to_ten())
        assert measured == expected
Example #2
0
    def test_sequences(self):
        """Lists and dicts of ints are sized as the sum of their ints.

        Each case pairs a container with the number of ``0`` ints it
        holds (dict keys included); the expected size is that count
        times ``sys.getsizeof(0)``.
        """
        pytest.importorskip('six', reason='Uses six for compatibility')

        int_size = sys.getsizeof(0)
        cases = [
            # Lists, flat and nested
            ([0], 1),
            ([0, 0], 2),
            ([[0, 0]], 2),
            # Dicts: key and value both count
            ({0: 0}, 2),
            ({0: [0]}, 2),
        ]
        for container, num_ints in cases:
            assert util.get_size_of_deep(container) == num_ints * int_size
Example #3
0
    def test_basic(self):
        """Scalars and byte buffers are sized exactly as `sys.getsizeof`.

        Covers the empty string, the int ``0``, a ``bytes`` value and a
        ``bytearray`` built from the same three-byte literal.
        """
        pytest.importorskip('six', reason='Uses six for compatibility')

        for scalar in ("", 0):
            assert util.get_size_of_deep(scalar) == sys.getsizeof(scalar)

        raw = b"abc"

        measured_bytes = util.get_size_of_deep(bytes(raw))
        assert measured_bytes == sys.getsizeof(bytes(raw))

        measured_bytearray = util.get_size_of_deep(bytearray(raw))
        assert measured_bytearray == sys.getsizeof(bytearray(raw))
Example #4
0
 def test_numpy(self):
   """numpy arrays are sized by their ``nbytes``.

   Skipped when numpy (or six) is not importable. The same array
   object is reused inside the containers, and each occurrence is
   expected to contribute its ``nbytes``.
   """
   pytest.importorskip('six', reason='Uses six for compatibility')

   np = pytest.importorskip("numpy")

   arr = np.array([0])
   array_bytes = arr.nbytes
   cases = [
     (arr, array_bytes),
     ([arr], array_bytes),
     ([arr, arr], 2 * array_bytes),
     # Dict key is an int; the value is the array
     ({0: arr}, sys.getsizeof(0) + array_bytes),
   ]
   for value, expected in cases:
     assert util.get_size_of_deep(value) == expected
Example #5
0
    def test_big_lists(self):
        """Large lists are sized as the sum of their elements' sizes.

        Exercises a million-element int list (the case the original
        author wants to be fast) and a list of strings whose element
        sizes differ.
        """
        pytest.importorskip('six', reason='Uses six for compatibility')

        # Stand-in for a large Tensor-like array stored as a plain list;
        # get_size_of_deep() is meant to handle this quickly.
        big_ints = list(range(int(1e6)))
        expected_ints = len(big_ints) * sys.getsizeof(0)
        assert util.get_size_of_deep(big_ints) == expected_ints

        # Strings vary in size, so the expected value is a per-element sum
        # (the original notes this path needs a slower reduce).
        strings = [str(i) for i in range(100)]
        expected_strings = sum(sys.getsizeof(s) for s in strings)
        assert util.get_size_of_deep(strings) == expected_strings
Example #6
0
        def __call__(self, pid):
            """Yield the converted rows of one partition, tracking throughput.

            Generator. Selects the rows of `df` whose `shard_col` equals
            `pid`, repartitions them, maps each through
            `spark_row_to_tf_element`, and yields the results one at a time.
            `df`, `shard_col` and `spark_row_to_tf_element` are not defined
            in this method; presumably they come from an enclosing scope.

            Per-partition stats are kept in a local `util.ThruputObserver`.
            Once the partition is exhausted, its totals are folded into
            `self.overall_thruput` while holding `self.lock`.

            Args:
                pid: partition id to read; numpy scalar types are unboxed
                    to plain Python values first.
            """
            import numpy as np

            # numpy boxed numerics are converted to native Python values
            if isinstance(pid, np.generic):
                pid = pid.item()

            shard_df = df.filter(df[shard_col] == pid)
            shard_rdd = shard_df.rdd.repartition(100)
            elements = shard_rdd.map(spark_row_to_tf_element).toLocalIterator()

            util.log.info("Reading partition %s " % pid)
            part_thruput = util.ThruputObserver(
                name='Partition %s' % pid,
                log_on_del=True,
            )
            part_thruput.start_block()
            for element in elements:
                yield element
                # Tallied only after the consumer resumes this generator
                part_thruput.update_tallies(
                    n=1,
                    num_bytes=util.get_size_of_deep(element),
                )
            part_thruput.stop_block()
            util.log.info(
                "Done reading partition %s, stats:\n %s" % (pid, part_thruput)
            )

            with self.lock:
                # Partitions are read in parallel, so the overall (main
                # thread) timing stats are maintained separately from the
                # per-partition observer: close the current block with this
                # partition's totals, maybe log, then open a new block.
                self.overall_thruput.stop_block(
                    n=part_thruput.n,
                    num_bytes=part_thruput.num_bytes,
                )
                self.overall_thruput.maybe_log_progress(every_n=1)
                self.overall_thruput.start_block()
Example #7
0
  def test_obj(self):
    """User-defined objects are sized by their attributes.

    An instance with a ``__dict__`` is expected to cost the attribute
    name plus its value; a ``__slots__`` instance only the value.
    """
    pytest.importorskip('six', reason='Uses six for compatibility')

    int_size = sys.getsizeof(0)

    class Obj(object):
      # Ordinary class: instances carry a __dict__
      def __init__(self):
        self.x = 0

    dict_backed_size = sys.getsizeof('x') + int_size
    assert util.get_size_of_deep(Obj()) == dict_backed_size
    assert util.get_size_of_deep([Obj()]) == dict_backed_size
    assert util.get_size_of_deep([Obj(), Obj()]) == 2 * dict_backed_size

    class Slotted(object):
      # Slotted class: no per-instance __dict__
      __slots__ = ['x']

      def __init__(self):
        self.x = 0

    assert util.get_size_of_deep(Slotted()) == int_size
    assert util.get_size_of_deep([Slotted()]) == int_size
    assert util.get_size_of_deep([Slotted(), Slotted()]) == 2 * int_size
Example #8
0
 def test_basic(self):
   """The empty string and the int 0 are sized as `sys.getsizeof`."""
   pytest.importorskip('six', reason='Uses six for compatibility')

   for scalar in ("", 0):
     assert util.get_size_of_deep(scalar) == sys.getsizeof(scalar)