def words_generator(n):
    """Generate ``n`` gibberish words and print timing and size statistics.

    The words come from ``Gibberish().generate_words(n)``.  The elapsed
    generation time, the number of words, the number of distinct words and
    the flat size of the returned container are printed to stdout.

    Returns:
        A tuple ``(words, len_unique)``: the generated words and the number
        of distinct entries among them.
    """
    started = time.time()
    words = Gibberish().generate_words(n)
    elapsed = time.time() - started

    print('Time generating:: {}'.format(elapsed))
    print('Real Number of words: {}'.format(len(words)))

    len_unique = len(set(words))
    print('Number of unique words: {}'.format(len_unique))

    # flatsize() is evaluated once per unit, in the same order as printed.
    size_mb = asizeof.flatsize(words)/1024/1024
    size_kb = asizeof.flatsize(words)/1024
    print('Size of words: {} Mb, {} Kb'.format(size_mb, size_kb))
    print('________________________________________________________________\n')
    return words, len_unique
Beispiel #2
0
 def _print_functions(self, obj, name=None, align=8, detail=MAX,
                      code=False, limit=MAX, opt='', **unused):
     """Print the result of each basic asizeof function applied to ``obj``.

     When ``name`` is truthy a header line containing ``name`` and ``opt``
     is printed first.  ``align`` is forwarded to ``flatsize()`` and
     ``asized()``; ``detail``, ``code`` and ``limit`` are forwarded to
     ``asized()`` only.  Extra keyword arguments are accepted and ignored.
     """
     emit = self._printf
     if name:
         emit('%sasizeof functions for %s ... %s', os.linesep, name, opt)

     row = '%s(): %s'
     emit(row, ' basicsize', asizeof.basicsize(obj))
     emit(row, ' itemsize', asizeof.itemsize(obj))
     emit('%s(): %r', ' leng', asizeof.leng(obj))  # leng is shown via repr
     emit(row, ' refs', _repr(asizeof.refs(obj)))
     emit(row, ' flatsize', asizeof.flatsize(obj, align=align))

     asized_opts = dict(align=align, detail=detail, code=code, limit=limit)
     emit(row, ' asized', asizeof.asized(obj, **asized_opts))
Beispiel #3
0
 def _print_functions(self,
                      obj,
                      name=None,
                      align=8,
                      detail=MAX,
                      code=False,
                      limit=MAX,
                      opt='',
                      **unused):
     """Report basicsize, itemsize, leng, refs, flatsize and asized of ``obj``.

     A header mentioning ``name`` and ``opt`` is written first if ``name``
     is truthy.  ``align`` goes to ``flatsize()`` and ``asized()``;
     ``detail``, ``code`` and ``limit`` go to ``asized()``.  Any other
     keyword arguments are ignored.
     """
     if name:
         header = ('%sasizeof functions for %s ... %s', os.linesep, name, opt)
         self._printf(*header)

     basic = asizeof.basicsize(obj)
     self._printf('%s(): %s', ' basicsize', basic)

     item = asizeof.itemsize(obj)
     self._printf('%s(): %s', ' itemsize', item)

     length = asizeof.leng(obj)
     self._printf('%s(): %r', ' leng', length)

     referents = _repr(asizeof.refs(obj))
     self._printf('%s(): %s', ' refs', referents)

     flat = asizeof.flatsize(obj, align=align)
     self._printf('%s(): %s', ' flatsize', flat)

     sized = asizeof.asized(obj,
                            align=align,
                            detail=detail,
                            code=code,
                            limit=limit)
     self._printf('%s(): %s', ' asized', sized)
Beispiel #4
0
def test_flatsize(failf=None, stdf=None):
    '''Compare the results of **flatsize()** without using ``sys.getsizeof()``
       with the accurate sizes returned by ``sys.getsizeof()``.

       Return the total number of tests and number of unexpected failures.
       If ``asizeof._getsizeof`` is falsy nothing is compared and ``(0, 0)``
       is returned.

       *failf*, if given, is called with a %-style format string and its
       arguments for every unexpected mismatch.  *stdf*, if given, is
       passed as ``file=`` to ``asizeof._printf`` to report every mismatch,
       expected or not.

       Expect differences for sequences as dicts, lists, sets, tuples, etc.
       While this is no proof for the accuracy of **flatsize()** on Python
       builds without ``sys.getsizeof()``, it does provide some evidence that
       function **flatsize()** produces reasonable and usable results.

       ``asizeof._getsizeof`` is set to None while the sizes are compared
       and is always restored afterwards, even if a comparison raises.
    '''
    t, g, e = [], asizeof._getsizeof, 0
    if g:
        for v in asizeof._values(asizeof._typedefs):
            t.append(v.type)
            try:  # creating one instance
                if v.type.__module__ not in ('io',):  # avoid 3.0 RuntimeWarning
                    t.append(v.type())
            except Exception:  # ignore errors
                pass
        t.extend(({1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8},
                  [1, 2, 3, 4, 5, 6, 7, 8], ['1', '2', '3'], [0] * 100,
                  '12345678', 'x' * 1001,
                  (1, 2, 3, 4, 5, 6, 7, 8), ('1', '2', '3'), (0,) * 100,
                  asizeof._Slots((1, 2, 3, 4, 5, 6, 7, 8)),
                  asizeof._Slots(('1', '2', '3')),
                  asizeof._Slots((0,) * 100),
                  0, 1 << 8, 1 << 16, 1 << 32, 1 << 64, 1 << 128,
                  complex(0, 1), True, False))
        asizeof._getsizeof = None  # zap _getsizeof for flatsize()
        try:
            for o in t:
                a = asizeof.flatsize(o)
                s = sys.getsizeof(o, 0)  # 0 as default
                if a == s:
                    continue
                if isinstance(o, (dict, list, set, frozenset, tuple, bytes)):
                    # flatsize() approximates length of sequences
                    x = ', expected failure'
                elif (isinstance(o, (type, bool, asizeof.ABCMeta)) and
                      sys.version_info >= (3, 0)):
                    x = ', expected failure'
                elif isinstance(o, str) and sys.version_info >= (3, 3):
                    x = ', expected failure'
                elif isinstance(o, deque) and sys.version_info >= (3, 4):
                    x = ', expected failure'
                else:
                    x = ', %r' % asizeof._typedefof(o)
                    e += 1
                    if failf:  # report failure
                        failf('%s vs %s for %s: %s',
                              a, s, asizeof._nameof(type(o)), _repr(o))
                if stdf:
                    asizeof._printf('flatsize() %s vs sys.getsizeof() %s for %s: %s%s',
                        a, s, asizeof._nameof(type(o)), _repr(o), x, file=stdf)
        finally:
            # Restore the module global even when flatsize(), failf or
            # _printf raises, so later sizing calls are not left degraded.
            asizeof._getsizeof = g
    return len(t), e
Beispiel #5
0
        def test_flatsize(self):
            '''Check asizeof.flatsize() against basicsize + leng * itemsize
               for list, tuple, set and frozenset, and that an alignment
               of 3 raises ValueError.
            '''
            items = ["spam", 2, 3, 4, "eggs", 6, 7, 8]
            for make in (list, tuple, set, frozenset):
                container = make(items)
                basic = asizeof.basicsize(container)
                per_item = asizeof.itemsize(container)
                count = asizeof.leng(container)
                flat = asizeof.flatsize(container)
                expected = basic + (count * per_item)
                self.assertEqual(flat, expected,
                                 (flat, basic, count, per_item))

            self.assertRaises(ValueError, asizeof.flatsize, items, align=3)
Beispiel #6
0
        def test_flatsize(self):
            '''Verify that flatsize() of a container equals its basic size
               plus length times item size, and that align=3 is rejected
               with ValueError.
            '''
            sample = ["spam", 2, 3, 4, "eggs", 6, 7, 8]
            for container_type in (list, tuple, set, frozenset):
                obj = container_type(sample)
                bsz = asizeof.basicsize(obj)
                isz = asizeof.itemsize(obj)
                lng = asizeof.leng(obj)
                fsz = asizeof.flatsize(obj)
                details = (fsz, bsz, lng, isz)  # shown when the assert fails
                self.assertEqual(fsz, bsz + (lng * isz), details)

            bad_align = {'align': 3}
            self.assertRaises(ValueError, asizeof.flatsize, sample, **bad_align)
    def __init__(self,
                 dict_of_classes_with_training_files,
                 max_elements=2000000,
                 error_rate=0.00001):
        """Create and train one Bloom filter per class.

        Args:
            dict_of_classes_with_training_files: mapping whose keys are
                class (model) names and whose values are iterables of
                training file paths.
            max_elements: forwarded to ``BloomFilter`` as ``max_elements``.
            error_rate: forwarded to ``BloomFilter`` as ``error_rate``.

        For every class the flat size of the fresh filter and the total
        training time are printed to stdout.
        """
        self.blooms = {}
        self.classification_result = []
        self.stop_words = set(
            stopwords.words("english") + get_stop_words('english'))

        for model in dict_of_classes_with_training_files:
            bloom = BloomFilter(max_elements=max_elements,
                                error_rate=error_rate)
            self.blooms[model] = bloom

            print('Size of initializing Bloom Filter: {} Kb'.format(
                asizeof.flatsize(bloom) / 1000))

            # Train the filter on every word produced from each file.
            start_overall = time.time()
            for file_path in dict_of_classes_with_training_files[model]:
                # Undecodable bytes are dropped rather than raising.
                with open(file_path, errors='ignore') as f:
                    text = " ".join(f.readlines())
                # The file is closed before the text is preprocessed.
                for word in self.text_preprocessing(
                        text, os.path.basename(file_path)):
                    bloom.add(word)
            print('\nTime for training model {}: {}'.format(
                model,
                time.time() - start_overall))
            print(
                '________________________________________________________________\n'
            )
    quit(2)
# Print a summary of the controller, the driver statistics and the node
# count of `network` (defined earlier in the script, outside this excerpt).
print("------------------------------------------------------------")
print("Controller capabilities : {}".format(network.controller.capabilities))
print("Controller node capabilities : {}".format(network.controller.node.capabilities))
print("------------------------------------------------------------")
print("Driver statistics : {}".format(network.controller.stats))
print("------------------------------------------------------------")
print("Nodes in network : {}".format(network.nodes_count))
print("------------------------------------------------------------")
# Memory report: each object is measured with asizeof, basicsize, itemsize
# and flatsize.
# NOTE(review): these are presumably the pympler.asizeof helpers imported
# by name earlier in the script -- confirm.
print("Memory use : ")
print("------------------------------------------------------------")
# The network object itself.
print("Memory use for network {} : ".format(network.home_id_str))
print("  asizeof   : {} bytes".format(asizeof(network)))
print("  basicsize : {} bytes".format(basicsize(network)))
print("  itemsize  : {} bytes".format(itemsize(network)))
print("  flatsize  : {} bytes".format(flatsize(network)))
print("------------------------------------------------------------")
# The manager attached to the network.
manager = network.manager
print("Memory use for manager : ")
print("  asizeof   : {} bytes".format(asizeof(manager)))
print("  basicsize : {} bytes".format(basicsize(manager)))
print("  itemsize  : {} bytes".format(itemsize(manager)))
print("  flatsize  : {} bytes".format(flatsize(manager)))
print("------------------------------------------------------------")
# The controller attached to the network.
print("Memory use for controller : ")
print("  asizeof   : {} bytes".format(asizeof(network.controller)))
print("  basicsize : {} bytes".format(basicsize(network.controller)))
print("  itemsize  : {} bytes".format(itemsize(network.controller)))
print("  flatsize  : {} bytes".format(flatsize(network.controller)))
print("------------------------------------------------------------")
# Only the scene count is printed here; no scene object is measured.
print("Memory use for {} scenes (scenes are generated on call) : ".format(network.scenes_count))
# Print a summary of the controller, the driver statistics and the node
# count of `network` (defined outside this excerpt).
print("------------------------------------------------------------")
print("Controller capabilities : {}".format(network.controller.capabilities))
print("Controller node capabilities : {}".format(
    network.controller.node.capabilities))
print("------------------------------------------------------------")
print("Driver statistics : {}".format(network.controller.stats))
print("------------------------------------------------------------")
print("Nodes in network : {}".format(network.nodes_count))
print("------------------------------------------------------------")
# Memory report: each object below is measured with asizeof, basicsize,
# itemsize and flatsize.
# NOTE(review): presumably the pympler.asizeof helpers imported by name
# earlier in the script -- confirm.
print("Memory use : ")
print("------------------------------------------------------------")
# The network object itself.
print("Memory use for network {} : ".format(network.home_id_str))
print("  asizeof   : {} bytes".format(asizeof(network)))
print("  basicsize : {} bytes".format(basicsize(network)))
print("  itemsize  : {} bytes".format(itemsize(network)))
print("  flatsize  : {} bytes".format(flatsize(network)))
print("------------------------------------------------------------")
# The manager attached to the network.
manager = network.manager
print("Memory use for manager : ")
print("  asizeof   : {} bytes".format(asizeof(manager)))
print("  basicsize : {} bytes".format(basicsize(manager)))
print("  itemsize  : {} bytes".format(itemsize(manager)))
print("  flatsize  : {} bytes".format(flatsize(manager)))
print("------------------------------------------------------------")
# The controller attached to the network.
print("Memory use for controller : ")
print("  asizeof   : {} bytes".format(asizeof(network.controller)))
print("  basicsize : {} bytes".format(basicsize(network.controller)))
print("  itemsize  : {} bytes".format(itemsize(network.controller)))
print("  flatsize  : {} bytes".format(flatsize(network.controller)))
print("------------------------------------------------------------")
print("Memory use for {} scenes (scenes are generated on call) : ".format(
Beispiel #10
0
    def to_db(self, server = 'beavis.ph.utexas.edu'):
        """Serialize this object's attributes and store them in MongoDB.

        A dict is seeded with fixed metadata and then filled from
        ``self.__dict__``, a fixed list of keys is removed, the result is
        passed through ``self.serialize`` and the serialized dict is
        inserted into the ``alpha_runs`` collection of ``new_database`` on
        ``server``.  If the insert raises ``DuplicateKeyError``, documents
        with the same ``path`` or the same ``md5`` are removed and the
        insert is retried once.

        This is Python 2 code (print statements, ``dict.iteritems``) and
        prints debugging output to stdout throughout.

        server -- host name passed to ``MongoClient``.
        """
        print 'in to_db'
        # Fixed metadata; instance attributes are merged in below.
        sim_blob = {'author': 'Dmitry',
         'tags': ['fusion', 'in', '50 years'],
         'date': datetime.utcnow()}
        for key, value in self.__dict__.iteritems():
            # Debug output only: if sizing the value fails, print just the key.
            # NOTE(review): the bare except also catches KeyboardInterrupt
            # and SystemExit -- consider narrowing.
            try:
                print key, value.__class__, sys.getsizeof(value), asizeof(value)
            except:
                print key

            # ban_types is defined outside this method; values of any other
            # type are copied as they are.
            if type(value) not in ban_types:
                sim_blob[key] = value
            # Reached only when the type is in ban_types and is the numpy
            # array type.
            elif type(value) == type(np.array([12])):
                print key, value.nbytes
                if value.ndim == 1:
                    # 1-D arrays are stored as plain lists.
                    sim_blob[key] = value.tolist()
                elif value.size == 1:
                    sim_blob[key] = value
                else:
                    print 'adding to the db obj', key, type(value), value.shape, value.size
                    # Larger arrays are dropped once they reach 1600000
                    # elements.
                    if value.size< 1600000:
                        sim_blob[key] = value
                    else:
                        print 'nevermind, ', key, ' is too big'

        print 'before: ', asizeof(sim_blob), ' ', sys.getsizeof(sim_blob), ' ', flatsize(sim_blob)
        # Keys removed from the blob before serialization.
        cutlist = ['pos_i','kx','ky','pdf_x','df_x','v']
        #cutlist = ['pos_i','kx','ky','pdf_x','df_x','v']
        #cutlist = ['t_stop','dx']

        # NOTE(review): keep_list is only referenced by the commented-out
        # code below; no live code in this method reads it.
        keep_list = ['nz','nx','alpha_c']
        
        for k in cutlist:
            # sim_blob[k] raises KeyError for a missing key, which lands in
            # the except branch; the bare except also hides any other error.
            try:
                print 'k: ',(sim_blob[k])
                sim_blob.pop(k, None)
            except:
                print 'not found'
        #new_blob = {}
    # for k in keep_list:
        #     new_blob.pop(k, None)

        # print 'look for bugs'
        # for k in sim_blob:
        #     print 'trying: ',k#,sim_blob[k]
        #     new_blob[k] = sim_blob[k]
        #     ser_dict = self.serialize(new_blob)
        #     #print sys.getsizeof(new_dict), asizeof(ser_dict), ' ', asizesof(sim_blob)
        #     print BSON.encode(ser_dict).__sizeof__()

        # exit()

        #sim_blob = new_blob

        print 'after: ', asizeof(sim_blob), ' ', sys.getsizeof(sim_blob), ' ', flatsize(sim_blob)
        print 'coarse_x' in sim_blob
        print 'df_x' in sim_blob
        #print type(sim_blob['coarse_x']), sim_blob['coarse_x'][0].__class__
        #exit()
    
        ser_dict = self.serialize(sim_blob)
        # for elem in ser_dict:
        #     print elem,': ',type(ser_dict[elem])
        #print ser_dict
        # NOTE(review): `asizesof` (plural) is a different name from the
        # `asizeof` used above; confirm it is in scope and intended.
        print sys.getsizeof(ser_dict), asizeof(ser_dict), ' ', asizesof(sim_blob)
        print BSON.encode(ser_dict).__sizeof__()
        #exit()
        c = MongoClient(host=server)
        db = c.new_database
        print 'db info'
        print db.command('collstats', 'alpha_runs')
        alpha_runs = db.alpha_runs
        # Unique index on 'md5', so a repeated md5 raises DuplicateKeyError
        # on insert.
        alpha_runs.ensure_index('md5', unique=True, dropDups=True)
        #db.things.ensureIndex({'source_references.key' : 1}, {unique : true, dropDups : true})

        try:
            #print ser_dict.keys()
            # NOTE(review): `db` is passed as the second positional
            # argument -- confirm against pymongo's Collection.insert
            # signature.
            alpha_runs.insert(ser_dict, db)
        except mongoErr.DuplicateKeyError:
            # Remove documents matching either the path or the md5, then
            # insert the new document again.
            alpha_runs.remove({'path': ser_dict['path']})
            alpha_runs.remove({'md5': ser_dict['md5']})
            
            #alpha_runs.remove({ "$and" :[{'path': ser_dict['path']},{'md5': ser_dict['md5']}]})
            alpha_runs.insert(ser_dict, db)
            print 'Duplicate run not adding to db, but updating'
Beispiel #11
0
 def size(obj):
     """Return the flat size of ``obj`` as computed by ``asizeof.flatsize``."""
     flat = asizeof.flatsize(obj)
     return flat
Beispiel #12
0
def test_flatsize(failf=None, stdf=None):
    '''Compare the results of **flatsize()** without using ``sys.getsizeof()``
       with the accurate sizes returned by ``sys.getsizeof()``.

       Return the total number of tests and number of unexpected failures.
       If ``asizeof._getsizeof`` is falsy nothing is compared and ``(0, 0)``
       is returned.

       *failf*, if given, is called with a %-style format string and its
       arguments for every unexpected mismatch.  *stdf*, if given, is
       passed as ``file=`` to ``asizeof._printf`` to report every mismatch,
       expected or not.

       Expect differences for sequences as dicts, lists, sets, tuples, etc.
       While this is no proof for the accuracy of **flatsize()** on Python
       builds without ``sys.getsizeof()``, it does provide some evidence that
       function **flatsize()** produces reasonable and usable results.

       ``asizeof._getsizeof`` is set to None while the sizes are compared
       and is always restored afterwards, even if a comparison raises.
    '''
    t, g, e = [], asizeof._getsizeof, 0
    if g:
        # Iterate over a copy of the typedefs mapping.
        for v in asizeof._typedefs.copy().values():
            t.append(v.type)
            try:  # creating one instance
                if v.type.__module__ not in (
                        'io', ):  # avoid 3.0 RuntimeWarning
                    t.append(v.type())
            except Exception:  # ignore errors
                pass
        t.extend(
            ({
                1: 1,
                2: 2,
                3: 3,
                4: 4,
                5: 5,
                6: 6,
                7: 7,
                8: 8
            }, [1, 2, 3, 4, 5, 6, 7,
                8], ['1', '2', '3'], [0] * 100, '12345678', 'x' * 1001,
             (1, 2, 3, 4, 5, 6, 7, 8), ('1', '2', '3'), (0, ) * 100,
             asizeof._Slots((1, 2, 3, 4, 5, 6, 7, 8)),
             asizeof._Slots(('1', '2', '3')), asizeof._Slots(
                 (0, ) * 100), 0, 1 << 8, 1 << 16, 1 << 32, 1 << 64, 1 << 128,
             complex(0, 1), True, False))
        asizeof._getsizeof = None  # zap _getsizeof for flatsize()
        try:
            for o in t:
                a = asizeof.flatsize(o)
                s = sys.getsizeof(o, 0)  # 0 as default
                if a == s:
                    continue
                if isinstance(o, (dict, list, set, frozenset, tuple, bytes)):
                    # flatsize() approximates length of sequences
                    x = ', expected failure'
                elif (isinstance(o, (type, bool, asizeof.ABCMeta))
                      and sys.version_info >= (3, 0)):
                    x = ', expected failure'
                elif isinstance(o, str) and sys.version_info >= (3, 3):
                    x = ', expected failure'
                elif isinstance(o, deque) and sys.version_info >= (3, 4):
                    x = ', expected failure'
                else:
                    x = ', %r' % asizeof._typedefof(o)
                    e += 1
                    if failf:  # report failure
                        failf('%s vs %s for %s: %s', a, s,
                              asizeof._nameof(type(o)), _repr(o))
                if stdf:
                    asizeof._printf(
                        'flatsize() %s vs sys.getsizeof() %s for %s: %s%s',
                        a,
                        s,
                        asizeof._nameof(type(o)),
                        _repr(o),
                        x,
                        file=stdf)
        finally:
            # Restore the module global even when flatsize(), failf or
            # _printf raises, so later sizing calls are not left degraded.
            asizeof._getsizeof = g
    return len(t), e
Beispiel #13
0
    quit(2)
# Python 2 script fragment: print a summary of the controller, the driver
# statistics and the node count of `network` (defined outside this excerpt).
print "------------------------------------------------------------"
print "Controller capabilities : %s" % network.controller.capabilities
print "Controller node capabilities : %s" % network.controller.node.capabilities
print "------------------------------------------------------------"
print "Driver statistics : %s" % network.controller.stats
print "------------------------------------------------------------"
print "Nodes in network : %s" % network.nodes_count
print "------------------------------------------------------------"
# Memory report: each object below is measured with asizeof, basicsize,
# itemsize and flatsize.
# NOTE(review): presumably the pympler.asizeof helpers imported by name
# earlier in the script -- confirm.
print "Memory use : "
print "------------------------------------------------------------"
# The network object itself.
print "Memory use for network %s : " % (network.home_id_str)
print "  asizeof   : %s bytes" % (asizeof(network))
print "  basicsize : %s bytes" % (basicsize(network))
print "  itemsize  : %s bytes" % (itemsize(network))
print "  flatsize  : %s bytes" % (flatsize(network))
print "------------------------------------------------------------"
# The manager attached to the network.
manager = network.manager
print "Memory use for manager : "
print "  asizeof   : %s bytes" % (asizeof(manager))
print "  basicsize : %s bytes" % (basicsize(manager))
print "  itemsize  : %s bytes" % (itemsize(manager))
print "  flatsize  : %s bytes" % (flatsize(manager))
print "------------------------------------------------------------"
# The controller attached to the network.
print "Memory use for controller : "
print "  asizeof   : %s bytes" % (asizeof(network.controller))
print "  basicsize : %s bytes" % (basicsize(network.controller))
print "  itemsize  : %s bytes" % (itemsize(network.controller))
print "  flatsize  : %s bytes" % (flatsize(network.controller))
print "------------------------------------------------------------"
print "Memory use for %s scenes (scenes are generated on call) : " % (
    quit(2)
# Python 2 script fragment: print a summary of the controller, the driver
# statistics and the node count of `network` (defined outside this excerpt).
print "------------------------------------------------------------"
print "Controller capabilities : %s" % network.controller.capabilities
print "Controller node capabilities : %s" % network.controller.node.capabilities
print "------------------------------------------------------------"
print "Driver statistics : %s" % network.controller.stats
print "------------------------------------------------------------"
print "Nodes in network : %s" % network.nodes_count
print "------------------------------------------------------------"
# Memory report: each object below is measured with asizeof, basicsize,
# itemsize and flatsize.
# NOTE(review): presumably the pympler.asizeof helpers imported by name
# earlier in the script -- confirm.
print "Memory use : "
print "------------------------------------------------------------"
# The network object itself.
print "Memory use for network %s : " %(network.home_id_str)
print "  asizeof   : %s bytes" %(asizeof(network))
print "  basicsize : %s bytes" %(basicsize(network))
print "  itemsize  : %s bytes" %(itemsize(network))
print "  flatsize  : %s bytes" %(flatsize(network))
print "------------------------------------------------------------"
# The manager attached to the network.
manager = network.manager
print "Memory use for manager : "
print "  asizeof   : %s bytes" %(asizeof(manager))
print "  basicsize : %s bytes" %(basicsize(manager))
print "  itemsize  : %s bytes" %(itemsize(manager))
print "  flatsize  : %s bytes" %(flatsize(manager))
print "------------------------------------------------------------"
# The controller attached to the network.
print "Memory use for controller : "
print "  asizeof   : %s bytes" %(asizeof(network.controller))
print "  basicsize : %s bytes" %(basicsize(network.controller))
print "  itemsize  : %s bytes" %(itemsize(network.controller))
print "  flatsize  : %s bytes" %(flatsize(network.controller))
print "------------------------------------------------------------"
# Only the scene count is printed here; no scene object is measured.
print "Memory use for %s scenes (scenes are generated on call) : " %(network.scenes_count)