def retrain(chunks, model):
    alpha_decay_rate = (alpha_0 - alpha_final) / len(chunks)
    prev_0 = model.syn0.copy()
    prev_1 = model.syn1.copy()
    round_1 = np.vectorize(round_to_1)
    for i in range(len(chunks)):
        model.alpha = alpha_0 - alpha_decay_rate * i
        model.min_alpha = alpha_0 - alpha_decay_rate * (i + 1)
        model.train(chunks[i])
        # 0 get gradient
        grad_0 = model.syn0 - prev_0
        grad_1 = model.syn1 - prev_1
        # 2 apply gradient
        model.syn0 = prev_0 + grad_0
        model.syn1 = prev_1 + grad_1
        # 3 adhoc print
        print "normal array size: " + str(sys.getsizeof(pickle.dumps(grad_0, -1)))
        print grad_0
        grad_0 = grad_0.astype(np.float16)
        print grad_0
        print "new size: " + str(sys.getsizeof(pickle.dumps(grad_0, -1)))
        print "\n"
        # end: prepare for next iteration
        prev_0 = model.syn0.copy()
        prev_1 = model.syn1.copy()
    return model
def getstats(self):
    """
    return stats for events
    """
    stats = {}
    stats['Base Sizes'] = {}
    stats['Base Sizes']['showorder'] = ['Class', 'Api', 'loadedpluginsd', 'plugininfo']
    stats['Base Sizes']['loadedpluginsd'] = '%s bytes' % \
        sys.getsizeof(self.loadedpluginsd)
    stats['Base Sizes']['plugininfo'] = '%s bytes' % \
        sys.getsizeof(self.allplugininfo)
    stats['Base Sizes']['Class'] = '%s bytes' % sys.getsizeof(self)
    stats['Base Sizes']['Api'] = '%s bytes' % sys.getsizeof(self.api)

    stats['Plugins'] = {}
    stats['Plugins']['showorder'] = ['Total', 'Loaded', 'Bad']
    stats['Plugins']['Total'] = len(self.allplugininfo)
    stats['Plugins']['Loaded'] = len(self.loadedpluginsd)

    badplugins = self._updateallplugininfo()
    stats['Plugins']['Bad'] = len(badplugins)

    return stats
def read_input_files():
    for input_name, rel_path in inputs.items():
        absolute_path = os.path.join(workdir, rel_path)
        # read the xml file if it is valid
        try:
            xmldoc = minidom.parse(absolute_path)
            file_list = xmldoc.getElementsByTagName('FileName')
            file_names = map(lambda e: e.firstChild.data, file_list)
            for file_name in file_names:
                data_path = os.path.join(workdir, file_data_dir, file_name)
                data = ''
                with open(data_path, mode='rb') as blob:
                    data = blob.read()
                print("read %s (%s bytes)" % (file_name, sys.getsizeof(data)))
                junk_files.append(data)
        except Exception, e:
            # otherwise read the file itself as junk
            with open(absolute_path, mode='rb') as blob:
                data = blob.read()
            print("read blob %s (%s bytes)" % (absolute_path, sys.getsizeof(data)))
            junk_files.append(data)
def precompute_GLCM_PCA_Cache(images_dir_path, images_names):
    print '--------------------------------------------------------------------------'
    start = time.time()

    flattened_descriptors = [None] * len(images_names)
    for i in xrange(len(images_names)):
        image_name = images_names[i]
        raw_descriptor = getGLCM(images_dir_path, image_name)
        flattened_descriptors[i] = raw_descriptor.flatten()

    PCA_train_set = numpy.array(flattened_descriptors)
    pca = PCA(n_components=0.8)

    print 'RAW:'
    print PCA_train_set.shape
    print PCA_train_set
    print ''

    transformedTrainSet = pca.fit_transform(PCA_train_set)

    print 'PCAed:'
    print transformedTrainSet.shape
    print transformedTrainSet
    print ''

    end = time.time()
    secs = end - start
    msecs = secs * 1000  # millisecs

    for i in xrange(len(images_names)):
        image_name = images_names[i]
        glcm_PCA_cache[image_name] = transformedTrainSet[i]

    print 'PCA GLCMs cache size:' + repr(sys.getsizeof(glcm_PCA_cache)) + ' bytes'
    print 'PCA GLCMs cache dim:' + repr(len(glcm_PCA_cache.keys())) + '*' + repr(len(glcm_PCA_cache[glcm_PCA_cache.keys()[0]]))
    print 'PCA GLCMs descriptors size:' + repr(sys.getsizeof(glcm_PCA_cache.values())) + ' bytes'
    print 'PCA GLCM elapsed time: %f s (%f ms)' % (secs, msecs)
    print '--------------------------------------------------------------------------'
def init_cache(filename, cache_type, classes):
    assert cache_type in ('FULL', 'ENCODED', 'NONE')
    print('Load images in the cache: {}'.format(cache_type))
    generator, size = ObjectDetectorJson.json_iterator(filename, classes)
    items = [pickle.loads(item) for item in generator()]

    def _read_image_from_disk(im_path, cache_type):
        if cache_type == 'ENCODED':
            with open(im_path, 'rb') as file:
                encoded_image = file.read()
            encoded_image = np.array(bytearray(encoded_image), dtype=np.uint8)
            return encoded_image
        if cache_type == 'FULL':
            image = imread(im_path)
            return image

    items = tqdm(items, total=size, unit='images')
    total_cache_usage = 0
    for item in items:
        im_path = item['image']
        if cache_type != 'NONE':
            image = _read_image_from_disk(im_path, cache_type)
        else:
            image = None
        annotation = ObjectDetectorJson._get_annotation(item, classes)
        ObjectDetectorJson._cache[im_path] = [image, annotation]
        if isinstance(image, np.ndarray):
            total_cache_usage += image.nbytes
        else:
            total_cache_usage += sys.getsizeof(image)
        total_cache_usage += sys.getsizeof(annotation)  # Bad estimation
        items.set_postfix({'cache usage (GB)': total_cache_usage / 1024 ** 3})
def pro_progess(filepath="../data"):
    height = 299
    train_files = os.listdir(filepath + '/train')
    train = np.zeros((len(train_files), height, height, 3), dtype=np.uint8)
    labels = list(filter(lambda x: x[:3] == 'dog', train_files))
    test_files = os.listdir(filepath + '/test')
    test = np.zeros((len(test_files), height, height, 3), dtype=np.uint8)

    for i in tqdm(range(len(train_files))):
        # join the train/ subdirectory the files were listed from
        filename = os.path.join(filepath, 'train', train_files[i])
        img = cv2.imread(filename)
        img = cv2.resize(img, (height, height))
        train[i] = img[:, :, ::-1]

    for i in tqdm(range(len(test_files))):
        filename = os.path.join(filepath, 'test', test_files[i])
        img = cv2.imread(filename)
        img = cv2.resize(img, (height, height))
        test[i] = img[:, :, ::-1]

    print('Training Data Size = %.2f GB' % (sys.getsizeof(train) / 1024**3))
    print('Testing Data Size = %.2f GB' % (sys.getsizeof(test) / 1024**3))

    X_train, X_val, y_train, y_val = train_test_split(
        train, labels, shuffle=True, test_size=0.2, random_state=42)
    return X_train, X_val, y_train, y_val
def regulardict_to_ordereddict():
    """Sort a dict by its key, value, or customized rules; the user can choose
    ascending or descending order.

    An OrderedDict does not actually create a brand-new dict. It only creates a
    new sequence of keys and maintains that key sequence to decide the output
    order.

    If the keys and values of d are sortable numbers or strings, and we do not
    apply any complex rule but simply sort by key or by value, then the
    resulting OrderedDict adds no extra memory overhead, because the sorting
    only happens temporarily when the iter method is called.

    However, if we use a rule such as "sort by the second element of the
    value", extra memory overhead is incurred. That is the case in this
    example.
    """
    d = {"c": [1, 3], "a": [3, 2], "b": [2, 1]}

    print("{:=^100}".format("sort by value, ascend"))
    od1 = OrderedDict(
        sorted(list(d.items()),
               key=lambda t: t[1],  # t[0] sorts by key, t[1] sorts by value
               reverse=False)       # True = descending, False = ascending
    )
    for k, v in list(od1.items()):
        print(k, v)  # check that the output is ordered as configured

    print("{:=^100}".format("sort by value[1], descend"))
    od2 = OrderedDict(
        sorted(list(d.items()),
               key=lambda t: t[1][1],  # t[1][1] sorts by value[1]
               reverse=True)
    )
    for k, v in list(od2.items()):
        print(k, v)  # check that the output is ordered as configured

    print("memory used by the plain dict: %s" % sys.getsizeof(d))      # 288
    print("memory used by the ordered dict: %s" % sys.getsizeof(od1))  # 1304
    print("memory used by the ordered dict: %s" % sys.getsizeof(od2))  # 1304
    print("d == od1? %s" % (d == od1))  # True
    print("d == od2? %s" % (d == od2))  # True
def run_network_tests():
    msg = []
    # Build a message, of size m
    size = 1
    while size <= 32768:
        # Increase the size of msg until it is at least size big.
        while sys.getsizeof(msg) < size * 1024:
            msg.append(rand.random())
        size = size * 2

        # Take turns broadcasting from one process to all other processes.
        comm.Barrier()
        for s in range(comm.Get_size()):
            # Repeat it the specified number of times.
            for attempt in range(args.network_iterations):
                comm.Barrier()
                t = time.time()
                data = comm.bcast(msg, root=s)
                if rank == 0:
                    print "Broadcast message size: %s from rank: %s in %s" % (sys.getsizeof(msg), s, time.time() - t)

        # Send a message to and from every node
        for (s, r) in combinations(range(comm.Get_size()), 2):
            for attempt in range(args.network_iterations):
                comm.Barrier()
                t = time.time()
                if rank == s:
                    comm.send(msg, dest=1, tag=11)
                elif rank == r:
                    data = comm.recv(source=s, tag=11)
                    print "Got message size: %s from rank: %s to: %s in %s" % (sys.getsizeof(data), s, r, time.time() - t)
def GetSizeOfCache():
    """ Returns number of bytes held in cache.

    returns: int - size of cache including static and dynamic
    """
    global _static_data, _dynamic_data
    return sys.getsizeof(_static_data) + sys.getsizeof(_dynamic_data)
def send_to_client(self, sctag, msg):  # msg is json in str
    msg_ = check_smsg('send', sctag, msg)
    if msg_ == None:
        logging.error('msg is not proto-good')
        return

    cp_info = self.commpair_info_dict[sctag]
    proto = cp_info['proto']
    #sock = cp_info['sock']
    sock = None
    c_addr = cp_info['c_addr']
    #
    if proto == 'tcp':
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect(c_addr)
            sock.sendall(msg)
            logging.info('sent to %s_client=%s, datasize=%sBs',
                         proto, c_addr, sys.getsizeof(msg))
        except IOError as e:
            if e.errno == errno.EPIPE:
                # due to insufficient recv_buffer at the other end
                logging.error('broken pipe err, check recv_buffer')
        finally:
            sock.close()
    elif proto == 'udp':
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        sock.sendto(msg, c_addr)
        logging.info('sent to %s_client=%s, datasize=%sBs',
                     proto, c_addr, sys.getsizeof(msg))
def start(self, paras=None, refresh=True):
    """
    :param paras:
    :param refresh: True means the performance stats are refreshed and resources
        are released, i.e. a complete user request has finished; False is mainly
        used when running backtests in batch during parameter optimization.
    """
    try:
        if not self.__initialized:
            self.init()
        gc.collect()
        self.__is_alive = True
        if paras is not None:
            self.__strategy.set_parameters(paras)
        self.__strategy_engine.start()
        self.__data_generator.start()
        if refresh:
            self.__performance_manager = self.__strategy_engine.wait(self.__get_performance_manager)
            self.__data_generator.stop()
            if MEMORY_DEBUG:
                # log the memory taken up by garbage, etc.
                print('gb:\n%s' % sys.getsizeof(gc.garbage))
                gb_log = {}
                for gb in gc.garbage:
                    type_ = type(gb)
                    if type_ not in gb_log:
                        gb_log[type_] = 0
                    gb_log[type_] += sys.getsizeof(gb)
                print(gb_log)
            result = self.__performance_manager
        else:
            result = self.__strategy_engine.wait()
        self.log(self.__timer.time("strategy run finished, elapsed time: {0}"), logging.INFO)
        return result
    except Exception as e:
        self.stop()
        raise e
def sendLine(self, line, queue=True):
    '''
    normal sendLine with flood protection
    '''
    if type(line) == unicode:
        try:
            line = line.encode('utf-8')
        except UnicodeDecodeError:
            pass
    if line.startswith(('PRIVMSG', 'NOTICE')):
        length = sys.getsizeof(line) - sys.getsizeof(type(line)()) + 2
        if length <= self.floodBuffer - self._floodCurrentBuffer:
            # buffer isn't full, send
            self.updateFloodBuffer(length)
            irc.IRCClient.sendLine(self, line)
            return True
        else:
            # send an invalid command
            if queue:
                with self._floodLock:
                    self._floodQueue.append(line)
                    if not self._floodWaitInvalid:
                        irc.IRCClient.sendLine(self, '_!')
                        self._floodWaitInvalid = True
            return False
    else:
        irc.IRCClient.sendLine(self, line)
        return True
def obtenerCodigoHTMLparaCedula(cedula):
    # TSE web address for queries by ID-card (cedula) number
    URL = 'http://www.consulta.tse.go.cr/consulta_persona/consulta_cedula.aspx'
    # Create a browser instance
    b = mechanize.Browser()
    # Load the page
    r = b.open(URL)
    # Get the HTML source
    htmlSource = r.read()
    print 'received HTML of ' + str(sys.getsizeof(htmlSource)) + ' bytes.'
    # Find the captcha inside the HTML source
    valorCaptcha = re.search(r'[A-Z0-9]{6}\.bmp', htmlSource).group().rstrip('.bmp')
    # Select the form
    b.select_form('form1')
    # Fill in the fields required for the query
    b['txtcedula'] = cedula
    b['txtcodigo'] = valorCaptcha
    # Submit the form and wait for the response
    print 'sending form with cedula [' + cedula + '] and captcha [' + valorCaptcha + ']'
    respuesta = b.submit()
    # Get the HTML source of the response
    htmlSource = respuesta.read()
    print 'response received of ' + str(sys.getsizeof(htmlSource)) + ' bytes.'
    return htmlSource
def read_stbl(bstr):
    """Parse a string table (ID 0x220557DA)"""
    f = utils.BReader(bstr)
    if f.get_raw_bytes(4) != b'STBL':
        raise utils.FormatException("Bad magic")
    version = f.get_uint16()
    if version != 5:
        raise utils.FormatException("We only support STBLv5")
    compressed = f.get_uint8()
    numEntries = f.get_uint64()
    f.off += 2
    mnStringLength = f.get_uint32()  # This is the total size of all the strings
                                     # plus one null byte per string (to make the
                                     # parsing code faster, probably)
    entries = {}
    size = 0
    for _ in range(numEntries):
        keyHash = f.get_uint32()
        flags = f.get_uint8()  # What is in this?
        length = f.get_uint16()
        val = f.get_raw_bytes(length).decode('utf-8')
        entries[keyHash] = val
        # count both the key and the decoded string
        size += sys.getsizeof(keyHash) + sys.getsizeof(val)
    size += sys.getsizeof(entries)
    return entries
def frame_profile(frame_idx, serial_data_path, pickle_path, mol_types, coords,
                  sys_type, assoc_sel_idxs, assoc_type, inx_type):
    inxs, system, assoc = profile_coords(mol_types, coords, sys_type,
                                         assoc_sel_idxs, assoc_type, inx_type)
    # data output
    inx_type.pdb_serial_output(inxs[inx_type], serial_data_path, delim=" ")
    # persistent storage
    with open(pickle_path, 'wb') as f:
        pickle.dump(inxs, f)

    print("--------------------------------------------------------------------------------")
    print("frame", frame_idx)
    print("----------------------------------------")
    print("size of inxs {}".format(sys.getsizeof(inxs)))
    print("size of system {}".format(sys.getsizeof(system)))
    print("size of assoc {}".format(sys.getsizeof(assoc)))
    if len(inxs[inx_type]) > 0:
        print(len(inxs[inx_type]), "intermolecular hydrogen bonds")
        for inx in inxs[inx_type]:
            inx.pp()
    else:
        print(0, "intermolecular hydrogen bonds")
def saveImagesInLMDB(lmdbName, imgs):
    N = len(imgs)
    aux = misc.imread(imgs[0][0])
    # map_size = (num_images+1) * height * width * channels * byte_size * double size + labels size
    if len(aux.shape) == 3:
        map_size = (N+1) * aux.shape[0] * aux.shape[1] * aux.shape[2] * 8 * 2 + (N+1) * getsizeof(str(0))
    else:
        map_size = (N+1) * aux.shape[0] * aux.shape[1] * 8 * 2 + (N+1) * getsizeof(str(0))
    del aux
    #map_size = 3145780 * (N+1)
    print 'Map size:', map_size, 'bytes ->', int(map_size/1024.0/10240.0), 'MB'

    inputs = lmdb.open(lmdbName, map_size=map_size)
    with inputs.begin(write=True) as lmdbInputs:
        print 'Saving `%s`...' % lmdbName
        cont = 0; porc = 0; step = 10
        for i in imgs:
            if cont % (N/step) == 0:
                stdout.write('\r (%06d/%06d) %02d%%' % (cont, N, porc))
                porc += 100/step
                stdout.flush()
            insertImageInLMDB(i[0], i[1], i[2], lmdbInputs, cont)
            cont += 1
        print '\r (%06d/%06d) 100%%\nDone.' % (cont, N)
    print 'Closing LMDB file (it will take some seconds).'
    inputs.close()
def to_externalizable(self):
    compressed = zlib.compress(pickle.dumps(self.docs))
    logger.info(
        "Compression changed size of metric store from [%d] bytes to [%d] bytes" %
        (sys.getsizeof(self.docs), sys.getsizeof(compressed)))
    return compressed
def testSTructSize(self):
    tp0 = objc.createStructType("FooStruct", b'{FooStruct=}', None)
    tp1 = objc.createStructType("FooStruct", b'{FooStruct="first"i}', None)
    tp2 = objc.createStructType("FooStruct", b'{FooStruct="first"i"second"i}', None)

    self.assertEqual(sys.getsizeof(tp0()) + 1 * PTR_SIZE, sys.getsizeof(tp1()))
    self.assertEqual(sys.getsizeof(tp0()) + 2 * PTR_SIZE, sys.getsizeof(tp2()))
def evaluate_message_generation(net, state, action, reward, next_state):
    tic = time.time()
    net.forward()
    net.backward()
    toc = time.time()

    print "-- MESSAGE INFORMATION --"
    print "Forward/backward pass: %0.3f ms" % (1000 * (toc - tic))

    # Extract gradients
    tic = time.time()
    msg = create_gradient_message(net, compress=False)
    toc = time.time()
    print "Without compression:"
    print "Message size: %0.2f MB" % (sys.getsizeof(msg) / 1.0e6)
    print "Generation time: %0.2f ms" % (1000 * (toc - tic))

    tic = time.time()
    grads = load_gradient_message(msg, compressed=False)
    toc = time.time()
    print "Loading time: %0.2f ms" % (1000 * (toc - tic))

    tic = time.time()
    msg = create_gradient_message(net, compress=True)
    toc = time.time()
    print
    print "With compression:"
    print "Message size: %0.2f MB" % (sys.getsizeof(msg) / 1.0e6)
    print "Generation time: %0.2f ms" % (1000 * (toc - tic))

    tic = time.time()
    grads = load_gradient_message(msg, compressed=True)
    toc = time.time()
    print "Loading time: %0.2f ms" % (1000 * (toc - tic))

    print
    print "Parameters included in message:"
    for param in grads:
        print " -", param
def find_the_most_like(resp, first_fifty=50):
    """top 50 most liked"""
    most_liked = sorted(resp.iteritems(), key=lambda x: -x[1])[:first_fifty]
    print sys.getsizeof(most_liked)
    # not sure how much optimization is being done using sorted vs sort,
    # but it is using 472 bytes using sorted.
    return most_liked
def _pop_and_size(self, queue):
    """ Pop an item off the queue and return its size. Add its referents to the queue. """
    obj = queue.popleft()
    if id(obj) in self._seen:
        return 0
    else:
        self._seen.add(id(obj))
    if isinstance(obj, ndarray):
        # The elements of a numpy array are not seen by the gc as a referent
        container_size = sys.getsizeof(obj)
        if obj.dtype != dtype('object'):
            return container_size + obj.nbytes
        else:
            # In this case, each element is a pointer to a Python object.
            # Convert to a list to find the referents.
            referents = self._filtered_referents(obj.tolist())
            queue.extend(referents)
            return container_size
    else:
        size = sys.getsizeof(obj)
        referents = self._filtered_referents(obj)
        queue.extend(referents)
        return size
def main(data_txt_path, label_txt_path, stride=25, images_folder='roadC621/'):
    """
    Train a neural network with patches of patch_size x patch_size
    (as given via the module network_path).

    Parameters
    ----------
    network_path : str
        Path to a Python script with a function generate_nnet(feats) which
        returns a neural network
    image_batch_size : int
    stride : int
    """
    assert image_batch_size >= 1
    assert stride >= 1

    features, labels = load_data_raw_images(train_images_folder=images_folder)
    mem_size = (sys.getsizeof(42) * len(features) * features[0].size +
                sys.getsizeof(42) * len(labels) * labels[0].size)
    logging.info("Loaded %i data images with their labels (approx %s)",
                 len(features), utils.sizeof_fmt(mem_size))
    nn_params = {'training': {'image_batch_size': image_batch_size,
                              'stride': stride}}
    logging.info("## Network: %s", network_path)
    network = imp.load_source('sst.network', network_path)
    logging.info("Fully network: %s", str(network.fully))
    nn_params['code'] = inspect.getsource(network)
    nn_params['fully'] = network.fully
    nn_params['patch_size'] = network.patch_size
    assert nn_params['patch_size'] > 0

    labeled_patches = get_patches(features[:1], labels[:1], nn_params=nn_params)
    feats, _ = get_features(labeled_patches, fully=nn_params['fully'])
    net1 = network.generate_nnet(feats)

    for block in range(0, len(features), image_batch_size):
        from_img = block
        to_img = block + image_batch_size
        logging.info("Training on batch %i - %i of %i total",
                     from_img, to_img, len(features))
        labeled_patches = get_patches(features[from_img:to_img],
                                      labels[from_img:to_img],
                                      nn_params=nn_params,
                                      stride=stride)
        logging.info(("labeled_patches[0].shape: %s , "
                      "labeled_patches[1].shape: %s"),
                     labeled_patches[0].shape,
                     labeled_patches[1].shape)
        net1 = train_nnet(labeled_patches, net1, fully=nn_params['fully'])

    model_pickle_name = 'nnet1-trained.pickle'
    utils.serialize_model(net1, filename=model_pickle_name, parameters=nn_params)
def testSizeOf(self):
    try:
        if hasattr(sys, 'getsizeof'):
            sys.getsizeof(univ.noValue)
    except PyAsn1Error:
        assert False, 'sizeof failed for NoValue object'
def get_policies(self):
    self.controllerUrl = 'http://' + self.controllerIp + ':8080/controller/nb/v2/statistics/default/flow'
    resp, content = self.h.request(self.controllerUrl, "GET")
    print sys.getsizeof(content)
    allFlowStats = json.loads(content)
    flowStats = allFlowStats['flowStatistics']
    for fs in flowStats:
        print "\nSwitch ID : " + fs['node']['id']
        print '{0:8} {1:8} {2:5} {3:15}'.format('Count', 'Action', 'Port', 'DestIP')
        for aFlow in fs['flowStatistic']:
            count = aFlow['packetCount']
            actions = aFlow['flow']['actions']
            actionType = ''
            actionPort = ''
            #print actions
            if type(actions) == type(list()):
                actionType = actions[0]['type']
                if actions[0].get('port') is not None:
                    actionPort = actions[0]['port']['id']
            else:
                actionType = actions['type']
                actionPort = actions['port']['id']
            dst = aFlow['flow']['match']['matchField'][0]['value']
            print '{0:8} {1:8} {2:5} {3:15}'.format(count, actionType, actionPort, dst)
def main():
    # using a plain floating point number
    i = 0.1
    boolean_status = True
    while boolean_status:
        print i
        i += 0.1
        if i == 2:
            boolean_status = False

    # using the decimal type, which corrects the rounding error
    boolean_status = True
    import decimal
    import sys
    dcimal_i = decimal.Decimal('0.1')
    dcimal_j = decimal.Decimal('0.1')
    while boolean_status:
        print dcimal_i
        dcimal_i += dcimal_j
        if dcimal_i == 2:
            boolean_status = False

    print '==' * 6
    print 'i'
    print type(i)
    print sys.getsizeof(i)
    print '==' * 6
    print 'dcimal_i'
    print type(dcimal_i)
    print sys.getsizeof(dcimal_i)  # needs about three times the space of an ordinary float
def evaluate_node(self, node):
    self.bytes_received += sys.getsizeof(node.parameters)
    self.bytes_received += sys.getsizeof(node.node_id)

    self.role_satisfaction_map[node.node_id] = \
        set([role_id for (role_id, role_criteria) in
             enumerate(self.role_criterias)
             if role_criteria.evaluate_against(node.parameters) > 0])
def sendMessage(csock, message):
    print('Transmitting', sys.getsizeof(message), "bytes")
    csock.send(bytes(str(sys.getsizeof(message)), "utf-8"))
    csock.recv(4)
    csock.send(bytes(message, "utf-8"))
    csock.recv(4)
    print('Transmission complete.')
def dummy_send(self, data, noftimes=1):
    self.sendstart_time = time.time()
    nofBs_sent = 0
    logging.info('dummy_send started at time=%s', time.time())
    logging.info('noftimes=%s', noftimes)
    for i in range(0, int(noftimes)):
        if self.proto == 'tcp':
            try:
                self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
                self.sock.connect(self.dst_addr)
                self.sock.sendall(data)
                datasize = sys.getsizeof(data) - 37  # 37 is python string format header length
                logging.info('tcp_sent datasize=%sB', datasize)
                nofBs_sent += datasize
            except socket.error, e:
                if isinstance(e.args, tuple):
                    logging.error('errno is %d', e[0])
                    if e[0] == errno.EPIPE:
                        logging.error('Detected remote peer disconnected')
                    else:
                        # determine and handle different error
                        pass
                else:
                    logging.error('socket error')
        elif self.proto == 'udp':
            self.sock.sendto(data, self.dst_addr)
            datasize = sys.getsizeof(data) - 37  # 37 is python string format header length
            nofBs_sent += datasize
            logging.info('udp_sent datasize=%sB', sys.getsizeof(data))
def __sizeof__(self):
    size = sys.getsizeof(self.sigma) + \
           sys.getsizeof(self.unit_num) + \
           sys.getsizeof(list(self.w)) + \
           sys.getsizeof(list(self.X)) + \
           sys.getsizeof(list(self.T))
    return size
def walkdir(pathname):
    total_files = files_number(pathname)
    currentsize = 0
    memsizeapprox = 0
    numfiles = 0
    sizeofint = sys.getsizeof(int())
    for root, dirs, files in os.walk(pathname):
        for name in files:
            fullname = os.path.join(root, name)
            numfiles += 1
            try:
                if not os.path.isfile(fullname):
                    sz = 0
                else:
                    sz = os.path.getsize(fullname)
                # i should use sys.getsizeof here
                memsizeapprox += sys.getsizeof(fullname) + sizeofint
                currentsize += sz
                print_update(
                    "%d/%d, %s (Memsize: %s)" % (numfiles, total_files,
                                                 human(currentsize), human(memsizeapprox))
                )
                yield fullname, sz
            except OSError:
                print("""Cannot read '%s'""" % fullname, file=sys.stderr)
                pass
def record_size_index(self):
    return sys.getsizeof(self.inverted_index)
        rando_gen(): random.randint(1, 9),
        rando_gen(): random.randint(1, 9)
    },
    '6': {
        rando_gen(): random.randint(1, 9),
        rando_gen(): random.randint(1, 9)
    },
    '7': {
        rando_gen(): random.randint(1, 9),
        rando_gen(): random.randint(1, 9)
    },
    '8': {
        rando_gen(): random.randint(1, 9),
        rando_gen(): random.randint(1, 9)
    },
    '9': {
        rando_gen(): random.randint(1, 9),
        rando_gen(): random.randint(1, 9)
    }
}

#print (graph2.keys())

start = time.time()
dj(graph2, rando_gen(), rando_gen())
stop = time.time()
print 'Dijkstra:', stop - start

graphSize = sys.getsizeof(graph2)
print 'The size of the graph is:', graphSize, "bytes"
def get_size(obj):
    return sys.getsizeof(obj) / (1024 * 1024)
def test_info_memory_usage(self):
    # Ensure memory usage is displayed, when asserted, on the last line
    dtypes = ['int64', 'float64', 'datetime64[ns]', 'timedelta64[ns]',
              'complex128', 'object', 'bool']
    data = {}
    n = 10
    for i, dtype in enumerate(dtypes):
        data[i] = np.random.randint(2, size=n).astype(dtype)
    df = DataFrame(data)
    buf = StringIO()

    # display memory usage case
    df.info(buf=buf, memory_usage=True)
    res = buf.getvalue().splitlines()
    assert "memory usage: " in res[-1]

    # do not display memory usage case
    df.info(buf=buf, memory_usage=False)
    res = buf.getvalue().splitlines()
    assert "memory usage: " not in res[-1]

    df.info(buf=buf, memory_usage=True)
    res = buf.getvalue().splitlines()
    # memory usage is a lower bound, so print it as XYZ+ MB
    assert re.match(r"memory usage: [^+]+\+", res[-1])

    df.iloc[:, :5].info(buf=buf, memory_usage=True)
    res = buf.getvalue().splitlines()
    # excluded column with object dtype, so estimate is accurate
    assert not re.match(r"memory usage: [^+]+\+", res[-1])

    df_with_object_index = pd.DataFrame({'a': [1]}, index=['foo'])
    df_with_object_index.info(buf=buf, memory_usage=True)
    res = buf.getvalue().splitlines()
    assert re.match(r"memory usage: [^+]+\+", res[-1])

    df_with_object_index.info(buf=buf, memory_usage='deep')
    res = buf.getvalue().splitlines()
    assert re.match(r"memory usage: [^+]+$", res[-1])

    assert (df_with_object_index.memory_usage(index=True, deep=True).sum() >
            df_with_object_index.memory_usage(index=True).sum())

    df_object = pd.DataFrame({'a': ['a']})
    assert (df_object.memory_usage(deep=True).sum() >
            df_object.memory_usage().sum())

    # Test a DataFrame with duplicate columns
    dtypes = ['int64', 'int64', 'int64', 'float64']
    data = {}
    n = 100
    for i, dtype in enumerate(dtypes):
        data[i] = np.random.randint(2, size=n).astype(dtype)
    df = DataFrame(data)
    df.columns = dtypes

    # Ensure df size is as expected
    # (cols * rows * bytes) + index size
    df_size = df.memory_usage().sum()
    exp_size = len(dtypes) * n * 8 + df.index.nbytes
    assert df_size == exp_size

    # Ensure number of cols in memory_usage is the same as df
    size_df = np.size(df.columns.values) + 1  # index=True; default
    assert size_df == np.size(df.memory_usage())

    # assert deep works only on object
    assert df.memory_usage().sum() == df.memory_usage(deep=True).sum()

    # test for validity
    DataFrame(1, index=['a'], columns=['A']).memory_usage(index=True)
    DataFrame(1, index=['a'], columns=['A']).index.nbytes
    df = DataFrame(data=1,
                   index=pd.MultiIndex.from_product([['a'], range(1000)]),
                   columns=['A'])
    df.index.nbytes
    df.memory_usage(index=True)
    df.index.values.nbytes

    # sys.getsizeof will call the .memory_usage with
    # deep=True, and add on some GC overhead
    diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df)
    assert abs(diff) < 100
import time
import pickle
import sys

import tensorflow as tf
from sklearn.neighbors import KNeighborsClassifier

## import data
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
train_images = train_images.reshape((60000, 784))
test_images = test_images.reshape((10000, 784))
# Normalize pixel values to be between -1 and 1
train_images, test_images = train_images / 127.5 - 1, test_images / 127.5 - 1

## kNN implementation
t = time.time()
neigh = KNeighborsClassifier(n_neighbors=10, weights='distance', leaf_size=100)
neigh.fit(train_images, train_labels)
print("fit time:", time.time() - t)

t = time.time()
result = neigh.predict(test_images)
print("predict time:", time.time() - t)

acc = 0
for i in range(len(test_images)):
    acc += result[i] == test_labels[i]
print("accuracy = ", acc / len(test_images))
print(sys.getsizeof(pickle.dumps(neigh)))
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(sess, "models/model.ckpt")

client = cm.connect_to_server(client_ip_address, port)

total = 38421
chunk_size = 4096

steering_vector = np.asarray([0, 0, 0], dtype=np.uint8)
raw_data = steering_vector.tostring()

while True:
    print 'Sending : ', steering_vector, ' size : ', sys.getsizeof(raw_data)
    client.sendall(raw_data)

    raw_data = cm.collect_bytes(client, total, chunk_size)
    frame = np.fromstring(raw_data, dtype=np.uint8)
    frame = frame.astype(dtype=np.float32)
    frame = np.asarray([frame])

    with tf.Session() as sess:
        prediction = sess.run([compute_prediction], feed_dict={x: frame})

    steering_vector = dsm.convert_to_steering_vector(prediction)
    raw_data = steering_vector.tostring()
def obj_size(o):
    if o is None:
        return 0
    # handling numpy obj size (nbytes property)
    return o.nbytes if hasattr(o, 'nbytes') else sys.getsizeof(o, -1)
import sys

## xrange is faster than range and consumes less memory
a = xrange(1, 6)
print(sys.getsizeof(a))

b = range(1, 6)
print(sys.getsizeof(b))
def __sizeof__(self):
    return sys.getsizeof(self.depth) + sys.getsizeof(self.node_desc_dictionary)
# -*- coding: utf-8 -*-
"""
Created on Fri Mar  1 15:36:38 2019

@author: Trevor
"""
import base64
import sys

string = ""
with open("E:/Programs/Python/images/t.png", "rb") as imageFile:
    string = base64.b64encode(imageFile.read())

#print(string)
print(len(string))
print(sys.getsizeof(string))

# =============================================================================
# fh = open("imageToSave.png", "wb")
# fh.write(string.decode('base64'))
# fh.close()
# =============================================================================

imgdata = base64.b64decode(string)
filename = 'some_image.png'  # I assume you have a way of picking unique filenames
with open(filename, 'wb') as f:
    f.write(imgdata)
import sys


def my_range(n: int):
    print("my_range starts")
    start = 0
    while start < n:
        print("my_range is returning {}".format(start))
        yield start
        start += 1


big_range = range(5)
# big_range = my_range(5)

# _ = input("line 14")
print("big_range is {} bytes".format(sys.getsizeof(big_range)))

# create a list containing all the values in big_range
big_list = []
# _ = input("line 22")
for val in big_range:
    # _ = input("line 24 - inside loop")
    big_list.append(val)

print("big_list is {} bytes".format(sys.getsizeof(big_list)))
print(big_range)
print(big_list)

print("looping again ... or not")
for i in big_range:
def _getsizeof(attribute):
    """Return the size of attribute in MBs.

    param attribute: The object's attribute.
    """
    return "{} MB".format(getsizeof(attribute) / 1024 / 1024)
def test_default(self):
    size = test.support.calcvobjsize
    self.assertEqual(sys.getsizeof(True), size('') + self.longdigit)
    self.assertEqual(sys.getsizeof(True, -1), size('') + self.longdigit)
#!/usr/bin/env python3
"""Print size of variable."""

import sys

VAR = 30
print(sys.getsizeof(VAR))
print('............... ' + layer_name + ' ..............')
if layer_name == 'layer0':
    layer_output = data
else:
    intermediate_layer_model = Model(inputs=model.input,
                                     outputs=model.get_layer(layer_name).output)
    layer_output = intermediate_layer_model.predict(data)

name_to_save = layer_name
name_to_save = 'layer_' + str(xx)
filename = pjoin(path_to_save_activations, name_to_save)

if saveformat == 'pickle':
    outfile = open(filename, 'wb')
    filters = layer_output
    pickle.dump(filters, outfile)
    outfile.close()
elif saveformat == 'MATLAB':
    if 2**31 - getsizeof(layer_output) > 0:
        # faster save with v5 format if smaller than 2 GB
        scipy.io.savemat(filename + '.mat', {'filters': layer_output})
    else:
        # slower save with v7.3 format if larger than 2 GB
        hdf5storage.savemat(filename + '.mat', {'filters': layer_output})
else:
    raise SystemExit('Output format not defined. Must be pickle or MATLAB.')
xx = xx + 1

################################################################################
# saving the name and information of the used layers
numpy.save(path_to_save_meta + 'layer_names', layer_names)
numpy.save(path_to_save_meta + 'number_of_examples', n_examples)
numpy.save(path_to_save_meta + 'repeatTimes', repeatTimes)
###############################################################################
def check_code_size(a, expected_size):
    self.assertGreaterEqual(sys.getsizeof(a), expected_size)
def _calculate_gas(self, message):
    gas_price = sys.getsizeof(message) * 0.001
    return gas_price
def check_slots(self, obj, base, extra):
    expected = sys.getsizeof(base) + struct.calcsize(extra)
    if gc.is_tracked(obj) and not gc.is_tracked(base):
        expected += self.gc_headsize
    self.assertEqual(sys.getsizeof(obj), expected)
obj_str = PyObject.from_address(id(str))

print obj_a.refcnt
print obj_str.refcnt

b = [a] * 10
print b
print obj_a.refcnt
print obj_a.typeid

# print type(a)
# print str
# print id(type(a))
# print id(str)
# print id(type(str))
# print isinstance(a,str)
# print dir(a)
# print dir(obj_a)

print '=========================================='

b = 127
bb = c_byte(127)
obj_b = PyByte.from_address(id(b))
print obj_b.val

obj_bb = PyByte(12)
print obj_bb.val

print 'bb.value:', bb.value
print 'bb.size:', bb.__sizeof__()
print dir(c_byte)
print sys.getsizeof(b)
print sys.getsizeof(bb)
print sys.getsizeof(obj_str)
with open('test.txt', 'a') as file:
    file.write('This is the last line')

binary mode: 'b' flag
image = open('img_2011.jpg', 'rb')
image = open('img_2011.jpg', 'wb')

file.name
file.mode

size of the file object in memory: getsizeof()
import sys
sys.getsizeof(file)

exceptions: https://docs.python.org/3/library/exceptions.html
try:
    code that Python will try to run
except:
    code that Python runs if it hit an error

Non-existent variable - NameError
Missing characters - SyntaxError
Wrong indentation - IndentationError
Wrong data type - TypeError
Wrong value - ValueError
Division by zero - ZeroDivisionError
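A minimal sketch pulling these notes together (the file name 'test.txt' and the specific exceptions caught are only illustrative):

import sys

try:
    with open('test.txt', 'r') as file:
        text = file.read()
    print(file.name, file.mode)      # name and mode of the file object
    print(sys.getsizeof(file))       # in-memory size of the file object, not the file on disk
    print(sys.getsizeof(text))       # in-memory size of the text that was read
    print(10 / len(text))            # raises ZeroDivisionError if the file is empty
except FileNotFoundError:
    print('test.txt does not exist')
except ZeroDivisionError:
    print('the file was empty')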
price_size = 0
print "\n[+] Finding integer overflow vulnerabilities, potential vulnerabilities will be printed below, please wait......"
for f in Functions(SegStart(current_addr), SegEnd(current_addr)):
    stack_frame = GetFrame(f)               # get the function's stack frame
    frame_size = GetStrucSize(stack_frame)  # compute the size of the stack frame
    frame_counter = 0
    prev_count = -1
    distance = 0
    ana_fun_name = stack_frame              # name of the function to analyse
    para_num = 0                            # number of parameters
    ana = AnayBinFil()
    dic = ana.Anayl_Func_Call(ana_fun_name, para_num + 1)
    size = sys.getsizeof(dic)
    while frame_counter < frame_size:
        stack_var = GetMemberName(stack_frame, frame_counter)  # get one member from the stack frame
        if stack_var != "":
            if prev_count != -1:
                distance = frame_counter - prev_distance
            prev_distance = frame_counter  # record the last location
            if distance >= var_size_threshold:
                if distance < size:
                    author_name = doc.createElement('Vulnerability-name')
                    author_stackVariable = doc.createElement('Vulnerability-stackVariable')
                    author_name_text = doc.createTextNode(
def memory(self):
    mem = sys.getsizeof(self.cache)
    for s in self.cache.iterkeys():
        mem += sys.getsizeof(s)
    return mem
def record_span(self, span):
    """Per BasicSpan.record_span, safely add a span to the buffer.

    Will drop a previously-added span if the limit has been reached.
    """
    if self._disabled_runtime:
        return

    # Lazy-init the flush loop (if need be).
    self._maybe_init_flush_thread()

    # Checking the len() here *could* result in a span getting dropped that
    # might have fit if a report started before the append(). This would only
    # happen if the client lib was being saturated anyway (and likely
    # dropping spans). But on the plus side, having the check here avoids
    # doing a span conversion when the span will just be dropped while also
    # keeping the lock scope minimized.
    with self._mutex:
        if len(self._span_records) >= self._max_span_records:
            return

    span_record = ttypes.SpanRecord(
        trace_guid=util._id_to_hex(span.context.trace_id),
        span_guid=util._id_to_hex(span.context.span_id),
        runtime_guid=util._id_to_hex(self.guid),
        span_name=util._coerce_str(span.operation_name),
        join_ids=[],
        oldest_micros=util._time_to_micros(span.start_time),
        youngest_micros=util._time_to_micros(span.start_time + span.duration),
        attributes=[],
        log_records=[])

    if span.parent_id != None:
        span_record.attributes.append(
            ttypes.KeyValue(constants.PARENT_SPAN_GUID,
                            util._id_to_hex(span.parent_id)))

    if span.tags:
        for key in span.tags:
            if key[:len(constants.JOIN_ID_TAG_PREFIX)] == constants.JOIN_ID_TAG_PREFIX:
                span_record.join_ids.append(
                    ttypes.TraceJoinId(key, util._coerce_str(span.tags[key])))
            else:
                span_record.attributes.append(
                    ttypes.KeyValue(key, util._coerce_str(span.tags[key])))

    for log in span.logs:
        event = log.key_values.get('event') or ''
        if len(event) > 0:
            # Don't allow for arbitrarily long log messages.
            if sys.getsizeof(event) > constants.MAX_LOG_MEMORY:
                event = event[:constants.MAX_LOG_LEN]

        payload = log.key_values.get('payload')

        fields = None
        if log.key_values is not None and len(log.key_values) > 0:
            fields = [ttypes.KeyValue(k, util._coerce_str(v))
                      for (k, v) in log.key_values.items()]

        span_record.log_records.append(
            ttypes.LogRecord(timestamp_micros=util._time_to_micros(log.timestamp),
                             fields=fields))

    with self._mutex:
        if len(self._span_records) < self._max_span_records:
            self._span_records.append(span_record)
def determine_memory_usage(self, nodes):
    return self.max_nodes * sys.getsizeof(self.root)
def get_size(cls, show=False):
    vs = (sys.getsizeof(o) for o in get_type(cls))
    v = sum(vs) * 1024 ** -2
    if show:
        print('{:<30s} {}'.format(cls, v))
    return v
def test_preallocation(self):
    iterable = [0] * 10
    iter_size = sys.getsizeof(iterable)
    self.assertEqual(iter_size, sys.getsizeof(list([0] * 10)))
    self.assertEqual(iter_size, sys.getsizeof(list(range(10))))
print(id(l1), id(l2), id(t1), id(t2))
print(l1 is l2, t1 is t2)

# Note that this is the case even if the tuple contains non constant elements:
t1 = ([1, 2], fn1, 3)
t2 = tuple(t1)
print(t1 is t2)

## Storage Efficiency ##

import sys

prev = 0
for i in range(10):
    c = tuple(range(i + 1))
    size_c = sys.getsizeof(c)
    delta, prev = size_c - prev, size_c
    print(f'{i+1} items: {size_c}, delta: {delta}')

prev = 0
for i in range(10):
    c = list(range(i + 1))
    size_c = sys.getsizeof(c)
    delta, prev = size_c - prev, size_c
    print(f'{i+1} items: {size_c}, delta: {delta}')

c = []
prev = sys.getsizeof(c)
print(f'0 items: {sys.getsizeof(c)}')
for i in range(255):
    c.append(i)
print("Hello World!")

## 2. Data Storage, Memory, and RAM ##

my_int = 8
int_addr = id(my_int)

my_str = 'Naveen'
str_addr = id(my_str)

## 4. Understanding How Python Stores Data ##

import sys

my_int = 200
size_of_my_int = sys.getsizeof(my_int)

int1 = 10
int2 = 100000
str1 = "Hello"
str2 = "Hi"

int_diff = sys.getsizeof(int1) - sys.getsizeof(int2)
str_diff = sys.getsizeof(str1) - sys.getsizeof(str2)

## 6. Understanding Disk Storage ##

import time
import csv

f = open("list.csv", "r")
def actionListobj(self):
    import gc
    import sys

    self.sendHeader()

    if "Multiuser" in PluginManager.plugin_manager.plugin_names and not config.multiuser_local:
        yield "This function is disabled on this proxy"
        raise StopIteration

    # No more if not in debug mode
    if not config.debug:
        yield "Not in debug mode"
        raise StopIteration

    type_filter = self.get.get("type")

    yield """
    <style>
     * { font-family: monospace; white-space: pre }
     table * { text-align: right; padding: 0px 10px }
    </style>
    """

    yield "Listing all %s objects in memory...<br>" % cgi.escape(type_filter)

    ref_count = {}
    objs = gc.get_objects()
    for obj in objs:
        obj_type = str(type(obj))
        if obj_type != type_filter:
            continue
        refs = [
            ref for ref in gc.get_referrers(obj)
            if hasattr(ref, "__class__") and
            ref.__class__.__name__ not in ["list", "dict", "function", "type", "frame", "WeakSet", "tuple"]
        ]
        if not refs:
            continue
        try:
            yield "%.1fkb <span title=\"%s\">%s</span>... " % (
                float(sys.getsizeof(obj)) / 1024,
                cgi.escape(str(obj)),
                cgi.escape(str(obj)[0:100].ljust(100))
            )
        except:
            continue
        for ref in refs:
            yield " ["
            if "object at" in str(ref) or len(str(ref)) > 100:
                yield str(ref.__class__.__name__)
            else:
                yield str(ref.__class__.__name__) + ":" + cgi.escape(str(ref))
            yield "] "
            ref_type = ref.__class__.__name__
            if ref_type not in ref_count:
                ref_count[ref_type] = [0, 0]
            ref_count[ref_type][0] += 1  # Count
            ref_count[ref_type][1] += float(sys.getsizeof(obj)) / 1024  # Size
        yield "<br>"

    yield "<br>Object referrers (total: %s, %.2fkb):<br>" % (
        len(ref_count), sum([stat[1] for stat in ref_count.values()]))

    for obj, stat in sorted(ref_count.items(), key=lambda x: x[1][0], reverse=True)[0:30]:  # Sorted by count
        yield " - %.1fkb = %s x %s<br>" % (stat[1], stat[0], cgi.escape(str(obj)))

    gc.collect()  # Implicit garbage collection
A = [0] * 3        # is [0, 0, 0]
A = [[0] * 3] * 3  # is [[0,0,0],[0,0,0],[0,0,0]]
# bad, because if you do A[1][1] = 2, A[0][1] is also 2. This method creates a
# one-dimensional list whose entries all point to the same one-dimensional list.

# the better way
A = [0 for i in range(3)]
A = [[0 for i in range(2)] for i in range(3)]

# on a 64-bit machine, when you write 0, it uses 64 bytes
A = [0 for i in range(100)]
import sys
sys.getsizeof(A)  # gets size of A, but all the indexes of A are pointers, so it gets the size of A plus the size of the pointers
print(sys.getsizeof(A) + sum([sys.getsizeof(a) for a in A]))  # when you run, you get 3708

import array
B = array.array('b', A)  # the little 'b' means the assigned integers in A are represented as one byte each
# B[3] = 'cat' would get an error because you can only store very small numbers
# B has no arrows/pointers; instead, the values themselves are stored inside the array
print(sys.getsizeof(B))

for i in A:
    i += 1  # doesn't change anything in A. If you want to change A, you have to use the index:
for i in range(len(A)):
    A[i] = A[i] + 1
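A quick sketch of the aliasing problem described in the comments above (plain Python, nothing assumed beyond the note itself):

A = [[0] * 3] * 3  # three references to the same inner list
A[1][1] = 2
print(A)  # [[0, 2, 0], [0, 2, 0], [0, 2, 0]] -- every row changed

B = [[0 for i in range(3)] for i in range(3)]  # three independent inner lists
B[1][1] = 2
print(B)  # [[0, 0, 0], [0, 2, 0], [0, 0, 0]] -- only one row changed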