def do(file_name, output_dir='output', debug=True):
    dir_fb2 = os.path.basename(file_name)
    dir_im = os.path.join(output_dir, dir_fb2)
    os.makedirs(dir_im, exist_ok=True)

    debug and print(dir_im + ':')

    total_image_size = 0

    with open(file_name, encoding='utf8') as fb2:
        pattern = re.compile(
            '<binary ((content-type=".+?") (id=".+?")'
            '|(id=".+?") (content-type=".+?")) *?>(.+?)</binary>',
            re.DOTALL)
        find_content_type = re.compile('content-type="(.+?)"')
        find_id = re.compile('id="(.+?)"')

        for i, binary in enumerate(pattern.findall(fb2.read()), 1):
            try:
                im_id, content_type, im_base64 = None, None, None

                for part in binary:
                    if not part:
                        continue

                    match_id = find_id.search(part)
                    if im_id is None and match_id is not None:
                        im_id = match_id.group(1)

                    match_content_type = find_content_type.search(part)
                    if content_type is None and match_content_type is not None:
                        content_type = match_content_type.group(1)

                    if match_id is None and match_content_type is None:
                        im_base64 = part

                im_file_name = get_file_name_from_binary(im_id, content_type)
                im_file_name = os.path.join(dir_im, im_file_name)

                im_data = base64.b64decode(im_base64.encode())
                count_bytes = len(im_data)
                total_image_size += count_bytes

                with open(im_file_name, mode='wb') as f:
                    f.write(im_data)

                im = Image.open(io.BytesIO(im_data))
                debug and print('    {}. {} {} format={} size={}'.format(
                    i, im_id, sizeof_fmt(count_bytes), im.format, im.size))

            except:
                import traceback
                traceback.print_exc()

    file_size = os.path.getsize(file_name)

    debug and print()
    debug and print('fb2 file size =', sizeof_fmt(file_size))
    debug and print('total image size = {} ({:.2f}%)'.format(
        sizeof_fmt(total_image_size), total_image_size / file_size * 100))

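# The snippets in this section all call a sizeof_fmt() helper imported from the
# project's common module, whose definition is not shown here. The following is
# a minimal sketch of the widely used recipe it presumably follows (binary
# units, one decimal place) -- an assumption, not necessarily the project's
# exact code:
def sizeof_fmt(num, suffix='B'):
    # Walk through binary prefixes until the value fits below 1024
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return '%3.1f%s%s' % (num, unit, suffix)
        num /= 1024.0
    return '%.1f%s%s' % (num, 'Yi', suffix)
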
def do(file_name, output_dir='output', debug=True):
    dir_fb2 = os.path.basename(file_name)
    dir_im = os.path.join(output_dir, dir_fb2)
    if not os.path.exists(dir_im):
        os.makedirs(dir_im)

    debug and print(dir_im + ':')

    total_image_size = 0
    number = 1

    tree = ET.parse(file_name)
    root = tree.getroot()

    for child in root:
        tag = child.tag
        if "}" in tag:
            tag = tag[tag.index('}') + 1:]

        if tag != 'binary':
            continue

        try:
            im_id = child.attrib['id']
            content_type = child.attrib['content-type']

            im_file_name = get_file_name_from_binary(im_id, content_type)
            im_file_name = os.path.join(dir_im, im_file_name)

            im_data = base64.b64decode(child.text.encode())
            count_bytes = len(im_data)
            total_image_size += count_bytes

            with open(im_file_name, mode='wb') as f:
                f.write(im_data)

            im = Image.open(io.BytesIO(im_data))
            debug and print('    {}. {} {} format={} size={}'.format(
                number, im_id, sizeof_fmt(count_bytes), im.format, im.size))
            number += 1

        except:
            import traceback
            traceback.print_exc()

    file_size = os.path.getsize(file_name)

    debug and print()
    debug and print('fb2 file size =', sizeof_fmt(file_size))
    debug and print('total image size = {} ({:.2f}%)'.format(
        sizeof_fmt(total_image_size), total_image_size / file_size * 100))

def get_dir_total_size(dir_name: str) -> (int, str):
    total_size = 0

    for root, dirs, files in os.walk(dir_name):
        total_size += sum(getsize(join(root, name)) for name in files)

    return total_size, sizeof_fmt(total_size)

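# A minimal usage sketch for get_dir_total_size() above; the target directory
# ('.') is an illustrative choice, not taken from the original code:
if __name__ == '__main__':
    size_bytes, size_text = get_dir_total_size('.')
    print(size_bytes, size_text)
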
def main(argv):
    args = read_parameter(argv)

    if args.info:
        logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
    elif args.debug:
        logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(message)s")
    else:
        logging.basicConfig(level=logging.WARNING, format="%(message)s")

    logging.info('Input folder: %s' % args.folder)

    tar_file = create_tar(args.folder, args.compress)
    logging.info('Created TAR file: %s' % tar_file)

    size = get_file_size(tar_file)
    logging.info('File size: %s (%i bytes)' % (sizeof_fmt(size), size))

    tree_hash_hex = get_tree_hash_of_file(tar_file)
    logging.info('Hash (SHA-256 treehash): %s' % tree_hash_hex)

    # remove non-ASCII chars: the archive description must be ASCII
    description = "files from %s" % re.sub(r'[^\x00-\x7F]+', '', args.folder)

    archive_id = upload_to_glacier(tar_file, size, description, args.vault, tree_hash_hex)

    delete_temp_file(tar_file)
    logging.info('Removed temporary file')

    now = datetime.datetime.now()
    print("%s\t%s\t%s\t%s\t%s" % (now, args.folder, args.vault, archive_id, tree_hash_hex))

def get_dir_total_size(dir_name: str, ignore_permission_error=True) -> (int, str):
    def _get_sub_size(root_path: str) -> int:
        if isfile(root_path):
            return getsize(root_path)

        total_size = 0

        try:
            for path in listdir(root_path):
                abs_path = join(root_path, path)
                if isfile(abs_path):
                    size = getsize(abs_path)
                else:
                    size = _get_sub_size(abs_path)

                total_size += size

        except Exception as e:
            if type(e) is PermissionError:
                if not ignore_permission_error:
                    print('Error: "{}"'.format(e))
            else:
                print('Path: "{}", error: "{}"'.format(root_path, e))

        return total_size

    total_size = _get_sub_size(dir_name)
    return total_size, sizeof_fmt(total_size)

def do(file_name, output_dir='output', debug=True):
    dir_fb2 = os.path.basename(file_name)
    dir_im = os.path.join(output_dir, dir_fb2)
    if not os.path.exists(dir_im):
        os.makedirs(dir_im)

    debug and print(dir_im + ':')

    total_image_size = 0

    with open(file_name, 'rb') as fb2:
        root = BeautifulSoup(fb2, 'html.parser')
        binaries = root.select("binary")

        for i, binary in enumerate(binaries, 1):
            try:
                im_id = binary.attrs['id']
                content_type = binary.attrs['content-type']

                im_file_name = get_file_name_from_binary(im_id, content_type)
                im_file_name = os.path.join(dir_im, im_file_name)

                im_data = base64.b64decode(binary.text.encode())
                count_bytes = len(im_data)
                total_image_size += count_bytes

                with open(im_file_name, mode='wb') as f:
                    f.write(im_data)

                im = Image.open(io.BytesIO(im_data))
                debug and print('    {}. {} {} format={} size={}'.format(
                    i, im_id, sizeof_fmt(count_bytes), im.format, im.size
                ))

            except:
                import traceback
                traceback.print_exc()

    file_size = os.path.getsize(file_name)

    debug and print()
    debug and print('fb2 file size =', sizeof_fmt(file_size))
    debug and print('total image size = {} ({:.2f}%)'.format(
        sizeof_fmt(total_image_size), total_image_size / file_size * 100
    ))

def print_inventory(inventory):
    print('Date:', inventory['InventoryDate'])

    for archive in inventory['ArchiveList']:
        print('-')
        print('ID:            ', archive['ArchiveId'])
        print('Description:   ', archive['ArchiveDescription'])
        print('Creation date: ', archive['CreationDate'])
        print('Size:          ', sizeof_fmt(archive['Size']), '(%i)' % archive['Size'])
        print('SHA256TreeHash:', archive['SHA256TreeHash'])

def do(file_name, output_dir='output', debug=True):
    dir_fb2 = os.path.basename(file_name)
    dir_im = os.path.join(output_dir, dir_fb2)
    os.makedirs(dir_im, exist_ok=True)

    debug and print(dir_im + ':')

    total_image_size = 0

    with open(file_name, 'rb') as fb2:
        tree = etree.XML(fb2.read())
        binaries = tree.xpath("//*[local-name()='binary']")

        for i, binary in enumerate(binaries, 1):
            try:
                im_id = binary.attrib['id']
                content_type = binary.attrib['content-type']

                im_file_name = get_file_name_from_binary(im_id, content_type)
                im_file_name = os.path.join(dir_im, im_file_name)

                im_data = base64.b64decode(binary.text.encode())
                count_bytes = len(im_data)
                total_image_size += count_bytes

                with open(im_file_name, mode='wb') as f:
                    f.write(im_data)

                im = Image.open(io.BytesIO(im_data))
                debug and print('    {}. {} {} format={} size={}'.format(
                    i, im_id, sizeof_fmt(count_bytes), im.format, im.size
                ))

            except:
                import traceback
                traceback.print_exc()

    file_size = os.path.getsize(file_name)

    debug and print()
    debug and print('fb2 file size =', sizeof_fmt(file_size))
    debug and print('total image size = {} ({:.2f}%)'.format(
        sizeof_fmt(total_image_size), total_image_size / file_size * 100
    ))

def get_dir_total_size(dir_name: str) -> (int, str):
    total_size = 0

    # for file_name in pathlib.Path(dir_name).rglob('*'):
    # OR:
    for file_name in pathlib.Path(dir_name).glob('**/*'):
        if file_name.is_file():
            total_size += file_name.stat().st_size

    return total_size, sizeof_fmt(total_size)

def get_dir_total_size(dir_name: str) -> (int, str):
    total_size = 0

    for file_name in iglob(escape(dir_name) + '/**', recursive=True):
        try:
            if os.path.isfile(file_name):
                total_size += os.path.getsize(file_name)
        except Exception as e:
            print('File: "{}", error: "{}"'.format(file_name, e))

    return total_size, sizeof_fmt(total_size)

def get_image_info(file_name__or__bytes__or__bytes_io, pretty_json_str=False):
    data = file_name__or__bytes__or__bytes_io
    type_data = type(data)

    # File name
    if type_data == str:
        with open(data, mode='rb') as f:
            data = f.read()

    if type(data) == bytes:
        import io
        data = io.BytesIO(data)

    length = len(data.getvalue())
    exif = get_exif_tags(data)

    from PIL import Image
    img = Image.open(data)

    # Save order
    from collections import OrderedDict
    info = OrderedDict()
    info['length'] = OrderedDict()
    info['length']['value'] = length
    info['length']['text'] = sizeof_fmt(length)
    info['format'] = img.format
    info['mode'] = img.mode
    info['channels'] = len(img.getbands())
    info['bit_color'] = {
        '1': 1, 'L': 8, 'P': 8, 'RGB': 24, 'RGBA': 32,
        'CMYK': 32, 'YCbCr': 24, 'I': 32, 'F': 32
    }[img.mode]
    info['size'] = OrderedDict()
    info['size']['width'] = img.width
    info['size']['height'] = img.height
    info['exif'] = exif

    if pretty_json_str:
        import json
        info = json.dumps(info, indent=4, ensure_ascii=False)

    return info

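# A minimal usage sketch for get_image_info() above. 'example.jpg' is a
# hypothetical file name, and get_exif_tags() must be importable from the
# original module for this to run:
print(get_image_info('example.jpg', pretty_json_str=True))
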
def main(argv):
    args = read_parameter(argv)

    if args.info:
        logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
    elif args.debug:
        logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(message)s")
    else:
        logging.basicConfig(level=logging.WARNING, format="%(message)s")

    logging.info('Archive ID: %s' % args.archive_id)

    glacier_client = boto3.client('glacier')

    job_id, tree_hash_hex = start_retrieval_job(glacier_client, args.vault, args.archive_id)

    ## 190 MB job
    #job_id = 'p1Uv1IN7BQaveZLr843hheHQ1rrF47k37HscMl038m_TPtKMlyWFuXGTVc4A_o2Gknqp0wqCFA0sfLjaLHineXHkkBgb'
    #tree_hash_hex = '37436496846009fd0c40fac72db10ff61457074ee4af730ef5a3abb6a06a367b'

    ## 3 MB job
    # job_id = 'hwz0r-BfGXJ69ks5iC6PPA4AnAEVpd222G1mGOUMaG1OuP0qO8oy_iqq7mQxwOzL7Qpz5FlPhd6eVF4Tx0MMdtM4X5be'
    #tree_hash_hex = '3b0a8b676f33708ef577838a68cf1a1c591c981af1f9d03a50a568af79c4965d'

    logging.info('Job is created: %s' % job_id)

    size = monitor_job(glacier_client, args.vault, job_id)
    logging.info('Job is completed, size: %s (%i)' % (sizeof_fmt(size), size))

    tar_file_name = download_archive(glacier_client, args.vault, job_id, size, tree_hash_hex)
    logging.info('Downloaded TAR file: %s (%s)' % (tar_file_name, sizeof_fmt(size)))

    unpack_tar_file(tar_file_name, args.folder)
    logging.info('Unpacked TAR file to: %s', args.folder)

    delete_temp_file(tar_file_name)
    logging.info('Removed temporary TAR file')

    now = datetime.datetime.now()
    print("%s\t%s\t%s\t%s\t%s" % (now, args.folder, args.vault, args.archive_id, tree_hash_hex))

def get_file_size():
    print(request.files)

    # check if the post request has the file part
    if 'file' not in request.files:
        return redirect('/')

    length = 0

    file = request.files['file']
    if file:
        data = file.stream.read()
        length = len(data)

    return jsonify({'length': length, 'length_human': sizeof_fmt(length)})

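# A possible client for the Flask view above, assuming it is registered at a
# hypothetical /get_file_size route on a local dev server; the URL and file
# name are illustrative only:
import requests

with open('example.bin', 'rb') as f:
    rs = requests.post('http://127.0.0.1:5000/get_file_size', files={'file': f})

print(rs.json())  # e.g. {'length': ..., 'length_human': ...}
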
def steamUsers(pbar=False):
    logging = common.setupLogging()

    try:
        logging.info("Running Steam Users Online")

        client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
        db = client['steam']
        collection = db['steamusers']

        collection.create_index("epochint", unique=True)
        collection.create_index("date", unique=True)

        # pull Steam online users over the last 24 hours
        # https://store.steampowered.com/stats/
        r = requests.get("https://store.steampowered.com/stats/userdata.json")

        if (r.ok):
            data = r.json()[0]['data']

            if (pbar):
                bar = progressbar.ProgressBar(max_value=len(data)).start()

            for i, users in enumerate(data):
                if (pbar):
                    bar.update(i + 1)

                # convert Epoch seconds to UTC time
                # https://stackoverflow.com/questions/1697815/how-do-you-convert-a-python-time-struct-time-object-into-a-datetime-object
                conv_time = datetime.datetime.fromtimestamp(
                    time.mktime(time.gmtime(int(users[0]) / 1000)))

                # update_one will keep whatever information already exists
                collection.update_one({'epochint': int(users[0])},
                                      {'$set': {
                                          'numberonlineusers': int(users[1]),
                                          'date': conv_time
                                      }},
                                      upsert=True)

            if (pbar):
                bar.finish()

            logging.info("Finished downloading Steam users online.")
            logging.info("Downloaded: " + common.sizeof_fmt(len(r.content)))
            common.writeBandwidth(db, len(r.content))
        else:
            logging.error("status code: " + str(r.status_code))
    except Exception as e:
        logging.error(str(e))
        time.sleep(1)

def downloadAllAppIDs(pbar=False):
    logging = common.setupLogging()

    try:
        logging.info("Downloading All AppIDs")

        # downloads a list of every appid and name from the API
        # and stores in MongoDB collection
        client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
        db = client['steam']
        collection = db['apps']

        r = requests.get("https://api.steampowered.com/ISteamApps/GetAppList/v0002/")

        if (r.ok):
            data = r.json()

            # create an index for appid, this vastly improves performance
            collection.create_index("appid", unique=True)

            if (pbar):
                bar = progressbar.ProgressBar(max_value=len(data['applist']['apps'])).start()

            requests_list = []

            for i, app in enumerate(data['applist']['apps']):
                if (pbar):
                    bar.update(i + 1)

                # UpdateOne will keep whatever information already exists
                requests_list.append(UpdateOne({'appid': int(app['appid'])}, {'$set': app}, upsert=True))

                # do bulk writes in batches, instead of one at a time
                if (i % 1000 == 0 or i + 1 == len(data['applist']['apps'])):
                    try:
                        collection.bulk_write(requests_list)
                        requests_list = []
                    except BulkWriteError as bwe:
                        logging.error(bwe)

            if (pbar):
                bar.finish()

            logging.info("Finished downloading AppIDs.")
            logging.info("Downloaded: " + common.sizeof_fmt(len(r.content)))
            common.writeBandwidth(db, len(r.content))
        else:
            logging.error("status code: " + str(r.status_code))
    except Exception as e:
        logging.error(str(e))
        time.sleep(1)

def run():
    total_size = 0
    start = time.time()
    last = start
    last_window = start
    index = 0

    socket = setup()

    while True:
        try:
            start_recv = time.time()
            data = socket.recv()
            print('recv took %rs' % (time.time() - start_recv))

            size = sys.getsizeof(data)
            nparray = np.frombuffer(data)
            print('nparray shape', nparray.shape)
            # del data
            total_size += size

            socket.send(b"client message to server %d" % index)
        except zmq.error.Again:
            print('Waiting for server')
            socket = setup(socket)

        index += 1

        now = time.time()
        if now - last > 1:
            print("bandwidth last 5 seconds is %dMB/s" % (total_size / (now - start) // 10**6))
            print("bandwidth last 5 seconds is %s/s" % common.sizeof_fmt(total_size / (now - start)))

            if now - last_window > 5:
                total_size = 0
                start = now
                last_window = now

            last = now

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'


# SOURCE: https://github.com/qbittorrent/qbittorrent/wiki/WebUI-API-Documentation#get-torrent-list


from common import get_client, sizeof_fmt


qb = get_client()

torrents = qb.torrents(filter='downloading')

total_size = 0

for i, torrent in enumerate(torrents, 1):
    torrent_size = torrent['total_size']
    total_size += torrent_size

    print('{:<3} {} ({})'.format(i, torrent['name'], sizeof_fmt(torrent_size)))

print()
print('Total torrents: {}, total size: {} ({} bytes)'.format(len(torrents), sizeof_fmt(total_size), total_size))

socket = context.socket(zmq.SUB)

print("Collecting updates from weather server...")
socket.connect("tcp://localhost:%s" % port)

if len(sys.argv) > 2:
    socket.connect("tcp://localhost:%s" % port1)

# Subscribe to zipcode, default is NYC, 10001
topicfilter = b"0"
socket.setsockopt(zmq.SUBSCRIBE, topicfilter)

total_size = 0
start = time.time()
last = start

for update_nbr in range(10):
    string = socket.recv()
    topic, index, data = string.split()
    size = sys.getsizeof(data)
    total_size += size
    print('received data size of %r index %r' % (size, index))

    now = time.time()
    del data
    del string

    if now - last > 1:
        print("bandwidth is %dMB/s" % (total_size / (now - start) // 10**6))
        print("bandwidth is %s/s" % common.sizeof_fmt(total_size / (now - start)))
        last = now

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'


from common import sizeof_fmt, get_client


qb = get_client()

torrents_max_top5 = qb.torrents(sort='size', reverse='true', limit='5')
torrents_min_top5 = qb.torrents(sort='size', limit='5')

print('Max top5:')
for i, torrent in enumerate(torrents_max_top5, 1):
    print('    {}. {} ({})'.format(i, torrent['name'], sizeof_fmt(torrent['total_size'])))

print()

print('Min top5:')
for i, torrent in enumerate(torrents_min_top5, 1):
    print('    {}. {} ({})'.format(i, torrent['name'], sizeof_fmt(torrent['total_size'])))

def plot(data=None, output=None):
    WORKDIR = os.getcwd()
    RESULTSDIR = data
    RESULTEXT = '.csv'
    IMAGESTAT = 'imagestats'
    IMAGE_SIZE_KEY = 'image_size'
    NUMSYMS_KEY = 'number_symbols'
    GROUP_BAR_WIDTH = .8
    DEFAULT = '_'

    files = []
    apps = []
    stats = {}
    throughput_max = 0  # maximum observed rx mpps

    bar_colors = {
        'linux-dpdk-vhost-user': '******',
        'linux-dpdk-vhost-net': '#000000',
        'unikraft-vhost-user': '******',
        'unikraft-vhost-net': '#8000CA'
    }
    markers = {
        'linux-dpdk-vhost-user': '******',
        'linux-dpdk-vhost-net': ',',
        'unikraft-vhost-user': '******',
        'unikraft-vhost-net': '4'
    }
    labels = {
        'linux-dpdk-vhost-user': '******',
        'linux-dpdk-vhost-net': 'Linux DPDK with vhost-net',
        'unikraft-vhost-user': '******',
        'unikraft-vhost-net': 'Rhea with vhost-net'
    }

    for f in os.listdir(RESULTSDIR):
        if f.endswith(RESULTEXT):
            index = f.replace(RESULTEXT, '')
            files.append(f)
            unikernel = index

            with open(os.path.join(RESULTSDIR, f), 'r') as csvfile:
                csvdata = csv.reader(csvfile, delimiter="\t")
                next(csvdata)  # skip header

                for row in csvdata:
                    if unikernel not in stats:
                        stats[unikernel] = {}

                    throughput = float(row[1]) * KBYTES * KBYTES
                    stats[unikernel][str(row[0])] = throughput

                    if throughput > throughput_max:
                        throughput_max = throughput

    # General style
    common_style(plt)

    throughput_max += KBYTES * KBYTES * 1  # add "margin" above tallest bar

    # Setup matplotlib axis
    fig = plt.figure(figsize=(8, 4))
    renderer = fig.canvas.get_renderer()

    # image size axis
    ax1 = fig.add_subplot(1, 1, 1)
    ax1.set_ylabel("Throughput (Mp/s)")
    ax1.set_xlabel("Packet Size (Bytes)")
    ax1.grid(which='major', axis='y', linestyle=':', alpha=0.5, zorder=0)
    ax1_yticks = np.arange(0, throughput_max, step=KBYTES * KBYTES * 2)
    ax1.set_yticks(ax1_yticks, minor=False)
    ax1.set_yticklabels([sizeof_fmt(ytick, suffix='') for ytick in ax1_yticks])
    ax1.set_ylim(0, throughput_max)

    # Plot coordinates
    xlabels = list(stats[list(stats.keys())[0]].keys())

    # Adjust margining
    fig.subplots_adjust(bottom=.15)  # , top=1)

    for unikernel in stats.keys():
        ax1.plot(list(stats[unikernel].keys()),
                 list(stats[unikernel].values()),
                 marker=markers[unikernel],
                 label=labels[unikernel],
                 zorder=3,
                 linewidth=3,
                 markersize=9,
                 markeredgewidth=4,
                 color=bar_colors[unikernel],
                 )

    # set up x-axis labels
    xticks = range(0, len(xlabels))
    ax1.set_xticks(xticks)
    ax1.margins(x=.05)

    # Create a unique legend
    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    leg = plt.legend(by_label.values(), by_label.keys(), fontsize=LARGE_SIZE, loc='upper right', ncol=1)
    leg.get_frame().set_linewidth(0.0)

    # Save to file
    fig.tight_layout()
    fig.savefig(output)  # , bbox_extra_artists=(ax1,), bbox_inches='tight')

# -*- coding: utf-8 -*-

__author__ = 'ipetrash'


from common import get_client, sizeof_fmt


qb = get_client()


def get_torrents(qb, search_name='', **filters) -> list:
    def match(name: str) -> bool:
        return search_name.lower() in name.lower()

    return [
        torrent
        for torrent in qb.torrents(**filters)
        if match(torrent['name'])
    ]


torrents = get_torrents(qb, search_name='.mkv')

total_size = 0

for i, torrent in enumerate(torrents, 1):
    torrent_size = torrent['total_size']
    total_size += torrent_size

    print('{:<3} {} ({})'.format(i, torrent['name'], sizeof_fmt(torrent_size)))

print()
print('Total torrents: {}, total size: {} ({} bytes)'.format(
    len(torrents), sizeof_fmt(total_size), total_size))

def updateOpenCritic(refresh_type="OLDEST", pbar=False):
    logging = common.setupLogging()

    try:
        logging.info("Updating OpenCritic games via " + refresh_type)

        client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
        db = client['steam']
        collection_oc = db['opencritic']

        # create an index for id, this vastly improves performance
        collection_oc.create_index("id", unique=True)
        collection_oc.create_index("date")
        collection_oc.create_index("steamId")

        if (refresh_type == "OLDEST"):
            # find a sampling of OpenCritic IDs to work on ordered by date
            # will run on the oldest entries first
            names_cur = collection_oc.aggregate([
                {"$match": {}},
                {"$sort": {"date": 1}},  # oldest first
                {"$limit": 25},
                {"$project": {"id": 1, "_id": 0}}
            ])

            # convert cursor to Python list
            to_update = []
            for item in names_cur:
                to_update.append(item['id'])

        if (pbar):
            bar = progressbar.ProgressBar(max_value=len(to_update)).start()

        bytes_downloaded = 0

        for i, oc_id in enumerate(to_update):
            if (pbar):
                bar.update(i + 1)

            try:
                # OpenCritic Game API e.g.
                # https://opencritic.com/api/game/7592
                r = requests.get(
                    requests.Request(
                        'GET', "https://opencritic.com/api/game/" + str(oc_id)).prepare().url)

                if (r.ok):
                    data = r.json()
                    bytes_downloaded = bytes_downloaded + len(r.content)

                    oc = data
                    # add current datetimestamp
                    oc['date'] = datetime.datetime.utcnow()

                    # update_one will keep whatever information already exists
                    collection_oc.update_one({'id': int(oc['id'])}, {'$set': oc}, upsert=True)
                else:
                    logging.error("status code: " + str(r.status_code))
                    logging.error("opencritic game id: " + str(oc_id))

                # sleep for a bit, there's no information on API throttling
                time.sleep(2)  # seconds

                # grab review information which is a separate URL
                # e.g. https://opencritic.com/api/review/game/7592
                r = requests.get(
                    requests.Request(
                        'GET', "https://opencritic.com/api/review/game/" + str(oc_id)).prepare().url)

                if (r.ok):
                    data = r.json()
                    bytes_downloaded = bytes_downloaded + len(r.content)

                    oc['Reviews'] = data

                    # update_one will keep whatever information already exists
                    collection_oc.update_one({'id': int(oc['id'])}, {'$set': oc}, upsert=True)
                else:
                    logging.error("status code: " + str(r.status_code))
                    logging.error("opencritic game id: " + str(oc_id))
            except Exception as e:
                logging.error(str(e) + " - id: " + str(oc_id))

            # sleep for a bit, there's no information on API throttling
            time.sleep(2)  # seconds

        if (pbar):
            bar.finish()

        logging.info("Finished updating OpenCritic games via " + refresh_type)
        logging.info("Downloaded: " + common.sizeof_fmt(bytes_downloaded))
        common.writeBandwidth(db, bytes_downloaded)
    except Exception as e:
        logging.error(str(e))
        time.sleep(1)

def plot(data=None, output=None):
    WORKDIR = os.getcwd()
    RESULTSDIR = data
    RESULTEXT = '.csv'
    IMAGESTAT = 'imagestats'
    IMAGE_SIZE_KEY = 'image_size'
    NUMSYMS_KEY = 'number_symbols'
    GROUP_BAR_WIDTH = .8
    DEFAULT = '_'

    files = []
    labels = []
    apps = []
    imagestats = {}
    imagesize_max = 0  # maximum observed image size
    number_symbols_max = 0  # maximum observed symbol count
    total_apps = 0

    bar_colors = {
        'nginx': '#0C8828',
        'redis': '#CE1216',
        'hello': 'dimgray',
        'sqlite': '#4BA3E1'
    }
    labels = {
        'hermitux': 'Hermitux',
        'linuxuser': '******',
        'lupine': 'Lupine',
        'osv': 'OSv',
        'rump': 'Rumprun',
        'unikraft': 'Unikraft',
        'mirage': 'Mirage'
    }

    # Prepare matplotlib data by parsing the individual .csv files.  This process
    # goes through all image sizes and number of symbols and populates a dictionary
    # of unikernels and the application "image stats" based on the framework.
    for f in os.listdir(RESULTSDIR):
        if f.endswith(RESULTEXT):
            index = f.replace(RESULTEXT, '')
            files.append(f)

            result = index.split('-')
            unikernel = result[0]
            app = result[1]

            if unikernel not in imagestats:
                imagestats[unikernel] = {}

            if app not in imagestats[unikernel]:
                total_apps += 1
                imagestats[unikernel][app] = 0

            if app not in apps:
                apps.append(app)

            with open(os.path.join(RESULTSDIR, f), 'r') as csvfile:
                size = int(csvfile.readline())
                imagestats[unikernel][app] = size

    # General style
    common_style(plt)

    imagesize_max += KBYTES * KBYTES * 12  # add MB "margin"
    number_symbols_max += 2000

    # Setup matplotlib axis
    fig = plt.figure(figsize=(8, 5))
    renderer = fig.canvas.get_renderer()

    # image size axis
    ax1 = fig.add_subplot(1, 1, 1)
    ax1.set_ylabel("Image size")
    ax1.grid(which='major', axis='y', linestyle=':', alpha=0.5, zorder=0)
    ax1_yticks = np.arange(0, imagesize_max, step=KBYTES * KBYTES * 2)
    ax1.set_yticks(ax1_yticks, minor=False)
    ax1.set_yticklabels([sizeof_fmt(ytick) for ytick in ax1_yticks])
    ax1.set_ylim(0, imagesize_max)

    # Plot coordinates
    scale = 1. / len(labels.keys())
    xlabels = []

    # Adjust margining
    fig.subplots_adjust(bottom=.15)  # , top=1)

    i = 0
    line_offset = 0
    for unikernel in ['unikraft', 'hermitux', 'linuxuser', 'lupine', 'mirage', 'osv', 'rump']:
        xlabels.append(labels[unikernel])
        apps = imagestats[unikernel]

        # Plot a line between unikernel applications
        if i > 0:
            line = plt.Line2D([i * scale, i * scale], [-.02, 1],
                              transform=ax1.transAxes,
                              color='black',
                              linewidth=1)
            line.set_clip_on(False)
            ax1.add_line(line)

        j = 0
        bar_width = GROUP_BAR_WIDTH / len(apps.keys())
        bar_offset = (bar_width / 2) - (GROUP_BAR_WIDTH / 2)

        # Plot each application
        for app_label in sorted(apps):
            app = imagestats[unikernel][app_label]
            print(unikernel, app_label, app)

            bar = ax1.bar([i + 1 + bar_offset], app,
                          label=app_label,
                          align='center',
                          zorder=3,
                          width=bar_width,
                          color=bar_colors[app_label],
                          linewidth=.5
                          )

            ax1.text(i + 1 + bar_offset, app + 500000, sizeof_fmt(app),
                     ha='center',
                     va='bottom',
                     fontsize=LARGE_SIZE,
                     linespacing=0,
                     zorder=2,
                     bbox=dict(pad=0, facecolor='white', linewidth=0),
                     rotation='vertical'
                     )

            bar_offset += bar_width
            j += 1

        i += 1

    # sys.exit(1)

    # set up x-axis labels
    xticks = range(1, len(xlabels) + 1)
    ax1.set_xticks(xticks)
    ax1.set_xticklabels(xlabels, fontsize=LARGE_SIZE, rotation=40, ha='right', rotation_mode='anchor')
    # ax1.set_xticklabels(xlabels, fontsize=LARGE_SIZE, fontweight='bold')
    ax1.set_xlim(.5, len(xlabels) + .5)
    ax1.yaxis.grid(True, zorder=0, linestyle=':')
    ax1.tick_params(axis='both', which='both', length=0)

    # Create a unique legend
    handles, labels = plt.gca().get_legend_handles_labels()
    by_label = dict(zip(labels, handles))
    leg = plt.legend(by_label.values(), by_label.keys(),
                     loc='upper left',
                     ncol=2,
                     fontsize=LARGE_SIZE,
                     )
    leg.get_frame().set_linewidth(0.0)

    plt.setp(ax1.lines, linewidth=.5)

    # Save to file
    fig.tight_layout()
    fig.savefig(output)  # , bbox_extra_artists=(ax1,), bbox_inches='tight')

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'


from common import sizeof_fmt, get_client


qb = get_client()

torrents = qb.torrents()

torrents_max_top5 = sorted(torrents, key=lambda x: x['total_size'], reverse=True)[:5]
torrents_min_top5 = sorted(torrents, key=lambda x: x['total_size'])[:5]

print('Max top5:')
for i, torrent in enumerate(torrents_max_top5, 1):
    print('    {}. {} ({})'.format(i, torrent['name'], sizeof_fmt(torrent['total_size'])))

print()

print('Min top5:')
for i, torrent in enumerate(torrents_min_top5, 1):
    print('    {}. {} ({})'.format(i, torrent['name'], sizeof_fmt(torrent['total_size'])))

def do(file_name, output_dir='output', debug=True):
    dir_fb2 = os.path.basename(file_name)
    dir_im = os.path.join(output_dir, dir_fb2)
    if not os.path.exists(dir_im):
        os.makedirs(dir_im)

    debug and print(dir_im + ':')

    # Analog: fb2_pictures__using_xml_expat.py

    PARSE_DATA = {
        'last_start_tag': None,
        'last_tag_attrs': None,
        'last_tag_data': '',
        'total_image_size': 0,
        'number': 1,
    }

    class BinaryHandler(xml.sax.ContentHandler):
        def startElement(self, name, attrs):
            PARSE_DATA['last_start_tag'] = name
            PARSE_DATA['last_tag_attrs'] = attrs
            PARSE_DATA['last_tag_data'] = ''

        def characters(self, content):
            if PARSE_DATA['last_start_tag'] != 'binary':
                return

            PARSE_DATA['last_tag_data'] += content

        def endElement(self, name):
            if name != 'binary':
                return

            data = PARSE_DATA['last_tag_data']

            try:
                im_id = PARSE_DATA['last_tag_attrs']['id']
                content_type = PARSE_DATA['last_tag_attrs']['content-type']

                im_file_name = get_file_name_from_binary(im_id, content_type)
                im_file_name = os.path.join(dir_im, im_file_name)

                im_data = base64.b64decode(data.encode())
                count_bytes = len(im_data)
                PARSE_DATA['total_image_size'] += count_bytes

                with open(im_file_name, mode='wb') as f:
                    f.write(im_data)

                im = Image.open(io.BytesIO(im_data))
                debug and print('    {}. {} {} format={} size={}'.format(
                    PARSE_DATA['number'], im_id, sizeof_fmt(count_bytes), im.format, im.size))
                PARSE_DATA['number'] += 1

            except:
                import traceback
                traceback.print_exc()

    parser = xml.sax.make_parser()
    parser.setContentHandler(BinaryHandler())
    parser.parse(file_name)

    file_size = os.path.getsize(file_name)

    debug and print()
    debug and print('fb2 file size =', sizeof_fmt(file_size))
    debug and print('total image size = {} ({:.2f}%)'.format(
        sizeof_fmt(PARSE_DATA['total_image_size']),
        PARSE_DATA['total_image_size'] / file_size * 100))

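# All of the do() variants in this section share the same signature; a minimal
# usage sketch (the .fb2 path is a hypothetical example, not from the original
# code):
if __name__ == '__main__':
    do('example.fb2', output_dir='output', debug=True)
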
def updatePriceHistory(refresh_type="FULL", pbar=False):
    logging = common.setupLogging()

    try:
        logging.info("Updating Price History via " + refresh_type)

        client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
        db = client['steam']
        collection_hist = db['pricehistory']
        collection_apps = db['apps']

        # create an index for appid, this vastly improves performance
        collection_hist.create_index("appid")
        collection_hist.create_index("date")

        # e.g.: CS Source
        # https://store.steampowered.com/api/appdetails?appids=240&cc=us&l=en
        # https://wiki.teamfortress.com/wiki/User:RJackson/StorefrontAPI#Known_methods
        # https://stackoverflow.com/questions/13784059/how-to-get-the-price-of-an-app-in-steam-webapi

        # find prices for all games and dlc
        to_update = collection_apps.distinct("appid", {
            "updated_date": {"$exists": True},
            "type": {"$in": ["game", "dlc"]},
            "is_free": False,
            "price_overview": {"$exists": True},
            "failureCount": {"$exists": False}
        })

        if (refresh_type == "PARTIAL"):
            # sort by newest to oldest updated in pricehistory
            appid_dict = collection_hist.aggregate([
                {"$group": {"_id": "$appid", "maxDate": {"$max": "$date"}}},
                {"$sort": {"maxDate": -1}}  # newest first
            ])

            for item in appid_dict:
                if len(to_update) == 1200:
                    break
                else:
                    if item['_id'] in to_update:
                        # remove this fairly "new" appid from our list items to run on and refresh
                        to_update.remove(item['_id'])

        if (pbar):
            bar = progressbar.ProgressBar(max_value=len(to_update)).start()

        if (refresh_type == "FULL"):
            # shuffle the appids so we hit new ones each time
            random.shuffle(to_update)  # in-place

        bytes_downloaded = 0
        appids = []

        for i, appid in enumerate(to_update):
            appids.append(appid)

            if (pbar):
                bar.update(i + 1)

            # run 20 or so at a time
            if ((i + 1) % 20 == 0 or (i + 1) == len(to_update)):
                try:
                    # create a comma-delimited string of appids
                    appids_str = ','.join(map(str, appids))

                    # https://github.com/BrakeValve/dataflow/issues/5
                    # e.g.
                    # https://store.steampowered.com/api/appdetails?appids=662400,833310,317832,39150,830810,224540,931720,261900,431290,914410,812110,216464,826503,509681,71115,24679,231474,202452,863900,457100&cc=us&l=en&filters=price_overview
                    r = requests.get(
                        "https://store.steampowered.com/api/appdetails?appids=" +
                        appids_str + "&cc=us&l=en&filters=price_overview")

                    if (r.ok):
                        data = r.json()
                        bytes_downloaded = bytes_downloaded + len(r.content)

                        for k, value in data.items():
                            if (value["success"] is True):
                                if (value['data']):
                                    price_hist = value['data']['price_overview']

                                    # set the appid based on the key
                                    price_hist['appid'] = int(k)
                                    # add current datetimestamp
                                    price_hist['date'] = datetime.datetime.utcnow()

                                    # remove formatted values, not needed
                                    # if they ever get added to the database, this will remove them
                                    # db.getCollection('pricehistory').update({},{"$unset": {"initial_formatted":1, "final_formatted":1, "currency":1}}, {multi: true})
                                    # and to validate that it worked, this should return nothing:
                                    # db.getCollection('pricehistory').find({"$or": [{"initial_formatted":{"$exists":true}}, {"final_formatted":{"$exists":true}}, {"currency":{"$exists":true}} ]})
                                    price_hist.pop('initial_formatted', None)
                                    price_hist.pop('final_formatted', None)
                                    price_hist.pop('currency', None)

                                    collection_hist.insert_one(price_hist)
                                else:
                                    # No price_overview information returned, remove it from the entry
                                    # to prevent future unnecessary calls.  This is also an indicator
                                    # of stale app information.
                                    collection_apps.update_one(
                                        {'appid': int(k)},
                                        {"$unset": {"price_overview": ""}})
                                    logging.info(
                                        "No price information returned for appid: " +
                                        str(k) + " - clearing app price info.")
                    else:
                        logging.error("status code: " + str(r.status_code))
                        logging.error("price history appids: " + appids_str)
                except Exception as e:
                    logging.error(str(e) + " - appids: " + str(appids_str) + " - data: " + str(value))

                appids = []

                # sleep for a bit, the API is throttled
                # limited to 200 requests every 5 minutes or so...
                # 10 requests every 10 seconds
                # 100,000 requests per day
                time.sleep(1.75)  # seconds

        if (pbar):
            bar.finish()

        logging.info("Finished updating price history via " + refresh_type)
        logging.info("Downloaded: " + common.sizeof_fmt(bytes_downloaded))
        common.writeBandwidth(db, bytes_downloaded)
    except Exception as e:
        logging.error(str(e))
        time.sleep(1)

def updateTwitchTopGames(refresh_type="TOP", pbar=False):
    logging = common.setupLogging()

    try:
        logging.info("Updating Twitch top games via " + refresh_type)

        client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
        db = client['steam']
        collection_twitchhistorical = db['twitchhistorical']
        collection_apps = db['apps']

        # create an index for id, this vastly improves performance
        collection_twitchhistorical.create_index("id")
        collection_twitchhistorical.create_index("date")
        collection_twitchhistorical.create_index("steamId")

        # API page w/examples
        # https://dev.twitch.tv/docs/api/

        # grab the top X number of games on Twitch
        top_x = 100  # number of results to return in each top games request
        first_x = 50  # number of streams to return for each game, max 100
        num_streams = 100

        access_token = getTwitchToken(logging)

        if (pbar):
            bar = progressbar.ProgressBar(max_value=int(top_x * num_streams)).start()

        bytes_downloaded = 0
        game_rank = 1  # for game rank/order returned via Twitch
        i = 1  # for progress bar

        while (i < top_x * num_streams):
            try:
                # Twitch Top Games
                # https://dev.twitch.tv/docs/api/reference/#get-top-games
                params = {'first': first_x}
                if i != 1:
                    params = {'first': first_x, 'after': pagination}

                r = requests.get("https://api.twitch.tv/helix/games/top",
                                 headers={'Client-ID': config.twitch_client_id,
                                          'Authorization': "Bearer " + access_token},
                                 params=params)

                if (r.ok):
                    if (int(r.headers['Ratelimit-Remaining']) < 4):
                        logging.info("rate limit: " + r.headers['Ratelimit-Limit'])
                        logging.info("rate limit remaining: " + r.headers['Ratelimit-Remaining'])

                    data = r.json()
                    bytes_downloaded = bytes_downloaded + len(r.content)

                    if (data['pagination']['cursor']):
                        pagination = data['pagination']['cursor']
                    else:
                        logging.error("Unable to find pagination cursor")
                        break  # out of while loop

                    for value in data['data']:
                        # add to our historical listing
                        # https://dev.twitch.tv/docs/api/reference/#get-streams
                        r_g = requests.get("https://api.twitch.tv/helix/streams",
                                           headers={'Client-ID': config.twitch_client_id,
                                                    'Authorization': "Bearer " + access_token},
                                           params={'first': num_streams, 'game_id': int(value['id'])})

                        if (r_g.ok):
                            if (int(r_g.headers['Ratelimit-Remaining']) < 4):
                                logging.info("rate limit: " + r_g.headers['Ratelimit-Limit'])
                                logging.info("rate limit remaining: " + r_g.headers['Ratelimit-Remaining'])

                            data_g = r_g.json()

                            for v in data_g['data']:
                                v['date'] = datetime.datetime.utcnow()
                                v.pop('thumbnail_url', None)
                                v['name'] = value['name']  # pull the game name from our top games listing
                                v['gamerank'] = game_rank

                                appid = getSteamId(value['name'], collection_apps)
                                if (appid):
                                    v['steamId'] = appid

                                collection_twitchhistorical.insert_one(v)

                                if (pbar):
                                    bar.update(i)

                                i = i + 1
                        else:
                            logging.error("status code: " + str(r.status_code))
                            # check OAuth and tokens
                            if (r_g.status_code == 401):
                                sys.exit(1)

                        game_rank = game_rank + 1

                        # https://dev.twitch.tv/docs/api/guide/#rate-limits
                        time.sleep(2)  # seconds
                else:
                    logging.error("status code: " + str(r.status_code))
                    # check OAuth and tokens
                    if (r.status_code == 401):
                        sys.exit(1)

                # sleep for a bit
                # https://dev.twitch.tv/docs/api/guide/#rate-limits
                time.sleep(2)  # seconds

                # in some cases, there aren't the max number of streams for a game, thus we can jump ahead
                i = int(game_rank * num_streams)
            except Exception as e:
                logging.error(str(e))
                time.sleep(1)

        if (pbar):
            bar.finish()

        logging.info("Finished updating Twitch top games via " + refresh_type)
        logging.info("Downloaded: " + common.sizeof_fmt(bytes_downloaded))
        common.writeBandwidth(db, bytes_downloaded)
    except Exception as e:
        logging.error(str(e))
        time.sleep(1)

async def handle_request(self, reader: StreamReader, writer: StreamWriter):
    while True:
        request = await common.recv_json(reader)
        print(request)

        if request is None:
            break

        if not request["method"]:
            print("Invalid Request: missing method field.")

        if request["method"].upper().startswith("LIST"):
            files = [f for f in os.listdir(".") if os.path.isfile(f)]
            file_sizes = [
                common.sizeof_fmt(os.path.getsize(f))
                for f in os.listdir(".") if os.path.isfile(f)
            ]
            files = list(zip(files, file_sizes))
            files = filter(filter_files, files)
            files = list(files)
            await common.send_json(writer, {
                "files": files,
            })
        elif request["method"].upper().startswith("RETRIEVE"):
            filename = request["filename"]
            if not os.path.exists(filename):
                await common.send_json(writer, {"error": "file does not exist"})
                return

            with open(filename, "rb") as infile:
                contents = infile.read()

            # base64 encode the binary file
            contents = base64.b64encode(contents).decode("utf-8")
            await common.send_json(writer, {
                "filename": filename,
                "content": contents
            })
        elif request["method"].upper().startswith("STORE"):
            filename = request["filename"]
            with open(filename, "wb") as outfile:
                # base64 decode from the request body
                contents = base64.b64decode(request["content"])
                outfile.write(contents)
            # threaded_print("-> Store Complete")
        elif request["method"].upper().startswith("QUIT"):
            # threaded_print("-> Client disconnected via QUIT")
            pass
        elif request["method"].upper().startswith("DELETE"):
            filename = request["filename"]
            if not os.path.exists(filename):
                await common.send_json(writer, {"error": "file does not exist"})
            else:
                os.remove(filename)
                await common.send_json(writer, {"success": "file removed"})
        else:
            await common.send_json(writer, {"error": "Unsupported command"})

    writer.close()
    await writer.wait_closed()

def steamReviews(pbar=False):
    logging = common.setupLogging()

    try:
        logging.info("Running Steam Reviews")

        client = MongoClient(host=config.mongodb_ip, port=config.mongodb_port)
        db = client['steam']
        collection = db['apps']

        to_update = collection.aggregate([
            {"$match": {"type": {"$in": ["game", "dlc"]}}},
            {"$sort": {"reviews.last_updated": 1}},  # oldest first
            {"$limit": 50},
            {"$project": {"appid": 1, "_id": 0}}
        ])
        to_update = ([item['appid'] for item in to_update])

        if (pbar):
            bar = progressbar.ProgressBar(max_value=len(to_update)).start()

        bytes_downloaded = 0

        for i, appid in enumerate(to_update):
            if (pbar):
                bar.update(i + 1)

            # logging.info("Running on appid: " + str(appid))

            r = requests.get("https://store.steampowered.com/appreviewhistogram/" +
                             str(appid) + "?l=english&review_score_preference=0")

            if (r.ok):
                bytes_downloaded = bytes_downloaded + len(r.content)

                data = r.json()['results']

                # add current datetimestamp
                data['last_updated'] = datetime.datetime.utcnow()

                # convert Epoch seconds to UTC time
                # https://stackoverflow.com/questions/1697815/how-do-you-convert-a-python-time-struct-time-object-into-a-datetime-object
                if ('start_date' in data and data['start_date']):
                    data['start_date'] = datetime.datetime.fromtimestamp(
                        time.mktime(time.gmtime(round(float(data['start_date'])))))
                if ('end_date' in data and data['end_date']):
                    data['end_date'] = datetime.datetime.fromtimestamp(
                        time.mktime(time.gmtime(round(float(data['end_date'])))))

                if ('recent_events' in data):
                    for k, event in enumerate(data['recent_events']):
                        if (event['start_date']):
                            data['recent_events'][k]['start_date'] = datetime.datetime.fromtimestamp(
                                time.mktime(time.gmtime(round(float(event['start_date'])))))
                            data['recent_events'][k]['end_date'] = datetime.datetime.fromtimestamp(
                                time.mktime(time.gmtime(round(float(event['end_date'])))))

                if ('rollups' in data):
                    for k, event in enumerate(data['rollups']):
                        if (event['date']):
                            data['rollups'][k]['date'] = datetime.datetime.fromtimestamp(
                                time.mktime(time.gmtime(round(float(event['date'])))))

                if ('recent' in data):
                    for k, event in enumerate(data['recent']):
                        if (event['date']):
                            data['recent'][k]['date'] = datetime.datetime.fromtimestamp(
                                time.mktime(time.gmtime(round(float(event['date'])))))

                # update_one will keep whatever information already exists
                collection.update_one({'appid': int(appid)},
                                      {'$set': {'reviews': data}},
                                      upsert=True)
            else:
                logging.error("status code: " + str(r.status_code))

            if (pbar):
                bar.update(i + 1)

            time.sleep(1)

        if (pbar):
            bar.finish()

        logging.info("Finished downloading Steam reviews.")
        logging.info("Downloaded: " + common.sizeof_fmt(bytes_downloaded))
        common.writeBandwidth(db, bytes_downloaded)
    except Exception as e:
        logging.error(str(e))
        time.sleep(3)