def __call__(self, row, context=None):
    """Render the ``self.prop`` entry of *row* as a human-readable size.

    When the value is missing, substitute ``self.empty`` if that is truthy;
    otherwise leave the row untouched.
    """
    raw_size = row.get(self.prop)
    if raw_size is None:
        if self.empty:
            row[self.prop] = self.empty
        return
    # `si` is the unit system used throughout this module for filesize().
    row[self.prop] = filesize(raw_size, system=si)
def run(self):
    """Fetch ``self.url`` and reply with a summary of what is there.

    Replies with the page <title> for HTML, the dimensions for images, or a
    generic content-type/size line for everything else.  All errors are
    logged; the response is always closed.
    """
    try:
        r = requests.get(self.url, stream=True)
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed SystemExit).
        logging.exception('Exception in url_scanner')
        return
    mime_type = None
    encoding = None
    if 'Content-Type' in r.headers:
        content_type_header = [
            x.strip() for x in r.headers['Content-Type'].split(';')
        ]
        mime_type = content_type_header[0]
        for param in content_type_header[1:]:
            if param.startswith('charset='):
                encoding = param[param.index('=') + 1:]
    else:
        mime_type = 'unknown/unknown'
    try:
        logging.debug(mime_type)
        if mime_type == 'text/html':
            logging.debug(encoding)
            response_content = ''
            for content in r.iter_content(1024):
                response_content += content.decode(encoding or 'ascii',
                                                   errors='ignore')
                if '</title>' in response_content:
                    break
                elif '</head>' in response_content:
                    # don't bother going on when <head> ends
                    break
            title = re.search(r'<title>(.+)</title>', response_content,
                              re.S | re.I)
            if title:
                # BUG FIX: use group(1) (the captured text) instead of the
                # groups(1)[0] misuse, and html.unescape() instead of
                # HTMLParser.unescape(), which was removed in Python 3.9.
                self.reply('Title: {}'.format(
                    html.unescape(title.group(1).strip())))
            else:
                self.reply('No title found on page...')
        elif mime_type.startswith('image/'):
            try:
                size = get_image_size2(int(r.headers['Content-Length']),
                                       r.raw)
                self.reply('Image [{}]: dimensions {} x {}'.format(
                    mime_type.split('/')[1], size[0], size[1]))
            except Exception:
                self.reply('Image [{}]: unknown size'.format(mime_type))
                logging.exception('Failed to determine image size')
        else:
            # NOTE(review): raises (and is caught below) when the server
            # sends no Content-Length header — confirm that is intended.
            self.reply('Content type: "{}", size: "{}"'.format(
                r.headers['Content-Type'],
                filesize(int(r.headers['Content-Length']))))
    except Exception:
        self.reply('Exception in reading response content.')
        logging.exception('Exception in reading response content')
    finally:
        r.close()
def size(filesystem):
    """Perform analyses related to file size."""
    # exclude 0-byte items: those are directories.
    bytesizes = [st.st_size for st in filesystem.values() if st.st_size > 0]
    kbsizes = [b / 1024 for b in bytesizes]
    if arguments['--verbose']:
        for b in sorted(bytesizes):
            print("Found a %s file" % filesize(b))
    # Hoist the aggregate computations so each is done once.
    avg = mean(bytesizes)
    biggest = max(bytesizes)
    smallest = min(bytesizes)
    print("Average file size:\t{0}\t({1} files)".format(
        filesize(avg), bytesizes.count(avg)))
    print("Maximum file size:\t{0}\t({1} files)".format(
        filesize(biggest), bytesizes.count(biggest)))
    print("Minimum file size:\t{0}\t({1} files)\n".format(
        filesize(smallest), bytesizes.count(smallest)))
    if arguments['--histogram']:
        histogram(kbsizes, 'File size in kB',
                  'Histogram of file sizes in tree starting at %s'
                  % arguments["DIRECTORY"])
def download_variant(config):
    """Download the ``.gz`` VCF files listed in the HTML index at VCF_FILEPATH.

    Entries already handled according to *config* are removed via
    ``skipped``; a one-line summary of the total download size is printed
    before fetching begins.
    """
    print('vcf')
    check_dir('vcf')
    # Columns 1 and 3 of the first index table are the file path and size.
    df = pd.read_html(VCF_FILEPATH, skiprows=3)[0].iloc[:, [1, 3]]
    df.columns = ['path', 'size']
    # BUG FIX: isinstance() instead of the non-idiomatic `type(x) == str`.
    df = df[df['path'].apply(lambda x: isinstance(x, str) and '.gz' in x)]
    df = skipped(df, config)
    print("%d files, %s" % (len(df),
                            filesize(np.sum(df['size'].apply(get_bytes)))))
    for url in df['path']:
        download_url(VCF_FILEPATH + url)
def __call__(self, row, context=None):
    """Format ``row[self.prop]`` as a human-readable SI size, in place.

    Missing values fall back to ``self.empty`` (when truthy); cell objects
    and unformattable values are left untouched.
    """
    value = row.get(self.prop)
    if value is None:
        if self.empty:
            row[self.prop] = self.empty
        return
    if hasattr(value, "value"):
        # It is a cell: do not transform it now — turning it into a string
        # at this point would break the sort feature.
        return
    try:
        row[self.prop] = filesize(value, system=si)
    except TypeError:
        pass
def __unicode__(self):
    """Unicode form: raw value, the None placeholder, or a formatted size."""
    if self.raw:
        return unicode(self.value)
    return self.none if self.value is None else filesize(self.value, system=si)
def download_progress(count, blockSize, totalSize):
    """urlretrieve-style progress hook: print percent done and total size.

    count: number of blocks transferred so far.
    blockSize: size of each block in bytes.
    totalSize: total download size in bytes (may be 0/-1 when unknown).
    """
    if totalSize > 0:
        # BUG FIX: clamp at 100 — the final block usually overshoots
        # totalSize (the sibling reporthook() already does this).
        percent = min(100, int(count * blockSize * 100 / totalSize))
    else:
        # BUG FIX: avoid ZeroDivisionError when no Content-Length is known.
        percent = 0
    sys.stdout.write("\r...%d%% (%s)" % (percent, filesize(totalSize)))
    sys.stdout.flush()
def start():
    """Entry point: read ../config/config.json and download the requested
    data sets (``vcf``, ``fastq``, and/or ``aligned``) it names."""
    with open("../config/config.json", "r") as f:
        # NOTE(review): running the module doctests as a side effect of
        # start() — and inside the config `with` block — looks accidental;
        # confirm it should not live under an `if __name__` guard instead.
        doctest.testmod()
        config = json.load(f)
    if 'vcf' in config:
        download_variant(config)
    if 'fastq' in config:
        print('retrieving fastq data')
        check_dir('fastq')
        # sequence.index is tab-separated with 29 header rows; only the
        # eight columns named just below are loaded.
        df = pd.read_csv("src/reqs/sequence.index",
                         header=None,
                         skiprows=29,
                         usecols=[0, 2, 4, 10, 13, 20, 23, 25],
                         sep='\t',
                         error_bad_lines=False)
        df.columns = [
            'path', 'run id', 'study name', 'population',
            'instrument model', 'withdrawn', 'read count', 'analysis group'
        ]
        # Keep only rows matching every filter given in config['fastq'].
        for setting in config['fastq']:
            df = df[df[setting] == config['fastq'][setting]]
        df.reset_index(drop=True, inplace=True)
        df = skipped(df, config)
        # NOTE(review): 1.2e7 is a magic read-count→gigabyte conversion
        # factor — confirm its derivation before relying on the estimate.
        Gbs = round(sum(df['read count'].unique().astype(np.int64)) / 1.2e7,
                    2)
        print("%s files to download totalling %sG" % (str(len(df)), Gbs))
        for i, row in df.iterrows():
            download_url(row['path'])
    if 'aligned' in config:
        # Pick the alignment index matching the requested analysis group;
        # an unknown group raises KeyError here.
        paths = {
            'low coverage': "src/reqs/low_coverage.alignment.index",
            'high coverage': "src/reqs/high_coverage.alignment.index",
            'exome': "src/reqs/exome.alignment.index"
        }
        df = pd.read_csv(paths[config['aligned']['analysis group']],
                         header=None,
                         skiprows=9,
                         usecols=[0],
                         sep='\t',
                         error_bad_lines=False)
        df.columns = ['path']
        df = skipped(df, config)
        print("%s files to download." % len(df))
        check_dir('aligned')
        for i, row in df.iterrows():
            path = row['path']
            # Probe Content-Length first so the user sees the expected size
            # before the actual retrieval starts.
            d = urllib.request.urlopen(path)
            print("%s: fetching %s, approximately %s" %
                  (i, path, filesize(int(d.info()['Content-length']))))
            urllib.request.urlretrieve(
                path, "data/pipeline/aligned/" + Path(path).name)
def reporthook(a, b, c):
    """urlretrieve progress hook: a=blocks so far, b=block size, c=total bytes."""
    pct = float(a * b) / c * 100
    # Clamp at 100: the final block usually overshoots the total.
    print("% 3.1f%% of %s\r" % (min(100, pct), filesize(c)))
    sys.stdout.flush()
def __get_sys_memory_usage() -> str:
    """Return the system-wide memory currently in use, human-readable."""
    used_bytes = psutil.virtual_memory().used
    return filesize(used_bytes)
def __get_bot_memory_usage() -> str:
    """Return this process's resident set size, human-readable."""
    rss = psutil.Process(os.getpid()).memory_info().rss
    return filesize(rss)
async def _stat_memory(self, ctx: Context):
    """Send the bot's current resident memory usage to *ctx*."""
    rss = psutil.Process(os.getpid()).memory_info().rss
    friendly_size = filesize(rss)
    await ctx.send(f"Bot memory usage: `{friendly_size}`")
def format_filesize(self, data, attr):
    """Replace *attr* in every row with a human-readable file size.

    The original value is preserved under a new key ``attr + "_raw"``.
    (Old docstring mentioned dates — it formats file sizes.)
    """
    raw_key = attr + u"_raw"
    for row in data:
        row[raw_key] = row[attr]
        row[attr] = filesize(row[attr])
def run(self):
    """Fetch ``self.url`` with urllib and reply with a summary.

    Replies with the page <title> for HTML, the dimensions for images, or a
    generic content-type/size line otherwise.  The response is always
    closed, even on the early no-Content-Type return.
    """
    r = urllib.request.urlopen(self.url)
    try:
        # BUG FIX: the early return used to leak the open response; the
        # whole body now sits inside try/finally.
        if 'Content-Type' not in r.headers:
            return
        content_type_header = [
            x.strip() for x in r.headers['Content-Type'].split(';')
        ]
        mime_type = content_type_header[0]
        encoding = None
        for param in content_type_header[1:]:
            if param.startswith('charset='):
                encoding = param[param.index('=') + 1:]
        logging.debug(mime_type)
        if mime_type == 'text/html':
            logging.debug(encoding)
            # Read 1 KiB chunks until </title> appears (or EOF).
            # BUG FIX: errors='ignore' so a chunk boundary inside a
            # multi-byte character cannot raise UnicodeDecodeError.
            content = r.read(1024).decode(encoding or 'ascii',
                                          errors='ignore')
            while not re.search('</title>', content, re.I):
                chunk = r.read(1024)
                if not chunk:
                    break
                content += chunk.decode(encoding or 'ascii', errors='ignore')
            title = re.search(r'<title>(.+)</title>', content, re.S | re.I)
            if title:
                # BUG FIX: group(1) instead of the groups(1)[0] misuse, and
                # html.unescape() instead of HTMLParser.unescape(), which
                # was removed in Python 3.9.
                self.reply('Title: {}'.format(
                    html.unescape(title.group(1).strip())))
            else:
                self.reply('No title found on page...')
        elif mime_type.startswith('image/'):
            try:
                size = get_image_size2(int(r.headers['Content-Length']), r)
                self.reply('Image [{}]: dimensions {} x {}'.format(
                    mime_type.split('/')[1], size[0], size[1]))
            except Exception:
                # BUG FIX: the old handler did print(e.msg), but Exception
                # has no .msg attribute, so it raised AttributeError itself.
                logging.exception('Failed to determine image size')
        else:
            self.reply('Content type: {}, size: {}'.format(
                r.headers['Content-Type'],
                filesize(int(r.headers['Content-Length']))))
    except Exception:
        # Narrowed from a bare `except:`.
        logging.exception('Errorrr')
    finally:
        r.close()
def run(self):
    """Fetch ``self.url`` and reply with a summary of its content.

    HTML pages get their <title>, images their dimensions, everything else
    a content-type/size line.  Errors are logged and the streamed response
    is always closed.
    """
    try:
        r = requests.get(self.url, stream=True)
    except Exception:
        # Narrowed from a bare `except:` (which also caught SystemExit).
        logging.exception('Exception in url_scanner')
        return
    mime_type = None
    encoding = None
    if 'Content-Type' in r.headers:
        content_type_header = [
            x.strip() for x in r.headers['Content-Type'].split(';')
        ]
        mime_type = content_type_header[0]
        for param in content_type_header[1:]:
            if param.startswith('charset='):
                encoding = param[param.index('=') + 1:]
    else:
        mime_type = 'unknown/unknown'
    try:
        logging.debug(mime_type)
        if mime_type == 'text/html':
            logging.debug(encoding)
            response_content = ''
            for content in r.iter_content(1024):
                response_content += content.decode(encoding or 'ascii',
                                                   errors='ignore')
                if '</title>' in response_content:
                    break
                elif '</head>' in response_content:
                    # don't bother going on when <head> ends
                    break
            title = re.search(r'<title>(.+)</title>', response_content,
                              re.S | re.I)
            if title:
                # BUG FIX: group(1) instead of the groups(1)[0] misuse, and
                # html.unescape() instead of HTMLParser.unescape(), removed
                # in Python 3.9.
                self.reply('Title: {}'.format(
                    html.unescape(title.group(1).strip())))
            else:
                self.reply('No title found on page...')
        elif mime_type.startswith('image/'):
            try:
                size = get_image_size2(int(r.headers['Content-Length']),
                                       r.raw)
                self.reply('Image [{}]: dimensions {} x {}'.format(
                    mime_type.split('/')[1], size[0], size[1]))
            except Exception:
                self.reply('Image [{}]: unknown size'.format(mime_type))
                logging.exception('Failed to determine image size')
        else:
            # NOTE(review): a missing Content-Length raises here and is
            # reported by the handler below — confirm that is intended.
            self.reply('Content type: "{}", size: "{}"'.format(
                r.headers['Content-Type'],
                filesize(int(r.headers['Content-Length']))))
    except Exception:
        self.reply('Exception in reading response content.')
        logging.exception('Exception in reading response content')
    finally:
        r.close()