Example #1
0
 def __call__(self, row, context=None):
     """Replace the raw size stored under ``self.prop`` with a readable string.

     When the row has no value for ``self.prop`` (missing or ``None``) and
     ``self.empty`` is truthy, ``self.empty`` is stored instead.
     ``context`` is accepted but not used here.
     """
     raw_size = row.get(self.prop)
     if raw_size is None:
         # Nothing to format: fall back to the placeholder, if one is set.
         if self.empty:
             row[self.prop] = self.empty
         return
     row[self.prop] = filesize(raw_size, system=si)
Example #2
0
    def run(self):
        """Fetch ``self.url`` and reply with a short description of it.

        HTML pages are answered with their ``<title>``, images with their
        pixel dimensions, and everything else with content type and size.
        Errors while fetching or reading the response are logged.
        """
        try:
            r = requests.get(self.url, stream=True)
        except Exception:
            logging.exception('Exception in url_scanner')
            return

        # Split "type/subtype; charset=..." into MIME type and charset.
        mime_type = 'unknown/unknown'
        encoding = None
        if 'Content-Type' in r.headers:
            content_type_header = [
                x.strip() for x in r.headers['Content-Type'].split(';')
            ]
            mime_type = content_type_header[0]
            for param in content_type_header[1:]:
                if param.startswith('charset='):
                    encoding = param[param.index('=') + 1:]

        try:
            logging.debug(mime_type)
            if mime_type == 'text/html':
                logging.debug(encoding)
                response_content = ''
                for content in r.iter_content(1024):
                    response_content += content.decode(encoding or 'ascii',
                                                       errors='ignore')
                    # Tag names are case-insensitive, so compare lowered text.
                    lowered = response_content.lower()
                    # Stop at the end of the title, or at the end of <head>
                    # when the page has no title at all.
                    if '</title>' in lowered or '</head>' in lowered:
                        break
                # Non-greedy so that only the first title element is captured.
                title = re.search(r'<title>(.+?)</title>', response_content,
                                  re.S | re.I)
                if title:
                    # html.unescape replaces HTMLParser.unescape, which was
                    # removed in Python 3.9.
                    title = html.unescape(title.group(1).strip())
                    self.reply('Title: {}'.format(title))
                else:
                    self.reply('No title found on page...')
            elif mime_type.startswith('image/'):
                try:
                    size = get_image_size2(int(r.headers['Content-Length']),
                                           r.raw)
                    self.reply('Image [{}]: dimensions {} x {}'.format(
                        mime_type.split('/')[1], size[0], size[1]))
                except Exception:
                    self.reply('Image [{}]: unknown size'.format(mime_type))
                    logging.exception('Failed to determine image size')
            else:
                # Either header may be absent (e.g. chunked responses), so
                # fall back instead of raising KeyError.
                content_length = r.headers.get('Content-Length')
                if content_length is None:
                    size_text = 'unknown'
                else:
                    size_text = filesize(int(content_length))
                self.reply('Content type: "{}", size: "{}"'.format(
                    r.headers.get('Content-Type', mime_type), size_text))
        except Exception:
            self.reply('Exception in reading response content.')
            logging.exception('Exception in reading response content')
        finally:
            r.close()
Example #3
0
def size(filesystem):
	"""Print file-size statistics for the entries of ``filesystem``.

	``filesystem`` is a mapping whose values expose ``st_size``. The
	``--verbose`` and ``--histogram`` entries of the module-level
	``arguments`` mapping switch on the per-file listing and the histogram.
	"""

	# Zero-byte entries are treated as directories and left out.
	bytesizes = [entry.st_size for entry in filesystem.values() if entry.st_size > 0]
	kbsizes = [n/1024 for n in bytesizes]

	if arguments['--verbose']:
		for nbytes in sorted(bytesizes):
			print("Found a %s file" % filesize(nbytes))

	# One line per statistic: the value itself and how many files have
	# exactly that size.
	summary = (
		("Average file size:\t{0}\t({1} files)", mean),
		("Maximum file size:\t{0}\t({1} files)", max),
		("Minimum file size:\t{0}\t({1} files)\n", min),
	)
	for template, reducer in summary:
		stat = reducer(bytesizes)
		print(template.format(filesize(stat), bytesizes.count(stat)))

	if arguments['--histogram']:
		title = 'Histogram of file sizes in tree starting at %s' % arguments["DIRECTORY"]
		histogram(kbsizes, 'File size in kB', title)
Example #4
0
def download_variant(config):
    """Download the ``.gz`` files listed on the ``VCF_FILEPATH`` index page.

    The HTML table at ``VCF_FILEPATH`` is read, reduced to a path and a size
    column, filtered through ``skipped(df, config)``, and every remaining
    path is passed to ``download_url``.
    """
    print('vcf')
    check_dir('vcf')
    # Columns 1 and 3 of the first table on the page hold path and size.
    df = pd.read_html(VCF_FILEPATH, skiprows=3)[0].iloc[:, [1, 3]]
    df.columns = ['path', 'size']
    # Non-string cells (e.g. NaN from empty table cells) are dropped;
    # isinstance also accepts str subclasses, unlike an exact type match.
    df = df[df['path'].apply(lambda x: isinstance(x, str) and '.gz' in x)]

    df = skipped(df, config)
    print("%d files, %s" %
          (len(df), filesize(np.sum(df['size'].apply(get_bytes)))))

    for url in df['path']:
        download_url(VCF_FILEPATH + url)
Example #5
0
 def __call__(self, row, context=None):
     """Replace the raw size stored under ``self.prop`` with a readable string.

     When the row has no value for ``self.prop`` (missing or ``None``) and
     ``self.empty`` is truthy, ``self.empty`` is stored instead.
     ``context`` is accepted but not used here.
     """
     raw_size = row.get(self.prop)
     if raw_size is None:
         if self.empty:
             row[self.prop] = self.empty
         return
     if hasattr(raw_size, "value"):
         # The value is a cell: formatting it now would turn it into a
         # string and break sorting, so it is left untouched.
         return
     try:
         row[self.prop] = filesize(raw_size, system=si)
     except TypeError:
         # Best effort: values filesize() cannot handle stay as they are.
         pass
Example #6
0
 def __unicode__(self):
     """Return the text form of ``self.value``.

     Raw mode returns ``unicode(self.value)`` unchanged; otherwise a missing
     value yields ``self.none`` and anything else is formatted by
     ``filesize`` with the ``si`` system.
     """
     if self.raw:
         return unicode(self.value)
     return self.none if self.value is None else filesize(self.value, system=si)
def download_progress(count, blockSize, totalSize):
    """Write a one-line download progress indicator to stdout.

    The signature matches the ``(block count, block size, total size)``
    arguments that ``urllib.request.urlretrieve`` passes to its reporthook.

    Args:
        count: Number of blocks transferred so far.
        blockSize: Size of one block in bytes.
        totalSize: Total size in bytes; 0 or a negative value means unknown.
    """
    downloaded = count * blockSize
    if totalSize > 0:
        # The last block usually overshoots the total, so cap at 100%.
        percent = min(100, int(downloaded * 100 / totalSize))
        sys.stdout.write("\r...%d%% (%s)" % (percent, filesize(totalSize)))
    else:
        # Unknown total: a percentage cannot be computed (and dividing by 0
        # would raise), so show the amount fetched so far instead.
        sys.stdout.write("\r...%s" % filesize(downloaded))
    sys.stdout.flush()
Example #8
0
def start():
    """Download the datasets selected in ``../config/config.json``.

    The config may contain any of the keys ``vcf``, ``fastq`` and
    ``aligned``; each present key triggers the matching download step.
    """
    with open("../config/config.json", "r") as f:
        doctest.testmod()
        config = json.load(f)

    if 'vcf' in config:
        download_variant(config)

    if 'fastq' in config:
        print('retrieving fastq data')
        check_dir('fastq')
        # NOTE(review): error_bad_lines was deprecated in pandas 1.3 and
        # removed in 2.0; on newer pandas use on_bad_lines='skip' instead.
        df = pd.read_csv("src/reqs/sequence.index",
                         header=None,
                         skiprows=29,
                         usecols=[0, 2, 4, 10, 13, 20, 23, 25],
                         sep='\t',
                         error_bad_lines=False)
        df.columns = [
            'path', 'run id', 'study name', 'population', 'instrument model',
            'withdrawn', 'read count', 'analysis group'
        ]

        # Keep only the rows matching every column/value pair in the config.
        for setting in config['fastq']:
            df = df[df[setting] == config['fastq'][setting]]

        df.reset_index(drop=True, inplace=True)

        df = skipped(df, config)
        # Rough size estimate from the read counts; 1.2e7 is presumably an
        # empirical reads-per-gigabyte ratio -- TODO confirm.
        Gbs = round(sum(df['read count'].unique().astype(np.int64)) / 1.2e7, 2)
        print("%s files to download totalling %sG" % (str(len(df)), Gbs))

        for i, row in df.iterrows():
            download_url(row['path'])

    if 'aligned' in config:
        paths = {
            'low coverage': "src/reqs/low_coverage.alignment.index",
            'high coverage': "src/reqs/high_coverage.alignment.index",
            'exome': "src/reqs/exome.alignment.index"
        }
        df = pd.read_csv(paths[config['aligned']['analysis group']],
                         header=None,
                         skiprows=9,
                         usecols=[0],
                         sep='\t',
                         error_bad_lines=False)
        df.columns = ['path']

        df = skipped(df, config)
        print("%s files to download." % len(df))

        check_dir('aligned')

        for i, row in df.iterrows():
            path = row['path']

            # Probe the size first; the context manager closes the probe
            # connection so that one is not leaked per file.
            with urllib.request.urlopen(path) as d:
                content_length = d.info()['Content-length']
            print("%s: fetching %s, approximately %s" %
                  (i, path, filesize(int(content_length))))
            urllib.request.urlretrieve(
                path, "data/pipeline/aligned/" + Path(path).name)
Example #9
0
def reporthook(a, b, c):
    """Print how much of a transfer is done, capped at 100%.

    ``a * b`` is taken as the amount transferred so far and ``c`` as the
    total, which is also shown formatted by ``filesize``.
    """
    percent_done = min(100, float(a * b) / c * 100)
    total_label = filesize(c)
    print("% 3.1f%% of %s\r" % (percent_done, total_label))
    sys.stdout.flush()
Example #10
0
 def __get_sys_memory_usage() -> str:
     """Return the ``used`` figure of ``psutil.virtual_memory()`` as readable text."""
     used_bytes = psutil.virtual_memory().used
     return filesize(used_bytes)
Example #11
0
    def __get_bot_memory_usage() -> str:
        """Return the resident set size of the current process as readable text."""
        rss_bytes = psutil.Process(os.getpid()).memory_info().rss
        return filesize(rss_bytes)
Example #12
0
    async def _stat_memory(self, ctx: Context):
        """Send the current process's resident memory usage to ``ctx``."""
        rss_bytes = psutil.Process(os.getpid()).memory_info().rss
        readable = filesize(rss_bytes)
        await ctx.send(f"Bot memory usage: `{readable}`")
Example #13
0
 def format_filesize(self, data, attr):
     """Replace ``row[attr]`` with a formatted file size in every row.

     The unformatted value is kept under the key ``attr + "_raw"``.
     Rows are modified in place; nothing is returned.
     """
     for row in data:
         original = row[attr]
         row[attr + u"_raw"] = original
         row[attr] = filesize(original)
Example #14
0
 def run(self):
     """Fetch ``self.url`` and reply with a short description of it.

     HTML pages are answered with their ``<title>``, images with their
     pixel dimensions, and everything else with content type and size.
     Nothing is replied when the response has no Content-Type header.
     Errors while reading the response are logged.
     """
     r = urllib.request.urlopen(self.url)
     try:
         # Inside the try block so the response is closed on this early
         # return as well.
         if 'Content-Type' not in r.headers:
             return
         # Split "type/subtype; charset=..." into MIME type and charset.
         content_type_header = [x.strip() for x in r.headers['Content-Type'].split(';')]
         mime_type = content_type_header[0]
         encoding = None
         for param in content_type_header[1:]:
             if param.startswith('charset='):
                 encoding = param[param.index('=')+1:]

         logging.debug(mime_type)
         if mime_type == 'text/html':
             logging.debug(encoding)
             # errors='ignore': a stray non-decodable byte must not abort
             # the whole lookup.
             content = r.read(1024).decode(encoding or 'ascii', errors='ignore')
             while not re.search('</title>', content, re.I):
                 chunk = r.read(1024)
                 if not chunk:
                     break
                 content += chunk.decode(encoding or 'ascii', errors='ignore')
             # Non-greedy so that only the first title element is captured.
             title = re.search(r'<title>(.+?)</title>', content, re.S | re.I)
             if title:
                 # html.unescape replaces HTMLParser.unescape, which was
                 # removed in Python 3.9.
                 title = html.unescape(title.group(1).strip())
                 self.reply('Title: {}'.format(title))
             else:
                 self.reply('No title found on page...')
         elif mime_type.startswith('image/'):
             try:
                 size = get_image_size2(int(r.headers['Content-Length']), r)
                 self.reply('Image [{}]: dimensions {} x {}'.format(mime_type.split('/')[1], size[0], size[1]))
             except Exception:
                 # Most exceptions have no ``msg`` attribute, so log the
                 # traceback instead of printing ``e.msg``.
                 logging.exception('Failed to determine image size')
         else:
             # Content-Length may be absent (e.g. chunked responses).
             content_length = r.headers.get('Content-Length')
             if content_length is None:
                 size_text = 'unknown'
             else:
                 size_text = filesize(int(content_length))
             self.reply('Content type: {}, size: {}'.format(r.headers['Content-Type'], size_text))
     except Exception:
         logging.exception('Errorrr')
     finally:
         r.close()
Example #15
0
 def format_filesize(self, data, attr):
     """Replace ``row[attr]`` with a formatted file size in every row.

     The unformatted value is kept under the key ``attr + "_raw"``.
     Rows are modified in place; nothing is returned.
     """
     for row in data:
         original = row[attr]
         row[attr + u"_raw"] = original
         row[attr] = filesize(original)
Example #16
0
    def run(self):
        """Fetch ``self.url`` and reply with a short description of it.

        HTML pages are answered with their ``<title>``, images with their
        pixel dimensions, and everything else with content type and size.
        Errors while fetching or reading the response are logged.
        """
        try:
            r = requests.get(self.url, stream=True)
        except Exception:
            logging.exception('Exception in url_scanner')
            return

        # Split "type/subtype; charset=..." into MIME type and charset.
        mime_type = 'unknown/unknown'
        encoding = None
        if 'Content-Type' in r.headers:
            content_type_header = [x.strip() for x in r.headers['Content-Type'].split(';')]
            mime_type = content_type_header[0]
            for param in content_type_header[1:]:
                if param.startswith('charset='):
                    encoding = param[param.index('=')+1:]

        try:
            logging.debug(mime_type)
            if mime_type == 'text/html':
                logging.debug(encoding)
                response_content = ''
                for content in r.iter_content(1024):
                    response_content += content.decode(encoding or 'ascii', errors='ignore')
                    # Tag names are case-insensitive, so compare lowered text.
                    lowered = response_content.lower()
                    # Stop at the end of the title, or at the end of <head>
                    # when the page has no title at all.
                    if '</title>' in lowered or '</head>' in lowered:
                        break
                # Non-greedy so that only the first title element is captured.
                title = re.search(r'<title>(.+?)</title>', response_content, re.S | re.I)
                if title:
                    # html.unescape replaces HTMLParser.unescape, which was
                    # removed in Python 3.9.
                    title = html.unescape(title.group(1).strip())
                    self.reply('Title: {}'.format(title))
                else:
                    self.reply('No title found on page...')
            elif mime_type.startswith('image/'):
                try:
                    size = get_image_size2(int(r.headers['Content-Length']), r.raw)
                    self.reply('Image [{}]: dimensions {} x {}'.format(mime_type.split('/')[1], size[0], size[1]))
                except Exception:
                    self.reply('Image [{}]: unknown size'.format(mime_type))
                    logging.exception('Failed to determine image size')
            else:
                # Either header may be absent (e.g. chunked responses), so
                # fall back instead of raising KeyError.
                content_length = r.headers.get('Content-Length')
                if content_length is None:
                    size_text = 'unknown'
                else:
                    size_text = filesize(int(content_length))
                self.reply('Content type: "{}", size: "{}"'.format(r.headers.get('Content-Type', mime_type), size_text))
        except Exception:
            self.reply('Exception in reading response content.')
            logging.exception('Exception in reading response content')
        finally:
            r.close()