def e2freefrag(self):
    """Run e2freefrag on self.devname and parse its two-part output.

    Part 0 of the output is a set of "name: value" summary lines; part 1
    (after the "HISTOGRAM" marker) is the free-extent size histogram.

    Returns a dict {"FragSummary": DataFrame, "ExtSizeHistogram": DataFrame},
    or None when the filesystem is not ext4.
    """
    if self.filesystem != 'ext4':
        return
    cmd = ["e2freefrag", self.devname]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    proc.wait()

    part = 0  # 0: summary section; 1: histogram section
    sums_dict = {}
    hist_table = ""
    hist_df = dataframe.DataFrame()
    for line in proc.stdout:
        if part == 0:
            if "HISTOGRAM" in line:
                # Switch to histogram parsing from the next line on.
                part = 1
                continue
            mo = re.search(r'(.*): (\d+)', line, re.M)
            if mo:
                # Normalize "Key. name: 123" to key_name -> "123".
                keyname = mo.group(1)
                keyname = keyname.replace('.', '')
                keyname = "_".join(keyname.split())
                sums_dict[keyname] = mo.group(2)
        elif part == 1:
            # This part is the histogram.
            line = line.strip()
            if "Extent Size" in line:
                # Column-title line: install our own header instead.
                hist_table = "Extent_start Extent_end Free_extents Free_Blocks Percent"
                hist_df.header = hist_table.split()
                continue
            # Strip range punctuation ("4K...8K :") down to bare numbers.
            fline = re.sub(r'[\-:\n]', "", line)
            fline = re.sub(r'\.{3}', "", fline)
            row = fline.split()
            hist_df.addRowByList(row)

    hist_df.addColumns(
        keylist=["HEADERMARKER_freefrag_hist", "monitor_time", "jobid"],
        valuelist=[
            "DATAMARKER_freefrag_hist", self.monitor_time, self.jobid
        ])

    # convert dict to data frame
    sums_df = dataframe.DataFrame(header=sums_dict.keys(),
                                  table=[sums_dict.values()])
    sums_df.addColumn(key="HEADERMARKER_freefrag_sum",
                      value="DATAMARKER_freefrag_sum")
    sums_df.addColumn(key="monitor_time", value=self.monitor_time)
    sums_df.addColumn(key="jobid", value=self.jobid)

    return {"FragSummary": sums_df, "ExtSizeHistogram": hist_df}
def getFSBlockCount(self, df_files):
    """Sum the per-file meta/data block counts in df_files.

    df_files must have 'n_metablock' and 'n_datablock' columns; any 'NA'
    cell makes both totals 'NA'. Returns a one-row DataFrame, '' for an
    empty input table, or None when the filesystem is not ext4.
    """
    if self.filesystem != 'ext4':
        return
    if len(df_files.table) == 0:
        return ""

    meta_col = df_files.header.index('n_metablock')
    data_col = df_files.header.index('n_datablock')
    total_meta = 0
    total_data = 0
    for row in df_files.table:
        if 'NA' in (row[meta_col], row[data_col]):
            # One unknown cell poisons both totals.
            total_meta = 'NA'
            total_data = 'NA'
            break
        total_meta += int(row[meta_col])
        total_data += int(row[data_col])

    out_header = ['fs_nmetablocks', 'fs_ndatablocks', 'monitor_time',
                  'HEADERMARKER_extstatssum', 'jobid']
    out_row = [total_meta, total_data, self.monitor_time,
               'DATAMARKER_extstatssum', self.jobid]
    return dataframe.DataFrame(header=out_header, table=[out_row])
def __init__(self):
    """Initialize the worker thread with its default parameters and state."""
    QThread.__init__(self)
    # Numeric tuning parameters (exact semantics not visible here --
    # presumably thresholds for edge/contour detection; TODO confirm).
    self.upper = 200
    self.lower = 80
    self.thresh = 100
    self.minimumGray = 70
    self.defectsSize = 2500
    self.bottonLine = -120  # NOTE(review): likely a typo for "bottomLine"; kept for compatibility
    # Visualization toggles.
    self.visibleFrame = True
    self.visibleEdges = True
    self.visibleGray = False
    self.visibleFloodfill = True
    self.showConvexHull = True
    self.showHullDefects = True
    self.drawContour = True
    self.showPredict = True
    # Run-state flags and resources.
    self.pause = False
    self.dataframe = dataframe.DataFrame()
    self.MLP = 0     # placeholder until a model is loaded -- TODO confirm
    self.scaler = 0  # placeholder until a scaler is loaded -- TODO confirm
    self.fps = 0
    self.running = False
    self.destroyAllWindows = False
def count(self):
    '''
    Compute count of group.

    :returns: DataFrame or Series with per-group counts, wrapped in the
        matching Python container.
    '''
    result = self._groupby.count()
    if isinstance(result, MIDataFrame):
        return dataframe.DataFrame(dataframe=result)
    return series.Series(series=result)
def get_filepath_inode_map2(paths):
    """Build a DataFrame mapping each absolute path to its inode number.

    paths must be absolute paths.
    """
    result = dataframe.DataFrame()
    result.header = ['filepath', 'inode_number']
    for abspath in paths:
        ino = Monitor.stat_a_file(abspath)['inode_number']
        result.addRowByList([abspath, ino])
    return result
def min(self):
    '''
    Compute minimum of group.

    :returns: DataFrame or Series with per-group minimums, wrapped in the
        matching Python container.
    '''
    result = self._groupby.min()
    if isinstance(result, MIDataFrame):
        return dataframe.DataFrame(dataframe=result)
    return series.Series(series=result)
def ext34_getExtentList_of_myfiles(target):
    """Merge the filefrag extent lists of all my files under target.

    The first file's DataFrame supplies the header; later files only
    contribute their table rows.
    """
    merged = dataframe.DataFrame()
    for path in get_all_my_files(target):
        if len(merged.header) == 0:
            merged = filefrag(path)
        else:
            merged.table.extend(filefrag(path).table)
    return merged
def std(self):
    '''
    Compute standard deviation of groups.

    :returns: DataFrame or Series with per-group standard deviations,
        wrapped in the matching Python container.
    '''
    # BUG FIX: this method previously delegated to self._groupby.median(),
    # silently returning the median instead of the standard deviation.
    r = self._groupby.std()
    if isinstance(r, MIDataFrame):
        return dataframe.DataFrame(dataframe=r)
    else:
        return series.Series(series=r)
def sum(self):
    '''
    Compute sum of groups.

    :returns: DataFrame or Series with per-group sums, wrapped in the
        matching Python container.
    '''
    result = self._groupby.sum()
    if isinstance(result, MIDataFrame):
        return dataframe.DataFrame(dataframe=result)
    return series.Series(series=result)
def get_filepath_inode_map(mountpoint, dir):
    """Build a DataFrame mapping mountpoint-relative paths to inode numbers.

    Paths are stored relative to mountpoint; stat is done on the joined
    absolute path.
    """
    result = dataframe.DataFrame()
    result.header = ['filepath', 'inode_number']
    for relpath in Monitor.get_all_paths(mountpoint, dir):
        stat_dic = Monitor.stat_a_file(os.path.join(mountpoint, relpath))
        result.addRowByList([relpath, stat_dic['inode_number']])
    return result
def get_group(self, name):
    '''
    Get a group

    :param name: The name of the group
    :return: The group
    '''
    result = self._groupby.getGroup(name)
    if isinstance(result, MIDataFrame):
        return dataframe.DataFrame(dataframe=result)
    return series.Series(series=result)
def xfs_getExtentList_of_a_dir(self, target="."):
    """Merge the XFS extent lists of all my files under target.

    rootdir is actually relative to mountpoint. Seems bad.
    The first file's DataFrame supplies the header; later files only
    contribute their table rows.
    """
    merged = dataframe.DataFrame()
    for path in get_all_my_files(target):
        if len(merged.header) == 0:
            merged = self.xfs_get_extentlist_of_a_file(path)
        else:
            merged.table.extend(
                self.xfs_get_extentlist_of_a_file(path).table)
    return merged
def dumpfs(self):
    """Run dumpe2fs on self.devname and collect free block/inode ranges.

    Returns a dict {"freeblocks": DataFrame, "freeinodes": DataFrame} with
    ['start', 'end'] rows, or None when the filesystem is not ext4.
    """
    if self.filesystem != 'ext4':
        return
    print "dumpfs..."
    cmd = ["dumpe2fs", self.devname]
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    print "dumpfs finished. Parsing results..."

    freeblocks = []
    freeinodes = []
    for line in proc.stdout:
        # Only the per-group free-range lines are of interest.
        if line.startswith(" Free blocks:"):
            freeblocks += self._spliter_dumpfs(line)
        elif line.startswith(" Free inodes:"):
            freeinodes += self._spliter_dumpfs(line)
        else:
            pass
    proc.wait()

    # initialize
    freeblocks_df = dataframe.DataFrame(header=['start', 'end'],
                                        table=freeblocks)
    freeinodes_df = dataframe.DataFrame(header=['start', 'end'],
                                        table=freeinodes)

    # add additional columns
    freeblocks_df.addColumn(key="monitor_time", value=self.monitor_time)
    freeblocks_df.addColumn(key="jobid", value=self.jobid)
    freeblocks_df.addColumn(key="HEADERMARKER_freeblocks",
                            value="DATAMARKER_freeblocks")
    freeinodes_df.addColumn(key="monitor_time", value=self.monitor_time)
    freeinodes_df.addColumn(key="jobid", value=self.jobid)
    freeinodes_df.addColumn(key="HEADERMARKER_freeinodes",
                            value="DATAMARKER_freeinodes")

    return {"freeblocks": freeblocks_df, "freeinodes": freeinodes_df}
def quantile(self, q):
    '''
    Return values at the given quantile.

    :param q: (*float*) Value between 0 <= q <= 1, the quantile(s) to compute.
    :returns: Series or DataFrame
    '''
    result = self._groupby.percentile(q)
    if isinstance(result, MIDataFrame):
        return dataframe.DataFrame(dataframe=result)
    return series.Series(series=result)

########################################################
def getPerFileBlockCounts(self, rootdir="."):
    """Collect per-file extent summaries for every inode path under rootdir.

    The header is taken from the first summary dict; marker columns are
    appended for later parsing. Returns None when the filesystem is not ext4.
    """
    if self.filesystem != 'ext4':
        return

    files = self.getAllInodePaths(rootdir)
    counts_df = dataframe.DataFrame()
    for path in files:
        summary = self.dumpextents_sum(path)
        if len(counts_df.header) == 0:
            counts_df.header = summary.keys()
        counts_df.addRowByDict(summary)

    counts_df.addColumns(
        keylist=["HEADERMARKER_extstats", "monitor_time", "jobid"],
        valuelist=["DATAMARKER_extstats", self.monitor_time, self.jobid])
    return counts_df
def btrfs_convert_rawext_to_ext(df_rawext, df_chunk, df_map):
    """Convert raw btrfs extents (virtual addresses) into physical extents.

    df_rawext rows carry Virtual_start/Logical_start/Length/inode_number;
    df_chunk maps virtual to physical addresses; df_map maps inode numbers
    to file paths. Returns a DataFrame in the common extent-list layout.
    Only a single underlying device is supported (asserted below).
    """
    dic_map = btrfs_df_map_to_dic(df_map)
    hdr = df_rawext.header
    devices = set()
    df_ext = dataframe.DataFrame()
    df_ext.header = [
        'Level_index', 'Max_level', 'Entry_index', 'N_Entry',
        'Virtual_start', 'Logical_start', 'Logical_end', 'Physical_start',
        'Physical_end', 'Length', 'Flag', 'filepath'
    ]

    for row in df_rawext.table:
        # Turn the positional row into a dict keyed by column name.
        rowdic = {}
        for col in hdr:
            rowdic[col] = row[hdr.index(col)]

        # Resolve the virtual start address to per-device stripes.
        phy_starts = btrfs_db_parser.virtual_to_physical(
            rowdic['Virtual_start'], df_chunk)
        for stripe in phy_starts:
            devices.add(stripe['devid'])
            assert len(devices) == 1, 'we only allow one device at this time'
            rowdic['Physical_start'] = stripe['physical_addr']
            rowdic['Physical_end'] = stripe['physical_addr'] + \
                int(rowdic['Length'])

        rowdic['Logical_end'] = int(rowdic['Logical_start']) + \
            int(rowdic['Length'])
        # Raw extents carry no tree position; use zero placeholders.
        rowdic['Level_index'] = 0
        rowdic['Max_level'] = 0
        rowdic['Entry_index'] = 0
        rowdic['N_Entry'] = 0
        rowdic['filepath'] = dic_map[str(rowdic['inode_number'])]
        rowdic['Flag'] = "NA"

        df_ext.addRowByDict(rowdic)

    return df_ext
def getExtentList_of_a_dir(self, target):
    """
    this only works for absolute path
    """
    if self.filesystem != 'ext4':
        return

    files = get_all_my_files(target)
    merged = dataframe.DataFrame()
    for path in files:
        # debugfs wants paths relative to the filesystem root.
        relpath = os.path.relpath(path, target)
        if len(merged.header) == 0:
            merged = self.dump_extents_of_a_file(relpath)
        else:
            merged.table.extend(
                self.dump_extents_of_a_file(relpath).table)
    return merged
def filefrag(filepath): cmd = ["filefrag", "-sv", filepath] #print cmd proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) df_ext = dataframe.DataFrame() header = [ "Level_index", "Max_level", "Entry_index", "N_Entry", "Logical_start", "Logical_end", "Physical_start", "Physical_end", "Length", "Flag", "filepath" ] df_ext.header = header #ext logical physical expected length flags #0 0 1545 12 merged for line in proc.stdout: if isfilefrag_ext_line(line): items = line.split() # it is 4 because there might be some line without # both expected and flags assert len(items) >= 4, line if len(items) == 5 or len(items) == 4: items.insert(3, -1) #print items d = { 'Level_index': 0, 'Max_level': 0, 'Entry_index': int(items[0]), 'N_Entry': 'NA', 'Logical_start': int(items[1]), 'Logical_end': int(items[1]) + int(items[4]), 'Physical_start': int(items[2]), 'Physical_end': int(items[2]) + int(items[4]), 'Length': int(items[4]), 'Flag': 'NA', 'filepath': filepath } df_ext.addRowByDict(d) #pprint.pprint(d) #print df_ext.toStr() proc.wait() return df_ext
def __do_aggregate(self, clazz, new_col, *col_names):
    """Aggregate col_names of every group with clazz into column new_col.

    clazz is instantiated per group and called with the group's column
    values; its scalar result becomes one row of new_col. Raises
    ValueError when the callable returns a sized object instead of a
    scalar. Returns a new UN-GROUPED DataFrame.
    """
    # init a dictionary of lists where the keys are the grouping
    # colnames + the new column name
    resvals = {i: [] for i in self.__grouping.grouping_colnames}
    resvals[new_col] = []
    # iterate over every group
    for _, group in self.__grouping:
        # get the columns that should be used for aggregation
        colvals = [group[x] for x in col_names]
        # cal the result
        res = clazz()(*colvals)
        if hasattr(res, "__len__"):
            raise ValueError(
                "The function you provided yields an array " +
                "of false length!")
        resvals[new_col].append(res)
        # Replicate the group's key values alongside the aggregate.
        for i, colname in enumerate(group.grouping_colnames):
            resvals[colname].append(group.grouping_values[i])
    # create a new UN-GROUPED data-frame object
    return dataframe.DataFrame(**resvals)
def __init__(self):
    """Initialize the worker thread with its default parameters and state."""
    QThread.__init__(self)
    # Numeric tuning parameters (exact semantics not visible here --
    # presumably thresholds for edge/line detection; TODO confirm).
    self.upper = 200
    self.lower = 80
    self.thresh = 100
    self.minLineSize = 30
    self.defectsSize = 2500
    self.bottonLine = -120  # NOTE(review): likely a typo for "bottomLine"; kept for compatibility
    # Visualization toggles.
    self.visibleFrame = True
    self.visibleEdges = True
    self.visibleGray = False
    self.visibleFloodfill = True
    self.showConvexHull = True
    self.showHullDefects = True
    self.showHoughLines = True
    # Run-state flags and resources.
    self.dataframe = dataframe.DataFrame()
    self.fps = 0
    self.running = False
    self.destroyAllWindows = False
def xfs_bmap_of_a_file(self, filepath):
    """Find all extents of a file in xfs by running xfs_bmap -v.

    Returns a DataFrame of extents in the common layout, with an extra
    level -1 row marking the inode block, plus filepath/marker columns.
    """
    cmd = 'xfs_bmap -v ' + filepath
    cmd = shlex.split(cmd)
    print cmd
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    lines = proc.communicate()[0]
    lines = lines.strip()
    lines = lines.split('\n')

    df_ext = dataframe.DataFrame()
    header = ["Level_index", "Max_level", "Entry_index", "N_Entry",
              "Logical_start", "Logical_end", "Physical_start",
              "Physical_end", "Length", "Flag"]
    df_ext.header = header

    for i, line in enumerate(lines):
        if i < 2:
            # skip the header line
            continue
        nums = re.findall(r'\d+', line, re.M)
        assert len(nums) == 9, "xfs_bmap, number parsing failed"
        d = {
            "Level_index": "NA",
            "Max_level": "NA",
            "Entry_index": "NA",
            "N_Entry": "NA",
            # this output of xfs_bmap is in 512 byte block
            # we convert it to use 4096 byte block by
            # blocknumber * 512/4096=blocknumber/8
            "Logical_start": int(nums[1]) / 8,
            "Logical_end": int(nums[2]) / 8,
            "Physical_start": int(nums[3]) / 8,
            "Physical_end": int(nums[4]) / 8,
            "Length": int(nums[8]) / 8,
            "Flag": "NA"
        }
        df_ext.addRowByDict(d)

    # Append a pseudo-extent (level -1) for the inode block itself.
    inode_number = self.stat_a_file(filepath)['inode_number']
    inode_fsb = self.xfs_convert_ino_to_fsb(int(inode_number))
    d = {}
    d['Level_index'] = '-1'
    d['Max_level'] = '-1'
    d['Entry_index'] = 'NA'
    d['N_Entry'] = 'NA'
    d['Logical_start'] = 'NA'
    d['Logical_end'] = 'NA'
    d['Physical_start'] = inode_fsb
    d['Physical_end'] = inode_fsb
    d['Length'] = '1'
    d['Flag'] = 'NA'
    df_ext.addRowByDict(d)

    df_ext.addColumn(key="filepath", value=filepath)
    df_ext.addColumn(key="HEADERMARKER_extlist", value="DATAMARKER_extlist")
    df_ext.addColumn(key="jobid", value=self.jobid)
    df_ext.addColumn(key="monitor_time", value=self.monitor_time)
    return df_ext
def parse(self):
    """
    number of tab before a line = level of the line

    Walks self.lines, tracking the tree depth of each line via its tab
    prefix, and extracts btrfs extent and chunk records. Returns
    {'extents': DataFrame, 'chunks': DataFrame}.
    """
    # path is used to hold dictionaries along from root to the
    # current node
    path = [None] * 10
    # node_queue[i] store entries of a child node of path[i-1]
    # NOTE(review): [[]] * 10 aliases one shared list in every slot, but
    # each slot is reassigned to a fresh list before it is appended to,
    # so the aliasing is harmless here.
    node_queue = [[]] * 10
    # dataframe to store the results
    df_ext = dataframe.DataFrame()
    df_ext.header = [
        'Length', 'inode_number', 'Logical_start', 'Virtual_start'
    ]
    df_chunk = dataframe.DataFrame()
    df_chunk.header = [
        'devid', 'physical_offset', 'stripe', 'chunk_virtual_off_start'
    ]

    cur_level = -1
    for line in self.lines:
        pre_level = cur_level
        cur_level = nPrefixTab(line)
        line_dic = line_parts(line)  # may be None for unknown lines

        if cur_level > pre_level:
            # Went one level deeper: start a fresh entry list there.
            node_queue[cur_level] = [line_dic]  # It could be None
        elif cur_level == pre_level:
            # Sibling line: accumulate on the current level.
            if line_dic != None:
                node_queue[cur_level].append(line_dic)
            else:
                pass
        else:
            # Came back up: clear the deeper level and restart this one.
            node_queue[pre_level] = [
            ]  # probably not necessary, but good to clean
            node_queue[cur_level] = [line_dic]  # It can be None

        # Now you can get what you want.
        # Let's do the math work in R, here
        # we only print out things like:
        #
        # EXTENT_DATA_DATA INODE_NUMBER Logical_start virtual_start length
        if node_queue[cur_level] != [] and \
                node_queue[cur_level][-1] != None and \
                node_queue[cur_level][-1]['linetype'] == 'EXTENT_DATA_DATA_3':
            # we know we have a whole extent
            # for short
            ext_dic_1 = node_queue[cur_level][0]
            ext_dic_2 = node_queue[cur_level][1]
            ext_dic_3 = node_queue[cur_level][2]
            parent = node_queue[cur_level - 1][-1]
            # Note that when extent_disk_number_of_bytes == 0, this is
            # an empty extent and should not be used to show data.
            if ext_dic_1['extent_disk_byte'] != '0' and \
                    ext_dic_1['extent_disk_number_of_bytes'] != '0':
                # Ignore the empty extent
                dic = {
                    'inode_number': parent['key']['objectid'],
                    'Logical_start': parent['key']['offset'],
                    'Virtual_start': int(ext_dic_1['extent_disk_byte']) +
                    int(ext_dic_2['in_extent_offset']),
                    'Length': ext_dic_2['in_extent_number_of_bytes']
                }
                df_ext.addRowByDict(dic)
        elif node_queue[cur_level] != [] and \
                node_queue[cur_level][-1] != None and \
                node_queue[cur_level][-1]['linetype'] == "CHUNK_ITEM_DATA_STRIPE":
            # A chunk stripe: attach the chunk's virtual start offset
            # (held two levels up) and record it.
            grandparent = node_queue[cur_level - 2][-1]
            stripe_dic = node_queue[cur_level][-1]
            stripe_dic['chunk_virtual_off_start'] = grandparent['key'][
                'offset']
            del stripe_dic['linetype']
            df_chunk.addRowByDict(stripe_dic)
        elif node_queue[cur_level] != [] and \
                node_queue[cur_level][-1] != None and \
                node_queue[cur_level][-1]['linetype'] == "INODE_REF_DATA":
            # Inode references are recognized but not currently used.
            parent = node_queue[cur_level - 1][-1]
            ref_dic = node_queue[cur_level][-1]
            pass
        elif node_queue[cur_level] != [] and \
                node_queue[cur_level][-1] != None and \
                node_queue[cur_level][-1]['linetype'] \
                == "EXTENT_DATA_DATA_INLINE":
            # Inline extent: the file data lives inside the leaf node.
            parent = node_queue[cur_level - 1][-1]
            itemoff = parent['item']['itemoff']
            assert len(node_queue[cur_level - 2]) == 3, 'not a good leaf!'
            assert node_queue[cur_level - 2][0]['linetype'] == 'LEAFLINE',\
                "NOT a leaf!!!"
            grandpa = node_queue[cur_level - 2][0]
            leaf_v_addr = grandpa['virtual_bytenr']
            dic = {
                'inode_number': parent['key']['objectid'],
                'Logical_start': parent['key']['offset'],
                # The magic numbers:
                # 40: size of node header
                # 21: members of btrfs_file_extent_item stored
                # in the item data, before file data
                'Virtual_start': int(leaf_v_addr) + \
                40 + \
                int(itemoff) +\
                21,
                'Length': node_queue[cur_level][-1]['data_size']
            }
            df_ext.addRowByDict(dic)

    return {'extents': df_ext, 'chunks': df_chunk}
def __init__(self, sysname, angle=0.0):
    """Create the system's DataFrame and precompute angle-dependent ratios.

    angle is given in degrees and converted to radians.
    """
    self.df = dataframe.DataFrame("%s_ac" % sysname)
    self.theta = angle * (pi / 180.0)
    dt = self.df.dt
    dx = self.df.dx
    dy = self.df.dy
    self._r_tx = (c_light * dt) / (cos(self.theta) * dx)
    self._r_yt = (sin(self.theta) * dy) / (c_light * dt)
def next(self):
    """Return the next (key, DataFrame) pair, or raise StopIteration."""
    if not self.iterator.hasNext():
        raise StopIteration()
    entry = self.iterator.next()
    return entry.getKey(), dataframe.DataFrame(dataframe=entry.getValue())
def xfs_get_extent_tree(inode_number, devname):
    """Collect all extents of an XFS inode via xfs_db, walking the bmap B+tree.

    Handles both layouts: extents stored directly in the inode (u.bmx)
    and a full B+tree (u.bmbt.*), whose internal node blocks are also
    recorded as pseudo-extents. Returns a DataFrame in the common
    extent-list layout, always starting with a level -1 row for the
    inode block itself.
    """
    inode_lines = xfs_db_commands(["inode " + str(inode_number), "print u"],
                                  devname)
    print inode_lines
    inode_dict = xfs_lines_to_dict(inode_lines)
    pprint.pprint(inode_dict)

    df_ext = dataframe.DataFrame()
    header = ["Level_index", "Max_level", "Entry_index", "N_Entry",
              "Logical_start", "Logical_end", "Physical_start",
              "Physical_end", "Length", "Flag"]
    df_ext.header = header

    # Find out the fsb of the inode
    inode_fsb = xfs_convert_ino_to_fsb(inode_number, devname)
    df_ext = _dataframe_add_an_extent(df_ext,
                                      Level_index="-1",
                                      Max_level="-1",
                                      Entry_index="NA",
                                      N_Entry="NA",
                                      Logical_start="NA",
                                      Logical_end="NA",
                                      Physical_start=inode_fsb,
                                      Physical_end=inode_fsb,
                                      Length='1',
                                      Flag='NA')

    if inode_dict.has_key('u.bmx'):
        print "All extents pointers are in inode"
        exts = xfs_parse_type03(inode_dict['u.bmx'])
        for ext in exts:
            df_ext = _dataframe_add_ext_tuple(df_ext,
                                              level_index=0,
                                              max_level=0,
                                              ext=ext)
        return df_ext

    if inode_dict.has_key('u.bmbt.level'):
        # in this case, we have a B+tree
        max_level = int(inode_dict['u.bmbt.level'])
        cur_xfs_level = int(inode_dict['u.bmbt.level'])
        ptrs = xfs_parse_type02(inode_dict['u.bmbt.ptrs'])  # the root of B+Tree

        # Initialize the tree for traversing
        ptr_queue = Queue.Queue()
        for p in ptrs:
            ptr_queue.put_nowait(p)
            # Record the tree block itself as a pseudo-extent.
            df_ext = _dataframe_add_an_extent(
                df_ext,
                Level_index=max_level - cur_xfs_level,
                Max_level=max_level,
                Entry_index="NA",
                N_Entry="NA",
                Logical_start="NA",
                Logical_end="NA",
                Physical_start=p,
                Physical_end=p,
                Length='1',
                Flag='NA')

        # Breadth-first traversal of the bmap B+tree.
        while not ptr_queue.empty():
            cur_blk = ptr_queue.get_nowait()
            block_lines = xfs_db_commands(
                ["fsb " + str(cur_blk), "type bmapbta", "p"], devname)
            block_attrs = xfs_lines_to_dict(block_lines)
            cur_xfs_level = int(block_attrs['level'])

            if cur_xfs_level > 0:
                # This is still an internal node of the tree
                # It has pointers in ptrs
                ptrs = xfs_parse_type02(block_attrs['ptrs'])
                for p in ptrs:
                    ptr_queue.put_nowait(p)
                    df_ext = _dataframe_add_an_extent(
                        df_ext,
                        Level_index=max_level - cur_xfs_level,
                        Max_level=max_level,
                        Entry_index="NA",
                        N_Entry="NA",
                        Logical_start="NA",
                        Logical_end="NA",
                        Physical_start=p,
                        Physical_end=p,
                        Length='1',
                        Flag='NA')
            else:
                # This is a leaf of the tree
                # The data extents are in recs[]
                exts = xfs_parse_type03(block_attrs['recs'])
                for ext in exts:
                    df_ext = _dataframe_add_ext_tuple(
                        df_ext,
                        level_index=max_level - cur_xfs_level,
                        max_level=max_level,
                        ext=ext)
        return df_ext

    # It is empty
    return df_ext
def dump_extents_of_a_file(self, filepath): "This function only gets ext list for this file" if self.filesystem != 'ext4': return #print "filepath:", filepath #cmd = "debugfs " + self.devname + " -R 'dump_extents " + filepath + "'" cmd = [ 'debugfs', self.devname, '-R', 'dump_extents "' + filepath + '"' ] #print cmd, '......' #cmd = shlex.split(cmd) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE) ext_list = [] # Use list here in case I want to extract data in Python header = [] max_level = 0 df_ext = dataframe.DataFrame() header = [ "Level_index", "Max_level", "Entry_index", "N_Entry", "Logical_start", "Logical_end", "Physical_start", "Physical_end", "Length", "Flag" ] df_ext.header = header for line in proc.stdout: #print "LLL:", line, if "Level" in line: pass else: savedline = line line = re.sub(r'[/\-]', " ", line) tokens = line.split() if len(tokens) == 8: # there is no physical end tokens.insert(7, tokens[6]) #TODO: this is dangerous d = {} for i in range(9): try: d[header[i]] = tokens[i] except: print savedline print "token:", tokens print "header:", header # having a try-except can grant you # the opportunity to do something # after bad thing happen if len(tokens) == 10: d["Flag"] = tokens[10] else: d["Flag"] = "NA" df_ext.addRowByDict(d) proc.wait() # Put the location of the inode the df_ext, level_index as -1 to # indicate that it is a inode imapdict = self.imap_of_a_file(filepath) d = {} d['Level_index'] = '-1' d['Max_level'] = '-1' d['Entry_index'] = 'NA' d['N_Entry'] = 'NA' d['Logical_start'] = 'NA' d['Logical_end'] = 'NA' d['Physical_start'] = imapdict['block_number'] d['Physical_end'] = imapdict['block_number'] d['Length'] = '1' d['Flag'] = 'NA' df_ext.addRowByDict(d) df_ext.addColumn(key="filepath", value=fill_white_space(filepath)) df_ext.addColumn(key="HEADERMARKER_extlist", value="DATAMARKER_extlist") df_ext.addColumn(key="jobid", value=self.jobid) df_ext.addColumn(key="monitor_time", value=self.monitor_time) return df_ext