def bps_process_active_vmas(auxdata, active_vmas, appname, app_pid):
    """Calculate the additional internal fragmentation that would occur
    if the vmas in active_vmas were mapped with base pages of varying
    sizes.

    For every candidate base page size, three PlotEvent datapoints are
    produced: 'totalpages' (pages needed to map all vmas),
    'fragmentation' (total bytes of fragmentation from this mapping),
    and 'overhead' (fragmentation as a fraction of the total virtual
    memory size).

    Note: this fragmentation is the fragmentation *beyond* the
    fragmentation that already exists with 4 KB base pages; some
    fragmentation will already exist with 4 KB base pages because the
    minimum vma size is forced up to this size, but we can't know that
    fragmentation here.

    Returns: list of PlotEvents.
    """
    tag = 'bps_process_active_vmas'

    plotevents = []

    # items() gets key-value pairs in hash-table order, so sort by
    # increasing bps - this will result in datapoints being added to
    # series by increasing bps, so that when the series are used later
    # we don't have to sort again.
    sortedbps = sorted(CANDIDATE_BASEPAGESIZES.items(),
        key=lambda pair: pair[1])

    for (bps_label, bps) in sortedbps:
        total_pages = 0
        total_frag = 0
        total_vmsize = 0
        for vma in active_vmas:
            # Ignore shared lib vmas, etc.:
            if vm.ignore_vma(vma):
                debug_ignored(tag, ("ignoring vma: {}").format(vma))
            else:
                (pages, frag) = vm.pages_needed(bps, vma.length)
                total_pages += pages
                total_frag += frag
                total_vmsize += vma.length

        datapoint = BPSDatapoint('totalpages', bps_label, total_pages)
        plot_event = PlotEvent(datapoint=datapoint)
        plotevents.append(plot_event)

        datapoint = BPSDatapoint('fragmentation', bps_label, total_frag)
        plot_event = PlotEvent(datapoint=datapoint)
        plotevents.append(plot_event)

        # 'overhead': new overhead of internal fragmentation, expressed
        # as a percentage of the original virtual memory size.
        # BUGFIX: guard against ZeroDivisionError when total_vmsize is 0
        # (active_vmas empty, or every vma ignored). Also divide with an
        # explicit float operand: the original float(a / b) converted
        # the quotient *after* dividing, which would not prevent integer
        # truncation under Python 2 division semantics.
        if total_vmsize > 0:
            overhead = float(total_frag) / total_vmsize
        else:
            overhead = 0.0
        datapoint = BPSDatapoint('overhead', bps_label, overhead)
        plot_event = PlotEvent(datapoint=datapoint)
        plotevents.append(plot_event)

    return plotevents
def os_process_active_vmas(auxdata, active_vmas, appname, app_pid):
    """Count the "translation entries" needed to map all of the vmas in
    active_vmas to physical pages / segments, for several combinations
    of page / segment sizes.

    For example, directly mapping each vma with a segment of exactly the
    right size needs just one translation entry per vma - that is one
    datapoint for the plot. With just 4 KB pages, a whole lot more
    translation entries are needed - another datapoint.

    Returns: list of PlotEvents, one for the ideal per-vma 'segments'
    mapping plus one per entry-size combination.
    """
    tag = 'os_process_active_vmas'

    plotevents = []

    # Ideal case: one translation entry (segment) per vma.
    plotevents.append(PlotEvent(
        datapoint=OSDatapoint('segments', len(active_vmas))))

    # Each mapping goes from a name/description to a list of translation
    # entry sizes (in bytes) that we want to consider.
    # IMPORTANT: the lists here must be sorted from smallest size to
    # largest.
    txln_size_dict = {
        '4KB'         : [vm.PAGE_SIZE_4KB],
        '4KB,2MB'     : [vm.PAGE_SIZE_4KB, vm.PAGE_SIZE_2MB],
        '4KB,2MB,1GB' : [vm.PAGE_SIZE_4KB, vm.PAGE_SIZE_2MB,
                         vm.PAGE_SIZE_1GB],
    }

    for (descr, sizes) in txln_size_dict.items():
        entry_total = 0
        for vma in active_vmas:
            # Ignore shared lib vmas, etc.:
            if vm.ignore_vma(vma):
                debug_ignored(tag, ("ignoring vma: {}").format(vma))
                continue
            per_size_counts = vm.txln_entries_needed(sizes, vma)
            # ignore different entry sizes for now
            entry_total += sum(per_size_counts)
        print_debug(tag, ("adding OSDatapoint: {}, {}").format(
            descr, entry_total))
        plotevents.append(PlotEvent(
            datapoint=OSDatapoint(descr, entry_total)))

    return plotevents
def update_phys_size(page_event, auxdata, do_ratio, separate_components):
    """Update tracked physical-memory sizes for one page (PTE) map or
    unmap event.

    Returns: the list of new datapoints from update_component_sizes(),
    or [] when the pte is linked to an ignored vma.
    """
    tag = 'update_phys_size'

    pte = page_event.pte
    linked_vma = pte.vma

    if SKIP_PTES_LINKED_TO_IGNORED_VMAS:
        # For calculating VM size, we're skipping vmas that represent
        # shared libs, guard regions, etc. (in update_vm_size()). So,
        # seems like we have to skip PTE events linked to those VMAs
        # too. Looking at the plots output for dedup before and after
        # adding this check, this makes no difference in the plot
        # appearance; only 306 ptes were ignored here. Perhaps more of
        # a difference would be made for e.g. firefox or office.
        if linked_vma and vm.ignore_vma(linked_vma):
            debug_ignored(tag, ("ignoring pte linked to vma {}").format(
                linked_vma))
            return []

    # At the very least, components will contain ['total'].
    components = determine_basic_components(linked_vma,
        separate_components)
    filename = linked_vma.filename if linked_vma else "no-linked-vma"

    # See extensive comments in consume_vma() about how each operation
    # is encoded, especially frees! The map and unmap cases differ only
    # in the size operation applied ('add' vs. 'sub').
    if page_event.unmap:
        print_debug_sizes(("page unmap, filename={}, size={}").format(
            filename, pte.pagesize))
        size_op = 'sub'
    else:
        print_debug_sizes(("page map, filename={}, size={}").format(
            filename, pte.pagesize))
        size_op = 'add'

    return update_component_sizes(components, auxdata, pte.pagesize,
        size_op, vm.PHYS_LABEL, page_event.timestamp, do_ratio)
def vmacount_datafn(auxdata, plot_event, tgid, currentapp):
    """Track the running count of currently-mapped, non-ignored vmas.

    Increments the count on a counted alloc, decrements it on a counted
    free, and emits one datapoint per change.

    Returns: a single-element list of (seriesname, datapoint) tuples,
    or None when the event does not change the count.
    """
    tag = 'vmacount_datafn'
    print_allocs_frees = False

    vma = plot_event.vma
    if vma is None:
        return None

    # Skip this vma if it's for a shared lib, guard region, etc.
    if vm.ignore_vma(vma):
        debug_ignored(tag, ("ignoring vma {}").format(vma))
        return None

    # See extensive comments in consume_plot_event() about how each
    # operation is encoded, especially frees!
    # Note: similar logic is used here for both vmacount_datafn() and
    # update_vm_size() - if you change one, examine the other one too.
    counted_alloc = (vma.vma_op in ('alloc', 'access_change')
        and not vma.is_unmapped)
    counted_free = (vma.is_unmapped
        and vma.unmap_op in ('free', 'access_change'))

    if counted_alloc:
        # Very first allocation of this vma, OR a remap for an
        # access_change: on an access_change, a vma that we were
        # previously ignoring (e.g. due to read-only permissions) may
        # now be not-ignored (e.g. if its permissions were changed to
        # writeable), so we need to count the vma here now.
        # Ugh, this is complicated and ugly... effectively we're hiding
        # the unmap-remap pairs for resizes, relocations, and
        # flag_changes now, but not for access_changes, which is kind
        # of inconsistent :-/
        auxdata.current_vmacount += 1
        point = datapoint()
        point.timestamp = vma.timestamp
        point.count = auxdata.current_vmacount
        if print_allocs_frees:
            print(("{} ALLOC [{}]: {}").format(tgid,
                str(auxdata.current_vmacount).zfill(4),
                vma.to_str_maps_format()))
    elif counted_free:
        # Explicit free of this vma (no matter the operation that
        # allocated it (most recently operated on it)), OR an unmap
        # operation for an access_change: we can't ignore access_change
        # operations because when the vma is remapped, we might ignore
        # it (see ignore_vma() above), so we need to un-count it here
        # first!
        # This access_change case definitely happens regularly: shared
        # lib file vmas are mapped in as rw-p first, then changed to
        # r--p.
        auxdata.current_vmacount -= 1
        point = datapoint()
        point.timestamp = vma.unmap_timestamp
        point.count = auxdata.current_vmacount
        if auxdata.current_vmacount < 0:
            print_error_exit(tag, ("current_vmacount hit {}; unmap "
                "timestamp is {}, vma is {}").format(
                auxdata.current_vmacount, vma.unmap_timestamp,
                vma.to_str_maps_format()))
        if print_allocs_frees:
            print(("{} FREE [{}]: {}").format(tgid,
                str(auxdata.current_vmacount).zfill(4),
                vma.to_str_maps_format()))
    else:
        # Not an explicit alloc or free (e.g. the unmap half of an
        # unmap-remap pair), so ignore this vma.
        return None

    seriesname = currentapp
    point.appname = currentapp
    debug_ignored(tag, ("counted vma: {}").format(vma))
    debug_count(tag, ("{} [series {}]").format(point.count, seriesname))
    return [(seriesname, point)]
def update_vm_size(vma, auxdata, do_ratio, separate_components,
        do_difference=False):
    """Update tracked virtual-memory sizes for one vma event.

    Returns: the list of new datapoints from update_component_sizes(),
    or [] when nothing should change (ignored vma, deferred resize,
    uninteresting op).
    """
    tag = 'update_vm_size'

    # Skip this vma if it's for a shared lib, guard region, etc.
    # See more detailed comments in vmacount_datafn().
    if vm.ignore_vma(vma):
        debug_ignored(tag, ("ignoring vma {}").format(vma))
        return []

    # At the very least, components will contain ['total'].
    components = determine_basic_components(vma, separate_components)

    # See extensive comments in consume_plot_event() about how each
    # operation is encoded, especially frees.
    #
    # IMPORTANT: when tracking vma SIZE (and not just counts), we also
    # need to consider resize events! The HIDE_RESIZES constant controls
    # whether we completely remove the vma's size on the unmap and add
    # the new size back on the remap (HIDE_RESIZES = False), or make the
    # plots look smoother by keeping track of the unmap-remap pair and
    # only adding/subtracting the difference in size (HIDE_RESIZES =
    # True).
    # handle_plot_event() should ensure that we never get a resize-remap
    # without getting a resize-unmap first (unless we happen to have
    # started our kernel trace right in the middle of an unmap-remap
    # pair, which is extremely unlikely, and even if this happens it
    # will be noticed in the analysis script already I think).
    # Does it matter that the unmap-remap pairs for multiple processes
    # may be fed into the same plot? Possibly; if this sort of
    # interleaving does happen in some traces, it's non-trivial to
    # solve here because the tgid passed to the datafn is the same (the
    # "tgid_for_stats") for all of the processes, so we can't separate
    # out the context in that way. Presumably we could keep a running
    # "deficit" of unmapped sizes, and only generate new points on the
    # plot when the number of outstanding unmaps is actually 0.
    #
    # Note: similar logic is used here for both vmacount_datafn() and
    # update_vm_size() - if you change one, examine the other one too.
    remapped = not vma.is_unmapped

    if remapped and vma.vma_op in ('alloc', 'access_change'):
        # Very first allocation of this vma, OR a remap for an
        # access_change, which could result in a previously-ignored
        # vma now being not-ignored (see vmacount_datafn()).
        return update_component_sizes(components, auxdata, vma.length,
            'add', vm.VIRT_LABEL, vma.timestamp, do_ratio,
            do_difference)

    if remapped and vma.vma_op == 'resize':
        if HIDE_RESIZES:
            delta = auxdata.resize_remap(vma.length)
            print_debug(tag, ("resize unmap-remap pair: resized "
                "length is {}, adding diff {} to component "
                "sizes").format(vma.length, delta))
        else:
            delta = vma.length
        # resize_remap() may return None if there are still outstanding
        # unmaps.
        if delta is None:
            return []
        # update_component_sizes() should work ok even if delta is
        # negative with 'add'.
        return update_component_sizes(components, auxdata, delta,
            'add', vm.VIRT_LABEL, vma.timestamp, do_ratio,
            do_difference)

    if vma.is_unmapped and vma.unmap_op == 'resize':
        if HIDE_RESIZES:
            # When we get a resize-unmap, save the unmapped vma's size
            # in auxdata, but don't update any tracked sizes or
            # generate any new points yet.
            auxdata.save_resize_unmap(vma.length)
            print_debug(tag, ("resize unmap: saved unmapped vma length "
                "{}").format(vma.length))
            return []
        # Same as the unmapped-free case below.
        return update_component_sizes(components, auxdata, vma.length,
            'sub', vm.VIRT_LABEL, vma.unmap_timestamp, do_ratio,
            do_difference)

    if vma.is_unmapped and vma.unmap_op in ('free', 'access_change'):
        # Explicit free of this vma (no matter the operation that most
        # recently operated on it), OR an unmap operation for an
        # access_change, which could be changing the permissions on a
        # vma to something that we want to ignore, so we need to
        # un-count the vma's size here first! (see vmacount_datafn()
        # too).
        return update_component_sizes(components, auxdata, vma.length,
            'sub', vm.VIRT_LABEL, vma.unmap_timestamp, do_ratio,
            do_difference)

    return []
def vmaops_datafn(auxdata, plot_event, tgid, currentapp, desired_ops,
        label_series_with_app=True, combine_ops=False):
    """Count vma operations of the types named in desired_ops and emit
    one datapoint per counted op.

    Args:
        desired_ops: list of op names to count (e.g. ['alloc', 'free']).
        label_series_with_app: name the series after the app (True) or
            after the op (False).
        combine_ops: fold all desired ops into one 'combined' series
            (incompatible with label_series_with_app=False).

    Returns: a single-element list of (seriesname, datapoint) tuples,
    or None when the event is ignored or not a desired op.
    """
    tag = 'vmaops_datafn'

    vma = plot_event.vma
    if vma is None:
        return None

    # Skip this vma if it's for a shared lib, guard region, etc.
    # Are there any other special considerations that we have to make
    # for ignored vmas here (like in vmacount_datafn and
    # vm_size_datafn)? These are the vma-op possibilities that are
    # tracked below:
    #   alloc          map
    #   resize         remap
    #   relocation     remap
    #   access_change  remap
    #   flag_change    remap
    #   free           unmap
    # If any of these operations act on a shared-lib / guard /
    # shared-file vma, then they will be ignored here. One possible
    # weirdness that I see is if a vma is first allocated as something
    # that's ignored (e.g. r--pf for a shared lib) and then is
    # access_changed to something that's not ignored, it will appear to
    # be an access_change without a corresponding alloc, but I think
    # this case occurs rarely if ever. The opposite occurs more
    # frequently: something that was previously counted (e.g. rw-pf for
    # a shared lib) is access_changed to something that's now ignored.
    # In this case, the access_change will never be counted, and
    # additionally there will be an alloc without a corresponding free.
    # Ok, so this could be a little weird, and difficult to handle here
    # because we don't do any tracking on unmaps at all. Just live with
    # the weirdness I guess, or comment out the ignore_vma code here
    # altogether for vmaops plots, depending on what we want to count
    # exactly.
    if vm.ignore_vma(vma):
        debug_ignored(tag, ("ignoring vma {}").format(vma))
        return None

    # See extensive comments in consume_vma() about how each operation
    # is encoded, especially frees!
    # Look for explicit free operations first, then ignore any unmap
    # operations that are part of unmap-remap pairs and count the
    # remap operations.
    if vma.is_unmapped and vma.unmap_op == 'free':
        op = 'free'
        timestamp = vma.unmap_timestamp
    elif not vma.is_unmapped:
        op = vma.vma_op
        timestamp = vma.timestamp
    elif auxdata.veryfirstvma:
        # Create a point with the very first timestamp, so that every
        # plot will start from the same time (rather than every plot
        # starting from the first occurrence of a desired_op). This
        # difference is meaningful for apps with very short execution
        # times (e.g. it's misleading if the "frees" plot starts from
        # the time of the very first free, which could only be at the
        # very end of the execution).
        # Only check this condition after checking the op conditions
        # above, so that we don't skip the first op if it's meaningful
        # for desired_ops.
        # This works for the very first timestamp, but we should also
        # do this for the very last timestamp too (which we don't know
        # until plotting time... crap).
        op = 'veryfirst'
        timestamp = vma.timestamp
    else:
        # BUGFIX: the original debug format string was
        # "...unmap_op={}" "this is an unmap...", concatenating with no
        # separator ("unmap_op=Xthis is an unmap"); restore the ": "
        # separator used by the equivalent message in vmacount_datafn().
        print_debug(tag, ("vma_op={}, is_unmapped={}, unmap_op={}: "
            "this is an unmap for an unmap-remap "
            "pair, so not counting this as an op.").format(vma.vma_op,
            vma.is_unmapped, vma.unmap_op))
        return None
    print_debug(tag, ("op={}, timestamp={}").format(op, timestamp))

    if op not in desired_ops and op != 'veryfirst':
        # Don't care about this op type
        return None
    elif combine_ops:
        # Combine all desired ops into one series
        op_orig = op
        op = 'combined'

    try:
        count = auxdata.opcounts[op]
    except KeyError:
        if op != 'veryfirst':
            # usual case
            count = 0
        else:
            # This is the weird case: we want to create a 0 datapoint
            # for the op that this plot is tracking. If this plot is
            # tracking more than one op type, but is not combining
            # them, then this gets a bit weird... but this doesn't
            # actually happen right now.
            count = -1
            op = desired_ops[0]
            if len(desired_ops) > 1:
                print_warning(tag, ("very first op is not in desired_ops, "
                    "but desired_ops has len > 1, so creating a 0 datapoint "
                    "for just the first op {}").format(op))

    count += 1
    auxdata.opcounts[op] = count
    auxdata.veryfirstvma = False
    if count == 0:
        print_debug(tag, ("creating a 0 datapoint for op {} "
            "at timestamp {}").format(op, timestamp))

    point = datapoint()
    point.timestamp = timestamp
    point.count = count
    point.appname = currentapp

    if label_series_with_app:
        # No longer label seriesname with op - just with app name, and
        # then use op in the title.
        #seriesname = "{}-{}".format(currentapp, op)
        seriesname = "{}".format(currentapp)
    else:
        seriesname = op
        if combine_ops:
            # don't allow, would put all ops for all apps into one
            # series.
            print_error(tag, ("invalid combination of label_series "
                "and combine_ops"))
            seriesname = op_orig

    # Return a list of (seriesname, datapoint) tuples:
    return [(seriesname, point)]