def get_high_ranked_neighborhoods(fcgnx_file, w, sorted_weights_idx,
                                  show_small=False, weights=1):
    """Collect the neighborhoods of a hashed graph for the top-ranked weights.

    The graph is loaded from ``fcgnx_file`` and hashed with
    ``ml.neighborhood_hash``. Indices are visited in the order given by
    ``sorted_weights_idx``; each index is turned into a 15-bit label and
    every node whose hashed label equals it is reported. Only indices that
    match at least one reported node count against the ``weights`` budget.

    Args:
        fcgnx_file: path handed to ``pz.load``.
        w: weight vector, indexed by the same indices as
            ``sorted_weights_idx``.
        sorted_weights_idx: iterable of weight indices in ranking order.
        show_small: when true, matching nodes without neighbors are
            reported too (without a neighbor list).
        weights: number of matching weights to report before stopping.

    Returns:
        A tuple ``(neighborhoods, n_nodes)``: the list of formatted
        neighborhood strings and the node count of the hashed graph.
        The tuple is returned even when ``sorted_weights_idx`` is
        exhausted before ``weights`` matches were found (previously the
        function fell off the end and returned ``None`` in that case).
    """
    # g = FCGextractor.build_cfgnx(fcgnx_file)
    g = pz.load(fcgnx_file)
    g_hash = ml.neighborhood_hash(g)

    neighborhoods = []
    remaining_weights = weights

    for idx in sorted_weights_idx:
        if remaining_weights <= 0:
            # Budget used up: stop scanning lower-ranked weights.
            break

        label_bin = np.binary_repr(idx, 15)
        label = np.array([int(i) for i in label_bin])

        matching_neighborhoods = []
        for m, nh in g_hash.node.iteritems():
            if not np.array_equal(nh["label"], label):
                continue
            neighbors_l = g_hash.neighbors(m)
            if neighbors_l:
                neighbors = "\n".join([str(i) for i in neighbors_l])
                matching_neighborhoods.append(
                    "{0}\n{1}\n{2}\n".format(w[idx], m, neighbors))
            elif show_small:
                matching_neighborhoods.append(
                    "{0}\n{1}\n".format(w[idx], m))

        # Only weights that actually occur in this graph use up the budget.
        if matching_neighborhoods:
            remaining_weights -= 1
            neighborhoods += matching_neighborhoods

    # Single exit point: reached both after the break and when the index
    # list runs out, so callers can always unpack a 2-tuple.
    n_nodes = g_hash.number_of_nodes()
    return neighborhoods, n_nodes
def compute_feature_vector(self, g):
    """Return the histogram of hashed labels for graph ``g``.

    The graph is first passed through ``ml.neighborhood_hash``; the
    hashed graph is then handed to ``ml.label_histogram`` and that result
    is returned unchanged.
    """
    return ml.label_histogram(ml.neighborhood_hash(g))
def compute_label_histogram(self, g):
    """Hash the neighborhoods of ``g`` and return the label histogram.

    Equivalent to ``ml.label_histogram(ml.neighborhood_hash(g))``; the
    value produced by ``ml.label_histogram`` is returned as-is.
    """
    hashed_graph = ml.neighborhood_hash(g)
    return ml.label_histogram(hashed_graph)
def add_weights_to_nodes(g, w, show_labels=True):
    """Return a hashed copy of ``g`` annotated with weight-based fill colors.

    Each node of the neighborhood-hashed graph receives the entry of ``w``
    selected by reading its bit label as a base-2 integer. Every node's
    weight is then added to each of its neighbors, the totals are passed
    through ``normalize_weights``, and the result is written into the
    ``fillcolor`` attribute of every node.

    Args:
        g: graph handed to ``ml.neighborhood_hash``.
        w: weights, indexable by the integer value of a node's bit label.
        show_labels: when true, each node label becomes the node name
            followed by a newline and its aggregated weight; otherwise
            all labels are set to the empty string.

    Returns:
        The weighted, colored copy of the hashed graph.
    """
    hashed = ml.neighborhood_hash(g)

    # Swap each bit-vector label for the weight it indexes in w.
    for _, attrs in hashed.node.iteritems():
        bit_string = "".join([str(bit) for bit in attrs["label"]])
        attrs["label"] = w[int(bit_string, 2)]

    # Sum into a copy so every addition reads the un-aggregated weights.
    weighted = hashed.copy()
    for source in hashed.node:
        source_weight = hashed.node[source]["label"]
        for target in hashed.neighbors(source):
            weighted.node[target]["label"] += source_weight

    # Sorted aggregated weights, paired with their normalized values in
    # reverse order (the original comment states the normalized range is
    # 0.5 to 1; normalize_weights is defined elsewhere).
    ordered = np.sort(np.array(
        [attrs["label"] for _, attrs in weighted.node.iteritems()]))
    scaled = normalize_weights(ordered)[::-1]
    shade_of = dict(zip(ordered, scaled))

    # Store the normalized value as the last component of the fill color.
    for _, attrs in weighted.node.iteritems():
        attrs["style"] = "filled"
        attrs["fillcolor"] = "0.000 0.000 {0}".format(shade_of[attrs["label"]])

    # Either show "<name>\n<weight>" on each node or blank the label.
    for name, attrs in weighted.node.iteritems():
        if show_labels:
            attrs["label"] = (name[0].split("/")[-1] + name[1] + "\n" +
                              str(attrs["label"]))
        else:
            attrs["label"] = ""

    return weighted
def add_weights_to_nodes(g, w, show_labels=True):
    """Weight and color the nodes of the neighborhood hash of ``g``.

    Steps, in order:
      1. hash ``g`` with ``ml.neighborhood_hash`` and replace each node's
         bit label by ``w[<label read as a base-2 integer>]``;
      2. on a copy of that graph, add every node's weight to each of its
         neighbors;
      3. normalize the sorted totals with ``normalize_weights`` and store
         the value in each node's ``fillcolor`` attribute;
      4. rewrite the node labels according to ``show_labels``.

    Args:
        g: graph handed to ``ml.neighborhood_hash``.
        w: weights, indexable by the integer value of a node's bit label.
        show_labels: true to label nodes with their name and aggregated
            weight, false to blank every label.

    Returns:
        The copy of the hashed graph carrying the new attributes.
    """
    base_graph = ml.neighborhood_hash(g)

    # Step 1: bit label -> classifier weight.
    for _, data in base_graph.node.iteritems():
        bits = "".join([str(b) for b in data["label"]])
        data["label"] = w[int(bits, 2)]

    # Step 2: the copy accumulates, the base graph keeps the raw weights.
    result = base_graph.copy()
    for node in base_graph.node:
        own_weight = base_graph.node[node]["label"]
        for other in base_graph.neighbors(node):
            result.node[other]["label"] += own_weight

    # Step 3: map each aggregated weight to its normalized counterpart;
    # the normalized sequence is reversed before pairing.
    totals = np.sort(np.array(
        [data["label"] for _, data in result.node.iteritems()]))
    normalized = normalize_weights(totals)[::-1]
    lookup = dict(zip(totals, normalized))

    for _, data in result.node.iteritems():
        data["style"] = "filled"
        data["fillcolor"] = "0.000 0.000 {0}".format(lookup[data["label"]])

    # Step 4: function name plus weight as label, or no label at all.
    for node, data in result.node.iteritems():
        if show_labels:
            data["label"] = (node[0].split("/")[-1] + node[1] + "\n" +
                             str(data["label"]))
        else:
            data["label"] = ""

    return result
def get_high_ranked_neighborhoods(fcgnx_file, w, sorted_weights_idx,
                                  show_small=False, weights=1):
    """Return the neighborhoods matching the highest-ranked weights.

    Loads the graph stored in ``fcgnx_file``, hashes it with
    ``ml.neighborhood_hash`` and walks ``sorted_weights_idx`` in order.
    Each index is converted to a 15-bit label; nodes whose hashed label
    equals it are formatted as ``"<weight>\\n<node>\\n<neighbors>\\n"``.
    An index only counts against the ``weights`` budget when it produced
    at least one entry.

    Args:
        fcgnx_file: path handed to ``pz.load``.
        w: weight vector, indexed by the values in ``sorted_weights_idx``.
        sorted_weights_idx: iterable of weight indices in ranking order.
        show_small: when true, matching nodes that have no neighbors are
            included as ``"<weight>\\n<node>\\n"``.
        weights: how many matching weights to report.

    Returns:
        ``(neighborhoods, n_nodes)`` - the formatted strings and the
        number of nodes of the hashed graph. Unlike the previous
        version, this tuple is also returned when the index list ends
        before the budget is used up (that path used to yield ``None``).
    """
    # g = FCGextractor.build_cfgnx(fcgnx_file)
    g = pz.load(fcgnx_file)
    g_hash = ml.neighborhood_hash(g)

    neighborhoods = []
    remaining_weights = weights

    for idx in sorted_weights_idx:
        if remaining_weights <= 0:
            # Enough weights reported; skip the rest of the ranking.
            break

        label = np.array([int(i) for i in np.binary_repr(idx, 15)])

        matching_neighborhoods = []
        for m, nh in g_hash.node.iteritems():
            if not np.array_equal(nh["label"], label):
                continue
            neighbors_l = g_hash.neighbors(m)
            if neighbors_l:
                neighbors = '\n'.join([str(i) for i in neighbors_l])
                matching_neighborhoods.append(
                    "{0}\n{1}\n{2}\n".format(w[idx], m, neighbors))
            elif show_small:
                matching_neighborhoods.append(
                    "{0}\n{1}\n".format(w[idx], m))

        # Weights absent from this graph do not consume the budget.
        if matching_neighborhoods:
            remaining_weights -= 1
            neighborhoods += matching_neighborhoods

    # Common exit for both "budget reached" and "indices exhausted".
    n_nodes = g_hash.number_of_nodes()
    return neighborhoods, n_nodes
def get_high_ranked_neighborhoods(self, fcgnx_file, sorted_weights_idx,
                                  n_weights=3):
    """Retrieve the neighborhoods in a hashed graph with maximum weights.

    Args:
        fcgnx_file: path of a fcgnx file containing a fcg.
        sorted_weights_idx: indices that sort the weights from the
            linear classifier.
        n_weights: number of weights with maximum value for which the
            associated neighborhoods are retrieved.

    Returns:
        A list of strings ``"<rank> <m[0]>.<m[1]>(<m[2]>)"``, one per
        matching node ``m``. ``<rank>`` is the remaining weight budget at
        the time of the match, so it starts at ``n_weights`` and counts
        down. The list is returned even when ``sorted_weights_idx`` is
        exhausted before ``n_weights`` matches were found (previously
        ``None`` was returned in that case).
    """
    # g = FCGextractor.build_cfgnx(fcgnx_file)
    g = pz.load(fcgnx_file)
    g_hash = ml.neighborhood_hash(g)
    bits = len(instructionSet.INSTRUCTION_CLASS_COLOR)

    neighborhoods = []
    remaining_weights = n_weights

    for idx in sorted_weights_idx:
        if remaining_weights <= 0:
            # Requested number of weights reported; stop scanning.
            break

        # Floor division keeps the label an integer, which
        # np.binary_repr requires, regardless of the division mode.
        # NOTE(review): presumably each label spans self.b consecutive
        # feature indices - confirm against the feature construction.
        label_decimal = idx // self.b
        label_bin = np.binary_repr(label_decimal, bits)
        label = np.array([int(i) for i in label_bin])

        matching_neighborhoods = [
            "{0} {1}.{2}({3})".format(remaining_weights, m[0], m[1], m[2])
            for m, nh in g_hash.node.iteritems()
            if np.array_equal(nh["label"], label)
        ]

        # Only weights that occur in this graph consume the budget.
        if matching_neighborhoods:
            remaining_weights -= 1
            neighborhoods += matching_neighborhoods

    return neighborhoods