def discover_histogram_by_os(self, os_name, output=None):
    """
    Displays a histogram of the file distributions across all systems
    of the specified OS

    The x axis is the global prevalence count of a file (labelled
    "Num of Systems"); each bar is weighted by the number of file_metadata
    rows of this OS that have that prevalence count. The title reports the
    percentage of those rows whose prevalence count is greater than 1.

    :param os_name: name of the operating system
    :param output: (optional) output filename in PNG format; when omitted
        the figure is shown interactively instead of being saved
    """
    # NOTE(review): another method with this same name appears in this file;
    # if both live in the same class the later definition silently replaces
    # the earlier one -- confirm which is intended.
    print('[+] Running "Histogram by OS"...')

    num_systems = core.get_num_systems(self.cnx, os_name)
    if not num_systems:
        print("Error: OS {} does not exist".format(os_name))
        return

    # One bin per possible prevalence value 1..num_systems; the extra edge
    # closes the last bin.
    bins = list(range(1, num_systems + 2))

    cursor = self.cnx.cursor()
    try:
        # os_name is bound as a query parameter, never interpolated.
        cursor.execute(
            """
            SELECT COUNT(file_metadata.os_id), global_file_prevalence.count
            FROM global_file_prevalence
            LEFT JOIN file_metadata
                ON global_file_prevalence.unique_file_id = file_metadata.unique_file_id
            WHERE file_metadata.os_id = (SELECT os.id FROM os WHERE os.name = %s)
            GROUP BY global_file_prevalence.count
            ORDER BY global_file_prevalence.count ASC;
            """,
            (os_name,))
        rows = cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()

    if not rows:
        # zip(*rows) cannot be unpacked into two names when there are no rows.
        print("Error: no file data found for OS {}".format(os_name))
        return

    counts, ranges = zip(*rows)

    # Share of occurrences with prevalence > 1. Select by the prevalence
    # value itself instead of skipping the first row, which is only correct
    # when a prevalence-1 row happens to exist.
    total = sum(counts)
    above_one = sum(count for count, prevalence in rows if prevalence > 1)
    perc = int(float(above_one) / total * 100) if total else 0

    fig = plt.figure()
    ax = fig.add_subplot(
        111,
        title="File Prevalence of {} with {}% > 1".format(os_name, perc))
    ax.hist(ranges, weights=counts, bins=bins)
    ax.set_xlabel("Num of Systems")
    ax.set_ylabel("File Occurrences")

    if output is None:
        plt.show()
    else:
        print("Saving histogram to {}".format(output))
        plt.savefig(output)
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)
def discover_histogram_by_source(self, source_name, output=None):
    """
    Displays a histogram of the distribution of file extensions that are
    executable of a single source as it relates to all occurrences of that
    extension across all systems

    The x axis is the global prevalence count of a file (labelled
    "Num of Systems"); each bar is weighted by the number of file_metadata
    rows of this source that have that prevalence count. The title reports
    the percentage of those rows whose prevalence count is greater than 1.

    :param source_name: The name of the source
    :param output: (optional) output filename; when omitted the figure is
        shown interactively instead of being saved
    """
    # NOTE(review): another method with this same name appears in this file;
    # if both live in the same class the later definition silently replaces
    # the earlier one -- confirm which is intended.
    print('[+] Running "Histogram by Source"...')

    src_info = core.get_source_info(self.cnx, source_name)
    if src_info is None:
        print("Source {} does not exist".format(source_name))
        return

    num_systems = core.get_num_systems(self.cnx, src_info.os_id)
    if not num_systems:
        # Without a system count there are no bins to draw.
        print("Error: no systems found for source {}".format(source_name))
        return

    # One bin per possible prevalence value 1..num_systems; the extra edge
    # closes the last bin.
    bins = list(range(1, num_systems + 2))

    # TODO: restrict the query to executable file extensions. As written it
    # counts every file of the source, despite the docstring and plot title.
    cursor = self.cnx.cursor()
    try:
        # source_name is bound as a query parameter rather than formatted
        # into the SQL text, so it cannot alter the statement.
        cursor.execute(
            """
            SELECT COUNT(file_metadata.id), global_file_prevalence.count
            FROM global_file_prevalence
            LEFT JOIN file_metadata
                ON global_file_prevalence.unique_file_id = file_metadata.unique_file_id
            WHERE file_metadata.source_id =
                (SELECT media_source.id FROM media_source WHERE media_source.name = %s)
            GROUP BY global_file_prevalence.count
            ORDER BY global_file_prevalence.count ASC;
            """,
            (source_name,))
        rows = cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()

    if not rows:
        # Covers both None and an empty result; nothing to plot.
        return

    counts, ranges = zip(*rows)

    # Share of occurrences with prevalence > 1. Select by the prevalence
    # value itself instead of skipping the first row, which is only correct
    # when a prevalence-1 row happens to exist.
    total = sum(counts)
    above_one = sum(count for count, prevalence in rows if prevalence > 1)
    perc = int(float(above_one) / total * 100) if total else 0

    fig = plt.figure()
    ax = fig.add_subplot(
        111,
        title="File Executable Prevalence of {} with {}% > 1".format(
            src_info.source_name, perc))
    ax.hist(ranges, weights=counts, bins=bins)
    ax.set_xlabel("Num of Systems")
    ax.set_ylabel("File Occurrences")
    plt.xticks(bins)

    if output is None:
        plt.show()
    else:
        plt.savefig(output)
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)
def discover_histogram_by_os(self, os_name, output=None):
    """
    Displays a histogram of the distributions of file extensions that are
    executable across all systems of the specified OS

    The x axis is the global prevalence count of a file (labelled
    "Num of Systems"); each bar is weighted by the number of file_metadata
    rows of this OS that have that prevalence count. The title reports the
    percentage of those rows whose prevalence count is greater than 1.

    :param os_name: name of the operating system
    :param output: (optional) output filename; when omitted the figure is
        shown interactively instead of being saved
    """
    # NOTE(review): another method with this same name appears in this file;
    # if both live in the same class the later definition silently replaces
    # the earlier one -- confirm which is intended.
    print('[+] Running "Histogram by OS"...')

    num_systems = core.get_num_systems(self.cnx, os_name)
    if not num_systems:
        print("Error: OS {} does not exist".format(os_name))
        return

    # One bin per possible prevalence value 1..num_systems; the extra edge
    # closes the last bin.
    bins = list(range(1, num_systems + 2))

    # TODO: restrict the query to executable file extensions. As written it
    # counts every file of the OS, despite the docstring and plot title.
    cursor = self.cnx.cursor()
    try:
        # os_name is bound as a query parameter rather than formatted into
        # the SQL text, so it cannot alter the statement.
        cursor.execute(
            """
            SELECT COUNT(file_metadata.os_id), global_file_prevalence.count
            FROM global_file_prevalence
            LEFT JOIN file_metadata
                ON global_file_prevalence.unique_file_id = file_metadata.unique_file_id
            WHERE file_metadata.os_id = (SELECT os.id FROM os WHERE os.name = %s)
            GROUP BY global_file_prevalence.count
            ORDER BY global_file_prevalence.count ASC;
            """,
            (os_name,))
        rows = cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()

    if not rows:
        # zip(*rows) cannot be unpacked into two names when there are no rows.
        print("Error: no file data found for OS {}".format(os_name))
        return

    counts, ranges = zip(*rows)

    # Share of occurrences with prevalence > 1. Select by the prevalence
    # value itself instead of skipping the first row, which is only correct
    # when a prevalence-1 row happens to exist.
    total = sum(counts)
    above_one = sum(count for count, prevalence in rows if prevalence > 1)
    perc = int(float(above_one) / total * 100) if total else 0

    fig = plt.figure()
    ax = fig.add_subplot(
        111,
        title="File Executable Prevalence of {} with {}% > 1".format(
            os_name, perc))
    ax.hist(ranges, weights=counts, bins=bins)
    ax.set_xlabel("Num of Systems")
    ax.set_ylabel("File Occurrences")
    plt.xticks(bins)

    if output is None:
        plt.show()
    else:
        plt.savefig(output)
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)
def discover_histogram_by_source(self, source_name, output=None):
    """
    Displays a histogram of the file distribution of a single source as it
    relates to all occurrences of that file across all systems

    The x axis is the global prevalence count of a file (labelled
    "Num of Systems"); each bar is weighted by the number of file_metadata
    rows of this source that have that prevalence count. The title reports
    the percentage of those rows whose prevalence count is greater than 1.

    :param source_name: The name of the source
    :param output: (optional) output filename in PNG format; when omitted
        the figure is shown interactively instead of being saved
    """
    # NOTE(review): another method with this same name appears in this file;
    # if both live in the same class the later definition silently replaces
    # the earlier one -- confirm which is intended.
    print('[+] Running "Histogram by Source"...')

    src_info = core.get_source_info(self.cnx, source_name)
    if src_info is None:
        print("Source {} does not exist".format(source_name))
        return

    num_systems = core.get_num_systems(self.cnx, src_info.os_id)
    if not num_systems:
        # Without a system count there are no bins to draw.
        print("Error: no systems found for source {}".format(source_name))
        return

    # One bin per possible prevalence value 1..num_systems; the extra edge
    # closes the last bin.
    bins = list(range(1, num_systems + 2))

    cursor = self.cnx.cursor()
    try:
        # source_name is bound as a query parameter, never interpolated.
        cursor.execute(
            """
            SELECT COUNT(file_metadata.id), global_file_prevalence.count
            FROM global_file_prevalence
            LEFT JOIN file_metadata
                ON global_file_prevalence.unique_file_id = file_metadata.unique_file_id
            WHERE file_metadata.source_id =
                (SELECT media_source.id FROM media_source WHERE media_source.name = %s)
            GROUP BY global_file_prevalence.count
            ORDER BY global_file_prevalence.count ASC;
            """,
            (source_name,))
        rows = cursor.fetchall()
    finally:
        # Release the cursor even if the query fails.
        cursor.close()

    if not rows:
        # Covers both None and an empty result; nothing to plot.
        return

    counts, ranges = zip(*rows)

    # Share of occurrences with prevalence > 1. Select by the prevalence
    # value itself instead of skipping the first row, which is only correct
    # when a prevalence-1 row happens to exist.
    total = sum(counts)
    above_one = sum(count for count, prevalence in rows if prevalence > 1)
    perc = int(float(above_one) / total * 100) if total else 0

    fig = plt.figure()
    ax = fig.add_subplot(
        111,
        title="File Prevalence of {} with {}% > 1".format(
            src_info.source_name, perc))
    ax.hist(ranges, weights=counts, bins=bins)
    ax.set_xlabel("Num of Systems")
    ax.set_ylabel("File Occurrences")

    if output is None:
        plt.show()
    else:
        print("Saving histogram to {}".format(output))
        plt.savefig(output)
        # pyplot keeps figures alive until they are closed explicitly.
        plt.close(fig)