def runGraph(self, policy: Policy = None, outputDir: str = None, quiet: bool = False):
    """Build a graph of IFs in the simulation given a policy.

    Builds a UnifiedGraph of information flows, plots it (unless
    plotting is disabled), computes or reuses community clusters, and
    then derives the edit costs and reachability statistics for it.
    When no policy is given, the computed global communities are
    memoised on self for reuse by later per-policy runs.
    """
    def _say(text):
        # Progress messages are suppressed in quiet mode.
        if not quiet:
            tprnt(text)

    # NOTE(review): the outputDir parameter is overwritten right away,
    # so callers cannot actually override the destination — confirm
    # this is intended.
    if policy:
        outputDir = policy.getOutputDir(parent=outputFsEnabled())
    else:
        outputDir = outputFsEnabled()

    _say("\nCompiling the Unified Graph...")
    _say("\tMaking graph...")
    g = UnifiedGraph(outputDir=outputDir)

    _say("\tPopulating graph...")
    g.populate(policy=policy, quiet=quiet)

    # Base name for all files written about this graph.
    output = (policy.name + "-graph-unified") if policy else "graph-unified"

    if not plottingDisabled():
        _say("\tPlotting graph...")
        g.plot(output=output)

    _say("\tComputing community clusters...")
    if not policy:
        # We are computing the global graph with all the information;
        # memoise its communities for later per-policy refinement.
        g.computeClusters()
        self.globMembership = g.clusters.membership
        self.globNames = g.g.vs['name']
    elif not self.globMembership:
        # Policy graph, but no global communities available yet.
        _say("\t\tWarning: cannot re-use global communities as "
             "they aren't computed yet, computing local ones "
             "instead.")
        g.computeClusters()
    else:
        # Policy graph: refine the memoised global communities.
        _say("\t\tUsing global community memberships to refine "
             "clusters.")
        applyCommunities(g, self.globMembership, self.globNames, True)

    if not plottingDisabled():
        _say("\tPlotting communities...")
        g.plotClusters(output=output)

    _say("\tCalculating costs to optimal communities...")
    g.calculateCosts(output=output, policy=policy, quiet=quiet)

    _say("\tCalculating potential reachability improvement...")
    g.calculateReachability(output=output, quiet=quiet,
                            nodeCount=self.docCount)

    _say("Done.")
def __init__(self, parent: CommonGraph, quiet: bool = False):
    """Construct a FlatGraph.

    Flattens :parent: into a file-only graph: two file nodes are
    connected if they are directly linked in the parent, or if a
    shortest path exists between them that passes only through
    non-file (app) vertices. The parent's community memberships are
    then applied to the flat graph.

    Raises:
        TypeError: if parent is not a CommonGraph.
    """
    super(FlatGraph, self).__init__()

    if not isinstance(parent, CommonGraph):
        raise TypeError("FlatGraph constructor needs a CommonGraph "
                        "parent, received a %s." %
                        parent.__class__.__name__)

    self.g = None                       # igraph Graph, built in Step 3.
    self.clusters = None                # Filled by applyCommunities.
    self.outputDir = parent.outputDir
    self.vertices = dict()
    self.edges = set()
    self.weights = dict()

    # Step 1. make a copy of the graph without file-file nodes, to
    # find paths between files that go through apps.
    if not quiet:
        tprnt("\t\t\tStep 1: copy graph, excluding file-file nodes...")
        tprnt("\t\t\t\tCopy graph...")
    copy = parent.g.copy()  # type: Graph
    # Vertex indices are preserved by copy(), so parent's attribute
    # lists can be indexed with the copy's vertex ids.
    types = parent.g.vs['type']
    names = parent.g.vs['name']
    toBeRemoved = []
    namesRemoved = []
    if not quiet:
        tprnt("\t\t\t\tFind edges to delete...")
    for edge in copy.es:
        if types[edge.source] == "file" and \
                types[edge.target] == "file":
            toBeRemoved.append(edge)
            # Keep the endpoint names so these direct file-file links
            # can be re-added to the flat graph later.
            namesRemoved.append((names[edge.source],
                                 names[edge.target]))
    if not quiet:
        tprnt("\t\t\t\tDelete edges...")
    copy.delete_edges(toBeRemoved)

    # Step 2. run an all-pairs shortest path algorithm.
    # Step 2. pick out file-file paths with no intermediary files.
    # Step 2. save this info in the form of an edge list.
    if not quiet:
        tprnt("\t\t\tStep 2: run an all-pairs shortest path "
              "algorithm, remove file-file paths with intermediary "
              "files and gather final file-file edges...")
        tprnt("\t\t\t\tCopy file nodes...")
    fileNodes = list(
        (copy.vs[i] for i, t in enumerate(types) if t == "file"))
    edges = set()
    # weights = dict()
    # Maps file names to compact integer ids for the new graph.
    self.idgen = UniqueIdGenerator()

    fileNodeCount = len(fileNodes)
    if not quiet:
        tprnt("\t\t\t\tGet shortest paths for each of %d file nodes..."
              % fileNodeCount)
    # Progress display: print roughly every 5% of nodes processed.
    threshold = fileNodeCount / 100
    nodeI = 0
    lastNodePct = 0
    nodePct = 0
    for v in fileNodes:
        nodeI += 1
        if nodeI >= (threshold * nodePct):
            nodePct = int(nodeI / threshold)
            if nodePct >= lastNodePct + 5:
                print("\t\t\t\t\t... (%d%% done)" % nodePct)
                lastNodePct = nodePct

        # Get shortest paths.
        vPaths = copy.get_shortest_paths(v, to=fileNodes)

        # Remove unnecessary bits.
        delSet = set()
        for (idx, p) in enumerate(vPaths):
            if len(p) < 1:
                continue

            # Ignore paths with intermediary files.
            for node in p[1:-1]:
                if types[node] == "file":
                    delSet.add(idx)

        # Remove unsuitable paths.
        # Delete by descending index so indices stay valid.
        for i in sorted(list(delSet), reverse=True):
            del vPaths[i]
        del delSet

        # Save the shortest paths remaining as edges.
        for p in vPaths:
            # Single-vertex paths (the source itself) carry no edge.
            if len(p) <= 1:
                continue
            key = (self.idgen[names[p[0]]], self.idgen[names[p[-1]]])
            edges.add(key)
            # weights[key] = 1 / (len(p) - 1)

    # Add edges for removed names
    if not quiet:
        tprnt("\t\t\t\tRe-add file-file direct nodes into graph...")
    for (src, dest) in namesRemoved:
        edges.add((self.idgen[src], self.idgen[dest]))

    # Step 3. construct a graph with only file nodes.
    if not quiet:
        tprnt("\t\t\tStep 3: construct a graph with only file nodes...")
    edges = list(edges)
    self.g = Graph(edges)
    del edges
    # self.g.es["weight"] = list((weights[e] for e in edges))
    # idgen.values() yields names ordered by their generated ids,
    # matching the vertex ids used when building the Graph above.
    self.g.vs["name"] = self.idgen.values()

    # Step 4. apply community information to the nodes.
    if not quiet:
        tprnt("\t\t\tStep 4: apply communities to flat graph...")
    applyCommunities(self, parent.clusters.membership, names)
def populate(self, policy: Policy = None, quiet: bool = False):
    """Populate the AccessGraph, filtering it based on a Policy."""
    appStore = ApplicationStore.get()
    fileStore = FileStore.get()
    fileFactory = FileFactory.get()
    userConf = UserConfigLoader.get()

    # Every userland application instance becomes an app node.
    if not quiet:
        tprnt("\t\tAdding apps...")
    for app in appStore:
        if app.isUserlandApp():
            self._addAppNode(app)

    def _allowed(policy, f, acc):
        # An access is kept when made by a userland app and either
        # granted by designation, or permitted by the policy (or no
        # policy is being simulated at all).
        if not acc.actor.isUserlandApp():
            return False
        if acc.isByDesignation() or not policy:
            return True
        return policy.allowedByPolicy(f, acc.actor)

    # User documents (folders excluded) that have at least one allowed
    # userland access become file nodes, with their accesses attached.
    if not quiet:
        tprnt("\t\tAdding user documents...")
    self.docCount = 0
    for f in fileStore:
        if not f.isUserDocument(userHome=userConf.getHomeDir(),
                                allowHiddenFiles=True):
            continue
        if f.isFolder():
            continue

        # Provided they have userland apps accessing them (any()
        # short-circuits on the first allowed access).
        if not any(_allowed(policy, f, acc) for acc in f.getAccesses()):
            continue

        self.docCount += 1
        self._addFileNode(f)
        for acc in f.getAccesses():
            if _allowed(policy, f, acc):
                self._addAccess(f, acc)

    # Link files connected by a move/copy event, with a weight large
    # enough to keep them clustered together.
    if not quiet:
        tprnt("\t\tAdding file links...")
    links = fileFactory.getFileLinks()
    for (pred, follow) in links.items():
        source = str(pred.inode)
        dest = str(follow)
        if source in self.vertices and dest in self.vertices:
            tprnt("Info: adding link from File %s to File %s in graph "
                  "as there is a file move/copy event between those." %
                  (source, dest))
            # Canonical (smaller, larger) ordering of the endpoints.
            edge = (min(source, dest), max(source, dest))
            self.edges.add(edge)
            self.weights[edge] = 999999999

    if not quiet:
        tprnt("\t\tConstructing graph...")
    self._construct()
def calculateReachability(self, output: str = None, quiet: bool = False,
                          nodeCount: int = 0):
    """Model the reachability improvement of community finding.

    Compares the connected components of the graph before community
    finding to the communities found afterwards, both for this graph
    and for a flattened file-only version of it, and reports average
    cluster sizes and reachability. Appends the statistics to
    '<output>.graphstats.txt' when output is set.

    Requires computeClusters() and calculateCosts() to have run first.
    """
    if self.clusters is None:
        raise ValueError("Clusters for a graph must be computed "
                         "before modelling how community isolation "
                         "decreases its average reachability.")
    if self.editCount is None:
        raise ValueError("Costs for a graph must be calculated "
                         "before modelling how community isolation "
                         "decreases its average reachability.")

    msg = ""

    def _print(clusters, header, tag):
        # Build the statistics text for one clustering; returns
        # (message, average cluster size, adjusted reachability).
        msg = "\nGraph statistics %s:\n" % header
        if len(clusters) == 0:
            msg += "no clusters for this graph."
            # Neutral values: size 0, reachability 1.
            return (msg, 0, 1)

        # Empty clusters are ignored.
        sizes = [
            x for x in sorted(list((len(x) for x in clusters))) if x != 0
        ]
        vertexSum = sum(sizes)
        # Nodes not represented in this graph are counted as isolated.
        isolatedNC = nodeCount - self.docCount
        msg += ("* %s-size distribution: %s\n" % (tag, sizes.__str__()))
        msg += ("* %s-cluster count: %d\n" % (tag, len(sizes)))
        msg += ("* %s-isolated nodes: %d\n" % (tag, isolatedNC))
        msg += ("* %s-smallest cluster: %d\n" % (tag, min(sizes)))
        msg += ("* %s-largest cluster: %d\n" % (tag, max(sizes)))
        avgSize = vertexSum / len(sizes)
        msg += ("* %s-average size: %f\n" % (tag, avgSize))
        # Average reachability: expected cluster size when picking a
        # vertex at random (sum of squared sizes over vertex count).
        reach = sum([i**2 for i in sizes]) / vertexSum
        msg += ("* %s-average reachability: %f\n" % (tag, reach))
        # Adjusted reachability folds the isolated nodes in (each
        # contributes 1² = 1 to the numerator).
        reach = (sum([i ** 2 for i in sizes]) + isolatedNC) / \
            (vertexSum + isolatedNC)
        msg += ("* %s-adjusted reachability: %f\n" % (tag, reach))
        return (msg, avgSize, reach)

    def _printAndSum(g, editCount, tagPrefix=None):
        # Compare pre- (connected components) and post- (communities)
        # statistics for graph g, relating gains to the edit count.
        msg = "\n"

        preTag = tagPrefix + "-pre" if tagPrefix else "pre"
        _m, avgPreSize, preReach = _print(g.g.clusters(),
                                          "pre community finding",
                                          preTag)
        msg += _m

        postTag = tagPrefix + "-post" if tagPrefix else "post"
        _m, avgPostSize, postReach = _print(g.clusters,
                                            "post community finding",
                                            postTag)
        msg += _m

        if avgPreSize:
            deltaSize = 1 - (avgPostSize / avgPreSize)
            sizeEfficiency = deltaSize / editCount if editCount else 1
            msg += "\nEvol. of avg. cluster size: {:.2%}\n".format(
                deltaSize)
            msg += ("Efficiency of edits wrt. average size: %f\n" %
                    sizeEfficiency)
        else:
            msg += "\nEvol. of avg. cluster size: N/A\n"

        if preReach:
            deltaReach = 1 - (postReach / preReach)
            reachEfficiency = deltaReach / editCount if editCount else 1
            msg += "\nEvol. of reachability: {:.2%}\n".format(deltaReach)
            msg += ("Efficiency of edits wrt. adj. reachability: %f\n" %
                    reachEfficiency)
        else:
            msg += "\nEvol. of adj. reachability: N/A\n"

        return msg

    if not quiet:
        tprnt("\t\tPrinting statistics on whole graph...")
    msg += _printAndSum(self, self.editCount)

    if not quiet:
        tprnt("\t\tBuilding flat file graph...")
    fg = FlatGraph(parent=self, quiet=quiet)

    if not plottingDisabled():
        if not quiet:
            tprnt("\t\tPlotting flat file graph...")
        fg.plot(output=output)

    if not quiet:
        tprnt("\t\tPrinting statistics on flat file graph...")
    msg += _printAndSum(fg, self.editCount, tagPrefix="flat")

    if not quiet:
        tprnt(msg)

    if output:
        path = self.outputDir + "/" + output + ".graphstats.txt"
        os.makedirs(File.getParentNameFromName(path), exist_ok=True)
        # Append mode: calculateCosts() creates this file first.
        with open(path, "a") as f:
            print(msg, file=f)
def calculateCosts(self, output: str = None, quiet: bool = False,
                   policy: Policy = None):
    """Model the usability costs needed to reach found communities.

    Walks every edge that crosses a community boundary and classifies
    the edit needed to remove it: revoking a file access (granting
    cost), isolating an app from non-app neighbours (isolation cost),
    or splitting an app that co-accessed two files (splitting cost).
    Scores are also pushed into :policy: when one is given. Writes the
    summary to '<output>.graphstats.txt' and stores the total in
    self.editCount.
    """
    if not self.clusters:
        raise ValueError("Clusters for a graph must be computed "
                         "before calculating its cost.")

    msg = ""
    appStore = ApplicationStore.get()

    # crossing() flags each edge whose endpoints are in different
    # communities.
    crossing = self.clusters.crossing()
    grantingCost = 0
    isolationCost = 0
    splittingCost = 0
    for (index, x) in enumerate(crossing):
        if not x:
            continue

        edge = self.g.es[index]
        source = self.g.vs[edge.source]
        target = self.g.vs[edge.target]
        sourceType = source.attributes()['type']
        targetType = target.attributes()['type']
        sourceName = source.attributes()['name']
        targetName = target.attributes()['name']

        # Case where a file-file node was removed. Should normally not
        # happen so we will not write support for it yet.
        if sourceType == "file":
            if targetType == "app":
                # Crossing file->app access: revoke it.
                grantingCost += 1
                if policy:
                    app = appStore.lookupUid(targetName)
                    policy.incrementScore('graphGrantingCost',
                                          None, app)
            else:
                # file-file crossing edge.
                # Check if an app co-accessed the files. If so, increase the
                # cost of splitting that app instance into two.
                sAccessors = []
                for n in source.neighbors():
                    if n.attributes()['type'] == 'app':
                        sAccessors.append(n)
                tAccessors = []
                for n in target.neighbors():
                    if n.attributes()['type'] == 'app':
                        tAccessors.append(n)
                inter = intersection(sAccessors, tAccessors)
                for i in inter:
                    splittingCost += 1
                    if policy:
                        app = appStore.lookupUid(sourceName)
                        policy.incrementScore('graphSplittingCost',
                                              None, app)
                if not inter:
                    # No co-accessing app can explain this edge.
                    print("Warning: file-file node removed by graph "
                          "community finding algorithm. Not supported.",
                          file=sys.stderr)
                    print(source, target)
                    raise NotImplementedError
        elif targetType == "file":  # sourceType in "app", "appstate"
            grantingCost += 1
            if sourceType == "app" and policy:
                app = appStore.lookupUid(sourceName)
                policy.incrementScore('graphGrantingCost', None, app)
            elif policy:
                # NOTE(review): 'graphGranting' (no "Cost" suffix)
                # differs from the other score keys here — confirm this
                # is intended for appstate sources.
                policy.incrementScore('graphGranting', None, None)
        else:
            # app-app links are just noise in the UnifiedGraph
            if sourceType != "app" and targetType == "app":
                isolationCost += 1
                if policy:
                    app = appStore.lookupUid(targetName)
                    policy.incrementScore('graphIsolationCost',
                                          None, app)
            elif sourceType == "app" and targetType != "app":
                isolationCost += 1
                if policy:
                    app = appStore.lookupUid(sourceName)
                    policy.incrementScore('graphIsolationCost',
                                          None, app)

    editCount = grantingCost + isolationCost + splittingCost
    msg += ("%d edits performed: %d apps isolated, %d apps split and "
            "%d accesses revoked.\n" % (
                editCount, isolationCost, splittingCost, grantingCost))

    if not quiet:
        tprnt(msg)

    if output:
        path = self.outputDir + "/" + output + ".graphstats.txt"
        os.makedirs(File.getParentNameFromName(path), exist_ok=True)
        # Write mode: starts the stats file that calculateReachability
        # appends to afterwards.
        with open(path, "w") as f:
            print(msg, file=f)

    self.editCount = editCount
def processFrequentItemLists(self, inputDirs: str):
    """Process frequent item lists found in a list of input folders.

    :inputDirs: is a comma-separated string of folders, each expected
    to contain a 'typesPerInstance.list' file produced by a previous
    analysis run with the --related-files flag. Transactions from all
    folders are aggregated, frequent itemsets are mined, mime-type
    co-access patterns are isolated, and the results are written to
    'patterns.out' and 'patternsListing.out' in self.outputDir.

    Raises:
        ValueError: if any input folder lacks a typesPerInstance.list.
    """
    from orangecontrib.associate.fpgrowth import frequent_itemsets
    from os.path import isfile, exists
    from os import replace, makedirs

    # NOTE: inputDirs is a comma-separated string (split below); the
    # previous ``list`` annotation was incorrect.
    inputPaths = [
        d + '/typesPerInstance.list' for d in inputDirs.split(",")
    ]

    # Check for missing files.
    for p in inputPaths:
        if not isfile(p):
            raise ValueError("File '%s' could not be found, please verify "
                             "you have invoked the analysis software with "
                             "the --related-files flag for this user." % p)

    # Read every file and aggregate transactions. A transaction is an
    # app identifier followed by the items (paths / types) it accessed.
    tprnt("Aggregating transactions from input files...")
    transactions = []
    for p in inputPaths:
        participantFolder = p.split("/")[-2]
        tprnt("%s: %s" % (participantFolder, p))
        with open(p, 'r') as f:
            for line in f:
                transaction = line.rstrip("\n").split("\t")
                # Prefix the app id with the participant folder so apps
                # from different participants stay distinct.
                transaction[0] = participantFolder + "/" + transaction[0]
                transactions.append(transaction)
    tprnt("Done.")

    # Compute itemsets from transactions.
    tprnt("\nComputing frequent itemsets.")
    itemsets = frequent_itemsets(transactions, frequency())
    tprnt("Done.")

    # Functions to sort itemsets. Elements starting with '/', '~' or
    # '@' are paths; others are mime types, optionally carrying a ':r'
    # or ':w' access-mode suffix.
    def _isPath(elem):
        return elem[0] in ['/', '~', '@']

    def _hasPath(item):
        # True if any element of the itemset is a path.
        for t in item[0]:
            if _isPath(t):
                return True
        return False

    def _uniqueType(item):
        # True if the itemset holds exactly one non-path element.
        typeCnt = 0
        for t in item[0]:
            if not _isPath(t):
                typeCnt += 1
                # Save time.
                if typeCnt > 1:
                    return False
        return typeCnt == 1

    def _uniqueTypeWithAccessVariations(item):
        # True if all non-path elements collapse to one single type
        # once their ':r' / ':w' suffixes are stripped.
        uniqueType = None
        for t in item[0]:
            if not _isPath(t):
                if t.endswith(":r") or t.endswith(":w"):
                    t = t[:-2]
                if not uniqueType:
                    uniqueType = t
                elif uniqueType != t:
                    return False
        return uniqueType is not None

    def _multipleTypes(item):
        # True if at least two distinct types (suffixes stripped) occur.
        uniqueType = None
        for t in item[0]:
            if not _isPath(t):
                if t.endswith(":r") or t.endswith(":w"):
                    t = t[:-2]
                if not uniqueType:
                    uniqueType = t
                elif uniqueType != t:
                    return True
        return False

    # Sort itemsets
    tprnt("\nSorting frequent itemsets to isolate mime type co-access "
          "patterns.")
    uniques = []
    patterns = dict()
    for item in itemsets:
        if _hasPath(item):
            pass
        elif _uniqueType(item):
            uniques.append(item)
        elif _uniqueTypeWithAccessVariations(item):
            pass
        elif _multipleTypes(item):
            patterns[item[0]] = item[1]
    tprnt("Done.")

    # Make output directory, backing up any previous output first.
    if exists(self.outputDir):
        backup = self.outputDir.rstrip("/") + ".backup"
        if exists(backup):
            shutil.rmtree(backup)
        replace(self.outputDir, backup)
    makedirs(self.outputDir, exist_ok=False)

    # Print to files.
    with open(self.outputDir + '/' + 'patterns.out', 'w') as f:
        tprnt("\nMost commonly found types:")
        print("Most commonly found types:", file=f)
        for item in sorted(uniques, key=lambda x: x[1], reverse=True):
            print("\t", item)
            print("mcft\t", item, file=f)

        tprnt("\nMost commonly found patterns:")
        print("\nMost commonly found patterns:", file=f)
        for item in sorted(patterns.items(), key=lambda x: x[1],
                           reverse=True):
            print("\t", item)
            print("mcfp\t", item, file=f)
        print("", file=f)
    del itemsets

    # Match items in patterns to transactions, and print out app and
    # file names.
    tprnt("\nMatching frequent patterns to transactions...")
    transactionsPerPattern = dict()
    for t in transactions:
        for p in patterns.keys():
            if p.issubset(t):
                matches = transactionsPerPattern.get(p) or []
                matches.append(t)
                transactionsPerPattern[p] = matches
    tprnt("Done.")

    def _printPattern(p, matches, counter, exclusiveCounter):
        # Write one pattern's summary to patterns.out and its full
        # transaction listing to patternsListing.out. counterI and
        # exclusiveCounterI are read from the enclosing loop below.
        msg = ""
        listing = ""
        summary = ""

        # Base pattern identity.
        msg += ("\n\nPATTERN: %d\t%s" % (patterns[p], p.__str__()))

        # Transaction listing.
        for matchedTransaction in matches:
            listing += ("App: %s\n" % matchedTransaction[0])
            for transactionElem in sorted(matchedTransaction[1:]):
                listing += ("\t* %s\n" % transactionElem)
            listing += ("\n")

        # Counters of file extension co-occurrences.
        for (k, v) in sorted(counter.items()):
            summary += ("\t{%s} occurs %d times, in %d patterns\n" %
                        (','.join(k), v, counterI[k]))
        summary += "\n"
        for (k, v) in sorted(exclusiveCounter.items()):
            summary += ("\t{%s} is exclusive %d times, in %d patterns\n" %
                        (','.join(k), v, exclusiveCounterI[k]))

        # Print to files.
        with open(self.outputDir + '/' + 'patterns.out', 'a') as f:
            print(msg, file=f)
            print(summary, file=f)
        with open(self.outputDir + '/' + 'patternsListing.out', 'a') as f:
            print(msg, file=f)
            print(listing, file=f)

    # Pre-analyse the relationships between file endings in patterns.
    tprnt("\nPre-analysing the relationships between files in patterns...")
    for (p, matches) in sorted(transactionsPerPattern.items()):
        # Counter used to count combos of files with the same name and
        # different extensions. The *I variants count the number of
        # matched transactions a combo appears in.
        counter = dict()
        exclusiveCounter = dict()
        counterI = dict()
        exclusiveCounterI = dict()

        # Go through file accesses that match the pattern.
        for matchedTransaction in matches:
            # We collect sets of names for each encountered file extension.
            nameDict = dict()
            for transactionElem in sorted(matchedTransaction[1:]):
                if not (transactionElem.startswith("/") or
                        transactionElem.startswith("~")):
                    continue

                # Get the base name and file extension.
                fname = File.getFileNameFromPath(transactionElem)
                fnoext = File.getNameWithoutExtensionFromPath(fname)
                fext = File.getExtensionFromPath(fname,
                                                 filterInvalid=True)

                # Remember which exts were found for a name.
                if fext:
                    extSet = nameDict.get(fnoext) or set()
                    extSet.add(fext)
                    nameDict[fnoext] = extSet

            # Now check which extension combos exist, and how many times
            # they occur.
            extPairOccs = dict()
            for (fname, extSet) in nameDict.items():
                fs = frozenset(extSet)
                extPairOccs[fs] = (extPairOccs.get(fs) or 0) + 1

            # Compile list of all valid extension combos, and browse them
            # in reverse order of length as we first want to validate the
            # largest combinations.
            combos = list(extPairOccs.keys())
            combos.sort(key=len, reverse=True)

            # Count patterns which exclusively have one extension tied to
            # another (i.e. extension never appears on its own).
            exclusives = dict()
            nonExclusiveKeys = set()
            for k in combos:
                # All the subsets of the current combo of filetypes are not
                # exclusive since they're included in this set.
                subcombos = list()
                for i in range(1, len(k)):
                    subcombos.extend([
                        frozenset(x) for x in itertools.combinations(k, i)
                    ])
                nonExclusiveKeys.update(subcombos)

                # Also check if any of these subsets is itself in the list,
                # if so the current set is not exclusive.
                for sub in subcombos:
                    if sub in extPairOccs:
                        break
                else:
                    # Remember: subsets of a previous set aren't exclusive.
                    if k not in nonExclusiveKeys:
                        exclusives[k] = extPairOccs[k]

            # Now add the match's groups of filenames to counters for the
            # whole pattern. Count both number of cases where the pattern
            # is found / exclusively found, and the number of times it is
            # found.
            for (k, v) in extPairOccs.items():
                counter[k] = (counter.get(k) or 0) + v
                counterI[k] = (counterI.get(k) or 0) + 1
            for (k, v) in exclusives.items():
                exclusiveCounter[k] = (exclusiveCounter.get(k) or 0) + v
                exclusiveCounterI[k] = \
                    (exclusiveCounterI.get(k) or 0) + 1

        # Finally, print information on the pattern.
        _printPattern(p, matches, counter, exclusiveCounter)
def _runAttackRound(self, attack: Attack, policy: Policy,
                    acListInst: dict, lookUps: dict,
                    allowedCache: dict):
    """Run an attack round with a set source and time.

    Performs a breadth-first spread from attack.source at attack.time,
    alternating between Files (spread via file follow-links and future
    allowed accesses) and Applications (spread via accessed files and,
    with attack.appMemory, future instances of the same desktop id).

    Returns a tuple (appSet, userAppSet, fileCount, docCount) of
    infected desktop ids, infected userland desktop ids, infected file
    count and infected user-document count.

    NOTE(review): the lookUps parameter is not used in this method —
    confirm whether it is vestigial.
    """
    fileStore = FileStore.get()
    appStore = ApplicationStore.get()
    userConf = UserConfigLoader.get()
    userHome = userConf.getHomeDir()

    seen = set()  # Already seen targets.
    spreadTimes = dict()  # Times from which the attack can spread.
    toSpread = deque()
    toSpread.append(attack.source)
    spreadTimes[attack.source] = attack.time

    # Statistics counters.
    appSet = set()
    userAppSet = set()
    fileCount = 0
    docCount = 0

    if debugEnabled():
        tprnt("Launching attack on %s at time %s %s app memory." %
              (attack.source if isinstance(attack.source, File) else
               attack.source.uid(),
               time2Str(attack.time),
               "with" if attack.appMemory else "without"))

    def _allowed(policy, f, acc):
        # Memoised policy check: an access propagates the attack when
        # the file is app-owned, or the actor/access is policy-allowed.
        k = (policy, f, acc)
        if k not in allowedCache:
            v = (policy.fileOwnedByApp(f, acc) or
                 policy.allowedByPolicy(f, acc.actor) or
                 policy.accessAllowedByPolicy(f, acc))
            allowedCache[k] = v
            return v
        else:
            return allowedCache[k]

    # As long as there are reachable targets, loop.
    while toSpread:
        current = toSpread.popleft()
        currentTime = spreadTimes[current]

        # When the attack spreads to a File.
        if isinstance(current, File):
            fileCount += 1
            if current.isUserDocument(userHome):
                docCount += 1
            if debugEnabled():
                tprnt("File added @%d: %s" % (currentTime, current))

            # Add followers.
            for f in current.follow:
                # Only spread forwards in time.
                if f.time > currentTime:
                    follower = fileStore.getFile(f.inode)
                    if follower not in seen:
                        toSpread.append(follower)
                        seen.add(follower)
                        spreadTimes[follower] = f.time

            # Add future accesses.
            # NOTE(review): apps are deduplicated via appSet, which is
            # only updated when an app is *processed* — the same actor
            # can be enqueued more than once before that. Confirm this
            # is intended.
            for acc in current.accesses:
                if acc.time > currentTime and \
                        acc.actor.desktopid not in appSet and \
                        _allowed(policy, current, acc):
                    toSpread.append(acc.actor)
                    spreadTimes[acc.actor] = acc.time

        # When the attack spreads to an app instance.
        elif isinstance(current, Application):
            if debugEnabled():
                tprnt("App added @%d: %s" % (currentTime, current.uid()))

            # Add files accessed by the app.
            for (accFile, acc) in acListInst.get(current.uid()) or []:
                if acc.time > currentTime and \
                        accFile not in seen and \
                        _allowed(policy, accFile, acc):
                    toSpread.append(accFile)
                    seen.add(accFile)
                    spreadTimes[accFile] = acc.time

            # Add future versions of the app.
            if attack.appMemory and current.desktopid not in appSet:
                for app in appStore.lookupDesktopId(current.desktopid):
                    if app.tstart > currentTime:
                        toSpread.append(app)
                        spreadTimes[app] = app.tstart

            # We do this last to use appSet as a cache for already seen
            # apps, so we append all future instances once and for all to
            # the spread list.
            appSet.add(current.desktopid)
            if current.isUserlandApp():
                userAppSet.add(current.desktopid)

        else:
            print("Error: attack simulator attempting to parse an unknown"
                  " object (%s)" % type(current), file=sys.stderr)

    return (appSet, userAppSet, fileCount, docCount)
def main(argv):
    """Entry point: parse CLI options, load the data, run the analyses.

    Loads the Zeitgeist SQLite database and PreloadLogger logs, builds
    and simulates the event/file model, then runs whichever analyses
    were requested on the command line (graphs, policy scoring, attack
    simulation, related-files mining, post-analysis, ...).
    """
    __opt_inode_query = None
    __opt_post_analysis = None
    __opt_quick_pol = None

    # Parse command-line parameters
    try:
        (opts, args) = getopt.getopt(argv, "hta:cedf:o:q:sk:rpgGi:u:x", [
            "help", "attacks", "post-analysis=", "check-missing",
            "check-excluded-files", "debug", "frequency", "inode",
            "extensions", "related-files", "output=", "output-fs=",
            "score", "quick-pol=", "skip=", "user", "clusters",
            "print-clusters", "graph", "graph-clusters",
            "disable-plotting"
        ])
    except getopt.GetoptError:
        print(USAGE_STRING)
        sys.exit(2)
    else:
        for opt, arg in opts:
            if opt in ('-h', '--help'):
                print(USAGE_STRING + "\n\n\n\n")
                print("--attacks:\n\tSimulates attacks and reports "
                      "on proportions of infected files and apps.\n")
                print("--check-excluded-files:\n\tPrints the lists of files "
                      "accessed by apps that also wrote to excluded\n\tfiles,"
                      " then aborts execution of the program.\n")
                print("--check-missing:\n\tChecks whether some Desktop IDs "
                      "for apps in the user's directory are\n\tmissing. If so,"
                      " aborts execution of the program.\n")
                print("--clusters:\n\tPrints clusters of files with "
                      "information flows to one another. Requires\n\tthe "
                      "--score option.\n")
                print("--debug:\n\tPrints additional debug information in "
                      "various code paths to help debug\n\tthe program.\n")
                print("--disable-plotting:\n\tDo not plot cluster graphs. See "
                      "the --graph option.\n")
                print("--extensions:\n\tPrints file extensions and MIME type "
                      "associations for this user.\n")
                print("--frequency:\n\tSets the frequency used by the "
                      "frequent-itemsets algorithm in the\n\t--related-files "
                      "post-analysis. Requires the --related-files option.\n")
                print("--graph:\n\tFind communities in file/app "
                      "accesses using graph theory methods.\n")
                print("--help:\n\tPrints this help information and exits.\n")
                print("--output=<DIR>:\n\tSaves a copy of the simulated "
                      "files, and some information on events\n\trelated to "
                      "them, in a folder created at the <DIR> path.\n")
                print("--post-analysis=<DIR,DIR,DIR>:\n\t"
                      "Uses the value pointed to"
                      " by --output in order to produce graphs and\n\t"
                      "statistics.\n")
                print("--quick-pol=Policy:\n\tReplace the default policies "
                      "with this one single Policy.\n")
                print("--related-files:\n\tMines for files that are frequently"
                      " accessed together by apps. Produces\n\toutput files in"
                      " scoring mode, and an analysis output in post-analysis"
                      "\n\tmode. See also --frequency.\n")
                print("--score:\n\tCalculates the usability and security "
                      "scores of a number of file access\n\tcontrol policies"
                      ", replayed over the simulated accesses. Prints results"
                      "\n\tand saves them to the output directory.\n")
                print("--skip=<Policy,Policy,'graphs'>:\n\tSkip the scoring of "
                      "policies in the lists. If the list contains the word"
                      "\n\t'graphs', skips the general graph computation.\n")
                sys.exit()
            elif opt in ('-c', '--check-missing'):
                __setCheckMissing(True)
            elif opt in ('-e', '--check-excluded-files'):
                __setCheckExcludedFiles(True)
            elif opt in ('-x', '--extensions'):
                __setPrintExtensions(True)
            elif opt in ('-d', '--debug'):
                __setDebug(True)
            elif opt in ('-r', '--related-files'):
                __setRelatedFiles(True)
            elif opt in ('-s', '--score'):
                __setScore(True)
            elif opt in ('-p', '--print-clusters', '--clusters'):
                __setPrintClusters(True)
            elif opt in ('-g', '--graph-clusters', '--graph'):
                __setGraph(True)
            elif opt in ('-t', '--attacks'):
                __setAttacks(True)
            elif opt in ('-G', '--disable-plotting'):
                __setPlottingDisabled(True)
            elif opt in ('-f', '--frequency'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                # getopt may leave a leading '=' on long-option values.
                __setFrequency(arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-o', '--output-fs', '--output'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __setOutputFs(arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-u', '--user'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __setUser(arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-i', '--inode'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                # Kept as a string here; parsed as ints further below.
                __opt_inode_query = (arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-a', '--post-analysis'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __opt_post_analysis = (arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-q', '--quick-pol'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __opt_quick_pol = (arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-k', '--skip'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __opt_skip = (arg[1:] if arg[0] == '=' else arg)
                __setSkip(__opt_skip.split(","))

    registerTimePrint()

    # Post-analysis modes short-circuit the full simulation.
    if __opt_post_analysis:
        if relatedFilesEnabled():
            tprnt("Starting post-analysis of related files...\n")
            engine = FrequentFileEngine()
            engine.processFrequentItemLists(__opt_post_analysis)
        else:
            tprnt("Starting post-analysis of usability/security scores...\n")
            from AnalysisEngine import AnalysisEngine
            if outputFsEnabled():
                engine = AnalysisEngine(inputDir=__opt_post_analysis,
                                        outputDir=outputFsEnabled())
            else:
                engine = AnalysisEngine(inputDir=__opt_post_analysis)
            engine.analyse()
        sys.exit(0)

    # Make the application, event and file stores
    store = ApplicationStore.get()
    evStore = EventStore.get()
    fileStore = FileStore.get()
    initMimeTypes()
    datapath = getDataPath()

    # Load up user-related variables
    userConf = UserConfigLoader.get(path=datapath + USERCONFIGNAME)

    # Load up and check the SQLite database
    sql = None
    tprnt("\nLoading the SQLite database: %s..." % (datapath + DATABASENAME))
    try:
        sql = SqlLoader(datapath + DATABASENAME)
    except ValueError as e:
        print("Failed to parse SQL: %s" % e.args[0], file=sys.stderr)
        sys.exit(-1)
    if checkMissingEnabled():
        tprnt("Checking for missing application identities...")
        sql.listMissingActors()
    sql.loadDb(store)
    sqlAppCount = sql.appCount
    sqlInstCount = sql.instCount
    sqlEvCount = sql.eventCount
    sqlValidEvCount = sql.validEventRatio
    tprnt("Loaded the SQLite database.")

    # Load up the PreloadLogger file parser
    tprnt("\nLoading the PreloadLogger logs in folder: %s..." % datapath)
    pll = PreloadLoggerLoader(datapath)
    if checkMissingEnabled():
        tprnt("Checking for missing application identities...")
        pll.listMissingActors()
    pll.loadDb(store)
    pllAppCount = pll.appCount
    pllInstCount = pll.instCount
    pllEvCount = pll.eventCount
    pllValidEvCount = pll.validEventRatio
    tprnt("Loaded the PreloadLogger logs.")

    # Resolve actor ids in all apps' events
    tprnt("\nUsing PreloadLogger Applications to resolve interpreters in "
          "Zeitgeist Applications...")
    (interpretersAdded, instancesEliminated) = store.resolveInterpreters()
    tprnt("Resolved interpreter ids in %d Applications, and removed %d "
          "instances by merging them with another as a result."
          % (interpretersAdded, instancesEliminated))

    # Update events' actor ids in the ApplicationStore, then take them
    # and send them to the EventStore. Finally, sort the EventStore by
    # timestamp.
    tprnt("\nInserting and sorting all events...")
    store.sendEventsToStore()
    evStore.sort()
    evCount = evStore.getEventCount()
    tprnt("Sorted all %d events in the event store." % evCount)

    # Simulate the events to build a file model
    tprnt("\nSimulating all events to build a file model...")
    evStore.simulateAllEvents()
    # The loaders are no longer needed once the model is built.
    del sql
    del pll
    evStore.sort()
    tprnt("Simulated all events. %d files initialised." % len(fileStore))

    appCount = store.getAppCount()
    userAppCount = store.getUserAppCount()
    instCount = len(store)
    userInstCount = store.getUserInstCount()
    fileCount = len(fileStore)
    docCount = fileStore.getUserDocumentCount(userConf.getSetting("HomeDir"))

    if printExtensions():
        exts = set()
        for f in fileStore:
            exts.add(f.getExtension())
        # Files without an extension contribute None; drop it quietly.
        exts.discard(None)
        tprnt("Info: the following file extensions were found:")
        for e in sorted(exts):
            print("\t%s: %s" % (e,
                                mimetypes.guess_type("f.%s" % e,
                                                     strict=False)))

    if checkExcludedFilesEnabled():
        # Fixed: the two adjacent literals previously concatenated to
        # "...which wroteto excluded directories...".
        tprnt("\nPrinting files written and read by instances which wrote"
              " to excluded directories...")
        dbgPrintExcludedEvents()
        import time as t
        t.sleep(10)

    # Manage --inode queries
    if __opt_inode_query:
        inodes = __opt_inode_query.split(",")
        for inode in sorted(int(i) for i in inodes):
            f = fileStore.getFile(inode)
            tprnt("\nInode queried: %d" % inode)
            tprnt("Corresponding file: %s\n\t(%s)" % (f.getName(), f))
        sys.exit(0)

    # Print the model as proof of concept
    if debugEnabled():
        tprnt("\nPrinting the file model...\n")
        fileStore.printFiles(showDeleted=True,
                             showCreationTime=True,
                             showDocumentsOnly=True,
                             userHome=userConf.getSetting("HomeDir"),
                             showDesignatedOnly=False)

    # Make the filesystem corresponding to the model
    if outputFsEnabled():
        tprnt("\nMaking a copy of the file model at '%s'...\n" %
              outputFsEnabled())
        fileStore.makeFiles(outputDir=outputFsEnabled(),
                            showDeleted=True,
                            showDocumentsOnly=False,
                            userHome=userConf.getSetting("HomeDir"),
                            showDesignatedOnly=False)

        with open(os.path.join(outputFsEnabled(), "statistics.txt"),
                  "w") as f:
            msg = "SQL: %d apps; %d instances; %d events; %d%% valid\n" % \
                (sqlAppCount, sqlInstCount, sqlEvCount, sqlValidEvCount)
            msg += "PreloadLogger: %d apps; %d instances; %d events; " \
                "%d%% valid\n" % \
                (pllAppCount, pllInstCount, pllEvCount, pllValidEvCount)
            msg += "Simulated: %d apps; %d instances; %d user apps; %d user" \
                " instances; %d events; %d files; %d user documents\n" % \
                (appCount, instCount, userAppCount, userInstCount,
                 evCount, fileCount, docCount)
            exclLists = userConf.getDefinedSecurityExclusionLists()
            for l in exclLists:
                msg += "Exclusion list '%s' defined.\n" % l
            print(msg, file=f)

    # Build a general access graph.
    if graphEnabled():
        skipList = skipEnabled()
        if skipList and 'graphs' in skipList:
            tprnt("\nGraphs in skip list, skipping global graph generation.")
        else:
            engine = GraphEngine.get()
            engine.runGraph(policy=None)

    # Policy engine. Create a policy and run a simulation to score it.
    if scoreEnabled() or attacksEnabled() or graphEnabled():
        engine = PolicyEngine()

        if __opt_quick_pol:
            policies = [__opt_quick_pol]
            polArgs = [None]
        else:
            policies = [
                CompoundLibraryPolicy,
                CustomLibraryPolicy,
                DesignationPolicy,
                DistantFolderPolicy,
                FilenamePolicy,
                FileTypePolicy,
                FolderPolicy,
                OneDistantFolderPolicy,
                OneFolderPolicy,
                OneLibraryPolicy,
                UnsecurePolicy,
                Win10Policy,
                Win8Policy,
                HSecurePolicy,
                HBalancedPolicy,
                'HSecureSbPolicy',
                'HSecureSbFaPolicy',
                'HSecureFaPolicy',
                'HBalancedSbPolicy',
                'HBalancedSbFaPolicy',
                'HBalancedFaPolicy',
                'OneDistantFolderSbPolicy',
                'OneDistantFolderSbFaPolicy',
                'OneDistantFolderFaPolicy',
                'HUsableSecuredSbPolicy',
                'HUsableSecuredSbFaPolicy',
                'HUsableSecuredFaPolicy',
                'HBalancedSecuredSbPolicy',
                'HBalancedSecuredSbFaPolicy',
                'HBalancedSecuredFaPolicy',
                'DistantFolderSbPolicy',
                'DistantFolderSbFaPolicy',
                'DistantFolderFaPolicy',
                'LibraryFolderSbPolicy',
                'LibraryFolderSbFaPolicy',
                'LibraryFolderFaPolicy',
                'FileTypeSbPolicy',
                'FileTypeSbFaPolicy',
                'FileTypeFaPolicy',
                'OneFolderSbPolicy',
                'OneFolderSbFaPolicy',
                'OneFolderFaPolicy',
                'FolderSbPolicy',
                'FolderSbFaPolicy',
                'FolderFaPolicy',
                'OneLibrarySbPolicy',
                'OneLibrarySbFaPolicy',
                'OneLibraryFaPolicy',
                'CompoundLibrarySbPolicy',
                'CompoundLibrarySbFaPolicy',
                'CompoundLibraryFaPolicy',
                'CustomLibrarySbPolicy',
                'CustomLibrarySbFaPolicy',
                'CustomLibraryFaPolicy',
            ]
            # One argument slot per policy (was a hand-counted list of
            # None literals, which was error-prone to keep in sync).
            # Example custom args: dict(folders=["~/Downloads", "/tmp"])
            polArgs = [None] * len(policies)

        skipList = skipEnabled()
        for (polIdx, polName) in enumerate(policies):
            pol = None
            arg = polArgs[polIdx]

            # Names with certain suffixes are dynamically generated
            # compositional policies.
            if isinstance(polName, str):
                if polName.endswith('SbPolicy'):
                    pols = [
                        getattr(sys.modules[__name__],
                                polName[:-8] + 'Policy'),
                        StickyBitPolicy
                    ]
                    args = [arg, dict(folders=["~", "/media", "/mnt"])]
                    pol = CompositionalPolicy(pols, args, polName)
                elif polName.endswith('SbFaPolicy'):
                    pols = [
                        getattr(sys.modules[__name__],
                                polName[:-10] + 'Policy'),
                        StickyBitPolicy,
                        FutureAccessListPolicy
                    ]
                    args = [arg, dict(folders=["~", "/media", "/mnt"]),
                            None]
                    pol = CompositionalPolicy(pols, args, polName)
                elif polName.endswith('FaPolicy'):
                    pols = [
                        getattr(sys.modules[__name__],
                                polName[:-8] + 'Policy'),
                        FutureAccessListPolicy
                    ]
                    args = [arg, None]
                    pol = CompositionalPolicy(pols, args, polName)
                # A normal policy, just invoke it directly.
                else:
                    polName = getattr(sys.modules[__name__], polName)

            # Existing policies, with arguments / or normal policies
            # passed as strings, including via the --quick flag.
            if not pol:
                pol = polName(**arg) if arg else polName()

            tprnt("\nRunning %s..." % pol.name)
            if skipList and pol.name in skipList:
                tprnt("%s is in skip list, skipping." % pol.name)
                continue

            engine.runPolicy(pol,
                             outputDir=outputFsEnabled(),
                             printClusters=printClustersEnabled())
            if pol.name == "FileTypePolicy" and checkMissingEnabled():
                pol.abortIfUnsupportedExtensions()

            if attacksEnabled():
                tprnt("Simulating attacks on %s..." % pol.name)
                sim = AttackSimulator(seed=0)
                sim.runAttacks(pol, outputDir=outputFsEnabled() or "/tmp/")

            del pol

    # Calculate frequently co-accessed files:
    if relatedFilesEnabled():
        engine = FrequentFileEngine()
        tprnt("\nMining for frequently co-accessed file types...")
        engine.mineFileTypes()