Example #1
    def runGraph(self,
                 policy: Policy = None,
                 outputDir: str = None,
                 quiet: bool = False):
        """Build a graph of IFs in the simulation given a policy."""
        outputDir = policy.getOutputDir(parent=outputFsEnabled()) if \
            policy else outputFsEnabled()

        if not quiet:
            tprnt("\nCompiling the Unified Graph...")
            tprnt("\tMaking graph...")
        g = UnifiedGraph(outputDir=outputDir)

        if not quiet:
            tprnt("\tPopulating graph...")
        g.populate(policy=policy, quiet=quiet)
        output = policy.name + "-graph-unified" if policy else \
            "graph-unified"

        if not plottingDisabled():
            if not quiet:
                tprnt("\tPlotting graph...")
            g.plot(output=output)

        if not quiet:
            tprnt("\tComputing community clusters...")
        # We are computing the global graph with all the information.
        if not policy:
            g.computeClusters()
            self.globMembership = g.clusters.membership
            self.globNames = g.g.vs['name']
        # We are computing a policy graph to compare to the global graph.
        else:
            if not self.globMembership:
                if not quiet:
                    tprnt("\t\tWarning: cannot re-use global communities as "
                          "they aren't computed yet, computing local ones "
                          "instead.")
                g.computeClusters()
            else:
                if not quiet:
                    tprnt("\t\tUsing global community memberships to refine "
                          "clusters.")
                applyCommunities(g, self.globMembership, self.globNames, True)

        if not plottingDisabled():
            if not quiet:
                tprnt("\tPlotting communities...")
            g.plotClusters(output=output)

        if not quiet:
            tprnt("\tCalculating costs to optimal communities...")
        g.calculateCosts(output=output, policy=policy, quiet=quiet)

        if not quiet:
            tprnt("\tCalculating potential reachability improvement...")
        g.calculateReachability(output=output,
                                quiet=quiet,
                                nodeCount=self.docCount)

        if not quiet:
            tprnt("Done.")
Example #2
    def __init__(self, parent: CommonGraph, quiet: bool = False):
        """Construct a FlatGraph."""
        super(FlatGraph, self).__init__()
        if not isinstance(parent, CommonGraph):
            raise TypeError("FlatGraph constructor needs a CommonGraph "
                            "parent, received a %s." %
                            parent.__class__.__name__)

        self.g = None
        self.clusters = None
        self.outputDir = parent.outputDir
        self.vertices = dict()
        self.edges = set()
        self.weights = dict()

        # Step 1. make a copy of the graph without file-file edges, to
        # find paths between files that go through apps.
        if not quiet:
            tprnt("\t\t\tStep 1: copy graph, excluding file-file nodes...")
            tprnt("\t\t\t\tCopy graph...")
        copy = parent.g.copy()  # type: Graph
        types = parent.g.vs['type']
        names = parent.g.vs['name']
        toBeRemoved = []
        namesRemoved = []
        if not quiet:
            tprnt("\t\t\t\tFind edges to delete...")
        for edge in copy.es:
            if types[edge.source] == "file" and \
                    types[edge.target] == "file":
                toBeRemoved.append(edge)
                namesRemoved.append((names[edge.source], names[edge.target]))

        if not quiet:
            tprnt("\t\t\t\tDelete edges...")
        copy.delete_edges(toBeRemoved)

        # Step 2. run an all-pairs shortest path algorithm, pick out
        # file-file paths with no intermediary files, and save this info
        # in the form of an edge list.
        if not quiet:
            tprnt("\t\t\tStep 2: run an all-pairs shortest path "
                  "algorithm, remove file-file paths with intermediary "
                  "files and gather final file-file edges...")
            tprnt("\t\t\t\tCopy file nodes...")
        fileNodes = list(
            (copy.vs[i] for i, t in enumerate(types) if t == "file"))

        edges = set()
        # weights = dict()
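        # igraph's UniqueIdGenerator maps each vertex name to a consecutive
        # integer id, letting us accumulate the edge list before building
        # the Graph itself.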
        self.idgen = UniqueIdGenerator()

        fileNodeCount = len(fileNodes)
        if not quiet:
            tprnt("\t\t\t\tGet shortest paths for each of %d file nodes..." %
                  fileNodeCount)
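        # Report progress roughly every 5%; threshold is 1% of the node
        # count and nodePct tracks the percentage processed so far.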
        threshold = fileNodeCount / 100
        nodeI = 0
        lastNodePct = 0
        nodePct = 0
        for v in fileNodes:
            nodeI += 1
            if nodeI >= (threshold * nodePct):
                nodePct = int(nodeI / threshold)
                if nodePct >= lastNodePct + 5:
                    if not quiet:
                        print("\t\t\t\t\t... (%d%% done)" % nodePct)
                    lastNodePct = nodePct

            # Get shortest paths.
            vPaths = copy.get_shortest_paths(v, to=fileNodes)

            # Remove unnecessary bits.
            delSet = set()
            for (idx, p) in enumerate(vPaths):
                if len(p) < 1:
                    continue

                # Ignore paths with intermediary files.
                for node in p[1:-1]:
                    if types[node] == "file":
                        delSet.add(idx)

            # Remove unsuitable paths, in reverse index order so that
            # earlier indices remain valid.
            for i in sorted(list(delSet), reverse=True):
                del vPaths[i]
            del delSet

            # Save the remaining shortest paths as edges between their two
            # endpoints; the set collapses duplicates.
            for p in vPaths:
                if len(p) <= 1:
                    continue
                key = (self.idgen[names[p[0]]], self.idgen[names[p[-1]]])
                edges.add(key)
                # weights[key] = 1 / (len(p) - 1)

        # Re-add the direct file-file edges removed in Step 1.
        if not quiet:
            tprnt("\t\t\t\tRe-add direct file-file edges into the graph...")
        for (src, dest) in namesRemoved:
            edges.add((self.idgen[src], self.idgen[dest]))

        # Step 3. construct a graph with only file nodes.
        if not quiet:
            tprnt("\t\t\tStep 3: construct a graph with only file nodes...")
        edges = list(edges)
        self.g = Graph(edges)
        # self.g.es["weight"] = list((weights[e] for e in edges))
        del edges
        self.g.vs["name"] = self.idgen.values()

        # Step 4. apply community information to the nodes.
        if not quiet:
            tprnt("\t\t\tStep 4: apply communities to flat graph...")
        applyCommunities(self, parent.clusters.membership, names)
Example #3
    def populate(self, policy: Policy = None, quiet: bool = False):
        """Populate the AccessGraph, filtering it based on a Policy."""
        appStore = ApplicationStore.get()
        fileStore = FileStore.get()
        fileFactory = FileFactory.get()
        userConf = UserConfigLoader.get()

        # Add all user apps.
        if not quiet:
            tprnt("\t\tAdding apps...")
        for app in appStore:
            if app.isUserlandApp():
                self._addAppNode(app)

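        # Keep an access when its actor is a userland app and the access
        # was made by designation or is permitted by the policy (if any).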
        def _allowed(policy, f, acc):
            return acc.actor.isUserlandApp() and \
                (acc.isByDesignation() or not policy or
                 policy.allowedByPolicy(f, acc.actor))

        # Add all user documents.
        if not quiet:
            tprnt("\t\tAdding user documents...")
        self.docCount = 0
        for f in fileStore:
            if not f.isUserDocument(userHome=userConf.getHomeDir(),
                                    allowHiddenFiles=True):
                continue
            if f.isFolder():
                continue

            # Provided they have userland apps accessing them.
            hasUserlandAccesses = False
            for acc in f.getAccesses():
                if _allowed(policy, f, acc):
                    hasUserlandAccesses = True
                    break

            # And then add such userland apps to user document accesses.
            if hasUserlandAccesses:
                self.docCount += 1
                self._addFileNode(f)
                for acc in f.getAccesses():
                    if _allowed(policy, f, acc):
                        self._addAccess(f, acc)

        if not quiet:
            tprnt("\t\tAdding file links...")
        links = fileFactory.getFileLinks()
        for (pred, follow) in links.items():
            source = str(pred.inode)
            dest = str(follow)
            if source in self.vertices and dest in self.vertices:
                tprnt("Info: adding link from File %s to File %s in graph "
                      "as there is a file move/copy event between those." %
                      (source, dest))
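                # Order the endpoints so each undirected link is stored once.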
                edge = (source, dest) if source <= dest else (dest, source)
                self.edges.add(edge)
                self.weights[edge] = 999999999

        if not quiet:
            tprnt("\t\tConstructing graph...")
        self._construct()
Example #4
    def calculateReachability(self,
                              output: str = None,
                              quiet: bool = False,
                              nodeCount: int = 0):
        """Model the reachability improvement of community finding."""
        if self.clusters is None:
            raise ValueError("Clusters for a graph must be computed "
                             "before modelling how community isolation "
                             "decreases its average reachability.")
        if self.editCount is None:
            raise ValueError("Costs for a graph must be calculated "
                             "before modelling how community isolation "
                             "decreases its average reachability.")

        msg = ""

        def _print(clusters, header, tag):
            msg = "\nGraph statistics %s:\n" % header

            if len(clusters) == 0:
                msg += "no clusters for this graph."
                return (msg, 0, 1)

            sizes = [
                x for x in sorted(list((len(x) for x in clusters))) if x != 0
            ]
            vertexSum = sum(sizes)
            isolatedNC = nodeCount - self.docCount
            msg += ("* %s-size distribution: %s\n" % (tag, sizes.__str__()))
            msg += ("* %s-cluster count: %d\n" % (tag, len(sizes)))
            msg += ("* %s-isolated nodes: %d\n" % (tag, isolatedNC))
            msg += ("* %s-smallest cluster: %d\n" % (tag, min(sizes)))
            msg += ("* %s-largest cluster: %d\n" % (tag, max(sizes)))
            avgSize = vertexSum / len(sizes)
            msg += ("* %s-average size: %f\n" % (tag, avgSize))

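            # A vertex in a cluster of size s reaches s vertices, so the
            # expected reachability of a random vertex is sum(s^2) / sum(s);
            # e.g. sizes [2, 3] give (4 + 9) / 5 = 2.6.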
            reach = sum([i**2 for i in sizes]) / vertexSum
            msg += ("* %s-average reachability: %f\n" % (tag, reach))

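            # The adjusted figure counts isolated nodes as singleton
            # clusters, each adding 1^2 to the numerator and 1 to the total.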
            reach = (sum([i ** 2 for i in sizes]) + isolatedNC) / \
                    (vertexSum + isolatedNC)
            msg += ("* %s-adjusted reachability: %f\n" % (tag, reach))

            return (msg, avgSize, reach)

        def _printAndSum(g, editCount, tagPrefix=None):
            msg = "\n"

            preTag = tagPrefix + "-pre" if tagPrefix else "pre"
            _m, avgPreSize, preReach = _print(g.g.clusters(),
                                              "pre community finding", preTag)
            msg += _m

            postTag = tagPrefix + "-post" if tagPrefix else "post"
            _m, avgPostSize, postReach = _print(g.clusters,
                                                "post community finding",
                                                postTag)
            msg += _m

            if avgPreSize:
                deltaSize = 1 - (avgPostSize / avgPreSize)
                sizeEfficiency = deltaSize / editCount if editCount else 1
                msg += "\nEvol. of avg. cluster size: {:.2%}\n".format(
                    deltaSize)
                msg += ("Efficiency of edits wrt. average size: %f\n" %
                        sizeEfficiency)
            else:
                msg += "\nEvol. of avg. cluster size: N/A\n"

            if preReach:
                deltaReach = 1 - (postReach / preReach)
                reachEfficiency = deltaReach / editCount if editCount else 1
                msg += "\nEvol. of reachability: {:.2%}\n".format(deltaReach)
                msg += ("Efficiency of edits wrt. adj. reachability: %f\n" %
                        reachEfficiency)
            else:
                msg += "\nEvol. of adj. reachability: N/A\n"

            return msg

        if not quiet:
            tprnt("\t\tPrinting statistics on whole graph...")
        msg += _printAndSum(self, self.editCount)

        if not quiet:
            tprnt("\t\tBuilding flat file graph...")
        fg = FlatGraph(parent=self, quiet=quiet)
        if not plottingDisabled():
            if not quiet:
                tprnt("\t\tPlotting flat file graph...")
            fg.plot(output=output)
        if not quiet:
            tprnt("\t\tPrinting statistics on flat file graph...")
        msg += _printAndSum(fg, self.editCount, tagPrefix="flat")

        if not quiet:
            tprnt(msg)

        if output:
            path = self.outputDir + "/" + output + ".graphstats.txt"
            os.makedirs(File.getParentNameFromName(path), exist_ok=True)
            with open(path, "a") as f:
                print(msg, file=f)
Example #5
    def calculateCosts(self,
                       output: str = None,
                       quiet: bool = False,
                       policy: Policy = None):
        """Model the usability costs needed to reach found communities."""
        if not self.clusters:
            raise ValueError("Clusters for a graph must be computed "
                             "before calculating its cost.")

        msg = ""
        appStore = ApplicationStore.get()

        crossing = self.clusters.crossing()
        grantingCost = 0
        isolationCost = 0
        splittingCost = 0
        for (index, x) in enumerate(crossing):
            if not x:
                continue

            edge = self.g.es[index]
            source = self.g.vs[edge.source]
            target = self.g.vs[edge.target]
            sourceType = source.attributes()['type']
            targetType = target.attributes()['type']
            sourceName = source.attributes()['name']
            targetName = target.attributes()['name']

            # Edges leaving a file node: a file-app edge costs one access
            # grant; a file-file edge means splitting the apps that
            # co-access both files, and is otherwise unsupported.
            if sourceType == "file":
                if targetType == "app":
                    grantingCost += 1
                    if policy:
                        app = appStore.lookupUid(targetName)
                        policy.incrementScore('graphGrantingCost', None, app)
                else:
                    # Check if an app co-accessed the files. If so, increase the
                    # cost of splitting that app instance into two.
                    sAccessors = []
                    for n in source.neighbors():
                        if n.attributes()['type'] == 'app':
                            sAccessors.append(n)
                    tAccessors = []
                    for n in target.neighbors():
                        if n.attributes()['type'] == 'app':
                            tAccessors.append(n)

                    inter = intersection(sAccessors, tAccessors)

                    for i in inter:
                        splittingCost += 1
                        if policy:
                            # Attribute the split cost to the co-accessing
                            # app rather than to the source file's name.
                            app = appStore.lookupUid(i.attributes()['name'])
                            policy.incrementScore('graphSplittingCost', None,
                                                  app)
                    if not inter:
                        print(
                            "Warning: file-file node removed by graph "
                            "community finding algorithm. Not supported.",
                            file=sys.stderr)
                        print(source, target)
                        raise NotImplementedError
            elif targetType == "file":  # sourceType in "app", "appstate"
                grantingCost += 1
                if sourceType == "app" and policy:
                    app = appStore.lookupUid(sourceName)
                    policy.incrementScore('graphGrantingCost', None, app)
                elif policy:
                    policy.incrementScore('graphGrantingCost', None, None)
            else:
                # app-app links are just noise in the UnifiedGraph
                if sourceType != "app" and targetType == "app":
                    isolationCost += 1
                    if policy:
                        app = appStore.lookupUid(targetName)
                        policy.incrementScore('graphIsolationCost', None, app)
                elif sourceType == "app" and targetType != "app":
                    isolationCost += 1
                    if policy:
                        app = appStore.lookupUid(sourceName)
                        policy.incrementScore('graphIsolationCost', None, app)

        editCount = grantingCost + isolationCost + splittingCost
        msg += ("%d edits performed: %d apps isolated, %d apps split and "
                "%d accesses revoked.\n" %
                (editCount, isolationCost, splittingCost, grantingCost))

        if not quiet:
            tprnt(msg)

        if output:
            path = self.outputDir + "/" + output + ".graphstats.txt"
            os.makedirs(File.getParentNameFromName(path), exist_ok=True)
            with open(path, "w") as f:
                print(msg, file=f)

        self.editCount = editCount
    def processFrequentItemLists(self, inputDirs: str):
        """Process frequent item lists in a comma-separated list of input
        folders."""
        from orangecontrib.associate.fpgrowth import frequent_itemsets
        from os.path import isfile, exists
        from os import replace, makedirs

        inputPaths = [
            d + '/typesPerInstance.list' for d in inputDirs.split(",")
        ]

        # Check for missing files.
        for p in inputPaths:
            if not isfile(p):
                raise ValueError("File '%s' could not be found, please verify "
                                 "you have invoked the analysis software with "
                                 "the --related-files flag for this user." % p)

        # Read every file and aggregate transactions.
        tprnt("Aggregating transactions from input files...")
        transactions = []
        for p in inputPaths:
            participantFolder = p.split("/")[-2]
            tprnt("%s: %s" % (participantFolder, p))
            with open(p, 'r') as f:
                for line in f:
                    transaction = line.rstrip("\n").split("\t")
                    transaction[0] = participantFolder + "/" + transaction[0]
                    transactions.append(transaction)
        tprnt("Done.")

        # Compute itemsets from transactions.
        tprnt("\nComputing frequent itemsets.")
        itemsets = frequent_itemsets(transactions, frequency())
        tprnt("Done.")

        # Functions to sort itemsets.
        def _isPath(elem):
            return elem[0] in ['/', '~', '@']

        def _hasPath(item):
            for t in item[0]:
                if _isPath(t):
                    return True

            return False

        def _uniqueType(item):
            typeCnt = 0

            for t in item[0]:
                if not _isPath(t):
                    typeCnt += 1

                    # Save time.
                    if typeCnt > 1:
                        return False

            return typeCnt == 1

        def _uniqueTypeWithAccessVariations(item):
            uniqueType = None

            for t in item[0]:
                if not _isPath(t):
                    if t.endswith(":r") or t.endswith(":w"):
                        t = t[:-2]

                    if not uniqueType:
                        uniqueType = t
                    elif uniqueType != t:
                        return False

            return uniqueType is not None

        def _multipleTypes(item):
            uniqueType = None

            for t in item[0]:
                if not _isPath(t):
                    if t.endswith(":r") or t.endswith(":w"):
                        t = t[:-2]

                    if not uniqueType:
                        uniqueType = t
                    elif uniqueType != t:
                        return True

            return False

        # Sort itemsets
        tprnt("\nSorting frequent itemsets to isolate mime type co-access "
              "patterns.")
        uniques = []
        patterns = dict()
        for item in itemsets:
            if _hasPath(item):
                pass
            elif _uniqueType(item):
                uniques.append(item)
            elif _uniqueTypeWithAccessVariations(item):
                pass
            elif _multipleTypes(item):
                patterns[item[0]] = item[1]
        tprnt("Done.")

        # Make output directory.
        if exists(self.outputDir):
            backup = self.outputDir.rstrip("/") + ".backup"
            if exists(backup):
                shutil.rmtree(backup)
            replace(self.outputDir, backup)
        makedirs(self.outputDir, exist_ok=False)

        # displayPatterns = dict()
        # for p in patterns:
        #     disp = set()
        #     for elem in p:
        #         if elem.endswith(":r") or elem.endswith(":w"):
        #             disp.add(elem)
        #         elif elem+":w" not in p and elem+":r" not in p:
        #             disp.add(elem)
        #     displayPatterns[p] = disp

        # Print to files.
        with open(self.outputDir + '/' + 'patterns.out', 'w') as f:
            tprnt("\nMost commonly found types:")
            print("Most commonly found types:", file=f)
            for item in sorted(uniques, key=lambda x: x[1], reverse=True):
                print("\t", item)
                print("mcft\t", item, file=f)

            tprnt("\nMost commonly found patterns:")
            print("\nMost commonly found patterns:", file=f)
            for item in sorted(patterns.items(),
                               key=lambda x: x[1],
                               reverse=True):
                print("\t", item)
                print("mcfp\t", item, file=f)
            print("", file=f)

        del itemsets

        # Match items in patterns to transactions, and print out app and file
        # names.
        tprnt("\nMatching frequent patterns to transactions...")
        transactionsPerPattern = dict()
        for t in transactions:
            for p in patterns.keys():
                if p.issubset(t):
                    matches = transactionsPerPattern.get(p) or []
                    matches.append(t)
                    transactionsPerPattern[p] = matches
        tprnt("Done.")

        def _printPattern(p, matches, counter, exclusiveCounter):
            # counterI and exclusiveCounterI are read from the enclosing
            # loop's iteration scope at call time.
            msg = ""
            listing = ""
            summary = ""

            # Base pattern identity.
            msg += ("\n\nPATTERN: %d\t%s" % (patterns[p], p.__str__()))

            # Transaction listing.
            for matchedTransaction in matches:
                listing += ("App: %s\n" % matchedTransaction[0])
                for transactionElem in sorted(matchedTransaction[1:]):
                    listing += ("\t* %s\n" % transactionElem)
                listing += ("\n")

            # Counters of file extension co-occurrences.
            for (k, v) in sorted(counter.items()):
                summary += ("\t{%s} occurs %d times, in %d patterns\n" %
                            (','.join(k), v, counterI[k]))
            summary += "\n"
            for (k, v) in sorted(exclusiveCounter.items()):
                summary += ("\t{%s} is exclusive %d times, in %d patterns\n" %
                            (','.join(k), v, exclusiveCounterI[k]))

            # Print to files.
            with open(self.outputDir + '/' + 'patterns.out', 'a') as f:
                print(msg, file=f)
                print(summary, file=f)

            with open(self.outputDir + '/' + 'patternsListing.out', 'a') as f:
                print(msg, file=f)
                print(listing, file=f)

        # Pre-analyse the relationships between file endings in patterns.
        tprnt("\nPre-analysing the relationships between files in patterns...")
        for (p, matches) in sorted(transactionsPerPattern.items()):
            # Counter used to count combos of files with the same name and
            # different extensions.
            counter = dict()
            exclusiveCounter = dict()
            counterI = dict()
            exclusiveCounterI = dict()

            # Go through file accesses that match the pattern.
            for matchedTransaction in matches:
                # We collect sets of names for each encountered file extension.
                nameDict = dict()
                extensions = set()
                for transactionElem in sorted(matchedTransaction[1:]):
                    if not (transactionElem.startswith("/")
                            or transactionElem.startswith("~")):
                        continue

                    # Get the base name and file extension.
                    ftype = mimetypes.guess_type(transactionElem)[0]
                    fname = File.getFileNameFromPath(transactionElem)
                    fnoext = File.getNameWithoutExtensionFromPath(fname)
                    fext = File.getExtensionFromPath(fname, filterInvalid=True)

                    # Remember which exts were found for a name and overall.
                    if fext:
                        extensions.add(fext)
                        extSet = nameDict.get(fnoext) or set()
                        extSet.add(fext)
                        nameDict[fnoext] = extSet

                # Now check which extension combos exist, and how many times
                # they occur.
                extPairOccs = dict()
                for (fname, extSet) in nameDict.items():
                    fs = frozenset(extSet)
                    extPairOccs[fs] = (extPairOccs.get(fs) or 0) + 1

                # Compile list of all valid extension combos, and browse them
                # in reverse order of length as we first want to validate the
                # largest combinations.
                combos = list(extPairOccs.keys())
                combos.sort(key=len, reverse=True)

                # Count patterns which exclusively have one extension tied to
                # another (i.e. extension never appears on its own).
                exclusives = dict()
                nonExclusiveKeys = set()
                for k in combos:
                    # All the subsets of the current combo of filetypes are not
                    # exclusive since they're included in this set.
                    subcombos = list()
                    for i in range(1, len(k)):
                        subcombos.extend([
                            frozenset(x) for x in itertools.combinations(k, i)
                        ])
                    nonExclusiveKeys.update(subcombos)

                    # Also check if any of these subsets is itself in the list,
                    # if so the current set is not exclusive.
                    for sub in subcombos:
                        if sub in extPairOccs:
                            break
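                    # (for/else: the else branch runs only when no subset
                    # of k was itself observed as a combo.)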
                    else:
                        # Remember: subsets of a previous set aren't exclusive.
                        if k not in nonExclusiveKeys:
                            exclusives[k] = extPairOccs[k]

                # Now add the match's groups of filenames to counters for the
                # whole pattern. Count both number of cases where the pattern
                # is found / exclusively found, and the number of times it is
                # found.
                for (k, v) in extPairOccs.items():
                    counter[k] = (counter.get(k) or 0) + v
                    counterI[k] = (counterI.get(k) or 0) + 1
                for (k, v) in exclusives.items():
                    exclusiveCounter[k] = (exclusiveCounter.get(k) or 0) + v
                    exclusiveCounterI[k] = (exclusiveCounterI.get(k) or 0) + 1

            # Finally, print information on the pattern.
            _printPattern(p, matches, counter, exclusiveCounter)
    def _runAttackRound(self, attack: Attack, policy: Policy, acListInst: dict,
                        lookUps: dict, allowedCache: dict):
        """Run an attack round with a set source and time."""
        fileStore = FileStore.get()
        appStore = ApplicationStore.get()
        userConf = UserConfigLoader.get()
        userHome = userConf.getHomeDir()

        seen = set()  # Already seen targets.
        spreadTimes = dict()  # Times from which the attack can spread.

        toSpread = deque()
        toSpread.append(attack.source)
        spreadTimes[attack.source] = attack.time

        # Statistics counters.
        appSet = set()
        userAppSet = set()
        fileCount = 0
        docCount = 0

        if debugEnabled():
            tprnt("Launching attack on %s at time %s %s app memory." %
                  (attack.source if isinstance(attack.source, File) else
                   attack.source.uid(), time2Str(attack.time),
                   "with" if attack.appMemory else "without"))

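        # Policy decisions are memoised in allowedCache, as the same
        # (policy, file, access) triple recurs across attack rounds.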
        def _allowed(policy, f, acc):
            k = (policy, f, acc)
            if k not in allowedCache:
                v = (policy.fileOwnedByApp(f, acc)
                     or policy.allowedByPolicy(f, acc.actor)
                     or policy.accessAllowedByPolicy(f, acc))
                allowedCache[k] = v
                return v
            else:
                return allowedCache[k]

        # As long as there are reachable targets, loop.
        while toSpread:
            current = toSpread.popleft()
            currentTime = spreadTimes[current]

            # When the attack spreads to a File.
            if isinstance(current, File):
                fileCount += 1
                if current.isUserDocument(userHome):
                    docCount += 1
                if debugEnabled():
                    tprnt("File added @%d: %s" % (currentTime, current))

                # Add followers.
                for f in current.follow:
                    if f.time > currentTime:
                        follower = fileStore.getFile(f.inode)
                        if follower not in seen:
                            toSpread.append(follower)
                            seen.add(follower)
                            spreadTimes[follower] = f.time

                # Add future accesses.
                for acc in current.accesses:
                    if acc.time > currentTime and \
                            acc.actor.desktopid not in appSet and \
                            _allowed(policy, current, acc):
                        toSpread.append(acc.actor)
                        spreadTimes[acc.actor] = acc.time

            # When the attack spreads to an app instance.
            elif isinstance(current, Application):
                if debugEnabled():
                    tprnt("App added @%d: %s" % (currentTime, current.uid()))

                # Add files accessed by the app.
                for (accFile, acc) in acListInst.get(current.uid()) or []:
                    if acc.time > currentTime and \
                            accFile not in seen and \
                            _allowed(policy, accFile, acc):
                        toSpread.append(accFile)
                        seen.add(accFile)
                        spreadTimes[accFile] = acc.time

                # Add future versions of the app.
                if attack.appMemory and current.desktopid not in appSet:
                    for app in appStore.lookupDesktopId(current.desktopid):
                        if app.tstart > currentTime:
                            toSpread.append(app)
                            spreadTimes[app] = app.tstart

                # We do this last to use appSet as a cache for already seen
                # apps, so we append all future instances once and for all to
                # the spread list.
                appSet.add(current.desktopid)
                if current.isUserlandApp():
                    userAppSet.add(current.desktopid)

            else:
                print("Error: attack simulator attempting to parse an unknown"
                      " object (%s)" % type(current),
                      file=sys.stderr)

        return (appSet, userAppSet, fileCount, docCount)
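
A minimal driver sketch, mirroring the calls in main() below; pol stands for
any instantiated Policy, and the assumption that runAttacks selects attack
sources and times before invoking _runAttackRound is ours:

    sim = AttackSimulator(seed=0)
    sim.runAttacks(pol, outputDir=outputFsEnabled() or "/tmp/")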
def main(argv):
    __opt_inode_query = None
    __opt_post_analysis = None
    __opt_quick_pol = None

    # Parse command-line parameters
    try:
        (opts, args) = getopt.getopt(argv, "hta:cedf:o:q:sk:rpgGi:u:x", [
            "help", "attacks", "post-analysis=", "check-missing",
            "check-excluded-files", "debug", "frequency", "inode",
            "extensions", "related-files", "output=", "output-fs=", "score",
            "quick-pol=", "skip=", "user", "clusters", "print-clusters",
            "graph", "graph-clusters", "disable-plotting"
        ])
    except getopt.GetoptError:
        print(USAGE_STRING)
        sys.exit(2)
    else:
        for opt, arg in opts:
            if opt in ('-h', '--help'):
                print(USAGE_STRING + "\n\n\n\n")

                print("--attacks:\n\tSimulates attacks and reports "
                      "on proportions of infected files and apps.\n")
                print("--check-excluded-files:\n\tPrints the lists of files "
                      "accessed by apps that also wrote to excluded\n\tfiles,"
                      " then aborts execution of the program.\n")
                print("--check-missing:\n\tChecks whether some Desktop IDs "
                      "for apps in the user's directory are\n\tmissing. If so,"
                      " aborts execution of the program.\n")
                print("--clusters:\n\tPrints clusters of files with "
                      "information flows to one another. Requires\n\tthe "
                      "--score option.\n")
                print("--debug:\n\tPrints additional debug information in "
                      "various code paths to help debug\n\tthe program.\n")
                print("--disable-plotting:\n\tDo not plot cluster graphs. See "
                      "the --graph option.\n")
                print("--extensions:\n\tPrints file extensions and MIME type "
                      "associations for this user.\n")
                print("--frequency:\n\tSets the frequency used by the "
                      "frequent-itemsets algorithm in the\n\t--related-files "
                      "post-analysis. Requires the --related-files option.\n")
                print("--graph:\n\tFind communities in file/app "
                      "accesses using graph theory methods.\n")
                print("--help:\n\tPrints this help information and exits.\n")
                print("--output=<DIR>:\n\tSaves a copy of the simulated "
                      "files, and some information on events\n\trelated to "
                      "them, in a folder created at the <DIR> path.\n")
                print("--post-analysis=<DIR,DIR,DIR>:\n\t"
                      "Uses the value pointed to"
                      " by --output in order to produce graphs and\n\t"
                      "statistics.\n")
                print("--quick-pol=Policy:\n\tReplace the default policies "
                      "with this one single Policy.\n")
                print("--related-files:\n\tMines for files that are frequently"
                      " accessed together by apps. Produces\n\toutput files in"
                      " scoring mode, and an analysis output in post-analysis"
                      "\n\tmode. See also --frequency.\n")
                print("--score:\n\tCalculates the usability and security "
                      "scores of a number of file access\n\tcontrol policies"
                      ", replayed over the simulated accesses. Prints results"
                      "\n\tand saves them to the output directory.\n")
                print(
                    "--skip=<Policy,Policy,'graphs'>:\n\tSkip the scoring of "
                    "policies in the lists. If the list contains the word"
                    "\n\t'graphs', skips the general graph computation.\n")
                sys.exit()
            elif opt in ('-c', '--check-missing'):
                __setCheckMissing(True)
            elif opt in ('-e', '--check-excluded-files'):
                __setCheckExcludedFiles(True)
            elif opt in ('-x', '--extensions'):
                __setPrintExtensions(True)
            elif opt in ('-d', '--debug'):
                __setDebug(True)
            elif opt in ('-r', '--related-files'):
                __setRelatedFiles(True)
            elif opt in ('-s', '--score'):
                __setScore(True)
            elif opt in ('-p', '--print-clusters', '--clusters'):
                __setPrintClusters(True)
            elif opt in ('-g', '--graph-clusters', '--graph'):
                __setGraph(True)
            elif opt in ('-t', '--attacks'):
                __setAttacks(True)
            elif opt in ('-G', '--disable-plotting'):
                __setPlottingDisabled(True)
            elif opt in ('-f', '--frequency'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __setFrequency(arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-o', '--output-fs', '--output'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __setOutputFs(arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-u', '--user'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __setUser(arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-i', '--inode'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __opt_inode_query = (arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-a', '--post-analysis'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __opt_post_analysis = (arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-q', '--quick-pol'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __opt_quick_pol = (arg[1:] if arg[0] == '=' else arg)
            elif opt in ('-k', '--skip'):
                if not arg:
                    print(USAGE_STRING)
                    sys.exit(2)
                __opt_skip = (arg[1:] if arg[0] == '=' else arg)
                __setSkip(__opt_skip.split(","))

    registerTimePrint()

    if __opt_post_analysis:
        if relatedFilesEnabled():
            tprnt("Starting post-analysis of related files...\n")
            engine = FrequentFileEngine()
            engine.processFrequentItemLists(__opt_post_analysis)

        else:
            tprnt("Starting post-analysis of usability/security scores...\n")
            from AnalysisEngine import AnalysisEngine
            if outputFsEnabled():
                engine = AnalysisEngine(inputDir=__opt_post_analysis,
                                        outputDir=outputFsEnabled())
            else:
                engine = AnalysisEngine(inputDir=__opt_post_analysis)
            engine.analyse()

        sys.exit(0)

    # Make the application, event and file stores
    store = ApplicationStore.get()
    evStore = EventStore.get()
    fileStore = FileStore.get()
    initMimeTypes()
    datapath = getDataPath()

    # Load up user-related variables
    userConf = UserConfigLoader.get(path=datapath + USERCONFIGNAME)

    # Load up and check the SQLite database
    sql = None
    tprnt("\nLoading the SQLite database: %s..." % (datapath + DATABASENAME))
    try:
        sql = SqlLoader(datapath + DATABASENAME)
    except ValueError as e:
        print("Failed to parse SQL: %s" % e.args[0], file=sys.stderr)
        sys.exit(-1)
    if checkMissingEnabled():
        tprnt("Checking for missing application identities...")
        sql.listMissingActors()
    sql.loadDb(store)
    sqlAppCount = sql.appCount
    sqlInstCount = sql.instCount
    sqlEvCount = sql.eventCount
    sqlValidEvCount = sql.validEventRatio
    tprnt("Loaded the SQLite database.")

    # Load up the PreloadLogger file parser
    tprnt("\nLoading the PreloadLogger logs in folder: %s..." % datapath)
    pll = PreloadLoggerLoader(datapath)
    if checkMissingEnabled():
        tprnt("Checking for missing application identities...")
        pll.listMissingActors()
    pll.loadDb(store)
    pllAppCount = pll.appCount
    pllInstCount = pll.instCount
    pllEvCount = pll.eventCount
    pllValidEvCount = pll.validEventRatio
    tprnt("Loaded the PreloadLogger logs.")

    # Resolve actor ids in all apps' events
    tprnt("\nUsing PreloadLogger Applications to resolve interpreters in "
          "Zeitgeist Applications...")
    (interpretersAdded, instancesEliminated) = store.resolveInterpreters()
    tprnt("Resolved interpreter ids in %d Applications, and removed %d "
          "instances by merging them with another as a result." %
          (interpretersAdded, instancesEliminated))

    # Update events' actor ids in the ApplicationStore, then take them and
    # send them to the EventStore. Finally, sort the EventStore by timestamp.
    tprnt("\nInserting and sorting all events...")
    store.sendEventsToStore()
    evStore.sort()
    evCount = evStore.getEventCount()
    tprnt("Sorted all %d events in the event store." % evCount)

    # Simulate the events to build a file model
    tprnt("\nSimulating all events to build a file model...")
    evStore.simulateAllEvents()
    del sql
    del pll
    evStore.sort()
    tprnt("Simulated all events. %d files initialised." % len(fileStore))

    appCount = store.getAppCount()
    userAppCount = store.getUserAppCount()
    instCount = len(store)
    userInstCount = store.getUserInstCount()
    fileCount = len(fileStore)
    docCount = fileStore.getUserDocumentCount(userConf.getSetting("HomeDir"))

    if printExtensions():
        exts = set()
        for f in fileStore:
            exts.add(f.getExtension())
        exts.discard(None)
        tprnt("Info: the following file extensions were found:")
        for e in sorted(exts):
            print("\t%s: %s" %
                  (e, mimetypes.guess_type("f.%s" % e, strict=False)))

        if checkExcludedFilesEnabled():
            tprnt("\nPrinting files written and read by instances which wrote"
                  "to excluded directories...")
            dbgPrintExcludedEvents()
        import time as t
        t.sleep(10)

    # Manage --inode queries
    if __opt_inode_query:
        inodes = __opt_inode_query.split(",")
        for inode in sorted(int(i) for i in inodes):
            f = fileStore.getFile(inode)
            tprnt("\nInode queried: %d" % inode)
            tprnt("Corresponding file: %s\n\t(%s)" % (f.getName(), f))
        sys.exit(0)

    # Print the model as proof of concept
    if debugEnabled():
        tprnt("\nPrinting the file model...\n")
        fileStore.printFiles(showDeleted=True,
                             showCreationTime=True,
                             showDocumentsOnly=True,
                             userHome=userConf.getSetting("HomeDir"),
                             showDesignatedOnly=False)

    # Make the filesystem corresponding to the model
    if outputFsEnabled():
        tprnt("\nMaking a copy of the file model at '%s'...\n" %
              outputFsEnabled())
        fileStore.makeFiles(outputDir=outputFsEnabled(),
                            showDeleted=True,
                            showDocumentsOnly=False,
                            userHome=userConf.getSetting("HomeDir"),
                            showDesignatedOnly=False)

        with open(os.path.join(outputFsEnabled(), "statistics.txt"), "w") as f:
            msg = "SQL: %d apps; %d instances; %d events; %d%% valid\n" % \
                  (sqlAppCount, sqlInstCount, sqlEvCount, sqlValidEvCount)
            msg += "PreloadLogger: %d apps; %d instances; %d events; " \
                   "%d%% valid\n" % \
                  (pllAppCount, pllInstCount, pllEvCount, pllValidEvCount)
            msg += "Simulated: %d apps; %d instances; %d user apps; %d user" \
                   " instances; %d events; %d files; %d user documents\n" % \
                  (appCount, instCount, userAppCount, userInstCount,
                   evCount, fileCount, docCount)
            exclLists = userConf.getDefinedSecurityExclusionLists()
            for l in exclLists:
                msg += "Exclusion list '%s' defined.\n" % l
            print(msg, file=f)

    # Build a general access graph.
    if graphEnabled():
        skipList = skipEnabled()
        if skipList and 'graphs' in skipList:
            tprnt("\nGraphs in skip list, skipping global graph generation.")
        else:
            engine = GraphEngine.get()
            engine.runGraph(policy=None)

    # Policy engine. Create a policy and run a simulation to score it.
    if scoreEnabled() or attacksEnabled() or graphEnabled():
        engine = PolicyEngine()

        if __opt_quick_pol:
            policies = [__opt_quick_pol]
            polArgs = [None]
        else:
            policies = [
                CompoundLibraryPolicy,
                CustomLibraryPolicy,
                DesignationPolicy,
                DistantFolderPolicy,
                FilenamePolicy,
                FileTypePolicy,
                FolderPolicy,
                OneDistantFolderPolicy,
                OneFolderPolicy,
                OneLibraryPolicy,
                UnsecurePolicy,
                Win10Policy,
                Win8Policy,
                HSecurePolicy,
                HBalancedPolicy,
                'HSecureSbPolicy',
                'HSecureSbFaPolicy',
                'HSecureFaPolicy',
                'HBalancedSbPolicy',
                'HBalancedSbFaPolicy',
                'HBalancedFaPolicy',
                'OneDistantFolderSbPolicy',
                'OneDistantFolderSbFaPolicy',
                'OneDistantFolderFaPolicy',
                'HUsableSecuredSbPolicy',
                'HUsableSecuredSbFaPolicy',
                'HUsableSecuredFaPolicy',
                'HBalancedSecuredSbPolicy',
                'HBalancedSecuredSbFaPolicy',
                'HBalancedSecuredFaPolicy',
                'DistantFolderSbPolicy',
                'DistantFolderSbFaPolicy',
                'DistantFolderFaPolicy',
                'LibraryFolderSbPolicy',
                'LibraryFolderSbFaPolicy',
                'LibraryFolderFaPolicy',
                'FileTypeSbPolicy',
                'FileTypeSbFaPolicy',
                'FileTypeFaPolicy',
                'OneFolderSbPolicy',
                'OneFolderSbFaPolicy',
                'OneFolderFaPolicy',
                'FolderSbPolicy',
                'FolderSbFaPolicy',
                'FolderFaPolicy',
                'OneLibrarySbPolicy',
                'OneLibrarySbFaPolicy',
                'OneLibraryFaPolicy',
                'CompoundLibrarySbPolicy',
                'CompoundLibrarySbFaPolicy',
                'CompoundLibraryFaPolicy',
                'CustomLibrarySbPolicy',
                'CustomLibrarySbFaPolicy',
                'CustomLibraryFaPolicy',
            ]

            # One constructor-argument slot per policy; replace individual
            # entries to pass arguments, e.g.
            # polArgs[idx] = dict(folders=["~/Downloads", "/tmp"]).
            polArgs = [None] * len(policies)

        skipList = skipEnabled()
        for (polIdx, polName) in enumerate(policies):
            pol = None
            arg = polArgs[polIdx]

            # Names with certain suffixes are dynamically generated policies.
            if isinstance(polName, str):
                if polName.endswith('SbPolicy'):
                    pols = [
                        getattr(sys.modules[__name__],
                                polName[:-8] + 'Policy'), StickyBitPolicy
                    ]
                    args = [arg, dict(folders=["~", "/media", "/mnt"])]
                    pol = CompositionalPolicy(pols, args, polName)
                elif polName.endswith('SbFaPolicy'):
                    pols = [
                        getattr(sys.modules[__name__],
                                polName[:-10] + 'Policy'), StickyBitPolicy,
                        FutureAccessListPolicy
                    ]
                    args = [arg, dict(folders=["~", "/media", "/mnt"]), None]
                    pol = CompositionalPolicy(pols, args, polName)
                elif polName.endswith('FaPolicy'):
                    pols = [
                        getattr(sys.modules[__name__],
                                polName[:-8] + 'Policy'),
                        FutureAccessListPolicy
                    ]
                    args = [arg, None]
                    pol = CompositionalPolicy(pols, args, polName)
                # A plain policy name: resolve the class by name so it can
                # be instantiated below.
                else:
                    polName = getattr(sys.modules[__name__], polName)

            # Pre-existing policy classes with arguments, or plain policies
            # passed as strings, including via the --quick-pol flag.
            if not pol:
                pol = polName(**arg) if arg else polName()

            tprnt("\nRunning %s..." % pol.name)

            if skipList and pol.name in skipList:
                tprnt("%s is in skip list, skipping." % pol.name)
                continue

            engine.runPolicy(pol,
                             outputDir=outputFsEnabled(),
                             printClusters=printClustersEnabled())

            if pol.name == "FileTypePolicy" and checkMissingEnabled():
                pol.abortIfUnsupportedExtensions()

            if attacksEnabled():
                tprnt("Simulating attacks on %s..." % pol.name)
                sim = AttackSimulator(seed=0)
                sim.runAttacks(pol, outputDir=outputFsEnabled() or "/tmp/")

            del pol

    # Calculate frequently co-accessed files:
    if relatedFilesEnabled():
        engine = FrequentFileEngine()

        tprnt("\nMining for frequently co-accessed file types...")
        engine.mineFileTypes()