def _largeSmallSplit(self, pids: list, index: int, large: Application,
                         small: Application, inserting: Application):
        """Cut `large` in two around `small` and lay the pieces out in `pids`.

        `large` is split so that its first piece ends one time unit before
        `small` starts, and its second piece starts one time unit after
        `small` ends. The list is then arranged as: first piece, `small`,
        second piece.

        :param pids: list of Application instances, modified in place.
        :param index: position in `pids` of the item the inserted
            Application was compared against.
        :param large: the Application whose lifetime covers the other one.
        :param small: the Application that is covered by `large`.
        :param inserting: whichever of `large` / `small` is not yet in `pids`.
        :return: the same `pids` list object, after modification.
        """
        cutBefore = small.getTimeOfStart() - 1
        cutAfter = small.getTimeOfEnd() + 1
        (head, tail) = large.split(beforeEnd=cutBefore, afterStart=cutAfter)

        if inserting == large:
            # The item at `index` is kept where it is; the two pieces of the
            # inserted Application are placed right after and right before.
            pids.insert(index + 1, tail)
            pids.insert(index, head)
            return pids

        # The item at `index` is replaced by its first piece, and is then
        # followed by `small` and by the second piece.
        pids[index] = head
        pids.insert(index + 1, tail)
        pids.insert(index + 1, small)
        return pids
    # Example #2
    # 0
    def loadDb(self, store: ApplicationStore = None):
        """Browse the SQLite db and create all the relevant app instances.

        All rows of `event_view` whose event id has at least one subject URI
        starting with "activity://" are fetched. Rows sharing an event id are
        merged into a single SqlEvent carrying several subjects. Events are
        then grouped by PID, sorted by timestamp, and turned into successive
        Application instances (a new instance starts every time the Desktop
        id derived from the event's actor URI changes).

        Sets `self.cur`, `self.appCount`, `self.instCount`, `self.eventCount`
        and `self.validEventRatio`, and prints a summary to stdout.

        :param store: optional ApplicationStore; when given, every
            Application that is not a study app is inserted into it.
        :raises AssertionError: if two rows of the same event carry
            different, non-zero PIDs.
        """

        # Load up our events from the Zeitgeist database
        self.cur = self.con.cursor()
        self.cur.execute('SELECT * \
                          FROM event_view \
                          WHERE id IN (SELECT DISTINCT id \
                                       FROM event_view \
                                       WHERE subj_uri LIKE "activity://%")')

        # Compiled once, outside the row loop. The lookbehind only matches
        # digits that directly follow "pid://".
        pidPattern = re.compile(r'(?<=pid://)\d+')

        # Merge all event subjects based on their event id, and find their pids
        eventsMerged = dict()
        data = self.cur.fetchone()
        while data:
            m = pidPattern.search(data[EV_SUBJ_URI])
            pid = int(m.group(0)) if m else 0

            ev = eventsMerged.get(data[EV_ID])
            if ev is None:
                ev = SqlEvent(id=data[EV_ID],
                              pid=pid,
                              timestamp=data[EV_TIMESTAMP],
                              interpretation=self.getInterpretation(
                                             data[EV_INTERPRETATION]),
                              manifestation=self.getManifestation(
                                             data[EV_MANIFESTATION]),
                              origin_uri=data[EV_EVENT_ORIGIN_URI],
                              actor_uri=data[EV_ACTOR_URI])
            elif pid and ev.pid:
                # Raised explicitly (rather than via `assert`) so that the
                # check is not stripped when Python runs with -O.
                if ev.pid != pid:
                    raise AssertionError(
                        "Error: multiple events record a pid for event %d, "
                        "and they disagree on the pid to record "
                        "(%d != %d)." % (data[EV_ID], ev.pid, pid))
            elif pid and not ev.pid:
                ev.pid = pid

            subj = SqlEventSubject(uri=data[EV_SUBJ_URI],
                                   interpretation=self.getInterpretation(
                                                 data[EV_SUBJ_INTERPRETATION]),
                                   manifestation=self.getManifestation(
                                                 data[EV_SUBJ_MANIFESTATION]),
                                   origin_uri=data[EV_SUBJ_ORIGIN_URI],
                                   mimetype=self.getMimeType(
                                            data[EV_SUBJ_MIMETYPE]),
                                   text=data[EV_SUBJ_TEXT],
                                   storage_uri=data[EV_SUBJ_STORAGE],
                                   current_uri=data[EV_SUBJ_CURRENT_URI])
            ev.addSubject(subj)
            eventsMerged[data[EV_ID]] = ev

            data = self.cur.fetchone()

        # Now, sort the events per app PID so we can build apps
        nopids = []            # Matching events without a PID
        eventsPerPid = dict()  # Storage for our events
        count = len(eventsMerged)  # Counter of fetched events, for stats
        instanceCount = 0      # Count of distinct app instances in the dataset
        actors = set()

        for event in eventsMerged.values():
            if event.pid:
                eventsPerPid.setdefault(event.pid, []).append(event)
            else:
                nopids.append(event)
        del eventsMerged  # no longer needed

        # For each PID, we'll now identify the successive Application instances
        for (pkey, pevent) in eventsPerPid.items():
            pevent = sorted(pevent, key=lambda x: x.timestamp)
            currentId = ''     # currently matched Desktop Id
            currentApp = None  # currently matched Application
            apps = []          # temp storage for found Applications

            for ev in pevent:
                (evId, __) = Application.getDesktopIdFromDesktopUri(
                    ev.actor_uri)

                if evId != currentId:
                    if debugEnabled():
                        print ("New application:", evId, currentId, ev)
                    currentId = evId
                    currentApp = Application(desktopid=evId,
                                             pid=int(pkey),
                                             tstart=ev.timestamp,
                                             tend=ev.timestamp)
                    actors.add(currentApp.desktopid)
                    apps.append(currentApp)
                else:
                    currentApp.setTimeOfStart(min(ev.timestamp,
                                                  currentApp.getTimeOfStart()))

                    currentApp.setTimeOfEnd(max(ev.timestamp,
                                                currentApp.getTimeOfEnd()))
                # Ignore study artefacts!
                if not currentApp.isStudyApp():
                    event = Event(actor=currentApp,
                                  time=ev.timestamp,
                                  zgEvent=ev)
                    currentApp.addEvent(event)

            # Insert into the ApplicationStore if one was given to us.
            # Note that study apps are only discounted from the instance
            # count when a store is given.
            instanceCount += len(apps)
            if store is not None:
                for app in apps:
                    # Ignore study artefacts!
                    if not app.isStudyApp():
                        store.insert(app)
                    else:
                        instanceCount -= 1  # We discount this app instance

        self.appCount = len(actors)
        self.instCount = instanceCount
        self.eventCount = count
        # With no event at all the ratio is undefined; report 0% accepted
        # instead of failing on a division by zero.
        if count:
            self.validEventRatio = 100 - 100 * len(nopids) / count
        else:
            self.validEventRatio = 0.0

        print("Finished loading DB.\n%d events seen, %d normal, %d without a "
              "PID.\nIn total, %.02f%% events accepted." % (
               count,
               count-len(nopids),
               len(nopids),
               self.validEventRatio))
        print("Instance count: %d" % self.instCount)
    def insert(self, app: Application):
        """Insert an Application in the store.

        The per-PID list is walked until an item that does not end before
        `app` starts is found. `app` is then either inserted before that
        item, merged into it (same Desktop id), or split against it
        (different Desktop ids, overlapping lifetimes). If every item ends
        before `app` starts, `app` is appended to the list.

        :param app: the Application to insert.
        :raises ValueError: if `app` has a PID of 0, no Desktop identifier,
            or a time of start later than its time of end.
        """
        if app.pid == 0:
            raise ValueError("Applications must have a valid PID.")
        if not app.desktopid:
            raise ValueError("Applications must have a Desktop identifier.")

        appStart = app.getTimeOfStart()
        appEnd = app.getTimeOfEnd()
        if appStart > appEnd:
            raise ValueError("Applications must have valid times of start and "
                             "end.")

        # Instances already known for this PID.
        instances = self.pidStore.get(app.pid, list())  # type: list

        mergeAt = -1     # index whose neighbours must be checked afterwards
        handled = False  # set once an item decided what happens to `app`

        for (pos, other) in enumerate(instances):
            otherStart = other.getTimeOfStart()
            otherEnd = other.getTimeOfEnd()

            # This item ends before ours starts: look further down the list.
            if otherEnd < appStart:
                continue

            # From here on, this item settles the matter and the walk stops.
            handled = True

            if otherStart > appEnd:
                # This item starts after ours ends: we go right before it.
                instances.insert(pos, app)
                mergeAt = pos
            elif (otherEnd >= appStart) or (otherStart <= appEnd):
                # The two lifetimes conflict.
                if app.hasSameDesktopId(other, resolveInterpreter=True):
                    # Same identity: fold the new app into the existing one.
                    other.merge(app)
                    instances[pos] = other
                    mergeAt = pos
                else:
                    # Different identities: warn, then hand over to the
                    # splitting logic, which works out how they overlap.
                    print("Warning: Applications %s and %s overlap on PID %d" %
                          (app.desktopid, other.desktopid, app.pid),
                          file=sys.stderr)

                    instances = self.dispatchSplit(instances, pos, app, other)

                    # After a split we do not know where the inserted pieces
                    # ended up nor how much the list grew, so the whole
                    # (short) list is scanned for merges instead of only the
                    # neighbours of one index.
                    instances = self._mergePidList(instances)
            break

        # No item settled the matter: app is the last item on the list.
        if not handled:
            instances.append(app)

        # When the app was inserted or merged at a known index, let
        # neighbouring items with the same Desktop ID be merged with it. This
        # is needed to help Events from Zeitgeist and PreloadLogger to
        # synchronise.
        if mergeAt >= 0:
            instances = self._mergePidItem(instances, mergeAt)

        self.pidStore[app.getPid()] = instances
        self.nameStoreClean = False
    def dispatchSplit(self, pids: list, index: int, app: Application,
                      bpp: Application):
        """Pick the split routine matching how two Applications overlap.

        A (`app`) is the Application being inserted and B (`bpp`) is the one
        it is compared against. When one lifetime covers the other,
        `_largeSmallSplit` is used; when they only partly overlap,
        `_mixedSplit` is used with the two apps passed as `before` and
        `after`.

        :param pids: list of Application instances for the PID.
        :param index: index forwarded to the split routine (in `insert`, the
            position of `bpp` in `pids`).
        :param app: the Application being inserted (A).
        :param bpp: the Application it overlaps with (B).
        :return: the list returned by the chosen split routine, or `pids`
            itself if neither overlap condition holds.
        """
        aStart = app.getTimeOfStart()
        aEnd = app.getTimeOfEnd()
        bStart = bpp.getTimeOfStart()
        bEnd = bpp.getTimeOfEnd()

        def coverSplit(large, small):
            # One app's lifetime covers the other's.
            return self._largeSmallSplit(pids,
                                         index,
                                         large=large,
                                         small=small,
                                         inserting=app)

        def partialSplit(before, after):
            # The two lifetimes only partly overlap.
            return self._mixedSplit(pids,
                                    index,
                                    before=before,
                                    after=after,
                                    inserting=app)

        # First overlap condition: B does not end before A starts.
        if bEnd >= aStart:
            if bEnd >= aEnd:
                # B ends no earlier than A.
                if bStart <= aStart:
                    # A is embedded into B, so B gets split.
                    return coverSplit(bpp, app)
                # B starts during A and ends after it.
                return partialSplit(app, bpp)

            # A ends after B.
            if bStart >= aStart:
                # B is embedded into A, so A gets split.
                return coverSplit(app, bpp)
            # A starts during B and ends after it.
            return partialSplit(bpp, app)

        # Second overlap condition: B does not start after A ends.
        if bStart <= aEnd:
            if bStart <= aStart:
                if bEnd >= aEnd:
                    return coverSplit(bpp, app)
                return partialSplit(bpp, app)

            if bEnd <= aEnd:
                return coverSplit(app, bpp)
            return partialSplit(app, bpp)

        # Neither condition held: nothing to split.
        return pids