Beispiel #1
0
    def DownloadActivityList(self, svcRec, exhaustive=False):
        dbcl = self._getClient(svcRec)
        if not svcRec.Authorization["Full"]:
            syncRoot = "/"
        else:
            syncRoot = svcRec.Config["SyncRoot"]
        cache = cachedb.dropbox_cache.find_one({"ExternalID": svcRec.ExternalID})
        if cache is None:
            cache = {"ExternalID": svcRec.ExternalID, "Structure": [], "Activities": {}}
        if "Structure" not in cache:
            cache["Structure"] = []
        self._folderRecurse(cache["Structure"], dbcl, syncRoot)

        activities = []
        exclusions = []

        for dir in cache["Structure"]:
            for file in dir["Files"]:
                path = file["Path"]
                if svcRec.Authorization["Full"]:
                    relPath = path.replace(syncRoot, "", 1)
                else:
                    relPath = path.replace("/Apps/tapiriik/", "", 1)  # dropbox api is meh api

                hashedRelPath = self._hash_path(relPath)
                if hashedRelPath in cache["Activities"]:
                    existing = cache["Activities"][hashedRelPath]
                else:
                    existing = None

                if not existing:
                    # Continue to use the old records keyed by UID where possible
                    existing = [
                        (k, x) for k, x in cache["Activities"].items() if "Path" in x and x["Path"] == relPath
                    ]  # path is relative to syncroot to reduce churn if they relocate it
                    existing = existing[0] if existing else None
                    if existing is not None:
                        existUID, existing = existing
                        existing["UID"] = existUID

                if existing and existing["Rev"] == file["Rev"]:
                    # don't need entire activity loaded here, just UID
                    act = UploadedActivity()
                    act.UID = existing["UID"]
                    try:
                        act.StartTime = datetime.strptime(existing["StartTime"], "%H:%M:%S %d %m %Y %z")
                    except:
                        act.StartTime = datetime.strptime(
                            existing["StartTime"], "%H:%M:%S %d %m %Y"
                        )  # Exactly one user has managed to break %z :S
                    if "EndTime" in existing:  # some cached activities may not have this, it is not essential
                        act.EndTime = datetime.strptime(existing["EndTime"], "%H:%M:%S %d %m %Y %z")
                else:
                    logger.debug(
                        "Retrieving %s (%s)" % (path, "outdated meta cache" if existing else "not in meta cache")
                    )
                    # get the full activity
                    try:
                        act, rev = self._getActivity(svcRec, dbcl, path)
                    except APIExcludeActivity as e:
                        logger.info("Encountered APIExcludeActivity %s" % str(e))
                        exclusions.append(strip_context(e))
                        continue

                    try:
                        act.EnsureTZ()
                    except:
                        pass  # We tried.

                    if hasattr(act, "OriginatedFromTapiriik") and not act.CountTotalWaypoints():
                        # This is one of the files created when TCX export was hopelessly broken for non-GPS activities.
                        # Right now, no activities in dropbox from tapiriik should be devoid of waypoints - since dropbox doesn't receive stationary activities
                        # In the future when this changes, will obviously have to modify this code to also look at modification dates or similar.
                        if ".tcx.summary-data" in path:
                            logger.info("...summary file already moved")
                        else:
                            logger.info("...moving summary-only file")
                            dbcl.file_move(path, path.replace(".tcx", ".tcx.summary-data"))
                        continue  # DON'T include in listing - it'll be regenerated
                    del act.Laps
                    act.Laps = (
                        []
                    )  # Yeah, I'll process the activity twice, but at this point CPU time is more plentiful than RAM.
                    cache["Activities"][hashedRelPath] = {
                        "Rev": rev,
                        "UID": act.UID,
                        "StartTime": act.StartTime.strftime("%H:%M:%S %d %m %Y %z"),
                        "EndTime": act.EndTime.strftime("%H:%M:%S %d %m %Y %z"),
                    }
                tagRes = self._tagActivity(relPath)
                act.ServiceData = {"Path": path, "Tagged": tagRes is not None}

                act.Type = tagRes if tagRes is not None else ActivityType.Other

                logger.debug("Activity s/t %s" % act.StartTime)

                activities.append(act)

        if "_id" in cache:
            cachedb.dropbox_cache.save(cache)
        else:
            cachedb.dropbox_cache.insert(cache)
        return activities, exclusions
Beispiel #2
0
    def DownloadActivityList(self, svcRec, exhaustive=False):
        dbcl = self._getClient(svcRec)
        if not svcRec.Authorization["Full"]:
            syncRoot = "/"
        else:
            syncRoot = svcRec.Config["SyncRoot"]
        # Dropbox API v2 doesn't like / as root.
        if syncRoot == "/":
            syncRoot = ""
        # New Dropbox API prefers path_lower, it would seem.
        syncRoot = syncRoot.lower()

        # There used to be a massive affair going on here to cache the folder structure locally.
        # Dropbox API 2.0 doesn't support the hashes I need for that.
        # Oh well. Throw that data out now. Well, don't load it at all.
        cache = cachedb.dropbox_cache.find_one({"ExternalID": svcRec.ExternalID}, {"ExternalID": True, "Activities": True})
        if cache is None:
            cache = {"ExternalID": svcRec.ExternalID, "Activities": {}}

        try:
            list_result = dbcl.files_list_folder(syncRoot, recursive=True)
        except dropbox.exceptions.DropboxException as e:
            self._raiseDbException(e)

        def cache_writeback():
            if "_id" in cache:
                cachedb.dropbox_cache.save(cache)
            else:
                insert_result = cachedb.dropbox_cache.insert(cache)
                cache["_id"] = insert_result.inserted_id


        activities = []
        exclusions = []
        discovered_activity_cache_keys = set()

        while True:
            for entry in list_result.entries:
                if not hasattr(entry, "rev"):
                    # Not a file -> we don't care.
                    continue
                path = entry.path_lower

                if not path.endswith(".gpx") and not path.endswith(".tcx"):
                    # Not an activity file -> we don't care.
                    continue

                if svcRec.Authorization["Full"]:
                    relPath = path.replace(syncRoot, "", 1)
                else:
                    relPath = path.replace("/Apps/tapiriik/", "", 1)  # dropbox api is meh api

                hashedRelPath = self._hash_path(relPath)
                discovered_activity_cache_keys.add(hashedRelPath)
                if hashedRelPath in cache["Activities"]:
                    existing = cache["Activities"][hashedRelPath]
                else:
                    existing = None

                if existing and existing["Rev"] == entry.rev:
                    # don't need entire activity loaded here, just UID
                    act = UploadedActivity()
                    act.UID = existing["UID"]
                    try:
                        act.StartTime = datetime.strptime(existing["StartTime"], "%H:%M:%S %d %m %Y %z")
                    except:
                        act.StartTime = datetime.strptime(existing["StartTime"], "%H:%M:%S %d %m %Y") # Exactly one user has managed to break %z :S
                    if "EndTime" in existing:  # some cached activities may not have this, it is not essential
                        act.EndTime = datetime.strptime(existing["EndTime"], "%H:%M:%S %d %m %Y %z")
                else:
                    logger.debug("Retrieving %s (%s)" % (path, "outdated meta cache" if existing else "not in meta cache"))
                    # get the full activity
                    try:
                        act, rev = self._getActivity(svcRec, dbcl, path)
                    except APIExcludeActivity as e:
                        logger.info("Encountered APIExcludeActivity %s" % str(e))
                        exclusions.append(strip_context(e))
                        continue

                    try:
                        act.EnsureTZ()
                    except:
                        pass # We tried.

                    act.Laps = []  # Yeah, I'll process the activity twice, but at this point CPU time is more plentiful than RAM.
                    cache["Activities"][hashedRelPath] = {"Rev": rev, "UID": act.UID, "StartTime": act.StartTime.strftime("%H:%M:%S %d %m %Y %z"), "EndTime": act.EndTime.strftime("%H:%M:%S %d %m %Y %z")}
                    # Incrementally update the cache db.
                    # Otherwise, if we crash later on in listing
                    # (due to OOM or similar), we'll never make progress on this account.
                    cache_writeback()
                tagRes = self._tagActivity(relPath)
                act.ServiceData = {"Path": path, "Tagged": tagRes is not None}

                act.Type = tagRes if tagRes is not None else ActivityType.Other

                logger.debug("Activity s/t %s" % act.StartTime)

                activities.append(act)

            # Perform pagination.
            if list_result.has_more:
                list_result = dbcl.files_list_folder_continue(list_result.cursor)
            else:
                break

        # Drop deleted activities' records from cache.
        all_activity_cache_keys = set(cache["Activities"].keys())
        for deleted_key in all_activity_cache_keys - discovered_activity_cache_keys:
            del cache["Activities"][deleted_key]

        cache_writeback()
        return activities, exclusions
Beispiel #3
0
    def DownloadActivityList(self, svcRec, exhaustive=False):
        dbcl = self._getClient(svcRec)
        if not svcRec.Authorization["Full"]:
            syncRoot = "/"
        else:
            syncRoot = svcRec.Config["SyncRoot"]
        # Dropbox API v2 doesn't like / as root.
        if syncRoot == "/":
            syncRoot = ""
        # New Dropbox API prefers path_lower, it would seem.
        syncRoot = syncRoot.lower()

        # There used to be a massive affair going on here to cache the folder structure locally.
        # Dropbox API 2.0 doesn't support the hashes I need for that.
        # Oh well. Throw that data out now. Well, don't load it at all.
        cache = cachedb.dropbox_cache.find_one(
            {"ExternalID": svcRec.ExternalID}, {
                "ExternalID": True,
                "Activities": True
            })
        if cache is None:
            cache = {"ExternalID": svcRec.ExternalID, "Activities": {}}

        try:
            list_result = dbcl.files_list_folder(syncRoot, recursive=True)
        except dropbox.exceptions.DropboxException as e:
            self._raiseDbException(e)

        def cache_writeback():
            if "_id" in cache:
                cachedb.dropbox_cache.save(cache)
            else:
                insert_result = cachedb.dropbox_cache.insert(cache)
                cache["_id"] = insert_result.inserted_id

        activities = []
        exclusions = []
        discovered_activity_cache_keys = set()

        while True:
            for entry in list_result.entries:
                if not hasattr(entry, "rev"):
                    # Not a file -> we don't care.
                    continue
                path = entry.path_lower

                if not path.endswith(".gpx") and not path.endswith(".tcx"):
                    # Not an activity file -> we don't care.
                    continue

                if svcRec.Authorization["Full"]:
                    relPath = path.replace(syncRoot, "", 1)
                else:
                    relPath = path.replace("/Apps/tapiriik/", "",
                                           1)  # dropbox api is meh api

                hashedRelPath = self._hash_path(relPath)
                discovered_activity_cache_keys.add(hashedRelPath)
                if hashedRelPath in cache["Activities"]:
                    existing = cache["Activities"][hashedRelPath]
                else:
                    existing = None

                if existing and existing["Rev"] == entry.rev:
                    # don't need entire activity loaded here, just UID
                    act = UploadedActivity()
                    act.UID = existing["UID"]
                    try:
                        act.StartTime = datetime.strptime(
                            existing["StartTime"], "%H:%M:%S %d %m %Y %z")
                    except:
                        act.StartTime = datetime.strptime(
                            existing["StartTime"], "%H:%M:%S %d %m %Y"
                        )  # Exactly one user has managed to break %z :S
                    if "EndTime" in existing:  # some cached activities may not have this, it is not essential
                        act.EndTime = datetime.strptime(
                            existing["EndTime"], "%H:%M:%S %d %m %Y %z")
                else:
                    logger.debug("Retrieving %s (%s)" %
                                 (path, "outdated meta cache"
                                  if existing else "not in meta cache"))
                    # get the full activity
                    try:
                        act, rev = self._getActivity(svcRec, dbcl, path)
                    except APIExcludeActivity as e:
                        logger.info("Encountered APIExcludeActivity %s" %
                                    str(e))
                        exclusions.append(strip_context(e))
                        continue

                    try:
                        act.EnsureTZ()
                    except:
                        pass  # We tried.

                    act.Laps = [
                    ]  # Yeah, I'll process the activity twice, but at this point CPU time is more plentiful than RAM.
                    cache["Activities"][hashedRelPath] = {
                        "Rev": rev,
                        "UID": act.UID,
                        "StartTime":
                        act.StartTime.strftime("%H:%M:%S %d %m %Y %z"),
                        "EndTime": act.EndTime.strftime("%H:%M:%S %d %m %Y %z")
                    }
                    # Incrementally update the cache db.
                    # Otherwise, if we crash later on in listing
                    # (due to OOM or similar), we'll never make progress on this account.
                    cache_writeback()
                tagRes = self._tagActivity(relPath)
                act.ServiceData = {"Path": path, "Tagged": tagRes is not None}

                act.Type = tagRes if tagRes is not None else ActivityType.Other

                logger.debug("Activity s/t %s" % act.StartTime)

                activities.append(act)

            # Perform pagination.
            if list_result.has_more:
                list_result = dbcl.files_list_folder_continue(
                    list_result.cursor)
            else:
                break

        # Drop deleted activities' records from cache.
        all_activity_cache_keys = set(cache["Activities"].keys())
        for deleted_key in all_activity_cache_keys - discovered_activity_cache_keys:
            del cache["Activities"][deleted_key]

        cache_writeback()
        return activities, exclusions
Beispiel #4
0
    def DownloadActivityList(self, svcRec, exhaustive=False):
        dbcl = self._getClient(svcRec)
        if not svcRec.Authorization["Full"]:
            syncRoot = "/"
        else:
            syncRoot = svcRec.Config["SyncRoot"]
        cache = cachedb.dropbox_cache.find_one(
            {"ExternalID": svcRec.ExternalID})
        if cache is None:
            cache = {
                "ExternalID": svcRec.ExternalID,
                "Structure": [],
                "Activities": {}
            }
        if "Structure" not in cache:
            cache["Structure"] = []
        self._folderRecurse(cache["Structure"], dbcl, syncRoot)

        activities = []
        exclusions = []

        for dir in cache["Structure"]:
            for file in dir["Files"]:
                path = file["Path"]
                if svcRec.Authorization["Full"]:
                    relPath = path.replace(syncRoot, "", 1)
                else:
                    relPath = path.replace("/Apps/tapiriik/", "",
                                           1)  # dropbox api is meh api

                hashedRelPath = self._hash_path(relPath)
                if hashedRelPath in cache["Activities"]:
                    existing = cache["Activities"][hashedRelPath]
                else:
                    existing = None

                if not existing:
                    # Continue to use the old records keyed by UID where possible
                    existing = [
                        (k, x) for k, x in cache["Activities"].items()
                        if "Path" in x and x["Path"] == relPath
                    ]  # path is relative to syncroot to reduce churn if they relocate it
                    existing = existing[0] if existing else None
                    if existing is not None:
                        existUID, existing = existing
                        existing["UID"] = existUID

                if existing and existing["Rev"] == file["Rev"]:
                    # don't need entire activity loaded here, just UID
                    act = UploadedActivity()
                    act.UID = existing["UID"]
                    try:
                        act.StartTime = datetime.strptime(
                            existing["StartTime"], "%H:%M:%S %d %m %Y %z")
                    except:
                        act.StartTime = datetime.strptime(
                            existing["StartTime"], "%H:%M:%S %d %m %Y"
                        )  # Exactly one user has managed to break %z :S
                    if "EndTime" in existing:  # some cached activities may not have this, it is not essential
                        act.EndTime = datetime.strptime(
                            existing["EndTime"], "%H:%M:%S %d %m %Y %z")
                else:
                    logger.debug("Retrieving %s (%s)" %
                                 (path, "outdated meta cache"
                                  if existing else "not in meta cache"))
                    # get the full activity
                    try:
                        act, rev = self._getActivity(svcRec, dbcl, path)
                    except APIExcludeActivity as e:
                        logger.info("Encountered APIExcludeActivity %s" %
                                    str(e))
                        exclusions.append(strip_context(e))
                        continue

                    try:
                        act.EnsureTZ()
                    except:
                        pass  # We tried.

                    if hasattr(act, "OriginatedFromTapiriik"
                               ) and not act.CountTotalWaypoints():
                        # This is one of the files created when TCX export was hopelessly broken for non-GPS activities.
                        # Right now, no activities in dropbox from tapiriik should be devoid of waypoints - since dropbox doesn't receive stationary activities
                        # In the future when this changes, will obviously have to modify this code to also look at modification dates or similar.
                        if ".tcx.summary-data" in path:
                            logger.info("...summary file already moved")
                        else:
                            logger.info("...moving summary-only file")
                            dbcl.file_move(
                                path, path.replace(".tcx",
                                                   ".tcx.summary-data"))
                        continue  # DON'T include in listing - it'll be regenerated
                    del act.Laps
                    act.Laps = [
                    ]  # Yeah, I'll process the activity twice, but at this point CPU time is more plentiful than RAM.
                    cache["Activities"][hashedRelPath] = {
                        "Rev": rev,
                        "UID": act.UID,
                        "StartTime":
                        act.StartTime.strftime("%H:%M:%S %d %m %Y %z"),
                        "EndTime": act.EndTime.strftime("%H:%M:%S %d %m %Y %z")
                    }
                tagRes = self._tagActivity(relPath)
                act.ServiceData = {"Path": path, "Tagged": tagRes is not None}

                act.Type = tagRes if tagRes is not None else ActivityType.Other

                logger.debug("Activity s/t %s" % act.StartTime)

                activities.append(act)

        if "_id" in cache:
            cachedb.dropbox_cache.save(cache)
        else:
            cachedb.dropbox_cache.insert(cache)
        return activities, exclusions