Exemple #1
0
        def async_job(user, limit=None, after=None, before=None):
            """Fetch all activities, stream progress to ``Q`` and cache the index.

            This is a closure: ``self``, ``Q``, ``strava2dict`` and ``dtypes``
            are taken from the enclosing scope rather than passed in.

            For every fetched activity that has a summary polyline, either the
            activity itself (with ``beginTimestamp`` stringified) or an
            ``"indexing..."`` progress message is put on ``Q``.  The activity
            is put on the queue when a non-zero ``limit`` is still being
            counted down, or when its timestamp satisfies every date bound
            that was supplied.  ``StopIteration`` is always the last item put
            on ``Q``.

            Args:
                user: Object whose ``indexing(bool)`` flag is set for the
                    duration of the job.
                limit: Number of activities to put on the queue before a
                    ``{"stop_rendering": "1"}`` marker is sent.
                after: Lower bound compared against ``beginTimestamp``.
                before: Upper bound compared against ``beginTimestamp``.
            """
            user.indexing(True)

            activities_list = []
            try:
                for a in self.client().get_activities():
                    d = strava2dict(a)
                    if not d.get("summary_polyline"):
                        continue

                    activities_list.append(d)
                    timestamp = d["beginTimestamp"]

                    if limit:
                        render = True
                    elif after or before:
                        # Every bound that was given must hold; OR-ing them
                        # would match nearly everything when both are set.
                        render = ((not after or timestamp >= after) and
                                  (not before or timestamp <= before))
                    else:
                        render = False

                    if render:
                        d2 = dict(d)
                        d2["beginTimestamp"] = str(d2["beginTimestamp"])
                        Q.put(d2)
                        app.logger.info("put {} on queue".format(d2["id"]))

                        if limit:
                            limit -= 1
                            if not limit:
                                Q.put({"stop_rendering": "1"})
                    else:
                        Q.put({"msg": "indexing...{} activities".format(
                            len(activities_list))})

                    # Give other greenlets a chance to run between activities.
                    gevent.sleep(0)
            except Exception as e:
                Q.put({"error": str(e)})
            else:
                # Report the number actually indexed (a 1-based running
                # counter would be one too high once the loop has finished).
                Q.put({"msg": "done indexing {} activities."
                       .format(len(activities_list))})

                if activities_list:
                    activity_index = (pd.DataFrame(activities_list)
                                      .set_index("beginTimestamp")
                                      .sort_index(ascending=False)
                                      .astype(dtypes))

                    app.logger.debug("done with indexing for {}".format(self))
                    dt_last_indexed = datetime.utcnow()
                    packed = activity_index.to_msgpack(compress='blosc')
                    cache.set(self.index_key(),
                              (dt_last_indexed, packed),
                              CACHE_INDEX_TIMEOUT)

                    app.logger.info("cached {}, size={}".format(
                        self.index_key(), len(packed)))
                else:
                    # An empty frame has no "beginTimestamp" column, so
                    # set_index would raise; there is nothing to cache.
                    app.logger.info("no activities to index for {}"
                                    .format(self))
            finally:
                user.indexing(False)
                Q.put(StopIteration)
Exemple #2
0
 def indexing(self, status=None):
     """Get or set the cached "currently indexing" flag for this user.

     Called without ``status`` (or with ``None``) it returns whatever the
     cache holds under this user's indexing key.  Called with any other
     value it stores that value under the key, passing ``30`` as the third
     argument to ``cache.set`` (intended as a 30-second expiry, since
     indexing is not expected to take longer than that), and returns the
     result of ``cache.set``.
     """
     flag_key = "indexing {}".format(self.strava_id)
     if status is not None:
         # Signal to other processes that an indexing job is (not) running.
         return cache.set(flag_key, status, 30)
     return cache.get(flag_key)
Exemple #3
0
    def index(self, activity_ids=None, limit=None, after=None, before=None):
        """Return this user's activity index, building it when necessary.

        What happens depends on the cache:

        * If ``self.indexing()`` is truthy, a one-element list holding an
          ``"error"`` dict is returned.
        * If a cached index exists, it is topped up from the Strava client
          when older than ``CACHE_INDEX_UPDATE_TIMEOUT`` seconds (unless
          ``OFFLINE``), then filtered and returned as a list of record dicts
          with ``beginTimestamp`` converted to ``str``.
        * Otherwise a background job is scheduled on a pool and the queue it
          writes to is returned immediately.  The job puts activity dicts,
          ``"msg"`` / ``"error"`` / ``"stop_rendering"`` dicts and finally
          ``StopIteration`` on that queue.

        Args:
            activity_ids: Collection of activity ids to select.  Only used
                when a cached index exists; it takes precedence over the
                other filters.
            limit: Maximum number of (most recent) activities to return, or
                to put on the queue in the background path.
            after: Lower bound on ``beginTimestamp``.
            before: Upper bound on ``beginTimestamp``.

        Returns:
            A list of dicts (records or a single error), or the queue fed by
            the background indexing job.
        """

        def strava2dict(a):
            # Flatten the activity fields that are kept in the index.
            return {
                "id": a.id,
                "name": a.name,
                "type": a.type,
                "summary_polyline": a.map.summary_polyline,
                "beginTimestamp": a.start_date_local,
                "total_distance": float(a.distance),
                "elapsed_time": int(a.elapsed_time.total_seconds()),
                "average_speed": float(a.average_speed)
            }

        # Column dtypes applied to the index frame before it is cached.
        dtypes = {
            "id": "uint32",
            "type": "category",
            "total_distance": "float32",
            "elapsed_time": "uint32",
            "average_speed": "float16"
        }

        if self.indexing():
            return [{
                    "error": "Indexing activities for user {}...<br>Please try again in a few seconds.<br>"
                    .format(self.strava_id)
                    }]

        ind = cache.get(self.index_key())
        if ind:
            dt_last_indexed, packed = ind
            activity_index = pd.read_msgpack(packed).astype({"type": str})
            elapsed = (datetime.utcnow() -
                       dt_last_indexed).total_seconds()

            # update the index if we need to
            if (elapsed > CACHE_INDEX_UPDATE_TIMEOUT) and (not OFFLINE):
                # The index is sorted newest-first, so row 0 is the latest.
                latest = activity_index.index[0]
                app.logger.info("updating activity index for {}"
                                .format(self.strava_id))

                already_got = set(activity_index.id)

                try:
                    activities_list = [strava2dict(
                        a) for a in self.client().get_activities(after=latest)
                        if a.id not in already_got]
                except Exception as e:
                    return [{"error": str(e)}]

                if activities_list:
                    df = pd.DataFrame(activities_list).set_index(
                        "beginTimestamp")

                    activity_index = (
                        df.append(activity_index)
                        .drop_duplicates()
                        .sort_index(ascending=False)
                        .astype(dtypes)
                    )

                dt_last_indexed = datetime.utcnow()
                cache.set(self.index_key(),
                          (dt_last_indexed,
                           activity_index.to_msgpack(compress='blosc')),
                          CACHE_INDEX_TIMEOUT)

            if activity_ids:
                df = activity_index[activity_index["id"].isin(activity_ids)]
            else:
                if limit:
                    df = activity_index.head(limit)
                else:
                    df = activity_index
                    # Both bounds are applied in turn, i.e. they are AND-ed.
                    if after:
                        df = df[:after]
                    if before:
                        df = df[before:]
            df = df.reset_index()
            df.beginTimestamp = df.beginTimestamp.astype(str)
            return df.to_dict("records")

        # If we got here then the index hasn't been created yet
        Q = Queue()
        P = Pool()

        def async_job(user, limit=None, after=None, before=None):
            # Build the full index in the background, streaming progress and
            # matching activities to Q as they arrive.
            user.indexing(True)

            activities_list = []
            try:
                for a in self.client().get_activities():
                    d = strava2dict(a)
                    if not d.get("summary_polyline"):
                        continue

                    activities_list.append(d)
                    timestamp = d["beginTimestamp"]

                    if limit:
                        render = True
                    elif after or before:
                        # Every bound that was given must hold, matching the
                        # cached path above; OR-ing them would match nearly
                        # everything when both are set.
                        render = ((not after or timestamp >= after) and
                                  (not before or timestamp <= before))
                    else:
                        render = False

                    if render:
                        d2 = dict(d)
                        d2["beginTimestamp"] = str(d2["beginTimestamp"])
                        Q.put(d2)
                        app.logger.info("put {} on queue".format(d2["id"]))

                        if limit:
                            limit -= 1
                            if not limit:
                                Q.put({"stop_rendering": "1"})
                    else:
                        Q.put({"msg": "indexing...{} activities".format(
                            len(activities_list))})

                    # Give other greenlets a chance to run between activities.
                    gevent.sleep(0)
            except Exception as e:
                Q.put({"error": str(e)})
            else:
                # Report the number actually indexed (a 1-based running
                # counter would be one too high once the loop has finished).
                Q.put({"msg": "done indexing {} activities."
                       .format(len(activities_list))})

                if activities_list:
                    activity_index = (pd.DataFrame(activities_list)
                                      .set_index("beginTimestamp")
                                      .sort_index(ascending=False)
                                      .astype(dtypes))

                    app.logger.debug("done with indexing for {}".format(self))
                    dt_last_indexed = datetime.utcnow()
                    packed = activity_index.to_msgpack(compress='blosc')
                    cache.set(self.index_key(),
                              (dt_last_indexed, packed),
                              CACHE_INDEX_TIMEOUT)

                    app.logger.info("cached {}, size={}".format(
                        self.index_key(), len(packed)))
                else:
                    # An empty frame has no "beginTimestamp" column, so
                    # set_index would raise; there is nothing to cache.
                    app.logger.info("no activities to index for {}"
                                    .format(self))
            finally:
                user.indexing(False)
                Q.put(StopIteration)

        # NOTE(review): activity_ids is not forwarded to the background job,
        # so it has no effect until a cached index exists.
        P.apply_async(async_job, [self, limit, after, before])
        return Q
Exemple #4
0
 def cache(self, identifier=None, timeout=CACHE_USERS_TIMEOUT):
     """Store this object in the cache and return it.

     The cache key is ``User.key(identifier)`` when ``identifier`` is
     truthy, otherwise ``User.key(self.strava_id)``.  ``timeout`` is passed
     through to ``cache.set`` and echoed in the debug log line.
     """
     cache_key = User.key(identifier or self.strava_id)
     cache.set(cache_key, self, timeout)

     log_line = "cached {} with key '{}' for {} sec".format(
         self, cache_key, timeout)
     app.logger.debug(log_line)
     return self