def storeViewsToCache(self):
        """
        Determine which "documents" need to be sent to the usercache. This
        is currently the trips for the past three days. Any entries older than 3 days
        should be purged. Note that this currently repeats information - the data that
        was from day before yesterday, for example, would have been sent at that point
        as well.  As an optimization, we could use something like CouchDB to only send
        the diffs back and forth. Or use a simple syncing mechanism in which the
        write_ts of the "document" reflects time that the documents were generated, and
        only sync new documents? In general, the write_ts of the generated document
        should be within a few hours of the intake document.

        Second question: How do we send back the travel diary data? As the
        raw entries from the database, or in geojson format?

        After much thought, we are planning to send materialized views over
        the data in various json formats. In particular, we plan to send
        the trip information in geojson format, and the resulting
        visualizations in either vega or mpld3 format.

        Since we plan to store all the data in a giant event database,
        materializing the views is likely to be a complex process. Doing
        the materialization ahead of time helps with responsiveness. Note
        that in principle, we could store the timeseries and materialize
        views directly on the phone as well, but in general we choose to do that on the
        server for flexibility and ease of programming.

        This also means that the write_ts of the view naturally corresponds
        to the time that it was generated, as opposed to sensed, which
        provides us with all kinds of goodness.

        :return: Nothing. As a side effect, we materialize views from the
        data and store them into the usercache to be sent to the phone
        """
        # Currently the only views generated are the geojson
        # representations of the trips for the last 3 days. By default, the
        # write_ts of the entry is the time that it was generated, which
        # may not be the right choice for queries on the phone. There, we
        # want to query by start or end time of the trip.
        #
        # Right now, we will go with the generated time, since it is
        # unlikely to be off by much from the trip end time - at least
        # good enough for searching within a day. We still need to decide
        # whether to provide customizations to search by data in addition
        # to metadata.

        # Finally, we don't really want to generate data for trips that we
        # haven't finished analysis for. Right now, we only perform three
        # kinds of analyses - trip segmentation, section segmentation and
        # smoothing, but later we may perform both mode and semantic analyses.
        # We only want to send the data to the phone once analyses are
        # complete. In particular, we don't want to send trips for which we
        # haven't yet generated sections.

        # We could create a new pipeline state for this. But instead, we
        # just query starting from the last "done" ts of the last pipeline
        # stage instead of from "now". The last "done" ts is the start_ts
        # that the pipeline would use if it were to run again.

        start_ts = esp.get_complete_ts(self.user_id)
        trip_gj_list = self.get_trip_list_for_seven_days(start_ts)
        if len(trip_gj_list) == 0:
            ts = etsa.TimeSeries.get_time_series(self.user_id)
            max_loc_ts = ts.get_max_value_for_field("background/filtered_location", "data.ts")
            if max_loc_ts == -1:
                logging.warning("No entries for user %s, early return" % self.user_id)
                return
            if max_loc_ts > start_ts:
                # We have locations, but no trips from them. That seems wrong.
                # But we should get there eventually and then we will have trips.
                logging.warning("No analysis has been done on recent points! max_loc_ts %s > start_ts %s, early return" %
                                (max_loc_ts, start_ts))
                return
            trip_gj_list = self.get_trip_list_for_seven_days(max_loc_ts)
        day_list_bins = self.bin_into_days_by_local_time(trip_gj_list)
        uc = enua.UserCache.getUserCache(self.user_id)

        for day, day_gj_list in day_list_bins.items():
            logging.debug("Adding %s trips for day %s" % (len(day_gj_list), day))
            uc.putDocument("diary/trips-%s" % day, day_gj_list)

        self.delete_obsolete_entries(uc, list(day_list_bins.keys()))
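
The diff-based sync discussed in the docstring above is not implemented in this
snippet. The following is a minimal sketch of it, assuming each cached document
is a dict carrying its generation time under metadata.write_ts; the function
and parameter names are illustrative, not part of the real usercache API.

def select_new_documents(documents, last_sync_ts):
    # Send only documents generated after the last successful sync,
    # instead of resending the full multi-day window every time.
    # write_ts reflects generation time rather than sensed time, so
    # this comparison is enough to pick out the unsent documents.
    return [doc for doc in documents
            if doc["metadata"]["write_ts"] > last_sync_ts]
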
Example #2
    def storeTimelineToCache(self, time_query):
        """
        Store trips for the last week into the cache. Any entries older
        than this window should be purged. Note that this currently repeats
        information - the data from the day before yesterday, for example,
        would have been sent at that point as well. As an optimization, we
        could use something like CouchDB to send only the diffs back and
        forth, or use a simple syncing mechanism in which the write_ts of
        the "document" reflects the time that the document was generated
        and we only sync new documents. In general, the write_ts of the
        generated document should be within a few hours of that of the
        intake document.

        Second question: How do we send back the travel diary data? As the
        raw entries from the database, or in geojson format?

        After much thought, we are planning to send materialized views over
        the data in various json formats. In particular, we plan to send
        the trip information in geojson format, and the resulting
        visualizations in either vega or mpld3 format.

        Since we plan to store all the data in a giant event database,
        materializing the views is likely to be a complex process. Doing
        the materialization ahead of time helps with responsiveness. Note
        that in principle, we could store the timeseries and materialize
        views directly on the phone as well, but in general we choose to do that on the
        server for flexibility and ease of programming.

        This also means that the write_ts of the view naturally corresponds
        to the time that it was generated, as opposed to sensed, which
        provides us with all kinds of goodness.

        :return: Nothing. As a side effect, we materialize views from the
        data and store them into the usercache to be sent to the phone
        """
        # Currently the only views generated are the geojson
        # representations of the trips for the last week. By default, the
        # write_ts of the entry is the time that it was generated, which
        # may not be the right choice for queries on the phone. There, we
        # want to query by start or end time of the trip.
        #
        # Right now, we will go with the generated time, since it is
        # unlikely to be off by much from the trip end time - at least
        # good enough for searching within a day. We still need to decide
        # whether to provide customizations to search by data in addition
        # to metadata.

        # Finally, we don't really want to generate data for trips that we
        # haven't finished analysis for. Right now, we only perform three
        # kinds of analyses - trip segmentation, section segmentation and
        # smoothing, but later we may perform both mode and semantic analyses.
        # We only want to send the data to the phone once analyses are
        # complete. In particular, we don't want to send trips for which we
        # haven't yet generated sections.

        # We could create a new pipeline state for this. But instead, we
        # just query starting from the last "done" ts of the last pipeline
        # stage instead of from "now". The last "done" ts is the start_ts
        # that the pipeline would use if it were to run again.

        start_ts = esp.get_complete_ts(self.user_id)
        logging.debug(
            "start ts from pipeline = %s, %s" %
            (start_ts, pydt.datetime.utcfromtimestamp(start_ts).isoformat()))
        trip_gj_list = self.get_trip_list_for_seven_days(start_ts)
        if len(trip_gj_list) == 0:
            ts = etsa.TimeSeries.get_time_series(self.user_id)
            max_loc_ts = ts.get_max_value_for_field(
                "background/filtered_location", "data.ts")
            if max_loc_ts == -1:
                logging.warning("No entries for user %s, early return " %
                                self.user_id)
                return
            if max_loc_ts > start_ts:
                # We have locations, but no trips from them. That seems wrong.
                # But we should get there eventually and then we will have trips.
                logging.warning(
                    "No analysis has been done on recent points! max_loc_ts %s > start_ts %s, early return"
                    % (max_loc_ts, start_ts))
                return
            trip_gj_list = self.get_trip_list_for_seven_days(max_loc_ts)
        day_list_bins = self.bin_into_days_by_local_time(trip_gj_list)
        uc = enua.UserCache.getUserCache(self.user_id)

        for day, day_gj_list in day_list_bins.items():
            logging.debug("Adding %s trips for day %s" %
                          (len(day_gj_list), day))
            uc.putDocument("diary/trips-%s" % day, day_gj_list)

        valid_key_list = [
            "diary/trips-%s" % day for day in day_list_bins.keys()
        ]
        self.delete_obsolete_entries(uc, valid_key_list)
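
bin_into_days_by_local_time is not shown in this snippet. Below is a minimal
sketch of the binning it implies, assuming each trip feature exposes its local
start date under properties.start_local_dt with year/month/day fields; that
field layout is an assumption, and the function name here is illustrative.

import collections

def bin_by_local_day(trip_gj_list):
    day_bins = collections.defaultdict(list)
    for trip_gj in trip_gj_list:
        # Assumed field layout for the local start timestamp.
        dt = trip_gj["properties"]["start_local_dt"]
        # Key each trip by its local calendar day, matching the
        # "diary/trips-YYYY-MM-DD" document names used above.
        day = "%04d-%02d-%02d" % (dt["year"], dt["month"], dt["day"])
        day_bins[day].append(trip_gj)
    return dict(day_bins)
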
Example #3
def get_complete_ts(user_id):
    complete_ts = esp.get_complete_ts(user_id)
    logging.debug("Returning complete_ts = %s" % complete_ts)
    return complete_ts
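
A usage sketch for the wrapper above; the UUID literal is a placeholder, and in
the pipeline code the equivalent call is esp.get_complete_ts(self.user_id).

if __name__ == "__main__":
    import uuid
    # Placeholder user id; a real deployment would look this up from the DB.
    test_user_id = uuid.UUID("00000000-0000-0000-0000-000000000000")
    print("complete_ts = %s" % get_complete_ts(test_user_id))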