Esempio n. 1
0
def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn_list):
    """
    Summarize the cleaned sections in a local date range, grouped by frequency.

    :param user_id: id for the user. None for aggregate.
    :param from_dt: start local dt object. We assume that only the year, month
    and date entries are filled in and represent a date range.
    :param to_dt: end local dt object. We assume that only the year, month
    and date entries are filled in and represent a date range.
    :param freq: since we only expand certain local_dt fields, we can only
    support frequencies corresponding to them. These are represented in the
    `LocalFreq` enum.
    :param summary_fn_list: the summary functions to apply. Each one is passed,
    in turn, to `grouped_to_summary` together with the grouped sections.
    :return: a dict with two keys:
        "last_ts_processed": the start_ts of the last row in the retrieved
        section data frame, or None if there were no matching sections.
        "result": a list with one entry per function in summary_fn_list, in
        the same order. Each entry is whatever `grouped_to_summary` returned
        for that function, or an empty list if there were no matching
        sections.
    """
    time_query = esttc.TimeComponentQuery("data.start_local_dt", from_dt, to_dt)
    section_df = esda.get_data_df(esda.CLEANED_SECTION_KEY,
                                  user_id=user_id, time_query=time_query,
                                  geo_query=None)
    if len(section_df) == 0:
        # Let the logging module do the formatting so that no work is done
        # when the INFO level is disabled.
        logging.info("Found no entries for user %s, time_query %s",
                     user_id, time_query)
        # Keep the result shape stable: one (empty) entry per summary function
        return {
            "last_ts_processed": None,
            "result": [[] for _ in summary_fn_list]
        }
    groupby_arr = _get_local_group_by(freq)
    time_grouped_df = section_df.groupby(groupby_arr)
    local_dt_fill_fn = _get_local_key_to_fill_fn(freq)
    # The grouping is computed once and shared by every summary function
    return {
        "last_ts_processed": section_df.iloc[-1].start_ts,
        "result": [grouped_to_summary(time_grouped_df, local_dt_fill_fn, summary_fn)
                   for summary_fn in summary_fn_list]
    }
Esempio n. 2
0
def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn_list):
    """
    Summarize the analysis-result sections in a local date range, grouped by
    frequency.

    :param user_id: id for the user. None for aggregate.
    :param from_dt: start local dt object. We assume that only the year, month
    and date entries are filled in and represent a date range.
    :param to_dt: end local dt object. We assume that only the year, month
    and date entries are filled in and represent a date range.
    :param freq: since we only expand certain local_dt fields, we can only
    support frequencies corresponding to them. These are represented in the
    `LocalFreq` enum.
    :param summary_fn_list: the summary functions to apply. Each one is passed,
    in turn, to `grouped_to_summary` together with the grouped sections.
    :return: a dict with two keys:
        "last_ts_processed": the start_ts of the last row in the retrieved
        section data frame, or None if there were no matching sections.
        "result": a list with one entry per function in summary_fn_list, in
        the same order. Each entry is whatever `grouped_to_summary` returned
        for that function, or an empty list if there were no matching
        sections.
    """
    time_query = esttc.TimeComponentQuery("data.start_local_dt", from_dt, to_dt)
    # The section key is looked up at call time instead of being hard-coded
    section_df = esda.get_data_df(eac.get_section_key_for_analysis_results(),
                                  user_id=user_id, time_query=time_query,
                                  geo_query=None)
    if len(section_df) == 0:
        # Let the logging module do the formatting so that no work is done
        # when the INFO level is disabled.
        logging.info("Found no entries for user %s, time_query %s",
                     user_id, time_query)
        # Keep the result shape stable: one (empty) entry per summary function
        return {
            "last_ts_processed": None,
            "result": [[] for _ in summary_fn_list]
        }

    groupby_arr = _get_local_group_by(freq)
    time_grouped_df = section_df.groupby(groupby_arr)
    local_dt_fill_fn = _get_local_key_to_fill_fn(freq)
    # The grouping is computed once and shared by every summary function
    return {
        "last_ts_processed": section_df.iloc[-1].start_ts,
        "result": [grouped_to_summary(time_grouped_df, local_dt_fill_fn, summary_fn)
                   for summary_fn in summary_fn_list]
    }
Esempio n. 3
0
def group_by_timestamp(user_id, start_ts, end_ts, freq, summary_fn):
    """
    Summarize the cleaned sections whose start_ts falls in a timestamp range,
    grouped at the specified frequency.

    :param user_id: The user for whom we are computing this information. None for all users.
    :param start_ts: The start timestamp
    :param end_ts: The end timestamp
    :param freq: The frequency as specified in a pandas date_range frequency string.
    We only support frequencies of a day or longer in order to return the data
    in a format that makes sense
    http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    The canonical list can be found at:
    > pandas.tseries.offsets.prefix_mapping
    :param summary_fn: the summary function handed to `grouped_to_summary`
    :return: the output of `grouped_to_summary` (documented upstream as a list
    of ModeStatTimeSummary objects), or an empty list if no sections matched
    """
    ts_query = estt.TimeQuery("data.start_ts", start_ts, end_ts)
    sections = esda.get_data_df(esda.CLEANED_SECTION_KEY, user_id=user_id,
                                time_query=ts_query, geo_query=None)
    if len(sections) == 0:
        no_match_msg = "Found no entries for user %s, time_query %s" % (user_id, ts_query)
        logging.info(no_match_msg)
        return []

    logging.debug("first row is %s" % sections.iloc[0])
    # Scale start_ts from seconds to nanoseconds before converting it into a
    # datetime column that the Grouper can bucket on.
    start_nanos = sections.start_ts * 10 ** 9
    sections['start_dt'] = pd.to_datetime(start_nanos)
    bucketed = sections.groupby(pd.Grouper(freq=freq, key='start_dt'))
    return grouped_to_summary(bucketed, timestamp_fill_times, summary_fn)
Esempio n. 4
0
def uuid_list_query(modes, time_query, region):
    """
    Find the unique user ids that have cleaned location entries matching the
    given filters.

    :param modes: names of `ecwm.MotionTypes` members to filter on, or None to
    skip the mode filter
    :param time_query: the time query passed through to the data lookup
    :param region: the region used to build a geo query on "data.loc", or None
    to skip the geo filter
    :return: a list of the unique user_id values in the matching entries;
    an empty list if nothing matched
    """
    geo_query = None if region is None else estg.GeoQuery(["data.loc"], region)

    if modes is None:
        extra_filters = []
    else:
        # Translate the mode names into enum members before building the query
        requested_modes = [ecwm.MotionTypes[mode] for mode in modes]
        extra_filters = [esdlq.get_mode_query(requested_modes)]

    matching_points = esda.get_data_df(esda.CLEANED_LOCATION_KEY, user_id=None,
                                       time_query=time_query,
                                       geo_query=geo_query,
                                       extra_query_list=extra_filters)
    point_count = len(matching_points)
    if point_count == 0:
        logging.info("No points found matching query, returning empty list")
        return []

    uuids = matching_points.user_id.unique().tolist()
    logging.info("Found %d points with %d unique uuids" % (point_count, len(uuids)))
    return uuids
Esempio n. 5
0
def group_by_timestamp(user_id, start_ts, end_ts, freq, summary_fn_list):
    """
    Summarize the cleaned sections whose start_ts falls in a timestamp range,
    grouped at the specified frequency.

    :param user_id: The user for whom we are computing this information. None for all users.
    :param start_ts: The start timestamp
    :param end_ts: The end timestamp
    :param freq: The frequency as specified in a pandas date_range frequency string.
    We only support frequencies of a day or longer in order to return the data
    in a format that makes sense
    http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    The canonical list can be found at:
    > pandas.tseries.offsets.prefix_mapping
    :param summary_fn_list: the summary functions to apply. Each one is passed,
    in turn, to `grouped_to_summary` together with the grouped sections.
    :return: a dict with two keys:
        "last_ts_processed": the start_ts of the last row in the retrieved
        section data frame, or None if there were no matching sections.
        "result": a list with one entry per function in summary_fn_list, in
        the same order. Each entry is whatever `grouped_to_summary` returned
        for that function, or an empty list if there were no matching
        sections.
    """
    time_query = estt.TimeQuery("data.start_ts", start_ts, end_ts)
    section_df = esda.get_data_df(esda.CLEANED_SECTION_KEY,
                                  user_id=user_id, time_query=time_query,
                                  geo_query=None)
    if len(section_df) == 0:
        # Let the logging module do the formatting so that no work is done
        # when the INFO level is disabled.
        logging.info("Found no entries for user %s, time_query %s",
                     user_id, time_query)
        # Keep the result shape stable: one (empty) entry per summary function
        return {
            "last_ts_processed": None,
            "result": [[] for _ in summary_fn_list]
        }
    logging.debug("first row is %s", section_df.iloc[0])
    # Scale start_ts from seconds to nanoseconds before converting it into a
    # datetime column that the Grouper can bucket on.
    section_df['start_dt'] = pd.to_datetime(section_df.start_ts * 10 ** 9)
    time_grouped_df = section_df.groupby(pd.Grouper(freq=freq, key='start_dt'))
    # The grouping is computed once and shared by every summary function
    return {
        "last_ts_processed": section_df.iloc[-1].start_ts,
        "result": [grouped_to_summary(time_grouped_df, timestamp_fill_times, summary_fn)
                   for summary_fn in summary_fn_list]
    }
Esempio n. 6
0
def group_by_timestamp(user_id, start_ts, end_ts, freq, summary_fn_list):
    """
    Summarize the analysis-result sections whose start_ts falls in a timestamp
    range, grouped at the specified frequency.

    :param user_id: The user for whom we are computing this information. None for all users.
    :param start_ts: The start timestamp
    :param end_ts: The end timestamp
    :param freq: The frequency as specified in a pandas date_range frequency string.
    We only support frequencies of a day or longer in order to return the data
    in a format that makes sense
    http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
    The canonical list can be found at:
    > pandas.tseries.offsets.prefix_mapping
    :param summary_fn_list: the summary functions to apply. Each one is passed,
    in turn, to `grouped_to_summary` together with the grouped sections.
    :return: a dict with two keys:
        "last_ts_processed": the start_ts of the last row in the retrieved
        section data frame, or None if there were no matching sections.
        "result": a list with one entry per function in summary_fn_list, in
        the same order. Each entry is whatever `grouped_to_summary` returned
        for that function, or an empty list if there were no matching
        sections.
    """
    time_query = estt.TimeQuery("data.start_ts", start_ts, end_ts)
    # The section key is looked up at call time instead of being hard-coded
    section_df = esda.get_data_df(eac.get_section_key_for_analysis_results(),
                                  user_id=user_id, time_query=time_query,
                                  geo_query=None)
    if len(section_df) == 0:
        # Let the logging module do the formatting so that no work is done
        # when the INFO level is disabled.
        logging.info("Found no entries for user %s, time_query %s",
                     user_id, time_query)
        # Keep the result shape stable: one (empty) entry per summary function
        return {
            "last_ts_processed": None,
            "result": [[] for _ in summary_fn_list]
        }
    logging.debug("first row is %s", section_df.iloc[0])
    # Scale start_ts from seconds to nanoseconds before converting it into a
    # datetime column that the Grouper can bucket on.
    section_df['start_dt'] = pd.to_datetime(section_df.start_ts * 10 ** 9)
    time_grouped_df = section_df.groupby(pd.Grouper(freq=freq, key='start_dt'))
    # The grouping is computed once and shared by every summary function
    return {
        "last_ts_processed": section_df.iloc[-1].start_ts,
        "result": [grouped_to_summary(time_grouped_df, timestamp_fill_times, summary_fn)
                   for summary_fn in summary_fn_list]
    }
Esempio n. 7
0
def uuid_list_query(modes, time_query, region):
    """
    Look up the cleaned location entries matching the given filters and return
    the distinct user ids found among them.

    :param modes: names of `ecwm.MotionTypes` members to restrict the search
    to, or None for no mode restriction
    :param time_query: the time query forwarded to the data lookup
    :param region: the region for a geo query on "data.loc", or None for no
    geo restriction
    :return: list of the distinct user_id values in the matching entries;
    empty if there were no matching entries
    """
    geo_query = None
    if region is not None:
        geo_query = estg.GeoQuery(["data.loc"], region)

    additional_queries = []
    if modes is not None:
        # Mode names are resolved to enum members by subscripting the enum
        motion_types = [ecwm.MotionTypes[mode] for mode in modes]
        mode_query = esdlq.get_mode_query(motion_types)
        additional_queries.append(mode_query)

    location_df = esda.get_data_df(
        esda.CLEANED_LOCATION_KEY,
        user_id=None,
        time_query=time_query,
        geo_query=geo_query,
        extra_query_list=additional_queries)

    if len(location_df) == 0:
        logging.info("No points found matching query, returning empty list")
        return []

    distinct_uuids = location_df.user_id.unique().tolist()
    found_msg = "Found %d points with %d unique uuids" % (
        len(location_df), len(distinct_uuids))
    logging.info(found_msg)
    return distinct_uuids
Esempio n. 8
0
def group_by_local_date(user_id, from_dt, to_dt, freq, summary_fn):
    """
    Summarize the cleaned sections in a local date range, grouped by frequency.

    :param user_id: id for the user. None for aggregate.
    :param from_dt: start local dt object. We assume that only the year, month
    and date entries are filled in and represent a date range.
    :param to_dt: end local dt object. We assume that only the year, month
    and date entries are filled in and represent a date range.
    :param freq: since we only expand certain local_dt fields, we can only
    support frequencies corresponding to them. These are represented in the
    `LocalFreq` enum.
    :param summary_fn: the summary function handed to `grouped_to_summary`
    :return: the output of `grouped_to_summary` for the grouped sections, or
    an empty list if no sections matched
    """
    local_query = esttc.TimeComponentQuery("data.start_local_dt", from_dt, to_dt)
    sections = esda.get_data_df(esda.CLEANED_SECTION_KEY, user_id=user_id,
                                time_query=local_query, geo_query=None)
    if len(sections) == 0:
        no_match_msg = "Found no entries for user %s, time_query %s" % (user_id, local_query)
        logging.info(no_match_msg)
        return []

    # Both the grouping columns and the fill function depend on the frequency
    grouped_sections = sections.groupby(_get_local_group_by(freq))
    fill_fn = _get_local_key_to_fill_fn(freq)
    return grouped_to_summary(grouped_sections, fill_fn, summary_fn)