예제 #1
0
    def get_merged_dataframes(self, feature_requests, start_dt, end_dt, repopulate=False):
        """
        Return the requested features merged together for start_dt <= t < end_dt.

        Every request must resolve to an InfiniteTimeSeriesFeature. The columns
        of each feature's frame are prefixed with ``request.prefix`` before the
        frames are handed to ``merge_timeseries_features``.

        :param feature_requests: a single feature request or a list of them;
            each request must expose ``feature_id`` and ``prefix``
        :param start_dt: start of the interval, normalised with
            ``parse_date_if_necessary(..., to_utc=True)``
        :param end_dt: end of the interval, normalised the same way
        :param repopulate: forwarded unchanged to ``feature.get_df``
        :return: the result of ``merge_timeseries_features`` applied to the
            prefixed per-feature frames
        :raises ValueError: if a requested feature is not an
            InfiniteTimeSeriesFeature
        """

        start_dt = parse_date_if_necessary(start_dt, to_utc=True)
        end_dt = parse_date_if_necessary(end_dt, to_utc=True)

        if not isinstance(feature_requests, list):
            feature_requests = [feature_requests]

        self._instantiate_features(feature_requests)

        # Resolve and validate every feature up front so that an unsupported
        # feature is rejected before any initialisation or retrieval starts.
        resolved = []
        for request in feature_requests:
            feature = self.feature_map[request.feature_id]
            if not isinstance(feature, InfiniteTimeSeriesFeature):
                raise ValueError("Not a InfiniteTimeSeriesFeature")
            resolved.append((request, feature))

        # All features are initialised before the first frame is requested.
        for _, feature in resolved:
            feature.initialize(start_dt, end_dt)

        features_df = []
        for request, feature in resolved:
            logging.info("Requesting %s %s %s", request.feature_id, start_dt, end_dt)
            df = feature.get_df(start_dt, end_dt, repopulate=repopulate)
            # NOTE(review): the columns are renamed in place on the object
            # returned by get_df; if the feature hands out a cached frame the
            # prefix would accumulate across calls -- confirm get_df returns a
            # fresh frame.
            df.columns = [request.prefix + c for c in df.columns]
            features_df.append(df)
        return merge_timeseries_features(features_df)
예제 #2
0
    def get_dataframes_by_stickers(self,
                                   feature_requests,
                                   stickers,
                                   repopulate=False):
        """
        Return the merged TimeSeriesFeature frames for each sticker.

        Every request must resolve to a TimeSeriesFeature. For each sticker the
        frame of every requested feature is fetched, its columns are prefixed
        with ``request.prefix``, and the frames are combined with
        ``merge_timeseries_features``.

        :param feature_requests: a single feature request or an iterable of
            them; each request must expose ``feature_id`` and ``prefix``
        :param stickers: with or without bookmakers
        :param repopulate: forwarded unchanged to ``feature.get_df_from_sticker``
        :return: {sticker: DataFrame} where Dataframe has all the features merged
        :raises ValueError: if a requested feature is not a TimeSeriesFeature
        """
        # A bare request (recognised by its ``feature_id`` attribute) is
        # promoted to a one-element list; collections are left untouched.
        if hasattr(feature_requests, "feature_id"):
            feature_requests = [feature_requests]

        self._instantiate_features(feature_requests)

        # The feature lookup and type check do not depend on the sticker, so
        # they are done once here instead of once per sticker, and before any
        # initialisation work is started.
        resolved = []
        for request in feature_requests:
            feature = self.feature_map[request.feature_id]
            if not isinstance(feature, TimeSeriesFeature):
                raise ValueError("Not a TimeSeriesFeature")
            resolved.append((request, feature))

        for _, feature in resolved:
            feature.initialize_stickers(stickers)

        ret = {}
        # TODO parallelize
        for sticker in stickers:
            features_df = []
            for request, feature in resolved:
                logging.info("Requesting %s %s", request.feature_id, sticker)
                df = feature.get_df_from_sticker(sticker,
                                                 repopulate=repopulate)
                # NOTE(review): renames the columns in place on the returned
                # object -- confirm get_df_from_sticker returns a fresh frame.
                df.columns = [request.prefix + c for c in df.columns]
                features_df.append(df)
            ret[sticker] = merge_timeseries_features(features_df)
        return ret
예제 #3
0
def compute_for_sticker_q(feature_requests, stickers, feature_map,
                          result_queue):
    """
    Build the merged feature frames for ``stickers`` and put them on a queue.

    For every sticker, the frame of each requested feature is fetched through
    ``feature_map[request.feature_id].get_df_from_sticker(sticker)`` and the
    frames are combined with ``merge_timeseries_features``.

    :param feature_requests: iterable of requests exposing ``feature_id``
    :param stickers: iterable of stickers to compute
    :param feature_map: mapping from feature id to feature object
    :param result_queue: object with a ``put`` method; receives one
        {sticker: merged frames} dict covering all stickers
    :return: None
    """
    merged_by_sticker = {}
    for sticker in stickers:
        frames = [
            feature_map[request.feature_id].get_df_from_sticker(sticker)
            for request in feature_requests
        ]
        merged_by_sticker[sticker] = merge_timeseries_features(frames)
    result_queue.put(merged_by_sticker)
예제 #4
0
    def get_dataframes_by_event_ids(self,
                                    feature_requests,
                                    event_ids,
                                    repopulate=False,
                                    recompute_missing=True):
        """
        Return the merged TimeSeriesFeature frames for each event id.

        Every request must resolve to a TimeSeriesFeature. For each event id
        the frame of every requested feature is fetched and the frames are
        combined with ``merge_timeseries_features``.

        :param feature_requests: a single feature request or an iterable of
            them; each request must expose ``feature_id``
        :param event_ids: list of strings like ['GSM55554', 'GSM55555'] or ['ENP7777', 'ENP7778']
        :param repopulate: forwarded unchanged to ``feature.get_df_from_event_id``
        :param recompute_missing: forwarded unchanged to
            ``feature.get_df_from_event_id``
        :return: {event_id: DataFrame} where Dataframe has all the features merged
        :raises ValueError: if a requested feature is not a TimeSeriesFeature
        """
        # A bare request (recognised by its ``feature_id`` attribute) is
        # promoted to a one-element list; collections are left untouched.
        if hasattr(feature_requests, "feature_id"):
            feature_requests = [feature_requests]

        self._instantiate_features(feature_requests)

        # The feature lookup and type check do not depend on the event, so
        # they are done once here instead of once per event, and before any
        # initialisation work is started.
        resolved = []
        for request in feature_requests:
            feature = self.feature_map[request.feature_id]
            if not isinstance(feature, TimeSeriesFeature):
                raise ValueError("Not a TimeSeriesFeature")
            resolved.append((request, feature))

        for _, feature in resolved:
            feature.initialize_events(event_ids)

        ret = {}
        for event_id in event_ids:
            features_df = []
            for request, feature in resolved:
                logging.info("Requesting %s %s", request.feature_id, event_id)
                # NOTE(review): unlike get_dataframes_by_stickers, the columns
                # are not prefixed with request.prefix here -- confirm this is
                # intentional.
                request_df = feature.get_df_from_event_id(
                    event_id,
                    repopulate=repopulate,
                    recompute_missing=recompute_missing)
                features_df.append(request_df)
            ret[event_id] = merge_timeseries_features(features_df)
        return ret
예제 #5
0
 def merge_timeseries_features(cls, dfs):
     """
     Delegate to the module-level ``merge_timeseries_features`` function.

     ``cls`` is not used. Presumably this is exposed as a classmethod whose
     decorator is outside the visible snippet -- TODO confirm.

     :param dfs: passed through unchanged to ``merge_timeseries_features``
     :return: whatever the module-level ``merge_timeseries_features`` returns
     """
     merged = merge_timeseries_features(dfs)
     return merged