Beispiel #1
0
def replace_missing_values(ts_data):
    """
    Log the received timeseries and return a fixed, hard-coded timeseries.

    The content of ``ts_data`` is only logged: the returned object is always
    built from the same 3 rows, whatever the input is.
    NOTE(review): the "TU:" log prefix suggests this is a unit-test stub - confirm.

    :param ts_data: input timeseries
    :type ts_data: TimestampedMonoVal

    :return: a timeseries built from constant rows
    :rtype: TimestampedMonoVal
    """
    LOGGER.info(repr(ts_data))
    assert isinstance(ts_data, TimestampedMonoVal)
    LOGGER.info("TU: calling replace_missing_values ")
    LOGGER.info("TU: input %s", str(ts_data.data))

    # Constant result, independent from the input
    fixed_rows = [[1.5, 5.0], [3.0, 6.7], [5.5, 134.34]]
    return TimestampedMonoVal(numpy.array(fixed_rows))
Beispiel #2
0
    def read_ts_from_temporal_db(self, ts_ref, timeseries_class, tmp_mgr):
        """
        Load the timeseries designated by a reference and wrap it into the requested class.

        Required: ResourceClientSingleton has been initialized !!!

        Only references typed TsuidTsRef and the class TimestampedMonoVal are handled:
        any other reference type or timeseries class ends with an Exception.

        :param ts_ref: reference of the timeseries to read
        :type ts_ref: subclass of AbstractTsRef
        :param timeseries_class: class of the object to build from the points read
        :type timeseries_class: subclass of core.data.ts.AbstractTs
        :param tmp_mgr: temporal data manager
        :type tmp_mgr: TemporalDataMgr

        :return: the timeseries built from the points read
        :rtype: TimestampedMonoVal

        :raise ValueError: the selection matching the reference is empty
        :raise Exception: reference type or timeseries class not yet handled
        """
        assert isinstance(ts_ref, AbstractTsRef)
        assert isinstance(tmp_mgr, TemporalDataMgr)
        try:
            # Logged whatever the reference type is
            self.LOGGER.info("Reading TS matching reference: %s", str(ts_ref))

            if not isinstance(ts_ref, TsuidTsRef):
                raise Exception("Not yet implemented for ts_ref typed: %s" %
                                type(ts_ref))

            points = tmp_mgr.get_ts_by_tsuid(ts_ref.tsuid,
                                             sd=ts_ref.start_date,
                                             ed=ts_ref.end_date)

            # Explicit length test: truthiness would be ambiguous if the
            # manager returns an array-like object
            if len(points) == 0:
                raise ValueError("Empty timeseries selection %s" % ts_ref)

            if timeseries_class is not TimestampedMonoVal:
                self.LOGGER.warning(
                    "Specific TS instance with custom class %s",
                    timeseries_class.__name__)
                raise Exception("Not yet implemented")

            self.LOGGER.info(
                "Building TimestampedMonoVal with extracted value")
            return TimestampedMonoVal(points)

        except Exception as err:
            # Trace the failure, then let the caller handle it
            self.LOGGER.error("Failure: TsFactory::read_ts_from_temporal_db")
            self.LOGGER.exception(err)
            raise err
Beispiel #3
0
    def compute(cls, ts_data, seg_list):
        """
        Compute the PAA based on the ts and the list of segments

        Every pair of consecutive indexes of ``seg_list`` delimits one segment
        ``[start, end)``. For each segment, the mean of the values (column 1)
        is computed, stored in the means list and written over every value of
        the segment in the resulting TS.

        The input is never modified: the result is built on a copy of the data.

        :param ts_data: TS to compute the PAA onto
        :type ts_data: np.ndarray or TimestampedMonoVal

        :param seg_list: list of the indexes corresponding to every period of the TS
        :type seg_list: list

        :return: the new TS (with values replaced by mean value) and the means list
        :rtype: tuple (TimestampedMonoVal, list)

        :raise TypeError: ts_data is neither a np.ndarray nor a TimestampedMonoVal
        """

        # Check inputs
        if not isinstance(ts_data, (np.ndarray, TimestampedMonoVal)):
            LOGGER.error(
                "ts_data must be a np.ndarray or TimestampedMonoVal (got %s)",
                type(ts_data))
            raise TypeError(
                "ts_data must be a np.ndarray or TimestampedMonoVal (got %s)" %
                type(ts_data))

        if isinstance(ts_data, TimestampedMonoVal):
            # Get the internal data if ts_data is TimestampedMonoVal
            ts_data = ts_data.data

        # Results initialization
        # A real copy is required: slicing a numpy array (ts_data[:]) only
        # returns a view, so writing the means would alter the caller's data
        result_ts = ts_data.copy()
        result_coeff = []

        # For every segment [start, end)
        for start, end in zip(seg_list, seg_list[1:]):
            # Compute the mean of the values of the segment
            mean = np.mean(ts_data[start:end, 1])

            # Store it to one of the results
            result_coeff.append(mean)

            # Overwrite every value of the segment with the mean (broadcast)
            result_ts[start:end, 1] = mean

        return TimestampedMonoVal(result_ts), result_coeff
Beispiel #4
0
def run_sax_from_tsuid(tdm,
                       tsuid,
                       word_size,
                       alphabet_size=None,
                       normalize=False):
    """
    Run the Symbolic Aggregate Approximation (SAX) on the TS identified by **tsuid**

    The points of the TS are fetched through the temporal data manager, wrapped
    into a TimestampedMonoVal and handed over to ``run_sax``.

    :param tdm: temporal data manager object
    :type tdm: TemporalDataMgr

    :param tsuid: TSUID of the TS to calculate the SAX
    :type tsuid: str

    :param word_size: number of segments
    :type word_size: int

    :param alphabet_size: number of characters in result word
    :type alphabet_size: int

    :param normalize: Apply the normalization on the TS if set to True (False:default)
    :type normalize: bool

    :return: the result of ``run_sax``: a dict composed of the PAA result,
             the SAX breakpoints, the SAX string and the TS points
    :rtype: dict

    :raise TypeError: if TSUID is not a string
    """

    # Strict check: only plain str instances are accepted
    tsuid_type = type(tsuid)
    if tsuid_type is not str:
        LOGGER.error("TSUID must be a string (got %s)", tsuid_type)
        raise TypeError("TSUID must be a string (got %s)" % tsuid_type)

    # Fetch the points of the single requested TS
    points = tdm.get_ts(tsuid_list=[tsuid])[0]

    # Delegate the SAX computation
    return run_sax(ts_data=TimestampedMonoVal(points),
                   alphabet_size=alphabet_size,
                   word_size=word_size,
                   normalize=normalize)
Beispiel #5
0
def rollmean(ts_data, window_size, alignment=Alignment.center):
    """
    Compute the rollmean on TS data provided

    This algorithm needs:
        * a TS
        * a window size (in number of points)
        * an alignment method for the output

    .. warning::
        The TS must not contain any hole (an interpolation may be applied before calling this algorithm).
        Otherwise the result will be altered and may not represent the real behaviour of the rollmean algorithm

    Example:
    ~~~~~~~~

    .. code-block:: python

        # Applying a rollmean on a TS stored in ts1_data with a window having 2 points
        # The result will be left-aligned
        r1 = rollmean(ts1_data, window_size=2, alignment=Alignment.left)

    To understand what is done, given this ts1_data:

        +-----------+-------+
        | Timestamp | Value |
        +===========+=======+
        | 1000      | 1     |
        +-----------+-------+
        | 2000      | 10    |
        +-----------+-------+
        | 3000      | 20    |
        +-----------+-------+
        | 4000      | 5     |
        +-----------+-------+
        | 5000      | 8     |
        +-----------+-------+
        | 6000      | 2     |
        +-----------+-------+

    We want to apply a rollmean with a window equal to 2 points.
        * we take the first window [1000;2000]
        * the mean of points is (sum of points in window divided by the size of the window): (1 + 10) / 2 = 5.5
        * Now the alignment is left so the value 5.5 will be assigned to the timestamp 1000
        * we shift the window by one point [2000;3000]
        * Start again until the TS is fully parsed

    About alignment:
    ~~~~~~~~~~~~~~~~

    Assuming
        * K is the window length (in number of points)
        * N is the number of points of the TS

    Then
        * Left alignment keeps the timestamps at indexes 0 to N-K (included)
        * Center alignment has the same length but shifted by floor(K/2)
        * Right alignment has the same length but shifted by K-1


    About size of the final TS
    ~~~~~~~~~~~~~~~~~~~~~~~~~~

    Due to the mean, we have less points in the result TS than the original TS
    Assuming
        * K is the window length (in number of points)
        * N is the number of points of the TS
    the length of the new TS will be N-K+1


    About the computation method
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    The code is highly optimized for single instance run.

    To explain the details, we will use the following values (timestamps are removed for clarity)

    ``TS_values = [1, 2, 10, 3, 4, 5, 6, 7, 8, 9, 10]``

    **Steps**

        #. Compute the cumsum
        #. Shift the cumsum by K and subtract
        #. Divide the results by K

    **Step 1**

    ``step1 = [1, 3, 13, 16, 20, 25, 31, 38, 46, 55, 65]``

    **Step 2**

        +---+---+----+----+----+----+----+----+----+----+----+--------+-----------------------+
        | 1 | 3 | 13 | 16 | 20 | 25 | 31 | 38 | 46 | 55 | 65 |        | Original              |
        +---+---+----+----+----+----+----+----+----+----+----+----+---+-----------------------+
        |       |  1 |  3 | 13 | 16 | 20 | 25 | 31 | 38 | 46 | 55 | 65| Shifted               |
        +---+---+----+----+----+----+----+----+----+----+----+----+---+-----------------------+
        | 1 | 3 | 12 | 13 |  7 |  9 | 11 | 13 | 15 | 17 | 19 |        | Result of subtraction |
        +---+---+----+----+----+----+----+----+----+----+----+--------+-----------------------+
    ``step2 = [1, 3, 12, 13,  7,  9, 11, 13, 15, 17, 19]``

    **Step 3**

        +-----+----+------+------+------+------+------+------+------+------+-----------------+
        | 3   | 12 | 13   |  7   |  9   | 11   | 13   | 15   | 17   | 19   | Remove first    |
        +-----+----+------+------+------+------+------+------+------+------+-----------------+
        | 1.5 |  6 |  6.5 |  3.5 |  4.5 |  5.5 |  6.5 |  7.5 |  8.5 |  9.5 | Divide by K(=2) |
        +-----+----+------+------+------+------+------+------+------+------+-----------------+
    ``step3 =  [1.5,  6, 6.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5]``


    :param ts_data: input Timeseries to compute the rollmean on
    :type ts_data: numpy.array or TimestampedMonoVal

    :param window_size: Size of the sliding window (in number of points).
                        Must be a positive integer lower than the TS length
    :type window_size: int

    :param alignment: result alignment (left,right,center), default: center
    :type alignment: int

    :return: The new TS
    :rtype: TimestampedMonoVal

    :raise TypeError: Alignment must be taken within Alignment Enumerate
    :raise TypeError: ts_data must be numpy array or TimestampedMonoVal
    :raise ValueError: window size must be positive integer
    :raise ValueError: window_size must be lower than TS length

    """

    LOGGER.debug("RollMean arguments:")
    LOGGER.debug(" * window_size: (%s) %s", type(window_size), window_size)
    LOGGER.debug(" * alignment: (%s) %s", type(alignment), alignment)

    # Input check
    if type(alignment) != int or alignment not in [1, 2, 3]:
        raise TypeError("Alignment must be taken within Alignment Enumerate")
    if not isinstance(ts_data, (np.ndarray, TimestampedMonoVal)):
        raise TypeError(
            "ts_data must be numpy array or TimestampedMonoVal (got %s)" %
            type(ts_data))

    # The length is logged only once the type is known to be valid
    LOGGER.debug(" * ts_data: (%s) len=%s", type(ts_data), len(ts_data))

    # Strict type test (bool and numpy integers are refused, as before).
    # None, zero and negative sizes are refused here instead of producing
    # meaningless slices further down.
    if type(window_size) is not int or window_size <= 0:
        raise ValueError("window size must be positive integer")
    if window_size >= len(ts_data):
        raise ValueError("window_size must be lower than TS length")

    # Convert to numpy array
    if isinstance(ts_data, TimestampedMonoVal):
        ts_data = ts_data.data

    if window_size == 1:
        # The result is the original TS if window is equal to 1 point
        LOGGER.warning(
            "Window size contains 1 point. The result of rollmean is the original TS"
        )
        return TimestampedMonoVal(ts_data)

    # Work only with values, timestamps are not needed for calculation
    values = ts_data[:, 1]

    # Computation of the roll mean (highly optimized for arithmetic mean):
    # the sum over a window is the difference of two cumulated sums
    cumulated = np.cumsum(values, dtype=float)
    cumulated[window_size:] = (cumulated[window_size:] -
                               cumulated[:-window_size])
    ts_result_values = cumulated[window_size - 1:] / window_size

    # Selection of the correct time range (depending on alignment):
    # same number of points for every alignment, only the first index moves
    nb_points = len(values) - window_size + 1
    if alignment == Alignment.left:
        first = 0
    elif alignment == Alignment.center:
        first = window_size // 2
    elif alignment == Alignment.right:
        first = window_size - 1
    else:
        # Accepted by the range check above but matching no known alignment
        raise TypeError("Alignment must be taken within Alignment Enumerate")
    ts_result_timestamps = ts_data[first:first + nb_points, 0]

    # Build result TS
    # From 2 series (timestamps and values), build a formatted TS
    # example: timestamps = [1000, 2000, 3000] and values are [42, 15, 0]
    #          --> [[1000, 42],
    #               [2000, 15],
    #               [3000, 0]]
    #           in TimestampedMonoVal format
    return TimestampedMonoVal(
        np.column_stack((ts_result_timestamps, ts_result_values)))
Beispiel #6
0
def scale(ts_list, scaler=AvailableScaler.ZNorm):
    """
    Scale every TS of the provided list, one after the other ("no spark" mode).

    For each TSUID: the TS is read, its values are scaled, the result is saved
    as a new TS which inherits from the original one.

    :param ts_list: List of TS to scale
    :type ts_list: list of str

    :param scaler: The scaler used, should be one of the AvailableScaler...
    :type scaler: AvailableScaler or str

    :return: A list of dict composed by original TSUID and the information about the new TS
    :rtype: list

    Example of result::

        [{"tsuid": new_tsuid,
          "funcId": new_fid,
          "origin": tsuid
          }, ...]
    """
    # 0/ Init Scaler object (Spark disabled)
    # ------------------------------------------------
    current_scaler = Scaler(scaler=scaler, spark=False)

    # One dict per TS processed
    result = []

    for tsuid in ts_list:
        # 1/ Load TS content from its ID
        # ------------------------------------------------
        tic = time.time()
        ts_data = IkatsApi.ts.read([tsuid])[0]
        LOGGER.debug("TSUID: %s, Gathering time: %.3f seconds", tsuid,
                     time.time() - tic)

        # 2/ Perform scaling
        # ------------------------------------------------
        tic = time.time()
        # Only the second column ([:, 1]) is scaled; it is reshaped into a
        # single-column 2D array before being handed to the scaler
        values_column = ts_data[:, 1].reshape(-1, 1)
        scaled_data = current_scaler.perform_scaling(x=values_column)
        LOGGER.debug("TSUID: %s, Computing time: %.3f seconds", tsuid,
                     time.time() - tic)

        # 3/ Merge [first column + scaled values]
        # ------------------------------------------------
        merged = np.dstack((ts_data[:, 0], scaled_data.flat))[0]
        ts_result = TimestampedMonoVal(merged)

        # 4/ Save result and inherit from parent
        # ------------------------------------------------
        tic = time.time()
        new_tsuid, new_fid = save(tsuid=tsuid,
                                  ts_result=ts_result,
                                  short_name="scaled",
                                  sparkified=False)
        IkatsApi.ts.inherit(new_tsuid, tsuid)
        LOGGER.debug("TSUID: %s(%s), Result import time: %.3f seconds",
                     new_fid, new_tsuid,
                     time.time() - tic)

        # 5/ Update result
        # ------------------------------------------------
        result.append({"tsuid": new_tsuid, "funcId": new_fid, "origin": tsuid})

    return result