Beispiel #1
0
 def data(self):
     """Return the rows of the wrapped frame as a list of dicts.

     Every row of ``self.__df.values`` becomes one dict keyed by the frame's
     column labels; each cell is passed through ``_maybe_box_datetimelike``
     before it is stored.
     """
     # Rows are assembled by hand as a work around for
     # https://github.com/pandas-dev/pandas/issues/18372
     records = []
     for row in self.__df.values:
         # atleast_1d keeps a scalar row iterable for zip()
         cells = np.atleast_1d(row)
         records.append({
             label: _maybe_box_datetimelike(cell)
             for label, cell in zip(self.__df.columns, cells)
         })
     return records
Beispiel #2
0
def wind(in_testing, params):
    """Summarise the most recent uploads of sensor ``2001``.

    Aggregates the ``sensors`` collection so that, per timestamp, the last
    ``t1``/``t2``/``h`` values are kept (exposed as ``low``/``med``/``high``),
    sorts the result newest first and computes statistics over a window of
    ``params["size"]`` rows.

    Parameters
    ----------
    in_testing : value accepted by ``int``
        Only validated; it does not influence the result.
    params : mapping
        Must provide ``"size"`` (number of rows in the window) and
        ``"tScore"`` (multiplier used for the margin of error).

    Returns
    -------
    dict
        ``"uploads"`` (one dict per row, without null/NaN entries) plus
        ``"<series>_avg"`` and ``"<series>_MOE"`` for ``low``, ``med`` and
        ``high``.  The MOE is expressed as a percentage of the mean.
    """
    # Kept from the original implementation: a non-integer ``in_testing``
    # still raises, even though the converted value is not used below.
    int(in_testing)

    pipeline = [
        {"$match": {"id": 2001}},
        {"$group": {"_id": "$ts",
                    "low": {"$last": "$t1"},
                    "med": {"$last": "$t2"},
                    "high": {"$last": "$h"}}},
        {"$sort": SON([("_id", -1)])},
    ]
    data = list(sensors.aggregate(pipeline, allowDiskUse=True))

    ### Set Sample size
    sample = params["size"]
    tScore = params["tScore"]

    frame = pd.DataFrame(data).rename(columns={'_id': 'ts'})
    # Offset by one, as the first is for subtracting the amount of time
    df = frame.iloc[1:sample + 1]

    def _average(values):
        return round(np.mean(values), 1)

    def _margin_of_error(values):
        # NOTE(review): the standard error divides by the *requested* sample
        # size, not by len(values) -- confirm this is intended when fewer
        # rows than ``sample`` are available.
        return round(
            np.std(values) / math.sqrt(sample) * tScore / np.mean(values) * 100,
            1)

    result = {
        # ``v == v`` is False only for NaN-like values, so they are dropped
        "uploads": [
            dict((k, _maybe_box_datetimelike(v))
                 for k, v in zip(df.columns, row)
                 if v is not None and v == v)
            for row in df.values
        ],
    }
    # Each series is summarised from its own column; the previous version
    # derived "high_avg" from the "low" readings by mistake.
    for series in ("low", "med", "high"):
        values = list(df[series])
        result[series + "_avg"] = _average(values)
        result[series + "_MOE"] = _margin_of_error(values)
    return result
Beispiel #3
0
def updateTS(_ID):
    """Rewrite the ``ts`` field of every document of one sensor as a datetime.

    Loads all documents of ``sensors`` whose ``id`` equals ``_ID`` (sorted by
    ``ts`` ascending), converts the ``ts`` column with ``pd.to_datetime`` and
    writes the converted values back through a single unordered bulk
    operation.  Progress and timing information is printed.

    Parameters
    ----------
    _ID : value matched against the ``id`` field of the documents.

    Returns
    -------
    None
        Failures are reported on stdout instead of being raised.
    """
    step0 = time.time()
    pipeline = [
        {"$match": {"id": _ID}},
        {"$sort": SON([("ts", 1)])},
    ]
    try:
        _TEST = pd.DataFrame(list(sensors.aggregate(pipeline, allowDiskUse=True)))
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Total rows: %s" % len(_TEST))
        print("Data loaded in %ss" % (round((time.time() - step0), 1)))

        # No matching documents: report it explicitly instead of relying on
        # an attribute error further down.
        if _TEST.empty:
            print("ID %s does not exist." % _ID)
            return

        _TEST.ts = pd.to_datetime(_TEST.ts)

        #https://stackoverflow.com/questions/20167194/insert-a-pandas-dataframe-into-mongodb-using-pymongo/49127811
        # ``v == v`` is False only for NaN-like values, so they are dropped
        my_list = [
            dict((k, _maybe_box_datetimelike(v))
                 for k, v in zip(_TEST.columns, row)
                 if v is not None and v == v)
            for row in _TEST.values
        ]

        # NOTE(review): legacy PyMongo bulk API; newer drivers expose
        # ``bulk_write`` instead -- confirm the driver version in use.
        bulk = sensors.initialize_unordered_bulk_op()
        for doc in my_list:
            bulk.find({'_id': doc["_id"]}).update({'$set': {"ts": doc["ts"]}})

        #https://stackoverflow.com/questions/46458618/how-can-i-update-a-whole-collection-in-mongodb-and-not-document-by-document
        print(bulk.execute())
        print("Completed ID %s in %ss" % (_ID, (round((time.time() - step0), 1))))
    except Exception as exc:
        # Still best-effort (nothing is raised to the caller), but the real
        # cause is reported rather than a misleading "does not exist", and
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print("Failed to update ID %s: %s" % (_ID, exc))
Beispiel #4
0
def rain(in_testing, params):
    """Summarise the most recent uploads of sensor ``2000`` per gauge.

    Aggregates the ``sensors`` collection so that, per timestamp, the last
    ``t1``/``h``/``r`` values are kept (exposed as ``r1``/``r2``/``r3``),
    sorts the result newest first and computes statistics for each of the
    three series over a window of ``params["size"]`` rows.

    Parameters
    ----------
    in_testing : str
        Comma-separated integer ids; the first three are attached, in order,
        to the ``r1``, ``r2`` and ``r3`` summaries.
    params : mapping
        Must provide ``"size"`` (number of rows in the window) and
        ``"tScore"`` (multiplier used for the margin of error).

    Returns
    -------
    list of dict
        One dict per series with the keys ``"id"``, ``"uploads"`` (row dicts
        without null/NaN entries), ``"avg"`` and ``"MOE"`` (margin of error
        as a percentage of the mean).

    Raises
    ------
    ValueError
        If an id in ``in_testing`` is not an integer.
    IndexError
        If ``in_testing`` holds fewer than three ids.
    """
    # A real list (not a lazy ``map`` object) so the ids can be indexed on
    # Python 3 as well as Python 2.
    deployed = [int(part) for part in in_testing.split(',')]

    pipeline = [
        {"$match": {"id": 2000}},
        {"$group": {"_id": "$ts",
                    "r1": {"$last": "$t1"},
                    "r2": {"$last": "$h"},
                    "r3": {"$last": "$r"}}},
        {"$sort": SON([("_id", -1)])},
    ]
    data = list(sensors.aggregate(pipeline, allowDiskUse=True))

    ### Set Sample size
    sample = params["size"]
    tScore = params["tScore"]

    df = pd.DataFrame(data).rename(columns={'_id': 'ts'})
    # Offset by one, as the first is for subtracting the amount of time
    window = df.iloc[1:sample + 1]

    def _summary(sensor_id, column):
        # ``.loc`` replaces the removed ``.ix`` indexer (label-based here).
        gauge = window.loc[:, ['ts', column]]
        readings = list(gauge[column])
        return {
            "id": sensor_id,
            # ``v == v`` is False only for NaN-like values, so they are dropped
            "uploads": [
                dict((k, _maybe_box_datetimelike(v))
                     for k, v in zip(gauge.columns, row)
                     if v is not None and v == v)
                for row in gauge.values
            ],
            "avg": round(np.mean(readings), 1),
            # NOTE(review): uses the requested sample size, not len(readings)
            "MOE": round(
                np.std(readings) / math.sqrt(sample) * tScore
                / np.mean(readings) * 100, 1),
        }

    return [
        _summary(deployed[position], column)
        for position, column in enumerate(('r1', 'r2', 'r3'))
    ]
 def data(self):
     """Return the rows of ``self.df`` as dicts that are safe for JavaScript.

     Every row becomes one dict keyed by the frame's column labels, with each
     cell passed through ``_maybe_box_datetimelike``.  ``int`` values whose
     magnitude exceeds ``JS_MAX_INTEGER`` are replaced by their string form.
     """
     # work around for https://github.com/pandas-dev/pandas/issues/18372
     records = []
     for row in self.df.values:
         pairs = zip(self.df.columns, np.atleast_1d(row))
         records.append(
             dict((name, _maybe_box_datetimelike(cell)) for name, cell in pairs))

     for record in records:
         # iterate over a snapshot of the keys while values are replaced
         for name in list(record):
             cell = record[name]
             # an int too big for Java Script to handle becomes a string
             if isinstance(cell, int) and abs(cell) > JS_MAX_INTEGER:
                 record[name] = str(cell)
     return records
 def data(self):
     """Serialise ``self.df`` row by row into a list of dicts.

     Cells are boxed with ``_maybe_box_datetimelike``; afterwards any ``int``
     whose absolute value is larger than ``JS_MAX_INTEGER`` is stored as a
     string instead.
     """
     # work around for https://github.com/pandas-dev/pandas/issues/18372
     rows = []
     for raw in self.df.values:
         entry = {}
         for column, value in zip(self.df.columns, np.atleast_1d(raw)):
             boxed = _maybe_box_datetimelike(value)
             # if an int is too big for Java Script to handle
             # convert it to a string
             too_big = isinstance(boxed, int) and abs(boxed) > JS_MAX_INTEGER
             entry[column] = str(boxed) if too_big else boxed
         rows.append(entry)
     return rows
Beispiel #7
0
    def tolist(self):
        """
        Convert the underlying values into a plain list.

        Each element is a scalar type: a Python scalar (for str, int, float)
        or a pandas scalar (for Timestamp/Timedelta/Interval/Period).

        See Also
        --------
        numpy.ndarray.tolist
        """
        # non datetime-like data: defer to the values' own ``tolist``
        if not is_datetimelike(self):
            return self._values.tolist()

        # datetime-like data is boxed element by element
        return [com._maybe_box_datetimelike(item) for item in self._values]
Beispiel #8
0
    def tolist(self):
        """
        Return the values as a list of scalars.

        The elements are Python scalars (for str, int, float) or pandas
        scalars (for Timestamp/Timedelta/Interval/Period).

        See Also
        --------
        numpy.ndarray.tolist
        """
        values = self._values
        if is_datetimelike(self):
            # box every datetime-like element individually
            boxed = []
            for element in values:
                boxed.append(_maybe_box_datetimelike(element))
            return boxed
        return values.tolist()
Beispiel #9
0
def df_to_mongo(df, collection):
    """Insert the rows of ``df`` into ``collection``, one document per row.

    Columns whose contents duplicate an earlier column are dropped first.
    Within each row, values that are ``None``, NaN-like (``v != v``) or whose
    string form contains ``'#'`` are left out of the document.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to import.
    collection : object providing ``insert_many(list_of_dicts)``
        Target collection.

    Returns
    -------
    None
        The import is best-effort: any exception is printed, not raised.
    """
    try:
        # Drop columns whose *contents* repeat an earlier column (the first
        # occurrence is kept); this compares values, not column names.
        df = df.loc[:, ~df.T.duplicated(keep='first')]

        # Omit null/ nan values and convert to rows:
        df_list = [
            dict((k, _maybe_box_datetimelike(v))
                 for k, v in zip(df.columns, row)
                 if v is not None and v == v and '#' not in str(v))
            for row in df.values
        ]

        # Import to collection; ``insert_many`` rejects an empty list, so a
        # frame without rows is simply a no-op.
        if df_list:
            collection.insert_many(df_list)

    except Exception as e:
        print(e)
Beispiel #10
0
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Build an IntervalIndex whose intervals all share the same length.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left edge of the first interval
    end : numeric or datetime-like, default None
        Right edge of the last interval
    periods : integer, default None
        How many intervals to create
    freq : numeric, string, or DateOffset, default None
        Length of every interval. Its type has to agree with start and end,
        e.g. 2 for numeric, or '5H' for datetime-like.  Falls back to 1 for
        numeric and 'D' (calendar daily) for datetime-like.
    name : string, default None
        Name given to the resulting IntervalIndex
    closed : string, default 'right'
        options are: 'left', 'right', 'both', 'neither'

    Returns
    -------
    rng : IntervalIndex

    Raises
    ------
    ValueError
        If not exactly two of ``start``, ``end`` and ``periods`` are given,
        if an endpoint is neither numeric nor datetime-like, or if ``freq``
        cannot be converted to a DateOffset.
    TypeError
        If ``periods`` is not a number or ``start``, ``end`` and ``freq``
        are not type compatible.

    Notes
    -----
    Exactly two of the three parameters ``start``, ``end`` and ``periods``
    must be specified.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
                          end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the distance between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
                          periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')

    See Also
    --------
    IntervalIndex : an Index of intervals that are all closed on the same side.
    """
    if _count_not_none(start, end, periods) != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')

    start = _maybe_box_datetimelike(start)
    end = _maybe_box_datetimelike(end)
    # the first bound that was supplied decides numeric vs datetime-like
    endpoint = next(_not_none(start, end))

    if not _is_valid_endpoint(start):
        raise ValueError('start must be numeric or datetime-like, '
                         'got {start}'.format(start=start))
    elif not _is_valid_endpoint(end):
        raise ValueError('end must be numeric or datetime-like, '
                         'got {end}'.format(end=end))

    if is_float(periods):
        periods = int(periods)
    elif not (is_integer(periods) or periods is None):
        raise TypeError(
            'periods must be a number, got {periods}'.format(periods=periods))

    if not freq:
        freq = 1 if is_number(endpoint) else 'D'
    if not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility (all three pairs are always evaluated)
    pairs = ((start, end), (start, freq), (end, freq))
    if not all([_is_type_compatible(left, right) for left, right in pairs]):
        raise TypeError("start, end, freq need to be type compatible")

    if is_number(endpoint):
        if periods is None:
            periods = int((end - start) // freq)

        span = periods * freq
        if start is None:
            start = end - span

        # force end to be consistent with freq (lower if freq skips over end)
        end = start + span

        # end + freq for inclusive endpoint
        breaks = np.arange(start, end + freq, freq)
    else:
        # add one to account for interval endpoints (n breaks = n-1 intervals)
        if periods is not None:
            periods += 1
        if isinstance(endpoint, Timestamp):
            make_range = date_range
        else:
            make_range = timedelta_range
        breaks = make_range(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
Beispiel #11
0
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Create an IntervalIndex of equally sized, consecutive intervals.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound of the generated intervals
    end : numeric or datetime-like, default None
        Right bound of the generated intervals
    periods : integer, default None
        Number of intervals to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must match the type of start and end,
        e.g. 2 for numeric, or '5H' for datetime-like.  When not given, 1 is
        used for numeric and 'D' (calendar daily) for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : string, default 'right'
        options are: 'left', 'right', 'both', 'neither'

    Returns
    -------
    rng : IntervalIndex

    Raises
    ------
    ValueError
        If not exactly two of ``start``, ``end`` and ``periods`` are given,
        if an endpoint is neither numeric nor datetime-like, or if ``freq``
        cannot be converted to a DateOffset.
    TypeError
        If ``periods`` is not a number or ``start``, ``end`` and ``freq``
        are not type compatible.

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
                          end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the distance between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
                          periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')

    See Also
    --------
    IntervalIndex : an Index of intervals that are all closed on the same side.
    """
    n_given = com._count_not_none(start, end, periods)
    if n_given != 2:
        raise ValueError('Of the three parameters: start, end, and periods, '
                         'exactly two must be specified')

    start = com._maybe_box_datetimelike(start)
    end = com._maybe_box_datetimelike(end)
    # whichever bound was supplied first determines the kind of range
    endpoint = next(com._not_none(start, end))

    if not _is_valid_endpoint(start):
        start_msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(start_msg.format(start=start))
    if not _is_valid_endpoint(end):
        end_msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(end_msg.format(end=end))

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods):
        if periods is not None:
            periods_msg = 'periods must be a number, got {periods}'
            raise TypeError(periods_msg.format(periods=periods))

    if not freq:
        freq = 1 if is_number(endpoint) else 'D'
    if not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility (every pair is checked before deciding)
    checks = [
        _is_type_compatible(start, end),
        _is_type_compatible(start, freq),
        _is_type_compatible(end, freq),
    ]
    if False in [bool(ok) for ok in checks]:
        raise TypeError("start, end, freq need to be type compatible")

    if not is_number(endpoint):
        # add one to account for interval endpoints (n breaks = n-1 intervals)
        if periods is not None:
            periods += 1
        range_func = (date_range if isinstance(endpoint, Timestamp)
                      else timedelta_range)
        breaks = range_func(start=start, end=end, periods=periods, freq=freq)
        return IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    if periods is None:
        periods = int((end - start) // freq)

    if start is None:
        start = end - periods * freq

    # force end to be consistent with freq (lower if freq skips over end)
    end = start + periods * freq

    # end + freq for inclusive endpoint
    breaks = np.arange(start, end + freq, freq)
    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
 def data(self):
     """Return one ``{column: value}`` dict per row of the wrapped frame.

     Values are passed through ``_maybe_box_datetimelike`` before being
     stored in the row dict.
     """
     # work around for https://github.com/pandas-dev/pandas/issues/18372
     result = []
     for row in self.__df.values:
         named_cells = zip(self.__df.columns, np.atleast_1d(row))
         result.append(
             dict((key, _maybe_box_datetimelike(cell))
                  for key, cell in named_cells))
     return result
Beispiel #13
0
def interval_range(start=None, end=None, periods=None, freq=None,
                   name=None, closed='right'):
    """
    Build an IntervalIndex whose intervals all share the same length.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left edge of the first interval
    end : numeric or datetime-like, default None
        Right edge of the last interval
    periods : integer, default None
        How many intervals to create
    freq : numeric, string, or DateOffset, default None
        Length of every interval. Its type has to agree with start and end,
        e.g. 2 for numeric, or '5H' for datetime-like.  Falls back to 1 for
        numeric and 'D' (calendar daily) for datetime-like.
    name : string, default None
        Name given to the resulting IntervalIndex
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    rng : IntervalIndex

    Raises
    ------
    ValueError
        If, after the ``freq`` default has been applied, not exactly three
        of ``start``, ``end``, ``periods`` and ``freq`` are set, if an
        endpoint is neither numeric nor datetime-like, or if ``freq`` cannot
        be converted to a DateOffset.
    TypeError
        If ``periods`` is not a number or ``start``, ``end`` and ``freq``
        are not type compatible.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
                          end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the distance between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
                          periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')

    See Also
    --------
    IntervalIndex : an Index of intervals that are all closed on the same side.
    """
    start = com._maybe_box_datetimelike(start)
    end = com._maybe_box_datetimelike(end)
    # the bound that decides numeric vs datetime-like handling
    endpoint = end if start is None else start

    # only fall back to a default freq when something else is missing too
    if freq is None and com._any_none(periods, start, end):
        if is_number(endpoint):
            freq = 1
        else:
            freq = 'D'

    if com._count_not_none(start, end, periods, freq) != 3:
        raise ValueError('Of the four parameters: start, end, periods, and '
                         'freq, exactly three must be specified')

    if not _is_valid_endpoint(start):
        raise ValueError('start must be numeric or datetime-like, '
                         'got {start}'.format(start=start))
    if not _is_valid_endpoint(end):
        raise ValueError('end must be numeric or datetime-like, '
                         'got {end}'.format(end=end))

    if is_float(periods):
        periods = int(periods)
    elif not (is_integer(periods) or periods is None):
        raise TypeError(
            'periods must be a number, got {periods}'.format(periods=periods))

    if not (freq is None or is_number(freq)):
        try:
            freq = to_offset(freq)
        except ValueError:
            raise ValueError('freq must be numeric or convertible to '
                             'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility (all three pairs are always evaluated)
    pairs = ((start, end), (start, freq), (end, freq))
    if not all([_is_type_compatible(left, right) for left, right in pairs]):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods = periods + 1

    if not is_number(endpoint):
        # delegate to the appropriate range function
        range_func = (date_range if isinstance(endpoint, Timestamp)
                      else timedelta_range)
        breaks = range_func(start=start, end=end, periods=periods, freq=freq)
        return IntervalIndex.from_breaks(breaks, name=name, closed=closed)

    # force consistency between start/end/freq (lower end if freq skips it)
    if com._all_not_none(start, end, freq):
        end = end - (end - start) % freq

    # compute the period/start/end if unspecified (at most one)
    if periods is None:
        periods = int((end - start) // freq) + 1
    elif start is None:
        start = end - (periods - 1) * freq
    elif end is None:
        end = start + (periods - 1) * freq

    breaks = np.linspace(start, end, periods)
    if all(is_integer(given) for given in com._not_none(start, end, freq)):
        # np.linspace always produces float output
        breaks = maybe_downcast_to_dtype(breaks, 'int64')

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
Beispiel #14
0
def rain(in_testing, params):
    """Summarise the most recent uploads of sensor ``2000`` per gauge.

    Aggregates the ``sensors`` collection so that, per timestamp, the last
    ``t1``/``h``/``r`` values are kept (exposed as ``r1``/``r2``/``r3``),
    sorts the result newest first and computes statistics for each of the
    three series over a window of ``params["size"]`` rows.

    Parameters
    ----------
    in_testing : str
        Comma-separated integer ids; the first three are attached, in order,
        to the ``r1``, ``r2`` and ``r3`` summaries.
    params : mapping
        Must provide ``"size"`` (number of rows in the window) and
        ``"tScore"`` (multiplier used for the margin of error).

    Returns
    -------
    list of dict
        One dict per series with the keys ``"id"``, ``"uploads"`` (row dicts
        without null/NaN entries), ``"avg"`` and ``"MOE"`` (margin of error
        as a percentage of the mean).

    Raises
    ------
    ValueError
        If an id in ``in_testing`` is not an integer.
    IndexError
        If ``in_testing`` holds fewer than three ids.
    """
    # A real list (not a lazy ``map`` object) so the ids can be indexed on
    # Python 3 as well as Python 2.
    deployed = [int(token) for token in in_testing.split(',')]

    pipeline = [{
        "$match": {
            "id": 2000
        }
    }, {
        "$group": {
            "_id": "$ts",
            "r1": {
                "$last": "$t1"
            },
            "r2": {
                "$last": "$h"
            },
            "r3": {
                "$last": "$r"
            }
        }
    }, {
        "$sort": SON([("_id", -1)])
    }]
    data = list(sensors.aggregate(pipeline, allowDiskUse=True))

    ### Set Sample size
    sample = params["size"]
    tScore = params["tScore"]

    df = pd.DataFrame(data).rename(columns={'_id': 'ts'})
    # Offset by one, as the first is for subtracting the amount of time
    window = df.iloc[1:sample + 1]

    def _summary(sensor_id, column):
        # ``.loc`` replaces the removed ``.ix`` indexer (label-based here).
        gauge = window.loc[:, ['ts', column]]
        readings = list(gauge[column])
        return {
            "id": sensor_id,
            # ``v == v`` is False only for NaN-like values, so they are dropped
            "uploads": [
                dict((k, _maybe_box_datetimelike(v))
                     for k, v in zip(gauge.columns, row)
                     if v is not None and v == v)
                for row in gauge.values
            ],
            "avg": round(np.mean(readings), 1),
            # NOTE(review): uses the requested sample size, not len(readings)
            "MOE": round(
                np.std(readings) / math.sqrt(sample) * tScore /
                np.mean(readings) * 100, 1)
        }

    return [
        _summary(deployed[position], column)
        for position, column in enumerate(('r1', 'r2', 'r3'))
    ]
Beispiel #15
0
def wind(in_testing, params):
    """Summarise the most recent uploads of sensor ``2001``.

    Aggregates the ``sensors`` collection so that, per timestamp, the last
    ``t1``/``t2``/``h`` values are kept (exposed as ``low``/``med``/``high``),
    sorts the result newest first and computes statistics over a window of
    ``params["size"]`` rows.

    Parameters
    ----------
    in_testing : value accepted by ``int``
        Only validated; it does not influence the result.
    params : mapping
        Must provide ``"size"`` (number of rows in the window) and
        ``"tScore"`` (multiplier used for the margin of error).

    Returns
    -------
    dict
        ``"uploads"`` (one dict per row, without null/NaN entries) plus
        ``"<series>_avg"`` and ``"<series>_MOE"`` for ``low``, ``med`` and
        ``high``.  The MOE is expressed as a percentage of the mean.
    """
    # Kept from the original implementation: a non-integer ``in_testing``
    # still raises, even though the converted value is not used below.
    int(in_testing)

    pipeline = [{
        "$match": {
            "id": 2001
        }
    }, {
        "$group": {
            "_id": "$ts",
            "low": {
                "$last": "$t1"
            },
            "med": {
                "$last": "$t2"
            },
            "high": {
                "$last": "$h"
            }
        }
    }, {
        "$sort": SON([("_id", -1)])
    }]
    data = list(sensors.aggregate(pipeline, allowDiskUse=True))

    ### Set Sample size
    sample = params["size"]
    tScore = params["tScore"]

    frame = pd.DataFrame(data).rename(columns={'_id': 'ts'})
    # Offset by one, as the first is for subtracting the amount of time
    df = frame.iloc[1:sample + 1]

    def _average(values):
        return round(np.mean(values), 1)

    def _margin_of_error(values):
        # NOTE(review): the standard error divides by the *requested* sample
        # size, not by len(values) -- confirm this is intended when fewer
        # rows than ``sample`` are available.
        return round(
            np.std(values) / math.sqrt(sample) * tScore / np.mean(values) *
            100, 1)

    result = {
        # ``v == v`` is False only for NaN-like values, so they are dropped
        "uploads": [
            dict((k, _maybe_box_datetimelike(v))
                 for k, v in zip(df.columns, row)
                 if v is not None and v == v)
            for row in df.values
        ],
    }
    # Each series is summarised from its own column; the previous version
    # derived "high_avg" from the "low" readings by mistake.
    for series in ("low", "med", "high"):
        values = list(df[series])
        result[series + "_avg"] = _average(values)
        result[series + "_MOE"] = _margin_of_error(values)
    return result
Beispiel #16
0
def interval_range(start=None,
                   end=None,
                   periods=None,
                   freq=None,
                   name=None,
                   closed='right'):
    """
    Create an IntervalIndex of equally sized, consecutive intervals.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound of the generated intervals
    end : numeric or datetime-like, default None
        Right bound of the generated intervals
    periods : integer, default None
        Number of intervals to generate
    freq : numeric, string, or DateOffset, default None
        The length of each interval. Must match the type of start and end,
        e.g. 2 for numeric, or '5H' for datetime-like.  When not given, 1 is
        used for numeric and 'D' (calendar daily) for datetime-like.
    name : string, default None
        Name of the resulting IntervalIndex
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    rng : IntervalIndex

    Raises
    ------
    ValueError
        If, after the ``freq`` default has been applied, not exactly three
        of ``start``, ``end``, ``periods`` and ``freq`` are set, if an
        endpoint is neither numeric nor datetime-like, or if ``freq`` cannot
        be converted to a DateOffset.
    TypeError
        If ``periods`` is not a number or ``start``, ``end`` and ``freq``
        are not type compatible.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
                          end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]]
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the distance between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
                          periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]]
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
                  closed='both', dtype='interval[int64]')

    See Also
    --------
    IntervalIndex : an Index of intervals that are all closed on the same side.
    """
    start = com._maybe_box_datetimelike(start)
    end = com._maybe_box_datetimelike(end)
    # prefer ``start`` as the bound that determines the kind of range
    if start is not None:
        endpoint = start
    else:
        endpoint = end

    # a default freq is only filled in when another argument is missing
    if freq is None and com._any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else 'D'

    n_given = com._count_not_none(start, end, periods, freq)
    if n_given != 3:
        raise ValueError('Of the four parameters: start, end, periods, and '
                         'freq, exactly three must be specified')

    if not _is_valid_endpoint(start):
        start_msg = 'start must be numeric or datetime-like, got {start}'
        raise ValueError(start_msg.format(start=start))
    if not _is_valid_endpoint(end):
        end_msg = 'end must be numeric or datetime-like, got {end}'
        raise ValueError(end_msg.format(end=end))

    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods):
        if periods is not None:
            periods_msg = 'periods must be a number, got {periods}'
            raise TypeError(periods_msg.format(periods=periods))

    if freq is not None:
        if not is_number(freq):
            try:
                freq = to_offset(freq)
            except ValueError:
                raise ValueError('freq must be numeric or convertible to '
                                 'DateOffset, got {freq}'.format(freq=freq))

    # verify type compatibility (every pair is checked before deciding)
    checks = [
        _is_type_compatible(start, end),
        _is_type_compatible(start, freq),
        _is_type_compatible(end, freq),
    ]
    if not all(checks):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com._all_not_none(start, end, freq):
            remainder = (end - start) % freq
            end = end - remainder

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        # np.linspace always produces float output
        if all(is_integer(given) for given in com._not_none(start, end, freq)):
            breaks = maybe_downcast_to_dtype(breaks, 'int64')
    else:
        # delegate to the appropriate range function
        range_func = (date_range
                      if isinstance(endpoint, Timestamp) else timedelta_range)
        breaks = range_func(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)