def test_combineAdd(self):
    """Check the deprecated ``DataFrame.combineAdd``.

    Every ``combineAdd`` call is wrapped in ``assert_produces_warning`` so
    the test also verifies that the deprecation ``FutureWarning`` is raised.
    """
    # Trivial case: a frame combined with itself doubles every value.
    with tm.assert_produces_warning(FutureWarning):
        doubled = self.frame.combineAdd(self.frame)
    assert_frame_equal(doubled, self.frame * 2)

    # More rigorous: frames of different width that both contain NaN.
    narrow = DataFrame([[1., nan, nan, 2., nan]], columns=np.arange(5))
    wide = DataFrame([[2., 3., nan, 2., 6., nan]], columns=np.arange(6))
    expected = DataFrame([[3., 3., nan, 4., 6., nan]], columns=np.arange(6))

    with tm.assert_produces_warning(FutureWarning):
        summed = narrow.combineAdd(wide)
    assert_frame_equal(summed, expected)

    # The same data transposed must give the transposed result.
    with tm.assert_produces_warning(FutureWarning):
        summed_transposed = narrow.T.combineAdd(wide.T)
    assert_frame_equal(summed_transposed, expected.T)

    # combineAdd must agree with a generic combine using operator.add.
    via_combine = narrow.combine(wide, operator.add, fill_value=0.)
    assert_frame_equal(expected, via_combine)

    # Corner cases: an empty frame on either side leaves the other intact.
    with tm.assert_produces_warning(FutureWarning):
        frame_plus_empty = self.frame.combineAdd(self.empty)
    assert_frame_equal(frame_plus_empty, self.frame)

    with tm.assert_produces_warning(FutureWarning):
        empty_plus_frame = self.empty.combineAdd(self.frame)
    assert_frame_equal(empty_plus_frame, self.frame)

    # Integer corner case.
    five = DataFrame({'x': [5]})
    one = DataFrame({'x': [1]})
    six = DataFrame({'x': [6]})
    with tm.assert_produces_warning(FutureWarning):
        int_sum = five.combineAdd(one)
    assert_frame_equal(int_sum, six)

    # Mixed type GH2191: columns present on only one side.
    with_b = DataFrame({'A': [1, 2], 'B': [3, 4]})
    with_c = DataFrame({'A': [1, 2], 'C': [5, 6]})
    with tm.assert_produces_warning(FutureWarning):
        rs = with_b.combineAdd(with_c)
    xp = DataFrame({'A': [2, 4], 'B': [3, 4.], 'C': [5, 6.]})
    assert_frame_equal(xp, rs)
class Timeseries(object):
    """One or more named series held in a pandas DataFrame.

    The column names are tracked separately (see ``columns``) so that the
    order in which series were supplied or added is remembered.
    """

    def __init__(self, data):
        """
        Can be called with either:

        A DataFrame. Preferred.

        timeseries_dict, a dict with UTC datetimes as keys and floats
        as values.

        A list of such dicts.

        This works like a pandas DataFrame, except we keep track of the
        order of column names."""
        if isinstance(data, DataFrame):
            self._dataframe = data
            self._columns = tuple(data.columns)
        elif isinstance(data, dict):
            self._dataframe = DataFrame({"data": Series(data)})
            self._columns = ("data",)
        else:
            # Materialise once: a one-shot iterable (e.g. a generator) would
            # otherwise be exhausted before the column names are derived.
            series_list = list(data)
            names = tuple(
                "data_{0}".format(i) for i in range(len(series_list)))
            self._dataframe = DataFrame(dict(zip(names, series_list)))
            self._columns = names

    def add(self, timeseries):
        """Add the columns from timeseries to the dataframe of this
        timeseries.

        Columns that exist on one side only are carried over unchanged.
        Columns present on both sides are summed, where a value missing on
        one side counts as 0. Column names already tracked are not tracked a
        second time.
        """
        # DataFrame.combineAdd was deprecated and later removed from pandas;
        # add(..., fill_value=0.) is its documented replacement.
        self._dataframe = self._dataframe.add(
            timeseries._dataframe, fill_value=0.)

        tracked = list(self.columns)
        for name in timeseries.columns:
            if name not in tracked:
                tracked.append(name)
        self._columns = tuple(tracked)

    @property
    def dataframe(self):
        """The underlying pandas DataFrame."""
        return self._dataframe

    @property
    def timeseries(self):
        """Return the first of the series in dataframe"""
        return self._dataframe[self._columns[0]].dropna()

    def get_series(self, columnname):
        """Return the column named columnname with missing values dropped."""
        return self._dataframe[columnname].dropna()

    @property
    def columns(self):
        """Tuple of the tracked column names, in tracked order."""
        return self._columns

    def label(self, series_name):
        """Only the part of the columns before '||'."""
        return series_name.split("||")[0]

    def unit(self, series_name):
        """Only the part of the columns after '||', or None."""
        return series_name.split("||")[1] if "||" in series_name else None

    def dates(self):
        """Return the index (keys) of the first series."""
        return self.timeseries.keys()

    def values(self):
        """Return the values of the first series as a list."""
        return list(self.timeseries)

    def latest(self):
        """Return the last entry of the first series (via ``tail(1)``)."""
        return self.timeseries.tail(1)

    def data(self):
        """Return the first series as a list of ``[date, value]`` pairs."""
        # Builtin zip instead of itertools.izip, which only exists on
        # Python 2.
        return [[key, value]
                for key, value in zip(self.dates(), self.values())]

    def __len__(self):
        """Number of rows in the underlying dataframe."""
        return len(self._dataframe) if self._dataframe is not None else 0
class Timeseries(object):
    """One or more named series held in a pandas DataFrame.

    The column names are tracked separately (see ``columns``) so that the
    order in which series were supplied or added is remembered. A column
    name may carry a unit after a '||' separator, e.g. 'label||unit'.
    """

    def __init__(self, data):
        """
        Can be called with either:

        A DataFrame. Preferred.

        timeseries_dict, a dict with UTC datetimes as keys and floats
        as values.

        A list of such dicts.

        This works like a pandas DataFrame, except we keep track of the
        order of column names."""
        if isinstance(data, DataFrame):
            self._dataframe = data
            self._columns = tuple(data.columns)
        elif isinstance(data, dict):
            self._dataframe = DataFrame({'data': Series(data)})
            self._columns = ('data',)
        else:
            # Materialise once: a one-shot iterable (e.g. a generator) would
            # otherwise be exhausted before the column names are derived.
            series_list = list(data)
            names = tuple(
                'data_{0}'.format(i) for i in range(len(series_list)))
            self._dataframe = DataFrame(dict(zip(names, series_list)))
            self._columns = names

    def add(self, timeseries):
        """Add the columns from timeseries to the dataframe of this
        timeseries.

        Columns that exist on one side only are carried over unchanged.
        Columns present on both sides are summed, where a value missing on
        one side counts as 0. Column names already tracked are not tracked a
        second time.
        """
        # DataFrame.combineAdd was deprecated and later removed from pandas;
        # add(..., fill_value=0.) is its documented replacement.
        self._dataframe = self._dataframe.add(
            timeseries._dataframe, fill_value=0.)

        tracked = list(self.columns)
        for name in timeseries.columns:
            if name not in tracked:
                tracked.append(name)
        self._columns = tuple(tracked)

    @property
    def dataframe(self):
        """The underlying pandas DataFrame."""
        return self._dataframe

    @property
    def timeseries(self):
        """Return the first of the series in dataframe"""
        return self._dataframe[self._columns[0]].dropna()

    def get_series(self, columnname):
        """Return the column named columnname with missing values dropped."""
        return self._dataframe[columnname].dropna()

    def to_csv(self, outfile, sep=',', timezone=None,
               date_format='%Y-%m-%d %H:%M',
               header_date_format='Datum + tijd'):
        """Write the data of all timeseries to a CSV file.

        Note: changes the timezone of all datetimes when timezone is given!

        outfile is a writable text file object. sep is the field separator.
        timezone, if not None, is applied with set_timezone() first.
        date_format is passed on to pandas for formatting datetimes.
        header_date_format is the header text of the first (index) column;
        the default is Dutch for "date + time".
        """
        if timezone is not None:
            self.set_timezone(timezone)

        ordered = list(self.columns)
        headers = [header_date_format]
        headers.extend(self.label_and_unit(column) for column in ordered)
        outfile.write(sep.join(headers) + "\n")

        # The header row above is hand-written, so pandas must not emit its
        # own. Passing the tracked column order keeps the data columns lined
        # up with that header even when the dataframe stores its columns in
        # a different order (e.g. after add()).
        self._dataframe.to_csv(
            outfile, sep=sep, mode='a', header=False, columns=ordered,
            date_format=date_format)

    def set_timezone(self, timezone):
        """Sets this timezone on all datetimes.

        Timezone is a pytz timezone object. The conversion is done with
        DataFrame.tz_convert.
        """
        self._dataframe = self._dataframe.tz_convert(timezone)

    @property
    def columns(self):
        """Tuple of the tracked column names, in tracked order."""
        return self._columns

    def label(self, series_name):
        """Only the part of the columns before '||'."""
        return series_name.split('||')[0]

    def unit(self, series_name):
        """Only the part of the columns after '||', or None."""
        return series_name.split('||')[1] if '||' in series_name else None

    def label_and_unit(self, series_name):
        """Return 'label (unit)', or just the label if there is no unit."""
        unit = self.unit(series_name)
        if unit:
            return "{} ({})".format(self.label(series_name), unit)
        return self.label(series_name)

    def dates(self):
        """Return the index (keys) of the first series."""
        return self.timeseries.keys()

    def values(self):
        """Return the values of the first series as a list."""
        return list(self.timeseries)

    def latest(self):
        """Return the last entry of the first series (via ``tail(1)``)."""
        return self.timeseries.tail(1)

    def data(self):
        """Return the first series as a list of ``[date, value]`` pairs."""
        # Builtin zip instead of itertools.izip, which only exists on
        # Python 2.
        return [[key, value]
                for key, value in zip(self.dates(), self.values())]

    def __len__(self):
        """Number of rows in the underlying dataframe."""
        return len(self._dataframe) if self._dataframe is not None else 0