def data():
    """Return the hourly temperature series with failed readings patched.

    The readings are loaded from ``temp_data.pickle`` in the
    ``_PATH_TO_HERE`` directory, sorted and put on a regular hourly index.
    Two extended periods with failed readings are blanked out, every missing
    value is then taken from the series parsed from ``_TEMP_DATA`` (Gvarv),
    and whatever is still missing afterwards is interpolated.

    Returns:
        The patched series, named "Temperature".
    """
    pickle_path = os.path.join(_PATH_TO_HERE, 'temp_data.pickle')
    readings = pd.read_pickle(pickle_path).sort_index().asfreq("H")

    # Extended periods with failed readings; blanked so that they are
    # replaced with the Gvarv values below.
    failed_periods = (
        ('2004-11-11 14:00', '2004-11-21 23:00'),
        ('2005-02-08 08:00', '2005-02-27 23:00'),
    )
    for first, last in failed_periods:
        readings[first:last] = np.nan

    # Shorter periods with failed readings are not blanked here; the open
    # question is whether they can be left to the cleansing to take care of:
    #   2005-09-07 08:00 -- 2005-09-08 04:00
    #   2006-02-28 05:00 -- 2006-02-28 04:00  (as recorded; end precedes start)
    #   2006-06-17 11:00 -- 2006-06-18 08:00
    #   2006-12-19 06:00 -- 2006-12-21 03:00

    replacement = xml.parse(_TEMP_DATA)[readings.index[0]:].asfreq("H")
    _, replacement_aligned = readings.align(replacement, join="left")

    # np.where used to return a pandas time series with old NumPy but now
    # returns a plain ndarray, hence the slice assignment back into the
    # series instead of rebinding the name.
    missing = np.isnan(readings)
    readings[:] = np.where(missing, replacement_aligned, readings)

    readings = readings.interpolate()
    readings.name = "Temperature"

    # A couple of outliers and zero-recordings are likewise left untouched
    # (interpolate them away here, or leave them to the cleansing?):
    #   2004-11-29 08:00
    #   2005-11-30 00:00 -- 2005-11-30 02:00
    #   2006-10-27 09:00
    return readings
def _collect_and_plot(
        files,
        reference_dir='/Users/tidemann/Documents/NTNU/devel/data/eklima/Telemark/'):
    """Collect hourly temperature readings from files and plot them.

    Every file is parsed with ``parse.parse_file``; the first and last parsed
    sections are skipped and only sections whose 'Plant' entry is ``['tmp']``
    are used.  Each such section becomes one hourly time series.  If any
    series were collected, each entry in ``reference_dir`` is parsed with
    ``xml.parse`` and plotted together with the concatenated readings.

    Args:
        files: Iterable of file names to read temperature sections from.
        reference_dir: Directory whose entries are plotted against the
            collected readings.  Defaults to the previously hard-coded
            location, so existing callers are unaffected.

    Returns:
        None.  Results are shown as plots and as messages on stdout.
    """
    collected = []
    location = None
    for f in files:
        temperatures = [
            section[1] for section in parse.parse_file(f)[1:-1]
            if section[1]['Plant'] == ['tmp']
        ]
        for t in temperatures:
            if t['Step'][0] != '0000-00-00.01:00:00':
                print('Not hourly readings of temperature. Abort.')
                # NOTE: despite the message, this only abandons the remaining
                # sections of the current file; later files are still read.
                break
            dates = ts.date_array(start_date=ts.Date('H', t['Start'][0]),
                                  length=len(t['Value']))
            # Only the part of each value before the first '/' is numeric.
            values = [float(value.rsplit('/')[0]) for value in t['Value']]
            collected.append(ts.TimeSeries(data=values, dates=dates))
            if location and t['Installation'][0] != location:
                print('Location changed during reading of gs2 files. Probably some bad grouping of gs2 files.')
            location = t['Installation'][0]

    if not collected:
        print('No temperature data.')
        return

    for filename in os.listdir(reference_dir):
        try:
            series = xml.parse(os.path.join(reference_dir, filename))
            sg.utils.plot_time_series([ts.concatenate(collected), series],
                                      ['b-', 'r-'], [location, filename])
        except Exception:
            # Best effort: report the entry and move on.  Catching Exception
            # rather than everything lets KeyboardInterrupt and SystemExit
            # propagate, so the loop can still be interrupted.
            print('%s had no data.' % filename)