def __init__(self, file_=None):
    '''Load time series and key/value properties from a spreadsheet.

    file_ is passed to xlrd.open_workbook(); only the first sheet is read.
    Row 1 (zero-based) holds the column headers and data starts at row 2.
    The layout expected is:

      * Column 0, headed 'date': Excel dates. Rows whose date cell is not
        a float are skipped, in the date column and in every series
        column alike.
      * Columns 1..n: one time series per column, named by its header.
        The first empty header ends the series columns.
      * Columns headed 'property' and 'value': key/value pairs. Rows with
        an empty key are skipped and empty values are stored as None.

    On return, self.data maps each series name to a (data, mask) pair
    created with Date_Vector.zeros(min(dates), max(dates)). Each cell
    value is divided evenly over the days up to the next date (the last
    date counts as one day). Days whose cell is not a float are NaN in
    data and False in mask; all other days are True in mask.
    self.properties holds the key/value pairs.

    Raises AssertionError if file_ is None (not implemented), if 'date'
    is not the first header, or (while reading a series) if the dates are
    not strictly increasing. A missing 'date', 'property' or 'value'
    header raises ValueError from list.index().'''
    if file_ is None:
        # Explicit raise instead of `assert False` so the check survives
        # `python -O`; exception type and message are unchanged.
        raise AssertionError('unimplemented')

    book = xlrd.open_workbook(file_)
    sheet = book.sheet_by_index(0)
    headers = sheet.row_values(1)

    # read dates
    assert (headers.index('date') == 0)
    date_cells = sheet.col_values(0, start_rowx=2)
    # Remember which rows carry a real date, so that series values are
    # taken from exactly those rows. Filtering only the dates would shift
    # every value below a non-date row onto the wrong date.
    date_rows = [r for (r, cell) in enumerate(date_cells)
                 if isinstance(cell, float)]
    dates = [datetime(*xlrd.xldate_as_tuple(date_cells[r], book.datemode))
             for r in date_rows]

    # read time series
    self.data = dict()
    for (i, sname) in enumerate(headers[1:], start=1):
        if sname == '':
            break
        column = sheet.col_values(i, start_rowx=2)
        values = [column[r] if isinstance(column[r], float) else None
                  for r in date_rows]
        first_date = min(dates)
        last_date = max(dates)
        data = math_.Date_Vector.zeros(first_date, last_date)
        # Builtin bool: the np.bool alias is absent from NumPy 1.24-1.26.
        mask = math_.Date_Vector.zeros(first_date, last_date, dtype=bool)
        out_i = 0
        for (j, value) in enumerate(values):
            # Each value covers the days up to the next date; the final
            # value covers a single day.
            if j + 1 < len(dates):
                duration = time_.days_diff(dates[j + 1], dates[j])
            else:
                duration = 1
            assert (duration > 0)
            if value is None:
                fill, known = np.nan, False
            else:
                fill, known = value / duration, True
            for k in range(out_i, out_i + duration):
                data[k] = fill
                mask[k] = known
            out_i += duration
        self.data[sname] = (data, mask)

    # read property key/values
    key_idx = headers.index('property')
    val_idx = headers.index('value')
    keys = sheet.col_values(key_idx, start_rowx=2)
    vals = sheet.col_values(val_idx, start_rowx=2)
    self.properties = dict((k, v if v != '' else None)
                           for (k, v) in zip(keys, vals) if k != '')
def __init__(self, file_=None):
    '''Load time series and key/value properties from a spreadsheet.

    file_ is passed to xlrd.open_workbook(); only the first sheet is read.
    Row 1 (zero-based) holds the column headers and data starts at row 2.
    The layout expected is:

      * Column 0, headed 'date': Excel dates. Rows whose date cell is not
        a float are skipped, in the date column and in every series
        column alike.
      * Columns 1..n: one time series per column, named by its header.
        The first empty header ends the series columns.
      * Columns headed 'property' and 'value': key/value pairs. Rows with
        an empty key are skipped and empty values are stored as None.

    On return, self.data maps each series name to a (data, mask) pair
    created with Date_Vector.zeros(min(dates), max(dates)). Each cell
    value is divided evenly over the days up to the next date (the last
    date counts as one day). Days whose cell is not a float are NaN in
    data and False in mask; all other days are True in mask.
    self.properties holds the key/value pairs.

    Raises AssertionError if file_ is None (not implemented), if 'date'
    is not the first header, or (while reading a series) if the dates are
    not strictly increasing. A missing 'date', 'property' or 'value'
    header raises ValueError from list.index().'''
    if file_ is None:
        # Explicit raise instead of `assert False` so the check survives
        # `python -O`; exception type and message are unchanged.
        raise AssertionError('unimplemented')

    book = xlrd.open_workbook(file_)
    sheet = book.sheet_by_index(0)
    headers = sheet.row_values(1)

    # read dates
    assert (headers.index('date') == 0)
    date_cells = sheet.col_values(0, start_rowx=2)
    # Remember which rows carry a real date, so that series values are
    # taken from exactly those rows. Filtering only the dates would shift
    # every value below a non-date row onto the wrong date.
    date_rows = [r for (r, cell) in enumerate(date_cells)
                 if isinstance(cell, float)]
    dates = [datetime(*xlrd.xldate_as_tuple(date_cells[r], book.datemode))
             for r in date_rows]

    # read time series
    self.data = dict()
    for (i, sname) in enumerate(headers[1:], start=1):
        if sname == '':
            break
        column = sheet.col_values(i, start_rowx=2)
        values = [column[r] if isinstance(column[r], float) else None
                  for r in date_rows]
        first_date = min(dates)
        last_date = max(dates)
        data = math_.Date_Vector.zeros(first_date, last_date)
        # Builtin bool: the np.bool alias is absent from NumPy 1.24-1.26.
        mask = math_.Date_Vector.zeros(first_date, last_date, dtype=bool)
        out_i = 0
        # enumerate()/range() replace the Python-2-only xrange(), which
        # does not exist on Python 3.
        for (j, value) in enumerate(values):
            # Each value covers the days up to the next date; the final
            # value covers a single day.
            if j + 1 < len(dates):
                duration = time_.days_diff(dates[j + 1], dates[j])
            else:
                duration = 1
            assert (duration > 0)
            if value is None:
                fill, known = np.nan, False
            else:
                fill, known = value / duration, True
            for k in range(out_i, out_i + duration):
                data[k] = fill
                mask[k] = known
            out_i += duration
        self.data[sname] = (data, mask)

    # read property key/values
    key_idx = headers.index('property')
    val_idx = headers.index('value')
    keys = sheet.col_values(key_idx, start_rowx=2)
    vals = sheet.col_values(val_idx, start_rowx=2)
    self.properties = dict((k, v if v != '' else None)
                           for (k, v) in zip(keys, vals) if k != '')
def resize(self, first_day, last_day):
    '''Build a new Date_Vector running from first_day through last_day.

    >>> a = Date_Vector('2013-06-02', np.arange(2, 7))
    >>> a
    Date_Vector('2013-06-02', [2, 3, 4, 5, 6])

    A bound passed as None keeps the bound this vector already has.
    Moving a bound inward drops the elements that now fall outside it:

    >>> s = a.resize('2013-06-03', None)
    >>> s
    Date_Vector('2013-06-03', [3, 4, 5, 6])
    >>> a.resize(None, '2013-06-04')
    Date_Vector('2013-06-02', [2, 3, 4])
    >>> a.resize('2013-06-03', '2013-06-04')
    Date_Vector('2013-06-03', [3, 4])
    >>> a.resize('2013-06-06', None)
    Date_Vector('2013-06-06', [6])
    >>> a.resize(None, '2013-06-02')
    Date_Vector('2013-06-02', [2])

    When the requested range and the current range have no day in
    common, the result is None:

    >>> a.resize('2013-06-07', None) is None
    True
    >>> a.resize(None, '2013-06-01') is None
    True

    A pure shrink may hand back a view onto the original data (this is
    not guaranteed):

    >>> np.may_share_memory(a, s)
    True

    Moving a bound outward pads that side with zeros:

    >>> g = a.resize('2013-06-01', '2013-06-07')
    >>> g
    Date_Vector('2013-06-01', [0, 2, 3, 4, 5, 6, 0])
    >>> a.resize('2013-06-01', None)
    Date_Vector('2013-06-01', [0, 2, 3, 4, 5, 6])
    >>> a.resize(None, '2013-06-07')
    Date_Vector('2013-06-02', [2, 3, 4, 5, 6, 0])

    One side can grow while the other shrinks:

    >>> gs = a.resize('2013-06-01', '2013-06-05')
    >>> gs
    Date_Vector('2013-06-01', [0, 2, 3, 4, 5])
    >>> a.resize('2013-06-03', '2013-06-07')
    Date_Vector('2013-06-03', [3, 4, 5, 6, 0])

    Any result that involved padding is not a view:

    >>> np.may_share_memory(a, g)
    False
    >>> np.may_share_memory(a, gs)
    False

    Resizing to the current bounds is allowed and yields a shallow copy:

    >>> n = a.resize(None, None)
    >>> n
    Date_Vector('2013-06-02', [2, 3, 4, 5, 6])
    >>> n is a
    False
    >>> np.may_share_memory(a, n)
    True
    '''
    # Normalize the requested bounds, falling back to the current ones.
    new_first = time_.dateify(first_day) or self.first_day
    new_last = time_.dateify(last_day) or self.last_day

    # No day shared between the old and the new range: nothing to return.
    overlap_first = max(new_first, self.first_day)
    overlap_last = min(new_last, self.last_day)
    if overlap_first > overlap_last:
        return None

    # A positive shift means elements are dropped on that side, a negative
    # one means that many zeros are padded on.
    head_shift = time_.days_diff(new_first, self.first_day)
    drop_head = head_shift if head_shift > 0 else 0
    pad_head = -head_shift if head_shift < 0 else 0

    tail_shift = time_.days_diff(self.last_day, new_last)
    drop_tail = tail_shift if tail_shift > 0 else 0
    pad_tail = -tail_shift if tail_shift < 0 else 0

    # The surviving part of the existing data, taken as a plain slice.
    kept = self[drop_head:len(self) - drop_tail]

    if pad_head != 0 or pad_tail != 0:
        pieces = [np.zeros(pad_head, dtype=self.dtype),
                  kept,
                  np.zeros(pad_tail, dtype=self.dtype)]
        return Date_Vector(new_first, np.hstack(pieces))

    # Pure shrink (or no-op): skip hstack() so the data is not copied.
    # Callers needing independent storage can make a deep copy themselves.
    return Date_Vector(new_first, kept)
def resize(self, first_day, last_day):
    '''Build a new Date_Vector running from first_day through last_day.

    >>> a = Date_Vector('2013-06-02', np.arange(2, 7))
    >>> a
    Date_Vector('2013-06-02', [2, 3, 4, 5, 6])

    A bound passed as None keeps the bound this vector already has.
    Moving a bound inward drops the elements that now fall outside it:

    >>> s = a.resize('2013-06-03', None)
    >>> s
    Date_Vector('2013-06-03', [3, 4, 5, 6])
    >>> a.resize(None, '2013-06-04')
    Date_Vector('2013-06-02', [2, 3, 4])
    >>> a.resize('2013-06-03', '2013-06-04')
    Date_Vector('2013-06-03', [3, 4])
    >>> a.resize('2013-06-06', None)
    Date_Vector('2013-06-06', [6])
    >>> a.resize(None, '2013-06-02')
    Date_Vector('2013-06-02', [2])

    When the requested range and the current range have no day in
    common, the result is None:

    >>> a.resize('2013-06-07', None) is None
    True
    >>> a.resize(None, '2013-06-01') is None
    True

    A pure shrink may hand back a view onto the original data (this is
    not guaranteed):

    >>> np.may_share_memory(a, s)
    True

    Moving a bound outward pads that side with zeros:

    >>> g = a.resize('2013-06-01', '2013-06-07')
    >>> g
    Date_Vector('2013-06-01', [0, 2, 3, 4, 5, 6, 0])
    >>> a.resize('2013-06-01', None)
    Date_Vector('2013-06-01', [0, 2, 3, 4, 5, 6])
    >>> a.resize(None, '2013-06-07')
    Date_Vector('2013-06-02', [2, 3, 4, 5, 6, 0])

    One side can grow while the other shrinks:

    >>> gs = a.resize('2013-06-01', '2013-06-05')
    >>> gs
    Date_Vector('2013-06-01', [0, 2, 3, 4, 5])
    >>> a.resize('2013-06-03', '2013-06-07')
    Date_Vector('2013-06-03', [3, 4, 5, 6, 0])

    Any result that involved padding is not a view:

    >>> np.may_share_memory(a, g)
    False
    >>> np.may_share_memory(a, gs)
    False

    Resizing to the current bounds is allowed and yields a shallow copy:

    >>> n = a.resize(None, None)
    >>> n
    Date_Vector('2013-06-02', [2, 3, 4, 5, 6])
    >>> n is a
    False
    >>> np.may_share_memory(a, n)
    True
    '''
    # Normalize the requested bounds, falling back to the current ones.
    new_first = time_.dateify(first_day) or self.first_day
    new_last = time_.dateify(last_day) or self.last_day

    # No day shared between the old and the new range: nothing to return.
    overlap_first = max(new_first, self.first_day)
    overlap_last = min(new_last, self.last_day)
    if overlap_first > overlap_last:
        return None

    # A positive shift means elements are dropped on that side, a negative
    # one means that many zeros are padded on.
    head_shift = time_.days_diff(new_first, self.first_day)
    drop_head = head_shift if head_shift > 0 else 0
    pad_head = -head_shift if head_shift < 0 else 0

    tail_shift = time_.days_diff(self.last_day, new_last)
    drop_tail = tail_shift if tail_shift > 0 else 0
    pad_tail = -tail_shift if tail_shift < 0 else 0

    # The surviving part of the existing data, taken as a plain slice.
    kept = self[drop_head:len(self) - drop_tail]

    if pad_head != 0 or pad_tail != 0:
        pieces = [np.zeros(pad_head, dtype=self.dtype),
                  kept,
                  np.zeros(pad_tail, dtype=self.dtype)]
        return Date_Vector(new_first, np.hstack(pieces))

    # Pure shrink (or no-op): skip hstack() so the data is not copied.
    # Callers needing independent storage can make a deep copy themselves.
    return Date_Vector(new_first, kept)