Esempio n. 1
0
 def __init__(self, file_=None):
     '''Load time series and key/value properties from an Excel workbook.

        file_ is passed to xlrd.open_workbook() and only the first sheet is
        read. The row at index 1 holds the column headers; cells are read
        from row index 2 onward. The first header must be 'date'. Every
        following column, up to the first empty header, is one time series.
        The columns headed 'property' and 'value' hold the properties.

        Each numeric series cell is divided by the number of days until the
        next date (1 for the last date) and that per-day share is written
        to every day of the interval. self.data maps the series name to a
        (data, mask) pair of Date_Vectors: mask is True on days that came
        from a numeric cell; on the other days data is NaN and mask is
        False.

        self.properties maps every non-empty property key to its value,
        with empty values replaced by None.

        Raises AssertionError if file_ is None (not implemented), if 'date'
        is not the first header, or if time_.days_diff() between two
        consecutive dates is not positive.'''
     if (file_ is None):
         # Raise explicitly: a bare "assert False" is stripped under -O and
         # would silently leave a half-initialized object behind.
         raise AssertionError('unimplemented')
     book = xlrd.open_workbook(file_)
     sheet = book.sheet_by_index(0)
     headers = sheet.row_values(1)
     # read dates
     if (headers.index('date') != 0):
         raise AssertionError("'date' must be the first column header")
     # NOTE(review): non-float date cells are dropped here, but the series
     # columns below are not filtered the same way; a non-date cell in the
     # middle of the column would presumably shift values against dates --
     # confirm such cells only occur after the last date.
     dates = [
         datetime(*xlrd.xldate_as_tuple(cell, book.datemode))
         for cell in sheet.col_values(0, start_rowx=2)
         if isinstance(cell, float)
     ]
     # read time series
     self.data = dict()
     for (col, sname) in enumerate(headers[1:], start=1):
         if (sname == ''):
             # first empty header ends the run of series columns
             break
         values = [
             cell if isinstance(cell, float) else None
             for cell in sheet.col_values(col, start_rowx=2)
         ]
         first_day = min(dates)
         last_day = max(dates)
         data = math_.Date_Vector.zeros(first_day, last_day)
         # Builtin bool: the np.bool alias was removed in NumPy 1.24.
         mask = math_.Date_Vector.zeros(first_day, last_day, dtype=bool)
         out_i = 0
         for (j, day) in enumerate(dates):
             if (j + 1 < len(dates)):
                 duration = time_.days_diff(dates[j + 1], day)
             else:
                 # nothing follows the last date; give it a single day
                 duration = 1
             if (not duration > 0):
                 raise AssertionError(
                     'non-positive duration between consecutive dates')
             # Spread the cell evenly over the days it covers; a missing
             # cell marks the whole interval as absent.
             if (values[j] is not None):
                 per_day = values[j] / duration
                 present = True
             else:
                 per_day = np.nan
                 present = False
             for k in range(out_i, out_i + duration):
                 data[k] = per_day
                 mask[k] = present
             out_i += duration
         self.data[sname] = (data, mask)
     # read property key/values
     key_idx = headers.index('property')
     val_idx = headers.index('value')
     self.properties = {
         key: (val if val != '' else None)
         for (key, val) in zip(sheet.col_values(key_idx, start_rowx=2),
                               sheet.col_values(val_idx, start_rowx=2))
         if key != ''
     }
Esempio n. 2
0
 def __init__(self, file_=None):
    '''Load time series and key/value properties from an Excel workbook.

       file_ is passed to xlrd.open_workbook() and only the first sheet is
       read. The row at index 1 holds the column headers; cells are read
       from row index 2 onward. The first header must be 'date'. Every
       following column, up to the first empty header, is one time series.
       The columns headed 'property' and 'value' hold the properties.

       Each numeric series cell is divided by the number of days until the
       next date (1 for the last date) and that per-day share is written to
       every day of the interval. self.data maps the series name to a
       (data, mask) pair of Date_Vectors: mask is True on days that came
       from a numeric cell; on the other days data is NaN and mask is
       False.

       self.properties maps every non-empty property key to its value, with
       empty values replaced by None.

       Raises AssertionError if file_ is None (not implemented), if 'date'
       is not the first header, or if time_.days_diff() between two
       consecutive dates is not positive.'''
    if (file_ is None):
       # Raise explicitly: a bare "assert False" is stripped under -O and
       # would silently leave a half-initialized object behind.
       raise AssertionError('unimplemented')
    book = xlrd.open_workbook(file_)
    sheet = book.sheet_by_index(0)
    headers = sheet.row_values(1)
    # read dates
    if (headers.index('date') != 0):
       raise AssertionError("'date' must be the first column header")
    # NOTE(review): non-float date cells are dropped here, but the series
    # columns below are not filtered the same way; a non-date cell in the
    # middle of the column would presumably shift values against dates --
    # confirm such cells only occur after the last date.
    dates = [datetime(*xlrd.xldate_as_tuple(cell, book.datemode))
             for cell in sheet.col_values(0, start_rowx=2)
             if isinstance(cell, float)]
    # read time series
    self.data = dict()
    for (col, sname) in enumerate(headers[1:], start=1):
       if (sname == ''):
          # first empty header ends the run of series columns
          break
       values = [cell if isinstance(cell, float) else None
                 for cell in sheet.col_values(col, start_rowx=2)]
       first_day = min(dates)
       last_day = max(dates)
       data = math_.Date_Vector.zeros(first_day, last_day)
       # Builtin bool: the np.bool alias was removed in NumPy 1.24.
       mask = math_.Date_Vector.zeros(first_day, last_day, dtype=bool)
       out_i = 0
       for (j, day) in enumerate(dates):
          if (j+1 < len(dates)):
             duration = time_.days_diff(dates[j+1], day)
          else:
             # nothing follows the last date; give it a single day
             duration = 1
          if (not duration > 0):
             raise AssertionError(
                'non-positive duration between consecutive dates')
          # Spread the cell evenly over the days it covers; a missing cell
          # marks the whole interval as absent.
          if (values[j] is not None):
             per_day = values[j] / duration
             present = True
          else:
             per_day = np.nan
             present = False
          for k in xrange(out_i, out_i + duration):
             data[k] = per_day
             mask[k] = present
          out_i += duration
       self.data[sname] = (data, mask)
    # read property key/values
    key_idx = headers.index('property')
    val_idx = headers.index('value')
    self.properties = {
       key: (val if val != '' else None)
       for (key, val) in zip(sheet.col_values(key_idx, start_rowx=2),
                             sheet.col_values(val_idx, start_rowx=2))
       if key != ''
    }
Esempio n. 3
0
File: math_.py Progetto: reidpr/quac
   def resize(self, first_day, last_day):
      '''Build a Date_Vector spanning first_day through last_day from my
         data.

         >>> a = Date_Vector('2013-06-02', np.arange(2, 7))
         >>> a
         Date_Vector('2013-06-02', [2, 3, 4, 5, 6])

         Either bound may be None, which means "keep my current bound".
         Narrowing the range drops the elements that fall outside it:

         >>> s = a.resize('2013-06-03', None)
         >>> s
         Date_Vector('2013-06-03', [3, 4, 5, 6])
         >>> a.resize(None, '2013-06-04')
         Date_Vector('2013-06-02', [2, 3, 4])
         >>> a.resize('2013-06-03', '2013-06-04')
         Date_Vector('2013-06-03', [3, 4])
         >>> a.resize('2013-06-06', None)
         Date_Vector('2013-06-06', [6])
         >>> a.resize(None, '2013-06-02')
         Date_Vector('2013-06-02', [2])

         When the requested range shares no day with the current one, the
         result is None:

         >>> a.resize('2013-06-07', None) is None
         True
         >>> a.resize(None, '2013-06-01') is None
         True

         A narrowed result can be (though need not be) a view onto the
         original data:

         >>> np.may_share_memory(a, s)
         True

         Widening the range pads the new days with zeros:

         >>> g = a.resize('2013-06-01', '2013-06-07')
         >>> g
         Date_Vector('2013-06-01', [0, 2, 3, 4, 5, 6, 0])
         >>> a.resize('2013-06-01', None)
         Date_Vector('2013-06-01', [0, 2, 3, 4, 5, 6])
         >>> a.resize(None, '2013-06-07')
         Date_Vector('2013-06-02', [2, 3, 4, 5, 6, 0])

         One end may widen while the other narrows:

         >>> gs = a.resize('2013-06-01', '2013-06-05')
         >>> gs
         Date_Vector('2013-06-01', [0, 2, 3, 4, 5])
         >>> a.resize('2013-06-03', '2013-06-07')
         Date_Vector('2013-06-03', [3, 4, 5, 6, 0])

         Any result that was widened owns its data:

         >>> np.may_share_memory(a, g)
         False
         >>> np.may_share_memory(a, gs)
         False

         A resize that changes nothing is allowed too; it gives back a
         shallow copy.

         >>> n = a.resize(None, None)
         >>> n
         Date_Vector('2013-06-02', [2, 3, 4, 5, 6])
         >>> n is a
         False
         >>> np.may_share_memory(a, n)
         True'''
      # Normalize the requested bounds; a missing one falls back to mine.
      new_first = time_.dateify(first_day) or self.first_day
      new_last = time_.dateify(last_day) or self.last_day
      # No day in common between the old and new ranges: nothing to return.
      latest_start = max(new_first, self.first_day)
      earliest_end = min(new_last, self.last_day)
      if (latest_start > earliest_end):
         return None
      # A positive shift drops that many elements at that end; a negative
      # one asks for that many elements of padding instead.
      head_shift = time_.days_diff(new_first, self.first_day)
      drop_head = max(0, head_shift)
      pad_head = max(0, -head_shift)
      tail_shift = time_.days_diff(self.last_day, new_last)
      drop_tail = max(0, tail_shift)
      pad_tail = max(0, -tail_shift)
      # The surviving part of my data; plain slicing does not copy.
      kept = self[drop_head:len(self) - drop_tail]
      if (pad_head != 0 or pad_tail != 0):
         # Only pay for the hstack() copy when padding is really needed;
         # a pure shrink hands the slice through untouched, trading memory
         # independence for speed. Callers can deep copy if they need to.
         kept = np.hstack([np.zeros(pad_head, dtype=self.dtype),
                           kept,
                           np.zeros(pad_tail, dtype=self.dtype)])
      return Date_Vector(new_first, kept)
Esempio n. 4
0
    def resize(self, first_day, last_day):
        '''Build a Date_Vector spanning first_day through last_day from my
           data.

           >>> a = Date_Vector('2013-06-02', np.arange(2, 7))
           >>> a
           Date_Vector('2013-06-02', [2, 3, 4, 5, 6])

           Either bound may be None, which means "keep my current bound".
           Narrowing the range drops the elements that fall outside it:

           >>> s = a.resize('2013-06-03', None)
           >>> s
           Date_Vector('2013-06-03', [3, 4, 5, 6])
           >>> a.resize(None, '2013-06-04')
           Date_Vector('2013-06-02', [2, 3, 4])
           >>> a.resize('2013-06-03', '2013-06-04')
           Date_Vector('2013-06-03', [3, 4])
           >>> a.resize('2013-06-06', None)
           Date_Vector('2013-06-06', [6])
           >>> a.resize(None, '2013-06-02')
           Date_Vector('2013-06-02', [2])

           When the requested range shares no day with the current one, the
           result is None:

           >>> a.resize('2013-06-07', None) is None
           True
           >>> a.resize(None, '2013-06-01') is None
           True

           A narrowed result can be (though need not be) a view onto the
           original data:

           >>> np.may_share_memory(a, s)
           True

           Widening the range pads the new days with zeros:

           >>> g = a.resize('2013-06-01', '2013-06-07')
           >>> g
           Date_Vector('2013-06-01', [0, 2, 3, 4, 5, 6, 0])
           >>> a.resize('2013-06-01', None)
           Date_Vector('2013-06-01', [0, 2, 3, 4, 5, 6])
           >>> a.resize(None, '2013-06-07')
           Date_Vector('2013-06-02', [2, 3, 4, 5, 6, 0])

           One end may widen while the other narrows:

           >>> gs = a.resize('2013-06-01', '2013-06-05')
           >>> gs
           Date_Vector('2013-06-01', [0, 2, 3, 4, 5])
           >>> a.resize('2013-06-03', '2013-06-07')
           Date_Vector('2013-06-03', [3, 4, 5, 6, 0])

           Any result that was widened owns its data:

           >>> np.may_share_memory(a, g)
           False
           >>> np.may_share_memory(a, gs)
           False

           A resize that changes nothing is allowed too; it gives back a
           shallow copy.

           >>> n = a.resize(None, None)
           >>> n
           Date_Vector('2013-06-02', [2, 3, 4, 5, 6])
           >>> n is a
           False
           >>> np.may_share_memory(a, n)
           True'''
        # Normalize the requested bounds; a missing one falls back to mine.
        new_first = time_.dateify(first_day) or self.first_day
        new_last = time_.dateify(last_day) or self.last_day
        # No day in common between the old and new ranges: nothing to return.
        latest_start = max(new_first, self.first_day)
        earliest_end = min(new_last, self.last_day)
        if (latest_start > earliest_end):
            return None
        # A positive shift drops that many elements at that end; a negative
        # one asks for that many elements of padding instead.
        head_shift = time_.days_diff(new_first, self.first_day)
        drop_head = max(0, head_shift)
        pad_head = max(0, -head_shift)
        tail_shift = time_.days_diff(self.last_day, new_last)
        drop_tail = max(0, tail_shift)
        pad_tail = max(0, -tail_shift)
        # The surviving part of my data; plain slicing does not copy.
        kept = self[drop_head:len(self) - drop_tail]
        if (pad_head != 0 or pad_tail != 0):
            # Only pay for the hstack() copy when padding is really needed;
            # a pure shrink hands the slice through untouched, trading
            # memory independence for speed. Callers can deep copy if they
            # need to.
            kept = np.hstack([
                np.zeros(pad_head, dtype=self.dtype),
                kept,
                np.zeros(pad_tail, dtype=self.dtype),
            ])
        return Date_Vector(new_first, kept)