Beispiel #1
0
 def _stitch(self, bounds, fileroot):
     """
     Stitch the per-block coefficient files into one merged file.

     The block files ``<fileroot>_procNN.h5`` (one per row of `bounds`) are
     read, the coefficients around every block boundary are re-estimated
     on a window of 2*(P-1) data samples, and everything is written to the
     'data' dataset of ``<fileroot>_merge.h5``.  The block files are
     deleted once the merged file has been written successfully.

       bounds     : 2-D array with one row per block, indexed as
                    bounds[i,0] (start of block) and bounds[i,1] (end of
                    block)
       fileroot   : path prefix for loading blocks and creating new file

     Raises NotImplementedError for matching-pursuit variants other than
     plain 'mp'.
     """
     # create new file; if that fails, remove whatever is in the way and
     # retry once
     fname = fileroot + '_merge.h5'
     try:
         sth5 = h5py.File(fname, 'w')
     except (IOError, OSError):
         os.unlink(fname)
         sth5 = h5py.File(fname, 'w')

     files = [fileroot + '_proc%02d.h5' % i for i in range(len(bounds))]
     h5 = []
     try:
         sth5.create_dataset('phi', data=self.phi)
         sth5.create_dataset('mean', data=self.mean)
         sth5.create_dataset('var', data=self.var)
         sth5.create_dataset('std', data=self.std)
         if self.mu is not None:
             sth5.create_dataset('mu', data=self.mu)
             sth5.create_dataset('sigma', data=self.sigma)

         pad = self.P-1

         # merged coefficient series: end of the last block plus pad rows
         size = bounds[-1,1]+pad
         out = sth5.create_dataset('data', shape=(size, self.N),
                                   dtype=np.float32, compression='gzip',
                                   chunks=(min(size,1000), self.N))
         # write out metadata
         out.attrs['length'] = self.D
         out.attrs['N'] = self.N
         out.attrs['C'] = self.C
         out.attrs['P'] = self.P
         out.attrs['T'] = self.T
         out.attrs['s'] = self.s
         out.attrs['bounds'] = bounds

         # open block files read-only (mode given explicitly because the
         # h5py default mode is not the same in every h5py version)
         for f in files:
             h5.append(h5py.File(f, 'r'))
         d = [h['data'] for h in h5]

         # allocate space: 2*pad data samples and 3*pad coefficient rows
         # per boundary (1-sample batches)
         x = np.zeros((1, self.C, 2*pad))
         xv = x[0]   # view of x
         a = np.zeros((1, 3*pad, self.N))
         av = a[0]   # view of a

         # The first and last pad rows of the coefficient buffer come from
         # the neighbouring blocks and must stay fixed: mp marks fixed
         # entries with inf, the quasi-newton solver with 0 (1 = free).
         if 'mp' in self.method:
             mask = np.zeros_like(av)
             mask[:pad] = np.inf
             mask[-pad:] = np.inf
             # rescale s from a window of T samples to one of 2*pad samples
             s = self.s * 2*pad / self.T
             print('Using %g as s for stitching' % s)
             if self.method != 'mp':
                 raise NotImplementedError()
             pursuit = mp.ConvolutionalMatchingPursuit(self.phi, s=s, T=2*pad,
                                                       positive=self.positive,
                                                       debug=self.debug>1)
         else:
             mask = np.ones_like(av)
             mask[:pad] = 0
             mask[-pad:] = 0

         print('Stitching: ')
         # copy first block
         t = bounds[0,1] - bounds[0,0]
         block_copy(d[0], out, size=t)
         print(' %d done' % t)

         # stitch intermediate blocks
         for i in range(len(bounds)-1):
             # get non-overlap coefficients from both sides of the boundary;
             # the middle pad rows start at zero and are solved for
             av.fill(0.)
             av[:pad] = d[i][-2*pad:-pad]
             av[-pad:] = d[i+1][pad:2*pad]

             # data window centred on the end of block i
             tb = bounds[i,1]
             print('Getting data: %d:%d' % (tb-pad, tb+pad))
             xv[:] = self.get_data(tb-pad, tb+pad)
             xv -= self.mean[:,None]
             xv /= self.std[:,None]

             # A is a transposed view of a, so results land in av
             A = a.transpose((0,2,1))
             if 'mp' in self.method:
                 pursuit.run(x, A=A, mask=mask.T)
             else:
                 A[:] = sparseqn_batch(self.phi, x, Sin=A, maxit=self.maxit,
                                       positive=self.positive,
                                       delta=0.00001, debug=self.debug,
                                       lam=self.lam, mask=mask.T)

             if self.debug:
                 self.debug_plot(xv, av, prefix='stitch-%08d' % t)

             # write overlapping coefficients
             out[t:t+pad] = av[pad:2*pad]
             t += pad

             # write remainder of coefficients up to pad
             rest = len(d[i+1]) - 2*pad
             block_copy(d[i+1], out, in0=pad, out0=t, size=rest)
             t += rest

             print(' %d done' % t)

         # write remaining
         out[t:t+pad] = d[-1][-pad:]
         t += pad

         print(' %d done' % t)

         if t != len(out):
             print('Warning: stitched dataset not correct length')
             print('  stitiched %d != out %d' % (t, len(out)))

         print('Finished stitching length %d dataset' % t)
     finally:
         # release every HDF5 handle, also when stitching failed part-way
         for h in h5:
             h.close()
         sth5.close()

     # remove temporary files (only reached when stitching succeeded)
     for f in files:
         try:
             os.unlink(f)
         except OSError:
             print('Failed to remove temporary file:  %s' % f)
Beispiel #2
0
 def _run(self, t0, t1):
     """
     Sparsify data set
     t0, t1    : start and end times of block to sparsify

     The range is processed in windows of self.T samples that overlap by
     P-1 samples; coefficients are written to self.out and self.outh5 is
     closed when the range is done.  Uses the names `rank`, `root` and
     `now`, which are defined outside this method.

     Side effect: a fractional self.s (< 1) is replaced by the
     corresponding number of coefficients.  self.T is not modified.

     Raises ValueError('Bad method') if self.method is not one of 'mp',
     'penalized-mp', 'refractory-mp' or 'owlbfgs'.

     [TODO] avoid re-reading pad of data
     [TODO] avoid transposing of data
     [TODO] avoid zero-ing out of xv
     """
     # if s is fractional, set to number of coefficients
     if self.s < 1.:
         self.s = int((self.P+self.T-1)*self.N * self.s)

     # select the solver up front so an unsupported method fails before
     # any data is read
     use_mp = 'mp' in self.method
     if self.method == 'mp':
         pursuit = mp.ConvolutionalMatchingPursuit(self.phi, s=self.s, T=self.T,
                                                   positive=self.positive,
                                                   debug=self.debug>1)
     elif self.method == 'penalized-mp':
         print('using penalized mp')
         extra = {'mu': self.mu, 'sigma': self.sigma, 'dt': 16}
         pursuit = mp.PenalizedMP(self.phi, s=self.s, T=self.T,
                                  positive=self.positive, debug=self.debug>1,
                                  extra=extra)
     elif self.method == 'refractory-mp':
         print('using refractory mp')
         extra = {'dt': 16}
         pursuit = mp.RefractoryMP(self.phi, s=self.s, T=self.T,
                                   positive=self.positive, debug=self.debug>1,
                                   extra=extra)
     elif self.method != 'owlbfgs':
         raise ValueError('Bad method')

     # length of the current window; shortened for the last window only.
     # Kept local so that self.T keeps its configured value.
     T = self.T

     # allocate memory for sparsification
     # (sparsification routines use batches, so here we use 1 sample batches)
     x = np.zeros((1, self.C, T))
     xv = x[0]   # view of x
     a = np.zeros((1, T+self.P-1, self.N))
     av = a[0]   # view of a

     pad = self.P - 1

     # masking for mp uses inf to keep coefficient unchanged
     # whereas for the quasinewton methods, 0 indicates no change,
     # 1 indicate change
     if use_mp:
         mask = np.zeros_like(av)
     else:
         mask = np.ones_like(av)

     t = ot = 0      # read position in the data / write position in self.out
     D = t1 - t0
     finished = False
     frame = 0
     sparsity_gain = 4   # multiplier applied to lam for owlbfgs

     tic = now()
     while not finished:
         if t+T > D:
             # last window: only D - t samples are left
             T = D - t
             finished = True

         # load and normalise the window; unused tail of xv is zeroed
         xv[:,:T] = self.get_data(t0+t, t0+t+T)
         xv[:,T:] = 0
         xv -= self.mean[:,None]
         xv /= self.std[:,None]

         if use_mp:
             pursuit.run(x, A=a.transpose((0,2,1)), mask=mask.T)
         else:
             A = sparseqn_batch(self.phi, x, Sin=a.transpose((0,2,1)),
                                maxit=self.maxit, positive=self.positive,
                                delta=0.00001, debug=self.debug>2,
                                lam=sparsity_gain * self.lam,
                                mask=mask.T)
             a[:] = A.transpose((0,2,1))

         if self.debug>2:
             self.debug_plot(xv, av, prefix='p%02d-%08d' % (rank, t))

         if not finished:
             if t == 0:
                 self.out[ot:ot+T] = av[:T]

                 # turn on masking for subsequent times
                 if use_mp:
                     mask[:pad] = np.inf
                 else:
                     mask[:pad] = 0
                 ot += T
             else:
                 # first pad rows were already written by the previous window
                 self.out[ot:ot+T-pad] = av[pad:T]
                 ot += T-pad

             # carry the trailing 2*pad rows to the front for the next
             # window; mp keeps only the first pad of them, the
             # quasi-newton solver keeps all 2*pad as a starting point
             av[:2*pad,:] = av[-2*pad:,:]
             if use_mp:
                 av[pad:].fill(0.)
             else:
                 av[2*pad:].fill(0.)

             t += T - pad
         else:
             # NOTE(review): if the whole range fits in a single window
             # (t == 0 here) rows av[:pad] are never written and the length
             # check below fires -- confirm whether that case needs its own
             # handling.
             self.out[ot:ot+T] = av[pad:pad+T]
             print('[%d] Completed with %d timepoints' % (rank, ot+T))
             if ot + T != D + self.P - 1:
                 print('[%d] Warning, length of coeff data != length of data' % rank)
                 # resizing dataset will be slow
                 self.out.resize((ot+T, self.N))
             t += T

         # print approximate time remaining
         frame += 1
         if frame % 10 == 0 and rank == root:
             expired = now() - tic
             left = str(datetime.timedelta(seconds=int(D * expired / t - expired)))
             print('[%d] %d (left: %s)' % (rank, t, left))

     self.outh5.close()