def _stitch(self, bounds, fileroot): """ Stitch together final file bounds : (number of blocks, start and end of blocks) fileroot : for loading blocks and creating new file """ # create new file fname = fileroot + '_merge.h5' try: sth5 = h5py.File(fname, 'w') except: os.unlink(fname) sth5 = h5py.File(fname, 'w') sth5.create_dataset('phi', data=self.phi) sth5.create_dataset('mean', data=self.mean) sth5.create_dataset('var', data=self.var) sth5.create_dataset('std', data=self.std) if self.mu is not None: sth5.create_dataset('mu', data=self.mu) sth5.create_dataset('sigma', data=self.sigma) size = bounds[-1,1]+self.P-1 out = sth5.create_dataset('data', shape=(size, self.N), dtype=np.float32, compression='gzip', chunks=(min(size,1000), self.N)) # write out metadata out.attrs['length'] = self.D out.attrs['N'] = self.N out.attrs['C'] = self.C out.attrs['P'] = self.P out.attrs['T'] = self.T out.attrs['s'] = self.s out.attrs['bounds'] = bounds # open block files for reading files = [fileroot + '_proc%02d.h5' % i for i in range(len(bounds))] h5 = [h5py.File(f) for f in files] d = [h['data'] for h in h5] # allocate space pad = self.P-1 x = np.zeros((1, self.C, 2*pad)) xv = x[0] # view of x a = np.zeros((1, 3*pad, self.N)) av = a[0] # view of a if 'mp' in self.method: mask = np.zeros_like(av) mask[:pad] = np.inf mask[-pad:] = np.inf s = self.s * 2*pad / self.T else: mask = np.ones_like(av) mask[:pad] = 0 mask[-pad:] = 0 # method if 'mp' in self.method: print 'Using %g as s for stitching' % s if self.method != 'mp': raise NotImplementedError() pursuit = mp.ConvolutionalMatchingPursuit(self.phi, s=s, T=2*pad, positive=self.positive, debug=self.debug>1) print 'Stitching: ' # copy first block t = bounds[0,1] - bounds[0,0] block_copy(d[0], out, size=t) print ' %d done' % t # stitch intermediate blocks for i in range(len(bounds)-1): # get non-overlap coefficients av.fill(0.) av[:pad] = d[i][-2*pad:-pad] av[-pad:] = d[i+1][pad:2*pad] tb = bounds[i,1] print 'Getting data: %d:%d' % (tb-pad, tb+pad) xv[:] = self.get_data(tb-pad, tb+pad) xv -= self.mean[:,None] xv /= self.std[:,None] A = a.transpose((0,2,1)) if 'mp' in self.method: pursuit.run(x, A=A, mask=mask.T) else: A[:] = sparseqn_batch(self.phi, x, Sin=A, maxit=self.maxit, positive=self.positive, delta=0.00001, debug=self.debug, lam=self.lam, mask=mask.T) if self.debug: self.debug_plot(xv, av, prefix='stitch-%08d' % t) # write overlapping coefficients out[t:t+pad] = av[pad:2*pad] t += pad # write remainder of coefficients up to pad size = len(d[i+1]) - 2*pad block_copy(d[i+1], out, in0=pad, out0=t, size=size) t += size print ' %d done' % t # write remaining out[t:t+pad] = d[-1][-pad:] t += pad print ' %d done' % t if t != len(out): print 'Warning: stitched dataset not correct length' print ' stitiched %d != out %d' % (t, len(out)) print 'Finished stitching length %d dataset' % t for h in h5: h.close() sth5.close() # remove temporary files for f in files: try: os.unlink(f) except: print 'Failed to remove temporary file: ', f
def _run(self, t0, t1): """ Sparsify data set t0, t1 : start and end times of block to sparsify [TODO] avoid re-reading pad of data [TODO] avoid transposing of data [TODO] avoid zero-ing out of xv """ # if s is fractional and using mp, set to number of coefficients if self.s < 1.: self.s = int((self.P+self.T-1)*self.N * self.s) # allocate memory for sparsification # (sparsification routines use batches, so here we use 1 sample batches) x = np.zeros((1, self.C, self.T)) xv = x[0] # view of x a = np.zeros((1, self.T+self.P-1, self.N)) av = a[0] # view of a pad = self.P - 1 # masking for mp uses inf to keep coefficient unchanged # whereas for the quasinewton methods, 0 indicates no change, # 1 indicate change if 'mp' in self.method: mask = np.zeros_like(av) else: mask = np.ones_like(av) mask_col = 0 t = ot = 0 D = t1 - t0 finished = False frame = expired = 0 if self.method == 'mp': pursuit = mp.ConvolutionalMatchingPursuit(self.phi, s=self.s, T=self.T, positive=self.positive, debug=self.debug>1) if self.method == 'penalized-mp': print 'using penalized mp' extra = {'mu': self.mu, 'sigma': self.sigma, 'dt': 16} pursuit = mp.PenalizedMP(self.phi, s=self.s, T=self.T, positive=self.positive, debug=self.debug>1, extra=extra) if self.method == 'refractory-mp': print 'using refractory mp' extra = {'dt': 16} pursuit = mp.RefractoryMP(self.phi, s=self.s, T=self.T, positive=self.positive, debug=self.debug>1, extra=extra) tic = now() while not finished: if t+self.T > D: self.T = D - t # [TODO] shouldn't change value of T finished = True xv[:,:self.T] = self.get_data(t0+t, t0+t+self.T) xv[:,self.T:] = 0 xv -= self.mean[:,None] xv /= self.std[:,None] if 'mp' in self.method: pursuit.run(x, A=a.transpose((0,2,1)), mask=mask.T) elif self.method == 'owlbfgs': sparsity_gain = 4 A = sparseqn_batch(self.phi, x, Sin=a.transpose((0,2,1)), maxit=self.maxit, positive=self.positive, delta=0.00001, debug=self.debug>2, lam=sparsity_gain * self.lam, mask=mask.T) a[:] = A.transpose((0,2,1)) else: raise ValueError('Bad method') if self.debug>2: self.debug_plot(xv, av, prefix='p%02d-%08d' % (rank, t)) if not finished: if t == 0: self.out[ot:ot+self.T] = av[:self.T] # turn on masking for subsequent times if 'mp' in self.method: mask[:pad] = np.inf else: mask[:pad] = 0 mask_col = pad ot += self.T else: self.out[ot:ot+self.T-pad] = av[pad:self.T] ot += self.T-pad if 'mp' in self.method: av[:2*pad,:] = av[-2*pad:,:] av[pad:].fill(0.) else: av[:2*pad,:] = av[-2*pad:,:] av[2*pad:].fill(0.) t += self.T - pad else: self.out[ot:ot+self.T] = av[pad:pad+self.T] print '[%d] Completed with %d timepoints' % (rank, ot+self.T) if ot + self.T != D + self.P - 1: print '[%d] Warning, length of coeff data != length of data' % rank # resizing dataset will be slow self.out.resize((ot+self.T, self.N)) t += self.T # print approximate time remaining frame += 1 if frame % 10 == 0 and rank == root: expired = now() - tic left = str(datetime.timedelta(seconds=int((t1-t0) * expired / t - expired))) print '[%d] %d (left: %s)' % (rank, t, left) self.outh5.close()