def predict_async(self, fX, fY): """Iterative predict which saves data to HDF5, with asynchronous I/O by separate processes. """ assert self.Beta is not None, "Train ELM before predicting" X, _ = self._checkdata(fX, None) N = X.shape[0] make_hdf5((N, self.targets), fY) # start async reader and writer for HDF5 files qr_in = mp.Queue() qr_out = mp.Queue(1) reader = mp.Process(target=ireader, args=(fX, qr_in, qr_out)) reader.daemon = True reader.start() qw_in = mp.Queue(1) writer = mp.Process(target=iwriter, args=(fY, qw_in)) writer.daemon = True writer.start() nn = np.sum([n1[1] for n1 in self.neurons]) batch = max(self.batch, nn) nb = N / batch # number of batches if N > batch * nb: nb += 1 t = time() t0 = time() eta = 0 for b in xrange(0, nb + 1): start_next = b * batch stop_next = min((b + 1) * batch, N) # prefetch data qr_in.put((start_next, stop_next)) # asyncronous reading of next data batch if b > 0: # first iteration only prefetches data # get data t2 = time() Xb = qr_out.get() print "toc %.2f" % (time() - t2) Xb = Xb.astype(np.float64) # process data Hb = self.project(Xb) Yb = Hb.dot(self.Beta) # save data qw_in.put((Yb, start, stop)) start = start_next stop = stop_next # report time eta = int(((time() - t0) / (b + 1)) * (nb - b - 1)) if time() - t > self.tprint: print "processing batch %d/%d, eta %d:%02d:%02d" % (b + 1, nb, eta / 3600, (eta % 3600) / 60, eta % 60) t = time() qw_in.put(None) reader.join() writer.join()
def _makeh5(self, h5name, N, d):
    """Create an HDF5 file of shape (N, d) and return its data node.

    The file is opened in append mode and its handle is recorded in
    `self.opened_hdf5` so it can be closed later.
    """
    make_hdf5((N, d), h5name)
    handle = open_file(h5name, "a")
    self.opened_hdf5.append(handle)
    # walk to the last node in the file, whatever its name is
    for node in handle.walk_nodes():
        pass
    return node
def predict(self, fX, fY): """Iterative predict which saves data to HDF5, sequential version. """ assert self.Beta is not None, "Train ELM before predicting" X, _ = self._checkdata(fX, None) N = X.shape[0] make_hdf5((N, self.targets), fY) h5 = open_file(fY, "a") self.opened_hdf5.append(h5) for Y in h5.walk_nodes(): pass # find a node with whatever name nn = np.sum([n1[1] for n1 in self.neurons]) batch = max(self.batch, nn) nb = N / batch # number of batches if N > batch * nb: nb += 1 t = time() t0 = time() eta = 0 for b in xrange(0, nb): start = b * batch stop = min((b + 1) * batch, N) # get data t2 = time() print "tic" Xb = X[start:stop].astype(np.float64) print "toc %.2f" % (time() - t2) print # process data Hb = self.project(Xb) Yb = Hb.dot(self.Beta) # write data Y[start:stop] = Yb # report time eta = int(((time() - t0) / (b + 1)) * (nb - b - 1)) if time() - t > self.tprint: print "processing batch %d/%d, eta %d:%02d:%02d" % (b + 1, nb, eta / 3600, (eta % 3600) / 60, eta % 60) t = time() self.opened_hdf5.pop() h5.close()
def test_ParallelBasicPython_Works(self):
    """Simulate a parallel HPELM run: several processes share one model file
    and accumulate HH/HT correlations into common files."""
    # random data, stored in HDF5 files
    X = np.random.rand(1000, 10)
    T = np.random.rand(1000, 3)
    hX = modules.make_hdf5(X, self.fnameX)
    hT = modules.make_hdf5(T, self.fnameT)

    # build a model and save it, so every "worker" loads an identical copy
    builder = HPELM(10, 3)
    builder.add_neurons(10, 'lin')
    builder.add_neurons(5, 'tanh')
    builder.add_neurons(15, 'sigm')
    builder.save(self.fmodel)

    # first worker accumulates correlations over the first 100 rows
    worker1 = HPELM(10, 3)
    worker1.load(self.fmodel)
    os.remove(self.fnameHT)
    os.remove(self.fnameHH)
    worker1.add_data(self.fnameX, self.fnameT, istart=0, icount=100, fHH=self.fnameHH, fHT=self.fnameHT)

    # second worker adds the remaining 900 rows to the same files
    worker2 = HPELM(10, 3)
    worker2.load(self.fmodel)
    worker2.add_data(self.fnameX, self.fnameT, istart=100, icount=900, fHH=self.fnameHH, fHT=self.fnameHT)

    # solver turns the accumulated correlations into output weights
    solver = HPELM(10, 3)
    solver.load(self.fmodel)
    solver.solve_corr(self.fnameHH, self.fnameHT)
    solver.save(self.fmodel)

    # the final model predicts; errors on two subsets must stay small
    predictor = HPELM(10, 3)
    predictor.load(self.fmodel)
    predictor.predict(self.fnameX, self.fnameY)
    err = predictor.error(self.fnameT, self.fnameY, istart=0, icount=198)
    self.assertLess(err, 1)
    err = predictor.error(self.fnameT, self.fnameY, istart=379, icount=872)
    self.assertLess(err, 1)