def main(self): """ Tests data processing methods """ try: preprocess.setup() except: print 'SETUP failed' else: print 'SETUP succeeded' try: d = preprocess.load(prefix=PAR.OBSERVATIONS) s = preprocess.load(prefix=PAR.SYNTHETICS) except: print 'LOAD failed' else: print 'LOAD succeeded' try: d = preprocess.process_traces(d) s = preprocess.process_traces(s) except: print 'PROCESS_TRACES failed' else: print 'PROCESS_TRACES succeeded'
def setup(self): """ Lays groundwork for inversion """ # clean scratch directories if PAR.BEGIN == 1: unix.rm(PATH.GLOBAL) unix.mkdir(PATH.GLOBAL) preprocess.setup() postprocess.setup() optimize.setup() system.run('solver', 'setup', hosts='all')
def main(self): """ Migrates seismic data """ # prepare directory structure unix.rm(PATH.GLOBAL) unix.mkdir(PATH.GLOBAL) # set up workflow machinery preprocess.setup() postprocess.setup() # set up solver machinery print 'Preparing solver...' system.run('solver', 'setup', hosts='all') self.prepare_model() # perform migration print 'Generating synthetics...' system.run('solver', 'eval_func', hosts='all', path=PATH.GLOBAL) print 'Backprojecting data...' system.run('solver', 'eval_grad', hosts='all', path=PATH.GLOBAL, export_traces=PAR.SAVETRACES) postprocess.combine_kernels( path=PATH.GLOBAL, parameters=solver.parameters) try: postprocess.combine_kernels( path=PATH.GLOBAL, parameters=['rhop']) except: pass if PAR.SAVETRACES: self.save_traces() if PAR.SAVEKERNELS: self.save_kernels() else: self.save_kernels_sum() print 'Finished\n'
def fix_near_field(self, path=''):
    """ Masks kernel values in the immediate vicinity of sources and receivers
    """
    import preprocess
    preprocess.setup()

    name = solver.check_source_names()[solver.taskid]
    fullpath = path + '/' + name
    g = solver.load(fullpath, suffix='_kernel')
    if not PAR.FIXRADIUS:
        return

    mesh = self.getmesh()
    x, z = self.getxz()

    lx = x.max() - x.min()
    lz = z.max() - z.min()
    nn = x.size
    nx = np.around(np.sqrt(nn * lx / lz))
    nz = np.around(np.sqrt(nn * lz / lx))
    dx = lx / nx
    dz = lz / nz

    sigma = 0.5 * PAR.FIXRADIUS * (dx + dz)
    sx, sy, sz = preprocess.get_source_coords(
        preprocess.reader(solver.cwd + '/' + 'traces/obs',
                          solver.data_filenames[0]))
    rx, ry, rz = preprocess.get_receiver_coords(
        preprocess.reader(solver.cwd + '/' + 'traces/obs',
                          solver.data_filenames[0]))

    # mask sources
    mask = np.exp(-0.5 * ((x - sx[0])**2. + (z - sy[0])**2.) / sigma**2.)
    for key in solver.parameters:
        weight = np.sum(mask * g[key][0]) / np.sum(mask)
        g[key][0] *= 1. - mask
        g[key][0] += mask * weight

    # mask receivers
    for ir in range(PAR.NREC):
        mask = np.exp(-0.5 * ((x - rx[ir])**2. + (z - ry[ir])**2.) / sigma**2.)
        for key in solver.parameters:
            weight = np.sum(mask * g[key][0]) / np.sum(mask)
            g[key][0] *= 1. - mask
            g[key][0] += mask * weight

    solver.save(fullpath, g, suffix='_kernel')
def __init__(self, train, dev, test, opt, nb_classes):
    # self.data = {'train': train, 'dev': dev, 'test': test}
    for key, value in opt.__dict__.items():
        self.__setattr__(key, value)

    self.preprocessor = preprocess.setup(opt)
    self.datas = {
        'train': self.preprocess(train),
        'dev': self.preprocess(dev),
        'test': self.preprocess(test)
    }
    self.nb_classes = nb_classes
    self.get_max_sentence_length()
    self.dict_path = os.path.join(self.bert_dir, 'vocab.txt')

    if bool(self.bert_enabled):
        loaded_dic = Dictionary(dict_path=self.dict_path)
        self.embedding = Embedding(loaded_dic, self.max_sequence_length)
    else:
        self.embedding = Embedding(
            self.get_dictionary(self.datas.values()),
            self.max_sequence_length)

    print('loading word embedding...')
    self.embedding.get_embedding(dataset_name=self.dataset_name,
                                 fname=opt.wordvec_path)
    self.opt_callback(opt)
def setup(self): """ Lays groundwork for inversion """ # clean scratch directories if PAR.BEGIN == 1: unix.rm(PATH.SCRATCH) unix.mkdir(PATH.SCRATCH) preprocess.setup() postprocess.setup() optimize.setup() if PATH.DATA: print 'Copying data' else: print 'Generating data' system.run('solver', 'setup', hosts='all')
def setup(self): """ Lays groundwork for inversion """ # clean scratch directories if PAR.BEGIN == 1: unix.rm(PATH.GLOBAL) unix.mkdir(PATH.GLOBAL) preprocess.setup() postprocess.setup() optimize.setup() if PATH.DATA: print('Copying data...') else: print('Generating data...') system.run('solver', 'setup', hosts='all')
def fix_near_field(self, path=''):
    """ Masks kernel values in the immediate vicinity of sources and receivers
    """
    import preprocess
    preprocess.setup()

    name = solver.check_source_names()[solver.getnode]
    fullpath = path + '/' + name
    g = solver.load(fullpath, suffix='_kernel')
    if not PAR.FIXRADIUS:
        return

    mesh = self.getmesh()
    x, z = self.getxz()

    lx = x.max() - x.min()
    lz = z.max() - z.min()
    nn = x.size
    nx = np.around(np.sqrt(nn * lx / lz))
    nz = np.around(np.sqrt(nn * lz / lx))
    dx = lx / nx
    dz = lz / nz

    sigma = 0.5 * PAR.FIXRADIUS * (dx + dz)
    _, h = preprocess.load(solver.getpath + '/' + 'traces/obs')

    # mask sources
    mask = np.exp(-0.5 * ((x - h.sx[0])**2. + (z - h.sy[0])**2.) / sigma**2.)
    for key in solver.parameters:
        weight = np.sum(mask * g[key][0]) / np.sum(mask)
        g[key][0] *= 1. - mask
        g[key][0] += mask * weight

    # mask receivers
    for ir in range(h.nr):
        mask = np.exp(-0.5 * ((x - h.rx[ir])**2. + (z - h.ry[ir])**2.) / sigma**2.)
        for key in solver.parameters:
            weight = np.sum(mask * g[key][0]) / np.sum(mask)
            g[key][0] *= 1. - mask
            g[key][0] += mask * weight

    solver.save(fullpath, g, suffix='_kernel')
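# Illustration (not part of the workflow above): the fix_near_field variants damp
# kernel values inside a Gaussian of width sigma centred on each source or receiver,
# replacing them with a mask-weighted average rather than zeroing them. Below is a
# minimal standalone sketch of that blend using only NumPy; the coordinates, kernel
# values, and sigma are made up, and no solver/preprocess objects are assumed.
import numpy as np

x = np.linspace(0.0, 1000.0, 201)        # grid coordinates (m)
z = np.zeros_like(x)                     # flat profile for simplicity
kernel = np.sin(x / 50.0)                # stand-in for a sensitivity kernel
sx, sz = 300.0, 0.0                      # source position
sigma = 50.0                             # mask radius (plays the role of PAR.FIXRADIUS times grid spacing)

mask = np.exp(-0.5 * ((x - sx)**2 + (z - sz)**2) / sigma**2)
weight = np.sum(mask * kernel) / np.sum(mask)   # smooth replacement value
kernel = kernel * (1.0 - mask) + mask * weight  # blend out the near-field values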
def main(self):
    unix.rm(PATH.SCRATCH)
    unix.mkdir(PATH.SCRATCH)
    preprocess.setup()

    print('SIMULATION 1 OF 3')
    system.run('solver', 'setup', hosts='all')

    print('SIMULATION 2 OF 3')
    self.prepare_model()
    system.run('solver', 'eval_func', hosts='all', path=PATH.SCRATCH)

    print('SIMULATION 3 OF 3')
    system.run('solver', 'eval_grad', hosts='all', path=PATH.SCRATCH)

    # collect traces
    obs = join(PATH.SOLVER, self.event, 'traces/obs')
    syn = join(PATH.SOLVER, self.event, 'traces/syn')
    adj = join(PATH.SOLVER, self.event, 'traces/adj')

    obs, _ = preprocess.load(obs)
    syn, _ = preprocess.load(syn)
    adj, _ = preprocess.load(adj, suffix='.su.adj')

    # collect model and kernels
    model = solver.load(PATH.MODEL_INIT)
    kernels = solver.load(PATH.SCRATCH + '/' + 'kernels' + '/' + self.event,
                          suffix='_kernel')

    # dot product in data space
    keys = obs.keys()
    LHS = DotProductLHS(keys, syn, adj)

    # dot product in model space
    keys = ['rho', 'vp', 'vs']  # model.keys()
    RHS = DotProductRHS(keys, model, kernels)

    print()
    print('LHS:', LHS)
    print('RHS:', RHS)
    print('RELATIVE DIFFERENCE:', (LHS - RHS) / RHS)
    print()
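# Illustration (not part of the test above): the dot-product test checks the adjoint
# identity <L m, a> in data space against <m, L^T a> in model space; the relative
# difference printed above should be close to machine precision. The internals of
# DotProductLHS/DotProductRHS are not shown in the snippet, so this toy version uses
# plain NumPy arrays and a random linear operator as stand-ins.
import numpy as np

rng = np.random.default_rng(0)
L = rng.standard_normal((50, 30))   # hypothetical linearized forward operator
m = rng.standard_normal(30)         # model perturbation
a = rng.standard_normal(50)         # adjoint source (data-space vector)

lhs = np.dot(L @ m, a)              # dot product in data space
rhs = np.dot(m, L.T @ a)            # dot product in model space
print('RELATIVE DIFFERENCE:', (lhs - rhs) / rhs)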
def main(self): """ Migrates seismic data """ # prepare directory structure unix.rm(PATH.GLOBAL) unix.mkdir(PATH.GLOBAL) # set up pre- and post-processing preprocess.setup() postprocess.setup() # prepare solver print 'Preparing solver...' system.run('solver', 'setup', hosts='all') self.prepare_model() system.run('solver', 'eval_func', hosts='all', path=PATH.GLOBAL) # backproject data print 'Backprojecting data...' system.run('solver', 'eval_grad', hosts='all', path=PATH.GLOBAL, export_traces=PAR.SAVETRACES) # process gradient postprocess.process_kernels(path=PATH.GLOBAL, tag='gradient') # save results if PAR.SAVEGRADIENT: self.save_gradient() if PAR.SAVETRACES: self.save_traces() if PAR.SAVEKERNELS: self.save_kernels() print 'Finished\n'
def setup(self): """ Lays groundwork for inversion """ # clean scratch directories if PAR.BEGIN == 1: unix.rm(PATH.SCRATCH) unix.mkdir(PATH.SCRATCH) preprocess.setup() postprocess.setup() optimize.setup() isready = self.solver_status() if not isready: if PATH.DATA: print 'Copying data...' else: print 'Generating data...' system.run('solver', 'setup', hosts='all')
def main(self): """ Tests data processing methods """ data = {} try: preprocess.setup() except: print 'setup failed' else: print 'setup succeeded' # test reader try: for channel in self.channels(): data[channel] = preprocess.reader(PATH.DATA, channel) except: print 'reader failed' else: print 'reader succeeded' # test processing try: for channel in self.channels(): data[channel] = preprocess.apply_filter(data[channel]) except: print 'processing failed' else: print 'processing succeeded' try: for channel in self.channels(): preprocess.writer(data[channel], PATH.OUTPUT, channel) except: print 'writer failed' else: print 'writer succeeded'
def setup(self): """ Lays groundwork for inversion """ # clean scratch directories if PAR.BEGIN == 1: unix.rm(PATH.GLOBAL) unix.mkdir(PATH.GLOBAL) # set up optimization optimize.setup() # set up pre- and post-processing preprocess.setup() postprocess.setup() # set up solver if PAR.BEGIN == 1: system.run('solver', 'setup', hosts='all') return if PATH.LOCAL: system.run('solver', 'setup', hosts='all')
def main(save=True): """ Train a model \n ave {bool} - whether to save the trained model (default: True) \n Returns: wrapper RNN class for a Keras model (e.g. keras.models.Sequential) """ startTime = time() trainingSet, validationSet, scaler = setup() trainGen = DataGenerator(trainingSet, scaler, windowSize=WINDOW_SIZE, lookback=LOOKBACK, sampleRate=SAMPLERATE, prediction=PREDICTION).generator() validGen = DataGenerator(validationSet, scaler, windowSize=WINDOW_SIZE, lookback=LOOKBACK, sampleRate=SAMPLERATE, prediction=PREDICTION).generator() rnn = RNN(HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE, PREDICTION) optimizer = rnn.pickOptimizer(OPTIMIZER, lr=LEARNING_RATE) rnn.model.compile(loss=LOSS_FUNC, optimizer=optimizer) rnn.model.fit_generator(trainGen, steps_per_epoch=STEPS_PER_EPOCH, epochs=EPOCHS, validation_data=validGen, validation_steps=VALIDATION_STEP_PER_EPOCH, verbose=2, shuffle=False) endTime = time() print( f"\nTRAINING DONE. Total time elapsed: {strftime('%H:%M:%S', gmtime(endTime - startTime))}" ) if save: weightsFile = constructFilename(BASE_PATH, HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE, PREDICTION, WEIGHT_EXT) architectureFile = constructFilename(BASE_PATH, HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE, PREDICTION, ARCHITECT_EXT) rnn.saveWeights(weightsFile) rnn.saveArchitecture(architectureFile) return rnn
def __init__(self, opt):
    self.onehot = True
    self.unbalanced_sampling = False
    for key, value in opt.__dict__.items():
        self.__setattr__(key, value)

    self.dir_path = os.path.join(opt.datasets_dir, 'QA',
                                 opt.dataset_name.lower())
    self.preprocessor = preprocess.setup(opt)
    self.datas = self.load(do_filter=opt.remove_unanswered_question)
    self.get_max_sentence_length()
    self.nb_classes = 2
    self.dict_path = os.path.join(self.bert_dir, 'vocab.txt')

    if 'bert' in self.network_type:
        loaded_dic = Dictionary(dict_path=self.dict_path)
        self.embedding = Embedding(loaded_dic, self.max_sequence_length)
    else:
        self.embedding = Embedding(
            self.get_dictionary(self.datas.values()),
            self.max_sequence_length)

    self.alphabet = self.get_dictionary(self.datas.values())
    # self.q_max_sent_length = q_max_sent_length
    # self.a_max_sent_length = a_max_sent_length

    print('loading word embedding...')
    if opt.dataset_name == "NLPCC":
        # can be updated
        self.embedding.get_embedding(dataset_name=self.dataset_name,
                                     language="cn",
                                     fname=opt.wordvec_path)
    else:
        self.embedding.get_embedding(dataset_name=self.dataset_name,
                                     fname=opt.wordvec_path)
    self.opt_callback(opt)
def fix_near_field(self, path=''):
    """ Masks kernel values near the source and applies depth scaling and edge tapers
    """
    import preprocess
    preprocess.setup()

    name = solver.check_source_names()[solver.getnode]
    fullpath = path + '/' + name
    g = solver.load(fullpath, suffix='_kernel')
    g_vec = solver.merge(g)
    nproc = solver.mesh.nproc
    if not PAR.FIXRADIUS:
        return

    x, y, z = self.getcoords()

    lx = x.max() - x.min()
    ly = y.max() - y.min()
    lz = z.max() - z.min()
    nn = x.size
    nx = np.around(np.sqrt(nn * lx / (lz * ly)))
    ny = np.around(np.sqrt(nn * ly / (lx * lz)))
    nz = np.around(np.sqrt(nn * lz / (lx * ly)))
    dx = lx / nx * 1.25
    dy = ly / ny * 1.25
    dz = lz / nz * 1.25

    sigma = PAR.FIXRADIUS * (dx + dz + dy) / 3.0
    _, h = preprocess.load(solver.getpath + '/' + 'traces/obs')

    # mask sources
    mask = np.exp(-0.5 * ((x - h.sx[0])**2. + (y - h.sy[0])**2. +
                          (z - h.sz[0])**2.) / sigma**2.)

    # depth scaling and edge tapers
    scale_z = np.power(abs(z), 0.5)

    power_win = 10
    win_x = np.power(x, power_win)
    win_y = np.power(y, power_win)
    win_z = np.power(z, power_win)
    win_x = win_x / win_x.max()
    win_y = win_y / win_y.max()
    win_z = win_z / win_z.max()
    win_x = 1.0 - win_x[::-1]
    win_y = 1.0 - win_y[::-1]
    win_z = 1.0 - win_z[::-1]

    win_x_rev = win_x[::-1]
    win_y_rev = win_y[::-1]
    win_z_rev = win_z[::-1]

    taper_x = x * 0.0 + 1.0
    taper_y = y * 0.0 + 1.0
    taper_z = z * 0.0 + 1.0
    taper_x *= win_x
    taper_y *= win_y
    taper_z *= win_z
    taper_x *= win_x_rev
    taper_y *= win_y_rev
    taper_z *= win_z_rev

    scale_z = scale_z * taper_z + 0.1

    mask_x = solver.split(taper_x)
    mask_y = solver.split(taper_y)
    mask_z = solver.split(scale_z)
    mask_d = solver.split(mask)

    for key in solver.parameters:
        for iproc in range(nproc):
            weight = np.sum(mask_d['vp'][iproc] * g[key][iproc]) / np.sum(
                mask_d['vp'][iproc])
            g[key][iproc] *= 1. - mask_d['vp'][iproc]
            g[key][iproc] *= mask_z['vp'][iproc]
            g[key][iproc] *= mask_x['vp'][iproc]
            g[key][iproc] *= mask_y['vp'][iproc]

    #sigma = 1.0
    ## mask receivers
    #for ir in range(h.nr):
    #    mask = np.exp(-0.5*((x-h.rx[ir])**2.+(y-h.ry[ir])**2.+(z-h.rz[ir])**2.)/sigma**2.)
    #    mask_d = solver.split(mask)
    #    for key in solver.parameters:
    #        for iproc in range(nproc):
    #            #weight = np.sum(mask*g[key][0])/np.sum(mask)
    #            g[key][iproc] *= 1.-mask_d['vp'][iproc]
    #            #g[key][0] += mask*weight

    solver.save(fullpath, g, suffix='_kernel')
class test_preprocess(object):
    """ Preprocess integration test

        Not yet implemented. The following is just a sketch. None of the
        methods work yet.
    """

    def check(self):
        """ Checks parameters and paths
        """
        #raise NotImplementedError

        # mute settings
        if 'MUTE' not in PAR:
            setattr(PAR, 'MUTE', False)
        if 'MUTESLOPE' not in PAR:
            setattr(PAR, 'MUTESLOPE', 0.)
        if 'MUTECONST' not in PAR:
            setattr(PAR, 'MUTECONST', 0.)

        # filter settings
        if 'BANDPASS' not in PAR:
            setattr(PAR, 'BANDPASS', False)
        if 'FREQLO' not in PAR:
            setattr(PAR, 'FREQLO', 0.)
        if 'FREQHI' not in PAR:
            setattr(PAR, 'FREQHI', 0.)

        # check paths
        if 'OBSERVATIONS' not in PATH:
            raise Exception
        if 'SYNTHETICS' not in PATH:
            raise Exception
        if 'OUTPUT' not in PATH:
            raise Exception

    def main(self):
        """ Tests data processing methods
        """
        try:
            preprocess.setup()
        except:
            print('SETUP failed')
        else:
            print('SETUP succeeded')

        try:
            d, h = preprocess.load(prefix=PATH.OBSERVATIONS)
            s, h = preprocess.load(prefix=PATH.SYNTHETICS)
        except:
            print('LOAD failed')
        else:
            print('LOAD succeeded')

        try:
            d = preprocess.multichannel(preprocess.process_traces, [d], [h])
            s = preprocess.multichannel(preprocess.process_traces, [s], [h])
        except:
            print('PROCESS_TRACES failed')
        else:
            print('PROCESS_TRACES succeeded')

        try:
            preprocess.save(d, h, prefix=PATH.OBSERVATIONS_PRE)
            preprocess.save(s, h, prefix=PATH.SYNTHETICS_PRE)
        except:
            print('OUTPUT_TRACES failed')
        else:
            print('OUTPUT_TRACES succeeded')
import quandl
import preprocess, dataAccess, calculate
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pprint
import datetime
import json
import fix_yahoo_finance

holdings, benchmarks = preprocess.setup()
tev = calculate.trackingErrorVolatility(holdings, benchmarks)
pdr = calculate.cumulative_returns_no_shares(holdings, benchmarks)
tevs = calculate.tev_plot_array(holdings, tev)
print(pdr)

# plot the percent excess returns
#plt.plot(tevs)
plt.plot(pdr)
plt.ylabel('cumulative returns')
plt.show()
exit()
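# Illustration (not part of the script above): tracking-error volatility is normally
# defined as the standard deviation of active returns (portfolio minus benchmark).
# The calculate.trackingErrorVolatility implementation is not shown above and may
# differ in detail (e.g. annualization); the return series below are made up.
import numpy as np

portfolio_returns = np.array([0.010, -0.004, 0.007, 0.002, -0.001])
benchmark_returns = np.array([0.008, -0.002, 0.006, 0.003, 0.000])

active_returns = portfolio_returns - benchmark_returns
tev = np.std(active_returns, ddof=1)   # sample standard deviation of active returns
print(f"tracking error volatility: {tev:.4%}")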
def fix_near_field(self, path=''):
    """ Masks kernel values near the source and applies depth scaling
    """
    import preprocess
    preprocess.setup()

    name = solver.check_source_names()[solver.getnode]
    fullpath = path + '/' + name
    g = solver.load(fullpath, suffix='_kernel')
    g_vec = solver.merge(g)
    nproc = solver.mesh.nproc
    if not PAR.FIXRADIUS:
        return

    x, y, z = self.getcoords()

    lx = x.max() - x.min()
    ly = y.max() - y.min()
    lz = z.max() - z.min()
    nn = x.size
    nx = np.around(np.sqrt(nn * lx / (lz * ly)))
    ny = np.around(np.sqrt(nn * ly / (lx * lz)))
    nz = np.around(np.sqrt(nn * lz / (lx * ly)))
    dx = lx / nx * 1.25
    dy = ly / ny * 1.25
    dz = lz / nz * 1.25

    sigma = PAR.FIXRADIUS * (dx + dz + dy) / 3.0
    _, h = preprocess.load(solver.getpath + '/' + 'traces/obs')

    # mask sources
    mask = np.exp(-0.5 * ((x - h.sx[0])**2. + (y - h.sy[0])**2. +
                          (z - h.sz[0])**2.) / sigma**2.)

    # mask top
    # for matlab
    # z_sqrt=(abs(z).^(0.25)); depth_scale=1-z_sqrt/max(z_sqrt); figure; plot(depth_scale,z);
    z_factor = np.power(abs(z), 0.5)

    mask_depth = solver.split(z_factor)
    mask_d = solver.split(mask)

    for key in solver.parameters:
        for iproc in range(nproc):
            weight = np.sum(mask_d['vp'][iproc] * g[key][iproc]) / np.sum(
                mask_d['vp'][iproc])
            g[key][iproc] *= 1. - mask_d['vp'][iproc]
            g[key][iproc] *= mask_depth['vp'][iproc]
            #g[key][iproc] += mask_d['vp'][iproc]*weight

    # mask receivers
    #for ir in range(h.nr):
    #    mask = np.exp(-0.5*((x-h.rx[ir])**2.+(z-h.ry[ir])**2.)/sigma**2.)
    #    for key in solver.parameters:
    #        weight = np.sum(mask*g[key][0])/np.sum(mask)
    #        g[key][0] *= 1.-mask
    #        g[key][0] += mask*weight

    solver.save(fullpath, g, suffix='_kernel')
def predict(modelpath, UNTRAINED_MODEL=False):
    if UNTRAINED_MODEL:
        rnn = RNN(HIDDEN_NODES, LOOKBACK, WINDOW_SIZE, SAMPLERATE, 1)
    else:
        rnn = loadTrainedModel(modelpath)

    trainingSet, validationSet, scaler = setup()
    testSet = readDataset(TEST_SET)

    if rnn.sampleRate < rnn.windowSize:
        trainGen = DataGenerator(trainingSet, scaler,
                                 windowSize=rnn.windowSize,
                                 lookback=rnn.lookBack,
                                 sampleRate=rnn.windowSize)
        validateGen = DataGenerator(validationSet, scaler,
                                    windowSize=rnn.windowSize,
                                    lookback=rnn.lookBack,
                                    sampleRate=rnn.windowSize)
        testGen = DataGenerator(testSet, scaler,
                                windowSize=rnn.windowSize,
                                lookback=rnn.lookBack,
                                sampleRate=rnn.windowSize)
        batchLength = rnn.windowSize
    else:
        trainGen = DataGenerator(trainingSet, scaler,
                                 windowSize=rnn.windowSize,
                                 lookback=rnn.lookBack,
                                 sampleRate=rnn.sampleRate)
        validateGen = DataGenerator(validationSet, scaler,
                                    windowSize=rnn.windowSize,
                                    lookback=rnn.lookBack,
                                    sampleRate=rnn.sampleRate)
        testGen = DataGenerator(testSet, scaler,
                                windowSize=rnn.windowSize,
                                lookback=rnn.lookBack,
                                sampleRate=rnn.sampleRate)
        batchLength = rnn.sampleRate  # or sampleRate * windowSize?

    trainingSetTrueSize = TRAINING_DATASIZE - trainGen.maxStepIndex - trainGen.minIndex
    validationSetTrueSize = VALIDATION_DATASIZE - validateGen.maxStepIndex - validateGen.minIndex
    testSetTrueSize = TEST_DATASIZE - testGen.maxStepIndex - testGen.minIndex
    trainStep = int(trainingSetTrueSize / batchLength)
    validateStep = int(validationSetTrueSize / batchLength)
    testStep = int(testSetTrueSize / batchLength)
    if DEBUG:
        print(
            f"trainStep: {trainStep}, validationStep: {validateStep}, testStep: {testStep}"
        )

    # Model predictions
    start = time.time()
    trainPred = rnn.model.predict_generator(
        trainGen.generator(returnLabel=False), trainStep)
    end = time.time()
    if DEBUG:
        print(
            f"Time to make {trainPred.shape} training predictions: {end - start:.3f}, training dataset shape {trainingSet.shape}"
        )

    start = time.time()
    validatePred = rnn.model.predict_generator(
        validateGen.generator(returnLabel=False), validateStep)
    end = time.time()
    if DEBUG:
        print(
            f"Time to make {validatePred.shape} validation predictions: {end - start:.3f}, validation dataset shape {validationSet.shape}"
        )

    start = time.time()
    testPred = rnn.model.predict_generator(
        testGen.generator(returnLabel=False), testStep)
    end = time.time()
    if DEBUG:
        print(
            f"Time to make {testPred.shape} test predictions: {end - start:.3f}, test dataset shape {testSet.shape}"
        )

    # Undo the standardization on the predictions
    trainPred = scaler.inverse_transform(trainPred)
    validatePred = scaler.inverse_transform(validatePred)
    testPred = scaler.inverse_transform(testPred)

    # Sampling like this:
    # | - minIndex - |                      | - maxStepIndex - |
    # [ .......... {       TRUE SIZE       } .............. ]
    trainingTruth = trainingSet[trainGen.minIndex:-trainGen.maxStepIndex].ravel()
    validationTruth = validationSet[validateGen.minIndex:-validateGen.maxStepIndex].ravel()
    testTruth = testSet[testGen.minIndex:-testGen.maxStepIndex].ravel()
    if DEBUG:
        print(
            f"trainingTruth shape: {trainingTruth.shape}, validationTruth shape: {validationTruth.shape}, testTruth shape: {testTruth.shape}"
        )

    groundTruth = np.block([trainingTruth, validationTruth, testTruth])

    return trainPred, validatePred, testPred, groundTruth