def main():
    """Extract thinned time series from the simulations DB into timeseries.sqlite.

    Reads every row of job_info plus each job's numpy-buffer time series
    (logI, C) from SIM_DB_PATH, keeps every third sample of each series,
    and writes one row per retained sample into a freshly created
    timeseries.sqlite next to this script. Aborts (exit 1) if the output
    DB already exists or the input DB is missing.
    """
    outdb_path = os.path.join(SCRIPT_DIR, 'timeseries.sqlite')
    if os.path.exists(outdb_path):
        sys.stderr.write('timeseries.sqlite already exists; aborting.\n')
        sys.exit(1)
    outdb = sqlite3.connect(outdb_path)
    try:
        outdb.execute('CREATE TABLE job_info (job_id, eps, beta00, sigma01, sd_proc, replicate_id)')
        outdb.execute('CREATE TABLE timeseries (job_id, ind, logI0, logI1, C0, C1)')
        if not os.path.exists(SIM_DB_PATH):
            sys.stderr.write('simulations DB not present; aborting.\n')
            sys.exit(1)
        with sqlite3.connect(SIM_DB_PATH) as db:
            for job_id, eps, beta00, sigma01, sd_proc, replicate_id in db.execute(
                'SELECT job_id, eps, beta00, sigma01, sd_proc, replicate_id FROM job_info'
            ):
                # Progress logging, one line per job.
                sys.stderr.write('{}, {}, {}, {}, {}, {}\n'.format(
                    job_id, eps, beta00, sigma01, sd_proc, replicate_id))
                # next() builtin instead of cursor.next(): works on Python 2 and 3.
                job_id, logIbuf, Cbuf = next(db.execute(
                    'SELECT job_id, logI, C FROM timeseries WHERE job_id = ?',
                    [job_id]))
                outdb.execute('INSERT INTO job_info VALUES (?,?,?,?,?,?)',
                              [job_id, eps, beta00, sigma01, sd_proc, replicate_id])
                # Thin by a factor of 3; each series has two columns.
                logI = npybuffer.npy_buffer_to_ndarray(logIbuf)[::3, :]
                C = npybuffer.npy_buffer_to_ndarray(Cbuf)[::3, :]
                for i in range(logI.shape[0]):
                    outdb.execute('INSERT INTO timeseries VALUES (?,?,?,?,?,?)',
                                  [job_id, i, logI[i, 0], logI[i, 1], C[i, 0], C[i, 1]])
        outdb.commit()
        # Index after bulk insert so inserts are not slowed by index maintenance.
        outdb.execute('CREATE INDEX job_info_index ON job_info (job_id, eps, beta00, sigma01, sd_proc, replicate_id)')
        outdb.execute('CREATE INDEX timeseries_index ON timeseries (job_id, ind)')
        outdb.commit()
    finally:
        # Close even on error so a partially written DB file is not left locked.
        outdb.close()
def main():
    """Build timeseries.sqlite from the simulations DB, thinning each series by 3."""
    out_path = os.path.join(SCRIPT_DIR, 'timeseries.sqlite')
    if os.path.exists(out_path):
        sys.stderr.write('timeseries.sqlite already exists; aborting.\n')
        sys.exit(1)
    out = sqlite3.connect(out_path)
    out.execute('CREATE TABLE job_info (job_id, eps, beta00, sigma01, sd_proc, replicate_id)')
    out.execute('CREATE TABLE timeseries (job_id, ind, logI0, logI1, C0, C1)')
    if not os.path.exists(SIM_DB_PATH):
        sys.stderr.write('simulations DB not present; aborting.\n')
        sys.exit(1)
    with sqlite3.connect(SIM_DB_PATH) as sim_db:
        job_rows = sim_db.execute(
            'SELECT job_id, eps, beta00, sigma01, sd_proc, replicate_id FROM job_info')
        for job_id, eps, beta00, sigma01, sd_proc, replicate_id in job_rows:
            # One progress line per job on stderr.
            sys.stderr.write('{}, {}, {}, {}, {}, {}\n'.format(
                job_id, eps, beta00, sigma01, sd_proc, replicate_id))
            series_cursor = sim_db.execute(
                'SELECT job_id, logI, C FROM timeseries WHERE job_id = ?', [job_id])
            job_id, logI_buf, C_buf = series_cursor.next()
            out.execute('INSERT INTO job_info VALUES (?,?,?,?,?,?)',
                        [job_id, eps, beta00, sigma01, sd_proc, replicate_id])
            # Keep every third sample of each two-column series.
            logI_thinned = npybuffer.npy_buffer_to_ndarray(logI_buf)[::3, :]
            C_thinned = npybuffer.npy_buffer_to_ndarray(C_buf)[::3, :]
            for idx in range(logI_thinned.shape[0]):
                out.execute('INSERT INTO timeseries VALUES (?,?,?,?,?,?)',
                            [job_id, idx,
                             logI_thinned[idx, 0], logI_thinned[idx, 1],
                             C_thinned[idx, 0], C_thinned[idx, 1]])
    out.commit()
    # Indexes are created once all rows are in place.
    out.execute('CREATE INDEX job_info_index ON job_info (job_id, eps, beta00, sigma01, sd_proc, replicate_id)')
    out.execute('CREATE INDEX timeseries_index ON timeseries (job_id, ind)')
    out.commit()
    out.close()
def load_simulation(sim_db_path, job_id, ccm_settings):
    '''Load one job's time series from the DB and preprocess it per ccm_settings.

    :param sim_db_path: path to the simulations sqlite database.
    :param job_id: job whose series to load (one row in the timeseries table).
    :param ccm_settings: dict with keys ``variable_name``, ``years``,
        ``simulation_samples_per_year``, ``ccm_samples_per_year``,
        ``add_samples``, ``transform`` ('log', 'exp', or anything else for
        no transform), ``first_difference``, ``standardize``, and optionally
        ``simulation_samples_offset``.
    :return: two-column numpy array of processed samples.
    '''
    with sqlite3.connect(sim_db_path) as sim_db:
        # next() builtin instead of cursor.next(): works on Python 2 and 3.
        buf = next(sim_db.execute(
            'SELECT {} FROM timeseries WHERE job_id = ?'.format(
                ccm_settings['variable_name']),
            [job_id]))[0]
    assert isinstance(buf, buffer)
    arr = npybuffer.npy_buffer_to_ndarray(buf)
    assert arr.shape[1] == 2

    years = ccm_settings['years']
    simulation_samples_per_year = ccm_settings['simulation_samples_per_year']
    ccm_samples_per_year = ccm_settings['ccm_samples_per_year']

    # Get the unthinned sample from the end of the time series.
    sim_samps_unthinned = years * simulation_samples_per_year
    if 'simulation_samples_offset' in ccm_settings:
        sim_samps_unthinned -= ccm_settings['simulation_samples_offset']
    thin = int(simulation_samples_per_year / ccm_samples_per_year)
    arr_end_unthinned = arr[-sim_samps_unthinned:, :]

    # Thin the samples, adding in the intervening samples if requested.
    arr_mod = arr_end_unthinned[::thin, :]
    if ccm_settings['add_samples']:
        # NOTE(review): assumes sim_samps_unthinned is a multiple of thin;
        # otherwise the offset slices have mismatched lengths -- verify.
        for i in range(1, thin):
            arr_mod += arr_end_unthinned[i::thin, :]

    if ccm_settings['transform'] == 'log':
        arr_mod = numpy.log(arr_mod)
    elif ccm_settings['transform'] == 'exp':
        arr_mod = numpy.exp(arr_mod)
    if ccm_settings['first_difference']:
        arr_mod = arr_mod[1:, :] - arr_mod[:-1, :]
    if ccm_settings['standardize']:
        # Zero mean, unit (population) SD per column, in place.
        for i in range(arr_mod.shape[1]):
            arr_mod[:, i] -= numpy.mean(arr_mod[:, i])
            arr_mod[:, i] /= numpy.std(arr_mod[:, i])

    # Sanity checks: first-differencing drops exactly one sample.
    expected = int(years * ccm_samples_per_year)
    if ccm_settings['first_difference']:
        assert arr_mod.shape[0] == expected - 1
    else:
        assert arr_mod.shape[0] == expected
    assert arr_mod.shape[1] == 2
    return arr_mod
def load_simulation(sim_db_path, job_id, ccm_settings): '''Loads and processes time series based on settings at top of file.''' with sqlite3.connect(sim_db_path) as sim_db: buf = sim_db.execute( 'SELECT {} FROM timeseries WHERE job_id = ?'.format( ccm_settings['variable_name']), [job_id]).next()[0] assert isinstance(buf, buffer) arr = npybuffer.npy_buffer_to_ndarray(buf) assert arr.shape[1] == 2 years = ccm_settings['years'] simulation_samples_per_year = ccm_settings['simulation_samples_per_year'] ccm_samples_per_year = ccm_settings['ccm_samples_per_year'] # Get the unthinned sample from the end of the time series sim_samps_unthinned = years * simulation_samples_per_year if 'simulation_samples_offset' in ccm_settings: sim_samps_unthinned -= ccm_settings['simulation_samples_offset'] print sim_samps_unthinned thin = int(simulation_samples_per_year / ccm_samples_per_year) arr_end_unthinned = arr[-sim_samps_unthinned:, :] # Thin the samples, adding in the intervening samples if requested arr_mod = arr_end_unthinned[::thin, :] if ccm_settings['add_samples']: for i in range(1, thin): arr_mod += arr_end_unthinned[i::thin, :] if ccm_settings['transform'] == 'log': arr_mod = numpy.log(arr_mod) elif ccm_settings['transform'] == 'exp': arr_mod = numpy.exp(arr_mod) if ccm_settings['first_difference']: arr_mod = arr_mod[1:, :] - arr_mod[:-1, :] if ccm_settings['standardize']: for i in range(arr_mod.shape[1]): arr_mod[:, i] -= numpy.mean(arr_mod[:, i]) arr_mod[:, i] /= numpy.std(arr_mod[:, i]) if ccm_settings['first_difference']: assert arr_mod.shape[0] == int(years * ccm_samples_per_year) - 1 else: print arr_mod.shape[0] print years * ccm_samples_per_year assert arr_mod.shape[0] == int(years * ccm_samples_per_year) assert arr_mod.shape[1] == 2 return arr_mod
def load_simulation():
    '''Loads and processes time series based on settings at top of file.

    Reads the series named VARIABLE_NAME from SIM_DB_PATH, keeps the last
    CCM_YEARS * SIMULATION_SAMPLES_PER_YEAR samples, thins the sampling
    rate down to CCM_SAMPLES_PER_YEAR, and applies the optional
    ADD_SAMPLES / LOG_TRANSFORM / FIRST_DIFFERENCE / STANDARDIZE steps.
    Returns a two-column numpy array.
    '''
    # NOTE(review): job_id is not defined in this function; it presumably
    # refers to a module-level global set elsewhere -- verify before use.
    with sqlite3.connect(SIM_DB_PATH) as sim_db:
        # next() builtin instead of cursor.next(): works on Python 2 and 3.
        buf = next(sim_db.execute(
            'SELECT {} FROM timeseries WHERE job_id = ?'.format(VARIABLE_NAME),
            [job_id]))[0]
    assert isinstance(buf, buffer)
    arr = npybuffer.npy_buffer_to_ndarray(buf)
    assert arr.shape[1] == 2

    # Get the unthinned sample from the end of the time series.
    sim_samps_unthinned = CCM_YEARS * SIMULATION_SAMPLES_PER_YEAR
    # int() keeps the slice step an integer under true division, matching the
    # parameterized loader in this file; unchanged for Python 2 int operands.
    thin = int(SIMULATION_SAMPLES_PER_YEAR / CCM_SAMPLES_PER_YEAR)
    arr_end_unthinned = arr[-sim_samps_unthinned:, :]

    # Thin the samples, adding in the intervening samples if requested.
    arr_mod = arr_end_unthinned[::thin, :]
    if ADD_SAMPLES:
        for i in range(1, thin):
            arr_mod += arr_end_unthinned[i::thin, :]

    if LOG_TRANSFORM:
        arr_mod = numpy.log(arr_mod)
    if FIRST_DIFFERENCE:
        arr_mod = arr_mod[1:, :] - arr_mod[:-1, :]
    if STANDARDIZE:
        # Zero mean, unit (population) SD per column, in place.
        for i in range(arr_mod.shape[1]):
            arr_mod[:, i] -= numpy.mean(arr_mod[:, i])
            arr_mod[:, i] /= numpy.std(arr_mod[:, i])

    # Sanity checks: first-differencing drops exactly one sample.
    if FIRST_DIFFERENCE:
        assert arr_mod.shape[0] == CCM_YEARS * CCM_SAMPLES_PER_YEAR - 1
    else:
        assert arr_mod.shape[0] == CCM_YEARS * CCM_SAMPLES_PER_YEAR
    assert arr_mod.shape[1] == 2
    return arr_mod
def load_simulation():
    '''Loads and processes time series based on settings at top of file.'''
    query = 'SELECT {} FROM timeseries WHERE job_id = ?'.format(VARIABLE_NAME)
    with sqlite3.connect(SIM_DB_PATH) as conn:
        raw_buf = conn.execute(query, [job_id]).next()[0]
    assert isinstance(raw_buf, buffer)
    series = npybuffer.npy_buffer_to_ndarray(raw_buf)
    assert series.shape[1] == 2

    # Number of raw samples to keep from the end of the series.
    n_keep = CCM_YEARS * SIMULATION_SAMPLES_PER_YEAR
    step = SIMULATION_SAMPLES_PER_YEAR / CCM_SAMPLES_PER_YEAR
    tail = series[-n_keep:, :]

    # Thin; optionally fold the skipped intervening samples back in by summation.
    out = tail[::step, :]
    if ADD_SAMPLES:
        for offset in range(1, step):
            out += tail[offset::step, :]

    if LOG_TRANSFORM:
        out = numpy.log(out)
    if FIRST_DIFFERENCE:
        out = out[1:, :] - out[:-1, :]
    if STANDARDIZE:
        # Per-column standardization, in place.
        for col in range(out.shape[1]):
            out[:, col] -= numpy.mean(out[:, col])
            out[:, col] /= numpy.std(out[:, col])

    if FIRST_DIFFERENCE:
        assert out.shape[0] == CCM_YEARS * CCM_SAMPLES_PER_YEAR - 1
    else:
        assert out.shape[0] == CCM_YEARS * CCM_SAMPLES_PER_YEAR
    assert out.shape[1] == 2
    return out