def _run_set_of_spectra(self, index_start: int, index_stop: int) -> None:
    """Invert a contiguous chunk of spectra and write each result out.

    Logging is configured and the non-picklable model objects are rebuilt
    here before a fresh IO object is created for this chunk.

    Args:
        index_start: first spectral index to process (inclusive)
        index_stop: spectral index at which to stop (exclusive)
    """
    logging.basicConfig(format='%(levelname)s:%(message)s',
                        level=self.loglevel, filename=self.logfile)
    self._init_nonpicklable_objects()
    io = IO(self.config, self.fm, self.iv, self.rows, self.cols)
    n_total = index_stop - index_start

    for index in range(index_start, index_stop):
        success, row, col, meas, geom = io.get_components_at_index(index)

        # Without data for this index there is nothing to invert or write
        if not success:
            continue

        # A measurement that is entirely below -49 is treated as a bad-data
        # flag and yields an empty state list instead of an inversion.
        flagged_bad = meas is not None and all(meas < -49.0)
        if flagged_bad:
            self.states = []
        else:
            # invert() returns a list of states: posterior samples in the
            # MCMC case, or a gradient descent trajectory whose last entry
            # is the converged solution in the standard case.
            self.states = self.iv.invert(meas, geom)

        # Push the result to disk straight away
        io.write_spectrum(row, col, self.states, meas, geom,
                          flush_immediately=True)

        # Progress report on every 100th spectrum of the chunk
        n_done = index - index_start
        if n_done % 100 == 0:
            logging.info(
                'Core at start index {} completed inversion {}/{}'.format(index_start, n_done, n_total))
def __init__(self, config: configs.Config, loglevel: str, logfile: str, worker_id: int = None, total_workers: int = None):
    """Worker class to help run a subset of spectra.

    Builds the forward model, the inversion object matching the configured
    implementation mode, and an IO object, and initializes the progress
    bookkeeping attributes.

    Args:
        config: isofit configuration
        loglevel: output logging level
        logfile: output logging file
        worker_id: worker ID for logging reference
        total_workers: the total number of workers running, for logging
            reference

    Raises:
        AttributeError: if the configured implementation mode is not one of
            'mcmc_inversion', 'inversion' or 'simulation'.
    """
    logging.basicConfig(format='%(levelname)s:%(message)s',
                        level=loglevel, filename=logfile)
    self.config = config
    self.fm = ForwardModel(self.config)

    mode = self.config.implementation.mode
    if mode == 'mcmc_inversion':
        self.iv = MCMCInversion(self.config, self.fm)
    elif mode in ['inversion', 'simulation']:
        self.iv = Inversion(self.config, self.fm)
    else:
        # This should never be reached due to configuration checking
        raise AttributeError('Config implementation mode not valid')

    self.io = IO(self.config, self.fm)

    # Even share of the full cube per worker; only known when the number of
    # workers was supplied. This is a float (true division).
    self.approximate_total_spectra = None
    if total_workers is not None:
        self.approximate_total_spectra = self.io.n_cols * self.io.n_rows / total_workers
    self.worker_id = worker_id
    self.completed_spectra = 0
def __init__(self, config_file, row_column='', level='INFO', logfile=None):
    """Load the configuration, start ray and build the model objects.

    Args:
        config_file: configuration file handed to configs.create_new_config
        row_column: optional comma-separated subset of the data to run:
            * one value: a single row
            * two values: a row range (start, end), end excluded
            * four values: a row range followed by a column range, ends
              excluded
          With an empty string (or any other number of values) rows and
          cols are left as None.
        level: logging level passed to logging.basicConfig
        logfile: log file name passed to logging.basicConfig
    """
    # Explicitly set the number of threads to be 1, so we more effectively
    # run in parallel
    os.environ["MKL_NUM_THREADS"] = "1"

    # Set logging level
    self.loglevel = level
    self.logfile = logfile
    logging.basicConfig(format='%(levelname)s:%(message)s',
                        level=self.loglevel, filename=self.logfile)

    self.rows = None
    self.cols = None
    self.config = None
    self.fm = None
    self.iv = None
    self.io = None
    self.states = None

    # Load configuration file
    self.config = configs.create_new_config(config_file)
    # NOTE(review): the return value is discarded here - confirm that
    # get_config_errors() raises on an invalid configuration.
    self.config.get_config_errors()

    # Initialize ray for parallel execution
    implementation = self.config.implementation
    rayargs = {'address': implementation.ip_head,
               'redis_password': implementation.redis_password,
               'ignore_reinit_error': True,
               'local_mode': implementation.n_cores == 1}

    # only specify a temporary directory if we are not connecting to
    # a ray cluster
    if rayargs['local_mode']:
        rayargs['temp_dir'] = implementation.ray_temp_dir
        # Used to run on a VPN
        ray.services.get_node_ip_address = lambda: '127.0.0.1'

    # We can only set the num_cpus if running on a single-node
    if implementation.ip_head is None and implementation.redis_password is None:
        rayargs['num_cpus'] = implementation.n_cores
    ray.init(**rayargs)

    # Parse the optional row/column subset. The three supported layouts are
    # mutually exclusive, hence a single if/elif chain.
    if row_column:
        ranges = row_column.split(',')
        if len(ranges) == 1:
            self.rows, self.cols = [int(ranges[0])], None
        elif len(ranges) == 2:
            row_start, row_end = ranges
            self.rows, self.cols = range(int(row_start), int(row_end)), None
        elif len(ranges) == 4:
            row_start, row_end, col_start, col_end = ranges
            self.rows = range(int(row_start), int(row_end))
            self.cols = range(int(col_start), int(col_end))

    # Build the forward model and inversion objects
    self._init_nonpicklable_objects()
    self.io = IO(self.config, self.fm, self.iv, self.rows, self.cols)
def __init__(self, config: configs.Config, loglevel: str, logfile: str):
    """Build the forward model, inversion and IO objects for a worker.

    Args:
        config: isofit configuration
        loglevel: output logging level
        logfile: output logging file

    Raises:
        AttributeError: if the configured implementation mode is not one of
            'mcmc_inversion', 'inversion' or 'simulation'.
    """
    logging.basicConfig(format='%(levelname)s:%(message)s',
                        level=loglevel, filename=logfile)
    self.config = config
    self.fm = ForwardModel(self.config)

    mode = self.config.implementation.mode
    if mode == 'mcmc_inversion':
        self.iv = MCMCInversion(self.config, self.fm)
    elif mode in ['inversion', 'simulation']:
        self.iv = Inversion(self.config, self.fm)
    else:
        # This should never be reached due to configuration checking
        raise AttributeError('Config implementation mode not valid')

    self.io = IO(self.config, self.fm)
def run_forward():
    """Simulate the remote measurement of a spectrally uniform surface.

    Returns the first entry of the state list produced by the last
    inversion of the loop.
    """
    # Set up the surface/atmosphere/instrument model from the forward config
    settings = load_config('config_forward.json')
    forward_model = ForwardModel(settings['forward_model'])
    inverter = Inversion(settings['inversion'], forward_model)
    spectra_io = IO(settings, forward_model, inverter, [0], [0])

    # Run every spectrum the IO object yields and write the result
    for row, col, meas, geom, _configs in spectra_io:
        states = inverter.invert(meas, geom)
        spectra_io.write_spectrum(row, col, states, meas, geom)

    # Placeholder assertion; always passes
    assert True
    return states[0]
def run_forward():
    """Simulate the remote measurement of a spectrally uniform surface.

    Returns the first entry of the state list produced by the last
    inversion of the loop.
    """
    # The configuration lives in the 'data' directory next to this file
    here = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(here, 'data', 'config_forward.json')

    # Set up the surface/atmosphere/instrument model
    config = create_new_config(config_path)
    forward_model = ForwardModel(config)
    inverter = Inversion(config, forward_model)
    spectra_io = IO(config, forward_model, inverter, [0], [0])

    # Run every spectrum the IO object yields and write the result
    for row, col, meas, geom, _configs in spectra_io:
        states = inverter.invert(meas, geom)
        spectra_io.write_spectrum(row, col, states, meas, geom)

    # Placeholder assertion; always passes
    assert True
    return states[0]
def run_inverse():
    """Invert the remote measurement.

    Returns the last entry of the state list produced by the last
    inversion of the loop.
    """
    # Set up the surface/atmosphere/instrument model from the inversion config
    settings = load_config('config_inversion.json')
    forward_model = ForwardModel(settings['forward_model'])
    inverter = Inversion(settings['inversion'], forward_model)
    spectra_io = IO(settings, forward_model, inverter, [0], [0])
    geom = None

    # Take each measurement from the simulation results, invert it and
    # write the result
    for row, col, meas, geom, _configs in spectra_io:
        states = inverter.invert(meas, geom)
        spectra_io.write_spectrum(row, col, states, meas, geom)

    # Placeholder assertion; always passes
    assert True
    return states[-1]
def run_inverse():
    """Invert the remote measurement.

    Returns the last entry of the state list produced by the last
    inversion of the loop.
    """
    # The configuration lives in the 'data' directory next to this file.
    # NOTE(review): this loads 'config_forward.json' although the function
    # performs the inversion - confirm that this file is the intended one.
    here = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(here, 'data', 'config_forward.json')

    # Set up the surface/atmosphere/instrument model
    config = create_new_config(config_path)
    forward_model = ForwardModel(config)
    inverter = Inversion(config, forward_model)
    spectra_io = IO(config, forward_model, inverter, [0], [0])
    geom = None

    # Take each measurement from the simulation results, invert it and
    # write the result
    for row, col, meas, geom, _configs in spectra_io:
        states = inverter.invert(meas, geom)
        spectra_io.write_spectrum(row, col, states, meas, geom)

    # Placeholder assertion; always passes
    assert True
    return states[-1]
def run_forward():
    """Simulate the remote measurement of a spectrally uniform surface.

    Returns the first entry of the state list produced by the last
    inversion that was run.
    """
    # The configuration lives in the 'data' directory next to this file
    here = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(here, 'data', 'config_forward.json')

    # Set up the surface/atmosphere/instrument model
    config = create_new_config(config_path)
    forward_model = ForwardModel(config)
    inverter = Inversion(config, forward_model)
    spectra_io = IO(config, forward_model)

    # Walk the whole cube, skipping positions for which no components
    # are returned
    for row in range(spectra_io.n_rows):
        for col in range(spectra_io.n_cols):
            components = spectra_io.get_components_at_index(row, col)
            if components is None:
                continue
            states = inverter.invert(components.meas, components.geom)
            spectra_io.write_spectrum(row, col, states, forward_model, inverter)

    # Placeholder assertion; always passes
    assert True
    return states[0]
def run_inverse():
    """Invert the remote measurement.

    Returns the last entry of the state list produced by the last
    inversion that was run.
    """
    # The configuration lives in the 'data' directory next to this file.
    # NOTE(review): this loads 'config_forward.json' although the function
    # performs the inversion - confirm that this file is the intended one.
    here = os.path.dirname(os.path.abspath(__file__))
    config_path = os.path.join(here, 'data', 'config_forward.json')

    # Set up the surface/atmosphere/instrument model
    config = create_new_config(config_path)
    forward_model = ForwardModel(config)
    inverter = Inversion(config, forward_model)
    spectra_io = IO(config, forward_model)

    # Walk the whole cube, inverting every position that returns components
    # and writing the result
    for row in range(spectra_io.n_rows):
        for col in range(spectra_io.n_cols):
            components = spectra_io.get_components_at_index(row, col)
            if components is None:
                continue
            states = inverter.invert(components.meas, components.geom)
            spectra_io.write_spectrum(row, col, states, forward_model, inverter)

    # Placeholder assertion; always passes
    assert True
    return states[-1]
def run(self, row_column = None):
    """Invert the selected spectra in parallel using a pool of ray workers.

    A first forward model is built up front so that any necessary LUTs are
    generated before the workers start. The selected (row, column) pairs
    are then split into contiguous chunks that are handed to the workers.

    Args:
        row_column: optional comma-separated string selecting a subset:
            * a single number: that row (columns are left as None)
            * two numbers: a row range (row_start, row_end), row_end
              excluded (columns are left as None)
            * four numbers: (line_start, line_end, sample_start,
              sample_end), all values inclusive
          If None, the whole cube is analyzed.

    NOTE(review): for the one- and two-value forms self.cols is None and is
    passed to np.meshgrid as-is - confirm that the workers handle a None
    column index.
    """
    logging.info("Building first forward model, will generate any necessary LUTs")
    fm = ForwardModel(self.config)

    if row_column is not None:
        ranges = row_column.split(',')
        # The supported layouts are mutually exclusive
        if len(ranges) == 1:
            self.rows, self.cols = [int(ranges[0])], None
        elif len(ranges) == 2:
            row_start, row_end = ranges
            self.rows, self.cols = range(int(row_start), int(row_end)), None
        elif len(ranges) == 4:
            row_start, row_end, col_start, col_end = ranges
            self.rows = range(int(row_start), int(row_end) + 1)
            self.cols = range(int(col_start), int(col_end) + 1)
    else:
        io = IO(self.config, fm)
        self.rows = range(io.n_rows)
        self.cols = range(io.n_cols)
        del io
    # The forward model was only needed for LUT generation and sizing
    del fm

    # One (row, column) pair per spectrum to run
    index_pairs = np.vstack([x.flatten(order='f') for x in np.meshgrid(self.rows, self.cols)]).T
    n_iter = index_pairs.shape[0]

    # An empty selection would otherwise end in a division by zero below
    if n_iter == 0:
        logging.warning('No spectra selected, nothing to run')
        return

    if self.config.implementation.n_cores is None:
        n_workers = multiprocessing.cpu_count()
    else:
        n_workers = self.config.implementation.n_cores

    # Max out the number of workers based on the number of tasks
    n_workers = min(n_workers, n_iter)

    if self.workers is None:
        remote_worker = ray.remote(Worker)
        self.workers = ray.util.ActorPool([remote_worker.remote(self.config, self.loglevel, self.logfile, n, n_workers)
                                           for n in range(n_workers)])

    start_time = time.time()
    n_tasks = max(1, int(min(n_workers * self.config.implementation.task_inflation_factor, n_iter)))

    logging.info(f'Beginning {n_iter} inversions in {n_tasks} chunks using {n_workers} cores')

    # Divide up spectra to run into chunks. n_tasks chunks need n_tasks + 1
    # boundaries; using only n_tasks boundaries produced one chunk too few
    # and, for a single task, ran just the first spectrum. Integer
    # arithmetic keeps the boundaries exact, and since n_tasks <= n_iter
    # every chunk is non-empty.
    chunk_edges = [(k * n_iter) // n_tasks for k in range(n_tasks + 1)]
    indices_to_run = [index_pairs[lo:hi, :]
                      for lo, hi in zip(chunk_edges[:-1], chunk_edges[1:])]

    # list() drains the iterator so that every chunk has completed before
    # the timing is reported
    list(self.workers.map_unordered(lambda a, b: a.run_set_of_spectra.remote(b), indices_to_run))

    total_time = time.time() - start_time
    logging.info(f'Inversions complete.  {round(total_time,2)}s total, {round(n_iter/total_time,4)} spectra/s, '
                 f'{round(n_iter/total_time/n_workers,4)} spectra/s/core')