t12_K, t37_K, t_clim_K, sun_zenith_angle, sat_zenith_angle) if np.isnan(st_truth_K): # No need to do more for this pixel, if the output is not a number. continue swath_input_id = db.insert_swath_values( str(avhrr_model.satellite_id), surface_temp=st_truth_K, # float(true_st_K), t_11=float(t11_K), t_12=float(t12_K), sat_zenith_angle=sat_zenith_angle, sun_zenith_angle=sun_zenith_angle, cloudmask=int(avhrr_model.cloudmask[row_index, col_index]), swath_datetime=datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), lat=float(lat), lon=float(lon) ) # Do the perturbations... for i in range(int(args["--number-of-perturbations"])): perturbed_t11_K = random.gauss(t11_K, sigma_11) perturbed_t12_K = random.gauss(t12_K, sigma_12) perturbed_t37_K = random.gauss(t37_K, sigma_37) \ if t37_K is None or np.isnan(t37_K) else np.NaN # Missing climatology
def populate_from_files(database_filename, avhrr_filename,
                        sun_sat_angle_filename, cloudmask_filename,
                        sea_ice_fraction_data_directory,
                        number_of_perturbations, run_in_parallel=False):
    """
    Populate the database with perturbed surface temperatures derived from
    one AVHRR swath.

    For every usable pixel (cloudmask 1 or 4, valid T11/T12, valid lat/lon,
    non-NaN retrieved temperature) one "truth" row is inserted into the
    swath table, followed by ``number_of_perturbations`` perturbed
    temperatures linked to that row.

    :param database_filename: Path to the sqlite/eustace database file.
    :param avhrr_filename: Path to the AVHRR hdf5 swath file.
    :param sun_sat_angle_filename: Path to the sun/sat angle hdf5 file.
    :param cloudmask_filename: Path to the cloudmask hdf5 file.
    :param sea_ice_fraction_data_directory: Directory with sea ice
        fraction data; the matching field may be missing (``None``).
    :param number_of_perturbations: Perturbations per inserted pixel.
    :param run_in_parallel: Fan the perturbation work out over processes.
        WARNING: for a small number of perturbations the sequential path
        is much faster.
    :raises RuntimeError: If T11 or T12 is NaN for a cloud-free pixel.
    """
    LOG.info("db_filename: %s" % (database_filename))
    LOG.info("avhrr_filename: %s" % (avhrr_filename))
    # BUG FIX: the original logged the undefined name 'sunsatangle_filename'
    # (NameError on every call); the parameter is 'sun_sat_angle_filename'.
    LOG.info("sunsatangle_filename: %s" % (sun_sat_angle_filename))
    LOG.info("cloudmask_filename: %s" % (cloudmask_filename))
    LOG.info("sea_ice_fraction_data_directory: %s" %
             (sea_ice_fraction_data_directory))

    # Reading in the input file.
    # The file is cached, so that when the values are read, they are read
    # from memory, and not from the file system. This speeds up the
    # calculations.
    with models.avhrr_hdf5.Hdf5(avhrr_filename, sun_sat_angle_filename,
                                cloudmask_filename) as avhrr_model:
        LOG.info(avhrr_model)
        assert (avhrr_model.lat.shape == avhrr_model.lon.shape)

        # Get the sigma (noise) values based on the satellite id.
        sigmas = eustace.sigmas.get_sigmas(avhrr_model.satellite_id)
        LOG.info(sigmas)

        sea_ice_fractions = get_sea_ice_fractions(
            sea_ice_fraction_data_directory, avhrr_filename)
        if sea_ice_fractions is not None:
            assert (avhrr_model.lat.shape == sea_ice_fractions.shape)

        # Some book keeping...
        total_perturbed_st_count = 0
        counter = 0

        # Fixed seed so that the results are the same the next time the
        # exact same system is run.
        random.seed(1)

        output_queue = mp.Queue()
        number_of_cpus = mp.cpu_count()
        number_of_processes_started = 0
        number_of_processes_finished = 0

        # Book keeping.
        start_time = datetime.datetime.now()

        # Using the coefficients based on the satellite id.
        with eustace.coefficients.Coefficients(
                avhrr_model.satellite_id) as coeff:
            ## Using a ram disk speeds up the calculations, quite a lot.
            ## Creating ramdisk:
            # mkdir /tmp/ramdisk
            #
            ## A 3Gb ram disk.
            # mount -t tmpfs -o size=3072m tmpfs /tmp/ramdisk
            #
            ## or, for a 12Gb ram disk:
            # mount -t tmpfs -o size=$((12 * 1024))m tmpfs /tmp/ramdisk
            #
            ## Defining the database.
            with eustace.db.Db(database_filename) as db:
                # Rows.
                for row_index in np.arange(avhrr_model.lon.shape[0]):
                    # Some diagnostics while running.
                    # ROBUSTNESS: guard against elapsed == 0 on the very
                    # first iteration (ZeroDivisionError).
                    elapsed = (datetime.datetime.now() -
                               start_time).total_seconds()
                    rate = (total_perturbed_st_count / elapsed
                            if elapsed > 0 else 0.0)
                    LOG.info(
                        "ROW: %i. total st_count: %i. total_time: %s. sts./sec: %f"
                        % (row_index, total_perturbed_st_count,
                           str(datetime.datetime.now() - start_time), rate))

                    # Cols.
                    for col_index in np.arange(avhrr_model.lon.shape[1]):
                        counter += 1

                        # Only cloudmask values 1 and 4 are processed.
                        # NOTE(review): the meaning of these codes is not
                        # visible here -- confirm against the cloudmask
                        # product documentation.
                        cloudmask = avhrr_model.cloudmask[row_index,
                                                          col_index]
                        if cloudmask != 1 and cloudmask != 4:
                            LOG.debug("Bad cloudmask: %i" % (cloudmask))
                            continue

                        # T11 is channel 4.
                        t11_K = avhrr_model.ch4[row_index, col_index]
                        # T12 is channel 5.
                        t12_K = avhrr_model.ch5[row_index, col_index]
                        # T37 is channel 3b.
                        t37_K = avhrr_model.ch3b[row_index, col_index]

                        if np.isnan(t11_K) or np.isnan(t12_K):
                            # t11 and t12 are both needed for all
                            # calculations.
                            # BUG FIX: the original raised the undefined
                            # name 'RuntimeException'; the Python builtin
                            # is RuntimeError.
                            raise RuntimeError("Missing T11 or T12")

                        # Angles.
                        sun_zenith_angle = float(
                            avhrr_model.sun_zenith_angle[row_index,
                                                         col_index])
                        sat_zenith_angle = float(
                            avhrr_model.sat_zenith_angle[row_index,
                                                         col_index])

                        # Missing climatology. Using t11_K instead.
                        t_clim_K = t11_K

                        # Lat / lon.
                        lat = avhrr_model.lat[row_index, col_index]
                        lon = avhrr_model.lon[row_index, col_index]
                        if lat is None or np.isnan(
                                lat) or lon is None or np.isnan(lon):
                            continue

                        # Pick algorithm.
                        algorithm = eustace.surface_temperature.select_surface_temperature_algorithm(
                            sun_zenith_angle, t11_K, t37_K)

                        # Calculate the "truth" temperature.
                        st_truth_K = eustace.surface_temperature.get_surface_temperature(
                            algorithm, coeff, t11_K, t12_K, t37_K, t_clim_K,
                            sun_zenith_angle, sat_zenith_angle)

                        if np.isnan(st_truth_K):
                            # No need to do more for this pixel, if the
                            # output is not a number.
                            continue

                        # Sea ice fraction for this pixel, if available.
                        if sea_ice_fractions is not None:
                            sif = sea_ice_fractions[row_index][col_index]
                            if sif is None or np.isnan(sif):
                                sea_ice_fraction = None
                            else:
                                sea_ice_fraction = float(sif)
                        else:
                            sea_ice_fraction = None

                        swath_input_id = db.insert_swath_values(
                            str(avhrr_model.satellite_id),
                            surface_temp=st_truth_K,
                            t_11=float(t11_K),
                            t_12=float(t12_K),
                            sat_zenith_angle=sat_zenith_angle,
                            sun_zenith_angle=sun_zenith_angle,
                            cloudmask=int(cloudmask),
                            swath_datetime=avhrr_model.swath_datetime,
                            lat=float(lat),
                            lon=float(lon),
                            sea_ice_fraction=sea_ice_fraction)

                        if not run_in_parallel:
                            # WARNING!
                            # If the number of perturbations is a small
                            # number, it is much faster to run sequentially!
                            perturbations = eustace.surface_temperature.get_n_perturbed_temeratures(
                                coeff, number_of_perturbations, t11_K,
                                t12_K, t37_K, t_clim_K, sigmas["sigma_11"],
                                sigmas["sigma_12"], sigmas["sigma_37"],
                                sun_zenith_angle, sat_zenith_angle,
                                random_seed=counter)
                            num_inserted = db.insert_many_perturbations(
                                swath_input_id, perturbations)
                            total_perturbed_st_count += num_inserted
                        else:
                            # Start a process running the perturbations;
                            # the result is put on the output queue.
                            perturbate_in_parallel(
                                output_queue, swath_input_id, coeff,
                                number_of_perturbations, t11_K, t12_K,
                                t37_K, t_clim_K, sigmas["sigma_11"],
                                sigmas["sigma_12"], sigmas["sigma_37"],
                                sun_zenith_angle, sat_zenith_angle,
                                random_seed=counter)
                            number_of_processes_started += 1

                            if number_of_processes_started > number_of_cpus:
                                # get() will wait forever, for a process to
                                # finish.
                                swath_input_id, perturbations = \
                                    output_queue.get()
                                number_of_processes_finished += 1
                                num_inserted = db.insert_many_perturbations(
                                    swath_input_id, perturbations)
                                total_perturbed_st_count += num_inserted

                if run_in_parallel:
                    # Drain the results of the workers still in flight.
                    while number_of_processes_started > \
                            number_of_processes_finished:
                        swath_input_id, perturbations = output_queue.get()
                        number_of_processes_finished += 1
                        db.insert_many_perturbations(swath_input_id,
                                                     perturbations)

        # FIN.
        LOG.info("Finished perturbing '%s'." % (avhrr_model.avhrr_filename))
def populate_from_files(database_filename, avhrr_filename,
                        sun_sat_angle_filename, cloudmask_filename,
                        sea_ice_fraction_data_directory,
                        number_of_perturbations, run_in_parallel=False):
    """
    Populate the database with perturbed values from one AVHRR swath.

    NOTE(review): this is a duplicate definition of populate_from_files;
    the file defines the same function twice, so only the later definition
    takes effect at import time. Consider removing one copy.

    :param database_filename: Path to the database file.
    :param avhrr_filename: Path to the AVHRR hdf5 swath file.
    :param sun_sat_angle_filename: Path to the sun/sat angle hdf5 file.
    :param cloudmask_filename: Path to the cloudmask hdf5 file.
    :param sea_ice_fraction_data_directory: Directory with sea ice
        fraction data (a matching field may be absent).
    :param number_of_perturbations: Perturbations per inserted pixel.
    :param run_in_parallel: Spread perturbation work over processes.
    :raises RuntimeError: If T11 or T12 is NaN for a cloud-free pixel.
    """
    LOG.info("db_filename: %s" % (database_filename))
    LOG.info("avhrr_filename: %s" % (avhrr_filename))
    # BUG FIX: the original logged the undefined name 'sunsatangle_filename'
    # (NameError on every call); the parameter is 'sun_sat_angle_filename'.
    LOG.info("sunsatangle_filename: %s" % (sun_sat_angle_filename))
    LOG.info("cloudmask_filename: %s" % (cloudmask_filename))
    LOG.info("sea_ice_fraction_data_directory: %s" %
             (sea_ice_fraction_data_directory))

    # Reading in the input file.
    # The file is cached, so that when the values are read, they are read
    # from memory, and not from the file system. This speeds up the
    # calculations.
    with models.avhrr_hdf5.Hdf5(avhrr_filename, sun_sat_angle_filename,
                                cloudmask_filename) as avhrr_model:
        LOG.info(avhrr_model)
        assert (avhrr_model.lat.shape == avhrr_model.lon.shape)

        # Get the sigma (noise) values based on the satellite id.
        sigmas = eustace.sigmas.get_sigmas(avhrr_model.satellite_id)
        LOG.info(sigmas)

        sea_ice_fractions = get_sea_ice_fractions(
            sea_ice_fraction_data_directory, avhrr_filename)
        if sea_ice_fractions is not None:
            assert (avhrr_model.lat.shape == sea_ice_fractions.shape)

        # Some book keeping...
        total_perturbed_st_count = 0
        counter = 0

        # Fixed seed so that the results are the same the next time the
        # exact same system is run.
        random.seed(1)

        output_queue = mp.Queue()
        number_of_cpus = mp.cpu_count()
        number_of_processes_started = 0
        number_of_processes_finished = 0

        # Book keeping.
        start_time = datetime.datetime.now()

        # Using the coefficients based on the satellite id.
        with eustace.coefficients.Coefficients(
                avhrr_model.satellite_id) as coeff:
            ## Using a ram disk speeds up the calculations, quite a lot.
            ## Creating ramdisk:
            # mkdir /tmp/ramdisk
            #
            ## A 3Gb ram disk.
            # mount -t tmpfs -o size=3072m tmpfs /tmp/ramdisk
            #
            ## or, for a 12Gb ram disk:
            # mount -t tmpfs -o size=$((12 * 1024))m tmpfs /tmp/ramdisk
            #
            ## Defining the database.
            with eustace.db.Db(database_filename) as db:
                # Rows.
                for row_index in np.arange(avhrr_model.lon.shape[0]):
                    # Some diagnostics while running.
                    # ROBUSTNESS: guard against elapsed == 0 on the very
                    # first iteration (ZeroDivisionError).
                    elapsed = (datetime.datetime.now() -
                               start_time).total_seconds()
                    rate = (total_perturbed_st_count / elapsed
                            if elapsed > 0 else 0.0)
                    LOG.info(
                        "ROW: %i. total st_count: %i. total_time: %s. sts./sec: %f"
                        % (row_index, total_perturbed_st_count,
                           str(datetime.datetime.now() - start_time), rate))

                    # Cols.
                    for col_index in np.arange(avhrr_model.lon.shape[1]):
                        counter += 1

                        # Only cloudmask values 1 and 4 are processed.
                        # NOTE(review): the meaning of these codes is not
                        # visible here -- confirm against the cloudmask
                        # product documentation.
                        cloudmask = avhrr_model.cloudmask[row_index,
                                                          col_index]
                        if cloudmask != 1 and cloudmask != 4:
                            LOG.debug("Bad cloudmask: %i" % (cloudmask))
                            continue

                        # T11 is channel 4.
                        t11_K = avhrr_model.ch4[row_index, col_index]
                        # T12 is channel 5.
                        t12_K = avhrr_model.ch5[row_index, col_index]
                        # T37 is channel 3b.
                        t37_K = avhrr_model.ch3b[row_index, col_index]

                        if np.isnan(t11_K) or np.isnan(t12_K):
                            # t11 and t12 are both needed for all
                            # calculations.
                            # BUG FIX: the original raised the undefined
                            # name 'RuntimeException'; the Python builtin
                            # is RuntimeError.
                            raise RuntimeError("Missing T11 or T12")

                        # Angles.
                        sun_zenith_angle = float(
                            avhrr_model.sun_zenith_angle[row_index,
                                                         col_index])
                        sat_zenith_angle = float(
                            avhrr_model.sat_zenith_angle[row_index,
                                                         col_index])

                        # Missing climatology. Using t11_K instead.
                        t_clim_K = t11_K

                        # Lat / lon.
                        lat = avhrr_model.lat[row_index, col_index]
                        lon = avhrr_model.lon[row_index, col_index]
                        if lat is None or np.isnan(
                                lat) or lon is None or np.isnan(lon):
                            continue

                        # Pick algorithm.
                        algorithm = eustace.surface_temperature.select_surface_temperature_algorithm(
                            sun_zenith_angle, t11_K, t37_K)

                        # Calculate the "truth" temperature.
                        st_truth_K = eustace.surface_temperature.get_surface_temperature(
                            algorithm, coeff, t11_K, t12_K, t37_K, t_clim_K,
                            sun_zenith_angle, sat_zenith_angle)

                        if np.isnan(st_truth_K):
                            # No need to do more for this pixel, if the
                            # output is not a number.
                            continue

                        # Sea ice fraction for this pixel, if available.
                        if sea_ice_fractions is not None:
                            sif = sea_ice_fractions[row_index][col_index]
                            if sif is None or np.isnan(sif):
                                sea_ice_fraction = None
                            else:
                                sea_ice_fraction = float(sif)
                        else:
                            sea_ice_fraction = None

                        swath_input_id = db.insert_swath_values(
                            str(avhrr_model.satellite_id),
                            surface_temp=st_truth_K,
                            t_11=float(t11_K),
                            t_12=float(t12_K),
                            sat_zenith_angle=sat_zenith_angle,
                            sun_zenith_angle=sun_zenith_angle,
                            cloudmask=int(cloudmask),
                            swath_datetime=avhrr_model.swath_datetime,
                            lat=float(lat),
                            lon=float(lon),
                            sea_ice_fraction=sea_ice_fraction)

                        if not run_in_parallel:
                            # WARNING!
                            # If the number of perturbations is a small
                            # number, it is much faster to run sequentially!
                            perturbations = eustace.surface_temperature.get_n_perturbed_temeratures(
                                coeff, number_of_perturbations, t11_K,
                                t12_K, t37_K, t_clim_K, sigmas["sigma_11"],
                                sigmas["sigma_12"], sigmas["sigma_37"],
                                sun_zenith_angle, sat_zenith_angle,
                                random_seed=counter)
                            num_inserted = db.insert_many_perturbations(
                                swath_input_id, perturbations)
                            total_perturbed_st_count += num_inserted
                        else:
                            # Start a process running the perturbations;
                            # the result is put on the output queue.
                            perturbate_in_parallel(
                                output_queue, swath_input_id, coeff,
                                number_of_perturbations, t11_K, t12_K,
                                t37_K, t_clim_K, sigmas["sigma_11"],
                                sigmas["sigma_12"], sigmas["sigma_37"],
                                sun_zenith_angle, sat_zenith_angle,
                                random_seed=counter)
                            number_of_processes_started += 1

                            if number_of_processes_started > number_of_cpus:
                                # get() will wait forever, for a process to
                                # finish.
                                swath_input_id, perturbations = \
                                    output_queue.get()
                                number_of_processes_finished += 1
                                num_inserted = db.insert_many_perturbations(
                                    swath_input_id, perturbations)
                                total_perturbed_st_count += num_inserted

                if run_in_parallel:
                    # Drain the results of the workers still in flight.
                    while number_of_processes_started > \
                            number_of_processes_finished:
                        swath_input_id, perturbations = output_queue.get()
                        number_of_processes_finished += 1
                        db.insert_many_perturbations(swath_input_id,
                                                     perturbations)

        # FIN.
        LOG.info("Finished perturbing '%s'." % (avhrr_model.avhrr_filename))