def Process_File(source, destination, fileKeywords, moduleSections, valuesDict, mapDict):
    """Create an albedo apriori file from the radiances of a spectra file.

    Args:
        source: Spectra file, loaded with OCO_Matrix.
        destination: Output handed to create_albedo_apriori_from_radiance;
            must not compare equal to ``source``.
        fileKeywords: Not used by this module.
        moduleSections: Module sections; at most one is accepted and the
            ``instrument_name`` keyword is read from the first one.
        valuesDict: Values handed to Apply_Template.
        mapDict: Passed to Apply_Template as ``mapDict``.

    Raises:
        Exception: If more than one section is given, if source equals
            destination, or if instrument_name is missing or empty.
        KeyError: If the SZA keyword is not in the spectra file header.
    """
    if len(moduleSections) > 1:
        raise Exception('Only one instance of %s allowed per FILE block' % os.path.basename(__file__))

    if source == destination:
        raise Exception('Will not write albedo into source spectra file. dest_filename must be defined')

    name_template = moduleSections[0].Get_Keyword_Value('instrument_name')
    instrument_name = Apply_Template(name_template, valuesDict, mapDict=mapDict)
    if instrument_name is None or len(instrument_name) == 0:
        raise Exception('instrument_name keyword must be specified for module: %s' % os.path.basename(__file__))

    # One radiance slice per pixel range reported by the spectra file
    spectra = OCO_Matrix(source)
    radiances = [
        spectra[ASCII_SPECTRA_RADIANCE_COLUMN][bounds[0]:bounds[1]]
        for bounds in spectra.pixel_ranges()
    ]

    # The first whitespace-separated token of the header value is the angle
    try:
        sza_text = spectra.header[ASCII_SPECTRA_SZA_KEYWORD]
        sza_degrees = float(sza_text.split()[0])
        sza_r = math.radians(sza_degrees)
    except KeyError:
        raise KeyError('Could not find header keyword %s in spectrum file: %s' % (ASCII_SPECTRA_SZA_KEYWORD, source))

    create_albedo_apriori_from_radiance(radiances, sza_r, instrument_name, destination)
def average_profiles(input_file_list, output_file):
    """Average the columns of several profile files into one output file.

    ``input_file_list`` names a text file listing one profile file per line.
    Each listed file is loaded with OCO_Matrix. Every column except
    "pressure" is summed across the files and divided by the number of
    files; the pressure column is copied from the first listed file. The
    object loaded from the first file is reused for writing, with only its
    data replaced.

    Args:
        input_file_list: Path of the text file listing the profile files.
        output_file: Destination passed to ``OCO_Matrix.write``.

    Raises:
        ValueError: If the list file names no profile files.
    """
    # Read the list once and close it right away; blank lines are ignored
    with open(input_file_list) as list_file:
        profile_files = [line.strip() for line in list_file if line.strip()]

    if not profile_files:
        raise ValueError('No profile files listed in: %s' % input_file_list)

    first_obj = OCO_Matrix(profile_files[0])

    dst_data = zeros((first_obj.dims[0], first_obj.dims[1]), dtype=float)

    # Pressure is not averaged, it is taken from the first file as-is
    pres_col = first_obj.labels_lower.index("pressure")
    dst_data[:, pres_col] = first_obj.data[:, pres_col]

    for curr_atm_file in profile_files:
        print("Loading %s" % curr_atm_file)
        file_obj = OCO_Matrix(curr_atm_file)

        for col in range(file_obj.dims[1]):
            if col != pres_col:
                dst_data[:, col] += file_obj.data[:, col]

    count = len(profile_files)
    for col in range(dst_data.shape[1]):
        if col != pres_col:
            dst_data[:, col] /= count

    first_obj.data = dst_data
    first_obj.write(output_file)
def create_log_p_profile(input_file, output_file, column, val0, lapse_rate):
    """Overwrite one column of a profile file with a log-pressure profile.

    The value written to a row is
    ``val0 - lapse_rate * (log(p_last) - log(p_row))`` where ``p_last`` is
    the pressure in the last row of the file.

    Args:
        input_file: File loaded with OCO_Matrix; needs a "pressure" column.
        output_file: Destination passed to ``OCO_Matrix.write``.
        column: Column to overwrite, either a string of digits (column
            index) or a column label (matched case-insensitively).
        val0: Profile value at the last row; converted with ``float``.
        lapse_rate: Slope against log-pressure; converted with ``float``.
    """
    file_obj = OCO_Matrix(input_file)

    num_rows = file_obj.dims[0]

    val0 = float(val0)
    lapse_rate = float(lapse_rate)

    # Copy the pressure column out as plain floats
    src_pres_col = file_obj.labels_lower.index("pressure")
    pressure = numpy.array(
        [float(file_obj.data[row][src_pres_col]) for row in range(num_rows)],
        dtype=float)

    if column.isdigit():
        # The index arrives as text and must be an integer to index the data
        dest_prof_col = int(column)
    else:
        dest_prof_col = file_obj.labels_lower.index(column.lower())

    # NOTE(review): row 0 is never written by this loop; confirm whether the
    # first level is meant to keep its original value.
    for row in range(num_rows - 1, 0, -1):
        file_obj.data[row, dest_prof_col] = val0 - lapse_rate * (
            math.log(pressure[num_rows - 1]) - math.log(pressure[row]))

    file_obj.write(output_file)
def create_log_p_profile(input_file, output_file, column, val0, lapse_rate):
    """Overwrite one column of a profile file with a log-pressure profile.

    The value written to a row is
    ``val0 - lapse_rate * (log(p_last) - log(p_row))`` where ``p_last`` is
    the pressure in the last row of the file.

    Args:
        input_file: File loaded with OCO_Matrix; needs a "pressure" column.
        output_file: Destination passed to ``OCO_Matrix.write``.
        column: Column to overwrite, either a string of digits (column
            index) or a column label (matched case-insensitively).
        val0: Profile value at the last row; converted with ``float``.
        lapse_rate: Slope against log-pressure; converted with ``float``.
    """
    file_obj = OCO_Matrix(input_file)

    num_rows = file_obj.dims[0]

    val0 = float(val0)
    lapse_rate = float(lapse_rate)

    # Copy the pressure column out as plain floats
    src_pres_col = file_obj.labels_lower.index("pressure")
    pressure = numpy.array(
        [float(file_obj.data[row][src_pres_col]) for row in range(num_rows)],
        dtype=float)

    if column.isdigit():
        # The index arrives as text and must be an integer to index the data
        dest_prof_col = int(column)
    else:
        dest_prof_col = file_obj.labels_lower.index(column.lower())

    # NOTE(review): row 0 is never written by this loop; confirm whether the
    # first level is meant to keep its original value.
    for row in range(num_rows - 1, 0, -1):
        file_obj.data[row, dest_prof_col] = val0 - lapse_rate * (
            math.log(pressure[num_rows - 1]) - math.log(pressure[row]))

    file_obj.write(output_file)
def scale_cov_by_corr(input_file, output_file, scale_factor):
    """Rescale the correlations of a covariance matrix, keeping variances.

    Each element is turned into a correlation ``rho`` using the square
    roots of the two matching diagonal elements. The new correlation has
    magnitude ``1 - scale_factor * (1 - |rho|)`` and the sign of ``rho``;
    it is set to zero when ``|rho|`` is below 1e-40 or when
    ``scale_factor * (1 - |rho|)`` exceeds 1. The result is converted back
    to a covariance with the same diagonal square roots.

    Args:
        input_file: Covariance file loaded with OCO_Matrix.
        output_file: Destination passed to ``OCO_Matrix.write``.
        scale_factor: Scaling applied to ``1 - |rho|``; converted with
            ``float``.
    """
    cov = OCO_Matrix(input_file)
    num_rows, num_cols = cov.dims[0], cov.dims[1]
    factor = float(scale_factor)

    scaled = numpy.zeros((num_rows, num_cols), dtype=float)

    for r in range(num_rows):
        for c in range(num_cols):
            # Product of the two standard deviations for this element
            sigma_prod = math.sqrt(cov.data[r, r]) * math.sqrt(cov.data[c, c])
            rho_old = cov.data[r, c] / sigma_prod

            sign = -1.0 if rho_old < 0.0 else 1.0
            fact_new = factor * (1.0 - abs(rho_old))

            if abs(rho_old) < 1e-40 or fact_new > 1.0:
                rho_new = 0.0
            else:
                rho_new = 1.0 - fact_new

            scaled[r, c] = sign * rho_new * sigma_prod

    cov.data = scaled
    cov.write(output_file)
def remove_bad_data_all(input_file, output_file, check_col, check_val):
    """Drop every row whose check column matches a bad-value pattern.

    The lower-cased string form of ``check_val`` is used as a regular
    expression and searched for in the lower-cased string form of each
    value in the check column. Rows without a match are kept, in order.

    Args:
        input_file: File loaded with OCO_Matrix.
        output_file: Destination passed to ``OCO_Matrix.write``.
        check_col: Column to test, either a string of digits (column index)
            or a column label (matched case-insensitively).
        check_val: Value identifying bad rows.
    """
    matrix = OCO_Matrix(input_file)

    if check_col.isdigit():
        check_col = int(check_col)
    else:
        check_col = matrix.labels_lower.index(check_col.lower())

    bad_pattern = str(check_val).lower()
    kept_rows = [
        row for row in range(matrix.dims[0])
        if not re.search(bad_pattern, str(matrix.data[row, check_col]).lower())
    ]

    # Copy the surviving rows into a float matrix of the reduced size
    cleaned = numpy.zeros((len(kept_rows), matrix.dims[1]), dtype=float)
    for dst_row, src_row in enumerate(kept_rows):
        cleaned[dst_row, :] = matrix.data[src_row, :]

    matrix.data = cleaned
    matrix.write(output_file)
def remove_bad_data_all(input_file, output_file, check_col, check_val):
    """Drop every row whose check column matches a bad-value pattern.

    The lower-cased string form of ``check_val`` is used as a regular
    expression and searched for in the lower-cased string form of each
    value in the check column. Rows without a match are kept, in order.

    Args:
        input_file: File loaded with OCO_Matrix.
        output_file: Destination passed to ``OCO_Matrix.write``.
        check_col: Column to test, either a string of digits (column index)
            or a column label (matched case-insensitively).
        check_val: Value identifying bad rows.
    """
    matrix = OCO_Matrix(input_file)

    if check_col.isdigit():
        check_col = int(check_col)
    else:
        check_col = matrix.labels_lower.index(check_col.lower())

    bad_pattern = str(check_val).lower()
    kept_rows = [
        row for row in range(matrix.dims[0])
        if not re.search(bad_pattern, str(matrix.data[row, check_col]).lower())
    ]

    # Copy the surviving rows into a float matrix of the reduced size
    cleaned = numpy.zeros((len(kept_rows), matrix.dims[1]), dtype=float)
    for dst_row, src_row in enumerate(kept_rows):
        cleaned[dst_row, :] = matrix.data[src_row, :]

    matrix.data = cleaned
    matrix.write(output_file)
def extract_acos_spectra(acos_l1b_file, output_dir, sounding_id_list=None, average_pol=False):
    """Write one ASCII spectrum file per sounding of an ACOS L1B file.

    Args:
        acos_l1b_file: L1B file opened through ``ACOS_File.L1B``.
        output_dir: Directory receiving one ``<sounding_id>.dat`` per
            sounding.
        sounding_id_list: Sounding ids to extract. When None or empty, the
            ids are taken from the L1B file (with polarization added to the
            ids unless ``average_pol`` is set).
        average_pol: When true, 'Polarization' is passed as the averaging
            name to the sounding info lookup and to write_ascii_from_hdf.
    """
    average_name = 'Polarization' if average_pol else None

    with contextlib.closing(ACOS_File.L1B(acos_l1b_file)) as acos_l1b_obj:
        # Identity test on purpose: "== None" on a numpy array compares
        # element-wise and cannot be used as a single truth value
        if sounding_id_list is None or len(sounding_id_list) == 0:
            sounding_id_list = acos_l1b_obj.get_sounding_ids(
                add_polarization=not average_pol)

        for sounding_id in numpy.ravel(sounding_id_list):
            ascii_obj = OCO_Matrix()

            sounding_info_dict = acos_l1b_obj.get_sounding_info_dict(
                sounding_id, ignore_missing=True, average=average_name)
            write_ascii_from_hdf(acos_l1b_obj, sounding_info_dict, ascii_obj,
                                 sounding_id, average_name)

            output_filename = os.path.join(output_dir, '%s.dat' % sounding_id)
            print('Writing %s' % output_filename)
            ascii_obj.write(output_filename, default_precision=16)
def noisify_spectra_file(input_radiance_file, output_radiance_file, **kwargs):
    """Load a radiance file, apply noisify_spectra_obj to it and write it.

    Args:
        input_radiance_file: File loaded with OCO_Matrix.
        output_radiance_file: Destination passed to ``OCO_Matrix.write``
            (written with ``auto_size_cols=False``).
        **kwargs: Forwarded unchanged to noisify_spectra_obj.
    """
    # The catch-all parameter must carry the same name that is forwarded
    # below, otherwise the call fails with a NameError
    matrix_obj = OCO_Matrix(input_radiance_file)

    noisify_spectra_obj(matrix_obj, **kwargs)

    matrix_obj.write(output_radiance_file, auto_size_cols=False)
def Process_File(source, destination, fileKeywords, moduleSections, valuesDict, mapDict, buffer_objs):
    """Apply noisify_spectra_obj to a spectra file once per module section.

    Args:
        source: File loaded with OCO_Matrix.
        destination: Output passed to ``OCO_Matrix.write`` (written with
            ``auto_size_cols=False``).
        fileKeywords: Not used by this module.
        moduleSections: Sections supplying the ``noise_cut_off`` and
            ``pixel_rows`` keywords.
        valuesDict: Values handed to Apply_Template.
        mapDict: Passed to Apply_Template as ``mapDict``.
        buffer_objs: Not used by this module.
    """
    spectra = OCO_Matrix(source)

    for section in moduleSections:
        # Resolve both keywords through the template mechanism, in the
        # order noise_cut_off then pixel_rows
        settings = {}
        for keyword in ('noise_cut_off', 'pixel_rows'):
            settings[keyword] = Apply_Template(
                section.Get_Keyword_Value(keyword), valuesDict, mapDict=mapDict)

        noisify_spectra_obj(spectra,
                            row_range_spec=settings['pixel_rows'],
                            noise_cut_off=settings['noise_cut_off'])

    spectra.write(destination, auto_size_cols=False)
def get_data_object(data_filename):
    """Load a data file as an OCO_Matrix, falling back to a plain table.

    The file is first handed to OCO_Matrix. If that fails, it is read as a
    whitespace-separated table: lines containing '#' and blank lines are
    skipped, and every value is stored as the string read from the file.
    Column labels are generated from the file name (extension removed) and
    the column index using get_column_format.

    Args:
        data_filename: Path of the file to load.

    Returns:
        An OCO_Matrix object holding the file contents.
    """
    try:
        return OCO_Matrix(data_filename)
    except Exception:
        # Deliberate best effort: any load failure selects the table parser
        pass

    with open(data_filename, 'r') as table_file_obj:
        file_lines = table_file_obj.readlines()

    # Split each usable line on whitespace
    file_rows = [line.split() for line in file_lines
                 if '#' not in line and len(line.strip()) != 0]

    # Rows may be ragged, the matrix is sized for the widest one
    max_cols = 0
    for line_cols in file_rows:
        max_cols = max(max_cols, len(line_cols))

    # Cells of shorter rows keep the initial zero
    data_mat = numpy.zeros((len(file_rows), max_cols), dtype=numpy.chararray)
    for row_idx, line_cols in enumerate(file_rows):
        for col_idx, col_value in enumerate(line_cols):
            data_mat[row_idx][col_idx] = col_value

    # Create label names based on filename and index or else can
    # not select specific columns
    label_base = os.path.basename(data_filename)
    ext_pos = label_base.rfind('.')
    if ext_pos >= 0:
        # Only cut when there is an extension; slicing with -1 would
        # drop the last character of an extension-less name
        label_base = label_base[0:ext_pos]

    label_format = get_column_format(max_cols)
    data_labels = [label_format % (label_base, col_idx)
                   for col_idx in range(max_cols)]

    # Save data into OCO Matrix object
    data_obj = OCO_Matrix()
    data_obj.dims = [len(file_rows), max_cols]
    data_obj.labels = data_labels
    data_obj.data = data_mat

    return data_obj
def Process_File(source, destination, fileKeywords, moduleSections, valuesDict, mapDict):
    """Load module keywords, index ranges and the column selection.

    NOTE(review): the visible definition ends after the column selection;
    the keyword values loaded here are not used in the portion shown.
    NOTE(review): every keyword is read from ``moduleSections[0]`` rather
    than from the section being iterated; confirm this is intended.

    Args:
        source: File loaded with OCO_Matrix.
        destination: Output file; must differ from ``source``.
        fileKeywords: Not used in the visible portion.
        moduleSections: Module sections holding the keywords and a
            ``->RANGES`` sub-section of (name, range string) rows.
        valuesDict: Values handed to Apply_Template.
        mapDict: Passed to Apply_Template as ``mapDict``.

    Raises:
        IOError: If source and destination have the same string form.
    """
    logger = logging.getLogger(os.path.basename(__file__))

    for currSection in moduleSections:
        if str(source) == str(destination):
            raise IOError("source and destination must be different. will not overwrite source file")

        rows = Apply_Template(moduleSections[0].Get_Keyword_Value("rows"), valuesDict, mapDict=mapDict)
        columns = Apply_Template(moduleSections[0].Get_Keyword_Value("columns"), valuesDict, mapDict=mapDict)
        identifier = Apply_Template(moduleSections[0].Get_Keyword_Value("identifier"), valuesDict, mapDict=mapDict)
        initial_value = Apply_Template(moduleSections[0].Get_Keyword_Value("initial_value"), valuesDict, mapDict=mapDict)
        map_filename = Apply_Template(moduleSections[0].Get_Keyword_Value("map_filename"), valuesDict, mapDict=mapDict)
        modify = moduleSections[0].Get_Keyword_Value("modify")

        # Load ranges from RANGES section of module
        max_range_val = None
        range_values = {}
        for range_sect in moduleSections[0].Get_Section("->RANGES"):
            for range_spec in range_sect.Get_Matrix_Data():
                (range_name, range_str) = range_spec

                # Values are comma separated when a comma is present past
                # the first character, whitespace separated otherwise
                if range_str.find(",") > 0:
                    curr_range = [float(val) for val in range_str.split(",")]
                else:
                    curr_range = [float(val) for val in range_str.split()]

                if max_range_val is None:
                    max_range_val = max(curr_range)
                else:
                    max_range_val = max(max_range_val, max(curr_range))

                range_values[range_name] = curr_range

        if len(range_values) == 0:
            logger.error("No index range list supplied for operating on source: %s" % source)
            return

        # Load source for data to map against
        data_obj = OCO_Matrix(source)

        # Set columns to all if argument not supplied,
        # Otherwise try parsing as an index range list failing that try
        # using the specified columns as label names
        if columns is None:
            columns = range(data_obj.dims[1])
        else:
            try:
                columns = index_range_list(columns)
            except (ValueError, TypeError):
                # Both exception types must be listed in a tuple; the form
                # "except ValueError, TypeError" only catches ValueError
                columns = data_obj.find_labels(columns, match_case=False, indexes=True)
def write_xco2_file(log_sounding_dict, xco2_filename):
    """Write the XCO2 entry of a sounding dictionary to a 1x1 matrix file.

    Args:
        log_sounding_dict: Mapping read at key ``XCO2_COL_NAME``.
        xco2_filename: Destination passed to ``OCO_Matrix.write``.
    """
    out_obj = OCO_Matrix()
    out_obj.file_id = 'True xco2 from orbit simulator'
    out_obj.labels = [XCO2_LABEL_NAME]

    # A single cell labelled XCO2_LABEL_NAME holds the value
    out_obj.data = numpy.zeros((1, 1), dtype=float)
    out_obj.data[0, 0] = log_sounding_dict[XCO2_COL_NAME]

    out_obj.write(xco2_filename)
def create_mean_psurf(runlog_file, psurf_file):
    """Write the mean of a runlog's ``pout`` column as a PSURF file.

    The first line of the runlog is taken as the whitespace-separated
    column header. The mean of the ``pout`` column is multiplied by 1e2 and
    written as a single row with the labels LEVEL and PSURF.

    Args:
        runlog_file: Path of the runlog text file.
        psurf_file: Destination passed to ``OCO_Matrix.write``.

    Raises:
        ValueError: If the header has no ``pout`` column or the runlog
            holds no data rows.
    """
    print('runlog_file =  %s' % (runlog_file,))
    print('psurf_file =  %s' % (psurf_file,))

    # The with block closes the runlog even if a line fails to parse
    with open(runlog_file, "r") as runlog_fobj:
        header_cols = runlog_fobj.readline().split()
        pout_col = header_cols.index('pout')

        # Blank lines carry no data and are skipped
        pouts = [float(runlog_line.split()[pout_col])
                 for runlog_line in runlog_fobj if runlog_line.strip()]

    if not pouts:
        raise ValueError('No pout values found in runlog file: %s' % runlog_file)

    avg_psurf = mean(pouts) * 1e2

    out_mat_obj = OCO_Matrix()
    out_mat_obj.file_id = "Mean surface pressure from runlog file: %s" % runlog_file
    out_mat_obj.labels = ['LEVEL', 'PSURF']
    out_mat_obj.data = ones((1, 2), dtype=float)
    out_mat_obj.data[0, 1] = avg_psurf
    out_mat_obj.write(psurf_file)
def set_noise_col(input_file, output_file, snr):
    """Fill the noise column with the largest radiance divided by ``snr``.

    Args:
        input_file: File loaded with OCO_Matrix; needs "radiance" and
            "noise" columns.
        output_file: Destination passed to ``OCO_Matrix.write``.
        snr: Divisor applied to the largest radiance; converted with
            ``float``.
    """
    snr = float(snr)

    spectra = OCO_Matrix(input_file)
    rad_col = spectra.labels_lower.index("radiance")
    noise_col = spectra.labels_lower.index("noise")

    # One constant noise value is used for every row
    peak_radiance = max(spectra.data[:, rad_col])
    spectra.data[:, noise_col] = peak_radiance / snr

    spectra.write(output_file)
def set_noise_col(input_file, output_file, snr):
    """Fill the noise column with the largest radiance divided by ``snr``.

    Args:
        input_file: File loaded with OCO_Matrix; needs "radiance" and
            "noise" columns.
        output_file: Destination passed to ``OCO_Matrix.write``.
        snr: Divisor applied to the largest radiance; converted with
            ``float``.
    """
    snr = float(snr)

    spectra = OCO_Matrix(input_file)
    rad_col = spectra.labels_lower.index("radiance")
    noise_col = spectra.labels_lower.index("noise")

    # One constant noise value is used for every row
    peak_radiance = max(spectra.data[:, rad_col])
    spectra.data[:, noise_col] = peak_radiance / snr

    spectra.write(output_file)
def write_radcnv_file(output_filename, column_names, sheet, input_rows, input_cols, scaling=1.0):
    """Copy selected sheet cells into a matrix file, scaling the values.

    Each cell is fetched with get_cell_data. A cell that is not None is
    multiplied by ``scaling`` unless its output column is named
    ``WAVENUMBER_COLUMN_NAME``.

    Args:
        output_filename: Destination passed to ``OCO_Matrix.write``.
        column_names: Labels of the output columns, one per input column.
        sheet: Sheet object handed to get_cell_data.
        input_rows: Sheet row indexes to copy, in output order.
        input_cols: Sheet column indexes to copy, in output order.
        scaling: Factor applied to the copied values.

    Raises:
        TypeError: If a cell value cannot be multiplied by ``scaling``.
    """
    out_obj = OCO_Matrix()
    out_obj.labels = column_names
    out_obj.file_id = FILE_ID
    out_obj.data = numpy.zeros((len(input_rows), len(input_cols)), dtype=numpy.chararray)

    for dst_row, src_row in enumerate(input_rows):
        for dst_col, src_col in enumerate(input_cols):
            value = get_cell_data(sheet, src_row, src_col)

            if value is not None and column_names[dst_col] != WAVENUMBER_COLUMN_NAME:
                try:
                    value *= scaling
                except TypeError as e:
                    # Re-raise with the cell location for easier debugging
                    raise TypeError(
                        '%s: cell_data = "%s", scaling = "%s" at row: %d column %d'
                        % (e, value, scaling, src_row, src_col))

            out_obj.data[dst_row, dst_col] = value

    print('Writing output filename: %s' % output_filename)
    out_obj.write(output_filename)
def get_cached_file(self, file_path, file_glob, required=False, use_first=False, **addl_args):
    """Return the OCO_Matrix for the file matching a glob, using a cache.

    Loaded objects are kept in ``self.file_cache`` keyed by file name, so a
    file is only read the first time it is requested.

    Args:
        file_path: Directory the glob is evaluated in.
        file_glob: A glob string, or an iterable of glob strings that are
            tried in order until one matches. None returns None.
        required: Raise instead of returning None when nothing matches.
        use_first: Accept the first match when several files match.
        **addl_args: Passed to the OCO_Matrix constructor on a cache miss.

    Returns:
        The cached or newly loaded OCO_Matrix, or None when nothing matches
        and ``required`` is false.

    Raises:
        OSError: If nothing matches and ``required`` is true, or several
            files match and ``use_first`` is false.
    """
    if file_glob is None:
        return None

    # A plain string is always a single glob, even on interpreters where
    # strings expose __iter__
    if not isinstance(file_glob, str) and hasattr(file_glob, '__iter__'):
        found_files = []
        for curr_glob in file_glob:
            curr_files = glob.glob(os.path.join(file_path, curr_glob))
            if len(curr_files) > 0:
                found_files = curr_files
                break
    else:
        found_files = glob.glob(os.path.join(file_path, file_glob))

    if len(found_files) == 0:
        if required:
            raise OSError('Could not find at path: "%s" any files matching glob: "%s"' % (file_path, file_glob))
        return None

    if len(found_files) > 1 and not use_first:
        raise OSError('Found too many files at path: "%s" with glob: "%s", found: %s' % (file_path, file_glob, found_files))

    cache_key = found_files[0]
    if cache_key not in self.file_cache:
        self.file_cache[cache_key] = OCO_Matrix(cache_key, **addl_args)

    return self.file_cache[cache_key]
def _load_sens_conv_files(self):
    """Load one conversion file per band into ``self.sens_corr_data``.

    For every entry of ``SWIR_BANDS`` the file name is built from
    ``RAD_CNV_FILE_TMPL`` and loaded with OCO_Matrix using
    ``ignore_conv_err=True``. The loaded objects are stored in band order.
    """
    loaded = []
    self.sens_corr_data = loaded

    for band in SWIR_BANDS:
        loaded.append(OCO_Matrix(RAD_CNV_FILE_TMPL % band, ignore_conv_err=True))
def Process_File(source, destination, fileKeywords, moduleSections, valuesDict, mapDict):
    """Extract the surface pressure of one spectrum from a runlog.

    The runlog is searched for the first line matching the base name of the
    ``spectrum_file`` keyword (used as a regular expression). The column at
    the module-level ``pout_col_idx`` of that line is multiplied by the
    module-level ``convert_factor`` and written as a 1x1 PSURF file.

    Args:
        source: Runlog, either a file name or an object with ``read``.
        destination: Output passed to ``OCO_Matrix.write``; its string
            form must differ from that of ``source``.
        fileKeywords: Not used by this module.
        moduleSections: Module sections; at most one is accepted and the
            ``spectrum_file`` keyword is read from the first.
        valuesDict: Values handed to Apply_Template.
        mapDict: Passed to Apply_Template as ``mapDict``.

    Raises:
        RuntimeError: If more than one module section is given.
        ValueError: If source and destination are the same, or the pout
            value is missing or cannot be parsed.
        IOError: If the runlog does not exist or has no matching line.
        Exception: If ``source`` is neither a string nor file-like.
    """
    import subprocess

    if len(moduleSections) > 1:
        raise RuntimeError('Only one input file config set per file')

    if str(source) == str(destination):
        raise ValueError('source and dest filenames must be different. will not overwrite source file')

    spectrum_file = Apply_Template(moduleSections[0].Get_Keyword_Value('spectrum_file'), valuesDict, mapDict=mapDict)

    if isinstance(source, str) and not os.path.exists(source):
        raise IOError('Runlog file %s does not exist' % source)

    base_spec_name = os.path.basename(spectrum_file)

    matched_line = None
    if isinstance(source, str):
        # Use grep because it is faster than searching here. Arguments are
        # passed as a list so names with spaces or shell metacharacters
        # cannot alter the command.
        grep_proc = subprocess.Popen(
            ['grep', '-E', '-e', base_spec_name, '--', source],
            stdout=subprocess.PIPE, universal_newlines=True)
        grep_output = grep_proc.communicate()[0]
        grep_lines = grep_output.splitlines(True)
        matched_line = grep_lines[0] if grep_lines else ''
    elif hasattr(source, 'read'):
        for curr_line in source.readlines():
            if re.search(base_spec_name, curr_line):
                matched_line = curr_line
                break
    else:
        raise Exception('Unsupported object: %s' % source)

    if matched_line is None or len(matched_line) == 0:
        raise IOError('Could not find spectrum name: %s in run log file: %s' % (base_spec_name, source))

    # Look the column up separately so the error message below can never
    # fail on a missing column
    matched_columns = matched_line.split()
    try:
        pout_text = matched_columns[pout_col_idx]
    except IndexError:
        raise ValueError('Runlog line has no column %s to parse psurf value from: %s' % (pout_col_idx, matched_line))

    try:
        psurf_val = float(pout_text) * convert_factor
    except (TypeError, ValueError):
        raise ValueError('Failed to parse psurf value from: "%s" from runlog line: %s' % (pout_text, matched_line))

    out_obj = OCO_Matrix()

    out_obj.data = numpy.zeros((1, 1), dtype=float)
    out_obj.data[0, 0] = psurf_val

    out_obj.file_id = 'psurf value extracted for spectrum named: %s from runlog file: %s' % (base_spec_name, source)
    out_obj.labels = ['PSURF']

    out_obj.write(destination)
def make_diag_only_cov(input_file, output_file):
    """Write a copy of a matrix with every off-diagonal element zeroed.

    Args:
        input_file: File loaded with OCO_Matrix.
        output_file: Destination passed to ``OCO_Matrix.write``.
    """
    cov = OCO_Matrix(input_file)
    num_rows, num_cols = cov.dims[0], cov.dims[1]

    diag_only = numpy.zeros((num_rows, num_cols), dtype=float)

    # Only positions with equal row and column index are copied
    for idx in range(min(num_rows, num_cols)):
        diag_only[idx, idx] = cov.data[idx, idx]

    cov.data = diag_only
    cov.write(output_file)
def adjust_file_for_trend(self, src_apriori_file, time_struct, dst_apriori_file=None):
    """Add the apriori CO2 offset for a time to the CO2 column of a file.

    The offset comes from ``self.get_apriori_offset`` and is also recorded
    in the file header under ``co2_offset``.

    Args:
        src_apriori_file: Apriori file loaded with OCO_Matrix.
        time_struct: Time passed to ``self.get_apriori_offset``.
        dst_apriori_file: Output file; defaults to ``src_apriori_file``.

    Raises:
        IOError: If the output is a string path whose directory resolves to
            ``self.apriori_db_path``.
    """
    if dst_apriori_file is None:
        dst_apriori_file = src_apriori_file

    # Files inside the database directory must never be overwritten
    if type(dst_apriori_file) is str:
        dst_dir = os.path.realpath(os.path.dirname(dst_apriori_file))
        if dst_dir == os.path.realpath(self.apriori_db_path):
            raise IOError('Can not modify apriori file as located in database path, it must be copied first')

    apriori = OCO_Matrix(src_apriori_file)

    co2_label = apriori.find_labels(CO2_APRIORI_CO2_COL)[0]
    co2_col_idx = apriori.labels.index(co2_label)

    co2_offset = self.get_apriori_offset(time_struct, debug_values=apriori.header)

    apriori.data[:, co2_col_idx] += co2_offset
    apriori.header['co2_offset'] = co2_offset

    apriori.write(dst_apriori_file)
def offset_column(input_file, output_file, columns, offset, method, pressure_range=None):
    """Apply an arithmetic offset to selected columns of a matrix file.

    Args:
        input_file: File loaded with OCO_Matrix.
        output_file: Destination passed to ``OCO_Matrix.write``.
        columns: Column specification parsed by index_range_list.
        offset: Operand of the operation. A value accepted by ``float`` is
            used directly; any other string is evaluated as a Python
            expression.
        method: '/', '-' or '*' selects division, subtraction or
            multiplication; any other value selects addition.
        pressure_range: Optional "begin,end" string. Rows are then limited
            to those from the first row with pressure >= begin through the
            last row with pressure <= end (requires a "pressure" column).
    """
    matrix_obj = OCO_Matrix(input_file)

    # Add ability to specify cols individually or using a * to goto end
    cols = index_range_list(columns)

    try:
        offset = float(offset)
    except ValueError:
        # NOTE(security): eval runs arbitrary code. It is kept so that
        # expression offsets keep working, but it must never be given
        # untrusted input.
        offset = eval(offset)

    if pressure_range is not None:
        pres_col = matrix_obj.labels_lower.index("pressure")

        pres_range_arr = pressure_range.split(',')
        pres_val_beg = float(pres_range_arr[0])
        pres_val_end = float(pres_range_arr[1])

        # Defaults cover all rows when no pressure satisfies a bound
        pres_idx_beg = 0
        pres_idx_end = matrix_obj.dims[0]

        beg_found = False
        for pres_idx_curr, data_row in enumerate(matrix_obj.data):
            pres_val = float(data_row[pres_col])

            if pres_val >= pres_val_beg and not beg_found:
                pres_idx_beg = pres_idx_curr
                beg_found = True

            # Keeps advancing, so the last satisfying row wins
            if pres_val <= pres_val_end:
                pres_idx_end = pres_idx_curr + 1

        target_rows = range(pres_idx_beg, pres_idx_end)
    else:
        target_rows = range(matrix_obj.dims[0])

    # Select the operation once instead of once per element
    operations = {
        '/': lambda value: value / offset,
        '-': lambda value: value - offset,
        '*': lambda value: value * offset,
    }
    apply_offset = operations.get(method, lambda value: value + offset)

    for rowIdx in target_rows:
        for colIdx in cols:
            matrix_obj.data[rowIdx][colIdx] = apply_offset(matrix_obj.data[rowIdx][colIdx])

    matrix_obj.write(output_file)
def random_column(input_file, output_file, columns, mean, std_dev):
    """Replace selected columns with Gaussian random numbers.

    Values are drawn with ``random.gauss`` row by row and, within a row,
    in the order of the selected columns.

    Args:
        input_file: File loaded with OCO_Matrix.
        output_file: Destination passed to ``OCO_Matrix.write``.
        columns: Column specification parsed by index_range_list.
        mean: Mean of the distribution; converted with ``float``.
        std_dev: Standard deviation; converted with ``float``.
    """
    matrix = OCO_Matrix(input_file)

    # Add ability to specify cols individually or using a * to goto end
    selected_cols = index_range_list(columns)

    mean = float(mean)
    std_dev = float(std_dev)

    for row in range(matrix.dims[0]):
        for col in selected_cols:
            matrix.data[row][col] = random.gauss(mean, std_dev)

    matrix.write(output_file)
def read(self, filename):
    """Load the global mean, months and surface values from a matrix file.

    Sets ``self.filename``, ``self.global_mean`` (the
    ``global_ocean_mean`` header entry as a float), ``self.months`` (the
    'Month' data) and ``self.surface_values`` (the 'Surface_' data).

    Args:
        filename: File loaded with OCO_Matrix.
    """
    self.filename = filename

    loaded = OCO_Matrix(filename)

    mean_text = loaded.header['global_ocean_mean']
    self.global_mean = float(mean_text)

    self.months = loaded['Month']
    self.surface_values = loaded['Surface_']
def scale_ils_table(input_file, output_file, scale_factor):
    """Scale the ILS columns of a table by a factor.

    Columns whose lower-case label starts with ``ils_delta_lambda_`` are
    multiplied by ``scale_factor``; columns whose label starts with
    ``ils_response_`` are multiplied by ``1.0 / scale_factor``.

    Args:
        input_file: File loaded with OCO_Matrix.
        output_file: Destination passed to ``OCO_Matrix.write``.
        scale_factor: Scaling factor; converted with ``float``.
    """
    print("Reading %s" % input_file)
    ils_table = OCO_Matrix(input_file)

    scale_factor = float(scale_factor)

    num_rows, num_cols = ils_table.dims[0], ils_table.dims[1]
    for col in range(num_cols):
        # Classify the column once, then walk down its rows
        label = ils_table.labels_lower[col]
        is_delta = label.startswith("ils_delta_lambda_")
        is_response = label.startswith("ils_response_")
        if not (is_delta or is_response):
            continue

        for row in range(num_rows):
            if is_delta:
                ils_table.data[row, col] *= scale_factor
            if is_response:
                ils_table.data[row, col] = (1.0 / scale_factor) * ils_table.data[row, col]

    print("Writing %s" % output_file)
    ils_table.write(output_file)
def interpol_cov(input_file, output_file, src_pressure_file, dst_pressure_file):
    """Map a covariance matrix from one set of pressure levels to another.

    An interpolation matrix M (destination levels x source levels) is built
    with weights computed from logarithms of the pressures, and the output
    is M * S * transpose(M), where S is the data of the input file.

    Args:
        input_file: Covariance file loaded with OCO_Matrix.
        output_file: Destination passed to ``OCO_Matrix.write`` (written
            with ``auto_size_cols=False``).
        src_pressure_file: File whose "pressure" column gives the levels of
            the input covariance.
        dst_pressure_file: File whose "pressure" column gives the levels of
            the output covariance.
    """
    # Load existing file
    file_obj = OCO_Matrix(input_file)

    # Find existing pressure bounds
    pres_src_obj = OCO_Matrix(src_pressure_file)
    src_pres_col = pres_src_obj.labels_lower.index("pressure")
    num_levels_src = pres_src_obj.dims[0]
    pressure_src = pres_src_obj.data[:, src_pres_col]

    pres_dst_obj = OCO_Matrix(dst_pressure_file)
    dst_pres_col = pres_dst_obj.labels_lower.index("pressure")
    num_levels_dst = pres_dst_obj.dims[0]
    pressure_dst = pres_dst_obj.data[:, dst_pres_col]

    M = numpy.zeros((num_levels_dst, num_levels_src), dtype=float)

    # Setup Interpolation Matrix
    for i in range(num_levels_dst):
        # lev becomes the first source level whose pressure is >= the
        # destination pressure.
        # NOTE(review): when no source level qualifies, lev keeps the value
        # from the previous destination level (or is undefined for the
        # first one) -- confirm the inputs always bracket the destination.
        for j in range(num_levels_src):
            if (pressure_dst[i] <= pressure_src[j]):
                lev = j
                break

        # Weight of source level lev: position of the destination pressure
        # between levels lev-1 and lev, measured in log-pressure
        if (lev > 0):
            M[i, lev] = \
                (math.log(pressure_dst[i]) - math.log(pressure_src[lev-1])) \
                / (math.log(pressure_src[lev]) \
                   - math.log(pressure_src[lev-1]))
        else:
            M[i, lev] = 1.0

        # Weight of source level lev-1: one minus the weight above.
        # NOTE(review): the guard tests i rather than lev, so with lev == 0
        # and i > 0 the index lev-1 is -1 and addresses the last column --
        # confirm this is intended.
        if (i > 0):
            M[i, lev-1] = \
                (-math.log(pressure_dst[i]) + math.log(pressure_src[lev-1])) \
                / (math.log(pressure_src[lev]) \
                   - math.log(pressure_src[lev-1])) + 1

    # Use interpolation matrix to create new covariance
    shat_out = (mat(M) * mat(file_obj.data)) * transpose(mat(M))

    file_obj.data = shat_out
    file_obj.write(output_file, auto_size_cols=False)
def write_total_aod_file(log_sounding_dict, aod_filename):
    """Write the aerosol optical depth entries of a sounding to a file.

    The output has a single row with one column per entry of
    ``AOD_COL_NAMES``, labelled with ``AOD_LABEL_NAMES``.

    Args:
        log_sounding_dict: Mapping read at each key of ``AOD_COL_NAMES``.
        aod_filename: Destination passed to ``OCO_Matrix.write``.
    """
    out_obj = OCO_Matrix()
    out_obj.file_id = 'True aerosol optical depth from orbit simulator'
    out_obj.labels = AOD_LABEL_NAMES

    out_obj.data = numpy.zeros((1, len(AOD_COL_NAMES)), dtype=float)
    for col_idx, dict_key in enumerate(AOD_COL_NAMES):
        out_obj.data[0, col_idx] = log_sounding_dict[dict_key]

    out_obj.write(aod_filename)
def Process_File(source, destination, fileKeywords, moduleSections, valuesDict, mapDict, buffer_objs):
    """Apply noisify_spectra_obj to a spectra file once per module section.

    Args:
        source: File loaded with OCO_Matrix.
        destination: Output passed to ``OCO_Matrix.write`` (written with
            ``auto_size_cols=False``).
        fileKeywords: Not used by this module.
        moduleSections: Sections supplying the ``noise_cut_off`` and
            ``pixel_rows`` keywords.
        valuesDict: Values handed to Apply_Template.
        mapDict: Passed to Apply_Template as ``mapDict``.
        buffer_objs: Not used by this module.
    """
    spectra = OCO_Matrix(source)

    for section in moduleSections:
        # Resolve both keywords through the template mechanism, in the
        # order noise_cut_off then pixel_rows
        settings = {}
        for keyword in ('noise_cut_off', 'pixel_rows'):
            settings[keyword] = Apply_Template(
                section.Get_Keyword_Value(keyword), valuesDict, mapDict=mapDict)

        noisify_spectra_obj(spectra,
                            row_range_spec=settings['pixel_rows'],
                            noise_cut_off=settings['noise_cut_off'])

    spectra.write(destination, auto_size_cols=False)
def scale_ils_table(input_file, output_file, scale_factor):
    """Scale the ILS columns of a table by a factor.

    Columns whose lower-case label starts with ``ils_delta_lambda_`` are
    multiplied by ``scale_factor``; columns whose label starts with
    ``ils_response_`` are multiplied by ``1.0 / scale_factor``.

    Args:
        input_file: File loaded with OCO_Matrix.
        output_file: Destination passed to ``OCO_Matrix.write``.
        scale_factor: Scaling factor; converted with ``float``.
    """
    print('Reading %s' % input_file)
    ils_table = OCO_Matrix(input_file)

    scale_factor = float(scale_factor)

    num_rows, num_cols = ils_table.dims[0], ils_table.dims[1]
    for col in range(num_cols):
        # Classify the column once, then walk down its rows
        label = ils_table.labels_lower[col]
        is_delta = label.startswith('ils_delta_lambda_')
        is_response = label.startswith('ils_response_')
        if not (is_delta or is_response):
            continue

        for row in range(num_rows):
            if is_delta:
                ils_table.data[row, col] *= scale_factor
            if is_response:
                ils_table.data[row, col] = (1.0 / scale_factor) * ils_table.data[row, col]

    print('Writing %s' % output_file)
    ils_table.write(output_file)
def Process_File(source, destination, fileKeywords, moduleSections, valuesDict, mapDict):
    """Create an albedo apriori file from the radiances of a spectra file.

    Args:
        source: Spectra file, loaded with OCO_Matrix.
        destination: Output handed to create_albedo_apriori_from_radiance;
            must not compare equal to ``source``.
        fileKeywords: Not used by this module.
        moduleSections: Module sections; at most one is accepted and the
            ``instrument_name`` keyword is read from the first one.
        valuesDict: Values handed to Apply_Template.
        mapDict: Passed to Apply_Template as ``mapDict``.

    Raises:
        Exception: If more than one section is given, if source equals
            destination, or if instrument_name is missing or empty.
        KeyError: If the SZA keyword is not in the spectra file header.
    """
    if len(moduleSections) > 1:
        raise Exception('Only one instance of %s allowed per FILE block' % os.path.basename(__file__))

    if source == destination:
        raise Exception('Will not write albedo into source spectra file. dest_filename must be defined')

    name_template = moduleSections[0].Get_Keyword_Value('instrument_name')
    instrument_name = Apply_Template(name_template, valuesDict, mapDict=mapDict)
    if instrument_name is None or len(instrument_name) == 0:
        raise Exception('instrument_name keyword must be specified for module: %s' % os.path.basename(__file__))

    # One radiance slice per pixel range reported by the spectra file
    spectra = OCO_Matrix(source)
    radiances = [
        spectra[ASCII_SPECTRA_RADIANCE_COLUMN][bounds[0]:bounds[1]]
        for bounds in spectra.pixel_ranges()
    ]

    # The first whitespace-separated token of the header value is the angle
    try:
        sza_text = spectra.header[ASCII_SPECTRA_SZA_KEYWORD]
        sza_degrees = float(sza_text.split()[0])
        sza_r = math.radians(sza_degrees)
    except KeyError:
        raise KeyError('Could not find header keyword %s in spectrum file: %s' % (ASCII_SPECTRA_SZA_KEYWORD, source))

    create_albedo_apriori_from_radiance(radiances, sza_r, instrument_name, destination)
def write_soundinginfo_file(hdf_sounding_dict, sounding_info_filename, sounding_id):
    """Write a sounding dictionary as the header of a matrix file.

    The dictionary itself becomes the header of the output object, so the
    ``sounding_id`` entry added here is also stored in the dictionary that
    was passed in.

    Args:
        hdf_sounding_dict: Dictionary used as the file header.
        sounding_info_filename: Destination passed to ``OCO_Matrix.write``.
        sounding_id: Id stored under the ``sounding_id`` header key and
            named in the file id.
    """
    out_obj = OCO_Matrix()
    out_obj.file_id = 'Sounding info from orbit simulator for sounding id: %s' % sounding_id

    out_obj.header = hdf_sounding_dict
    out_obj.header['sounding_id'] = sounding_id

    out_obj.write(sounding_info_filename)
def write_psurf_file(psurf, out_filename):
    """Write a surface pressure value to a 1x1 matrix file.

    The single column is labelled PSURF with units Pa.

    Args:
        psurf: Value stored in the single cell.
        out_filename: Destination passed to ``OCO_Matrix.write``.
    """
    psurf_cell = numpy.zeros((1, 1), dtype=float)
    psurf_cell[0, 0] = psurf

    psurf_obj = OCO_Matrix()
    psurf_obj.file_id = 'True surface pressure from orbit simulator'
    psurf_obj.data = psurf_cell
    psurf_obj.labels = ['PSURF']
    psurf_obj.units = ['Pa']

    psurf_obj.write(out_filename)
def write_albedo_file(output_file, albedo_data, header_values=None):
    """Write albedo data to a matrix file with generated column labels.

    Columns are labelled with ``ALBEDO_COL_TMPL`` using 1-based indexes and
    the header records ``ALBEDO_CENTER_WAVELENGTHS`` as a space-separated
    string under ``center_wavelengths``.

    Args:
        output_file: Destination passed to ``OCO_Matrix.write``.
        albedo_data: Array written as the file data; its second dimension
            sets the number of labels.
        header_values: Optional mapping merged into the header first.
    """
    out_obj = OCO_Matrix()

    if header_values is not None:
        out_obj.header.update(header_values)

    wavelength_text = [str(wl) for wl in ALBEDO_CENTER_WAVELENGTHS]
    out_obj.header['center_wavelengths'] = ' '.join(wavelength_text)

    num_cols = albedo_data.shape[1]
    out_obj.labels = [ALBEDO_COL_TMPL % col_number
                      for col_number in range(1, num_cols + 1)]
    out_obj.data = albedo_data
    out_obj.file_id = 'Surface albedo data'

    out_obj.write(output_file)
def extract_sv_jacobians(pd_file, names_file, rad_conv_file):
    """Split a partial-derivative file into files per state-vector group.

    For each (pattern, output file) pair of ``SV_NAMES_MATCH``, the columns
    of ``pd_file`` whose element name matches the pattern are written to
    the output file together with the pixels of ``rad_conv_file``.

    Args:
        pd_file: Partial-derivative file loaded with OCO_Matrix.
        names_file: File whose 'Element Name' data gives one name per
            column of ``pd_file``.
        rad_conv_file: File supplying the ``pixels`` attribute (loaded
            with ``read_data=False``).
    """
    conv_obj = OCO_Matrix(rad_conv_file, read_data=False)

    names_obj = OCO_Matrix(names_file, as_strings=True)
    sv_names = names_obj['Element Name'][:, 0]

    pd_obj = OCO_Matrix(pd_file)

    for name_re, output_filename in SV_NAMES_MATCH.items():
        # Indexes of all element names matching this group's pattern
        file_indexes = [name_idx for name_idx, sv_name in enumerate(sv_names)
                        if re.search(name_re, sv_name)]

        print('%s %s' % (output_filename, file_indexes))

        out_obj = OCO_Matrix()
        out_obj.pixels = conv_obj.pixels
        out_obj.data = pd_obj.data[:, file_indexes]
        out_obj.write(output_filename)
def remove_bad_data_last(input_file, output_file, check_col, check_val):
    """Truncate a file after its last row that does not hold a bad value.

    Rows are examined from the last to the first. The lower-cased string
    form of ``check_val`` is used as a regular expression and searched for
    in the lower-cased string form of the check column value; the first
    row without a match becomes the last row written. When every row
    matches, no rows are written.

    Args:
        input_file: File loaded with OCO_Matrix.
        output_file: Destination passed to ``OCO_Matrix.write``.
        check_col: Column to test, either a string of digits (column index)
            or a column label (matched case-insensitively).
        check_val: Value identifying bad rows.
    """
    file_obj = OCO_Matrix(input_file)

    num_rows = file_obj.dims[0]

    if check_col.isdigit():
        check_col = int(check_col)
    else:
        check_col = file_obj.labels_lower.index(check_col.lower())

    bad_pattern = str(check_val).lower()

    last_good_index = -1
    # Stop value -1 so that rows 1 and 0 are examined as well
    for row_idx in range(num_rows - 1, -1, -1):
        if not re.search(bad_pattern, str(file_obj.data[row_idx, check_col]).lower()):
            last_good_index = row_idx
            break

    print("Last good index =  %s" % (last_good_index,))

    # Only the dims are reduced; write() is told to honour them
    file_obj.dims = [last_good_index + 1, file_obj.dims[1]]
    file_obj.write(output_file, use_set_dims=True, auto_size_cols=False)
def extract_acos_spectra(acos_l1b_file, output_dir, sounding_id_list=None, average_pol=False):
    """Write one ASCII spectrum file per sounding of an ACOS L1B file.

    Args:
        acos_l1b_file: L1B file opened through ``ACOS_File.L1B``.
        output_dir: Directory receiving one ``<sounding_id>.dat`` per
            sounding.
        sounding_id_list: Sounding ids to extract. When None or empty, the
            ids are taken from the L1B file (with polarization added to the
            ids unless ``average_pol`` is set).
        average_pol: When true, 'Polarization' is passed as the averaging
            name to the sounding info lookup and to write_ascii_from_hdf.
    """
    average_name = 'Polarization' if average_pol else None

    with contextlib.closing(ACOS_File.L1B(acos_l1b_file)) as acos_l1b_obj:
        # Identity test on purpose: "== None" on a numpy array compares
        # element-wise and cannot be used as a single truth value
        if sounding_id_list is None or len(sounding_id_list) == 0:
            sounding_id_list = acos_l1b_obj.get_sounding_ids(
                add_polarization=not average_pol)

        for sounding_id in numpy.ravel(sounding_id_list):
            ascii_obj = OCO_Matrix()

            sounding_info_dict = acos_l1b_obj.get_sounding_info_dict(
                sounding_id, ignore_missing=True, average=average_name)
            write_ascii_from_hdf(acos_l1b_obj, sounding_info_dict, ascii_obj,
                                 sounding_id, average_name)

            output_filename = os.path.join(output_dir, '%s.dat' % sounding_id)
            print('Writing %s' % output_filename)
            ascii_obj.write(output_filename, default_precision=16)
def scale_uncertainty(input_radiance_file, output_radiance_file, scale_factor, row_range_spec=None):
    """Set the noise of selected rows to the radiance times a factor.

    Args:
        input_radiance_file: File loaded with OCO_Matrix; needs "radiance"
            and "noise" columns.
        output_radiance_file: Destination passed to ``OCO_Matrix.write``.
        scale_factor: Multiplier applied to each radiance; converted with
            ``float``.
        row_range_spec: Row specification parsed by index_range_list. All
            rows are used when it is None or empty.
    """
    spectra = OCO_Matrix(input_radiance_file)

    radiance_col = spectra.labels_lower.index("radiance")
    noise_col = spectra.labels_lower.index("noise")

    if row_range_spec is not None and len(row_range_spec) != 0:
        target_rows = index_range_list(row_range_spec)
    else:
        target_rows = range(spectra.dims[0])

    for row in target_rows:
        spectra.data[row, noise_col] = spectra.data[row, radiance_col] * float(scale_factor)

    spectra.write(output_radiance_file)
def write_albedo_file(output_file, albedo_data, header_values=None):
    """Write albedo data to a matrix file with generated column labels.

    Columns are labelled with ``ALBEDO_COL_TMPL`` using 1-based indexes and
    the header records ``ALBEDO_CENTER_WAVELENGTHS`` as a space-separated
    string under ``center_wavelengths``.

    Args:
        output_file: Destination passed to ``OCO_Matrix.write``.
        albedo_data: Array written as the file data; its second dimension
            sets the number of labels.
        header_values: Optional mapping merged into the header first.
    """
    out_obj = OCO_Matrix()

    if header_values is not None:
        out_obj.header.update(header_values)

    wavelength_text = [str(wl) for wl in ALBEDO_CENTER_WAVELENGTHS]
    out_obj.header['center_wavelengths'] = ' '.join(wavelength_text)

    num_cols = albedo_data.shape[1]
    out_obj.labels = [ALBEDO_COL_TMPL % col_number
                      for col_number in range(1, num_cols + 1)]
    out_obj.data = albedo_data
    out_obj.file_id = 'Surface albedo data'

    out_obj.write(output_file)
def extract_soundinginfo_file(inp_fileobj, output_filename):
    """Convert the sounding info file named by an input file to a matrix file.

    The sounding info file is located through the ``soundinginfo_file``
    keyword of the first SOUNDING_INFO section. Its keywords (taken from
    the first HEADER section when one exists, otherwise from the file
    itself) become the header of the output file.

    Args:
        inp_fileobj: Input file object providing the SOUNDING_INFO section.
        output_filename: Destination passed to ``OCO_Matrix.write`` and to
            write_source_into_header.
    """
    info_section = inp_fileobj.Get_Section("SOUNDING_INFO")[0]
    soundinginfo_file = get_inp_file_path(
        inp_fileobj, info_section.Get_Keyword_Value("soundinginfo_file"))

    print("%s -> %s" % (soundinginfo_file, output_filename))

    # Handle converting sounding info files without a HEADER block
    si_fileobj = L2_Input.Input_File(soundinginfo_file)
    header_sections = si_fileobj.Get_Section("HEADER")
    keyword_source = header_sections[0] if len(header_sections) > 0 else si_fileobj
    key_dict = keyword_source.Get_Keywords_Dict()

    out_obj = OCO_Matrix()
    out_obj.header.update(key_dict)
    out_obj.file_id = "Sounding Information"
    out_obj.write(output_filename)

    write_source_into_header(output_filename, soundinginfo_file)
def Process_File(source, destination, fileKeywords, moduleSections, valuesDict, mapDict):
    """Add a random perturbation built from a covariance to one column.

    For every module section, standard normal random factors are scaled by
    the square roots of the eigenvalues of the covariance, multiplied by
    the transposed eigenvector matrix and added to the selected column.

    Args:
        source: File loaded with OCO_Matrix.
        destination: Output passed to ``OCO_Matrix.write``.
        fileKeywords: Not used by this module.
        moduleSections: Sections supplying the ``covariance`` (file name)
            and ``column`` (index or label) keywords.
        valuesDict: Values handed to Apply_Template.
        mapDict: Passed to Apply_Template as ``mapDict``.

    Raises:
        TypeError: If ``column`` resolves to a list.
        IOError: If the covariance file is unspecified or missing, or the
            column is undefined or not found in the source file.
    """
    matrix_obj = OCO_Matrix(source)

    for realizeSect in moduleSections:
        covariance = Apply_Template(realizeSect.Get_Keyword_Value('covariance'), valuesDict, mapDict=mapDict)
        column = Apply_Template(realizeSect.Get_Keyword_Value('column'), valuesDict, mapDict=mapDict)

        if isinstance(column, list):
            raise TypeError('Only one column can be modified per file')

        if covariance is None or len(covariance) == 0:
            raise IOError('covariance file is not specified')

        if not os.path.exists(covariance):
            raise IOError('covariance file does not exist: %s' % covariance)

        cov_obj = OCO_Matrix(covariance)

        # One standard normal factor per row of the source file
        rand_factors = [random.normalvariate(0, 1) for i in range(matrix_obj.dims[0])]

        (eigen_val, eigen_vec) = linalg.eigh(cov_obj.data)

        try:
            column_idx = int(column)
        except (TypeError, ValueError):
            # Not an integer index: None is an error, anything else is
            # treated as a column label
            if column is None:
                raise IOError('column named not defined for source file: %s' % (source))
            elif column.lower() not in matrix_obj.labels_lower:
                raise IOError('column named %s not found in source file: %s' % (column, source))
            column_idx = matrix_obj.labels_lower.index(column.lower())

        update_vals = dot(eigen_vec.transpose(), (rand_factors * sqrt(eigen_val)))
        matrix_obj.data[:, column_idx] = matrix_obj.data[:, column_idx] + update_vals

    matrix_obj.write(destination)
def adjust_file_for_trend(self, src_apriori_file, time_struct, dst_apriori_file=None):
    """Add the apriori CO2 offset for a time to the CO2 column of a file.

    The offset comes from ``self.get_apriori_offset`` and is also recorded
    in the file header under ``co2_offset``.

    Args:
        src_apriori_file: Apriori file loaded with OCO_Matrix.
        time_struct: Time passed to ``self.get_apriori_offset``.
        dst_apriori_file: Output file; defaults to ``src_apriori_file``.

    Raises:
        IOError: If the output is a string path whose directory resolves to
            ``self.apriori_db_path``.
    """
    if dst_apriori_file is None:
        dst_apriori_file = src_apriori_file

    # Files inside the database directory must never be overwritten
    if type(dst_apriori_file) is str:
        dst_dir = os.path.realpath(os.path.dirname(dst_apriori_file))
        if dst_dir == os.path.realpath(self.apriori_db_path):
            raise IOError('Can not modify apriori file as located in database path, it must be copied first')

    apriori = OCO_Matrix(src_apriori_file)

    co2_label = apriori.find_labels(CO2_APRIORI_CO2_COL)[0]
    co2_col_idx = apriori.labels.index(co2_label)

    co2_offset = self.get_apriori_offset(time_struct, debug_values=apriori.header)

    apriori.data[:, co2_col_idx] += co2_offset
    apriori.header['co2_offset'] = co2_offset

    apriori.write(dst_apriori_file)
def get_data_object(data_filename):
    """Load a data file as an OCO_Matrix, falling back to a plain table.

    The file is first handed to OCO_Matrix. If that fails, it is read as a
    whitespace-separated table: lines containing '#' and blank lines are
    skipped, and every value is stored as the string read from the file.
    Column labels are generated from the file name (extension removed) and
    the column index using get_column_format.

    Args:
        data_filename: Path of the file to load.

    Returns:
        An OCO_Matrix object holding the file contents.
    """
    try:
        return OCO_Matrix(data_filename)
    except Exception:
        # Deliberate best effort: any load failure selects the table parser
        pass

    with open(data_filename, 'r') as table_file_obj:
        file_lines = table_file_obj.readlines()

    # Split each usable line on whitespace
    file_rows = [line.split() for line in file_lines
                 if '#' not in line and len(line.strip()) != 0]

    # Rows may be ragged, the matrix is sized for the widest one
    max_cols = 0
    for line_cols in file_rows:
        max_cols = max(max_cols, len(line_cols))

    # Cells of shorter rows keep the initial zero
    data_mat = numpy.zeros((len(file_rows), max_cols), dtype=numpy.chararray)
    for row_idx, line_cols in enumerate(file_rows):
        for col_idx, col_value in enumerate(line_cols):
            data_mat[row_idx][col_idx] = col_value

    # Create label names based on filename and index or else can
    # not select specific columns
    label_base = os.path.basename(data_filename)
    ext_pos = label_base.rfind('.')
    if ext_pos >= 0:
        # Only cut when there is an extension; slicing with -1 would
        # drop the last character of an extension-less name
        label_base = label_base[0:ext_pos]

    label_format = get_column_format(max_cols)
    data_labels = [label_format % (label_base, col_idx)
                   for col_idx in range(max_cols)]

    # Save data into OCO Matrix object
    data_obj = OCO_Matrix()
    data_obj.dims = [len(file_rows), max_cols]
    data_obj.labels = data_labels
    data_obj.data = data_mat

    return data_obj
def extract_soundinginfo_file(inp_fileobj, output_filename):
    """Write the sounding info keywords referenced by an input file.

    Looks up the ``soundinginfo_file`` keyword in the first
    ``SOUNDING_INFO`` section of ``inp_fileobj``, reads that file and
    writes its keywords as the header of a new ``OCO_Matrix`` file at
    ``output_filename``. The source path is then recorded via
    ``write_source_into_header``.

    Args:
        inp_fileobj: parsed input file object providing ``Get_Section``.
        output_filename: path of the file to create.
    """
    sounding_sections = inp_fileobj.Get_Section("SOUNDING_INFO")
    configured_name = sounding_sections[0].Get_Keyword_Value("soundinginfo_file")
    soundinginfo_file = get_inp_file_path(inp_fileobj, configured_name)

    print("%s -> %s" % (soundinginfo_file, output_filename))

    si_fileobj = L2_Input.Input_File(soundinginfo_file)
    header_sections = si_fileobj.Get_Section("HEADER")

    # Handle converting sounding info files without a HEADER block:
    # their keywords are read from the file object itself
    if len(header_sections) > 0:
        keyword_source = header_sections[0]
    else:
        keyword_source = si_fileobj
    key_dict = keyword_source.Get_Keywords_Dict()

    output_matobj = OCO_Matrix()
    output_matobj.header.update(key_dict)
    output_matobj.file_id = "Sounding Information"
    output_matobj.write(output_filename)

    write_source_into_header(output_filename, soundinginfo_file)
def remove_bad_data_last(input_file, output_file, check_col, check_val):
    """Truncate trailing rows whose check column matches ``check_val``.

    Rows are scanned from the last one backwards. The first row whose
    value in ``check_col`` does NOT match ``check_val`` becomes the last
    row kept; everything after it is dropped when writing ``output_file``.
    If every row matches, zero rows are written.

    Args:
        input_file: file to load with ``OCO_Matrix``.
        output_file: where the truncated data is written.
        check_col: column to inspect, given as an index (int or string of
            digits) or as a label name (matched case-insensitively).
        check_val: value marking bad data. Its lower-cased string form is
            used as a regular expression searched for in the lower-cased
            string form of each cell.
    """
    # Load existing file
    file_obj = OCO_Matrix(input_file)
    num_rows = file_obj.dims[0]

    if not isinstance(check_col, int):
        if check_col.isdigit():
            check_col = int(check_col)
        else:
            check_col = file_obj.labels_lower.index(check_col.lower())

    # Compile once rather than on every row
    bad_value_re = re.compile(str(check_val).lower())

    # Scan all the way down to row 0; stopping the range at 1 left rows 1
    # and 0 unchecked, so files with good data only there came out empty.
    last_good_index = -1
    for row_idx in range(num_rows - 1, -1, -1):
        cell_text = str(file_obj.data[row_idx, check_col]).lower()
        if not bad_value_re.search(cell_text):
            last_good_index = row_idx
            break

    # Single string so the output is the same under Python 2 and 3
    print("Last good index =  %s" % last_good_index)

    # Shrink the declared row count; write() is told to honour it
    file_obj.dims = [last_good_index + 1, file_obj.dims[1]]
    file_obj.write(output_file, use_set_dims=True, auto_size_cols=False)
def resample_levels(input_file, output_file, resample_to, val_extrapolate=False):
    """Resample every column of a profile file onto new levels.

    Each column of ``input_file`` is passed through ``resample_profile``
    against the file's pressure column and the result is written to
    ``output_file``. All columns except the temperature column (label
    ``t``) are resampled with ``log_data=True``. The pressure column is
    always extrapolated; other columns follow ``val_extrapolate``.

    Args:
        input_file: file to load with ``OCO_Matrix``; must have a
            ``pressure`` column.
        output_file: where the resampled data is written.
        resample_to: string holding either a level count (all digits) or
            the path of an existing file whose ``pressure`` column gives
            the destination levels.
        val_extrapolate: whether non-pressure columns are extrapolated.

    Raises:
        IOError: if ``input_file`` has no pressure column.
        ValueError: if ``resample_to`` is neither an integer nor an
            existing file.
    """
    # Load existing file
    file_obj = OCO_Matrix(input_file)

    try:
        src_pres_col = file_obj.labels_lower.index("pressure")
    except ValueError:
        raise IOError('Could not find pressure column in input file: "%s"' % input_file)

    # A temperature column is optional; -1 never equals a column index
    try:
        src_temp_col = file_obj.labels_lower.index("t")
    except ValueError:
        src_temp_col = -1

    num_cols = file_obj.dims[1]

    if resample_to.isdigit():
        dst_levels = int(resample_to)

        # Do nothing except write output file if input and desired
        # levels already match
        if file_obj.dims[0] == dst_levels:
            file_obj.write(output_file)
            return

        dst_data = numpy.zeros((dst_levels, num_cols), dtype=float)
    elif os.path.exists(resample_to):
        pres_obj = OCO_Matrix(resample_to)
        dst_pres_col = pres_obj.labels_lower.index("pressure")

        dst_data = numpy.zeros((pres_obj.dims[0], num_cols), dtype=float)
        dst_levels = pres_obj.data[:, dst_pres_col]
    else:
        raise ValueError('Resample to argument "%s" is neither an integer nor a file that exists' % resample_to)

    src_pressure = file_obj.data[:, src_pres_col]
    for col_idx in range(num_cols):
        # Interpolate all but temperature in log space
        log_data = col_idx != src_temp_col

        # Pressure itself must always cover the full destination range
        do_extrapolate = True if col_idx == src_pres_col else val_extrapolate

        dst_data[:, col_idx] = resample_profile(file_obj.data[:, src_pres_col],
                                                file_obj.data[:, col_idx],
                                                dst_levels,
                                                log_data=log_data,
                                                extrapolate=do_extrapolate)

    file_obj.data = dst_data
    file_obj.write(output_file)
def extract_sv_jacobians(pd_file, names_file, rad_conv_file):
    """Split a partial derivative file into one file per name pattern.

    For every ``(pattern, filename)`` pair in ``SV_NAMES_MATCH`` the
    element names read from ``names_file`` are searched with the pattern,
    and the columns of ``pd_file`` at the matching positions are written
    to ``filename``. Each output takes its ``pixels`` from
    ``rad_conv_file``.

    Args:
        pd_file: file holding the data whose columns are selected.
        names_file: file with an ``Element Name`` column, read as strings.
        rad_conv_file: file read (without data) for its ``pixels``.
    """
    pixel_source = OCO_Matrix(rad_conv_file, read_data=False)

    element_names = OCO_Matrix(names_file, as_strings=True)['Element Name'][:, 0]

    jacobians = OCO_Matrix(pd_file)

    for name_re, output_filename in SV_NAMES_MATCH.items():
        # Positions of the elements whose name matches this pattern
        file_indexes = [position
                        for position, element in enumerate(element_names)
                        if re.search(name_re, element)]

        # Same output as the two-item print statement
        print("%s %s" % (output_filename, file_indexes))

        out_obj = OCO_Matrix()
        out_obj.pixels = pixel_source.pixels
        out_obj.data = jacobians.data[:, file_indexes]
        out_obj.write(output_filename)
def create_dispersion_from_ascii(l1b_file, out_disp_file, disp_in_file, sounding_id=None, index_scheme=None):
    """Create a scene dispersion file from an ASCII L1B file.

    Reads latitude, solar zenith/azimuth and the frame time stamp from
    the header of ``l1b_file``, takes the radiance values of the first
    pixel range, loads the dispersion coefficients from ``disp_in_file``
    and hands everything to ``create_scene_dispersion_file``.

    Args:
        l1b_file: ASCII L1B file to load with ``OCO_Matrix``.
        out_disp_file: output file passed to
            ``create_scene_dispersion_file``.
        disp_in_file: file containing the input dispersion coefficients.
        sounding_id: defaults to the ``sounding_id`` header value of
            ``l1b_file``.
        index_scheme: passed through to ``create_scene_dispersion_file``.

    Raises:
        IOError: if ``disp_in_file`` is None.
    """
    asc_l1b_obj = OCO_Matrix(l1b_file)
    header = asc_l1b_obj.header

    def _first_header_float(keyword):
        # Only the first whitespace separated token of the value is used
        return float(header[keyword].split()[0])

    if sounding_id is None:
        sounding_id = header['sounding_id']

    if disp_in_file is None:
        raise IOError('No dispersion file specified')

    latitude = _first_header_float('sounding_latitude')
    # Angles are converted with math.radians before being passed on
    sza_r = math.radians(_first_header_float('sounding_solar_zenith'))
    saz_r = math.radians(_first_header_float('sounding_solar_azimuth'))

    time_stamp = header['frame_time_stamp']
    time_struct = OCO_TextUtils.convert_timestamp_to_struct(time_stamp)

    # Radiance of the first pixel range only
    pixel_ranges = asc_l1b_obj.pixel_ranges()
    aband_data = asc_l1b_obj['Radiance'][slice(*pixel_ranges[0]), 0]

    disp_in_obj = OCO_Matrix(disp_in_file)
    dispersion_coefs = disp_in_obj[DISPERSION_ASCII_COLUMN_IDENT].transpose()

    create_scene_dispersion_file(sounding_id, latitude, sza_r, saz_r,
                                 time_struct, aband_data, dispersion_coefs,
                                 out_disp_file, index_scheme=index_scheme)