def read_bufr_file(input_bufr_file):
    # #[ read a bufr file
    """
    Decode a BUFR file and print a short preview of its content.

    The file is opened with the BUFRReader class. Iterating over the
    reader yields the messages, and iterating over a message yields
    objects whose ``data`` attribute holds the decoded values. For each
    of these the shape and the first two values (or the first 2x2
    corner for 2D data) are printed.

    Arguments:
    input_bufr_file -- name of the BUFR file to read

    Returns nothing; all results are printed to stdout.
    """
    msg_nr = -1

    # the with statement closes the file again, also when decoding fails
    with BUFRReader(input_bufr_file, warn_about_bufr_size=False) as bufr:
        for msg_nr, msg in enumerate(bufr):
            for subs, msg_or_subset_data in enumerate(msg):
                data = msg_or_subset_data.data

                if data.shape[0] == 0:
                    print('NO DATA FOUND! this seems an empty BUFR message !')
                    continue

                print('loaded BUFR msg nr. ', msg_nr, 'shape = ', data.shape)
                if len(data.shape) == 1:
                    print('data[:2] = ', data[:2])
                else:
                    print('data[:2,:2] = ', data[:2, :2])

                # this is only a preview: stop once the item with
                # index 2 of this message has been printed
                if subs > 1:
                    break

    # msg_nr keeps its initial value when the reader yielded nothing
    if msg_nr == -1:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def print_bufr_content1(input_bufr_file, output_fd, separator, max_msg_nr):
    # #[ implementation 1
    """
    Example implementation using the BUFRReader class combined with
    the get_values_as_2d_array method.

    The names and units of the first loaded message are written as two
    header lines, followed by one line per subset for every message.

    Arguments:
    input_bufr_file -- name of the BUFR file to read
    output_fd       -- object with a write() method receiving the text
    separator       -- string placed between the columns
    max_msg_nr      -- stop after this many messages; values <= 0
                       disable the limit

    Returns nothing.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False)
    msg_nr = 0

    try:
        while True:
            try:
                bob.get_next_msg()
            except EOFError:
                break
            msg_nr += 1

            # add the header strings, only for the first loaded message
            if bob.msg_loaded == 1:
                output_fd.write(separator.join(bob.get_names()) + "\n")
                output_fd.write(separator.join(bob.get_units()) + "\n")

            data = bob.get_values_as_2d_array()
            if data.shape[0] * data.shape[1] == 0:
                print('NO DATA FOUND! this seems an empty BUFR message !')
                continue

            # one output line per subset, prefixed by its 0-based index
            for subs in range(data.shape[0]):
                output_fd.write(
                    str(subs) + separator +
                    separator.join(str(val) for val in data[subs, :]) +
                    "\n")

            print('converted BUFR msg nr. ', msg_nr)
            if (max_msg_nr > 0) and (msg_nr >= max_msg_nr):
                print('skipping remainder of this BUFR file')
                break
    finally:
        # close the file, also when decoding or writing failed
        bob.close()

    if msg_nr == 0:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def sort_msgs(input_bufr_file):
    # #[
    """
    A little example routine to demonstrate how to extract BUFR
    messages from a BUFR file, sort them, and write them out again
    to other files.

    Every message is appended to a file that is named after the
    unexpanded descriptor list of that message (the descriptors joined
    by underscores), so all messages sharing a descriptor list end up
    in the same file. The output files are created in the current
    working directory.

    Arguments:
    input_bufr_file -- name of the BUFR file to read

    Returns a list with the names of the files that were written.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False)

    # maps output file name -> [open file object, message count]
    files_dict = {}
    msg_nr = 0

    try:
        while True:
            try:
                bob.get_next_msg()
            except EOFError:
                break
            msg_nr += 1
            print('handling message nr ', msg_nr)

            list_of_unexp_descr = bob.bufr_obj.py_unexp_descr_list
            output_filename = '_'.join(list_of_unexp_descr)

            if output_filename in files_dict:
                entry = files_dict[output_filename]
                entry[1] += 1  # increment count
            else:
                entry = [open(output_filename, 'wb'), 1]
                files_dict[output_filename] = entry
            entry[0].write(bob.bufr_obj.encoded_message)
    finally:
        # release the input file and every output file,
        # also when a message could not be handled
        bob.close()
        for entry in files_dict.values():
            entry[0].close()

    for k in files_dict:
        print('file ', k, ' contains ', files_dict[k][1], ' messages')

    # return a real list (not a dict view) so callers can index it
    return list(files_dict)
def read_bufr_file(input_bufr_file):
    # #[ read a bufr file
    """
    Decode a BUFR file and print a short preview of its content.

    The file is opened with the BUFRReader class. Iterating over the
    reader yields the messages, and iterating over a message yields
    objects whose ``data`` attribute holds the decoded values. For each
    of these the shape is printed, followed by the first two values
    (or the first 2x2 corner for 2D data) converted with tolist().

    Arguments:
    input_bufr_file -- name of the BUFR file to read

    Returns nothing; all results are printed to stdout.
    """
    msg_nr = -1

    # the with statement closes the file again, also when decoding fails
    with BUFRReader(input_bufr_file, warn_about_bufr_size=False) as bufr:
        for msg_nr, msg in enumerate(bufr):
            for subs, msg_or_subset_data in enumerate(msg):
                data = msg_or_subset_data.data

                if data.shape[0] == 0:
                    print('NO DATA FOUND! this seems an empty BUFR message !')
                    continue

                print('loaded BUFR msg nr. ', msg_nr, 'shape = ', data.shape)
                if len(data.shape) == 1:
                    print('data[:2] = ', data[:2].tolist())
                else:
                    print('data[:2,:2] = ', data[:2, :2].tolist())

                # this is only a preview: stop once the item with
                # index 2 of this message has been printed
                if subs > 1:
                    break

    # msg_nr keeps its initial value when the reader yielded nothing
    if msg_nr == -1:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def read_bufr_file(input_bufr_file):
    # #[ read a bufr file
    """
    Read the file using the BUFRReader class and get the data
    with the get_subset_values method.

    For every message the shape and the first two values of subset 1
    are printed; when the message holds more than one subset the first
    two values of subset 2 are printed as well.

    Arguments:
    input_bufr_file -- name of the BUFR file to read

    Returns nothing; all results are printed to stdout.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False)
    msg_nr = 0

    try:
        while True:
            try:
                bob.get_next_msg()
            except EOFError:
                break
            msg_nr += 1

            nsubsets = bob.get_num_subsets()

            data1 = bob.get_subset_values(1)
            if numpy.shape(data1)[0] == 0:
                print('NO DATA FOUND! this seems an empty BUFR message !')
                continue

            print('loaded BUFR msg nr. ', msg_nr,
                  'shape = ', numpy.shape(data1))
            print('data1[:2] = ', data1[:2])

            if nsubsets > 1:
                data2 = bob.get_subset_values(2)
                if numpy.shape(data2)[0] == 0:
                    print('NO DATA FOUND! this seems an empty BUFR message !')
                    continue
                print('data2[:2] = ', data2[:2])
    finally:
        # close the file, also when decoding failed
        bob.close()

    if msg_nr == 0:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def decoding_example(input_bufr_file):
    """
    Decode a BUFR file and print the size of everything it yields.

    The example lives inside a function so that pylint does not treat
    its variables as module level constants (which would have to be
    written in capitals, although most of them are not constant).

    Arguments:
    input_bufr_file -- name of the BUFR file to decode

    Returns nothing; for each decoded item the shape of the data and
    the number of names and units are printed.
    """
    # switch off the default ECMWF welcome message, which
    # is not yet redirected to the above defined fileunit
    os.environ['PRINT_TABLE_NAMES'] = 'FALSE'

    print('loading testfile: ', input_bufr_file)

    # the BUFRReader class takes care of reading the binary data
    with BUFRReader(input_bufr_file) as reader:
        for message in reader:
            for item in message:
                values, labels, dimensions = (item.data,
                                              item.names,
                                              item.units)
                print(values.shape)
                print(len(labels))
                print(len(dimensions))
# debug print('values[:25]: ', msg.values[:25].tolist()) print('values[-25:]: ', msg.values[-25:].tolist()) msg.write_msg_to_file() bwr.close() ############################## # reopen the BUFR file as test ############################## print('*'*50) input_bufr_file = output_bufr_file bufr = BUFRReader(input_bufr_file, warn_about_bufr_size=False) # just 1 msg in this test file, so no looping needed for msg in bufr: print('num_subsets = ', msg.get_num_subsets()) for subs, msg_or_subset_data in enumerate(msg): list_of_names = msg_or_subset_data.names list_of_units = msg_or_subset_data.units data = msg_or_subset_data.data print('"subset nr"'+','+','.join(list_of_names)) print('""'+','+','.join(list_of_units)) print('data.shape = ', data.shape) for irow, row in enumerate(data): print('{}, {} : {}'. format(subs+1, irow, ', '.join('{}'.format(val) for val in row.tolist())))
def print_bufr_content4(input_bufr_file, output_fd, separator,
                        max_msg_nr, expand_flags, expand_strings,
                        descr_multiplier):
    # #[ implementation 4
    """
    Example implementation using the BUFRReader class
    to decode a bufr file using delayed replication.
    Since these files may have different descriptor lists
    for each subset, a different call pattern is needed:
    the names and units header is written again for every
    item yielded by a message.

    Arguments:
    input_bufr_file  -- name of the BUFR file to read
    output_fd        -- object with a write() method receiving the text
    separator        -- string placed between the columns
    max_msg_nr       -- message limit; negative values disable it
    expand_flags     -- passed on unchanged to BUFRReader
    expand_strings   -- passed on unchanged to BUFRReader
    descr_multiplier -- passed on to BUFRReader as descr_multiplyer

    Returns nothing.
    """
    msg_nr = -1

    # the with statement closes the file again, also when decoding fails
    with BUFRReader(input_bufr_file,
                    warn_about_bufr_size=False,
                    verbose=False,
                    expand_flags=expand_flags,
                    expand_strings=expand_strings,
                    descr_multiplyer=descr_multiplier) as bufr:
        for msg_nr, msg in enumerate(bufr):
            # since this example assumes a bufr file using delayed
            # replication always add the header for each subset
            for msg_or_subs_nr, msg_or_subset_data in enumerate(msg):
                list_of_names = msg_or_subset_data.names
                list_of_units = msg_or_subset_data.units
                data = msg_or_subset_data.data

                # numpy.shape also works when data is a plain list
                data_shape = numpy.shape(data)
                if data_shape[0] == 0:
                    print('NO DATA FOUND! this seems an empty BUFR message !')
                    continue

                # add header strings
                output_fd.write('"subset nr"' + separator +
                                separator.join(list_of_names) + "\n")
                output_fd.write('""' + separator +
                                separator.join(list_of_units) + "\n")

                if len(data_shape) == 1:
                    # we are walking over subsets
                    output_fd.write(
                        str(msg_or_subs_nr + 1) + separator +
                        separator.join(to_str(val) for val in data[:]) +
                        "\n")
                else:
                    # we are getting a 2D array as result
                    for subs in range(data_shape[0]):
                        output_fd.write(
                            str(subs + 1) + separator +
                            separator.join(to_str(val)
                                           for val in data[subs, :]) +
                            "\n")

            print('converted BUFR msg nr. ', msg_nr + 1)
            # NOTE(review): msg_nr is zero-based here, so this lets
            # max_msg_nr + 1 messages through -- confirm this is intended
            if (max_msg_nr >= 0) and (msg_nr >= max_msg_nr):
                print('skipping remainder of this BUFR file')
                break

    # msg_nr keeps its initial value when the reader yielded nothing
    if msg_nr == -1:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def select_subsets(input_bufr_file, output_bufr_file):
    # #[ select on subsets
    """
    Select data and write out again.

    Every message of the input file is decoded, every other subset
    (index 0, 2, 4, ...) is selected, and the selection is encoded
    again and written to the output file.

    Arguments:
    input_bufr_file  -- name of the BUFR file to read
    output_bufr_file -- name of the BUFR file to write

    Returns nothing.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False)
    msg_nr = 0

    try:
        # open the file for writing
        rbf_out = RawBUFRFile()
        rbf_out.open(output_bufr_file, 'wb')

        try:
            while True:
                try:
                    bob.get_next_msg()
                except EOFError:
                    break
                msg_nr += 1

                data = bob.get_values_as_2d_array()
                print('data.shape = ', data.shape)

                if data.shape[0] * data.shape[1] == 0:
                    print('NO DATA FOUND! this seems an empty BUFR message !')
                    continue

                # select every other subset
                new_data = data[::2, :]
                print('new_data.shape = ', new_data.shape)

                bufr_obj = bob.bufr_obj
                nsub = bufr_obj.get_num_subsets()
                # floor division: this is a count of descriptors, and
                # plain / would turn it into a float on python 3
                n_exp_descr = len(bufr_obj.values) // nsub
                bufr_obj.fill_descriptor_list(
                    nr_of_expanded_descriptors=n_exp_descr)
                bufr_obj.ktdlst = bufr_obj.get_descriptor_list()

                delayed_repl_data = bufr_obj.derive_delayed_repl_factors()
                bufr_obj.fill_delayed_repl_data(delayed_repl_data)

                new_nsub = new_data.shape[0]
                bufr_obj.nr_subsets = new_nsub

                btm = BufrTemplate()
                btm.add_descriptors(*bufr_obj.ktdlst)
                btm.nr_of_delayed_repl_factors = 1
                btm.del_repl_max_nr_of_repeats_list = list(delayed_repl_data)
                bufr_obj.register_and_expand_descriptors(btm)

                # activate this one if the encoding crashes
                # without clear cause:
                # bufr_obj.estimated_num_bytes_for_encoding = 25000

                # the replication factors are repeated once per subset
                bufr_obj.kdate = new_nsub * list(delayed_repl_data)

                print('bob.bufr_obj.cvals.shape = ', bufr_obj.cvals.shape)
                bufr_obj.encode_data(new_data, bufr_obj.cvals[:32, :])
                rbf_out.write_raw_bufr_msg(bufr_obj.encoded_message)

                print('converted BUFR msg nr. ', msg_nr)
        finally:
            # close the output file, also when encoding failed
            rbf_out.close()
    finally:
        # close the input file, also when decoding failed
        bob.close()

    if msg_nr == 0:
        print('no BUFR messages found, are you sure this is a BUFR file?')
# debug print('values[:25]: ', msg.values[:25].tolist()) print('values[-25:]: ', msg.values[-25:].tolist()) msg.write_msg_to_file() bwr.close() ############################## # reopen the BUFR file as test ############################## print('*' * 50) input_bufr_file = output_bufr_file bufr = BUFRReader(input_bufr_file, expand_strings=True, warn_about_bufr_size=False) # just 1 msg in this test file, so no looping needed for msg in bufr: print('num_subsets = ', msg.get_num_subsets()) for subs, msg_or_subset_data in enumerate(msg): list_of_names = msg_or_subset_data.names list_of_units = msg_or_subset_data.units data = msg_or_subset_data.data print('"subset nr"' + ',' + ','.join(list_of_names)) print('""' + ',' + ','.join(list_of_units)) print(str(subs + 1) + ',' + ','.join(str(val) for val in data[:])) bufr.close()
def print_bufr_content1(input_bufr_file, output_fd, separator,
                        max_msg_nr, expand_flags):
    # #[ implementation 1
    """
    Example implementation using the BUFRReader class combined with
    the get_values_as_2d_array method.

    The quoted names and units of the first loaded message are written
    as two header lines, followed by one line per subset for every
    message. When a later message has another unexpanded descriptor
    list than the first one, an explanation is printed and the program
    is terminated with sys.exit.

    Arguments:
    input_bufr_file -- name of the BUFR file to read
    output_fd       -- object with a write() method receiving the text
    separator       -- string placed between the columns
    max_msg_nr      -- stop after this many messages; values <= 0
                       disable the limit
    expand_flags    -- passed on unchanged to BUFRReader

    Returns nothing.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False,
                     expand_flags=expand_flags)
    msg_nr = 0

    try:
        while True:
            try:
                bob.get_next_msg()
            except EOFError:
                break
            msg_nr += 1

            list_of_names = list(bob.get_names())
            list_of_units = list(bob.get_units())
            list_of_unexp_descr = bob.get_unexp_descr_list()

            if bob.msg_loaded == 1:
                # add header strings, only for the first loaded message
                for first_column, items in (('"subset nr"', list_of_names),
                                            ('""', list_of_units)):
                    quoted = (['"' + item + '"' for item in items] or
                              ['"[NO DATA]"'])
                    output_fd.write(first_column + separator +
                                    separator.join(quoted) + '\n')

                # remember the template the header lines belong to
                list_of_unexp_descr_first_msg = \
                    bob.bufr_obj.py_unexp_descr_list

            data = bob.get_values_as_2d_array()

            if list_of_unexp_descr != list_of_unexp_descr_first_msg:
                for line in (
                        '\n\n',
                        'WARNING: it seems different types of BUFR messages',
                        'are mixed in this BUFR file, meaning that the list of',
                        'descriptor names and units printed on the first 2 output',
                        'lines will not match with all lines of data.',
                        'To prevent confusion, therefore decoding is halted',
                        'It is recommended to first sort BUFR messages by type',
                        'before converting them to ascii or csv.',
                        'The example script sort_bufr_msgs.py can be used',
                        'to sort a BUFR file.',
                        '\n\n',
                        'Detailed info:',
                        'list_of_unexp_descr != list_of_unexp_descr_first_msg !'):
                    print(line)
                print('list_of_unexp_descr = ', list_of_unexp_descr)
                print('list_of_unexp_descr_first_msg = ',
                      list_of_unexp_descr_first_msg)
                # NOTE(review): decoding is aborted here but the exit
                # status signals success -- confirm whether callers
                # rely on the status being 0
                sys.exit(0)

            if data.shape[0] * data.shape[1] == 0:
                print('NO DATA FOUND! this seems an empty BUFR message !')
                continue

            # one output line per subset, prefixed by its 1-based number
            for subs in range(data.shape[0]):
                output_fd.write(
                    str(subs + 1) + separator +
                    separator.join(str(val) for val in data[subs, :]) +
                    "\n")

            print('converted BUFR msg nr. ', msg_nr)
            if (max_msg_nr > 0) and (msg_nr >= max_msg_nr):
                print('skipping remainder of this BUFR file')
                break
    finally:
        # close the file, also on errors and on the sys.exit above
        bob.close()

    if msg_nr == 0:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def print_bufr_content5(input_bufr_file, output_fd, separator,
                        max_msg_nr, expand_flags):
    # #[ implementation 5
    """
    Example implementation using the BUFRReader class
    to decode a bufr file using delayed replication.
    Since these files may have different descriptor lists
    for each subset, a different call pattern is needed.

    Only the elements whose lower-cased name contains one of the
    strings in names_to_be_selected, and none of the strings in
    names_to_be_excluded, are written. The names and units header
    is written once, for the first subset that has selected values.

    Arguments:
    input_bufr_file -- name of the BUFR file to read
    output_fd       -- object with a write() method receiving the text
    separator       -- string placed between the columns
    max_msg_nr      -- stop after this many messages; values <= 0
                       disable the limit
    expand_flags    -- passed on unchanged to BUFRReader

    Returns nothing.
    """
    # testcases:
    # ./example_programs/bufr_to_ascii.py -5 -c -o tmp.csv \
    #     -i ./pybufr_ecmwf/ecmwf_bufr_lib/bufrdc_000403/data/syno_1.bufr
    #
    # ./example_programs/bufr_to_ascii.py -5 -c -o tmp.csv \
    #     -i ../BUFR_test_files/synop_knmi_via_ko_janssen/MSSAEOL_00002950.b
    #
    names_to_be_selected = ['temperature', 'wind']
    names_to_be_excluded = ['minimum', 'maximum']

    write_names_and_units_just_once = True

    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False,
                     verbose=False, expand_flags=expand_flags)
    msg_nr = 0
    not_yet_printed = True

    try:
        while True:
            try:
                bob.get_next_msg()
            except EOFError:
                break
            msg_nr += 1

            # since this example assumes a bufr file using delayed
            # replication always request the header for each subset
            nsubsets = bob.get_num_subsets()
            for subs in range(1, nsubsets + 1):
                print('==> subset ', subs)

                (list_of_names, list_of_units) = \
                    bob.get_names_and_units(subs)
                data = bob.get_subset_values(subs)

                selected_names = []
                selected_units = []
                selected_values = []
                for i, name in enumerate(list_of_names):
                    # the filter strings get their own loop variable;
                    # reusing 'name' would compare each filter string
                    # with itself and reject every element
                    lowered = name.lower()
                    selected = (
                        any(wanted in lowered
                            for wanted in names_to_be_selected) and
                        not any(unwanted in lowered
                                for unwanted in names_to_be_excluded))

                    if selected:
                        selected_names.append(list_of_names[i])
                        selected_units.append(list_of_units[i])
                        selected_values.append(data[i])

                if not selected_values:
                    print('NO DATA SELECTED for BUFR message %d and subset %d!'
                          % (msg_nr, subs))
                    continue

                if write_names_and_units_just_once and not_yet_printed:
                    output_fd.write('"subset nr"' + separator +
                                    separator.join(selected_names) + "\n")
                    output_fd.write('""' + separator +
                                    separator.join(selected_units) + "\n")
                    not_yet_printed = False

                output_fd.write(
                    str(subs) + separator +
                    separator.join(str(val) for val in selected_values) +
                    "\n")

            print('=' * 25)
            print('converted BUFR msg nr. ', msg_nr)
            print('=' * 25)
            if (max_msg_nr > 0) and (msg_nr >= max_msg_nr):
                print('skipping remainder of this BUFR file')
                break
    finally:
        # close the file, also when decoding or writing failed
        bob.close()

    if msg_nr == 0:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def sort_msgs(input_bufr_file):
    # #[
    """
    A little example routine to demonstrate how to extract BUFR
    messages from a BUFR file, sort them, and write them out again
    to other files.

    Every message is appended to a file whose name is derived from its
    unexpanded descriptor list by construct_unique_filename, so all
    messages sharing a template end up in the same file. Afterwards a
    summary is printed that tells how many templates were found and
    for how many of them decoding failed.

    Arguments:
    input_bufr_file -- name of the BUFR file to read

    Returns a list with the names of the files that were written.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False)

    # maps output file name ->
    # [open file object, message count, can_be_decoded, descriptor list]
    files_dict = {}

    try:
        num_msgs = bob.rbf.get_num_bufr_msgs()
        # report progress roughly 20 times per file
        progress_step = max(1, num_msgs // 20)

        msg_nr = 0
        while True:
            can_be_decoded = False
            try:
                bob.get_next_msg()
                can_be_decoded = True
            except EOFError:
                break
            except KeyError:
                # allow sorting of BUFR messages that cannot be decoded
                # because the needed key is not in the available set of
                # BUFR table files. This tool only uses unexpanded
                # descriptors so ability to decode should not be
                # required. A user still may wish to first sort, and
                # then decode a subset of messages that can be decoded.
                pass

            msg_nr += 1
            if msg_nr % progress_step == 0:
                print('handling message nr {} out of {}'.format(msg_nr,
                                                                num_msgs))

            list_of_unexp_descr = bob.bufr_obj.py_unexp_descr_list
            output_filename = construct_unique_filename(list_of_unexp_descr)

            if output_filename in files_dict:
                entry = files_dict[output_filename]
                entry[1] += 1  # increment count
            else:
                # the decodability flag is taken from the first
                # message that uses this template
                entry = [open(output_filename, 'wb'), 1,
                         can_be_decoded, list_of_unexp_descr]
                files_dict[output_filename] = entry
            entry[0].write(bob.bufr_obj.encoded_message)
    finally:
        # release the input file and every output file,
        # also when a message could not be handled
        bob.close()
        for entry in files_dict.values():
            entry[0].close()

    num_that_can_be_decoded = 0
    num_that_cannot_be_decoded = 0
    for k in files_dict:
        _, count, can_be_decoded, list_of_unexp_descr = files_dict[k]
        print('file {} contains {} messages'.format(k[:25], count))
        if can_be_decoded:
            num_that_can_be_decoded += 1
        else:
            num_that_cannot_be_decoded += 1
            # check to see if local descriptors are present
            for descr in list_of_unexp_descr:
                if int(descr[3:]) >= 192:
                    print('==>A local descriptor definition is present: ',
                          descr)
                    print('==>this template cannot be decoded with ' +
                          'standard WMO BUFR tables.')

    print('Sorting results:')
    print('BUFR messages with {} different templates are present in this file'.
          format(num_that_can_be_decoded + num_that_cannot_be_decoded))
    if num_that_cannot_be_decoded > 0:
        print('decoding is not possible for {} templates.'.format(
            num_that_cannot_be_decoded))

    # return a real list (not a dict view) so callers can index it
    return list(files_dict)
def print_bufr_content1(input_bufr_file, output_fd, separator,
                        max_msg_nr, expand_flags):
    # #[ implementation 1
    """
    Example implementation using the BUFRReader class and the data
    attribute of the items yielded by each message.

    The quoted names and units of the first item of the first message
    are written as two header lines, followed by the data of every
    message. When a message has another unexpanded descriptor list
    than the first one, an explanation is printed and the program is
    terminated with sys.exit(1).

    Arguments:
    input_bufr_file -- name of the BUFR file to read
    output_fd       -- object with a write() method receiving the text
    separator       -- string placed between the columns
    max_msg_nr      -- stop after this many messages; values <= 0
                       disable the limit
    expand_flags    -- passed on unchanged to BUFRReader

    Returns nothing.
    """
    msg_nr = -1
    list_of_unexp_descr_first_msg = None

    # the with statement closes the file again, also on errors
    # and on the sys.exit below
    with BUFRReader(input_bufr_file, warn_about_bufr_size=False,
                    expand_flags=expand_flags) as bufr:
        for msg_nr, msg in enumerate(bufr):
            list_of_unexp_descr = msg.get_unexp_descr_list()
            if list_of_unexp_descr_first_msg is None:
                # remember the template of the first message, also
                # when that message turns out to hold no data
                list_of_unexp_descr_first_msg = list_of_unexp_descr[:]

            # stays empty when the message yields nothing at all
            data = ()

            for subs, msg_or_subset_data in enumerate(msg):
                # get the actual values
                data = msg_or_subset_data.data

                if msg_nr == 0 and subs == 0:
                    # add header strings
                    for first_column, items in (
                            ('"subset nr"', msg_or_subset_data.names),
                            ('""', msg_or_subset_data.units)):
                        quoted = (['"' + item + '"' for item in items] or
                                  ['"[NO DATA]"'])
                        output_fd.write(first_column + separator +
                                        separator.join(quoted) + '\n')

                try:
                    if len(numpy.shape(data)) != 1:
                        for subs_cnt in range(numpy.shape(data)[0]):
                            output_fd.write(
                                str(subs_cnt + 1) + separator +
                                separator.join(
                                    str(val) for val in data[subs_cnt, :]) +
                                "\n")
                    else:
                        # 1D data is returned if character values may
                        # be present in the data (in case autoget_cval
                        # or expand_flags are active)
                        # it may also happen if the message uses
                        # delayed replication and has variable lengths
                        # for the different subsets
                        subs_cnt = msg_or_subset_data.current_subset
                        output_fd.write(
                            str(subs_cnt) + separator +
                            separator.join(str(val) for val in data[:]) +
                            "\n")
                except TypeError:
                    # in case of delayed replication or when character
                    # strings are present (i.e. expand_flags = True)
                    # data will be returned as a 1D list in stead of a
                    # 2D numpy array. This generates a TypeError in
                    # the data[subs_cnt, :] indexing above
                    output_fd.write(
                        str(subs + 1) + separator +
                        separator.join(str(val) for val in data[:]) +
                        "\n")

            if (list(list_of_unexp_descr) !=
                    list(list_of_unexp_descr_first_msg)):
                for line in (
                        '\n\n',
                        'ERROR: it seems different types of BUFR messages',
                        'are mixed in this BUFR file, meaning that the list of',
                        'descriptor names and units printed on the first 2 output',
                        'lines will not match with all lines of data.',
                        'To prevent confusion, therefore decoding is halted',
                        'It is recommended to first sort BUFR messages by type',
                        'before converting them to ascii or csv.',
                        'The example script sort_bufr_msgs.py can be used',
                        'to sort a BUFR file.',
                        '\n\n',
                        'Detailed info:',
                        'list_of_unexp_descr != list_of_unexp_descr_first_msg !'):
                    print(line)
                print('list_of_unexp_descr = ', list_of_unexp_descr)
                print('list_of_unexp_descr_first_msg = ',
                      list_of_unexp_descr_first_msg)
                sys.exit(1)

            if numpy.shape(data)[0] == 0:
                print('NO DATA FOUND! this seems an empty BUFR message !')
                continue

            print('converted BUFR msg nr. ', msg_nr + 1)
            # msg_nr is zero-based, so msg_nr + 1 messages have been
            # handled by now; a non-positive limit means no limit
            if (max_msg_nr > 0) and (msg_nr + 1 >= max_msg_nr):
                print('skipping remainder of this BUFR file')
                break

    # msg_nr keeps its initial value when the reader yielded nothing
    if msg_nr == -1:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def print_bufr_content5(input_bufr_file, output_fd, separator,
                        max_msg_nr, expand_flags):
    # #[ implementation 5
    """
    Example implementation using the BUFRReader class
    to decode a bufr file using delayed replication.
    Since these files may have different descriptor lists
    for each subset, a different call pattern is needed.

    Only the elements whose lower-cased name contains one of the
    strings in names_to_be_selected, and none of the strings in
    names_to_be_excluded, are written. The names and units header
    is written once, for the first subset that has selected values.

    Arguments:
    input_bufr_file -- name of the BUFR file to read
    output_fd       -- object with a write() method receiving the text
    separator       -- string placed between the columns
    max_msg_nr      -- stop after this many messages; values <= 0
                       disable the limit
    expand_flags    -- passed on unchanged to BUFRReader

    Returns nothing.
    """
    # testcases:
    # ./example_programs/bufr_to_ascii.py -5 -c -o tmp.csv \
    #     -i ./pybufr_ecmwf/ecmwf_bufr_lib/bufrdc_000403/data/syno_1.bufr
    #
    # ./example_programs/bufr_to_ascii.py -5 -c -o tmp.csv \
    #     -i ../BUFR_test_files/synop_knmi_via_ko_janssen/MSSAEOL_00002950.b
    #
    names_to_be_selected = ['temperature', 'wind']
    names_to_be_excluded = ['minimum', 'maximum']

    write_names_and_units_just_once = True

    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False,
                     verbose=False, expand_flags=expand_flags)
    msg_nr = 0
    not_yet_printed = True

    try:
        while True:
            try:
                bob.get_next_msg()
            except EOFError:
                break
            msg_nr += 1

            # since this example assumes a bufr file using delayed
            # replication always request the header for each subset
            nsubsets = bob.get_num_subsets()
            for subs in range(1, nsubsets + 1):
                print('==> subset ', subs)

                (list_of_names, list_of_units) = \
                    bob.get_names_and_units(subs)
                data = bob.get_subset_values(subs)

                selected_names = []
                selected_units = []
                selected_values = []
                for i, name in enumerate(list_of_names):
                    # the filter strings get their own loop variable;
                    # reusing 'name' would compare each filter string
                    # with itself and reject every element
                    lowered = name.lower()
                    selected = (
                        any(wanted in lowered
                            for wanted in names_to_be_selected) and
                        not any(unwanted in lowered
                                for unwanted in names_to_be_excluded))

                    if selected:
                        selected_names.append(list_of_names[i])
                        selected_units.append(list_of_units[i])
                        selected_values.append(data[i])

                if not selected_values:
                    print('NO DATA SELECTED for BUFR message %d and subset %d!'
                          % (msg_nr, subs))
                    continue

                if write_names_and_units_just_once and not_yet_printed:
                    output_fd.write('"subset nr"' + separator +
                                    separator.join(selected_names) + "\n")
                    output_fd.write('""' + separator +
                                    separator.join(selected_units) + "\n")
                    not_yet_printed = False

                output_fd.write(
                    str(subs) + separator +
                    separator.join(str(val) for val in selected_values) +
                    "\n")

            print('=' * 25)
            print('converted BUFR msg nr. ', msg_nr)
            print('=' * 25)
            if (max_msg_nr > 0) and (msg_nr >= max_msg_nr):
                print('skipping remainder of this BUFR file')
                break
    finally:
        # close the file, also when decoding or writing failed
        bob.close()

    if msg_nr == 0:
        print('no BUFR messages found, are you sure this is a BUFR file?')
if ((max_msg_nr > 0) and (msg_nr >= max_msg_nr)): print 'skipping remainder of this BUFR file' break # close the file bob.close() if msg_nr == 0: print 'no BUFR messages found, are you sure this is a BUFR file?' # #] if __name__ == '__main__': bufrdir = '/net/bhw379/nobackup/users/plas/temp/BUFRdata/' bufrfile = os.path.join(bufrdir, 'BUFR.radarv.bewid') os.environ['BUFR_TABLES'] = os.path.join(bufrdir, 'BUFRtables') bob = BUFRReader(bufrfile, warn_about_bufr_size=False) bob.get_next_msg() names = bob.get_names() units = bob.get_units() print set(names) print 20 * '=' print set(units) for n, u in zip(names, units): if "dB" in u: print n, u outf = open('radarBUFR.dat', 'w') print_bufr_content1(bufrfile, outf, ',', 5)
def sort_msgs(input_bufr_file):
    # #[
    """
    A little example routine to demonstrate how to extract BUFR
    messages from a BUFR file, sort them, and write them out again
    to other files.

    Every message is appended to a file whose name is derived from its
    unexpanded descriptor list by construct_unique_filename, so all
    messages sharing a template end up in the same file. Afterwards a
    summary is printed that tells how many templates were found and
    for how many of them decoding failed.

    Arguments:
    input_bufr_file -- name of the BUFR file to read

    Returns a list with the names of the files that were written.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False)

    # maps output file name ->
    # [open file object, message count, can_be_decoded, descriptor list]
    files_dict = {}

    try:
        num_msgs = bob.rbf.get_num_bufr_msgs()
        # report progress roughly 20 times per file
        progress_step = max(1, num_msgs // 20)

        msg_nr = 0
        while True:
            can_be_decoded = False
            try:
                bob.get_next_msg()
                can_be_decoded = True
            except EOFError:
                break
            except KeyError:
                # allow sorting of BUFR messages that cannot be decoded
                # because the needed key is not in the available set of
                # BUFR table files. This tool only uses unexpanded
                # descriptors so ability to decode should not be
                # required. A user still may wish to first sort, and
                # then decode a subset of messages that can be decoded.
                pass

            msg_nr += 1
            if msg_nr % progress_step == 0:
                print('handling message nr {} out of {}'.format(msg_nr,
                                                                num_msgs))

            list_of_unexp_descr = bob.bufr_obj.py_unexp_descr_list
            output_filename = construct_unique_filename(list_of_unexp_descr)

            if output_filename in files_dict:
                entry = files_dict[output_filename]
                entry[1] += 1  # increment count
            else:
                # the decodability flag is taken from the first
                # message that uses this template
                entry = [open(output_filename, 'wb'), 1,
                         can_be_decoded, list_of_unexp_descr]
                files_dict[output_filename] = entry
            entry[0].write(bob.bufr_obj.encoded_message)
    finally:
        # release the input file and every output file,
        # also when a message could not be handled
        bob.close()
        for entry in files_dict.values():
            entry[0].close()

    num_that_can_be_decoded = 0
    num_that_cannot_be_decoded = 0
    for k in files_dict:
        _, count, can_be_decoded, list_of_unexp_descr = files_dict[k]
        print('file {} contains {} messages'.format(k[:25], count))
        if can_be_decoded:
            num_that_can_be_decoded += 1
        else:
            num_that_cannot_be_decoded += 1
            # check to see if local descriptors are present
            for descr in list_of_unexp_descr:
                if int(descr[3:]) >= 192:
                    print('==>A local descriptor definition is present: ',
                          descr)
                    print('==>this template cannot be decoded with ' +
                          'standard WMO BUFR tables.')

    print('Sorting results:')
    print('BUFR messages with {} different templates are present in this file'.
          format(num_that_can_be_decoded + num_that_cannot_be_decoded))
    if num_that_cannot_be_decoded > 0:
        print('decoding is not possible for {} templates.'.
              format(num_that_cannot_be_decoded))

    # return a real list (not a dict view) so callers can index it
    return list(files_dict)
def reopen_bufr_file(input_bufr_file):
    # #[ open bufr file
    '''
    Open a bufr file for reading and print its content.

    The BUFR tables named 'B' + TABLE_NAME and 'D' + TABLE_NAME are
    selected, the first message is loaded, and depending on the module
    level switch USE_DELAYED_REPLICATION either two subsets or the
    full 2D array of values is printed, followed by a few reads of
    element 3.

    Arguments:
    input_bufr_file -- name of the BUFR file to read

    Returns nothing; all results are printed to stdout.
    '''
    print('*' * 50)
    from pybufr_ecmwf.bufr import BUFRReader

    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False)
    try:
        bob.setup_tables(table_b_to_use='B' + TABLE_NAME,
                         table_d_to_use='D' + TABLE_NAME)

        bob.get_next_msg()
        print('num_subsets: ', bob.get_num_subsets())

        if USE_DELAYED_REPLICATION:
            data1 = bob.get_subset_values(0)
            print('data1 = ', data1)
            data2 = bob.get_subset_values(1)
            print('data2 = ', data2)
        else:
            print('num_elements: ', bob.get_num_elements())
            print(bob.get_names())
            print(bob.get_units())
            data = bob.get_values_as_2d_array()
            print(data.shape)
            print(data)
            print('bob.bufr_obj.values = ')
            print(bob.bufr_obj.values)

        # NOTE(review): these reads are assumed to run for both
        # settings of USE_DELAYED_REPLICATION -- confirm against the
        # original layout of this function
        textdata = bob.get_value(3, 0)
        print('textdata(3,0)', textdata)
        textdata = bob.get_value(3, 0, get_cval=True)
        print('textdata(3,0)', textdata)
        textdata = bob.get_values(3, get_cval=True)
        print('textdata(3,:)', textdata)
    finally:
        # close the file, also when decoding failed
        bob.close()
def select_subsets(input_bufr_file, output_bufr_file):
    # #[ select on subsets
    """
    Select data and write out again.

    Every message of the input file is decoded, every other subset
    (index 0, 2, 4, ...) is selected, and the selection is encoded
    again and written to the output file.

    Arguments:
    input_bufr_file  -- name of the BUFR file to read
    output_bufr_file -- name of the BUFR file to write

    Returns nothing.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False)
    msg_nr = 0

    try:
        # open the file for writing
        rbf_out = RawBUFRFile()
        rbf_out.open(output_bufr_file, 'wb')

        try:
            while True:
                try:
                    bob.get_next_msg()
                except EOFError:
                    break
                msg_nr += 1

                data = bob.get_values_as_2d_array()
                print('data.shape = ', data.shape)

                if data.shape[0] * data.shape[1] == 0:
                    print('NO DATA FOUND! this seems an empty BUFR message !')
                    continue

                # select every other subset
                new_data = data[::2, :]
                print('new_data.shape = ', new_data.shape)

                bufr_obj = bob.bufr_obj
                nsub = bufr_obj.get_num_subsets()
                # floor division keeps this descriptor count an
                # integer on python 2 as well as on python 3
                n_exp_descr = len(bufr_obj.values) // nsub
                bufr_obj.fill_descriptor_list(
                    nr_of_expanded_descriptors=n_exp_descr)
                bufr_obj.ktdlst = bufr_obj.get_descriptor_list()

                delayed_repl_data = bufr_obj.derive_delayed_repl_factors()
                bufr_obj.fill_delayed_repl_data(delayed_repl_data)

                new_nsub = new_data.shape[0]
                bufr_obj.nr_subsets = new_nsub

                btm = BufrTemplate()
                btm.add_descriptors(*bufr_obj.ktdlst)
                btm.nr_of_delayed_repl_factors = 1
                btm.del_repl_max_nr_of_repeats_list = list(delayed_repl_data)
                bufr_obj.register_and_expand_descriptors(btm)

                # activate this one if the encoding crashes
                # without clear cause:
                # bufr_obj.estimated_num_bytes_for_encoding = 25000

                # the replication factors are repeated once per subset
                bufr_obj.kdate = new_nsub * list(delayed_repl_data)

                print('bob.bufr_obj.cvals.shape = ', bufr_obj.cvals.shape)
                bufr_obj.encode_data(new_data, bufr_obj.cvals[:32, :])
                rbf_out.write_raw_bufr_msg(bufr_obj.encoded_message)

                print('converted BUFR msg nr. ', msg_nr)
        finally:
            # close the output file, also when encoding failed
            rbf_out.close()
    finally:
        # close the input file, also when decoding failed
        bob.close()

    if msg_nr == 0:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def _write_quoted_header_row(output_fd, first_field, labels, separator):
    """
    Write one header row to output_fd: first_field, followed by every
    label wrapped in double quotes, or by the placeholder "[NO DATA]"
    when labels is empty.
    """
    if labels:
        quoted = separator.join('"' + label + '"' for label in labels)
    else:
        quoted = '"[NO DATA]"'
    output_fd.write(first_field + separator + quoted + '\n')


def print_bufr_content1(input_bufr_file, output_fd, separator,
                        max_msg_nr, expand_flags):
    #  #[ implementation 1
    """
    example implementation using the BUFRReader class, iterating
    over the messages in the file and over the data items that each
    message yields.

    A header with names and units is written once, taken from the
    first data item that is found. After that one line is written for
    each subset, starting with the subset number.

    Parameters:
        input_bufr_file: name of the BUFR file to read.
        output_fd: object with a write() method that receives the text.
        separator: string that is placed between the fields of a line.
        max_msg_nr: maximum number of messages to convert; a value
            that is not positive disables this limit.
        expand_flags: passed on to BUFRReader.

    Raises:
        SystemExit: if the unexpanded descriptor list of a message
            differs from the one that belongs to the header, since the
            header would then not match the data lines.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bufr = BUFRReader(input_bufr_file, warn_about_bufr_size=False,
                      expand_flags=expand_flags)
    msg_nr = -1

    # descriptor list of the message that supplied the header;
    # stays None as long as no header has been written
    list_of_unexp_descr_first_msg = None

    try:
        for msg_nr, msg in enumerate(bufr):
            list_of_unexp_descr = msg.get_unexp_descr_list()

            # stays None if this message yields no data items at all
            data = None

            for subs, msg_or_subset_data in enumerate(msg):
                # get the actual values
                data = msg_or_subset_data.data

                if list_of_unexp_descr_first_msg is None:
                    list_of_unexp_descr_first_msg = list_of_unexp_descr[:]

                    # add header strings
                    _write_quoted_header_row(output_fd, '"subset nr"',
                                             msg_or_subset_data.names,
                                             separator)
                    _write_quoted_header_row(output_fd, '""',
                                             msg_or_subset_data.units,
                                             separator)

                try:
                    if len(numpy.shape(data)) == 1:
                        # 1D data is returned if character values may be
                        # present in the data (in case autoget_cval or
                        # expand_flags are active)
                        # it may also happen if the message uses delayed
                        # replication and has variable lengths for the
                        # different subsets
                        subs_cnt = msg_or_subset_data.current_subset
                        output_fd.write(
                            str(subs_cnt) + separator +
                            separator.join(str(val) for val in data[:]) +
                            "\n")
                    else:
                        for subs_cnt in range(numpy.shape(data)[0]):
                            output_fd.write(
                                str(subs_cnt + 1) + separator +
                                separator.join(
                                    str(val) for val in data[subs_cnt, :]) +
                                "\n")
                except TypeError:
                    # in case of delayed replication or when character
                    # strings are present (i.e. expand_flags = True) data
                    # will be returned as a 1D list in stead of a 2D numpy
                    # array. This generates a TypeError in the
                    # data[subs_cnt, :] indexing above
                    output_fd.write(
                        str(subs + 1) + separator +
                        separator.join(str(val) for val in data[:]) + "\n")

            # only compare when a header was written; without a header
            # there is nothing the data lines could disagree with
            if (list_of_unexp_descr_first_msg is not None and
                    list(list_of_unexp_descr) !=
                    list(list_of_unexp_descr_first_msg)):
                print('\n\n')
                print('ERROR: it seems different types of BUFR messages')
                print('are mixed in this BUFR file, meaning that the list of')
                print('descriptor names and units printed on the first 2 output')
                print('lines will not match with all lines of data.')
                print('To prevent confusion, therefore decoding is halted')
                print('It is recommended to first sort BUFR messages by type')
                print('before converting them to ascii or csv.')
                print('The example script sort_bufr_msgs.py can be used')
                print('to sort a BUFR file.')
                print('\n\n')
                print('Detailed info:')
                print('list_of_unexp_descr != list_of_unexp_descr_first_msg !')
                print('list_of_unexp_descr = ', list_of_unexp_descr)
                print('list_of_unexp_descr_first_msg = ',
                      list_of_unexp_descr_first_msg)
                sys.exit(1)

            if data is None or numpy.shape(data)[0] == 0:
                print('NO DATA FOUND! this seems an empty BUFR message !')
                continue

            print('converted BUFR msg nr. ', msg_nr + 1)
            # msg_nr counts from zero, so msg_nr + 1 messages were handled
            if (max_msg_nr > 0) and (msg_nr + 1 >= max_msg_nr):
                print('skipping remainder of this BUFR file')
                break
    finally:
        # close the file, also when decoding fails or is halted
        bufr.close()

    if msg_nr == -1:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def print_bufr_content2(input_bufr_file, output_fd, separator,
                        max_msg_nr, expand_flags):
    #  #[ implementation 2
    """
    example implementation using the BUFRReader class
    combined with the get_value method

    For the first message that is loaded a header with the quoted names
    and the quoted units is written to output_fd. For every subset of
    every message one line is written, holding the subset number and
    the values retrieved one by one with get_value. Conversion stops
    after max_msg_nr messages if max_msg_nr is positive.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, expand_flags=expand_flags)

    msg_nr = 0
    while True:
        try:
            bob.get_next_msg()
        except EOFError:
            break
        msg_nr += 1

        # the header is only added for the first message
        if bob.msg_loaded == 1:
            names = bob.get_names()
            units = bob.get_units()
            for lead, labels in (('"subset nr"', names), ('""', units)):
                if labels:
                    quoted = separator.join(
                        '"'+label+'"' for label in labels)
                else:
                    quoted = '"[NO DATA]"'
                output_fd.write(lead+separator+quoted+'\n')

        # subsets are numbered starting at 1
        for subset_nr in range(1, bob.get_num_subsets()+1):
            values = [bob.get_value(descr_nr, subset_nr, autoget_cval=True)
                      for descr_nr in range(bob.get_num_elements())]
            line = (str(subset_nr)+separator+
                    separator.join(str(val) for val in values)+"\n")
            output_fd.write(line)

        print('converted BUFR msg nr. ', msg_nr)
        if max_msg_nr > 0 and msg_nr >= max_msg_nr:
            print('skipping remainder of this BUFR file')
            break

    # close the file
    bob.close()
    if msg_nr == 0:
        print('no BUFR messages found, are you sure this is a BUFR file?')
def print_bufr_content4(input_bufr_file, output_fd, separator,
                        max_msg_nr, expand_flags):
    #  #[ implementation 4
    """
    example implementation using the BUFRReader class
    to decode a bufr file using delayed replication.
    Since these files may have different descriptor lists
    for each subset, a different call pattern is needed.

    For every subset that holds data three lines are written to
    output_fd: the names, the units, and the subset number followed
    by the values. Conversion stops after max_msg_nr messages if
    max_msg_nr is positive.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bob = BUFRReader(input_bufr_file, warn_about_bufr_size=False,
                     verbose=False, expand_flags=expand_flags)

    msg_nr = 0
    while True:
        try:
            bob.get_next_msg()
        except EOFError:
            break
        msg_nr += 1

        # since this example assumes a bufr file using delayed replication
        # the header is requested and added again for each subset
        for subset_nr in range(1, bob.get_num_subsets()+1):
            names, units = bob.get_names_and_units(subset_nr)
            values = bob.get_subset_values(subset_nr, autoget_cval=True)

            if numpy.shape(values)[0] == 0:
                print('NO DATA FOUND! this seems an empty BUFR message !')
                continue

            # names line, units line and data line share one layout:
            # a leading field, a separator, and the joined fields
            rows = (('"subset nr"', names),
                    ('""', units),
                    (str(subset_nr), (str(val) for val in values[:])))
            for lead, fields in rows:
                output_fd.write(lead+separator+
                                separator.join(fields)+"\n")

        print('converted BUFR msg nr. ', msg_nr)
        if max_msg_nr > 0 and msg_nr >= max_msg_nr:
            print('skipping remainder of this BUFR file')
            break

    # close the file
    bob.close()
    if msg_nr == 0:
        print('no BUFR messages found, are you sure this is a BUFR file?')
msg['LATI'] = [55.2, 66.3, 77.4] msg['LONG'] = [5.1, 6.2, 7.3] # debug print('values: ', list(msg.values)) msg.write_msg_to_file() bwr.close() ############################## # reopen the BUFR file as test ############################## print('*' * 50) input_bufr_file = output_bufr_file bufr = BUFRReader(input_bufr_file, warn_about_bufr_size=False) # just 1 msg in this test file, so no looping needed for msg in bufr: print('num_subsets = ', msg.get_num_subsets()) for subs, msg_or_subset_data in enumerate(msg): list_of_names = msg_or_subset_data.names list_of_units = msg_or_subset_data.units data = msg_or_subset_data.data print('"subset nr"' + ',' + ','.join(list_of_names)) print('""' + ',' + ','.join(list_of_units)) print(str(subs + 1) + ',' + ','.join(str(val) for val in data[:])) bufr.close()
def print_bufr_content4(input_bufr_file, output_fd, separator,
                        max_msg_nr, expand_flags, expand_strings,
                        descr_multiplier):
    #  #[ implementation 4
    """
    example implementation using the BUFRReader class
    to decode a bufr file using delayed replication.
    Since these files may have different descriptor lists
    for each subset, a different call pattern is needed.

    For every data item that a message yields, the names and the units
    are written to output_fd, followed by one line for each subset
    holding the subset number and the values.

    Parameters:
        input_bufr_file: name of the BUFR file to read.
        output_fd: object with a write() method that receives the text.
        separator: string that is placed between the fields of a line.
        max_msg_nr: maximum number of messages to convert; a value
            that is not positive disables this limit.
        expand_flags: passed on to BUFRReader.
        expand_strings: passed on to BUFRReader.
        descr_multiplier: passed on to tune_decoding_parameters as
            nr_of_descriptors_multiplier.
    """
    # get an instance of the BUFR class
    # which automatically opens the file for reading and decodes it
    bufr = BUFRReader(input_bufr_file, warn_about_bufr_size=False,
                      verbose=False, expand_flags=expand_flags,
                      expand_strings=expand_strings)
    msg_nr = -1

    try:
        bufr.tune_decoding_parameters(
            nr_of_descriptors_multiplier=descr_multiplier)

        for msg_nr, msg in enumerate(bufr):
            # since this example assumes a bufr file using delayed
            # replication always add the header for each data item
            for msg_or_subs_nr, msg_or_subset_data in enumerate(msg):
                list_of_names = msg_or_subset_data.names
                list_of_units = msg_or_subset_data.units
                data = msg_or_subset_data.data

                # numpy.shape is used in stead of data.shape, since it
                # also accepts data that is not a numpy array
                data_shape = numpy.shape(data)
                if data_shape[0] == 0:
                    print('NO DATA FOUND! this seems an empty BUFR message !')
                    continue

                # add header strings
                output_fd.write('"subset nr"'+separator+
                                separator.join(list_of_names) + "\n")
                output_fd.write('""'+separator+
                                separator.join(list_of_units) + "\n")

                if len(data_shape) == 1:
                    # we are walking over subsets
                    output_fd.write(str(msg_or_subs_nr+1)+separator+
                                    separator.join(to_str(val)
                                                   for val in data[:])+
                                    "\n")
                else:
                    # we are getting a 2D array as result,
                    # holding one row for each subset
                    for subs, row in enumerate(data):
                        output_fd.write(str(subs+1)+separator+
                                        separator.join(to_str(val)
                                                       for val in row)+
                                        "\n")

            print('converted BUFR msg nr. ', msg_nr+1)
            # msg_nr counts from zero, so msg_nr+1 messages were handled;
            # the limit only applies if max_msg_nr is positive
            if (max_msg_nr > 0) and (msg_nr+1 >= max_msg_nr):
                print('skipping remainder of this BUFR file')
                break
    finally:
        # close the file, also when decoding fails
        bufr.close()

    if msg_nr == -1:
        print('no BUFR messages found, are you sure this is a BUFR file?')