def command_compile(ns):
    """
    Command to compile the given descriptors.
    """
    from pybufrkit.templatecompiler import TemplateCompiler
    template_compiler = TemplateCompiler()

    if os.path.exists(ns.input):
        decoder = Decoder(definitions_dir=ns.definitions_directory,
                          tables_root_dir=ns.tables_root_directory)
        with open(ns.input, 'rb') as ins:
            bufr_message = decoder.process(ins.read(), file_path=ns.input, info_only=True)
        template, table_group = bufr_message.build_template(
            ns.tables_root_directory, normalize=1)
    else:
        table_group = TableGroupCacheManager.get_table_group(
            ns.tables_root_directory, ns.master_table_number,
            ns.originating_centre, ns.originating_subcentre,
            ns.master_table_version, ns.local_table_version)
        descriptor_ids = [x.strip() for x in ns.input.split(',')]
        template = table_group.template_from_ids(*descriptor_ids)

    compiled_template = template_compiler.process(template, table_group)
    print(json.dumps(compiled_template.to_dict()))
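For reference, a minimal programmatic sketch of the same compile path, assuming the CLI option defaults are None so that the packaged default tables are selected (both the None arguments and the example descriptor ID 309052 are assumptions, not taken from the snippet above):

import json
from pybufrkit.tables import TableGroupCacheManager
from pybufrkit.templatecompiler import TemplateCompiler

# assumption: passing None selects the default tables, as the CLI does
table_group = TableGroupCacheManager.get_table_group(None, None, None, None, None, None)
template = table_group.template_from_ids('309052')  # example descriptor ID
compiled = TemplateCompiler().process(template, table_group)
print(json.dumps(compiled.to_dict()))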
def main():
    BUFR_FILE = sys.argv[1]
    print("input: %s" % BUFR_FILE)
    basename = os.path.basename(BUFR_FILE)
    fname = basename.split(".")[0]
    gi = "output/%s.txt" % fname
    print("output: %s" % gi)

    decoder = Decoder()
    with open(BUFR_FILE, 'rb') as ins:
        bufr_message = decoder.process(ins.read())

    slat = query_1d(bufr_message, '005001')
    slon = query_1d(bufr_message, '006001')
    #st = query_1d(bufr_message, '001001')
    #id = query_1d(bufr_message, '001002')
    #stid = str(st) + str("%0.3i" % id)

    # np.float was removed in NumPy 1.20+; use the builtin float instead
    time = np.array(query(bufr_message, '004086'))
    pres = np.array(query(bufr_message, '007004'), dtype=float)
    temp = np.array(query(bufr_message, '012101'), dtype=float)
    dewp = np.array(query(bufr_message, '012103'), dtype=float)
    geop = np.array(query(bufr_message, '010009'), dtype=float)
    wdir = np.array(query(bufr_message, '011001'), dtype=float)
    wspd = np.array(query(bufr_message, '011002'), dtype=float)
    dlat = np.array(query(bufr_message, '005015'), dtype=float)
    dlon = np.array(query(bufr_message, '006015'), dtype=float)
    lat = slat + dlat
    lon = slon + dlon

    columns = ["ptime", "pres", "temp", "dewp", "geop", "wdir", "wspd", "lat", "lon"]
    df = pd.DataFrame([time, pres, temp, dewp, geop, wdir, wspd, lat, lon])
    df2 = df.T
    df2.columns = columns
    df3 = df2[df2.ptime >= 0]
    # na_rep must be a string; passing np.nan would just be rendered via str()
    df3.to_csv(gi, index=False, na_rep='nan', sep=",")
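The `query` and `query_1d` helpers are assumed by this script but not defined in it. A sketch of what they might look like on top of pybufrkit's data-query API (the names follow the script; the implementation is an assumption):

from pybufrkit.dataquery import NodePathParser, DataQuerent

_querent = DataQuerent(NodePathParser())

def query(bufr_message, descriptor):
    # flat list of values for the descriptor in the first subset
    return _querent.query(bufr_message, descriptor).all_values(flat=True)[0]

def query_1d(bufr_message, descriptor):
    # single scalar value, e.g. station latitude 005001
    return query(bufr_message, descriptor)[0]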
def command_info(ns):
    """
    Command to show metadata information of given files from command line.
    """
    flat_text_render = FlatTextRenderer()
    decoder = Decoder(definitions_dir=ns.definitions_directory,
                      tables_root_dir=ns.tables_root_directory)

    def show_message_info(m):
        bufr_template, table_group = m.build_template(
            ns.tables_root_directory, normalize=1)
        print(flat_text_render.render(m))
        if ns.template:
            print(flat_text_render.render(bufr_template))

    for filename in ns.filenames:
        with open(filename, 'rb') as ins:
            s = ins.read()

        if ns.multiple_messages:
            for bufr_message in generate_bufr_message(decoder, s,
                                                      file_path=filename, info_only=True):
                show_message_info(bufr_message)
        elif ns.count_only:
            count = 0
            for _ in generate_bufr_message(decoder, s, info_only=True):
                count += 1
            print('{}: {}'.format(filename, count))
        else:
            bufr_message = decoder.process(s, file_path=filename, info_only=True)
            show_message_info(bufr_message)
def command_subset(ns):
    """
    Command to subset and save the given BUFR file.
    """
    decoder = Decoder(
        definitions_dir=ns.definitions_directory,
        tables_root_dir=ns.tables_root_directory,
        compiled_template_cache_max=ns.compiled_template_cache_max)
    encoder = Encoder(
        definitions_dir=ns.definitions_directory,
        tables_root_dir=ns.tables_root_directory,
        compiled_template_cache_max=ns.compiled_template_cache_max)

    subset_indices = [int(x) for x in ns.subset_indices.split(',')]
    with open(ns.filename, 'rb') as ins:
        s = ins.read()

    bufr_message = decoder.process(
        s, file_path=ns.filename, wire_template_data=False,
        ignore_value_expectation=ns.ignore_value_expectation)

    data = bufr_message.subset(subset_indices)
    nb = encoder.process(data, file_path=ns.output_filename, wire_template_data=False)

    with open(ns.output_filename, 'wb') as outs:
        outs.write(nb.serialized_bytes)
def command_query(ns):
    """
    Command to query given BUFR files.
    """
    decoder = Decoder(definitions_dir=ns.definitions_directory,
                      tables_root_dir=ns.tables_root_directory,
                      compiled_template_cache_max=ns.compiled_template_cache_max)

    for filename in ns.filenames:
        with open(filename, 'rb') as ins:
            s = ins.read()

        if ns.query_string.strip()[0] == '%':
            bufr_message = decoder.process(s, file_path=filename, info_only=True)
            from pybufrkit.mdquery import MetadataExprParser, MetadataQuerent
            querent = MetadataQuerent(MetadataExprParser())
            value = querent.query(bufr_message, ns.query_string)
            print(filename)
            print(value)
        else:
            bufr_message = decoder.process(s, file_path=filename,
                                           wire_template_data=True,
                                           ignore_value_expectation=ns.ignore_value_expectation)
            from pybufrkit.dataquery import NodePathParser, DataQuerent
            querent = DataQuerent(NodePathParser())
            query_result = querent.query(bufr_message, ns.query_string)

            if ns.json:
                if ns.nested:
                    print(json.dumps(NestedJsonRenderer().render(query_result),
                                     **JSON_DUMPS_KWARGS))
                else:
                    print(json.dumps(FlatJsonRenderer().render(query_result),
                                     **JSON_DUMPS_KWARGS))
            else:
                print(filename)
                print(FlatTextRenderer().render(query_result))
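Outside the CLI, the same decode-query-render chain can be written directly; as the command above shows, the renderers accept a query result as well as a whole message. A minimal sketch (the file name is a placeholder):

import json
from pybufrkit.decoder import Decoder
from pybufrkit.dataquery import NodePathParser, DataQuerent
from pybufrkit.renderer import FlatJsonRenderer

with open('example.bufr', 'rb') as ins:   # hypothetical input file
    bufr_message = Decoder().process(ins.read())

result = DataQuerent(NodePathParser()).query(bufr_message, '/301011/004001')
print(json.dumps(FlatJsonRenderer().render(result)))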
def command_script(ns):
    """
    Command to execute script against given BUFR files.
    """
    from pybufrkit.script import ScriptRunner

    if ns.from_file:
        with open(ns.input) as ins:
            script_string = ins.read()
    else:
        if ns.input == '-':
            script_string = sys.stdin.read()
        else:
            script_string = ns.input

    script_runner = ScriptRunner(script_string,
                                 data_values_nest_level=ns.data_values_nest_level)
    decoder = Decoder(
        definitions_dir=ns.definitions_directory,
        tables_root_dir=ns.tables_root_directory,
        compiled_template_cache_max=ns.compiled_template_cache_max)

    for filename in ns.filenames:
        with open(filename, 'rb') as ins:
            s = ins.read()
        bufr_message = decoder.process(
            s, file_path=filename, wire_template_data=True,
            ignore_value_expectation=ns.ignore_value_expectation,
            info_only=script_runner.metadata_only)
        script_runner.run(bufr_message)
class EncoderTests(unittest.TestCase):

    def setUp(self):
        self.encoder = Encoder()
        self.decoder = Decoder()
        self.filename_stubs = [
            'IUSK73_AMMC_182300',
            'rado_250',           # uncompressed with 222000, 224000, 236000
            '207003',             # compressed with delayed replication
            'amv2_87',            # compressed with 222000
            'b005_89',            # compressed with 222000 and 224000 (1st order stats)
            'profiler_european',  # uncompressed with 204001 associated fields
            'jaso_214',           # compressed with 204001 associated fields
            'uegabe',             # uncompressed with 204004 associated fields
            'asr3_190',           # compressed with complex replication and 222000, 224000
            'b002_95',            # uncompressed with skipped local descriptors
            'g2nd_208',           # compressed with identical string values for all subsets
            'ISMD01_OKPR',        # compressed with different string values for subsets
            'mpco_217',
        ]

    def tearDown(self):
        pass

    def do_test(self, filename_stub):
        with open(os.path.join(DATA_DIR, filename_stub + '.json')) as ins:
            s = ins.read()
        bins = self.encoder.encode(s)
        self.decoder.decode(bins.bytes)

        assert len(self.encoder.decoded_values_all_subsets) == \
            len(self.decoder.decoded_values_all_subsets)
        for idx_subset in range(len(self.encoder.decoded_values_all_subsets)):
            encoder_values = self.encoder.decoded_values_all_subsets[idx_subset]
            decoder_values = self.decoder.decoded_values_all_subsets[idx_subset]
            assert len(encoder_values) == len(decoder_values)
            for idx_value in range(len(encoder_values)):
                if isinstance(encoder_values[idx_value], six.text_type):
                    encoder_value = encoder_values[idx_value].encode('latin-1')
                else:
                    encoder_value = encoder_values[idx_value]
                assert encoder_value == decoder_values[idx_value], \
                    '{!r} != {!r}'.format(encoder_value, decoder_values[idx_value])

    def test_encode(self):
        print()
        for filename_stub in self.filename_stubs:
            print(filename_stub)
            self.do_test(filename_stub)
def command_decode(ns):
    """
    Command to decode given files from command line.
    """
    decoder = Decoder(
        definitions_dir=ns.definitions_directory,
        tables_root_dir=ns.tables_root_directory,
        compiled_template_cache_max=ns.compiled_template_cache_max)

    def show_message(m):
        if ns.attributed:
            m.wire()
            if ns.json:
                print(json.dumps(NestedJsonRenderer().render(m), **JSON_DUMPS_KWARGS))
            else:
                print(NestedTextRenderer().render(m))
        else:
            if ns.json:
                print(json.dumps(FlatJsonRenderer().render(m), **JSON_DUMPS_KWARGS))
            else:
                print(FlatTextRenderer().render(m))

    for filename in ns.filenames:
        if filename != '-':
            with open(filename, 'rb') as ins:
                s = ins.read()
        else:
            s = sys.stdin.read()

        if ns.multiple_messages:
            for bufr_message in generate_bufr_message(
                    decoder, s,
                    continue_on_error=ns.continue_on_error,
                    file_path=filename, wire_template_data=False,
                    ignore_value_expectation=ns.ignore_value_expectation,
                    filter_expr=ns.filter):
                show_message(bufr_message)
        else:
            bufr_message = decoder.process(
                s, file_path=filename, wire_template_data=False,
                ignore_value_expectation=ns.ignore_value_expectation)
            show_message(bufr_message)
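Stripped of the CLI plumbing, the core decode-and-render flow used above reduces to a few lines (the file name is a placeholder):

from pybufrkit.decoder import Decoder
from pybufrkit.renderer import FlatTextRenderer

with open('example.bufr', 'rb') as ins:   # hypothetical input file
    bufr_message = Decoder().process(ins.read())
print(FlatTextRenderer().render(bufr_message))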
def setUp(self):
    self.decoder = Decoder()
    self.filename_stubs = [
        'IUSK73_AMMC_182300',
        'rado_250',           # uncompressed with 222000, 224000, 236000
        '207003',             # compressed with delayed replication
        'amv2_87',            # compressed with 222000
        'b005_89',            # compressed with 222000 and 224000 (1st order stats)
        'profiler_european',  # uncompressed with 204001 associated fields
        'jaso_214',           # compressed with 204001 associated fields
        'uegabe',             # uncompressed with 204004 associated fields
        'asr3_190',           # compressed with complex replication and 222000, 224000
        'b002_95',            # uncompressed with skipped local descriptors
        'g2nd_208',           # compressed with identical string values for all subsets
        'ISMD01_OKPR',        # compressed with different string values for subsets
        'mpco_217',
    ]
class BenchMarkTests(unittest.TestCase):

    def setUp(self):
        self.decoder = Decoder()

    def test(self):
        for filename in os.listdir(DATA_DIR):
            with open(os.path.join(DATA_DIR, filename), 'rb') as ins:
                print(filename)
                bufr_message = self.decoder.process(ins.read(), wire_template_data=True)
                self.assertIsNotNone(bufr_message)
def command_split(ns):
    """
    Command to split given files from command line into one file per BufrMessage.
    """
    decoder = Decoder(definitions_dir=ns.definitions_directory,
                      tables_root_dir=ns.tables_root_directory)

    for filename in ns.filenames:
        with open(filename, 'rb') as ins:
            s = ins.read()
        for idx, bufr_message in enumerate(
                generate_bufr_message(decoder, s, file_path=filename, info_only=True)):
            new_filename = '{}.{}'.format(filename, idx)
            print(new_filename)
            with open(new_filename, 'wb') as outs:
                outs.write(bufr_message.serialized_bytes)
class TemplateDataTests(unittest.TestCase):

    def setUp(self):
        self.decoder = Decoder()
        self.filename_stubs = [
            'IUSK73_AMMC_182300',
            'rado_250',           # uncompressed with 222000, 224000, 236000
            '207003',             # compressed with delayed replication
            'amv2_87',            # compressed with 222000
            'b005_89',            # compressed with 222000 and 224000 (1st order stats)
            'profiler_european',  # uncompressed with 204001 associated fields
            'jaso_214',           # compressed with 204001 associated fields
            'uegabe',             # uncompressed with 204004 associated fields
            'asr3_190',           # compressed with complex replication and 222000, 224000
            'b002_95',            # uncompressed with skipped local descriptors
            'g2nd_208',           # compressed with identical string values for all subsets
            'ISMD01_OKPR',        # compressed with different string values for subsets
            'mpco_217',
        ]

    def do_test(self, filename_stub):
        s = read_bufr_file(filename_stub + '.bufr')
        bufr_message = self.decoder.process(s, filename_stub)
        if filename_stub in ('207003', 'rado_250'):
            with open(os.path.join(DATA_DIR,
                                   '{}.datadump.cmp'.format(filename_stub))) as ins:
                cmp_str = ins.read()
            dump_str = NestedTextRenderer().render(bufr_message.template_data.value)
            # TODO: this is to fix the inconsistent int and long of bitstring on different OS
            dump_str = dump_str.replace('005040 ORBIT NUMBER 5258\n',
                                        '005040 ORBIT NUMBER 5258L\n')
            assert dump_str == cmp_str, dump_str
        else:
            NestedTextRenderer().render(bufr_message.template_data.value)

    def test_template_data(self):
        print()
        for filename_stub in self.filename_stubs:
            print(filename_stub)
            self.do_test(filename_stub)
def test_compiled_vs_noncompiled(self):
    decoder_noncompiled = Decoder()
    decoder_compiled = Decoder(compiled_template_cache_max=200)
    benchmark_data_dir = os.path.join(BASE_DIR, 'benchmark_data')
    for filename in os.listdir(benchmark_data_dir):
        with open(os.path.join(benchmark_data_dir, filename), 'rb') as ins:
            s = ins.read()
        bufr_message_1 = decoder_noncompiled.process(s)
        bufr_message_2 = decoder_compiled.process(s)
        assert bufr_message_1.template_data.value.decoded_descriptors_all_subsets == \
            bufr_message_2.template_data.value.decoded_descriptors_all_subsets
        assert bufr_message_1.template_data.value.decoded_values_all_subsets == \
            bufr_message_2.template_data.value.decoded_values_all_subsets
        assert bufr_message_1.template_data.value.bitmap_links_all_subsets == \
            bufr_message_2.template_data.value.bitmap_links_all_subsets
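The only difference between the two decoders above is the `compiled_template_cache_max` argument, which enables template compilation and caching. A rough wall-clock sketch of the effect (the input file name is a placeholder):

import time
from pybufrkit.decoder import Decoder

def time_decode(decoder, data, repeats=10):
    # crude timing; enough to show the effect of the compiled template cache
    start = time.time()
    for _ in range(repeats):
        decoder.process(data)
    return time.time() - start

with open('benchmark.bufr', 'rb') as ins:   # hypothetical input file
    data = ins.read()

print('non-compiled:', time_decode(Decoder(), data))
print('compiled:    ', time_decode(Decoder(compiled_template_cache_max=200), data))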
class DataQueryTests(unittest.TestCase):

    def setUp(self):
        self.decoder = Decoder()
        self.querent = DataQuerent(NodePathParser())

    def test_query_jaso_214(self):
        s = read_bufr_file('jaso_214.bufr')
        bufr_message = self.decoder.process(s)

        r1 = self.querent.query(bufr_message, '/301011/004001')
        assert r1.subset_indices() == list(range(128))
        assert r1.all_values(flat=True) == [[2012] for _ in range(128)]
        assert r1.all_values() == [[2012] for _ in range(128)]

        r2 = self.querent.query(bufr_message, '@[0]/301011/004001')
        assert r2.subset_indices() == [0]
        assert r2.all_values(flat=True) == [[2012]]
        assert r2.all_values() == [[2012]]

        r3 = self.querent.query(bufr_message, '@[::10]/301011/004001')
        assert r3.subset_indices() == [
            0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120
        ]
        assert r3.all_values(flat=True) == [[2012] for _ in range(13)]

        r4 = self.querent.query(bufr_message, '@[1]/123002/021062')
        assert r4.subset_indices() == [1]
        assert r4.all_values(flat=True) == [[11.28, 0.02, 14.78, 0.03]]
        assert r4.all_values() == [[[[11.28, 0.02], [14.78, 0.03]]]]

        r5 = self.querent.query(bufr_message, '@[2:7:2]/123002/021062[0]')
        assert r5.subset_indices() == [2, 4, 6]
        assert r5.all_values(flat=True) == [[11.32, 14.77], [11.54, 14.95], [11.65, 15.24]]
        assert r5.all_values() == [
            [  # 1st subset
                [  # replication envelope
                    [11.32],  # replication block 1
                    [14.77]   # replication block 2
                ]
            ],
            [  # 2nd subset
                [[11.54], [14.95]]
            ],
            [  # 3rd subset
                [[11.65], [15.24]]
            ]
        ]

        r6 = self.querent.query(bufr_message, '@[-1]/123002/021062')
        assert r6.subset_indices() == [127]
        assert r6.all_values(flat=True) == [[12.8, 0.06, 16.4, 0.05]]
        assert r6.all_values() == [[[[12.8, 0.06], [16.4, 0.05]]]]

        r7 = self.querent.query(bufr_message, '/123002/021062[0].A21062.031021')
        assert r7.subset_indices() == list(range(128))
        assert r7.all_values(flat=True) == [[1, 1] for _ in range(128)]
        assert r7.all_values() == [[[[1], [1]]] for _ in range(128)]

        r8 = self.querent.query(bufr_message, '/002173.A02173')
        assert r8.subset_indices() == list(range(128))
        assert r8.all_values(flat=True) == [[0] for _ in range(128)]
        assert r8.all_values() == [[0] for _ in range(128)]

    def test_query_207003(self):
        s = read_bufr_file('207003.bufr')
        bufr_message = self.decoder.process(s)

        r1 = self.querent.query(bufr_message, '/310060/301021/006001')
        assert r1.subset_indices() == [0, 1]
        assert r1.all_values() == [[24.54144], [24.3926]]

        r2 = self.querent.query(bufr_message, '/310060/104000.031002')
        assert r2.subset_indices() == [0, 1]
        assert r2.all_values() == [[5], [5]]

        r3 = self.querent.query(bufr_message, '@[-1]/310060/104000/005042')
        assert r3.subset_indices() == [1]
        assert r3.all_values(flat=True) == [[1, 2, 3, 4, 5]]
        assert r3.all_values() == [[[[1], [2], [3], [4], [5]]]]

    def test_query_ISMD01_OKPR(self):
        s = read_bufr_file('ISMD01_OKPR.bufr')
        bufr_message = self.decoder.process(s)

        r1 = self.querent.query(bufr_message, '/307080/301090/301004/001015')
        assert r1.subset_indices() == list(range(7))
        values = [
            [b'Primda '],
            [b'Kocelovice '],
            [b'Praha-Ruzyne '],
            [b'Liberec '],
            [b'Pribyslav '],
            [b'Brno-Turany '],
            [b'Ostrava-Mosnov '],
        ]
        assert r1.all_values(flat=True) == values
        assert r1.all_values() == values

    def test_query_amv2_87(self):
        s = read_bufr_file('amv2_87.bufr')
        bufr_message = self.decoder.process(s)

        r1 = self.querent.query(bufr_message, '/310195/303250/011001.033007')
        values = [
            [48, 35, 0], [54, 47, 0], [59, 68, 0], [56, 49, 0], [32, 37, 0], [48, 46, 0],
            [25, 31, 0], [49, 50, 0], [78, 75, 0], [84, 83, 0], [27, 34, 0], [60, 73, 0],
            [25, 31, 0], [26, 32, 0], [54, 68, 0], [32, 40, 0], [47, 49, 0], [31, 31, 0],
            [96, 95, 0], [91, 89, 0], [92, 98, 0], [82, 80, 0], [55, 62, 0], [50, 38, 0],
            [52, 54, 0], [89, 89, 0], [88, 86, 0], [94, 97, 0], [46, 42, 0], [65, 71, 0],
            [58, 49, 0], [67, 70, 0], [69, 73, 0], [60, 54, 0], [30, 37, 0], [36, 40, 0],
            [36, 40, 0], [30, 32, 0], [74, 91, 0], [73, 69, 0], [54, 56, 0], [93, 95, 0],
            [80, 87, 0], [88, 92, 0], [85, 83, 0], [50, 57, 0], [94, 97, 0], [25, 31, 0],
            [32, 40, 0], [49, 61, 0], [29, 37, 0], [26, 33, 0], [42, 53, 0], [34, 43, 0],
            [38, 47, 0], [33, 40, 0], [71, 79, 0], [43, 50, 0], [46, 57, 0], [49, 58, 0],
            [64, 79, 0], [70, 84, 0], [76, 94, 0], [74, 91, 0], [94, 98, 0], [67, 72, 0],
            [64, 76, 0], [82, 80, 0], [97, 98, 0], [82, 79, 0], [57, 48, 0], [68, 65, 0],
            [75, 69, 0], [67, 66, 0], [85, 91, 0], [68, 72, 0], [82, 86, 0], [38, 46, 0],
            [72, 79, 0], [43, 49, 0], [32, 34, 0], [39, 45, 0], [37, 43, 0], [78, 89, 0],
            [91, 98, 0], [92, 98, 0], [95, 96, 0], [90, 88, 0], [69, 69, 0], [64, 66, 0],
            [40, 49, 0], [54, 66, 0], [31, 35, 0], [76, 90, 0], [70, 82, 0], [60, 72, 0],
            [58, 71, 0], [41, 51, 0], [58, 59, 0], [57, 56, 0], [74, 82, 0], [75, 93, 0],
            [76, 93, 0], [82, 96, 0], [90, 97, 0], [96, 98, 0], [90, 98, 0], [89, 97, 0],
            [90, 97, 0], [89, 94, 0], [97, 98, 0], [80, 75, 0], [92, 92, 0], [83, 84, 0],
            [66, 66, 0], [34, 36, 0], [83, 88, 0], [87, 88, 0], [67, 69, 0], [85, 89, 0],
            [36, 44, 0], [40, 48, 0], [24, 30, 0], [58, 66, 0], [71, 65, 0], [91, 98, 0],
            [91, 97, 0], [97, 98, 0],
        ]
        assert r1.all_values(flat=True) == values
        assert r1.all_values() == values

        r2 = self.querent.query(bufr_message, '/310195/303250/011001.033007[1]')
        values = [
            [35], [47], [68], [49], [37], [46], [31], [50], [75], [83],
            [34], [73], [31], [32], [68], [40], [49], [31], [95], [89],
            [98], [80], [62], [38], [54], [89], [86], [97], [42], [71],
            [49], [70], [73], [54], [37], [40], [40], [32], [91], [69],
            [56], [95], [87], [92], [83], [57], [97], [31], [40], [61],
            [37], [33], [53], [43], [47], [40], [79], [50], [57], [58],
            [79], [84], [94], [91], [98], [72], [76], [80], [98], [79],
            [48], [65], [69], [66], [91], [72], [86], [46], [79], [49],
            [34], [45], [43], [89], [98], [98], [96], [88], [69], [66],
            [49], [66], [35], [90], [82], [72], [71], [51], [59], [56],
            [82], [93], [93], [96], [97], [98], [98], [97], [97], [94],
            [98], [75], [92], [84], [66], [36], [88], [88], [69], [89],
            [44], [48], [30], [66], [65], [98], [97], [98]
        ]
        assert r2.all_values(flat=True) == values
        assert r2.all_values() == values

    def test_query_asr3_190(self):
        s = read_bufr_file('asr3_190.bufr')
        bufr_message = self.decoder.process(s)

        r1 = self.querent.query(bufr_message,
                                '@[-1]/310028/101011/304037/012063.F12063')
        assert r1.subset_indices() == [127]
        assert r1.all_values(flat=True) == [[
            None, None, None, None, None, None, None, None, None, None,
            None, None, None, None, None, None, None, None,
            17.3, None, 17.3, None, None, 17.3,
            4.4, None, 4.3, None, None, 4.3,
            7.7, None, 7.6, None, None, 7.6,
            13.2, None, 13.2, None, None, 13.2,
            8.4, None, 8.4, None, None, 8.4,
            11.9, None, 11.9, None, None, 11.9,
            10.5, None, 10.4, None, None, 10.4,
            6.7, None, 6.6, None, None, 6.6
        ]]
        assert r1.all_values() == [[[
            [None, None, None, None, None, None],
            [None, None, None, None, None, None],
            [None, None, None, None, None, None],
            [17.3, None, 17.3, None, None, 17.3],
            [4.4, None, 4.3, None, None, 4.3],
            [7.7, None, 7.6, None, None, 7.6],
            [13.2, None, 13.2, None, None, 13.2],
            [8.4, None, 8.4, None, None, 8.4],
            [11.9, None, 11.9, None, None, 11.9],
            [10.5, None, 10.4, None, None, 10.4],
            [6.7, None, 6.6, None, None, 6.6],
        ]]]

        r2 = self.querent.query(
            bufr_message, '@[-2]/310028/101011/304037/012063.F12063.008023')
        assert r2.subset_indices() == [126]
        assert r2.all_values(flat=True) == [[10] * 66]
        assert r2.all_values() == [[[[10] * 6] * 11]]

    def test_query_mpco_217(self):
        s = read_bufr_file('mpco_217.bufr')
        bufr_message = self.decoder.process(s)

        r1 = self.querent.query(bufr_message, '@[-3:]/116000/106000/010004')
        assert r1.subset_indices() == [125, 126, 127]
        assert r1.all_values(flat=True) == [
            [10000.0, 20000.0, 30000.0, 40000.0, 50000.0,
             60000.0, 70000.0, 80000.0, 90000.0, 101109.2],
            [10000.0, 20000.0, 30000.0, 40000.0, 50000.0,
             60000.0, 70000.0, 80000.0, 90000.0, 101099.8],
            [10000.0, 20000.0, 30000.0, 40000.0, 50000.0,
             60000.0, 70000.0, 80000.0, 90000.0, 101090.1],
        ]
        assert r1.all_values() == [
            [[[[[10000.0], [20000.0], [30000.0], [40000.0], [50000.0],
                [60000.0], [70000.0], [80000.0], [90000.0], [101109.2]]]]],
            [[[[[10000.0], [20000.0], [30000.0], [40000.0], [50000.0],
                [60000.0], [70000.0], [80000.0], [90000.0], [101099.8]]]]],
            [[[[[10000.0], [20000.0], [30000.0], [40000.0], [50000.0],
                [60000.0], [70000.0], [80000.0], [90000.0], [101090.1]]]]],
        ]

    def test_query_rado_250(self):
        s = read_bufr_file('rado_250.bufr')
        bufr_message = self.decoder.process(s)

        r1 = self.querent.query(bufr_message,
                                '/310226/107000/103000/015037.F15037.008023')
        assert r1.all_values(flat=True) == [[13] * 247]
        assert r1.all_values() == [[[[[[13]]]] * 247]]

        r2 = self.querent.query(bufr_message, '/310226/107000.031002')
        assert r2.all_values(flat=True) == [[247]]
        assert r2.all_values() == [[247]]

        r3 = self.querent.query(bufr_message, '/310226/107000/103000.031001')
        assert r3.all_values(flat=True) == [[1] * 247]
        assert r3.all_values() == [[[[1]] * 247]]

    def test_descendant_ISMD01_OKPR(self):
        s = read_bufr_file('ISMD01_OKPR.bufr')
        bufr_message = self.decoder.process(s)

        r1 = self.querent.query(bufr_message, '020012')
        assert r1.all_values(flat=True) == [
            [62, 61, 60, 59, None, None],
            [36, 61, 60, 7, None, None],
            [36, 61, 60, 7, None, None],
            [36, 61, 60, 7, None, None],
            [36, 61, 60, 7, None, None],
            [36, 61, 60, 7, None, None],
            [30, 20, 11, 0, None, None],
        ]
        assert r1.all_values() == [
            [62, 61, 60, [[59]], [[None]], None],
            [36, 61, 60, [[7]], [[None]], None],
            [36, 61, 60, [[7]], [[None]], None],
            [36, 61, 60, [[7]], [[None]], None],
            [36, 61, 60, [[7]], [[None]], None],
            [36, 61, 60, [[7]], [[None]], None],
            [30, 20, 11, [[0]], [[None]], None],
        ]

        r2 = self.querent.query(bufr_message, '302035 > 020012')
        assert r2.all_values(flat=True) == [
            [62, 61, 60, 59],
            [36, 61, 60, 7],
            [36, 61, 60, 7],
            [36, 61, 60, 7],
            [36, 61, 60, 7],
            [36, 61, 60, 7],
            [30, 20, 11, 0],
        ]
        assert r2.all_values() == [
            [62, 61, 60, [[59]]],
            [36, 61, 60, [[7]]],
            [36, 61, 60, [[7]]],
            [36, 61, 60, [[7]]],
            [36, 61, 60, [[7]]],
            [36, 61, 60, [[7]]],
            [30, 20, 11, [[0]]],
        ]

        r3 = self.querent.query(bufr_message, '@[0] > 302035 > 020012')
        assert r3.all_values(flat=True) == [[62, 61, 60, 59]]
        assert r3.all_values() == [[62, 61, 60, [[59]]]]

        r4 = self.querent.query(bufr_message, '@[-3] > 302035/302004 > 020012')
        assert r4.all_values(flat=True) == [[36, 61, 60]]
        assert r4.all_values() == [[36, 61, 60]]

        r5 = self.querent.query(bufr_message, '008002')
        assert r5.all_values(flat=True) == [
            [5, 5, 11, 7, 8, 9, None],
            [7, 1, 12, 7, 8, 9, None],
            [7, 1, 12, 7, 8, 9, None],
            [7, 1, 12, 7, 8, 9, None],
            [7, 1, 12, 7, 8, 9, None],
            [7, 1, 12, 7, 8, 9, None],
            [0, 1, 12, 7, 8, 9, None],
        ]
        assert r5.all_values() == [
            [5, [[5]], [[11]], [[7], [8], [9]], None],
            [7, [[1]], [[12]], [[7], [8], [9]], None],
            [7, [[1]], [[12]], [[7], [8], [9]], None],
            [7, [[1]], [[12]], [[7], [8], [9]], None],
            [7, [[1]], [[12]], [[7], [8], [9]], None],
            [7, [[1]], [[12]], [[7], [8], [9]], None],
            [0, [[1]], [[12]], [[7], [8], [9]], None],
        ]

        r6 = self.querent.query(bufr_message, '@[4] > 302047 > 008002')
        assert r6.all_values(flat=True) == [[7, 8, 9]]
        assert r6.all_values() == [[[[7], [8], [9]]]]

    def test_descendant_mpco_217(self):
        s = read_bufr_file('mpco_217.bufr')
        bufr_message = self.decoder.process(s)

        r1 = self.querent.query(bufr_message, '@[0] > 010004')
        assert r1.all_values(flat=True) == [[
            10000.0, 20000.0, 30000.0, 40000.0, 50000.0,
            60000.0, 70000.0, 80000.0, 90000.0, 101025.2,
            10000.0, 20000.0, 30000.0, 40000.0, 50000.0,
            60000.0, 70000.0, 80000.0, 90000.0, 101025.2
        ]]
        assert r1.all_values() == [[
            [[10000.0], [20000.0], [30000.0], [40000.0], [50000.0],
             [60000.0], [70000.0], [80000.0], [90000.0], [101025.2]],
            [[[[10000.0], [20000.0], [30000.0], [40000.0], [50000.0],
               [60000.0], [70000.0], [80000.0], [90000.0], [101025.2]]]]
        ]]

        r2 = self.querent.query(bufr_message, '@[0]/116000 > 010004')
        assert r2.all_values(flat=True) == [[
            10000.0, 20000.0, 30000.0, 40000.0, 50000.0,
            60000.0, 70000.0, 80000.0, 90000.0, 101025.2
        ]]
        assert r2.all_values() == [[[[
            [[10000.0], [20000.0], [30000.0], [40000.0], [50000.0],
             [60000.0], [70000.0], [80000.0], [90000.0], [101025.2]]
        ]]]]

        r2 = self.querent.query(bufr_message, '@[0] > 010004[::10]')
        assert r2.all_values(flat=True) == [[
            10000.0, 20000.0, 30000.0, 40000.0, 50000.0,
            60000.0, 70000.0, 80000.0, 90000.0, 101025.2,
            10000.0, 20000.0, 30000.0, 40000.0, 50000.0,
            60000.0, 70000.0, 80000.0, 90000.0, 101025.2
        ]]
        assert r2.all_values() == [[
            [[10000.0], [20000.0], [30000.0], [40000.0], [50000.0],
             [60000.0], [70000.0], [80000.0], [90000.0], [101025.2]],
            [[[[10000.0], [20000.0], [30000.0], [40000.0], [50000.0],
               [60000.0], [70000.0], [80000.0], [90000.0], [101025.2]]]]
        ]]

    def test_contrived(self):
        s = read_bufr_file('contrived.bufr')
        bufr_message = self.decoder.process(s)

        r1 = self.querent.query(bufr_message, '/105002/102000/020011')
        assert r1.all_values(flat=True) == [[2, 4, 6, 8, 10], [11, 9, 7, 5, 3]]
        assert r1.all_values() == [[[[[[2], [4]]], [[[6], [8], [10]]]]],
                                   [[[[[11], [9], [7]]], [[[5], [3]]]]]]

        r2 = self.querent.query(bufr_message, '020011')
        assert r2.all_values(flat=True) == [[2, 4, 6, 8, 10, 1], [11, 9, 7, 5, 3, 2]]
        assert r2.all_values() == [[[[[[2], [4]]], [[[6], [8], [10]]]], 1],
                                   [[[[[11], [9], [7]]], [[[5], [3]]]], 2]]

        r3 = self.querent.query(bufr_message, '008002')
        assert r3.all_values(flat=True) == [[1, 3, 21, 5, 7, 9, 22],
                                            [12, 10, 8, 22, 6, 4, 21]]
        assert r3.all_values() == [[[[[[1], [3]], 21], [[[5], [7], [9]], 22]]],
                                   [[[[[12], [10], [8]], 22], [[[6], [4]], 21]]]]

        r4 = self.querent.query(bufr_message, '102000/008002')
        assert r4.all_values(flat=True) == [[1, 3, 5, 7, 9], [12, 10, 8, 6, 4]]
        assert r4.all_values() == [[[[[[1], [3]]], [[[5], [7], [9]]]]],
                                   [[[[[12], [10], [8]]], [[[6], [4]]]]]]
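The path strings exercised above combine subset selection and slicing (`@[0]`, `@[::10]`, `@[-1]`), hierarchical descriptor paths (`/301011/004001`), descendant matching (`>`), attribute access (`.A21062`, `.F12063`, `.031021`) and element slicing (`[0]`, `[::10]`). A minimal standalone sketch using one of the same test files:

from pybufrkit.decoder import Decoder
from pybufrkit.dataquery import NodePathParser, DataQuerent

with open('jaso_214.bufr', 'rb') as ins:
    bufr_message = Decoder().process(ins.read())

querent = DataQuerent(NodePathParser())
# every 10th subset, descriptor 004001 nested under sequence 301011
result = querent.query(bufr_message, '@[::10]/301011/004001')
print(result.subset_indices())
print(result.all_values(flat=True))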
def main(days=1, solo_file=0, add_in_bd=1, find_file=0):
    files = get_list_file(days=days)
    if find_file:
        path = os.getcwd()
        list_file = {find_file: [], 'path': path}
    if solo_file:
        files = [solo_file]
    if not files:
        print('No files received for checking')
        exit()

    meta_in_bd = set()
    tele_in_bd = set()
    last_H_in_bd = set()
    bar = IncrementalBar('decode_bufr', max=len(files))
    temp_check_files_get_index = {file: [] for file in files}

    for file_name in files:
        bar.next()
        try:
            decoder = Decoder()
            with open(file_name, 'rb') as ins:
                bufr_message = decoder.process(ins.read())
            # render the decoded message as text
            text_bufr = NestedTextRenderer().render(bufr_message)
        except Exception:
            ex = 'misk'
            logging(file_name, ex)
            continue

        # split the message into sections
        pattern = r'<<<<<< section [0-9] >>>>>>'
        list_decod_bufr = re.split(pattern, text_bufr)
        words = ['year', 'month', 'day', 'hour', 'minute', 'second']
        date_list = [int(re.search(r'{} = \d{}'.format(word, '{0,5}'),
                                   list_decod_bufr[2]).group().split(' = ')[-1])
                     for word in words]
        date_srok = '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:{:02d}'.format(
            date_list[0], date_list[1], date_list[2],
            date_list[3], date_list[4], date_list[5])

        # a single BUFR file may contain several reports (subsets)
        pattern_split_some_telegram = r'###### subset \d{1,1000} of \d{1,1000} ######'
        list_telegrams_in_bufr = re.split(pattern_split_some_telegram, list_decod_bufr[4])

        # extract data from the reports
        for telegram in list_telegrams_in_bufr:
            meta_info = get_metadate(file_name, telegram, date_srok)
            if not meta_info:
                continue
            if '25042' in meta_info:
                print('\n', meta_info[:5], '\n', file_name, '\n')
            meta_in_bd.add(meta_info)
            temp_check_files_get_index[file_name].append(
                f'{meta_info[0]} - {meta_info[1]}')
            index_station = meta_info[0]
            telemetry_info = get_telemetria(index_station, date_srok, telegram)
            if telemetry_info:
                last_H = (telemetry_info[-1][0], telemetry_info[-1][1],
                          get_last_h(telemetry_info))
                if last_H not in last_H_in_bd:
                    tele_in_bd.add(tuple(telemetry_info))
                    last_H_in_bd.add(last_H)
            # for file searching
            if find_file == meta_info[0]:
                list_file[find_file].append(file_name)

    bar.finish()
    # remove duplicates produced by the first and second parts of the reports
    # del_duble(meta_in_bd)

    with open('/home/bufr/bufr_work/telegram_BUFR/temp_check_files_get_index.txt', 'w') as f:
        # res = '\n'.join(files) + '\n' + '#'*50 + '\n'
        res = ''
        for file in files:
            res += f'{file}: {", ".join(temp_check_files_get_index[file][:5])}\n'
        res = res + '#' * 50 + '\n'
        res += ''.join([f"{i[0]} - {i[1]} \n" for i in meta_in_bd])
        f.write(res)

    if add_in_bd:
        set_in_bd(meta_in_bd, tele_in_bd, last_H_in_bd)
    if find_file:
        return list_file
    if solo_file:
        # for testing individual files
        return meta_in_bd
    return meta_in_bd
def setUp(self):
    self.decoder = Decoder()
def setUp(self):
    self.decoder = Decoder()
    self.querent = DataQuerent(NodePathParser())
import logging
from sys import platform

import numpy as np
from geopandas.tools import sjoin
import geopandas as gpd
import click

from climada.hazard import Centroids, TropCyclone, TCTracks
from climada.hazard.tc_tracks_forecast import TCForecast
from pybufrkit.decoder import Decoder
from typhoonmodel.utility_fun import track_data_clean, Check_for_active_typhoon, Sendemail, \
    ucl_data, plot_intensity, initialize

# check if running on a Linux or Windows OS
if platform == "linux" or platform == "linux2":
    from typhoonmodel.utility_fun import Rainfall_data
elif platform == "win32":
    from typhoonmodel.utility_fun import Rainfall_data_window as Rainfall_data

decoder = Decoder()
initialize.setup_logger()
logger = logging.getLogger(__name__)

ECMWF_MAX_TRIES = 3
ECMWF_SLEEP = 30  # s


@click.command()
@click.option('--path', default='./', help='main directory')
@click.option('--remote_directory', default=None,
              help='remote directory for ECMWF forecast data')  # e.g. '20210421120000'
@click.option('--typhoonname', default=None, help='name for active typhoon')
def load(self):
    if self.loaded:
        return

    with open(self.filepath, 'rb') as f:
        message = Decoder().process(f.read())
    queryer = DataQuerent(NodePathParser())

    self._lons = []
    self._lats = []
    self._wind = []
    self._pres = []
    for subset in range(52):
        # lat
        try:
            values = queryer.query(
                message, '@[{}] > {}'.format(subset, self.CODE_LAT)).all_values()
        except IndexError:
            raw_lats = np.empty(41)
            raw_lats[:] = np.nan
        else:
            raw_lats = np.array(values[0][3], dtype='float')[:, 0]
            raw_lats = np.insert(raw_lats, 0, values[0][1])
        self._lats.append(raw_lats)

        # lon
        try:
            values = queryer.query(
                message, '@[{}] > {}'.format(subset, self.CODE_LON)).all_values()
        except IndexError:
            raw_lons = np.empty(41)
            raw_lons[:] = np.nan
        else:
            raw_lons = np.array(values[0][3], dtype='float')[:, 0]
            raw_lons = np.insert(raw_lons, 0, values[0][1])
            raw_lons[raw_lons < 0] = raw_lons[raw_lons < 0] + 360
        self._lons.append(raw_lons)

        # wind
        try:
            values = queryer.query(
                message, '@[{}] > {}'.format(subset, self.CODE_WIND)).all_values(flat=True)
        except IndexError:
            raw_wind = np.empty(41)
            raw_wind[:] = np.nan
        else:
            raw_wind = np.array(values[0], dtype='float') * 1.94  # to kt
        self._wind.append(raw_wind)

        # pres
        try:
            values = queryer.query(
                message, '@[{}] > {}'.format(subset, self.CODE_PRES)).all_values(flat=True)
        except IndexError:
            raw_pres = np.empty(41)
            raw_pres[:] = np.nan
        else:
            raw_pres = np.array(values[0], dtype='float') / 100  # to hPa
        self._pres.append(raw_pres)

    self.invalid_indices = []
    self.invalid_majors = []
    self._lats = self.compact_mean(self._lats)
    self._lons = self.compact_mean(self._lons)
    self._wind = self.compact_mean(self._wind)
    self._pres = self.compact_mean(self._pres)

    invalid_index = min(self.invalid_indices)
    invalid_major = min(self.invalid_majors)
    print(invalid_index, invalid_major)
    self.cut_major(self._lats, invalid_major)
    self.cut_major(self._lons, invalid_major)
    self.cut_major(self._wind, invalid_major)
    self.cut_major(self._pres, invalid_major)
    self._lats[-1, invalid_index:] = np.nan
    self._lons[-1, invalid_index:] = np.nan
    self._wind[-1, invalid_index:] = np.nan
    self._pres[-1, invalid_index:] = np.nan

    self._maxwind = np.nanmax(self._wind, axis=1)
    self._minpres = np.nanmin(self._pres, axis=1)
    #print(self._maxwind)
    #print(self._minpres)
    self.loaded = True
def test_overriding_master_table_version():
    encoder = Encoder(master_table_version=31)
    bufr_message = encoder.process(CONTRIVED_JSON)
    assert 31 == bufr_message.master_table_version.value
    assert 31 == Decoder().process(
        bufr_message.serialized_bytes).master_table_version.value
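The encoder consumes a flat-JSON description of a message (as in `EncoderTests` above, which feeds it `.json` files), so a decode, re-encode roundtrip looks roughly like the sketch below. This assumes the output of FlatJsonRenderer is the format the encoder accepts; file names are placeholders:

import json
from pybufrkit.decoder import Decoder
from pybufrkit.encoder import Encoder
from pybufrkit.renderer import FlatJsonRenderer

with open('input.bufr', 'rb') as ins:     # hypothetical input file
    bufr_message = Decoder().process(ins.read())

# flat JSON view of the message (assumed to match the encoder's input format)
flat = FlatJsonRenderer().render(bufr_message)

new_message = Encoder().process(json.dumps(flat))
with open('output.bufr', 'wb') as outs:   # hypothetical output file
    outs.write(new_message.serialized_bytes)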
named_storm_files.append(directory + filename)

if len(named_storm_files) == 0:
    sys.exit("No named storms, exiting")

composite_storm_files = [named_storm_files[x:x + 2]
                         for x in range(0, len(named_storm_files), 2)]

for storm in composite_storm_files:
    ens_path = storm[0]
    det_path = storm[1]

    # Decode ensemble bufr file
    decoder = Decoder()
    with open(ens_path, 'rb') as ins:
        bufr_message = decoder.process(ins.read())
    text_data = FlatTextRenderer().render(bufr_message)
    text_array = np.array(text_data.splitlines())

    for line in text_array:
        if "WMO LONG STORM NAME" in line:
            storm_name = line.split()[-1][:-1]

    section4 = text_array[np.where(text_array == "<<<<<< section 4 >>>>>>")[0][0]:
                          np.where(text_array == "<<<<<< section 5 >>>>>>")[0][0]]
            'initial_lat': cen_lat,
            'initial_lon': cen_lon,
            'init_msl': cen_msl,
            'init_wspd': cen_wspd
        },
        'forecast_time': ftime,
        'forecast_lat': lat,
        'forecast_lon': lon,
        'forecast_msl': msl,
        'forecast_wspd': wspd
    }
    return ens_tc_dict


# read in bufr file
dec = Decoder()
dd = '07'
mm = '09'
yy = '18'
hh = '12'
name = 'FLORENCE'
latf = '-51p4degW'
lonf = '25degN'
s1 = 'A_JSXX01ECEP{}{}00'.format(dd, hh)
s2 = '_C_ECMP_20{}{}{}{}0000_tropical_cyclone_track_'.format(yy, mm, dd, hh)
s3 = '{}_{}_{}_bufr4.bin'.format(name, latf, lonf)
fn = s1 + s2 + s3

# read in file
print('opening file...')
with open(fn, 'rb') as ins:
def ecmwf_data_process(Input_folder, filepatern):
    """
    preprocess ecmwf forecast data downloaded above
    """
    #ecmwf_data_download(Input_folder,filepatern)
    path_ecmwf = os.path.join(Input_folder, 'ecmwf/')
    decoder = Decoder()
    # meteorological attribute (008005): 1 = storm centre,
    # 3 = location of maximum wind,
    # 4 = location of the storm in the perturbed analysis,
    # 5 = location of the storm in the analysis
    ecmwf_files = [f for f in listdir(path_ecmwf) if isfile(join(path_ecmwf, f))]
    #ecmwf_files = [file_name for file_name in ecmwf_files
    #               if file_name.startswith('A_JSXX02ECEP')]
    list_df = []

    for ecmwf_file in ecmwf_files:
        f_name = 'ECMWF_' + ecmwf_file.split('_')[1] + '_' + ecmwf_file.split('_')[4]
        model_name = ecmwf_file.split('_')[1][6:10]
        typhoon_name = ecmwf_file.split('_')[-4]

        with open(os.path.join(path_ecmwf, ecmwf_file), 'rb') as bin_file:
            bufr_message = decoder.process(bin_file.read())
        text_data = FlatTextRenderer().render(bufr_message)
        STORMNAME = typhoon_name  # ecmwf_file.split('_')[8]

        list1 = []
        with StringIO(text_data) as input_data:
            # Skip text before the beginning of the interesting block:
            for line in input_data:
                if line.startswith('<<<<<< section 4 >>>>>>'):
                    break
            # Read text until the end of the block:
            for line in input_data:
                if line.startswith('<<<<<< section 5 >>>>>>'):
                    break
                list1.append(line)

        list_var = ["004024", "004001", "004002", "004003", "004004", "004005",
                    "001092", "011012", "010051", "005002", "006002", "001091",
                    "001092", "008005"]
        list2 = [[int(li.split()[0]), li.split()[1], li.split()[-1]]
                 for li in list1
                 if li.startswith(" ") and li.split()[1] in list_var]
        df = pd.DataFrame(list2, columns=['id', 'code', 'Data'])

        def label_en(row, co):
            if row['code'] == co:
                return int(row['Data'])
            return np.nan

        df['model_sgn'] = df.apply(lambda row: label_en(row, co='008005'), axis=1)
        df['ensamble_num'] = df.apply(lambda row: label_en(row, co='001091'), axis=1)
        df['frcst_type'] = df.apply(lambda row: label_en(row, co='001092'), axis=1)
        df['frcst_type'] = df['frcst_type'].fillna(method='ffill')
        df['frcst_type'] = df['frcst_type'].fillna(method='bfill')
        df['ensamble_num'] = df['ensamble_num'].fillna(method='ffill')
        df['model_sgn'] = df['model_sgn'].fillna(method='ffill')
        df['model_sgn'] = df['model_sgn'].fillna(method='bfill')

        df_time = df.query('code in ["004001","004002","004003","004004","004005"]')
        date_object = '%04d%02d%02d%02d' % (int(df_time['Data'].to_list()[0]),
                                            int(df_time['Data'].to_list()[1]),
                                            int(df_time['Data'].to_list()[2]),
                                            int(df_time['Data'].to_list()[3]))
        date_object = datetime.strptime(date_object, "%Y%m%d%H")
        #(date_object + timedelta(hours=x)).strftime("%Y%m%d%H%M")

        df_center = df.query('code in ["010051","005002","006002"] and model_sgn in [1]')
        df_center2 = df.query('code in ["010051","005002","006002"] and model_sgn in [4,5]')
        # 1 = storm centre, 3 = maximum wind speed; see
        # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/6/TABLE_CODE_FLAG/008005
        df_max = df.query('code in ["011012","005002","006002","004024"] and model_sgn in [3]')

        latc, lonc, pcen, frcst_type, ensambles = [], [], [], [], []
        for names, group in df_center.groupby("ensamble_num"):
            latc.append(list(group[group.code == "005002"]['Data'].values))
            lonc.append(list(group[group.code == "006002"]['Data'].values))
            pcen.append(list(group[group.code == "010051"]['Data'].values))

        lat, lon, vmax, vhr = [], [], [], []
        for names, group in df_max.groupby("ensamble_num"):
            lat.append(list(group[group.code == "005002"]['Data'].values))
            lon.append(list(group[group.code == "006002"]['Data'].values))
            vmax.append(list(group[group.code == "011012"]['Data'].values))
            vhr.append(list(group[group.code == "004024"]['Data'].values))
            frcst_type.append(list(np.unique(group.frcst_type.values))[0])
            ensambles.append(names)

        latc1, lonc1, pcen1 = [], [], []
        for names, group in df_center2.groupby("ensamble_num"):
            latc1.append(list(group[group.code == "005002"]['Data'].values))
            lonc1.append(list(group[group.code == "006002"]['Data'].values))
            pcen1.append(list(group[group.code == "010051"]['Data'].values))

        for i in range(len(pcen1)):
            pcen1[i].extend(pcen[i])

        vhr = ['0', '6', '12', '18', '24', '30', '36', '42', '48', '54',
               '60', '66', '72', '78', '84', '90', '96', '102', '108']

        for i in range(len(ensambles)):
            wind = [np.nan if value == 'None' else float(value) for value in vmax[i]]
            pre = [np.nan if value == 'None' else float(value) / 100 for value in pcen1[i]]
            lon_ = [np.nan if value == 'None' else float(value) for value in lon[i]]
            lat_ = [np.nan if value == 'None' else float(value) for value in lat[i]]
            lon1_ = [np.nan if value == 'None' else float(value) for value in lonc[i]]
            lat1_ = [np.nan if value == 'None' else float(value) for value in latc[i]]
            max_radius = np.sqrt(np.square(np.array(lon_) - np.array(lon1_)) +
                                 np.square(np.array(lat_) - np.array(lat1_))) * 110
            timestamp = [(date_object + timedelta(hours=int(value))).strftime("%Y%m%d%H%M")
                         for value in vhr]
            timestep_int = [int(value) for value in vhr]

            track = xr.Dataset(
                data_vars={
                    'max_sustained_wind': ('time', wind),
                    'central_pressure': ('time', pre),
                    'ts_int': ('time', timestep_int),
                    'max_radius': ('time', max_radius),
                    'lat': ('time', lat_),
                    'lon': ('time', lon_),
                },
                coords={'time': timestamp},
                attrs={
                    'max_sustained_wind_unit': 'm/s',
                    'central_pressure_unit': 'mb',
                    'name': typhoon_name,
                    'sid': 'NA',
                    'orig_event_flag': False,
                    'data_provider': 'ECMWF',
                    'id_no': 'NA',
                    'ensemble_number': ensambles[i],
                    'is_ensemble': 'TRUE' if frcst_type[i] == 4 else 'False',
                    'forecast_time': date_object.strftime("%Y%m%d%H%M"),
                    'basin': 'WP',
                    'category': 'NA',
                })
            track = track.set_coords(['lat', 'lon'])
            list_df.append(track)

    #%%
    date_object = '%04d%02d%02d%02d' % (
        int([line.split()[-1] for line in StringIO(text_data)
             if line[6:17].upper() == "004001 YEAR"][0]),
        int([line.split()[-1] for line in StringIO(text_data)
             if line[6:18].upper() == "004002 MONTH"][0]),
        int([line.split()[-1] for line in StringIO(text_data)
             if line[6:16].upper() == "004003 DAY"][0]),
        int([line.split()[-1] for line in StringIO(text_data)
             if line[6:17].upper() == "004004 HOUR"][0]))
    date_object = datetime.strptime(date_object, "%Y%m%d%H")

    val_t = [int(line.split()[-1]) for num, line in enumerate(StringIO(text_data), 1)
             if line[6:40].upper() == "004024 TIME PERIOD OR DISPLACEMENT"]
    val_wind = [line.split()[-1] for num, line in enumerate(StringIO(text_data), 1)
                if line[6:12] == "011012"]
    val_pre = [line.split()[-1] for num, line in enumerate(StringIO(text_data), 1)
               if line[6:12] == "010051"]
    val_lat = [line.split()[-1] for num, line in enumerate(StringIO(text_data), 1)
               if line[6:12] == "005002"]
    val_lon = [line.split()[-1] for num, line in enumerate(StringIO(text_data), 1)
               if line[6:12] == "006002"]
    val_ens = [line.split()[-1] for num, line in enumerate(StringIO(text_data), 1)
               if line[6:12] == "001091"]
    val_dis = [line.split()[-1] for num, line in enumerate(StringIO(text_data), 1)
               if line[6:12] == "008005"]

    if len(val_ens) > 1:
        val_t = val_t[0:int(len(val_t) / len(val_ens))]
        val_t.insert(0, 0)
        val_ensamble = duplicate(val_ens, int(len(val_wind) / len(val_ens)))
        val_time = val_t * len(val_ens)  # 52
    else:
        val_ensamble = 'NA'
        val_t.insert(0, 0)
        val_time = val_t

    ecmwf_df = pd.DataFrame({'lon': val_lon, 'lat': val_lat, 'met_dis': val_dis})
    ecmwf_center = ecmwf_df[ecmwf_df['met_dis'] == '1']
    ecmwf_df2 = pd.DataFrame({'STORMNAME': STORMNAME,
                              'time': val_time,
                              'lon': ecmwf_center['lon'].values,
                              'lat': ecmwf_center['lat'].values,
                              'windsped': val_wind,
                              'pressure': val_pre,
                              'ens': val_ensamble})
    ecmwf_df2['YYYYMMDDHH'] = ecmwf_df2['time'].apply(
        lambda x: (date_object + timedelta(hours=x)).strftime("%Y%m%d%H%M"))
    dict1 = []
    ecmwf_df2 = ecmwf_df2.replace(['None'], np.nan)

    typhoon_df = pd.DataFrame()
    typhoon_df[['YYYYMMDDHH', 'LAT', 'LON', 'VMAX', 'PRESSURE', 'STORMNAME', 'ENSAMBLE']] = \
        ecmwf_df2[['YYYYMMDDHH', 'lat', 'lon', 'windsped', 'pressure', 'STORMNAME', 'ens']]
    typhoon_df[['LAT', 'LON', 'VMAX']] = typhoon_df[['LAT', 'LON', 'VMAX']].apply(pd.to_numeric)
    typhoon_df['VMAX'] = typhoon_df['VMAX'].apply(lambda x: x * 1.94384449 * 1.05)  # convert to knots
    typhoon_df.to_csv(os.path.join(Input_folder,
                                   'ECMWF_%s_%s_%s.csv' % (Input_folder.split('/')[-3],
                                                           STORMNAME, model_name)),
                      index=False)
class BufrDataTests(unittest.TestCase):

    def setUp(self):
        self.decoder = Decoder()
        self.filename_stubs = [
            'IUSK73_AMMC_182300',
            'rado_250',           # uncompressed with 222000, 224000, 236000
            '207003',             # compressed with delayed replication
            'amv2_87',            # compressed with 222000
            'b005_89',            # compressed with 222000 and 224000 (1st order stats)
            'profiler_european',  # uncompressed with 204001 associated fields
            'jaso_214',           # compressed with 204001 associated fields
            'uegabe',             # uncompressed with 204004 associated fields
            'asr3_190',           # compressed with complex replication and 222000, 224000
            'b002_95',            # uncompressed with skipped local descriptors
            'g2nd_208',           # compressed with identical string values for all subsets
            'ISMD01_OKPR',        # compressed with different string values for subsets
            'mpco_217',
        ]

    def do_test(self, filename_stub):
        s = read_bufr_file(filename_stub + '.bufr')
        bufr = self.decoder.decode(s, filename_stub)
        bufr_data = bufr.wire_data()
        if filename_stub in ('207003', 'rado_250'):
            with open(os.path.join(DATA_DIR,
                                   '{}.datadump.cmp'.format(filename_stub))) as ins:
                cmp_str = ins.read()
            dump_str = bufr_data.dumps()
            # TODO: this is to fix the inconsistent int and long of bitstring on different OS
            dump_str = dump_str.replace('005040 ORBIT NUMBER 5258\n',
                                        '005040 ORBIT NUMBER 5258L\n')
            assert dump_str == cmp_str, dump_str
        else:
            bufr_data.dumps()

    def test_bufr_data(self):
        print()
        for filename_stub in self.filename_stubs:
            print(filename_stub)
            self.do_test(filename_stub)

    def test_path_string_parsing(self):
        path = parse_position_string('1, 2, 3, 4')
        assert path == (1, ((1, None), (2, None), (3, None), (4, None)), ())

        path = parse_position_string('2,3[0:10:2],5[2].7[2].8')
        assert path == (1, ((2, None), (3, slice(0, 10, 2)), (5, 2)), ((7, 2), (8, None)))

        path = parse_position_string('#121, 1, 3, 5[3:].1. 3')
        assert path == (121,
                        ((1, None), (3, None), (5, slice(3, None, None))),
                        ((1, None), (3, None)))

    def test_query_by_path(self):
        s = read_bufr_file('asr3_190.bufr')
        bufr = self.decoder.decode(s, 'asr3_190')
        bufr_data = bufr.wire_data()
        assert bufr_data.query_by_position('1,1,3')[1] == 333
        assert bufr_data.query_by_position('#1,1,1,3')[1] == 333
        assert bufr_data.query_by_position('#128,1,1,3')[1] == 333
        assert bufr_data.query_by_position('#1, 1, 4, 1')[1] == 24.87108
        assert bufr_data.query_by_position('#3, 1, 4, 1')[1] == 24.87502
        assert bufr_data.query_by_position('#1, 1, 11, 1, 2')[1] == [
            236391100000000.0, 131971700000000.0, 64338200000000.0,
            36184200000000.0, 30148700000000.0, 11316000000000.0,
            6395700000000.0, 3612800000000.0, 10653400000000.0,
            8571400000000.0, 6835300000000.0
        ]
        assert bufr_data.query_by_position('#1, 1, 11, 1, 3.2.1')[1] == [
            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10
        ]
        assert bufr_data.query_by_position('1, 11, 1, 3.1')[1] == \
            bufr_data.query_by_position('7, 1')[1][0::6]
        assert bufr_data.query_by_position('#128, 1, 11, 1, 14')[1] == [
            None, None, None, 240.7, 215.7, 220.3, 227.1, 228.3, 224.2, 221.5, 218.3
        ]

    def test_query_by_name(self):
        s = read_bufr_file('asr3_190.bufr')
        bufr = self.decoder.decode(s, 'asr3_190')
        bufr_data = bufr.wire_data()
        assert bufr_data.query_by_name('001007')[1] == [[57]] * 128
        assert bufr_data.query_by_name('012063')[1][-1] == [
            None, None, None, None, None, None, None, None, None, None,
            None, None, None, None, None, None, None, None,
            240.7, None, 240.7, None, None, 240.7,
            215.7, None, 215.7, None, None, 215.7,
            220.3, None, 220.3, None, None, 220.3,
            227.1, None, 227.1, None, None, 227.1,
            228.3, None, 228.3, None, None, 228.3,
            224.2, None, 224.2, None, None, 224.2,
            221.5, None, 221.5, None, None, 221.5,
            218.3, None, 218.3, None, None, 218.3
        ]
        assert bufr_data.query_by_name('012063.F12063.008023')[1] == [[10] * 66] * 128
class DecoderTests(unittest.TestCase):

    def setUp(self):
        self.decoder = Decoder()
        self.filename_stubs = [
            'IUSK73_AMMC_182300',
            'rado_250',           # uncompressed with 222000, 224000, 236000
            '207003',             # compressed with delayed replication
            'amv2_87',            # compressed with 222000
            'b005_89',            # compressed with 222000 and 224000 (1st order stats)
            'profiler_european',  # uncompressed with 204001 associated fields
            'jaso_214',           # compressed with 204001 associated fields
            'uegabe',             # uncompressed with 204004 associated fields
            'asr3_190',           # compressed with complex replication and 222000, 224000
            'b002_95',            # uncompressed with skipped local descriptors
            'g2nd_208',           # compressed with identical string values for all subsets
            'ISMD01_OKPR',        # compressed with different string values for subsets
            'mpco_217',
        ]

    def tearDown(self):
        pass

    def _compare(self, bufr_message, cmp_file_name):
        with open(os.path.join(DATA_DIR, cmp_file_name)) as ins:
            lines = ins.readlines()
        next_line = functools.partial(next, iter(lines))
        for idx_subset in range(
                len(bufr_message.template_data.value.decoded_values_all_subsets)):
            for idx, value in enumerate(
                    bufr_message.template_data.value.decoded_values_all_subsets[idx_subset]):
                cmp_line = next_line().strip()
                if value is None:
                    line = '{} {}'.format(idx + 1, repr(value))
                    assert line == cmp_line, \
                        'At line {}: {} != {}'.format(idx + 1, line, cmp_line)
                elif isinstance(value, (binary_type, text_type)):
                    # TODO: better to decode all ascii bytes to unicode string
                    if isinstance(value, binary_type) and PY3:
                        line = '{} {}'.format(idx + 1, repr(value)[1:])
                    else:
                        line = '{} {}'.format(idx + 1, repr(value))
                    assert line == cmp_line, \
                        'At line {}: {} != {}'.format(idx + 1, line, cmp_line)
                else:
                    field = cmp_line.split()[1]
                    if field.endswith('L'):
                        field = field[:-1]
                    cmp_value = eval(field)
                    assert abs(value - cmp_value) < 1.0e-6, \
                        'At line {}: {} != {}'.format(idx + 1, value, cmp_value)

    def _print_values(self, bufr_message):
        for idx_subset in range(
                len(bufr_message.template_data.value.decoded_values_all_subsets)):
            for idx, value in enumerate(
                    bufr_message.template_data.value.decoded_values_all_subsets[idx_subset]):
                print(idx + 1, repr(value))

    def do_test(self, filename_stub):
        s = read_bufr_file(filename_stub + '.bufr')
        bufr_message = self.decoder.process(s, filename_stub)
        self._compare(bufr_message, filename_stub + '.values.cmp')

    def test_decode(self):
        print()
        for filename_stub in self.filename_stubs:
            print(filename_stub)
            self.do_test(filename_stub)
args = parser.parse_args()
infilename = args.infile
outfilename = args.infile + '_ncepbufr' if args.outfile is None else args.outfile
bufrtable = args.bufrtable
append = args.append
print('Converting ', infilename, ' to ', outfilename)

_nmaxseq = 255  # max size of sequence in message

#---------------------------------------------------------------------------------#
# Open files
#---------------------------------------------------------------------------------#
#** Open WMO bufr file
decoder = Decoder()
with open(infilename, 'rb') as ins:
    bufr_message = decoder.process(s=ins.read(), file_path=infilename)

#** open NCEP bufr file
if append:
    bufr = ncepbufr.open(outfilename, 'a')
else:
    bufr = ncepbufr.open(outfilename, 'w', table=bufrtable)

#---------------------------------------------------------------------------------#
# Read WMO Standard BUFR
#---------------------------------------------------------------------------------#
#** get whole data
data = FlatJsonRenderer().render(bufr_message)[3][2][0]