def init_dataframe(args, input_file):
    """Load an NSIDC input file into a dataframe and normalise its values.

    Sentinel "missing" codes are replaced with NaN, the variables stored with
    a scaling factor of 10 are divided by 10, temperatures are shifted by
    ``common.freezing_point_temp`` unless ``args.celsius`` is set, and every
    remaining null is replaced with ``common.get_fillvalue(args)``.
    Pressure is returned unconverted.

    Args:
        args: Parsed options; ``args.celsius`` is read here and the whole
            object is handed to ``common.get_fillvalue``.
        input_file: Path of the NSIDC file to load.

    Returns:
        Tuple ``(df, temperature_vars, pressure_vars)``.
    """
    header_rows = 4
    df, _ = common.load_dataframe('nsidc', input_file, header_rows)

    # Sentinel value that marks a missing observation in each column.
    missing_codes = [
        ('wspd', 999),
        ('visby', 999999),
        ('ta', 9999),
        ('dpt', 9999),
        ('slp', 99999),
    ]
    for column, code in missing_codes:
        # Assign the result back rather than calling replace(inplace=True) on
        # df[column]: an in-place call on a column selection is a chained
        # operation that does not reach df under pandas Copy-on-Write.
        df[column] = df[column].replace(code, np.nan)

    # Input data has a scaling factor of 10 for these variables.
    factor10_vars = ['wspd', 'ta', 'dpt', 'slp']
    df.loc[:, factor10_vars] /= 10

    temperature_vars = ['ta', 'dpt']
    if not args.celsius:
        # Shift Celsius readings to Kelvin.
        df.loc[:, temperature_vars] += common.freezing_point_temp

    # Pressure is already in hPa in the input; no conversion is applied here.
    pressure_vars = ['slp']

    df = df.where(pd.notnull(df), common.get_fillvalue(args))

    return df, temperature_vars, pressure_vars
def init_dataframe(args, input_file, sub_type):
    """Load an IMAU input file into a dataframe and convert its units.

    Values equal to -9999 are treated as missing. Temperatures are shifted by
    ``common.freezing_point_temp`` unless ``args.celsius`` is set, pressures
    are multiplied by ``common.pascal_per_millibar`` unless ``args.mb`` is
    set, and every remaining null is replaced with
    ``common.get_fillvalue(args)``.

    Args:
        args: Parsed options; ``args.celsius`` and ``args.mb`` are read here.
        input_file: Path of the IMAU file to load.
        sub_type: Either ``'imau/ant'`` or ``'imau/grl'``; selects which
            columns hold temperatures and pressures.

    Returns:
        Tuple ``(df, temperature_vars, pressure_vars)``.

    Raises:
        ValueError: If ``sub_type`` is not one of the two supported values.
    """
    check_na = -9999

    # Resolve the column lists before any I/O so an unsupported sub_type fails
    # with a clear message instead of an UnboundLocalError further down.
    if sub_type == 'imau/ant':
        temperature_vars = [
            'temp_cnr1', 'ta',
            'tsn1a', 'tsn2a', 'tsn3a', 'tsn4a', 'tsn5a',
            'tsn1b', 'tsn2b', 'tsn3b', 'tsn4b', 'tsn5b',
            'temp_logger',
        ]
        pressure_vars = ['pa']
    elif sub_type == 'imau/grl':
        temperature_vars = [
            'temp_cnr1', 'ta2', 'ta6',
            'tsn1', 'tsn2', 'tsn3', 'tsn4', 'tsn5',
            'datalogger',
        ]
        pressure_vars = ['pa']
    else:
        raise ValueError(
            "Unsupported IMAU sub_type {!r}; expected 'imau/ant' or "
            "'imau/grl'".format(sub_type))

    df, _ = common.load_dataframe(sub_type, input_file, 0)
    df.replace(check_na, np.nan, inplace=True)

    if not args.celsius:
        # Shift Celsius readings to Kelvin.
        df.loc[:, temperature_vars] += common.freezing_point_temp

    if not args.mb:
        # Scale pressure by pascal_per_millibar unless millibar output was
        # requested (presumably hPa -> Pa; the constant is defined in common).
        df.loc[:, pressure_vars] *= common.pascal_per_millibar

    df = df.where(pd.notnull(df), common.get_fillvalue(args))

    return df, temperature_vars, pressure_vars
def init_dataframe(args, input_file):
    """Load a PROMICE input file into a dataframe and convert its units.

    The first line of the file supplies the variable names. Values equal to
    -999 are treated as missing. Temperatures are shifted by
    ``common.freezing_point_temp`` unless ``args.celsius`` is set, pressures
    are multiplied by ``common.pascal_per_millibar`` unless ``args.mb`` is
    set, ``fan_current`` is divided by 1000, and every remaining null is
    replaced with ``common.get_fillvalue(args)``.

    Args:
        args: Parsed options; ``args.celsius`` and ``args.mb`` are read here.
        input_file: Path of the PROMICE file to load.

    Returns:
        Tuple ``(df, temperature_vars, pressure_vars)``.

    Raises:
        ValueError: If ``input_file`` is empty and so has no header line.
    """
    convert_current = 1000
    check_na = -999

    with open(input_file) as stream:
        header_line = stream.readline()
    if not header_line:
        # Without this guard an empty file would surface as an
        # UnboundLocalError on input_file_vars.
        raise ValueError(
            'Input file {} is empty; expected a header line with variable '
            'names'.format(input_file))

    # Variable names are separated by single spaces; consecutive spaces yield
    # empty tokens, which are dropped.
    input_file_vars = [x.strip() for x in header_line.split(' ') if x]

    df, _ = common.load_dataframe(
        'promice', input_file, 1, input_file_vars=input_file_vars)
    df.replace(check_na, np.nan, inplace=True)

    temperature_vars = [
        'ta', 'ta_hygroclip', 'ts',
        'tice1', 'tice2', 'tice3', 'tice4',
        'tice5', 'tice6', 'tice7', 'tice8',
        'temp_logger',
    ]
    if not args.celsius:
        # Shift Celsius readings to Kelvin.
        df.loc[:, temperature_vars] += common.freezing_point_temp

    pressure_vars = ['pa']
    if not args.mb:
        # Scale pressure by pascal_per_millibar unless millibar output was
        # requested (presumably hPa -> Pa; the constant is defined in common).
        df.loc[:, pressure_vars] *= common.pascal_per_millibar

    # Convert fan current to Ampere (input presumably in mA).
    df.loc[:, ['fan_current']] /= convert_current

    df = df.where(pd.notnull(df), common.get_fillvalue(args))

    return df, temperature_vars, pressure_vars
def init_dataframe(args, input_file):
    """Load a SCAR input file into a dataframe and convert its units.

    The header (everything up to and including the first blank line) is
    skipped when loading the data; its first six lines are parsed for station
    metadata, variable names and the missing-value code. Temperatures are
    shifted by ``common.freezing_point_temp`` unless ``args.celsius`` is set,
    pressures are multiplied by ``common.pascal_per_millibar`` unless
    ``args.mb`` is set, wind speed is converted from knots to m/s, and every
    remaining null is replaced with ``common.get_fillvalue(args)``.

    Args:
        args: Parsed options; ``args.celsius`` and ``args.mb`` are read here.
        input_file: Path of the SCAR file to load.

    Returns:
        Tuple ``(df, temperature_vars, pressure_vars, stn_name, lat, lon,
        height, country, institution)``.

    Raises:
        ValueError: If the file has fewer than six lines, or a numeric header
            field cannot be parsed.
    """
    knot_to_ms = 0.514444
    metadata_rows = 6

    # Header length: number of lines up to and including the first blank one
    # (or the whole file when there is no blank line).
    header_rows = 0
    with open(input_file) as stream:
        for line in stream:
            header_rows += 1
            if not line.strip():
                break

    # The first six lines carry the metadata, wherever the blank line falls.
    with open(input_file) as stream:
        metadata = [line for _, line in zip(range(metadata_rows), stream)]
    if len(metadata) < metadata_rows:
        raise ValueError(
            'Input file {} has only {} line(s); expected at least {} header '
            'lines'.format(input_file, len(metadata), metadata_rows))

    (name_line, country_line, position_line,
     vars_line, missing_line, institution_line) = metadata

    stn_name = name_line.strip()
    country = country_line[12:].strip()

    # Latitude, longitude and height are the 2nd, 4th and 6th
    # single-space-separated tokens of the position line.
    parts = position_line.split(' ')
    lat = float(parts[1])
    lon = float(parts[3])
    # Drop the trailing character of the height token (presumably a unit
    # suffix such as 'm') before converting.
    height = float(parts[5].strip()[:-1])

    # Variable names are comma separated; anything from '(' onwards is dropped.
    input_file_vars = [
        x.split('(')[0].strip() for x in vars_line[16:].split(',')
    ]

    # The missing-value code is the last token of the fifth line.
    check_na = int(missing_line.strip().split(' ')[-1])

    institution = institution_line[16:].strip()
    # Remove a leading article. str.lstrip('the ') would strip any leading
    # run of the characters 't', 'h', 'e' and ' ', mangling names that merely
    # start with those letters.
    article = 'the '
    if institution.startswith(article):
        institution = institution[len(article):].lstrip()

    df, _ = common.load_dataframe(
        'scar', input_file, header_rows, input_file_vars=input_file_vars)
    df.replace(check_na, np.nan, inplace=True)

    temperature_vars = ['ta']
    if not args.celsius:
        # Shift Celsius readings to Kelvin.
        df.loc[:, temperature_vars] += common.freezing_point_temp

    pressure_vars = ['pa']
    if not args.mb:
        # Scale pressure by pascal_per_millibar unless millibar output was
        # requested (presumably hPa -> Pa; the constant is defined in common).
        df.loc[:, pressure_vars] *= common.pascal_per_millibar

    # Convert wind speed from knots to metres per second.
    df.loc[:, 'wspd'] *= knot_to_ms

    df = df.where(pd.notnull(df), common.get_fillvalue(args))

    return (df, temperature_vars, pressure_vars, stn_name, lat, lon, height,
            country, institution)
def fill_dataset_quality_control(dataframe, dataset, input_file):
    """Split packed quality-control columns into one dataset variable each.

    For every key of ``resources/gcnet/quality_control.json`` that is present
    among the input file's columns (the original docstring names qc1, qc9,
    qc17 and qc25), each value of ``dataframe[key]`` is rendered as a string
    and split into its characters. The i-th characters of all rows, converted
    to int, become the data of the i-th attribute listed for that key, stored
    in ``dataset`` along the ``'time'`` dimension.

    Args:
        dataframe: Dataframe holding the packed quality-control columns.
        dataset: Mapping-like target; receives ``('time', values)`` entries.
        input_file: Path of the GCNet file, used to discover its columns.
    """
    # Only the column names are needed here. ``header_rows`` is a module-level
    # name and must have been set before this function is called.
    _, columns = common.load_dataframe('gcnet', input_file, header_rows)
    qc_definitions = common.read_ordered_json(
        'resources/gcnet/quality_control.json')

    for key, attributes in qc_definitions.items():
        # Skip quality-control variables that the input file does not carry.
        if key not in columns:
            continue

        flags_as_text = [str(entry) for entry in dataframe[key]]

        # Transpose: one list per character position, spanning every row.
        # zip stops at the shortest string.
        per_position = []
        for characters in zip(*flags_as_text):
            per_position.append([int(ch) for ch in characters])

        for attr, value in zip(attributes, per_position):
            dataset[attr] = ('time', value)
def get_station(args, input_file, stations):
    """Return latitude, longitude and name of the station in a GCNet file.

    The station number is read from the first row of the file's
    ``station_number`` column. Numbers 1-23 index positionally into
    ``stations``; numbers 30-32 map to the keys ``gcnet_lar1`` to
    ``gcnet_lar3``. Any other number prints a hint and exits with status 1.

    Args:
        args: Parsed options, forwarded to ``common.parse_station``.
        input_file: Path of the GCNet file to inspect.
        stations: Ordered mapping of station key to station description.

    Returns:
        Tuple ``(lat, lon, stn_nm)`` as produced by ``common.parse_station``.
    """
    # ``header_rows`` is a module-level name and must have been set before
    # this function is called.
    df, _ = common.load_dataframe('gcnet', input_file, header_rows)
    station_number = df['station_number'][0]

    is_indexed_station = 1 <= station_number <= 23
    is_lar_station = 30 <= station_number <= 32

    if not (is_indexed_station or is_lar_station):
        print('KeyError: {}'.format(station_number))
        print('HINT: This KeyError can occur when JAWS is asked to process station that is not in its database. '
              'Please inform the JAWS maintainers by opening an issue at https://github.com/jaws/jaws/issues.')
        sys.exit(1)

    if is_indexed_station:
        # Positional lookup into the mapping's values.
        station = list(stations.values())[station_number]
    else:
        # 30, 31, 32 -> gcnet_lar1, gcnet_lar2, gcnet_lar3
        station = stations['gcnet_lar{}'.format(station_number - 29)]

    latitude, longitude, station_name = common.parse_station(args, station)
    return latitude, longitude, station_name
def init_dataframe(args, input_file):
    """Load a GCNet input file into a dataframe and convert its units.

    The header (everything up to and including the first blank line) is
    counted and stored in the module-level ``header_rows`` so that other
    functions in this module can reuse it. Values equal to 999.0 are treated
    as missing, except in ``qc25``, which is shielded by a temporary cast to
    str. Temperatures are shifted by ``common.freezing_point_temp`` unless
    ``args.celsius`` is set, pressures are multiplied by
    ``common.pascal_per_millibar`` unless ``args.mb`` is set, and every
    remaining null is replaced with ``common.get_fillvalue(args)``.

    Args:
        args: Parsed options; ``args.celsius`` and ``args.mb`` are read here.
        input_file: Path of the GCNet file to load.

    Returns:
        Tuple ``(df, temperature_vars, pressure_vars)``.
    """
    global header_rows

    check_na = 999.0

    # Header length: number of lines up to and including the first blank one.
    header_rows = 0
    with open(input_file) as stream:
        for line in stream:
            header_rows += 1
            if not line.strip():
                break

    df, _ = common.load_dataframe('gcnet', input_file, header_rows)

    # qc25 is optional. Test for it explicitly instead of swallowing every
    # exception, so that unrelated failures are not hidden.
    has_qc25 = 'qc25' in df.columns
    if has_qc25:
        # As strings, legitimate 999 flag values are not marked as N/A below.
        df['qc25'] = df['qc25'].astype(str)

    df.replace(check_na, np.nan, inplace=True)

    temperature_vars = [
        'ta_tc1', 'ta_tc2', 'ta_cs1', 'ta_cs2',
        'tsn1', 'tsn2', 'tsn3', 'tsn4', 'tsn5',
        'tsn6', 'tsn7', 'tsn8', 'tsn9', 'tsn10',
        'ta_max1', 'ta_max2', 'ta_min1', 'ta_min2',
        'ref_temp',
    ]
    if not args.celsius:
        # Shift Celsius readings to Kelvin.
        df.loc[:, temperature_vars] += common.freezing_point_temp

    pressure_vars = ['ps']
    if not args.mb:
        # Scale pressure by pascal_per_millibar unless millibar output was
        # requested (presumably hPa -> Pa; the constant is defined in common).
        df.loc[:, pressure_vars] *= common.pascal_per_millibar

    df = df.where(pd.notnull(df), common.get_fillvalue(args))

    if has_qc25:
        try:
            df['qc25'] = df['qc25'].astype(int)  # Convert it back to int
        except (ValueError, TypeError, OverflowError):
            # Best effort: keep the string form when some entry cannot be
            # represented as an integer.
            pass

    return df, temperature_vars, pressure_vars