Ejemplo n.º 1
def scar2nc(args, input_file, output_file):
    """Convert a SCAR text file into a netCDF file.

    Args:
        args: Parsed options; forwarded to every helper, and ``dfl_lvl`` is
            read as the compression level.
        input_file: Path of the SCAR text file to read.
        output_file: Path of the netCDF file to write.
    """
    (df, temperature_vars, pressure_vars, station_name, latitude, longitude,
     height, country, institution) = init_dataframe(args, input_file)

    # The 'time' column coming from the dataframe is replaced further down by
    # the values computed in get_time_and_sza().
    ds = xr.Dataset.from_dataframe(df).drop('time')

    common.log(args, 2, 'Calculating time and sza')
    time, time_bounds, sza, day_of_year = get_time_and_sza(
        args, df, latitude, longitude)

    along_time = 'time'
    scalar = tuple()
    # (variable name, dimensions, values), added in this exact order.
    new_variables = (
        ('day_of_year', along_time, day_of_year),
        ('time', along_time, time),
        ('time_bounds', (along_time, 'nbnd'), time_bounds),
        ('sza', along_time, sza),
        ('station_name', scalar, station_name),
        ('latitude', scalar, latitude),
        ('longitude', scalar, longitude),
        ('height', scalar, height),
    )
    for var_name, dims, values in new_variables:
        ds[var_name] = dims, values

    comp_level = args.dfl_lvl
    fill_value = common.get_fillvalue(args)
    encoding = common.get_encoding('scar', fill_value, comp_level, args)

    common.write_data(args, ds, output_file, encoding)
Ejemplo n.º 2
def imau2nc(args, input_file, output_file, stations):
    """Convert an IMAU ASCII file to netCDF.

    The network sub-type is detected from the number of comma-separated
    columns on the first line of ``input_file``: 31 columns select
    ``'imau/ant'`` and 35 columns select ``'imau/grl'``.

    Args:
        args: Parsed options; forwarded to every helper. ``rigb`` and
            ``dfl_lvl`` are read here.
        input_file: Path of the IMAU ASCII file to read.
        output_file: Path of the netCDF file to write.
        stations: Station table forwarded to ``get_station``.

    Raises:
        RuntimeError: If the first line has neither 31 nor 35 columns.
    """
    with open(input_file) as stream:
        line = stream.readline()
        var_count = len(line.split(','))

    errmsg = 'Unknown sub-type of IMAU network. Antarctic stations have 31 columns while Greenland stations have 35. ' \
             'Your dataset has {} columns.'.format(var_count)
    if var_count == 31:
        sub_type = 'imau/ant'
    elif var_count == 35:
        sub_type = 'imau/grl'
    else:
        # Only an unrecognised column count is an error; without this branch
        # every Greenland file was rejected as well.
        raise RuntimeError(errmsg)

    df, temperature_vars, pressure_vars = init_dataframe(args, input_file, sub_type)
    ds = xr.Dataset.from_dataframe(df)
    # The 'time' column from the dataframe is replaced below by the computed one.
    ds = ds.drop('time')

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    month, day, hour, minutes, time, time_bounds, sza, az, first_date, last_date = get_time_and_sza(
        args, df, longitude, latitude, sub_type)

    ds['month'] = 'time', month
    ds['day'] = 'time', day
    ds['hour'] = 'time', hour
    ds['minutes'] = 'time', minutes
    ds['time'] = 'time', time
    ds['time_bounds'] = ('time', 'nbnd'), time_bounds
    ds['sza'] = 'time', sza
    ds['az'] = 'time', az
    ds['station_name'] = tuple(), station_name
    ds['latitude'] = tuple(), latitude
    ds['longitude'] = tuple(), longitude

    rigb_vars = []
    if args.rigb:
        ds, rigb_vars = common.call_rigb(
            args, station_name, first_date, last_date, ds, latitude, longitude, rigb_vars)

    comp_level = args.dfl_lvl

    # NOTE(review): the call was left unterminated after ``temperature_vars``;
    # ``pressure_vars`` is assumed to be the missing final keyword -- confirm
    # against the signature of common.load_dataset_attributes.
    common.load_dataset_attributes(sub_type, ds, args, rigb_vars=rigb_vars, temperature_vars=temperature_vars,
                                   pressure_vars=pressure_vars)
    encoding = common.get_encoding(sub_type, common.get_fillvalue(args), comp_level, args)

    common.write_data(args, ds, output_file, encoding)
Ejemplo n.º 3
def gcnet2nc(args, input_file, output_file, stations):
    """Convert a GCNet ASCII file to netCDF.

    Args:
        args: Parsed options; forwarded to every helper. ``flx``,
            ``no_drv_tm``, ``rigb`` and ``dfl_lvl`` are read here.
        input_file: Path of the GCNet ASCII file to read.
        output_file: Path of the netCDF file to write.
        stations: Station table forwarded to ``get_station``.
    """
    df, temperature_vars, pressure_vars = init_dataframe(args, input_file)
    # The station number is stored once as a scalar variable instead of being
    # repeated for every record.
    station_number = df['station_number'][0]
    df.drop('station_number', axis=1, inplace=True)

    ds = xr.Dataset.from_dataframe(df)
    # The 'time' column from the dataframe is replaced below by the computed one.
    ds = ds.drop('time')

    # surface_temp = extrapolate_temp(df)

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    month, day, hour, minutes, time, time_bounds, sza, az, first_date, last_date = get_time_and_sza(
        args, df, longitude, latitude)

    common.log(args, 4, 'Calculating quality control variables')
    fill_dataset_quality_control(df, ds, input_file)

    if args.flx:
        common.log(args, 5, 'Calculating Sensible and Latent Heat Fluxes')
        sh, lh = gradient_fluxes(df)
        ds['sh'] = 'time', sh
        ds['lh'] = 'time', lh

    # NOTE(review): the flag name reads like "no derived times", yet the derived
    # time variables are added when it is set -- confirm the intended polarity.
    if args.no_drv_tm:
        ds['month'] = 'time', month
        ds['day'] = 'time', day
        ds['hour'] = 'time', hour
        ds['minutes'] = 'time', minutes

    ds['time'] = 'time', time
    ds['time_bounds'] = ('time', 'nbnd'), time_bounds
    ds['sza'] = 'time', sza
    ds['az'] = 'time', az
    ds['station_number'] = tuple(), station_number
    ds['station_name'] = tuple(), station_name
    ds['latitude'] = tuple(), latitude
    ds['longitude'] = tuple(), longitude
    # ds['surface_temp'] = 'time', surface_temp

    rigb_vars = []
    if args.rigb:
        ds, rigb_vars = common.call_rigb(
            args, station_name, first_date, last_date, ds, latitude, longitude, rigb_vars)

    comp_level = args.dfl_lvl

    # NOTE(review): the call was left unterminated after ``temperature_vars``;
    # ``pressure_vars`` is assumed to be the missing final keyword -- confirm
    # against the signature of common.load_dataset_attributes.
    common.load_dataset_attributes('gcnet', ds, args, rigb_vars=rigb_vars, temperature_vars=temperature_vars,
                                   pressure_vars=pressure_vars)
    encoding = common.get_encoding('gcnet', common.get_fillvalue(args), comp_level, args)

    common.write_data(args, ds, output_file, encoding)
Ejemplo n.º 4
def nsidc2nc(args, input_file, output_file, stations):
    """Convert an NSIDC text file into a netCDF file.

    Args:
        args: Parsed options; forwarded to every helper, and ``dfl_lvl`` is
            read as the compression level.
        input_file: Path of the NSIDC text file to read.
        output_file: Path of the netCDF file to write.
        stations: Station table forwarded to ``get_station``.
    """
    df, temperature_vars, pressure_vars = init_dataframe(args, input_file)

    # The 'time' column coming from the dataframe is replaced further down by
    # the values computed in get_time_and_sza().
    ds = xr.Dataset.from_dataframe(df).drop('time')

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    station_info = get_station(args, input_file, stations)
    latitude, longitude, station_name, elevation, qlty_ctrl = station_info

    common.log(args, 3, 'Calculating time and sza')
    timing = get_time_and_sza(args, df, latitude, longitude)
    time, time_bounds, sza, day_of_year, year1900 = timing

    along_time = 'time'
    scalar = tuple()
    # (variable name, dimensions, values), added in this exact order.
    new_variables = (
        ('day_of_year', along_time, day_of_year),
        ('time', along_time, time),
        ('time_bounds', (along_time, 'nbnd'), time_bounds),
        ('sza', along_time, sza),
        ('station_name', scalar, station_name),
        ('latitude', scalar, latitude),
        ('longitude', scalar, longitude),
        ('elevation', scalar, elevation),
    )
    for var_name, dims, values in new_variables:
        ds[var_name] = dims, values

    comp_level = args.dfl_lvl
    fill_value = common.get_fillvalue(args)
    encoding = common.get_encoding('nsidc', fill_value, comp_level, args)

    common.write_data(args, ds, output_file, encoding)
Ejemplo n.º 5
def promice2nc(args, input_file, output_file, stations):
    """Convert a PROMICE text file to netCDF.

    Args:
        args: Parsed options; forwarded to every helper. ``rigb`` and
            ``dfl_lvl`` are read here.
        input_file: Path of the PROMICE text file to read.
        output_file: Path of the netCDF file to write.
        stations: Station table forwarded to ``get_station``.
    """
    df, temperature_vars, pressure_vars = init_dataframe(args, input_file)
    ds = xr.Dataset.from_dataframe(df)
    # The 'time' column from the dataframe is replaced below by the computed one.
    ds = ds.drop('time')

    common.log(args, 2, 'Retrieving latitude, longitude and station name')
    latitude, longitude, station_name = get_station(args, input_file, stations)

    common.log(args, 3, 'Calculating time and sza')
    time, time_bounds, sza, az, first_date, last_date = get_time_and_sza(
        args, df, longitude, latitude)

    common.log(args, 4, 'Converting lat_GPS and lon_GPS')
    convert_coordinates(args, df)

    common.log(args, 5, 'Calculating ice velocity')
    fill_ice_velocity(args, df, ds)

    ds['time'] = 'time', time
    ds['time_bounds'] = ('time', 'nbnd'), time_bounds
    ds['sza'] = 'time', sza
    ds['az'] = 'time', az
    ds['station_name'] = tuple(), station_name
    ds['latitude'] = tuple(), latitude
    ds['longitude'] = tuple(), longitude

    rigb_vars = []
    if args.rigb:
        # The trailing ``rigb_vars`` argument and closing parenthesis were
        # missing; restored to match the other converters' call_rigb calls.
        ds, rigb_vars = common.call_rigb(args, station_name, first_date,
                                         last_date, ds, latitude, longitude,
                                         rigb_vars)

    comp_level = args.dfl_lvl

    # NOTE(review): temperature_vars and pressure_vars are never used here;
    # other converters pass them to common.load_dataset_attributes -- confirm
    # whether that call is missing from this function.
    encoding = common.get_encoding('promice', common.get_fillvalue(args),
                                   comp_level, args)

    common.write_data(args, ds, output_file, encoding)
Ejemplo n.º 6
def main(dataset, latitude, longitude, clr_df, args):
    """Estimate per-day station tilt direction and tilt angle and add them to ``dataset``.

    For every clear-sky period listed in ``clr_df`` the measured ``fsds`` of
    that day is compared against RRTM ``fsds`` for a grid of candidate
    (tilt direction, tilt angle) pairs; the selected pair is stored per date,
    gaps are interpolated, and the results are written to ``dataset`` as
    ``tilt_direction`` and ``tilt_angle`` (degrees).

    NOTE(review): this copy of the function is incomplete. Several statements
    are visibly truncated (unclosed brackets, branches that lost their
    ``else``/``try`` lines, an ``except`` without a body). Each spot is flagged
    inline; restore them from the upstream source before relying on this code.

    Args:
        dataset: Dataset to annotate; presumably an xarray Dataset (it is used
            via ``drop``, ``to_dataframe`` and item assignment) -- TODO confirm.
        latitude: Station latitude, passed to ``sunposition.sunpos``.
        longitude: Station longitude, passed to ``sunposition.sunpos``.
        clr_df: Table of clear-sky periods; the ``date`` and ``start_hour``
            columns are read here and an end hour is expected per period.
        args: Parsed options; ``tz``, ``merra`` and ``dbg_lvl`` are read.

    Returns:
        The same ``dataset`` object with the tilt variables added.
    """
    # NOTE(review): ddr and rho are constants of the fsds correction formula
    # further down; their physical meaning is not stated here -- TODO confirm.
    ddr = 0.25
    rho = 0.8
    # Substituted for a zero denominator in the correction formula.
    smallest_double = 2.2250738585072014e-308
    dtime_1970, tz = common.time_common(args.tz)

    clrprd_file = clr_df
    # Combine date, start_hour and end_hour into a single string, e.g. 20080103_16_23
    # NOTE(review): neither zip(...) nor the list comprehension is closed in
    # this copy; the end-hour argument appears to be missing.
    clrprd = [(str(x) + '_' + str(y) + '_' + str(z)) for x, y, z in zip(
        clrprd_file['date'].tolist(), clrprd_file['start_hour'].tolist(),

    hours = list(range(24))
    half_hours = (list(np.arange(0, 24, 0.5)))

    # NOTE(review): drop() is called without an argument in this copy; per the
    # trailing comment the dropped name is presumably 'time_bounds'.
    ds = dataset.drop(
    )  # Drop time_bounds dimension so that we don't have double entries of same data
    df = ds.to_dataframe()  # Convert to dataframe

    date_hour = [datetime.fromtimestamp(i, tz)
                 for i in df.index.values]  # Index is seconds since 1970
    dates = [i.date() for i in date_hour]  # Get dates
    df['dates'] = dates  # Add as new column

    # Create new dataframe to store tilt_direction and tilt_angle
    tilt_df = pd.DataFrame(index=dates,
                           columns=['tilt_direction', 'tilt_angle'])

    lat = latitude
    lon = longitude

    # Drop 'time' as index to do proper indexing for station_name
    df.reset_index(level=['time'], inplace=True)
    stn_name = df['station_name'][0]
    # Replace fillvalue with nan for calculations
    df[['fsds']] = df[['fsds']].replace(common.fillvalue_float, np.nan)

    # Location of the RRTM data; the AIRS directory is the default and the
    # MERRA directory is used when args.merra is set.
    jaws_path = 'http://jaws.ess.uci.edu/jaws/rigb_data/'
    dir_rrtm = 'rrtm-airx3std/'
    sfx = '.rrtm.nc'

    if args.merra:
        dir_rrtm = 'rrtm-merra/'

    rrtm_file = get_rrtm_file(jaws_path, dir_rrtm, stn_name, sfx)

    if rrtm_file:
        rrtm_df = get_rrtm_df(stn_name, sfx, rrtm_file)
    else:  # If no AIRS RRTM file, try MERRA RRTM file
        dir_rrtm = 'rrtm-merra/'
        rrtm_file = get_rrtm_file(jaws_path, dir_rrtm, stn_name, sfx)
        if rrtm_file:
            rrtm_df = get_rrtm_df(stn_name, sfx, rrtm_file)
            # NOTE(review): the line(s) that introduce the error message below
            # (presumably an ``else:`` branch that reports it) are missing.
                'ERROR: RRTM data not available for this station. Please report it on github.com/jaws/jaws/issues'

    # Reference point for the periodic 'Still working...' progress message.
    start_time = time.time()

    #                   Tilt Angle and Tilt Direction Calculations                         #

    #                           ########PART-1#############                                #

    for line in clrprd:
        # Each entry has the form <YYYYMMDD>_<start hour>_<end hour>.
        clrdate = line.split('_')[0]
        clrhr_start = int(line.split('_')[1])
        clrhr_end = int(line.split('_')[2])
        clrhr_end = clrhr_end + 1  # To make sure we include the last hour when slicing the data
        year = int(clrdate[:4])
        month = int(clrdate[4:6])
        day = int(clrdate[6:])
        current_date_hour = datetime(year, month, day).date()

        # NOTE(review): the date slice below is not closed in this copy, and the
        # statements that originally followed ``if fsds_rrtm:`` appear to be
        # missing (the progress message is unlikely to be its intended body).
        fsds_rrtm = rrtm_df.loc[str(year) + '-' + str(month) + '-' +
                                str(day):str(year) + '-' + str(month) + '-' +
        if fsds_rrtm:
            if args.dbg_lvl > 6:
                current_time = time.time()
                if (current_time - start_time) > 10 * 60:
                    print('Still working...')
                    start_time = current_time

        # Subset dataframe
        df_sub = df[df.dates == current_date_hour]

        # Measured fsds without gaps, plus the positions of missing and present
        # values within the day's subset.
        fsds_jaws_nonmsng = df_sub['fsds'].dropna().tolist()
        indexMissingJAWS = np.where(df_sub['fsds'].isna())
        indexMissingJAWS = [a for b in indexMissingJAWS
                            for a in b]  # Convert to list

        hours_nonmsng = np.where(df_sub['fsds'].notnull())
        hours_nonmsng = [a for b in hours_nonmsng
                         for a in b]  # Convert to list
        hours_nonmsng = [i + 0.5 for i in hours_nonmsng]  # Half-hour values

        # Interpolate fsds and sza for half-hour values
        # NOTE(review): the CubicSpline(...) call is not closed, and the lines
        # that skip the day / start the interpolation branch appear missing.
        if len(fsds_jaws_nonmsng) < 2:
            if args.dbg_lvl > 6:
                print("Skipping this day as there is only 1 value of fsds")
            fsds_intrp = CubicSpline(hours_nonmsng,
        fsds_intrp = [a for a in fsds_intrp]  # Convert to list

        # Calculate azimuth angle
        # Solar position is evaluated at minute 0 and minute 30 of every hour,
        # giving 48 samples per day.
        az = []
        sza = []
        for hour in hours:
            dtime = datetime(year, month, day, hour, 0)
            az.append(sunposition.sunpos(dtime, lat, lon, 0)[0])
            sza.append(sunposition.sunpos(dtime, lat, lon, 0)[1])
            dtime = datetime(year, month, day, hour, 30)
            az.append(sunposition.sunpos(dtime, lat, lon, 0)[0])
            sza.append(sunposition.sunpos(dtime, lat, lon, 0)[1])

        az = [(i - 180) for i in az]

        alpha = [(90 - i) for i in sza]

        # Candidate tilt angles: 0.25 to 45 in steps of 0.25.
        beta = list(np.arange(0.25, 45.25, 0.25))

        sza_noon = [np.cos(np.radians(i)) for i in sza]

        # Check if measured solar noon time > true solar noon time
        # NOTE(review): ``aw`` is assigned twice in a row; the ``else:`` that
        # selects the second (negative) range appears to be missing.
        if fsds_intrp.index(max(fsds_intrp)) > sza_noon.index(max(sza_noon)):
            aw = list(np.arange(0, 180, 0.25))
            aw = list(np.arange(-179.75, 0.25, 0.25))

        az = deg_to_rad(az)
        alpha = deg_to_rad(alpha)
        beta = deg_to_rad(beta)
        aw = deg_to_rad(aw)

        # Make pairs of aw,beta
        pairs = []
        for i in aw:
            for j in beta:
                pairs.append(tuple((i, j)))

        # Find all possible pairs using correct fsds
        possible_pairs = []
        daily_avg_diff = []
        best_pairs = []
        fsds_possiblepair_dict = {}

        for pair in pairs:
            # pair[0] is the candidate aw value and pair[1] the candidate beta.
            count = 0
            cos_i = []
            fsds_correct = []
            while count < len(alpha):
                cos_i.append((np.cos(alpha[count]) *
                              np.cos(az[count] - pair[0]) * np.sin(pair[1]) +
                              (np.sin(alpha[count]) * np.cos(pair[1]))))
                nmr = fsds_intrp[count] * (np.sin(alpha[count]) + ddr)
                dnmr = cos_i[count] + (ddr * (1 + np.cos(pair[1])) /
                                       2.) + (rho *
                                              (np.sin(alpha[count]) + ddr) *
                                              (1 - np.cos(pair[1])) / 2.)
                # Avoid division by zero.
                if dnmr == 0:
                    dnmr = smallest_double
                fsds_correct.append(nmr / dnmr)

                count += 1

            # Keep the pair only if both peak positions agree to within one
            # sample: cos_i vs. measured fsds, and corrected fsds vs. cos(sza).
            if (abs(
                    cos_i.index(max(cos_i)) -
                    fsds_intrp.index(max(fsds_intrp))) <= 1 and abs(
                        fsds_correct.index(max(fsds_correct)) -
                        sza_noon.index(max(sza_noon))) <= 1):

                # Every second sample, starting at index 1 (the minute-30 values).
                fsds_correct_half = fsds_correct[1::2]
                fsds_possiblepair_dict[pair] = fsds_correct_half

                # NOTE(review): the body of this loop is reduced to two log calls
                # at a deeper indent; the guarded statements they belonged to
                # appear to be missing.
                for msng_idx in indexMissingJAWS:
                        common.log(args, 9,
                                   'Warning: missing index fsds_correct_half')
                        common.log(args, 9, 'Warning: missing index fsds_rrtm')

                # NOTE(review): the comprehension below is not closed, and
                # nothing in this copy appends to possible_pairs or
                # daily_avg_diff.
                diff = [
                    abs(x - y)
                    for x, y in zip(fsds_correct_half[clrhr_start:clrhr_end],

        #                           ########PART-2#############                                #

        # Maps each daily average difference to the pair that produced it.
        dailyavg_possiblepair_dict = dict(zip(daily_avg_diff, possible_pairs))

        # NOTE(review): the code after ``continue`` is unreachable as indented;
        # an ``else:`` appears to be missing, and the innermost ``if`` has lost
        # its body.
        if not dailyavg_possiblepair_dict.keys():
            continue  # Skip day if no possible pair
            if min(dailyavg_possiblepair_dict.keys()) <= 50:
                for val in dailyavg_possiblepair_dict.keys():
                    if val <= min(dailyavg_possiblepair_dict.keys()) + 5:

        #                           ########PART-3#############                                #

        fsds_bestpair_dict = {k: fsds_possiblepair_dict[k] for k in best_pairs}

        # For every best pair, the list of daily average differences mapped to it.
        bestpair_dailyavg_dict = dict((bp, [
            key for (key, value) in dailyavg_possiblepair_dict.items()
            if value == bp
        ]) for bp in best_pairs)

        num_spikes = []
        for pair in fsds_bestpair_dict:
            fsds_correct_top = fsds_bestpair_dict[pair]
            counter = 0
            spike_hrs = 0
            # NOTE(review): both list expressions below are not closed in this
            # copy.
            diff_top = [
                abs(x - y)
                for x, y in zip(fsds_correct_top[clrhr_start:clrhr_end],
            fsds_rrtm_10 = [
                ij * 0.1 for ij in fsds_rrtm[clrhr_start:clrhr_end]
            # Count the hours whose difference exceeds 10% of the RRTM value.
            for val in diff_top:
                if diff_top[counter] > fsds_rrtm_10[counter]:
                    spike_hrs += 1
                counter += 1

            num_spikes.append((spike_hrs, bestpair_dailyavg_dict[pair]))

            # NOTE(review): the selection below is indented inside the loop, and
            # the warning + ``continue`` run unconditionally as written; the
            # surrounding ``try:``/``except`` lines appear to be missing.
            top_pair = best_pairs[num_spikes.index(min(num_spikes))]

            tilt_df.at[current_date_hour, 'tilt_direction'] = top_pair[0]
            tilt_df.at[current_date_hour, 'tilt_angle'] = top_pair[1]
            common.log(args, 9, 'Warning: no top pair found')
            continue  # Skip day if no top pair


    # NOTE(review): both pd.to_numeric(...) calls are not closed in this copy.
    tilt_df['tilt_direction'] = pd.to_numeric(tilt_df['tilt_direction'],
    tilt_df['tilt_angle'] = pd.to_numeric(tilt_df['tilt_angle'],

    tilt_df = tilt_df.interpolate(
        limit_direction='both')  # Interpolate missing values
    tilt_direction_values = tilt_df['tilt_direction'].tolist()
    tilt_angle_values = tilt_df['tilt_angle'].tolist()

    tilt_direction_values = rad_to_deg(tilt_direction_values)
    # NOTE(review): the start of the next statement (the object being indexed
    # with 'tilt_direction_raw') is missing in this copy.
        'tilt_direction_raw'] = 'time', tilt_direction_values  # Raw values to be used in fsds_adjust script

    # Change tilt_direction to 0 pointing north. These values will be in output netCDF file
    tilt_direction_values = [270 - d for d in tilt_direction_values]
    # NOTE(review): the list literal below is not closed in this copy.
    tilt_direction_values = [
        d - 360 if d > 360 else d for d in tilt_direction_values

    tilt_angle_values = rad_to_deg(tilt_angle_values)

    # Add tilt_direction and tilt_angle to output file
    dataset['tilt_direction'] = 'time', tilt_direction_values
    dataset['tilt_angle'] = 'time', tilt_angle_values

    # NOTE(review): the ``except`` clause has no body in this copy, and it is a
    # bare ``except`` that would hide every error raised by os.remove.
    try:  # Remove downloaded rrtm_df file
        os.remove(stn_name + sfx)
    except:  # Windows

    return dataset
Ejemplo n.º 7
def post_process(df, dates, stn_name, sfx, args):
    """Calculate fsus_adjusted and quality check for fsds_adjusted

    For each date, the day's ``fsds_adjusted``, ``fsus`` and ``sza`` values are
    read from ``df``, screened against the CERES top-of-atmosphere values and
    the second derivative of albedo, and written back row by row into the new
    ``fsds_adjusted_new`` and ``fsus_adjusted`` columns.

    NOTE(review): this copy of the function is incomplete. Several statements
    are visibly truncated (unclosed slices and list literals, an ``except``
    without its ``try``). Each spot is flagged inline; restore them from the
    upstream source before relying on this code.

    Args:
        df: Dataframe with ``dates``, ``fsds_adjusted``, ``fsus`` and ``sza``
            columns; modified in place. Rows are addressed through
            ``df.at[outer_idx, ...]`` with a counter starting at 0, so a
            0-based integer index is presumably expected -- TODO confirm.
        dates: Iterable of date objects (``year``, ``month`` and ``day`` are
            read from each).
        stn_name: Station name; ``stn_name + sfx`` is the file that is opened.
        sfx: File suffix appended to ``stn_name``.
        args: Parsed options, forwarded to ``common.log``.

    Returns:
        Tuple of two lists: the ``fsds_adjusted_new`` and ``fsus_adjusted``
        column values.
    """
    # Initialize new variables
    # NOTE(review): ``np.float`` was deprecated in NumPy 1.20 and removed in
    # 1.24; on versions that still provide it, ``np.float()`` is 0.0.
    df['fsds_adjusted_new'] = np.float()
    df['fsus_adjusted'] = np.float()
    thrsh = 0.1  # Threshold
    # Running row position in df across all dates.
    outer_idx = 0

    for date in dates:
        year = date.year
        month = date.month
        day = date.day

        # Open downloaded ceres file as dataframe
        # NOTE(review): the file is re-opened for every date although the path
        # does not change inside the loop.
        ceres_df = xr.open_dataset(stn_name + sfx).to_dataframe()
        # Get toa values for each day; dataframe index is date
        # NOTE(review): the date slice below is not closed in this copy; the
        # column selection that follows it is missing.
        toa = ceres_df.loc[str(year) + '-' + str(month) + '-' +
                           str(day):str(year) + '-' + str(month) + '-' +

        # Subset dataframe for each day
        df_sub = df[df.dates == date]

        # Get fsds_adjusted and fsus values for that day and substitute NaN with fillvalue_float
        # NOTE(review): the two list comprehensions below are not closed in
        # this copy.
        fsds_adjusted = df_sub['fsds_adjusted'].tolist()
        fsds_adjusted = [
            common.fillvalue_float if np.isnan(i) else i for i in fsds_adjusted

        fsus_jaws = df_sub['fsus'].tolist()
        fsus_jaws = [
            common.fillvalue_float if np.isnan(i) else i for i in fsus_jaws

        sza = df_sub['sza'].tolist()
        sza = deg_to_rad(sza)

        # Calculate albedo
        # fsds_alb is the same list object as fsds_adjusted (no copy is made),
        # so the fill values written below also appear in fsds_adjusted.
        fsds_alb = fsds_adjusted
        idx_alb = 0
        while idx_alb < len(fsds_adjusted):
            if (fsds_adjusted[idx_alb] <= 0) or (fsus_jaws[idx_alb] < 0):
                fsds_alb[idx_alb] = common.fillvalue_float

            idx_alb += 1

        albedo = [x / y for x, y in zip(fsus_jaws, fsds_alb)]
        albedo = [abs(i) for i in albedo]

        # Calculate number of hours
        hours = np.arange(len(albedo))

        idx = 0
        # If less than 2 values for a day, derivative not possible, continue to next day without quality check
        if len(albedo) < 2:
            # Copy the values through unchanged; idx reaches the list length
            # here, so the quality-check loop further down does not run.
            while idx < len(fsds_adjusted):
                df.at[outer_idx, 'fsds_adjusted_new'] = fsds_adjusted[idx]
                df.at[outer_idx, 'fsus_adjusted'] = fsus_jaws[idx]
                outer_idx += 1
                idx += 1
        else:  # Calculate second-order derivative of albedo
            alb_second_derv = first_order_derivative(
                hours, first_order_derivative(hours, albedo))

        # Check all following conditions for each hour
        # NOTE(review): the checks below are indented one level deeper than the
        # ``except`` that follows them; the matching ``try:`` line is missing in
        # this copy.
        while idx < len(fsds_adjusted):
                # The checks run in sequence, so later ones see the values
                # already replaced by earlier ones.
                if np.cos(sza[idx]) <= 0:
                    fsds_adjusted[idx] = 0
                if abs(alb_second_derv[idx]) > thrsh:
                    fsds_adjusted[idx] = common.fillvalue_float
                if fsds_adjusted[idx] > toa[idx]:
                    fsds_adjusted[idx] = common.fillvalue_float
                if (fsds_adjusted[idx] <
                        toa[idx] * 0.05) and (fsds_adjusted[idx] != 0):
                    fsds_adjusted[idx] = common.fillvalue_float
                    fsus_jaws[idx] = common.fillvalue_float
                if (fsus_jaws[idx] > fsds_adjusted[idx] * 0.95) or (
                        fsus_jaws[idx] < fsds_adjusted[idx] * 0.05):
                    fsus_jaws[idx] = common.fillvalue_float
                if fsds_adjusted[idx] < 0:
                    fsds_adjusted[idx] = 0
                if fsus_jaws[idx] == 0:
                    fsds_adjusted[idx] = 0
                if fsds_adjusted[idx] == 0:
                    fsus_jaws[idx] = 0

                # Insert calculated values
                df.at[outer_idx, 'fsds_adjusted_new'] = fsds_adjusted[idx]
                df.at[outer_idx, 'fsus_adjusted'] = fsus_jaws[idx]
            # NOTE(review): a bare ``except`` hides every error raised above, not
            # only the index error named in the comment.
            except:  # Exception for list index out of range toa[idx]
                common.log(args, 9,
                           'Warning: list index out of range for toa[idx]')

            idx += 1
            outer_idx += 1

    fsds_adjusted_values_new = df['fsds_adjusted_new'].tolist()
    fsus_adjusted_values = df['fsus_adjusted'].tolist()

    return fsds_adjusted_values_new, fsus_adjusted_values