def read_data(settings, suffix, name, year, grid_lats, grid_lons, period, N_OBS_PER_DAY):
    '''
    Read in one year of data for a single variable from the monthly netCDF files

    One file per calendar month is opened, the requested variable is copied
    into the matching time slots of a year-long masked array, and the file is
    closed again before moving on to the next month.

    :param Settings settings: object to hold all filepaths etc.
        (DATA_LOCATION, OUTROOT and mdi are read here)
    :param str suffix: "relax" or "strict" criteria tag; in this routine it is
        only used to build the input filename
    :param str name: variable name
    :param int year: year to read
    :param array grid_lats: latitudes
    :param array grid_lons: longitudes
    :param str period: which period (day/night/all)
    :param int N_OBS_PER_DAY: number of observation times per day

    :returns: var_3hrlys - masked array of 3hrly data for single variable,
        shape (days_in_year * N_OBS_PER_DAY, n_lats, n_lons)
    '''

    shape = [utils.days_in_year(year)*N_OBS_PER_DAY, len(grid_lats), len(grid_lons)]

    # set up empty data array
    var_3hrlys = np.ma.zeros(shape)
    var_3hrlys.mask = np.zeros(shape)
    var_3hrlys.fill_value = settings.mdi

    for month in range(1, 13):
        # single-string form prints "<year> <month>" under both Python 2 and 3
        print("{} {}".format(year, month))

        # NOTE(review): month_start is used directly as an array offset, so
        # utils.day_of_year presumably returns a 0-based day index - confirm
        month_start = utils.day_of_year(year, month)
        month_end = month_start + calendar.monthrange(year, month)[1]

        filename = "{}/{}_1x1_3hr_{}{:02d}_{}_{}.nc".format(settings.DATA_LOCATION, settings.OUTROOT, year, month, period, suffix)

        ncdf_file = ncdf.Dataset(filename,'r', format='NETCDF4')
        try:
            # [:] pulls the values into memory, so the file can be closed straight away
            month_data = ncdf_file.variables[name][:]
        finally:
            # always release the handle, even if the variable is missing
            ncdf_file.close()

        if month == 12:
            # run to end of year if december
            var_3hrlys[month_start*N_OBS_PER_DAY:, :, :] = month_data
        else:
            var_3hrlys[month_start*N_OBS_PER_DAY:month_end*N_OBS_PER_DAY, :, :] = month_data

    return var_3hrlys # read_data


def do_conversion(start_year=defaults.START_YEAR, end_year=defaults.END_YEAR, period="all", doBC=False, doQC=True):
    """
    Convert dailies to pentads 1x1

    For every year in the requested range the twelve monthly files of daily
    grids are read into one year-long array, the days are grouped in fives and
    averaged (mean or median, depending on settings.doMedian), and the result
    is written to one pentad netCDF file per year.

    Leap years are handled by dropping Feb 29th so that the year still splits
    into 73 five-day pentads; the pentad that would have contained Feb 29th is
    then overwritten with the me(di)an of the full six days.

    NOTE(review): grid_lats, grid_lons and N_OBS are not defined inside this
    function; they are presumably module-level names - confirm.

    :param int start_year: start year to process
    :param int end_year: end year to process (inclusive)
    :param str period: which period to do day/night/all?
    :param bool doBC: work on the bias corrected data
    :param bool doQC: incorporate the QC flags or not

    :returns: None - output is written to file
    """
    # 0-based positions within a leap year
    FEB29_DAY = 59  # 31 days of January + 28 days of February
    FEB29_PENTAD = 11  # pentad covering days 55-59 once Feb 29th is removed
    FEB29_WINDOW = slice(55, 61)  # the six days of that pentad with Feb 29th kept

    settings = set_paths_and_vars.set(doBC=doBC, doQC=doQC)

    OBS_ORDER = utils.make_MetVars(settings.mdi, multiplier=False)

    for year in np.arange(start_year, end_year + 1):

        n_days = utils.days_in_year(year)
        is_leap = calendar.isleap(year)

        # set up empty data array
        dailies_shape = [len(OBS_ORDER), n_days, len(grid_lats), len(grid_lons)]
        all_dailies = np.ma.zeros(dailies_shape)
        all_dailies.mask = np.zeros(dailies_shape)
        all_dailies.fill_value = settings.mdi

        all_n_obs = np.zeros([n_days, len(grid_lats), len(grid_lons)])

        for month in range(1, 13):
            # single-string form prints "<year> <month>" under both Python 2 and 3
            print("{} {}".format(year, month))

            # NOTE(review): month_start is used directly as an array offset, so
            # utils.day_of_year presumably returns a 0-based day index - confirm
            month_start = utils.day_of_year(year, month)
            month_end = month_start + calendar.monthrange(year, month)[1]

            # run to end of year if december
            days = slice(month_start, None if month == 12 else month_end)

            filename = "{}/{}_1x1_daily_{}{:02d}_{}.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, year, month, period
            )

            ncdf_file = ncdf.Dataset(filename, "r", format="NETCDF4")
            try:
                for v, var in enumerate(OBS_ORDER):
                    all_dailies[v, days, :, :] = ncdf_file.variables[var.name][:]

                # now get number of observations
                all_n_obs[days, :, :] = ncdf_file.variables["n_obs"][:]
            finally:
                # always release the handle, otherwise 12 files per year stay open
                ncdf_file.close()

        if is_leap:
            assert all_dailies.shape[1] == 366

            # extract 6-day pentad; copy so that this small window does not keep
            # the whole 366-day array alive after it is deleted further down
            incl_feb29th = all_dailies[:, FEB29_WINDOW, :, :].copy()

            # remove the data of Feb 29th from array
            # np.ma.delete doesn't exist, so have to copy mask separately
            mask = all_dailies.mask
            all_dailies = np.delete(all_dailies, FEB29_DAY, 1)
            mask = np.delete(mask, FEB29_DAY, 1)
            all_dailies = np.ma.array(all_dailies, mask=mask)
            del mask

            # number of observations (copied for the same reason as above)
            incl_feb29th_n_obs = all_n_obs[FEB29_WINDOW, :, :].copy()
            all_n_obs = np.delete(all_n_obs, FEB29_DAY, 0)

        else:
            assert all_dailies.shape[1] == 365

        # split the day axis into (pentad, day-within-pentad)
        shape = all_dailies.shape
        all_dailies = all_dailies.reshape(shape[0], -1, 5, shape[-2], shape[-1])

        # unmasked days available in each pentad
        n_days_per_pentad = np.ma.count(all_dailies, axis=2)

        if settings.doMedian:
            pentad_grid = utils.bn_median(all_dailies, axis=2)
        else:
            pentad_grid = np.ma.mean(all_dailies, axis=2)

        # clear up memory
        del all_dailies
        gc.collect()

        # total number of observations in each pentad
        all_n_obs = all_n_obs.reshape(-1, 5, shape[-2], shape[-1])
        all_n_obs = np.sum(all_n_obs, axis=1)

        # mask pentads with fewer than N_OBS days of data
        # (the original note here states the threshold is 2)
        pentad_grid.mask[n_days_per_pentad < N_OBS] = True

        # the pentad containing feb 29th is at (0-based) index 11 in the year
        if is_leap:
            #  overwrite this with the me(di)an of a 6-day pentad
            if settings.doMedian:
                pentad_grid[:, FEB29_PENTAD, :, :] = utils.bn_median(incl_feb29th, axis=1)
            else:
                pentad_grid[:, FEB29_PENTAD, :, :] = np.ma.mean(incl_feb29th, axis=1)

            feb_n_days_per_pentad = np.ma.count(incl_feb29th, axis=1)
            # basic slicing gives a view, so this writes through to pentad_grid.mask
            pentad_grid.mask[:, FEB29_PENTAD, :, :][feb_n_days_per_pentad < N_OBS] = True
            n_days_per_pentad[:, FEB29_PENTAD, :, :] = feb_n_days_per_pentad

            all_n_obs[FEB29_PENTAD, :, :] = np.sum(incl_feb29th_n_obs, axis=0)

            print("processed Feb 29th")

        # pentad start times in hours since the start of the year
        times = utils.TimeVar("time", "time since 1/1/{} in hours".format(year), "hours", "time")
        times.data = np.arange(0, pentad_grid.shape[1]) * 5 * 24

        # NOTE(review): unlike the input path no "/" is inserted here, so this
        # relies on settings.DATA_LOCATION ending with a path separator - confirm
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_pentad_{}_{}.nc".format(year, period)

        utils.netcdf_write(
            out_filename,
            pentad_grid,
            n_days_per_pentad[0],  # day counts of the first variable only
            all_n_obs,
            OBS_ORDER,
            grid_lats,
            grid_lons,
            times,
            frequency="P",
        )

        del pentad_grid
        del all_n_obs
        del n_days_per_pentad
        gc.collect()

    return  # do_conversion