def read_nocs():
    """
    Read the NOCS q datafile

    The first column of the text file is parsed as a date ("%d-%b-%Y") and the
    last column as the data value.  The annual series is the mean of each
    consecutive block of 12 monthly values, so the record must contain a whole
    number of years (the reshape raises ValueError otherwise).

    :returns: monthly, annual - PlotData objects holding the monthly and the
              annual-mean values respectively
    """

    indata = np.genfromtxt("/project/hadobs2/hadisdh/marine/otherdata/NOCS_q_sotc2015.txt", dtype = str)

    times = np.array([dt.datetime.strptime(i, "%d-%b-%Y") for i in indata[:, 0]])
    nocs = np.array([float(i) for i in indata[:, -1]])

    # monthly
    # This is the q (specific humidity) series, so it carries the same
    # metadata as the annual means derived from it below (it was previously
    # mislabelled as relative humidity in %rh).
    y = utils.set_MetVar_attributes("specific_humidity", "Specific humidity", "specific humidity", "g/kg", mdi, np.dtype('float64'), 8, multiplier = False)
    y.data = nocs

    monthly = PlotData("", y, times, "", 'g', 1, 1)

    # annual - mean over each block of 12 months
    annual_values = np.mean(nocs.reshape(-1, 12), axis = 1)
    y = utils.set_MetVar_attributes("specific_humidity", "Specific humidity", "specific humidity", "g/kg", mdi, np.dtype('float64'), 8, multiplier = False)
    y.data = annual_values

    # time stamp each annual value with the seventh month of its 12-month block
    annual_times = times.reshape(-1, 12)[:, 6]

    annual = PlotData("", y, annual_times, "NOCS-q", 'DarkGreen', 10, 2)

    return monthly, annual  # read_nocs
def get_data(filename):
    """
    Read in the netCDF data

    NOTE: the variable to read is taken from a ``var`` object that is not a
    parameter of this function - it must exist in the enclosing/global scope
    and provide a ``name`` attribute.

    :param str filename: file to read in data from
    :returns: ydata - MetVar object
              times - time data

    """

    ncdf_file = ncdf.Dataset(filename,'r', format='NETCDF4')

    try:
        nc_var = ncdf_file.variables[var.name]
        nc_times = ncdf_file.variables["time"] # days since 1973/1/1

        times = ncdf.num2date(nc_times[:], units = nc_times.units, calendar = 'gregorian')

        # convert to MetVar from netCDF object
        ydata = utils.set_MetVar_attributes(var.name, nc_var.long_name, nc_var.standard_name, nc_var.units, mdi, np.dtype('float64'), 0, multiplier = False)
        # slicing pulls the values into memory, so they stay valid after the file is closed
        ydata.data = nc_var[:]
    finally:
        # always release the file handle, even if a variable/attribute is missing
        ncdf_file.close()

    return ydata, times
def make_timeseries(
    suffix="relax",
    doQC=False,
    doQC1it=False,
    doQC2it=False,
    doQC3it=False,
    doBC=False,
    doBCtotal=False,
    doBChgt=False,
    doBCscn=False,
):
    """
    Make the timeseries - plots and netCDF files

    For every version (actuals and renormalised anomalies) and every period
    (both/day/night) the 5x5 monthly gridded file is read and, for each
    variable, each monthly field is collapsed to a single value: the sum of
    the data multiplied by the weights returned by mask_and_normalise_weights
    (n_obs is summed without weights).  Annual values are the mean of each
    block of 12 monthly values (the sum for n_obs).  A PNG is saved per
    variable and the monthly and annual series are written with write_ncdf_ts.

    All do* flags are passed unchanged to set_paths_and_vars.set.

    :param str suffix: "relax" or "strict" criteria
    :param bool doQC: incorporate the QC flags or not
    :param bool doQC1it: incorporate the first iteration QC flags or not
    :param bool doQC2it: incorporate the second iteration QC flags or not
    :param bool doQC3it: incorporate the third iteration QC flags or not
    :param bool doBC: work on the bias corrected data
    :param bool doBCtotal: work on the bias corrected data
    :param bool doBChgt: work on the bias corrected data
    :param bool doBCscn: work on the bias corrected data

    :returns: None
    """
    settings = set_paths_and_vars.set(
        doBC=doBC,
        doBCtotal=doBCtotal,
        doBChgt=doBChgt,
        doBCscn=doBCscn,
        doQC=doQC,
        doQC1it=doQC1it,
        doQC2it=doQC2it,
        doQC3it=doQC3it,
    )

    # single-argument parenthesised prints give the same output as the
    # Python 2 print statement
    print("Do QC = {}".format(doQC))
    print("Do QC1it = {}".format(doQC1it))
    print("Do QC2it = {}".format(doQC2it))
    print("Do QC3it = {}".format(doQC3it))
    print("Do BC = {}".format(doBC))
    print("Do BCtotal = {}".format(doBCtotal))
    print("Do BChgt = {}".format(doBChgt))
    print("Do BCscn = {}".format(doBCscn))

    # monthly -> annual

    # provenance stamp for the plots: working directory (from the fifth path
    # component onwards), script name and creation time
    watermarkstring = (
        "/".join(os.getcwd().split("/")[4:])
        + "/"
        + os.path.basename(__file__)
        + "   "
        + dt.datetime.strftime(dt.datetime.now(), "%d-%b-%Y %H:%M")
    )

    # run on the actuals (which include anomalies from ERA) and the anomalies (calculated from obs-actuals, but also include the anomalies from ERA)
    # the anomaly files carry the '_renorm19812010' file name part
    for version in ["", "_renorm19812010_anomalies"]:
        if version == "":
            print("5x5 monthly Standard")
        else:
            # any non-empty version is an anomaly product; the previous test
            # against "_anomalies" could never match the renormalised name
            print("5x5 monthly Anomalies")

        for period in ["both", "day", "night"]:
            print(period)

            filename = "{}/{}_5x5_monthly{}_from_daily_{}_{}.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, version, period, suffix
            )

            print(filename)
            ncdf_file = ncdf.Dataset(filename, "r", format="NETCDF4")

            lat_centres = ncdf_file.variables["latitude"]
            lon_centres = ncdf_file.variables["longitude"]

            n_obs = utils.set_MetVar_attributes(
                "n_obs", "Number of Observations", "Number of Observations", 1, -1, np.dtype("int64"), 0
            )
            OBS_ORDER = utils.make_MetVars(settings.mdi, multiplier=False)
            OBS_ORDER += [n_obs]

            for var in OBS_ORDER:
                print(var.name)

                var.data = ncdf_file.variables[var.name][:]

                # make annual and monthly timeseries

                mesh_lon, mesh_lat = np.meshgrid(lon_centres, lat_centres)
                cosines = np.cos(np.radians(mesh_lat))

                full_cosines = mask_and_normalise_weights(cosines, var.data)
                # masked weights now sum to one for each field

                if var.name == "n_obs":
                    # counts are totalled, not area-averaged
                    weighted_data = var.data
                else:
                    weighted_data = var.data * full_cosines

                plot_values = np.zeros(weighted_data.shape[0])
                plot_times = []
                for y in range(weighted_data.shape[0]):

                    plot_values[y] = np.ma.sum(weighted_data[y])

                    # y counts months from January of START_YEAR; floor
                    # division keeps the year an integer
                    plot_times += [dt.datetime(settings.START_YEAR + (y // 12), 1 + (y % 12), 1, 0, 0)]

                # plot the monthly data
                plt.clf()
                plt.plot(plot_times, plot_values, "r-", label="Monthly")

                var.mdata = plot_values
                monthly_times = plot_times

                # and annual - one row per year (needs a whole number of years)
                plot_values = plot_values.reshape(-1, 12)

                if var.name != "n_obs":
                    plot_values = np.mean(plot_values, axis=1)
                    plot_times = [dt.datetime(settings.START_YEAR + y, 7, 1) for y in range(plot_values.shape[0])]
                    plt.plot(plot_times, plot_values, "b-", label="Annual")

                    plt.ylabel(var.units)

                else:
                    # if n_obs, then have second y-axis as the annual totals
                    # are on a different scale to the monthly ones
                    plot_values = np.sum(plot_values, axis=1)
                    plot_times = [dt.datetime(settings.START_YEAR + y, 7, 1) for y in range(plot_values.shape[0])]

                    # finish off first axis
                    ax1 = plt.gca()
                    ax1.set_ylabel("Monthly", color="r")
                    for tl in ax1.get_yticklabels():
                        tl.set_color("r")

                    # add second axis
                    ax2 = ax1.twinx()
                    ax2.plot(plot_times, plot_values, "b-", label="Annual")
                    ax2.set_ylabel("Annual", color="b")
                    for tl in ax2.get_yticklabels():
                        tl.set_color("b")

                var.adata = plot_values
                annual_times = plot_times

                # and prettify the plot
                plt.title(" ".join([x.capitalize() for x in var.name.split("_")]))
                if var.name != "n_obs":
                    plt.legend()
                plt.figtext(0.01, 0.01, watermarkstring, size=6)

                plt.savefig(
                    "{}/{}_5x5_monthly{}_from_daily_{}_{}_ts.png".format(
                        settings.PLOT_LOCATION, settings.OUTROOT, version, period, var.name
                    )
                )

            # clean up
            ncdf_file.close()
            del (weighted_data)
            del (full_cosines)
            gc.collect()

            # write output files (annual and monthly), replacing any old copy
            filename = "{}/{}_5x5_monthly{}_from_daily_{}_{}_ts_annual.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, version, period, suffix
            )

            if os.path.exists(filename):
                os.remove(filename)
            write_ncdf_ts(annual_times, OBS_ORDER, filename, annual=True, do_zip=True)

            filename = "{}/{}_5x5_monthly{}_from_daily_{}_{}_ts_monthly.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, version, period, suffix
            )

            if os.path.exists(filename):
                os.remove(filename)
            write_ncdf_ts(monthly_times, OBS_ORDER, filename, monthly=True, do_zip=True)

            # clean up
            del (plot_values)
            del (plot_times)
            del (OBS_ORDER)
            gc.collect()

    # not activated at present
    pentads = False
    if pentads:
        # pentad -> annual
        # NOTE(review): `period` below is whatever the loops above left behind
        # ("night") - confirm the intended period before activating this branch
        OBS_ORDER = utils.make_MetVars(settings.mdi, multiplier=False)

        for var in OBS_ORDER:
            print(var.name)

            filename = "{}/{}_1x1_pentads_from_3hrly_{}_{}_{}.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, var.name, period, suffix
            )

            ncdf_file = ncdf.Dataset(filename, "r", format="NETCDF4")

            lat_centres = ncdf_file.variables["latitude"]
            lon_centres = ncdf_file.variables["longitude"]

            data_shape = ncdf_file.variables[var.name][:].shape

            # pentads
            mesh_lon, mesh_lat = np.meshgrid(lon_centres, lat_centres)
            cosines = np.cos(np.radians(mesh_lat))

            plot_values = np.zeros(data_shape[0])
            plot_times = []
            year = copy.deepcopy(settings.START_YEAR)

            for ts in range(data_shape[0]):

                data = ncdf_file.variables[var.name][ts]

                # weights masked like the data and normalised to sum to one
                full_cosines = np.ma.array(cosines)
                full_cosines.mask = data.mask
                full_cosines = full_cosines / np.sum(full_cosines)

                weighted_data = data * full_cosines

                plot_values[ts] = np.ma.sum(weighted_data)

                if calendar.isleap(year) and ((ts + 1) * 5) % 365 > 60:
                    # account for 6 day pentad in leap years
                    plot_times += [dt.datetime(year, 1, 1, 0, 0) + dt.timedelta(days=((ts + 1) * 5) % 365 + 1)]
                else:
                    plot_times += [dt.datetime(year, 1, 1, 0, 0) + dt.timedelta(days=((ts + 1) * 5) % 365)]

                print("{} {} {}".format(year, ts, plot_times[-1]))

                if ((ts + 1) * 5) % 365 == 0:
                    year += 1

            plt.clf()
            plt.plot(plot_times, plot_values, "r-")
            plt.title(var.name)
            plt.ylabel(var.units)

            # annual

            # plot_values is a 1-D series with one value per pentad, so group
            # it into years of 73 pentads (the former 4-D reshape could not
            # be applied to this 1-D array)
            plot_values = plot_values.reshape(-1, 73)
            plot_values = np.mean(plot_values, axis=1)

            # label each year with its middle (37th) pentad
            plt.plot(plot_times[36::73], plot_values, "b-")

            plt.savefig("{}/{}_pentads_all.png".format(settings.PLOT_LOCATION, var.name))

            raw_input("check")

    return  # make_timeseries
    mu_y=np.ma.mean(values)
    deltas=[]

    y1=slope*(years[0].year-mu_x)+mu_y
    y2=slope*(years[-1].year-mu_x)+mu_y
 
    return [years[0], years[-1]], [y1, y2] # mpw_plot_points

#***************************************
#***************************************
# Module-level configuration for the comparison plots.
# These statements run at import time and depend on their order
# (mdi must exist before the MetVar list is built).


# value handed to the MetVar helpers below - presumably the missing data indicator
mdi = -1.e30
OBS_ORDER = utils.make_MetVars(mdi, multiplier = False)

# the observation count is appended after the variables returned by make_MetVars
n_obs = utils.set_MetVar_attributes("n_obs", "Number of Observations", "Number of Observations", 1, -1, np.dtype("int64"), 0)
OBS_ORDER += [n_obs]

# lookup from a correction/QC label to a grid location name
# (presumably directory names - verify against where it is used)
GRID_LOCATION = {"NBC" : "GRIDS3", "BC" : "GRIDS_BC", "QC" : "GRIDS3", "noQC" : "GRIDS_noQC"}
PLOT_LOCATION = "/project/hadobs2/hadisdh/marine/PLOTS_compare/"


suffix = "relax"

#***************************************
#***************************************
# Day versus Night
version = "_anomalies"
DATA_LOCATION = "/project/hadobs2/hadisdh/marine/ICOADS.2.5.1/"
# "NBC" is one of the GRID_LOCATION keys
correction = "NBC"