Beispiel #1
0
def plotOptionsTransferFunction(**kwargs) -> Dict:
    """Build the plot options used when plotting transfer function data

    The standard plot options are taken as a starting point, the figure size
    is overridden and the transfer function axis limits are added. The result
    is then passed through parseKeywords together with the keyword arguments.

    Parameters
    ----------
    figsize : Tuple, optional
        The figure size. The default set here is None.
    plotfonts : Dict, optional
        Font sizes to use for plotting fonts
    block : bool, optional
        Boolean flag for blocking execution when the plot is shown
    res_ylim : List[float], optional
        y limits for resistivity data. Default is [0.01, 10000].
    phase_ylim : List[float], optional
        y limits for phase data. Default is [-20, 90].
    xlim : List[float], optional
        x limits for transfer function data. Default is [0.0001, 10000].

    Returns
    -------
    out : Dict
        Dictionary of plot options for plotting transfer function data
    """

    plotOptions = plotOptionsStandard()
    # figsize already exists in the standard options and is overridden here,
    # the remaining keys are specific to transfer function plots
    plotOptions.update(
        {
            "figsize": None,
            "res_ylim": [0.01, 10000],
            "phase_ylim": [-20, 90],
            "xlim": [0.0001, 10000],
        }
    )
    return parseKeywords(plotOptions, kwargs)
Beispiel #2
0
def plotOptionsTipper(**kwargs) -> Dict:
    """Build the plot options used when plotting tipper data

    The standard plot options are taken as a starting point, the figure size
    is overridden and the tipper axis limits are added. The result is then
    passed through parseKeywords together with the keyword arguments.

    Parameters
    ----------
    figsize : Tuple, optional
        The figure size. The default set here is (16, 5).
    plotfonts : Dict, optional
        Font sizes to use for plotting fonts
    block : bool, optional
        Boolean flag for blocking execution when the plot is shown
    length_ylim : List[float], optional
        y limits for length data. Default is [0.001, 1000].
    angle_ylim : List[float], optional
        y limits for angle data. Default is [-30, 30].
    xlim : List[float], optional
        x limits for the data. Default is [0.0001, 10000].

    Returns
    -------
    out : Dict
        Dictionary of plot options for plotting tipper data
    """

    plotOptions = plotOptionsStandard()
    # figsize already exists in the standard options and is overridden here,
    # the remaining keys are specific to tipper plots
    plotOptions.update(
        {
            "figsize": (16, 5),
            "length_ylim": [0.001, 1000],
            "angle_ylim": [-30, 30],
            "xlim": [0.0001, 10000],
        }
    )
    return parseKeywords(plotOptions, kwargs)
Beispiel #3
0
def plotOptionsTime(**kwargs) -> Dict:
    """Build the plot options used when plotting time data

    The standard plot options are extended with empty y limits for the
    electric and magnetic channels and then passed through parseKeywords
    together with the keyword arguments.

    Parameters
    ----------
    figsize : Tuple, optional
        Set the figure size
    plotfonts : Dict, optional
        Font sizes to use for plotting fonts
    block : bool, optional
        Boolean flag for blocking execution when the plot is shown
    Eylim : List[float], optional
        y limits for electric data. Default is an empty list.
    Hylim : List[float], optional
        y limits for magnetic data. Default is an empty list.

    Returns
    -------
    out : Dict
        Dictionary of plot options for plotting time data
    """

    plotOptions = plotOptionsStandard()
    plotOptions.update({"Eylim": [], "Hylim": []})
    return parseKeywords(plotOptions, kwargs)
Beispiel #4
0
def getMaskData(projData: ProjectData, site: str, maskName: str,
                sampleFreq: Union[float, int], **kwargs) -> MaskData:
    """Read a mask data object for a site

    Parameters
    ----------
    projData : ProjectData
        A project instance
    site : str
        The site for which to get the mask
    maskName : str
        The name of the mask
    sampleFreq : int, float
        The sampling frequency for which the mask was created
    specdir : str, optional
        The spectra directory for which the mask was created. The default is
        the specdir entry of the Spectra section in the project configuration.

    Returns
    -------
    MaskData
        The result of reading the mask with MaskIO
    """

    defaults = {"specdir": projData.config.configParams["Spectra"]["specdir"]}
    options = parseKeywords(defaults, kwargs)

    # the mask location is resolved by the site from the spectra directory
    maskPath = projData.getSiteData(site).getSpecdirMaskPath(
        options["specdir"])
    return MaskIO(maskPath).read(maskName, sampleFreq)
def processProject(projData: ProjectData, **kwargs) -> None:
    """Process every requested site and sampling frequency of a project

    For each site in the options, processSite is called once for every
    sampling frequency of that site that is also listed in the sampleFreqs
    option. The complete options dictionary is forwarded to processSite as
    keyword arguments.

    Parameters
    ----------
    projData : ProjectData
        The project data instance for the project
    sites : List[str], optional
        List of sites. Default is all sites returned by the project.
    sampleFreqs : List[float], optional
        List of sample frequencies to process. Default is all sample frequencies returned by the project.
    specdir : str, optional
        The spectra directory to use. Default is taken from the project configuration.
    inchans : List[str], optional
        Channels to use as the input of the linear system. Default is ["Hx", "Hy"].
    inputsite : str, optional
        Site from which to take the input channels. Default is an empty string.
    outchans : List[str], optional
        Channels to use as the output of the linear system. Default is ["Ex", "Ey"].
    remotesite : str, optional
        The site to use as the remote site. Default is an empty string.
    remotechans : List[str], optional
        Channels to use from the remote reference site. Default is the default inchans list.
    crosschannels : List[str], optional
        List of channels to use for cross powers
    masks : Dict, optional
        Masks dictionary for passing mask data. The key should be a site name and the value should either be a string for a single mask or a list of multiple masks.
    datetimes : List, optional
        List of datetime constraints, each one as a dictionary. For example [{"type": "datetime", "start": 2018-08-08 00:00:00, "end": 2018-08-08 16:00:00, "levels": [0,1]}]. Note that levels is optional.
    postpend : str, optional
        String to postpend to the transfer function output
    """

    defaultInChans: List[str] = ["Hx", "Hy"]
    options: Dict = {
        "sites": projData.getSites(),
        "sampleFreqs": projData.getSampleFreqs(),
        "specdir": projData.config.configParams["Spectra"]["specdir"],
        "inchans": defaultInChans,
        "inputsite": "",
        "outchans": ["Ex", "Ey"],
        "remotesite": "",
        # the default remote channels are the very same list object as the
        # default input channels
        "remotechans": defaultInChans,
        "crosschannels": [],
        "masks": {},
        "datetimes": [],
        "postpend": "",
    }
    options = parseKeywords(options, kwargs)

    for site in options["sites"]:
        siteFreqs = projData.getSiteData(site).getSampleFreqs()
        for sampleFreq in siteFreqs:
            # only process the sampling frequencies which were asked for
            if sampleFreq in options["sampleFreqs"]:
                processSite(projData, site, sampleFreq, **options)
def getStatisticDataForSampleFreq(projData: ProjectData,
                                  site: str,
                                  sampleFreq: float,
                                  stat: str,
                                  declevel: int = 0,
                                  **kwargs):
    """Get the statistic data (for a particular decimation level) for all measurements in a site with sampling frequency sampleFreq

    Measurements for which the statistic read returns a falsy value are left
    out of the result and a project warning is issued for each of them.

    Parameters
    ----------
    projData : ProjectData
        Project instance
    site : str
        The site for which to get the statistic data
    sampleFreq : float
        The sampling frequency
    stat : str
        The statistic for which to get the measurement
    declevel : int, optional
        The decimation level to read in. Default is 0.
    specdir : str, optional
        The spectra directory. Default is taken from the project configuration.

    Returns
    -------
    Dict
        Dictionary with measurement names as keys and the statistic data read for each measurement as values
    """

    options = {}
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options = parseKeywords(options, kwargs)

    siteData = projData.getSiteData(site)
    if not siteData:
        projectError("Unable to find site {} in project".format(site),
                     quitRun=True)

    # load the statistic data
    statData: Dict[str, StatisticData] = {}
    statIO = StatisticIO()
    for meas in siteData.getMeasurements(sampleFreq):
        statIO.setDatapath(
            os.path.join(siteData.getMeasurementStatPath(meas),
                         options["specdir"]))
        # read the statistic a single time and use the same result both for
        # the "was anything found" check and as the stored value
        measStatData = statIO.read(stat, declevel)
        if measStatData:
            statData[meas] = measStatData
        else:
            projectWarning(
                "No {} data found for site {} and measurement {}".format(
                    stat, site, meas))
    return statData
def getTransferFunctionData(projData: ProjectData, site: str,
                            sampleFreq: float,
                            **kwargs) -> TransferFunctionData:
    """Read the transfer function data for a site and sampling frequency

    Parameters
    ----------
    projData : ProjectData
        The project data
    site : str
        Site to get the transfer function data for
    sampleFreq : int, float
        The sampling frequency for which to get the transfer function data
    specdir : str, optional
        The spectra directory used. Default is taken from the project configuration.
    postpend : str, optional
        The postpend on the transfer function files. Default is an empty string.

    Returns
    -------
    TransferFunctionData or bool
        The tfData attribute of the transfer function reader. If the file path check fails, a project warning is issued and False is returned instead.
    """

    options: Dict = parseKeywords(
        {
            "specdir": projData.config.configParams["Spectra"]["specdir"],
            "postpend": "",
        },
        kwargs,
    )

    # a non-empty postpend is attached to the file name with an underscore
    userPostpend = options["postpend"]
    suffix = "_{}".format(userPostpend) if userPostpend != "" else userPostpend

    siteData = projData.getSiteData(site)
    freqStr = fileFormatSampleFreq(sampleFreq)
    tfFilename = "{}_fs{:s}_{}{}".format(site, freqStr, options["specdir"],
                                         suffix)
    path = os.path.join(siteData.transFuncPath, "{:s}".format(freqStr),
                        tfFilename)

    # nothing to read if the file is not there
    if not checkFilepath(path):
        projectWarning("No transfer function file with name {}".format(path))
        return False

    projectText(
        "Reading transfer function for site {}, sample frequency {}, file {}".
        format(site, sampleFreq, path))

    reader = TransferFunctionReader(path)
    reader.printInfo()
    return reader.tfData
Beispiel #8
0
def getSpecReader(
    projData: ProjectData, site: str, meas: str, **kwargs
) -> SpectrumReader:
    """Get a spectrum reader opened on the spectra of a measurement

    Parameters
    ----------
    projData : ProjectData
        A project instance
    site : str
        Site for which to get the spectra reader
    meas : str
        The measurement
    declevel : int, optional
        Decimation level for which to get data. Default is 0.
    specdir : str, optional
        String that specifies spectra directory for the measurement. Default is taken from the project configuration.

    Returns
    -------
    SpectrumReader
        The spectrum reader object, after the spectra file for the decimation level has been opened for reading
    """

    options = parseKeywords(
        {
            "declevel": 0,
            "specdir": projData.config.configParams["Spectra"]["specdir"],
        },
        kwargs,
    )

    siteData = projData.getSiteData(site)
    if meas not in siteData.getMeasurements():
        projectError("Measurement directory {} not found".format(meas), quitRun=True)

    # the reader works on the specdir folder inside the measurement spectra path
    specPath = os.path.join(siteData.getMeasurementSpecPath(meas), options["specdir"])
    specReader = SpectrumReader(specPath)
    specReader.printInfo()

    # a failed open most likely means the decimation level was never calculated
    opened = specReader.openBinaryForReading("spectra", options["declevel"])
    if not opened:
        projectError(
            "Spectra file does not exist at level {}".format(options["declevel"]),
            quitRun=True,
        )
    return specReader
def getStatisticData(projData: ProjectData,
                     site: str,
                     meas: str,
                     stat: str,
                     declevel: int = 0,
                     **kwargs):
    """Read the data of one statistic for a single measurement of a site

    Parameters
    ----------
    projData : ProjectData
        Project instance
    site : str
        The site for which to get the statistic data
    meas : str
        The measurement for which to get the statistic
    stat : str
        The statistic to read
    declevel : int, optional
        The decimation level to read in. Default is 0.
    specdir : str, optional
        The spectra directory. Default is taken from the project configuration.

    Returns
    -------
    StatisticData
        The result of reading the statistic with StatisticIO
    """

    defaults = {"specdir": projData.config.configParams["Spectra"]["specdir"]}
    options = parseKeywords(defaults, kwargs)

    siteData = projData.getSiteData(site)
    if not siteData:
        projectError("Unable to find site {} in project".format(site),
                     quitRun=True)

    # statistics live in the specdir folder inside the measurement statistic path
    statIO = StatisticIO()
    statPath = os.path.join(siteData.getMeasurementStatPath(meas),
                            options["specdir"])
    statIO.setDatapath(statPath)
    return statIO.read(stat, declevel)
Beispiel #10
0
def olsModel(A, y, **kwargs) -> Tuple:
    r"""Ordinary least squares solution of a linear system

    Solves for :math:`x` where,

    .. math::
        y = Ax .

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    intercept : bool, optional
        True or False for adding an intercept term. When True, a column of complex ones is prepended to A before solving.

    Returns
    -------
    params : np.ndarray
        Least squares solution
    resids : np.ndarray
        Residuals, y minus the product of the (possibly augmented) A and params
    squareResid : np.ndarray
        Square residuals as returned by the least squares solver
    rank : int
        Rank of the (possibly augmented) matrix A
    s : np.ndarray
        Singular values of the (possibly augmented) matrix A
    """

    options = parseKeywords(defaultDictionary(), kwargs, printkw=False)
    if options["intercept"]:
        # the intercept is modelled as an extra leading column of ones
        numObs = A.shape[0]
        interceptColumn = np.ones(shape=(numObs, 1), dtype="complex")
        A = np.hstack((interceptColumn, A))

    solution = linalg.lstsq(A, y)
    params, squareResid, rank, s = solution
    resids = y - np.dot(A, params)
    return params, resids, squareResid, rank, s
Beispiel #11
0
def plotOptionsStandard(**kwargs) -> Dict:
    """Build the standard set of plot options

    The defaults are passed through parseKeywords together with the keyword
    arguments.

    Parameters
    ----------
    figsize : Tuple, optional
        Set the figure size. Default is (20, 12).
    plotfonts : Dict, optional
        Font sizes to use for plotting fonts. Default is the result of getViewFonts.
    block : bool, optional
        Boolean flag for blocking execution when the plot is shown. Default is True.

    Returns
    -------
    Dict
        Dictionary of standard plot options
    """

    standard: Dict = {
        "figsize": (20, 12),
        "plotfonts": getViewFonts(),
        "block": True,
    }
    return parseKeywords(standard, kwargs)
Beispiel #12
0
def plotOptionsSpec(**kwargs) -> Dict:
    """Build the plot options used when plotting spectra data

    The standard plot options are extended with an empty amplitude limit and
    then passed through parseKeywords together with the keyword arguments.

    Parameters
    ----------
    figsize : Tuple, optional
        Set the figure size
    plotfonts : Dict, optional
        Font sizes to use for plotting fonts
    block : bool, optional
        Boolean flag for blocking execution when the plot is shown
    amplim : List[float], optional
        Amplitude limits for plotting spectra. Default is an empty list.

    Returns
    -------
    out : Dict
        Dictionary of plot options for plotting spectra data
    """

    specOptions = plotOptionsStandard()
    specOptions.update({"amplim": []})
    return parseKeywords(specOptions, kwargs)
Beispiel #13
0
def calculateSpectra(projData: ProjectData, **kwargs) -> None:
    """Calculate spectra for the project time data

    The philosophy is that spectra are calculated out for all data and later limited using statistics and time constraints

    For every site, sampling frequency and measurement, the time data is read,
    the polarisation reversal, scale, calibration, filter and notch options
    are applied and the data is decimated. For each decimation level with at
    least two windows, the Fourier coefficients of every window are written to
    a spectra file together with a comments file.

    Parameters
    ----------
    projData : ProjectData
        A project data object
    sites : str, List[str], optional
        Either a single site or a list of sites. Default is all sites returned by the project.
    sampleFreqs : int, float, List[float], optional
        The frequencies in Hz for which to calculate the spectra. Either a single frequency or a list of them. Default is all sample frequencies returned by the project.
    chans : List[str], optional
        The channels for which to calculate out the spectra. Default is an empty list, in which case the channels of the measurement reader are used.
    polreverse :  Dict[str, bool], optional
        Keys are channels and values are boolean flags for reversing. Default is False.
    scale : Dict[str, float], optional
        Keys are channels and values are floats to multiply the channel data by. Default is False.
    calibrate : bool, optional
        Flag whether to calibrate the data or not. Default is True.
    notch : List[float], optional
        List of frequencies to notch
    filter : Dict, optional
        Filter parameters
    specdir : str, optional
        The spectra directory to save the spectra data in. Default is taken from the project configuration.
    """

    # default options
    options = {}
    options["sites"] = projData.getSites()
    options["sampleFreqs"]: List[float] = projData.getSampleFreqs()
    options["chans"]: List[str] = []
    options["polreverse"]: Union[bool, Dict[str, bool]] = False
    options["scale"]: Union[bool, Dict[str, float]] = False
    options["calibrate"]: bool = True
    options["notch"]: List[float] = []
    options["filter"]: Dict = {}
    options["specdir"]: str = projData.config.configParams["Spectra"]["specdir"]
    options = parseKeywords(options, kwargs)

    # prepare calibrator
    cal = getCalibrator(projData.calPath, projData.config)
    if options["calibrate"]:
        cal.printInfo()

    datetimeRef = projData.refTime
    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        siteData.printInfo()

        # calculate spectra for each frequency
        for sampleFreq in options["sampleFreqs"]:
            measurements = siteData.getMeasurements(sampleFreq)
            projectText(
                "Site {} has {:d} measurement(s) at sampling frequency {:.2f}".format(
                    site, len(measurements), sampleFreq
                )
            )
            if len(measurements) == 0:
                continue  # no data files at this sample rate

            for meas in measurements:
                projectText(
                    "Calculating spectra for site {} and measurement {}".format(
                        site, meas
                    )
                )
                # get measurement start and end times - this is the time of the first and last sample
                reader = siteData.getMeasurement(meas)
                startTime = siteData.getMeasurementStart(meas)
                stopTime = siteData.getMeasurementEnd(meas)
                # fall back to the channels of the reader when none were requested
                dataChans = (
                    options["chans"]
                    if len(options["chans"]) > 0
                    else reader.getChannels()
                )
                timeData = reader.getPhysicalData(startTime, stopTime, chans=dataChans)
                timeData.addComment(breakComment())
                timeData.addComment("Calculating project spectra")
                timeData.addComment(projData.config.getConfigComment())

                # apply various options
                applyPolarisationReversalOptions(options, timeData)
                applyScaleOptions(options, timeData)
                applyCalibrationOptions(options, cal, timeData, reader)
                applyFilterOptions(options, timeData)
                applyNotchOptions(options, timeData)

                # define decimation and window parameters
                decParams = getDecimationParameters(sampleFreq, projData.config)
                decParams.printInfo()
                numLevels = decParams.numLevels
                winParams = getWindowParameters(decParams, projData.config)
                dec = Decimator(timeData, decParams)
                timeData.addComment(
                    "Decimating with {} levels and {} frequencies per level".format(
                        numLevels, decParams.freqPerLevel
                    )
                )

                # loop through decimation levels
                for iDec in range(0, numLevels):
                    # get the data for the current level
                    check = dec.incrementLevel()
                    if not check:
                        break  # not enough data
                    # from here on timeData refers to the decimated data of this level
                    timeData = dec.timeData

                    # create the windower and give it window parameters for current level
                    sampleFreqDec = dec.sampleFreq
                    win = Windower(
                        datetimeRef,
                        timeData,
                        winParams.getWindowSize(iDec),
                        winParams.getOverlap(iDec),
                    )
                    if win.numWindows < 2:
                        break  # do no more decimation

                    # add some comments
                    timeData.addComment(
                        "Evaluation frequencies for this level {}".format(
                            listToString(decParams.getEvalFrequenciesForLevel(iDec))
                        )
                    )
                    timeData.addComment(
                        "Windowing with window size {} samples and overlap {} samples".format(
                            winParams.getWindowSize(iDec), winParams.getOverlap(iDec)
                        )
                    )

                    # create the spectrum calculator
                    specCalc = SpectrumCalculator(
                        sampleFreqDec, winParams.getWindowSize(iDec)
                    )
                    # get ready a file to save the spectra
                    specPath = os.path.join(
                        siteData.getMeasurementSpecPath(meas), options["specdir"]
                    )
                    specWrite = SpectrumWriter(specPath, datetimeRef)
                    specWrite.openBinaryForWriting(
                        "spectra",
                        iDec,
                        sampleFreqDec,
                        winParams.getWindowSize(iDec),
                        winParams.getOverlap(iDec),
                        win.winOffset,
                        win.numWindows,
                        dataChans,
                    )

                    # once the spectra file is open, make sure it is closed
                    # again even if writing a window or the comments fails
                    try:
                        # loop though windows, calculate spectra and save
                        for iW in range(0, win.numWindows):
                            # get the window data
                            winData = win.getData(iW)
                            # calculate spectra
                            specData = specCalc.calcFourierCoeff(winData)
                            # write out spectra
                            specWrite.writeBinary(specData)

                        specWrite.writeCommentsFile(timeData.getComments())
                    finally:
                        # close spectra file
                        specWrite.closeFile()
Beispiel #14
0
def mmestimateModel(A: np.ndarray, y: np.ndarray, **kwargs):
    r"""2 stage M estimate

    Solves for :math:`x` where,

    .. math::
        y = Ax .

    A first M estimate with huber weights is used to obtain a measure of
    scale. A second M estimate with bisquare weights is then run using that
    scale and its results are returned.

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    initial : Dict, optional
        Initial solution with parameters, scale and residuals. If it has no "scale" entry, one is calculated from its "resids" entry using sampleMAD0. The dictionary passed in is copied and is not modified by this function.
    scale : optional
        A scale estimate. NOTE(review): this function does not read a scale keyword itself, the scale of the second stage always comes from the first stage - confirm whether that is intended.
    intercept : bool, optional
        True or False for adding an intercept term

    Returns
    -------
    params : np.ndarray
        Values in x
    resids : np.ndarray
        Residuals = y - Ax
    scale : float
        Robust measure of variance
    weights : np.ndarray
        Weights used in robust regression
    """

    options = parseKeywords(defaultDictionary(), kwargs, printkw=False)
    intercept = options["intercept"]
    # this uses an initial mestimate with huber to give a measure of scale
    # and then a second with bisquare weights
    if "initial" in kwargs:
        # work on a shallow copy so that the dictionary owned by the caller
        # does not get a scale entry written into it
        initial = dict(kwargs["initial"])
        if "scale" not in initial:
            initial["scale"] = sampleMAD0(initial["resids"])
        _, _, scale, _ = mestimateModel(A,
                                        y,
                                        weights="huber",
                                        initial=initial,
                                        intercept=intercept)
        # now do another, but with a different weighting function
        # only the scale is carried over: the initial params and resids are
        # deliberately left as they were, because simply then doing bisquare,
        # which has zero weights, might mess things up
        initial["scale"] = scale
        params2, resids2, scale2, weights2 = mestimateModel(
            A,
            y,
            weights="bisquare",
            initial=initial,
            intercept=intercept)
    else:
        _, _, scale, _ = mestimateModel(A,
                                        y,
                                        weights="huber",
                                        intercept=intercept)
        # now do another, but with a different weighting function
        params2, resids2, scale2, weights2 = mestimateModel(
            A, y, weights="bisquare", scale=scale, intercept=intercept)

    return params2, resids2, scale2, weights2
Beispiel #15
0
def viewStatisticCrossplot(projData: ProjectData, site: str,
                           sampleFreq: Union[int, float], stat: str,
                           crossplots: List[List[str]],
                           **kwargs) -> Union[plt.figure, None]:
    """View statistic crossplots for a single sampling frequency of a site

    One subplot is made for every pair in crossplots. Within a subplot, the
    data of each measurement is plotted as a separate scatter series.

    Parameters
    ----------
    projData : ProjectData
        A project instance
    site : str
        The site for which to plot statistics
    sampleFreq : float
        The sampling frequency for which to plot statistics
    stat : str
        The statistic to plot
    crossplots : List[List[str]]
        The statistic element pairs to crossplot
    declevel : int, optional
        The decimation level to plot. Default is 0.
    eFreqI : int, optional
        The evaluation frequency index. Default is 0.
    specdir : str, optional
        The spectra directory. Used for both the statistic data and the mask data. Default is taken from the project configuration.
    maskname : str, optional
        Mask name. Default is an empty string, meaning no mask is applied.
    xlim : List, optional
        Limits for the x axis
    ylim : List, optional
        Limits for the y axis
    maxcols : int, optional
        The maximum number of columns in the plots. Default is 2.
    show : bool, optional
        Show the plot. Default is True.
    save : bool, optional
        Save the plot to the images directory. Default is False.
    plotoptions : Dict, optional
        Dictionary of plot options. Default is the result of plotOptionsSpec.

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed. None is also returned when no statistic data was found.
    """

    options = {}
    options["declevel"] = 0
    options["eFreqI"] = 0
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options["maskname"] = ""
    options["xlim"] = []
    options["ylim"] = []
    options["maxcols"] = 2
    options["show"] = True
    options["save"] = False
    options["plotoptions"] = plotOptionsSpec()
    options = parseKeywords(options, kwargs)

    projectText(
        "Plotting crossplot for statistic {}, site {} and sampling frequency {}"
        .format(stat, site, sampleFreq))

    statData = getStatisticDataForSampleFreq(
        projData,
        site,
        sampleFreq,
        stat,
        declevel=options["declevel"],
        specdir=options["specdir"],
    )
    statMeas = list(statData.keys())
    # without any statistic data there is no evaluation frequency to look up
    # and nothing to plot
    if len(statMeas) == 0:
        projectText(
            "No {} statistic data found for site {} and sampling frequency {}, nothing to plot"
            .format(stat, site, sampleFreq))
        return None
    # get the evaluation frequency
    eFreq = statData[statMeas[0]].evalFreq[options["eFreqI"]]

    # get the mask data
    maskWindows = []
    if options["maskname"] != "":
        # read the mask from the same spectra directory as the statistics
        maskData = getMaskData(projData,
                               site,
                               options["maskname"],
                               sampleFreq,
                               specdir=options["specdir"])
        maskWindows = maskData.getMaskWindowsFreq(options["declevel"],
                                                  options["eFreqI"])

    # plot information
    nrows, ncols = getPlotRowsAndCols(options["maxcols"], len(crossplots))

    plotfonts = options["plotoptions"]["plotfonts"]
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])
    # suptitle
    st = fig.suptitle(
        "{} crossplots for {}, sampling frequency {} Hz, decimation level {} and evaluation frequency {} Hz"
        .format(stat, site, sampleFreq, options["declevel"], eFreq),
        fontsize=plotfonts["suptitle"],
    )
    st.set_y(0.98)

    # now plot the data
    for idx, cplot in enumerate(crossplots):
        ax = plt.subplot(nrows, ncols, idx + 1)
        plt.title("Crossplot {}".format(cplot), fontsize=plotfonts["title"])

        for meas in statMeas:
            stats = statData[meas].getStats(maskwindows=maskWindows)
            # look up the positions of the two statistic elements to plot
            plotI1 = statData[meas].winStats.index(cplot[0])
            plotData1 = np.squeeze(stats[:, options["eFreqI"], plotI1])
            plotI2 = statData[meas].winStats.index(cplot[1])
            plotData2 = np.squeeze(stats[:, options["eFreqI"], plotI2])
            plt.scatter(plotData1,
                        plotData2,
                        edgecolors="none",
                        marker="o",
                        s=12,
                        label=meas)

        # axis options
        if len(options["xlim"]) > 0:
            plt.xlim(options["xlim"])
        if len(options["ylim"]) > 0:
            plt.ylim(options["ylim"])
        plt.xlabel(cplot[0], fontsize=plotfonts["axisLabel"])
        plt.ylabel(cplot[1], fontsize=plotfonts["axisLabel"])
        plt.grid(True)
        # set tick sizes
        for label in ax.get_xticklabels() + ax.get_yticklabels():
            label.set_fontsize(plotfonts["axisTicks"])
        plt.legend(loc=2, markerscale=4, fontsize=plotfonts["legend"])

    # plot format, show and save
    fig.tight_layout(rect=[0.02, 0.02, 0.98, 0.92])
    if options["save"]:
        impath = projData.imagePath
        sampleFreqStr = fileFormatSampleFreq(sampleFreq)
        filename = "statCrossplot_{:s}_{:s}_{:s}_dec{:d}_efreq{:d}_{:s}".format(
            stat,
            site,
            sampleFreqStr,
            options["declevel"],
            options["eFreqI"],
            options["specdir"],
        )
        if options["maskname"] != "":
            filename = "{}_{}".format(filename, options["maskname"])
        savename = savePlot(impath, filename, fig)
        projectText("Image saved to file {}".format(savename))
    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    if not options["show"] and options["save"]:
        plt.close(fig)
        return None
    return fig
Beispiel #16
0
def viewStatisticHistogram(projData: ProjectData, site: str, sampleFreq: float,
                           stat: str, **kwargs) -> Union[plt.figure, None]:
    """View statistic histograms for a single sampling frequency of a site

    One subplot is drawn for each component of the statistic. For every
    component, the values at the requested evaluation frequency are pooled
    across all the measurements returned for the sampling frequency, non-finite
    values are dropped and the remainder is binned into a histogram.

    Parameters
    ----------
    projData : ProjectData
        A project instance
    site : str
        The site for which to plot statistics
    sampleFreq : float
        The sampling frequency for which to plot statistics
    stat : str
        The statistic to plot
    declevel : int, optional
        The decimation level to plot. Default is 0.
    eFreqI : int, optional
        The evaluation frequency index. Default is 0.
    specdir : str, optional
        The spectra directory. Default is taken from the project configuration.
    maskname : str, optional
        Mask name. Default is an empty string, meaning no mask is applied.
    numbins : int, optional
        The number of bins for the histogram data binning. Default is 40.
    xlim : List, optional
        Limits for the x axis. When empty, the limits are the minimum and maximum of the plotted data.
    maxcols : int, optional
        The maximum number of columns in the plots. Default is 4.
    show : bool, optional
        Show the plot. Default is True.
    save : bool, optional
        Save the plot to the images directory. Default is False.
    plotoptions : Dict, optional
        Dictionary of plot options

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None.
    """

    options = {}
    options["declevel"] = 0
    options["eFreqI"] = 0
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options["maskname"] = ""
    options["numbins"] = 40
    options["xlim"] = []
    options["maxcols"] = 4
    options["show"] = True
    options["save"] = False
    options["plotoptions"] = plotOptionsSpec()
    options = parseKeywords(options, kwargs)

    projectText(
        "Plotting histogram for statistic {}, site {} and sampling frequency {}"
        .format(stat, site, sampleFreq))

    statData = getStatisticDataForSampleFreq(
        projData,
        site,
        sampleFreq,
        stat,
        declevel=options["declevel"],
        specdir=options["specdir"],
    )
    statMeas = list(statData.keys())
    # get the statistic components
    statComponents = statData[statMeas[0]].winStats
    # get the evaluation frequency
    eFreq = statData[statMeas[0]].evalFreq[options["eFreqI"]]

    # get the mask data
    maskWindows = []
    if options["maskname"] != "":
        maskData = getMaskData(projData, site, options["maskname"], sampleFreq)
        maskWindows = maskData.getMaskWindowsFreq(options["declevel"],
                                                  options["eFreqI"])

    # getStats takes the same arguments for every component, so request the
    # statistics once per measurement rather than once per subplot
    measStats = [
        statData[meas].getStats(maskwindows=maskWindows) for meas in statMeas
    ]

    # plot information
    nrows, ncols = getPlotRowsAndCols(options["maxcols"], len(statComponents))
    numbins = options["numbins"]

    plotfonts = options["plotoptions"]["plotfonts"]
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])
    # suptitle
    st = fig.suptitle(
        "{} histogram for {}, sampling frequency {} Hz, decimation level {} and evaluation frequency {} Hz"
        .format(stat, site, sampleFreq, options["declevel"], eFreq),
        fontsize=plotfonts["suptitle"],
    )
    st.set_y(0.98)

    # now plot the data
    for idx, val in enumerate(statComponents):
        ax = plt.subplot(nrows, ncols, idx + 1)
        plt.title("Histogram {}".format(val), fontsize=plotfonts["title"])

        # pool the component values of all measurements. ravel (rather than
        # squeeze) keeps a single window as a 1-D array, which concatenate needs
        plotData = np.concatenate(
            [np.empty(shape=(0))] +
            [np.ravel(stats[:, options["eFreqI"], idx]) for stats in measStats])
        # remove infinities and nans
        plotData = plotData[np.isfinite(plotData)]

        # x axis options
        if len(options["xlim"]) > 0:
            xlim = options["xlim"]
        elif plotData.size > 0:
            xlim = [np.min(plotData), np.max(plotData)]
        else:
            # no finite data and no limits supplied: leave the subplot empty
            # instead of failing on the min/max of an empty array
            xlim = None
        plt.xlabel("Value", fontsize=plotfonts["axisLabel"])
        if xlim is not None:
            plt.xlim(xlim)
            # now plot with xlim in mind
            plt.hist(plotData,
                     numbins,
                     range=xlim,
                     facecolor="red",
                     alpha=0.75)
        plt.grid()
        # y axis options
        plt.ylabel("Count", fontsize=plotfonts["axisLabel"])
        # set tick sizes
        for label in ax.get_xticklabels() + ax.get_yticklabels():
            label.set_fontsize(plotfonts["axisTicks"])

    # plot format, show and save
    fig.tight_layout(rect=[0.02, 0.02, 0.98, 0.92])
    if options["save"]:
        impath = projData.imagePath
        sampleFreqStr = fileFormatSampleFreq(sampleFreq)
        filename = "statHist_{:s}_{:s}_{:s}_dec{:d}_efreq{:d}_{:s}".format(
            stat,
            site,
            sampleFreqStr,
            options["declevel"],
            options["eFreqI"],
            options["specdir"],
        )
        if options["maskname"] != "":
            filename = "{}_{}".format(filename, options["maskname"])
        savename = savePlot(impath, filename, fig)
        projectText("Image saved to file {}".format(savename))
    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    if not options["show"] and options["save"]:
        # the figure only exists on disk now, so release it
        plt.close(fig)
        return None
    return fig
Beispiel #17
0
def chatterjeeMachlerHadi(X, y, **kwargs):
    r"""Robust regression using weights based on Hadi distances

    Another regression method based on Hadi distances, implemented from the
    paper "A Re-Weighted Least Squares Method for Robust Regression Estimation"
    (Billor, Hadi).

    The method has two stages. First, Hadi distances are computed for the rows
    of the absolute values of X by growing an outlier-free basic subset of
    observations. Second, an iteratively re-weighted least squares is run with
    weights that combine those distances with the current residuals.

    Parameters
    ----------
    X : np.ndarray
        Predictors, indexed as (observation, regressor)
    y : np.ndarray
        Observations
    intercept : bool, optional
        True or False for adding an intercept term
    maxiter : int, optional
        Upper limit on the number of re-weighted least squares iterations

    Returns
    -------
    params : np.ndarray
        Solution of the last weighted least squares
    resids : np.ndarray
        If the fit is exact (residuals sum to less than eps), the raw residuals y - Xb. Otherwise the squared residual magnitudes of the last iteration, normalised to sum to one.
    weights : np.ndarray
        Weights used in robust regression
    """
    # basic info
    options = parseKeywords(defaultDictionary(), kwargs, printkw=False)

    # for the distances, will use absX - do this before adding intercept term
    # a column of all ones will cause problems with non full rank covariance matrices
    absX = np.absolute(X)

    # now calculate p and n
    n = absX.shape[0]
    p = absX.shape[1]

    # we treat the X matrix as a multivariate matrix with n observations and p variables
    # first need to find a basic subset free of outliers
    correctionFactor = 1 + (1.0 * (p + 1) / (n - p)) + (2.0 / (n - 1 - 3 * p))
    chi = stats.chi2(p, 0)
    alpha = 0.05
    # NOTE(review): this evaluates the chi-squared density at alpha / n. A
    # cutoff of this kind is usually a percentile (ppf) - confirm against the
    # paper before relying on the outlier bound
    chi2bound = correctionFactor * chi.pdf(alpha / n)
    # calculate h, this is the size of the first basic subset
    # note that this is the value h, the index of the hth element is h-1
    h = int(1.0 * (n + p + 1) / 2)  # here, only want the integer part of this
    # need to get the coordinatewise medians - this is the median of the columns
    medians = np.median(absX, axis=0)
    # now compute the matrix to help calculate the distance
    A = np.zeros(shape=(p, p))
    for i in range(0, n):
        tmp = absX[i, :] - medians
        A += np.outer(tmp, tmp)
    A = 1.0 / (n - 1) * A

    # now calculate initial distances
    dInit = calculateDistCMH(n, absX, medians, A)

    # now get the h smallest values of d
    sortOrder = np.argsort(dInit)
    indices = sortOrder[0:h]
    # column means, to pair with the column-wise covariance below
    means = np.average(absX[indices, :], axis=0)
    covariance = np.cov(
        absX[indices],
        rowvar=False)  # observations in rows, columns are variables
    dH = calculateDistCMH(n, absX, means, covariance)

    # rearrange into n observations into order and partition into two initial subsets
    # one subset p+1, the n-p-1
    sortOrder = np.argsort(dH)
    indicesBasic = sortOrder[:p + 1]
    # there is a rank issue here, but ignore for now - natural observations will presumably be full rank
    means = np.average(absX[indicesBasic, :], axis=0)
    covariance = np.cov(absX[indicesBasic], rowvar=False)
    dist = calculateDistCMH(n, absX, means, covariance)

    # create the basic subset
    r = p + 2
    # limit to roughly 100 iterations. Integer division keeps r an integer so
    # that it remains usable as a slice bound and index below
    increment = max((h - r) // 100, 1)
    while r <= h:
        sortOrder = np.argsort(dist)
        indices = sortOrder[:r]  # the r observations with the smallest distances
        means = np.average(absX[indices], axis=0)
        covariance = np.cov(absX[indices], rowvar=False)
        dist = calculateDistCMH(n, absX, means, covariance)
        if h - r > 0 and h - r < increment:
            r = h
        else:
            r += increment

    # now the second part = add more points and exclude outliers to basic set
    # all distances above r+1 = outliers
    while r < n:
        sortOrder = np.argsort(dist)
        dist2 = np.power(dist, 2)
        if dist2[sortOrder[r]] > chi2bound:
            break  # then leave, everything else is an outlier - it would be good if this could be saved somehow
        # otherwise, continue adding points
        indices = sortOrder[:r]
        means = np.average(absX[indices], axis=0)
        covariance = np.cov(absX[indices], rowvar=False)
        dist = calculateDistCMH(n, absX, means, covariance)
        if n - 1 - r > 0 and n - 1 - r < increment:
            r = n - 1
        else:
            r += increment

    # now with the Hadi distances calculated, can proceed to do the robust regression
    # normalise and manipulate Hadi distances
    dist = dist / np.max(dist)
    # the paper suggests using the median of the complete set of distances
    distMedian = np.median(dist)
    tmp = np.maximum(dist, np.ones(shape=(n)) * distMedian)
    dist = np.reciprocal(tmp)
    dist2 = np.power(dist, 2)
    dist = dist2 / np.sum(dist2)

    # calculate first set of weights - this is simply dist
    weights = dist

    # now add the additional constant intercept column if required
    if options["intercept"] == True:
        # add column of ones for constant term
        X = np.hstack((np.ones(shape=(X.shape[0], 1), dtype="complex"), X))

    n = X.shape[0]
    p = X.shape[1]

    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = weightLS(X, y, weights)
        paramsNew, squareResidNew, rankNew, sNew = linalg.lstsq(Anew, ynew)
        residsNew = y - np.dot(X, paramsNew)
        # check residsNew to make sure not all zeros (i.e. will happen in undetermined or equally determined system)
        if np.sum(np.absolute(residsNew)) < eps():
            # then return everything here
            return paramsNew, residsNew, weights

        residsAbs = np.absolute(residsNew)
        residsSquare = np.power(residsAbs, 2)
        # from here on residsNew holds the normalised squared residuals
        residsNew = residsSquare / np.sum(residsSquare)
        residsMedian = np.median(residsAbs)

        # calculate the new weights
        tmpDenom = np.maximum(residsNew,
                              np.ones(shape=(n), dtype="float") * residsMedian)
        tmp = (1 - dist) / tmpDenom
        weightsNew = np.power(tmp, 2) / np.sum(np.power(tmp, 2))

        # increment iteration
        iteration = iteration + 1
        weights = weightsNew
        params = paramsNew

        if iteration > 1:
            # check to see whether the change is smaller than the tolerance
            changeResids = linalg.norm(residsNew -
                                       resids) / linalg.norm(residsNew)
            if changeResids < eps():
                # update resids
                resids = residsNew
                break
        # update resids
        resids = residsNew

    # at the end, return the components
    return params, resids, weights
def processSite(projData: ProjectData, site: str,
                sampleFreq: Union[int, float], **kwargs):
    """Process a single sampling frequency for a site

    The site passed is treated as the output site, i.e. the output channels are taken from it. Input channels can be taken from a different site by passing the optional inputsite argument, and a remote reference can be added with remotesite.

    .. todo:: Give a few different examples here

    Parameters
    ----------
    projData : ProjectData
        The project data instance for the project
    site : str
        Site to process
    sampleFreq : float, int
        Sample frequency to process
    specdir : str, optional
        The spectra directories to use
    inchans : List[str], optional
        Channels to use as the input of the linear system. Default is Hx, Hy.
    inputsite : str, optional
        Site from which to take the input channels. The default is to use input and output channels from the same site
    outchans : List[str], optional
        Channels to use as the output of the linear system. Default is Ex, Ey.
    remotesite : str, optional
        The site to use as the remote site. When empty, single site processing is performed.
    remotechans : List[str], optional
        Channels to use from the remote reference site
    crosschannels : List[str], optional
        List of channels to use for cross powers
    masks : Dict, optional
        Masks dictionary for passing mask data. The key should be a site name and the value should either be a string for a single mask or a list of multiple masks. Entries for sites not being processed are ignored.
    datetimes : List, optional
        List of datetime constraints, each one as a dictionary. For example [{"type": "datetime", "start": 2018-08-08 00:00:00, "stop": 2018-08-08 16:00:00, "levels": [0,1]}]. Constraints of type "datetime" and "time" are read from the "start" and "stop" keys, constraints of type "date" from the "date" key. Note that levels is optional.
    postpend : str, optional
        String to postpend to the transfer function output
    """

    # the default remote channels are the same list object as the default input channels
    defaultInChans = ["Hx", "Hy"]
    defaults = {
        "specdir": projData.config.configParams["Spectra"]["specdir"],
        "inchans": defaultInChans,
        "inputsite": "",
        "outchans": ["Ex", "Ey"],
        "remotesite": "",
        "remotechans": defaultInChans,
        "crosschannels": [],
        "masks": {},
        "datetimes": [],
        "postpend": "",
    }
    options = parseKeywords(defaults, kwargs)
    # without an explicit input site, inputs and outputs share a site
    inputSite = options["inputsite"] if options["inputsite"] != "" else site
    options["inputsite"] = inputSite
    remoteSite = options["remotesite"]

    projectText("Processing site {}, sampling frequency {}".format(
        site, sampleFreq))
    siteData = projData.getSiteData(site)

    # decimation and window set up, followed by the window selector
    decParams = getDecimationParameters(sampleFreq, projData.config)
    decParams.printInfo()
    winParams = getWindowParameters(decParams, projData.config)
    winSelector = getWindowSelector(projData, decParams, winParams)

    # duplicated sites (e.g. the same input and output site) are fine as the
    # window selector only uses distinct sites. The remote site is only part
    # of the selection for remote reference processing
    processSites = [site, inputSite]
    if remoteSite:
        processSites.append(remoteSite)
    winSelector.setSites(processSites)

    # window masks, given either as a single name or a list of names per site
    for maskSite, siteMasks in options["masks"].items():
        if maskSite not in processSites:
            # masks for a site which is of no interest here
            continue
        if isinstance(siteMasks, str):
            maskNames = [siteMasks]
        elif all(isinstance(item, str) for item in siteMasks):
            maskNames = siteMasks
        else:
            # anything else is silently ignored
            maskNames = []
        for maskName in maskNames:
            winSelector.addWindowMask(maskSite, maskName)

    # datetime constraints
    for constraint in options["datetimes"]:
        levels = constraint["levels"] if "levels" in constraint else None
        constraintType = constraint["type"]
        if constraintType == "datetime":
            winSelector.addDatetimeConstraint(constraint["start"],
                                              constraint["stop"], levels)
        if constraintType == "time":
            winSelector.addTimeConstraint(constraint["start"],
                                          constraint["stop"], levels)
        if constraintType == "date":
            winSelector.addDateConstraint(constraint["date"], levels)

    # calculate the shared windows and report on the selection
    winSelector.calcSharedWindows()
    winSelector.printInfo()
    winSelector.printDatetimeConstraints()
    winSelector.printWindowMasks()
    winSelector.printSharedWindows()
    winSelector.printWindowsForFrequency()

    # the windows are known, so hand the selector to the right processor
    outPath = siteData.transFuncPath
    if remoteSite:
        projectText(
            "Remote reference processing with sites: in = {}, out = {}, reference = {}"
            .format(inputSite, site, remoteSite))
        processor = getRemoteReferenceProcessor(winSelector, outPath,
                                                projData.config)
        processor.setRemote(remoteSite, options["remotechans"])
    else:
        projectText(
            "Single site processing with sites: in = {}, out = {}".format(
                inputSite, site))
        processor = getSingleSiteProcessor(winSelector, outPath,
                                           projData.config)

    # input and output sites, then the remaining processor settings
    processor.setInput(inputSite, options["inchans"])
    processor.setOutput(site, options["outchans"])
    crossChannels = options["crosschannels"]
    if len(crossChannels) > 0:
        processor.crossChannels = crossChannels
    processor.postpend = options["postpend"]
    processor.printInfo()
    processor.process()
def viewTipper(projData: ProjectData, **kwargs) -> None:
    """View tipper data

    For each site, one figure is created per postpend and the tipper of every
    sampling frequency with transfer function data is added to it. Figures for
    which no transfer function data is found are closed rather than shown.

    Parameters
    ----------
    projData : projecData
        The project data
    sites : List[str], optional
        List of sites to plot tipper data for
    sampleFreqs : List[float], optional
        List of samples frequencies for which to plot tipper data
    specdir : str, optional
        The spectra directories used
    postpend : str, List[str], optional
        The postpend on the transfer function files. Either a single string or a list of strings, with one figure created per postpend.
    cols : bool, optional
        Boolean flag, True to arrange tipper plot as 1 row with 3 columns
    show : bool, optional
        Show the tipper plot
    save : bool, optional
        Save the plot to the images directory
    plotoptions : Dict
        A dictionary of plot options. For example, set the length y limits using length_ylim, set the angle y limits using angle_ylim and set the x limits using xlim
    """

    options = {}
    options["sites"] = projData.getSites()
    options["sampleFreqs"] = projData.getSampleFreqs()
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options["postpend"] = ""
    options["cols"] = True
    options["save"] = False
    options["show"] = True
    options["plotoptions"] = plotOptionsTipper()
    options = parseKeywords(options, kwargs)

    # if postpend is a string, then make it a list
    if isinstance(options["postpend"], str):
        options["postpend"] = [options["postpend"]]

    # markers and line styles are cycled over the sampling frequencies
    mks = ["o", "*", "d", "^", "h"]
    lstyles = ["solid", "dashed", "dashdot", "dotted"]

    # loop over sites
    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        # only the requested sampling frequencies which the site actually has
        sampleFreqs = set(siteData.getSampleFreqs())
        sampleFreqs = sampleFreqs.intersection(options["sampleFreqs"])
        sampleFreqs = sorted(sampleFreqs)

        plotfonts = options["plotoptions"]["plotfonts"]
        # now loop over the postpend options
        for pp in options["postpend"]:
            # add an underscore if not empty
            postpend = "_{}".format(pp) if pp != "" else pp

            fig = plt.figure(figsize=options["plotoptions"]["figsize"])

            # loop over sampling frequencies
            includedFreqs = []
            for idx, sampleFreq in enumerate(sampleFreqs):

                tfData = getTransferFunctionData(projData,
                                                 site,
                                                 sampleFreq,
                                                 specdir=options["specdir"],
                                                 postpend=pp)
                if not tfData:
                    continue

                includedFreqs.append(sampleFreq)
                projectText(
                    "Plotting tipper for site {}, sample frequency {}".format(
                        site, sampleFreq))

                mk = mks[idx % len(mks)]
                ls = lstyles[idx % len(lstyles)]
                # NOTE(review): the "cols" option is forwarded through the
                # rows keyword - confirm this is what tfData.viewTipper expects
                tfData.viewTipper(
                    fig=fig,
                    rows=options["cols"],
                    mk=mk,
                    ls=ls,
                    label="{}".format(sampleFreq),
                    xlim=options["plotoptions"]["xlim"],
                    length_ylim=options["plotoptions"]["length_ylim"],
                    angle_ylim=options["plotoptions"]["angle_ylim"],
                    plotfonts=options["plotoptions"]["plotfonts"],
                )

            # check if any files found
            if len(includedFreqs) == 0:
                # nothing was drawn, so discard the empty figure instead of
                # leaving it open to be shown alongside the real plots
                plt.close(fig)
                continue

            # sup title
            sub = "Site {} tipper: {}".format(site,
                                              options["specdir"] + postpend)
            sub = "{}\nfs = {}".format(
                sub, arrayToString(includedFreqs, decimals=3))
            st = fig.suptitle(sub, fontsize=plotfonts["suptitle"])
            st.set_y(0.99)
            fig.tight_layout()
            fig.subplots_adjust(top=0.85)

            if options["save"]:
                impath = projData.imagePath
                filename = "tipper_{}_{}{}".format(site, options["specdir"],
                                                   postpend)
                savename = savePlot(impath, filename, fig)
                projectText("Image saved to file {}".format(savename))

        # figures are shown or closed once per site
        if not options["show"]:
            plt.close("all")
        else:
            plt.show(block=options["plotoptions"]["block"])
Beispiel #20
0
def calculateMask(projData: ProjectData, maskData: MaskData, **kwargs):
    """Calculate masks for sites

    For every requested site which has data at the sampling frequency of the
    mask, the mask constraints are applied, the resulting mask is written out
    and the mask is then tried out with a window selector, whose information
    is printed.

    Parameters
    ----------
    projData : projectData
        A project instance
    maskData : MaskData
        A mask data instance
    sites : List[str], optional
        A list of sites to calculate masks for
    specdir : str, optional
        The spectra directory for which to calculate statistics
    """

    options = parseKeywords(
        {
            "sites": projData.getSites(),
            "specdir": projData.config.configParams["Spectra"]["specdir"],
        },
        kwargs,
    )
    specdir = options["specdir"]

    # the calculator and the writer are shared by all the sites
    calculator = MaskCalculator(projData, maskData, specdir=specdir)
    maskWriter = MaskIO()
    sampleFreq = maskData.sampleFreq

    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        # sites without data at the mask sampling frequency are skipped
        if sampleFreq not in siteData.getSampleFreqs():
            continue

        # decimation and window parameters
        decParams = getDecimationParameters(sampleFreq, projData.config)
        decParams.printInfo()
        winParams = getWindowParameters(decParams, projData.config)

        # start from a clean slate, then calculate the masked windows
        calculator.clearMaskWindows()
        calculator.applyConstraints(site)
        calculator.maskData.printInfo()

        # write out the mask file
        maskWriter.datapath = os.path.join(siteData.getSpecdirMaskPath(specdir))
        maskWriter.write(calculator.maskData)

        # test the mask with a window selector
        selector = WindowSelector(projData,
                                  sampleFreq,
                                  decParams,
                                  winParams,
                                  specdir=specdir)
        selector.setSites([site])
        selector.addWindowMask(site, maskData.maskName)
        selector.calcSharedWindows()
        selector.printInfo()
        selector.printDatetimeConstraints()
        selector.printWindowMasks()
        selector.printSharedWindows()
        selector.printWindowsForFrequency()
Beispiel #21
0
def chatterjeeMachler(A: np.ndarray, y: np.ndarray, **kwargs) -> Tuple:
    r"""Robust bounded influence solver

    Solves for :math:`x` where,

    .. math::
        y = Ax .

    Being a bounded influence operator, should be robust against both outliers in dependent and independent variables. The solution is found by iteratively re-weighted least squares, with weights built from the rows of the Q factor of A and from the current residuals.

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    intercept : bool, optional
        True or False for adding an intercept term
    maxiter : int, optional
        Upper limit on the number of re-weighted least squares iterations

    Returns
    -------
    params : np.ndarray
        Values in x
    resids : np.ndarray
        Residuals = y - Ax
    weights : np.ndarray
        Weights used in robust regression
    """

    options = parseKeywords(defaultDictionary(), kwargs, printkw=False)
    if options["intercept"] == True:
        # constant term: prepend a column of ones
        onesColumn = np.ones(shape=(A.shape[0], 1), dtype="complex")
        A = np.hstack((onesColumn, A))

    # number of observations and of regressors
    n, p = A.shape[0], A.shape[1]
    pnRatio = 1.0 * p / n

    # squared magnitude of each row of Q, scaled by the largest value
    q, r = linalg.qr(A)
    rowPower = np.empty(shape=(n), dtype="float")
    for rowIdx in range(0, n):
        qRow = q[rowIdx, :]
        rowPower[rowIdx] = np.absolute(np.sum(qRow * np.conjugate(qRow))).real
    del q, r
    leverage = rowPower / np.max(rowPower)
    # numerator of the weights, unchanged over the iterations
    weightsNom = np.power(1.0 - leverage, 2)

    # starting weights: reciprocal of the scaled values, floored at p/n
    weights = np.reciprocal(
        np.maximum(leverage, np.full(n, pnRatio, dtype="float")))

    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        Aweighted, yweighted = weightLS(A, y, weights)
        paramsNew, _, _, _ = linalg.lstsq(Aweighted, yweighted)
        residsNew = y - np.dot(A, paramsNew)
        residsAbs = np.absolute(residsNew)
        # all zero residuals occur for undetermined or equally determined
        # systems: nothing left to re-weight, so finish with the current weights
        if np.sum(residsAbs) < eps():
            return paramsNew, residsNew, weights

        # new weights: residual magnitudes are floored at their median
        residsFloor = np.full(n, np.median(residsAbs), dtype="float")
        weights = weightsNom / np.maximum(residsAbs, residsFloor)
        params = paramsNew
        iteration += 1

        # from the second iteration on, stop when the relative change in the
        # residuals is smaller than the tolerance
        converged = False
        if iteration > 1:
            relativeChange = linalg.norm(residsNew -
                                         resids) / linalg.norm(residsNew)
            converged = relativeChange < eps()
        resids = residsNew
        if converged:
            break
    return params, resids, weights
Beispiel #22
0
def preProcess(projData: ProjectData, **kwargs) -> None:
    """Pre-process project time data

    Preprocess the time data using filters, notch filters, resampling or interpolation. A new measurement folder is created under the site. The name of the new measurement folder is:
    prepend_[name of input measurement]_postpend. By default, prepend is "proc" and postpend is empty. If both are empty and no output site is given, the prepend falls back to "proc" so that the output folder name differs from the input one.

    Processed time series data can be saved in a new site by using the outputsite option.

    Parameters
    ----------
    projData : ProjectData
        A project data object
    sites : str, List[str], optional
        Either a single site or a list of sites
    sampleFreqs : int, float, List[float], optional
        The frequencies to preprocess, either a single frequency or a list of frequencies
    start : str, optional
        Start date of data to preprocess in format "%Y-%m-%d %H:%M:%S"
    stop : str, optional
        Stop date of data to process in format "%Y-%m-%d %H:%M:%S"
    outputsite : str, optional
        A site to output the preprocessed time data to. If this site does not exist, it will be created
    polreverse :  Dict[str, bool]
        Keys are channels and values are boolean flags for reversing 
    scale : Dict[str, float]
        Keys are channels and values are floats to multiply the channel data by       
    calibrate : bool, optional
        Boolean flag for calibrating the data. Default is false and setting to True will calibrate where files can be found.
    normalise : bool, optional
        Boolean flag for normalising the data. Default is False and setting to True will normalise each channel independently.
    filter : Dict, optional
        Filtering options in a dictionary
    notch : List[float], optional
        List of frequencies to notch in spectra given as a list of floats
    resamp : Dict, optional
        Resampling parameters in a dictionary with entries in the format: {sampleRateFrom: sampleRateTo}. All measurement directories of sampleRateFrom will be resampled to sampleRateTo
    interp : bool, optional
        Boolean flag for interpolating the data on to the second, so that sampling is coincident with seconds. This is not always the case. For example, SPAM data is not necessarily sampled on the second, whereas ATS data is. This function is useful when combining data of multiple formats. Interpolation does not change the sampling rate. Default is False.
    prepend : str, optional
        String to prepend to the output folder. Default is "proc".
    postpend : str, optional
        String to postpend to the output folder. Default is empty.
    """

    options: Dict = {}
    options["sites"]: List = projData.getSites()
    options["sampleFreqs"]: List[float] = projData.getSampleFreqs()
    options["start"]: Union[bool, str] = False
    options["stop"]: Union[bool, str] = False
    options["outputsite"]: str = ""
    options["polreverse"]: Union[bool, Dict[str, bool]] = False
    options["scale"]: Union[bool, Dict[str, float]] = False
    options["calibrate"]: bool = False
    options["normalise"]: bool = False
    options["filter"]: Dict = {}
    options["notch"]: List[float] = []
    options["resamp"]: Dict = {}
    options["interp"]: bool = False
    options["prepend"]: str = "proc"
    options["postpend"]: str = ""
    options = parseKeywords(options, kwargs)

    # print info
    text: List = ["Processing with options"]
    for op, val in options.items():
        text.append("\t{} = {}".format(op, val))
    projectBlock(text)

    # a single site or a single sampling frequency is treated as a one element list
    if isinstance(options["sites"], str):
        options["sites"] = [options["sites"]]
    if isinstance(options["sampleFreqs"], (int, float)):
        options["sampleFreqs"] = [options["sampleFreqs"]]

    # outputting to another site
    if options["outputsite"] != "":
        projectText(
            "Preprocessed data will be saved to output site {}".format(
                options["outputsite"]
            )
        )
        # create the site
        projData.createSite(options["outputsite"])
        projData.refresh()
        outputSitePath = projData.getSiteData(options["outputsite"]).timePath

    # output naming
    outPre = options["prepend"] + "_" if options["prepend"] != "" else ""
    outPost = "_" + options["postpend"] if options["postpend"] != "" else ""
    if outPre == "" and outPost == "" and options["outputsite"] == "":
        # writing into the same site under the same name: force a prefix
        outPre = "proc_"

    # create a data calibrator instance
    cal = Calibrator(projData.calPath)
    if options["calibrate"]:
        cal.printInfo()

    # format dates
    if options["start"]:
        options["start"] = datetime.strptime(options["start"], "%Y-%m-%d %H:%M:%S")
    if options["stop"]:
        options["stop"] = datetime.strptime(options["stop"], "%Y-%m-%d %H:%M:%S")

    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        siteData.printInfo()
        # loop over frequencies
        for sampleFreq in options["sampleFreqs"]:
            measurements = siteData.getMeasurements(sampleFreq)
            if len(measurements) == 0:
                # no data files at this sample rate
                continue

            # otherwise, process
            for meas in measurements:
                # get the reader
                projectText("Processing site {}, measurement {}".format(site, meas))
                reader = siteData.getMeasurement(meas)
                startTime = reader.getStartDatetime()
                stopTime = reader.getStopDatetime()
                # with date options set, skip measurements rejected by checkDateOptions
                if (options["start"] or options["stop"]) and not checkDateOptions(
                    options, startTime, stopTime
                ):
                    continue
                # if the data contributes, copy in the data if relevant
                if options["start"]:
                    startTime = options["start"]
                if options["stop"]:
                    stopTime = options["stop"]

                # calculate the samples
                sampleStart, sampleEnd = reader.time2sample(startTime, stopTime)
                # now get the data
                timeData = reader.getPhysicalSamples(
                    startSample=sampleStart, endSample=sampleEnd
                )
                timeData.printInfo()
                headers = reader.getHeaders()
                chanHeaders, chanMap = reader.getChanHeaders()

                # apply options
                applyPolarisationReversalOptions(options, timeData)
                applyScaleOptions(options, timeData)
                applyCalibrationOptions(options, cal, timeData, reader)
                applyFilterOptions(options, timeData)
                applyNotchOptions(options, timeData)
                applyInterpolationOptions(options, timeData)
                applyResampleOptions(options, timeData)
                applyNormaliseOptions(options, timeData)

                # output dataset path
                if options["outputsite"] != "":
                    timePath = outputSitePath
                else:
                    timePath = siteData.timePath
                outPath = os.path.join(timePath, "{}{}{}".format(outPre, meas, outPost))
                # write time data - need to manually change some headers (hence the keywords)
                # a new writer is used for every measurement
                writer = DataWriterInternal()
                writer.setOutPath(outPath)
                writer.writeData(
                    headers,
                    chanHeaders,
                    timeData,
                    start_time=timeData.startTime.strftime("%H:%M:%S.%f"),
                    start_date=timeData.startTime.strftime("%Y-%m-%d"),
                    stop_time=timeData.stopTime.strftime("%H:%M:%S.%f"),
                    stop_date=timeData.stopTime.strftime("%Y-%m-%d"),
                    numSamples=timeData.numSamples,
                    sample_freq=timeData.sampleFreq,
                    physical=True,
                )
                writer.printInfo()
Beispiel #23
0
def viewTime(
    projData: ProjectData, startDate: str, endDate: str, **kwargs
) -> Union[plt.figure, None]:
    """Plot project timeseries that fall inside a date range

    Every measurement of the requested sites whose sampling frequency is selected and whose recording period overlaps the date range is read, optionally processed and drawn onto a single figure with one subplot per channel.

    Parameters
    ----------
    projData : ProjectData
        The project instance
    startDate : str
        The start of the data range to plot, in the format "%Y-%m-%d %H:%M:%S"
    endDate : str
        The end of the date range to plot, in the format "%Y-%m-%d %H:%M:%S"
    sites : List[str], optional
        List of sites. Default is all the sites in the project
    sampleFreqs : List[float], optional
        List of sample frequencies to plot. Default is all sampling frequencies in the project
    chans : List[str], optional
        List of channels to plot. Default is Ex, Ey, Hx, Hy, Hz
    polreverse :  Dict[str, bool]
        Keys are channels and values are boolean flags for reversing
    calibrate : bool, optional
        Boolean flag to calibrate data. Default is False
    normalise : bool, optional
        Boolean flag to normalise the data. Default is False and setting to True will normalise each channel independently.
    notch : List[float], optional
        List of frequencies to notch out
    filter : Dict, optional
        Filter parameters
    show : bool, optional
        Boolean flag to show the plot. Default is True
    save : bool, optional
        Boolean flag to save the plot to images folder. Default is False
    plotoptions : Dict
        Dictionary of plot options. Default is the result of plotOptionsTime()

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed.
    """

    # defaults, overridden by any matching keyword arguments
    options = parseKeywords(
        {
            "sites": projData.sites,
            "sampleFreqs": projData.getSampleFreqs(),
            "chans": ["Ex", "Ey", "Hx", "Hy", "Hz"],
            "polreverse": False,
            "calibrate": False,
            "normalise": False,
            "filter": {},
            "notch": [],
            "show": True,
            "save": False,
            "plotoptions": plotOptionsTime(),
        },
        kwargs,
    )

    # the calibrator is always created, but only reported when it will be used
    cal = getCalibrator(projData.calPath, projData.config)
    if options["calibrate"]:
        cal.printInfo()

    # parse the requested period, padding with zero fractional seconds
    dateFormat = "%Y-%m-%d %H:%M:%S.%f"
    start = datetime.strptime("{}.000".format(startDate), dateFormat)
    stop = datetime.strptime("{}.000".format(endDate), dateFormat)

    # gather the time data, keyed by site and then by measurement
    timeDataAll = {}
    for site in options["sites"]:
        siteData = projData.getSiteData(site)
        siteData.printInfo()
        measurements = siteData.getMeasurements()
        siteTimeData = {}
        timeDataAll[site] = siteTimeData

        for meas in measurements:
            # ignore measurements recorded at a sampling frequency not requested
            if siteData.getMeasurementSampleFreq(meas) not in options["sampleFreqs"]:
                continue

            # ignore measurements that do not overlap the requested period
            measStart = siteData.getMeasurementStart(meas)
            measStop = siteData.getMeasurementEnd(meas)
            if measStop < start or measStart > stop:
                continue

            reader = siteData.getMeasurement(meas)
            # time2sample rounds to samples, so convert back to get the times that really correspond to those samples
            firstSample, lastSample = reader.time2sample(start, stop)
            readStart, readStop = reader.sample2time(firstSample, lastSample)
            # read whichever channels exist so sites with missing channels can still be plotted
            timeData = reader.getPhysicalData(readStart, readStop)

            projectText(
                "Plotting measurement {} of site {} between {} and {}".format(
                    meas, site, readStart, readStop
                )
            )

            # processing options
            applyPolarisationReversalOptions(options, timeData)
            applyCalibrationOptions(options, cal, timeData, reader)
            applyFilterOptions(options, timeData)
            applyNotchOptions(options, timeData)
            applyNormaliseOptions(options, timeData)
            siteTimeData[meas] = timeData

    # draw every collected measurement onto the same figure
    plotOpts = options["plotoptions"]
    plotfonts = plotOpts["plotfonts"]
    fig = plt.figure(figsize=plotOpts["figsize"])
    for site, siteTimeData in timeDataAll.items():
        for meas, timeData in siteTimeData.items():
            timeData.view(
                sampleStop=timeData.numSamples - 1,
                fig=fig,
                chans=options["chans"],
                label="{} - {}".format(site, meas),
                xlim=[start, stop],
                plotfonts=plotfonts,
            )

    # figure title
    titleText = "Time data from {} to {}".format(
        start.strftime("%Y-%m-%d %H-%M-%S"), stop.strftime("%Y-%m-%d %H-%M-%S")
    )
    st = fig.suptitle(titleText, fontsize=plotfonts["suptitle"])
    st.set_y(0.98)

    # y axis labels, limits and legends, one subplot per channel
    numChans = len(options["chans"])
    for idx, chan in enumerate(options["chans"]):
        plt.subplot(numChans, 1, idx + 1)
        if isElectric(chan):
            unit = "mV/km"
            limitKey = "Eylim"
        else:
            # magnetic channels are only in nT once calibrated
            unit = "nT" if options["calibrate"] else "mV"
            limitKey = "Hylim"
        plt.ylabel(unit, fontsize=plotfonts["axisLabel"])
        ylimits = plotOpts[limitKey]
        if len(ylimits) > 0:
            plt.ylim(ylimits)
        plt.legend(loc=1, fontsize=plotfonts["legend"])

    # layout
    fig.tight_layout(rect=[0, 0.02, 1, 0.96])
    fig.subplots_adjust(top=0.92)

    # save and show
    if options["save"]:
        filename = "timeData_{}_{}".format(
            start.strftime("%Y-%m-%d_%H-%M-%S_"), stop.strftime("%Y-%m-%d_%H-%M-%S")
        )
        savename = savePlot(projData.imagePath, filename, fig)
        projectText("Image saved to file {}".format(savename))
    if options["show"]:
        plt.show(block=plotOpts["block"])
    elif options["save"]:
        # saved but never shown: release the figure rather than return it
        plt.close(fig)
        return None
    return fig
Beispiel #24
0
def mestimateModel(A: np.ndarray, y: np.ndarray, **kwargs) -> Tuple:
    r"""Mestimate robust least squares

    Solves for :math:`x` where,

    .. math::        
        y = Ax .

    Good method for dependent outliers (in :math:`y`). Not robust against independent outliers (leverage points)

    The solution is found by iteratively reweighted least squares. The weights at each iteration are the product of residual based weights and a fixed set of leverage weights calculated from the QR decomposition of A.

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    initial : optional
        An initial solution. When truthy, it is passed to initialFromDict to get the starting parameters, residuals and scale. Otherwise an ordinary least squares solution is used as the starting model.
    scale : optional
        A scale estimate. When truthy, it replaces the scale of the starting model.
    intercept : bool, optional
        True or False for adding an intercept term
    weights : optional
        The weight function specifier passed to getRobustLocationWeights for the residual weights
    maxiter : optional
        The maximum number of reweighting iterations

    Defaults for the optional keywords are taken from defaultDictionary().

    Returns
    -------
    params : np.ndarray
        Values in x
    resids : np.ndarray
        Residuals = y - Ax
    scale : float
        Robust measure of variance
    weights : np.ndarray
        Weights used in robust regression    
    """

    options = parseKeywords(defaultDictionary(), kwargs, printkw=False)

    # leverage: squared row norms of Q, i.e. the diagonal of the projection matrix
    # NOTE(review): this is only a leverage measure when qr returns the reduced factor
    # (nobs x nregressors). With a full (nobs x nobs) Q every row has unit norm - confirm
    # which linalg module is imported at the top of the file.
    q, _ = linalg.qr(A)
    Pdiag = np.absolute(np.sum(q * np.conjugate(q), axis=1)).real.astype("float")
    del q
    Pdiag = Pdiag / np.max(Pdiag)
    leverageScale = sampleMAD0(Pdiag)
    # NOTE(review): the original author expected these weights to be non-zero everywhere
    # because Pdiag is normalised by its maximum - not verified here
    leverageWeights = getRobustLocationWeights(Pdiag / leverageScale, "huber")

    if options["intercept"]:
        # add column of ones for constant term
        A = np.hstack((np.ones(shape=(A.shape[0], 1), dtype="complex"), A))

    # starting model: either the one supplied or an ordinary least squares solution
    if options["initial"]:
        params, resids, scale = initialFromDict(options["initial"])
    else:
        params, resids, *_ = olsModel(A, y)
        scale = sampleMAD0(resids)

    # a user supplied scale takes precedence over the scale of the starting model
    if options["scale"]:
        scale = options["scale"]

    # standardised residuals and weights
    weights = (
        getRobustLocationWeights(resids / scale, options["weights"]) * leverageWeights
    )

    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = weightLS(A, y, weights)
        paramsNew = linalg.lstsq(Anew, ynew)[0]
        residsNew = y - np.dot(A, paramsNew)
        # all zero residuals occur for undetermined or equally determined systems
        # return before a zero scale is used to standardise the residuals
        if np.sum(np.absolute(residsNew)) < eps():
            return paramsNew, residsNew, scale, weights
        scale = sampleMAD0(residsNew)
        # standardise and calculate the weights for the next iteration
        weights = (
            getRobustLocationWeights(residsNew / scale, options["weights"])
            * leverageWeights
        )
        iteration = iteration + 1
        params = paramsNew

        # convergence is judged on the relative change in residuals (the R approach)
        changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
        resids = residsNew
        if changeResids < eps():
            break
    return params, resids, scale, weights
Beispiel #25
0
def viewSpectraStack(
    projData: ProjectData, site: str, meas: str, **kwargs
) -> Union[plt.figure, None]:
    """View spectra stacks for a measurement

    The windows of the measurement are split into consecutive sets. For each set, the mean amplitude and mean phase are plotted for every channel and, optionally, coherences between channel pairs are plotted on additional rows.

    Parameters
    ----------
    projData : ProjectData
        The project data
    site : str
        The site to view
    meas: str
        The measurement of the site to view
    chans : List[str], optional
        Channels to plot. Default is all the channels in the spectra file.
    declevel : int, optional
        Decimation level to plot
    numstacks : int, optional
        The number of sets to split the windows into. Default is 10. Sets which would contain no windows (fewer windows than sets) are skipped.
    coherences : List[List[str]], optional
        A list of coherences to add, specified as [["Ex", "Hy"], ["Ey", "Hx"]]. The channels must be amongst the plotted channels.
    specdir : str, optional
        String that specifies spectra directory for the measurement
    show : bool, optional
        Show the spectra plot
    save : bool, optional
        Save the plot to the images directory
    plotoptions : Dict, optional
        Dictionary of plot options. The "amplim" entry is applied as the amplitude y limits when it has two elements, otherwise 0.01 to 1000 is used.
    
    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed.
    """

    options = {}
    options["chans"] = []
    options["declevel"] = 0
    options["numstacks"] = 10
    options["coherences"] = []
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options["show"] = True
    options["save"] = False
    options["plotoptions"] = plotOptionsSpec()
    options = parseKeywords(options, kwargs)

    projectText(
        "Plotting spectra stack for measurement {} and site {}".format(meas, site)
    )
    specReader = getSpecReader(projData, site, meas, **options)

    # channels
    dataChans = specReader.getChannels()
    if len(options["chans"]) > 0:
        dataChans = options["chans"]
    numChans = len(dataChans)

    # get windows
    numWindows = specReader.getNumWindows()
    sampleFreqDec = specReader.getSampleFreq()
    f = specReader.getFrequencyArray()
    dataSize = specReader.getDataSize()

    # calculate num of windows to stack in each set
    numStacks = options["numstacks"]
    stackSize = int(np.floor(1.0 * numWindows / numStacks))

    # calculate number of rows - in case interested in coherences too
    # subplot requires integers, so the result of ceil is cast explicitly
    nrows = 2
    if len(options["coherences"]) > 0:
        nrows += int(np.ceil(1.0 * len(options["coherences"]) / numChans))

    # setup the figure
    plotfonts = options["plotoptions"]["plotfonts"]
    ampLim = options["plotoptions"]["amplim"]
    cmap = colorbarMultiline()
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])
    st = fig.suptitle(
        "Spectra stack, fs = {:.6f} [Hz], decimation level = {:2d}, windows in each set = {:d}".format(
            sampleFreqDec, options["declevel"], stackSize
        ),
        fontsize=plotfonts["suptitle"],
    )
    st.set_y(0.98)

    # do the stacking
    for iP in range(0, numStacks):
        stackStart = iP * stackSize
        stackStop = min(stackStart + stackSize, numWindows)
        numInStack = stackStop - stackStart
        if numInStack <= 0:
            # fewer windows than sets: nothing to average and no times for the label
            continue
        color = cmap(iP / numStacks)

        # accumulators for this set
        # amplitude and phase are real quantities, cross powers are complex
        ampData = {}
        phaseData = {}
        powerData = {}
        for c in dataChans:
            ampData[c] = np.zeros(shape=(dataSize), dtype="float")
            phaseData[c] = np.zeros(shape=(dataSize), dtype="float")
            for c2 in dataChans:
                powerData[c + c2] = np.zeros(shape=(dataSize), dtype="complex")

        # now stack the data
        for iW in range(stackStart, stackStop):
            winData = specReader.readBinaryWindowLocal(iW)
            for c in dataChans:
                ampData[c] += np.absolute(winData.data[c])
                phaseData[c] += np.angle(winData.data[c]) * (180.0 / np.pi)
                # cross powers for the coherences
                for c2 in dataChans:
                    powerData[c + c2] += winData.data[c] * np.conjugate(
                        winData.data[c2]
                    )
            if iW == stackStart:
                startTime = winData.startTime
            if iW == stackStop - 1:
                stopTime = winData.stopTime

        # the same label is used on every line of this set
        stackLabel = "{} to {}".format(
            startTime.strftime("%m-%d %H:%M:%S"), stopTime.strftime("%m-%d %H:%M:%S")
        )

        # scale powers and stacks
        for idx, c in enumerate(dataChans):
            ampData[c] = ampData[c] / numInStack
            phaseData[c] = phaseData[c] / numInStack
            for c2 in dataChans:
                # normalisation
                powerData[c + c2] = 2 * powerData[c + c2] / numInStack
                # the first and last frequency samples are not doubled
                powerData[c + c2][[0, -1]] = powerData[c + c2][[0, -1]] / 2

            # plot amplitude
            ax1 = plt.subplot(nrows, numChans, idx + 1)
            plt.title("Amplitude {}".format(c), fontsize=plotfonts["title"])
            ax1.semilogy(f, ampData[c], color=color, label=stackLabel)
            if len(ampLim) == 2:
                ax1.set_ylim(ampLim)
            else:
                ax1.set_ylim(0.01, 1000)
            ax1.set_xlim(0, sampleFreqDec / 2.0)
            if isMagnetic(c):
                ax1.set_ylabel("Amplitude [nT]", fontsize=plotfonts["axisLabel"])
            else:
                ax1.set_ylabel("Amplitude [mV/km]", fontsize=plotfonts["axisLabel"])
            ax1.set_xlabel("Frequency [Hz]", fontsize=plotfonts["axisLabel"])
            plt.grid(True)
            # set tick sizes
            for label in ax1.get_xticklabels() + ax1.get_yticklabels():
                label.set_fontsize(plotfonts["axisTicks"])

            # plot phase
            ax2 = plt.subplot(nrows, numChans, numChans + idx + 1)
            plt.title("Phase {}".format(c), fontsize=plotfonts["title"])
            ax2.plot(f, phaseData[c], color=color, label=stackLabel)
            ax2.set_ylim(-180, 180)
            ax2.set_xlim(0, sampleFreqDec / 2.0)
            ax2.set_ylabel("Phase [degrees]", fontsize=plotfonts["axisLabel"])
            ax2.set_xlabel("Frequency [Hz]", fontsize=plotfonts["axisLabel"])
            plt.grid(True)
            # set tick sizes
            for label in ax2.get_xticklabels() + ax2.get_yticklabels():
                label.set_fontsize(plotfonts["axisTicks"])

        # plot coherences
        for idx, coh in enumerate(options["coherences"]):
            c = coh[0]
            c2 = coh[1]
            cohNom = np.power(np.absolute(powerData[c + c2]), 2)
            # auto powers are sums of x * conj(x), so only the real part carries information
            cohDenom = (powerData[c + c] * powerData[c2 + c2]).real
            coherence = cohNom / cohDenom
            ax = plt.subplot(nrows, numChans, 2 * numChans + idx + 1)
            plt.title("Coherence {} - {}".format(c, c2), fontsize=plotfonts["title"])
            ax.plot(f, coherence, color=color, label=stackLabel)
            ax.set_ylim(0, 1.1)
            ax.set_xlim(0, sampleFreqDec / 2)
            ax.set_ylabel("Coherence", fontsize=plotfonts["axisLabel"])
            ax.set_xlabel("Frequency [Hz]", fontsize=plotfonts["axisLabel"])
            plt.grid(True)
            # set tick sizes
            for label in ax.get_xticklabels() + ax.get_yticklabels():
                label.set_fontsize(plotfonts["axisTicks"])

    # fig legend and layout - the handles of the current axes carry one entry per set
    ax = plt.gca()
    handles, labels = ax.get_legend_handles_labels()
    fig.tight_layout(rect=[0.01, 0.01, 0.98, 0.81])
    # the legend is drawn in its own frameless axes above the subplots
    legax = plt.axes(position=[0.01, 0.82, 0.98, 0.12], in_layout=False)
    plt.tick_params(left=False, labelleft=False, bottom=False, labelbottom=False)
    plt.box(False)
    legax.legend(
        handles, labels, ncol=4, loc="upper center", fontsize=plotfonts["legend"]
    )

    # plot show and save
    if options["save"]:
        impath = projData.imagePath
        filename = "spectraStack_{}_{}_dec{}_{}".format(
            site, meas, options["declevel"], options["specdir"]
        )
        savename = savePlot(impath, filename, fig)
        projectText("Image saved to file {}".format(savename))
    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    if not options["show"] and options["save"]:
        plt.close(fig)
        return None
    return fig
Beispiel #26
0
def viewStatistic(
    projData: ProjectData,
    site: str,
    sampleFreq: Union[int, float],
    stat: str,
    **kwargs
) -> Union[plt.figure, None]:
    """Plot the values of a statistic for one sampling frequency of a site

    The statistic data of every measurement returned for the site and sampling frequency is drawn onto one figure for a single evaluation frequency.

    Parameters
    ----------
    projData : ProjectData
        A project instance
    site : str
        The site for which to plot statistics
    sampleFreq : float
        The sampling frequency for which to plot statistics
    stat : str
        The statistic to plot
    declevel : int
        The decimation level to plot. Default is 0
    eFreqI : int
        The evaluation frequency index. Default is 0
    specdir : str
        The spectra directory. Default is taken from the project configuration
    maskname : str
        Mask name. Default is an empty string, meaning no mask is applied
    clim : List, optional
        Limits for colourbar axis
    xlim : List, optional
        Limits for the x axis. When empty, the earliest start and latest end of the plotted measurements are used
    ylim : List, optional
        Limits for the y axis
    colortitle : str, optional
        Title for the colourbar
    show : bool, optional
        Show the spectra plot
    save : bool, optional
        Save the plot to the images directory
    plotoptions : Dict, optional
        Dictionary of plot options

    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed.
    """

    # defaults, overridden by any matching keyword arguments
    options = parseKeywords(
        {
            "declevel": 0,
            "eFreqI": 0,
            "specdir": projData.config.configParams["Spectra"]["specdir"],
            "maskname": "",
            "clim": [],
            "xlim": [],
            "ylim": [],
            "colortitle": "",
            "show": True,
            "save": False,
            "plotoptions": plotOptionsSpec(),
        },
        kwargs,
    )

    projectText(
        "Plotting statistic {} for site {} and sampling frequency {}".format(
            stat, site, sampleFreq
        )
    )
    statData = getStatisticDataForSampleFreq(
        projData,
        site,
        sampleFreq,
        stat,
        declevel=options["declevel"],
        specdir=options["specdir"],
    )
    # measurements with statistic data
    statMeas = list(statData.keys())
    # the evaluation frequency is read from the first measurement
    eFreq = statData[statMeas[0]].evalFreq[options["eFreqI"]]

    # masked windows, if a mask was requested
    if options["maskname"] == "":
        maskWindows = []
    else:
        maskData = getMaskData(projData, site, options["maskname"], sampleFreq)
        maskWindows = maskData.getMaskWindowsFreq(
            options["declevel"], options["eFreqI"]
        )

    # figure
    plotfonts = options["plotoptions"]["plotfonts"]
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])

    # without user limits, the x axis spans all of the plotted measurements
    siteData = projData.getSiteData(site)
    if len(options["xlim"]) == 0:
        measStarts = [siteData.getMeasurementStart(meas) for meas in statMeas]
        measEnds = [siteData.getMeasurementEnd(meas) for meas in statMeas]
        options["xlim"] = [min(measStarts), max(measEnds)]

    # one plot call per measurement, all onto the same figure
    for meas in statMeas:
        statData[meas].view(
            options["eFreqI"],
            fig=fig,
            xlim=options["xlim"],
            ylim=options["ylim"],
            clim=options["clim"],
            label=meas,
            plotfonts=plotfonts,
            maskwindows=maskWindows,
        )
    # add a legend
    plt.legend(markerscale=4, fontsize=plotfonts["legend"])

    # the title is added once all the plotting is done
    titleText = "{} values for {}, sampling frequency = {:.2f} Hz, decimation level = {} and evaluation frequency {} Hz".format(
        stat, site, sampleFreq, options["declevel"], eFreq
    )
    st = fig.suptitle(titleText, fontsize=plotfonts["suptitle"])

    # layout, save and show
    fig.tight_layout(rect=[0.02, 0.02, 0.98, 0.92])
    if options["save"]:
        filename = "stat_{:s}_{:s}_{:s}_dec{:d}_efreq{:d}_{:s}".format(
            stat,
            site,
            fileFormatSampleFreq(sampleFreq),
            options["declevel"],
            options["eFreqI"],
            options["specdir"],
        )
        if options["maskname"] != "":
            # keep masked and unmasked images apart
            filename = "{}_{}".format(filename, options["maskname"])
        savename = savePlot(projData.imagePath, filename, fig)
        projectText("Image saved to file {}".format(savename))
    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    elif options["save"]:
        # saved but never shown: release the figure rather than return it
        plt.close(fig)
        return None
    return fig
Beispiel #27
0
def calculateStatistics(projData: ProjectData, **kwargs):
    """Calculate and save statistics for the spectra of project sites

    For every selected measurement and every decimation level with a spectra file, each requested statistic is calculated window by window and written to the statistic path of the measurement.

    Parameters
    ----------
    projData : ProjectData
        A project data instance
    sites : List[str], optional
        A list of sites to calculate statistics for. Default is all the sites in the project
    sampleFreqs : List[float], optional
        List of sampling frequencies for which to calculate statistics. Default is all the sampling frequencies in the project
    chans : List[str], optional
        List of data channels to use
    specdir : str, optional
        The spectra directory for which to calculate statistics
    stats : List[str], optional
        The statistics to calculate out. Acceptable values are: "absvalEqn" "coherence", "psd", "poldir", "transFunc", "resPhase", "partialcoh". Configuration file values are used by default.
    """

    # defaults, overridden by any matching keyword arguments
    configParams = projData.config.configParams
    options = parseKeywords(
        {
            "sites": projData.getSites(),
            "sampleFreqs": projData.getSampleFreqs(),
            "chans": [],
            "specdir": configParams["Spectra"]["specdir"],
            "stats": configParams["Statistics"]["stats"],
        },
        kwargs,
    )

    projectText(
        "Calculating stats: {} for sites: {}".format(
            listToString(options["stats"]), listToString(options["sites"])
        )
    )

    # one calculator and one writer are shared by all the measurements
    statCalculator = StatisticCalculator()
    statIO = StatisticIO()

    for site in options["sites"]:
        siteData = projData.getSiteData(site)

        for meas in siteData.getMeasurements():
            sampleFreq = siteData.getMeasurementSampleFreq(meas)
            if sampleFreq not in options["sampleFreqs"]:
                # statistics were not requested for this sampling frequency
                continue

            projectText(
                "Calculating stats for site {}, measurement {}".format(site, meas)
            )

            # decimation scheme for this sampling frequency
            decParams = getDecimationParameters(sampleFreq, projData.config)
            decParams.printInfo()
            numLevels = decParams.numLevels

            # reader for the spectra of this measurement
            specPath = os.path.join(
                siteData.getMeasurementSpecPath(meas), options["specdir"]
            )
            specReader = SpectrumReader(specPath)

            for iDec in range(numLevels):
                # a failed open most likely means no spectra were calculated for this level
                if not specReader.openBinaryForReading("spectra", iDec):
                    continue
                specReader.printInfo()

                # window and frequency information for this level
                refTime = specReader.getReferenceTime()
                winSize = specReader.getWindowSize()
                winOlap = specReader.getWindowOverlap()
                numWindows = specReader.getNumWindows()
                evalFreq = decParams.getEvalFrequenciesForLevel(iDec)
                sampleFreqDec = specReader.getSampleFreq()
                globalOffset = specReader.getGlobalOffset()
                fArray = specReader.getFrequencyArray()

                # a data container for each statistic
                statHandlers = {}
                for stat in options["stats"]:
                    statElements = getStatElements(stat)
                    handler = StatisticData(
                        stat, refTime, sampleFreqDec, winSize, winOlap
                    )
                    handler.setStatParams(numWindows, statElements, evalFreq)
                    # start from the spectra comments and append the statistic history
                    handler.comments = specReader.getComments()
                    handler.addComment(projData.config.getConfigComment())
                    handler.addComment("Calculating statistic: {}".format(stat))
                    handler.addComment(
                        "Statistic components: {}".format(listToString(statElements))
                    )
                    statHandlers[stat] = handler

                # calculate every statistic for every window
                for iW in range(numWindows):
                    winData = specReader.readBinaryWindowLocal(iW)
                    globalIndex = iW + globalOffset
                    # the calculator holds the spectra of the current window
                    statCalculator.setSpectra(fArray, winData, evalFreq)
                    for statName, handler in statHandlers.items():
                        handler.addStat(
                            iW,
                            globalIndex,
                            statCalculator.getDataForStatName(statName),
                        )

                # write out the statistics for this level
                for handler in statHandlers.values():
                    statIO.setDatapath(
                        os.path.join(
                            siteData.getMeasurementStatPath(meas), options["specdir"]
                        )
                    )
                    statIO.write(handler, iDec)
Beispiel #28
0
def chatterjeeMachlerMod(A, y, **kwargs):
    """Two stage robust least squares using leverage adjusted residuals

    Using the weights in chatterjeeMachler means that the minimum residual value is the median of the residuals. Instead, M estimate weights are used with a modified residual which includes a measure of leverage: residuals / (1 - p)^2, where p is the normalised diagonal of the projection matrix.

    The first stage is iteratively reweighted least squares, starting from weights based purely on leverage and then using "huber" weights. Only the scale estimate of this stage is carried forward. The second stage starts again from an ordinary least squares solution, with "trimmedMean" weights, and iterates using the weight function in the options.

    Parameters
    ----------
    A : np.ndarray
        Predictors, size nobs*nregressors
    y : np.ndarray
        Observations, size nobs
    intercept : bool, optional
        True or False for adding an intercept term
    weights : optional
        The weight function specifier passed to getRobustLocationWeights in the second stage
    maxiter : optional
        The maximum number of reweighting iterations in each stage

    Defaults for the optional keywords are taken from defaultDictionary().

    Returns
    -------
    params : np.ndarray
        Values in x
    resids : np.ndarray
        Residuals of the final iteration, divided by the leverage measure and by the scale estimate. If a solution with all zero residuals is found, the unscaled residuals y - Ax are returned instead.
    weights : np.ndarray
        Weights used in robust regression
    """

    # now calculate p and n
    n = A.shape[0]
    p = A.shape[1]
    pnRatio = 1.0 * p / n

    # leverage: squared row norms of Q, i.e. the diagonal of the projection matrix
    # NOTE(review): this is only a leverage measure when qr returns the reduced factor
    # (nobs x nregressors). With a full (nobs x nobs) Q every row has unit norm - confirm
    # which linalg module is imported at the top of the file.
    q, _ = linalg.qr(A)
    Pdiag = np.absolute(np.sum(q * np.conjugate(q), axis=1)).real.astype("float")
    del q
    # the small offset keeps the normalised values below one
    Pdiag = Pdiag / (np.max(Pdiag) + 0.0000000001)
    locP = np.median(Pdiag)
    scaleP = sampleMAD(Pdiag)
    # cap outlying leverage values just under one, so that the leverage measure
    # below is small for them but never zero
    bound = locP + 6 * scaleP
    Pdiag[Pdiag > bound] = 0.99999
    leverageMeas = np.power(1.0 - Pdiag, 2)

    # weights for the first iteration
    # this is purely based on the leverage
    weights = np.reciprocal(np.maximum(Pdiag, pnRatio))

    # get options
    options = parseKeywords(defaultDictionary(), kwargs, printkw=False)
    if options["intercept"]:
        # add column of ones for constant term
        A = np.hstack((np.ones(shape=(A.shape[0], 1), dtype="complex"), A))

    # first stage: iteratively weighted least squares with huber weights
    # scale stays None if no iteration is performed (maxiter <= 0)
    scale = None
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = weightLS(A, y, weights)
        paramsNew = linalg.lstsq(Anew, ynew)[0]
        residsNew = y - np.dot(A, paramsNew)
        # all zero residuals occur for undetermined or equally determined systems
        # return before a zero scale is used to standardise the residuals
        if np.sum(np.absolute(residsNew)) < eps():
            return paramsNew, residsNew, weights
        residsNew = residsNew / leverageMeas
        scale = sampleMAD0(residsNew)

        # standardise and calculate weights
        residsNew = residsNew / scale
        weights = getRobustLocationWeights(residsNew, "huber")
        iteration = iteration + 1

        # there are no previous residuals to compare with on the first iteration
        if iteration > 1:
            changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
            if changeResids < eps():
                break
        resids = residsNew

    # second stage: start again from the least squares solution
    params, resids, *_ = olsModel(A, y)
    resids = resids / leverageMeas
    if scale is None:
        # the first stage did not run, so take the scale from the least squares residuals
        scale = sampleMAD0(resids)
    resids = resids / scale
    weights = getRobustLocationWeights(resids, "trimmedMean")
    # iteratively weighted least squares
    iteration = 0
    while iteration < options["maxiter"]:
        # do the weighted least-squares
        Anew, ynew = weightLS(A, y, weights)
        paramsNew = linalg.lstsq(Anew, ynew)[0]
        residsNew = y - np.dot(A, paramsNew)
        # all zero residuals occur for undetermined or equally determined systems
        if np.sum(np.absolute(residsNew)) < eps():
            return paramsNew, residsNew, weights

        residsNew = residsNew / leverageMeas
        scale = sampleMAD0(residsNew)

        # standardise and calculate weights
        residsNew = residsNew / scale
        weights = getRobustLocationWeights(residsNew, options["weights"])
        iteration = iteration + 1
        params = paramsNew

        # check to see whether the change is smaller than the tolerance
        changeResids = linalg.norm(residsNew - resids) / linalg.norm(residsNew)
        resids = residsNew
        if changeResids < eps():
            break

    # at the end, return the components
    return params, resids, weights
Beispiel #29
0
def calculateRemoteStatistics(projData: ProjectData, remoteSite: str,
                              **kwargs):
    """Compute remote reference statistics for the spectra of a project

    For each requested site and measurement, the windows shared between the site and the remote site are found for every decimation level. The requested statistics are then calculated window by window and written to the statistic path of the measurement.

    Parameters
    ----------
    projData : ProjectData
        A project data instance
    remoteSite : str
        The name of the site to use as the remote site
    sites : List[str], optional
        A list of sites to calculate statistics for. Defaults to all sites in the project.
    sampleFreqs : List[float], optional
        List of sampling frequencies for which to calculate statistics. Measurements with other sampling frequencies are skipped.
    chans : List[str], optional
        List of data channels to use (accepted as an option, but not read directly in this function)
    specdir : str, optional
        The spectra directory for which to calculate statistics
    remotestats : List[str], optional
        The statistics to calculate out. Acceptable statistics are: "RR_coherence", "RR_coherenceEqn", "RR_absvalEqn", "RR_transferFunction", "RR_resPhase". Configuration file values are used by default.
    """

    # defaults come from the project and its configuration, user keywords override them
    options = parseKeywords(
        {
            "sites": projData.getSites(),
            "sampleFreqs": projData.getSampleFreqs(),
            "chans": [],
            "specdir": projData.config.configParams["Spectra"]["specdir"],
            "remotestats": projData.config.configParams["Statistics"][
                "remotestats"],
        },
        kwargs,
    )
    statNames = options["remotestats"]

    projectText(
        "Calculating stats: {} for sites: {} with remote site {}".format(
            listToString(statNames),
            listToString(options["sites"]),
            remoteSite,
        ))

    # a single calculator and a single writer are reused for all measurements
    calculator = StatisticCalculator()
    writer = StatisticIO()

    for site in options["sites"]:
        siteData = projData.getSiteData(site)

        for meas in siteData.getMeasurements():
            sampleFreq = siteData.getMeasurementSampleFreq(meas)
            if sampleFreq not in options["sampleFreqs"]:
                # this sampling frequency was not requested
                continue

            projectText(
                "Calculating stats for site {}, measurement {} with reference {}"
                .format(site, meas, remoteSite))

            # decimation and windowing setup for this sampling frequency
            decParams = getDecimationParameters(sampleFreq, projData.config)
            decParams.printInfo()
            numLevels = decParams.numLevels
            winParams = getWindowParameters(decParams, projData.config)

            # the window selector finds the windows common to the local and remote site
            winSelector = getWindowSelector(projData, decParams, winParams)
            winSelector.setSites([site, remoteSite])
            winSelector.calcSharedWindows()

            # reader for the spectra of the local measurement
            specPath = os.path.join(siteData.getMeasurementSpecPath(meas),
                                    options["specdir"])
            specReader = SpectrumReader(specPath)

            for iDec in range(numLevels):
                if not specReader.openBinaryForReading("spectra", iDec):
                    # no spectra file could be opened for this decimation level
                    continue
                specReader.printInfo()

                # shared windows for this level, given as global indices
                levelWindows = winSelector.getSharedWindowsLevel(iDec)

                # properties of this particular spectra file
                refTime = specReader.getReferenceTime()
                winSize = specReader.getWindowSize()
                winOlap = specReader.getWindowOverlap()
                numWindows = specReader.getNumWindows()
                evalFreq = decParams.getEvalFrequenciesForLevel(iDec)
                sampleFreqDec = specReader.getSampleFreq()
                globalOffset = specReader.getGlobalOffset()
                fArray = specReader.getFrequencyArray()

                # restrict the shared windows to those held in this spectra file
                fileWindows = set(
                    np.arange(globalOffset, globalOffset + numWindows))
                sharedInFile = sorted(levelWindows.intersection(fileWindows))
                numShared = len(sharedInFile)

                # one statistic handler per requested statistic
                handlers = {}
                for stat in statNames:
                    elements = getStatElements(stat)
                    handler = StatisticData(stat, refTime, sampleFreqDec,
                                            winSize, winOlap)
                    # with a remote reference, the window count is the number of shared windows
                    handler.setStatParams(numShared, elements, evalFreq)
                    handler.comments = specReader.getComments()
                    handler.addComment(projData.config.getConfigComment())
                    handler.addComment(
                        "Calculating remote statistic: {}".format(stat))
                    handler.addComment("Statistic components: {}".format(
                        listToString(elements)))
                    handlers[stat] = handler

                # calculate the statistics for every shared window
                for iW, globalWindow in enumerate(sharedInFile):
                    localSpec = specReader.readBinaryWindowGlobal(globalWindow)
                    calculator.setSpectra(fArray, localSpec, evalFreq)
                    # the remote spectra are read through the window selector's reader
                    _, remoteReader = winSelector.getSpecReaderForWindow(
                        remoteSite, iDec, globalWindow)
                    remoteSpec = remoteReader.readBinaryWindowGlobal(
                        globalWindow)
                    calculator.addRemoteSpec(remoteSpec)

                    for statName, handler in handlers.items():
                        statData = calculator.getDataForStatName(statName)
                        handler.addStat(iW, globalWindow, statData)

                # write out each statistic for this decimation level
                for handler in handlers.values():
                    writer.setDatapath(
                        os.path.join(siteData.getMeasurementStatPath(meas),
                                     options["specdir"]))
                    writer.write(handler, iDec)
Beispiel #30
0
def viewSpectraSection(
    projData: ProjectData, site: str, meas: str, **kwargs
) -> Union[plt.figure, None]:
    """View spectra section for a measurement

    Plots the spectral amplitude of each channel against time and frequency for a single decimation level. At most 250 windows are plotted. When the spectra file has more windows than that, 250 evenly spaced windows are used, otherwise every window in the file is plotted.

    Parameters
    ----------
    projData : ProjectData
        The project data
    site : str
        The site to view
    meas: str
        The measurement of the site to view    
    chans : List[str], optional
        Channels to plot. By default, all the channels of the spectra file are plotted.
    declevel : int, optional
        Decimation level to plot
    specdir : str, optional
        String that specifies spectra directory for the measurement
    show : bool, optional
        Show the spectra plot
    save : bool, optional
        Save the plot to the images directory
    plotoptions : Dict, optional
        Dictionary of plot options
    
    Returns
    -------
    matplotlib.pyplot.figure or None
        A matplotlib figure unless the plot is not shown and is saved, in which case None and the figure is closed.
    """

    options = {}
    options["chans"] = []
    options["declevel"] = 0
    options["specdir"] = projData.config.configParams["Spectra"]["specdir"]
    options["show"] = True
    options["save"] = False
    options["plotoptions"] = plotOptionsSpec()
    options = parseKeywords(options, kwargs)

    projectText(
        "Plotting spectra section for measurement {} and site {}".format(meas, site)
    )
    specReader = getSpecReader(projData, site, meas, **options)

    # channels: user supplied channels take precedence over those in the file
    dataChans = specReader.getChannels()
    if len(options["chans"]) > 0:
        dataChans = options["chans"]

    # get windows
    numWindows = specReader.getNumWindows()
    sampleFreqDec = specReader.getSampleFreq()

    # freq array
    f = specReader.getFrequencyArray()

    # limit the section to a maximum number of windows
    maxSectionWindows = 250
    if numWindows > maxSectionWindows:
        # too many windows to plot them all, so take an evenly spaced subset
        windows = list(
            np.linspace(
                0, numWindows, maxSectionWindows, endpoint=False, dtype=np.int32
            )
        )
    else:
        # plot every window in the file; indices must stay below numWindows,
        # otherwise windows that do not exist would be requested from the reader
        windows = np.arange(0, numWindows)

    # create figure
    plotfonts = options["plotoptions"]["plotfonts"]
    fig = plt.figure(figsize=options["plotoptions"]["figsize"])
    st = fig.suptitle(
        "Spectra section, site = {}, meas = {}, fs = {:.2f} [Hz], decimation level = {:2d}, windows = {:d}, {} to {}".format(
            site,
            meas,
            sampleFreqDec,
            options["declevel"],
            len(windows),
            windows[0],
            windows[-1],
        ),
        fontsize=plotfonts["suptitle"],
    )
    st.set_y(0.98)

    # collect the data: one row per window, one column per channel
    specData = np.empty(
        shape=(len(windows), len(dataChans), specReader.getDataSize()), dtype="complex"
    )
    dates = []
    for idx, iW in enumerate(windows):
        winData = specReader.readBinaryWindowLocal(iW)
        for cIdx, chan in enumerate(dataChans):
            specData[idx, cIdx, :] = winData.data[chan]
        dates.append(winData.startTime)

    ampLim = options["plotoptions"]["amplim"]
    for idx, chan in enumerate(dataChans):
        ax = plt.subplot(1, len(dataChans), idx + 1)
        # transpose so that frequency is on the y axis and time on the x axis
        plotData = np.transpose(np.absolute(np.squeeze(specData[:, idx, :])))
        # use the user amplitude limits if a pair was given, else the data range
        if len(ampLim) == 2:
            norm = LogNorm(vmin=ampLim[0], vmax=ampLim[1])
        else:
            norm = LogNorm(vmin=plotData.min(), vmax=plotData.max())
        plt.pcolor(dates, f, plotData, norm=norm, cmap=colorbar2dSpectra())
        cb = plt.colorbar()
        cb.ax.tick_params(labelsize=plotfonts["axisTicks"])
        # set axis limits
        ax.set_ylim(0, specReader.getSampleFreq() / 2.0)
        ax.set_xlim([dates[0], dates[-1]])
        if isMagnetic(chan):
            plt.title("Amplitude {} [nT]".format(chan), fontsize=plotfonts["title"])
        else:
            plt.title("Amplitude {} [mV/km]".format(chan), fontsize=plotfonts["title"])
        ax.set_ylabel("Frequency [Hz]", fontsize=plotfonts["axisLabel"])
        ax.set_xlabel("Time", fontsize=plotfonts["axisLabel"])
        # set tick sizes
        for label in ax.get_xticklabels() + ax.get_yticklabels():
            label.set_fontsize(plotfonts["axisTicks"])
        plt.grid(True)

    # plot format
    fig.autofmt_xdate(rotation=90, ha="center")
    fig.tight_layout(rect=[0.02, 0.02, 0.96, 0.92])

    # plot show and save
    if options["save"]:
        impath = projData.imagePath
        filename = "spectraSection_{}_{}_dec{}_{}".format(
            site, meas, options["declevel"], options["specdir"]
        )
        savename = savePlot(impath, filename, fig)
        projectText("Image saved to file {}".format(savename))
    if options["show"]:
        plt.show(block=options["plotoptions"]["block"])
    if not options["show"] and options["save"]:
        # a saved but unseen figure is closed, there is nothing to hand back
        plt.close(fig)
        return None
    return fig