Esempio n. 1
0
def bandPass(
    timeData: TimeData, cutoffLow: float, cutoffHigh: float, inplace: bool = True
) -> TimeData:
    """Bandpass filter for time data

    The filtering is delegated to bandPassData. This wrapper takes care of the optional copy and records the operation in the time data comments.

    Parameters
    ----------
    timeData : TimeData
        timeData to filter
    cutoffLow : float
        Lower cutoff frequency in Hz
    cutoffHigh : float
        Upper cutoff frequency in Hz
    inplace : bool, optional
        Whether to manipulate the data inplace. Default is True. When False, a copy is filtered and returned

    Returns
    -------
    TimeData
        Filtered time data
    """
    target = timeData if inplace else timeData.copy()
    filtered = bandPassData(target.data, target.sampleFreq, cutoffLow, cutoffHigh)
    target.data = filtered
    message = "Band pass filter applied with cutoffs {} Hz and {} Hz".format(
        cutoffLow, cutoffHigh
    )
    target.addComment(message)
    return target
Esempio n. 2
0
def polarityReversal(timeData: TimeData,
                     reversal: Dict[str, bool],
                     inplace: bool = True) -> TimeData:
    """Multiply the data by -1 (polarity reversal)

    The reversal itself is delegated to polarityReversalData. This wrapper handles the optional copy and adds a comment describing the operation.

    Parameters
    ----------
    timeData : TimeData
        timeData to apply the polarity reversal to
    reversal : Dict[str, bool]
        Keys are channels and values are boolean flags for reversing
    inplace : bool, optional
        Whether to manipulate the data inplace. Default is True. When False, a copy is changed and returned

    Returns
    -------
    TimeData
        Time data after polarity reversal
    """
    target = timeData if inplace else timeData.copy()
    reversedData = polarityReversalData(target.data, reversal)
    target.data = reversedData
    message = "Polarity reversal with parameters: {}".format(reversal)
    target.addComment(message)
    return target
Esempio n. 3
0
    def getData(self, iWindow: int) -> TimeData:
        """Returns time window data for local index

        The channel data is sliced out of self.timeData using the sample range stored in self.winSamples. The global window index and the window start and stop times are taken from self.winTimes.

        Parameters
        ----------
        iWindow : int
            Local index of window

        Returns
        -------
        windowData : TimeData
            TimeData object with the window data. The comments are those of the parent time data plus one entry naming the local and global window
        """
        winSamples = self.winSamples[iWindow]
        # the stored last sample is inclusive, so add 1 as slicing excludes the end
        sampleSlice = slice(winSamples[0], winSamples[1] + 1)
        winData = {c: self.timeData.data[c][sampleSlice] for c in self.chans}
        winTimes = self.winTimes[iWindow]
        globalWindow = winTimes[0]
        winStartTime = winTimes[1]
        winStopTime = winTimes[2]
        # the local index is substituted into the comment rather than a literal placeholder
        windowComment = "Local window {}, global window {}".format(
            iWindow, globalWindow
        )
        return TimeData(
            sampleFreq=self.sampleFreq,
            startTime=winStartTime,
            stopTime=winStopTime,
            data=winData,
            comments=self.timeData.comments + [windowComment],
        )
Esempio n. 4
0
def interpolateToSecond(timeData: TimeData, inplace: bool = True) -> TimeData:
    """Interpolate data to be on the second

    Some formats of time data (e.g. SPAM) do not start on the second with their sampling. This method interpolates so that sampling starts on the second and improves interoperability with other recording formats.

    The interpolation is performed by interpolateToSecondData on the incoming data. The start time, stop time, number of samples and data of the returned object are then updated to match.

    Parameters
    ----------
    timeData : TimeData
        Time data to interpolate onto the second
    inplace :  bool, optional
        Whether to do the interpolation inplace or not. Default is True.

    Returns
    -------
    TimeData
        Time data interpolated to start on the second
    """
    newStartTime, newNumSamples, newData = interpolateToSecondData(
        timeData.data, timeData.sampleFreq, timeData.startTime
    )
    target = timeData if inplace else timeData.copy()
    target.numSamples = newNumSamples
    target.startTime = newStartTime
    # stop time is the time of the last sample, hence numSamples - 1 sample periods after the start
    duration = timedelta(
        seconds=(1.0 / target.sampleFreq) * (target.numSamples - 1)
    )
    target.stopTime = target.startTime + duration
    target.data = newData
    message = "Time data interpolated to nearest second. New start time {}, new end time {}, new number of samples {} ".format(
        target.startTime, target.stopTime, target.numSamples
    )
    target.addComment(message)
    return target
Esempio n. 5
0
def resample(timeData: TimeData, resampFreq: float, inplace: bool = True) -> TimeData:
    """Resample time data

    The resampling is delegated to resampleData. Afterwards the sampling frequency, number of samples and stop time are updated to be consistent with the resampled data.

    Parameters
    ----------
    timeData : TimeData
        timeData to resample
    resampFreq : float
        The frequency to resample to
    inplace : bool, optional
        Whether to manipulate the data inplace. Default is True. When False, a copy is resampled and returned

    Returns
    -------
    TimeData
        Resampled time data
    """
    origFreq = timeData.sampleFreq
    target = timeData if inplace else timeData.copy()
    target.data = resampleData(target.data, target.sampleFreq, resampFreq)
    # bring the time information in line with the resampled data
    target.sampleFreq = resampFreq
    firstChan = target.chans[0]
    target.numSamples = target.data[firstChan].size
    duration = timedelta(
        seconds=(1.0 / target.sampleFreq) * (target.numSamples - 1)
    )
    target.stopTime = target.startTime + duration
    message = "Time data resampled from {:.6f} Hz to {:.6f} Hz".format(
        origFreq, resampFreq
    )
    target.addComment(message)
    return target
Esempio n. 6
0
def normalise(timeData: TimeData, inplace: bool = True) -> TimeData:
    """Normalise time data

    The normalisation is delegated to normaliseData. This wrapper handles the optional copy and adds a comment recording the operation.

    Parameters
    ----------
    timeData : TimeData
        timeData to normalise
    inplace : bool, optional
        Whether to manipulate the data inplace. Default is True. When False, a copy is normalised and returned

    Returns
    -------
    TimeData
        Normalised time data
    """
    target = timeData if inplace else timeData.copy()
    normalised = normaliseData(target.data)
    target.data = normalised
    target.addComment("Data normalised")
    return target
Esempio n. 7
0
def scale(timeData: TimeData,
          scalars: Dict[str, float],
          inplace: bool = True) -> TimeData:
    """Scale the data by an arbitrary amount

    The scaling is delegated to scaleData. This wrapper handles the optional copy and adds a comment listing the scalars used.

    Parameters
    ----------
    timeData : TimeData
        timeData to scale
    scalars : Dict[str, float]
        Keys are channels and values are the scalars passed to scaleData for those channels
    inplace : bool, optional
        Whether to manipulate the data inplace. Default is True. When False, a copy is scaled and returned

    Returns
    -------
    TimeData
        Scaled time data
    """
    if not inplace:
        timeData = timeData.copy()
    timeData.data = scaleData(timeData.data, scalars)
    timeData.addComment("Time data scaled with scalars: {}".format(scalars))
    return timeData
Esempio n. 8
0
def notchFilter(timeData: TimeData, notch: float, inplace: bool = True) -> TimeData:
    """Notch filter for time data

    The filtering is delegated to notchFilterData. This wrapper handles the optional copy and adds a comment recording the notch frequency.

    Parameters
    ----------
    timeData : TimeData
        timeData to filter
    notch : float
        Frequency to notch filter in Hz
    inplace : bool, optional
        Whether to manipulate the data inplace. Default is True. When False, a copy is filtered and returned

    Returns
    -------
    TimeData
        Filtered time data
    """
    target = timeData if inplace else timeData.copy()
    # a fifth of the notch frequency is passed as the final argument
    # presumably this is the width of the notch band - confirm against notchFilterData
    notchBand = notch / 5.0
    target.data = notchFilterData(target.data, target.sampleFreq, notch, notchBand)
    message = "Notch filter applied at {} Hz".format(notch)
    target.addComment(message)
    return target
Esempio n. 9
0
def test_spectra_calculator_window():
    """Test Fourier coefficients of impulse data with detrend off and applywindow on"""
    from resistics.common.format import datetimeFormat
    from resistics.time.data import TimeData
    from resistics.spectra.calculator import SpectrumCalculator
    import numpy as np
    from datetime import datetime

    # eight samples of time data at 128 Hz
    sampleFreq = 128
    startTime = "2020-01-01 00:00:00.000000"
    stopTime = "2020-01-01 00:00:00.062500"
    # one impulse on the first sample and one shifted by a sample to give a phase
    data = {
        "Ex": np.array([1, 0, 0, 0, 0, 0, 0, 0]),
        "Hy": np.array([0, 1, 0, 0, 0, 0, 0, 0]),
    }
    timeData = TimeData(sampleFreq, startTime, stopTime, data)

    # calculate the spectra without detrending and with the applywindow flag set
    specCalc = SpectrumCalculator(128, 8)
    specCalc.detrend = False
    specCalc.applywindow = True
    specData = specCalc.calcFourierCoeff(timeData)

    # check the spectra metadata
    assert np.absolute(specData.nyquist - 64) < 0.000001
    assert specData.windowSize == 8
    assert specData.dataSize == 5
    assert specData.numChans == 2
    assert sorted(specData.chans) == sorted(["Ex", "Hy"])
    expectedStart = datetime.strptime(startTime, datetimeFormat(ns=True))
    assert specData.startTime == expectedStart
    expectedStop = datetime.strptime(stopTime, datetimeFormat(ns=True))
    assert specData.stopTime == expectedStop

    # check the frequencies and the Fourier coefficients of both channels
    expectedFreqs = [0, 16, 32, 48, 64]
    np.testing.assert_array_almost_equal(specData.freqArray, expectedFreqs)
    expectedEx = [1.0 + 0.0j] * 5
    np.testing.assert_array_almost_equal(specData.data["Ex"], expectedEx)
    expectedHy = [
        1 + 0j,
        0.707107 - 0.707107j,
        0 - 1j,
        -0.707107 - 0.707107j,
        -1 + 0j,
    ]
    np.testing.assert_array_almost_equal(specData.data["Hy"], expectedHy)
Esempio n. 10
0
def fillGap(timeData1, timeData2):
    """Fill gap between time series

    Fill gaps between two different recordings. The intent is to fill the gap when recording has been interrupted and there are two data files. Both times series must have the same sampling frequency.

    The gap is filled channel by channel with a straight line between the last sample of the earlier recording and the first sample of the later recording. The fill values are cast to the dtype of the earlier recording's channel data.

    Parameters
    ----------
    timeData1 : TimeData
        Time series data
    timeData2 : TimeData
        Time series data

    Returns
    -------
    TimeData
        Time series data with gap filled
    """
    if timeData1.sampleFreq != timeData2.sampleFreq:
        errorPrint(
            "fillGap",
            "fillGap requires both timeData objects to have the same sample rate",
            quitrun=True,
        )
        return False
    sampleFreq = timeData1.sampleFreq
    samplePeriod = timedelta(seconds=1.0 / sampleFreq)
    # order the recordings in time
    timeDataFirst, timeDataSecond = timeData1, timeData2
    if timeData1.startTime > timeData2.stopTime:
        timeDataFirst, timeDataSecond = timeData2, timeData1
    # start and stop times are inclusive, so the gap runs from one sample after
    # the end of the first recording to one sample before the start of the second
    gapStart = timeDataFirst.stopTime + samplePeriod
    gapEnd = timeDataSecond.startTime - samplePeriod
    # add 1 because gapStart and gapEnd are both samples in the gap
    numSamplesGap = (
        int(round((gapEnd - gapStart).total_seconds() * sampleFreq)) + 1
    )
    if numSamplesGap < 0:
        # a negative gap means the recordings overlap and there is nothing to fill
        errorPrint(
            "fillGap",
            "fillGap requires recordings which do not overlap in time",
            quitrun=True,
        )
        return False
    newData = {}
    for chan in timeDataFirst.chans:
        firstData = timeDataFirst.data[chan]
        secondData = timeDataSecond.data[chan]
        # numSamplesGap fill samples between the two known samples span
        # numSamplesGap + 1 equal steps, so take numSamplesGap + 2 evenly spaced
        # points including both known samples and keep only the interior ones
        fillData = np.linspace(firstData[-1], secondData[0], numSamplesGap + 2)[
            1:-1
        ].astype(firstData.dtype)
        newData[chan] = np.concatenate([firstData, fillData, secondData])
    # combine the comments of both recordings and note the filled gap
    separator = "-----------------------------"
    comment = (
        [separator, "TimeData1 comments"]
        + timeDataFirst.comments
        + [separator, "TimeData2 comments"]
        + timeDataSecond.comments
        + [separator, "Gap filled from {} to {}".format(gapStart, gapEnd)]
    )
    # return a new time data object
    return TimeData(
        sampleFreq=sampleFreq,
        startTime=timeDataFirst.startTime,
        stopTime=timeDataSecond.stopTime,
        data=newData,
        comments=comment,
    )
Esempio n. 11
0
 def reformatHigh(self, path: str, **kwargs) -> None:
     """Write out high frequency time series in internal format

     Every ts in self.tsNums other than self.continuous is read record by record. Records are joined together for as long as each record starts exactly where the previous one ended. When a record does not follow on from the previous one, the data gathered so far is written out in a directory under path and a new segment is started.

     Parameters
     ----------
     path : str
         Directory to write out the reformatted time series
     ts : List[int], optional
         A list of the high frequency ts files to reformat. By default, all of the higher frequency recordings are reformatted
     """
     timeFormat = "%Y-%m-%d %H:%M:%S.%f"
     writer = TimeWriterInternal()
     for idx, ts in enumerate(self.tsNums):
         if "ts" in kwargs and ts not in kwargs["ts"]:
             continue  # do not reformat this one
         # let's get the headers
         headers = self.getHeaders()
         chanHeaders, _ = self.getChanHeaders()
         chans = self.getChannels()
         # the continuous recording is not reformatted by this method
         if ts == self.continuous:
             continue
         sampleFreq = self.tsSampleFreqs[idx]
         # set sample frequency in headers
         headers["sample_freq"] = sampleFreq
         for cH in chanHeaders:
             cH["sample_freq"] = sampleFreq
         # the context manager closes the data file even if reading or writing fails part way
         with open(self.dataF[idx], "rb") as dFile:
             # start time of the segment currently being gathered
             outStartTime = datetime.strptime(self.recordStarts[ts][0], timeFormat)
             # data gathered for the current segment and the end time of the previous record
             data = {}
             prevEndTime = None
             # each record has to be read separately and its start compared to the previous end
             for record, startDate in enumerate(self.recordStarts[ts]):
                 # start date is a string
                 startByte = self.recordBytes[ts][record]
                 startDateTime = datetime.strptime(startDate, timeFormat)
                 numScans = self.recordScans[ts][record]
                 # read the record - numpy does not support 24 bit two's complement (3 bytes)
                 bytesToRead = (numScans * self.sampleByteSize *
                                self.getNumChannels())
                 dFile.seek(startByte, 0)  # seek to start byte from start of file
                 dataBytes = dFile.read(bytesToRead)
                 dataRead = self.twosComplement(dataBytes)
                 # samples are interleaved by channel, so stride through them by the number of channels
                 dataRecord = {}
                 for chan in chans:
                     chanIndex = self.chanMap[chan]
                     dataRecord[chan] = dataRead[
                         chanIndex:numScans *
                         self.getNumChannels():self.getNumChannels()]
                 # the time one sample period after the last sample of this record
                 recordEndTime = startDateTime + timedelta(seconds=(
                     (1.0 / sampleFreq) * numScans))
                 if record == 0:
                     # first record starts the first segment
                     data = copy.deepcopy(dataRecord)
                 elif startDateTime != prevEndTime:
                     # this record does not follow on from the previous one
                     # so write out the data gathered so far before starting a new segment
                     # prevEndTime is one sample period past the last sample, stop time is inclusive
                     outStopTime = prevEndTime - timedelta(
                         seconds=1.0 / sampleFreq)
                     numSamples = data[chans[0]].size
                     headers["start_date"] = outStartTime.strftime("%Y-%m-%d")
                     headers["start_time"] = outStartTime.strftime(
                         "%H:%M:%S.%f")
                     headers["stop_date"] = outStopTime.strftime("%Y-%m-%d")
                     headers["stop_time"] = outStopTime.strftime("%H:%M:%S.%f")
                     headers["num_samples"] = numSamples
                     for cH in chanHeaders:
                         cH["start_date"] = headers["start_date"]
                         cH["start_time"] = headers["start_time"]
                         cH["stop_date"] = headers["stop_date"]
                         cH["stop_time"] = headers["stop_time"]
                         cH["num_samples"] = numSamples
                     # get the outpath
                     dataOutpath = os.path.join(
                         path,
                         "meas_ts{}_{}_{}".format(
                             ts,
                             outStartTime.strftime("%Y-%m-%d-%H-%M-%S"),
                             outStopTime.strftime("%Y-%m-%d-%H-%M-%S"),
                         ),
                     )
                     # create the timeData object
                     # NOTE(review): comments is passed as a single string here - confirm TimeData accepts that
                     comment = "Unscaled samples for interval {} to {} read in from measurement {}".format(
                         outStartTime, outStopTime, self.dataF[idx])
                     # NOTE(review): self.getSampleFreq() is used here rather than the sampleFreq of this ts - confirm
                     timeData = TimeData(
                         sampleFreq=self.getSampleFreq(),
                         startTime=outStartTime,
                         stopTime=outStopTime,
                         data=data,
                         comments=comment,
                     )
                     # write out
                     writer.setOutPath(dataOutpath)
                     writer.writeData(headers, chanHeaders, timeData)
                     # then start a new segment from the current record
                     outStartTime = startDateTime
                     data = copy.deepcopy(dataRecord)
                 else:
                     # record follows on from the previous one, so join the data
                     for chan in chans:
                         data[chan] = np.concatenate(
                             (data[chan], dataRecord[chan]))
                 prevEndTime = recordEndTime
             # NOTE(review): the segment still held in data when the loop ends is not written out
             # confirm whether dropping the final segment is intended
Esempio n. 12
0
    def getUnscaledSamples(self, **kwargs) -> TimeData:
        """Get raw data from data file

        Only returns the continuous data. The continuous data is in 24 bit two's complement (3 bytes) format, which is not supported by numpy, so the bytes read from file are decoded with self.twosComplement.

        Parameters
        ----------
        chans : List[str], optional
            List of channels to return if not all are required
        startSample : int, optional
            First sample to return
        endSample : int, optional
            Last sample to return

        Returns
        -------
        TimeData
            Time data object
        """
        # initialise chans, startSample and endSample with the whole dataset
        options = self.parseGetDataKeywords(kwargs)

        # get the records to read and the samples to take from them, in the correct order
        recordsToRead, samplesToRead = self.getRecordsForSamples(
            options["startSample"], options["endSample"])
        # start and end sample are inclusive
        numSamples = options["endSample"] - options["startSample"] + 1
        # set up the dictionary to hold the data
        data = {}
        for chan in options["chans"]:
            data[chan] = np.zeros(shape=(numSamples), dtype=self.dtype)

        numChans = self.getNumChannels()
        # each sample is recorded as a scan (all channels recorded at the same time)
        scanByteSize = self.sampleByteSize * numChans
        sampleCounter = 0
        # the context manager closes the file even if a read or decode fails
        with open(self.continuousF, "rb") as dFile:
            for record, sToRead in zip(recordsToRead, samplesToRead):
                # number of samples to read in record, the range is inclusive
                dSamples = sToRead[1] - sToRead[0] + 1
                # byte and sample at which the record starts
                recordByteStart = self.recordBytes[self.continuous][record]
                recordSampleStart = self.recordSampleStarts[
                    self.continuous][record]
                # offset into the record by the number of scans before the first wanted sample
                byteReadStart = (recordByteStart +
                                 (sToRead[0] - recordSampleStart) * scanByteSize)
                bytesToRead = dSamples * scanByteSize
                dFile.seek(byteReadStart, 0)  # seek to start byte from start of file
                dataBytes = dFile.read(bytesToRead)
                dataRead = self.twosComplement(dataBytes)
                # now need to unpack this
                for chan in options["chans"]:
                    # check to make sure channel exists
                    self.checkChan(chan)
                    # the chanIndex gives the position of the channel within each scan
                    chanIndex = self.chanMap[chan]
                    # stride through the interleaved samples to pick out this channel
                    data[chan][sampleCounter:sampleCounter + dSamples] = dataRead[
                        chanIndex:dSamples * numChans:numChans]
                # get ready for the next data read
                sampleCounter = sampleCounter + dSamples

        # return data
        startTime, stopTime = self.sample2time(options["startSample"],
                                               options["endSample"])
        comment = "Unscaled data {} to {} read in from measurement {}, samples {} to {}".format(
            startTime,
            stopTime,
            self.dataPath,
            options["startSample"],
            options["endSample"],
        )
        return TimeData(
            sampleFreq=self.getSampleFreq(),
            startTime=startTime,
            stopTime=stopTime,
            data=data,
            comments=comment,
        )
Esempio n. 13
0
    def calibrate(
        self,
        timeData: TimeData,
        sensor: Dict[str, str],
        serial: Dict[str, int],
        chopper: Dict[str, bool],
    ) -> TimeData:
        """Calibrate time data

        For each channel in timeData, a calibration file is looked up using the sensor type, serial number and chopper of the channel. If a calibration file is found, the channel is calibrated using the data read from the file. If no file is found, useTheoretical is set and the channel is magnetic, the theoretical calibration data for the sensor is used. Otherwise the channel is left uncalibrated. A comment is added to timeData for every channel stating what was done.

        The channel data of the timeData passed in is replaced, and the same object is returned.

        Parameters
        ----------
        timeData : TimeData
            TimeData object
        sensor : Dict[str, str]
            Dictionary of sensor information with channels as the key and sensor as the value (sensor is a string)
        serial : Dict[str, int]
            Dictionary of serial information with channels as the key and serial number as the value (serial is a number)
        chopper : Dict[str, bool]
            Dictionary of chopper information with channels as the key and chopper as the value (chopper is a bool)

        Returns
        -------
        timeData : TimeData
            Calibrated TimeData object
        """
        calIO = CalibrationIO()
        for chan in timeData.chans:
            self.printText("Calibrating channel {}".format(chan))
            chanSensor = sensor[chan]
            chanSerial = serial[chan]
            chanChopper = chopper[chan]
            # an empty calibration file string means no file was found
            calFile, calFormat = self.getCalFile(chanSensor, chanSerial,
                                                 chanChopper)

            if calFile == "":
                if not (self.useTheoretical and isMagnetic(chan)):
                    # nothing available to calibrate with, leave the channel as it is
                    self.printText(
                        "No Calibration data found - channel will not be calibrated"
                    )
                    timeData.addComment(
                        "Channel {} not calibrated".format(chan))
                    continue
                # fall back on the theoretical calibration for the sensor
                theoreticalData = self.getTheoreticalCalData(chanSensor)
                timeData.data[chan] = self.calibrateChan(
                    timeData.data[chan], timeData.sampleFreq, theoreticalData)
                timeData.addComment(
                    "Channel {} calibrated with theoretical calibration function"
                    .format(chan))
                continue

            # a file was found, read the calibration data from it
            # no need to separately apply static gain, already included in cal data
            calIO.refresh(calFile,
                          calFormat,
                          chopper=chanChopper,
                          extend=self.extend)
            calData = calIO.read()
            foundMessage = (
                "Calibration file found for sensor {}, serial number {}, chopper {}: {}"
                .format(chanSensor, chanSerial, chanChopper, calFile))
            self.printText(foundMessage)
            self.printText("Format: {}".format(calFormat))
            gainMessage = (
                "Static gain correction of {} applied to calibration data".
                format(calData.staticGain))
            self.printText(gainMessage)
            # calibrate the channel and record where the calibration came from
            calibrated = self.calibrateChan(timeData.data[chan],
                                            timeData.sampleFreq, calData)
            timeData.data[chan] = calibrated
            timeData.addComment(
                "Channel {} calibrated with calibration data from file {}".
                format(chan, calFile))
        return timeData
Esempio n. 14
0
    def getUnscaledSamples(self, **kwargs) -> TimeData:
        """Get raw data from data file

        Depending on the data format, this could be raw counts or in some physical unit. The method implemented in the base DataReader can read from ATS and internal files. SPAM and Phoenix data readers have their own implementations.

        The raw data units for ATS and internal data formats are as follows:

        - ATS data format has raw data in counts.
        - The raw data unit of the internal format is dependent on what happened to the data before writing it out in the internal format. If the channel header scaling_applied is set to True, no scaling happens in either getUnscaledSamples or getPhysicalSamples. However, if the channel header scaling_applied is set to False, the internal format data will be treated like ATS data, meaning raw data in counts.

        Each channel is read from its own data file as a read-only memory map, using float32 when scaling has already been applied to the channel and int32 otherwise.

        Parameters
        ----------
        chans : List[str], optional
            List of channels to return if not all are required
        startSample : int, optional
            First sample to return
        endSample : int, optional
            Last sample to return

        Returns
        -------
        TimeData
            Time data object
        """
        # chans, startSample and endSample default to the whole dataset
        options = self.parseGetDataKeywords(kwargs)
        firstSample = options["startSample"]
        lastSample = options["endSample"]
        # the sample range is inclusive
        dSamples = lastSample - firstSample + 1

        data = {}
        for chan in options["chans"]:
            # check to make sure channel exists
            self.checkChan(chan)
            chanPath = os.path.join(self.dataPath, self.getChanDataFile(chan))
            # skip the bytes before the data and the samples before the first one wanted
            byteOff = self.dataByteOffset + firstSample * self.dataByteSize
            # scaled data is stored as float32, unscaled data as int32
            chanDtype = "float32" if self.getChanScalingApplied(chan) else "int32"
            data[chan] = np.memmap(chanPath,
                                   dtype=chanDtype,
                                   mode="r",
                                   offset=byteOff,
                                   shape=(dSamples))

        # get data start and stop time
        startTime, stopTime = self.sample2time(firstSample, lastSample)
        # comments for this read, placed after any comments the reader already holds
        readMessage = (
            "Unscaled data {} to {} read in from measurement {}, samples {} to {}"
            .format(startTime, stopTime, self.dataPath, firstSample,
                    lastSample))
        comments = [
            readMessage,
            "Sampling frequency {}".format(self.getSampleFreq()),
        ]
        if len(self.comments) > 0:
            comments = self.comments + comments
        return TimeData(
            sampleFreq=self.getSampleFreq(),
            startTime=startTime,
            stopTime=stopTime,
            data=data,
            comments=comments,
        )
Esempio n. 15
0
    def getUnscaledSamples(self, **kwargs) -> TimeData:
        """Get raw data from data file, returned in mV

        SPAM raw data is single precision float with unit Volts. Calling this applies the ts_lsb calculated when the headers are read. This is because when a recording consists of multiple data files, each channel of each data file might have a different scaling. The only way to make the data consistent is to apply the ts_lsb scaling.

        Therefore, this method returns the data in mV for all channels.

        Parameters
        ----------
        chans : List[str], optional
            List of channels to return if not all are required
        startSample : int, optional
            First sample to return
        endSample : int, optional
            Last sample to return

        Returns
        -------
        TimeData
            Time data object
        """
        # chans, startSample and endSample default to the whole dataset
        options = self.parseGetDataKeywords(kwargs)
        firstSample = options["startSample"]
        lastSample = options["endSample"]

        # get the files to read, the samples to take from each and their scalings, in the correct order
        dataFilesToRead, samplesToRead, scalings = self.getDataFilesForSamples(
            firstSample, lastSample)
        # the sample range is inclusive
        numSamples = lastSample - firstSample + 1
        # set up the dictionary to hold the data
        data = {
            chan: np.zeros(shape=(numSamples), dtype=self.dtype)
            for chan in options["chans"]
        }

        # position in the output arrays at which the next file's samples are placed
        sampleCounter = 0
        for dFile, sToRead, scalar in zip(dataFilesToRead, samplesToRead,
                                          scalings):
            # samples to take from this file - this is inclusive
            dSamples = sToRead[1] - sToRead[0] + 1
            # number of channels recorded in this file, samples are interleaved by channel
            fileChans = self.recChannels[dFile]
            dSamplesRead = dSamples * fileChans
            # skip the bytes before the data and the scans before the first one wanted
            byteOff = (self.dataByteOffset[dFile] +
                       sToRead[0] * fileChans * self.dataByteSize)
            dFilePath = os.path.join(self.dataPath, dFile)
            dataRead = np.memmap(
                dFilePath,
                dtype=self.dtype,
                mode="r",
                offset=byteOff,
                shape=(dSamplesRead),
            )
            # slice end is exclusive, so this covers exactly dSamples output samples
            outSlice = slice(sampleCounter, sampleCounter + dSamples)
            for chan in options["chans"]:
                # check to make sure channel exists
                self.checkChan(chan)
                # the chanIndex gives the position of the channel within each scan
                chanIndex = self.chanMap[chan]
                # pick out the channel and scale by the lsb scalar
                # note that the scalars can be different for each file in the run
                chanSamples = dataRead[chanIndex:dSamplesRead:fileChans]
                data[chan][outSlice] = chanSamples * scalar[chan]
            # get ready for the next data read
            sampleCounter = sampleCounter + dSamples

        # return data
        startTime, stopTime = self.sample2time(firstSample, lastSample)
        readMessage = (
            "Unscaled data {} to {} read in from measurement {}, samples {} to {}"
            .format(startTime, stopTime, self.dataPath, firstSample,
                    lastSample))
        comments = [
            readMessage,
            "Data read from {} files in total".format(len(dataFilesToRead)),
            "Data scaled to mV for all channels using scalings in header files",
            "Sampling frequency {}".format(self.getSampleFreq()),
        ]
        return TimeData(
            sampleFreq=self.getSampleFreq(),
            startTime=startTime,
            stopTime=stopTime,
            data=data,
            comments=comments,
        )
Esempio n. 16
0
    def write(self, headers: Dict, chanHeaders: List, chanMap: Dict,
              timeData: TimeData, **kwargs):
        """Write out headers, comments and data files for a TimeData object

        Before anything is written, the headers are reconciled with the TimeData object. Where the number of samples, sampling frequency, start time or stop time in the headers disagree with the TimeData object, a warning is printed and the TimeData value is used. The stop time is then recalculated from the start time, sampling frequency and number of samples, and the recalculated value is used if it differs.

        If channels have different numbers of samples, every channel in timeData is truncated to the smallest number of samples. Note that timeData is modified by this method (channel data is truncated and numSamples is set).

        Parameters
        ----------
        headers : Dict
            Dictionary of headers
        chanHeaders : List
            List of channel headers
        chanMap : Dict
            Maps channel to index for chanHeaders
        timeData : TimeData
            Time series data as TimeData object
        **kwargs
            Keyword arguments passed on to setGlobalHeadersFromKeywords and setChanHeadersFromKeywords
        """
        # set global headers for keyword arguments
        headers = self.setGlobalHeadersFromKeywords(headers, kwargs)
        # set channel headers for keyword arguments
        chanHeaders = self.setChanHeadersFromKeywords(chanHeaders, kwargs)

        # now overwrite the options by checking the TimeData object
        # number of samples and sample frequency
        # Current method favours the time data object
        chans = sorted(timeData.chans)
        dataSizes = [timeData.data[c].size for c in chans]
        if min(dataSizes) != max(dataSizes):
            # sizes are integers and have to be converted to strings before joining
            self.printWarning(
                "Channels do not have the same number of samples: {} - {}".
                format(", ".join(chans),
                       ", ".join(str(size) for size in dataSizes)))
            self.printWarning(
                "Only the smallest number of samples will be written out")
        numSamples = min(dataSizes)
        if headers["num_samples"] != numSamples:
            self.printWarning(
                "Number of samples {} in headers does not match number of samples in TimeData object {}. TimeData info will be used."
                .format(headers["num_samples"], numSamples))
            headers["num_samples"] = numSamples
        timeData.numSamples = numSamples
        # sample freq
        if headers["sample_freq"] != timeData.sampleFreq:
            self.printWarning(
                "Sample frequency of {} Hz in headers does not match {} Hz in TimeData object"
                .format(headers["sample_freq"], timeData.sampleFreq))
            self.printWarning(
                "Sample frequency in TimeData object will be used")
            headers["sample_freq"] = timeData.sampleFreq

        # deal with start and end time and create datetime objects
        # the start time does not change on resampling, only the end time
        dateTimeFormat = "%Y-%m-%d %H:%M:%S.%f"
        datetimeStart = datetime.strptime(
            "{} {}".format(headers["start_date"], headers["start_time"]),
            dateTimeFormat,
        )
        datetimeStop = datetime.strptime(
            "{} {}".format(headers["stop_date"], headers["stop_time"]),
            dateTimeFormat,
        )
        # now let's compare to the time data
        if datetimeStart != timeData.startTime:
            self.printWarning(
                "Start in headers {} does not match that in TimeData object {}. TimeData start time will be used"
                .format(datetimeStart, timeData.startTime))
            datetimeStart = timeData.startTime
        if datetimeStop != timeData.stopTime:
            self.printWarning(
                "Stop in headers {} does not match that in TimeData object {}. TimeData stop time will be used"
                .format(datetimeStop, timeData.stopTime))
            datetimeStop = timeData.stopTime
        # now recalculate datetime using the number of samples and compare again
        datetimeRecalc = self.calcStopDateTime(timeData.sampleFreq, numSamples,
                                               datetimeStart)
        if datetimeRecalc != datetimeStop:
            self.printWarning(
                "Note, discrepancy between stop time in given headers and those calculated from data"
            )
            self.printWarning(
                "Causes of this might be resampling or interpolation processes and the limiting of data"
            )
            self.printWarning(
                "If no resampling, interpolation or limiting of data has been performed, please check all times"
            )
            self.printWarning(
                "Stop time {} calculated from data will be used instead of that in data {}"
                .format(datetimeRecalc, datetimeStop))
            datetimeStop = datetimeRecalc
        headers["start_date"] = datetimeStart.strftime("%Y-%m-%d")
        headers["start_time"] = datetimeStart.strftime("%H:%M:%S.%f")
        headers["stop_date"] = datetimeStop.strftime("%Y-%m-%d")
        headers["stop_time"] = datetimeStop.strftime("%H:%M:%S.%f")

        # now update all the chan headers and limit data to numSamples
        # these header entries are copied from the global headers to every channel header
        sharedHeaders = (
            "num_samples",
            "sample_freq",
            "start_date",
            "start_time",
            "stop_date",
            "stop_time",
        )
        for c in chans:
            timeData.data[c] = timeData.data[c][:numSamples]
            cIndex = chanMap[c]
            for header in sharedHeaders:
                chanHeaders[cIndex][header] = headers[header]

        # finally, check the number of measurement channels
        headers["meas_channels"] = len(chans)

        # now write out the headers and save to class variables
        self.writeHeaders(headers, chans, chanMap, chanHeaders)
        self.headers = headers
        self.chans = chans
        self.chanMap = chanMap
        self.chanHeaders = chanHeaders
        # write out comment file
        self.writeComments(timeData.comments)
        # write out the data files
        self.writeDataFiles(chans, timeData)
Esempio n. 17
0
    def getUnscaledSamples(self, **kwargs) -> TimeData:
        """Read unscaled samples from the ascii data files

        Each channel is read from its own data file. Line numbers in the file are treated as zero-based sample indices and only the lines from startSample to endSample (inclusive) are converted to floats. If a file has fewer lines than requested, the remaining samples are left as zero.

        Parameters
        ----------
        chans : List[str], optional
            List of channels to return if not all are required
        startSample : int, optional
            First sample to return
        endSample : int, optional
            Last sample to return

        Returns
        -------
        TimeData
            Time data object
        """
        # keywords default to the whole dataset when not provided
        options = self.parseGetDataKeywords(kwargs)
        firstSample = options["startSample"]
        lastSample = options["endSample"]
        # the sample range is inclusive of the last sample
        numSamples = lastSample - firstSample + 1

        data = {}
        for chan in options["chans"]:
            # make sure the channel exists before trying to read it
            self.checkChan(chan)
            chanPath = os.path.join(self.dataPath, self.getChanDataFile(chan))
            chanValues = np.zeros(shape=(numSamples), dtype=np.float32)
            with open(chanPath) as chanFile:
                for lineNum, line in enumerate(chanFile):
                    if lineNum > lastSample:
                        # past the requested range, nothing more to read
                        break
                    if lineNum < firstSample:
                        # not yet at the requested range
                        continue
                    chanValues[lineNum - firstSample] = float(line.strip())
            data[chan] = chanValues

        # start and stop time of the data which was read
        startTime, stopTime = self.sample2time(firstSample, lastSample)
        # comments describing what was read
        comments = [
            "Unscaled data {} to {} read in from measurement {}, samples {} to {}".format(
                startTime,
                stopTime,
                self.dataPath,
                firstSample,
                lastSample,
            ),
            "Sampling frequency {}".format(self.getSampleFreq()),
        ]
        # any existing comments come first
        existingComments = self.comments
        if len(existingComments) > 0:
            comments = existingComments + comments
        return TimeData(
            sampleFreq=self.getSampleFreq(),
            startTime=startTime,
            stopTime=stopTime,
            data=data,
            comments=comments,
        )
Esempio n. 18
0
    def getUnscaledSamples(self, **kwargs) -> TimeData:
        """Get raw data from data files, in counts unless scaling is requested

        Lemi B423 data always has five channels, in order Hx, Hy, Hz, Ex, Ey. The raw data is integer counts. Therefore, getting unscaled samples returns raw counts for the measurement. There are additional scalings which can be applied using the scale optional argument. Lemi B423 is recorded in multiple files. It has not been verified whether it is possible for each individual file to have different scaling.

        Without the scale option, the data is returned in:

        - Counts for both magnetic and electric channels (reading long integers)

        With the scaling option, the data is returned in:

        - microvolts for the electric channels
        - millivolts for the magnetic with the gain applied

        Applying the scaling does not appear to remove the internal gain of the Lemi. This will be removed when getting physical samples and the appropriate value must be set in the headers.

        Parameters
        ----------
        chans : List[str], optional
            List of channels to return if not all are required
        startSample : int, optional
            First sample to return
        endSample : int, optional
            Last sample to return
        scale : bool, optional
            Boolean flag for applying the gain scaling

        Returns
        -------
        TimeData
            Time data object
        """
        # initialise chans, startSample and endSample with the whole dataset
        options = self.parseGetDataKeywords(kwargs)

        # get the files to read and the samples to take from them, in the correct order
        dataFilesToRead, samplesToRead, scalings = self.getDataFilesForSamples(
            options["startSample"], options["endSample"])
        numSamples = options["endSample"] - options["startSample"] + 1
        # set up the dictionary to hold the data
        # scaled data is stored as float32, otherwise the data type of the reader is used
        dtype = np.float32 if options["scale"] else self.dtype
        data = {}
        for chan in options["chans"]:
            data[chan] = np.zeros(shape=(numSamples), dtype=dtype)

        # prepare comments
        startTime, stopTime = self.sample2time(options["startSample"],
                                               options["endSample"])
        comments = []
        comments.append(
            "Unscaled data {} to {} read in from measurement {}, samples {} to {}"
            .format(
                startTime,
                stopTime,
                self.dataPath,
                options["startSample"],
                options["endSample"],
            ))
        comments.append("Sampling frequency {}".format(self.getSampleFreq()))
        comments.append("Data read from {} files in total".format(
            len(dataFilesToRead)))
        comments.append("Scaling = {}".format(options["scale"]))

        # the number of channels is the stride between samples of one channel in the read data
        numChans = self.getNumChannels()

        # loop through files and get data
        sampleCounter = 0
        for dFile, sToRead, scalar in zip(dataFilesToRead, samplesToRead,
                                          scalings):
            # calculate the starting byte and the number of bytes to read
            byteReadStart = (self.dataByteOffset +
                             sToRead[0] * self.recordByteSize)
            # samples to read are inclusive
            dSamples = sToRead[1] - sToRead[0] + 1
            dSamplesRead = dSamples * numChans
            bytesToRead = dSamples * self.recordByteSize
            # read - the context manager closes the file even if seek or read fails
            with open(dFile, "rb") as dFileHandle:
                # seek to start byte from start of file
                dFileHandle.seek(byteReadStart, 0)
                dataBytes = dFileHandle.read(bytesToRead)
            dataRead = self.readRecords(dataBytes, dSamples)

            # destination for this file in the output arrays
            # python slices are not inclusive of the end value, so this covers dSamples samples
            fileSlice = slice(sampleCounter, sampleCounter + dSamples)
            # now need to unpack this
            for chan in options["chans"]:
                # check to make sure channel exists
                self.checkChan(chan)
                # get the channel index - the chanIndex should give the right order in the data file
                chanIndex = self.chanMap[chan]
                data[chan][fileSlice] = dataRead[
                    chanIndex:dSamplesRead:numChans]
                if options["scale"]:
                    # scalar holds a multiplier and an offset for each channel of this file
                    data[chan][fileSlice] = (
                        data[chan][fileSlice] * scalar[chan][0] +
                        scalar[chan][1])
                    comments.append(
                        "Scaling channel {} of file {} with multiplier {} and adding {}"
                        .format(chan, dFile, scalar[chan][0], scalar[chan][1]))
            # increment sample counter
            sampleCounter = sampleCounter + dSamples  # get ready for the next data read

        # return data
        return TimeData(
            sampleFreq=self.getSampleFreq(),
            startTime=startTime,
            stopTime=stopTime,
            data=data,
            comments=comments,
        )