def getData(self, iWindow: int) -> TimeData:
    """Returns time window data for local index

    The samples of the window are taken from every channel of the underlying
    time data and returned in a new TimeData object. The comments of the
    underlying time data are carried over with an extra line recording both
    the local and the global window index.

    Parameters
    ----------
    iWindow : int
        Local index of window

    Returns
    -------
    windowData : TimeData
        TimeData object with the window data
    """
    winSamples = self.winSamples[iWindow]
    # the stored window sample bounds are inclusive, slice ends are not, so add 1
    sampleSlice = slice(winSamples[0], winSamples[1] + 1)
    winData = {c: self.timeData.data[c][sampleSlice] for c in self.chans}
    # each winTimes entry holds the global window index, start time and stop time
    winTimes = self.winTimes[iWindow]
    globalWindow = winTimes[0]
    winStartTime = winTimes[1]
    winStopTime = winTimes[2]
    # include the local window index in the comment as well as the global one
    windowComment = "Local window {}, global window {}".format(iWindow, globalWindow)
    return TimeData(
        sampleFreq=self.sampleFreq,
        startTime=winStartTime,
        stopTime=winStopTime,
        data=winData,
        comments=self.timeData.comments + [windowComment],
    )
def test_spectra_calculator_window():
    """Test Fourier transform with applywindow switched on and detrend switched off"""
    from resistics.common.format import datetimeFormat
    from resistics.time.data import TimeData
    from resistics.spectra.calculator import SpectrumCalculator
    import numpy as np
    from datetime import datetime

    # initialise some time data: 8 samples at 128 Hz
    sampleFreq = 128
    startTime = "2020-01-01 00:00:00.000000"
    stopTime = "2020-01-01 00:00:00.062500"
    # one impulse on the first sample and one shifted by a sample to give a phase
    data = {
        "Ex": np.array([1, 0, 0, 0, 0, 0, 0, 0]),
        "Hy": np.array([0, 1, 0, 0, 0, 0, 0, 0]),
    }
    timeData = TimeData(sampleFreq, startTime, stopTime, data)

    # calculate spectra without detrending but with the window applied
    specCalc = SpectrumCalculator(128, 8)
    specCalc.detrend = False
    specCalc.applywindow = True
    specData = specCalc.calcFourierCoeff(timeData)

    # spectra metadata
    assert np.absolute(specData.nyquist - 64) < 0.000001
    assert specData.windowSize == 8
    assert specData.dataSize == 5
    assert specData.numChans == 2
    assert sorted(specData.chans) == sorted(["Ex", "Hy"])
    timeFormat = datetimeFormat(ns=True)
    assert specData.startTime == datetime.strptime(startTime, timeFormat)
    assert specData.stopTime == datetime.strptime(stopTime, timeFormat)

    # frequencies and Fourier coefficients
    expectedFreqs = [0, 16, 32, 48, 64]
    expectedEx = [1.0 + 0.0j] * 5
    expectedHy = [
        1 + 0j,
        0.707107 - 0.707107j,
        0 - 1j,
        -0.707107 - 0.707107j,
        -1 + 0j,
    ]
    np.testing.assert_array_almost_equal(specData.freqArray, expectedFreqs)
    np.testing.assert_array_almost_equal(specData.data["Ex"], expectedEx)
    np.testing.assert_array_almost_equal(specData.data["Hy"], expectedHy)
def reformatHigh(self, path: str, **kwargs) -> None:
    """Write out high frequency time series in internal format

    Every high frequency ts file is read record by record. A record whose
    start time equals the end time of the previous record is joined on to the
    data collected so far. When a record does not follow on directly, the data
    collected so far is written out as its own measurement and a new run is
    started. The run still held after the last record is written out too.

    Each run is written to a directory under path named
    meas_ts{ts}_{start}_{stop}. The continuous recording is skipped.

    Parameters
    ----------
    path : str
        Directory to write out the reformatted time series
    ts : List[int], optional
        A list of the high frequency ts files to reformat. By default, all of the higher frequency recordings are reformatted
    """
    timeFormat = "%Y-%m-%d %H:%M:%S.%f"
    writer = TimeWriterInternal()
    for idx, ts in enumerate(self.tsNums):
        if "ts" in kwargs and ts not in kwargs["ts"]:
            continue  # do not reformat this one
        if ts == self.continuous:
            continue  # only the high frequency recordings are reformatted here

        # headers are reused for every run written out for this ts file
        headers = self.getHeaders()
        chanHeaders, _ = self.getChanHeaders()
        chans = self.getChannels()
        numChans = self.getNumChannels()
        # set the sample frequency of this ts file in the headers
        sampleFreq = self.tsSampleFreqs[idx]
        headers["sample_freq"] = sampleFreq
        for cH in chanHeaders:
            cH["sample_freq"] = sampleFreq

        data = {}
        outStartTime = None
        prevEndTime = None
        # the context manager closes the file even if reading or writing fails
        with open(self.dataF[idx], "rb") as dFile:
            # each record has to be read separately and its time compared to the previous
            for record, startDate in enumerate(self.recordStarts[ts]):
                # start date is a string
                startDateTime = datetime.strptime(startDate, timeFormat)
                numScans = self.recordScans[ts][record]
                # numpy does not support 24 bit two's complement (3 bytes),
                # so read raw bytes and convert them with twosComplement
                dFile.seek(self.recordBytes[ts][record], 0)
                dataBytes = dFile.read(numScans * self.sampleByteSize * numChans)
                dataRead = self.twosComplement(dataBytes)
                # samples are interleaved by channel: take every numChans-th
                # value starting at the channel's index
                dataRecord = {
                    chan: dataRead[self.chanMap[chan]:numScans * numChans:numChans]
                    for chan in chans
                }
                # time of the sample that would directly follow this record
                recordEndTime = startDateTime + timedelta(
                    seconds=((1.0 / sampleFreq) * numScans)
                )

                if record == 0:
                    # first record starts the first run
                    outStartTime = startDateTime
                    data = copy.deepcopy(dataRecord)
                elif startDateTime != prevEndTime:
                    # discontinuity: write out the run so far, then start a new one
                    self._writeHighSegment(
                        writer, path, ts, idx, headers, chanHeaders, chans,
                        sampleFreq, outStartTime, prevEndTime, data,
                    )
                    outStartTime = startDateTime
                    data = copy.deepcopy(dataRecord)
                else:
                    # record follows on directly, so join it to the current run
                    for chan in chans:
                        data[chan] = np.concatenate((data[chan], dataRecord[chan]))
                prevEndTime = recordEndTime

        # the last run has no following record to trigger its write, so write it here
        if prevEndTime is not None:
            self._writeHighSegment(
                writer, path, ts, idx, headers, chanHeaders, chans,
                sampleFreq, outStartTime, prevEndTime, data,
            )


def _writeHighSegment(self, writer, path, ts, idx, headers, chanHeaders,
                      chans, sampleFreq, outStartTime, endTime, data) -> None:
    """Write one continuous run of high frequency records in internal format

    Parameters
    ----------
    writer : TimeWriterInternal
        Writer used to write out the run
    path : str
        Directory under which the measurement directory is created
    ts : int
        The ts file number, used in the measurement directory name
    idx : int
        Index of the ts file in self.dataF, used for the comment
    headers : dict
        Global headers, updated in place with the times and sample count of the run
    chanHeaders : list
        Channel headers, updated in place with the times and sample count of the run
    chans : list
        Channel names; the first one is used to count the samples
    sampleFreq : float
        Sampling frequency of the ts file
    outStartTime : datetime
        Time of the first sample of the run
    endTime : datetime
        Time one sample period after the last sample of the run
    data : dict
        Channel name to array of samples for the run
    """
    # endTime is one sample past the end, the written stop time is inclusive
    outStopTime = endTime - timedelta(seconds=1.0 / sampleFreq)
    numSamples = data[chans[0]].size
    headers["start_date"] = outStartTime.strftime("%Y-%m-%d")
    headers["start_time"] = outStartTime.strftime("%H:%M:%S.%f")
    headers["stop_date"] = outStopTime.strftime("%Y-%m-%d")
    headers["stop_time"] = outStopTime.strftime("%H:%M:%S.%f")
    headers["num_samples"] = numSamples
    for cH in chanHeaders:
        cH["start_date"] = headers["start_date"]
        cH["start_time"] = headers["start_time"]
        cH["stop_date"] = headers["stop_date"]
        cH["stop_time"] = headers["stop_time"]
        cH["num_samples"] = numSamples
    # get the outpath
    dataOutpath = os.path.join(
        path,
        "meas_ts{}_{}_{}".format(
            ts,
            outStartTime.strftime("%Y-%m-%d-%H-%M-%S"),
            outStopTime.strftime("%Y-%m-%d-%H-%M-%S"),
        ),
    )
    comment = "Unscaled samples for interval {} to {} read in from measurement {}".format(
        outStartTime, outStopTime, self.dataF[idx])
    # use the sample frequency of this ts file and pass comments as a list,
    # as other code concatenates lists on to TimeData comments
    timeData = TimeData(
        sampleFreq=sampleFreq,
        startTime=outStartTime,
        stopTime=outStopTime,
        data=data,
        comments=[comment],
    )
    writer.setOutPath(dataOutpath)
    writer.writeData(headers, chanHeaders, timeData)
def getUnscaledSamples(self, **kwargs) -> TimeData:
    """Get raw data from data file

    Only returns the continuous data. The continuous data is in 24 bit two's
    complement (3 bytes) format, which is not supported by numpy, so the raw
    bytes are read and converted using the twosComplement method.

    The requested samples may be spread over several records. Each record is
    read in turn and its samples are placed one after another in the output
    arrays.

    Parameters
    ----------
    chans : List[str], optional
        List of channels to return if not all are required
    startSample : int, optional
        First sample to return
    endSample : int, optional
        Last sample to return

    Returns
    -------
    TimeData
        Time data object
    """
    # initialise chans, startSample and endSample with the whole dataset
    options = self.parseGetDataKeywords(kwargs)
    # get the records to read and the samples to take from them, in the correct order
    recordsToRead, samplesToRead = self.getRecordsForSamples(
        options["startSample"], options["endSample"])
    # sample range is inclusive
    numSamples = options["endSample"] - options["startSample"] + 1
    # set up the dictionary to hold the data
    data = {
        chan: np.zeros(shape=(numSamples), dtype=self.dtype)
        for chan in options["chans"]
    }
    # each sample is recorded as a scan (all channels at the same time),
    # so one sample occupies sampleByteSize bytes for every channel
    numChans = self.getNumChannels()
    scanByteSize = self.sampleByteSize * numChans

    sampleCounter = 0
    # the context manager closes the file even if a read or conversion fails
    with open(self.continuousF, "rb") as dFile:
        for record, sToRead in zip(recordsToRead, samplesToRead):
            # number of samples to read in record (inclusive range)
            dSamples = sToRead[1] - sToRead[0] + 1
            recordByteStart = self.recordBytes[self.continuous][record]
            recordSampleStart = self.recordSampleStarts[self.continuous][record]
            # offset into the record by the number of scans to skip
            byteReadStart = (
                recordByteStart + (sToRead[0] - recordSampleStart) * scanByteSize
            )
            dFile.seek(byteReadStart, 0)  # seek to start byte from start of file
            dataBytes = dFile.read(dSamples * scanByteSize)
            dataRead = self.twosComplement(dataBytes)
            # now need to unpack this
            for chan in options["chans"]:
                # check to make sure channel exists
                self.checkChan(chan)
                # channels are interleaved in the same order as in the header file
                chanIndex = self.chanMap[chan]
                data[chan][sampleCounter:sampleCounter + dSamples] = dataRead[
                    chanIndex:dSamples * numChans:numChans]
            # get ready for the next data read
            sampleCounter = sampleCounter + dSamples

    startTime, stopTime = self.sample2time(options["startSample"],
                                           options["endSample"])
    comment = "Unscaled data {} to {} read in from measurement {}, samples {} to {}".format(
        startTime,
        stopTime,
        self.dataPath,
        options["startSample"],
        options["endSample"],
    )
    # comments are passed as a list because other code concatenates lists
    # on to TimeData comments
    return TimeData(
        sampleFreq=self.getSampleFreq(),
        startTime=startTime,
        stopTime=stopTime,
        data=data,
        comments=[comment],
    )
def getUnscaledSamples(self, **kwargs) -> TimeData:
    """Get raw data from data file

    Depending on the data format, the returned values are raw counts or some
    physical unit. This base DataReader implementation reads ATS and internal
    format files. SPAM and Phoenix data readers have their own implementations.

    Raw data units:

    - ATS data format has raw data in counts.
    - For the internal format, the unit depends on what was done to the data before it was written out. When the channel header scaling_applied is True, no scaling happens in either getUnscaledSamples or getPhysicalSamples. When it is False, the data is treated like ATS data, meaning raw data in counts.

    Channels with scaling applied are read as float32, all others as int32.

    Parameters
    ----------
    chans : List[str], optional
        List of channels to return if not all are required
    startSample : int, optional
        First sample to return
    endSample : int, optional
        Last sample to return

    Returns
    -------
    TimeData
        Time data object
    """
    # fill in chans, startSample and endSample, defaulting to the whole dataset
    options = self.parseGetDataKeywords(kwargs)
    firstSample = options["startSample"]
    lastSample = options["endSample"]
    # both ends of the sample range are included
    numToRead = lastSample - firstSample + 1

    data = {}
    for chan in options["chans"]:
        # make sure the channel exists
        self.checkChan(chan)
        chanFile = os.path.join(self.dataPath, self.getChanDataFile(chan))
        # skip the header bytes plus the samples before the first one requested
        readOffset = self.dataByteOffset + firstSample * self.dataByteSize
        # data with scaling already applied is float32, otherwise int32
        sampleType = "float32" if self.getChanScalingApplied(chan) else "int32"
        data[chan] = np.memmap(
            chanFile,
            dtype=sampleType,
            mode="r",
            offset=readOffset,
            shape=(numToRead),
        )

    # times matching the requested samples
    startTime, stopTime = self.sample2time(firstSample, lastSample)
    # comments describing the read, placed after any existing reader comments
    comments = [
        "Unscaled data {} to {} read in from measurement {}, samples {} to {}".format(
            startTime, stopTime, self.dataPath, firstSample, lastSample
        ),
        "Sampling frequency {}".format(self.getSampleFreq()),
    ]
    if len(self.comments) > 0:
        comments = self.comments + comments
    return TimeData(
        sampleFreq=self.getSampleFreq(),
        startTime=startTime,
        stopTime=stopTime,
        data=data,
        comments=comments,
    )
def getUnscaledSamples(self, **kwargs) -> TimeData:
    """Get raw data from data file, returned in mV

    SPAM raw data is single precision float with unit Volts. This method
    applies the ts_lsb scaling calculated when the headers are read. A
    recording can consist of several data files and each channel of each file
    might have a different scaling, so applying ts_lsb is the only way to make
    the data consistent. The data is therefore returned in mV for all channels.

    Parameters
    ----------
    chans : List[str], optional
        List of channels to return if not all are required
    startSample : int, optional
        First sample to return
    endSample : int, optional
        Last sample to return

    Returns
    -------
    TimeData
        Time data object
    """
    # fill in chans, startSample and endSample, defaulting to the whole dataset
    options = self.parseGetDataKeywords(kwargs)
    firstSample = options["startSample"]
    lastSample = options["endSample"]
    # files to read, sample ranges to take from each and the scalings, in order
    dataFilesToRead, samplesToRead, scalings = self.getDataFilesForSamples(
        firstSample, lastSample)
    # both ends of the sample range are included
    totalSamples = lastSample - firstSample + 1
    # output arrays, one per requested channel
    data = {
        chan: np.zeros(shape=(totalSamples), dtype=self.dtype)
        for chan in options["chans"]
    }

    writePos = 0
    for dFile, sToRead, scalar in zip(dataFilesToRead, samplesToRead, scalings):
        chansInFile = self.recChannels[dFile]
        # inclusive sample range for this file
        dSamples = sToRead[1] - sToRead[0] + 1
        # every sample holds one value for each recorded channel
        valuesToRead = dSamples * chansInFile
        byteOff = (
            self.dataByteOffset[dFile]
            + sToRead[0] * chansInFile * self.dataByteSize
        )
        dataRead = np.memmap(
            os.path.join(self.dataPath, dFile),
            dtype=self.dtype,
            mode="r",
            offset=byteOff,
            shape=(valuesToRead),
        )
        # slice end is exclusive, so this covers exactly dSamples values
        writeEnd = writePos + dSamples
        for chan in options["chans"]:
            # make sure the channel exists
            self.checkChan(chan)
            # channels are interleaved in the same order as in the header file
            chanIndex = self.chanMap[chan]
            # scale by the lsb scalar, which can differ for each file in the run
            data[chan][writePos:writeEnd] = (
                dataRead[chanIndex:valuesToRead:chansInFile] * scalar[chan]
            )
        # move on to where the next file's samples go
        writePos = writeEnd

    startTime, stopTime = self.sample2time(firstSample, lastSample)
    comments = [
        "Unscaled data {} to {} read in from measurement {}, samples {} to {}".format(
            startTime, stopTime, self.dataPath, firstSample, lastSample
        ),
        "Data read from {} files in total".format(len(dataFilesToRead)),
        "Data scaled to mV for all channels using scalings in header files",
        "Sampling frequency {}".format(self.getSampleFreq()),
    ]
    return TimeData(
        sampleFreq=self.getSampleFreq(),
        startTime=startTime,
        stopTime=stopTime,
        data=data,
        comments=comments,
    )
def fillGap(timeData1, timeData2):
    """Fill gap between time series

    Fill gaps between two different recordings. The intent is to fill the gap
    when recording has been interrupted and there are two data files. Both
    times series must have the same sampling frequency.

    The missing samples of each channel are linearly interpolated between the
    last sample of the earlier recording and the first sample of the later
    one. The filled values are cast to the data type of the earlier recording.
    If timeData1 starts after timeData2 stops, the two are swapped.

    Parameters
    ----------
    timeData1 : TimeData
        Time series data
    timeData2 : TimeData
        Time series data

    Returns
    -------
    TimeData
        Time series data with gap filled
    """
    if timeData1.sampleFreq != timeData2.sampleFreq:
        errorPrint(
            "fillGap",
            "fillGap requires both timeData objects to have the same sample rate",
            quitrun=True,
        )
        return False
    sampleFreq = timeData1.sampleFreq
    samplePeriod = timedelta(seconds=1.0 / sampleFreq)

    # put the two recordings in time order
    timeDataFirst, timeDataSecond = timeData1, timeData2
    if timeData1.startTime > timeData2.stopTime:
        timeDataFirst, timeDataSecond = timeData2, timeData1

    # start and stop times are inclusive, so the gap runs from one sample after
    # the first recording stops to one sample before the second one starts
    gapStart = timeDataFirst.stopTime + samplePeriod
    gapEnd = timeDataSecond.startTime - samplePeriod
    # add 1 because the gap is inclusive of both gapStart and gapEnd
    numSamplesGap = (
        int(round((gapEnd - gapStart).total_seconds() * sampleFreq)) + 1
    )
    if numSamplesGap < 0:
        # the second recording starts before the first one has finished
        errorPrint(
            "fillGap",
            "fillGap requires timeData objects which do not overlap in time",
            quitrun=True,
        )
        return False

    newData = {}
    for chan in timeDataFirst.chans:
        startVal = timeDataFirst.data[chan][-1]
        endVal = timeDataSecond.data[chan][0]
        # numSamplesGap + 2 evenly spaced points from startVal to endVal, which
        # gives numSamplesGap + 1 equal steps; the two end points are dropped
        # because they are already part of the recordings
        fillData = np.linspace(startVal, endVal, numSamplesGap + 2)[1:-1].astype(
            timeDataFirst.data[chan].dtype
        )
        newData[chan] = np.concatenate(
            [timeDataFirst.data[chan], fillData, timeDataSecond.data[chan]]
        )

    # keep the comments of both recordings and note the gap which was filled
    comment = (
        ["-----------------------------", "TimeData1 comments"]
        + timeDataFirst.comments
        + ["-----------------------------", "TimeData2 comments"]
        + timeDataSecond.comments
    )
    comment += ["-----------------------------"] + [
        "Gap filled from {} to {}".format(gapStart, gapEnd)
    ]
    return TimeData(
        sampleFreq=sampleFreq,
        startTime=timeDataFirst.startTime,
        stopTime=timeDataSecond.stopTime,
        data=newData,
        comments=comment,
    )
def getUnscaledSamples(self, **kwargs) -> TimeData:
    """Get raw data from ascii data file

    Each channel has its own file with one value per line. Only the lines
    whose line number falls in the requested sample range are converted.

    Parameters
    ----------
    chans : List[str], optional
        List of channels to return if not all are required
    startSample : int, optional
        First sample to return
    endSample : int, optional
        Last sample to return

    Returns
    -------
    TimeData
        Time data object
    """
    # fill in chans, startSample and endSample, defaulting to the whole dataset
    options = self.parseGetDataKeywords(kwargs)
    firstSample = options["startSample"]
    lastSample = options["endSample"]
    # both ends of the sample range are included
    numToRead = lastSample - firstSample + 1

    data = {}
    for chan in options["chans"]:
        # make sure the channel exists
        self.checkChan(chan)
        chanFile = os.path.join(self.dataPath, self.getChanDataFile(chan))
        chanValues = np.zeros(shape=(numToRead), dtype=np.float32)
        with open(chanFile) as asciiFile:
            for lineNum, text in enumerate(asciiFile):
                if lineNum > lastSample:
                    # past the last requested sample, nothing more to read
                    break
                if lineNum < firstSample:
                    # not yet at the first requested sample
                    continue
                chanValues[lineNum - firstSample] = float(text.strip())
        data[chan] = chanValues

    # times matching the requested samples
    startTime, stopTime = self.sample2time(firstSample, lastSample)
    # comments describing the read, placed after any existing reader comments
    comments = [
        "Unscaled data {} to {} read in from measurement {}, samples {} to {}".format(
            startTime, stopTime, self.dataPath, firstSample, lastSample
        ),
        "Sampling frequency {}".format(self.getSampleFreq()),
    ]
    if len(self.comments) > 0:
        comments = self.comments + comments
    return TimeData(
        sampleFreq=self.getSampleFreq(),
        startTime=startTime,
        stopTime=stopTime,
        data=data,
        comments=comments,
    )
def getUnscaledSamples(self, **kwargs) -> TimeData:
    """Get raw data from data file, in counts unless the scale option is set

    Lemi B423 data always has five channels, in order Hx, Hy, Hz, Ex, Ey. The
    raw data is integer counts. Therefore, getting unscaled samples returns
    raw counts for the measurement. There are additional scalings which can be
    applied using the scale optional argument.

    Lemi B423 is recorded in multiple files. It has not been verified whether
    it is possible for each individual file to have different scaling.

    Without the scale option, the data is returned in:

    - Counts for both magnetic and electric channels (reading long integers)

    With the scaling option, the data is returned as float32 in:

    - microvolts for the electric channels
    - millivolts for the magnetic with the gain applied

    Applying the scaling does not appear to remove the internal gain of the
    Lemi. This will be removed when getting physical samples and the
    appropriate value must be set in the headers.

    Parameters
    ----------
    chans : List[str], optional
        List of channels to return if not all are required
    startSample : int, optional
        First sample to return
    endSample : int, optional
        Last sample to return
    scale : bool, optional
        Boolean flag for applying the gain scaling

    Returns
    -------
    TimeData
        Time data object
    """
    # initialise chans, startSample and endSample with the whole dataset
    options = self.parseGetDataKeywords(kwargs)
    # get the files to read and the samples to take from them, in the correct order
    dataFilesToRead, samplesToRead, scalings = self.getDataFilesForSamples(
        options["startSample"], options["endSample"])
    # sample range is inclusive
    numSamples = options["endSample"] - options["startSample"] + 1
    # scaled data is held as float32, unscaled data in the reader's own dtype
    dtype = np.float32 if options["scale"] else self.dtype
    data = {
        chan: np.zeros(shape=(numSamples), dtype=dtype)
        for chan in options["chans"]
    }

    # prepare comments
    startTime, stopTime = self.sample2time(options["startSample"],
                                           options["endSample"])
    comments = [
        "Unscaled data {} to {} read in from measurement {}, samples {} to {}".format(
            startTime,
            stopTime,
            self.dataPath,
            options["startSample"],
            options["endSample"],
        ),
        "Sampling frequency {}".format(self.getSampleFreq()),
        "Data read from {} files in total".format(len(dataFilesToRead)),
        "Scaling = {}".format(options["scale"]),
    ]

    numChans = self.getNumChannels()
    sampleCounter = 0
    for dFile, sToRead, scalar in zip(dataFilesToRead, samplesToRead, scalings):
        # calculate the starting byte and the number of bytes to read
        byteReadStart = self.dataByteOffset + sToRead[0] * self.recordByteSize
        dSamples = sToRead[1] - sToRead[0] + 1
        dSamplesRead = dSamples * numChans
        bytesToRead = dSamples * self.recordByteSize
        # the context manager closes the file even if the seek or read fails
        with open(dFile, "rb") as dFileHandle:
            dFileHandle.seek(byteReadStart, 0)  # seek to start byte from start of file
            dataBytes = dFileHandle.read(bytesToRead)
        dataRead = self.readRecords(dataBytes, dSamples)
        # slice end is exclusive, so this covers exactly dSamples values
        outSlice = slice(sampleCounter, sampleCounter + dSamples)
        # now need to unpack this
        for chan in options["chans"]:
            # check to make sure channel exists
            self.checkChan(chan)
            # take every numChans-th value starting at the channel's index
            chanIndex = self.chanMap[chan]
            data[chan][outSlice] = dataRead[chanIndex:dSamplesRead:numChans]
            if options["scale"]:
                # scalar[chan] holds a multiplier followed by an additive term
                data[chan][outSlice] = (
                    data[chan][outSlice] * scalar[chan][0] + scalar[chan][1]
                )
                comments.append(
                    "Scaling channel {} of file {} with multiplier {} and adding {}"
                    .format(chan, dFile, scalar[chan][0], scalar[chan][1]))
        # get ready for the next data read
        sampleCounter = sampleCounter + dSamples

    return TimeData(
        sampleFreq=self.getSampleFreq(),
        startTime=startTime,
        stopTime=stopTime,
        data=data,
        comments=comments,
    )