def do_fetch(self):
    """Load a locally saved signal for ``self.diag_name`` / ``self.shot``.

    The file name is built from the module-level ``data_filename`` template
    and the ``localdatapath`` option of the ``global`` config section.
    A leading '-' on ``self.diag_name`` inverts the signal; the channels
    MP5, HMP13 and HMP05 are additionally inverted.

    Returns:
        TimeseriesData with one dummy-coordinate channel, ``config_name``
        set and the shot number stored in ``meta``.

    Raises:
        Exception: if the data file does not exist.
    """
    chan_name = self.diag_name.split('-')[-1]  # drop any leading '-'
    filename_dict = {'diag_name': chan_name, 'shot': self.shot}
    self.basename = path.join(pf.config.get('global', 'localdatapath'),
                              data_filename % filename_dict)

    if not path.exists(self.basename):
        # call form of raise is valid on both Python 2 and Python 3
        raise Exception("file " + self.basename + " not found.")

    signal_dict = newload(self.basename)

    # these channels are stored with inverted sign
    if chan_name in ('MP5', 'HMP13', 'HMP05'):
        flip = -1.
        print('flip')
    else:
        flip = 1.
    # startswith() also copes with an empty diag_name
    if self.diag_name.startswith('-'):
        flip = -flip

    # coords = get_coords_for_channel(**self.__dict__)
    ch = Channel(self.diag_name, Coords('dummy', (0, 0, 0)))
    output_data = TimeseriesData(
        timebase=Timebase(signal_dict['timebase']),
        signal=Signal(flip * signal_dict['signal']),
        channels=ch)
    # bdb - used "fetcher" instead of "self" in the "direct from LHD data" version
    output_data.config_name = self.config_name  # when using saved files, same as name
    output_data.meta.update({'shot': self.shot})
    return output_data
def test_multi_channel_timeseries(self):
    # A 3-channel series built on a timebase requested with n_samples=1e2
    # is cut into 10-sample segments; 10 segments are expected.
    n_chan = 3
    timebase = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    n_pts = len(timebase)
    samples = np.resize(np.arange(n_chan * n_pts), (n_chan, n_pts))
    multi_tsd = TimeseriesData(timebase=timebase,
                               signal=Signal(samples),
                               channels=get_n_channels(n_chan))
    segments = multi_tsd.segment(n_samples=10)
    self.assertTrue(len(segments) == 10)
def test_remove_noncontiguous(self):
    # Build three identical single-channel series, break the timebase of
    # the second one, and check remove_noncontiguous() drops only that one.
    timebases = [generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
                 for _ in range(3)]
    # nonzero signal mean
    series = [TimeseriesData(timebase=tb,
                             signal=Signal(np.arange(len(tb))),
                             channels=ChannelList(
                                 Channel('ch_01', Coords('dummy', (0, 0, 0)))))
              for tb in timebases]
    first, broken, last = series

    for tb in timebases:
        self.assertTrue(tb.is_contiguous())

    # shift the last 50 samples of the second series' timebase
    broken.timebase[-50:] += 1.0
    self.assertFalse(timebases[1].is_contiguous())

    ds = DataSet('ds')
    for item in series:
        ds.add(item)
    for item in series:
        self.assertTrue(item in ds)

    filtered_ds = ds.remove_noncontiguous()
    for item in (first, last):
        self.assertTrue(item in filtered_ds)
    self.assertFalse(broken in filtered_ds)
def do_fetch(self):
    """Load a locally saved signal for ``self.diag_name`` / ``self.shot``.

    The file name is built from the module-level ``data_filename`` template
    and the ``localdatapath`` option of the ``global`` config section.
    A leading '-' on ``self.diag_name`` inverts the signal; the channels
    MP5, HMP13 and HMP05 are additionally inverted.

    Returns:
        TimeseriesData with one dummy-coordinate channel, ``config_name``
        set and the shot number stored in ``meta``.

    Raises:
        Exception: if the data file does not exist.
    """
    chan_name = self.diag_name.split('-')[-1]  # drop any leading '-'
    filename_dict = {'diag_name': chan_name, 'shot': self.shot}
    self.basename = path.join(pf.config.get('global', 'localdatapath'),
                              data_filename % filename_dict)

    if not path.exists(self.basename):
        # call form of raise is valid on both Python 2 and Python 3
        raise Exception("file " + self.basename + " not found.")

    signal_dict = newload(self.basename)

    # these channels are stored with inverted sign
    if chan_name in ('MP5', 'HMP13', 'HMP05'):
        flip = -1.
        print('flip')
    else:
        flip = 1.
    # startswith() also copes with an empty diag_name
    if self.diag_name.startswith('-'):
        flip = -flip

    # coords = get_coords_for_channel(**self.__dict__)
    ch = Channel(self.diag_name, Coords('dummy', (0, 0, 0)))
    output_data = TimeseriesData(
        timebase=Timebase(signal_dict['timebase']),
        signal=Signal(flip * signal_dict['signal']),
        channels=ch)
    # bdb - used "fetcher" instead of "self" in the "direct from LHD data" version
    output_data.config_name = self.config_name  # when using saved files, same as name
    output_data.meta.update({'shot': self.shot})
    return output_data
def do_fetch(self):
    """Fetch one channel through ``gethjdata`` and wrap it as TimeseriesData.

    Returns:
        TimeseriesData whose timebase is the odd-indexed and whose signal is
        the even-indexed (from 2) part of the returned buffer; ``meta``
        carries the shot and ``params`` the static parameters.

    Raises:
        LookupError: if ``gethjdata`` reports a nonzero error code, or if no
            static parameters are found for the shot/signal.
    """
    n_points = int(self.length)
    buf = np.zeros(1024*2*256+1)
    ## !! really should put a wrapper around gethjdata to do common stuff
    # outfile is only needed if the direct passing of binary won't work
    #  with tempfile.NamedTemporaryFile(prefix="pyfusion_") as outfile:
    ierror, getrets = gethjdata.gethjdata(
        self.shot, n_points, self.path,
        verbose=VERBOSE, opt=1, ierror=2,
        outdata=buf, outname='')
    if ierror != 0:
        raise LookupError('hj Okada style data not found for {s}:{c}'.format(s=self.shot, c=self.path))

    ch = Channel(self.path, Coords('dummy', (0,0,0)))

    # the intent statement causes the out var to be returned in the result list
    # looks like the time,data is interleaved in a 1x256 array
    # it is fed in as real*64, but returns as real*32! (as per fortran decl)
    debug_(pyfusion.DEBUG, 4, key='Heliotron_fetch', msg='after call to getdata')

    # timebase in secs (ms in raw data) - could add a preferred unit?
    # this is partly allowed for in savez_compressed, newload, and
    # for plotting, in the config file.
    # important that the 1e-3 be inside the Timebase()
    times = Timebase(1e-3 * getrets[1::2])
    values = Signal(getrets[2::2])
    output_data = TimeseriesData(timebase=times, signal=values, channels=ch)
    output_data.meta.update({'shot':self.shot})

    if pyfusion.VERBOSE>0:
        print('HJ config name',self.config_name)
    output_data.config_name = self.config_name

    stprms = get_static_params(shot=self.shot,signal=self.path)
    if not list(stprms):  # maybe this should be ignored - how do we use it?
        raise LookupError(' failure to get params for {shot}:{path}'
                          .format(shot=self.shot, path=self.path))
    output_data.params = stprms
    return output_data
def segment(input_data, n_samples, overlap=DEFAULT_SEGMENT_OVERLAP):
    """Break into segments length n_samples.

    Overlap of 2.0 starts a new segment halfway into previous, overlap=1 is
    no overlap.  overlap should divide into n_samples.  Probably should
    consider a nicer definition such as in pyfusion 0

    Args:
        input_data: a TimeseriesData-like object (``timebase``, ``signal``,
            ``channels``, ``meta``, ``history``) or a DataSet of them.
        n_samples: segment length in samples.
        overlap: new segments start every ``n_samples / overlap`` samples.

    Returns:
        DataSet of segments.  Trailing segments may be shorter than
        ``n_samples`` (they are built with ``bypass_length_check=True``).
    """
    from pyfusion.data.base import DataSet
    from pyfusion.data.timeseries import TimeseriesData

    if isinstance(input_data, DataSet):
        output_dataset = DataSet()
        for data in input_data:
            try:
                # forward overlap so DataSet input honours it as well
                output_dataset.update(data.segment(n_samples, overlap=overlap))
            except AttributeError:
                pyfusion.logger.warning("Data filter 'segment' not applied to item in dataset")
        return output_dataset

    output_data = DataSet('segmented_%s, %d samples, %.3f overlap'
                          % (datetime.now(), n_samples, overlap))
    for el in arange(0, len(input_data.timebase), n_samples/overlap):
        # arange yields floats when the step is a float; slice bounds must
        # be integers, so truncate explicitly.
        start = int(el)
        stop = int(el + n_samples)
        if input_data.signal.ndim == 1:
            seg_signal = input_data.signal[start:stop]
        else:
            seg_signal = input_data.signal[:, start:stop]
        tmp_data = TimeseriesData(timebase=input_data.timebase[start:stop],
                                  signal=seg_signal,
                                  channels=input_data.channels,
                                  bypass_length_check=True)
        tmp_data.meta = input_data.meta.copy()
        tmp_data.history = input_data.history  # bdb - may be redundant now meta is copied
        output_data.add(tmp_data)
    return output_data
def test_remove_mean_single_channel(self):
    # subtract_mean() on a single-channel ramp must leave a ~zero mean.
    timebase = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    # nonzero signal mean
    ramp = Signal(np.arange(len(timebase)))
    one_channel = ChannelList(Channel('ch_01', Coords('dummy', (0, 0, 0))))
    tsd = TimeseriesData(timebase=timebase, signal=ramp, channels=one_channel)
    demeaned = tsd.subtract_mean()
    assert_almost_equal(np.mean(demeaned.signal), 0)
def test_reduce_time_dataset(self):
    # Smoke test: reduce_time() must be callable on a DataSet holding two
    # 5-channel series (no assertion on the result).
    n_chan = 5
    window = [-0.25, 0.25]
    timebase = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    shape = (n_chan, len(timebase))
    counts = np.arange(n_chan * len(timebase))

    test_dataset = DataSet('test_dataset')
    # second series is the first one offset by +1
    for values in (counts, counts + 1):
        test_dataset.add(TimeseriesData(timebase=timebase,
                                        signal=Signal(np.resize(values, shape)),
                                        channels=get_n_channels(n_chan)))
    test_dataset.reduce_time(window)
def test_reduce_time_filter_multi_channel_attached_method(self):
    # reduce_time() called as a method on a 5-channel series must return a
    # TimeseriesData cut to the index window found by searchsorted.
    n_chan = 5
    window = [-0.25, 0.25]
    timebase = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    samples = np.resize(np.arange(n_chan * len(timebase)),
                        (n_chan, len(timebase)))
    tsd = TimeseriesData(timebase=timebase, signal=Signal(samples),
                         channels=get_n_channels(n_chan))

    bounds = np.searchsorted(timebase, window)
    lo, hi = bounds[0], bounds[1]
    expected_timebase = tsd.timebase[lo:hi].copy()
    expected_signal = tsd.signal[:, lo:hi].copy()

    reduced = tsd.reduce_time(window)
    self.assertTrue(isinstance(reduced, TimeseriesData))
    assert_array_almost_equal(reduced.timebase, expected_timebase)
    assert_array_almost_equal(reduced.signal, expected_signal)
def test_dataset(self):
    # Segmenting a DataSet of two 3-channel series into 10-sample pieces
    # is expected to give 20 segments in total.
    n_chan = 3
    timebase = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    shape = (n_chan, len(timebase))
    first = TimeseriesData(
        timebase=timebase,
        signal=Signal(np.resize(np.arange(n_chan * len(timebase)), shape)),
        channels=get_n_channels(n_chan))
    # note: one extra element in the arange, resized to the same shape
    second = TimeseriesData(
        timebase=timebase,
        signal=Signal(np.resize(np.arange(n_chan * len(timebase) + 1), shape)),
        channels=get_n_channels(n_chan))

    input_dataset = DataSet('test_dataset')
    for item in (first, second):
        input_dataset.add(item)

    segments = input_dataset.segment(n_samples=10)
    self.assertTrue(len(segments) == 20)
def get_probe_angles(input_data, closed=False):
    """Return the ``Coords_reduced`` angles of every channel of a signal.

    ``input_data`` is either a timeseries object with a ``channels``
    attribute, or a string that specifies one, e.g.

        get_probe_angles('W7X:W7X_MIRNOV_41_BEST_LOOP:(20180912,43)')

    A three-part string ``device:diagnostic:shot`` fetches a short piece of
    data to obtain the channels; a two-part string ``device:diagnostic``
    builds the channel list from the ``channel_*`` options of the diagnostic
    without fetching anything.

    This is a kludgey way to read coordinates.  Should be through
    acquisition.base or acquisition.'device' rather than looking up config
    directly.

    Args:
        input_data: timeseries object or specification string (see above).
        closed: if true, append the first angle to the end of each array.

    Returns:
        dict with keys ``Theta`` and ``Phi``: numpy arrays holding the second
        and first comma-separated field of ``Coords_reduced`` respectively,
        each scaled by 2*pi/360 (presumably degrees to radians).
    """
    import pyfusion

    if isinstance(input_data, str):
        pieces = input_data.split(':')
        if len(pieces) == 3:
            dev_name, diag_name, shotstr = pieces
            # NOTE(review): eval() executes arbitrary code; only safe while
            # the specification string comes from a trusted source.
            shot_number = eval(shotstr)
            dev = pyfusion.getDevice(dev_name)
            # Bind the fetched data to input_data: previously it went to an
            # unused name and the channel lookup below ran on the string.
            input_data = dev.acq.getdata(shot_number, diag_name,
                                         time_range=[0, 0.1])
        else:
            from pyfusion.data.timeseries import TimeseriesData, Timebase, Signal
            from pyfusion.data.base import Channel, ChannelList, Coords
            input_data = TimeseriesData(Timebase([0, 1]), Signal([0, 1]))
            dev_name, diag_name = pieces
            # channels are amongst options
            opts = pyfusion.config.pf_options('Diagnostic', diag_name)
            chans = [pyfusion.config.pf_get('Diagnostic', diag_name, opt)
                     for opt in opts if 'channel_' in opt]
            # for now, assume config_name is same as name
            input_data.channels = ChannelList(
                *[Channel(ch, Coords('?', [0, 0, 0])) for ch in chans])

    def _reduced_coords(chan):
        # Split the Coords_reduced option of one channel into its fields.
        section = 'Diagnostic:{cn}'.format(
            cn=chan.config_name if chan.config_name != '' else chan.name)
        return pyfusion.config.get(section, 'Coords_reduced').split(',')

    # read the config once per channel instead of once per angle
    fields = [_reduced_coords(c) for c in input_data.channels]
    Phi = np.array([2 * np.pi / 360 * float(f[0]) for f in fields])
    Theta = np.array([2 * np.pi / 360 * float(f[1]) for f in fields])

    if closed:
        Phi = np.append(Phi, Phi[0])
        Theta = np.append(Theta, Theta[0])
    return dict(Theta=Theta, Phi=Phi)
def do_fetch(self):
    """Fetch one channel through ``gethjdata`` and wrap it as TimeseriesData.

    Only the first ``isample`` points reported by ``gethjdata`` are kept.

    Returns:
        TimeseriesData with ``meta['shot']``, ``config_name`` and ``params``
        (the static parameters) set.

    Raises:
        LookupError: if ``gethjdata`` reports a nonzero error code, or if no
            static parameters are found for the shot/signal.
    """
    n_points = int(self.length)
    buf = np.zeros(1024 * 2 * 256 + 1)
    ## !! really should put a wrapper around gethjdata to do common stuff
    # outfile is only needed if the direct passing of binary won't work
    #  with tempfile.NamedTemporaryFile(prefix="pyfusion_") as outfile:
    # get in two steps to make debugging easier
    allrets = gethjdata.gethjdata(
        self.shot, n_points, self.path,
        verbose=VERBOSE, opt=1, ierror=2, isample=-1,
        outdata=buf, outname='')
    ierror, isample, getrets = allrets
    if ierror != 0:
        raise LookupError(
            'hj Okada style data not found for {s}:{c}'.format(
                s=self.shot, c=self.path))

    ch = Channel(self.path, Coords('dummy', (0, 0, 0)))

    # the intent statement causes the out var to be returned in the result list
    # looks like the time,data is interleaved in a 1x256 array
    # it is fed in as real*64, but returns as real*32! (as per fortran decl)
    debug_(pyfusion.DEBUG, 4, key='Heliotron_fetch',
           msg='after call to getdata')

    # timebase in secs (ms in raw data) - could add a preferred unit?
    # this is partly allowed for in savez_compressed, newload, and
    # for plotting, in the config file.
    # important that the 1e-3 be inside the Timebase()
    raw_times = getrets[1::2][0:isample]
    raw_values = getrets[2::2][0:isample]
    output_data = TimeseriesData(timebase=Timebase(1e-3 * raw_times),
                                 signal=Signal(raw_values),
                                 channels=ch)
    output_data.meta.update({'shot': self.shot})

    if pyfusion.VERBOSE > 0:
        print('HJ config name', self.config_name)
    output_data.config_name = self.config_name

    stprms = get_static_params(shot=self.shot, signal=self.path)
    if not list(stprms):  # maybe this should be ignored - how do we use it?
        raise LookupError(
            ' failure to get params for {shot}:{path}'.format(
                shot=self.shot, path=self.path))
    output_data.params = stprms
    return output_data
def test_dataset(self): ch=get_n_channels(5) new_times = [-0.25, 0.25] tb = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2) tsd_1 = TimeseriesData(timebase=tb, signal=Signal(np.resize(np.arange(5*len(tb)),(5,len(tb)))), channels=ch) tsd_2 = TimeseriesData(timebase=tb, signal=Signal(np.resize(np.arange(5*len(tb))+1, (5,len(tb)))), channels=ch) test_dataset = DataSet('test_ds_1') test_dataset.add(tsd_1) test_dataset.add(tsd_2) self.assertTrue(tsd_1 in test_dataset) """
def do_fetch(self):
    """Fetch ``self.config_name`` through ``get_basic_diagnostics``.

    Returns:
        TimeseriesData built from the ``check_tm`` entry (timebase) and the
        ``self.config_name`` entry (signal) of the returned dictionary, with
        a single dummy-coordinate channel.
    """
    if pyfusion.DBG() > 2:
        print('in fetch',self.diag_name, self)
    debug_(pyfusion.DEBUG, level=3, key='igetfile_fetch')

    name = self.config_name
    # NOTE(review): self.info is evaluated twice with eval(); this executes
    # arbitrary code and is only safe for trusted configuration.
    file_info = eval(eval(self.info))
    results = get_basic_diagnostics(diags=[name], times=None, shot=self.shot,
                                    file_info={name: file_info},
                                    debug=1, exception=None)
    debug_(pyfusion.DEBUG, level=2, key='after get_basic')

    dummy_channel = Channel(self.config_name, Coords('dummy', (0, 0, 0)))
    output_data = TimeseriesData(timebase=Timebase(results['check_tm']),
                                 signal=Signal(results[self.config_name]),
                                 channels=dummy_channel)
    output_data.config_name = self.config_name  # ??? bdb - my fault?
    return output_data
def segment(input_data, n_samples, overlap=1.0, datalist= 0):
    """Break into segments length n_samples.

    Overlap of 2.0 starts a new segment halfway into previous, overlap=1 is
    no overlap.  overlap should divide into n_samples.  Probably should
    consider a nicer definition such as in pyfusion 0

    Only full-length segments are produced: the last segment starts at the
    last index from which ``n_samples`` samples are still available.

    Args:
        input_data: a TimeseriesData-like object (``timebase``, ``signal``,
            ``channels``, ``meta``) or a DataSet of them.
        n_samples: segment length in samples.
        overlap: new segments start every ``n_samples / overlap`` samples.
        datalist: if false (default) return a DataSet, otherwise an
            OrderedDataSet (ignored when ``input_data`` is a DataSet).

    Returns:
        DataSet or OrderedDataSet of segments.
    """
    from pyfusion.data.base import DataSet, OrderedDataSet
    from pyfusion.data.timeseries import TimeseriesData

    if isinstance(input_data, DataSet):
        output_dataset = DataSet()
        for data in input_data:
            try:
                # forward overlap so DataSet input honours it as well
                output_dataset.update(data.segment(n_samples, overlap=overlap))
            except AttributeError:
                pyfusion.logger.warning("Data filter 'segment' not applied to item in dataset")
        return output_dataset

    # SH modification in case ordering is important... i.e. you are
    # processing two different arrays at the same time (in different
    # Timeseries objects) and you don't want to lose the time relationship
    # between them
    label = 'segmented_%s, %d samples, %.3f overlap' % (datetime.now(), n_samples, overlap)
    if datalist:
        output_data = OrderedDataSet(label)
    else:
        output_data = DataSet(label)

    # SH : 24May2013 - the start index must not run past the point where a
    # full n_samples segment is still available.  The "+ 1" keeps the final
    # full segment: arange excludes its stop value, so stopping at
    # len - n_samples dropped the segment that starts exactly there.
    last_start = len(input_data.timebase) - n_samples
    for el in arange(0, last_start + 1, n_samples/overlap):
        # arange yields floats when the step is a float; slice bounds must
        # be integers, so truncate explicitly.
        start = int(el)
        stop = int(el + n_samples)
        if input_data.signal.ndim == 1:
            seg_signal = input_data.signal[start:stop]
        else:
            seg_signal = input_data.signal[:, start:stop]
        tmp_data = TimeseriesData(timebase=input_data.timebase[start:stop],
                                  signal=seg_signal,
                                  channels=input_data.channels,
                                  bypass_length_check=True)
        tmp_data.meta = input_data.meta.copy()
        if datalist:
            output_data.append(tmp_data)
        else:
            output_data.add(tmp_data)
    return output_data
def do_fetch(self):
    """Fetch ``self.pointname`` for ``self.shot``, via the NC cache if present.

    If ``self.NC`` is set and already holds a variable named after the
    pointname, data and time axis are read from it.  Otherwise they are
    fetched with ``ptdata2`` over ``self.acq.connection`` and, when
    ``self.NC`` is set, written into it.

    Side effects: sets ``self.write_cache`` and ``self.fetch_mode``; prints
    diagnostic output.

    Returns:
        TimeseriesData for the point, with ``config_name`` set to the
        Channel object.
    """
    print(self.pointname)
    print(self.shot)

    t_name = '{}_time'.format(self.pointname)
    if self.NC is not None:
        print(self.NC)
        NC_vars = self.NC.variables.keys()
    else:
        # no cache attached: nothing can be read from it
        NC_vars = []

    if self.pointname in NC_vars:
        print('Reading cache!!!!')
        t_axis = self.NC.variables[t_name].data[:].copy()
        data = self.NC.variables[self.pointname].data[:].copy()
        # already cached - must not be written again below
        self.write_cache = False
    else:
        tmp = self.acq.connection.get('ptdata2("{}",{})'.format(self.pointname, self.shot))
        data = tmp.data()
        tmp = self.acq.connection.get('dim_of(ptdata2("{}",{}))'.format(self.pointname, self.shot))
        t_axis = tmp.data()
        self.write_cache = True
    print(t_axis)
    print(data)

    coords = get_coords_for_channel(**self.__dict__)
    ch = Channel(self.pointname, coords)

    if self.NC is not None and self.write_cache:
        print(self.pointname)
        self.NC.createDimension(t_name, len(t_axis))
        f_time = self.NC.createVariable(t_name, 'd', (t_name,))
        f_time[:] = +t_axis
        print('Wrote time')
        sig = self.NC.createVariable(self.pointname, 'f', (t_name,))
        sig[:] = +data
        print('Wrote signal')

    output_data = TimeseriesData(timebase=Timebase(t_axis),
                                 signal=Signal(data), channels=ch)
    # NOTE(review): config_name receives the Channel object, not a name
    output_data.config_name = ch
    self.fetch_mode = 'ptdata'
    return output_data
def testFilteredDataHistory_nocopy(self):
    # With copy=False, normalise() must be recorded in the history of both
    # the returned object and the object it was called on.
    def last_entry(history):
        # first line of the most recent '> ' history entry
        return history.split('> ')[-1].split('\n')[0]

    timebase = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    # nonzero signal mean
    one_channel = get_n_channels(1)
    tsd = TimeseriesData(timebase=timebase,
                         signal=Signal(np.arange(len(timebase))),
                         channels=one_channel)

    filtered_tsd = tsd.subtract_mean()
    # previously 3 lines were expected; bdb thinks extra 2 is OK
    self.assertEqual(len(filtered_tsd.history.split('\n')), 5)

    output_data = filtered_tsd.normalise(method='rms', copy=False)
    # entries may carry extra lines, so only the first line is compared
    self.assertEqual(last_entry(filtered_tsd.history), "normalise(method='rms')")
    self.assertEqual(last_entry(output_data.history), "normalise(method='rms')")
def do_fetch(self):
    """Read one or more binary record files into a multichannel TimeseriesData.

    ``filenames`` (a list, or a string that evaluates to one) names the
    files; "(shot)" in a name is replaced by the shot number and names
    ending in '.bz2' are read through bz2.  File *i* (1-based) is decoded
    with the numpy dtype given by option ``dtype_<i>``.  Every dtype field
    whose name starts with 'channel_' becomes a channel; the timebase is
    the 'timebase' field of the first file.

    NOTE(review): ``filenames``, ``dtype_<i>`` and ``phase_pairs`` are passed
    through eval(); only safe for trusted configuration.

    Returns:
        TimeseriesData with ``phase_pairs`` attached (None if not configured).
    """
    # evaluate filename list
    try:
        filenames = eval(self.__dict__.get("filenames", "[]"))
    except TypeError:
        # assume we have been given a list of filenames as a keyword
        # argument, rather than reading the config file.
        filenames = self.__dict__.get("filenames")

    data_array = []
    channel_names = []
    dtypes = []
    for fn_i, fn in enumerate(filenames):
        dt = eval(self.__dict__.get("dtype_%d" % (fn_i + 1), None))
        dtypes.append(dt)
        shot_fn = fn.replace("(shot)", str(self.shot))
        if fn.endswith('.bz2'):
            # context manager closes the file even if the read fails
            with bz2.BZ2File(shot_fn) as f:
                raw = f.read()
            # binary-mode np.fromstring is deprecated; frombuffer + copy()
            # gives the same owned, writable array
            data_array.append(np.frombuffer(raw, dtype=dt).copy())
        else:
            data_array.append(np.fromfile(shot_fn, dtype=dt))
        channel_names.extend(
            [i for i in dt.names if i.startswith('channel_')])

    ch = ChannelList(*(named_ch(i) for i in channel_names))

    # one row per channel; dtype taken from the first channel of file 1
    signal_data = np.zeros((len(channel_names), data_array[0].shape[0]),
                           dtype=dtypes[0][channel_names[0]])
    sig_counter = 0
    for d_i, d in enumerate(data_array):
        for ch_name in dtypes[d_i].names:
            if ch_name.startswith('channel_'):
                signal_data[sig_counter, :] = d[ch_name]
                sig_counter += 1

    tsd = TimeseriesData(timebase=Timebase(data_array[0]['timebase']),
                         signal=Signal(signal_data),
                         channels=ch)
    tsd.phase_pairs = self.__dict__.get("phase_pairs", None)
    if tsd.phase_pairs is not None:
        tsd.phase_pairs = eval(tsd.phase_pairs)
    return tsd
def get_tsd_from_node(fetcher, node):
    """Build a pyfusion TimeseriesData from an MDSplus signal node.

    The channel is named after the fetcher's 'nodepath' component and gets
    dummy coordinates; ``config_name`` and the shot number are copied from
    the fetcher.
    """
    # TODO: load actual coordinates
    nodepath = fetcher.mds_path_components['nodepath']
    ch = Channel(nodepath, Coords('dummy', (0, 0, 0)))
    signal = Signal(node.data())

    time_axis = node.dim_of().data()
    # TODO: stupid hack,  the test signal has dim  of [[...]], real data
    # has [...].  Figure out  why. (...probably because  original signal
    # uses a build_signal function)
    if len(time_axis) == 1:
        time_axis = time_axis[0]

    output_data = TimeseriesData(timebase=Timebase(time_axis),
                                 signal=signal,
                                 channels=ch)
    output_data.config_name = fetcher.config_name  # bdb config_name fix
    output_data.meta.update({'shot': fetcher.shot})
    return output_data
def get_tsd_from_node(fetcher, node):
    """Build a pyfusion TimeseriesData from an MDSplus signal node.

    The channel is named after the fetcher's 'nodepath' component and gets
    dummy coordinates; ``config_name`` and the shot number are copied from
    the fetcher.
    """
    # TODO: load actual coordinates
    nodepath = fetcher.mds_path_components['nodepath']
    ch = Channel(nodepath, Coords('dummy', (0, 0, 0)))
    signal = Signal(node.data())

    time_axis = node.dim_of().data()
    # TODO: stupid hack,  the test signal has dim  of [[...]], real data
    # has [...].  Figure out  why. (...probably because  original signal
    # uses a build_signal function)
    if len(time_axis) == 1:
        time_axis = time_axis[0]

    output_data = TimeseriesData(timebase=Timebase(time_axis),
                                 signal=signal,
                                 channels=ch)
    output_data.config_name = fetcher.config_name  # bdb config_name fix
    output_data.meta.update({'shot': fetcher.shot})
    return output_data
def fetch_data_from_file(fetcher):
    """Read ``<basename>.prm`` / ``<basename>.dat`` into a TimeseriesData.

    The .prm file supplies the data length, resolution, sample coding and
    clock information; the .dat file holds the 16-bit samples.

    Args:
        fetcher: object providing ``basename``, ``shot``, ``diag_name``,
            ``channel_number`` and ``gain``.

    Returns:
        TimeseriesData with one dummy-coordinate channel named
        ``"<diag_name>-<channel_number>"`` and the shot stored in ``meta``.

    Raises:
        NotImplementedError: for an unsupported image type or binary coding,
            or when no clock information is found.
    """
    prm_dict = read_prm_file(fetcher.basename + ".prm")
    n_bytes = int(prm_dict['DataLength(byte)'][0])
    bits = int(prm_dict['Resolution(bit)'][0])

    # NOTE(review): offset and dtype are assigned but not used further down.
    if 'ImageType' not in prm_dict:  # if so assume unsigned
        bytes_per_sample = 2
        dat_arr = Array.array('H')
        offset = 2**(bits - 1)
        dtype = np.dtype('uint16')
    elif prm_dict['ImageType'][0] == 'INT16':
        bytes_per_sample = 2
        coding = prm_dict['BinaryCoding'][0]
        if coding == 'offset_binary':
            dat_arr = Array.array('H')
            offset = 2**(bits - 1)
            dtype = np.dtype('uint16')
        elif coding == "shifted_2's_complementary":
            dat_arr = Array.array('h')
            offset = 0
            dtype = np.dtype('int16')
        else:
            # str() so the message can be built whatever the entry's type
            raise NotImplementedError(' binary coding '
                                      + str(prm_dict['BinaryCoding']))
    else:
        # previously fell through and failed later with a NameError
        raise NotImplementedError(' image type '
                                  + str(prm_dict['ImageType']))

    # floor division: fromfile needs an integer item count
    with open(fetcher.basename + '.dat', 'rb') as fp:
        dat_arr.fromfile(fp, n_bytes // bytes_per_sample)

    clockHz = None
    if 'SamplingClock' in prm_dict:
        clockHz = double(prm_dict['SamplingClock'][0])
    if 'SamplingInterval' in prm_dict:
        clockHz = clockHz / double(prm_dict['SamplingInterval'][0])
    if 'ClockSpeed' in prm_dict:
        if clockHz is not None:
            pyfusion.utils.warn(
                'Apparent duplication of clock speed information')
        clockHz = double(prm_dict['ClockSpeed'][0])
        # NOTE(review): the shot-based lookup overrides the value just read;
        # it is assumed to belong to this branch - confirm against history.
        clockHz = LHD_A14_clk(fetcher.shot)  # see above

    if clockHz is None:
        raise NotImplementedError("timebase not recognised")
    timebase = arange(len(dat_arr)) / clockHz

    ch = Channel("%s-%s" % (fetcher.diag_name, fetcher.channel_number),
                 Coords('dummy', (0, 0, 0)))
    gain = fetcher.gain if fetcher.gain is not None else 1
    # NOTE(review): dat_arr is an array.array, so "gain * dat_arr" is
    # sequence repetition, not scaling - confirm intended behaviour.
    output_data = TimeseriesData(timebase=Timebase(timebase),
                                 signal=Signal(gain * dat_arr),
                                 channels=ch)
    output_data.meta.update({'shot': fetcher.shot})
    return output_data
def testFilteredDataHistory_copy(self):
    """ make sure that _copy version does NOT alter original """
    def last_entry(history):
        # first line of the most recent '> ' history entry
        return history.split('> ')[-1].split('\n')[0]

    timebase = generate_timebase(t0=-0.5, n_samples=1.e2, sample_freq=1.e2)
    # nonzero signal mean
    one_channel = get_n_channels(1)
    tsd = TimeseriesData(timebase=timebase,
                         signal=Signal(np.arange(len(timebase))),
                         channels=one_channel)

    filtered_tsd = tsd.subtract_mean()
    # bdb in 4 places, assume that the xtra info (norm_value) is supposed
    # to be there: previously 3 lines were expected, extra 2 is OK
    self.assertEqual(len(filtered_tsd.history.split('\n')), 5)

    output_data = filtered_tsd.normalise(method='rms', copy=True)
    # the copy records normalise(); the original still ends at subtract_mean()
    self.assertEqual(last_entry(output_data.history), "normalise(method='rms')")
    self.assertEqual(last_entry(filtered_tsd.history), "subtract_mean()")
def do_fetch(self):
    """Fetch the MDSplus signal according to ``self.fetch_mode``.

    * 'thin client': data and dim_of() are requested over
      ``self.acq.connection``.
    * 'http': data is downloaded from a URL built from the server address,
      tree, shot, tagname and nodepath.
    * anything else: the node is read from ``self.tree``; only nodes whose
      dtype is 195 are supported.

    Returns:
        TimeseriesData with a dummy-coordinate channel named after the
        nodepath.

    Raises:
        Exception: for an unsupported node type in local-tree mode.
    """
    # TODO support non-signal datatypes
    parts = self.mds_path_components

    if self.fetch_mode == 'thin client':
        ch = Channel(parts['nodepath'], Coords('dummy', (0, 0, 0)))
        data = self.acq.connection.get(parts['nodepath'])
        dim = self.acq.connection.get('dim_of(%s)' % parts['nodepath'])
        # TODO: fix this hack (same hack as when getting signal from node)
        if len(data.shape) > 1:
            data = np.array(data)[0, ]
        if len(dim.shape) > 1:
            dim = np.array(dim)[0, ]
        output_data = TimeseriesData(timebase=Timebase(dim),
                                     signal=Signal(data),
                                     channels=ch)
        output_data.meta.update({'shot': self.shot})
        return output_data

    if self.fetch_mode == 'http':
        url_tail = '/'.join([parts['tree'], str(self.shot),
                             parts['tagname'], parts['nodepath']])
        data = mdsweb.data_from_url(self.acq.server + url_tail)
        ch = Channel(parts['nodepath'], Coords('dummy', (0, 0, 0)))
        output_data = TimeseriesData(timebase=Timebase(data.data.dim),
                                     signal=Signal(data.data.signal),
                                     channels=ch)
        output_data.meta.update({'shot': self.shot})
        return output_data

    # local tree access
    node = self.tree.getNode(self.mds_path)
    if int(node.dtype) != 195:
        raise Exception('Unsupported MDSplus node type')
    return get_tsd_from_node(self, node)
def do_fetch(self):
    """Fetch ``self.pointname`` for ``self.shot``, via the NC cache if present.

    If ``self.NC`` is set and holds a variable named after the pointname,
    data and time axis are read from it; otherwise they are fetched with
    ``ptdata2`` over ``self.acq.connection``.  Writing back to the NC file
    is currently disabled (only a message is printed).

    Side effects: sets ``self.NC`` (to None if missing),
    ``self.write_cache`` and ``self.fetch_mode``.

    Returns:
        TimeseriesData for the point, with ``config_name`` set to the
        Channel object.
    """
    print("Shot: {}\nPoint Name: {}".format(self.shot, self.pointname))

    if not hasattr(self, 'NC'):
        self.NC = None
    cache_attached = self.NC is not None

    cached_names = []
    if cache_attached:
        t_name = '{}_time'.format(self.pointname)
        cached_names = self.NC.variables.keys()

    if self.pointname not in cached_names:
        print('   Fetching from ptdata')
        reply = self.acq.connection.get('ptdata2("{}",{})'.format(
            self.pointname, self.shot))
        data = reply.data()
        reply = self.acq.connection.get('dim_of(ptdata2("{}",{}))'.format(
            self.pointname, self.shot))
        t_axis = reply.data()
        self.write_cache = True
    else:
        print('   Pointname in NC cache, Reading...\n')
        t_axis = self.NC.variables[t_name].data[:].copy()
        data = self.NC.variables[self.pointname].data[:].copy()
        self.write_cache = False

    coords = get_coords_for_channel(**self.__dict__)
    ch = Channel(self.pointname, coords)

    if cache_attached and self.write_cache:
        print("\t Writing to NC file disabled temporarily.")
        #print('   Writing pointname to NC file\n')
        #self.NC.createDimension(t_name, len(t_axis))
        #f_time = self.NC.createVariable(t_name,'d',(t_name,))
        #f_time[:] = +t_axis
        #sig = self.NC.createVariable(self.pointname,'f',(t_name,))
        #sig[:] = +data

    output_data = TimeseriesData(timebase=Timebase(t_axis),
                                 signal=Signal(data),
                                 channels=ch)
    output_data.config_name = ch
    self.fetch_mode = 'ptdata'
    return output_data
def do_fetch(self):
    """Read one or more binary record files into a multichannel TimeseriesData.

    ``filenames`` (a list, or a string that evaluates to one) names the
    files; "(shot)" in a name is replaced by the shot number and names
    ending in ".bz2" are read through bz2.  File *i* (1-based) is decoded
    with the numpy dtype given by option ``dtype_<i>``.  Every dtype field
    whose name starts with "channel_" becomes a channel; the timebase is
    the "timebase" field of the first file.

    NOTE(review): ``filenames``, ``dtype_<i>`` and ``phase_pairs`` are passed
    through eval(); only safe for trusted configuration.

    Returns:
        TimeseriesData with ``phase_pairs`` attached (None if not configured).
    """
    # evaluate filename list
    try:
        filenames = eval(self.__dict__.get("filenames", "[]"))
    except TypeError:
        # assume we have been given a list of filenames as a keyword
        # argument, rather than reading the config file.
        filenames = self.__dict__.get("filenames")

    data_array = []
    channel_names = []
    dtypes = []
    for fn_i, fn in enumerate(filenames):
        dt = eval(self.__dict__.get("dtype_%d" % (fn_i + 1), None))
        dtypes.append(dt)
        shot_fn = fn.replace("(shot)", str(self.shot))
        if fn.endswith(".bz2"):
            # context manager closes the file even if the read fails
            with bz2.BZ2File(shot_fn) as f:
                raw = f.read()
            # binary-mode np.fromstring is deprecated; frombuffer + copy()
            # gives the same owned, writable array
            data_array.append(np.frombuffer(raw, dtype=dt).copy())
        else:
            data_array.append(np.fromfile(shot_fn, dtype=dt))
        channel_names.extend(
            [i for i in dt.names if i.startswith("channel_")])

    ch = ChannelList(*(named_ch(i) for i in channel_names))

    # one row per channel; dtype taken from the first channel of file 1
    signal_data = np.zeros((len(channel_names), data_array[0].shape[0]),
                           dtype=dtypes[0][channel_names[0]])
    sig_counter = 0
    for d_i, d in enumerate(data_array):
        for ch_name in dtypes[d_i].names:
            if ch_name.startswith("channel_"):
                signal_data[sig_counter, :] = d[ch_name]
                sig_counter += 1

    tsd = TimeseriesData(timebase=Timebase(data_array[0]["timebase"]),
                         signal=Signal(signal_data),
                         channels=ch)
    tsd.phase_pairs = self.__dict__.get("phase_pairs", None)
    if tsd.phase_pairs is not None:
        tsd.phase_pairs = eval(tsd.phase_pairs)
    return tsd
def do_fetch(self):
    """Fetch ``self.mds_path`` and its DIM_OF over ``self.conn``.

    The raw time values are multiplied by 1e-9 for the timebase, and their
    first and last values are stored unscaled in ``utc``.

    Returns:
        TimeseriesData with ``meta['shot']``, ``config_name`` and ``utc``
        set, plus ``mdsshot`` when the fetcher has that attribute.
    """
    raw_signal = self.conn.get(self.mds_path)
    raw_dim = self.conn.get('DIM_OF(' + self.mds_path + ')')
    scale = 1.0

    coords = get_coords_for_channel(**self.__dict__)
    ch = Channel(self.config_name, coords)

    timedata = raw_dim.data()
    output_data = TimeseriesData(timebase=Timebase(1e-9 * timedata),
                                 signal=scale * Signal(raw_signal),
                                 channels=ch)
    output_data.meta.update({'shot': self.shot})

    if hasattr(self, 'mdsshot'):  # intended for checks - not yet used.
        output_data.mdsshot = self.mdsshot

    output_data.config_name = self.config_name
    output_data.utc = [timedata[0], timedata[-1]]
    #output_data.units = dat['units'] if 'units' in dat else ''
    debug_(pyfusion.DEBUG, level=1, key='W7M_do_fetch',
           msg='entering W7X MDS do_fetch')
    return output_data
def fetch(self):
    """Generate a single-channel sine wave from the fetcher's settings.

    ``t0``, ``n_samples``, ``sample_freq``, ``amplitude`` and ``frequency``
    are read from the instance and converted to numbers.

    Returns:
        TimeseriesData with one dummy channel 'ch_01' and the shot in meta.
    """
    timebase = generate_timebase(t0=float(self.t0),
                                 n_samples=int(self.n_samples),
                                 sample_freq=float(self.sample_freq))
    wave = float(self.amplitude) * sin(2 * pi * float(self.frequency) * timebase)
    channels = ChannelList(Channel('ch_01', Coords('dummy', (0, 0, 0))))
    output_data = TimeseriesData(timebase=timebase,
                                 signal=Signal(wave),
                                 channels=channels)
    output_data.meta.update({'shot': self.shot})
    return output_data
def do_fetch(self):
    """Load a locally saved signal, searching every ``localdatapath`` entry.

    ``localdatapath`` (``global`` config section) may hold several
    directories separated by '+'; the first one containing the file built
    from ``data_filename`` is used.  A leading '-' on ``self.diag_name``
    inverts the signal; channels MP5, HMP13 and HMP05 are also inverted.

    Returns:
        TimeseriesData with one dummy-coordinate channel, ``config_name``
        set and the shot number stored in ``meta``.

    Raises:
        Exception: if the file is found in none of the directories.
    """
    chan_name = self.diag_name.split('-')[-1]  # remove -
    # goes with Boyd's local stg
    name_fields = {'shot': self.shot, 'config_name': self.config_name}
    #filename_dict = {'diag_name':self.diag_name, # goes with retrieve names
    #                 'channel_number':self.channel_number,
    #                 'shot':self.shot}
    debug_(pf.DEBUG, 4, key='local_fetch')

    for directory in pf.config.get('global', 'localdatapath').split('+'):
        self.basename = path.join(directory, data_filename % name_fields)
        if path.exists(self.basename):
            break
    else:
        # loop finished without finding the file
        raise Exception("file {fn} not found.  (localdatapath was {p})"
                        .format(fn=self.basename,
                                p=pf.config.get('global', 'localdatapath').split('+')))

    signal_dict = newload(self.basename)

    inverted = (chan_name == array(['MP5','HMP13','HMP05'])).any()
    if inverted:
        flip = -1.
        print('flip')
    else:
        flip = 1.
    if self.diag_name[0] == '-':
        flip = -flip

    # coords = get_coords_for_channel(**self.__dict__)
    ch = Channel(self.diag_name, Coords('dummy', (0, 0, 0)))
    output_data = TimeseriesData(timebase=Timebase(signal_dict['timebase']),
                                 signal=Signal(flip * signal_dict['signal']),
                                 channels=ch)
    # bdb - used "fetcher" instead of "self" in the "direct from LHD data" version
    output_data.config_name = self.config_name  # when using saved files, same as name
    output_data.meta.update({'shot': self.shot})
    return output_data
def fetch(self, interp_if_diff=True):
    """Fetch each channel and combine into a multichannel instance of
    :py:class:`~pyfusion.data.timeseries.TimeseriesData`.

    The timebase of the first channel is used for the result.  A later
    channel whose timebase differs is linearly interpolated onto it when
    ``interp_if_diff`` is true; otherwise the comparison error is re-raised.

    :param interp_if_diff: interpolate channels with a differing timebase.
    :rtype: :py:class:`~pyfusion.data.timeseries.TimeseriesData`
    """
    ## initially, assume only single channel signals
    ordered_channel_names = self.ordered_channel_names()
    data_list = []
    channels = ChannelList()
    timebase = None
    meta_dict = {}

    # (a per-shot netCDF cache under ~/tmp_pyfusion used to be opened here;
    #  it is currently disabled)

    for chan in ordered_channel_names:
        fetcher_class = import_setting('Diagnostic', chan, 'data_fetcher')
        tmp_data = fetcher_class(self.acq, self.shot,
                                 config_name=chan).fetch()
        channels.append(tmp_data.channels)
        meta_dict.update(tmp_data.meta)
        if timebase is None:
            timebase = tmp_data.timebase
            data_list.append(tmp_data.signal)
            continue
        try:
            assert_array_almost_equal(timebase, tmp_data.timebase)
            data_list.append(tmp_data.signal)
        except Exception:
            # "except Exception" rather than a bare except, so that
            # KeyboardInterrupt / SystemExit are never swallowed here
            if not interp_if_diff:
                raise
            data_list.append(
                np.interp(timebase, tmp_data.timebase, tmp_data.signal))

    signal = Signal(data_list)
    output_data = TimeseriesData(signal=signal, timebase=timebase,
                                 channels=channels)
    #output_data.meta.update({'shot':self.shot})
    output_data.meta.update(meta_dict)
    return output_data
def do_fetch(self):
    """Read a text file of columns into a TimeseriesData.

    "(shot)" in ``self.filename`` is replaced by the shot number.  The file
    is read with ``genfromtxt(unpack=True)``: the first column is the
    timebase and every further column is one channel.
    """
    source = self.filename.replace("(shot)", str(self.shot))
    columns = genfromtxt(source, unpack=True,
                         delimiter=self.__dict__.get("delimiter", None))

    # len(columns) is number of channels + 1 (timebase)
    channel_count = len(columns) - 1
    channels = ChannelList(*(generic_ch(idx) for idx in range(channel_count)))

    return TimeseriesData(timebase=Timebase(columns[0]),
                          signal=Signal(columns[1:]),
                          channels=channels)
def do_fetch(self):
    """Fetch one channel through ``gethjdata`` using a temporary output file.

    Returns:
        TimeseriesData whose timebase is the odd-indexed and whose signal is
        the even-indexed (from 2) part of the returned buffer, with a dummy
        channel named ``self.path`` and the shot stored in ``meta``.
    """
    n_points = int(self.length)
    buf = np.zeros(1024*2*256+1)

    # the temporary file only has to exist for the duration of the call
    with tempfile.NamedTemporaryFile(prefix="pyfusion_") as outfile:
        getrets = gethjdata.gethjdata(self.shot, n_points, self.path,
                                      VERBOSE, OPT,
                                      outfile.name, buf)

    ch = Channel(self.path, Coords('dummy', (0, 0, 0)))
    times = Timebase(getrets[1::2])
    values = Signal(getrets[2::2])
    output_data = TimeseriesData(timebase=times, signal=values, channels=ch)
    output_data.meta.update({'shot': self.shot})
    return output_data
def get_multimode_test_data(channels = get_n_channels(DEFAULT_N_CHANNELS), timebase = DEFAULT_TIMEBASE, modes = [mode_1, mode_2], noise = DEFAULT_NOISE):
    """Generate synthetic multi-channel data for testing.

    Each channel holds uniform noise in [-noise, noise) plus, for every
    mode, ``amp * cos(2*pi*freq*t + mode_number*angle + phase)``, where
    ``angle`` is element 1 of the channel's cylindrical coordinates.

    Returns:
        TimeseriesData of shape (number of channels, timebase.size).
    """
    shape = (len(channels), timebase.size)
    signal_array = noise*2*(np.random.random(shape)-0.5)

    # time varies along rows, channel angle along columns
    times = np.resize(timebase, shape)
    channel_angles = np.array([chan.coords.cylindrical[1] for chan in channels])
    angles = np.resize(channel_angles, shape[::-1]).T

    for mode in modes:
        phase_arg = (2*np.pi*mode['freq']*times
                     + mode['mode_number']*angles
                     + mode['phase'])
        signal_array += mode['amp']*np.cos(phase_arg)

    return TimeseriesData(timebase=timebase,
                          signal=Signal(signal_array),
                          channels=channels)
def fetch(self, interp_if_diff = True):
    """Fetch each channel and combine into a multichannel instance of
    :py:class:`~pyfusion.data.timeseries.TimeseriesData`.

    A netCDF file named after the shot is opened (appended to if it
    already exists, otherwise created) and handed to every channel
    fetcher through the ``NC`` keyword.  The file is always closed, even
    if a channel fetch fails.

    :param interp_if_diff: when True, a channel whose timebase fails the
        ``assert_array_almost_equal`` comparison with the first channel's
        timebase is resampled onto it with ``np.interp``; when False the
        comparison error is re-raised.
    :rtype: :py:class:`~pyfusion.data.timeseries.TimeseriesData`
    """
    from scipy.io import netcdf_file

    # initially, assume only single channel signals
    data_list = []
    channels = ChannelList()
    timebase = None
    meta_dict = {}

    # NOTE(review): the cache location is hard-coded to one user's
    # directory - consider moving it to the configuration.
    fname = '/u/haskeysr/tmp/{}.nc'.format(self.shot)
    mode = 'a' if os.path.exists(fname) else 'w'
    NC = netcdf_file(fname, mode, version=2)
    try:
        for chan in self.ordered_channel_names():
            fetcher_class = import_setting('Diagnostic', chan, 'data_fetcher')
            tmp_data = fetcher_class(self.acq, self.shot,
                                     config_name=chan, NC=NC).fetch()
            channels.append(tmp_data.channels)
            meta_dict.update(tmp_data.meta)

            # "is None": comparing an array with == None is elementwise
            if timebase is None:
                timebase = tmp_data.timebase
                data_list.append(tmp_data.signal)
                continue

            try:
                assert_array_almost_equal(timebase, tmp_data.timebase)
            except Exception:
                if not interp_if_diff:
                    raise
                data_list.append(np.interp(timebase, tmp_data.timebase,
                                           tmp_data.signal))
            else:
                data_list.append(tmp_data.signal)
    finally:
        NC.close()

    output_data = TimeseriesData(signal=Signal(data_list), timebase=timebase,
                                 channels=channels)
    output_data.meta.update(meta_dict)
    return output_data
def do_fetch(self):
    """Fetch one signal (``self.senal``) for ``self.shot`` via ``tjiidata``.

    The signal dimensions are queried first; signals whose first
    dimension is not below ``MAX_SIGNAL_LENGTH`` are refused.  The ``x``
    entry of the returned data is used as the timebase and ``y`` as the
    signal, negated when ``self.invert`` is the string ``'true'``.

    :raises ValueError: if the signal is too long to load safely.
    """
    # formatted explicitly so the output is the same on Python 2 and 3
    print('{} {}'.format(self.shot, self.senal))
    data_dim = tjiidata.dimens(self.shot, self.senal)
    if not (data_dim[0] < MAX_SIGNAL_LENGTH):
        raise ValueError('Not loading data to avoid segmentation fault in tjiidata.lectur')
    data_dict = tjiidata.lectur(self.shot, self.senal, data_dim[0],
                                data_dim[0], data_dim[1])

    ch = Channel(self.senal, Coords('dummy', (0, 0, 0)))
    samples = np.array(data_dict['y'])
    # yuk - TODO: use boolean type from config
    if self.invert == 'true':
        samples = -samples
    output_data = TimeseriesData(timebase=Timebase(data_dict['x']),
                                 signal=Signal(samples), channels=ch)
    output_data.meta.update({'shot': self.shot})
    return output_data
def fetch(self, interp_if_diff=True):
    """Fetch each channel and combine into a multichannel instance of
    :py:class:`~pyfusion.data.timeseries.TimeseriesData`.

    Every name returned by ``self.ordered_channel_names()`` is fetched
    with the ``data_fetcher`` class configured for that diagnostic.  The
    timebase of the first channel becomes the common timebase.

    :param interp_if_diff: when True, a channel whose timebase fails the
        ``assert_array_almost_equal`` comparison with the common timebase
        is resampled onto it with ``np.interp``; when False the
        comparison error is re-raised.
    :rtype: :py:class:`~pyfusion.data.timeseries.TimeseriesData`
    """
    # initially, assume only single channel signals
    data_list = []
    channels = ChannelList()
    timebase = None
    meta_dict = {}

    for chan in self.ordered_channel_names():
        fetcher_class = import_setting('Diagnostic', chan, 'data_fetcher')
        tmp_data = fetcher_class(self.acq, self.shot, config_name=chan).fetch()
        channels.append(tmp_data.channels)
        meta_dict.update(tmp_data.meta)

        # "is None" rather than "== None": once timebase is an array the
        # equality test is elementwise and cannot be used as a condition
        if timebase is None:
            timebase = tmp_data.timebase
            data_list.append(tmp_data.signal)
        else:
            try:
                assert_array_almost_equal(timebase, tmp_data.timebase)
            except Exception:
                if not interp_if_diff:
                    raise
                data_list.append(
                    np.interp(timebase, tmp_data.timebase, tmp_data.signal))
            else:
                data_list.append(tmp_data.signal)

    signal = Signal(data_list)
    output_data = TimeseriesData(signal=signal, timebase=timebase,
                                 channels=channels)
    output_data.meta.update(meta_dict)
    return output_data
def do_fetch(self):
    """Read a binary record file into a multichannel TimeseriesData.

    The record layout comes from ``self.read_dtype()``.  The file name is
    ``self.filename`` with the literal ``(shot)`` replaced by the shot
    number; names ending in ``.bz2`` are decompressed first.  Fields
    whose name starts with ``channel_`` become channels, and the
    ``timebase`` field becomes the timebase.
    """
    dtype = self.read_dtype()
    data_file = self.filename.replace("(shot)", str(self.shot))
    if self.filename.endswith('.bz2'):
        # the context manager closes the file even if the read fails
        with bz2.BZ2File(data_file) as f:
            raw = f.read()
        # frombuffer returns a read-only view of the bytes; copy so the
        # arrays passed on are writable, as they were with fromstring
        data = np.frombuffer(raw, dtype=dtype).copy()
    else:
        data = np.fromfile(data_file, dtype=dtype)

    channel_names = [name for name in dtype.names
                     if name.startswith('channel_')]
    ch = ChannelList(*[named_ch(name) for name in channel_names])

    # all channels are stored with the dtype of the first channel field
    signal_data = np.zeros((len(channel_names), data.shape[0]),
                           dtype=dtype[channel_names[0]])
    for ch_i, ch_name in enumerate(channel_names):
        signal_data[ch_i, :] = data[ch_name]

    return TimeseriesData(timebase=Timebase(data['timebase']),
                          signal=Signal(signal_data), channels=ch)
def do_fetch(self):
    """Fetch one signal as JSON from a URL and wrap it as TimeseriesData.

    ``self.shot`` is either a (from_utc, to_utc) pair or arguments for
    ``get_shot_utc``.  The URL is built from ``self.url`` if present,
    otherwise from a ``fmt`` pattern on the diagnostic or on the
    acquisition, filled in with ``self.params`` and the utc bounds.

    The ``dimensions`` entry of the reply, made relative to its first
    element and optionally repaired (``self.repair``: 0 leave as is,
    1 clip negative values, 2 ``regenerate_dim``), is scaled by 1e-9 and
    used as the timebase; ``values`` is the signal.

    :raises LookupError: if no url or fmt is available.
    :raises ValueError: if ``self.repair`` is not 0, 1 or 2.
    """
    # my W7X shots are of the form from_utc, to_utc
    #  or date (8dig) and shot (progId)
    # the format is in the acquisition properties, to avoid
    # repetition in each individual diagnostic
    if self.shot[1] > 1e9:  # we have start and end in UTC
        f, t = self.shot
    else:
        f, t = get_shot_utc(*self.shot)

    # this could be moved to setup so that the error info is more complete
    if hasattr(self, 'url'):
        # A URL STYLE diagnostic - used for a one-off
        fmt = self.url + '_signal.json?from={shot_f}&upto={shot_t}&nSamples=200000'
        params = {}
    else:
        # a pattern-based one - used for arrays of probes
        if hasattr(self, 'fmt'):  # does the diagnostic have one?
            fmt = self.fmt
        elif hasattr(self.acq, 'fmt'):  # else use the acq.fmt
            fmt = self.acq.fmt
        else:  # so far we have no quick way to check the server is online
            raise LookupError('no fmt - perhaps pyfusion.cfg has been '
                              'edited because the url is not available')
        # NOTE(review): eval of a configuration string - only safe while
        # the configuration file is trusted.
        params = eval('dict('+self.params+')')

    if 'upto' not in fmt:
        fmt += '_signal.json?from={shot_f}&upto={shot_t}'

    if ('nSamples' not in fmt) and (pyfusion.NSAMPLES != 0):
        fmt += '&nSamples={ns}'.format(ns=pyfusion.NSAMPLES)

    params.update(shot_f=f, shot_t=t)
    url = fmt.format(**params)

    if pyfusion.CACHE:
        print('using wget on {url}'.format(url=url))
        # NOTE(review): the url is interpolated into a shell command.
        os.system('wget -x "{url}"'.format(url=url))
        # now read from the local copy - it is in the wd, so only //
        # but it seems we need the full path for now
        url = url.replace('http://', 'file:///home/bdb112/pyfusion/working/pyfusion/')
        print('now trying the cached copy we just grabbed: {url}'.format(url=url))
    if pyfusion.VERBOSE > 0:
        print('===> fetching url {u}'.format(u=url))

    # seems to take twice as long as timeout requested.
    try:
        # follows the example in
        #    http://webservices.ipp-hgw.mpg.de/docs/howtoREST.html#python
        dat = json.loads(urlopen(url, timeout=pyfusion.TIMEOUT).read().decode())
    except socket.timeout:
        # should check if this is better tested by the URL module
        print('****** first timeout error *****')
        # retry once with a longer timeout, decoding exactly as above
        dat = json.loads(urlopen(url, timeout=3*pyfusion.TIMEOUT).read().decode())
    except Exception as reason:
        if pyfusion.VERBOSE:
            print('********Exception***** on {c}: {u} \n{r}'
                  .format(c=self.config_name, u=url, r=reason))
        raise

    # this form will default to repair = 2 for all LP probes.
    default_repair = 2 if 'Desc.82/' in url else 0
    # this form follows the config file settings
    self.repair = int(self.repair) if hasattr(self, 'repair') else default_repair

    dimraw = np.array(dat['dimensions'])
    dim = dimraw - dimraw[0]
    if self.repair == 0:
        pass  # leave as is
    elif self.repair == 1:
        # need at least this clipping for Langmuir probes in Op1.1
        dim = np.clip(dim, 0, 1e99)
    elif self.repair == 2:
        dim, msg = regenerate_dim(dim)
        if msg is not None:
            print('shot {s}, {c}: {m}'
                  .format(s=self.shot, c=self.config_name, m=msg))
    else:
        raise ValueError('repair value of {r} not understood'.format(r=self.repair))

    if pyfusion.VERBOSE>2: print('repair',self.repair)

    # this probably should be in base.py
    coords = get_coords_for_channel(**self.__dict__)
    # used to be bare_chan?  should we include - signs?
    ch = Channel(self.config_name, coords)
    # presumably the dimensions are in ns, making the timebase seconds
    output_data = TimeseriesData(timebase=Timebase(1e-9*dim),
                                 signal=Signal(dat['values']), channels=ch)
    output_data.meta.update({'shot': self.shot})
    # utc of the data actually returned (first and last sample); the
    # requested bounds f, t of the whole shot are not stored here
    output_data.utc = [dat['dimensions'][0], dat['dimensions'][-1]]
    output_data.units = dat['units'] if 'units' in dat else ''

    # this is a minor duplication - at least it gets saved via params
    params['data_utc'] = output_data.utc
    # Warning - this could slow things down! - but allows corrupted time
    # to be re-calculated as algorithms improve.
    # First raw value followed by successive differences; built on a copy
    # so that dimraw itself is left untouched.
    diff_dimraw = dimraw.copy()
    diff_dimraw[1:] = np.diff(dimraw)
    params['diff_dimraw'] = diff_dimraw
    params['pyfusion_version'] = pyfusion.version.get_version()
    if pyfusion.VERBOSE > 0:
        print('shot {s}, config name {c}'
              .format(c=self.config_name, s=self.shot))

    output_data.config_name = self.config_name
    debug_(pyfusion.DEBUG, 2, key='W7XDataFetcher')
    output_data.params = params
    return output_data
def try_fetch_local(input_data, bare_chan):
    """Return data if in the local cache, otherwise None.

    Each '+'-separated entry of the ``localdatapath`` setting is searched
    for ``<shot>_<bare_chan>.npz``.  The last path component may be an
    MDSplus style pattern of '~' / letter pairs, in which case each
    letter selects a digit of the shot, counted from its last character
    ('a' is the last digit), to build the sub-directory name.

    The full name of the last file tried is left in
    ``input_data.localname``.

    Doesn't work for single channel HJ data.
    sgn (not gain) is meant to be used only at the single channel
    base/fetch level.

    :raises LookupError: if an MDSplus style pattern cannot be parsed.
    :raises ValueError: for cached W7X_L5 data written by a pyfusion
        version whose string compares below '0.6.8b'.
    """
    files_exist = False
    for each_path in pyfusion.config.get('global', 'localdatapath').split('+'):
        # check for multi value shot number, e.g. utc bounds for W7-X data
        shot = input_data.shot
        # MDSplus style path to access sorted files into folders by shot
        path, patt = os.path.split(each_path)
        if len(patt) == 2*len(patt.replace('~', '')):
            # exactly half the characters are '~': a subdir code
            subdir = ''
            # reverse the shot so that position 'a' is the 0th char
            strshot = str(shot[0]) if len(np.shape(shot)) > 0 else str(shot)
            revshot = strshot[::-1]
            for i, ch in enumerate(patt):
                if (i % 2) == 0:
                    if ch != '~':
                        raise LookupError("Can't parse {d} as a MDS style subdir"
                                          .format(d=patt))
                    continue
                subdir += revshot[ord(ch) - ord('a')]
        else:
            subdir = patt
        debug_(pyfusion.DEBUG, 3, key='MDS style subdir', msg=each_path)
        each_path = os.path.join(path, subdir)

        if isinstance(shot, (tuple, list, ndarray)):
            shot_str = '{s0}_{s1}'.format(s0=shot[0], s1=shot[1])
        else:
            shot_str = str(shot)
        input_data.localname = os.path.join(each_path, '{shot}_{bc}.npz'
                                            .format(shot=shot_str, bc=bare_chan))
        if pyfusion.VERBOSE>2: print(each_path,input_data.localname)
        files_exist = os.path.exists(input_data.localname)
        debug_(pyfusion.DEBUG, 3, key='try_local_fetch')
        if files_exist:
            # add anything you wish to warn about
            intmp = np.any([st in input_data.localname.lower()
                            for st in ['tmp', 'temp']])
            if pyfusion.VERBOSE > 0 or intmp:
                if intmp:
                    pyfusion.logging.warning('Using {f} in temporary directory!'
                                             .format(f=input_data.localname))
                print('found local data in {f}'. format(f=input_data.localname))
            break

    if not files_exist:
        return None

    signal_dict = newload(input_data.localname)
    if ('params' in signal_dict and 'name' in signal_dict['params']
            and 'W7X_L5' in signal_dict['params']['name']):
        # NOTE(review): this is a plain string comparison of versions
        if signal_dict['params']['pyfusion_version'] < '0.6.8b':
            raise ValueError('probe assignments in error LP11-22 in {fn}'
                             .format(fn=input_data.localname))
    if np.nanmax(signal_dict['timebase']) == 0:
        pyfusion.logging.warning('making a fake timebase for {fn}'
                                 .format(fn=input_data.localname))
        # one 2e-6 step per sample
        signal_dict['timebase'] = 2e-6*np.cumsum(1.0 + 0*signal_dict['signal'])

    coords = get_coords_for_channel(**input_data.__dict__)
    ch = Channel(bare_chan, coords)
    output_data = TimeseriesData(timebase=Timebase(signal_dict['timebase']),
                                 signal=Signal(signal_dict['signal']),
                                 channels=ch)
    # bdb - used "fetcher" instead of "self" in the "direct from LHD data" version
    #  when using saved files, should use the name - not input_data.config_name
    #  it WAS the config_name coming from the raw format.
    output_data.config_name = bare_chan
    # would be nice to get to the gain here - but how - maybe setup will get it
    output_data.meta.update({'shot': input_data.shot})
    if 'params' in signal_dict:
        output_data.params = signal_dict['params']
        # always define utc (None when the file did not record one) so
        # that callers can read the attribute unconditionally
        output_data.utc = signal_dict['params'].get('utc', None)
    else:
        output_data.utc = None
        output_data.params = dict(comment = 'old npz file has no params')

    # keep a trail of where the data originally came from
    if 'source' in output_data.params:
        oldsrc = ', originally from ' + output_data.params['source']
    else:
        oldsrc = ''
    output_data.params.update(dict(source='from npz cache' + oldsrc))
    return output_data
def fetch(self):
    """Fetch each channel and combine into a multichannel instance of
    :py:class:`~pyfusion.data.timeseries.TimeseriesData`.

    Channel names with a leading '-' are fetched under their bare name
    and the signal is multiplied by -1.  If both ``t_min`` and ``t_max``
    are set, each channel is reduced to that time range.

    By default every channel's timebase must match the first channel's
    (``assert_array_almost_equal``).  If ``skip_timebase_check`` is the
    string ``'True'``, channels are accepted regardless and all signals
    are truncated to the shortest timebase seen.

    :rtype: :py:class:`~pyfusion.data.timeseries.TimeseriesData`
    """
    ## initially, assume only single channel signals
    # this base debug breakpoint will apply to all flavours of acquisition
    debug_(pyfusion.DEBUG, level=3, key='entry_base_multi_fetch')
    ordered_channel_names = self.ordered_channel_names()
    data_list = []
    channels = ChannelList()
    common_tb = None  # common timebase
    tb_chan = None    # channel(s) that supplied the common timebase
    meta_dict = {}
    group_utc = None  # assume no utc, will be replaced by channels

    skip_tb_check = getattr(self, 'skip_timebase_check', None) == 'True'

    # t_min, t_max seem obsolete, and should be done in single chan fetch
    if hasattr(self, 't_min') and hasattr(self, 't_max'):
        t_range = [float(self.t_min), float(self.t_max)]
    else:
        t_range = []

    for chan in ordered_channel_names:
        sgn = -1 if chan[0] == '-' else 1
        bare_chan = (chan.split('-'))[-1]
        ch_data = self.acq.getdata(self.shot, bare_chan)
        if len(t_range) == 2:
            ch_data = ch_data.reduce_time(t_range)

        channels.append(ch_data.channels)
        # Config_name for a channel is attached to the multi part -
        # we need to move it to the particular channel, defaulting to
        # something if config_name is not defined.
        config_name = getattr(ch_data, 'config_name', 'fix_me')
        if pyfusion.VERBOSE > 0:
            print("base:multi ch_data.config_name", config_name)
        channels[-1].config_name = config_name

        meta_dict.update(ch_data.meta)
        # tack on the data utc (None if the channel carries none)
        ch_utc = getattr(ch_data, 'utc', None)
        ch_data.signal.utc = ch_utc

        # Make a common timebase and do some basic checks.
        if common_tb is None:
            common_tb = ch_data.timebase
            tb_chan = ch_data.channels
            if hasattr(ch_data, 'utc'):
                group_utc = ch_data.utc
            # for the first, append the whole signal
            data_list.append(sgn * ch_data.signal)
        elif skip_tb_check:
            # append regardless, but will have to go back over
            # later to check length cf common_tb
            if ch_utc is not None and group_utc is not None:
                if pyfusion.VERBOSE > 0: print('utcs: ******',ch_utc[0],group_utc[0])
                if ch_utc[0] != group_utc[0]:
                    dts = (ch_utc[0] - group_utc[0])/1e9
                    print('*** different start times *****\n********trace {chcnf} starts after {tbch} by {dts:.2g} s'
                          .format(tbch = tb_chan.config_name, chcnf = config_name, dts=dts))
                    # should pad this channel out with nans - for now
                    # only report the difference.
            if len(ch_data.signal) < len(common_tb):
                # a shorter channel takes over as the common timebase
                common_tb = ch_data.timebase
                tb_chan = ch_data.channels
            data_list.append(sgn * ch_data.signal[0:len(common_tb)])
        else:
            try:
                assert_array_almost_equal(common_tb, ch_data.timebase)
            except Exception:
                print('#### matching error in {c} - perhaps timebase not the same as the previous channel'.format(c=config_name))
                raise
            data_list.append(sgn * ch_data.signal)

    if skip_tb_check:
        # If timebase checks were ignored, signals appended before the
        # common timebase shrank may be too long: cut them to size,
        # otherwise we will end up with a signal of signals.
        ltb = len(common_tb)
        for i in range(len(data_list)):
            if len(data_list[i]) > ltb:
                data_list[i] = data_list[i][0:ltb]

    signal = Signal(data_list)
    print(shape(signal))

    output_data = TimeseriesData(signal=signal, timebase=common_tb,
                                 channels=channels)
    output_data.meta.update(meta_dict)
    output_data.comment = self.comment if hasattr(self, 'comment') else ''
    print(output_data.comment)
    output_data.utc = group_utc  # should check that all channels have same utc
    debug_(pyfusion.DEBUG, level=2, key='return_base_multi_fetch')
    return output_data
def get_basic_diagnostics(diags=None, shot=54196, times=None, delay=None, exception=False, debug=0):
    """Return a dict of np.arrays of normally numeric values for the
    times given, for the given shot.

    The result always contains ``check_tm`` (the times) and
    ``check_shot`` (the shot repeated for every time) to allow a cross
    check, plus one entry per diagnostic.  Diagnostics not present in
    ``file_info`` give an all-NaN array and a warning.  Depending on the
    ``format`` entry of ``file_info`` a value is taken from the
    ``HJ_summary`` npz file, from an evaluated ``get_static_params``
    expression, or fetched with ``gethjdata`` and interpolated onto
    ``times`` (NaN beyond the last data time).

    :param diags: one name or a list of names; a default list if None.
    :param shot: shot number.
    :param times: times at which values are wanted; defaults to
        ``np.linspace(0, 4, 4000)``.
    :param delay: not used by the active code.
    :param exception: exception class (or tuple) tolerated while fetching
        a signal; such a diagnostic is left out of the result.  None
        means tolerate nothing.  The default False means ``Exception``
        unless ``debug`` is non-zero, in which case nothing is tolerated.
    :param debug: debug level; also enables extra printing.
    :raises ValueError: for a malformed variable expression, an unknown
        operator, or insufficient/degenerate timebase data.
    :raises LookupError: if fetched data has an unexpected shape.
    """
    global HJ_summary
    # if no exception given and we are not debugging
    # note - exception=None is a valid entry, meaning tolerate no exceptions
    # so the "default" we use is False
    if exception is False and debug == 0:
        exception = Exception
    # "except None" / "except False" is itself an error on Python 3; an
    # empty tuple is the valid way to say "catch nothing"
    tolerated = exception if exception else ()

    if diags is None:
        diags = "<n_e19>,b_0,i_p,w_p,dw_pdt,dw_pdt2".split(',')
    if len(np.shape(diags)) == 0:
        diags = [diags]

    if times is None:
        times = np.linspace(0, 4, 4000)
    times = np.array(times)

    vals = {}
    # create an extra time array to allow a cross check
    vals.update({'check_tm': times})
    vals.update({'check_shot': np.zeros(len(times), dtype=int) + shot})
    debug_(pyfusion.DEBUG, 2, key='get_basic')

    for diag in diags:
        if diag not in file_info:
            warn('diagnostic {0} not found in shot {1}'.format(diag, shot), stacklevel=2)
            vals.update({diag: np.nan + times})
            debug_(pyfusion.DEBUG, 2, key='get_basic')
            continue

        info = file_info[diag]
        varname = info['name']
        infofmt = info['format']

        # an optional "oper:" prefix selects post-processing
        if ':' in varname:
            (oper, varname) = varname.split(':')
        else:
            oper = None

        # strip a surrounding "xxx( ... )" down to the variable name
        if '(' in varname:
            try:
                left, right = varname.split('(')
                varname, rest = right.split(')')
            except ValueError:
                raise ValueError('in expression {v} - parens?'.format(v=varname))

        if infofmt.find('.npz') > 0:
            # load the summary file on first use only
            try:
                HJ_summary.keys()
            except (NameError, AttributeError):
                csvfilename = acq_HJ+'/'+infofmt
                if pyfusion.DBG() > 1:
                    print('looking for HeliotronJ summary in' + csvfilename)
                print('reloading {0}'.format(csvfilename))
                HJ_summary = np.load(csvfilename)

            val = HJ_summary[varname][shot]
            valarr = np.double(val)+(times*0)
        elif 'get_static_params' in infofmt:
            # NOTE(review): eval of a string from file_info - only safe
            # while that table is trusted.
            pdicts = eval(infofmt.format(shot=shot))
            if len(pdicts) == 0:
                print('empty dictionary returned')

            val = pdicts[varname]
            valarr = np.double(val)+(times*0)
        else:
            # read signal from data system
            debug_(max(pyfusion.DEBUG, debug), level=4, key='find_data')
            try:
                channel = info['name']
                outdata = np.zeros(1024*2*256+1)
                # integer division: this is a sample count
                channel_length = (len(outdata)-1)//2
                ierror, getrets = gethjdata.gethjdata(shot, channel_length,
                                                      info['name'],
                                                      verbose=VERBOSE, opt=1,
                                                      ierror=2,
                                                      outdata=outdata, outname='')
                if ierror != 0:
                    raise LookupError('data not found for {s}:{c}'.format(s=shot, c=channel))
                ch = Channel(info['name'], Coords('dummy', (0, 0, 0)))
                # timebase in secs (was ms in raw data)
                dg = TimeseriesData(timebase=Timebase(1e-3 * getrets[1::2]),
                                    signal=Signal(getrets[2::2]), channels=ch)
            except tolerated as reason:
                if debug > 0:
                    print('exception running gethjdata {r} {a}'
                          .format(r=reason, a=reason.args))
                dg = None

            if dg is None:
                # the fetch failed with a tolerated exception
                valarr = None
            else:
                # name used in error messages
                source_name = getattr(dg, 'filename', info['name'])
                nd = 1   # initially only deal with single channels (HJ)
                # get the column(s) of the array corresponding to the name
                w = [0]
                if oper in 'sum,average,rms,max,min'.split(','):
                    # NOTE(review): this branch expects a multi-column
                    # ``dg.data`` and does not define ``tim``; it looks
                    # inherited from another reader - confirm before use.
                    if oper == 'sum': op = np.sum
                    elif oper == 'average': op = np.average
                    elif oper == 'min': op = np.min
                    elif oper == 'std': op = np.std
                    else:
                        raise ValueError('operator {o} in {n} not known to get_basic_diagnostics'
                                         .format(o=oper, n=info['name']))
                    valarr = op(dg.data[:, nd+w], 1)
                else:
                    if len(w) != 1:
                        raise LookupError(
                            'Need just one instance of variable {0} in {1}'
                            .format(varname, source_name))
                    dg.data = dg.signal  # fudge compatibility
                    if len(np.shape(dg.data)) != 1:  # 2 for igetfile
                        raise LookupError(
                            'insufficient data for {0} in {1}'
                            .format(varname, source_name))
                    valarr = dg.signal
                    tim = dg.timebase

                # fudge until we can get the number of points: keep only
                # the samples before the largest time value
                valarr = valarr[:np.argmax(tim)]
                tim = tim[:np.argmax(tim)]

                if oper == 'ddt':  # derivative operator
                    valarr = np.diff(valarr)/(np.average(np.diff(tim)))
                    tim = (tim[0:-1] + tim[1:])/2.0

                if oper == 'ddt2':  # abs(ddw)*derivative operator
                    dw = np.diff(valarr)/(np.average(np.diff(tim)))
                    ddw = np.diff(dw)/(np.average(np.diff(tim)))
                    tim = tim[2:]
                    valarr = 4e-6 * dw[1:] * np.abs(ddw)

                if (len(tim) < 10) or (np.std(tim) < 0.1):
                    raise ValueError('Insufficient points or degenerate'
                                     'timebase data in {0}, {1}'
                                     .format(varname, source_name))

                valarr = (stineman_interp(times, tim, valarr))
                # no extrapolation beyond the end of the data
                w = np.where(times > max(tim))
                valarr[w] = np.nan

        if valarr is not None:
            vals.update({diag: valarr})

    debug_(max(pyfusion.DEBUG, debug), level=5, key='interp')
    return vals
def fetch_data_from_file(fetcher):
    """Read ``<basename>.prm`` / ``<basename>.dat`` into a TimeseriesData.

    The ``.prm`` file supplies the data length, resolution, binary coding,
    clock information and range.  Samples are read as 16 bit integers
    (unsigned with a mid-range offset, or signed with no offset) and
    scaled by ``Range / 2**bits`` and by ``fetcher.gain`` (1 if absent or
    not a number).  The timebase is the sample index divided by the clock
    frequency.

    :param fetcher: object providing ``basename``, ``shot``,
        ``diag_name``, ``channel_number``, ``config_name`` and optionally
        ``gain``.
    :raises NotImplementedError: for an unsupported image type or binary
        coding, or if no clock information is found.
    """
    prm_dict = read_prm_file(fetcher.basename + ".prm")
    n_bytes = int(prm_dict["DataLength(byte)"][0])
    bits = int(prm_dict["Resolution(bit)"][0])

    if "ImageType" not in prm_dict:  # if so assume unsigned
        bytes_per_sample = 2
        dat_arr = Array.array("H")
        offset = 2 ** (bits - 1)
    elif prm_dict["ImageType"][0] == "INT16":
        bytes_per_sample = 2
        coding = prm_dict["BinaryCoding"][0]
        if coding == "offset_binary":
            dat_arr = Array.array("H")
            offset = 2 ** (bits - 1)
        elif coding == "shifted_2's_complementary":
            dat_arr = Array.array("h")
            offset = 0
        else:
            raise NotImplementedError(" binary coding {pd}"
                                      .format(pd=prm_dict["BinaryCoding"]))
    else:
        raise NotImplementedError(" image type {it}"
                                  .format(it=prm_dict["ImageType"]))

    # fromfile needs an integer item count; the context manager closes
    # the file even if the read fails
    with open(fetcher.basename + ".dat", "rb") as fp:
        dat_arr.fromfile(fp, n_bytes // bytes_per_sample)

    clockHz = None
    if "SamplingClock" in prm_dict:
        clockHz = double(prm_dict["SamplingClock"][0])
    if "SamplingInterval" in prm_dict:
        clockHz = clockHz / double(prm_dict["SamplingInterval"][0])
    if "ClockSpeed" in prm_dict:
        if clockHz is not None:
            pyfusion.utils.warn("Apparent duplication of clock speed information")
        # the value in the file is overridden by the shot-dependent clock
        clockHz = LHD_A14_clk(fetcher.shot)
    if clockHz is None:
        raise NotImplementedError("timebase not recognised")
    timebase = arange(len(dat_arr)) / clockHz

    ch = Channel("%s-%s" % (fetcher.diag_name, fetcher.channel_number),
                 Coords("dummy", (0, 0, 0)))
    try:
        gain = float(fetcher.gain)
    except (AttributeError, TypeError, ValueError):
        gain = 1
    # sign flipping for names starting with '-' is not done at this level
    flip = 1

    # not sure if this needs a factor of two for RangePolarity,Bipolar (A14)
    scale_factor = flip * double(prm_dict["Range"][0]) / (2 ** bits)
    # convert to int32 before removing the offset so that unsigned 16 bit
    # samples cannot wrap around
    output_data = TimeseriesData(
        timebase=Timebase(timebase),
        signal=Signal(scale_factor * gain
                      * (array(dat_arr, dtype=np.int32) - offset)),
        channels=ch)
    output_data.meta.update({"shot": fetcher.shot})
    output_data.config_name = fetcher.config_name
    return output_data
def read_data_from_file(fetcher):
    """Read ``<basename>.prm`` / ``<basename>.dat`` into a TimeseriesData.

    The ``.prm`` file supplies the resolution, binary coding, clock
    information and range.  Samples are read with ``np.fromfile`` as 16
    bit integers (unsigned with a mid-range offset, or signed with no
    offset) and scaled by ``Range / 2**bits`` and by ``fetcher.gain``
    (1 if absent or not a number).

    If ``fetcher.timeOK`` is true the timebase is read from
    ``<basename>.time`` using the type named in ``<basename>.tprm``;
    otherwise it is the sample index divided by the clock frequency,
    minus any pre-trigger interval.

    :param fetcher: object providing ``basename``, ``shot``, ``timeOK``,
        ``diag_name``, ``channel_number``, ``config_name`` and optionally
        ``gain``.
    :raises NotImplementedError: for an unsupported image type or binary
        coding, or if no clock information is found.
    :raises LookupError: if the .prm file has neither ``Range`` nor
        ``Range(V)``.
    """
    prm_dict = read_prm_file(fetcher.basename+".prm")
    bits = int(prm_dict['Resolution(bit)'][0])

    if 'ImageType' not in prm_dict:      # if so assume unsigned
        offset = 2**(bits-1)
        dtyp = np.dtype('uint16')
    elif prm_dict['ImageType'][0] == 'INT16':
        coding = prm_dict['BinaryCoding'][0]
        if coding == 'offset_binary':
            offset = 2**(bits-1)
            dtyp = np.dtype('uint16')
        elif coding in ("shifted_2's_complementary",
                        # this was added for the VSL digitisers
                        "2's_complementary"):  # not sure about this
            offset = 0
            dtyp = np.dtype('int16')
        else:
            raise NotImplementedError(' binary coding {pd}'
                                      .format(pd=prm_dict['BinaryCoding']))
    else:
        raise NotImplementedError(' image type {it}'
                                  .format(it=prm_dict['ImageType']))

    dat_arr = np.fromfile(fetcher.basename + '.dat', dtyp)

    if fetcher.timeOK:  # we have retrieve_t data!
        # check for ArrayDataType: float is float32
        # skip is 0 as there is no initial digitiser type token
        tprm_dict = read_prm_file(fetcher.basename+".tprm", skip=0)
        if pyfusion.VERBOSE > 1:
            print(tprm_dict)
        ftype = tprm_dict['ArrayDataType'][0]
        floats = dict(float='float32', double='float64')
        timebase = np.fromfile(fetcher.basename + '.time',
                               np.dtype(floats[ftype]))
    else:  # use the info from the .prm file
        clockHz = None
        if 'SamplingClock' in prm_dict:
            clockHz = double(prm_dict['SamplingClock'][0])
        if 'SamplingInterval' in prm_dict:
            clockHz = clockHz/double(prm_dict['SamplingInterval'][0])
        if 'ClockInterval(uSec)' in prm_dict:  # VSL dig
            clockHz = 1e6/double(prm_dict['ClockInterval(uSec)'][0])
        if 'ClockSpeed' in prm_dict:
            if clockHz is not None:
                pyfusion.utils.warn('Apparent duplication of clock speed information')
            # the value in the file is overridden by the shot-dependent clock
            clockHz = LHD_A14_clk(fetcher.shot)
        if clockHz is None:
            debug_(pyfusion.DEBUG, level=4, key='LHD read debug')
            raise NotImplementedError("timebase not recognised")
        if 'PreSamples/Ch' in prm_dict:   # needed for "WE" e.g. VSL
            pretrig = float(prm_dict['PreSamples/Ch'][0])/clockHz
        else:
            pretrig = 0.
        timebase = arange(len(dat_arr))/clockHz - pretrig

    debug_(pyfusion.DEBUG, level=4, key='LHD read debug')
    ch = Channel("{dn}-{dc}"
                 .format(dn=fetcher.diag_name,
                         dc=fetcher.channel_number),
                 Coords('dummy', (0, 0, 0)))
    try:
        gain = float(fetcher.gain)
    except (AttributeError, TypeError, ValueError):
        gain = 1
    # sign flipping for names starting with '-' is not done at this level
    flip = 1

    # not sure if this needs a factor of two for RangePolarity,Bipolar (A14)
    rng = None
    for key in ('Range', 'Range(V)'):  # equivalent alternatives
        rng = prm_dict.get(key)
        if rng is not None:
            break
    if rng is None:
        raise LookupError("neither 'Range' nor 'Range(V)' found in {f}"
                          .format(f=fetcher.basename + '.prm'))
    scale_factor = flip*double(rng[0])/(2**bits)

    # convert to int32 before removing the offset so that unsigned 16 bit
    # samples cannot wrap around
    output_data = TimeseriesData(
        timebase=Timebase(timebase),
        signal=Signal(scale_factor*gain
                      * (array(dat_arr, dtype=np.int32)-offset)),
        channels=ch)
    output_data.meta.update({'shot': fetcher.shot})
    output_data.config_name = fetcher.config_name
    output_data.params = prm_dict
    return output_data