def do_fetch(self):
    """Fetch one channel from the W7X archive and return a TimeseriesData.

    ``self.shot`` is either a ``(from_utc, to_utc)`` pair (recognised by
    the second element being > 1e9) or the arguments accepted by
    ``get_shot_utc`` (date as 8 digits, and progId).  The request URL is
    built from ``self.url`` (one-off diagnostics) or from a format
    pattern (``self.fmt``, falling back to ``self.acq.fmt``) filled in
    with ``self.params``.

    Side effects: sets ``self.repair`` to an int; when ``pyfusion.CACHE``
    is set, runs ``wget -x`` in the current working directory and reads
    the downloaded copy back.

    Returns:
        TimeseriesData whose timebase is the archive 'dimensions'
        relative to the first sample, scaled by 1e-9, and whose
        ``params`` dict carries 'data_utc', 'diff_dimraw' and
        'pyfusion_version' in addition to the URL parameters.

    Raises:
        LookupError: neither the diagnostic nor the acquisition has a fmt.
        ValueError: ``self.repair`` is not 0, 1 or 2.
        Any exception from the fetch itself is re-raised after optional
        logging; a first ``socket.timeout`` is retried once with three
        times the timeout.
    """
    # local import so that the block does not depend on the file header
    from contextlib import closing

    # my W7X shots are of the form from_utc, to_utc
    #  or date (8dig) and shot (progId)
    # the format is in the acquisition properties, to avoid
    # repetition in each individual diagnostic
    if self.shot[1] > 1e9:  # we have start and end in UTC
        f, t = self.shot
    else:
        f, t = get_shot_utc(*self.shot)

    # A URL STYLE diagnostic - used for a one-off
    # this could be moved to setup so that the error info is more complete
    if hasattr(self, 'url'):
        # (an earlier assignment without nSamples was dead code - removed)
        fmt = self.url + '_signal.json?from={shot_f}&upto={shot_t}&nSamples=200000'
        params = {}
    else:  # a pattern-based one - used for arrays of probes
        if hasattr(self, 'fmt'):  # does the diagnostic have one?
            fmt = self.fmt
        elif hasattr(self.acq, 'fmt'):  # else use the acq.fmt
            fmt = self.acq.fmt
        else:  # so far we have no quick way to check the server is online
            raise LookupError('no fmt - perhaps pyfusion.cfg has been '
                              'edited because the url is not available')

        # SECURITY NOTE(review): eval() executes whatever is in the
        # config 'params' entry - only acceptable for trusted config files.
        params = eval('dict(' + self.params + ')')

    if 'upto' not in fmt:
        fmt += '_signal.json?from={shot_f}&upto={shot_t}'
    if ('nSamples' not in fmt) and (pyfusion.NSAMPLES != 0):
        fmt += '&nSamples={ns}'.format(ns=pyfusion.NSAMPLES)

    params.update(shot_f=f, shot_t=t)
    url = fmt.format(**params)

    if pyfusion.CACHE:
        print('using wget on {url}'.format(url=url))
        # NOTE(review): the url is interpolated into a shell command line;
        # fine for config-supplied urls, unsafe for untrusted ones.
        os.system('wget -x "{url}"'.format(url=url))
        # now read from the local copy.  wget -x writes below the working
        # directory, and file: urls need the full path, so build it from
        # the cwd instead of one developer's hard-coded home directory.
        url = url.replace('http://', 'file://' + os.getcwd() + '/')
        print('now trying the cached copy we just grabbed: {url}'.format(url=url))

    if pyfusion.VERBOSE > 0:
        print('===> fetching url {u}'.format(u=url))

    # seems to take twice as long as timeout requested.
    # haven't thought about python3 for the json stuff yet
    try:
        # follows the example in
        #    http://webservices.ipp-hgw.mpg.de/docs/howtoREST.html#python
        # closing() releases the connection as soon as the body is read
        with closing(urlopen(url, timeout=pyfusion.TIMEOUT)) as response:
            dat = json.loads(response.read().decode())
    except socket.timeout:
        # should check if this is better tested by the URL module
        print('****** first timeout error *****')
        with closing(urlopen(url, timeout=3 * pyfusion.TIMEOUT)) as response:
            dat = json.loads(response.read().decode())
    except Exception as reason:
        if pyfusion.VERBOSE:
            print('********Exception***** on {c}: {u} \n{r}'
                  .format(c=self.config_name, u=url, r=reason))
        raise

    # this form will default to repair = 2 for all LP probes.
    default_repair = 2 if 'Desc.82/' in url else 0
    # this form follows the config file settings
    self.repair = int(self.repair) if hasattr(self, 'repair') else default_repair

    dimraw = np.array(dat['dimensions'])
    dim = dimraw - dimraw[0]  # new array: time relative to the first sample
    if self.repair == 0:
        pass  # leave as is
    # need at least this clipping for Langmuir probes in Op1.1
    elif self.repair == 1:
        dim = np.clip(dim, 0, 1e99)
    elif self.repair == 2:
        dim, msg = regenerate_dim(dim)
        if msg is not None:
            print('shot {s}, {c}: {m}'
                  .format(s=self.shot, c=self.config_name, m=msg))
    else:
        raise ValueError('repair value of {r} not understood'.format(r=self.repair))

    if pyfusion.VERBOSE > 2:
        print('repair', self.repair)

    # ch = Channel(self.config_name,  Coords('dummy', (0,0,0)))
    # this probably should be in base.py
    coords = get_coords_for_channel(**self.__dict__)
    # used to be bare_chan? should we include - signs?
    ch = Channel(self.config_name, coords)
    output_data = TimeseriesData(timebase=Timebase(1e-9 * dim),
                                 signal=Signal(dat['values']), channels=ch)
    output_data.meta.update({'shot': self.shot})
    output_data.utc = [dat['dimensions'][0], dat['dimensions'][-1]]
    output_data.units = dat['units'] if 'units' in dat else ''
    # this is a minor duplication - at least it gets saved via params
    params['data_utc'] = output_data.utc

    # Warning - this could slow things down! - but allows corrupted time
    # to be re-calculated as algorithms improve.
    # The differences are written over dimraw in place (element 0 keeps the
    # absolute start), so dimraw must not be used as a timebase after this.
    params['diff_dimraw'] = dimraw
    params['diff_dimraw'][1:] = np.diff(dimraw)
    params['pyfusion_version'] = pyfusion.version.get_version()
    if pyfusion.VERBOSE > 0:
        print('shot {s}, config name {c}'
              .format(c=self.config_name, s=self.shot))

    output_data.config_name = self.config_name
    debug_(pyfusion.DEBUG, 2, key='W7XDataFetcher')
    output_data.params = params

    # NOTE(review): the original ends with the disabled statement
    #   "the total shot utc.  output_data.utc = [f, t]"
    # which appears to be a comment; output_data.utc therefore stays
    # equal to params['data_utc'].
    return output_data
def do_fetch(self):
    """Fetch one channel from the W7X archive and return a TimeseriesData.

    ``self.shot`` is either a ``(from_utc, to_utc)`` pair (recognised by
    the second element being > 1e9) or a (date, progId) shot.  An optional
    ``self.time_range`` (two values, multiplied by 1e9 and added to
    ``utc_0``) selects a segment of a (date, progId) shot; it cannot be
    combined with a utc pair.

    Names used below:
      data_utc:   first and last timestamps of the retrieved data
      shot_f_u, shot_t_u: what get_shot_utc(actual_shot) returns
      req_f_u, req_t_u:   the requested data segment (same as f_u, t_u)
      utc_0:      time zero of the returned timebase - the first trigger
                  when a program is found, else f_u or f_u + 61 s

    Side effects: sets ``self.repair`` (int) and possibly ``self.tgt``;
    prints progress/timing; with ``pyfusion.CACHE`` runs ``wget -x`` in
    the working directory and may extend ``os.environ['path']``.

    Returns:
        TimeseriesData with timebase ``1e-9 * (dimensions - utc_0)``
        (or the raw dimensions when repair == -1), signal scaled by
        1/3277.24 when the archive datatype is 'short', and ``params``
        carrying 'data_utc', 'utc_0', 'diff_dimraw', 'pyfusion_version'.

    Raises:
        ValueError: utc-pair shot combined with time_range, or an
            unknown repair value.
        LookupError: more than one program, no triggers at all, no
            program to anchor a time_range, or no fmt available.
        Fetch errors (MemoryError, httplib.IncompleteRead, others) are
        logged and re-raised; a first socket.timeout is retried once
        with three times the timeout.
    """
    # local import so that the block does not depend on the file header
    from contextlib import closing

    t_start = seconds()
    have_time_range = getattr(self, 'time_range', None) is not None

    if self.shot[1] > 1e9 or have_time_range:
        # we have start and end in UTC instead of shot no
        # need the shot and utc to get t1 to set zero t
        if have_time_range:
            if self.shot[1] > 1e9:
                raise ValueError(
                    "Can't supply shot as a utc pair and specify a time_range")
            actual_shot = self.shot
            f_u = None  # set to None to make sure we don't use it
        else:
            f_u, t_u = self.shot  # Initialize to the requested utc range
            actual_shot = get_shot_number([f_u, t_u])

        progs = get_programs(actual_shot)  # need shot to look up progs
        # need prog to get trigger - not tested for OP1.1
        if len(progs) > 1:
            raise LookupError(
                'fetch: too many programs found - covers > 1 shot?')

        if len(progs) == 1:
            # next(iter(...)) works for the py2 list and the py3 view alike;
            # progs.values()[0] raises TypeError on python 3
            this_prog = next(iter(progs.values()))
            # on shot 20180724,10, this trigger[1] is an empty list
            trigger = this_prog['trigger']['1']
            # This fallback to trigger[0] mean that more rubbish shots are
            # saved than if we only look at the proper trigger (1) - e.g.
            # run pyfusion/examples/plot_signals shot_number=[20180724,10]
            #     diag_name="W7X_UTDU_LP15_I" dev_name='W7X'
            if len(trigger) == 0:  # example above
                print('** No Trigger 1 on shot {s}'.format(s=actual_shot))
                debug_(pyfusion.DEBUG, 0, key="noTrig1",
                       msg="No Trigger 1 found")
                # take any that exist, at 60
                trigger = [trr[0] + int(60 * 1e9)
                           for trr in this_prog['trigger'].values()
                           if len(trr) > 0]
                if len(trigger) == 0:
                    raise LookupError(
                        'No Triggers at all on shot {s}'.format(
                            s=actual_shot))
            # utc_0 is the first trigger (usu 61 sec ahead of data)
            utc_0 = trigger[0]
        else:
            print('Unable to look up programs - assuming this is a test shot')
            # better than nothing - probably a 'private' test/calibration
            # shot.  (None when a time_range was given - handled below.)
            utc_0 = f_u

        if f_u is None:  # shorthand for have time range
            if utc_0 is None:
                # previously failed with an opaque TypeError (None + int)
                raise LookupError(
                    'fetch: no program found for shot {s}, so time_range '
                    'cannot be converted to utc'.format(s=actual_shot))
            f_u = utc_0 + int(1e9 * self.time_range[0])
            # was 61 rel to prog['from']
            t_u = utc_0 + int(1e9 * self.time_range[1])
    else:  # self.shot is an 8,3 digit shot and time range not specified
        actual_shot = self.shot
        # f_u is the start of the overall shot - i.e about plasma time -61 sec.
        f_u, t_u = get_shot_utc(actual_shot)
        # at present, ECH usually starts 61 secs after t0
        # so it is usually sufficient to request a later start than t0
        pre_trig_secs = (self.pre_trig_secs
                         if hasattr(self, 'pre_trig_secs') else 0.3)
        # should get this from programs really - code is already above.
        # We need to just move it.
        pyfusion.utils.warn('fetch: still using hard-coded 61 secs')
        utc_0 = f_u + int(1e9 * 61)  # utc_0 is plasma initiation in utc
        f_u = utc_0 - int(1e9 * pre_trig_secs)  # first time wanted

    # make sure we have the following defined regardless of how we got here
    shot_f_u, shot_t_u = get_shot_utc(actual_shot)
    # req_f_u / req_t_u delimit the desired data segment
    req_f_u = f_u
    req_t_u = t_u

    # A URL STYLE diagnostic - used for a one-off rather than an array
    # this could be moved to setup so that the error info is more complete
    if hasattr(self, 'url'):
        # from=/upto= are appended further down
        fmt = self.url
        params = {}
        # check consistency - url should be literal - no params (only for
        # fmt) - gain, units are OK as they are not params
        if hasattr(self, 'params'):
            pyfusion.utils.warn(
                'URL diagnostic {n} should not have params <{p}>'.format(
                    n=self.config_name, p=self.params))
    else:  # a pattern-based one - used for arrays of probes
        if hasattr(self, 'fmt'):  # does the diagnostic have one?
            fmt = self.fmt
        elif hasattr(self.acq, 'fmt'):  # else use the acq.fmt
            fmt = self.acq.fmt
        else:  # so far we have no quick way to check the server is online
            raise LookupError('no fmt - perhaps pyfusion.cfg has been '
                              'edited because the url is not available')

        # SECURITY NOTE(review): eval() executes whatever is in the
        # config 'params' entry - only acceptable for trusted config files.
        params = eval('dict(' + self.params + ')')

    if 'upto' not in fmt:
        fmt += '_signal.json?from={req_f_u}&upto={req_t_u}'

    assert req_f_u == f_u, 'req_f_u error'
    assert req_t_u == t_u, 'req_t_u error'
    params.update(req_f_u=req_f_u, req_t_u=req_t_u, shot_f_u=shot_f_u)
    url = fmt.format(**params)  # substitute the channel params

    debug_(pyfusion.DEBUG, 2, key="url", msg="middle of work on urls")
    # Originally added to fix up erroneous ECH alias mapping - only
    # 6 sources work otherwise.  But it seems to help with many others.
    # This implementation is kludgey but proves the principle, and
    # means we don't have to refer to any raw.. signals
    # would be nice if they made a formal way to do this.
    alias_names = ('Rf', 'Tower5', 'MainCoils', 'ControlCoils', 'TrimCoils',
                   'Mirnov', 'Interfer', '_NBI_')
    if any(nm in url for nm in alias_names):
        from pyfusion.acquisition.W7X.get_url_parms import get_signal_url
        # replace the main middle bit with the expanded one from the GUI
        tgt = url.split('KKS/')[1].split('/scaled')[0].split('_signal')[0]
        self.tgt = tgt  # for the sake of error_info
        # construct a time filter for the shot
        filt = '?filterstart={req_f_u}&filterstop={req_t_u}'.format(**params)
        # get the url with the filter
        url = url.replace(tgt, get_signal_url(tgt, filt)).split('KKS/')[-1]
        # take the filter back out - we will add the exact one later
        url = url.replace(filt, '/')

    # nSamples now needs a reduction mechanism
    #    http://archive-webapi.ipp-hgw.mpg.de/
    # minmax is increasingly slow for nSamples>10k, 100k hopeless
    # Should ignore the test comparing the first two elements of the tb
    # prevent reduction (NSAMPLES=...) to avoid the bug presently in codac
    reduction_forbidden = (hasattr(self, 'allow_reduction')
                           and int(self.allow_reduction) == 0)
    if ('nSamples' not in url and pyfusion.NSAMPLES != 0
            and not reduction_forbidden):
        url += '&reduction=minmax&nSamples={ns}'.format(ns=pyfusion.NSAMPLES)

    debug_(pyfusion.DEBUG, 2, key="url", msg="work on urls")
    # we need %% in pyfusion.cfg to keep py3 happy
    # however with the new get_signal_url, this will all disappear
    # (version_info compares numerically; the string compare would
    # misorder a two-digit major version)
    if sys.version_info[0] < 3 and '%%' in url:
        url = url.replace('%%', '%')

    if 'StationDesc.82' in url:  # fix spike bug in scaled QRP data
        url = url.replace('/scaled/', '/unscaled/')

    if pyfusion.CACHE:
        # Needed for improperly configured cygwin systems: e.g. IPP Virtual PC
        # Perhaps this should be executed at startup of pyfusion?
        cygbin = "c:\\cygwin\\bin"
        if os.path.exists(cygbin) and cygbin not in os.environ['path']:
            os.environ['path'] += ";" + cygbin
        print('using wget on {url}'.format(url=url))
        # NOTE(review): the url is interpolated into a shell command line;
        # fine for config-supplied urls, unsafe for untrusted ones.
        retcode = os.system('wget -x "{url}"'.format(url=url))
        debug_(retcode != 0, level=1, key='wget',
               msg="wget error or DEBUG='wget'")
        # now read from the local copy - file: urls need full paths
        # http://stackoverflow.com/questions/7857416/file-uri-scheme-and-relative-files
        cwd = os.getcwd()
        # An absolute POSIX path already starts with '/', so it needs the
        # empty-authority form 'file://' + '/abs/path'; with 'file:/' the
        # first directory would be parsed as a host name.  A drive-letter
        # path keeps the 'file:/c:\...' form used so far.
        file_prefix = 'file://' if cwd.startswith('/') else 'file:/'
        url = url.replace('http://', file_prefix + cwd + '/')
        # startswith() rather than "'win' in ..." so 'darwin' is excluded
        if sys.platform.startswith(('win', 'cygwin')):
            # even though it seems odd, the local copy is named
            # ..._signal.json@from=...&upto=...  ('@' instead of '?')
            url = url.replace('?', '@')
        # readback still fails in Win
        print('now trying the cached copy we just grabbed: {url}'.format(
            url=url))

    if (req_f_u > shot_t_u) or (req_t_u < shot_f_u):
        pyfusion.utils.warn(
            'segment requested is outside the shot times for '
            + str(actual_shot))

    if pyfusion.VERBOSE > 0:
        print('======== fetching url over {dt:.1f} secs from {fr:.1f} to {tt:.1f} =========\n[{u}]'
              .format(u=url,
                      dt=(req_t_u - req_f_u) / 1e9,
                      fr=(req_f_u - shot_f_u) / 1e9,
                      tt=(req_t_u - shot_f_u) / 1e9))

    # seems to take twice as long as timeout requested.
    # This is not clean - should loop for timeout in
    #    [pyfusion.TIMEOUT, 3*pyfusion.TIMEOUT]
    # Bound before the try so every except clause and the code after it
    # can rely on them.
    ONE = 4  # memory conservation tricks only apply for DEBUG < ONE
    timeout = pyfusion.TIMEOUT
    t_pre = seconds()
    try:
        # follows the example in
        #   http://webservices.ipp-hgw.mpg.de/docs/howtoREST.html#python
        # Some extracts in examples/howtoREST.py
        # for long shots, adjust strategy and timeout to reduce memory use
        if (req_t_u - req_f_u) / 1e9 > pyfusion.VERY_LONG:
            # approximate MSamples - later on calc from dt
            size_MS = 2 * (req_t_u - req_f_u) / 1e9
            if pyfusion.NSAMPLES != 0:  # allow for subsampled data
                size_MS = pyfusion.NSAMPLES / 1e6
            # don't make timeout too small!
            timeout = 8 * size_MS + pyfusion.TIMEOUT
            print('On-the-fly conversion: Setting timeout to {tmo}'.format(
                tmo=timeout))
            # parse straight from the stream: no intermediate text copy
            with closing(urlopen(url, timeout=timeout)) as response:
                dat = json.load(response)
            t_text = seconds()
        else:
            with closing(urlopen(url, timeout=timeout)) as response:
                txtform = response.read()
            t_text = seconds()
            # trailing comma: suppresses the newline under the python 2
            # print statement (a no-op expression under python 3)
            print('{tm} {tp:.2f} prep, {tt:.2f} fetch without decode, '.format(
                tm=time.strftime('%H:%M:%S'),
                tp=t_pre - t_start,
                tt=t_text - t_pre)),
            sys.stdout.flush()
            dat = json.loads(txtform.decode('utf-8'))
            if pyfusion.DEBUG < ONE:
                txtform = None  # release memory
        t_conv = seconds()
        # for 10MS of mirnov 0.06 prep, 9.61 fetch 19.03 conv
        print('{tc:.2f} conv'.format(tc=t_conv - t_text))

    except socket.timeout as reason:
        # the following url access is a slightly different form?
        # should check if this is better tested by the URL module
        print('{tm} {tp:.2f} prep, {tt:.2f} timeout. '.format(
            tp=t_pre - t_start,
            tt=seconds() - t_pre,
            tm=time.strftime('%H:%M:%S'))),
        print('****** first timeout error, try again with longer timeout *****')
        timeout *= 3
        with closing(urlopen(url, timeout=timeout)) as response:
            dat = json.load(response)
    except MemoryError as reason:
        raise  # too dangerous to do anything else except to reraise
    except httplib.IncompleteRead as reason:
        msg = str('** IncompleteRead after {tinc:.0f}/{timeout:.0f}s ** on {c}: {u} \n{r}'
                  .format(tinc=seconds() - t_start, c=self.config_name,
                          u=url, r=reason, timeout=timeout))
        pyfusion.logging.error(msg)
        # don't want to disturb the original exception, so bare raise
        # possibly a memory error really? - not the case for 4114 20180912.48
        raise
    except Exception as reason:
        if pyfusion.VERBOSE >= 0:
            print('**** Exception (Memory? out of disk space?) OR timeout of {timeout} **** on {c}: {u} \n{r}'
                  .format(c=self.config_name, u=url, r=reason,
                          timeout=timeout))
        raise  # re raises the last exception

    # The probe's own repair setting wins; otherwise default to repair = 2
    # for urls containing 'Desc.82/' (all LP probes) and 0 elsewhere.
    if hasattr(self, 'repair'):
        self.repair = int(self.repair)
    else:
        self.repair = 2 if 'Desc.82/' in url else 0

    dimraw = np.array(dat['dimensions'])
    if 'nSamples' not in url:  # skip this check if we are decimating
        if np.abs(req_f_u - dimraw[0]) > 2000:
            print('** Start is delayed by >2 us {dtdel:,} relative to the request'
                  .format(dtdel=dimraw[0] - req_f_u))
        if (req_t_u - dimraw[-1]) > 2000:
            print('** End is earlier by >2 us {dtdel:,} relative to the request'
                  .format(dtdel=req_t_u - dimraw[-1]))

    # saved now so that the large list can be dropped to save space
    output_data_utc = [dat['dimensions'][0], dat['dimensions'][-1]]
    if pyfusion.DEBUG < ONE:
        dat['dimensions'] = None  # release memory

    # adjust dim only (not dimraw) so that zero time remains at t1
    dim = dimraw - utc_0
    # decimation with NSAMPLES will make the timebase look wrong - so
    # disable repair
    if pyfusion.NSAMPLES != 0 or self.repair == 0 or self.repair == -1:
        pass  # leave as is
    # need at least this clipping for Langmuir probes in Op1.1
    elif self.repair == 1:
        dim = np.clip(dim, 0, 1e99)
    elif self.repair == 2:
        dim, msg = regenerate_dim(dim)
        if msg is not None:
            print('shot {s}, {c}: {m}'.format(s=self.shot,
                                              c=self.config_name, m=msg))
    else:
        raise ValueError(
            'repair value of {r} not understood'.format(r=self.repair))

    if pyfusion.VERBOSE > 2:
        print('repair', self.repair)

    # ch = Channel(self.config_name,  Coords('dummy', (0,0,0)))
    # this probably should be in base.py
    coords = get_coords_for_channel(**self.__dict__)
    # used to be bare_chan? should we include - signs?
    ch = Channel(self.config_name, coords)
    scl = 1 / 3277.24 if dat['datatype'].lower() == 'short' else 1
    # repair == -1 keeps the raw archive timestamps as the timebase
    timebase = Timebase(dimraw) if self.repair == -1 else Timebase(1e-9 * dim)
    output_data = TimeseriesData(timebase=timebase,
                                 signal=scl * Signal(dat['values']),
                                 channels=ch)
    output_data.meta.update({'shot': self.shot})
    output_data.utc = output_data_utc
    output_data.units = dat['units'] if 'units' in dat else ''
    # this is a minor duplication - at least it gets saved via params
    params['data_utc'] = output_data.utc
    params['utc_0'] = utc_0  # hopefully t0 -- useful in npz files

    # Warning - this could slow things down! - but allows
    # corrupted time to be re-calculated as algorithms improve.
    # and storage as differences takes very little space.
    # The differences are written over dimraw in place to save memory.
    # NOTE(review): for repair == -1 this presumes Timebase(dimraw) took a
    # copy rather than a view - confirm in the Timebase implementation.
    params['diff_dimraw'] = dimraw
    params['diff_dimraw'][1:] = np.diff(dimraw)
    if pyfusion.DEBUG < ONE:
        dimraw = None
    # NOTE!!! need float128 to process dimraw, and cumsum won't return ints
    # or automatically promote to 128bit (neither do simple ops like *, +)
    params['pyfusion_version'] = pyfusion.version.get_version()
    if pyfusion.VERBOSE > 0:
        print('shot {s}, config name {c}'.format(c=self.config_name,
                                                 s=self.shot))

    output_data.config_name = self.config_name
    debug_(pyfusion.DEBUG, 2, key='W7XDataFetcher')
    output_data.params = params

    # NOTE(review): the original ends with the disabled statement
    #   "the total shot utc.  output_data.utc = [f_u, t_u]"
    # which appears to be a comment; output_data.utc therefore stays
    # equal to params['data_utc'].
    return output_data