Example #1
0
    def do_fetch(self):
        """Fetch one Heliotron-J channel with gethjdata and wrap it up.

        Reads self.shot, self.path, self.length and self.config_name.

        Returns:
            TimeseriesData carrying the timebase, the signal, a Channel with
            dummy coordinates, meta['shot'], config_name and the static
            params returned by get_static_params().

        Raises:
            LookupError: gethjdata returned a non-zero ierror, or
                get_static_params() returned nothing for this shot/signal.
        """
        n_wanted = int(self.length)
        # Work buffer handed to the fortran routine.  Passing the binary
        # directly avoids the need for an intermediate output file, hence
        # the empty outname.
        # (a common wrapper around gethjdata would be a better home for this)
        buf = np.zeros(1024 * 2 * 256 + 1)
        ierror, getrets = gethjdata.gethjdata(
            self.shot, n_wanted, self.path,
            verbose=VERBOSE, opt=1, ierror=2, outdata=buf, outname='')
        if ierror != 0:
            not_found = 'hj Okada style data not found for {s}:{c}'
            raise LookupError(not_found.format(s=self.shot, c=self.path))

        ch = Channel(self.path, Coords('dummy', (0, 0, 0)))

        # The intent statement in the wrapper makes the output variable come
        # back in the result list.  Time and data look to be interleaved
        # (odd indices time, even indices from 2 data); the buffer goes in
        # as real*64 but comes back as real*32, as per the fortran decl.
        debug_(pyfusion.DEBUG, 4, key='Heliotron_fetch',
               msg='after call to getdata')

        # Raw times are in ms, the timebase is in secs.  The 1e-3 has to be
        # applied inside Timebase().  A preferred unit is partly allowed for
        # in savez_compressed, newload and (for plotting) the config file.
        tb = Timebase(1e-3 * getrets[1::2])
        sig = Signal(getrets[2::2])
        output_data = TimeseriesData(timebase=tb, signal=sig, channels=ch)
        output_data.meta.update({'shot': self.shot})
        if pyfusion.VERBOSE > 0:
            print('HJ config name', self.config_name)
        output_data.config_name = self.config_name

        stprms = get_static_params(shot=self.shot, signal=self.path)
        # maybe an empty result should be ignored - how do we use it?
        if not list(stprms):
            no_params = ' failure to get params for {shot}:{path}'
            raise LookupError(no_params.format(shot=self.shot,
                                               path=self.path))
        output_data.params = stprms
        return output_data
Example #2
0
    def do_fetch(self):
        """Fetch one Heliotron-J channel, keeping only the first isample points.

        Same job as the plain gethjdata fetch, except that gethjdata is also
        asked for isample, which is then used to truncate both the time and
        the data arrays (presumably the number of valid samples - confirm
        against the fortran wrapper).

        Returns:
            TimeseriesData with timebase, signal, a dummy-coordinate Channel,
            meta['shot'], config_name and static params attached.

        Raises:
            LookupError: gethjdata returned a non-zero ierror, or
                get_static_params() returned nothing for this shot/signal.
        """
        wanted_length = int(self.length)
        # buffer for the fortran routine; with direct passing of the binary
        # no output file is needed, so outname stays empty
        # (a common wrapper around gethjdata would be a better home for this)
        work = np.zeros(1024 * 2 * 256 + 1)
        # fetch, then unpack as a separate step - easier to debug
        allrets = gethjdata.gethjdata(
            self.shot, wanted_length, self.path,
            verbose=VERBOSE, opt=1, ierror=2, isample=-1,
            outdata=work, outname='')
        ierror, isample, getrets = allrets
        if ierror != 0:
            template = 'hj Okada style data not found for {s}:{c}'
            raise LookupError(template.format(s=self.shot, c=self.path))

        ch = Channel(self.path, Coords('dummy', (0, 0, 0)))

        # The wrapper's intent statement puts the output variable in the
        # result list.  Time and data look to be interleaved; the buffer is
        # fed in as real*64 but returns as real*32 (as per fortran decl).
        debug_(pyfusion.DEBUG, 4, key='Heliotron_fetch',
               msg='after call to getdata')

        # Raw time is in ms and the timebase in secs - the 1e-3 must be
        # inside the Timebase().  A preferred unit is partly allowed for in
        # savez_compressed, newload and (for plotting) the config file.
        raw_times = getrets[1::2][0:isample]
        raw_values = getrets[2::2][0:isample]
        tb = Timebase(1e-3 * raw_times)
        sig = Signal(raw_values)
        output_data = TimeseriesData(timebase=tb, signal=sig, channels=ch)
        output_data.meta.update({'shot': self.shot})
        if pyfusion.VERBOSE > 0:
            print('HJ config name', self.config_name)
        output_data.config_name = self.config_name

        stprms = get_static_params(shot=self.shot, signal=self.path)
        # maybe an empty result should be ignored - how do we use it?
        if not list(stprms):
            template = ' failure to get params for {shot}:{path}'
            raise LookupError(template.format(shot=self.shot,
                                              path=self.path))
        output_data.params = stprms
        return output_data
Example #3
0
    def do_fetch(self):
        """Fetch this diagnostic's signal from the W7-X archive web service.

        self.shot is either a (from_utc, to_utc) pair or a (date, progId)
        pair; the latter is converted with get_shot_utc().  The format of the
        request is kept in the acquisition properties to avoid repetition in
        each individual diagnostic: the URL comes from self.url (a one-off
        diagnostic) or from a pattern (self.fmt, else self.acq.fmt) filled in
        with self.params.  If pyfusion.CACHE is set, the URL is first
        downloaded with wget and then read back from the copy wget left
        under the current working directory.

        The time axis is made relative to its first sample and multiplied by
        1e-9 (presumably ns to seconds - confirm), after an optional repair
        chosen by self.repair: 0 leaves it alone, 1 clips negative values to
        zero, 2 calls regenerate_dim().

        Side effects:
            self.repair is set to an int (the default is 2 for URLs
            containing 'Desc.82/', otherwise 0).

        Returns:
            TimeseriesData with meta['shot'], utc, units, config_name and
            params attached.

        Raises:
            LookupError: the diagnostic has no url and no fmt can be found.
            ValueError: self.repair is not 0, 1 or 2.
            Exception: anything raised while reading the URL is re-raised,
                except a first socket.timeout, which is retried once with
                three times the timeout.
        """
        if self.shot[1] > 1e9:  # we have start and end in UTC
            f, t = self.shot
        else:
            f, t = get_shot_utc(*self.shot)

        # A URL STYLE diagnostic - used for a one-off
        # this could be moved to setup so that the error info is more complete
        if hasattr(self, 'url'):
            fmt = (self.url
                   + '_signal.json?from={shot_f}&upto={shot_t}&nSamples=200000')
            params = {}
        else:  # a pattern-based one - used for arrays of probes
            if hasattr(self, 'fmt'):  # does the diagnostic have one?
                fmt = self.fmt
            elif hasattr(self.acq, 'fmt'):  # else use the acq.fmt
                fmt = self.acq.fmt
            else:  # so far we have no quick way to check the server is online
                raise LookupError('no fmt - perhaps pyfusion.cfg has been '
                                  'edited because the url is not available')

            # NOTE(review): eval() executes whatever text is in self.params;
            # acceptable only while the config file is trusted.
            params = eval('dict(' + self.params + ')')

        if 'upto' not in fmt:
            fmt += '_signal.json?from={shot_f}&upto={shot_t}'

        if ('nSamples' not in fmt) and (pyfusion.NSAMPLES != 0):
            fmt += '&nSamples={ns}'.format(ns=pyfusion.NSAMPLES)

        params.update(shot_f=f, shot_t=t)
        url = fmt.format(**params)
        if pyfusion.CACHE:
            print('using wget on {url}'.format(url=url))
            os.system('wget -x "{url}"'.format(url=url))
            # wget -x recreates the host/path hierarchy under the working
            # directory; file: urls need the full path, so build it from
            # wherever we are running rather than from a fixed home directory.
            url = url.replace('http://', 'file://' + os.getcwd() + '/')
            print('now trying the cached copy we just grabbed: {url}'
                  .format(url=url))
        if pyfusion.VERBOSE > 0:
            print('===> fetching url {u}'.format(u=url))

        # seems to take twice as long as timeout requested.
        try:
            # follows the example in
            #    http://webservices.ipp-hgw.mpg.de/docs/howtoREST.html#python
            dat = json.loads(
                urlopen(url, timeout=pyfusion.TIMEOUT).read().decode())
        except socket.timeout:
            # should check if this is better tested by the URL module
            print('****** first timeout error *****')
            # one retry with a longer timeout, parsed the same way as above
            dat = json.loads(
                urlopen(url, timeout=3*pyfusion.TIMEOUT).read().decode())
        except Exception as reason:
            if pyfusion.VERBOSE:
                print('********Exception***** on {c}: {u} \n{r}'
                      .format(c=self.config_name, u=url, r=reason))
            raise

        # default to repair = 2 for all LP probes, unless the config file
        # gives a repair setting for this diagnostic
        default_repair = 2 if 'Desc.82/' in url else 0
        self.repair = (int(self.repair) if hasattr(self, 'repair')
                       else default_repair)
        dimraw = np.array(dat['dimensions'])
        dim = dimraw - dimraw[0]
        if self.repair == 0:
            pass  # leave as is
        # need at least this clipping for Langmuir probes in Op1.1
        elif self.repair == 1:
            dim = np.clip(dim, 0, 1e99)
        elif self.repair == 2:
            dim, msg = regenerate_dim(dim)
            if msg is not None:
                print('shot {s}, {c}: {m}'
                      .format(s=self.shot, c=self.config_name, m=msg))
        else:
            raise ValueError('repair value of {r} not understood'
                             .format(r=self.repair))

        if pyfusion.VERBOSE > 2:
            print('repair', self.repair)
        # this probably should be in base.py
        coords = get_coords_for_channel(**self.__dict__)
        # used to be bare_chan? should we include - signs?
        ch = Channel(self.config_name, coords)
        output_data = TimeseriesData(timebase=Timebase(1e-9*dim),
                                     signal=Signal(dat['values']), channels=ch)
        output_data.meta.update({'shot': self.shot})
        output_data.utc = [dat['dimensions'][0], dat['dimensions'][-1]]
        output_data.units = dat.get('units', '')
        # this is a minor duplication - at least it gets saved via params
        params['data_utc'] = output_data.utc
        # Warning - this could slow things down! - but allows corrupted time
        # to be re-calculated as algorithms improve.  The first element stays
        # the raw start value and the rest become successive differences.
        # dimraw is overwritten in place here; it is not used again below.
        params['diff_dimraw'] = dimraw
        params['diff_dimraw'][1:] = np.diff(dimraw)
        params['pyfusion_version'] = pyfusion.version.get_version()
        if pyfusion.VERBOSE > 0:
            print('shot {s}, config name {c}'
                  .format(c=self.config_name, s=self.shot))

        output_data.config_name = self.config_name
        debug_(pyfusion.DEBUG, 2, key='W7XDataFetcher')
        output_data.params = params

        # the total shot utc would be:  output_data.utc = [f, t]
        return output_data
Example #4
0
def try_fetch_local(input_data, bare_chan):
    """ Return the channel from the local npz cache, or None if not cached.

    Every '+'-separated entry of the 'localdatapath' item in the 'global'
    config section is tried in order and the first existing file wins.  The
    last component of an entry may be a subdirectory code made of
    '~<letter>' pairs: each letter picks a character of the shot string
    counted from its end ('a' is the last character).

    input_data.localname is set to each candidate file name in turn.
    The docstring this replaces noted that this doesn't work for single
    channel HJ data.

    Raises:
        LookupError: a subdirectory code is not made of '~<letter>' pairs.
        ValueError: a 'W7X_L5' file was saved by pyfusion older than 0.6.8b.
    """
    for entry in pyfusion.config.get('global', 'localdatapath').split('+'):
        # the shot may be multi valued, e.g. utc bounds for W7-X data
        shot = input_data.shot
        # MDSplus style path - files are sorted into folders by shot
        path, patt = os.path.split(entry)
        if len(patt) == 2*len(patt.replace('~', '')):
            # as many '~' as other characters: a subdir code.  Reversing the
            # shot string means position 'a' is its 0th character.
            if np.shape(shot):
                revshot = str(shot[0])[::-1]
            else:
                revshot = str(shot)[::-1]
            subdir = ''
            for marker, letter in zip(patt[0::2], patt[1::2]):
                if marker != '~':
                    raise LookupError("Can't parse {d} as a MDS style subdir"
                                      .format(d=patt))
                subdir += revshot[ord(letter) - ord('a')]
        else:
            subdir = patt
        debug_(pyfusion.DEBUG, 3, key='MDS style subdir', msg=entry)
        folder = os.path.join(path, subdir)

        if isinstance(shot, (tuple, list, ndarray)):
            shot_str = '{s0}_{s1}'.format(s0=shot[0], s1=shot[1])
        else:
            shot_str = str(shot)
        fname = '{shot}_{bc}.npz'.format(shot=shot_str, bc=bare_chan)
        input_data.localname = os.path.join(folder, fname)
        if pyfusion.VERBOSE > 2:
            print(folder, input_data.localname)
        files_exist = os.path.exists(input_data.localname)
        debug_(pyfusion.DEBUG, 3, key='try_local_fetch')
        if not files_exist:
            continue

        # add to this tuple anything else you wish to warn about
        lowered = input_data.localname.lower()
        intmp = any(st in lowered for st in ('tmp', 'temp'))
        if pyfusion.VERBOSE > 0 or intmp:
            if intmp:
                pyfusion.logging.warning('Using {f} in temporary directory!'
                                         .format(f=input_data.localname))
            print('found local data in {f}'. format(f=input_data.localname))
        break

    if not files_exist:
        return None

    signal_dict = newload(input_data.localname)
    is_w7x_l5 = ('params' in signal_dict
                 and 'name' in signal_dict['params']
                 and 'W7X_L5' in signal_dict['params']['name'])
    if is_w7x_l5:
        # NOTE(review): this compares version strings lexicographically -
        # confirm that is adequate for the version strings in use
        if signal_dict['params']['pyfusion_version'] < '0.6.8b':
            raise ValueError('probe assignments in error LP11-22 in {fn}'
                             .format(fn=input_data.localname))
        if np.nanmax(signal_dict['timebase']) == 0:
            pyfusion.logging.warning('making a fake timebase for {fn}'
                                     .format(fn=input_data.localname))
            # a counter in steps of 2e-6, one entry per signal sample
            unit_steps = 1.0 + 0*signal_dict['signal']
            signal_dict['timebase'] = 2e-6*np.cumsum(unit_steps)

    coords = get_coords_for_channel(**input_data.__dict__)
    ch = Channel(bare_chan, coords)
    tb = Timebase(signal_dict['timebase'])
    sig = Signal(signal_dict['signal'])
    output_data = TimeseriesData(timebase=tb, signal=sig, channels=ch)
    # For saved files use the bare channel name, not input_data.config_name
    # (which WAS the config_name coming from the raw format).
    output_data.config_name = bare_chan
    # would be nice to get to the gain here - maybe setup will get it
    output_data.meta.update({'shot': input_data.shot})
    if 'params' not in signal_dict:
        # looks like duplication of the branch below, but is not: utc is
        # only forced to None when there are no params at all
        output_data.utc = None
        output_data.params = dict(comment = 'old npz file has no params')
    else:
        output_data.params = signal_dict['params']
        if 'utc' in signal_dict['params']:
            output_data.utc = signal_dict['params']['utc']

    # record where the data came from, keeping any earlier source note
    source = 'from npz cache'
    if 'source' in output_data.params:
        source += ', originally from ' + output_data.params['source']
    output_data.params.update({'source': source})
    return output_data
Example #5
0
def read_data_from_file(fetcher):
    """Read a raw digitiser channel from disk and return it as TimeseriesData.

    Files are located from fetcher.basename: '.prm' (parameters) and '.dat'
    (raw 16 bit samples), plus '.tprm' and '.time' when fetcher.timeOK is
    set.  Otherwise the timebase is built from the clock entries of the
    .prm file.  Samples are converted with
    gain * Range / 2**bits * (raw - offset).

    Args:
        fetcher: object supplying basename, timeOK, shot, diag_name,
            channel_number, config_name and optionally gain.

    Returns:
        TimeseriesData with a dummy-coordinate Channel named
        '<diag_name>-<channel_number>', meta['shot'], config_name, and the
        .prm dictionary as params.

    Raises:
        NotImplementedError: unsupported ImageType or BinaryCoding, or no
            usable clock information in the .prm file.
        LookupError: the .prm file has neither a 'Range' nor a 'Range(V)'
            entry.
    """
    prm_dict = read_prm_file(fetcher.basename+".prm")
    bits = int(prm_dict['Resolution(bit)'][0])

    # Decide the on-disk sample type and the offset to subtract from it.
    # 'DataLength(byte)' is not read: np.fromfile takes the whole file.
    if 'ImageType' not in prm_dict:      # if so assume unsigned
        coding = 'offset_binary'
    elif prm_dict['ImageType'][0] == 'INT16':
        coding = prm_dict['BinaryCoding'][0]
    else:
        # without this, offset and dtyp would simply be undefined below
        raise NotImplementedError(' image type {it}'
                                  .format(it=prm_dict['ImageType']))

    if coding == 'offset_binary':
        offset = 2**(bits-1)
        dtyp = np.dtype('uint16')
    # "2's_complementary" was added for the VSL digitisers - not sure about it
    elif coding in ("shifted_2's_complementary", "2's_complementary"):
        offset = 0
        dtyp = np.dtype('int16')
    else:
        raise NotImplementedError(' binary coding {pd}'
                                  .format(pd=prm_dict['BinaryCoding']))

    # np.fromfile is far quicker than filling an array.array and converting
    dat_arr = np.fromfile(fetcher.basename + '.dat', dtyp)

    if fetcher.timeOK:  # we have retrieve_t data!
        # check for ArrayDataType: float is float32
        # skip is 0 as there is no initial digitiser type token
        tprm_dict = read_prm_file(fetcher.basename+".tprm", skip=0)
        if pyfusion.VERBOSE > 1:
            print(tprm_dict)
        ftype = tprm_dict['ArrayDataType'][0]
        floats = dict(float='float32', double='float64')
        timebase = np.fromfile(fetcher.basename + '.time',
                               np.dtype(floats[ftype]))

    else:  # use the info from the .prm file
        clockHz = None

        if 'SamplingClock' in prm_dict:
            clockHz = double(prm_dict['SamplingClock'][0])
        if 'SamplingInterval' in prm_dict:
            clockHz = clockHz/double(prm_dict['SamplingInterval'][0])
        if 'ClockInterval(uSec)' in prm_dict:  # VSL dig
            clockHz = 1e6/double(prm_dict['ClockInterval(uSec)'][0])
        if 'ClockSpeed' in prm_dict:
            if clockHz is not None:
                pyfusion.utils.warn('Apparent duplication of clock speed information')
            # the file value is parsed, then replaced by the per-shot value
            clockHz = double(prm_dict['ClockSpeed'][0])
            clockHz = LHD_A14_clk(fetcher.shot)

        if clockHz is None:
            debug_(pyfusion.DEBUG, level=4, key='LHD read debug')
            raise NotImplementedError("timebase not recognised")

        if 'PreSamples/Ch' in prm_dict:   # needed for "WE" e.g. VSL
            pretrig = float(prm_dict['PreSamples/Ch'][0])/clockHz
        else:
            pretrig = 0.
        timebase = arange(len(dat_arr))/clockHz - pretrig

    debug_(pyfusion.DEBUG, level=4, key='LHD read debug')
    ch = Channel("{dn}-{dc}"
                 .format(dn=fetcher.diag_name,
                         dc=fetcher.channel_number),
                 Coords('dummy', (0, 0, 0)))

    # gain is optional: a missing or non-numeric value means unity gain
    try:
        gain = float(fetcher.gain)
    except (AttributeError, TypeError, ValueError):
        gain = 1

    # sign flipping by a leading '-' in diag_name is disabled here
    flip = 1

    # not sure if this needs a factor of two for RangePolarity,Bipolar (A14)
    rng = None
    for key in ('Range', 'Range(V)'):  # equivalent alternatives
        rng = prm_dict.get(key)
        if rng is not None:
            break
    if rng is None:
        raise LookupError('no Range or Range(V) entry in {f}'
                          .format(f=fetcher.basename + ".prm"))

    scale_factor = flip*double(rng[0])/(2**bits)
    # dtype int32 is needed so that subtracting the offset from 16 bit
    # samples cannot wrap around
    output_data = TimeseriesData(timebase=Timebase(timebase),
                                 signal=Signal(scale_factor*gain*(array(dat_arr, dtype=np.int32)-offset)),
                                 channels=ch)
    output_data.meta.update({'shot': fetcher.shot})
    output_data.config_name = fetcher.config_name
    output_data.params = prm_dict
    return output_data
Example #6
0
    def do_fetch(self):
        # Definitions:
        # data_utc: is meant to be the first and last timestamps of the saved
        #        or the retrieved data if not saved. (at least from 3 march 2016)
        # shot_f, shot_t: are beginning and end of shot (from programs) at least from 3 march 2016
        # utc: seems to be same as data_utc
        # seg_f_u: up til 20 march 2020, seg_f_u appears to be the *requested* segment, (now fixed - renamed to req_f_u)
        # utc0 - only 9.9 or so.  Should be in file or set in base.py

        # In this revision, the only changes are - allow for self.time_range,
        #   variable names f, t changed to f_u, t_u (the desired data utc)
        # and comment fixes,
        # in preparation for including time_range and other cleanups.

        # My W7X shots are either of the form from_utc, to_utc,
        #  or date (8dig) and shot (progId)
        # the format is in the acquisition properties, to avoid
        # repetition in each individual diagnostic

        t_start = seconds()
        if self.shot[1] > 1e9 or hasattr(
                self, 'time_range') and self.time_range is not None:
            # we have start and end in UTC instead of shot no
            # need the shot and utc to get t1 to set zero t
            if hasattr(self, 'time_range') and self.time_range is not None:
                if self.shot[1] > 1e9:
                    raise ValueError(
                        "Can't supply shot as a utc pair and specify a time_range"
                    )
                actual_shot = self.shot
                f_u = None  # set to None to make sure we don't use it
            else:
                f_u, t_u = self.shot  #  Initialize to the requested utc range
                actual_shot = get_shot_number([f_u, t_u])

            progs = get_programs(actual_shot)  # need shot to look up progs
            #  need prog to get trigger - not tested for OP1.1
            if len(progs) > 1:
                raise LookupError(
                    'fetch: too many programs found - covers > 1 shot?')
                #this_prog = [prog for prog in progs if (f_u >= progs[prog]['from'] and
                #                                    t_u <= progs[prog]['upto'])]
            if len(progs) == 1:
                this_prog = progs.values()[0]
                # on shot 20180724,10, this trigger[1] is an empty list
                trigger = this_prog['trigger']['1']
                # This fallback to trigger[0] mean that more rubbish shots are saved than
                # if we only look at the proper trigger (1) - here is an example
                # run pyfusion/examples/plot_signals shot_number=[20180724,10] diag_name="W7X_UTDU_LP15_I" dev_name='W7X'
                if len(trigger) == 0:  # example above
                    print('** No Trigger 1 on shot {s}'.format(s=actual_shot))
                    debug_(pyfusion.DEBUG,
                           0,
                           key="noTrig1",
                           msg="No Trigger 1 found")
                    # take any that exist, at 60
                    trigger = [
                        trr[0] + int(60 * 1e9)
                        for trr in this_prog['trigger'].values()
                        if len(trr) > 0
                    ]
                    if len(trigger) == 0:
                        raise LookupError(
                            'No Triggers at all on shot {s}'.format(
                                s=actual_shot))
                utc_0 = trigger[
                    0]  # utc_0 is the first trigger (usu 61 sec ahead of data)
            else:
                print(
                    'Unable to look up programs - assuming this is a test shot'
                )
                utc_0 = f_u  #   better than nothing - probably a 'private' test/calibration shot
            if f_u is None:  # shorthand for have time range
                f_u = utc_0 + int(1e9 * (self.time_range[0]))  # + 61))
                t_u = utc_0 + int(
                    1e9 *
                    (self.time_range[1]))  # + 61)) was 61 rel to prog['from']

        else:  # self.shot is an 8,3 digit shot and time range not specified
            actual_shot = self.shot
            f_u, t_u = get_shot_utc(
                actual_shot
            )  # f_u is the start of the overall shot - i.e about plasma time -61 sec.
            # at present, ECH usually starts 61 secs after t0
            # so it is usually sufficient to request a later start than t0
            pre_trig_secs = self.pre_trig_secs if hasattr(
                self, 'pre_trig_secs') else 0.3
            # should get this from programs really - code is already above. We need to just move it.
            pyfusion.utils.warn('fetch: still using hard-coded 61 secs')
            utc_0 = f_u + int(1e9 * (61))  # utc_0 is plasma initiation in utc
            f_u = utc_0 - int(
                1e9 * pre_trig_secs)  # f_u is the first time wanted

        # make sure we have the following defined regardless of how we got here
        shot_f_u, shot_t_u = get_shot_utc(actual_shot)
        req_f_u = f_u  # req_f_u is the start of the desired data segment - sim. for req_t_u
        req_t_u = t_u
        # A URL STYLE diagnostic - used for a one-off rather than an array
        # this could be moved to setup so that the error info is more complete
        if hasattr(self, 'url'):
            fmt = self.url  # add from= further down: +'_signal.json?from={req_f_u}&upto={req_t_u}'
            params = {}
            # check consistency -   # url should be literal - no params (only for fmt) - gain, units are OK as they are not params
            if hasattr(self, 'params'):
                pyfusion.utils.warn(
                    'URL diagnostic {n} should not have params <{p}>'.format(
                        n=self.config_name, p=self.params))
        else:  # a pattern-based one - used for arrays of probes
            if hasattr(self, 'fmt'):  # does the diagnostic have one?
                fmt = self.fmt
            elif hasattr(self.acq, 'fmt'):  # else use the acq.fmt
                fmt = self.acq.fmt
            else:  # so far we have no quick way to check the server is online
                raise LookupError('no fmt - perhaps pyfusion.cfg has been '
                                  'edited because the url is not available')

            params = eval('dict(' + self.params + ')')

        # Originally added to fix up erroneous ECH alias mapping if ECH - only
        #   6 sources work if I don't.  But it seems to help with many others
        # This implementation is kludgey but proves the principle, and
        #   means we don't have to refer to any raw.. signals
        #   would be nice if they made a formal way to do this.
        if 'upto' not in fmt:
            fmt += '_signal.json?from={req_f_u}&upto={req_t_u}'

        assert req_f_u == f_u, 'req_f_u error'
        assert req_t_u == t_u, 'req_t_u error'  #
        params.update(req_f_u=req_f_u, req_t_u=req_t_u, shot_f_u=shot_f_u)
        url = fmt.format(**params)  # substitute the channel params

        debug_(pyfusion.DEBUG, 2, key="url", msg="middle of work on urls")
        if np.any([
                nm in url for nm in
                'Rf,Tower5,MainCoils,ControlCoils,TrimCoils,Mirnov,Interfer,_NBI_'
                .split(',')
        ]):
            from pyfusion.acquisition.W7X.get_url_parms import get_signal_url
            # replace the main middle bit with the expanded one from the GUI
            tgt = url.split('KKS/')[1].split('/scaled')[0].split('_signal')[0]
            # construct a time filter for the shot
            self.tgt = tgt  # for the sake of error_info
            filt = '?filterstart={req_f_u}&filterstop={req_t_u}'.format(
                **params)
            # get the url with the filter
            url = url.replace(tgt, get_signal_url(tgt, filt)).split('KKS/')[-1]
            # take the filter back out - we will add the exact one later
            url = url.replace(filt, '/')

        # nSamples now needs a reduction mechanism http://archive-webapi.ipp-hgw.mpg.de/
        # minmax is increasingly slow for nSamples>10k, 100k hopeless
        # Should ignore the test comparing the first two elements of the tb
        # prevent reduction (NSAMPLES=...) to avoid the bug presently in codac
        if (('nSamples' not in url) and (pyfusion.NSAMPLES != 0)
                and not (hasattr(self, 'allow_reduction')
                         and int(self.allow_reduction) == 0)):
            url += '&reduction=minmax&nSamples={ns}'.format(
                ns=pyfusion.NSAMPLES)

        debug_(pyfusion.DEBUG, 2, key="url", msg="work on urls")
        # we need %% in pyfusion.cfg to keep py3 happy
        # however with the new get_signal_url, this will all disappear
        if sys.version < '3.0.0' and '%%' in url:
            url = url.replace('%%', '%')

        if 'StationDesc.82' in url:  # fix spike bug in scaled QRP data
            url = url.replace('/scaled/', '/unscaled/')

        if pyfusion.CACHE:
            # Needed for improperly configured cygwin systems: e.g.IPP Virual PC
            # Perhaps this should be executed at startup of pyfusion?
            cygbin = "c:\\cygwin\\bin"
            if os.path.exists(cygbin) and not cygbin in os.environ['path']:
                os.environ['path'] += ";" + cygbin
            print('using wget on {url}'.format(url=url))
            retcode = os.system('wget -x "{url}"'.format(url=url))
            #  retcode = os.system('c:\\cygwin\\bin\\bash.exe -c "/bin/wget {url}"'.format(url=url))
            debug_(retcode != 0,
                   level=1,
                   key='wget',
                   msg="wget error or DEBUG='wget'")
            # now read from the local copy - seems like urls need full paths
            # appears to be a feature! http://stackoverflow.com/questions/7857416/file-uri-scheme-and-relative-files
            # /home/bdb112/pyfusion/working/pyfusion/archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/CoDaStationDesc.82/DataModuleDesc.181_DATASTREAM/7/Channel_7/scaled/_signal.json?from=1457626020000000000&upto=1457626080000000000&nSamples=10000
            # url = url.replace('http://','file:///home/bdb112/pyfusion/working/pyfusion/')
            url = url.replace('http://', 'file:/' + os.getcwd() + '/')
            if 'win' in os.sys.platform:
                # weven thoug it seems odd, want 'file:/c:\\cygwin\\home\\bobl\\pyfusion\\working\\pyfusion/archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/CoDaStationDesc.82/DataModuleDesc.192_DATASTREAM/4/Channel_4/scaled/_signal.json@from=147516863807215960&upto=1457516863809815961'
                url = url.replace('?', '@')
            # nicer replace - readback still fails in Win, untested on unix systems
            print('now trying the cached copy we just grabbed: {url}'.format(
                url=url))
        if (req_f_u > shot_t_u) or (req_t_u < shot_f_u):
            pyfusion.utils.warn(
                'segment requested is outside the shot times for ' +
                str(actual_shot))
        if pyfusion.VERBOSE > 0:
            print(
                '======== fetching url over {dt:.1f} secs from {fr:.1f} to {tt:.1f} =========\n[{u}]'
                .format(u=url,
                        dt=(params['req_t_u'] - params['req_f_u']) / 1e9,
                        fr=(params['req_f_u'] - shot_f_u) / 1e9,
                        tt=(params['req_t_u'] - shot_f_u) / 1e9))

        # seems to take twice as long as timeout requested.
        # haven't thought about python3 for the json stuff yet
        # This is not clean - should loop for timeout in [pyfusion.TIMEOUT, 3*pyfusion.TIMEOUT]
        try:
            # dat = json.load(urlopen(url,timeout=pyfusion.TIMEOUT)) works
            # but follow example in
            #    http://webservices.ipp-hgw.mpg.de/docs/howtoREST.html#python
            #  Some extracts in examples/howtoREST.py
            # dat = json.loads(urlopen(url,timeout=pyfusion.TIMEOUT).read().decode('utf-8'))
            t_pre = seconds()
            # for long shots, adjust strategy and timeout to reduce memory consumption
            ONE = 4  #  memory conservation tricks only apply for DEBUG<1
            # Thin allows the cutoff value to be increased in come cases
            # uncomment the following two for testing the exception handler
            ## timeout = pyfusion.TIMEOUT
            ## raise  httplib.IncompleteRead('test')
            if (req_t_u - req_f_u) / 1e9 > pyfusion.VERY_LONG:
                size_MS = 2 * (
                    req_t_u - req_f_u
                ) / 1e9  # approximate - later on calc from dt i.e. MSamples
                if pyfusion.NSAMPLES != 0:  # allow for subsampled data
                    size_MS = pyfusion.NSAMPLES / 1e6
                timeout = 8 * size_MS + pyfusion.TIMEOUT  #  don't make timeout too small!
                print('On-the-fly conversion: Setting timeout to {tmo}'.format(
                    tmo=timeout))
                dat = json.load(urlopen(url, timeout=timeout))
                t_text = seconds()
            else:
                timeout = pyfusion.TIMEOUT
                txtform = urlopen(url, timeout=timeout).read()
                t_text = seconds()
                print('{tm} {tp:.2f} prep, {tt:.2f} fetch without decode, '.
                      format(tm=time.strftime('%H:%M:%S'),
                             tp=t_pre - t_start,
                             tt=t_text - t_pre)),
                sys.stdout.flush()
                dat = json.loads(txtform.decode('utf-8'))
                if pyfusion.DEBUG < ONE:
                    txtform = None  # release memory
            t_conv = seconds()
            #  for 10MS of mirnov 0.06 prep, 9.61 fetch 19.03 conv
            #print('{tp:.2f} prep, {tt:.2f} fetch {tc:.2f} conv'.
            print('{tc:.2f} conv'.format(tp=t_pre - t_start,
                                         tt=t_text - t_pre,
                                         tc=t_conv - t_text))
        except socket.timeout as reason:  #  the following url access is a slightly different form?
            # should check if this is better tested by the URL module
            print('{tm} {tp:.2f} prep, {tt:.2f} timeout. '.format(
                tp=t_pre - t_start,
                tt=seconds() - t_pre,
                tm=time.strftime('%H:%M:%S'))),
            print(
                '****** first timeout error, try again with longer timeout  *****'
            )
            timeout *= 3
            dat = json.load(urlopen(url, timeout=timeout))
        except MemoryError as reason:
            raise  # too dangerous to do anything else except to reraise
        except httplib.IncompleteRead as reason:
            msg = str(
                '** IncompleteRead after {tinc:.0f}/{timeout:.0f}s ** on {c}: {u} \n{r}'
                .format(tinc=seconds() - t_start,
                        c=self.config_name,
                        u=url,
                        r=reason,
                        timeout=timeout))
            pyfusion.logging.error(
                msg
            )  # don't want to disturb the original exception, so raise <nothing> i.e. reraise
            raise  # possibly a memory error really? - not the case for 4114 20180912.48
        except Exception as reason:
            if pyfusion.VERBOSE >= 0:
                print(
                    '**** Exception (Memory? out of disk space?) OR timeout of {timeout} **** on {c}: {u} \n{r}'
                    .format(c=self.config_name,
                            u=url,
                            r=reason,
                            timeout=timeout))

            raise  # re raises the last exception

        # this form will default to repair = 2 for all LP probes.
        #default_repair = -1 if 'Desc.82/' in url else 0
        # Override acq.repair with the probe value
        default_repair = int(self.repair) if hasattr(
            self, 'repair') else 2 if 'Desc.82/' in url else 0
        # this form follows the config file settings
        self.repair = int(self.repair) if hasattr(self,
                                                  'repair') else default_repair
        dimraw = np.array(dat['dimensions'])
        if ('nSamples' not in url):  # skip this check if we are decimating
            if np.abs(req_f_u - dimraw[0]) > 2000:
                print(
                    '** Start is delayed by >2 us {dtdel:,} relative to the request'
                    .format(dtdel=dimraw[0] - req_f_u))
            if (req_t_u - dimraw[-1]) > 2000:
                print(
                    '** End is earlier by >2 us {dtdel:,} relative to the request'
                    .format(dtdel=req_t_u - dimraw[-1]))

        output_data_utc = [dat['dimensions'][0], dat['dimensions'][-1]]
        if pyfusion.DEBUG < ONE:
            dat['dimensions'] = None  # release memory
        # adjust dim only (not dimraw), so that zero time remains at t1
        dim = dimraw - utc_0
        # decimation with NSAMPLES will make the timebase look wrong - so disable repair
        if pyfusion.NSAMPLES != 0 or self.repair == 0 or self.repair == -1:
            pass  # leave as is
        # need at least this clipping for Langmuir probes in Op1.1
        elif self.repair == 1:
            dim = np.clip(dim, 0, 1e99)
        elif self.repair == 2:
            dim, msg = regenerate_dim(dim)
            if msg is not None:
                print('shot {s}, {c}: {m}'.format(s=self.shot,
                                                  c=self.config_name,
                                                  m=msg))
        else:
            raise ValueError(
                'repair value of {r} not understood'.format(r=self.repair))

        if pyfusion.VERBOSE > 2: print('repair', self.repair)
        #ch = Channel(self.config_name,  Coords('dummy', (0,0,0)))
        # this probably should be in base.py
        coords = get_coords_for_channel(**self.__dict__)
        # used to be bare_chan? should we include - signs?
        ch = Channel(self.config_name, coords)
        scl = 1 / 3277.24 if dat['datatype'].lower() == 'short' else 1
        if self.repair == -1:
            output_data = TimeseriesData(timebase=Timebase(dimraw),
                                         signal=scl * Signal(dat['values']),
                                         channels=ch)
        else:
            output_data = TimeseriesData(timebase=Timebase(1e-9 * dim),
                                         signal=scl * Signal(dat['values']),
                                         channels=ch)
        output_data.meta.update({'shot': self.shot})
        output_data.utc = output_data_utc  #  this copy was saved earlier so we could delete the large array to save space
        output_data.units = dat['units'] if 'units' in dat else ''
        # this is a minor duplication - at least it gets saved via params
        params['data_utc'] = output_data.utc
        params['utc_0'] = utc_0  # hopefully t0 -- useful in npz files
        # Warning - this could slow things down! - but allows
        # corrupted time to be re-calculated as algorithms improve.
        # and storage as differences takes very little space.
        params['diff_dimraw'] = dimraw
        params['diff_dimraw'][1:] = np.diff(dimraw)
        if pyfusion.DEBUG < ONE:
            dimraw = None
        # NOTE!!! need float128 to process dimraw, and cumsum won't return ints
        # or automatically promote to 128bit (neither do simple ops like *, +)
        params['pyfusion_version'] = pyfusion.version.get_version()
        if pyfusion.VERBOSE > 0:
            print('shot {s}, config name {c}'.format(c=self.config_name,
                                                     s=self.shot))

        output_data.config_name = self.config_name
        debug_(pyfusion.DEBUG, 2, key='W7XDataFetcher')
        output_data.params = params

        ###  the total shot utc.  output_data.utc = [f_u, t_u]
        return output_data