Example #1
0
    def do_fetch(self):
        # my W7X shots are of the form from_utc, to_utc 
        #  or date (8dig) and shot (progId)
        # the format is in the acquisition properties, to avoid
        # repetition in each individual diagnostic

        if self.shot[1]>1e9:  # we have start and end in UTC
            f,t = self.shot
        else:
            f,t = get_shot_utc(*self.shot)
        # A URL STYLE diagnostic - used for a one-off
        # this could be moved to setup so that the error info is more complete
        if hasattr(self,'url'):
            fmt = self.url+'_signal.json?from={shot_f}&upto={shot_t}'
            fmt = self.url+'_signal.json?from={shot_f}&upto={shot_t}&nSamples=200000'
            params = {}
        else:  # a pattern-based one - used for arrays of probes
            if hasattr(self,'fmt'):  #  does the diagnostic have one?
                fmt = self.fmt
            elif hasattr(self.acq,'fmt'):  # else use the acq.fmt
                fmt = self.acq.fmt
            else:  #  so far we have no quick way to check the server is online
                raise LookupError('no fmt - perhaps pyfusion.cfg has been '
                                  'edited because the url is not available')

            params = eval('dict('+self.params+')')

        if 'upto' not in fmt:
            fmt += '_signal.json?from={shot_f}&upto={shot_t}'

        if ('nSamples' not in fmt) and (pyfusion.NSAMPLES != 0):
            fmt += '&nSamples={ns}'.format(ns=pyfusion.NSAMPLES)

        params.update(shot_f=f, shot_t=t)
        url = fmt.format(**params)
        if pyfusion.CACHE:
            print('using wget on {url}'.format(url=url))
            os.system('wget -x "{url}"'.format(url=url))
            # now read from the local copy - it is in the wd, so only //
            # but it seems we need the full path for now
            url = url.replace('http://','file:///home/bdb112/pyfusion/working/pyfusion/')
            print('now trying the cached copy we just grabbed: {url}'.format(url=url))
        if pyfusion.VERBOSE > 0:
            print('===> fetching url {u}'.format(u=url))

        # seems to take twice as long as timeout requested.
        # haven't thought about python3 for the json stuff yet
        try:
            # dat = json.load(urlopen(url,timeout=pyfusion.TIMEOUT)) works
            # but follow example in
            #    http://webservices.ipp-hgw.mpg.de/docs/howtoREST.html#python, 
            dat = json.loads(urlopen(url,timeout=pyfusion.TIMEOUT).read().decode())
        except socket.timeout:
            # should check if this is better tested by the URL module
            print('****** first timeout error *****')
            dat = json.load(urlopen(url,timeout=3*pyfusion.TIMEOUT))
        except Exception as reason:
            if pyfusion.VERBOSE:
                print('********Exception***** on {c}: {u} \n{r}'
                      .format(c=self.config_name, u=url, r=reason))
            raise

        # this form will default to repair = 2 for all LP probes.
        default_repair = 2 if 'Desc.82/' in url else 0
        # this form follows the config file settings
        self.repair = int(self.repair) if hasattr(self, 'repair') else default_repair
        dimraw = np.array(dat['dimensions'])  
        dim = dimraw - dimraw[0]
        if self.repair == 0:
            pass # leave as is
        # need at least this clipping for Langmuir probes in Op1.1
        elif self.repair == 1:
            dim = np.clip(dim, 0, 1e99)
        elif self.repair == 2:
            dim, msg = regenerate_dim(dim)
            if msg is not None:
                print('shot {s}, {c}: {m}'
                      .format(s=self.shot, c=self.config_name, m=msg))
        else:
            raise ValueError('repair value of {r} not understood'.format(r=self.repair))

        if pyfusion.VERBOSE>2:  print('repair',self.repair)
        #ch = Channel(self.config_name,  Coords('dummy', (0,0,0)))
        # this probably should be in base.py
        coords = get_coords_for_channel(**self.__dict__)
        # used to be bare_chan? should we include - signs?
        ch = Channel(self.config_name,  coords)
        output_data = TimeseriesData(timebase=Timebase(1e-9*dim),
                                     signal=Signal(dat['values']), channels=ch)
        output_data.meta.update({'shot': self.shot})
        output_data.utc = [dat['dimensions'][0], dat['dimensions'][-1]]
        output_data.units = dat['units'] if 'units' in dat else ''
        # this is a minor duplication - at least it gets saved via params
        params['data_utc'] = output_data.utc
        # Warning - this could slow things down! - but allows corrupted time to be re-calculated as algorithms improve.
        params['diff_dimraw'] = dimraw
        params['diff_dimraw'][1:] = np.diff(dimraw)
        params['pyfusion_version'] = pyfusion.version.get_version()
        if pyfusion.VERBOSE > 0:
            print('shot {s}, config name {c}'
                  .format(c=self.config_name, s=self.shot))

        output_data.config_name = self.config_name
        debug_(pyfusion.DEBUG, 2, key='W7XDataFetcher')
        output_data.params = params
        
        ###  the total shot utc.  output_data.utc = [f, t]
        return output_data
Example #2
0
    def do_fetch(self):
        # Definitions:
        # data_utc: is meant to be the first and last timestamps of the saved
        #        or the retrieved data if not saved. (at least from 3 march 2016)
        # shot_f, shot_t: are beginning and end of shot (from programs) at least from 3 march 2016
        # utc: seems to be same as data_utc
        # seg_f_u: up til 20 march 2020, seg_f_u appears to be the *requested* segment, (now fixed - renamed to req_f_u)
        # utc0 - only 9.9 or so.  Should be in file or set in base.py

        # In this revision, the only changes are - allow for self.time_range,
        #   variable names f, t changed to f_u, t_u (the desired data utc)
        # and comment fixes,
        # in preparation for including time_range and other cleanups.

        # My W7X shots are either of the form from_utc, to_utc,
        #  or date (8dig) and shot (progId)
        # the format is in the acquisition properties, to avoid
        # repetition in each individual diagnostic

        t_start = seconds()
        if self.shot[1] > 1e9 or hasattr(
                self, 'time_range') and self.time_range is not None:
            # we have start and end in UTC instead of shot no
            # need the shot and utc to get t1 to set zero t
            if hasattr(self, 'time_range') and self.time_range is not None:
                if self.shot[1] > 1e9:
                    raise ValueError(
                        "Can't supply shot as a utc pair and specify a time_range"
                    )
                actual_shot = self.shot
                f_u = None  # set to None to make sure we don't use it
            else:
                f_u, t_u = self.shot  #  Initialize to the requested utc range
                actual_shot = get_shot_number([f_u, t_u])

            progs = get_programs(actual_shot)  # need shot to look up progs
            #  need prog to get trigger - not tested for OP1.1
            if len(progs) > 1:
                raise LookupError(
                    'fetch: too many programs found - covers > 1 shot?')
                #this_prog = [prog for prog in progs if (f_u >= progs[prog]['from'] and
                #                                    t_u <= progs[prog]['upto'])]
            if len(progs) == 1:
                this_prog = progs.values()[0]
                # on shot 20180724,10, this trigger[1] is an empty list
                trigger = this_prog['trigger']['1']
                # This fallback to trigger[0] mean that more rubbish shots are saved than
                # if we only look at the proper trigger (1) - here is an example
                # run pyfusion/examples/plot_signals shot_number=[20180724,10] diag_name="W7X_UTDU_LP15_I" dev_name='W7X'
                if len(trigger) == 0:  # example above
                    print('** No Trigger 1 on shot {s}'.format(s=actual_shot))
                    debug_(pyfusion.DEBUG,
                           0,
                           key="noTrig1",
                           msg="No Trigger 1 found")
                    # take any that exist, at 60
                    trigger = [
                        trr[0] + int(60 * 1e9)
                        for trr in this_prog['trigger'].values()
                        if len(trr) > 0
                    ]
                    if len(trigger) == 0:
                        raise LookupError(
                            'No Triggers at all on shot {s}'.format(
                                s=actual_shot))
                utc_0 = trigger[
                    0]  # utc_0 is the first trigger (usu 61 sec ahead of data)
            else:
                print(
                    'Unable to look up programs - assuming this is a test shot'
                )
                utc_0 = f_u  #   better than nothing - probably a 'private' test/calibration shot
            if f_u is None:  # shorthand for have time range
                f_u = utc_0 + int(1e9 * (self.time_range[0]))  # + 61))
                t_u = utc_0 + int(
                    1e9 *
                    (self.time_range[1]))  # + 61)) was 61 rel to prog['from']

        else:  # self.shot is an 8,3 digit shot and time range not specified
            actual_shot = self.shot
            f_u, t_u = get_shot_utc(
                actual_shot
            )  # f_u is the start of the overall shot - i.e about plasma time -61 sec.
            # at present, ECH usually starts 61 secs after t0
            # so it is usually sufficient to request a later start than t0
            pre_trig_secs = self.pre_trig_secs if hasattr(
                self, 'pre_trig_secs') else 0.3
            # should get this from programs really - code is already above. We need to just move it.
            pyfusion.utils.warn('fetch: still using hard-coded 61 secs')
            utc_0 = f_u + int(1e9 * (61))  # utc_0 is plasma initiation in utc
            f_u = utc_0 - int(
                1e9 * pre_trig_secs)  # f_u is the first time wanted

        # make sure we have the following defined regardless of how we got here
        shot_f_u, shot_t_u = get_shot_utc(actual_shot)
        req_f_u = f_u  # req_f_u is the start of the desired data segment - sim. for req_t_u
        req_t_u = t_u
        # A URL STYLE diagnostic - used for a one-off rather than an array
        # this could be moved to setup so that the error info is more complete
        if hasattr(self, 'url'):
            fmt = self.url  # add from= further down: +'_signal.json?from={req_f_u}&upto={req_t_u}'
            params = {}
            # check consistency -   # url should be literal - no params (only for fmt) - gain, units are OK as they are not params
            if hasattr(self, 'params'):
                pyfusion.utils.warn(
                    'URL diagnostic {n} should not have params <{p}>'.format(
                        n=self.config_name, p=self.params))
        else:  # a pattern-based one - used for arrays of probes
            if hasattr(self, 'fmt'):  # does the diagnostic have one?
                fmt = self.fmt
            elif hasattr(self.acq, 'fmt'):  # else use the acq.fmt
                fmt = self.acq.fmt
            else:  # so far we have no quick way to check the server is online
                raise LookupError('no fmt - perhaps pyfusion.cfg has been '
                                  'edited because the url is not available')

            params = eval('dict(' + self.params + ')')

        # Originally added to fix up erroneous ECH alias mapping if ECH - only
        #   6 sources work if I don't.  But it seems to help with many others
        # This implementation is kludgey but proves the principle, and
        #   means we don't have to refer to any raw.. signals
        #   would be nice if they made a formal way to do this.
        if 'upto' not in fmt:
            fmt += '_signal.json?from={req_f_u}&upto={req_t_u}'

        assert req_f_u == f_u, 'req_f_u error'
        assert req_t_u == t_u, 'req_t_u error'  #
        params.update(req_f_u=req_f_u, req_t_u=req_t_u, shot_f_u=shot_f_u)
        url = fmt.format(**params)  # substitute the channel params

        debug_(pyfusion.DEBUG, 2, key="url", msg="middle of work on urls")
        if np.any([
                nm in url for nm in
                'Rf,Tower5,MainCoils,ControlCoils,TrimCoils,Mirnov,Interfer,_NBI_'
                .split(',')
        ]):
            from pyfusion.acquisition.W7X.get_url_parms import get_signal_url
            # replace the main middle bit with the expanded one from the GUI
            tgt = url.split('KKS/')[1].split('/scaled')[0].split('_signal')[0]
            # construct a time filter for the shot
            self.tgt = tgt  # for the sake of error_info
            filt = '?filterstart={req_f_u}&filterstop={req_t_u}'.format(
                **params)
            # get the url with the filter
            url = url.replace(tgt, get_signal_url(tgt, filt)).split('KKS/')[-1]
            # take the filter back out - we will add the exact one later
            url = url.replace(filt, '/')

        # nSamples now needs a reduction mechanism http://archive-webapi.ipp-hgw.mpg.de/
        # minmax is increasingly slow for nSamples>10k, 100k hopeless
        # Should ignore the test comparing the first two elements of the tb
        # prevent reduction (NSAMPLES=...) to avoid the bug presently in codac
        if (('nSamples' not in url) and (pyfusion.NSAMPLES != 0)
                and not (hasattr(self, 'allow_reduction')
                         and int(self.allow_reduction) == 0)):
            url += '&reduction=minmax&nSamples={ns}'.format(
                ns=pyfusion.NSAMPLES)

        debug_(pyfusion.DEBUG, 2, key="url", msg="work on urls")
        # we need %% in pyfusion.cfg to keep py3 happy
        # however with the new get_signal_url, this will all disappear
        if sys.version < '3.0.0' and '%%' in url:
            url = url.replace('%%', '%')

        if 'StationDesc.82' in url:  # fix spike bug in scaled QRP data
            url = url.replace('/scaled/', '/unscaled/')

        if pyfusion.CACHE:
            # Needed for improperly configured cygwin systems: e.g.IPP Virual PC
            # Perhaps this should be executed at startup of pyfusion?
            cygbin = "c:\\cygwin\\bin"
            if os.path.exists(cygbin) and not cygbin in os.environ['path']:
                os.environ['path'] += ";" + cygbin
            print('using wget on {url}'.format(url=url))
            retcode = os.system('wget -x "{url}"'.format(url=url))
            #  retcode = os.system('c:\\cygwin\\bin\\bash.exe -c "/bin/wget {url}"'.format(url=url))
            debug_(retcode != 0,
                   level=1,
                   key='wget',
                   msg="wget error or DEBUG='wget'")
            # now read from the local copy - seems like urls need full paths
            # appears to be a feature! http://stackoverflow.com/questions/7857416/file-uri-scheme-and-relative-files
            # /home/bdb112/pyfusion/working/pyfusion/archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/CoDaStationDesc.82/DataModuleDesc.181_DATASTREAM/7/Channel_7/scaled/_signal.json?from=1457626020000000000&upto=1457626080000000000&nSamples=10000
            # url = url.replace('http://','file:///home/bdb112/pyfusion/working/pyfusion/')
            url = url.replace('http://', 'file:/' + os.getcwd() + '/')
            if 'win' in os.sys.platform:
                # weven thoug it seems odd, want 'file:/c:\\cygwin\\home\\bobl\\pyfusion\\working\\pyfusion/archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/CoDaStationDesc.82/DataModuleDesc.192_DATASTREAM/4/Channel_4/scaled/_signal.json@from=147516863807215960&upto=1457516863809815961'
                url = url.replace('?', '@')
            # nicer replace - readback still fails in Win, untested on unix systems
            print('now trying the cached copy we just grabbed: {url}'.format(
                url=url))
        if (req_f_u > shot_t_u) or (req_t_u < shot_f_u):
            pyfusion.utils.warn(
                'segment requested is outside the shot times for ' +
                str(actual_shot))
        if pyfusion.VERBOSE > 0:
            print(
                '======== fetching url over {dt:.1f} secs from {fr:.1f} to {tt:.1f} =========\n[{u}]'
                .format(u=url,
                        dt=(params['req_t_u'] - params['req_f_u']) / 1e9,
                        fr=(params['req_f_u'] - shot_f_u) / 1e9,
                        tt=(params['req_t_u'] - shot_f_u) / 1e9))

        # seems to take twice as long as timeout requested.
        # haven't thought about python3 for the json stuff yet
        # This is not clean - should loop for timeout in [pyfusion.TIMEOUT, 3*pyfusion.TIMEOUT]
        try:
            # dat = json.load(urlopen(url,timeout=pyfusion.TIMEOUT)) works
            # but follow example in
            #    http://webservices.ipp-hgw.mpg.de/docs/howtoREST.html#python
            #  Some extracts in examples/howtoREST.py
            # dat = json.loads(urlopen(url,timeout=pyfusion.TIMEOUT).read().decode('utf-8'))
            t_pre = seconds()
            # for long shots, adjust strategy and timeout to reduce memory consumption
            ONE = 4  #  memory conservation tricks only apply for DEBUG<1
            # Thin allows the cutoff value to be increased in come cases
            # uncomment the following two for testing the exception handler
            ## timeout = pyfusion.TIMEOUT
            ## raise  httplib.IncompleteRead('test')
            if (req_t_u - req_f_u) / 1e9 > pyfusion.VERY_LONG:
                size_MS = 2 * (
                    req_t_u - req_f_u
                ) / 1e9  # approximate - later on calc from dt i.e. MSamples
                if pyfusion.NSAMPLES != 0:  # allow for subsampled data
                    size_MS = pyfusion.NSAMPLES / 1e6
                timeout = 8 * size_MS + pyfusion.TIMEOUT  #  don't make timeout too small!
                print('On-the-fly conversion: Setting timeout to {tmo}'.format(
                    tmo=timeout))
                dat = json.load(urlopen(url, timeout=timeout))
                t_text = seconds()
            else:
                timeout = pyfusion.TIMEOUT
                txtform = urlopen(url, timeout=timeout).read()
                t_text = seconds()
                print('{tm} {tp:.2f} prep, {tt:.2f} fetch without decode, '.
                      format(tm=time.strftime('%H:%M:%S'),
                             tp=t_pre - t_start,
                             tt=t_text - t_pre)),
                sys.stdout.flush()
                dat = json.loads(txtform.decode('utf-8'))
                if pyfusion.DEBUG < ONE:
                    txtform = None  # release memory
            t_conv = seconds()
            #  for 10MS of mirnov 0.06 prep, 9.61 fetch 19.03 conv
            #print('{tp:.2f} prep, {tt:.2f} fetch {tc:.2f} conv'.
            print('{tc:.2f} conv'.format(tp=t_pre - t_start,
                                         tt=t_text - t_pre,
                                         tc=t_conv - t_text))
        except socket.timeout as reason:  #  the following url access is a slightly different form?
            # should check if this is better tested by the URL module
            print('{tm} {tp:.2f} prep, {tt:.2f} timeout. '.format(
                tp=t_pre - t_start,
                tt=seconds() - t_pre,
                tm=time.strftime('%H:%M:%S'))),
            print(
                '****** first timeout error, try again with longer timeout  *****'
            )
            timeout *= 3
            dat = json.load(urlopen(url, timeout=timeout))
        except MemoryError as reason:
            raise  # too dangerous to do anything else except to reraise
        except httplib.IncompleteRead as reason:
            msg = str(
                '** IncompleteRead after {tinc:.0f}/{timeout:.0f}s ** on {c}: {u} \n{r}'
                .format(tinc=seconds() - t_start,
                        c=self.config_name,
                        u=url,
                        r=reason,
                        timeout=timeout))
            pyfusion.logging.error(
                msg
            )  # don't want to disturb the original exception, so raise <nothing> i.e. reraise
            raise  # possibly a memory error really? - not the case for 4114 20180912.48
        except Exception as reason:
            if pyfusion.VERBOSE >= 0:
                print(
                    '**** Exception (Memory? out of disk space?) OR timeout of {timeout} **** on {c}: {u} \n{r}'
                    .format(c=self.config_name,
                            u=url,
                            r=reason,
                            timeout=timeout))

            raise  # re raises the last exception

        # this form will default to repair = 2 for all LP probes.
        #default_repair = -1 if 'Desc.82/' in url else 0
        # Override acq.repair with the probe value
        default_repair = int(self.repair) if hasattr(
            self, 'repair') else 2 if 'Desc.82/' in url else 0
        # this form follows the config file settings
        self.repair = int(self.repair) if hasattr(self,
                                                  'repair') else default_repair
        dimraw = np.array(dat['dimensions'])
        if ('nSamples' not in url):  # skip this check if we are decimating
            if np.abs(req_f_u - dimraw[0]) > 2000:
                print(
                    '** Start is delayed by >2 us {dtdel:,} relative to the request'
                    .format(dtdel=dimraw[0] - req_f_u))
            if (req_t_u - dimraw[-1]) > 2000:
                print(
                    '** End is earlier by >2 us {dtdel:,} relative to the request'
                    .format(dtdel=req_t_u - dimraw[-1]))

        output_data_utc = [dat['dimensions'][0], dat['dimensions'][-1]]
        if pyfusion.DEBUG < ONE:
            dat['dimensions'] = None  # release memory
        # adjust dim only (not dim_raw so that zero time remains at t1
        dim = dimraw - utc_0
        # decimation with NSAMPLES will make the timebase look wrong - so disable repair
        if pyfusion.NSAMPLES != 0 or self.repair == 0 or self.repair == -1:
            pass  # leave as is
        # need at least this clipping for Langmuir probes in Op1.1
        elif self.repair == 1:
            dim = np.clip(dim, 0, 1e99)
        elif self.repair == 2:
            dim, msg = regenerate_dim(dim)
            if msg is not None:
                print('shot {s}, {c}: {m}'.format(s=self.shot,
                                                  c=self.config_name,
                                                  m=msg))
        else:
            raise ValueError(
                'repair value of {r} not understood'.format(r=self.repair))

        if pyfusion.VERBOSE > 2: print('repair', self.repair)
        #ch = Channel(self.config_name,  Coords('dummy', (0,0,0)))
        # this probably should be in base.py
        coords = get_coords_for_channel(**self.__dict__)
        # used to be bare_chan? should we include - signs?
        ch = Channel(self.config_name, coords)
        scl = 1 / 3277.24 if dat['datatype'].lower() == 'short' else 1
        if self.repair == -1:
            output_data = TimeseriesData(timebase=Timebase(dimraw),
                                         signal=scl * Signal(dat['values']),
                                         channels=ch)
        else:
            output_data = TimeseriesData(timebase=Timebase(1e-9 * dim),
                                         signal=scl * Signal(dat['values']),
                                         channels=ch)
        output_data.meta.update({'shot': self.shot})
        output_data.utc = output_data_utc  #  this copy was saved earlier so we could delete the large array to save space
        output_data.units = dat['units'] if 'units' in dat else ''
        # this is a minor duplication - at least it gets saved via params
        params['data_utc'] = output_data.utc
        params['utc_0'] = utc_0  # hopefully t0 -- useful in npz files
        # Warning - this could slow things down! - but allows
        # corrupted time to be re-calculated as algorithms improve.
        # and storage as differences takes very little space.
        params['diff_dimraw'] = dimraw
        params['diff_dimraw'][1:] = np.diff(dimraw)
        if pyfusion.DEBUG < ONE:
            dimraw = None
        # NOTE!!! need float128 to process dimraw, and cumsum won't return ints
        # or automatically promote to 128bit (neither do simple ops like *, +)
        params['pyfusion_version'] = pyfusion.version.get_version()
        if pyfusion.VERBOSE > 0:
            print('shot {s}, config name {c}'.format(c=self.config_name,
                                                     s=self.shot))

        output_data.config_name = self.config_name
        debug_(pyfusion.DEBUG, 2, key='W7XDataFetcher')
        output_data.params = params

        ###  the total shot utc.  output_data.utc = [f_u, t_u]
        return output_data