def save(self, filename, verbose=None, use_dictionary=False):
    """ Save as an npz file, using an incremental method, which only uses
    as much /tmp space as required by each var at a time.

    If use_dictionary is a valid dictionary, save the values of
    ANY AND ONLY the LOCAL variables whose names are in the keys for this
    set.  So if you have extracted a subset, and you specify
    use_dictionary=locals(), only that subset is saved (both in array
    length, and variables chosen).  Beware locals that are not your
    variables - e.g. mtrand.beta
    """
    if verbose is None:
        verbose = self.verbose
    st = seconds()
    if use_dictionary is False:
        save_dict = self.da  # the dict used to get data
    else:
        save_dict = use_dictionary
        print('Warning - saving only a subset')
    use_keys = []
    for k in self.da.keys():
        if k in save_dict.keys():
            use_keys.append(k)
    if verbose:
        print(' Saving only {k}'.format(k=use_keys))
    args = ','.join(["{k}=save_dict['{k}']".format(k=k) for k in use_keys])
    if verbose:
        print('lengths: {0} -999 indicates dodgy variable'
              .format([mylen(save_dict[k]) for k in use_keys]))
    exec("np.savez_compressed(filename," + args + ")")
    self.name = filename
    if verbose:
        print(' in {dt:.1f} secs'.format(dt=seconds() - st))
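# Hedged usage sketch (file names illustrative): save a subset of a DA
# (dictionary of arrays) by extracting the wanted variables into locals()
# first, then passing use_dictionary=locals().
import numpy as np
from pyfusion.data.DA_datamining import DA  # assumed import path

da = DA('PF2_diags.npz')
shot, t_mid = da['shot'], da['t_mid']                # extract the subset
da.save('/tmp/subset.npz', use_dictionary=locals())  # saves only shot, t_mid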
def load(self):
    start_mem = report_mem(msg='load')
    st = seconds()
    if self.verbose > 0:
        print('loading {nm}'.format(nm=self.name), end='')
    if self.loaded:
        if self.verbose:
            print('{nm} already loaded'.format(nm=self.name))  # was a bare string, never printed
    else:
        dd = {}
        for k in self.da.keys():
            dd.update({k: self.da[k]})
            # dictionaries get stored as an object, need "to list"
            if hasattr(dd[k], 'dtype'):
                if dd[k].dtype == np.dtype('object'):
                    dd[k] = dd[k].tolist()
                    if self.verbose:
                        print('object conversion for {k}'.format(k=k))
                if (hasattr(dd[k], 'dtype') and
                        dd[k].dtype == np.dtype('object')):
                    dd[k] = dd[k].tolist()
                    print('*** Second object conversion for {k}!!!'
                          .format(k=k))
        self.da = dd  # only replace da if we actually loaded
    if self.verbose:
        print(' in {dt:.1f} secs'.format(dt=seconds() - st))
    report_mem(start_mem)
def dist_mp(cl_instance, instances, squared=None, averaged=None,
            threads=None, debug=0):
    """ multithreaded distance calculation using dist.pyx and nogil version
    set debug>=1 to do a comparison against the single processor, GIL version
    >>> import numpy as np
    >>> x = dist_mp(np.random.random((14,)).astype(np.float32),
    ...             np.random.random((1000, 14)).astype(np.float32),
    ...             debug=1, threads=3)
    """
    if threads is None:
        threads = 3
    n_workers = threads
    workers = []
    for w in range(n_workers):
        workers.append(ThreadWorker(dist))
    # allow 1 more, it may be short or empty, no prob.
    chunk = 1 + len(instances) // n_workers
    st = seconds()
    for (w, worker) in enumerate(workers):
        # print worker.start([big[w*chunk:(w+1)*chunk],2])
        worker.start([cl_instance, instances[w * chunk:(w + 1) * chunk, :]],
                     squared=squared, averaged=averaged, debug=debug)
        # NO! not this: kwargs=dict(squared=squared, debug=debug)
    while 'running' in [worker.status() for worker in workers]:
        pass  # busy-wait until all workers finish
        # print('waiting'),
    x = []
    for worker in workers:
        x = np.append(x, worker.get_results())
    tm = seconds() - st
    if debug > 0:
        from pyfusion.utils.dist import dist as dist_gil
        from numpy.testing import assert_array_almost_equal
        # extra \n to avoid thread output overwriting (mostly)
        if debug > 1:
            print('\ncheck with safer, single thread version of dist_nogil'
                  ' - no output==OK')
        x1 = dist_gil(cl_instance, instances, averaged=averaged,
                      squared=squared, debug=debug)
        assert_array_almost_equal(x, x1)
    print('took {dt:0.3g} sec for {w} workers'.format(dt=tm, w=n_workers))
    return x
def read_text_pyfusion(files, target='^Shot .*', ph_dtype=None,
                       plot=pl.isinteractive(), ms=100, hold=0,
                       debug=0, quiet=1, exception=Exception):
    """ Accepts a file or a list of files, returns a list of structured arrays
    See merge ds_list to merge and convert types (float -> pyfusion.prec_med)
    """
    st = seconds(); last_update = seconds()
    file_list = files
    if len(np.shape(files)) == 0:
        file_list = [file_list]
    f = 'f8'
    if ph_dtype is None:
        ph_dtype = [('p12', f), ('p23', f), ('p34', f), ('p45', f), ('p56', f)]
        # ph_dtype = [('p12',f)]
    ds_list = []
    comment_list = []
    count = 0
    for (i, filename) in enumerate(file_list):
        if seconds() - last_update > 30:
            last_update = seconds()
            print('reading {n}/{t}: {f}'
                  .format(f=filename, n=i, t=len(file_list)))
        try:
            if pl.is_string_like(target):
                skip = 1 + find_data(filename, target, debug=debug)
            else:
                skip = target
            if quiet == 0:
                print('{t:.1f} sec, loading data from line {s} of {f}'
                      .format(t=seconds() - st, s=skip, f=filename))
            # this little bit to determine layout of data
            # very inefficient to read twice, but in a hurry!
            txt = np.loadtxt(fname=filename, skiprows=skip - 1, dtype=str,
                             delimiter='FOOBARWOOBAR')
            header_toks = txt[0].split()
            # is the first character of the 2nd last a digit?
            if header_toks[-2][0] in '0123456789':
                if pyfusion.VERBOSE > 0:
                    print('found new header including number of phases')
                n_phases = int(header_toks[-2])
                ph_dtype = [('p{n}{np1}'.format(n=n, np1=n + 1), f)
                            for n in range(n_phases)]
            if 'frlow' in header_toks:  # add the two extra fields
                fs_dtype = [('shot', 'i8'), ('t_mid', 'f8'),
                            ('_binary_svs', 'i8'), ('freq', 'f8'),
                            ('amp', 'f8'), ('a12', 'f8'), ('p', 'f8'),
                            ('H', 'f8'), ('frlow', 'f8'), ('frhigh', 'f8'),
                            ('phases', ph_dtype)]
            else:
                fs_dtype = [('shot', 'i8'), ('t_mid', 'f8'),
                            ('_binary_svs', 'i8'), ('freq', 'f8'),
                            ('amp', 'f8'), ('a12', 'f8'), ('p', 'f8'),
                            ('H', 'f8'), ('phases', ph_dtype)]
            ds_list.append(np.loadtxt(fname=filename, skiprows=skip,
                                      dtype=fs_dtype))
            count += 1
            comment_list.append(filename)
        except ValueError as info:  # was py2 'except ValueError, info'
            print('Conversion error while reading {f} with loadtxt - {info}'
                  .format(f=filename, info=info))
def check_time_order(dd, allow_equal=True):
    shot = dd['shot']
    last_check = seconds()
    for s in np.unique(dd['shot']):
        ws = np.where(s == shot)[0]
        mindiff = np.min(np.diff(dd['t_mid'][ws]))
        # allow_equal was previously ignored; reject ties when it is False
        if (mindiff < 0) or (not allow_equal and mindiff == 0):
            print('time order problem in ' + str(s))
        else:
            print('OK', end=' ')
        if seconds() - last_check > 20:
            print('\n')
            last_check = seconds()
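# Hedged usage sketch (file name illustrative): verify t_mid is monotonic
# within each shot of a loaded DA dictionary.
from pyfusion.data.DA_datamining import DA  # assumed import path

dd = DA('PF2_diags.npz').da
check_time_order(dd, allow_equal=True)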
def get_fftw3_speed(arr, iters=10, direction=None, dtype=np.float32, **kwargs):
    """ measure the fftw3 speed for various data sizes by using plan with
    estimate, and running one instance.
    If arr is an int, the elements are different array sizes, otherwise use
    the array.  direction default - just fwd - use 'both' for both.
    To "train" - the print allows you to ^c out:

        from pyfusion.utils.primefactors import get_fftw3_speed
        from pyfusion.utils.fftw3_bdb_utils import save_wisdom, load_wisdom
        from pyfusion.data.filters import next_nice_number
        for n in next_nice_number(None):
            print(n)
            get_fftw3_speed(n, flags=['FFTW_MEASURE'], direction='both',
                            dtype=np.float32)
        save_wisdom()

    Accepts all pyfftw.FFTW args e.g. planning_timelimit
    """
    import pyfftw
    from time import time as seconds
    if np.isscalar(arr):
        arr = np.array([arr])
    if np.issubdtype(arr.dtype, int):
        atimes = []
        for n in arr:
            atimes.append([n, get_fftw3_speed(np.ones(n, dtype=dtype),
                                              direction=direction,
                                              iters=iters, **kwargs)])
        return np.array(atimes)
    else:  # do one example
        build_kwargs = dict(flags=['FFTW_ESTIMATE'])
        build_kwargs.update(kwargs)
        print(build_kwargs)
        simd_align = pyfftw.simd_alignment  # 16 at the moment.
        arr = pyfftw.n_byte_align(arr, simd_align)
        out = pyfftw.n_byte_align(np.ones(len(arr) // 2 + 1,  # // avoids a float size in py3
                                          dtype=np.complex64), simd_align)
        fwd = pyfftw.FFTW(arr, out, **build_kwargs)
        if direction == 'both':
            rev = pyfftw.FFTW(out, arr, direction='FFTW_BACKWARD',
                              **build_kwargs)
        st = seconds()
        for i in range(iters):
            fwd.execute()
            if direction == 'both':
                rev.execute()
        return (seconds() - st) / iters
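# Hedged usage sketch (requires pyfftw): compare per-transform times for a
# few array sizes - the int-array branch returns rows of [n, secs].
import numpy as np

sizes = np.array([1024, 4096, 16384])
times = get_fftw3_speed(sizes, iters=100)
for n, t in times:
    print('{n:>8.0f}: {us:.1f} us'.format(n=n, us=1e6 * t))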
def report_mem(prev_values=None, msg=None):
    """ Show status of phy/virt mem, and if given previous values,
    report differences.
    Requires the psutil module - avail in apt-get, but I used pip.
    """
    if msg is None:
        msg = ''
    else:
        msg += ': '
    if type(prev_values) == type(''):  # catch error in prev_values
        msg += prev_values
        prev_values = None
        print('need msg= in call to report_mem')
    # available in linux is free + buffers + cached
    if hasattr(psutil, 'swap_memory'):  # new version (0.7+)
        pm = psutil.virtual_memory().available  # was psutil.avail_phymem()
        vm = psutil.swap_memory().free          # was psutil.avail_virtmem()
    else:
        pm = psutil.phymem_usage().free   # was .avail_phymem()
        vm = psutil.virtmem_usage().free  # avail_virtmem()
    tim = seconds()
    print('{msg}{pm:.3g} GB phys mem, {vm:.3g} GB virt mem avail'
          .format(msg=msg, pm=pm / 1e9, vm=vm / 1e9), end='')
    if prev_values is None:
        print()
    else:
        print('- dt={dt:.2g}s, used {pm:.2g} GB phys, {vm:.2g} GB virt'
              .format(pm=(prev_values[0] - pm) / 1e9,
                      vm=(prev_values[1] - vm) / 1e9,
                      dt=tim - prev_values[2]))
    return (pm, vm, tim)
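# Hedged usage sketch: bracket a block of work to report the memory it used.
before = report_mem(msg='before load')           # returns (phys, virt, time)
big = [list(range(100000)) for _ in range(50)]   # some allocation to measure
report_mem(before, msg='after load')             # prints dt and GB consumed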
def timeinfo(message, outstream=sys.stdout):
    if show_times == 0:
        return
    global st, st_0
    try:
        dt = seconds() - st
    except NameError:  # st is not defined until the first call
        outstream.writelines('Time:' + message + '(first call)\n')
        st = seconds()
        st_0 = seconds()
        return
    msgstr = str('Time: {m} in {dt:.2f}/{t:.2f}\n'
                 .format(m=message, dt=dt, t=seconds() - st_0))
    outstream.writelines(msgstr)
    pyfusion.logging.info(msgstr)
    st = seconds()
    return
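# Hedged usage sketch: instrument two stages of a script.  timeinfo reads
# the module-level switch show_times; do_stage_one is a hypothetical stage.
show_times = 1
timeinfo('start')      # first call only initialises the timers
do_stage_one()         # hypothetical work to be timed
timeinfo('stage one')  # prints time since last call / since first call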
def wait(self):
    self.count += 1
    if self.maxcpu == 1:
        return ()
    avg_time = (seconds() - self.st - self.waited) / self.count
    self.last_wait = max(avg_time * (1 - self.maxcpu) / self.maxcpu, 0.01)
    print(avg_time, self.last_wait, self.count)  # diagnostic output
    self.waited += self.last_wait
    sleep(self.last_wait)
def load(self, sel=None):
    start_mem = report_mem(msg='load')
    st = seconds()
    if sel is None:  # the arg overrides the object value
        sel = self.sel
    else:
        print('Overriding any decimation - selecting {s:,} instances'
              .format(s=len(sel)))
        self.sel = sel
    if self.verbose > 0:
        print('loading {nm}'.format(nm=self.name), end='')
    if self.loaded:
        if self.verbose:
            print('{nm} already loaded'.format(nm=self.name))  # was a bare string, never printed
    else:
        dd = {}
        for k in self.da.keys():
            if sel is None:
                dd.update({k: self.da[k]})
            else:  # selective (decimated to limit)
                try:
                    dd.update({k: self.da[k][sel]})
                except Exception as reason:
                    dd.update({k: self.da[k]})
                    print('{k} loaded in full: {reason}'
                          .format(k=k, reason=reason))
            # dictionaries get stored as an object, need "to list"
            debug_(self.debug, 2, key='limit')
            if hasattr(dd[k], 'dtype'):
                if dd[k].dtype == np.dtype('object'):
                    dd[k] = dd[k].tolist()
                    if self.verbose:
                        print('object conversion for {k}'.format(k=k))
                if (hasattr(dd[k], 'dtype') and
                        dd[k].dtype == np.dtype('object')):
                    dd[k] = dd[k].tolist()
                    print('*** Second object conversion for {k}!!!'
                          .format(k=k))
        # key 'info' should be replaced by the more up-to-date self. copy
        self.da = dd
        self.update({'info': self.infodict}, check=False)
    if self.verbose:
        print(' in {dt:.1f} secs'.format(dt=seconds() - st))
    report_mem(start_mem)
    return True
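# Hedged usage sketch: decimate a large DA file to every 10th instance.
# The load=0 kwarg and the lazy access of da.da['shot'] before load(sel=...)
# are assumptions about the DA constructor.
import numpy as np
from pyfusion.data.DA_datamining import DA  # assumed import path

da = DA('big_file.npz', load=0)    # assumed: defer the full load
nrec = len(da.da['shot'])          # length from the not-yet-converted dict
sel = np.arange(0, nrec, 10)       # indices of every 10th instance
da.load(sel=sel)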
def __init__(self, maxcpu=0.5):
    self.maxcpu = max(min(maxcpu, 1), 0.01)  # clip to [0.01, 1]
    if self.maxcpu != maxcpu:
        print('changed maxcpu from {i} to {a}'
              .format(i=maxcpu, a=self.maxcpu))
    self.count = 0
    self.st = seconds()
    self.last_wait = 0
    self.waited = 0
def report_mem(prev_values=None, msg=None):
    """ Show status of phy/virt mem, and if given previous values,
    report differences.
    Requires the psutil module (older psutil API - see the 0.7+ version
    above) - avail in apt-get, but I used pip.
    """
    if msg is None:
        msg = ''
    else:
        msg += ': '
    pm = psutil.avail_phymem()
    vm = psutil.avail_virtmem()
    tim = seconds()
    print('{msg}{pm:.2g} GB phys mem, {vm:.2g} GB virt mem avail'
          .format(msg=msg, pm=pm / 1e9, vm=vm / 1e9), end='')
    if prev_values is None:
        print()  # was a bare 'print', a no-op under py3
    else:
        print('- dt={dt:.2g}s, used {pm:.2g} GB phys, {vm:.2g} GB virt'
              .format(pm=(prev_values[0] - pm) / 1e9,
                      vm=(prev_values[1] - vm) / 1e9,
                      dt=tim - prev_values[2]))
    return (pm, vm, tim)
def store_index(self, time, index):
    """Store information about:
    * Time-stamp
    * Total intensity
    * Beam center
    * Estimated Particle Size
    * Estimated particle nr
    """
    self.tot_t[index] = time.time()
    self.tot_s[index] = time.seconds()
    self.tot_ns[index] = time.nanoseconds()
    self.tot_fd[index] = time.fiducial()
    self.tot_int[index] = float(self.img.sum())
    self.tot_cx[index] = self.cent[0]
    self.tot_cy[index] = self.cent[1]
    self.tot_size[index] = self.radius
    self.tot_score[index] = self.score
    self.ave += self.img
def store_index(self, time, index, flag=1):
    """Store information about:
    * Time-stamp
    * Total intensity
    * Beam center
    * Estimated Particle Size
    * Estimated particle nr
    """
    if not hasattr(self, 'streak_m'):
        self.streak_m = 0
    if not hasattr(self, 'streak_s'):
        self.streak_s = 0
    self.tot_t[index] = time.time()
    self.tot_s[index] = time.seconds()
    self.tot_ns[index] = time.nanoseconds()
    self.tot_fd[index] = time.fiducial()
    self.tot_int[index] = float(self.img.sum())
    if self.peak is not None:
        self.tot_peak1_int[index] = self.peak1
        self.tot_peak2_int[index] = self.peak2
    self.tot_streak_m[index] = self.streak_m
    self.tot_streak_s[index] = self.streak_s
    self.tot_cx[index] = self.cent[0]
    self.tot_cy[index] = self.cent[1]
    self.tot_size[index] = self.radius
    self.tot_score[index] = self.score
    if flag:
        self.ave += self.img
def store_index2(self, time, index, flag=1):
    """Store information about:
    * Time-stamp
    * Total intensity
    * Beam center
    * Estimated Particle Size
    * Estimated particle nr
    """
    self.tot_t[index] = time.time()
    self.tot_s[index] = time.seconds()
    self.tot_ns[index] = time.nanoseconds()
    self.tot_fd[index] = time.fiducial()
    self.tot_int[index] = float(self.image.sum())
    self.tot_cx[index] = self.cent[0]
    self.tot_cy[index] = self.cent[1]
    self.tot_size[index] = self.radius
    self.tot_score[index] = self.score
    if flag:
        self.ave += self.image
def do_fetch(self):
    # Definitions:
    # data_utc: is meant to be the first and last timestamps of the saved
    #   or the retrieved data if not saved. (at least from 3 march 2016)
    # shot_f, shot_t: are beginning and end of shot (from programs)
    #   at least from 3 march 2016
    # utc: seems to be same as data_utc
    # seg_f_u: up til 20 march 2020, seg_f_u appears to be the *requested*
    #   segment (now fixed - renamed to req_f_u)
    # utc0 - only 9.9 or so.  Should be in file or set in base.py
    # In this revision, the only changes are: allow for self.time_range,
    # variable names f, t changed to f_u, t_u (the desired data utc),
    # and comment fixes, in preparation for including time_range and
    # other cleanups.
    # My W7X shots are either of the form from_utc, to_utc,
    # or date (8dig) and shot (progId)
    # the format is in the acquisition properties, to avoid
    # repetition in each individual diagnostic
    t_start = seconds()
    if self.shot[1] > 1e9 or hasattr(self, 'time_range') and self.time_range is not None:
        # we have start and end in UTC instead of shot no
        # need the shot and utc to get t1 to set zero t
        if hasattr(self, 'time_range') and self.time_range is not None:
            if self.shot[1] > 1e9:
                raise ValueError("Can't supply shot as a utc pair and specify a time_range")
            actual_shot = self.shot
            f_u = None  # set to None to make sure we don't use it
        else:
            f_u, t_u = self.shot  # Initialize to the requested utc range
            actual_shot = get_shot_number([f_u, t_u])
        progs = get_programs(actual_shot)  # need shot to look up progs
        # need prog to get trigger - not tested for OP1.1
        if len(progs) > 1:
            raise LookupError('fetch: too many programs found - covers > 1 shot?')
        # this_prog = [prog for prog in progs if (f_u >= progs[prog]['from'] and
        #                                         t_u <= progs[prog]['upto'])]
        if len(progs) == 1:
            this_prog = list(progs.values())[0]  # list() needed under py3
            # on shot 20180724,10, this trigger[1] is an empty list
            trigger = this_prog['trigger']['1']
            # The fallback to trigger[0] means that more rubbish shots are saved than
            # if we only look at the proper trigger (1) - here is an example:
            # run pyfusion/examples/plot_signals shot_number=[20180724,10] diag_name="W7X_UTDU_LP15_I" dev_name='W7X'
            if len(trigger) == 0:  # example above
                print('** No Trigger 1 on shot {s}'.format(s=actual_shot))
                debug_(pyfusion.DEBUG, 0, key="noTrig1", msg="No Trigger 1 found")
                # take any that exist, at 60
                trigger = [trr[0] + int(60 * 1e9)
                           for trr in this_prog['trigger'].values() if len(trr) > 0]
                if len(trigger) == 0:
                    raise LookupError('No Triggers at all on shot {s}'
                                      .format(s=actual_shot))
            utc_0 = trigger[0]  # utc_0 is the first trigger (usu 61 sec ahead of data)
        else:
            print('Unable to look up programs - assuming this is a test shot')
            utc_0 = f_u  # better than nothing - probably a 'private' test/calibration shot
        if f_u is None:  # shorthand for 'have time range'
            f_u = utc_0 + int(1e9 * (self.time_range[0]))  # + 61))
            t_u = utc_0 + int(1e9 * (self.time_range[1]))  # + 61)) was 61 rel to prog['from']
    else:  # self.shot is an 8,3 digit shot and time range not specified
        actual_shot = self.shot
        f_u, t_u = get_shot_utc(actual_shot)
        # f_u is the start of the overall shot - i.e. about plasma time -61 sec.
        # at present, ECH usually starts 61 secs after t0
        # so it is usually sufficient to request a later start than t0
        pre_trig_secs = self.pre_trig_secs if hasattr(self, 'pre_trig_secs') else 0.3
        # should get this from programs really - code is already above.
        # We need to just move it.
        pyfusion.utils.warn('fetch: still using hard-coded 61 secs')
        utc_0 = f_u + int(1e9 * (61))  # utc_0 is plasma initiation in utc
        f_u = utc_0 - int(1e9 * pre_trig_secs)  # f_u is the first time wanted

    # make sure we have the following defined regardless of how we got here
    shot_f_u, shot_t_u = get_shot_utc(actual_shot)
    req_f_u = f_u  # req_f_u is the start of the desired data segment - sim. for req_t_u
    req_t_u = t_u
    # A URL STYLE diagnostic - used for a one-off rather than an array
    # this could be moved to setup so that the error info is more complete
    if hasattr(self, 'url'):
        fmt = self.url  # add from= further down: +'_signal.json?from={req_f_u}&upto={req_t_u}'
        params = {}
        # check consistency - url should be literal - no params (only for fmt)
        # - gain, units are OK as they are not params
        if hasattr(self, 'params'):
            pyfusion.utils.warn('URL diagnostic {n} should not have params <{p}>'
                                .format(n=self.config_name, p=self.params))
    else:  # a pattern-based one - used for arrays of probes
        if hasattr(self, 'fmt'):  # does the diagnostic have one?
            fmt = self.fmt
        elif hasattr(self.acq, 'fmt'):  # else use the acq.fmt
            fmt = self.acq.fmt
        else:  # so far we have no quick way to check the server is online
            raise LookupError('no fmt - perhaps pyfusion.cfg has been '
                              'edited because the url is not available')
        params = eval('dict(' + self.params + ')')

    # Originally added to fix up erroneous ECH alias mapping if ECH - only
    # 6 sources work if I don't.  But it seems to help with many others.
    # This implementation is kludgey but proves the principle, and
    # means we don't have to refer to any raw.. signals
    # would be nice if they made a formal way to do this.
    if 'upto' not in fmt:
        fmt += '_signal.json?from={req_f_u}&upto={req_t_u}'
    assert req_f_u == f_u, 'req_f_u error'
    assert req_t_u == t_u, 'req_t_u error'
    # these params are needed by the format substitutions below
    params.update(req_f_u=req_f_u, req_t_u=req_t_u, shot_f_u=shot_f_u)
    url = fmt.format(**params)  # substitute the channel params
    debug_(pyfusion.DEBUG, 2, key="url", msg="middle of work on urls")
    if np.any([nm in url for nm in
               'Rf,Tower5,MainCoils,ControlCoils,TrimCoils,Mirnov,Interfer,_NBI_'
               .split(',')]):
        from pyfusion.acquisition.W7X.get_url_parms import get_signal_url
        # replace the main middle bit with the expanded one from the GUI
        tgt = url.split('KKS/')[1].split('/scaled')[0].split('_signal')[0]
        self.tgt = tgt  # for the sake of error_info
        # construct a time filter for the shot
        filt = '?filterstart={req_f_u}&filterstop={req_t_u}'.format(**params)
        # get the url with the filter
        url = url.replace(tgt, get_signal_url(tgt, filt)).split('KKS/')[-1]
        # take the filter back out - we will add the exact one later
        url = url.replace(filt, '/')

    # nSamples now needs a reduction mechanism http://archive-webapi.ipp-hgw.mpg.de/
    # minmax is increasingly slow for nSamples>10k, 100k hopeless.
    # Should ignore the test comparing the first two elements of the tb
    # prevent reduction (NSAMPLES=...) to avoid the bug presently in codac
    if (('nSamples' not in url) and (pyfusion.NSAMPLES != 0)
            and not (hasattr(self, 'allow_reduction')
                     and int(self.allow_reduction) == 0)):
        url += '&reduction=minmax&nSamples={ns}'.format(ns=pyfusion.NSAMPLES)
    debug_(pyfusion.DEBUG, 2, key="url", msg="work on urls")
    # we need %% in pyfusion.cfg to keep py3 happy
    # however with the new get_signal_url, this will all disappear
    if sys.version < '3.0.0' and '%%' in url:
        url = url.replace('%%', '%')
    if 'StationDesc.82' in url:  # fix spike bug in scaled QRP data
        url = url.replace('/scaled/', '/unscaled/')
    if pyfusion.CACHE:
        # Needed for improperly configured cygwin systems: e.g. IPP Virtual PC
        # Perhaps this should be executed at startup of pyfusion?
        cygbin = "c:\\cygwin\\bin"
        if os.path.exists(cygbin) and not cygbin in os.environ['path']:
            os.environ['path'] += ";" + cygbin
        print('using wget on {url}'.format(url=url))
        retcode = os.system('wget -x "{url}"'.format(url=url))
        # retcode = os.system('c:\\cygwin\\bin\\bash.exe -c "/bin/wget {url}"'.format(url=url))
        debug_(retcode != 0, level=1, key='wget', msg="wget error or DEBUG='wget'")
        # now read from the local copy - seems like urls need full paths
        # appears to be a feature!
        # http://stackoverflow.com/questions/7857416/file-uri-scheme-and-relative-files
        # /home/bdb112/pyfusion/working/pyfusion/archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/CoDaStationDesc.82/DataModuleDesc.181_DATASTREAM/7/Channel_7/scaled/_signal.json?from=1457626020000000000&upto=1457626080000000000&nSamples=10000
        # url = url.replace('http://','file:///home/bdb112/pyfusion/working/pyfusion/')
        url = url.replace('http://', 'file:/' + os.getcwd() + '/')
        if 'win' in os.sys.platform:
            # even though it seems odd, we want
            # 'file:/c:\\cygwin\\home\\bobl\\pyfusion\\working\\pyfusion/archive-webapi.ipp-hgw.mpg.de/ArchiveDB/codac/W7X/CoDaStationDesc.82/DataModuleDesc.192_DATASTREAM/4/Channel_4/scaled/_signal.json@from=147516863807215960&upto=1457516863809815961'
            url = url.replace('?', '@')  # nicer replace - readback still fails in Win, untested on unix systems
        print('now trying the cached copy we just grabbed: {url}'.format(url=url))
    if (req_f_u > shot_t_u) or (req_t_u < shot_f_u):
        pyfusion.utils.warn('segment requested is outside the shot times for '
                            + str(actual_shot))
    if pyfusion.VERBOSE > 0:
        print('======== fetching url over {dt:.1f} secs from {fr:.1f} to {tt:.1f} =========\n[{u}]'
              .format(u=url, dt=(params['req_t_u'] - params['req_f_u']) / 1e9,
                      fr=(params['req_f_u'] - shot_f_u) / 1e9,
                      tt=(params['req_t_u'] - shot_f_u) / 1e9))
    # seems to take twice as long as timeout requested.
    # haven't thought about python3 for the json stuff yet
    # This is not clean - should loop for timeout in [pyfusion.TIMEOUT, 3*pyfusion.TIMEOUT]
    try:
        # dat = json.load(urlopen(url,timeout=pyfusion.TIMEOUT)) works
        # but follow example in
        # http://webservices.ipp-hgw.mpg.de/docs/howtoREST.html#python
        # Some extracts in examples/howtoREST.py
        # dat = json.loads(urlopen(url,timeout=pyfusion.TIMEOUT).read().decode('utf-8'))
        t_pre = seconds()
        # for long shots, adjust strategy and timeout to reduce memory consumption
        ONE = 4  # memory conservation tricks only apply for DEBUG<ONE
        # This allows the cutoff value to be increased in some cases
        # uncomment the following two for testing the exception handler
        ## timeout = pyfusion.TIMEOUT
        ## raise httplib.IncompleteRead('test')
        if (req_t_u - req_f_u) / 1e9 > pyfusion.VERY_LONG:
            size_MS = 2 * (req_t_u - req_f_u) / 1e9  # approximate - later on calc from dt i.e. MSamples
            if pyfusion.NSAMPLES != 0:  # allow for subsampled data
                size_MS = pyfusion.NSAMPLES / 1e6
            timeout = 8 * size_MS + pyfusion.TIMEOUT  # don't make timeout too small!
            print('On-the-fly conversion: Setting timeout to {tmo}'.format(tmo=timeout))
            dat = json.load(urlopen(url, timeout=timeout))
            t_text = seconds()
        else:
            timeout = pyfusion.TIMEOUT
            txtform = urlopen(url, timeout=timeout).read()
            t_text = seconds()
            print('{tm} {tp:.2f} prep, {tt:.2f} fetch without decode, '
                  .format(tm=time.strftime('%H:%M:%S'),
                          tp=t_pre - t_start, tt=t_text - t_pre), end='')
            sys.stdout.flush()
            dat = json.loads(txtform.decode('utf-8'))
            if pyfusion.DEBUG < ONE:
                txtform = None  # release memory
        t_conv = seconds()
        # for 10MS of mirnov: 0.06 prep, 9.61 fetch, 19.03 conv
        print('{tc:.2f} conv'.format(tc=t_conv - t_text))
    except socket.timeout as reason:
        # the following url access is a slightly different form?
        # should check if this is better tested by the URL module
        print('{tm} {tp:.2f} prep, {tt:.2f} timeout. '
              .format(tp=t_pre - t_start, tt=seconds() - t_pre,
                      tm=time.strftime('%H:%M:%S')), end='')
        print('****** first timeout error, try again with longer timeout *****')
        timeout *= 3
        dat = json.load(urlopen(url, timeout=timeout))
    except MemoryError as reason:
        raise  # too dangerous to do anything else except to reraise
    except httplib.IncompleteRead as reason:
        msg = str('** IncompleteRead after {tinc:.0f}/{timeout:.0f}s ** on {c}: {u} \n{r}'
                  .format(tinc=seconds() - t_start, c=self.config_name,
                          u=url, r=reason, timeout=timeout))
        pyfusion.logging.error(msg)
        # don't want to disturb the original exception, so raise <nothing> i.e. reraise
        raise  # possibly a memory error really? - not the case for 4114 20180912.48
    except Exception as reason:
        if pyfusion.VERBOSE >= 0:
            print('**** Exception (Memory? out of disk space?) OR timeout of {timeout} **** on {c}: {u} \n{r}'
                  .format(c=self.config_name, u=url, r=reason, timeout=timeout))
        raise  # re-raises the last exception

    # this form will default to repair = 2 for all LP probes.
    # default_repair = -1 if 'Desc.82/' in url else 0
    # Override acq.repair with the probe value
    default_repair = (int(self.repair) if hasattr(self, 'repair')
                      else 2 if 'Desc.82/' in url else 0)
    # this form follows the config file settings
    self.repair = int(self.repair) if hasattr(self, 'repair') else default_repair
    dimraw = np.array(dat['dimensions'])
    if 'nSamples' not in url:  # skip this check if we are decimating
        if np.abs(req_f_u - dimraw[0]) > 2000:
            print('** Start is delayed by >2 us {dtdel:,} relative to the request'
                  .format(dtdel=dimraw[0] - req_f_u))
        if (req_t_u - dimraw[-1]) > 2000:
            print('** End is earlier by >2 us {dtdel:,} relative to the request'
                  .format(dtdel=req_t_u - dimraw[-1]))
    output_data_utc = [dat['dimensions'][0], dat['dimensions'][-1]]
    if pyfusion.DEBUG < ONE:
        dat['dimensions'] = None  # release memory
    # adjust dim only (not dimraw) so that zero time remains at t1
    dim = dimraw - utc_0
    # decimation with NSAMPLES will make the timebase look wrong - so disable repair
    if pyfusion.NSAMPLES != 0 or self.repair == 0 or self.repair == -1:
        pass  # leave as is
    # need at least this clipping for Langmuir probes in Op1.1
    elif self.repair == 1:
        dim = np.clip(dim, 0, 1e99)
    elif self.repair == 2:
        dim, msg = regenerate_dim(dim)
        if msg is not None:
            print('shot {s}, {c}: {m}'.format(s=self.shot,
                                              c=self.config_name, m=msg))
    else:
        raise ValueError('repair value of {r} not understood'.format(r=self.repair))
    if pyfusion.VERBOSE > 2:
        print('repair', self.repair)
    # ch = Channel(self.config_name, Coords('dummy', (0,0,0)))
    # this probably should be in base.py
    coords = get_coords_for_channel(**self.__dict__)
    # used to be bare_chan? should we include - signs?
    ch = Channel(self.config_name, coords)
    scl = 1 / 3277.24 if dat['datatype'].lower() == 'short' else 1
    if self.repair == -1:
        output_data = TimeseriesData(timebase=Timebase(dimraw),
                                     signal=scl * Signal(dat['values']),
                                     channels=ch)
    else:
        output_data = TimeseriesData(timebase=Timebase(1e-9 * dim),
                                     signal=scl * Signal(dat['values']),
                                     channels=ch)
    output_data.meta.update({'shot': self.shot})
    # this copy was saved earlier so we could delete the large array to save space
    output_data.utc = output_data_utc
    output_data.units = dat['units'] if 'units' in dat else ''
    # this is a minor duplication - at least it gets saved via params
    params['data_utc'] = output_data.utc
    params['utc_0'] = utc_0  # hopefully t0 - useful in npz files
    # Warning - this could slow things down! - but allows
    # corrupted time to be re-calculated as algorithms improve,
    # and storage as differences takes very little space.
    params['diff_dimraw'] = dimraw
    params['diff_dimraw'][1:] = np.diff(dimraw)
    if pyfusion.DEBUG < ONE:
        dimraw = None
    # NOTE!!! need float128 to process dimraw, and cumsum won't return ints
    # or automatically promote to 128bit (neither do simple ops like *, +)
    params['pyfusion_version'] = pyfusion.version.get_version()
    if pyfusion.VERBOSE > 0:
        print('shot {s}, config name {c}'.format(c=self.config_name, s=self.shot))
    output_data.config_name = self.config_name
    debug_(pyfusion.DEBUG, 2, key='W7XDataFetcher')
    output_data.params = params
    ### the total shot utc.
    output_data.utc = [f_u, t_u]
    return output_data
print ("Example\n>>> conn.execute('select * from summ order by shot desc limit 1').fetchone()") sys.exit() import MDSplus as MDS shots = 0 # set these both to () to stop on errors shot_exception = Exception # () to see message - catches and displays all errors node_exception = Exception # () ditto for nodes. errs = dict(shot=[]) # error list for the shot overall (i.e. if tree is not found) for diag in diags.keys(): errs.update({diag: []}) # error list for each diagnostic start = seconds() for s in srange: # on t440p (83808,86450): # FY14-15 (86451,89155):#(36363,88891): #81399,81402): #(81600,84084): datdic = dict(shot=s) shots += 1 try: if (s % 10) == 0: print (s), else: print ("."), if (s % 100) == 0: print ("") tree = MDS.Tree("h1data", s) non_special = list(diags.keys()) non_special.remove("mirnov_coh") for diag in non_special: try:
# (fragment - this first block sits inside an 'if prerun:' style branch)
    runfile = tempfile.mktemp()
    env = os.environ
    # env.update({'PYTHONSTARTUP':tmpfil})  only works in interactive
    with open(filename, 'rt') as pf:
        prog = pf.readlines()
    with open(runfile, 'wt') as tf:
        tf.write(prerun + '\n')
        tf.writelines(prog)
else:
    env = None
    runfile = filename
cmd = '{py} {file} {args}'.format(file=runfile, py=python_exe, args=args)
print(cmd)
st = seconds()
sub_pipe = subprocess.Popen(cmd, env=env, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
""" Doesn't work this way - seems to wait.
for t in range(max_sec):
    print(t)
    sleep(1)
    if sub_pipe.poll():
        break
    if t == max_sec - 1:
        print('terminate')
        sub_pipe.kill()
"""
(resp, errout) = sub_pipe.communicate()
print(errout)
have_error = (((errout != b'')
shots = 0
# set these both to () to stop on errors
shot_exception = ()  # Exception # () to see message - catches and displays all errors
node_exception = () if debug > 0 else Exception  # () ditto for nodes.
errs = dict(shot=[])  # error list for the shot overall (i.e. if tree is not found)
for diag in diags.keys():
    errs.update({diag: []})  # error list for each diagnostic
print("""If off-line, set pyfusion.TIMEOUT=0 to prevent long delays
The search for valid shot numbers is now much quicker """)
# will take a while (~ 0.7 sec/day?), about 2 mins as of 10/2017
start = seconds()
dev = pyfusion.getDevice(devname)  # 'H1Local'
if 'W7' in devname:
    if (len(np.shape(srange)) == 3) or (np.shape(srange) == (2, 2)):
        ansexp = input('About to expand shot range: Continue? (Y/n/q)')
        if len(ansexp) == 0 or ansexp.lower()[0] != 'y':  # was 'ans', a NameError
            sys.exit()
    pyfusion.logging.info(str('Starting with {st}, shape is {shp}'
                              .format(st=srangestr, shp=np.shape(srange))))
    if len(np.shape(srange)) == 3:  # a bunch of begin/ends
        srange = [sh for sr in srange for sh in expand_shot_range(*sr)]
    elif np.shape(srange) == (2, 2):
        print('assume a simple range')
        srange = expand_shot_range(*srange)
    else:  # already a list
        print('changed maxcpu from {i} to {a}'
              .format(i=maxcpu, a=self.maxcpu))
        self.count = 0
        self.st = seconds()
        self.last_wait = 0
        self.waited = 0

    def wait(self):
        self.count += 1
        if self.maxcpu == 1:
            return ()
        avg_time = (seconds() - self.st - self.waited) / self.count
        self.last_wait = max(avg_time * (1 - self.maxcpu) / self.maxcpu, 0.01)
        print(avg_time, self.last_wait, self.count)
        self.waited += self.last_wait
        sleep(self.last_wait)


if __name__ == '__main__':
    from time import time as seconds
    st = seconds()
    regulator = Regulator(1)
    for i in range(10000):
        regulator.wait()
    print('{nits} iterations with 100% CPU -> overhead of {dt:.2f}us per call'
          .format(nits=regulator.count,
                  dt=1e6 * (seconds() - regulator.st) / regulator.count))
print("Example\n>>> conn.execute('select * from summ order by shot desc limit 1').fetchone()") sys.exit() shots = 0 # set these both to () to stop on errors shot_exception = () # Exception # () to see message - catches and displays all errors node_exception = () # Exception # () ditto for nodes. errs = dict(shot=[]) # error list for the shot overall (i.e. if tree is not found) for diag in diags.keys(): errs.update({diag:[]}) # error list for each diagnostic print('If off-line, set pyfusion.TIMEOUT=0 to prevent long delays') start = seconds() dev=pyfusion.getDevice(devname) # 'H1Local') from pyfusion.data.shot_range import shot_range as expand_shot_range srange = ((20160309,1), (20160310,99)) srange = ((20160101,1), (20160310,99)) #srange = ((20160202,1), (20160202,99)) if 'W7' in devname: srange = expand_shot_range(*srange) else: srange=range(92000,95948) for sh in srange: # on t440p (83808,86450): # FY14-15 (86451,89155):#(36363,88891): #81399,81402): #(81600,84084): if 'W7' in devname: datdic = dict(shot=sh[0]*1000+sh[1], date=sh[0], sshot=sh[1]) if (sh[1] % 10) == 0: print(sh),
def test_pcubed(self):
    import matplotlib.pyplot as plt
    from matplotlib import gridspec
    from pyonline.percentiles import Percentiles, PCubed
    from time import time as seconds

    n = 10000000
    random_state = np.random.RandomState(10)
    # X = random_state.exponential(0.00000000000001, size=(n,))
    X = random_state.normal(0, 1, size=(n,))
    n_markers = 20

    def get_psquared():
        return Percentiles(n_markers=n_markers)

    def get_pcubed():
        return PCubed(n_markers=n_markers)

    # JIT compile the code
    get_psquared().partial_fit(X[:n_markers]).transform(X[:n_markers],
                                                        cubic_interp=True)
    get_pcubed().partial_fit(X[:n_markers]).transform(X[:n_markers],
                                                      cubic_interp=True)
    psquared = get_psquared()
    pcubed = get_pcubed()

    start_seconds = seconds()
    pcubed_percentiles = pcubed.partial_fit(X).transform(X, cubic_interp=True)
    stop_seconds = seconds()

    start_seconds = seconds()
    percentiles = pcubed.partial_fit(X).transform(X, cubic_interp=True)
    stop_seconds = seconds()
    pcubed_seconds = stop_seconds - start_seconds
    print('P-Cubed in %.2f seconds!' % pcubed_seconds)

    true_percentiles = pd.Series(X).rank() / X.shape[0]
    # assumed alias: the original code referenced an undefined name
    # 'percentile_model'; pcubed appears to be the model being plotted
    percentile_model = pcubed
    marker_percentiles = to_percentile(percentile_model.markers)

    gs = gridspec.GridSpec(2, 2)
    top_left = plt.subplot(gs[0, :1])
    bottom_left = plt.subplot(gs[1, :1])
    right = plt.subplot(gs[:, 1:])

    plot_n = 10000
    plot_X = X[:plot_n]
    pd.DataFrame(
        dict(Actual=plot_X,
             PSquare=percentile_model.inverse_transform(
                 true_percentiles.values[:plot_n]),
             PCubed=percentile_model.inverse_transform(
                 true_percentiles.values[:plot_n], cubic_interp=True)),
        index=true_percentiles.values[:plot_n],
    ).sort_index().plot(ax=top_left, title="n = %d" % n, colormap='Paired')
    for marker in marker_percentiles:
        top_left.axvline(marker, alpha=0.5, color='grey')
        bottom_left.axvline(marker, alpha=0.5, color='grey')
    top_left.scatter(marker_percentiles, percentile_model.quantiles)
    if hasattr(percentile_model, 'curvature'):
        print(percentile_model.curvature)
        pd.DataFrame(dict(StressLeft=percentile_model.curvature[:-1],
                          StressRight=percentile_model.curvature[1:]),
                     index=marker_percentiles[1:-1]).plot(ax=bottom_left)
    pd.DataFrame(dict(Actual=true_percentiles[:10000],
                      Estimated=percentiles[:10000])).plot.scatter(
        ax=right, x='Actual', y='Estimated', figsize=(9, 7))
    fig = plt.gcf()
    fig.set_size_inches(24, 12)
    plt.tight_layout()
    plt.show()
    self.assertTrue(True)
def read_text_pyfusion(files, target=b'^Shot .*', ph_dtype=None,
                       plot=pl.isinteractive(), ms=100, hold=0,
                       debug=0, quiet=1, maxcpu=1, exception=Exception):
    """ Accepts a file or a list of files, returns a list of structured arrays
    See merge ds_list to merge and convert types (float -> pyfusion.prec_med)
    """
    regulator = pyfusion.utils.Regulator(maxcpu)
    st = seconds(); last_update = seconds()
    file_list = files
    if len(np.shape(files)) == 0:
        file_list = [file_list]
    f = 'f8'
    if ph_dtype is None:
        ph_dtype = [('p12', f), ('p23', f), ('p34', f), ('p45', f), ('p56', f)]
        # ph_dtype = [('p12',f)]
    ds_list = []
    comment_list = []
    count = 0
    for (i, filename) in enumerate(file_list):
        regulator.wait()
        if seconds() - last_update > 10:
            last_update = seconds()
            tot = len(file_list)
            print('read {n}/{t}: ETA {m:.1f}m {f}'
                  .format(f=filename, n=i, t=tot,
                          m=(seconds() - st) * (tot - i) / float(60 * max(i, 1))))  # max() avoids /0 on the first file
        try:
            if isinstance(target, (str, bytes)):
                skip = 1 + find_data(filename, target, debug=debug)
            elif isinstance(target, int):
                skip = target
            else:
                raise Exception('target ({target}) is not recognised'
                                .format(target=target))
            if quiet == 0:
                print('{t:.1f} sec, loading data from line {s} of {f}'
                      .format(t=seconds() - st, s=skip, f=filename))
            # this little bit to determine layout of data
            # very inefficient to read twice, but in a hurry!
            if debug > 2:
                print('skiprows = \n', skip - 1)
            txt = np.loadtxt(fname=filename, skiprows=skip - 1, dtype=bytes,
                             delimiter='FOOBARWOOBAR', ndmin=1)
            header_toks = txt[0].split()
            # look for a version number first
            if header_toks[-1][-1] in b'0123456789.':
                version = float(header_toks.pop())
                if b'ersion' not in header_toks.pop():
                    raise ValueError('Error reading header in {f}'
                                     .format(f=filename))
            else:
                version = -1  # pre Aug 12 2013
            # noticed that the offset moved in 2015 - when did it happen?
            phase_offs = -4 if sys.version > '3.' else -2
            # is the first character of the 2nd last a digit?
            if header_toks[phase_offs][0] in b'0123456789':
                if pyfusion.VERBOSE > 0:
                    print('header toks', header_toks)
                    print('found new header including number of phases')
                n_phases = int(header_toks[phase_offs])
                ph_dtype = [('p{n}{np1}'.format(n=n, np1=n + 1), f)
                            for n in range(n_phases)]
            if 'frlow' in header_toks:  # add the two extra fields
                fs_dtype = [('shot', 'i8'), ('t_mid', 'f8'),
                            ('_binary_svs', 'u8'),
                            # really want u8 here, but npyio has a problem
                            # converting 10000000000000000000000000
                            # (OverflowError: Python int too large to convert
                            # to C long) - doesn't happen if text is read in
                            # directly with loadtxt
                            ('freq', 'f8'), ('amp', 'f8'), ('a12', 'f8'),
                            ('p', 'f8'), ('H', 'f8'), ('frlow', 'f8'),
                            ('frhigh', 'f8'), ('phases', ph_dtype)]
            else:
                fs_dtype = [('shot', 'i8'), ('t_mid', 'f8'),
                            ('_binary_svs', 'u8'), ('freq', 'f8'),
                            ('amp', 'f8'), ('a12', 'f8'), ('p', 'f8'),
                            ('H', 'f8'), ('phases', ph_dtype)]
            if version > 0.69:  # don't rely on precision
                fs_dtype.insert(-1, ('cpkf', 'f8'))  # -1 is 1 before the end
                fs_dtype.insert(-1, ('fpkf', 'f8'))  # they appear in this order
            if pyfusion.VERBOSE > 0:
                print(version, fs_dtype, '\n')
            ds = np.loadtxt(fname=filename, skiprows=skip, dtype=fs_dtype,
                            ndmin=1)  # ENSURE a 1D array
            if len(ds) > 0:
                ds_list.append(ds)
                count += 1
                # npz reads in python 2 can't cope with unicode - don't
                # report errors unless really debugging
                comment_list.append(filename.encode(
                    errors=['ignore', 'strict'][pyfusion.DBG() > 5]))
            else:
                print('no data in {f}'.format(f=filename))
        except ValueError as reason:
            print('Conversion error while processing {f} with loadtxt - {reason} {args}'
                  .format(f=filename, reason=reason, args=reason.args))
            traceback.print_exc()
        except exception as info:
            print('Other exception while reading {f} with loadtxt - {info} {a}'
                  .format(f=filename, info=info, a=info.args))
            traceback.print_exc()
    print("{c} out of {t} files".format(c=count, t=len(file_list)))
    if plot > 0 and len(ds_list) > 0:
        plot_fs_DA(ds_list[0], ms=ms)
    return (ds_list, comment_list)
def save(self, filename, verbose=None, sel=None, use_dictionary=False,
         tempdir=None, zipopt=-1):
    """ Save as an npz file, using an incremental method, which only uses
    as much /tmp space as required by each var at a time.
    Select which vars to save with sel: if sel is None, save all except
    as restricted by use_dictionary below.
    If use_dictionary is a valid dictionary, save the values of
    ANY AND ONLY the LOCAL variables whose names are in the keys for this
    set.  So if you have extracted a subset, and you specify
    use_dictionary=locals(), only that subset is saved (both in array
    length, and variables chosen).  Beware locals that are not your
    variables - e.g. mtrand.beta

    To avoid running out of space on tmp, or to speed up zip -
    now included as an argument.
    (Note that the normal os.putenv() doesn't seem to write to THIS
    environment - use the fudge below - careful - no guarantees)
        os.environ.__setitem__('TMPDIR', os.getenv('HOME'))
    actually - this seems OK:
        os.environ['IGETFILE'] = '/data/datamining/myView/bin/linux/igetfile'
        reload(tempfile)
        tempfile.gettempdir()
    also ('ZIPOPT','"-1"')  (now incorporated into args, not tested)
    ** superseded by zlib.Z_DEFAULT_COMPRESSION 0--9 (or -1 for default)
    """
    if verbose is None:
        verbose = self.verbose
    st = seconds()
    if tempdir is not None:
        os.environ.__setitem__('TMPDIR', tempdir)
        import tempfile
        reload(tempfile)  # in case it was already imported
        if tempfile.gettempdir() != tempdir:
            warn('failed to set tempdir = {t}: Value is {v}'
                 .format(t=tempdir, v=tempfile.gettempdir()))
    import zlib
    zlib.Z_DEFAULT_COMPRESSION = int(zipopt)
    # """ now obsolete - zipfile calls zlib with Z_DEFAULT_COMPRESSION arg
    # import os
    # print('overriding default zip compression to {z}'.format(z=zipopt))
    # os.environ.__setitem__('ZIPOPT', str(zipopt))
    # """
    if use_dictionary is False:
        save_dict = self.da  # the dict used to get data
    else:
        save_dict = use_dictionary
        print('Warning - saving only a subset')
    if sel is not None:
        use_keys = sel
    else:
        use_keys = []
        for k in self.da.keys():
            if k in save_dict.keys():
                use_keys.append(k)
    if verbose:
        print(' Saving only {k}'.format(k=use_keys))
    if 'info' in use_keys:
        info = info_to_bytes(self['info'])  # avoid py3 unicode error
    args = ','.join(["{k}=save_dict['{k}']".format(k=k) for k in use_keys])
    if verbose:
        print('lengths: {0} -999 indicates dodgy variable'
              .format([mylen(save_dict[k]) for k in use_keys]))
    if 'mask' in self.da:
        self.infodict.update({'valid_keys': self.masked.valid_keys})
        self.da['info'] = np.array(self.infodict)
    self.update({'info': self.infodict}, check=False)
    if self.debug:
        print('saving ' + filename, args)
    exec("np.savez_compressed(filename," + args + ")")
    self.name = filename
    if verbose:
        print(' in {dt:.1f} secs'.format(dt=seconds() - st))
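# Hedged usage sketch (paths illustrative): save with the fastest zlib
# setting and a roomier temp directory than /tmp.
da.save('/data/scratch/out.npz', tempdir='/data/scratch', zipopt=1)
# zipopt maps to zlib.Z_DEFAULT_COMPRESSION: 0 = none, 1 = fastest,
# 9 = smallest, -1 = the zlib default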
sys.exit()

import MDSplus as MDS

shots = 0
# set these both to () to stop on errors
shot_exception = ()  # () to see message - catches and displays all errors
node_exception = Exception  # () ditto for nodes.
errs = dict(shot=[])  # error list for the shot overall (i.e. if tree is not found)
for diag in diags.keys():
    errs.update({diag: []})  # error list for each diagnostic
start = seconds()
for s in srange:
    # on t440p (83808,86450): # FY14-15 (86451,89155): #(36363,88891): #(81399,81402): #(81600,84084):
    datdic = dict(shot=s)
    shots += 1
    try:
        if (s % 10) == 0:
            print(s, end=' ')
        else:
            print('.', end='')
        if (s % 100) == 0:
            print('')
        tree = MDS.Tree('magpie2', s)
        non_special = [diag for diag in list(diags)
                       if diags[diag][2] != Special]  # if 'dim' in diag
        # non_special.remove('mirnov_coh')
        for diag in non_special:
exec(_var_defaults)
exec(process_cmd_line_args())

filters = tb.Filters(complevel=complevel, complib=complib)
dd = DA(DAfilename).da
if outfilename is None:
    (base, ext) = os.path.splitext(os.path.realpath(DAfilename))
    outfilename = base + os.path.extsep + 'h5'
outf = tb.openFile(outfilename, "a")
for var in dd.keys():
    st_copy = seconds()
    if var in [nd.name for nd in outf.listNodes('/')]:
        raise LookupError('{f} already has a node "{n}"'
                          .format(f=outf.filename, n=var))
    val = dd[var]  # hold it in memory this way to avoid multiple access
    sizebytes = val.nbytes
    print('{dt:.1f}s to read {v} {GB:.2f} GB for {f}'
          .format(dt=seconds() - st_copy, GB=sizebytes / 1e9,
                  f=os.path.split(outfilename)[-1], v=var))
    st_write = seconds()
    try:
        var_atom = tb.atom.Atom.from_dtype(numpy.dtype(val.dtype))
    except Exception as reason:
        print('failed to copy {v}, reason: {r} {a}'
def __init__(self, name="none", datadir="..", system="none"): if name == "none": name = str(int(seconds())) self.name = name global startdir self.startdir = startdir self.counter = 1 os.chdir(datadir) self.datadir = datadir self.saveddir = self.startdir self.talkfailed = False self.speakversion = "none" if system == "none": self.system = platform.system() else: self.system = system if self.system == "Darwin": try: from AppKit import NSSpeechSynthesizer #from AppKit import NSSpeechSynthesizer def wsay(afile): #https://stackoverflow.com/questions/12758591/python-text-to-speech-in-macintosh #NSSpeechSynthesizer.availableVoices() speechSynthesizer = NSSpeechSynthesizer.alloc().init() speechSynthesizer.setVoice_( 'com.apple.speech.synthesis.voice.karen') x = open(afile, "r") txt = x.read() txt = txt.split("====") txt = txt[0] x.close() speechSynthesizer.startSpeakingString_(txt) self.speakversion = "Mac NSSpeechSynthesizer" except: def wsay(afile): x = open(afile, "r") txt = x.read() txt = txt.split("====") txt = txt[0] x.close() txt = txt.replace('"', "'") cmd = "echo \"" + txt + "\" | say -v Karen --quality=128" x = os.popen(cmd, "r") self.speakversion = "Mac say" #local if self.system == "local": # print("local") def wsay(afile): if self.talkfailed: return () from os.path import isfile afile = afile + ".wave" if isfile(afile): pass else: return ofile = """<!DOCTYPE html> <html> <head> <title></title> </head> <body> <audio controls autoplay> <source src="../OUT" type="audio/wave"> Your browser does not support the audio element. </audio> </body> </html> """ ofile = ofile.replace("OUT", afile) ofile = ofile.replace("..", self.datadir) #print(ofile) try: display(HTML(ofile)) except: print("talk failed") self.talkfailed = True self.speakversion = "local " if self.system == "Windows" or self.system == "Linux": try: ##### conda create --name slides Jupiter Matplotlib scipy pandas ##### conda activate slides ##### pip install gtts from gtts import gTTS def wsay(afile): global speakversion if self.talkfailed: return () x = open(afile, "r") txt = x.read() txt = txt.split("====") txt = txt[0] x.close() if len(txt) > 0: tts = gTTS(text=txt, lang='en') tts.save(self.name + ".mp3") ofile = """<!DOCTYPE html> <html> <head> <title></title> </head> <body> <audio controls autoplay> <source src="../OUT.mp3" type="audio/mp3"> Your browser does not support the audio element. 
</audio> </body> </html> """ ofile = ofile.replace("OUT", self.name) ofile = ofile.replace("..", self.datadir) try: if len(txt) > 0: #print(ofile) display(HTML(ofile)) #os.remove(self.name+".mp3") except: print("talk failed") self.talkfailed = True self.speakversion = "gtts (Internet Connection) " except: def wsay(afile): global speakversion x = open(afile, "r") txt = x.read() txt = txt.split("====") txt = txt[0] x.close() #tts = gTTS(text=txt, lang='en') print("no speech module found") username = getpass.getuser() self.wsay = wsay if (self.datadir == ".."): dir = os.getcwd() else: dir = datadir self.datadir = dir #os.chdir(dir) # text to be spoken txt = [] # images to show, either jpg or png files lfiles = os.listdir() img = [] for i in lfiles: if i.find("jpeg") > 0: img.append(i) if len(img) == 0: img = [] for i in lfiles: if i.find("png") > 0: img.append(i) img.sort() # optional text to show after the slide # that can be copy/pasted out = [] for s in range(1, len(img) + 1): t = str("%4.4d" % (s)) #t=dir+"/"+t txt.append(t) out.append("") #outfile=str("i%3.3d" % (s)) try: #outfile=open(outfile,"r") outfile = open(t, "r") buff = outfile.read() buff = buff.split("====") if (len(buff) > 1): buff = buff[1] else: buff = "" out[-1] = buff #out[-1]=outfile.read() out[-1] = out[-1].replace("USER", username) except: pass self.out = out self.txt = txt self.img = img
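# Usage sketch - the class name is not visible in this fragment, so 'Slides'
# below is a hypothetical stand-in for the class whose __init__ is shown above.
# Slide text files are named '0001', '0002', ... with '====' separating the
# spoken part from the optional copy/paste text.
show = Slides(datadir='/path/to/talk')   # hypothetical directory
for (i, image) in enumerate(show.img):
    print(image)             # the slide image to display at this step
    show.wsay(show.txt[i])   # speak the part of file '0001', ... before '===='
    print(show.out[i])       # optional copy/paste text after the '====' separator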
_var_defaults = """ DFfilename='/data/datamining/DA/PF2_130813_50_5X_1.5_5b_rms_1_diags_comp5.h5' keep_open = 1 # for read, should keep open! debug=1 """ exec(_var_defaults) exec(process_cmd_line_args()) mem = report_mem('init') df = tb.open_file(DFfilename, 'r') dd = {} for nd in df.list_nodes('/'): var = nd.name st_copy = seconds() v = df.get_node('/' + var) dd.update({var: v}) dt_copy = seconds() - st_copy if debug > 0: print('{var} in {dt_copy:.1f}s '.format(var=var, dt_copy=dt_copy)) report_mem(mem) # This is model for a way to extract selected shots from a huge data set. st_access = seconds() n = 100 # take a little bit so it doean't take too long, n=10000 gets all for k in dd.keys(): for i in range(2000):
    runfile = tempfile.mktemp()
    env = os.environ
    # env.update({'PYTHONSTARTUP': tmpfil})  # only works in interactive
    with open(filename, 'rt') as pf:
        prog = pf.readlines()
    with open(runfile, 'wt') as tf:
        tf.write(prerun + '\n')
        tf.writelines(prog)
else:
    env = None
    runfile = filename

cmd = '{py} {file} {args}'.format(file=runfile, py=python_exe, args=args)
print(cmd)
st = seconds()
sub_pipe = subprocess.Popen(cmd, env=env, shell=True,
                            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
""" Doesn't work this way - seems to wait.
for t in range(max_sec):
    print(t)
    sleep(1)
    if sub_pipe.poll():
        break
    if t == max_sec - 1:
        print('terminate')
        sub_pipe.kill()
"""
(resp, errout) = sub_pipe.communicate()
print(errout)
if ((errout != b'') and (b'warn' not in errout.lower())) or (sub_pipe.returncode != 0):
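# (the if clause above continues beyond this fragment)
# The commented-out polling loop above notes that communicate() blocks.
# Since Python 3.3 the standard way to bound the wait is the timeout
# argument of the real subprocess API, e.g.:
try:
    (resp, errout) = sub_pipe.communicate(timeout=max_sec)
except subprocess.TimeoutExpired:
    sub_pipe.kill()  # then collect whatever output was produced
    (resp, errout) = sub_pipe.communicate()
    print('killed after {m} sec'.format(m=max_sec))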
_var_defaults = """ DFfilename='/data/datamining/DA/PF2_130813_50_5X_1.5_5b_rms_1_diags_comp5.h5' keep_open = 1 # for read, should keep open! debug=1 """ exec(_var_defaults) exec(process_cmd_line_args()) mem = report_mem('init') df = tb.open_file(DFfilename,'r') dd = {} for nd in df.list_nodes('/'): var = nd.name st_copy = seconds() v = df.get_node('/'+var) dd.update({var:v}) dt_copy = seconds() - st_copy if debug>0: print('{var} in {dt_copy:.1f}s '.format(var=var, dt_copy=dt_copy)) report_mem(mem) # This is model for a way to extract selected shots from a huge data set. st_access = seconds() n=100 # take a little bit so it doean't take too long, n=10000 gets all for k in dd.keys(): for i in range(2000):
        self.maxcpu = max(self.maxcpu, 0.01)
        if self.maxcpu != maxcpu:
            print('changed maxcpu from {i} to {a}'.format(i=maxcpu, a=self.maxcpu))
        self.count = 0
        self.st = seconds()
        self.last_wait = 0
        self.waited = 0

    def wait(self):
        self.count += 1
        if self.maxcpu == 1:
            return
        avg_time = (seconds() - self.st - self.waited) / self.count
        self.last_wait = max(avg_time * (1 - self.maxcpu) / self.maxcpu, 0.01)
        print(avg_time, self.last_wait, self.count)  # diagnostic output
        self.waited += self.last_wait
        sleep(self.last_wait)

if __name__ == '__main__':
    from time import time as seconds
    st = seconds()
    regulator = Regulator(1)
    for i in range(10000):
        regulator.wait()
    print('{nits} iterations with 100% CPU -> overhead of {dt:.2f}us per call'
          .format(nits=regulator.count,
                  dt=1e6 * (seconds() - regulator.st) / regulator.count))
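# Usage sketch for a throttled loop, assuming the Regulator(maxcpu)
# constructor seen above; work() is a hypothetical payload function.
regulator = Regulator(0.5)   # aim for roughly 50% CPU
for i in range(100):
    work()                   # the computation being throttled
    regulator.wait()         # sleeps so compute time ~= maxcpu fraction of wall time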