import tables as tb
import os
import numpy as np
from pyfusion.utils import process_cmd_line_args
from time import time as seconds
from pyfusion.data.DA_datamining import DA, report_mem

# Script purpose: open an HDF5 file read-only, fetch every node directly under
# the root group into the dict `dd` (keyed by node name), and print how long
# each individual fetch took.

# Default settings are kept as source text and exec'd into this namespace.
_var_defaults = """
DFfilename='/data/datamining/DA/PF2_130813_50_5X_1.5_5b_rms_1_diags_comp5.h5'
keep_open = 1 # for read, should keep open!
debug=1
"""
exec(_var_defaults)
# process_cmd_line_args() returns source text which is exec'd here; presumably
# it applies name=value overrides given on the command line -- TODO confirm.
exec(process_cmd_line_args())

# Value returned by the first report_mem call is handed back to it at the end.
mem = report_mem('init')

# The file handle is intentionally left open: `dd` stores the objects returned
# by get_node, and the keep_open default above notes reads should keep it open.
df = tb.open_file(DFfilename, 'r')
dd = {}
for nd in df.list_nodes('/'):
    var = nd.name
    t_begin = seconds()
    # The lookup by path is the operation being timed.
    v = df.get_node('/' + var)
    dd[var] = v
    elapsed = seconds() - t_begin
    if debug > 0:
        print('{var} in {dt_copy:.1f}s '.format(var=var, dt_copy=elapsed))

# NOTE(review): indentation was lost in this copy of the file; this call is
# placed after the loop as the counterpart of report_mem('init') -- confirm.
report_mem(mem)
import tables as tb
import os
import numpy as np
from pyfusion.utils import process_cmd_line_args
from time import time as seconds
from pyfusion.data.DA_datamining import DA, report_mem

# Script purpose: open an HDF5 file read-only, fetch every node directly under
# the root group into the dict `dd` (keyed by node name), and print how long
# each individual fetch took.

# Default settings are kept as source text and exec'd into this namespace.
_var_defaults = """
DFfilename='/data/datamining/DA/PF2_130813_50_5X_1.5_5b_rms_1_diags_comp5.h5'
keep_open = 1 # for read, should keep open!
debug=1
"""
exec(_var_defaults)
# process_cmd_line_args() returns source text which is exec'd here; presumably
# it applies name=value overrides given on the command line -- TODO confirm.
exec(process_cmd_line_args())

# Value returned by the first report_mem call is handed back to it at the end.
mem = report_mem('init')

# The file handle is intentionally left open: `dd` stores the objects returned
# by get_node, and the keep_open default above notes reads should keep it open.
df = tb.open_file(DFfilename, 'r')
dd = {}
for nd in df.list_nodes('/'):
    var = nd.name
    t_begin = seconds()
    # The lookup by path is the operation being timed.
    v = df.get_node('/' + var)
    dd[var] = v
    elapsed = seconds() - t_begin
    if debug > 0:
        print('{var} in {dt_copy:.1f}s '.format(var=var, dt_copy=elapsed))

# NOTE(review): indentation was lost in this copy of the file; this call is
# placed after the loop as the counterpart of report_mem('init') -- confirm.
report_mem(mem)
da if oldDAfilename != DAfilename: 1 / 0 # create an error to force reload print("Using old data") except: print("loading {f}".format(f=DAfilename)) da = DA(DAfilename) oldDAfilename = DAfilename da.extract(locals(), "shot,phases,beta,freq,frlow,frhigh,t_mid,amp,a12") print("loading {f}".format(f=clusterfile)) x = np.load(clusterfile) for k in x.keys(): exec("{v}=x['{k}']".format(v=k, k=k)) start_mem = report_mem(msg="cluster_phases") w5 = np.where((dists(subset[clinds[cl][0]], phases[:, sel]) < d_big) & (bw(freq, frlow, frhigh)) & (shot == shot))[0] print(len(w5), len(unique(shot[w5]))) ph5 = phases[w5] wc = np.where(dists(subset[clinds[cl][0]], ph5[:, sel]) < d_med)[0] wcc = np.where(dists(subset[clinds[cl][0]], ph5[:, sel]) < d_sml)[0] sl_red = compact_str(np.unique(shot[w5[wcc]])) sl_green = compact_str(np.unique(shot[w5[wc]])) titl = "red:d<{d_sml:.1g}:{slr}".format(slr=sl_red, d_sml=d_sml) suptitl = "green:d<{d_med:.1g}:{slr}".format(slr=sl_green, d_med=d_med) pl.figure(num="cl[{cl}] delta phase".format(cl=cl)) if clearfigs: pl.clf()
sel = oldsel if csel is not None: subset=subset[:,csel] # this contrivance allows us to test on uniformly distributed phases if uniform_random is not None: print('evaluating {ur}'.format(ur=uniform_random)) phases = eval(uniform_random) shot = np.array(np.shape(phases)[0]*[shot[0]]) freq = np.array(np.shape(phases)[0]*[freq[0]]) frlow = np.array(np.shape(phases)[0]*[0]) frhigh = np.array(np.shape(phases)[0]*[9e9]) t_mid = np.array(np.shape(phases)[0]*[t_mid[-1]]) cc5 = [] # cluster centres for cl in cls: start_mem = report_mem(msg='cluster_phases') # used to where all at once - but as the distance is most expensive, do # shot and freq first w5_shot_freq = np.where((bw(freq,frlow,frhigh)) & (shot==shot))[0]; print(len(w5_shot_freq),len(np.unique(shot[w5_shot_freq]))) # the [:,sel] below is to avoid gather ops on two indices at once. w5=np.where(dists(subset[clinds[cl][0]], phases[w5_shot_freq][:,sel])<d_big)[0]; w5 = w5_shot_freq[w5] # refer back to the original array # old "all at once" way # w5=np.where((dists(subset[clinds[cl][0]], phases[:,sel])<d_big) & (bw(freq,frlow,frhigh)) & (shot==shot))[0]; print(len(w5),len(np.unique(shot[w5]))) print(len(w5),len(np.unique(shot[w5]))) ph5=phases[w5] wc=np.where(dists(subset[clinds[cl][0]], ph5[:,sel])<d_med)[0] if len(wc)<1: raise ValueError('no points within avg radians of {dm} '.format(dm = d_med)) wcc=np.where(dists(subset[clinds[cl][0]], ph5[:,sel])<d_sml)[0]
da if oldDAfilename != DAfilename: 1 / 0 # create an error to force reload print('Using old data') except: print('loading {f}'.format(f=DAfilename)) da = DA(DAfilename) oldDAfilename = DAfilename da.extract(locals(), 'shot,phases,beta,freq,frlow,frhigh,t_mid,amp,a12') print('loading {f}'.format(f=clusterfile)) x = np.load(clusterfile) for k in x.keys(): exec("{v}=x['{k}']".format(v=k, k=k)) start_mem = report_mem(msg='cluster_phases') w5 = np.where((dists(subset[clinds[cl][0]], phases[:, sel]) < d_big) & (bw(freq, frlow, frhigh)) & (shot == shot))[0] print(len(w5), len(unique(shot[w5]))) ph5 = phases[w5] wc = np.where(dists(subset[clinds[cl][0]], ph5[:, sel]) < d_med)[0] wcc = np.where(dists(subset[clinds[cl][0]], ph5[:, sel]) < d_sml)[0] sl_red = compact_str(np.unique(shot[w5[wcc]])) sl_green = compact_str(np.unique(shot[w5[wc]])) titl = 'red:d<{d_sml:.1g}:{slr}'.format(slr=sl_red, d_sml=d_sml) suptitl = 'green:d<{d_med:.1g}:{slr}'.format(slr=sl_green, d_med=d_med) pl.figure(num='cl[{cl}] delta phase'.format(cl=cl)) if clearfigs: pl.clf()
sys.exit() pyfusion.logging.info(str('Starting with {st}, shape is {shp}' .format(st=srangestr, shp=np.shape(srange)))) if len(np.shape(srange)) == 3: # a bunch of begin/ends srange = [sh for sr in srange for sh in expand_shot_range(*sr)] elif np.shape(srange) == (2,2): print('assume a simple range') srange = expand_shot_range(*srange) else: # already a list pass else: srange=range(92000,95948) print(srange) cache = 3*[None] start_mem = report_mem(msg='Entry', prev_values = None) # on t440p (83808,86450): # FY14-15 (86451,89155):#(36363,88891): #81399,81402): #(81600,84084): for (ish, sh) in enumerate(srange[::-1]):# may want [::-1] here to see the last first if 1000*sh[0] + sh[1] in have_shots: print('Skipping duplicate {shnum} '.format(shnum=str(sh))), continue cur_mem = report_mem(msg='next_shot', prev_values=start_mem, verbose=debug) # if we find the file 'pause' we pause, or if we find 'quit' we quit if pause_while(os.path.join(dbpath, 'pause'), check=2) == 'quit': break else: pass if 'W7' in devname: datdic = dict(shot=sh[0]*1000+sh[1], date=sh[0], sshot=sh[1])
raise LookupError("dd not loaded into memory - can't store") if mode is None: mode = mode_list[0] if not(doM) and not(doN): raise ValueError('Need to choose doN=True and/or doM=True') if inds is None: inds = np.arange(len(dd['shot'])) # the form phases = dd['phases'][inds,11:16] consumes less memory if (sel is not None) and (np.average(np.diff(sel))==1): # smarter version phases = dd['phases'][inds,sel[0]:sel[-1]+1] else: phases = dd["phases"][inds] if sel is not None: phases = phases.T[sel].T #phases = np.array(phases.tolist()) if verbose>0: start_mem = report_mem(msg='phases selected') if (np.shape(mask) != np.shape(np.identity(len(sel)))) or (mask != np.identity(len(sel))).any(): phases = np.dot(phases, mask) if verbose>0: report_mem(start_mem, msg='phases masked') sd = mode.std(phases, csel=csel, mask=mask) # generate mode number entries if not already there. for mname in 'N,NN,M,MM,mode_id'.split(','): if not(mname in dd.keys()): use_dtype=np.int16 minint = np.iinfo(use_dtype).min dd[mname] = minint*np.ones(len(dd['shot']),dtype=use_dtype)