Esempi in Python per read_csv_data

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: pyfusion.utils.read_csv_data

Metodo/funzione: read_csv_data

Esempi su hotexamples.com: 8

read_csv_data in Python: 8 esempi trovati. Questi sono i migliori esempi reali in Python per pyfusion.utils.read_csv_data.read_csv_data, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Esempio n. 1

Mostra file

File: import_LHD_summary_csv.py Progetto: pyfusion/pyfusion

def main():
    """Read the LHD summary CSV files and build shot-indexed, typed columns.

    Two CSV files are read with ``read_csv_data`` and merged column-wise (a
    column name that is already present gets a ``'1'`` suffix).  Every column
    is then deposited into an array of length ``max(shot) + 1`` addressed by
    shot number, typed as int32, float32 or fixed-width string depending on
    what its first non-blank values look like.

    Method (done simply, not necessarily efficiently):
      * drop all records with blank shot numbers;
      * convert the shot numbers to int and put everything in shot order;
      * create the final shot array, indexed by shot;
      * for each column, find the non-blank entries and deposit them at
        ``target[shot]``; the other slots keep a filler (-1, nan or '').

    Raises:
        LookupError: if the two files disagree on any shot number.

    NOTE: as written here the function neither returns nor saves the ``LHD``
    dictionary it assembles.
    """
    print('reading..')
    lhd = read_csv_data.read_csv_data(
        '/LINUX23/home/bdb112/LHD_Summary_Long.csv', header=3)
    print('{k} keys, {n} entries read'.format(n=len(lhd['nShotnumber']),
                                              k=len(lhd.keys())))

    # this is hacked in because I missed GAMMA and another in my big file
    lhd2 = read_csv_data.read_csv_data(
        '/home/bdb112/datamining/lhd_summary_data.csv', header=3)
    ksh = 'nShotnumber'
    ws2 = np.where(lhd[ksh] != lhd2[ksh])[0]
    if len(ws2) != 0:
        raise LookupError('{n} mismatched shots'.format(n=len(ws2)))
    # if we already have the key, give this one a different name,
    # otherwise keep the same name
    for k in lhd2.keys():
        if k in lhd.keys():
            lhd[k + '1'] = lhd2[k]
        else:
            lhd[k] = lhd2[k]

    LHD = {}
    tmp = {}
    sh = 90091  # only reported in the failure message below
    err = 0
    str_summary = []

    # cautiously convert to int: blank shot numbers are skipped first
    wnotnull = np.where(lhd['nShotnumber'] != '')[0]
    shots_tmp = lhd['nShotnumber'][wnotnull].astype(np.int32)
    # need unique here, as there are 2 shot 100's
    _, ws = np.unique(shots_tmp, return_index=True)
    # ws indexes the blank-free shots_tmp, so it has to be mapped back
    # through wnotnull before it can address rows of the original columns;
    # otherwise rows shift whenever a blank shot number precedes them.
    rows = wnotnull[ws]
    shot_of_row = shots_tmp[ws]
    # reorder the strings in a new dict, in shot number order.
    for k in lhd.keys():
        tmp[k] = lhd[k][rows]

    # now prepare the final shot array
    arrlen = np.max(shots_tmp) + 1  # need a spot for all shots including 0
    shots = np.zeros(arrlen, dtype=np.int32) - 1  # initialise to shot=-1
    shots[shots_tmp] = shots_tmp
    LHD.update({'Shot': shots})

    for k in tmp.keys():
        as_str_in_order = tmp[k]
        # now look for '' in other cols
        wcolnotnull = np.where(as_str_in_order != '')[0]

        chk_range = min(10, len(wcolnotnull))
        # get a lot of values, in case the first choice is not representative
        values = '_'.join([
            as_str_in_order[wcolnotnull[i]].strip() for i in range(chk_range)
        ])
        if re.match('^[_0-9]*$', values):
            dt = 'int32'
            arr = -np.ones(arrlen).astype(dt)
            # the sample looked integral; check the whole column before
            # committing, and fall back to float only if a fraction is found
            wdecimal = np.where(
                np.remainder(as_str_in_order[wcolnotnull].astype(float), 1) !=
                0)[0]
            if len(wdecimal) > 0:
                print('reverting {k} to float based on {eg}'.format(
                    k=k, eg=as_str_in_order[wcolnotnull[wdecimal[0]]]))
                dt = 'float32'
                arr = np.nan + np.ones(arrlen).astype(dt)

        elif re.match('^[_+-.0-9eE]*$', values):
            dt = 'float32'
            arr = np.nan + np.ones(arrlen).astype(dt)
        else:
            dt = 'str'
            # need to initialise empty
            arr = np.array(arrlen * [''], dtype='|S256')

        try:  # the conversion may go wrong - protect
            arr[shot_of_row[wcolnotnull]] = \
                as_str_in_order[wcolnotnull].astype(np.dtype(dt))
        except Exception as details:
            err += 1
            print('Failed on {k} (type was based on "{v}" for shot {sh}, {d}'.
                  format(k=k, d=details, v=values, sh=sh))

            # fall back to keeping the column as strings
            arr = np.array(arrlen * [''], dtype='|S256')
            arr[shot_of_row[wcolnotnull]] = as_str_in_order[wcolnotnull]

            # compress, but beware assignments in the future.
            arr = np.array([s.strip() for s in arr])
            str_summary.append('{k}: {oldty}-> {dty}'.format(
                k=k, dty=arr.dtype, oldty=as_str_in_order.dtype))
            print('revert {k} to a string, type {dty}'.format(k=k,
                                                              dty=arr.dtype))

        LHD.update({k: arr})  # add the new entry

Esempio n. 2

Mostra file

File: get_basic_params.py Progetto: bdb112/pyfusion

def get_basic_params(diags=None, shot=54196, times=None, delay=None, debug=0):
    """Return basic parameters for one shot, sampled at the given times.

    Args:
        diags: iterable of diagnostic names (keys of ``file_info``); defaults
            to ``<n_e19>, b_0, i_p, w_p, dw_pdt, dw_pdt2``.
        shot: shot number.
        times: times at which values are wanted; defaults to
            ``np.linspace(0, 4, 4000)``.
        delay: offset subtracted from the time column of each data file;
            defaults to ``get_delay(shot)``.
        debug: debug level, combined with ``pyfusion.DEBUG`` for ``debug_``.

    Returns:
        dict mapping each diagnostic name to an array on ``times``, plus
        ``'check_tm'`` (the times) and ``'check_shot'`` (the shot number
        repeated) as a cross check.  Diagnostics missing from ``file_info``
        map to all-nan; diagnostics whose file cannot be read are omitted.

    Raises:
        ValueError: if a data file reports ``DimNo != 1``.
        LookupError: if the variable name does not match exactly one column.
    """

    global lhd_summary

    if diags is None:
        diags = "<n_e19>,b_0,i_p,w_p,dw_pdt,dw_pdt2".split(',')

    if delay is None:
        delay = get_delay(shot)

    if times is None:
        times = np.linspace(0, 4, 4000)

    times = np.array(times)
    vals = {}
    # create an extra time array to allow a cross check
    vals.update({'check_tm': times})
    # builtin int replaces the np.int alias, which newer NumPy no longer has
    vals.update({'check_shot': np.zeros(len(times), dtype=int) + shot})
    for diag in diags:
        if diag not in file_info:
            warn('diagnostic {0} not found in shot {1}'.format(diag, shot),
                 stacklevel=2)
            vals.update({diag: np.nan + times})
            # skip the store at the bottom of the loop: valarr would be
            # undefined or left over from the previous diagnostic
            continue

        info = file_info[diag]
        varname = info['name']
        # an optional "oper:" prefix selects post-processing of the variable
        if ':' in varname:
            (oper, varname) = varname.split(':')
        else:
            oper = None

        if info['format'].find('.csv') > 0:
            try:
                lhd_summary.keys()
            except (NameError, AttributeError):
                # summary not loaded yet (global unset or not dict-like)
                print('reloading {0}'.format(info['format']))
                lhd_summary = read_csv_data(acq_LHD+info['format'], header=3)

            val = lhd_summary[varname][shot]
            # one summary value per shot, repeated over the time base
            valarr = np.double(val)+(times*0)
        else:
            # try the plain file, then the .bz2 and .gz variants.  Only
            # IOError moves on to the next variant; on the last one any
            # Exception gives up on this diagnostic.
            # NOTE(review): the last handler used to name a lowercase
            # `exception` that is not defined in this function - confirm
            # that Exception is the intended class.
            dg = None
            base = local_dir + info['format']
            for suffix, tolerated in (('', IOError), ('.bz2', IOError),
                                      ('.gz', Exception)):
                try:
                    dg = igetfile(base + suffix, shot=shot)
                    break
                except tolerated:
                    continue

            if dg is None:
                valarr = None
            else:
                nd = dg.vardict['DimNo']
                if nd != 1:
                    raise ValueError(
                        'Expecting a 1 D array in {0}, got {1}!'
                        .format(dg.filename, nd))

                # varname is used as a regular expression on the value names
                matches = [re.match(varname, nam) is not None
                           for nam in dg.vardict['ValName']]
                w = np.where(np.array(matches))[0]
                if len(w) != 1:
                    raise LookupError(
                        'Need just one instance of variable {0} in {1}'.
                        format(varname, dg.filename))

                # get the column of the array corresponding to the name
                valarr = dg.data[:, nd+w[0]]
                tim = dg.data[:, 0] - delay

                if oper == 'ddt':  # derivative operator
                    valarr = np.diff(valarr)/(np.average(np.diff(tim)))
                    tim = (tim[0:-1] + tim[1:])/2.0

                if oper == 'ddt2':  # abs(ddw)*derivative operator
                    dw = np.diff(valarr)/(np.average(np.diff(tim)))
                    ddw = np.diff(dw)/(np.average(np.diff(tim)))
                    tim = tim[2:]
                    valarr = 4e-6 * dw[1:] * np.abs(ddw)

                valarr = (stineman_interp(times, tim, valarr))
                # no data beyond the end of the file's time base
                w = np.where(times > max(tim))
                valarr[w] = np.nan

        # identity test: `!= None` on an array compares element-wise
        if valarr is not None:
            vals.update({diag: valarr})
    debug_(max(pyfusion.DEBUG, debug), level=5, key='interp')
    return(vals)

Esempio n. 3

Mostra file

""" Read the LHD summary csv file into a dictionary of column arrays.

The stated aim is a dictionary of arrays indexed by shot number (which may
require adding a "0" shot - apparently not as of Feb 2013), with integers and
reals converted where possible and strings reduced to the minimum length.
(Note - shortening the strings will cause errors if longer strings are added
afterwards.)
"""

from pyfusion.utils import read_csv_data
import numpy as np
import re

# set True to also merge the columns of a second summary file
hack_merge_another_file = False

print('reading..')
lhd = read_csv_data.read_csv_data('LHD_Summary_Long.csv', header=3)
print('{k} keys, {n} entries read'.format(
    n=len(lhd['nShotnumber']), k=len(lhd.keys())))

# this is hacked in because I missed GAMMA and another in my big file
if hack_merge_another_file:
    lhd2 = read_csv_data.read_csv_data(
        '/home/bdb112/datamining/lhd_summary_data.csv', header=3)
    ksh = 'nShotnumber'
    # both files must list exactly the same shots, row for row
    ws2 = np.where(lhd[ksh] != lhd2[ksh])[0]
    if len(ws2):
        raise LookupError('{n} mismatched shots'.format(n=len(ws2)))
    # a column name that is already present is stored with a '1' suffix,
    # any other column keeps its own name
    for k in lhd2.keys():
        lhd[k + '1' if k in lhd.keys() else k] = lhd2[k]

Esempio n. 4

Mostra file

File: get_basic_diagnostics.py Progetto: bdb112/pyfusion

def get_basic_diagnostics(diags=None,
                          shot=54196,
                          times=None,
                          delay=None,
                          exception=False,
                          debug=0):
    """Return basic diagnostic values for one shot, sampled at given times.

    Will access server if env('IGETFILE') points to an exe, else accesses
    cache.

    Args:
        diags: a diagnostic name or a sequence of names (keys of
            ``file_info``); defaults to
            ``<n_e19>, b_0, i_p, w_p, dw_pdt, dw_pdt2``.
        shot: shot number.
        times: times at which values are wanted; defaults to
            ``np.linspace(0, 4, 4000)``.
        delay: offset subtracted from the time column of each data file;
            defaults to ``get_delay(shot)``.
        exception: exception class (or tuple of classes) tolerated when the
            last file variant cannot be read.  ``None`` means tolerate no
            exceptions; the default ``False`` means ``Exception`` unless
            ``debug`` is non-zero, in which case nothing is tolerated.
        debug: debug level.

    Returns:
        dict mapping each diagnostic name to an array on ``times``, plus
        ``'check_tm'`` and ``'check_shot'`` as a cross check.  Diagnostics
        missing from ``file_info`` map to all-nan; diagnostics whose file
        cannot be read are omitted.

    Raises:
        ValueError: if a data file reports ``DimNo != 1`` or its time base
            is too short or degenerate.
        LookupError: if a single-column variable does not match exactly one
            column, or the data array is not 2-D.
    """

    global lhd_summary
    # Turn the exception policy into something `except` accepts: an empty
    # tuple catches nothing, whereas `except None` / `except False` is a
    # TypeError on Python 3.
    if exception is False:
        exception = Exception if debug == 0 else ()
    elif exception is None:
        exception = ()

    if diags is None:
        diags = "<n_e19>,b_0,i_p,w_p,dw_pdt,dw_pdt2".split(',')
    if len(np.shape(diags)) == 0:
        diags = [diags]  # a single name was given
    if delay is None:
        delay = get_delay(shot)

    if times is None:
        times = np.linspace(0, 4, 4000)

    times = np.array(times)

    # "oper:" prefixes that combine all matching columns, reducing along
    # axis 1.  Every name here has an implementation, so none is rejected
    # and none is silently ignored.
    reducers = {
        'sum': np.sum,
        'average': np.average,
        'min': np.min,
        'max': np.max,
        'std': np.std,
        'rms': lambda a, axis: np.sqrt(np.mean(np.square(a), axis)),
    }

    vals = {}
    # create an extra time array to allow a cross check
    vals.update({'check_tm': times})
    # builtin int replaces the np.int alias, which newer NumPy no longer has
    vals.update({'check_shot': np.zeros(len(times), dtype=int) + shot})
    for diag in diags:
        if diag not in file_info:
            warn('diagnostic {0} not found in shot {1}'.format(diag, shot),
                 stacklevel=2)
            vals.update({diag: np.nan + times})
            # skip the store at the bottom of the loop: valarr would be
            # undefined or left over from the previous diagnostic
            continue

        info = file_info[diag]
        varname = info['name']
        subfolder = info['format'].split('@')[0]
        filepath = os.path.sep.join(
            [localigetfilepath, subfolder, info['format']])
        # an optional "oper:" prefix selects post-processing of the variable
        if ':' in varname:
            (oper, varname) = varname.split(':')
        else:
            oper = None

        if info['format'].find('.csv') > 0:
            try:
                lhd_summary.keys()
            except (NameError, AttributeError):
                # summary not loaded yet (global unset or not dict-like)
                csvfilename = acq_LHD + '/' + info['format']
                if pyfusion.DBG() > 1:
                    print('looking for lhd summary in' + csvfilename)
                if not os.path.exists(csvfilename):
                    csvfilename += ".bz2"
                print('reloading {0}'.format(csvfilename))
                lhd_summary = read_csv_data(csvfilename, header=3)
                # should make this more formal - last shots
                # from an 'extra' file, and finally, from shot info
            if shot > 117000:  # fudge to get latest data
                # NOTE: this replaces the cached global on every such call
                lhd_summary = np.load(acq_LHD +
                                      '/LHD_summary.npz')['LHD'].tolist()
                print('loading newer shots from a separate file - fix-me')
            val = lhd_summary[varname][shot]
            # one summary value per shot, repeated over the time base
            valarr = np.double(val) + (times * 0)
        else:
            debug_(max(pyfusion.DBG(), debug), level=4, key='find_data')
            # try the plain file, then the .bz2 and .gz variants.  Only
            # IOError moves on to the next variant; the last one tolerates
            # whatever the `exception` policy allows.
            dg = None
            last_error = None
            for suffix, tolerated in (('', IOError), ('.bz2', IOError),
                                      ('.gz', exception)):
                try:
                    dg = igetfile(filepath + suffix,
                                  shot=shot,
                                  debug=debug - 1)
                    break
                except tolerated as details:
                    last_error = details
            else:
                # every variant failed: give up on this diagnostic
                if debug > 0:
                    print('diag at {fp} not found'.format(fp=filepath))
                print(last_error, last_error.args)

            if dg is None:
                valarr = None
            else:
                nd = dg.vardict['DimNo']
                if nd != 1:
                    raise ValueError(
                        'Expecting a 1 D array in {0}, got {1}!'.format(
                            dg.filename, nd))

                # varname is used as a regular expression on the value names
                matches = [
                    re.match(varname, nam) is not None
                    for nam in dg.vardict['ValName']
                ]
                w = np.where(np.array(matches))[0]
                # get the column(s) of the array corresponding to the name
                if oper in reducers:
                    valarr = reducers[oper](dg.data[:, nd + w], 1)
                else:
                    if len(w) != 1:
                        raise LookupError(
                            'Need just one instance of variable {0} in {1}'
                            .format(varname, dg.filename))
                    if len(np.shape(dg.data)) != 2:
                        raise LookupError(
                            'insufficient data for {0} in {1}'.format(
                                varname, dg.filename))

                    valarr = dg.data[:, nd + w[0]]

                tim = dg.data[:, 0] - delay

                if oper == 'ddt':  # derivative operator
                    valarr = np.diff(valarr) / (np.average(np.diff(tim)))
                    tim = (tim[0:-1] + tim[1:]) / 2.0

                if oper == 'ddt2':  # abs(ddw)*derivative operator
                    dw = np.diff(valarr) / (np.average(np.diff(tim)))
                    ddw = np.diff(dw) / (np.average(np.diff(tim)))
                    tim = tim[2:]
                    valarr = 4e-6 * dw[1:] * np.abs(ddw)

                if (len(tim) < 10) or (np.std(tim) < 0.1):
                    raise ValueError('Insufficient points or degenerate '
                                     'timebase data in {0}, {1}'.format(
                                         varname, dg.filename))

                valarr = (stineman_interp(times, tim, valarr))
                # no data beyond the end of the file's time base
                w = np.where(times > max(tim))
                valarr[w] = np.nan

        # identity test: `!= None` on an array compares element-wise
        if valarr is not None:
            vals.update({diag: valarr})
    debug_(max(pyfusion.DBG(), debug), level=5, key='interp')
    return (vals)

Esempio n. 5

Mostra file

File: get_basic_diagnostics.py Progetto: dpretty/pyfusion

def get_basic_diagnostics(diags=None, shot=54196, times=None, delay=None, exception=False, debug=0):
    """Return basic diagnostic values for one shot, sampled at given times.

    Args:
        diags: a diagnostic name or a sequence of names (keys of
            ``file_info``); defaults to
            ``<n_e19>, b_0, i_p, w_p, dw_pdt, dw_pdt2``.
        shot: shot number.
        times: times at which values are wanted; defaults to
            ``np.linspace(0, 4, 4000)``.
        delay: offset subtracted from the time column of each data file;
            defaults to ``get_delay(shot)``.
        exception: exception class (or tuple of classes) tolerated when the
            last file variant cannot be read.  ``None`` means tolerate no
            exceptions; the default ``False`` means ``Exception`` unless
            ``debug`` is non-zero, in which case nothing is tolerated.
        debug: debug level.

    Returns:
        dict mapping each diagnostic name to an array on ``times``, plus
        ``'check_tm'`` and ``'check_shot'`` as a cross check.  Diagnostics
        missing from ``file_info`` map to all-nan; diagnostics whose file
        cannot be read are omitted.

    Raises:
        ValueError: if a data file reports ``DimNo != 1`` or its time base
            is too short or degenerate.
        LookupError: if a single-column variable does not match exactly one
            column, or the data array is not 2-D.
    """

    global lhd_summary
    # Turn the exception policy into something `except` accepts: an empty
    # tuple catches nothing, whereas `except None` / `except False` is a
    # TypeError on Python 3.
    if exception is False:
        exception = Exception if debug == 0 else ()
    elif exception is None:
        exception = ()

    # identity tests throughout: `== None` on an array compares element-wise
    if diags is None:
        diags = "<n_e19>,b_0,i_p,w_p,dw_pdt,dw_pdt2".split(',')
    if len(np.shape(diags)) == 0:
        diags = [diags]  # a single name was given
    if delay is None:
        delay = get_delay(shot)

    if times is None:
        times = np.linspace(0, 4, 4000)

    times = np.array(times)

    # "oper:" prefixes that combine all matching columns, reducing along
    # axis 1.  Every name here has an implementation, so none is rejected
    # and none is silently ignored.
    reducers = {
        'sum': np.sum,
        'average': np.average,
        'min': np.min,
        'max': np.max,
        'std': np.std,
        'rms': lambda a, axis: np.sqrt(np.mean(np.square(a), axis)),
    }

    vals = {}
    # create an extra time array to allow a cross check
    vals.update({'check_tm': times})
    # builtin int replaces the np.int alias, which newer NumPy no longer has
    vals.update({'check_shot': np.zeros(len(times), dtype=int) + shot})
    for diag in diags:
        # `in` works on both Python 2 and 3, unlike dict.has_key
        if diag not in file_info:
            warn('diagnostic {0} not found in shot {1}'.format(diag, shot),stacklevel=2)
            vals.update({diag: np.nan + times})
            # skip the store at the bottom of the loop: valarr would be
            # undefined or left over from the previous diagnostic
            continue

        info = file_info[diag]
        varname = info['name']
        subfolder = info['format'].split('@')[0]
        filepath = os.path.sep.join([localigetfilepath, subfolder, info['format']])
        # an optional "oper:" prefix selects post-processing of the variable
        if ':' in varname:
            (oper, varname) = varname.split(':')
        else:
            oper = None

        if info['format'].find('.csv') > 0:
            try:
                lhd_summary.keys()
            except (NameError, AttributeError):
                # summary not loaded yet (global unset or not dict-like)
                print('reloading {0}'.format(info['format']))
                lhd_summary = read_csv_data(acq_LHD+'/'+info['format'], header=3)

            val = lhd_summary[varname][shot]
            # one summary value per shot, repeated over the time base
            valarr = np.double(val)+(times*0)
        else:
            debug_(max(pyfusion.DEBUG, debug), level=4, key='find_data')
            # try the plain file, then the .bz2 and .gz variants.  Only
            # IOError moves on to the next variant; the last one tolerates
            # whatever the `exception` policy allows.
            dg = None
            for suffix, tolerated in (('', IOError), ('.bz2', IOError),
                                      ('.gz', exception)):
                try:
                    dg = igetfile(filepath + suffix, shot=shot, debug=debug-1)
                    break
                except tolerated:
                    continue
            else:
                # every variant failed: give up on this diagnostic
                if debug > 0:
                    print('diag at {fp} not found'
                          .format(fp=filepath))

            if dg is None:
                valarr = None
            else:
                nd = dg.vardict['DimNo']
                if nd != 1:
                    raise ValueError(
                        'Expecting a 1 D array in {0}, got {1}!'
                        .format(dg.filename, nd))

                # varname is used as a regular expression on the value names
                matches = [re.match(varname, nam) is not None
                           for nam in dg.vardict['ValName']]
                w = np.where(np.array(matches))[0]
                # get the column(s) of the array corresponding to the name
                if oper in reducers:
                    valarr = reducers[oper](dg.data[:, nd+w], 1)
                else:
                    if len(w) != 1:
                        raise LookupError(
                            'Need just one instance of variable {0} in {1}'
                            .format(varname, dg.filename))
                    if len(np.shape(dg.data)) != 2:
                        raise LookupError(
                            'insufficient data for {0} in {1}'
                            .format(varname, dg.filename))

                    valarr = dg.data[:, nd+w[0]]

                tim = dg.data[:, 0] - delay

                if oper == 'ddt':  # derivative operator
                    valarr = np.diff(valarr)/(np.average(np.diff(tim)))
                    tim = (tim[0:-1] + tim[1:])/2.0

                if oper == 'ddt2':  # abs(ddw)*derivative operator
                    dw = np.diff(valarr)/(np.average(np.diff(tim)))
                    ddw = np.diff(dw)/(np.average(np.diff(tim)))
                    tim = tim[2:]
                    valarr = 4e-6 * dw[1:] * np.abs(ddw)

                if (len(tim) < 10) or (np.std(tim) < 0.1):
                    raise ValueError('Insufficient points or degenerate '
                                     'timebase data in {0}, {1}'
                                     .format(varname, dg.filename))

                valarr = (stineman_interp(times, tim, valarr))
                # no data beyond the end of the file's time base
                w = np.where(times > max(tim))
                valarr[w] = np.nan

        if valarr is not None:
            vals.update({diag: valarr})
    debug_(max(pyfusion.DEBUG, debug), level=5, key='interp')
    return(vals)

Esempio n. 6

Mostra file

File: get_basic_diagnostics.py Progetto: bdb112/pyfusion

def get_basic_diagnostics(diags=None, file_info=file_info, shot=54196, times=None, delay=None, exception=False, debug=0):
    """Return basic diagnostic values for one shot.

    Will access server if env('IGETFILE') points to an exe, else accesses
    cache.
    This is the first version to specifically allow for access through
    pyfusion.cfg.  There are two types of access:
       I/  single diag on its own timebase
       II/ the original multi diag on a given timebase (i.e. that from
           flucstrcs)
    Stage 1 puts the file_info into the .cfg file just for I/ single diag
    access.  Ideally the file_info for II/ should be in .cfg also.
    For stage I/, we call it with a file_info dict constructed on the spot
    as a dictionary with just one entry (for diags[0]).

    Args:
        diags: a diagnostic name or a sequence of names (keys of
            ``file_info``); defaults to
            ``<n_e19>, b_0, i_p, w_p, dw_pdt, dw_pdt2``.
        file_info: mapping from diagnostic name to a dict with at least
            ``'name'`` and ``'format'`` entries; defaults to the module
            level ``file_info``.
        shot: shot number.
        times: times at which values are wanted.  When omitted, several
            diagnostics share ``np.linspace(0, 4, 4000)`` while a single
            diagnostic is returned on its own file time base.
        delay: offset subtracted from the time column of each data file;
            defaults to ``get_delay(shot)``.
        exception: exception class (or tuple of classes) tolerated when the
            data file cannot be read.  ``None`` means tolerate no
            exceptions; the default ``False`` means ``Exception`` unless
            ``debug`` is non-zero, in which case nothing is tolerated.
        debug: debug level.

    Returns:
        dict mapping each diagnostic name to its values, plus ``'check_tm'``
        (the time base, ``None`` if none could be established) and
        ``'check_shot'`` (the shot number repeated once per time) as a cross
        check.  Diagnostics whose file cannot be read are omitted.

    Raises:
        ValueError: if a data file reports ``DimNo != 1`` or its time base
            is too short or degenerate.
        LookupError: if a single-column variable does not match exactly one
            column, or the data array is not 2-D.
    """

    global lhd_summary
    # Turn the exception policy into something `except` accepts: an empty
    # tuple catches nothing, whereas `except None` / `except False` is a
    # TypeError on Python 3.
    if exception is False:
        exception = Exception if debug == 0 else ()
    elif exception is None:
        exception = ()

    if diags is None:
        diags = "<n_e19>,b_0,i_p,w_p,dw_pdt,dw_pdt2".split(',')
    if len(np.shape(diags)) == 0:
        diags = [diags]  # a single name was given
    if delay is None:
        delay = get_delay(shot)

    if times is None:
        if len(diags) > 1:
            times = np.linspace(0, 4, 4000)  # this is a crude guess.
        # else leave it None: the single diag supplies its own time base
    else:
        # make sure it is an array
        times = np.array(times)

    # "oper:" prefixes that combine all matching columns, reducing along
    # axis 1.  Every name here has an implementation, so none is rejected
    # and none is silently ignored.
    reducers = {
        'sum': np.sum,
        'average': np.average,
        'min': np.min,
        'max': np.max,
        'std': np.std,
        'rms': lambda a, axis: np.sqrt(np.mean(np.square(a), axis)),
    }

    vals = {}

    for diag in diags:
        if diag not in file_info:
            warn('diagnostic {0} not found in shot {1}'.format(diag, shot),stacklevel=2)
            # times may still be None here (single diag, no time base yet)
            vals.update({diag: np.nan if times is None else np.nan + times})
            # skip the store at the bottom of the loop: valarr would be
            # undefined or left over from the previous diagnostic
            continue

        info = file_info[diag]
        varname = info['name']  # refers to name for igetfile - can contain ':'
        subfolder = info['format'].split('@')[0]
        filepath = os.path.sep.join([localigetfilepath, subfolder, info['format']])
        # an optional "oper:" prefix selects post-processing of the variable
        if ':' in varname:
            (oper, varname) = varname.split(':')
        else:
            oper = None

        if info['format'].find('.csv') > 0:
            try:
                lhd_summary.keys()
            except (NameError, AttributeError):
                # summary not loaded yet (global unset or not dict-like)
                csvfilename = acq_LHD+'/'+info['format']
                if pyfusion.DBG() > 1:
                    print('looking for lhd summary in' + csvfilename)
                if not os.path.exists(csvfilename):
                    csvfilename += ".bz2"
                print('reloading {0}'.format(csvfilename))
                lhd_summary = read_csv_data(csvfilename, header=3)
                # should make this more formal - last shots
                # from an 'extra' file, and finally, from shot info
            if shot > 117000:  # fudge to get latest data
                # NOTE: this replaces the cached global on every such call
                lhd_summary = np.load(acq_LHD+'/LHD_summary.npz')['LHD'].tolist()
                print('loading newer shots from a separate file - fix-me')
            val = lhd_summary[varname][shot]
            # one summary value per shot, repeated over the time base if any
            if times is None:
                valarr = np.double(val)
            else:
                valarr = np.double(val)+(times*0)
        else:
            try:  # now igetfile checks for .gz etc
                dg = igetfile(filepath, shot=shot, debug=debug-1)
            except exception as details:
                if debug > 0:
                    print('diag at {fp} not found'
                          .format(fp=filepath))
                print(details, details.args)
                dg = None  # give up on this diagnostic

            if dg is None:
                valarr = None
            else:
                nd = dg.vardict['DimNo']
                if nd != 1:
                    raise ValueError(
                        'Expecting a 1 D array in {0}, got {1}!'
                        .format(dg.filename, nd))

                # varname is used as a regular expression on the value names
                matches = [re.match(varname, nam) is not None
                           for nam in dg.vardict['ValName']]
                w = np.where(np.array(matches))[0]
                # get the column(s) of the array corresponding to the name
                if oper in reducers:
                    valarr = reducers[oper](dg.data[:, nd+w], 1)
                else:
                    if len(w) != 1:
                        raise LookupError(
                            'Need just one instance of variable {0} in {1}'
                            .format(varname, dg.filename))
                    if len(np.shape(dg.data)) != 2:
                        raise LookupError(
                            'insufficient data for {0} in {1}'
                            .format(varname, dg.filename))

                    valarr = dg.data[:, nd+w[0]]

                tim = dg.data[:, 0] - delay

                if oper == 'ddt':  # derivative operator
                    valarr = np.diff(valarr)/(np.average(np.diff(tim)))
                    tim = (tim[0:-1] + tim[1:])/2.0

                if oper == 'ddt2':  # abs(ddw)*derivative operator
                    dw = np.diff(valarr)/(np.average(np.diff(tim)))
                    ddw = np.diff(dw)/(np.average(np.diff(tim)))
                    tim = tim[2:]
                    valarr = 4e-6 * dw[1:] * np.abs(ddw)

                if (len(tim) < 10) or (np.std(tim) < 0.1):
                    raise ValueError('Insufficient points or degenerate '
                                     'timebase data in {0}, {1}'
                                     .format(varname, dg.filename))

                if times is not None:
                    debug_(max(pyfusion.DEBUG, debug), level=5, key='interp')
                    valarr = (stineman_interp(times, tim, valarr))
                    # no data beyond the end of the file's time base
                    w = np.where(times > max(tim))
                    valarr[w] = np.nan
                else:
                    # single diag: adopt the file's own time base
                    times = tim

        if valarr is not None:
            vals.update({diag: valarr})
    # create an extra time array to allow a cross check
    vals.update({'check_tm': times})
    # times is still None when a single diag could not be read; keep the key
    # but with no entries rather than failing on len(None).  Builtin int
    # replaces the np.int alias, which newer NumPy no longer has.
    n_times = 0 if times is None else len(times)
    vals.update({'check_shot': np.zeros(n_times, dtype=int)+shot})

    return(vals)

Esempio n. 7

Mostra file

File: get_basic_params.py Progetto: bdb112/pyfusion

def get_basic_params(diags=None, shot=54196, times=None, delay=None, debug=0):
    """Return basic parameters for one shot, sampled at the given times.

    Args:
        diags: iterable of diagnostic names (keys of ``file_info``); defaults
            to ``<n_e19>, b_0, i_p, w_p, dw_pdt, dw_pdt2``.
        shot: shot number.
        times: times at which values are wanted; defaults to
            ``np.linspace(0, 4, 4000)``.
        delay: offset subtracted from the time column of each data file;
            defaults to ``get_delay(shot)``.
        debug: debug level, combined with ``pyfusion.DEBUG`` for ``debug_``.

    Returns:
        dict mapping each diagnostic name to an array on ``times``, plus
        ``'check_tm'`` (the times) and ``'check_shot'`` (the shot number
        repeated) as a cross check.  Diagnostics missing from ``file_info``
        map to all-nan; diagnostics whose file cannot be read are omitted.

    Raises:
        ValueError: if a data file reports ``DimNo != 1``.
        LookupError: if the variable name does not match exactly one column.
    """

    global lhd_summary

    if diags is None:
        diags = "<n_e19>,b_0,i_p,w_p,dw_pdt,dw_pdt2".split(',')

    if delay is None:
        delay = get_delay(shot)

    if times is None:
        times = np.linspace(0, 4, 4000)

    times = np.array(times)
    vals = {}
    # create an extra time array to allow a cross check
    vals.update({'check_tm': times})
    # builtin int replaces the np.int alias, which newer NumPy no longer has
    vals.update({'check_shot': np.zeros(len(times), dtype=int) + shot})
    for diag in diags:
        if diag not in file_info:
            warn('diagnostic {0} not found in shot {1}'.format(diag, shot),
                 stacklevel=2)
            vals.update({diag: np.nan + times})
            # skip the store at the bottom of the loop: valarr would be
            # undefined or left over from the previous diagnostic
            continue

        info = file_info[diag]
        varname = info['name']
        # an optional "oper:" prefix selects post-processing of the variable
        if ':' in varname:
            (oper, varname) = varname.split(':')
        else:
            oper = None

        if info['format'].find('.csv') > 0:
            try:
                lhd_summary.keys()
            except (NameError, AttributeError):
                # summary not loaded yet (global unset or not dict-like)
                print('reloading {0}'.format(info['format']))
                lhd_summary = read_csv_data(acq_LHD + info['format'],
                                            header=3)

            val = lhd_summary[varname][shot]
            # one summary value per shot, repeated over the time base
            valarr = np.double(val) + (times * 0)
        else:
            # try the plain file, then the .bz2 and .gz variants.  Only
            # IOError moves on to the next variant; on the last one any
            # Exception gives up on this diagnostic.
            # NOTE(review): the last handler used to name a lowercase
            # `exception` that is not defined in this function - confirm
            # that Exception is the intended class.
            dg = None
            base = local_dir + info['format']
            for suffix, tolerated in (('', IOError), ('.bz2', IOError),
                                      ('.gz', Exception)):
                try:
                    dg = igetfile(base + suffix, shot=shot)
                    break
                except tolerated:
                    continue

            if dg is None:
                valarr = None
            else:
                nd = dg.vardict['DimNo']
                if nd != 1:
                    raise ValueError(
                        'Expecting a 1 D array in {0}, got {1}!'.format(
                            dg.filename, nd))

                # varname is used as a regular expression on the value names
                matches = [
                    re.match(varname, nam) is not None
                    for nam in dg.vardict['ValName']
                ]
                w = np.where(np.array(matches))[0]
                if len(w) != 1:
                    raise LookupError(
                        'Need just one instance of variable {0} in {1}'.
                        format(varname, dg.filename))

                # get the column of the array corresponding to the name
                valarr = dg.data[:, nd + w[0]]
                tim = dg.data[:, 0] - delay

                if oper == 'ddt':  # derivative operator
                    valarr = np.diff(valarr) / (np.average(np.diff(tim)))
                    tim = (tim[0:-1] + tim[1:]) / 2.0

                if oper == 'ddt2':  # abs(ddw)*derivative operator
                    dw = np.diff(valarr) / (np.average(np.diff(tim)))
                    ddw = np.diff(dw) / (np.average(np.diff(tim)))
                    tim = tim[2:]
                    valarr = 4e-6 * dw[1:] * np.abs(ddw)

                valarr = (stineman_interp(times, tim, valarr))
                # no data beyond the end of the file's time base
                w = np.where(times > max(tim))
                valarr[w] = np.nan

        # identity test: `!= None` on an array compares element-wise
        if valarr is not None:
            vals.update({diag: valarr})
    debug_(max(pyfusion.DEBUG, debug), level=5, key='interp')
    return (vals)

Esempio n. 8

Mostra file

File: import_LHD_summary_csv.py Progetto: dpretty/pyfusion

def main():
    """Merge two LHD summary CSV files into a dict of shot-indexed arrays.

    Both files are read with ``read_csv_data``; the code below treats every
    column as an array of strings in which '' means "blank".  Columns of the
    second file are added to the first; a name already present is stored
    under the same name with a ``'1'`` suffix.  The result, ``LHD``, maps
    each column name to an array of length ``max(shot) + 1`` so that a shot
    number can be used directly as an index.

    Done simply, not necessarily efficiently:

    1. Drop all records with a blank shot number and convert shots to int.
    2. Reorder every column into shot order (``np.unique`` also collapses
       repeated shot numbers onto their first occurrence).
    3. For each column, find the non-blank entries, guess a type from the
       first few of them, prepare a target array of that type pre-filled
       with NaN (numeric) or '' (string), and deposit the values at
       ``target[shot]``.
    4. ``LHD['Shot']`` holds the shot number at every populated index and
       -1 everywhere else.

    Raises:
        LookupError: if the shot-number columns of the two files differ.
    """
    print('reading..')
    lhd = read_csv_data.read_csv_data(
        '/LINUX23/home/bdb112/LHD_Summary_Long.csv', header=3)
    print('{k} keys, {n} entries read'.format(n=len(lhd['nShotnumber']),
                                              k=len(lhd.keys())))

    # this is hacked in because I missed GAMMA and another in my big file
    lhd2 = read_csv_data.read_csv_data(
        '/home/bdb112/datamining/lhd_summary_data.csv', header=3)
    ksh = 'nShotnumber'
    ws2 = np.where(lhd[ksh] != lhd2[ksh])[0]
    if len(ws2) != 0:
        raise LookupError('{n} mismatched shots'.format(n=len(ws2)))
    # if we already have the key, give this one a different name,
    # otherwise keep the same name
    for k in lhd2.keys():
        if k in lhd.keys():
            lhd[k + '1'] = lhd2[k]
        else:
            lhd[k] = lhd2[k]

    LHD = {}
    tmp = {}
    sh = 90091
    err = 0
    str_summary = []

    # rows that actually carry a shot number; only those can be cast to int
    wnotnull = np.where(lhd['nShotnumber'] != '')[0]
    shots_tmp = lhd['nShotnumber'][wnotnull].astype(np.int32)
    # need unique here, as the same shot number can appear more than once;
    # ws indexes into shots_tmp (the blank-filtered rows), in shot order
    shots_test, ws = np.unique(shots_tmp, return_index=True)
    # Reorder the strings in a new dict, in shot number order.  ws refers to
    # the filtered rows, so map it back through wnotnull before indexing the
    # unfiltered columns; indexing with ws alone misaligns every row that
    # follows a blank shot number.
    for k in lhd.keys():
        tmp[k] = lhd[k][wnotnull[ws]]

    # now prepare the final shot array
    arrlen = np.max(shots_tmp) + 1  # need a spot for all shots including 0
    shots = np.zeros(arrlen, dtype=np.int32) - 1  # initialise to shot=-1
    shots[shots_tmp] = shots_tmp
    LHD.update({'Shot': shots})

    for k in tmp.keys():
        as_str_in_order = tmp[k]
        # now look for '' in other cols
        wcolnotnull = np.where(as_str_in_order != '')[0]

        chk_range = min(10, len(wcolnotnull))
        # get a lot of values, in case the first choice is not representative
        values = '_'.join([as_str_in_order[wcolnotnull[i]].strip()
                           for i in range(chk_range)])
        if re.match('^[_0-9]*$', values):
            dt = 'int32'
            arr = -np.ones(arrlen).astype(dt)
            wdecimal = np.where(
                np.remainder(as_str_in_order[wcolnotnull].astype(float), 1) != 0)[0]
            if len(wdecimal) > 0:
                print('reverting {k} to float based on {eg}'
                      .format(k=k, eg=as_str_in_order[wcolnotnull[wdecimal[0]]]))
            # NOTE(review): the next two lines run unconditionally, so the
            # int32 array prepared above is never used and integer-looking
            # columns are always stored as float32.  The message suggests
            # they may have been meant to sit inside the `if` - confirm
            # before changing, as it alters the stored dtypes.
            dt = 'float32'
            arr = np.nan + np.ones(arrlen).astype(dt)

        # NOTE(review): inside the class, `+-.` is a character range
        # ('+' through '.') and therefore also admits ','.
        elif re.match('^[_+-.0-9eE]*$', values):
            dt = 'float32'
            arr = np.nan + np.ones(arrlen).astype(dt)
        else:
            # was `dt == 'str'`: a comparison that left dt stale from the
            # previous column (or undefined for the very first column)
            dt = 'str'
            #arr = np.empty(arrlen,dtype='|S256')  # need to initialise empty
            arr = np.array(arrlen * [''], dtype='|S256')

        try:  # the conversion may go wrong - protect
            arr[shots_tmp[ws[wcolnotnull]]] = \
                as_str_in_order[wcolnotnull].astype(np.dtype(dt))
        except Exception as details:
            # best effort: a column whose guessed type turns out to be wrong
            # is kept as strings instead of aborting the whole import
            err += 1
            print('Failed on {k} (type was based on "{v}" for shot {sh}, {d}'
                  .format(k=k, d=details, v=values, sh=sh))

            arr = np.array(arrlen * [''], dtype='|S256')
            #arr = np.empty(arrlen,dtype='|S256')
            #arr = np.array(arrlen*[''])
            arr[shots_tmp[ws[wcolnotnull]]] = as_str_in_order[wcolnotnull]

            # compress, but beware assignments in the future.
            arr = np.array([s.strip() for s in arr])
            str_summary.append('{k}: {oldty}-> {dty}'
                               .format(k=k, dty=arr.dtype,
                                       oldty=as_str_in_order.dtype))
            print('revert {k} to a string, type {dty}'.format(k=k, dty=arr.dtype))

        LHD.update({k: arr})  # add the new entry