def savebin(resdir, d_ix, start, stop, nans, sps, units):
    global d_c, d_d, processed, differences
    try:
        p
    except NameError:
        p = ""
    else:
        # a previous scp transfer may still be running; wait for it
        print "waiting for scp"
        sts = os.waitpid(p.pid, 0)
    print " %d files processed with in total %d records" % (len(processed), d_ix)
    print " %d NaN inserted in total (%.3f seconds)" % (nans, float(nans) / sps)
    s_intervals = s_jitter = "n/a"  # defaults in case no interval was recorded yet
    if (len(differences) >= 1):
        s_intervals = "%.4f/%.4f/%.7f/%.7f" % (
            max(differences), min(differences),
            numpy.mean(differences), numpy.std(differences))
        print " intervals maximum/minimum/mean/std: (%s) s" % s_intervals
        jitter = (numpy.asarray(differences) - 60) * 1000
        s_jitter = "%+.4f/%.4f/%.7f/%.7f" % (
            max(jitter), min(jitter), numpy.mean(jitter), numpy.std(jitter))
        print " jitter maximum/minimum/mean/std: (%s) ms" % s_jitter
        #print "(jitter = nominal - actual = 60.0000 - x)"
    try:
        # create the hourly result file
        resfile = processed[0].strftime("%Y-%m-%dT%H.nc")
        resf = os.path.join(resdir, resfile)
        remf = processed[0].strftime("%Y/%m/%d/")
        if (os.path.exists(resf) and os.path.getsize(resf) > 1 * 1024L * 1024L):
            print " files already concatenated! Overwriting %s (%s)" % (
                resf, filesize(resf))
        print " writing data to file %s" % resf
        if (i_mode):  # interactive mode
            print " please have patience, this might need several minutes"
        fidw = netcdf.Dataset(os.path.join(resdir, resfile), 'w',
                              format='NETCDF4')
        fidw.setncatts({'files': len(processed), 'sps': sps, 'nan': nans,
                        'start': start, 'stop': stop, 'timezone': 'UTC',
                        'intervals': s_intervals, 'jitter': s_jitter})
        fidw.createDimension('NS', d_ix)
        fidw.createDimension('WE', d_ix)
        fNS = fidw.createVariable('NS', numpy.float32, ('NS', ),
                                  zlib=True)  # TODO fid.variables['ch3'].dtype
        fWE = fidw.createVariable('WE', numpy.float32, ('WE', ),
                                  zlib=True)  # TODO sign reversed
        fNS.units = units
        fWE.units = units
        # write data back
        fNS[:] = d_c[:d_ix]
        fWE[:] = d_d[:d_ix]
        print " writing %d records to file ..." % d_ix
        fidw.close()
    except (IOError, RuntimeError) as e:
        # assumed handler: the original except body is truncated here;
        # report the problem and signal failure to the caller
        print "Unexpected error while writing file: %s \n %s" % (resf, e)
        return -1
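# Quick sanity check for an hourly file written by savebin(): a minimal
# read-back sketch, not part of the original source. The file name pattern
# "%Y-%m-%dT%H.nc", the attributes and the NS/WE variables are taken from
# savebin() above; the path 'res/2014-03-02T14.nc' is a hypothetical example.
def _verify_hourly_file(path='res/2014-03-02T14.nc'):
    import netCDF4 as netcdf
    fid = netcdf.Dataset(path, 'r')
    print "start=%s stop=%s sps=%s nan=%s" % (
        getattr(fid, 'start'), getattr(fid, 'stop'),
        getattr(fid, 'sps'), getattr(fid, 'nan'))
    ns = fid.variables['NS'][:]  # north-south channel
    we = fid.variables['WE'][:]  # west-east channel (sign possibly reversed, see TODO above)
    print "%d records read (%s)" % (ns.size, fid.variables['NS'].units)
    fid.close()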
def readfiles():
    import re, time
    from datetime import datetime as datetime
    import netCDF4 as netcdf
    import socket
    import glob
    global t, ch1, ch2, sps, sampl, title
    """
    files = sorted(os.listdir(_LOCATION_IN))
    d = len(files)
    """
    # most recent file by creation time (max(); min() would pick the oldest)
    newest = os.path.basename(
        max(glob.iglob('data/tmp/*'), key=os.path.getctime))
    d = len(glob.glob('data/tmp/*'))
    fn = int(newest[:-3])  # remove ending ".nc" and convert to seconds
    ts1 = time.gmtime(fn)  # broken-down time format
    title = time.strftime("%F %T %z", ts1) + ' on ' + socket.gethostname()
    print "%d files found, most recent: %d.nc (%s)" % (d, fn, title)
    try:
        fid = netcdf.Dataset(os.path.join(_LOCATION_IN, newest), 'r')
    except (IOError, RuntimeError):
        print "Unexpected error while reading file: %s" % (
            os.path.join(_LOCATION_IN, newest))
        return -1
    else:
        start = getattr(fid, 'start')  # some string
        sps = int(getattr(fid, 'sps'))  # sampling rate -> to int
        sampl = fid.variables['ch1'].size
        units = fid.variables['ch1'].units
        print "\tstart time = %s" % (start)
        print "\tsamples = %d" % (sampl)
        print "\tsampling frequency = %d" % (sps)
        ch1 = fid.variables['ch1'][:]
        ch2 = fid.variables['ch2'][:]
        t = np.linspace(0, 60, sampl)
        fid.close()
        print "file read"
        return 1
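# Hypothetical usage sketch (not from the original source): readfiles() leaves
# t, ch1, ch2 and title in module globals, so a plausible caller plots the most
# recent minute of data. Assumes matplotlib is available.
def _plot_latest_minute():
    import matplotlib.pyplot as plt
    if readfiles() > 0:
        plt.plot(t, ch1, label='ch1')
        plt.plot(t, ch2, label='ch2')
        plt.xlabel('seconds')
        plt.title(title)
        plt.legend()
        plt.show()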
import sys
import numpy as np
import netCDF4 as netcdf

time_step = 7
n_lat = 10
n_lon = 18
n_time = 12

args = sys.argv[1:]
if len(args) == 0:
    args = ['temp']

missing_value = -999.0  # assumed fill value; not defined in the original excerpt


def var_func(lat, lon, time):
    return (15 + 15 * np.sin(6.28 * time * time_step / 365) *
            np.cos(3.14 * lat / 180) + np.random.randint(20) / 10.0)


def is_missing(lat, lon, time):
    return (lat + lon) % 9 == 3


ds = netcdf.Dataset('sample.nc', 'w', format='NETCDF4')
ds.createDimension('lat', n_lat)
ds.createDimension('lon', n_lon)
ds.createDimension('time', n_time)
varlat = ds.createVariable('lat', float, 'lat')
varlon = ds.createVariable('lon', float, 'lon')
vartime = ds.createVariable('time', float, 'time')
varlat[:] = np.arange(-90, 90 + 1, 180 // (n_lat - 1))
varlon[:] = np.arange(0, 360, 360 // n_lon)
vartime[:] = np.arange(0, n_time * time_step, time_step)
for varname in args:
    var = ds.createVariable(varname, float, ('lat', 'lon', 'time'),
                            fill_value=missing_value)
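# Hypothetical completion: the excerpt breaks off after creating the data
# variables. A minimal fill-loop sketch, assuming var_func()/is_missing() take
# grid *values* (not indices); cells flagged missing are left at fill_value.
for varname in args:
    var = ds.variables[varname]
    for i, lat in enumerate(varlat[:]):
        for j, lon in enumerate(varlon[:]):
            for k, tm in enumerate(vartime[:]):
                if not is_missing(lat, lon, tm):
                    var[i, j, k] = var_func(lat, lon, tm)
ds.close()  # the original excerpt never closes the dataset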
def concatenate(daydir, resdir):
    global d_a, d_b, d_c, d_d, processed, differences, i_mode
    files = [w for w in sorted(os.listdir(daydir))]
    # lim: module-level processing limit set by the caller
    print "%d files found, will process %s" % (
        len(files), "all" if (lim == _MINUTES_A_DAY) else str(lim))
    if (len(files) < _MINUTES_A_DAY):
        print "files are missing. There should be 24*60=1440"
    if (i_mode):  # interactive mode
        print "\t(press return (enter) to gently abort)"
    processed = []
    differences = []
    nans = 0
    d_ix = 0
    for f in (f for f in files if (re.search(
            '[0-2][0-9][0-5][0-9][0-5][0-9]\.[0-9][0-9][0-9][0-9]\.nc', f))):
        # dd: day path prefix "%Y/%m/%d/", dy: midnight of the current day
        # (both module-level, set by the caller)
        processed.append(
            datetime.datetime.strptime(dd + f, "%Y/%m/%d/%H%M%S.%f.nc"))
        d_lim = processed[0] + datetime.timedelta(hours=1)
        d_lim = d_lim.replace(minute=0, second=0, microsecond=0)
        try:
            fid = netcdf.Dataset(os.path.join(daydir, f), 'r')
        except IOError as e:
            print "Unexpected error while reading file: %s \n %s" % (f, e)
            return -1
        except RuntimeError as e:  # if file is damaged simply skip it
            print "Unexpected error while reading file: %s \n %s" % (f, e)
            continue
        else:
            if (len(processed) == 1):
                start = getattr(fid, 'start')
                sps = getattr(fid, 'sps')  # 500
                #import pdb; pdb.set_trace()
                sampl = fid.variables['ch3'].size  # 30000
                units = fid.variables['ch3'].units  # mV
                print "first file: %s has %d samples (%d seconds at %d Hz)" % (
                    f, sampl, sampl / sps, sps)
                if ((processed[0] - dy).total_seconds() < 60.0):
                    # right after midnight: try to get the last file of the
                    # previous day, which may spread across midnight
                    dz = dy - datetime.timedelta(days=1)  # the day before
                    dzt = dz.strftime('%Y/%m/%d/')
                    dzdir = os.path.join(_LOCATION_IN, dzt)
                    if not os.path.exists(dzdir):
                        print "could not find the day before (%s) to obtain the file spreading midnight" % dzdir
                    else:
                        filep = sorted(os.listdir(dzdir))[-1]
                        fp_st = datetime.datetime.strptime(
                            dzt + filep, "%Y/%m/%d/%H%M%S.%f.nc")
                        oldd_s = (dy - fp_st).total_seconds()
                        if (oldd_s < 60.0):  # if file covers midnight
                            fp_y = os.path.join(_LOCATION_IN, dzt + filep)
                            od_s = 60.0 - oldd_s
                            print "file of day before (%s) contains %.4f seconds of current day" % (
                                dzt + filep, od_s)
                            try:
                                fp = netcdf.Dataset(fp_y, 'r')
                            except (IOError, RuntimeError) as e:
                                print "Unexpected error while reading file: %s \n %s" % (
                                    fp_y, e)
                                continue
                            else:
                                if (getattr(fp, 'sps') == sps and
                                        fp.variables['ch3'].size == sampl):
                                    si = int(numpy.round(oldd_s * sps))
                                    inn = fp.variables['ch3'][si:].size
                                    d_c[d_ix:d_ix + inn] = fp.variables['ch3'][si:]
                                    d_d[d_ix:d_ix + inn] = fp.variables['ch4'][si:]
                                    d_ix += inn
                                    print "imported %d values from %s" % (
                                        inn, dzt + filep)
                                    processed[0] = dy
                                    start = dy.strftime("%Y-%m-%d %H:%M:%S.%f")
                                else:
                                    print "Error while processing %s. File inconsistent!" % (
                                        fp_y)
                                    return -1
                                fp.close()
                #import pdb; pdb.set_trace()
                d_c[d_ix:d_ix + fid.variables['ch3'].size] = \
                    fid.variables['ch3'][:]  # like append
                d_d[d_ix:d_ix + fid.variables['ch3'].size] = \
                    fid.variables['ch4'][:]
                d_ix += fid.variables['ch3'].size
            if (len(processed) > 1):
                try:
                    fid2 = netcdf.Dataset(os.path.join(daydir, f), 'r')
                except (IOError, RuntimeError) as e:
                    print "Unexpected error while reading file: %s \n %s" % (f, e)
                    continue
                else:
                    if (sps != getattr(fid2, 'sps')):
                        print "sampling rate inconsistent at %s! %d, now: %d" % (
                            f, sps, getattr(fid2, 'sps'))
                        break
                    if (sampl != fid2.variables['ch3'].size):
                        print "file length differs at %s! %d, now: %d" % (
                            f, sampl, fid2.variables['ch3'].size)
                        break
                    if (len(processed) > 2):
                        # ignore first interval due to its possibly bigger range
                        time_delta = processed[-1] - processed[-2]
                        differences = numpy.hstack(
                            (differences, time_delta.total_seconds()))
                        m = int(numpy.round((differences[-1] - 60.0) * sps))
                        if (m != 0):
                            if (m > 0):
                                print "WARN: %3d samples are missing. inserted %d*NaN" % (m, m)
                                print "\t %s" % (processed[-2].strftime("%H:%M:%S.%f"))
                                print "\t %s" % (processed[-1].strftime("%H:%M:%S.%f"))
                                print " diff: %10.4f seconds" % (differences[-1] - 60.0)
                                if (processed[-1] >= d_lim):
                                    # so many files missing that the next hour
                                    # is also sparse: pad only to the hour edge
                                    time_delta = d_lim - processed[-2]
                                    m = int(numpy.round(
                                        (time_delta.total_seconds() - 60.0) * sps))
                                    print "WARN: even more files are missing\n will insert only %d*NaN (%10.4f secs) and skip some files" % (
                                        m, float(m) / sps)
                                    d_ins = numpy.empty(m)
                                    d_ins[:] = numpy.NAN
                                    d_c[d_ix:d_ix + m] = d_ins
                                    d_d[d_ix:d_ix + m] = d_ins
                                    d_ix += m
                                    nans += m
                                    stop = d_lim.strftime("%Y-%m-%d %H:%M:%S.%f")
                                    differences[-1] = time_delta.total_seconds()
                                    if (savebin(resdir=resdir, d_ix=d_ix,
                                                start=start, stop=stop,
                                                nans=nans, sps=sps,
                                                units=units)):
                                        return -1
                                    processed = [processed[-1]]
                                    differences = []
                                    nans = 0
                                    d_ix = 0
                                    d_lim = processed[0] + datetime.timedelta(hours=1)
                                    d_lim = d_lim.replace(minute=0, second=0,
                                                          microsecond=0)
                                    inn = fid2.variables['ch3'][:].size
                                    d_c[d_ix:d_ix + inn] = fid2.variables['ch3'][:]
                                    d_d[d_ix:d_ix + inn] = fid2.variables['ch4'][:]
                                    d_ix += inn
                                    start = stop
                                    #import pdb; pdb.set_trace()
                                else:
                                    d_ins = numpy.empty(m)
                                    d_ins[:] = numpy.NAN
                                    d_c[d_ix:d_ix + m] = d_ins
                                    d_d[d_ix:d_ix + m] = d_ins
                                    d_ix += m
                                    nans += m
                            elif (m < 0):
                                print "WARN: files too dense! %d samples seem to be too many before %s" % (m, f)
                                print "\t %s" % (processed[-2].strftime("%H:%M:%S.%f"))
                                print "\t %s" % (processed[-1].strftime("%H:%M:%S.%f"))
                                print " diff: %10.4f seconds" % (differences[-1] - 60.0)
                    # check if the file is not too long
                    if (processed[-1] < d_lim):  # present file still in batch
                        rs = (d_lim - processed[-1]).total_seconds()
                        if (rs < 60.0):
                            # it is the last one of the hour and needs truncation
                            si = int(numpy.round(rs * sps))
                            d_c[d_ix:d_ix + si] = fid2.variables['ch3'][:si]
                            d_d[d_ix:d_ix + si] = fid2.variables['ch4'][:si]
                            d_ix += si
                            print "from last file %s only %f seconds (%d samples) taken" % (
                                f, rs, si)
                            stop = d_lim.strftime("%Y-%m-%d %H:%M:%S.%f")
                            if (savebin(resdir=resdir, d_ix=d_ix, start=start,
                                        stop=stop, nans=nans, sps=sps,
                                        units=units)):
                                return -1
                            #import pdb; pdb.set_trace()
                            processed = []
                            differences = []
                            nans = 0
                            d_ix = 0
                            inn = fid2.variables['ch3'][si:].size
                            d_c[d_ix:d_ix + inn] = fid2.variables['ch3'][si:]
                            d_d[d_ix:d_ix + inn] = fid2.variables['ch4'][si:]
                            d_ix += inn
                            start = stop
                            processed.append(d_lim)
                        else:
                            d_c[d_ix:d_ix + fid2.variables['ch3'].size] = \
                                fid2.variables['ch3'][:]  # append
                            d_d[d_ix:d_ix + fid2.variables['ch3'].size] = \
                                fid2.variables['ch4'][:]
                            d_ix += fid2.variables['ch3'].size
                    fid2.close()
            fid.close()
        if (i_mode and checkUser()):
            print "user interrupt! %d files processed\n" % (len(processed))
            return -1
    return len(files)
import os, sys

if len(sys.argv) != 2:
    print 'usage: ./readin.py "file.nc"'
    sys.exit(-1)
fname = sys.argv[1]

# assumed input directory; not defined in this excerpt (matches the glob
# pattern used in readfiles())
_LOCATION_IN = 'data/tmp'

filename = os.path.join(_LOCATION_IN, fname)
print "reading file %s..." % (filename)

import numpy as np
from datetime import datetime as datetime
import netCDF4 as netcdf

try:
    fid = netcdf.Dataset(filename, 'r')
except (IOError, RuntimeError):
    print "Unexpected error while reading file: %s" % (filename)
    sys.exit(-1)
else:
    start = getattr(fid, 'start')  # string, however the terminating \0 is missing
    #import pdb; pdb.set_trace()
    start = start[:26]  # keep "YYYY-mm-dd HH:MM:SS.ffffff" only
    sps = int(getattr(fid, 'sps'))  # sampling rate -> to int
    sampl = fid.variables['ch1'].size
    units = fid.variables['ch1'].units
    print "\tstart time = %s" % (start)
    print "\tsamples = %d" % (sampl)
    print "\tsampling frequency = %d" % (sps)
    ch1 = np.zeros(sampl, dtype='float32')
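    # Hypothetical continuation (the excerpt breaks off above): copy the
    # channel data into preallocated arrays and release the file handle,
    # mirroring what readfiles() does for ch1/ch2 and t.
    ch1[:] = fid.variables['ch1'][:]
    ch2 = np.zeros(sampl, dtype='float32')
    ch2[:] = fid.variables['ch2'][:]
    t = np.linspace(0, 60, sampl)  # 60-second minute file, as in readfiles()
    fid.close()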