def _read_sonde_data(sonde_file):
    """Read a sonde data file into a pandas DataFrame indexed by timestamp.

    Parameters
    ----------
    sonde_file : str
        Path to a data file readable by ``sonde.Sonde``.

    Returns
    -------
    pandas.DataFrame
        One column per sonde parameter, indexed by naive datetimes.
    """
    import datetime
    sonde_data = sonde.Sonde(sonde_file)
    # Round-trip each date through a string to normalize it to a naive,
    # second-resolution datetime (drops tzinfo/microseconds, if any).
    # datetime.datetime.strptime is used instead of pd.datetime.strptime:
    # the pd.datetime alias was deprecated and removed in pandas 2.0.
    datetimes = [
        datetime.datetime.strptime(dt.strftime('%m-%d-%y %H:%M:%S'),
                                   '%m-%d-%y %H:%M:%S')
        for dt in sonde_data.dates
    ]
    sonde_df = pd.DataFrame(sonde_data.data, index=datetimes)
    return sonde_df
# for site in ['bayt', 'boli', 'midg']: new_sonde_filename = 'twdb_wq_' + site.strip() + '.csv' old_sonde_filename = 'twdb_wq_' + site.strip() + '_provisional.csv' new_sonde_file = os.path.join(data_dir, 'sites', site, new_sonde_filename) old_sonde_file = os.path.join(data_dir, 'sites', site, old_sonde_filename) if os.path.isfile(new_sonde_file): sonde_file = new_sonde_file elif os.path.isfile(old_sonde_file): sonde_file = old_sonde_file else: print "No merged file exists for site: ", site + '\n' continue site_sonde = sonde.Sonde(sonde_file) datetimes = [ pd.datetime.strptime(dt.strftime('%m-%d-%y %H:%M:%S'), '%m-%d-%y %H:%M:%S') for dt in site_sonde.dates ] site_sonde_data = pd.DataFrame(site_sonde.data, index=datetimes) parameter_not_exist.append([parameter not in site_sonde.parameters]) if parameter in site_sonde.parameters: site_sonde_data[parameter][ site_sonde_data[parameter] < -900] = np.nan site_param_data = site_sonde_data[parameter].dropna() site_param_data[:] = site_counter site_param_counter[site] = site_counter site_counter += 1
#for filt in ['data_file','manufacturer','serial_number']: # if qarule[filt] is not '': # exec('mask *= clean_data.'+filt+'!=qarule[filt]') mask = mask | outside_mask if np.all(mask): print 'No data altered for rule: ', qarule else: print str(np.where( mask == False)[0].size) + ' entries altered for rule: ', qarule parameters = qarule['apply_to_parameters'].strip() if parameters == '': clean_data.apply_mask(mask) else: clean_data.apply_mask(mask, parameters=parameters.split(',')) #write final file print 'writing clean data file' clean_header = header.copy() clean_header[ 'qa_level'] = 'data corrected according to rules in file ' + os.path.split( qa_rules_file)[-1] clean_data.write(clean_data_file, format='csv', disclaimer=disclaimer, metadata=clean_header) #create plots #read in data @todo fix copy.copy problems raw_data = sonde.Sonde(raw_data_file) clean_data = sonde.Sonde(clean_data_file)
# NOTE(review): this chunk begins mid-statement — the `}, inplace=True)`
# below closes a rename(columns={...}) call whose opening is above the
# visible region.
}, inplace=True)
# Normalize the unwieldy spreadsheet column headers to short names.
deployment_data.rename(columns={
    'Depth from bottom of instrument to water surface (m) ':
    'sonde_bottom_to_surface'
}, inplace=True)
deployment_data.rename(
    columns={'Total Water Depth (m) ': 'total_depth'}, inplace=True)
# Restrict to the site of interest and index by deployment datetime.
# NOTE(review): assigning into this slice may trigger pandas'
# SettingWithCopy behavior — verify the writes below take effect.
site_deployment_data = deployment_data[deployment_data['SITE ID'] ==
                                       site_name]
site_deployment_data.index = site_deployment_data.dep_datetime
site_deployment_data['spotcheck_sensor_depth'] = np.nan
calibration_data = calibration_data.applymap(string_cleaner)
sonde_data = sonde.Sonde(sonde_data_file)
# Round-trip through a string to normalize dates to naive datetimes.
# NOTE(review): pandas.datetime was deprecated and removed in pandas 2.0;
# datetime.datetime.strptime is the modern spelling.
sonde_dates = [
    pandas.datetime.strptime(dt.strftime('%m-%d-%y %H:%M:%S'),
                             '%m-%d-%y %H:%M:%S')
    for dt in sonde_data.dates
]
# NOTE(review): na_values entries are matched as literal strings, not
# regexes — the '\s*' entry presumably meant "whitespace"; verify it
# matches anything in practice.
sensor_to_gps_height = pandas.read_csv(sensor_to_gps_height_file,
                                       sep=',',
                                       index_col=[0],
                                       na_values=['nd', '\s*'])
sonde_series = pandas.DataFrame(sonde_data.data, index=sonde_dates)
# Tag each row with the originating file's lowercase basename (no
# extension) so rows can be grouped by deployment file below.
sonde_series['file_name'] = [
    f.lower().strip().split('.')[0] for f in sonde_data.data_file
]
sonde_series['sonde_id'] = 'unknown'
deploy_filename_list = np.unique(sonde_series.file_name)
if averaging not in ['daily', 'monthly']: raise ValueError("%s is an invalid averaging method. need to enter daily or monthly for averaging" % averaging) if args['--recent_years']: recent_years = int(args['--recent_years']) else: recent_years = 3 ymin = args['--ymin'] ymax = args['--ymax'] for site in sites: sonde_file = os.path.join(data_dir, site, 'twdb_wq_' + site + '.csv') if not os.path.exists(sonde_file): warnings.warn("Sonde file %s could not be found" % sonde_file) continue sonde_parameters = sonde.Sonde(sonde_file).parameters if param_code not in sonde_parameters: warnings.warn("Parameter %s not found in %s" % (parameter, site)) continue plot_statistics(sonde_file, param_code, averaging, recent_years=recent_years, ymin=ymin, ymax=ymax, title=site.upper()) if args['--save-txt']: historical_stat = _calculate_historical_statistics(sonde_file, param_code, averaging, recent_years=recent_years) output_dir = os.getcwd() output_file = os.path.join(output_dir, site + '_historical_stat.csv') save_stat_data(historical_stat, output_file, averaging) print "Historical statistics saved in file: %s" % output_file
end_date = datetime.datetime.strptime(end_date, '%Y-%m-%d %H:%M') # end_date_time = end_date.replace(tzinfo=sonde.find_tz(end_date)) else: end_date_time = raw_data.dates[-1] #data_range_mask = (clean_data.dates >= start_date_time) * (clean_data.dates <= end_date_time) #clean_data.apply_mask(data_range_mask) clean_data.write(clean_data_file, file_format='csv', disclaimer=disclaimer, metadata=clean_header, float_fmt='%5.3f') #create plots #read in data @todo fix copy.copy problems raw_data = sonde.Sonde(str(raw_data_file)) clean_data = sonde.Sonde(str(clean_data_file)) raw_dates = [ pandas.datetime.strptime(dt.strftime('%m-%d-%y %H:%M:%S'), '%m-%d-%y %H:%M:%S') for dt in raw_data.dates ] raw_series = pandas.DataFrame(raw_data.data, index=raw_dates) raw_series['filename'] = raw_data.data_file clean_dates = [ pandas.datetime.strptime(dt.strftime('%m-%d-%y %H:%M:%S'), '%m-%d-%y %H:%M:%S') for dt in clean_data.dates ] clean_series = pandas.DataFrame(clean_data.data, index=clean_dates) clean_series['filename'] = clean_data.data_file