def __init__(self, y1, m1, y2, m2):
    """
    Set up per-month bookkeeping arrays spanning y1/m1 to y2/m2 inclusive.

    :param y1: first year of the period
    :param m1: first month of the period
    :param y2: last year of the period
    :param m2: last month of the period
    """
    # total number of calendar months in the inclusive range
    self.n = 12 * (y2 - y1) + m2 - m1 + 1
    self.years = np.zeros(self.n)
    self.months = np.zeros(self.n)
    self.counter = np.zeros(self.n)
    # record the year and month for each slot in chronological order
    for slot, (yr, mo) in enumerate(qc.year_month_gen(y1, m1, y2, m2)):
        self.years[slot] = yr
        self.months[slot] = mo
def main(argv):
    """
    tracking_qc.py invoked by typing::

      python tracking_qc.py -config configuration.txt -id "SHIPNAME"

    This quality controls data for the chosen ID (which will be end-padded
    with spaces to nine characters). The location of the data and the
    locations of the climatology files are all to be specified in the
    configuration files.
    """
    print('########################')
    print('Running tracking_qc')
    print('########################')

    parser = argparse.ArgumentParser(
        description='Marine QC system, main program')
    parser.add_argument('-config', type=str, default='configuration.txt',
                        help='name of config file')
    parser.add_argument('-id', type=str, help='ID to read in and process')
    args = parser.parse_args()

    # IDs are stored end-padded with spaces to a fixed width of 9 characters,
    # so pad the requested ID to match the filenames on disk.
    # (renamed from 'id', which shadowed the builtin)
    target_id = args.id
    while len(target_id) < 9:
        target_id += ' '

    print("running on ICOADS, this is not a test!")
    print('Input file is {}'.format(args.config))
    print('Running for ID {}'.format(target_id))
    print('')

    config = ConfigParser.ConfigParser()
    config.read(args.config)
    icoads_dir = config.get('Directories', 'ICOADS_dir')
    out_dir = config.get('Directories', 'out_dir')

    with open(config.get('Files', 'parameter_file'), 'r') as f:
        parameters = json.load(f)

    print('ICOADS directory = {}'.format(icoads_dir))
    print('Output to {}'.format(out_dir))
    print('')

    v = ex.Voyage()

    # assemble the full voyage for this ID from the per-month variable files
    for year, month in qc.year_month_gen(1985, 1, 2014, 12):
        sy = str(year)
        sm = "{:02d}".format(month)
        filename = (out_dir + '/' + sy + '/' + sm + '/Variables_' +
                    sy + sm + '_' + target_id + '_standard.csv')
        print(filename)

        # try to open the file containing ID data; a missing month is not
        # fatal, the ID may simply have no reports then
        try:
            with open(filename, 'r') as csvfile:
                # get the headers from the CSV file and sort them out:
                # strip trailing carriage return, split by commas, fix duplicates
                headers = csvfile.readline()
                headers = headers[:-1]
                headers = headers.split(',')
                headers[11] = 'AT_anom'  # THIS IS AN AWFUL BODGE, HEADERS SHOULD BE UNIQUE GRRRR
                headers[13] = 'SST_anom'  # THIS IS AN AWFUL BODGE, HEADERS SHOULD BE UNIQUE GRRRR
                # now read the rest of the CSV file using the headers as a dictionary.
                # Need to add OSTIA information as "ext" information in the rep
                reader = csv.DictReader(csvfile, fieldnames=headers)
                for line in reader:
                    rep = ex.MarineReportQC(easy_imma(line))
                    rep.setext('OSTIA', line['OSTIA'])
                    rep.setext('ICE', line['ICE'])
                    rep.setext('BGVAR', line['BGVAR'])
                    v.add_report(rep)
        # report what actually went wrong instead of silently swallowing it
        # (was a bare "except:" which also hid e.g. KeyboardInterrupt)
        except (IOError, OSError, KeyError, IndexError, ValueError) as error:
            print("Something went wrong ({}). Does the file {} exist?".format(
                error, filename))

    print("")
    print("read in " + str(len(v)) + " reports from the good ship " + target_id)

    # all the data now read in, do something with it. This isn't very
    # exciting, but we can... print out all the obs
    for rep in v.rep_feed():
        print(rep.getvar('ID'), rep.getvar('YR'), rep.getvar('MO'),
              rep.getvar('LAT'), rep.getvar('LON'))

    # or, we can run a positional track check on the combined track...
    v.track_check(parameters['track_check'])

    # ...and print out the results
    for rep in v.rep_feed():
        print(rep.get_qc('POS', 'trk'))
def main(argv):
    """
    This program reads in data from ICOADS.3.0.0/ICOADS.3.0.1 and applies
    quality control processes to it, flagging data as good or bad according
    to a set of different criteria. Optionally it will replace drifting buoy
    SST data in ICOADS.3.0.1 with drifter data taken from the GDBC portal.

    The first step of the process is to read in various SST and MAT
    climatologies from file. These are 1degree latitude by 1 degree longitude
    by 73 pentad fields in NetCDF format.

    The program then loops over all specified years and months, reads in the
    data needed to QC that month and then does the QC. There are three stages
    in the QC

    basic QC - this proceeds one observation at a time. Checks are relatively
    simple and detect gross errors

    track check - this works on Voyages consisting of all the observations
    from a single ship (or at least a single ID) and identifies observations
    which make for an implausible ship track

    buddy check - this works on Decks which are large collections of
    observations and compares observations to their neighbours
    """
    print('########################')
    print('Running make_and_full_qc')
    print('########################')

    parser = argparse.ArgumentParser(
        description='Marine QC system, main program')
    parser.add_argument('-config', type=str, default='configuration.txt',
                        help='name of config file')
    parser.add_argument('-year1', type=int, default=1850,
                        help='First year for processing')
    parser.add_argument('-year2', type=int, default=1850,
                        help='Final year for processing')
    parser.add_argument('-month1', type=int, default=1,
                        help='First month for processing')
    parser.add_argument('-month2', type=int, default=1,
                        help='Final month for processing')
    parser.add_argument('-tracking', action='store_true',
                        help='perform tracking QC')
    args = parser.parse_args()

    inputfile = args.config
    year1 = args.year1
    year2 = args.year2
    month1 = args.month1
    month2 = args.month2
    tracking = args.tracking

    print("running on ICOADS, this is not a test!")
    print('Input file is {}'.format(inputfile))
    print('Running from {} {} to {} {}'.format(month1, year1, month2, year2))
    print('')

    config = ConfigParser.ConfigParser()
    config.read(inputfile)
    icoads_dir = config.get('Directories', 'ICOADS_dir')
    out_dir = config.get('Directories', 'out_dir')
    bad_id_file = config.get('Files', 'IDs_to_exclude')
    version = config.get('Icoads', 'icoads_version')

    print('ICOADS directory = {}'.format(icoads_dir))
    print('ICOADS version = {}'.format(version))
    print('Output to {}'.format(out_dir))
    print('List of bad IDs = {}'.format(bad_id_file))
    print('Parameter file = {}'.format(config.get('Files', 'parameter_file')))
    print('')

    ids_to_exclude = bf.process_bad_id_file(bad_id_file)

    # read in climatology files: pentad SST standard deviations used by the
    # buddy checks
    sst_pentad_stdev = clim.Climatology.from_filename(
        config.get('Climatologies', 'Old_SST_stdev_climatology'), 'sst')

    sst_stdev_1 = clim.Climatology.from_filename(
        config.get('Climatologies', 'SST_buddy_one_box_to_buddy_avg'), 'sst')
    sst_stdev_2 = clim.Climatology.from_filename(
        config.get('Climatologies', 'SST_buddy_one_ob_to_box_avg'), 'sst')
    sst_stdev_3 = clim.Climatology.from_filename(
        config.get('Climatologies', 'SST_buddy_avg_sampling'), 'sst')

    with open(config.get('Files', 'parameter_file'), 'r') as f:
        parameters = json.load(f)

    print("Reading climatologies from parameter file")
    climlib = ex.ClimatologyLibrary()
    for entry in parameters['climatologies']:
        print("{} {}".format(entry[0], entry[1]))
        climlib.add_field(entry[0], entry[1],
                          clim.Climatology.from_filename(entry[2], entry[3]))

    for year, month in qc.year_month_gen(year1, month1, year2, month2):

        print("{} {}".format(year, month))

        last_year, last_month = qc.last_month_was(year, month)
        next_year, next_month = qc.next_month_is(year, month)

        reps = ex.Deck()
        count = 0
        lastday = -99

        # read the month being QCed plus the months either side of it, so the
        # track and buddy checks have context at the month boundaries
        for readyear, readmonth in qc.year_month_gen(last_year, last_month,
                                                     next_year, next_month):

            print("{} {}".format(readyear, readmonth))

            ostia_bg_var = None
            if tracking:
                # seasonal OSTIA background-variance field for this month
                ostia_bg_var = clim.Climatology.from_filename(
                    config.get('Climatologies',
                               qc.season(readmonth) + '_ostia_background'),
                    'bg_var')

            filename = bf.icoads_filename_from_stub(
                parameters['icoads_dir'],
                parameters['icoads_filenames'],
                readyear, readmonth)
            try:
                icoads_file = gzip.open(filename, "r")
            except IOError:
                print("no ICOADS file for {} {}".format(readyear, readmonth))
                continue

            rec = IMMA()

            for line in icoads_file:

                # NOTE(review): bare except — any failure in readstr marks
                # the ob as rejected; consider narrowing the exception type
                try:
                    rec.readstr(line)
                    readob = True
                except:
                    readob = False
                    print("Rejected ob {}".format(line))

                # keep only parseable obs from non-excluded IDs that actually
                # belong to the month being read
                if (not (rec.data['ID'] in ids_to_exclude) and readob
                        and rec.data['YR'] == readyear
                        and rec.data['MO'] == readmonth):

                    rep = ex.MarineReportQC(rec)
                    del rec

                    # if day has changed then read in OSTIA field if available
                    # and append SST and sea-ice fraction to the observation
                    # metadata
                    if tracking and readyear >= 1985 and rep.getvar(
                            'DY') is not None:
                        if rep.getvar('DY') != lastday:
                            lastday = rep.getvar('DY')
                            y_year, y_month, y_day = qc.yesterday(
                                readyear, readmonth, lastday)

                            # ofname = ostia_filename(ostia_dir, y_year, y_month, y_day)
                            ofname = bf.get_background_filename(
                                parameters['background_dir'],
                                parameters['background_filenames'],
                                y_year, y_month, y_day)

                            climlib.add_field(
                                'OSTIA', 'background',
                                clim.Climatology.from_filename(
                                    ofname, 'analysed_sst'))
                            climlib.add_field(
                                'OSTIA', 'ice',
                                clim.Climatology.from_filename(
                                    ofname, 'sea_ice_fraction'))

                        rep_clim = climlib.get_field(
                            'OSTIA', 'background').get_value_ostia(
                                rep.lat(), rep.lon())
                        # convert from Kelvin to Celsius
                        if rep_clim is not None:
                            rep_clim -= 273.15

                        rep.setext('OSTIA', rep_clim)
                        rep.setext(
                            'ICE',
                            climlib.get_field('OSTIA', 'ice').get_value_ostia(
                                rep.lat(), rep.lon()))
                        rep.setext(
                            'BGVAR',
                            ostia_bg_var.get_value_mds_style(
                                rep.lat(), rep.lon(), rep.getvar('MO'),
                                rep.getvar('DY')))

                    # attach the climatological normals needed by the QC
                    for varname in ['SST']:
                        rep_clim = climlib.get_field(
                            varname, 'mean').get_value_mds_style(
                                rep.lat(), rep.lon(), rep.getvar('MO'),
                                rep.getvar('DY'))
                        rep.add_climate_variable(varname, rep_clim)

                    rep.perform_base_qc(parameters)
                    # flag whether the ob belongs to the month being QCed (as
                    # opposed to the neighbouring context months)
                    rep.set_qc(
                        'POS', 'month_match',
                        qc.month_match(year, month, rep.getvar('YR'),
                                       rep.getvar('MO')))

                    reps.append(rep)
                    count += 1

                rec = IMMA()

            icoads_file.close()

        print("Read {} ICOADS records".format(count))

        # filter the obs into passes and fails of basic positional QC
        filt = ex.QC_filter()
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        reps.add_filter(filt)

        # track check the passes one ship at a time
        count_ships = 0
        for one_ship in reps.get_one_platform_at_a_time():
            one_ship.track_check(parameters['track_check'])
            one_ship.iquam_track_check(parameters['IQUAM_track_check'])
            one_ship.spike_check(parameters['IQUAM_spike_check'])
            one_ship.find_saturated_runs(parameters['saturated_runs'])
            one_ship.find_multiple_rounded_values(
                parameters['multiple_rounded_values'])

            for varname in ['SST']:
                one_ship.find_repeated_values(
                    parameters['find_repeated_values'], intype=varname)

            count_ships += 1

        print("Track checked {} ships".format(count_ships))

        # SST buddy check: only compare obs that passed the positional, track
        # and basic SST checks
        filt = ex.QC_filter()
        filt.add_qc_filter('POS', 'is780', 0)
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        filt.add_qc_filter('POS', 'trk', 0)
        filt.add_qc_filter('SST', 'noval', 0)
        filt.add_qc_filter('SST', 'freez', 0)
        filt.add_qc_filter('SST', 'clim', 0)
        filt.add_qc_filter('SST', 'nonorm', 0)
        reps.add_filter(filt)

        reps.bayesian_buddy_check('SST', sst_stdev_1, sst_stdev_2,
                                  sst_stdev_3, parameters)
        reps.mds_buddy_check('SST', sst_pentad_stdev,
                             parameters['mds_buddy_check'])

        extdir = bf.safe_make_dir(out_dir, year, month)

        if tracking:
            # set QC for output by ID - buoys only and passes base SST QC
            filt = ex.QC_filter()
            filt.add_qc_filter('POS', 'month_match', 1)
            filt.add_qc_filter('POS', 'isdrifter', 1)
            reps.add_filter(filt)

            # write one output file per platform, plus an index mapping IDs
            # to safe filenames
            idfile = open(extdir + '/ID_file.txt', 'w')
            for one_ship in reps.get_one_platform_at_a_time():
                if len(one_ship) > 0:
                    thisid = one_ship.getrep(0).getvar('ID')
                    if thisid is not None:
                        idfile.write(thisid + ',' +
                                     ex.safe_filename(thisid) + '\n')
                        one_ship.write_output(parameters['runid'], extdir,
                                              year, month)
            idfile.close()

        # free this month's Deck before starting the next month
        del reps
def main(argv):
    """
    This program reads in data from ICOADS.2.5.1 and applies quality control
    processes to it, flagging data as good or bad according to a set of
    different criteria.

    The first step of the process is to read in various SST and MAT
    climatologies from file. These are 1degree latitude by 1 degree longitude
    by 73 pentad fields in NetCDF format.

    The program then loops over all specified years and months, reads in the
    data needed to QC that month and then does the QC. There are three stages
    in the QC

    basic QC - this proceeds one observation at a time. Checks are relatively
    simple and detect gross errors

    track check - this works on Voyages consisting of all the observations
    from a single ship (or at least a single ID) and identifies observations
    which make for an implausible ship track

    buddy check - this works on Decks which are large collections of
    observations and compares observations to their neighbours
    """
    print('########################')
    print('Running make_and_full_qc')
    print('########################')

    parser = argparse.ArgumentParser(
        description='Marine QC system, main program')
    parser.add_argument('-config', type=str, default='configuration.txt',
                        help='name of config file')
    parser.add_argument('-tracking', action='store_true',
                        help='perform tracking QC')
    parser.add_argument('-jobs', type=str, default='jobs.json',
                        help='name of job file')
    parser.add_argument('-job_index', type=int, default=0, help='job index')
    args = parser.parse_args()

    inputfile = args.config
    jobfile = args.jobs
    # job indices on the command line are 1-based; convert to 0-based
    jobindex = args.job_index - 1
    tracking = args.tracking

    with open(jobfile) as fp:
        jobs = json.load(fp)

    # the processing period comes from the selected job entry, not the CLI
    year1 = jobs['jobs'][jobindex]['year1']
    year2 = jobs['jobs'][jobindex]['year2']
    month1 = jobs['jobs'][jobindex]['month1']
    month2 = jobs['jobs'][jobindex]['month2']

    input_schema = jobs['schema']
    code_tables = jobs['code_tables']

    verbose = True  # need set to read as arg in future

    print('Input file is {}'.format(inputfile))
    print('Running from {} {} to {} {}'.format(month1, year1, month2, year2))
    print('')

    config = ConfigParser.ConfigParser()
    config.read(inputfile)
    icoads_dir = config.get('Directories', 'ICOADS_dir')
    out_dir = config.get('Directories', 'out_dir')
    bad_id_file = config.get('Files', 'IDs_to_exclude')
    version = config.get('Icoads', 'icoads_version')

    print('ICOADS directory = {}'.format(icoads_dir))
    print('ICOADS version = {}'.format(version))
    print('Output to {}'.format(out_dir))
    print('List of bad IDs = {}'.format(bad_id_file))
    print('Parameter file = {}'.format(config.get('Files', 'parameter_file')))
    print('')

    ids_to_exclude = bf.process_bad_id_file(bad_id_file)

    # read in climatology files used by the buddy checks
    sst_pentad_stdev = clim.Climatology.from_filename(
        config.get('Climatologies', 'Old_SST_stdev_climatology'), 'sst')

    sst_stdev_1 = clim.Climatology.from_filename(
        config.get('Climatologies', 'SST_buddy_one_box_to_buddy_avg'), 'sst')
    sst_stdev_2 = clim.Climatology.from_filename(
        config.get('Climatologies', 'SST_buddy_one_ob_to_box_avg'), 'sst')
    sst_stdev_3 = clim.Climatology.from_filename(
        config.get('Climatologies', 'SST_buddy_avg_sampling'), 'sst')

    with open(config.get('Files', 'parameter_file'), 'r') as f:
        parameters = json.load(f)

    # read in high resolution SST climatology file
    for entry in parameters['hires_climatologies']:
        if entry[0] == 'SST' and entry[1] == 'mean':
            sst_climatology_file = entry[2]
            print("hires sst climatology file {}".format(sst_climatology_file))

    climlib = ex.ClimatologyLibrary()
    climlib.add_field('SST', 'mean',
                      clim.Climatology.from_filename(sst_climatology_file,
                                                     'temperature'))

    for year, month in qc.year_month_gen(year1, month1, year2, month2):

        print("{} {}".format(year, month))

        last_year, last_month = qc.last_month_was(year, month)
        next_year, next_month = qc.next_month_is(year, month)

        reps = ex.Deck()
        count = 0

        # read the month being QCed plus the months either side of it, so the
        # track and buddy checks have context at the month boundaries
        for readyear, readmonth in qc.year_month_gen(last_year, last_month,
                                                     next_year, next_month):

            print("{} {}".format(readyear, readmonth))

            #icoads_dir = '/gws/nopw/j04/c3s311a_lot2/data/level0/marine/sub_daily_data/IMMA1_R3.0.0T-QC/'
            filename = icoads_dir + '{:4d}-{:02d}.psv'.format(readyear,
                                                              readmonth)

            # input is pipe-separated with no header row:
            # YR|MO|DY|HR|LAT|LON|DS|VS|ID|AT|SST|DPT|DCK|SLP|SID|PT|UID|W|D|IRF|bad_data|outfile
            imma_obj = pd.read_csv(filename, sep='|', header=None, names=[
                'YR', 'MO', 'DY', 'HR', 'LAT', 'LON', 'DS', 'VS', 'ID', 'AT',
                'SST', 'DPT', 'DCK', 'SLP', 'SID', 'PT', 'UID', 'W', 'D',
                'IRF', 'bad_data', 'outfile'
            ], low_memory=False)

            imma_obj['ID'].replace(' ', '', inplace=True)
            # sort chronologically (then by ID) before building reports
            imma_obj = imma_obj.sort_values(['YR', 'MO', 'DY', 'HR', 'ID'],
                                            axis=0, ascending=True)
            imma_obj = imma_obj.reset_index(drop=True)

            data_index = imma_obj.index

            rec = IMMA()
            for idx in data_index:
                # copy the row into an IMMA record, converting missing
                # values to None
                for k, v in imma_obj.loc[idx, ].to_dict().items():
                    rec.data[k] = to_none(v)
                readob = True
                if (not (rec.data['ID'] in ids_to_exclude) and readob
                        and rec.data['YR'] == readyear
                        and rec.data['MO'] == readmonth
                        and rec.data['DY'] is not None):  # dyb - new line / check

                    rep = ex.MarineReportQC(rec)
                    del rec

                    # attach the hires SST climatological normal
                    rep_clim = climlib.get_field('SST', 'mean').get_value(
                        rep.lat(), rep.lon(), rep.getvar('MO'),
                        rep.getvar('DY'))
                    rep.add_climate_variable('SST', rep_clim)

                    rep.perform_base_sst_qc(parameters)
                    # flag whether the ob belongs to the month being QCed (as
                    # opposed to the neighbouring context months)
                    rep.set_qc(
                        'POS', 'month_match',
                        qc.month_match(year, month, rep.getvar('YR'),
                                       rep.getvar('MO')))

                    reps.append(rep)
                    count += 1

                rec = IMMA()

            #icoads_file.close()

        print("Read {} ICOADS records".format(count))

        # filter the obs into passes and fails of basic positional QC
        filt = ex.QC_filter()
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        reps.add_filter(filt)

        # track check the passes one ship at a time
        count_ships = 0
        for one_ship in reps.get_one_platform_at_a_time():
            one_ship.sort()  # corrections applied can move reports between months, corrections currently applied after reading IMMA
            one_ship.track_check(parameters['track_check'])
            one_ship.find_repeated_values(parameters['find_repeated_values'],
                                          intype='SST')

            count_ships += 1

        print("Track checked {} ships".format(count_ships))

        # SST buddy check: only compare obs that passed the positional, track
        # and basic SST checks
        filt = ex.QC_filter()
        filt.add_qc_filter('POS', 'is780', 0)
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        filt.add_qc_filter('POS', 'trk', 0)
        filt.add_qc_filter('SST', 'noval', 0)
        filt.add_qc_filter('SST', 'freez', 0)
        filt.add_qc_filter('SST', 'clim', 0)
        filt.add_qc_filter('SST', 'nonorm', 0)
        reps.add_filter(filt)

        reps.bayesian_buddy_check('SST', sst_stdev_1, sst_stdev_2,
                                  sst_stdev_3, parameters)
        reps.mds_buddy_check('SST', sst_pentad_stdev,
                             parameters['mds_buddy_check'])

        extdir = bf.safe_make_dir(out_dir, year, month)

        # QC flags to include in the written output
        varnames_to_print = {
            'SST': [
                'bud', 'clim', 'nonorm', 'freez', 'noval', 'nbud', 'bbud',
                'rep', 'spike', 'hardlimit'
            ]
        }
        reps.write_qc('hires_' + parameters['runid'], extdir, year, month,
                      varnames_to_print)

        if tracking:
            # set QC for output by ID - buoys only and passes base SST QC
            filt = ex.QC_filter()
            filt.add_qc_filter('POS', 'month_match', 1)
            filt.add_qc_filter('POS', 'isdrifter', 1)
            reps.add_filter(filt)

            # write one output file per platform, plus an index mapping IDs
            # to safe filenames
            idfile = open(extdir + '/ID_file.txt', 'w')
            for one_ship in reps.get_one_platform_at_a_time():
                if len(one_ship) > 0:
                    thisid = one_ship.getrep(0).getvar('ID')
                    if thisid is not None:
                        idfile.write(thisid + ',' +
                                     ex.safe_filename(thisid) + '\n')
                        one_ship.write_qc('hires_' + parameters['runid'],
                                          extdir, year, month,
                                          varnames_to_print)
            idfile.close()

        # free this month's Deck before starting the next month
        del reps
def main(argv):
    """
    Calls the tracking qc checks for a specified drifting buoy

    Invoked as::

      python tracking_qc.py -config configuration.txt -id BUOYID -yr1 YEAR -mn1 MONTH
        -yr2 YEAR2 -mn2 MONTH2 -edge EDGE -runmonthid RUNID

    Inputs

    -config
      specifies the location of the configuration file.

    -id
      ID of the buoy to which tracking QC will be applied

    -yr1
      year of the first month to QC

    -mn1
      month of the first month to QC

    -yr2
      year of the last month to QC

    -mn2
      month of the last month to QC

    -edge
      specific type of edge case, one of: new, regular, start_edge_case,
      end_edge_case

    -runmonthid
      used to label special directories for start and end edge cases. This is
      of the form YYYYMM-YYYYMM

    This quality controls drifter data for the chosen ID (which will be
    end-padded with spaces) over the specified time range. The time range
    should specify a single complete drifter record. The location of the
    input data and the location of the qc-parameters file are specified in
    the configuration file. The qc-parameters file specifies the input
    parameters used by the various tracking checks.

    Input data are from the marine QC system. These are in 'per-ID per-month'
    csv format with observation variables, basic QC flags and SST QC flags
    stored in separate files and linkable via observation UID. A drifting
    buoy record is first assembled from the input data files and stored as a
    :class:`.Voyage` of :class:`.MarineReport` s. This record is then passed
    to the various tracking QC checks. Some observations that fail basic or
    SST QC are not passed to the tracking QC checks and will not receive
    tracking QC flags. Which observations are filtered out is dependent on
    tracking QC check.

    Output is written to a file in the track_out_dir specified in the
    configuration file. Where it is written depends on the EDGE flag (EDGE
    can be 'new', 'regular', 'start_edge_case' or 'end_edge_case'). The RUNID
    is intended to label the directories to which edge cases are sent. It
    should be of the form YYYMM-YYYMM specifying the start and end dates for
    which the overall QC was run.

    UserWarning is raised for problems with the input files. AssertionError
    is raised if inputs (parameters or MarineReport data) to a QC check are
    invalid
    """
    parser = argparse.ArgumentParser(description='Marine QC system, main program')
    parser.add_argument('-config', type=str, default='configuration.txt', help='name of config file')
    parser.add_argument('-id', type=str, help='ID to read in and process')
    parser.add_argument('-yr1', type=int, help='First year of data for drifting buoy')
    parser.add_argument('-mn1', type=int, help='First month of data for drifting buoy')
    parser.add_argument('-yr2', type=int, help='Last year of data for drifting buoy')
    parser.add_argument('-mn2', type=int, help='Last month of data for drifting buoy')
    parser.add_argument('-edge', nargs='+', help='list of edge case descriptors')
    parser.add_argument('-runmonthid', type=str, default='',
                        help='string for tagging directories should be of form YYYYMM-YYYYMM')
    args = parser.parse_args()

    edge = args.edge
    runmonthid = args.runmonthid
    oldqc = False  # this can be used to switch in the old versions of the aground and speed checks

    # IDs are stored end-padded with spaces to a fixed width of 9 characters
    target_id = args.id
    while len(target_id) < 9:
        target_id += ' '

    print('Running track QC for ID {}'.format(target_id))
    print('')
    print("Type of case: {}".format(edge[0]))
    print("Specific run id from wrapper script: {}".format(runmonthid))

    config = ConfigParser.ConfigParser()
    config.read(args.config)
    out_dir = config.get('Directories', 'out_dir')
    track_out_dir = config.get('Directories', 'track_out_dir')
    print("{}".format(out_dir))

    with open(config.get('Files', 'parameter_file'), 'r') as f:
        parameters = json.load(f)

    rep_list = []  # this will store input data as MarineReports
    count = 0
    for year, month in qc.year_month_gen(args.yr1, args.mn1, args.yr2, args.mn2):

        sy = str(year)
        sm = "{:02}".format(month)

        # input data files: variables, basic (positional) QC flags and SST QC
        # flags, linked row-by-row via observation UID
        filename = "{0}/{1}/{2}/Variables_{1}{2}_{3}_{4}.csv".format(out_dir, sy, sm, target_id,
                                                                     parameters['runid'])
        posqc_filename = "{0}/{1}/{2}/POS_qc_{1}{2}_{3}_{4}.csv".format(out_dir, sy, sm, target_id,
                                                                        parameters['runid'])
        sstqc_filename = "{0}/{1}/{2}/SST_qc_{1}{2}_{3}_{4}.csv".format(out_dir, sy, sm, target_id,
                                                                        parameters['runid'])

        # check if any data exists for this month before continuing
        if not (os.path.isfile(filename) or os.path.isfile(posqc_filename)
                or os.path.isfile(sstqc_filename)):
            continue
        print('reading data for: {}/{}'.format(sy, sm))

        # check all files exist, have data and have same amount of data before proceeding
        file_fail = False
        filelines = []
        for infile in [filename, posqc_filename, sstqc_filename]:
            try:
                with open(infile, 'r') as file:
                    linecount = 0
                    for line in file:
                        linecount += 1
                    filelines.append(linecount)
                    if linecount == 0:
                        message = 'empty file: ' + infile
                        file_fail = True
                    if linecount == 1:
                        print('only header in {}'.format(infile))
            except IOError:
                message = 'could not open ' + infile
                file_fail = True
        if not all(x == filelines[0] for x in filelines):
            message = 'file lengths do not match'
            file_fail = True
        if file_fail:
            raise UserWarning('problem with files for {}/{}: '.format(sy, sm), message)

        # read in ID data
        try:
            with open(filename, 'r') as csvfile:
                # get the headers from the CSV file and sort them out:
                # strip trailing carriage return, split by commas, fix duplicates
                headers = csvfile.readline()
                headers = headers[:-1]
                headers = headers.split(',')
                headers[11] = 'AT_anom'  # THIS IS AN AWFUL BODGE, HEADERS SHOULD BE UNIQUE GRRRR
                headers[13] = 'SST_anom'  # THIS IS AN AWFUL BODGE, HEADERS SHOULD BE UNIQUE GRRRR
                # now read the rest of the CSV file using the headers as a dictionary.
                # Need to add OSTIA information as "ext" information in the rep
                reader = csv.DictReader(csvfile, fieldnames=headers)
                nrep = 0
                for line in reader:
                    rep = ex.MarineReportQC(EasyImma(line))
                    # variables not in the Extended_IMMA.py VARLIST are not added by the above step,
                    # so OSTIA, ICE and BGVAR variables now need adding manually
                    rep.setext('OSTIA', None if line['OSTIA'] is None else float(line['OSTIA']))
                    rep.setext('ICE', None if line['ICE'] is None else float(line['ICE']))
                    rep.setext('BGVAR', None if line['BGVAR'] is None else float(line['BGVAR']))
                    rep_list.append(rep)
                    nrep += 1
        except Exception as error:
            print("Something went wrong populating report list")
            raise

        # now read in basic qc data
        try:
            with open(posqc_filename, 'r') as csvfile:
                # get the headers from the CSV file and sort them out:
                # strip trailing carriage return, split by commas, fix duplicates
                headers = csvfile.readline()
                headers = headers[:-1]
                headers = headers.split(',')
                # now read the rest of the CSV file using the headers as a dictionary.
                reader = csv.DictReader(csvfile, fieldnames=headers)
                # indx tracks position in rep_list: this month's reports
                # start at offset 'count'
                indx = count
                for line in reader:
                    for key in line:
                        if key == 'UID':
                            # sanity check: QC rows must line up with the
                            # variable rows read earlier
                            uid = rep_list[indx].getvar('UID')
                            if line[key].strip() != uid.strip():
                                raise UserWarning("UIDs don't match: {0}-{1}".format(line[key], uid))
                        else:
                            rep_list[indx].set_qc('POS', key, int(line[key]))
                    indx += 1
        except Exception as error:
            print("Something went wrong adding basic qc to report_list")
            raise

        # now read in sst qc data
        try:
            with open(sstqc_filename, 'r') as csvfile:
                # get the headers from the CSV file and sort them out:
                # strip trailing carriage return, split by commas, fix duplicates
                headers = csvfile.readline()
                headers = headers[:-1]
                headers = headers.split(',')
                # now read the rest of the CSV file using the headers as a dictionary.
                reader = csv.DictReader(csvfile, fieldnames=headers)
                indx = count
                for line in reader:
                    for key in line:
                        if key == 'UID':
                            uid = rep_list[indx].getvar('UID')
                            if line[key].strip() != uid.strip():
                                raise UserWarning("UIDs don't match: {0}-{1}".format(line[key], uid))
                        else:
                            rep_list[indx].set_qc('SST', key, int(line[key]))
                    indx += 1
        except Exception as error:
            print("Something went wrong adding sst qc to report_list")
            raise

        count += nrep

    if len(rep_list) == 0:
        raise UserWarning('no data for buoy ' + target_id)

    print("")
    print("read in {} reports from the buoy {}".format(len(rep_list), target_id))

    # now perform various tracking qc checks
    # note that any obs filtered out ahead of track qc won't receive qc flags
    # get_qc(trackflag) will return 9 for these obs.

    # ---aground QC---
    # pre-filter obs
    filt = ex.QC_filter()
    filt.add_qc_filter('POS', 'isbuoy', 1)  # should already be applied, but just in case
    filt.add_qc_filter('POS', 'date', 0)
    filt.add_qc_filter('POS', 'time', 0)
    filt.add_qc_filter('POS', 'pos', 0)
    filt.add_qc_filter('POS', 'blklst', 0)  # includes rejection of (lon,lat)=(0,0)
    v_filt = ex.Voyage()
    for rep in rep_list:
        if filt.test_report(rep) == 0:
            v_filt.add_report(rep)
    v_filt.sort()  # sort in time
    print("passing {} to aground check".format(len(v_filt)))
    if oldqc:
        v_filt.buoy_aground_check(parameters['buoy_aground_check'], False)  # raises AssertionError if check inputs are invalid
    else:
        v_filt.new_buoy_aground_check(parameters['new_buoy_aground_check'], False)  # raises AssertionError if check inputs are invalid

    # ---picked up QC---
    filt = ex.QC_filter()
    filt.add_qc_filter('POS', 'isbuoy', 1)  # should already be applied, but just in case
    filt.add_qc_filter('POS', 'date', 0)
    filt.add_qc_filter('POS', 'time', 0)
    filt.add_qc_filter('POS', 'pos', 0)
    filt.add_qc_filter('POS', 'blklst', 0)  # includes rejection of (lon,lat)=(0,0)
    if oldqc:
        filt.add_qc_filter('POS', 'iquam_track', 0)  # NOTE only use this for original buoy speed check
    v_filt = ex.Voyage()
    for rep in rep_list:
        if filt.test_report(rep) == 0:
            v_filt.add_report(rep)
    v_filt.sort()  # sort in time
    print("passing {} to picked-up check".format(len(v_filt)))
    if oldqc:
        v_filt.buoy_speed_check(parameters['buoy_speed_check'], False)  # raises AssertionError if check inputs are invalid
    else:
        v_filt.new_buoy_speed_check(parameters['IQUAM_track_check'], parameters['new_buoy_speed_check'], False)  # raises AssertionError if check inputs are invalid

    # ---sst tail QC---
    filt = ex.QC_filter()
    filt.add_qc_filter('POS', 'isbuoy', 1)  # should already be applied, but just in case
    filt.add_qc_filter('POS', 'date', 0)
    filt.add_qc_filter('POS', 'time', 0)
    filt.add_qc_filter('POS', 'pos', 0)
    filt.add_qc_filter('POS', 'blklst', 0)  # includes rejection of (lon,lat)=(0,0)
    filt.add_qc_filter('SST', 'clim', 0)
    filt.add_qc_filter('SST', 'nonorm', 0)
    filt.add_qc_filter('SST', 'freez', 0)
    filt.add_qc_filter('SST', 'noval', 0)
    filt.add_qc_filter('SST', 'rep', 0)  # flags repeated value obs where these are >70% of record (set in parameters)
    filt.add_qc_filter('SST', 'hardlimit', 0)  # limits are -5.0 and 45.0 for SST set in parameters
    filt.add_qc_filter('POS', 'drf_agr', 0)  # remove obs failing preceding track checks
    filt.add_qc_filter('POS', 'drf_spd', 0)  # remove obs failing preceding track checks
    v_filt = ex.Voyage()
    for rep in rep_list:
        if filt.test_report(rep) == 0 and rep.get_qc('SST', 'bbud') < 4:
            v_filt.add_report(rep)
    v_filt.sort()  # sort in time
    print("passing {} to tail check".format(len(v_filt)))
    v_filt.buoy_tail_check(parameters['buoy_tail_check'], False)  # raises AssertionError if check inputs are invalid

    # ---sst biased or noisy buoy QC---
    filt = ex.QC_filter()
    filt.add_qc_filter('POS', 'isbuoy', 1)  # should already be applied, but just in case
    filt.add_qc_filter('POS', 'date', 0)
    filt.add_qc_filter('POS', 'time', 0)
    filt.add_qc_filter('POS', 'pos', 0)
    filt.add_qc_filter('POS', 'blklst', 0)  # includes rejection of (lon,lat)=(0,0)
    filt.add_qc_filter('SST', 'clim', 0)
    filt.add_qc_filter('SST', 'nonorm', 0)
    filt.add_qc_filter('SST', 'freez', 0)
    filt.add_qc_filter('SST', 'noval', 0)
    filt.add_qc_filter('SST', 'rep', 0)  # flags repeated value obs where these are >70% of record (set in parameters)
    filt.add_qc_filter('SST', 'hardlimit', 0)  # limits are -5.0 and 45.0 for SST set in parameters
    filt.add_qc_filter('POS', 'drf_agr', 0)  # remove obs failing preceding track checks
    filt.add_qc_filter('POS', 'drf_spd', 0)  # remove obs failing preceding track checks
    filt.add_qc_filter('SST', 'drf_tail1', 0)  # remove obs failing preceding track checks
    filt.add_qc_filter('SST', 'drf_tail2', 0)  # remove obs failing preceding track checks
    v_filt = ex.Voyage()
    for rep in rep_list:
        if filt.test_report(rep) == 0 and rep.get_qc('SST', 'bbud') < 4:
            v_filt.add_report(rep)
    v_filt.sort()  # sort in time
    print("passing {} to biased-noisy check".format(len(v_filt)))
    v_filt.buoy_bias_noise_check(parameters['buoy_bias_noise_check'], False)  # raises AssertionError if check inputs are invalid

    # return voyage with track QC flags
    voy = ex.Voyage()
    for rep in rep_list:
        voy.add_report(rep)
    voy.sort()  # sort in time

    # write out the QC outcomes for this chunk (args.yr1/args.mn1 to
    # args.yr2/args.mn2) for this ID
    # stored in directory corresponding to last month in the chunk
    if 'new' in edge or 'regular' in edge:
        extdir = safe_make_tracking_dir(track_out_dir, args.yr2, args.mn2)
        if oldqc:
            voy.write_tracking_output(parameters['runid']+'oldqc', extdir, args.yr2, args.mn2)
        else:
            voy.write_tracking_output(parameters['runid'], extdir, args.yr2, args.mn2)

    if 'start_edge_case' in edge:
        extdir = safe_make_edge_dir(track_out_dir, args.yr2, args.mn2, 'start_edge_case', runmonthid)
        if oldqc:
            voy.write_tracking_output(parameters['runid']+'oldqc', extdir, args.yr2, args.mn2)
        else:
            voy.write_tracking_output(parameters['runid'], extdir, args.yr2, args.mn2)

    if 'end_edge_case' in edge:
        extdir = safe_make_edge_dir(track_out_dir, args.yr2, args.mn2, 'end_edge_case', runmonthid)
        if oldqc:
            voy.write_tracking_output(parameters['runid']+'oldqc', extdir, args.yr2, args.mn2)
        else:
            voy.write_tracking_output(parameters['runid'], extdir, args.yr2, args.mn2)
def main(argv):
    """
    This program reads in data from ICOADS.3.0.0/ICOADS.3.0.1 and applies
    quality control processes to it, flagging data as good or bad according
    to a set of different criteria. Optionally it will replace drifting buoy
    SST data in ICOADS.3.0.1 with drifter data taken from the GDBC portal.

    The first step of the process is to read in various SST and MAT
    climatologies from file. These are 1degree latitude by 1 degree longitude
    by 73 pentad fields in NetCDF format. The program then loops over all
    specified years and months reads in the data needed to QC that month and
    then does the QC. There are three stages in the QC

    basic QC - this proceeds one observation at a time. Checks are relatively
    simple and detect gross errors

    track check - this works on Voyages consisting of all the observations
    from a single ship (or at least a single ID) and identifies observations
    which make for an implausible ship track

    buddy check - this works on Decks which are large collections of
    observations and compares observations to their neighbours
    """
    print('########################')
    print('Running make_and_full_qc')
    print('########################')

    parser = argparse.ArgumentParser(
        description='Marine QC system, main program')
    parser.add_argument('-config', type=str, default='configuration.txt',
                        help='name of config file')
    parser.add_argument('-tracking', action='store_true',
                        help='perform tracking QC')
    parser.add_argument('-jobs', type=str, default='jobs.json',
                        help='name of job file')
    parser.add_argument('-job_index', type=int, default=0, help='job index')
    args = parser.parse_args()

    inputfile = args.config
    jobfile = args.jobs
    # job indices on the command line are 1-based; convert to 0-based index
    jobindex = args.job_index - 1
    tracking = args.tracking

    # the job file defines the year/month chunk this invocation processes
    with open(jobfile) as fp:
        jobs = json.load(fp)

    year1 = jobs['jobs'][jobindex]['year1']
    year2 = jobs['jobs'][jobindex]['year2']
    month1 = jobs['jobs'][jobindex]['month1']
    month2 = jobs['jobs'][jobindex]['month2']

    verbose = True  # need set to read as arg in future

    print("running on ICOADS, this is not a test!")
    print('Input file is {}'.format(inputfile))
    print('Running from {} {} to {} {}'.format(month1, year1, month2, year2))
    print('')

    config = configparser.ConfigParser()
    config.read(inputfile)
    icoads_dir = config.get('Directories', 'ICOADS_dir')
    out_dir = config.get('Directories', 'out_dir')
    bad_id_file = config.get('Files', 'IDs_to_exclude')
    version = config.get('Icoads', 'icoads_version')

    print('ICOADS directory = {}'.format(icoads_dir))
    print('ICOADS version = {}'.format(version))
    print('Output to {}'.format(out_dir))
    print('List of bad IDs = {}'.format(bad_id_file))
    print('Parameter file = {}'.format(config.get('Files', 'parameter_file')))
    print('')

    ids_to_exclude = bf.process_bad_id_file(bad_id_file)

    # read in climatology files
    sst_pentad_stdev = clim.Climatology.from_filename(
        config.get('Climatologies', 'Old_SST_stdev_climatology'), 'sst')

    sst_stdev_1 = clim.Climatology.from_filename(
        config.get('Climatologies', 'SST_buddy_one_box_to_buddy_avg'), 'sst')
    sst_stdev_2 = clim.Climatology.from_filename(
        config.get('Climatologies', 'SST_buddy_one_ob_to_box_avg'), 'sst')
    sst_stdev_3 = clim.Climatology.from_filename(
        config.get('Climatologies', 'SST_buddy_avg_sampling'), 'sst')

    with open(config.get('Files', 'parameter_file'), 'r') as f:
        parameters = json.load(f)

    print("Reading climatologies from parameter file")
    climlib = ex.ClimatologyLibrary()
    for entry in parameters['climatologies']:
        print("{} {}".format(entry[0], entry[1]))
        climlib.add_field(entry[0], entry[1],
                          clim.Climatology.from_filename(entry[2], entry[3]))

    for year, month in qc.year_month_gen(year1, month1, year2, month2):

        print("INFO({}): {} {}".format(
            datetime.now().time().isoformat(timespec='milliseconds'),
            year, month))

        # QC of a month needs the preceding and following months as context
        last_year, last_month = qc.last_month_was(year, month)
        next_year, next_month = qc.next_month_is(year, month)

        reps = ex.Deck()
        count = 0
        lastday = -99

        for readyear, readmonth in qc.year_month_gen(
                last_year, last_month, next_year, next_month):

            print("INFO({}): {} {}".format(
                datetime.now().time().isoformat(timespec='milliseconds'),
                readyear, readmonth))

            ostia_bg_var = None
            if tracking:
                # seasonal OSTIA background-variance field, only needed for
                # tracking QC
                ostia_bg_var = clim.Climatology.from_filename(
                    config.get('Climatologies',
                               qc.season(readmonth) + '_ostia_background'),
                    'bg_var')

            filename = icoads_dir + '{:4d}-{:02d}.psv'.format(
                readyear, readmonth)

            # pipe-separated ICOADS extract with a fixed column layout
            imma_obj = pd.read_csv(
                filename, sep='|', header=None,
                names=['YR', 'MO', 'DY', 'HR', 'LAT', 'LON', 'DS', 'VS',
                       'ID', 'AT', 'SST', 'DPT', 'DCK', 'SLP', 'SID', 'PT',
                       'UID', 'W', 'D', 'IRF', 'bad_data', 'outfile'],
                low_memory=False)

            # replace ' ' in ID field with '' (corrections introduce bug)
            imma_obj['ID'].replace(' ', '', inplace=True)
            imma_obj = imma_obj.sort_values(['YR', 'MO', 'DY', 'HR', 'ID'],
                                            axis=0, ascending=True)
            imma_obj = imma_obj.reset_index(drop=True)

            data_index = imma_obj.index
            rec = IMMA()

            print('INFO({}): Data read, applying first QC'.format(
                datetime.now().time().isoformat(timespec='milliseconds')))
            dyb_count = 0
            for idx in data_index:

                # set missing values to None
                for k, v in imma_obj.loc[idx, ].to_dict().items():
                    rec.data[k] = to_none(v)

                readob = True
                if (not (rec.data['ID'] in ids_to_exclude) and
                        readob and
                        rec.data['YR'] == readyear and
                        rec.data['MO'] == readmonth and
                        rec.data['DY'] is not None):

                    rep = ex.MarineReportQC(rec)
                    del rec

                    rep.setvar('AT2', rep.getvar('AT'))

                    # if day has changed then read in OSTIA field if available
                    # and append SST and sea-ice fraction to the observation
                    # metadata
                    if tracking and readyear >= 1985 and rep.getvar(
                            'DY') is not None:
                        if rep.getvar('DY') != lastday:
                            lastday = rep.getvar('DY')
                            y_year, y_month, y_day = qc.yesterday(
                                readyear, readmonth, lastday)

                            # ofname = ostia_filename(ostia_dir, y_year, y_month, y_day)
                            ofname = bf.get_background_filename(
                                parameters['background_dir'],
                                parameters['background_filenames'],
                                y_year, y_month, y_day)

                            climlib.add_field(
                                'OSTIA', 'background',
                                clim.Climatology.from_filename(
                                    ofname, 'analysed_sst'))
                            climlib.add_field(
                                'OSTIA', 'ice',
                                clim.Climatology.from_filename(
                                    ofname, 'sea_ice_fraction'))

                        rep_clim = climlib.get_field(
                            'OSTIA', 'background').get_value_ostia(
                                rep.lat(), rep.lon())
                        # convert from Kelvin to Celsius
                        if rep_clim is not None:
                            rep_clim -= 273.15

                        rep.setext('OSTIA', rep_clim)
                        rep.setext(
                            'ICE',
                            climlib.get_field('OSTIA', 'ice').get_value_ostia(
                                rep.lat(), rep.lon()))
                        rep.setext(
                            'BGVAR',
                            ostia_bg_var.get_value_mds_style(
                                rep.lat(), rep.lon(), rep.getvar('MO'),
                                rep.getvar('DY')))

                    # attach climatological means (MDS-style lookup)
                    for varname in ['SST', 'AT']:
                        rep_clim = climlib.get_field(
                            varname, 'mean').get_value_mds_style(
                                rep.lat(), rep.lon(), rep.getvar('MO'),
                                rep.getvar('DY'))
                        rep.add_climate_variable(varname, rep_clim)

                    for varname in ['SLP2', 'SHU', 'CRH', 'CWB', 'DPD']:
                        rep_clim = climlib.get_field(varname, 'mean').get_value(
                            rep.lat(), rep.lon(), rep.getvar('MO'),
                            rep.getvar('DY'))
                        rep.add_climate_variable(varname, rep_clim)

                    # these variables also carry a stdev climatology
                    for varname in ['DPT', 'AT2', 'SLP']:
                        rep_clim = climlib.get_field(varname, 'mean').get_value(
                            rep.lat(), rep.lon(), rep.getvar('MO'),
                            rep.getvar('DY'))
                        rep_stdev = climlib.get_field(
                            varname, 'stdev').get_value(
                                rep.lat(), rep.lon(), rep.getvar('MO'),
                                rep.getvar('DY'))
                        rep.add_climate_variable(varname, rep_clim, rep_stdev)

                    rep.calculate_humidity_variables(
                        ['SHU', 'VAP', 'CRH', 'CWB', 'DPD'])

                    rep.perform_base_qc(parameters)
                    rep.set_qc(
                        'POS', 'month_match',
                        qc.month_match(year, month, rep.getvar('YR'),
                                       rep.getvar('MO')))

                    reps.append(rep)
                    count += 1

                rec = IMMA()
                dyb_count += 1
                if dyb_count % 1000 == 0:
                    print('INFO({}): {} out of {} processed'.format(
                        datetime.now().time().isoformat(
                            timespec='milliseconds'),
                        dyb_count, imma_obj.index.size))

            # icoads_file.close()

        print("INFO({}): Read {} ICOADS records".format(
            datetime.now().time().isoformat(timespec='milliseconds'), count))

        # filter the obs into passes and fails of basic positional QC
        filt = ex.QC_filter()
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        reps.add_filter(filt)

        if verbose:
            print('INFO ({}) .... Track checking individual ships'.format(
                datetime.now().time().isoformat(timespec='milliseconds')))

        # track check the passes one ship at a time
        count_ships = 0
        for one_ship in reps.get_one_platform_at_a_time():
            one_ship.track_check(parameters['track_check'])
            one_ship.iquam_track_check(parameters['IQUAM_track_check'])
            one_ship.spike_check(parameters['IQUAM_spike_check'])
            one_ship.find_saturated_runs(parameters['saturated_runs'])
            one_ship.find_multiple_rounded_values(
                parameters['multiple_rounded_values'])

            for varname in ['SST', 'AT', 'AT2', 'DPT', 'SLP']:
                one_ship.find_repeated_values(
                    parameters['find_repeated_values'], intype=varname)

            count_ships += 1

        print("Track checked {} ships".format(count_ships))

        if verbose:
            print('INFO ({}) .... Applying buddy checks'.format(
                datetime.now().time().isoformat(timespec='milliseconds')))
        if verbose:
            print('INFO ({}) ........ SST'.format(
                datetime.now().time().isoformat(timespec='milliseconds')))

        # SST buddy check
        filt = ex.QC_filter()
        filt.add_qc_filter('POS', 'is780', 0)
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        filt.add_qc_filter('POS', 'trk', 0)
        filt.add_qc_filter('SST', 'noval', 0)
        filt.add_qc_filter('SST', 'freez', 0)
        filt.add_qc_filter('SST', 'clim', 0)
        filt.add_qc_filter('SST', 'nonorm', 0)
        reps.add_filter(filt)

        reps.bayesian_buddy_check('SST', sst_stdev_1, sst_stdev_2,
                                  sst_stdev_3, parameters)
        reps.mds_buddy_check('SST', sst_pentad_stdev,
                             parameters['mds_buddy_check'])

        if verbose:
            print('INFO ({}) ........ NMAT'.format(
                datetime.now().time().isoformat(timespec='milliseconds')))

        # NMAT buddy check
        filt = ex.QC_filter()
        filt.add_qc_filter('POS', 'isship', 1)  # only do ships
        filt.add_qc_filter('AT', 'mat_blacklist', 0)
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        filt.add_qc_filter('POS', 'trk', 0)
        filt.add_qc_filter('POS', 'day', 0)
        filt.add_qc_filter('AT', 'noval', 0)
        filt.add_qc_filter('AT', 'clim', 0)
        filt.add_qc_filter('AT', 'nonorm', 0)
        reps.add_filter(filt)

        reps.bayesian_buddy_check('AT', sst_stdev_1, sst_stdev_2,
                                  sst_stdev_3, parameters)
        reps.mds_buddy_check('AT', sst_pentad_stdev,
                             parameters['mds_buddy_check'])

        # DPT buddy check
        # NB no day check for this one
        filt = ex.QC_filter()
        filt.add_qc_filter('DPT', 'hum_blacklist', 0)
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        filt.add_qc_filter('POS', 'trk', 0)
        filt.add_qc_filter('DPT', 'noval', 0)
        filt.add_qc_filter('DPT', 'clim', 0)
        filt.add_qc_filter('DPT', 'nonorm', 0)
        reps.add_filter(filt)

        reps.mds_buddy_check('DPT', climlib.get_field('DPT', 'stdev'),
                             parameters['mds_buddy_check'])

        if verbose:
            print('INFO ({}) ........ SLP'.format(
                datetime.now().time().isoformat(timespec='milliseconds')))

        # SLP buddy check
        filt = ex.QC_filter()
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        filt.add_qc_filter('POS', 'trk', 0)
        filt.add_qc_filter('SLP', 'noval', 0)
        filt.add_qc_filter('SLP', 'clim', 0)
        filt.add_qc_filter('SLP', 'nonorm', 0)
        reps.add_filter(filt)

        reps.mds_buddy_check('SLP', climlib.get_field('SLP', 'stdev'),
                             parameters['slp_buddy_check'])

        extdir = bf.safe_make_dir(out_dir, year, month)
        reps.write_output(parameters['runid'], extdir, year, month)

        if tracking:
            if verbose:
                print('INFO ({}) .... Tracking'.format(
                    datetime.now().time().isoformat(timespec='milliseconds')))

            # set QC for output by ID - buoys only and passes base SST QC
            filt = ex.QC_filter()
            filt.add_qc_filter('POS', 'month_match', 1)
            filt.add_qc_filter('POS', 'isdrifter', 1)
            reps.add_filter(filt)

            # index file mapping each platform ID to its safe filename
            idfile = open(extdir + '/ID_file.txt', 'w')
            for one_ship in reps.get_one_platform_at_a_time():
                if len(one_ship) > 0:
                    thisid = one_ship.getrep(0).getvar('ID')
                    if thisid is not None:
                        idfile.write(thisid + ',' +
                                     ex.safe_filename(thisid) + '\n')
                        one_ship.write_output(parameters['runid'], extdir,
                                              year, month)
            idfile.close()

        # free the Deck before starting the next month
        del reps
def main(argv):
    '''
    This program reads in data from ICOADS.2.5.1 and applies quality control
    processes to it, flagging data as good or bad according to a set of
    different criteria.

    The first step of the process is to read in various SST and MAT
    climatologies from file. These are 1degree latitude by 1 degree longitude
    by 73 pentad fields in NetCDF format. The program then loops over all
    specified years and months reads in the data needed to QC that month and
    then does the QC. There are three stages in the QC

    basic QC - this proceeds one observation at a time. Checks are relatively
    simple and detect gross errors

    track check - this works on Voyages consisting of all the observations
    from a single ship (or at least a single ID) and identifies observations
    which make for an implausible ship track

    buddy check - this works on Decks which are large collections of
    observations and compares observations to their neighbours
    '''
    # NOTE(review): this is the legacy Python 2 variant of the driver (print
    # statements, ConfigParser); it must be run under python2.7.
    print '########################'
    print 'Running make_and_full_qc'
    print '########################'

    parser = argparse.ArgumentParser(description='Marine QC system, main program')
    parser.add_argument('-config', type=str, default='configuration.txt',
                        help='name of config file')
    parser.add_argument('-year1', type=int, default=1850,
                        help='First year for processing')
    parser.add_argument('-year2', type=int, default=1850,
                        help='Final year for processing')
    parser.add_argument('-month1', type=int, default=1,
                        help='First month for processing')
    parser.add_argument('-month2', type=int, default=1,
                        help='Final month for processing')
    parser.add_argument('-test', action='store_true', help='run test suite')
    args = parser.parse_args()

    inputfile = args.config
    year1 = args.year1
    year2 = args.year2
    month1 = args.month1
    month2 = args.month2
    Test = args.test

    print 'Input file is ', inputfile
    print 'Running from ', month1, year1, ' to ', month2, year2
    print ''

    config = ConfigParser.ConfigParser()
    config.read(inputfile)

    # hard-coded high-resolution daily SST climatology used as the 'mean'
    # field below
    sst_climatology_file = '/project/mds/HADISST2/OIv2_clim_MDS_6190_0.25x0.25xdaily_365.nc'
    icoads_dir = config.get('Directories', 'ICOADS_dir')
    out_dir = config.get('Directories', 'out_dir')
    bad_id_file = config.get('Files', 'IDs_to_exclude')
    version = config.get('Icoads', 'icoads_version')

    print 'ICOADS directory =', icoads_dir
    print 'ICOADS version =', version
    print 'List of bad IDs =', bad_id_file
    print ''

    ids_to_exclude = process_bad_id_file(bad_id_file)

    #read in climatology files
    sst_pentad_stdev = clim.Climatology.from_filename(config.get('Climatologies', 'Old_SST_stdev_climatology'), 'sst')

    sst_stdev_1 = clim.Climatology.from_filename(config.get('Climatologies', 'SST_buddy_one_box_to_buddy_avg'), 'sst')
    sst_stdev_2 = clim.Climatology.from_filename(config.get('Climatologies', 'SST_buddy_one_ob_to_box_avg'), 'sst')
    sst_stdev_3 = clim.Climatology.from_filename(config.get('Climatologies', 'SST_buddy_avg_sampling'), 'sst')

    with open(config.get('Files', 'parameter_file'), 'r') as f:
        parameters = json.load(f)

    climlib = ex.ClimatologyLibrary()
    climlib.add_field('SST', 'mean', clim.Climatology.from_filename(sst_climatology_file, 'temperature'))

    for year, month in qc.year_month_gen(year1, month1, year2, month2):

        print year, month

        # QC of a month needs the preceding and following months as context
        last_year, last_month = qc.last_month_was(year, month)
        next_year, next_month = qc.next_month_is(year, month)

        reps = ex.Deck()
        count = 0
        count2 = 0

        for readyear, readmonth in qc.year_month_gen(last_year, last_month,
                                                     next_year, next_month):

            print readyear, readmonth
            syr = str(readyear)
            smn = "%02d" % (readmonth)

            filename = icoads_filename(icoads_dir, readyear, readmonth, version)
            try:
                icoads_file = gzip.open(filename, "r")
            except IOError:
                # missing months are skipped rather than fatal
                print "no ICOADS file ", filename, " for ", readyear, readmonth
                continue

            rec = IMMA()
            for line in icoads_file:
                try:
                    rec.readstr(line)
                    readob = True
                except:
                    # unparseable records are reported and dropped
                    readob = False
                    print "Rejected ob", line

                #if this is not on the exclusion list, readable and not a buoy in the NRT runs
                if (not(rec.data['ID'] in ids_to_exclude) and
                        readob and
                        rec.data['YR'] == readyear and
                        rec.data['MO'] == readmonth):

                    rep = ex.MarineReportQC(rec)
                    del rec

                    rep_clim = climlib.get_field('SST', 'mean').get_value(rep.lat(), rep.lon(),
                                                                          rep.getvar('MO'), rep.getvar('DY'))
                    rep.add_climate_variable('SST', rep_clim)
                    rep.perform_base_sst_qc(parameters)
                    reps.append(rep)
                    count += 1

                rec = IMMA()

            icoads_file.close()

        print "Read ", count, " ICOADS records"

        #filter the obs into passes and fails of basic positional QC
        filt = ex.QC_filter()
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        reps.add_filter(filt)

        #track check the passes one ship at a time
        count_ships = 0
        for one_ship in reps.get_one_platform_at_a_time():
            one_ship.track_check(parameters['track_check'])
            one_ship.find_repeated_values(parameters['find_repeated_values'], intype='SST')
            count_ships += 1

        print "Track checked ", count_ships, " ships"

        #SST buddy check
        filt = ex.QC_filter()
        filt.add_qc_filter('POS', 'is780', 0)
        filt.add_qc_filter('POS', 'date', 0)
        filt.add_qc_filter('POS', 'time', 0)
        filt.add_qc_filter('POS', 'pos', 0)
        filt.add_qc_filter('POS', 'blklst', 0)
        filt.add_qc_filter('POS', 'trk', 0)
        filt.add_qc_filter('SST', 'noval', 0)
        filt.add_qc_filter('SST', 'freez', 0)
        filt.add_qc_filter('SST', 'clim', 0)
        filt.add_qc_filter('SST', 'nonorm', 0)
        reps.add_filter(filt)

        reps.bayesian_buddy_check('SST', sst_stdev_1, sst_stdev_2, sst_stdev_3, parameters)
        reps.mds_buddy_check('SST', sst_pentad_stdev, parameters['mds_buddy_check'])

        # which QC flags get written out for each variable
        varnames_to_print = {'SST': ['bud', 'clim', 'nonorm', 'freez', 'noval',
                                     'nbud', 'bbud', 'rep', 'spike', 'hardlimit']}
        reps.write_qc('hires_'+parameters['runid'], out_dir, year, month, varnames_to_print)

        # free the Deck before starting the next month
        del reps
def main(argv):
    '''
    This is the program that runs the base QC on data in the data base
    (created by Make_DB.py. The checks are the simpler checks, which can be
    performed on an observation-by-observation basis.
    '''
    # NOTE(review): legacy Python 2 script; requires a MySQL database and
    # python2.7.
    print '###############'
    print 'Running base_qc'
    print '###############'

    inputfile = 'configuration.txt'
    month1 = 1
    month2 = 12

    try:
        opts, args = getopt.getopt(argv, "hi:",
                                   ["ifile=", "year1=", "year2=",
                                    "month1=", "month2="])
    except getopt.GetoptError:
        print 'Usage Make_DB.py -i <configuration_file> '+\
        '--year1 <start year> --year2 <end year>'+\
        '--month1 <start month> --month2 <end month>'
        sys.exit(2)

    inputfile, year1, year2, month1, month2 = qc.get_arguments(opts)

    print 'Input file is ', inputfile
    print 'Running from ', year1, ' to ', year2
    print 'Running from ', month1, ' to ', month2
    print ''

    config = qc.get_config(inputfile)

    data_base_host = config['data_base_host']
    data_base_name = config['data_base_name']

    print 'Data base host =', data_base_host
    print 'Data base name =', data_base_name
    print ''

    #connect to data base
    connection = MySQLdb.connect(host=data_base_host,
                                 user='******',
                                 db=data_base_name)

    for years, months in qc.year_month_gen(year1, month1, year2, month2):

        print '\nRunning Base QC for', years, months

        # two cursors: one for reading reports, one for writing flags back
        cursor = connection.cursor()
        cursor2 = connection.cursor()

        syr = str(years)

        '''set up a QC filter and use it to extract obs
        from the database direct into MarineReport format'''
        # NOTE(review): 'filter' shadows the Python builtin of the same name
        filter = db.Quality_Control_Filter()
        filter.year = years
        filter.month = months

        t0 = time.time()
        reps = db.get_marine_report_from_db(cursor, years, filter)
        t1 = time.time()
        total_time = t1-t0
        print "read", total_time

        '''For each report, do all the basic QC checks
        then update the QC flags in the data base'''
        for rep in reps:

            rep.bad_position = qc.position_check(rep.lat, rep.lon)
            rep.bad_date = qc.date_check(rep.year, rep.month,
                                         rep.day, rep.hour)
            # day/night test only makes sense for a valid position and date
            if rep.bad_position == 0 and rep.bad_date == 0:
                rep.day_check = qc.day_test(rep.year, rep.month, rep.day,
                                            rep.hour, rep.lat, rep.lon)
            else:
                rep.day_check = 1

            rep.no_sst = qc.value_check(rep.sst)
            rep.sst_below_freezing = qc.sst_freeze_check(rep.sst, 0.0)
            # climatology tolerances: 8 degC for SST, 10 degC for MAT
            rep.sst_climatology_fail = qc.climatology_check(rep.sst,
                                                            rep.sst_norm, 8.0)
            rep.no_sst_normal = qc.no_normal_check(rep.sst_norm)

            rep.no_mat = qc.value_check(rep.mat)
            rep.mat_climatology_fail = qc.climatology_check(rep.mat,
                                                            rep.mat_norm, 10.0)
            rep.no_mat_normal = qc.no_normal_check(rep.mat_norm)

            rep.blacklist = qc.blacklist(rep.id, rep.dck, rep.year,
                                         rep.lat, rep.lon)

        t15 = time.time()
        print "qcd", t15-t1

        # push the updated flags back to the data base
        for rep in reps:
            result = db.update_db_basic_qc_flags(rep, years, cursor2)

        t2 = time.time()
        print "added to db", t2-t15

        '''Commit the changes then print a summary'''
        connection.commit()
        #db.report_qc_counts(cursor,years,months)
        t3 = time.time()
        # NOTE(review): 'commited' is misspelt in the output message
        print "commited", t3-t2

    connection.close()

    print "All Done :)"
def main(argv):
    '''
    For input year range, extract and print obs from the database.
    '''
    # NOTE(review): legacy Python 2 script; requires a MySQL database and
    # python2.7. Output path is hard-coded below.
    inputfile = 'configuration.txt'

    try:
        opts, args = getopt.getopt(argv, "hi:",
                                   ["ifile=", "year1=", "year2="])
    except getopt.GetoptError:
        print 'Usage Make_DB.py -i <configuration_file>'+\
        ' --year1 <start year> --year2 <end year>'
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print 'Usage Make_DB.py -i <configuration_file> '+\
            '--year1 <start year> --year2 <end year>'
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-x", "--year1"):
            try:
                year1 = int(arg)
            except:
                sys.exit("Failed: year1 not an integer")
        elif opt in ("-y", "--year2"):
            try:
                year2 = int(arg)
            except:
                sys.exit("Failed: year2 not an integer")

    print 'Input file is ', inputfile
    print 'Running from ', year1, ' to ', year2
    print ''

    config = qc.get_config(inputfile)

    data_base_host = config['data_base_host']
    data_base_name = config['data_base_name']

    print 'Data base host =', data_base_host
    print 'Data base name =', data_base_name

    #connect to data base
    connection = MySQLdb.connect(host=data_base_host,
                                 user='******',
                                 db=data_base_name)

    #need two cursors, one for reading and one for making QC changes
    cursor = connection.cursor()

    for years, months in qc.year_month_gen(year1, 1, year2, 12):

        print years, months

        syr = str(years)
        smn = "%02d" % (months,)

        print syr+smn

        outfile = open('/data/local/hadjj/ICOADS.2.5.1/blobs_'+syr+smn+'.txt', 'w')

        # join the per-year report and QC tables and pull everything for
        # this month; the '\' continuations are inside the string literal
        sql_request = 'SELECT \
        marinereports'+syr+'.id, \
        marinereports'+syr+'.lat, \
        marinereports'+syr+'.lon, \
        marinereports'+syr+'.sst, \
        marinereports'+syr+'.mat, \
        marinereports'+syr+'.year, \
        marinereports'+syr+'.month, \
        marinereports'+syr+'.day, \
        marinereports'+syr+'.hour, \
        marinereports'+syr+'.icoads_ds, \
        marinereports'+syr+'.icoads_vs, \
        marinereports'+syr+'.uid, \
        base_qc'+syr+'.bad_position , \
        base_qc'+syr+'.bad_date , \
        base_qc'+syr+'.bad_track , \
        sst_qc'+syr+'.no_sst , \
        sst_qc'+syr+'.sst_below_freezing , \
        sst_qc'+syr+'.sst_climatology_fail , \
        sst_qc'+syr+'.no_sst_normal , \
        sst_qc'+syr+'.sst_buddy_fail, \
        mat_qc'+syr+'.no_mat , \
        mat_qc'+syr+'.mat_climatology_fail, \
        mat_qc'+syr+'.no_mat_normal , \
        mat_qc'+syr+'.mat_buddy_fail, \
        marinereports'+syr+'.dck, \
        marinereports'+syr+'.sid, \
        base_qc'+syr+'.day_check, \
        base_qc'+syr+'.blacklist, \
        base_qc'+syr+'.fewsome_check, \
        extra_qc'+syr+'.new_track_check, \
        extra_qc'+syr+'.bayesian_sst_buddy_check \
        FROM marinereports'+syr+' \
        INNER JOIN base_qc'+syr+' ON \
        marinereports'+syr+'.uid = base_qc'+syr+'.uid \
        INNER JOIN sst_qc'+syr+ ' ON \
        marinereports'+syr+'.uid = sst_qc'+syr+'.uid \
        INNER JOIN mat_qc'+syr+ ' ON \
        marinereports'+syr+'.uid = mat_qc'+syr+'.uid \
        INNER JOIN extra_qc'+syr+ ' ON \
        marinereports'+syr+'.uid = extra_qc'+syr+'.uid \
        WHERE marinereports'+syr+'.month = '+str(months)

        reps = []

        cursor.execute(sql_request)
        numrows = cursor.rowcount

        # column order here must match the SELECT list above
        for i in range(numrows):
            rows = cursor.fetchone()
            rep = qc.MarineReport(rows[0], rows[1], rows[2], rows[3],
                                  rows[4], rows[5], rows[6], rows[7],
                                  rows[8], rows[9], rows[10], rows[11])
            rep.bad_position = rows[12]
            rep.bad_time = rows[13]
            rep.bad_track = rows[14]
            rep.no_sst = rows[15]
            rep.sst_below_freezing = rows[16]
            rep.sst_climatology_fail = rows[17]
            rep.no_sst_normal = rows[18]
            rep.sst_buddy_fail = rows[19]
            rep.no_mat = rows[20]
            rep.mat_climatology_fail = rows[21]
            rep.no_mat_normal = rows[22]
            rep.mat_buddy_fail = rows[23]
            rep.dck = rows[24]
            rep.sid = rows[25]
            rep.day_check = rows[26]
            rep.blacklist = rows[27]
            rep.fewsome_check = rows[28]
            rep.new_track_check = rows[29]
            rep.bayesian_sst_buddy_check = rows[30]
            reps.append(rep)

        reps.sort()

        for rep in reps:
            outfile.write(rep.print_report())

        outfile.close()
        print "out ", years, months

    connection.close()
def main(argv):
    '''
    The buddy check compares observations to other nearby observations. If
    the observation differs substantially from the neighbour-average, the
    observation will be rejected.
    '''
    # NOTE(review): legacy Python 2 script; requires a MySQL database and
    # python2.7.
    print '###################'
    print 'Running buddy_check'
    print '###################'

    inputfile = 'configuration.txt'

    try:
        opts, args = getopt.getopt(argv, "hi:",
                                   ["ifile=", "year1=", "year2="])
    except getopt.GetoptError:
        print 'Usage Make_DB.py -i <configuration_file> '+\
        '--year1 <start year> --year2 <end year>'
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            print 'test.py -i <inputfile> -o <outputfile>'
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-x", "--year1"):
            try:
                year1 = int(arg)
            except:
                sys.exit("Failed: year1 not an integer")
        elif opt in ("-y", "--year2"):
            try:
                year2 = int(arg)
            except:
                sys.exit("Failed: year2 not an integer")

    print 'Input file is ', inputfile
    print 'Running from ', year1, ' to ', year2
    print ''

    config = qc.get_config(inputfile)

    sst_climatology_file = config['SST_climatology']
    nmat_climatology_file = config['MAT_climatology']
    icoads_dir = config['ICOADS_dir']
    sst_stdev_climatology_file = config['Old_SST_stdev_climatology']
    data_base_host = config['data_base_host']
    data_base_name = config['data_base_name']

    print 'Data base host =', data_base_host
    print 'Data base name =', data_base_name
    print 'SST climatology =', sst_climatology_file
    print 'NMAT climatology =', nmat_climatology_file
    print 'ICOADS directory =', icoads_dir
    print ''

    #read in the pentad climatology of standard deviations
    climatology = Dataset(sst_stdev_climatology_file)
    sst_pentad_stdev = climatology.variables['sst'][:]

    connection = MySQLdb.connect(host=data_base_host,
                                 user='******',
                                 db=data_base_name)

    cursor = connection.cursor()   #read
    cursor2 = connection.cursor()  #write

    for years, months in qc.year_month_gen(year1, 1, year2, 12):

        #want to get a month either side of the
        #target month, which may be in different years
        last_year, last_month = qc.last_month_was(years, months)
        next_year, next_month = qc.next_month_is(years, months)

        print years, months

        first_year = min([last_year, years, next_year])
        final_year = max([last_year, years, next_year])

        # clamp to the range of years held in the data base
        if first_year < 1850:
            first_year = 1850
        if final_year > 2014:
            final_year = 2014

        #first and last julian days are +- approximately one month
        month_lengths = qc.month_lengths(years)
        jul1 = qc.jul_day(years, months, 1)-25
        jul2 = qc.jul_day(years, months, month_lengths[months-1])+25

        for check_variable in ['SST', 'MAT']:

            reps = []

            # read candidate obs from every year that overlaps the window,
            # keeping only those that passed the relevant base QC
            for yyy in range(first_year, final_year+1):

                qcfilter = db.Quality_Control_Filter()
                qcfilter.jul1 = jul1
                qcfilter.jul2 = jul2
                qcfilter.set_multiple_qc_flags_to_pass(['bad_position',
                                                        'bad_date',
                                                        'blacklist'])

                if check_variable == 'SST':
                    qcfilter.set_multiple_qc_flags_to_pass(['no_sst',
                                                            'sst_below_freezing',
                                                            'no_sst_normal',
                                                            'sst_climatology_fail'])
                elif check_variable == 'MAT':
                    qcfilter.set_multiple_qc_flags_to_pass(['no_mat',
                                                            'no_mat_normal',
                                                            'mat_climatology_fail'])
                else:
                    print "no such type ", check_variable
                    assert False

                sql_request = db.build_sql_query(yyy, qcfilter)

                cursor.execute(sql_request)
                numrows = cursor.rowcount

                for i in range(numrows):
                    rows = cursor.fetchone()
                    rep = qc.MarineReport.report_from_array(rows)
                    reps.append(rep)

            print len(reps), " observations read in"

            #Do the buddy check
            if check_variable == 'SST':
                qcs = qc_buddy_check.mds_buddy_check(reps,
                                                     sst_pentad_stdev, 'SST')
            elif check_variable == 'MAT':
                qcs = qc_buddy_check.mds_buddy_check(reps,
                                                     sst_pentad_stdev, 'MAT')
            else:
                print "no such type ", check_variable
                assert False

            #put updated QC flags into data base
            # only the target month's flags are written back; the
            # neighbouring months were context only
            for rep in reps:
                if rep.month == months:
                    if check_variable == 'SST':
                        result = db.update_db_qc_single_flag(rep,
                                                             rep.sst_buddy_fail,
                                                             'sst_qc',
                                                             'sst_buddy_fail',
                                                             years,
                                                             cursor2)
                    elif check_variable == 'MAT':
                        result = db.update_db_qc_single_flag(rep,
                                                             rep.mat_buddy_fail,
                                                             'mat_qc',
                                                             'mat_buddy_fail',
                                                             years,
                                                             cursor2)
                    else:
                        print "no such type ", check_variable
                        assert False

            print "Of "+str(len(qcs))+" observations "+\
            str(np.sum(qcs))+" failed "+check_variable+\
            " buddy check"

        connection.commit() #Each month
        #db.report_qc_counts(cursor, years, months)

    connection.close()

    print "All Done :)"
def main(argv): ''' This program builds the marine data base which will be used to store the subset of ICOADS used in QC and other data processing. The current version reads in IMMA1 data from ICOADS.2.5.1 and the UID is used as the primary key for the data base so that it can be easily matched to individual obs if need be. #KW added para The database is now just a set of ascii files for each year/month. Later it may be the SQL database. The first step of the process is to read in the SST and MAT climatologies from file. These are 1degree latitude by 1 degree longitude by 73 pentad fields in NetCDF format. The data are read into numpy arrays. Next a connection is made to the data base, which may or may not already exist. If it does not exist, a database will be created. The program then loops over all years and months and DROPs existing tables for each year if they already exist and then recreates them. It then loops over all months in the year, opens the appropriate IMMA file and reads in the data one observation at a time. 
''' print '########################' print 'Running make_and_full_qc' print '########################' inputfile = 'configuration.txt' month1 = 1 month2 = 1 year1 = 1880 year2 = 1880 # KW Querying second instance of inputfile - I have commented this out for now # inputfile = 'configuration_local.txt' try: opts, args = getopt.getopt(argv, "hi:", ["ifile=", "year1=", "year2=", "month1=", "month2="]) except getopt.GetoptError: # KW changed Make_DB.py to make_and_full_qc.py print 'Usage make_and_full_qc.py -i <configuration_file> '+\ '--year1 <start year> --year2 <end year> '+\ '--month1 <start month> --month2 <end month>' sys.exit(2) inputfile, year1, year2, month1, month2 = qc.get_arguments(opts) print 'Input file is ', inputfile print 'Running from ', year1, ' to ', year2 print '' config = qc.get_config(inputfile) # KW Added a 'switch' to tell the code whether to run in HadISDH only (HadISDHSwitch == True) mode or # full mode (HadISDHSwitch == False) HadISDHSwitch = config['HadISDHSwitch'] sst_climatology_file = config['SST_climatology'] nmat_climatology_file = config['MAT_climatology'] # KW Added climatology files for the humidity variables at_climatology_file = config['AT_climatology'] dpt_climatology_file = config['DPT_climatology'] shu_climatology_file = config['SHU_climatology'] vap_climatology_file = config['VAP_climatology'] crh_climatology_file = config['CRH_climatology'] cwb_climatology_file = config['CWB_climatology'] dpd_climatology_file = config['DPD_climatology'] # KW Added climatology file for the SLP which is needed if no SLP ob exists, or if it has failed qc - or if we choose to derive humidity using climatological P (which we have) slp_climatology_file = config['SLP_climatology'] icoads_dir = config['ICOADS_dir'] #KW Added the 'recent' ICOADS dir for files 2015+ recent_icoads_dir = config['RECENT_ICOADS_dir'] bad_id_file = config['IDs_to_exclude'] # KW added an item for the database dir to write out the QC'd ascii data to - hijacking SQL 
data_base_dir for now data_base_dir = config['data_base_dir'] # KW added an item as a suffix for the output file name to note which iteration we're on output_suffix = config['output_suffix'] # KW Noting this is set to read the OLD SST stdevs - nothing reads in the newer OSTIA one yet. sst_stdev_climatology_file = config['Old_SST_stdev_climatology'] sst_stdev_1_file = config['SST_buddy_one_box_to_buddy_avg'] sst_stdev_2_file = config['SST_buddy_one_ob_to_box_avg'] sst_stdev_3_file = config['SST_buddy_avg_sampling'] # KW added standard deviation files for AT and DPT - for MDSKate_buddy_check at_stdev_climatology_file = config['AT_stdev_climatology'] dpt_stdev_climatology_file = config['DPT_stdev_climatology'] # KW Added a look for hardwired limits passed through the config file or set to None if ('HardLimits' in config): HardLimit = np.float(config['HardLimits']) else: HardLimit = None print "This is the provided HardLimit: ",HardLimit #pdb.set_trace() print 'SST climatology =', sst_climatology_file print 'NMAT climatology =', nmat_climatology_file # KW Added climatology files for the humidity variables print 'DPT climatology =', dpt_climatology_file print 'SHU climatology =', shu_climatology_file print 'VAP climatology =', vap_climatology_file print 'CRH climatology =', crh_climatology_file print 'CWB climatology =', cwb_climatology_file print 'DPD climatology =', dpd_climatology_file ## KW Added climatology files for SLP for calculation of humidity variables if no good quality SLP ob exists print 'SLP climatology =', slp_climatology_file print 'ICOADS directory =', icoads_dir # KW added 'recent' icoads dir print 'RECENT ICOADS directory =', recent_icoads_dir print 'List of bad IDs =', bad_id_file # KW added an item for the database dir to write out the QC'd ascii data to - hijacking SQL data_base_dir for now print 'QCd Database directory =', data_base_dir print 'QCd File Suffix =', output_suffix print '' ids_to_exclude = process_bad_id_file(bad_id_file) #read in 
climatology files climsst = read_climatology(sst_climatology_file, 'sst') climnmat = read_climatology(nmat_climatology_file, 'nmat') # KW Added climatology read in files for the humidity variables climat = read_climatology(at_climatology_file, 't2m_clims') climdpt = read_climatology(dpt_climatology_file, 'td2m_clims') climshu = read_climatology(shu_climatology_file, 'q2m_clims') climvap = read_climatology(vap_climatology_file, 'e2m_clims') climcrh = read_climatology(crh_climatology_file, 'rh2m_clims') climcwb = read_climatology(cwb_climatology_file, 'tw2m_clims') climdpd = read_climatology(dpd_climatology_file, 'dpd2m_clims') ## KW Added climatology read in files for SLP for calculating humidity variabls if no SLP value exists climslp = read_climatology(slp_climatology_file, 'p2m_clims') # KW Note that if this points to OLD_SST_stdev_climatology then it is a 73,180,360 array whereas the SST_stdev_climatology file is just 180,360 sst_pentad_stdev = read_climatology(sst_stdev_climatology_file, 'sst') sst_stdev_1 = read_climatology(sst_stdev_1_file, 'sst') sst_stdev_2 = read_climatology(sst_stdev_2_file, 'sst') sst_stdev_3 = read_climatology(sst_stdev_3_file, 'sst') # KW added standard deviation files for AT and DPT - for MDSKate_buddy_check at_pentad_stdev = read_climatology(at_stdev_climatology_file, 't2m_stdevs') dpt_pentad_stdev = read_climatology(dpt_stdev_climatology_file, 'td2m_stdevs') print 'Read climatology files' tim00 = time.time() for year, month in qc.year_month_gen(year1, month1, year2, month2): tim0 = time.time() print year, month last_year, last_month = qc.last_month_was(year, month) next_year, next_month = qc.next_month_is(year, month) if last_year < 1850: last_year = 1850 # KW don't understand why last year forced to be 1850 yet last_month = 1 print last_year, last_month, next_year, next_month reps = ex.Deck() count = 0 # KW This takes a long time to read in each year/month and process # For every candidate year/month the year/month before and after 
are also read in # Can we store the candidate year/month and following year/month for the next loop? # Hopefully there will be enough memory on spice # HOWEVER - IF WE RUN MANY YEARS IN PARALELL THEN OK TO READ IN EACH TIME for readyear, readmonth in qc.year_month_gen(last_year, last_month, next_year, next_month): print readyear, readmonth syr = str(readyear) smn = "%02d" % (readmonth) # KW THIS BIT IS FOR 2.5.0/1 # filename = icoads_dir+'/R2.5.1.'+syr+'.'+smn+'.gz' # KW FOUND A BUG - changed 'year' to 'readyear' below because it was trying to # read R2.5.2.2007.12.gz because 'year'=2008, 'month'=1 # KW Now added a catch for 'recent' years - at present this is anything from 2015 onwards - data only available in IMMA (not IMMA2) format - no UID! # if ((readyear > 2007) & (readyear < 2015)): # filename = icoads_dir+'/R2.5.2.'+syr+'.'+smn+'.gz' # if (readyear >= 2015): # filename = recent_icoads_dir+'/IMMA.'+syr+'.'+smn+'.gz' # KW THIS BIT IS FOR 3.0.0/1 filename = icoads_dir+'/IMMA1_R3.0.0_'+syr+'-'+smn+'.gz' if (readyear >= 2015): filename = recent_icoads_dir+'/IMMA1_R3.0.1_'+syr+'-'+smn+'.gz' icoads_file = gzip.open(filename,"r") # KW Noted that this creates an object of whole month of IMMA data separated into all available parameters from all available attachments # The rec.read bit later could be speeded up by ignoring the attachments we are not interested in in the first place? # The rec object has a .data dictionary of all variables (see IMMA2.py for variable IDs/keys rec = IMMA() EOF = False while not(EOF): #need to wrap the read in a exception catching thingy #becasuse there are some IMMA records which contain control #characters try: result = rec.read(icoads_file) if result == None: EOF = True # KW are we sure this isn't doing anything silly later when rec is overwritten with a new rec - could # this overwrite ids_to_exclude[0]? 
rec.data['ID'] = ids_to_exclude[0] except: rec.data['ID'] = ids_to_exclude[0] if not(rec.data['ID'] in ids_to_exclude): #strip everything out of the IMMA record except what we # KW (Kate Robert and John)# need # KW this should work for both IMMA and IMMA1 e.g. C4 (IMMA) and C7 (IMMA1) use same 'key's so it 'should' find # them because both are encoded in IMMA2.py keys = [] for key in rec.data: keys.append(key) for key in keys: # KW Added quite a few things in here - assume these don't have to be all from attachment 0 because UID isn't # Assume they don't have to be in a particular order either # I've put them in the order they appear in the attachments # See: RequiredIMMAColumnsforHadISDH.xlsx # Only a few of these will be written out but they are useful in the QC and bias adjustment process # May remove some of these later if they are not useful - to save time/memory # if not(key in ['YR','MO','DY','HR','LAT','LON', # 'SST','AT','DCK','ID','PT','SI', # 'SIM','DS','VS','SLP','UID','SID']): if not(key in ['YR','MO','DY','HR','LAT','LON', 'DS','VS','II','ID','C1', 'DI','D','WI','W','VI','VV','SLP', 'IT','AT','WBTI','WBT','DPTI','DPT','SI','SST', 'DCK','SID','PT','DUPS', 'COR','TOB','TOT','EOT','TOH','EOH', 'SIM','LOV','HOP','HOT','HOB','HOA','SMF', 'UID']): if key in rec.data: del rec.data[key] # KW So I've noticed that if one of the listed keys above isn't in the ob then a data['key'] isn't # set up (makes sense!) 
so when I come to print them later it all goes to pot # So, I loop through the non-core0 keys here to add blank keys where they are missing # KW Added 'UID' to this list because it is not present in the RECENT_ICOADS (2015+) for inkey in ['DUPS','COR','TOB','TOT','EOT', 'TOH','EOH','SIM','LOV','HOP','HOT','HOB','HOA','SMF','UID']: if not(inkey in keys): #print("Missing key: ",inkey) rec.data[inkey] = None rep = ex.MarineReport(rec) del rec #************HadISDH ONLY******************************* # KW Added a catch here to check the platform type and whether there is both a T (AT) and DPT present. # Only keep the ob if it is from a ship (0,1,2,3,4,5) or moored platform/buoy (6,8,9,10,15) and has # AT and DPT present. # This may not be desirable for a full run but should save time/memory for HadISDH # If HadISDHSwitch == True then the ob needs to pass the test else all obs are processed # No QC performed yet so cannot call get_qc - qc.value_check returns 0 if present and 1 if noval # Previously I had also pulled through PT=14 but this can be a coastal or island station - so not what we want. # KW Oct 2016 - I've now decided that future runs shoudl NOT include any platforms. We don't have height # info and they can vary from <10 to >200m so its just too screwy # if (not (HadISDHSwitch)) | ((rep.data['PT'] in [0,1,2,3,4,5,6,8,9,10,15]) & if (not (HadISDHSwitch)) | ((rep.data['PT'] in [0,1,2,3,4,5,6,8]) & (qc.value_check(rep.getvar('AT')) == 0) & (qc.value_check(rep.getvar('DPT')) == 0)): # KW TESTED: WORKS IF VALUES ARE BLANK AT LEAST # KW CHECK THAT THIS KICKS OUT OBS WITH REPORTED MISSING VALUES (e.g. 
-99.9 or 99.9) FOR AT or DPT #******************************************************* # KW Call my rep.setvar routine that I built into the MarineReport in Extended_IMMA.py # Use this to add blank var containers for the humidity variables that are calculated # later rep.setvar(['SHU','VAP','CRH','CWB','DPD']) # KW Get climatologies for slp to calculate humidity values if no good quality qc ob exists rep_slp_clim = get_clim(rep, climslp) #print('SLP: ',rep_slp_clim) #if (count == 10): # pdb.set_trace() rep.add_climate_variable('SLP', rep_slp_clim) # KW Calculate humidity variables here - so we can then kick out anything really silly e.g. RH>150 # Very silly values can cause longer line lengths at output which is an extra problem for post processing # For the longer term these could be set to missing but we just want to focus on 'good' humidity obs for now # Use my new routine as part of the Extended_IMMA MarineReport class rep.calcvar() # This routine returns values as None if there is no climslp or if RH is < 0 or > 150. rep.calcvar(['SHU','VAP','CRH','CWB','DPD']) # Now we have the checker for very silly values - which will just break the loop # No RH - means that there is either an AT or DPT missing # RH must be between 0 and 150 # AT must be between -80 and 65 # DPT must be between -80 and 65 # SHU must be greater than 0.0 # Inadvertantly, this kicks out any ob for which no climatology is available - the ones that would late fail pos or date checks # Later on - we may change this to just set the humidity values to missing rather than delete the ob. SST might be ok after all. 
if (rep.getvar('CRH') == None): # print('Found a SILLINESS ',rep.getvar('AT'),rep.getvar('DPT')) # pdb.set_trace() # delete the rep to keep things tidy del rep # create a new rec because we're skipping the end of the WHILE loop rec = IMMA() continue if ((rep.getvar('CRH') <= 0.0) | (rep.getvar('CRH') > 150.0)): # print('Found a SILLINESS ',rep.getvar('AT'),rep.getvar('DPT')) # pdb.set_trace() # delete the rep to keep things tidy del rep # create a new rec because we're skipping the end of the WHILE loop rec = IMMA() continue if ((rep.getvar('AT') < -80.) | (rep.getvar('AT') > 65.)): # print('Found a SILLINESS ',rep.getvar('AT'),rep.getvar('DPT')) # pdb.set_trace() # delete the rep to keep things tidy del rep # create a new rec because we're skipping the end of the WHILE loop rec = IMMA() continue if ((rep.getvar('DPT') < -80.) | (rep.getvar('DPT') > 65.)): # print('Found a SILLINESS ',rep.getvar('AT'),rep.getvar('DPT')) # pdb.set_trace() # delete the rep to keep things tidy del rep # create a new rec because we're skipping the end of the WHILE loop rec = IMMA() continue if (rep.getvar('SHU') <= 0.0): # print('Found a SILLINESS ',rep.getvar('AT'),rep.getvar('DPT')) # pdb.set_trace() # delete the rep to keep things tidy del rep # create a new rec because we're skipping the end of the WHILE loop rec = IMMA() continue # Get climatologies for all variables (for outlier test and anomaly creation [done in buddy check and for final print out] - if AT or DPT are missing (None) then do not carry on processing that variable # If we're using OBSclims then there are missing data which will be returned as None (NOT A STRING!!!) 
# KW Added bit to find and store climatological stdev for AT and DPT - for outlier test rep_sst_clim = get_clim(rep, climsst) rep.add_climate_variable('SST', rep_sst_clim) # KW Set to read in ERA (or OBS+ERA) clim file for AT (not NMAT) # rep_mat_clim = get_clim(rep, climnmat) rep_mat_clim = get_clim(rep, climat) rep_mat_stdev = get_clim(rep, at_pentad_stdev) #print(rep_mat_clim,rep_mat_stdev) #pdb.set_trace() ## KW added to test clim value pulled out # print(rep.getvar('UID'),rep.getvar('AT'),rep_mat_clim,rep.getnorm('AT')) # print(rep.getvar('UID'),rep.getvar('AT'),rep_mat_stdev,rep.getstdev('AT')) # if (count == 10): # pdb.set_trace() ## KW This seems to be pulling out the correct climatological value if ((rep_mat_clim == None) | (rep_mat_stdev == None)): del rep # create a new rec because we're skipping the end of the WHILE loop rec = IMMA() continue else: rep.add_climate_variable('AT', rep_mat_clim) rep.add_stdev_variable('AT', rep_mat_stdev) rep_dpt_clim = get_clim(rep, climdpt) rep_dpt_stdev = get_clim(rep, dpt_pentad_stdev) if ((rep_dpt_clim == None) | (rep_dpt_stdev == None)): del rep rec = IMMA() continue else: rep.add_climate_variable('DPT', rep_dpt_clim) rep.add_stdev_variable('DPT', rep_dpt_stdev) rep_shu_clim = get_clim(rep, climshu) if (rep_shu_clim == None) : # if there is no SHU then either an AT or DPT would be missing I think so loop shoudld already be stopped del rep rec = IMMA() continue else: rep.add_climate_variable('SHU', rep_shu_clim) rep_vap_clim = get_clim(rep, climvap) if (rep_vap_clim == None) : # if there is no SHU then either an AT or DPT would be missing I think so loop shoudld already be stopped del rep rec = IMMA() continue else: rep.add_climate_variable('VAP', rep_vap_clim) rep_crh_clim = get_clim(rep, climcrh) if (rep_crh_clim == None) : # if there is no SHU then either an AT or DPT would be missing I think so loop shoudld already be stopped del rep rec = IMMA() continue else: rep.add_climate_variable('CRH', rep_crh_clim) 
rep_cwb_clim = get_clim(rep, climcwb) if (rep_cwb_clim == None) : # if there is no SHU then either an AT or DPT would be missing I think so loop shoudld already be stopped del rep rec = IMMA() continue else: rep.add_climate_variable('CWB', rep_cwb_clim) rep_dpd_clim = get_clim(rep, climdpd) if (rep_dpd_clim == None) : # if there is no SHU then either an AT or DPT would be missing I think so loop shoudld already be stopped del rep rec = IMMA() continue else: rep.add_climate_variable('DPD', rep_dpd_clim) #Deck 701 has a whole bunch of otherwise good obs with missing Hours. #Set to 0000UTC and recalculate the ob time if (rep.getvar('DCK') == 701 and rep.getvar('YR') < 1860 and rep.getvar('HR') == None): rep.data['HR'] = 0 rep.calculate_dt() # KW Added a HardLimit variable that has to be passed to the base_qc_report #rep = base_qc_report(rep) rep = base_qc_report(rep,HardLimit) # print(rep.getvar('ID'),rep.getvar('AT'),rep.getvar('DPT'),rep.getvar('SHU'),rep.getvar('CRH'),rep.getvar('VAP')) # pdb.set_trace() reps.append(rep) count += 1 rec = IMMA() icoads_file.close() tim1 = time.time() print count, " obs read and base QC ", tim1-tim0 #filter the obs into passes and fails of basic positional QC # KW NOtes that this uses the month before and after to apply track check - and so actually spends time applying # track check to the month before and month after too, which will then be ignored and redone later, with its following month # Is there scope to save effort here by only checking the candidate month while still passing the surrounding months for info reps.sort() filt = ex.QC_filter() filt.add_qc_filter('POS', 'date', 0) filt.add_qc_filter('POS', 'pos', 0) filt.add_qc_filter('POS', 'blklst', 0) passes, reps = filt.split_reports(reps) passes.sort() tim2 = time.time() print "obs filtered and sorted in ", tim2-tim1, len(reps)+len(passes) # KW So in here we could put some kind of parsing loop to say that if you are looping through more than one month # then you could save 
the candidate and previous month # KW ALSO NOW ONLY CARRY ON WITH THOSE OBS THAT PASS BASE QC (date, pos, blacklist) # KW commented out the following: ##all fails pass track check # reps.set_qc('POS', 'trk', 0) # reps.set_qc('POS', 'few', 0) # reps.set_qc('SST', 'rep', 0) # reps.set_qc('AT', 'rep', 0) ## KW Added for DPT # reps.set_qc('DPT', 'rep', 0) # reps.set_qc('DPT', 'repsat', 0) # KW End of commenting out # KW now clear and reset reps so that it gets overwritten and filled with only passes del reps reps = ex.Deck() #track check the passes one ship at a time for one_ship in passes.get_one_ship_at_a_time(): one_ship.track_check() # KW I don't think we need to spend time doing this for SST so have commented out # one_ship.find_repeated_values(threshold=0.7, intype='SST') # KW FOr AT and DPT this procedure now also looks at the proportion of obs in a track (>20 obs - same as rep value check) that have .0 precision # Where >=50% obs end in .0 the ATround or DPTround flag is set to 1 one_ship.find_repeated_values(threshold=0.7, intype='AT') # KW Added for DPT # KW For DPT this QC procedure now also searches for persistant streaks of 100% RH (AT == DPT) and flags repsat one_ship.find_repeated_values(threshold=0.7, intype='DPT') for rep in one_ship.rep_feed(): rep.reset_ext() reps.append(rep) del passes reps.sort() tim3 = time.time() print "obs track checked in ", tim3-tim2, len(reps) #******************************* # KW Commented out for now to save time on debug ##SST buddy check ## KW NOtes that this uses the month before and after to apply track check - and so actually spends time applying ## track check to the month before and month after too, which will then be ignored and redone later, with its following month ## Is there scope to save effort here by only checking the candidate month while still passing the surrounding months for info # filt = ex.QC_filter() # filt.add_qc_filter('POS', 'date', 0) # filt.add_qc_filter('POS', 'pos', 0) # 
filt.add_qc_filter('POS', 'blklst', 0) # filt.add_qc_filter('POS', 'trk', 0) # filt.add_qc_filter('SST', 'noval', 0) # filt.add_qc_filter('SST', 'freez', 0) # filt.add_qc_filter('SST', 'clim', 0) # filt.add_qc_filter('SST', 'nonorm', 0) # ## KW Notes splitting marine obs into passes and fails # passes, reps = filt.split_reports(reps) # ## KW Thinks this only buddy checks those obs that pass the filter of QC above # passes.bayesian_buddy_check('SST', sst_stdev_1, sst_stdev_2, sst_stdev_3) # passes.mds_buddy_check('SST', sst_pentad_stdev) # #****************************************** ## KW Thinks all fails obs that do not pass teh QC filter above are not buddy checked - they are set to 0 ## which means pass but should not be used later because they fail one of the other basic checks # reps.set_qc('SST', 'bbud', 0) # reps.set_qc('SST', 'bud', 0) #**************************************** # KW Commented out to save time # for i in range(0, len(passes)): # rep = passes.pop(0) # reps.append(rep) # # del passes # # reps.sort() #**************************************** tim4 = time.time() print "obs SST buddy checked in ", tim4-tim3, len(reps) #NMAT buddy check # KW NOtes that this uses the month before and after to apply track check - and so actually spends time applying # track check to the month before and month after too, which will then be ignored and redone later, with its following month # Is there scope to save effort here by only checking the candidate month while still passing the surrounding months for info? # For now I've made mdsKATE_buddy_check which only applies actual check to candidate month and year. It also uses actual pentad # for that time of year rather than the average pentad stdev. 
filt = ex.QC_filter() ## KW Commented out date/pos/blklst as these have already been filtered out # filt.add_qc_filter('POS', 'date', 0) # filt.add_qc_filter('POS', 'pos', 0) # filt.add_qc_filter('POS', 'blklst', 0) filt.add_qc_filter('POS', 'trk', 0) # KW commented out because we want to try to use all obs for AT and SPT # filt.add_qc_filter('POS', 'day', 0) # KW Commented out because we've already filtered so that only present obs are retained # filt.add_qc_filter('AT', 'noval', 0) filt.add_qc_filter('AT', 'clim', 0) filt.add_qc_filter('AT', 'nonorm', 0) # KW Notes that 'reps' are those obs that have failed one of the tests in the filter above passes, reps = filt.split_reports(reps) # KW Notes that passes is an object containing a months worth of marine obs that pass (flag=0) for all above filters # Both the bayesian buddy check and the mds buddy check test for distance to neighbours in space and time and flag # with a 1 where it is too great/fails. # KW NOT GOING TO APPLY BAYESIAN BUDDY CHECK BECAUSE WE CAN'T USE IT FOR DPT AND # ITS EXPERIMENTAL??? # passes.bayesian_buddy_check('AT', sst_stdev_1, sst_stdev_2, sst_stdev_3) # KW Commented out original mds_buddy_check to use mdsKATE_buddy_check instead (like DPT) which uses the seasonal stdev # rather than the average and only applies buddy check to candidate month # ALSO = we now use clim T stdevs from ERA (will eventually be obs+ERA combo?) # passes.mds_buddy_check('AT', sst_pentad_stdev) # KW Added a HardLimit variable that has to be passed to mdsKATE_buddy_check for the stdev multiplier passes.mdsKATE_buddy_check('AT', at_pentad_stdev, year, month, HardLimit) # KW - all fails (reps) are set to have a flag of 0 which means to pass the buddy checks.because there is no point in spending # further time buddy checking them, same as for track checks # KW NOT GOING TO APPLY BAYESIAN BUDDY CHECK BECAUSE WE CAN'T USE IT FOR DPT AND # ITS EXPERIMENTAL??? 
# reps.set_qc('AT', 'bbud', 8) reps.set_qc('AT', 'bud', 8) for i in range(0, len(passes)): rep = passes.pop(0) reps.append(rep) del passes reps.sort() tim5 = time.time() print "obs MAT buddy checked in ", tim5-tim4, len(reps) # Don't think we need to set - if its not set it will be 9! ## KW Added buddy check for DPT - NOT RUNNING BAYESIAN BECAUSE WE DON'T HAVE APPROPRIATE DATA - SET FLAG TO 8! # reps.set_qc('DPT', 'bbud', 8) #DPT buddy check # KW NOtes that this uses the month before and after to apply track check - and so actually spends time applying # track check to the month before and month after too, which will then be ignored and redone later, with its following month # Is there scope to save effort here by only checking the candidate month while still passing the surrounding months for info filt = ex.QC_filter() # KW commented out date, pos, blklst because we've already got rid of those that fail these # filt.add_qc_filter('POS', 'date', 0) # filt.add_qc_filter('POS', 'pos', 0) # filt.add_qc_filter('POS', 'blklst', 0) filt.add_qc_filter('POS', 'trk', 0) # KW Commented out day because we want to try to use all obs for DPT and AT # filt.add_qc_filter('POS', 'day', 0) # Hmmm so only checking the nightime obs # KW Commented out because we've already filtered so that only present obs are retained # filt.add_qc_filter('DPT', 'noval', 0) filt.add_qc_filter('DPT', 'clim', 0) # KW commented out nonorm because there will always be a norm (if using ERA or combo ERA+obs) # filt.add_qc_filter('DPT', 'nonorm', 0) # KW could change this to ERANorm when we have actual climatologies from data - more useful because there always will be a norm from ERA # KW Notes that 'reps' are those obs that have failed one of the tests in the filter above passes, reps = filt.split_reports(reps) # KW Notes that passes is an object containing a months worth of marine obs that pass (flag=0) for all above filters # Both the bayesian buddy check and the mds buddy check test for distance to 
neighbours in space and time and flag # with a 1 where it is too great/fails. # passes.bayesian_buddy_check('DPT', sst_stdev_1, sst_stdev_2, sst_stdev_3) # passes.mds_buddy_check('DPT', dpt_pentad_stdev) # KW Added a HardLimit variable that has to be passed to mdsKATE_buddy_check for the stdev multiplier # KW Using Kate's version of MDS buddy check now which has a stdev for each pentad and only checks candidate month passes.mdsKATE_buddy_check('DPT', dpt_pentad_stdev, year, month, HardLimit) # KW - all fails (reps) are set to have a flag of 0 which means to pass the buddy checks.because there is no point in spending # further time buddy checking them, same as for track checks # reps.set_qc('DPT', 'bbud', 8) reps.set_qc('DPT', 'bud', 8) # KW set as 8 for now for i in range(0, len(passes)): rep = passes.pop(0) reps.append(rep) del passes reps.sort() tim6 = time.time() print "obs DPT buddy checked in ", tim6-tim5, len(reps) syr = str(year) smn = "%02d" % (month) # KW changed outfile from icoards_dir to data_base_dir so that it writes to a different place to where the original # data are stored - don't want to mess with John's working version. outfile = open(data_base_dir+'/new_suite_'+syr+smn+'_'+output_suffix+'.txt', 'w') for rep in reps.reps: if rep.data['YR'] == year and rep.data['MO'] == month: outfile.write(rep.print_report()) outfile.close() del reps tim11 = time.time() print year, " so far in ", tim11-tim00
def main(argv):
    """
    Tracking_QC_wrapper.py script to control the running of the tracking QC::

      python Tracking_QC_wrapper.py -config configuration.txt -gap 3
          -yr1 1985 -yr2 2005 -mn1 1 -mn2 12 -edge new

    Reads in files containing list of IDs for each month and decides when to
    quality control the observations:

    Inputs

    -config specifies the location of the configuration file.
    -gap specifies the gap in months that must separate chunks of data
    -yr1 year of start month.
    -mn1 month of start month.
    -yr2 year of end month.
    -mn2 month of end month.
    -edge specifies how different cases should be treated. 'all' will run QC
        for all chunks separated by "gap" months of data; 'standard' will run
        for all chunks except for those that start or end fewer than "gap"
        months from the start or end of the series; 'new' will run only those
        chunks that have a gap of exactly "gap" months from the end of the
        series; 'noend' will run for all chunks except for those that end
        fewer than "gap" months from the end of the series.

    The four "edge" cases allow for running in different modes. In principle,
    'standard' will QC everything that will not change from the addition of
    data to the start or end of the series. It is intended for running all
    the historical QC in preparation for monthly updates. The flag 'new' can
    be used for real time updates to only QC those IDs that have not been
    eligible for QC in earlier months and have an appropriate gap at the end
    of the series. The flag 'all' will QC everything, including chunks at the
    start and end of the series which may change with extra data appended to
    either end of the series. The 'noend' flag can be used to QC everything
    that will not change from addition of data to the end of the series,
    which may be more appropriate ahead of monthly updates.

    Note that adding extra data in the middle of the series is liable to
    change all QC outcomes regardless of whether QC was run in 'all',
    'standard', 'noend' or 'new' configurations.
    """
    parser = argparse.ArgumentParser(
        description='Marine QC system, main program')
    parser.add_argument('-config', type=str, default='configuration.txt',
                        help='name of config file')
    parser.add_argument('-gap', type=int, default=3,
                        help='gap of -gap months needed to trigger QC of ID')
    parser.add_argument('-yr1', type=int, default=1985,
                        help='first year to analyse')
    parser.add_argument('-yr2', type=int, default=2019,
                        help='last year to analyse')
    parser.add_argument('-mn1', type=int, default=1,
                        help='first month to analyse in first year')
    parser.add_argument('-mn2', type=int, default=12,
                        help='last month to analyse in last year')
    parser.add_argument('-edge', type=str, default='standard',
                        help='How to deal with edge cases')
    args = parser.parse_args()

    inputfile = args.config
    y1 = args.yr1
    y2 = args.yr2
    m1 = args.mn1
    m2 = args.mn2
    gap = args.gap
    edge = args.edge

    # Tag used to identify this run in the submissions, e.g. "198501-201912"
    runmonthid = "{}{:02}-{}{:02}".format(y1, m1, y2, m2)

    if edge not in ['standard', 'all', 'new', 'noend']:
        raise Exception("edge not one of 'standard', 'all', 'new' or 'noend'")

    config = ConfigParser.ConfigParser()
    config.read(inputfile)
    out_dir = config.get('Directories', 'out_dir')
    track_out_dir = config.get('Directories', 'track_out_dir')

    with open(config.get('Files', 'parameter_file'), 'r') as f:
        parameters = json.load(f)

    # establish full list of IDs to QC: one YMCounter per ID marking which
    # year/months that ID was observed in
    id_dictionary = {}
    for year, month in qc.year_month_gen(y1, m1, y2, m2):
        # create directory and file names for the ID list
        extdir = safe_dir(out_dir, year, month)
        # use a context manager so the file handle is released even if a
        # malformed line raises
        with open(extdir + '/ID_file.txt', 'r') as idfile:
            for line in idfile:
                line = line.rstrip("\n")
                columns = line.split(',')
                if columns[0] not in id_dictionary:
                    id_dictionary[columns[0]] = ym.YMCounter(y1, m1, y2, m2)
                id_dictionary[columns[0]].setym(year, month, 1)

    # For each ID, decide chunk-by-chunk whether to submit QC based on the
    # chosen edge mode and the classification 'cl' of the chunk.
    for targetid in id_dictionary:
        g = id_dictionary[targetid]
        print(targetid, g.counter)
        for yy1, mm1, yy2, mm2, cl in g.yield_start_and_end_dates(gap):
            if edge == 'all':
                submit = True
            elif edge == 'standard':
                submit = 'regular' in cl
            elif edge == 'new':
                submit = 'new' in cl
            else:  # 'noend' (edge was validated above)
                submit = 'regular' in cl or 'start_edge_case' in cl
            if submit:
                print('Submit', yy1, mm1, yy2, mm2, cl)
                write_submission(inputfile, targetid, yy1, mm1, yy2, mm2, cl,
                                 runmonthid, track_out_dir,
                                 parameters['runid'])
            else:
                print('Ignore', yy1, mm1, yy2, mm2, cl)
        print()
def main(argv): ''' The new track check program. First the program gets a list of all unique IDs in the month that is to be track checked. It then reads in three months of data at a time: the month you want to track check, a month before and a month after. For each unique ID, the track check is run. Track check comprises as set of related tests This program checks positional data for individual ships and buoys for internal consistency; checking reported positions against positions calculated using reported speeds and directions. The obs are sorted by call-sign then date. Obs can only be checked if they have a valid call-sign that is unique to one ship or buoy, so obs with no call-sign or with the generic call-signs 'SHIP' or 'PLAT' are passed unchecked. The call-sign '0102' was apparently shared by several ships, so obs with this call-sign are also passed unchecked. ''' print '###################' print 'Running New Track Check' print '###################' inputfile = 'configuration.txt' try: opts, args = getopt.getopt(argv, "hi:", ["ifile=", "year1=", "year2="]) except getopt.GetoptError: print 'Usage Make_DB.py -i <configuration_file>'+\ ' --year1 <start year> --year2 <end year>' sys.exit(2) for opt, arg in opts: if opt == '-h': print 'test.py -i <inputfile> -o <outputfile>' sys.exit() elif opt in ("-i", "--ifile"): inputfile = arg elif opt in ("-x", "--year1"): try: year1 = int(arg) except: sys.exit("Failed: year1 not an integer") elif opt in ("-y", "--year2"): try: year2 = int(arg) except: sys.exit("Failed: year2 not an integer") print 'Input file is ', inputfile print 'Running from ', year1, ' to ', year2 print '' config = qc.get_config(inputfile) data_base_host = config['data_base_host'] data_base_name = config['data_base_name'] print 'Data base host =', data_base_host print 'Data base name =', data_base_name print '' connection = MySQLdb.connect(host=data_base_host, user='******', db=data_base_name) #need two cursors, one for reading and one for making QC 
changes cursor = connection.cursor() cursor2 = connection.cursor() t00 = time.time() for years, months in qc.year_month_gen(year1, 1, year2, 12): #want to get a month either side of the target month, #which may be in different years last_year, last_month = qc.last_month_was(years, months) next_year, next_month = qc.next_month_is(years, months) print years, months t0 = time.time() first_year = min([last_year, years, next_year]) final_year = max([last_year, years, next_year]) if first_year < 1850: first_year = 1850 if final_year > 1990: final_year = 1990 #first and last julian days are +- approximately one month month_lengths = qc.month_lengths(years) jul1 = qc.jul_day(years, months, 1)-10 jul2 = qc.jul_day(years, months, month_lengths[months-1])+10 '''Get all unique IDs for this month and fill a dictionary with all the distinct ids that we want to QC as keys and an empty Voyage for each key''' allids = db.get_unique_ids(cursor, years, months) reps = {} for idrows in allids: thisid = idrows[0] reps[thisid] = qc.Voyage() t1 = time.time() print "got all IDs ",t1-t0 #extract all data for this month and a month either side for yyy in range(first_year, final_year+1): ''' Build filter for extracting data from data base and then extract. In this case, we want observations between jul1 and jul2 which pass the base QC checks. 
''' qcfilter = db.Quality_Control_Filter() qcfilter.jul1 = jul1 qcfilter.jul2 = jul2 qcfilter.set_multiple_qc_flags_to_pass(['bad_position', 'bad_date', 'blacklist']) sql_request = db.build_sql_query(yyy, qcfilter) cursor.execute(sql_request) numrows = cursor.rowcount #put each ob into the dictionary if there is a key for it for i in range(numrows): rows = cursor.fetchone() rep = qc.ExtendedMarineReport.report_from_array(rows) if rep.id in reps: reps[rep.id].add_report(rep) t2 = time.time() print "read all obs from DB",t2-t1 #loop over all the distinct callsigns, extract the obs #where the callsign matches and track check them for idrows in allids: thisid = idrows[0] matches = reps[thisid] matches.sort() #run improved track check with spherical geometry etc. mqcs = qc_new_track_check.mds_full_track_check(matches) matches.find_repeated_values() for rep in matches.reps: if rep.month == months: result = db.update_db_qc_single_flag(rep,rep.bad_track, 'extra_qc', 'bayesian_track_check', years,cursor2) result = db.update_db_qc_single_flag(rep,rep.repeated_value, 'extra_qc', 'repeated_value', years,cursor2) split_matches = qc.split_generic_callsign(matches) for split in split_matches: qcs = qc_new_track_check.mds_full_track_check(split) #update QC in the data base but only for the target month for i, rep in enumerate(split.reps): if rep.month == months: result = db.update_db_qc_single_flag(rep, qcs[i], 'extra_qc', 'new_track_check', years, cursor2) result = db.update_db_qc_single_flag(rep, rep.fewsome_check, 'base_qc', 'fewsome_check', years, cursor2) connection.commit() t3 = time.time() print "done ",t3-t2 #db.report_qc_counts(cursor, years, months) connection.close() print "All Done :)"