Example #1
0
 def test_bad_longitudes(self):
     # Each longitude below, paired with latitude 0.0, is expected to make
     # qc.position_check return 1.
     for bad_lon in (-180.1, 360.1):
         self.assertEqual(1, qc.position_check(0.0, bad_lon))
Example #2
0
 def test_bad_latitudes(self):
     # Each latitude below, paired with longitude 0.0, is expected to make
     # qc.position_check return 1.
     for bad_lat in (91.0, -91.0):
         self.assertEqual(1, qc.position_check(bad_lat, 0.0))
Example #3
0
 def test_good_position(self):
     # Latitude 0.0 / longitude 0.0 is expected to give a position_check
     # result of 0.
     position_flag = qc.position_check(0.0, 0.0)
     self.assertEqual(0, position_flag)
def main(argv):
    '''
    This is the program that runs the base QC on data in the data base (created by Make_DB.py. The checks are the simpler 
    checks, which can be performed on an observation-by-observation basis.
    '''
    
    print '###############'
    print 'Running base_qc'
    print '###############'
    
    inputfile = 'configuration.txt'
    month1 = 1
    month2 = 12

    try:
        opts, args = getopt.getopt(argv,"hi:",
                                   ["ifile=",
                                    "year1=",
                                    "year2=",
                                    "month1=",
                                    "month2="])
    except getopt.GetoptError:
        print 'Usage Make_DB.py -i <configuration_file> '+\
        '--year1 <start year> --year2 <end year>'+\
        '--month1 <start month> --month2 <end month>'
        sys.exit(2)

    inputfile, year1, year2, month1, month2 = qc.get_arguments(opts)

    print 'Input file is ', inputfile
    print 'Running from ',year1,' to ',year2
    print 'Running from ',month1,' to ',month2
    print ''

    config = qc.get_config(inputfile)

    data_base_host        = config['data_base_host']
    data_base_name        = config['data_base_name'] 

    print 'Data base host =', data_base_host
    print 'Data base name =', data_base_name
    print ''

#connect to data base	
    connection = MySQLdb.connect(host=data_base_host, 
                                 user='******',
                                 db=data_base_name)

    for years,months in qc.year_month_gen(year1, month1, year2, month2):

        print '\nRunning Base QC for',years,months

        cursor = connection.cursor()
        cursor2 = connection.cursor()

        syr = str(years)
        
        '''set up a QC filter and use it to extract obs from the database direct into MarineReport format'''
        filter = db.Quality_Control_Filter()
        filter.year = years
        filter.month = months

        t0 = time.time()
        reps = db.get_marine_report_from_db(cursor,years,filter)
        t1 = time.time()
        total_time = t1-t0
        print "read",total_time

        '''For each report, do all the basic QC checks then update the QC flags in the data base'''
        for rep in reps:

            rep.bad_position = qc.position_check(rep.lat, rep.lon)
            rep.bad_date = qc.date_check(rep.year, rep.month, rep.day, rep.hour)
            if rep.bad_position == 0 and rep.bad_date == 0:
                rep.day_check = qc.day_test(rep.year,rep.month,rep.day,rep.hour,rep.lat,rep.lon)
            else:
                rep.day_check = 1

            rep.no_sst = qc.value_check(rep.sst)
            rep.sst_below_freezing = qc.sst_freeze_check(rep.sst, 0.0)
            rep.sst_climatology_fail = qc.climatology_check(rep.sst,rep.sst_norm,8.0)
            rep.no_sst_normal = qc.no_normal_check(rep.sst_norm)
            
            rep.no_mat = qc.value_check(rep.mat)
            rep.mat_climatology_fail = qc.climatology_check(rep.mat,rep.mat_norm,10.0)
            rep.no_mat_normal = qc.no_normal_check(rep.mat_norm)
            
            rep.blacklist = qc.blacklist(rep.id, rep.dck, rep.year, rep.lat, rep.lon)
            
        t15 = time.time()
        print "qcd",t15-t1
        for rep in reps:
            result = db.update_db_basic_qc_flags(rep,years,cursor2)
            
        t2 = time.time()
        print "added to db",t2-t15
        '''Commit the changes then print a summary'''
        connection.commit()
        #db.report_qc_counts(cursor,years,months)
        t3 = time.time()
        print "commited",t3-t2

    connection.close()

    print "All Done :)"
def _clim_limit_from_stdev(stdev, HardLimit):
    '''
    Work out the climatology-check threshold from a climatological stdev.

    stdev: climatological standard deviation for the variable (as returned by rep.getstdev)
    HardLimit: either a number or None - multiplier applied to the stdev; None selects the default of 4.5

    Returns 10. when qc.value_check(stdev) is non-zero (presumably: no stdev available - confirm against
    qc.value_check). Otherwise returns multiplier*stdev with the stdev clamped to the range 1 to 4.
    '''
    if qc.value_check(stdev) != 0:
        return 10.

    if HardLimit is not None:
        multiplier = HardLimit
    else:
        multiplier = 4.5

    # The three branches are kept explicit (rather than min/max) so that a
    # stdev which fails both comparisons still falls through to the minimum.
    if stdev > 4.:
        return multiplier*4.
    elif stdev >= 1. and stdev <= 4.:
        return multiplier*stdev
    else:
        return multiplier*1.


def base_qc_report(rep,HardLimit):
    '''
    Take a marine report and do some base qc on it.

    rep: the marine report; it is read through getvar/getnorm/getstdev/get_qc and updated through set_qc
    HardLimit: either a float value or None - used as the multiplier of the climatological stdev in the
    AT and DPT climatology tests (None gives the default multiplier of 4.5)

    Returns the same rep with the POS, SST, AT and DPT QC flags set.
    '''
    # Basic positional QC
    rep.set_qc('POS', 'pos',
               qc.position_check(rep.getvar('LAT'),
                                 rep.getvar('LON')))

    rep.set_qc('POS', 'date',
               qc.date_check(rep.getvar('YR'), rep.getvar('MO'),
                             rep.getvar('DY'), rep.getvar('HR')))

    # KW Test for day 1=day, 0=night.
    # Only run when both position and date passed; otherwise the flag is 1.
    if (rep.get_qc('POS', 'pos') == 0 and
            rep.get_qc('POS', 'date') == 0):
        rep.set_qc('POS', 'day',
                   qc.day_test(rep.getvar('YR'),
                               rep.getvar('MO'),
                               rep.getvar('DY'),
                               rep.getvar('HR'),
                               rep.getvar('LAT'),
                               rep.getvar('LON')))
    else:
        rep.set_qc('POS', 'day', 1)

    rep.set_qc('POS', 'blklst',
               qc.blacklist(rep.getvar('ID'),
                            rep.getvar('DCK'),
                            rep.getvar('YR'),
                            rep.getvar('LAT'),
                            rep.getvar('LON')))

    # KW NEW climatology check that uses the simultaneous climatological stdev
    # (of all obs in pentad climatology) to provide a threshold for outlier
    # detection. According to ERA-Interim (?) AT over ocean stdev doesn't vary
    # that much but it is higher in the mid- to high lats, especially around
    # the n. hemi coastlines. It is a little higher around the El Nino tropical
    # pacific warm pool region. stdev for DPT is higher than for AT - esp in
    # the mid-lats.
    # How many stdevs to use? Looks like average stdev is 1-2. So 4.5*stdev =
    # 4.5 to 9 deg.
    # 1 stdev ~68.2%, 2 stdev ~95.4%, 3 stdev 99.7%, 4 stdev ~99.9%,
    # 4.5 stdev >99.9%
    # So for the 138196 workable obs from Dec 1973 4.5 stdev < 138 obs-ish.
    # Lets start with 4.5.
    # The climatological stdevs have been added to each rep, and the check is
    # only applied to AT and DPT.
    # It needs a minimum and maximum threshold to prevent too much removal of
    # very small anomalies and not enough removal of ridiculously large ones
    # (>50deg for Dec 1973 which does seem crazy - needs checking with old use
    # of SST clim).
    # Earlier notes proposed min stdev 0.5 (2.25 deg) and max stdev 3 (quoted
    # as 13.25 deg; the previous fixed limit was 10 deg, which needs to be
    # large enough to account for diurnal cycle vs pentad mean), and then
    # suggested MIN = 1 (4.5 deg) and MAX = 4 (18 deg) because the cut-off was
    # too abrupt, especially if ERA continues to be used. The code below
    # implements the latter: stdev is clamped to 1..4 before being multiplied.

    # SST base QC
    # KW Could noval = 0 be a value that is present in IMMA but actually a
    # missing data indicator e.g. -99.9 or 99.9?
    rep.set_qc('SST', 'noval', qc.value_check(rep.getvar('SST')))
    rep.set_qc('SST', 'freez',
               qc.sst_freeze_check(rep.getvar('SST'), 0.0))
    rep.set_qc('SST', 'clim',
               qc.climatology_check(rep.getvar('SST'), rep.getnorm('SST'), 8.0))
    rep.set_qc('SST', 'nonorm', qc.no_normal_check(rep.getnorm('SST')))

    # MAT base QC
    # KW Could noval = 0 be a value that is present in IMMA but actually a
    # missing data indicator e.g. -99.9 or 99.9?
    rep.set_qc('AT', 'noval', qc.value_check(rep.getvar('AT')))
    # KW The old clim test used a fixed 10.0 threshold; this one uses
    # multiplier*stdev with clamped stdev, or 10. when no stdev is found.
    atlimit = _clim_limit_from_stdev(rep.getstdev('AT'), HardLimit)
    rep.set_qc('AT', 'clim',
               qc.climatology_check(rep.getvar('AT'), rep.getnorm('AT'), atlimit))
    rep.set_qc('AT', 'nonorm', qc.no_normal_check(rep.getnorm('AT')))

    # KW Added QC for DPT
    # DPT base QC
    rep.set_qc('DPT', 'noval', qc.value_check(rep.getvar('DPT')))
    # KW Same stdev-based clim test as for AT.
    dptlimit = _clim_limit_from_stdev(rep.getstdev('DPT'), HardLimit)
    rep.set_qc('DPT', 'clim',
               qc.climatology_check(rep.getvar('DPT'), rep.getnorm('DPT'), dptlimit))
    rep.set_qc('DPT', 'nonorm', qc.no_normal_check(rep.getnorm('DPT')))
    # KW New QC tests specifically for humidity
    rep.set_qc('DPT', 'ssat', qc.supersat_check(rep.getvar('DPT'),rep.getvar('AT')))

    return rep
def main(argv):
    '''
    This program builds the marine data base which will be used to store the subset of ICOADS used in QC and 
    other data processing. The current version reads in IMMA1 data from ICOADS.2.5.1 and the UID is used as the 
    primary key for the data base so that it can be easily matched to individual obs if need be.

    The first step of the process is to read in the SST and MAT climatologies from file. These are 1degree latitude 
    by 1 degree longitude by 73 pentad fields in NetCDF format. The data are read into numpy arrays.

    Next a connection is made to the data base, which may or may not already exist. If it does not exist, a database 
    will be created.
    
    The program then loops over all years and months and DROPs existing tables for each year if they already exist and 
    then recreates them. It then loops over all months in the year, opens the appropriate IMMA file and reads in 
    the data one observation at a time.
    '''
    
    print '######################'
    print 'Running Make_and_qc_DB'
    print '######################'
    
    inputfile = 'configuration.txt'
    month1 = 1
    month2 = 12

    try:
        opts, args = getopt.getopt(argv, "hi:", 
                                   ["ifile=", 
                                    "year1=", 
                                    "year2=",
                                    "month1=",
                                    "month2="])
    except getopt.GetoptError:
        print 'Usage Make_DB.py -i <configuration_file> '+\
        '--year1 <start year> --year2 <end year> '+\
        '--month1 <start month> --month2 <end month>'
        sys.exit(2)
    
    inputfile, year1, year2, month1, month2 = qc.get_arguments(opts)

    print 'Input file is ', inputfile
    print 'Running from ', year1, ' to ', year2
    print ''

    config = qc.get_config(inputfile)

    sst_climatology_file  = config['SST_climatology'] 
    nmat_climatology_file = config['MAT_climatology'] 
    data_base_host        = config['data_base_host']
    data_base_name        = config['data_base_name'] 
    icoads_dir            = config['ICOADS_dir'] 
    bad_id_file           = config['IDs_to_exclude']

    print 'SST climatology =', sst_climatology_file
    print 'NMAT climatology =', nmat_climatology_file
    print 'Data base host =', data_base_host
    print 'Data base name =', data_base_name
    print 'ICOADS directory =', icoads_dir
    print 'List of bad IDs =', bad_id_file 
    print ''


    idfile = open(bad_id_file, 'r')
    ids_to_exclude = []
    for line in idfile:
        line = line.rstrip()
        while len(line) < 9:
            line = line+' '
        if line != '         ':
            ids_to_exclude.append(line)
    idfile.close()

#read in climatology files
    climatology = Dataset(sst_climatology_file)
    climsst = climatology.variables['sst'][:]

    climatology = Dataset(nmat_climatology_file)
    climnmat = climatology.variables['nmat'][:]

    print 'Read climatology files'

#connect to database
    connection = MySQLdb.connect(host=data_base_host, 
                                 user='******',
                                 db=data_base_name)
    cursor = connection.cursor()

    t00 = time.time()

    for year in range(year1, year2+1):

        db.make_tables_for_year(cursor, year)
        db.make_additional_qc_table_for_year(cursor, year)
        
#        db.disable_keys(cursor, year)
      
        connection.commit()
        
        for month in range(1, 13):

            t0 = time.time()

            print year, month
            syr = str(year)
            smn = "%02d" % (month,)

            filename = icoads_dir+'/R2.5.1.'+syr+'.'+smn+'.gz'
            if year > 2007:
                filename = icoads_dir+'/R2.5.2.'+syr+'.'+smn+'.gz'

            icoads_file = gzip.open(filename,"r")

            rec = IMMA()

            count = 0
            reps = []
            while rec.read(icoads_file):
                if not(rec.data['ID'] in ids_to_exclude):
                    try:

                        rep = qc.imma1_record_to_marine_rep(rec, 
                                                            climsst, 
                                                            climnmat)
                        
                        rep.bad_position = qc.position_check(rep.lat, rep.lon)
                        rep.bad_date = qc.date_check(rep.year, rep.month, rep.day, rep.hour)
                        if rep.bad_position == 0 and rep.bad_date == 0:
                            rep.day_check = qc.day_test(rep.year,rep.month,rep.day,rep.hour,rep.lat,rep.lon)
                        else:
                            rep.day_check = 1
    
                        rep.no_sst = qc.value_check(rep.sst)
                        rep.sst_below_freezing = qc.sst_freeze_check(rep.sst, 0.0)
                        rep.sst_climatology_fail = qc.climatology_check(rep.sst,rep.sst_norm,8.0)
                        rep.no_sst_normal = qc.no_normal_check(rep.sst_norm)
            
                        rep.no_mat = qc.value_check(rep.mat)
                        rep.mat_climatology_fail = qc.climatology_check(rep.mat,rep.mat_norm,10.0)
                        rep.no_mat_normal = qc.no_normal_check(rep.mat_norm)
            
                        rep.blacklist = qc.blacklist(rep.id, rep.dck, rep.year, rep.lat, rep.lon)
    
                        inyear = rep.year
    
                        if year == rep.year:
                            reps.append(rep)
    
                        if len(reps) == 1000:
                            db.add_multiple_marine_reports_to_db(cursor,year,reps)
                            reps = []
    
    #                        db.add_marine_report_to_db(cursor,inyear,rep)
                        
                        count += 1
                        
                    except:
                        
                        assert False, "Failed to add records to database"

#catch the as yet unadded obs
            db.add_multiple_marine_reports_to_db(cursor,year,reps)

            icoads_file.close()

            t1 = time.time()

            print count," obs ingested. ",t1-t0
                  
#commit once per month
            connection.commit()
        
#        db.enable_keys(cursor, year)

        t11 = time.time()
        print year," done in ",t11-t00

#close the connection to the data base        
    connection.close()