def site_iter_process(valid_refs,c):
    #for ref_i in range(len(valid_refs)):
        data_valid = True

        site_ref = valid_refs[c]
        print 'Current Ref is = ', site_ref

        s_files = glob.glob('/work/home/db876/observations/surface/%s/CAPMON/ozon_smpls_%s*'%(species,site_ref))
        site_files = []
        for y in year_array:
            for f in s_files:
                if str(y) in f:
                    site_files.append(f)
                           

        site_files = modules.natsorted(site_files)

        yymmdd = []
        hhmm = []
        vals = []

        #create max possible o3 grid
        full_data = np.empty(n_hours)
        full_data[:] = -99999
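        #-99999 is the missing-data sentinel used throughout this module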

        for file_i in range(len(site_files)):

            count = 0
            meta_start = -99999
            start_read_1 = False
            start_read_2 = False

            with open(site_files[file_i], 'rb') as f:
                reader = csv.reader(f,delimiter=',')
                print site_files[file_i]
                for row in reader:
                    #print count
                    #break out of loop at bottom of file
                    if (start_read_2 == True) & (row[0] == '*TABLE ENDS'):
                        break
               
                    #get metadata
                    try:
                        if (row[0] =='*TABLE NAME') & (row[1] == 'Site information'):
                            meta_start = count+2
                    except IndexError:
                        pass
                    if count == meta_start:
                        lat_i = row.index('Latitude: decimal degrees')
                        lon_i = row.index('Longitude: decimal degrees')
                        try:
                            alt_i = row.index('Ground elevation: above mean sea level')
                        except ValueError:
                            alt_i = row.index('Ground altitude')
                        class_i = row.index('Site land use')
                
                    if count == (meta_start+6):
                        latitude = row[lat_i]
                        longitude = row[lon_i]
                        altitude = row[alt_i]
                        raw_class_name = row[class_i]
                      
                    #get data
                    if start_read_2 == True:
                        #read dates, times, and vals
                        date = row[8]
                        time = row[9]
                        yymmdd.append(date[:4]+date[5:7] + date[8:])
                        hhmm.append(time[:2]+time[3:])
                        quality_code = row[13]
                        if quality_code == 'V0':
                            vals = np.append(vals,np.float64(row[12]))
                        else:
                            vals = np.append(vals,-99999)
                    
                    try:
                        if (row[0] == '*TABLE NAME') & (row[1] == 'OZONE_HOURLY'):
                            start_read_1 = True
                    except IndexError:
                        pass
                   
                    if (start_read_1 == True) & (row[0] == '*TABLE COLUMN UNITS'):
                        unit = row[12]
                
                    if (start_read_1 == True) & (row[0] == '*TABLE BEGINS'):
                        start_read_2 = True
                    count+=1

        #convert all invalids to -99999
        test_inv = vals < 0
        vals[test_inv] = -99999

        #put o3 vals into full grid
        date_con = np.array(yymmdd).astype(int)
        time_con = np.array(hhmm).astype(int)
    
        #find matching times between actual times and grid of times; returns indices of the matched slots in the grid
        converted_time = modules.date_process(date_con,time_con,start_year)
        converted_time = np.round(converted_time,decimals=5)
        syn_grid_time = np.arange(0,n_days,1./24)
        syn_grid_time = np.round(syn_grid_time,decimals=5)
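        #both time axes are rounded to 5 d.p. so floating point noise does not
        #shift values into the wrong hourly slot during the matching below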
        indices = np.searchsorted(syn_grid_time, converted_time, side='left')
        vals = np.array(vals)
        #make sure no data is past end year
        index_test = indices < len(full_data)
        indices = indices[index_test]
        vals = vals[index_test]
        full_data[indices] = vals
    
    
        #get metadata
        lat = np.float64(latitude)
        lon = np.float64(longitude)
        alt = np.float64(altitude)
        
        #check site is valid by class
        if ('Urban' in raw_class_name) or ('urban' in raw_class_name):
            data_valid=False
            print 'Data is invalid. Raw Class is Urban.'
    
        #check site is not urban using anthrome map from 2000
        anthfile = '/work/home/db876/plotting_tools/core_tools/anthro2_a2000.nc'
        anthload = Dataset(anthfile)
        class_result,anthrome_class_name = modules.anthrome_classify(anthload,[lat],[lon])
        if class_result == 'invalid':
            data_valid = False
            print 'Site Invalid, site classed as urban by anthrome map.'
        
        #do data quality checks
        full_data,data_valid,data_complete = modules.quality_check_periodic(full_data,data_valid,data_resolution,np.float64(altitude),grid_dates,start_year,end_year)
    
        #set measurement method
        mm = 'ultraviolet photometry'
    
        #set site file resolution
        file_res = 'H'
    
        #set sampling as average
        st = 'average'
    
        return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res,data_complete
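
#The searchsorted grid-alignment pattern above is reused by every network
#parser in this file. A minimal, self-contained sketch of the idea (the data
#values here are made up for illustration, not taken from any network):
def _demo_grid_alignment():
    import numpy as np
    n_days = 2
    #synthetic hourly grid, in fractional days since the start year
    syn_grid_time = np.round(np.arange(0, n_days, 1./24), decimals=5)
    #two observations: midnight of day 1 and midday of day 2
    converted_time = np.round(np.array([0.0, 1.5]), decimals=5)
    full_data = np.empty(n_days * 24)
    full_data[:] = -99999
    #each observation lands in its hourly slot; all other slots stay -99999
    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
    full_data[indices] = [31.2, 28.7]
    return full_data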
def site_iter_process(valid_refs, c):
        # for r in range(len(valid_refs)):
        ref = valid_refs[c]
        print ref

        # get site instrument for species
        met_i = met_refs.index(ref)
        print len(met_refs)
        print len(met_species)
        site_species = list(met_species[met_i])
        site_instruments = list(met_instruments[met_i])
        print site_species
        print site_instruments
        mm = site_instruments[site_species.index(species)]

        site_resolutions = []
        data_valid = True

        s_files = insensitive_glob("/work/home/db876/observations/surface/%s/EANET/*%s.csv" % (species, ref))
        site_files = []
        for y in year_array:
            for f in s_files:
                if str(y)[-2:] in f:
                    site_files.append(f)

        site_files = modules.natsorted(site_files)

        if site_files == []:
            print "No files for ref.\n"

        years = []
        months = []
        days = []
        hours = []

        vals = []

        last_year_index = len(site_files)
        for y in year_array:
            print "Processing Year %s" % y
            got_year = False
            for file in site_files:
                last_file_split = file.split("/")[-1]
                if str(y)[2:] in last_file_split:
                    got_year = True
                    break
            if got_year == False:
                # fill in data for missing year
                timedelta_diff = datetime.date(y + 1, 1, 1) - datetime.date(y, 1, 1)
                ndays_missing = timedelta_diff.days
                print "ndays missing = ", ndays_missing

                vals = np.append(vals, [-99999] * (ndays_missing * 24))

                continue

            print file

            valid = True
            with open(file, "rb") as f:
                reader = csv.reader(f, delimiter=",")
                counter = 0

                # get resolution
                for row in reader:
                    if counter == 0:
                        all_units = row

                    elif counter == 1:
                        file_res = "H"

                        try:
                            hour_index = row.index("Hour")
                        except ValueError:
                            file_res = "D"
                        try:
                            day_index = row.index("Day")
                        except ValueError:
                            file_res = "M"
                        month_index = row.index("Month")
                        year_index = row.index("Year")

                        try:
                            spec_index = row.index(species.upper())
                            units = all_units[spec_index]
                        except ValueError:
                            valid = False
                            break

                        # make sure each year's units are ppb
                        if units != "ppb":
                            print "Units not ppb!"
                            raise ValueError("unexpected units: %s" % units)

                    if counter == 2:
                        # use 'month' here; 'mm' already holds the measurement method for the site
                        if file_res == "H":
                            yyyy = row[year_index]
                            month = row[month_index]
                            dd = row[day_index]
                            hh = row[hour_index]
                        elif file_res == "D":
                            yyyy = row[year_index]
                            month = row[month_index]
                            dd = row[day_index]
                            hh = 1
                        elif file_res == "M":
                            yyyy = row[year_index]
                            month = row[month_index]
                            dd = 1
                            hh = 1

                        start_datetime = datetime.datetime(int(yyyy), int(month), int(dd), int(hh))

                    if counter == 3:
                        if file_res == "H":
                            yyyy = row[year_index]
                            month = row[month_index]
                            dd = row[day_index]
                            hh = row[hour_index]
                        elif file_res == "D":
                            yyyy = row[year_index]
                            month = row[month_index]
                            dd = row[day_index]
                            hh = 1
                        elif file_res == "M":
                            yyyy = row[year_index]
                            month = row[month_index]
                            dd = 1
                            hh = 1

                        present_datetime = datetime.datetime(int(yyyy), int(month), int(dd), int(hh))

                        time_delt = present_datetime - start_datetime
                        hour_delt = datetime.timedelta(hours=1)
                        day_delt = datetime.timedelta(hours=24)
                        week_delt = datetime.timedelta(hours=24 * 7)
                        month_delt = datetime.timedelta(hours=24 * 28)
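                        #infer the file's native resolution from the gap between
                        #the first two data rows (counters 2 and 3)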

                        print time_delt

                        if time_delt < day_delt:
                            print "Hourly Data"
                            file_res = "H"
                            site_resolutions.append(file_res)

                        elif (time_delt > hour_delt) & (time_delt < week_delt):
                            print "Daily Data"
                            file_res = "D"
                            site_resolutions.append(file_res)

                        elif time_delt > week_delt:
                            print "Monthly Data"
                            file_res = "M"
                            site_resolutions.append(file_res)

                        # break
                        # limit files by timeres return if not suitable for output res
                        if output_res == "H":
                            if (file_res == "D") or (file_res == "M"):
                                print "File resolution has to be Minimum Hourly. Skipping"
                                data_valid = False
                                return c, vals, data_valid, -999, -999, -999, "na", "na", "na", "na", "na", -999
                        elif output_res == "D":
                            if file_res == "M":
                                print "File resolution has to be Minimum Daily. Skipping"
                                data_valid = False
                                return c, vals, data_valid, -999, -999, -999, "na", "na", "na", "na", "na", -999

                    counter += 1

            # READ IN DATA
            if valid == True:
                with open(file, "rb") as f:
                    reader = csv.reader(f, delimiter=",")
                    counter = 0
                    for row in reader:

                        if counter >= 2:
                            yyyy = row[year_index]
                            month = row[month_index]

                            if file_res == "H":
                                try:
                                    vals = np.append(vals, np.float64(row[spec_index]))
                                except:
                                    vals = np.append(vals, -99999)

                            elif file_res == "D":
                                try:
                                    vals = np.append(vals, [np.float64(row[spec_index])] * 24)
                                except:
                                    vals = np.append(vals, [-99999] * 24)

                            elif file_res == "M":
                                month_days = monthrange(int(yyyy), int(mm))[1]
                                try:
                                    vals = np.append(vals, [np.float64(row[spec_index])] * (month_days * 24))
                                except:
                                    vals = np.append(vals, [-99999] * (month_days * 24))

                        counter += 1
            else:
                print "Species is not in file header. Skipping Year"
                timedelta_diff = datetime.date(y + 1, 1, 1) - datetime.date(y, 1, 1)
                ndays_missing = timedelta_diff.days
                print "ndays missing = ", ndays_missing
                vals = np.append(vals, [-99999] * (ndays_missing * 24))

        valid_refs_rev.append(ref)

        i_ref = met_refs.index(ref)
        tz = np.float64(met_tz[i_ref])
        lat = np.float64(met_lats[i_ref])
        lon = np.float64(met_lons[i_ref])
        alt = np.float64(met_alts[i_ref])
        raw_class_name = met_class[i_ref]
        anthrome_class_name = class_name[i_ref]

        # check tz is a whole number, else skip site
        if (tz % 1) != 0:
            print "Timezone is not a whole number. Skipping"
            data_valid = False

        tz = int(tz)
        # correct time to UTC
        if tz < 0:
            # get rid of values at start and append -99999's at end
            cut = vals[:tz]
            for num in range(np.abs(tz)):
                cut = np.insert(cut, 0, -99999)
            vals = cut
        elif tz > 0:
            # put -99999's at start and get rid of values at end
            cut = vals[tz:]
            for num in range(tz):
                cut = np.append(cut, -99999)
            vals = cut

        # do data quality checks
        full_data, data_valid, data_complete = modules.quality_check_periodic(
            vals, data_valid, data_resolution, np.float64(alt), grid_dates, start_year, end_year
        )

        # all files for a site must share one resolution, else skip the site
        if len(set(site_resolutions)) > 1:
            print "Not all files for site have same resolution. Skipping."
            data_valid = False
            return c, full_data, data_valid, -999, -999, -999, "na", "na", "na", "na", "na", -999

        # set sampling as average
        st = "average"

        return (
            c,
            full_data,
            data_valid,
            lat,
            lon,
            alt,
            raw_class_name,
            anthrome_class_name,
            mm,
            st,
            file_res,
            data_complete,
        )
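
#A minimal sketch of the local-time -> UTC shift used above (sign convention:
#tz is hours ahead of UTC; the series is shifted by |tz| slots and padded with
#the -99999 sentinel; the sample values are illustrative only):
def _demo_utc_shift():
    import numpy as np
    vals = np.arange(6, dtype=np.float64)  #six hourly values in local time
    tz = -2  #local time is UTC-2: a sample at local 00:00 belongs at 02:00 UTC
    if tz < 0:
        #prepend |tz| missing slots, drop the last |tz| values
        shifted = np.append([-99999] * abs(tz), vals[:tz])
    elif tz > 0:
        #drop the first tz values, append tz missing slots
        shifted = np.append(vals[tz:], [-99999] * tz)
    else:
        shifted = vals
    return shifted  #[-99999, -99999, 0, 1, 2, 3]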
def site_iter_process(valid_refs,c):
    #process data for each site at a time
    #for site_ref in valid_refs:
        site_ref = valid_refs[c]
        data_valid = True
        print 'ref = ',site_ref
        site_test = all_refs == site_ref
    
        site_yyyymmdd = yyyymmdd[site_test]
        site_hhmm = hhmm[site_test]
        site_vals = vals[site_test]
        site_mm = all_mm[site_test]
        site_units = units[site_test]
    
        if species == 'ISOP':
            site_sample_len = sample_len[site_test]
     
        #check for data below the limit of detection (ISOP only; other species
        #carry a per-line LOD flag in the file) - LOD for ISOP is 0.01 ppbv
        if species == 'ISOP':
            lod_test = site_vals < 0.01
    
        #convert from ppm to ppb
        if (species == 'O3') or (species == 'NO') or (species == 'NO2'):
            for i in range(len(site_vals)):
                if site_units[i] == 'Parts per million':
                    site_vals[i] = site_vals[i]*1.e3
                elif site_units[i] == 'Parts per billion':
                    pass
                else:
                    print site_units[i]
                    raise ValueError('unknown unit: %s' % site_units[i])
        
        # convert from ppbC to ppb
        if species == 'ISOP':
            for i in range(len(site_vals)):
                #078 is Parts per billion Carbon, Isoprene has 5 Carbons
                if site_units[i] == '078':
                    site_vals[i] = site_vals[i]/5.  
                #008 is Parts per billion
                if site_units[i] == '008':
                    pass
                #101 is Parts per million Carbon
                if site_units[i] == '101':
                    site_vals[i] = (site_vals[i]/5.)*1.e3
                
            site_vals[lod_test] = -99999
 
        #put vals into full grid
        date_con = np.array(site_yyyymmdd).astype(int)
        time_con = np.array(site_hhmm).astype(int)
    
        #create max possible o3 grid
        full_data = np.empty(n_hours)
        full_data[:] = -99999
    
        #find matching times between actual times and grid of times; returns indices of the matched slots in the grid
        converted_time = modules.date_process(date_con,time_con,start_year)
        converted_time = np.round(converted_time,decimals=5)
    
        syn_grid_time = np.arange(0,n_days,1./24)
        syn_grid_time = np.round(syn_grid_time,decimals=5)
        indices = np.searchsorted(syn_grid_time, converted_time, side='left')
        site_vals = np.array(site_vals)
    
        #if date goes past where it should finish, omit it.
        inv_i = indices < len(full_data)
        indices = indices[inv_i]
        site_vals = site_vals[inv_i]
 
        full_data[indices] = site_vals
    
        #get site meta
        meta_index = meta_refs.index(site_ref)
        lat = np.float64(meta_lats[meta_index])
        lon =  np.float64(meta_lons[meta_index])
        alt =  np.float64(meta_alts[meta_index])
        raw_class_name = meta_class[meta_index]
        anthrome_class_name = class_name[meta_index]
    
        #get measurement method: take mode of the big methods array, then map it to a standard name
        site_mm = stats.mode(site_mm)[0][0]
        method = site_mm.upper()
        uv_methods = ('INSTRUMENTAL-ULTRAVIOLETABSORPTION','INSTRUMENTAL-ULTRAVIOLET2BMODEL202','INSTRUMENTAL-UVPHOTOMETRIC','INSTRUMENTAL-ULTRAVIOLETRADIATIONABSORBTN','INSTRUMENTAL-ULTRAVIOLET','INSTRUMENTAL-ULTRAVIOLETPHOTOMETRY','INSTRUMENTAL-UVABSORPTIONPHOTOMETRY/UV2BMODEL202AND205','INSTRUMENTAL-ECOTECHSERINUS10')
        chemilum_methods = ('INSTRUMENTAL-CHEMILUMINESCENCE','INSTRUMENTAL-GASPHASECHEMILUMINESCENCE','INSTRUMENTAL-CHEMILUMINESCENCEAPIMODEL265EANDT265','LOWLEVELNOXINSTRUMENTAL-TECO42SCHEMILUMINESCENCE','INSTRUMENTAL-GAS-PHASECHEMILUMINESCENCE','INSTRUMENTAL-CHEMILUMINESCENCETELEDYNEAPIT200UPPHOTOLYTIC','INSTRUMENTAL-CHEMILUMINESCENCETELEDYNEAPI200EU/501','INSTRUMENTAL-CHEMILUMINESCENCEECOTECHEC9841T','TELEDYNE-APIMODEL200EUPORT200UP-PHOTOLYTIC-CHEMILUMINESCENCE','INSTRUMENTAL-CHEMILUMINESCENCETHERMOELECTRON42C-TL,42I-TL','INSTRUMENTAL-CHEMILUMINESCENCERHODAMINEBDYE','INSTRUMENTAL-CHEMILUMINESCENCETHERMOELECTRON42C-Y,42I-Y','INSTRUMENTAL-CHEMILUMINESCENCEECOTECHEC9843')
        if method in uv_methods:
            mm = 'ultraviolet photometry'
        
        elif method in chemilum_methods:
            mm = 'chemiluminescence'
        
        elif method in ('INSTRUMENTAL-OPENPATHO3ANALYZER','INSTRUMENTAL-OPENPATHNOANALYZER'):
            mm = 'differential optical absorption spectroscopy'
        
        elif method == 'TELEDYNEMODELT500U-CAVITYATTENUATEDPHASESHIFTSPECTROSCOPY':
            mm = 'cavity attenuated phase shift spectroscopy'

        elif method in ('INSTRUMENTAL-COLORIMETRIC-GRIESS-SALTZMAN','INSTRUMENTAL-COLORIMETRIC'):
            mm = 'griess saltzman colorimetric'
        
        elif method == 'INSTRUMENTAL-COLORIMETRIC-LYSHKOW(MOD)':
            mm = 'lyshkow colorimetric'
        
        elif method == 'INSTRUMENTAL-COULOMETRIC':
            mm = 'coulometry'
        
        else:
            print method
            raise ValueError('unknown measurement method: %s' % method)

        #do data quality checks
        full_data,data_valid,data_complete = modules.quality_check_periodic(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

        #set site file resolution
        file_res = 'H'
    
        #set sampling as average
        st = 'average'

        return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res,data_complete
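
#A short worked example of the ppbC -> ppb conversion above (isoprene, C5H8,
#has five carbons, so mixing ratios reported as carbon are divided by 5;
#sample values are illustrative only):
def _demo_ppbc_to_ppb():
    val_ppbc = 10.0          #10 ppbC of isoprene ('078' units)
    val_ppb = val_ppbc / 5.  #-> 2 ppb
    val_ppmc = 0.01          #0.01 ppmC ('101' units)
    val_ppb2 = (val_ppmc / 5.) * 1.e3  #-> 2 ppb
    return val_ppb, val_ppb2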
def site_iter_process(valid_refs,c):
    #for site_ref in valid_refs:
        data_valid = True
        site_ref = valid_refs[c]
        print 'ref = ',site_ref
        site_test = all_refs == site_ref
    
        site_yyyymmdd = yyyymmdd[site_test]
        site_hhmm = hhmm[site_test]
        site_vals = vals[site_test]
 
        #convert blank invalids to -99999
        test_inv = site_vals == ''
        site_vals[test_inv] = -99999
    
        site_vals = np.float64(site_vals)
    
        #convert number invalids to -99999
        test_inv = site_vals < 0
        site_vals[test_inv] = -99999
    
        #put vals into full grid
        date_con = np.array(site_yyyymmdd).astype(int)
        time_con = np.array(site_hhmm).astype(int)
    
        #create max possible o3 grid
        full_data = np.empty(n_hours)
        full_data[:] = -99999
    
        #find matching times between actual times and grid of times; returns indices of the matched slots in the grid
        converted_time = modules.date_process(date_con,time_con,start_year)
        converted_time = np.round(converted_time,decimals=5)
    
        syn_grid_time = np.arange(0,n_days,1./24)
        syn_grid_time = np.round(syn_grid_time,decimals=5)
        indices = np.searchsorted(syn_grid_time, converted_time, side='left')
        site_vals = np.array(site_vals)
        #make sure no data is past the end year
        index_test = indices < len(full_data)
        indices = indices[index_test]
        site_vals = site_vals[index_test]
        full_data[indices] = site_vals
    
        meta_index = meta_refs.index(site_ref)
        tz = meta_tz[meta_index]
        lat = np.float64(meta_lats[meta_index])
        lon = np.float64(meta_lons[meta_index])
        alt = np.float64(meta_alts[meta_index])
        raw_class_name = meta_class[meta_index]
        anthrome_class_name = class_name[meta_index]
    
        #correct timezone to UTC
        if tz < 0:
            #get rid of values at start and append -99999's at end
            cut = full_data[:tz]
            for num in range(np.abs(tz)):
                cut = np.insert(cut,0, -99999)
            full_data = cut
        elif tz > 0:
            #put -99999's at start and get rid of values at end
            cut = full_data[tz:]
            for num in range(tz):
                cut = np.append(cut, -99999)
            full_data = cut
            
        #do data quality checks        
        full_data,data_valid,data_complete = modules.quality_check_periodic(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

        #set mm
        if species == 'O3':
            mm = 'ultraviolet photometry'
        elif (species == 'NO') or (species == 'NO2') or (species == 'CO'):
            mm = 'non-dispersive infrared absorption'
    
        #set sampling as average
        st = 'average'     
    
        #set site file resolution
        file_res = 'H'
    
        return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res,data_complete
def site_iter_process(valid_refs,c):

        #for each valid location, process the obs data for that site
        #(sites limited to those in valid_obs_site_names)
        #for c in range(len(valid_refs)):
    
        all_lat = []
        all_lon = []
        all_alt = []
        all_st = []
        all_mm = []

        site_ref = valid_refs[c]

        file_valid = True
        data_valid = True

        print site_ref
        file_res = data_resolutions[c]
        print file_res

        #read files for each valid site
        s_files = sorted(glob.glob('/work/home/db876/observations/surface/%s/GAW/%s**.%s**.dat'%(species,site_ref.lower(),file_res))) 
                  
        print s_files      
        if file_res == 'hr':
            site_files = sorted(s_files, key = lambda x: x.split(".hr")[1])

        else:
            site_files = sorted(s_files)

        delete_inds = []
        if file_res == 'hr':
            #limit site files before and after year limit
        
            for i in range(len(site_files)):
                f = site_files[i]
                year = f.split(".hr")[1][:4]
                if int(year) < int(start_year):
                    delete_inds.append(i)
                if int(year) > int(end_year):
                    delete_inds.append(i)

            site_files = np.delete(site_files,delete_inds)
            print site_files
    
            if len(site_files) == 0:
                print 'No valid files in date range. Skipping.'
                data_valid = False
                return c,[],data_valid,-999,-999,-999,'na','na','na','na','na',-999

        site_file_len = len(site_files)
        s_count = 0
        start_ind = 0
        end_ind = 0
        for f in site_files:
            print f
            read = np.loadtxt(f,dtype="S10,S5,f8",comments='C',usecols=(0,1,4),unpack =True) 	
            read = np.array(read)
    
            dates = read[0,:]
            times = read[1,:]
            conc = read[2,:]
            conc = np.array(conc)
            conc = conc.astype(float)
    
            #change all vals < 0 to np.NaN
            inv_test = conc < 0
            conc[inv_test] = np.NaN
    
            start_ind = end_ind
            end_ind+=len(conc)
    
            s_count+=1
    
            units = [] 
            mycsv = csv.reader(open(f))
            row_count = 0
            for row in mycsv:
                if row_count == 11:
                    val = " ".join(row)
                    lat = val.replace(" ", "")
                    lat = lat[12:]
                    lat = float(lat)
                    all_lat.append(lat)
                # get lon
                if row_count == 12:
                    val = " ".join(row)
                    lon = val.replace(" ", "")
                    lon = lon[13:]
                    lon = float(lon)
                    all_lon.append(lon)
                # get altitude
                if row_count == 13:
                    val = " ".join(row)
                    alt = val.replace(" ", "")
                    alt = alt[12:] 
                    alt = float(alt) 
                    all_alt.append(alt)
                # get units
                if row_count == 20:
                    val = " ".join(row)
                    unit = val.replace(" ", "")
                    unit = unit[19:]           
                # get measurement method
                if row_count == 21:
                    val = " ".join(row)
                    mm = val.replace(" ", "")
                    mm = mm[21:]  
                    all_mm.append(mm)
                # get sampling type
                if row_count == 22:
                    val = " ".join(row)
                    st = val.replace(" ", "")
                    st = st[16:]  
                    all_st.append(st)
                if row_count == 23:
                    val = " ".join(row)
                    tz = val.replace(" ", "")
                    tz = tz[12:]  

        
                row_count+=1   
        
            # test if units are in ppb for each file - if not convert

            if (unit != 'ppb') & (unit != 'ppbv'):
                if (unit == 'ug/m3') or (unit == 'ugN/m3'): 
                    print 'converting units, temp = 20degC'
                    #calculate conversion factor from ug/m3 assuming 20 degC and 1 atm - default for GAW site O3 instruments
                    #conv_fact = R/MW * (273.15 + T(degC)) / (P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10)
                    conc = conv_fact*conc
                elif (unit == 'ug/m3-20C') or (unit == 'ugN/m3-20C'):
                    print 'converting units, temp = 20degC'
                    #calculate conversion factor from ug/m3 assuming 20 degC and 1 atm - default for GAW site O3 instruments
                    #conv_fact = R/MW * (273.15 + T(degC)) / (P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10)
                    conc = conv_fact*conc
                elif (unit == 'ug/m3-25C') or (unit == 'ugN/m3-25C') or (unit == 'ug/m3at25C'):
                    print 'converting units, temp = 25degC'
                    #calculate conversion factor from ug/m3 assuming 25 degC and 1 atm
                    #conv_fact = R/MW * (273.15 + T(degC)) / (P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(273.15+25)/(1013.25/10)
                    conc = conv_fact*conc
                elif (unit == 'mg/m3-20C') or (unit == 'mgN/m3-20C'):
                    print 'converting units, temp = 20degC'
                    #calculate conversion factor from mg/m3 assuming 20 degC and 1 atm
                    #conv_fact = R/MW * (273.15 + T(degC)) / (P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10)
                    conc = (conv_fact*conc)*1e3
                elif (unit == 'mg/m3-25C') or (unit == 'mgN/m3-25C'):
                    print 'converting units, temp = 25degC'
                    #calculate conversion factor from mg/m3 assuming 25 degC and 1 atm
                    #conv_fact = R/MW * (273.15 + T(degC)) / (P(hPa)/10)
                    conv_fact = 8.3144/mol_mass*(273.15+25)/(1013.25/10)
                    conc = (conv_fact*conc)*1e3
                elif (unit == 'ppm') or (unit == 'ppmv'):
                    conc = conc*1.e3
                elif (unit == 'ppt') or (unit == 'pptv'):
                    conc = conc/1.e3
        
                else:
                    print 'Unknown Unit'
                    print unit
                    raise ValueError('unknown unit: %s' % unit)
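            #sanity check (illustrative): for O3, mol_mass is ~48, so at 20degC
            #conv_fact = 8.3144/48.*293.15/101.325 ~= 0.50 ppb per ug/m3,
            #i.e. roughly 2 ug/m3 of O3 per 1 ppb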
            
            if tz != 'UTC':
                if tz == '':
                    if site_ref.lower() in ['plm']:
                        tz = -5
        
                    if site_ref.lower() in ['kos','edm','vdl','nwr']:
                        tz = 0

                    if site_ref.lower() in ['jfj','kps','rig','pay','glh','cmn','zep','dig','hhe','ktb','stp','ivn','jcz','kam','lzp','snz','zbl','kmw','don','mhn','nia','roq','spm']: 
                        tz = 1

                    if site_ref.lower() in ['rcv','aht','oul','uto','vir','fdt','sem','stn']:
                        tz = 2
                
                    if site_ref.lower() in ['dak']:
                        tz = 3
                
                    if site_ref.lower() in ['shp']:
                        tz = 4
                    
                    if site_ref.lower() in ['isk']:
                        tz = 5
    
                    if site_ref.lower() in ['hkg']:
                        tz = 8

                    if site_ref.lower() in ['cgo']:
                        tz = 10
                else:        
                    tz = tz.replace('LocaltimeUTC', '')
                    tz = tz.replace('OtherUTC', '')
                    tz = tz.replace('Localtime', '')
                    tz = tz.replace(':', '.')
        
                    #convert a minutes part (e.g. '.30' from ':30') into a
                    #decimal fraction of an hour (e.g. '.50')
                    before, sep, after = tz.rpartition('.')
                    if sep:
                        try:
                            tz = before + '.' + str(int(round((100./60) * int(after))))
                        except ValueError:
                            pass
                    tz = float(tz)
        
            else: 
                tz = 0
    
            #check tz is whole number else skip site
            if (tz % 1) != 0:
                print 'File Invalid, timezone is not a whole number.'
                conc[:] = -99999
    
            #process dates from date, time to days since start year
            dates = [s.replace('-', '') for s in dates]			
            times = [s.replace(':', '') for s in times]
    
            if file_res == 'hr':
                #some files stamp hours 0100-2400, i.e. each stamp marks the end of
                #the hour averaged; shift every time on such days back one hour so
                #the stamps become 0000-2300
                for i in range(len(times)):
                    if times[i] == '2400':
                        current_date = dates[i]
                        test = np.array(dates) == current_date
                        indices = [i for i, x in enumerate(test) if x]
                        for x in indices:
                            current_time = times[x]
                            if current_time == '2400':
                                current_time = '0000'
                            date_datetime = datetime.datetime(int(current_date[0:4]),int(current_date[4:6]),int(current_date[6:]),int(current_time[:2]),int(current_time[2:]))
                            date_datetime = date_datetime - datetime.timedelta(hours = 1)
                            times[x] = date_datetime.strftime("%H%M")
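                #e.g. a day stamped 0100..2400 ends up stamped 0000..2300, with
                #the former 2400 entry relabelled 2300 on the same date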
    
                #adjust dates and times if tz is not equal to 0
                if tz != 0:
                    for i in range(len(dates)):
                        #create datetime
                        dt = datetime.datetime(int(dates[i][:4]),int(dates[i][4:6]),int(dates[i][6:]),int(times[i][:2]),int(times[i][2:]))
                        if tz > 0:
                            #print 'Old dt', dt
                            dt  = dt - datetime.timedelta(hours = int(tz))
                            #print 'New dt', dt
                        elif tz < 0:
                            #print 'Old dt', dt
                            dt  = dt + datetime.timedelta(hours = np.abs(int(tz)))
                            #print 'New dt', dt
                        dates[i] = dt.strftime("%Y%m%d")
                        times[i] = dt.strftime("%H%M")
        
            data = [dates,times,conc]
            try:
                big_list = np.hstack((big_list,data))
            except NameError:
                #first file for this site
                big_list = np.array(data)
            
    
            if (s_count == site_file_len):	
          
                #make sure big list exists
                try:
                    big_list
                except NameError:
                    data_valid = False
            
                if data_valid == True:          
  
                    #get dates and times
                    date_con = big_list[0,:]
                    time_con = big_list[1,:]
              
                    #get vals
                    vals = np.array(big_list[2,:]).astype(float) 

                    #delete big list
                    del big_list

                    #if dates outside what asked for exclude          
                    first_date_val = int('%s0101'%(start_year))
                    last_date_val = int('%s1231'%(end_year))
        
                    test_valid = (np.array(date_con).astype(int) >= first_date_val) & (np.array(date_con).astype(int) <= last_date_val)
                    date_con = date_con[test_valid]
                    time_con = time_con[test_valid]
                    vals = vals[test_valid]
            
                    #Check if any times are duplicate, if so delete all but first
                    del_list = []
                    for d in range(len(date_con)-1):
                        if (date_con[d] == date_con[d+1]) & (time_con[d] == time_con[d+1]):
                            del_list.append(d+1)
                    if len(del_list) > 0:
                        print 'Deleting duplicate timepoints'
                        print date_con[del_list],time_con[del_list]
                        date_con = np.delete(date_con,del_list)
                        time_con = np.delete(time_con,del_list)
                        vals = np.delete(vals,del_list)
            
                    #if file resolution is daily or monthly, replicate each value forward in time to fill the hourly data array
                    count=0
                    if file_res == 'da':
                        file_hours = len(date_con)
                        for i in range(file_hours):
                            current_hh = int(time_con[count][:2])
                            current_mm = int(time_con[count][2:])
                            s = datetime.datetime(year = start_year, month = 1, day = 1, hour = current_hh, minute = current_mm)
                            e = datetime.datetime(year = start_year, month = 1, day = 2, hour = current_hh, minute = current_mm)
                            day_hours = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][1:-1]
        
                            date_con = np.insert(date_con,count+1,[date_con[count]]*23)
                            time_con = np.insert(time_con,count+1,day_hours)
                            vals = np.insert(vals,count+1,[vals[count]]*23)
               
                            count +=24
        
            
                    if file_res == 'mo':
                        file_hours = len(date_con)
                        for i in range(file_hours):
                            current_year = int(date_con[count][:4])
                            current_month = int(date_con[count][4:6])
                
                            next_month = current_month+1
                            if next_month > 12:
                                next_month = 1
                                next_year = current_year+1
                            else:
                                next_year = current_year 
                
                            s = datetime.datetime(year = current_year, month = current_month, day = 1, hour = 1, minute = 0)
                            e = datetime.datetime(year = next_year, month = next_month, day = 1, hour = 0, minute = 0)
                
                            day_date = [d.strftime('%Y%m%d') for d in pd.date_range(s,e,freq='H')][:-1]
                            day_hour = [d.strftime('%H%M') for d in pd.date_range(s,e,freq='H')][:-1]
                            date_con = np.insert(date_con,count+1,day_date)
                            time_con = np.insert(time_con,count+1,day_hour)
                            vals = np.insert(vals,count+1,[vals[count]]*len(day_date))
                            count += (len(day_date)+1)
        
                    date_con = np.array(date_con).astype(int)
                    time_con = np.array(time_con).astype(int)
        
                    #create max possible o3 grid
                    o3_data = np.empty(n_hours)
                    o3_data[:] = -99999
                
                    #delete dates,times and var outside date range
                    val_test = (date_con >= int(output_res_dates_strings[0])) & (date_con <= int(output_res_dates_strings[-1]))
                    date_con = date_con[val_test]
                    time_con = time_con[val_test]
                    vals = vals[val_test]
                
                    print date_con
        
                    #find matching times between actual times and grid of times; returns indices of the matched slots in the grid
                    converted_time = modules.date_process(date_con,time_con,start_year)
                    converted_time = np.round(converted_time,decimals=5)
                    syn_grid_time = np.arange(0,n_days,1./24)
                    syn_grid_time = np.round(syn_grid_time,decimals=5)
                    indices = np.searchsorted(syn_grid_time, converted_time, side='left')
                    o3_data[indices] = vals 
        
                    #convert all Nans back to -99999
                    test = np.isnan(o3_data)
                    o3_data[test] = -99999
        
                    #get mode of metadata
                    lat = np.float64(stats.mode(all_lat)[0][0]) 
                    lon = np.float64(stats.mode(all_lon)[0][0])  
                    alt = np.float64(stats.mode(all_alt)[0][0]) 
                    st = stats.mode(all_st)[0][0]
                    mm = stats.mode(all_mm)[0][0]

                    #check site is not urban using anthrome map from 2000
                    anthfile = '/work/home/db876/plotting_tools/core_tools/anthro2_a2000.nc'
                    anthload = Dataset(anthfile)
                    class_valid,anthrome_class_name = modules.anthrome_classify(anthload,[lat],[lon])
                    if class_valid == 'invalid':
                        data_valid = False
                        print 'Site Invalid, site classed as urban by anthrome map.'

                    #get sampling type (mode of the collected list), mapped to a standard name
                    if st in ('continuous', 'continuous(carbondioxide),remotespectroscopicmethod(methaneandsurfaceozone)', 'continuous(carbondioxide)remotespectroscopicmethod(methaneandsurfaceozone)'):
                        st = 'average'
                    elif st == 'flask':
                        st = 'flask'
                    elif st == 'filter':
                        st = 'filter'
                    else:
                        print st
                        raise ValueError('unknown sampling type: %s' % st)

                    if mm == 'Lightabsorptionanalysis(UV)':
                        mm = 'ultraviolet photometry'
            
                    elif mm == 'CavityRingdownSpectroscopy':
                        mm = 'cavity ringdown spectroscopy'

                    elif mm == 'NDIR':
                        mm = 'non-dispersive infrared spectroscopy'

                    elif mm == 'GasChromatography(FID)':
                        mm = 'gas chromatography flame ionisation detection'

                    elif mm == 'Gas Chromatography (RGD)':
                        mm = 'gas chromatography reduction gas detection'
        
                    elif mm == 'Chemiluminescence':
                        mm = 'chemiluminescence'
            
                    elif (mm == 'Spectrophotometry') or (mm == 'spectrophotometry,naphthyl-ethylenediaminedihydrochloridemethod'):
                        mm = 'spectrophotometry'

                    elif mm == 'continuous(carbondioxide)remotespectroscopicmethod(methaneandsurfaceozone)':        
                        mm = 'near infrared spectroscopy'

                    elif mm == '':
                        if species == 'O3':
                            mm = 'ultraviolet photometry'
                        if species == 'CO':
                            mm = 'non-dispersive infrared spectroscopy'
                        if species == 'NO2':
                            mm = 'chemiluminescence'
                        if species == 'NO':
                            mm = 'chemiluminescence'
                        if species == 'ISOP':
                            mm = 'gas chromatography flame ionisation detection'
                
                    #do data quality checks        
                    full_data,data_valid,data_complete = modules.quality_check_periodic(o3_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)
        
                    #convert file res to standard format
                    if file_res == 'hr':
                        file_res = 'H'
                    elif file_res == 'da':
                        file_res = 'D'
                    elif file_res == 'mo':
                        file_res = 'M'
                    
                    #no raw class so set as na
                    raw_class_name = 'na'

                    return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res,data_complete
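
#A minimal sketch of the daily -> hourly replication used above for 'da'
#resolution files: each daily mean is copied into all 24 hourly slots of its
#day. np.repeat is an equivalent, more direct formulation for the values (the
#real code also fabricates the matching hourly timestamps); values illustrative:
def _demo_fill_daily_to_hourly():
    import numpy as np
    daily_vals = np.array([40.0, 42.0])      #two daily means
    hourly_vals = np.repeat(daily_vals, 24)  #48 hourly values
    return hourly_vals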
def site_iter_process(valid_refs,c):
    #read files site at a time
    #for ref_i in range(len(valid_refs)):
        site_ref = valid_refs[c]

        all_latitudes = []
        all_longitudes = []
        all_altitudes = []
        all_mm = []

        print 'Current Ref is = ', site_ref
        #find if sites have full valid range from start year and finishing in end year
        s_files = glob.glob('/work/home/db876/observations/surface/%s/EMEP/%s*'%(species,site_ref))
        year_files = [file.replace("/work/home/db876/observations/surface/%s/EMEP/"%(species), "") for file in s_files]
        cut_year_files = [file[8:12] for file in year_files]
        site_files = []
        for y in year_array:
            for i in range(len(s_files)):
                if str(y) in cut_year_files[i]:
                    site_files.append(s_files[i])
                  
        site_files = modules.natsorted(site_files)
        year_files = modules.natsorted(year_files)
  
        file_startdate = []
        file_height = []
        instr_names = []
        file_lasttime = []
    
        data_valid = True

        yyyymmdd = []
        hhmm = []
        vals = []
        flags = []

        #create max possible o3 grid
        full_data = np.empty(n_hours)
        full_data[:] = -99999

        if site_files == []:
            print 'No valid files for site\n'
            data_valid = False
            return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na',-999
    
        for y in year_array:
    
            print 'Processing Year %s'%y 
            got_year = False
            for file in site_files:
                last_file_split = file.split('/')[-1]
                if str(y) in last_file_split[8:12]:
                    got_year = True
                    break
            if got_year == False:
                #fill in data for missing year
                timedelta_diff = datetime.date(y+1, 1, 1) - datetime.date(y, 1, 1)
                ndays_missing = timedelta_diff.days
                print 'ndays missing = ', ndays_missing        
                continue
    
            if data_valid == True:
                data_start = 9999999
                count = 0
                start_read = False
                with open(file, 'rb') as f:
                    read_count = 0
                    reader = csv.reader(f,delimiter=' ')
                    print file
                    for row in reader:
                        #drop empty fields and stray commas
                        row = [a for a in row if a not in ('', ',')]
                                    
                        #get start date of file
                        if row[0] == 'Startdate:':
                            data = row[1]
                            s_yyyy = data[:4]
                            s_mm = data[4:6]
                            s_dd = data[6:8]
                            s_hh = data[8:10]
                            s_min = data[10:12]


                            start_datetime = datetime.datetime(int(s_yyyy),1,1,0,0)
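                            #note: time offsets in the data block are interpreted
                            #as days since 1 Jan of this year, so only s_yyyy is used here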
                    
                        #get unit
                        if row[0] == 'Unit:':
                            try:
                                unit_part1 = row[1]
                                unit_part2 = row[2]
                                unit = unit_part1+'_'+unit_part2
                            except IndexError:
                                unit = row[1]
            
                        #get resolution
                        if row[0] == 'Resolution':
                            if row[1] == 'code:':
                                file_res = row[2]
                                print 'Resolution = %s'%file_res
                                if (output_res == 'H'):
                                    if (file_res == '1d') or (file_res == '1mo'):
                                        print 'File resolution has to be Minimum Hourly. Skipping'
                                        data_valid = False
                                        return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na',-999
                                elif (output_res == 'D'):
                                    if (file_res == '1mo'):
                                        print 'File resolution has to be Minimum Daily. Skipping'
                                        data_valid = False
                                        return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na',-999
                        #get latitude
                        if row[0] == 'Station':
                            if row[1] == 'latitude:':
                                latitude = row[2]
                                all_latitudes.append(latitude)
                
                        #get longitude
                        if row[0] == 'Station':
                            if row[1] == 'longitude:':
                                longitude = row[2]
                                all_longitudes.append(longitude)
                        
                        #get altitude
                        if row[0] == 'Station':
                            if row[1] == 'altitude:':
                                altitude = row[2][:-1]
                                all_altitudes.append(altitude)
                
                        #get period
                        if row[0] == 'Period':
                            period_code = row[2]
                    
                        #get stats method
                        if row[0] == 'Statistics:':
                            try:
                                st = row[1] + row[2]
                            except IndexError:
                                st = ''
                            if st != 'arithmeticmean':
                                print 'Not Arithmetic Mean!'
                                print row[1]
                                raise ValueError('unexpected statistic: %s' % st)
                
                        #get instrument method
                        if row[0] == 'Instrument':
                            if row[1] == 'type:':
                                mm_list = row[2:]
                                site_mm = ' '.join(mm_list).strip()
                                all_mm.append(site_mm)
                    
                        #get data
                        if start_read == True:
                            #calc dates, times, and take o3 vals

                            time_since_start = np.float64(row[0])
                            days_since_start = math.trunc(time_since_start)
                            remainder = time_since_start - days_since_start
                            unrounded_hour = remainder*24
                            hour = np.round(unrounded_hour)
                            time_delta = datetime.timedelta(days = days_since_start,hours = hour)
                            calc_datetime = start_datetime + time_delta
                            calc_yyyymmdd = calc_datetime.strftime("%Y%m%d") 
                            calc_hhmm = calc_datetime.strftime("%H%M")        
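                            #e.g. a starttime offset of 31.0417 days decodes to
                            #day 31, hour 1 (0.0417 * 24 ~= 1)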
                            
                            line_val = np.float64(row[2])
                    
                            #convert units line by line (only if the value is >= 0)
                            if line_val >= 0:
                                if (unit.lower() != 'ppb') & (unit.lower() != 'ppbv'):
                                    if unit == 'ug/m3':
                                        #print 'converting units, temp = 20degC'
                                        #calculate conversion factor from mg/m3 assuming 20 degC and 1 atm - default for O3 instruments
                                        #R/MW*(TEMP0C(K)*TEMP(degC)/P(hPa)/10
                                        conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10)
                                        line_val = conv_fact*line_val
                                        #print 'Converting Units from ug/m3 20degC to ppbv'
                                    elif unit == 'ug_N/m3':
                                        conv_fact = 8.3144/mol_mass*(273.15+20)/(1013.25/10)
                                        line_val = conv_fact*line_val
                                        #print 'Converting Units from ug/Nm3 20degC to ppbv' 
                                    elif (unit == 'ppm') or (unit == 'ppmv'):
                                        line_val = line_val*1e3
                                        #print 'Converting Units from ppmv to ppbv'
                                    elif (unit == 'ppt') or (unit == 'pptv'):
                                        line_val = line_val/1e3
                                        #print 'Converting Units from pptv to ppbv'
                                    else:
                                        print 'Unknown Unit'
                                        data_valid = False
                                        raise ValueError('unknown unit: %s' % unit)
                       
                            if file_res == '1h':
                                yyyymmdd=np.append(yyyymmdd,calc_yyyymmdd)
                                hhmm=np.append(hhmm,calc_hhmm)
                                vals = np.append(vals,line_val)
                                flags = np.append(flags,np.float64(row[3]))
                    
                            elif file_res == '1d':
                                yyyymmdd=np.append(yyyymmdd,calc_yyyymmdd)
                                hhmm=np.append(hhmm,'0000')
                                vals = np.append(vals,line_val)
                                flags = np.append(flags,np.float64(row[3]))
                        
                                for j in range(1,24):
                                    time_delta = datetime.timedelta(days = days_since_start,hours = j)
                                    calc_datetime = start_datetime + time_delta 
                                    vals = np.append(vals,vals[-1])
                                    flags = np.append(flags,flags[-1])
                                    yyyymmdd = np.append(yyyymmdd,calc_datetime.strftime("%Y%m%d"))
                                    hhmm = np.append(hhmm,calc_datetime.strftime("%H%M"))
                        
                            elif file_res == '1mo':
                                yyyymmdd=np.append(yyyymmdd,calc_yyyymmdd)
                                hhmm=np.append(hhmm,'0000')
                                vals = np.append(vals,line_val)
                                flags = np.append(flags,np.float64(row[3]))
                        
                                month_days = monthrange(int(yyyymmdd[-1][:4]), int(yyyymmdd[-1][4:6]))[1]
                                for j in range(1,24*month_days):
                                    time_delta = datetime.timedelta(days = days_since_start,hours = j)
                                    calc_datetime = start_datetime + time_delta
                                    vals = np.append(vals,vals[-1])
                                    flags = np.append(flags,flags[-1])
                                    yyyymmdd = np.append(yyyymmdd,calc_datetime.strftime("%Y%m%d"))
                                    hhmm = np.append(hhmm,calc_datetime.strftime("%H%M"))
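                                #e.g. a monthly mean dated 200802 (29 days) is copied into
                                #the remaining 24*29-1 = 695 hourly slots of the month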
        
                        if row[0] == 'starttime':
                            start_read = True
                
                        count+=1
                
        if (y == year_array[-1]):    
            
            #set all values flagged as invalid to -99999
            test_inv = flags != 0
            if test_inv.any():
                vals[test_inv] = -99999
        
            #set any values less than zero to -99999
            test_inv = vals < 0
            if test_inv.any():
                vals[test_inv] = -99999
        
            #do additional invalid test, as flags not always correct
            #test_inv_2 = vals > 300
            #vals[test_inv_2] = -99999

            #put o3 vals into full grid
            date_con = np.array(yyyymmdd).astype(int)
            time_con = np.array(hhmm).astype(int)
        
            #find matching times between the actual times and the grid of times, returning the indices of the matching slots in the grid
            converted_time = date_process(date_con,time_con,start_year)
            converted_time = np.round(converted_time,decimals=5)
            syn_grid_time = np.arange(0,n_days,1./24)
            syn_grid_time = np.round(syn_grid_time,decimals=5)
    
            indices = np.searchsorted(syn_grid_time, converted_time, side='left')
            vals = np.array(vals)
            #make sure no data is past end year
            index_test = indices < len(full_data)
            indices = indices[index_test]
            vals = vals[index_test]
            full_data[indices] = vals
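            #worked example (assuming start_year = 1990): a sample at 1990-01-02 06:00
            #gives converted_time = 1.25 days, which searchsorted maps to hourly grid
            #index 30 (= 1.25*24), i.e. the 0600 slot of the second day in full_data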
    
        #get mode of metadata
        lat = np.float64(stats.mode(all_latitudes)[0][0]) 
        lon = np.float64(stats.mode(all_longitudes)[0][0])  
        alt = np.float64(stats.mode(all_altitudes)[0][0]) 
        mm = stats.mode(all_mm)[0][0]
    
        #check site is not urban using anthrome map from 2000
        anthfile = '/work/home/db876/plotting_tools/core_tools/anthro2_a2000.nc'
        anthload = Dataset(anthfile)
        class_valid,anthrome_class_name = modules.anthrome_classify(anthload,[lat],[lon])
        if class_valid == 'invalid':
            data_valid = False
            print 'Site Invalid, site classed as urban by anthrome map.'
    
        #get measurement method
        if (mm == 'uv_abs') or (mm == 'chemiluminesc') or (mm == 'uv_fluoresc'):
            if species == 'O3':
                mm = 'ultraviolet photometry'
            if (species == 'NO') or (species == 'NO2') or (species == 'CO'):
                mm = 'chemiluminescence'
        
        elif (mm == 'ndir') or (mm == 'infrared_absorption'):
            mm = 'non-dispersive infrared spectroscopy'
        
        elif (mm == 'GC-HgO'):
            mm = 'gas chromatography reduction gas detection'
    
        elif (mm == 'tracegas_monitor'):
            mm = 'cavity attenuated phase shift spectroscopy'
    
        elif (mm == 'filter_1pack') or (mm == 'filter_2pack') or (mm == 'filter_3pack'):
            if species == 'NO2':
                mm = 'griess saltzman colorimetric'
            elif species == 'CO':
                mm = 'ion chromatography'
        
        elif (mm == 'steel_canister'):
            mm = 'gas chromatography flame ionisation detection'
        
        elif (mm == 'online_gc'):
            mm = 'online gas chromatography'
    
        elif (mm == 'glass_sinter') or (mm == 'abs_solution') or (mm == 'filter_abs_solution') or (mm == 'abs_tube') or (mm == 'continuous_colorimetric'):
            mm = 'griess saltzman colorimetric'
        
        elif (mm == 'NaJ_solution'):
            mm = 'flame ionisation detection'
        
        elif (mm == 'doas'):
            mm = 'differential optical absorption spectroscopy'
    
        elif (mm == 'diffusion_tube'):
            mm = 'diffusive sampler'
    
        elif (mm == 'NA') or (mm == ''):
            if species == 'O3':
                mm = 'ultraviolet photometry'
            if species == 'CO':
                mm = 'non-dispersive infrared spectroscopy'
            if species == 'NO2':
                mm = 'chemiluminescence'
            if species == 'NO':
                mm = 'chemiluminescence'
            if species == 'ISOP':
                mm = 'gas chromatography flame ionisation detection'
        
        else:
            raise ValueError('Unknown measurement method: %s'%(mm))
    
        #do data quality checks        
        full_data,data_valid,data_complete = modules.quality_check_periodic(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)

        #convert file res to standard format
        if file_res == '1h':
            file_res = 'H'
        elif file_res == '1d':
            file_res = 'D'
        elif file_res == '1mo':
            file_res = 'M'

        #no raw class so set as na
        raw_class_name = 'na'
    
        #set sampling as average
        st = 'average'

        return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res,data_complete
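    #a minimal sketch of how these site_iter_process functions are typically
    #dispatched (hypothetical driver code; the real one is not shown here):
    #
    #    pool = multiprocessing.Pool(processes=16)
    #    results = [pool.apply_async(site_iter_process,(valid_refs,c)) for c in range(len(valid_refs))]
    #    pool.close()
    #    pool.join()
    #    all_site_data = [r.get() for r in results]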
    def site_iter_process(valid_refs,c):
    #for site_ref in valid_refs:
        site_ref = valid_refs[c]
 
        data_valid = True
        print 'ref = ',site_ref
        site_test = all_refs == site_ref
    
        site_yyyymmdd = yyyymmdd[site_test]
        site_hhmm = hhmm[site_test]
        site_vals = vals[site_test]
    
        site_vals = np.float64(site_vals)
 
        #convert all invalids to -99999
        test_inv = site_vals < 0
        site_vals[test_inv] = -99999

        #put vals into full grid
        date_con = np.array(site_yyyymmdd).astype(int)
        time_con = np.array(site_hhmm).astype(int)
    
        #create max possible o3 grid
        full_data = np.empty(n_hours)
        full_data[:] = -99999
    
        #find matching times between the actual times and the grid of times, returning the indices of the matching slots in the grid
        converted_time = modules.date_process(date_con,time_con,start_year)
        converted_time = np.round(converted_time,decimals=5)
        syn_grid_time = np.arange(0,n_days,1./24)
        syn_grid_time = np.round(syn_grid_time,decimals=5)
        indices = np.searchsorted(syn_grid_time, converted_time, side='left')
        site_vals = np.array(site_vals)
        #make sure no data is past end year (same bounds test as in the parser above)
        index_test = indices < len(full_data)
        full_data[indices[index_test]] = site_vals[index_test]
    
        meta_index = meta_refs.index(site_ref)
        tz = float(meta_tz[meta_index])
        lat = np.float64(meta_lats[meta_index])
        lon = np.float64(meta_lons[meta_index])
        alt = np.float64(meta_alts[meta_index])
        raw_class_name = meta_class[meta_index]
        anthrome_class_name = class_name[meta_index]
        
        #check tz is whole number else skip site
        if (tz % 1) != 0:
            data_valid = False
            print 'Timezone is not a whole number. Skipping.'
        
        #correct timezone to UTC
        tz = int(tz)
        if tz < 0:
            #shift series later in time: pad with -99999 at the start and drop values at the end
            full_data = np.concatenate((np.full(np.abs(tz),-99999.),full_data[:tz]))
        elif tz > 0:
            #shift series earlier in time: drop values at the start and pad with -99999 at the end
            full_data = np.concatenate((full_data[tz:],np.full(tz,-99999.)))
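        #worked example: for tz = -5 (UTC-5), UTC hour = local hour + 5, so the
        #series shifts 5 slots later: the first 5 hourly slots become -99999
        #and the last 5 local values are dropped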
            
        #if species is CO then convert units from ppmv to ppbv
        if species == 'CO':
            valid_inds = full_data != -99999 
            full_data[valid_inds] = full_data[valid_inds]*1e3        
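            #e.g. a CO reading of 0.45 ppmv becomes 450.0 ppbv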
    
        #do data quality checks        
        full_data,data_valid,data_complete = modules.quality_check_periodic(full_data,data_valid,data_resolution,alt,grid_dates,start_year,end_year)
    
        #set sampling as average
        if (species == 'O3') or (species == 'CO') or (species == 'NO') or (species == 'NO2'):
            st = 'average'
        elif (species == 'ISOP'):
            st = 'flask'   
    
        #set site file resolution
        if (species == 'O3') or (species == 'CO') or (species == 'NO') or (species == 'NO2'):
            file_res = 'H'
        elif (species == 'ISOP'):
            file_res = 'D'
    
        #check file res is ok for output res
        if (output_res == 'H'):
            if (file_res == 'D') or (file_res == 'M'):
                print 'File resolution must be at least hourly. Skipping.'
                data_valid = False
                return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na',-999
        elif (output_res == 'D'):
            if (file_res == 'M'):
                print 'File resolution must be at least daily. Skipping.'
                data_valid = False
                return c,full_data,data_valid,-999,-999,-999,'na','na','na','na','na',-999
    
        #set mm
        if species == 'O3':
            mm = 'ultraviolet photometry'
        elif (species == 'NO') or (species == 'NO2'):
            mm = 'chemiluminescence'
        elif species == 'CO':
            mm = 'non-dispersive infrared spectroscopy'
        elif species == 'ISOP':
            mm = 'gas chromatography flame ionisation detection'
        
        
        return c,full_data,data_valid,lat,lon,alt,raw_class_name,anthrome_class_name,mm,st,file_res,data_complete