Example #1
0
def data_collection():
    """Assemble monthly feature rows and the following month's values as targets.

    One row of ``numFeat`` values is built for every year in
    ``[startYr, forecastYr)`` and every month in ``[startMonth, stopMonth]``:

    * column 0       -- zero-based month index (``month - 1``)
    * column 1       -- ``np.ma.mean`` of ``ff.get_pmas_month(...)`` for that month
    * columns 2..17  -- one value per region id 0..15 of ``regionMask``, derived
      from the gridded ``forecastVar`` field returned by ``ff.get_gridvar``

    Returns:
        tuple: ``(feat, groundTruth)``.  ``feat`` contains every row except the
        last, shape ``(numSamples - 1, numFeat)``.  ``groundTruth`` contains
        columns 1 onward of every row except the first, shape
        ``(numSamples - 1, numFeat - 1)``, so ``groundTruth[i]`` is taken from
        the sample that follows ``feat[i]``.
    """
    rawDataPath = '../../Data/'
    derivedDataPath = '../../DataOutput/'
    forecastVar = 'conc'
    hemStr = 'N'

    startYr = 1980
    forecastYr = 2012
    startMonth = 1
    stopMonth = 12
    regionMask = np.load(derivedDataPath + 'Regions/regionMaskA100km')

    numRegions = 16
    numFeat = 2 + numRegions  # month index + PIOMAS mean + one value per region
    numSamples = (forecastYr - startYr) * (stopMonth + 1 - startMonth)

    feat = []
    for year in range(startYr, forecastYr):
        for month in range(startMonth, stopMonth + 1):
            sample = np.zeros(numFeat)
            sample[0] = month - 1  # gridded months are actually indexed at 0
            # NOTE: no rescaling is applied to this value here.
            sample[1] = np.ma.mean(
                ff.get_pmas_month(rawDataPath, year, month - 1))

            # NOTE(review): get_gridvar receives `month`, while get_pmas_month
            # above receives `month - 1` -- confirm both index conventions.
            iceConc = ff.get_gridvar(derivedDataPath, forecastVar, month,
                                     array(year), hemStr)

            # Zero the masked cells once per sample; the result is the same for
            # every region, so it does not belong inside the region loop.
            # This writes into the array behind iceConc (`.data` is not copied).
            regionData = iceConc.data
            regionData[iceConc.mask] = 0

            for regInd in range(numRegions):
                desiredRegion = regionMask == regInd
                # NOTE(review): cells outside the region are multiplied by 0 but
                # still counted by the mean, so this is the region's sum divided
                # by the whole grid size, not the within-region average.
                sample[regInd + 2] = np.ma.mean(
                    np.multiply(regionData, desiredRegion))

            feat.append(sample)

    feat = np.reshape(feat, (numSamples, numFeat))
    # Targets are the next sample's values without the month-index column.
    groundTruth = feat[1:, 1:]
    feat = feat[:-1, :]

    return feat, groundTruth
def data_collection():
    """Assemble monthly feature rows and one ice-extent target per row.

    One sample is built for every year in ``[startYr, forecastYr)`` and every
    month in ``[startMonth, stopMonth]``.  Each feature row holds:

    * column 0       -- zero-based month index (``month - 1``)
    * column 1       -- ``np.ma.mean`` of ``ff.get_pmas_month(...)`` for that month
    * columns 2..17  -- one value per region id 0..15 of ``regionMask``, derived
      from the gridded ``forecastVar`` field returned by ``ff.get_gridvar``

    The target for a sample is the first element of the second value returned
    by ``ff.get_ice_extentN`` when queried for that single year and month.

    Returns:
        tuple: ``(feat, groundTruth)`` with shapes ``(numSamples, numFeat)``
        and ``(numSamples, 1)``.
    """
    rawDataPath = '../../Data/'
    derivedDataPath = '../../DataOutput/'
    forecastVar = 'conc'
    iceType = 'extent'
    hemStr = 'N'
    siiVersion = 'v3.0'

    startYr = 1990
    forecastYr = 2010
    startMonth = 2
    stopMonth = 11
    regionMask = np.load(derivedDataPath + 'Regions/regionMaskA100km')

    numRegions = 16
    numFeat = 2 + numRegions  # month index + PIOMAS mean + one value per region
    numSamples = (forecastYr - startYr) * (stopMonth + 1 - startMonth)

    feat = []
    groundTruth = []
    for year in range(startYr, forecastYr):
        for month in range(startMonth, stopMonth + 1):
            # Same start and end year, so only this year's value is requested.
            _, extent = ff.get_ice_extentN(rawDataPath, month, year, year,
                                           iceType, siiVersion, hemStr)
            groundTruth.append(extent[0])

            sample = np.zeros(numFeat)
            sample[0] = month - 1  # gridded months are actually indexed at 0
            # NOTE: no rescaling is applied to this value here.
            sample[1] = np.ma.mean(
                ff.get_pmas_month(rawDataPath, year, month - 1))

            # NOTE(review): get_gridvar receives `month`, while get_pmas_month
            # above receives `month - 1` -- confirm both index conventions.
            iceConc = ff.get_gridvar(derivedDataPath, forecastVar, month,
                                     array(year), hemStr)

            # Zero the masked cells once per sample; the result is the same for
            # every region, so it does not belong inside the region loop.
            # This writes into the array behind iceConc (`.data` is not copied).
            regionData = iceConc.data
            regionData[iceConc.mask] = 0

            for regInd in range(numRegions):
                desiredRegion = regionMask == regInd
                # NOTE(review): cells outside the region are multiplied by 0 but
                # still counted by the mean, so this is the region's sum divided
                # by the whole grid size, not the within-region average.
                sample[regInd + 2] = np.ma.mean(
                    np.multiply(regionData, desiredRegion))

            feat.append(sample)

    feat = np.reshape(feat, (numSamples, numFeat))
    groundTruth = np.reshape(groundTruth, (numSamples, 1))

    return feat, groundTruth
# Script fragment: load the training series and the forecast-year inputs.
# NOTE(review): rawDataPath, derivedDataPath, predMonth, forecastMonth, startYr,
# iceType, siiVersion, forecastVar and hemStr are read here but are not assigned
# anywhere above this point in the visible fragment -- they must already exist.
yrForecast = 2015
randSeedNum = 50

# Disabled: ice-thickness input is not loaded.
#thick, forecastThickMean = ff.get_ice_thickness(rawDataPath, startYr, yrForecast, forecastMonth)

# Extent series for predMonth, requested for startYr through yrForecast - 1.
yrsTrain, extentTrain = ff.get_ice_extentN(rawDataPath,
                                           predMonth,
                                           startYr,
                                           yrForecast - 1,
                                           icetype=iceType,
                                           version=siiVersion,
                                           hemStr=hemStr)

# Presumably the detrended extents and the fitted trend line -- inferred from
# the names only; verify against ff.get_varDT.
extentDetrendTrain, lineTrain = ff.get_varDT(yrsTrain, extentTrain)

# Gridded forecastVar for forecastMonth: the training years, then yrForecast alone.
varTrain = ff.get_gridvar(derivedDataPath, forecastVar, forecastMonth,
                          yrsTrain, hemStr)
varForecast = ff.get_gridvar(derivedDataPath, forecastVar, forecastMonth,
                             array(yrForecast), hemStr)
# Same extent query restricted to yrForecast; its last element is kept as the
# observed value.
years, extentYr = ff.get_ice_extentN(rawDataPath,
                                     predMonth,
                                     yrForecast,
                                     yrForecast,
                                     icetype=iceType,
                                     version=siiVersion,
                                     hemStr=hemStr)
observed = extentYr[-1]
# Extend lineTrain one step past its end by repeating its last increment.
extentTrendPersist = (lineTrain[-1] + (lineTrain[-1] - lineTrain[-2])
                      )  # added back to the detrended forecast predictions

regionMask = np.load(derivedDataPath + 'Regions/regionMaskA100km')
# Script fragment: build one feature row for (year, month), project it through
# pcaConc, and fetch the matching extent for fmonth.
# NOTE(review): numFeat, month, year and pcaConc are read below but are not
# assigned anywhere in the visible fragment -- they must already exist.
fmonth = 7

iceType = 'extent'
siiVersion = 'v3.0'
forecastVar = 'conc'
hemStr = 'N'
rawDataPath = '../../Data/'
derivedDataPath = '../../DataOutput/'
regionMask = np.load(derivedDataPath + 'Regions/regionMaskA100km')

# Row layout: [0] zero-based month index, [1] mean of the get_pmas_month field,
# [2..17] one value per region id 0..15.
sample = np.zeros(numFeat)
sample[0] = (month - 1)
sample[1] = np.ma.mean(ff.get_pmas_month(rawDataPath, year, month - 1))

# Gridded forecastVar field for this month and year.
iceConc = ff.get_gridvar(derivedDataPath, forecastVar, month, array(year),
                         hemStr)

for regInd in range(16):
    regionData = iceConc.data
    regionData[iceConc.mask == True] = 0  # zero out the masked cells (in place)
    desiredRegion = regionMask == regInd
    # Cells outside the region are multiplied by 0 but still counted by the
    # mean; the result is then scaled by 100.
    sample[regInd +
           2] = 100 * np.ma.mean(np.multiply(regionData, desiredRegion))

# Reshape to a single row (1, n) before handing it to pcaConc.transform.
sample = np.reshape(sample, (1, -1))
# pcaConc is presumably a transformer fitted elsewhere -- TODO confirm.
feat = pcaConc.transform(sample)
# Disabled alternative: use the raw row without the pcaConc projection.
#feat=sample

# Same start and end year, so only this year's value is requested; its first
# element is kept as the ground truth.
_, extent = ff.get_ice_extentN(rawDataPath, fmonth, year, year, iceType,
                               siiVersion, hemStr)
gTruth = extent[0]