def deserialize(lines):
    # type: (list[str]) -> InputDataset
    """Rebuild an InputDataset from serialized text lines.

    Every line that starts with the ``#PCDataset`` marker opens one source
    section.  The lines from that marker through the end of ``lines`` are
    handed to ``ClimateleDataset.deserialize``; where a section ends is left
    to that call.

    :param lines: serialized lines to scan for ``#PCDataset`` markers.
    :return: an ``InputDataset`` wrapping the deserialized sources in the
        order their markers appear (an empty source list when no marker
        is found).
    """
    # Kept as a function-scope import, as in the original code
    # (presumably to avoid a circular import -- TODO confirm).
    from cliMLe.climatele import ClimateleDataset

    sources = [
        ClimateleDataset.deserialize(lines[start:])
        for start, line in enumerate(lines)
        if line.startswith("#PCDataset")
    ]
    return InputDataset(sources)
# Script excerpt (collapsed onto one line): configures and runs a training job.
# Visible steps: set hyperparameters (batchSize 100, nEpocs 200,
# validation_fraction 0.15, hiddenLayers [100], activation "relu"); build a
# ClimateleDataset from one Experiment per Variable ("ts" and "zg" with 50000);
# create an IITMDataSource("AI", "monthly"); define
# learning_model_factory(weights=None), which returns a LearningModel; call
# LearningModel.parallel_execute with that factory and nInterationsPerProc;
# print the result's val_loss and train_loss; format a plot title when
# plotPrediction is true.
# outDir, projectName, start_year, end_year and nModes are not defined in this
# excerpt.
# NOTE(review): the callee names before `"1980-1-1", "2005-12-30" )` and before
# `[td], pcDataset, prediction_lag, decycle=True )` are missing here, so this
# line is not valid Python as shown -- restore them from the original script.
# `print` is used as a Python 2 statement.
nTS = 1 smooth = 0 learning_range = "1980-1-1", "2005-12-30" ) prediction_lag = CDuration.months(1) nInterationsPerProc = 10 batchSize = 100 nEpocs = 200 validation_fraction = 0.15 hiddenLayers = [100] activation = "relu" plotPrediction = True variables = [ Variable("ts"), Variable( "zg", 50000 ) ] project = Project(outDir,projectName) pcDataset = ClimateleDataset([Experiment(project, start_year, end_year, nModes, variable) for variable in variables], nts = nTS, smooth = smooth, timeRange = learning_range) td = IITMDataSource( "AI", "monthly" ) trainingDataset = [td], pcDataset, prediction_lag, decycle=True ) #ref_time_range = ( "1980-1-1", "2014-12-1" ) #ref_ts = ProjectDataSource( "HadISST_1.cvdp_data.1980-2017", [ "nino34" ], ref_time_range ) def learning_model_factory( weights = None ): return LearningModel( pcDataset, trainingDataset, batch=batchSize, epocs=nEpocs, vf=validation_fraction, hidden=hiddenLayers, activation=activation, weights=weights ) result = LearningModel.parallel_execute( learning_model_factory, nInterationsPerProc ) print "Got Best result, valuation loss = " + str( result.val_loss ) + " training loss = " + str( result.train_loss ) if plotPrediction: plot_title = "Training data with Prediction ({0}->IITM-AI, lag {1}) {2}-{3} (loss: {4}, Epochs: {5})".format(pcDataset.getVariableIds(),prediction_lag,start_year,end_year,result.val_loss,result.nEpocs)
from cliMLe.trainingData import * from cliMLe.learning import FitResult, LearningModel import multiprocessing as mp import time, keras from datetime import datetime outDir = os.path.expanduser("~/results") projectName = "MERRA2_EOFs" start_year = 1980 end_year = 2015 nModes = 32 variables = [Variable("ts")] project = Project(outDir, projectName) pcDataset = ClimateleDataset([ Experiment(project, start_year, end_year, nModes, variable) for variable in variables ]) prediction_lag = 0 nInterationsPerProc = 10 batchSize = 50 nEpocs = 300 validation_fraction = 0.1 hiddenLayers = [8] activation = "relu" plotPrediction = True training_time_range = ("1980-{0}-1".format(prediction_lag + 1), "2014-12-1" if prediction_lag == 0 else "2015-{0}-1".format(prediction_lag)) td = ProjectDataSource(
# Script excerpt (collapsed onto one line): imports followed by configuration
# for the "MERRA2_EOFs" project under ~/results.
# Visible steps: build a ClimateleDataset from one Experiment per Variable
# (only "ts"); set nInterations 10, batchSize 50, nEpocs 300,
# validation_fraction 0.1, hiddenLayers [8], activation "relu"; derive
# time_range from `lag` (with lag == 0 this is ("1980-1-1", "2014-12-1"));
# build logDir under ~/results/logs/ from projectName + "_" + timestamp;
# create a ProjectDataSource for "amo_timeseries_mon" from
# "HadISST_1.cvdp_data.1980-2017" and wrap it, with pcDataset, in a
# TrainingDataset.
# NOTE(review): `timestamp ="%m-%d-%y.%H:%M:%S")` has lost its callee
# (presumably a datetime strftime call -- confirm), so this line is not valid
# Python as shown. `os` is used but no `import os` is visible in this excerpt
# (it may come from the star import -- verify). FitResult is imported twice.
from cliMLe.trainingData import * from cliMLe.learning import FitResult import time, keras from datetime import datetime from keras.callbacks import TensorBoard, History from cliMLe.learning import FitResult, LearningModel import matplotlib.pyplot as plt outDir = os.path.expanduser("~/results") projectName = "MERRA2_EOFs" start_year = 1980 end_year = 2015 nModes = 32 variables = [ Variable("ts") ] project = Project(outDir,projectName) pcDataset = ClimateleDataset([Experiment(project, start_year, end_year, nModes, variable) for variable in variables]) nInterations = 10 batchSize = 50 nEpocs = 300 validation_fraction = 0.1 lag = 0 hiddenLayers = [8] activation = "relu" timestamp ="%m-%d-%y.%H:%M:%S") time_range = ( "1980-{0}-1".format(lag+1), "2014-12-1" if lag == 0 else "2015-{0}-1".format(lag) ) logDir = os.path.expanduser("~/results/logs/{}".format( projectName + "_" + timestamp ) ) plotPrediction = True td = ProjectDataSource( "HadISST_1.cvdp_data.1980-2017", [ "amo_timeseries_mon" ], time_range ) # , "pdo_timeseries_mon", "indian_ocean_dipole", "nino34" dset = TrainingDataset( [td], pcDataset )
# Script excerpt (collapsed onto one line): creates `biases` via
# np.zeros([nModes]), prints weights.shape and weights[0], then builds one
# Experiment per Variable ("ts" and "zg" with 80000), a ClimateleDataset over
# those experiments and an InputDataset wrapping it. It ends by setting
# prediction_lag to CDuration.years(1) and the training settings
# (nInterationsPerProc 5, nShuffles 3, batchSize 200, nEpocs 500,
# learnRate 0.005, momentum 0.9, decay 0.002, loss_function "mse",
# nesterov False).
# weights, nModes, projectName, proj_start_year, proj_end_year, nTS, smooth,
# filter, freq and learning_range are not defined in this excerpt.
# NOTE(review): `project =, projectName)` has lost its callee and first
# argument, so this line is not valid Python as shown -- restore it from the
# original script. Each Experiment is given the literal 64 while the dataset
# is given nmodes=nModes; confirm the two are meant to differ.
biases = np.zeros([nModes]) print("Weights Shape = " + str(weights.shape)) print("Weights Sample = " + str(weights[0])) variables1 = [ Variable("ts"), Variable("zg", 80000) ] # [ Variable("ts"), Variable( "zg", 80000 ), Variable( "zg", 50000 ), Variable( "zg", 25000 ) ] project =, projectName) experiments = [ Experiment(project, proj_start_year, proj_end_year, 64, variable) for variable in variables1 ] pcDataset = ClimateleDataset(projectName, experiments, nts=nTS, smooth=smooth, filter=filter, nmodes=nModes, freq=freq, timeRange=learning_range) pcInputDataset = InputDataset([pcDataset]) prediction_lag = CDuration.years(1) nInterationsPerProc = 5 nShuffles = 3 batchSize = 200 nEpocs = 500 learnRate = 0.005 momentum = 0.9 decay = 0.002 loss_function = "mse" nesterov = False
# Script excerpt (collapsed onto one line): configures and runs a serial
# training job.
# Visible steps: set hyperparameters (nInterations 10, batchSize 100,
# nEpocs 200, validation_fraction 0.15, hiddenLayers [100], activation
# "relu"); build a ClimateleDataset from one Experiment per Variable (only
# "ts"); create a ProjectDataSource for "nino34" from
# "HadISST_1.cvdp_data.1980-2017"; define learning_model_factory(), which
# returns a LearningModel; call LearningModel.serial_execute with that factory
# and nInterations; print the result's val_loss; when plotPrediction is true,
# build a fresh model from the factory and call its plotPrediction(result,
# plot_title).
# outDir, projectName and start_year are not defined in this excerpt.
# NOTE(review): the callee names before `"1980-1-1", "2014-12-30" )` and before
# `[td], pcDataset, prediction_lag )` are missing here, so this line is not
# valid Python as shown -- restore them from the original script. `print` is
# used as a Python 2 statement.
end_year = 2015 nModes = 32 nTS = 1 smooth = 0 learning_range = "1980-1-1", "2014-12-30" ) prediction_lag = CDuration.months(6) nInterations = 10 batchSize = 100 nEpocs = 200 validation_fraction = 0.15 hiddenLayers = [100] activation = "relu" plotPrediction = True variables = [ Variable("ts") ] # , Variable( "zg", 50000 ) ] project = Project(outDir,projectName) pcDataset = ClimateleDataset([Experiment(project, start_year, end_year, nModes, variable) for variable in variables], nts = nTS, smooth = smooth, timeRange = learning_range) td = ProjectDataSource( "HadISST_1.cvdp_data.1980-2017", [ "nino34" ] ) # , "pdo_timeseries_mon", "indian_ocean_dipole", "nino34" trainingDataset = [td], pcDataset, prediction_lag ) def learning_model_factory(): return LearningModel( pcDataset, trainingDataset, batch=batchSize, epocs=nEpocs, vf=validation_fraction, hidden=hiddenLayers, activation=activation ) result = LearningModel.serial_execute( learning_model_factory, nInterations ) print "Got Best result, val_loss = " + str( result.val_loss ) if plotPrediction: plot_title = "Training data with Prediction ({0}->nino34, lag {1}) {2}-{3} ({4} Epochs)".format(pcDataset.getVariableIds(),prediction_lag,start_year,end_year,nEpocs) learningModel = learning_model_factory( ) learningModel.plotPrediction( result, plot_title )
prediction_lag = CDuration.months(6) nInterationsPerProc = 10 batchSize = 200 nEpocs = 500 validation_fraction = 0.2 hiddenLayers = [100] activation = "relu" plotPrediction = True variables = [Variable("ts")] # , Variable( "zg", 50000 ) ] project = Project(outDir, projectName) pcDataset = ClimateleDataset([ Experiment(project, start_year, end_year, nModes, variable) for variable in variables ], nts=nTS, smooth=smooth, timeRange=learning_range) td = ProjectDataSource( "HadISST_1.cvdp_data.1980-2017", ["nino34"]) # , "pdo_timeseries_mon", "indian_ocean_dipole", "nino34" trainingDataset =[td], pcDataset, prediction_lag) #ref_time_range = ( "1980-1-1", "2014-12-1" ) #ref_ts = ProjectDataSource( "HadISST_1.cvdp_data.1980-2017", [ "nino34" ], ref_time_range ) def learning_model_factory(weights=None): return LearningModel(pcDataset, trainingDataset,
# Script excerpt (collapsed onto one line): configuration for a run with
# freq "Y" and filter "ja" (the inline comments describe these as yearly
# input/outputs and filtering months out of each year).
# Visible steps: build one Experiment per Variable ("ts" and "zg" with 80000),
# a ClimateleDataset over them and an InputDataset wrapping it; set
# prediction_lag to CDuration.years(1) and the training settings
# (nInterationsPerProc 25, batchSize 200, nEpocs 500, learnRate 0.005,
# momentum 0.0, decay 0.01, loss_function "mse", nesterov False).
# projectName, start_year and nModes are not defined in this excerpt.
# NOTE(review): `learning_range ="1851-1-1", "2005-12-1")` and
# `project =, projectName)` have lost their callees, so this line is not valid
# Python as shown -- restore them from the original script. The name `filter`
# shadows the Python builtin. Each Experiment is given the literal 64 while the
# dataset is given nmodes=nModes; confirm the two are meant to differ.
end_year = 2012 nTS = 1 smooth = 0 freq = "Y" # Yearly input/outputs filter = "ja" # Filter months out of each year. learning_range ="1851-1-1", "2005-12-1") variables = [ Variable("ts"), Variable("zg", 80000) ] # [ Variable("ts"), Variable( "zg", 80000 ), Variable( "zg", 50000 ), Variable( "zg", 25000 ) ] project =, projectName) pcDataset = ClimateleDataset(projectName, [ Experiment(project, start_year, end_year, 64, variable) for variable in variables ], nts=nTS, smooth=smooth, filter=filter, nmodes=nModes, freq=freq, timeRange=learning_range) inputDataset = InputDataset([pcDataset]) prediction_lag = CDuration.years(1) nInterationsPerProc = 25 batchSize = 200 nEpocs = 500 learnRate = 0.005 momentum = 0.0 decay = 0.01 loss_function = "mse" nesterov = False
verificationSplitDate, trainEndDate) if plotVerification else trainStartDate, trainEndDate) variables = [ Variable("zg", 50000) ] # [ Variable("ts"), Variable( "zg", 80000 ), Variable( "zg", 50000 ), Variable( "zg", 25000 ) ] project =, projectName) experiments = [ Experiment(project, proj_start_year, proj_end_year, 64, variable) for variable in variables ] pcDataset = ClimateleDataset(projectName, experiments, nts=nTS, smooth=smooth, filter=filter, nmodes=nModes, freq=freq, timeRange=learning_range) inputDataset = InputDataset([pcDataset]) prediction_lag = CDuration.years(1) nInterationsPerProc = 20 shuffle = True batchSize = 50 nEpocs = 500 learnRate = 0.005 momentum = 0.9 decay = 0.002 loss_function = "mse" nesterov = False