def main():
    start = datetime.now()

    # get the data
    train_data = helpers.load_data(numpy_path, 'train_set.npy')
    valid_data = helpers.load_data(numpy_path, 'valid_set.npy')
    test_data = helpers.load_data(numpy_path, 'test_set.npy')

    # filter the data
    test_data_labels = np.array([item[0] for item in test_data[:, 2]])
    test_data_countries = np.array([item[0] for item in test_data[:, 0]])
    test_data_month = test_data[:, 5]

    # convert the data
    train_dataset, train_shape = convert_dataset(
        train_data, batchsize=batchsize, shuffle=1000, shape=True)
    valid_dataset = convert_dataset(valid_data, batchsize=1000, shuffle=100)
    test_dataset = convert_dataset(test_data, batchsize=1000)

    # build the model
    model = build_model(train_shape[1], train_shape[2])

    # print the model
    # modelprovider.printModel(model, dir=os.path.join(
    #     logdir, expname), name=expname + ".png")

    # compile the model
    lossfn = loss.crps_cost_function
    opt = Adam(lr=learning_rate, amsgrad=True)
    model.compile(loss=lossfn, optimizer=opt)

    # checkpoint directory
    checkpoint_dir = os.path.join(logdir, expname, 'checkpoints/')

    # train the model 10 times with fresh initial weights
    print('[INFO] Starting training')
    predictions = []
    for i in range(1, 11):
        print('Round number: ' + str(i))
        model = build_model(train_shape[1], train_shape[2])

        # compile the new model with new initial weights;
        # a fresh optimizer avoids carrying Adam state over from the previous round
        opt = Adam(lr=learning_rate, amsgrad=True)
        model.compile(loss=lossfn, optimizer=opt)

        # checkpoint callbacks
        # one checkpoint per epoch
        cp_callback_versuch = tf.keras.callbacks.ModelCheckpoint(
            os.path.join(checkpoint_dir, 'round-' + str(i) + '/') + "checkpoint_{epoch}",
            monitor='val_loss', save_weights_only=True, mode='min', verbose=0)
        # best checkpoint only
        cp_callback = tf.keras.callbacks.ModelCheckpoint(
            os.path.join(checkpoint_dir, 'round-' + str(i) + '/checkpoint'),
            monitor='val_loss', save_weights_only=True, mode='min',
            save_best_only=True, verbose=0)

        # train the model
        if train_model:
            model.fit(
                train_dataset,
                epochs=epochs,
                initial_epoch=initial_epochs,
                batch_size=batchsize,
                verbose=1,
                validation_data=valid_dataset,
                validation_batch_size=1000,
                callbacks=[cp_callback, cp_callback_versuch],
            )

        # load the best checkpoint of round i and predict the test set
        model.load_weights(os.path.join(
            checkpoint_dir, 'round-' + str(i) + '/checkpoint')).expect_partial()
        predictions.append(model.predict(
            test_dataset, batch_size=1000, verbose=0))

    # convert to numpy array
    predictions = np.array(predictions)
    # make sure std is positive
    predictions[:, :, 1] = np.abs(predictions[:, :, 1])
    # calculate the mean of the 10 results
    mean_predictions = np.mean(predictions, 0)
    # calculate the score for each record in the test set
    test_crps = crps.norm_data(test_data_labels, mean_predictions)

    # print the results with filters
    helpers.printIntCountries(test_data_labels, test_data_countries, mean_predictions)
    helpers.printHist(helpers.datasetPIT(mean_predictions, test_data_labels))

    np.save(os.path.join(logdir, expname, 'prediction'), predictions)
    print(datetime.now() - start)
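
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): main() compiles the
# network with loss.crps_cost_function. For a network whose two outputs are
# the mean and standard deviation of a Gaussian predictive distribution, such
# a loss is typically the closed-form CRPS of the normal distribution. The
# helper below is only an assumed stand-in for the real implementation in the
# `loss` module; it reuses the module-level np/tf imports.
def _crps_gaussian_loss_sketch(y_true, y_pred):
    mu = y_pred[:, 0]                          # predicted mean
    sigma = tf.abs(y_pred[:, 1]) + 1e-6        # predicted std, kept positive
    y = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    z = (y - mu) / sigma                       # standardised error
    pdf = tf.exp(-0.5 * tf.square(z)) / tf.sqrt(2.0 * np.pi)
    cdf = 0.5 * (1.0 + tf.math.erf(z / tf.sqrt(2.0)))
    # closed-form CRPS for N(mu, sigma):
    # sigma * (z * (2*Phi(z) - 1) + 2*phi(z) - 1/sqrt(pi))
    crps_values = sigma * (z * (2.0 * cdf - 1.0) + 2.0 * pdf - 1.0 / np.sqrt(np.pi))
    return tf.reduce_mean(crps_values)
    # usage would mirror main(), e.g.:
    # model.compile(loss=_crps_gaussian_loss_sketch, optimizer=opt)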
def read():
    start = datetime.datetime.now()
    print('[INFO] starting reading...')

    # read all data; the climatological ensemble for each test case is
    # collected from the training years further below
    df = pd.read_csv(file, index_col=0)
    test_set = df[(df['init date'] > '2014-01-01')
                  & (df['init date'] < '2017-12-31')].to_numpy()

    test_data = helpers.load_data(numpy_path, 'test_set.npy')
    test_data_labels = np.array([item[0] for item in test_data[:, 2]])
    test_data_countries = np.array([item[0] for item in test_data[:, 0]])
    test_data_month = test_data[:, 5]

    scores = []
    ranks = []
    length = []
    print('[INFO] starting calculating')
    for case in test_set:
        date = datetime.datetime.strptime(case[0], '%Y-%m-%d')
        delta = datetime.timedelta(days=16)

        # build the climatological ensemble: all observations within
        # +/- 16 days of the same calendar date in 1998-2012 for this country
        ensemble = []
        for year in range(1998, 2013):
            anchor = datetime.datetime.strptime(
                str(year) + '-' + date.strftime('%m-%d'), '%Y-%m-%d')
            window_start = (anchor - delta).strftime('%Y-%m-%d')
            window_end = (anchor + delta).strftime('%Y-%m-%d')
            train_set = df[(df['init date'] > window_start)
                           & (df['init date'] < window_end)
                           & (df['country'] == case[1])].to_numpy()
            for obs in train_set[:, 2]:
                ensemble.append(obs)

        # score the case against the climatological ensemble
        score = ps.crps_ensemble(case[2], ensemble)
        rank = verificationRank(case[2], ensemble)
        length.append(len(ensemble))
        ranks.append(rank)
        scores.append(score)

    print(max(ranks))
    print(list(dict.fromkeys(length)))

    scores = np.array(scores)
    helpers.printHist(ranks, r=(0, 276))
    print(('all', round(scores.mean(), 2)))

    # mean CRPS per country, joined with '&'
    result = str(scores.mean()) + '&'
    for i in [8, 16, 2, 5, 20]:
        mask = test_data_countries == i
        filter_data = scores[mask]
        if len(filter_data) > 0:
            item = str(round(filter_data.mean(), 2))
        else:
            item = str(0)
        result += item + '&'
    print(result)

    # mean CRPS per month
    for i in range(1, 13):
        mask = test_data_month == i
        filter_data = scores[mask]
        if len(filter_data) > 0:
            item = (i, round(filter_data.mean(), 2))
        else:
            item = (i, 0)
        print(item)
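
# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): read() calls
# verificationRank(obs, ensemble) to build the rank histogram printed via
# helpers.printHist. A common definition is the position of the observation
# within the pooled ensemble, with ties broken at random so a perfectly
# reliable ensemble yields a flat histogram. The helper below is only an
# assumed stand-in for the actual implementation (0-based rank).
def _verification_rank_sketch(obs, ensemble):
    members = np.asarray(ensemble, dtype=float)
    lower = int(np.sum(members < obs))    # members strictly below the observation
    ties = int(np.sum(members == obs))    # members equal to the observation
    # random tie-breaking over the [lower, lower + ties] range
    return lower + np.random.randint(0, ties + 1)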