def __init__(self, config: Config):
    super().__init__(config)
    if config.experiment.use_all_gfs_as_input:
        gfs_params_len = len(process_config(config.experiment.train_parameters_config_file))
        # Embed synop + GFS features jointly; Time2Vec adds (k + 1) dims per feature.
        self.time_2_vec_time_distributed = TimeDistributed(
            Time2Vec(self.features_len + gfs_params_len,
                     config.experiment.time2vec_embedding_size),
            batch_first=True)
        self.embed_dim += gfs_params_len * (config.experiment.time2vec_embedding_size + 1)

    encoder_layer = nn.TransformerEncoderLayer(
        d_model=self.embed_dim,
        nhead=config.experiment.transformer_attention_heads,
        dim_feedforward=config.experiment.transformer_ff_dim,
        dropout=config.experiment.dropout,
        batch_first=True)
    encoder_norm = nn.LayerNorm(self.embed_dim)
    self.encoder = nn.TransformerEncoder(encoder_layer,
                                         config.experiment.transformer_attention_layers,
                                         encoder_norm)

    # Regression head: MLP over the encoder output plus one extra scalar input.
    dense_layers = []
    features = self.embed_dim + 1
    for neurons in config.experiment.transformer_head_dims:
        dense_layers.append(nn.Linear(in_features=features, out_features=neurons))
        features = neurons
    dense_layers.append(nn.Linear(in_features=features, out_features=1))
    self.classification_head = nn.Sequential(*dense_layers)
    self.classification_head_time_distributed = TimeDistributed(self.classification_head,
                                                                batch_first=True)
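# --- Illustrative sketch, not repository code: Time2Vec is used above but not
# defined here. Assuming the Kazemi et al. (2019) formulation (one linear term
# plus k sinusoidal terms per feature), an input of F features yields
# F * (k + 1) outputs, which matches the `embed_dim += ... * (k + 1)`
# bookkeeping in the constructor.
import torch
import torch.nn as nn

class Time2VecSketch(nn.Module):
    def __init__(self, num_features: int, embedding_size: int):
        super().__init__()
        # One linear ("trend") component per feature ...
        self.w0 = nn.Parameter(torch.randn(num_features, 1))
        self.b0 = nn.Parameter(torch.randn(num_features, 1))
        # ... plus `embedding_size` periodic components per feature.
        self.w = nn.Parameter(torch.randn(num_features, embedding_size))
        self.b = nn.Parameter(torch.randn(num_features, embedding_size))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x: (batch, F) -- TimeDistributed folds the time axis into the batch.
        x = x.unsqueeze(-1)                         # (batch, F, 1)
        linear = self.w0 * x + self.b0              # (batch, F, 1)
        periodic = torch.sin(self.w * x + self.b)   # (batch, F, k)
        return torch.cat([linear, periodic], dim=-1).flatten(start_dim=1)  # (batch, F * (k + 1))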
def __init__(self, config: Config):
    super().__init__()
    self.config = config
    self.val_split = config.experiment.val_split
    self.batch_size = config.experiment.batch_size
    self.shuffle = config.experiment.shuffle
    # Datasets are created later (e.g. in setup()); Ellipsis marks the placeholders.
    self.dataset_train = ...
    self.dataset_val = ...
    self.dataset_test = ...
    self.train_parameters = process_config(config.experiment.train_parameters_config_file)
    self.prediction_offset = config.experiment.prediction_offset
    self.synop_file = config.experiment.synop_file
    self.target_param = config.experiment.target_parameter
    self.sequence_length = config.experiment.sequence_length
    self.labels, self.label_mean, self.label_std = prepare_synop_dataset(
        self.synop_file, [self.target_param],
        dataset_dir=SYNOP_DATASETS_DIRECTORY,
        from_year=config.experiment.synop_from_year,
        to_year=config.experiment.synop_to_year)
    available_ids = get_available_gfs_date_keys(self.train_parameters,
                                                self.prediction_offset,
                                                self.sequence_length)
    self.IDs = initialize_GFS_date_keys_for_sequence(available_ids, self.labels,
                                                     self.train_parameters,
                                                     self.target_param,
                                                     self.sequence_length)
def __init__(self, config: Config): """Initialization""" self.train_parameters = process_config( config.experiment.train_parameters_config_file) self.target_param = config.experiment.target_parameter self.synop_file = config.experiment.synop_file self.prediction_offset = config.experiment.prediction_offset self.target_coords = config.experiment.target_coords synop_data, synop_mean, synop_std = prepare_synop_dataset( self.synop_file, [self.target_param], dataset_dir=SYNOP_DATASETS_DIRECTORY, from_year=config.experiment.synop_from_year, to_year=config.experiment.synop_to_year) synop_data_dates = synop_data['date'] labels = pd.concat([synop_data_dates, synop_data[self.target_param]], axis=1).to_numpy().tolist() _, self.gfs_data, self.targets = match_gfs_with_synop_sequence( labels, labels, self.target_coords[0], self.target_coords[1], self.prediction_offset, self.train_parameters) self.targets = self.targets.reshape((len(self.targets), 1)) if config.experiment.normalization_type == NormalizationType.STANDARD: self.gfs_data = (self.gfs_data - np.mean( self.gfs_data, axis=0)) / np.std(self.gfs_data, axis=0) else: self.gfs_data = (self.gfs_data - np.min(self.gfs_data, axis=0)) / ( np.max(self.gfs_data, axis=0) - np.min(self.gfs_data, axis=0)) assert len(self.gfs_data) == len(self.targets) self.data = list(zip(self.gfs_data, self.targets)) print(synop_mean) print(synop_std)
def __init__(self, config: Config, list_IDs, train=True, normalize=True):
    """Initialization"""
    self.list_IDs = list_IDs
    self.train_parameters = process_config(config.experiment.train_parameters_config_file)
    self.target_param = config.experiment.target_parameter
    self.synop_file = config.experiment.synop_file
    self.labels, self.label_mean, self.label_std = prepare_synop_dataset(
        self.synop_file, [self.target_param], dataset_dir=SYNOP_DATASETS_DIRECTORY)
    self.dim = config.experiment.cnn_input_size
    self.channels = len(self.train_parameters)
    self.normalization_type = config.experiment.normalization_type

    # Simple 80/20 split of the sample IDs.
    length = len(self.list_IDs)
    training_data = self.list_IDs[:int(length * 0.8)]
    test_data = self.list_IDs[int(length * 0.8):]
    self.data = training_data if train else test_data

    self.mean, self.std = [], []
    self.normalize = normalize
    if normalize:
        if config.experiment.normalization_type == NormalizationType.STANDARD:
            self.mean, self.std = initialize_mean_and_std(
                self.list_IDs, self.train_parameters, self.dim)
        else:
            self.min, self.max = initialize_min_max(self.list_IDs, self.train_parameters)
def __init__(self, config: Config, train_IDs, labels, normalize=True):
    self.train_parameters = process_config(config.experiment.train_parameters_config_file)
    self.target_param = config.experiment.target_parameter
    self.synop_file = config.experiment.synop_file
    self.labels = labels
    # Subregion of the GFS grid to slice, given as corner coordinates.
    self.subregion_coords = Coords(config.experiment.subregion_nlat,
                                   config.experiment.subregion_slat,
                                   config.experiment.subregion_wlon,
                                   config.experiment.subregion_elon)
    self.prediction_offset = config.experiment.prediction_offset
    self.dim = get_dim_of_GFS_slice_for_coords(self.subregion_coords)
    self.channels = len(self.train_parameters)
    self.normalization_type = config.experiment.normalization_type
    self.sequence_length = config.experiment.sequence_length
    self.list_IDs = train_IDs
    self.data = self.list_IDs[str(self.prediction_offset)]
    self.mean, self.std = [], []
    self.normalize = normalize
    if normalize:
        self.normalize_data(config.experiment.normalization_type)
def __init__(self, config: Config):
    super().__init__()
    self.config = config
    self.val_split = config.experiment.val_split
    self.batch_size = config.experiment.batch_size
    self.shuffle = config.experiment.shuffle
    # Datasets are created later (e.g. in setup()); Ellipsis marks the placeholders.
    self.dataset_train = ...
    self.dataset_val = ...
    self.dataset_test = ...
    self.train_params = config.experiment.synop_train_features
    self.target_param = config.experiment.target_parameter
    all_params = add_param_to_train_params(self.train_params, self.target_param)
    self.feature_names = list(list(zip(*all_params))[1])
    self.target_param_index = self.feature_names.index(self.target_param)
    self.removed_dataset_indices = []
    self.synop_file = config.experiment.synop_file
    self.synop_from_year = config.experiment.synop_from_year
    self.synop_to_year = config.experiment.synop_to_year
    self.sequence_length = config.experiment.sequence_length
    self.future_sequence_length = config.experiment.future_sequence_length
    self.normalization_type = config.experiment.normalization_type
    self.prediction_offset = config.experiment.prediction_offset
    self.target_coords = config.experiment.target_coords
    # GFS parameters are only needed when they are fed to the model as inputs.
    self.gfs_train_params = process_config(
        config.experiment.train_parameters_config_file) \
        if config.experiment.use_all_gfs_as_input else None
    self.synop_data = ...
    self.synop_data_indices = ...
    self.synop_mean = ...
    self.synop_std = ...
def __init__(self, cfg: Config):
    super(CNNModel, self).__init__()
    self.cfg = cfg
    ff_input_dim = cfg.experiment.cnn_ff_input_dim
    channels = len(process_config(cfg.experiment.train_parameters_config_file))

    cnn_layers = []
    for index, filters in enumerate(cfg.experiment.cnn_filters):
        cnn_layers.append(nn.Conv2d(in_channels=channels, out_channels=filters,
                                    kernel_size=(3, 3), padding=(1, 1)))
        cnn_layers.append(nn.ReLU())
        cnn_layers.append(nn.BatchNorm2d(num_features=filters))
        # Downsample between blocks, but keep the final block's resolution.
        if index != len(cfg.experiment.cnn_filters) - 1:
            cnn_layers.append(nn.MaxPool2d(padding=(1, 1), kernel_size=(2, 2)))
            cnn_layers.append(nn.Dropout(cfg.experiment.dropout))
        channels = filters

    self.model = nn.Sequential(
        *cnn_layers,
        nn.Flatten(),
        nn.Linear(in_features=ff_input_dim[0], out_features=ff_input_dim[1]),
        nn.ReLU(),
        nn.Linear(in_features=ff_input_dim[1], out_features=1))
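# --- Illustrative only: ff_input_dim[0] must match the flattened size of the
# conv stack, which depends on the configured input resolution and the pooling
# schedule. Rather than deriving it by hand, one can probe it with a dummy
# tensor (flattened_conv_size is a hypothetical helper, not repository code):
import torch
import torch.nn as nn

def flattened_conv_size(conv_stack: nn.Module, channels: int, height: int, width: int) -> int:
    # Push one zero-filled sample through the stack and count output features.
    with torch.no_grad():
        out = conv_stack(torch.zeros(1, channels, height, width))
    return out.flatten(start_dim=1).shape[1]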
def __init__(self, cfg: Config):
    super(TCNWithCNNModel, self).__init__()
    self.cfg = cfg
    tcn_cnn_ff_input_dim = cfg.experiment.tcn_cnn_ff_input_dim
    cnn_channels = len(process_config(cfg.experiment.train_parameters_config_file))

    # CNN branch: per-frame feature extractor.
    cnn_layers = []
    for index, filters in enumerate(cfg.experiment.cnn_filters):
        cnn_layers.append(nn.Conv2d(in_channels=cnn_channels, out_channels=filters,
                                    kernel_size=(3, 3), padding=(1, 1)))
        cnn_layers.append(nn.ReLU())
        cnn_layers.append(nn.BatchNorm2d(num_features=filters))
        if index != len(cfg.experiment.cnn_filters) - 1:
            cnn_layers.append(nn.MaxPool2d(padding=(1, 1), kernel_size=(2, 2)))
            cnn_layers.append(nn.Dropout(cfg.experiment.dropout))
        cnn_channels = filters
    cnn_layers.append(nn.Flatten())

    # TCN branch: stacked temporal blocks with exponentially growing dilation.
    tcn_layers = []
    tcn_channels = cfg.experiment.tcn_channels
    tcn_levels = len(tcn_channels)
    kernel_size = cfg.experiment.tcn_kernel_size
    for i in range(tcn_levels):
        dilation_size = 2 ** i
        in_channels = cfg.experiment.tcn_input_features if i == 0 else tcn_channels[i - 1]
        out_channels = tcn_channels[i]
        tcn_layers += [TemporalBlock(in_channels, out_channels, kernel_size,
                                     dilation=dilation_size,
                                     padding=(kernel_size - 1) * dilation_size)]

    self.cnn_layers = nn.Sequential(*cnn_layers)
    self.tcn_layers = nn.Sequential(*tcn_layers)
    self.ff = nn.Sequential(
        nn.Flatten(),
        nn.Linear(in_features=tcn_cnn_ff_input_dim, out_features=512),
        nn.ReLU(),
        nn.Linear(in_features=512, out_features=1))
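# --- Illustrative only: the dilation schedule above (2 ** i per level) is the
# standard TCN pattern of Bai et al. (2018). Assuming each TemporalBlock holds
# two causal convolutions (the usual design; the local TemporalBlock is not
# shown here), the receptive field over the time axis grows as:
def tcn_receptive_field(kernel_size: int, num_levels: int, convs_per_block: int = 2) -> int:
    field = 1
    for i in range(num_levels):
        field += convs_per_block * (kernel_size - 1) * (2 ** i)
    return field

# e.g. kernel_size=3 over 4 levels covers 1 + 2*2*(1+2+4+8) = 61 time steps.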
def __init__(self, config: Config, train_IDs, labels, normalize=True):
    self.train_parameters = process_config(config.experiment.train_parameters_config_file)
    self.target_param = config.experiment.target_parameter
    self.labels = labels
    self.dim = config.experiment.cnn_input_size
    self.prediction_offset = config.experiment.prediction_offset
    self.channels = len(self.train_parameters)
    self.normalization_type = config.experiment.normalization_type
    self.sequence_length = config.experiment.sequence_length
    self.list_IDs = train_IDs
    # IDs are keyed by prediction offset.
    self.data = self.list_IDs[str(self.prediction_offset)]
    self.mean, self.std = [], []
    self.normalize = normalize
    self.gfs_loader = GFSLoader()
    if normalize:
        self.normalize_data(config.experiment.normalization_type)
def create_cnn_layers(self, config: Config):
    cnn_channels = len(process_config(config.experiment.train_parameters_config_file))
    cnn_layers = []
    for index, filters in enumerate(config.experiment.cnn_filters):
        # Stride-2 convolutions downsample instead of pooling.
        cnn_layers.append(nn.Conv2d(in_channels=cnn_channels, out_channels=filters,
                                    kernel_size=(3, 3), padding=(1, 1), stride=(2, 2)))
        cnn_layers.append(nn.ReLU())
        cnn_layers.append(nn.BatchNorm2d(num_features=filters))
        if index != len(config.experiment.cnn_filters) - 1:
            cnn_layers.append(nn.Dropout(config.experiment.dropout))
        cnn_channels = filters
    cnn_layers.append(nn.Flatten())
    return nn.Sequential(*cnn_layers)
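# --- Illustrative only: with kernel_size=3, padding=1, stride=2, each Conv2d
# halves the spatial dims, rounding up: out = floor((H + 2 - 3) / 2) + 1 = ceil(H / 2).
import torch
import torch.nn as nn

probe = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
print(probe(torch.zeros(1, 1, 7, 9)).shape)  # torch.Size([1, 1, 4, 5]) -> ceil(7/2), ceil(9/2)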
def __init__(self, config: Config):
    super(TCNS2SCMAXWithGFS, self).__init__()
    self.cnn = TimeDistributed(self.create_cnn_layers(config), batch_first=True)
    # Size the CNN projection so that, after concatenating the synop (and,
    # optionally, GFS) features, each step has exactly tcn_channels[0] channels;
    # a worked example follows this constructor.
    if config.experiment.use_all_gfs_as_input:
        out_features = (config.experiment.tcn_channels[0]
                        - len(config.experiment.synop_train_features)
                        - len(process_config(config.experiment.train_parameters_config_file)))
    else:
        out_features = (config.experiment.tcn_channels[0]
                        - len(config.experiment.synop_train_features))
    self.cnn_lin_tcn = TimeDistributed(
        nn.Linear(in_features=config.experiment.cnn_lin_tcn_in_features,
                  out_features=out_features),
        batch_first=True)
    self.tcn = self.create_tcn_layers(config)
    tcn_channels = config.experiment.tcn_channels
    linear = nn.Sequential(
        nn.Linear(in_features=tcn_channels[-1] + 1, out_features=32),
        nn.ReLU(),
        nn.Linear(in_features=32, out_features=1))
    self.linear = TimeDistributed(linear, batch_first=True)
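# --- Illustrative only (made-up feature counts): the subtraction above sizes
# the CNN projection so that concatenating it with the synop (and optional GFS)
# features fills exactly tcn_channels[0] channels per time step.
tcn_in = 64                              # tcn_channels[0]
n_synop, n_gfs = 12, 8                   # hypothetical feature counts
out_features = tcn_in - n_synop - n_gfs  # CNN embedding projected to 44 dims
assert out_features + n_synop + n_gfs == tcn_in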
def __init__(self, config: Config):
    super().__init__(config)
    conv_H = config.experiment.cmax_h
    conv_W = config.experiment.cmax_w

    # CMAX branch: stride-2 convs halve the spatial dims (rounding up) per block.
    conv_layers = []
    in_channels = 1
    for index, filters in enumerate(config.experiment.cnn_filters):
        out_channels = filters
        conv_layers.extend([
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                      kernel_size=(3, 3), stride=(2, 2), padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(num_features=out_channels),
        ])
        if index != len(config.experiment.cnn_filters) - 1:
            conv_layers.append(nn.Dropout(config.experiment.dropout))
        conv_W = math.ceil(conv_W / 2)
        conv_H = math.ceil(conv_H / 2)
        in_channels = out_channels

    self.conv = nn.Sequential(
        *conv_layers,
        nn.Flatten(),
        nn.Linear(in_features=conv_W * conv_H * out_channels,
                  out_features=conv_W * conv_H * out_channels))
    self.conv_time_distributed = TimeDistributed(self.conv)

    self.embed_dim = self.features_len * (config.experiment.time2vec_embedding_size + 1) \
                     + conv_W * conv_H * out_channels
    if config.experiment.use_all_gfs_as_input:
        gfs_params_len = len(process_config(config.experiment.train_parameters_config_file))
        self.time_2_vec_time_distributed = TimeDistributed(
            Time2Vec(self.features_len + gfs_params_len,
                     config.experiment.time2vec_embedding_size),
            batch_first=True)
        self.embed_dim += gfs_params_len * (config.experiment.time2vec_embedding_size + 1)

    self.pos_encoder = PositionalEncoding(self.embed_dim, self.dropout, self.sequence_length)
    encoder_layer = nn.TransformerEncoderLayer(
        d_model=self.embed_dim,
        nhead=config.experiment.transformer_attention_heads,
        dim_feedforward=config.experiment.transformer_ff_dim,
        dropout=config.experiment.dropout,
        batch_first=True)
    encoder_norm = nn.LayerNorm(self.embed_dim)
    self.encoder = nn.TransformerEncoder(encoder_layer,
                                         config.experiment.transformer_attention_layers,
                                         encoder_norm)

    # Regression head: MLP over the encoder output plus one extra scalar input.
    dense_layers = []
    features = self.embed_dim + 1
    for neurons in config.experiment.transformer_head_dims:
        dense_layers.append(nn.Linear(in_features=features, out_features=neurons))
        features = neurons
    dense_layers.append(nn.Linear(in_features=features, out_features=1))
    self.classification_head = nn.Sequential(*dense_layers)
    self.classification_head_time_distributed = TimeDistributed(self.classification_head,
                                                                batch_first=True)
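# --- Illustrative only (made-up config values): the embed_dim bookkeeping
# above, assuming features_len=10, time2vec_embedding_size=4, cmax_h=cmax_w=32,
# cnn_filters=[16, 32] and no extra GFS inputs.
import math

features_len, t2v_k = 10, 4
conv_h = conv_w = 32
for _ in [16, 32]:                      # one stride-2 conv block per filter entry
    conv_h, conv_w = math.ceil(conv_h / 2), math.ceil(conv_w / 2)
out_channels = 32
embed_dim = features_len * (t2v_k + 1) + conv_h * conv_w * out_channels
print(embed_dim)                        # 10 * 5 + 8 * 8 * 32 = 2098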