Example #1
    def __init__(self, config: Config):
        super().__init__(config)
        if config.experiment.use_all_gfs_as_input:
            self.time_2_vec_time_distributed = TimeDistributed(
                Time2Vec(
                    self.features_len + len(
                        process_config(
                            config.experiment.train_parameters_config_file)),
                    config.experiment.time2vec_embedding_size),
                batch_first=True)
            self.embed_dim += len(
                process_config(
                    config.experiment.train_parameters_config_file)) * (
                        config.experiment.time2vec_embedding_size + 1)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.embed_dim,
            nhead=config.experiment.transformer_attention_heads,
            dim_feedforward=config.experiment.transformer_ff_dim,
            dropout=config.experiment.dropout,
            batch_first=True)
        encoder_norm = nn.LayerNorm(self.embed_dim)
        self.encoder = nn.TransformerEncoder(
            encoder_layer, config.experiment.transformer_attention_layers,
            encoder_norm)
        dense_layers = []
        features = self.embed_dim + 1
        for neurons in config.experiment.transformer_head_dims:
            dense_layers.append(
                nn.Linear(in_features=features, out_features=neurons))
            features = neurons
        dense_layers.append(nn.Linear(in_features=features, out_features=1))
        self.classification_head = nn.Sequential(*dense_layers)
        self.classification_head_time_distributed = TimeDistributed(
            self.classification_head, batch_first=True)
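
The encoder stack above is plain PyTorch; only Config, TimeDistributed and Time2Vec are project code. A minimal runnable sketch of the same encoder construction and a forward pass, with made-up hyperparameters standing in for the config.experiment.* fields:

import torch
import torch.nn as nn

# Hypothetical values standing in for the Config fields.
embed_dim, heads, ff_dim, num_layers = 32, 4, 128, 2

encoder_layer = nn.TransformerEncoderLayer(
    d_model=embed_dim, nhead=heads, dim_feedforward=ff_dim,
    dropout=0.1, batch_first=True)
encoder = nn.TransformerEncoder(encoder_layer, num_layers,
                                nn.LayerNorm(embed_dim))

x = torch.randn(8, 24, embed_dim)  # (batch, sequence, embedding)
y = encoder(x)                     # shape preserved: (8, 24, 32)
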
Example #2
    def __init__(self, config: Config):
        super().__init__()
        self.config = config
        self.val_split = config.experiment.val_split
        self.batch_size = config.experiment.batch_size
        self.shuffle = config.experiment.shuffle
        self.dataset_train = ...
        self.dataset_val = ...
        self.dataset_test = ...
        self.train_parameters = process_config(
            config.experiment.train_parameters_config_file)
        self.prediction_offset = config.experiment.prediction_offset
        self.synop_file = config.experiment.synop_file
        self.target_param = config.experiment.target_parameter
        self.sequence_length = config.experiment.sequence_length
        self.labels, self.label_mean, self.label_std = prepare_synop_dataset(
            self.synop_file, [self.target_param],
            dataset_dir=SYNOP_DATASETS_DIRECTORY,
            from_year=config.experiment.synop_from_year,
            to_year=config.experiment.synop_to_year)
        available_ids = get_available_gfs_date_keys(self.train_parameters,
                                                    self.prediction_offset,
                                                    self.sequence_length)
        self.IDs = initialize_GFS_date_keys_for_sequence(
            available_ids, self.labels, self.train_parameters,
            self.target_param, self.sequence_length)
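
The Ellipsis placeholders follow the usual LightningDataModule pattern: the datasets are built later (typically in a setup() hook). A runnable sketch of how val_split could carve out a validation set, with a dummy TensorDataset standing in for the project's dataset class:

import torch
from torch.utils.data import TensorDataset, random_split

full = TensorDataset(torch.randn(100, 8), torch.randn(100, 1))  # stand-in data
val_split = 0.2
val_len = int(len(full) * val_split)
dataset_train, dataset_val = random_split(full,
                                          [len(full) - val_len, val_len])
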
Example #3
    def __init__(self, config: Config):
        """Initialization"""
        self.train_parameters = process_config(
            config.experiment.train_parameters_config_file)
        self.target_param = config.experiment.target_parameter
        self.synop_file = config.experiment.synop_file
        self.prediction_offset = config.experiment.prediction_offset
        self.target_coords = config.experiment.target_coords

        synop_data, synop_mean, synop_std = prepare_synop_dataset(
            self.synop_file, [self.target_param],
            dataset_dir=SYNOP_DATASETS_DIRECTORY,
            from_year=config.experiment.synop_from_year,
            to_year=config.experiment.synop_to_year)

        synop_data_dates = synop_data['date']
        labels = pd.concat([synop_data_dates, synop_data[self.target_param]],
                           axis=1).to_numpy().tolist()
        _, self.gfs_data, self.targets = match_gfs_with_synop_sequence(
            labels, labels, self.target_coords[0], self.target_coords[1],
            self.prediction_offset, self.train_parameters)

        self.targets = self.targets.reshape((len(self.targets), 1))

        if config.experiment.normalization_type == NormalizationType.STANDARD:
            self.gfs_data = (self.gfs_data - np.mean(
                self.gfs_data, axis=0)) / np.std(self.gfs_data, axis=0)
        else:
            self.gfs_data = (self.gfs_data - np.min(self.gfs_data, axis=0)) / (
                np.max(self.gfs_data, axis=0) - np.min(self.gfs_data, axis=0))

        assert len(self.gfs_data) == len(self.targets)
        self.data = list(zip(self.gfs_data, self.targets))
        print(synop_mean)
        print(synop_std)
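
Both normalization branches operate column-wise (axis 0). A tiny worked example of the two transforms:

import numpy as np

data = np.array([[1.0], [2.0], [3.0]])
standard = (data - data.mean(axis=0)) / data.std(axis=0)  # ~[-1.22, 0.0, 1.22]
minmax = (data - data.min(axis=0)) / (
    data.max(axis=0) - data.min(axis=0))                  # [0.0, 0.5, 1.0]
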
Example #4
    def __init__(self, config: Config, list_IDs, train=True, normalize=True):
        """Initialization"""
        self.list_IDs = list_IDs
        self.train_parameters = process_config(
            config.experiment.train_parameters_config_file)
        self.target_param = config.experiment.target_parameter
        self.synop_file = config.experiment.synop_file
        self.labels, self.label_mean, self.label_std = prepare_synop_dataset(
            self.synop_file, [self.target_param],
            dataset_dir=SYNOP_DATASETS_DIRECTORY)
        self.dim = config.experiment.cnn_input_size
        self.channels = len(self.train_parameters)
        self.normalization_type = config.experiment.normalization_type

        length = len(self.list_IDs)
        training_data = self.list_IDs[:int(length * 0.8)]
        test_data = self.list_IDs[int(length * 0.8):]
        if train:
            data = training_data
        else:
            data = test_data

        self.data = data
        self.mean, self.std = [], []
        self.normalize = normalize
        if normalize:
            if config.experiment.normalization_type == NormalizationType.STANDARD:
                self.mean, self.std = initialize_mean_and_std(
                    self.list_IDs, self.train_parameters, self.dim)
            else:
                self.min, self.max = initialize_min_max(
                    self.list_IDs, self.train_parameters)
Example #5
    def __init__(self, config: Config, train_IDs, labels, normalize=True):
        self.train_parameters = process_config(
            config.experiment.train_parameters_config_file)
        self.target_param = config.experiment.target_parameter
        self.synop_file = config.experiment.synop_file
        self.labels = labels
        self.subregion_coords = Coords(config.experiment.subregion_nlat,
                                       config.experiment.subregion_slat,
                                       config.experiment.subregion_wlon,
                                       config.experiment.subregion_elon)

        self.prediction_offset = config.experiment.prediction_offset
        self.dim = get_dim_of_GFS_slice_for_coords(self.subregion_coords)

        self.channels = len(self.train_parameters)
        self.normalization_type = config.experiment.normalization_type
        self.sequence_length = config.experiment.sequence_length

        self.list_IDs = train_IDs

        self.data = self.list_IDs[str(self.prediction_offset)]
        self.mean, self.std = [], []
        self.normalize = normalize
        if normalize:
            self.normalize_data(config.experiment.normalization_type)
Example #6
    def __init__(self, config: Config):
        super().__init__()
        self.config = config
        self.val_split = config.experiment.val_split
        self.batch_size = config.experiment.batch_size
        self.shuffle = config.experiment.shuffle
        self.dataset_train = ...
        self.dataset_val = ...
        self.dataset_test = ...

        self.train_params = config.experiment.synop_train_features
        self.target_param = config.experiment.target_parameter
        all_params = add_param_to_train_params(self.train_params,
                                               self.target_param)
        self.feature_names = list(list(zip(*all_params))[1])
        self.target_param_index = self.feature_names.index(self.target_param)
        self.removed_dataset_indices = []

        self.synop_file = config.experiment.synop_file
        self.synop_from_year = config.experiment.synop_from_year
        self.synop_to_year = config.experiment.synop_to_year
        self.sequence_length = config.experiment.sequence_length
        self.future_sequence_length = config.experiment.future_sequence_length
        self.normalization_type = config.experiment.normalization_type
        self.prediction_offset = config.experiment.prediction_offset
        self.target_coords = config.experiment.target_coords
        self.gfs_train_params = process_config(
            config.experiment.train_parameters_config_file
        ) if config.experiment.use_all_gfs_as_input else None

        self.synop_data = ...
        self.synop_data_indices = ...
        self.synop_mean = ...
        self.synop_std = ...
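
The list(zip(*all_params))[1] idiom transposes the parameter tuples and keeps their second field. Assuming each entry is a (column, display_name) pair (the real tuple layout is project-specific), it behaves like this:

all_params = [('temp', 'temperature'), ('wind', 'wind_speed')]  # hypothetical
feature_names = list(list(zip(*all_params))[1])  # ['temperature', 'wind_speed']
target_index = feature_names.index('wind_speed')  # 1
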
Example #7
    def __init__(self, cfg: Config):
        super(CNNModel, self).__init__()
        self.cfg = cfg
        ff_input_dim = cfg.experiment.cnn_ff_input_dim
        channels = len(
            process_config(cfg.experiment.train_parameters_config_file))
        cnn_layers = []

        for index, filters in enumerate(cfg.experiment.cnn_filters):
            cnn_layers.append(
                nn.Conv2d(in_channels=channels,
                          out_channels=filters,
                          kernel_size=(3, 3),
                          padding=(1, 1)))
            cnn_layers.append(nn.ReLU())
            cnn_layers.append(nn.BatchNorm2d(num_features=filters))
            if index != len(cfg.experiment.cnn_filters) - 1:
                cnn_layers.append(
                    nn.MaxPool2d(padding=(1, 1), kernel_size=(2, 2)))
                cnn_layers.append(nn.Dropout(cfg.experiment.dropout))
            channels = filters

        self.model = nn.Sequential(
            *cnn_layers, nn.Flatten(),
            nn.Linear(in_features=ff_input_dim[0],
                      out_features=ff_input_dim[1]), nn.ReLU(),
            nn.Linear(in_features=ff_input_dim[1], out_features=1))
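
cfg.experiment.cnn_ff_input_dim[0] has to equal the flattened size coming out of the convolutional stack, which depends on the input resolution and pooling. Rather than hand-computing it, a dummy forward pass works; the channel count and input size below are made up:

import torch
import torch.nn as nn

probe = nn.Sequential(
    nn.Conv2d(4, 16, kernel_size=(3, 3), padding=(1, 1)), nn.ReLU(),
    nn.MaxPool2d(kernel_size=(2, 2), padding=(1, 1)), nn.Flatten())
with torch.no_grad():
    flat_dim = probe(torch.zeros(1, 4, 17, 17)).shape[1]  # required in_features
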
Example #8
    def __init__(self, cfg: Config):
        super(TCNWithCNNModel, self).__init__()
        self.cfg = cfg
        tcn_cnn_ff_input_dim = cfg.experiment.tcn_cnn_ff_input_dim
        cnn_channels = len(
            process_config(cfg.experiment.train_parameters_config_file))
        cnn_layers = []

        for index, filters in enumerate(cfg.experiment.cnn_filters):
            cnn_layers.append(
                nn.Conv2d(in_channels=cnn_channels,
                          out_channels=filters,
                          kernel_size=(3, 3),
                          padding=(1, 1)))
            cnn_layers.append(nn.ReLU())
            cnn_layers.append(nn.BatchNorm2d(num_features=filters))
            if index != len(cfg.experiment.cnn_filters) - 1:
                cnn_layers.append(
                    nn.MaxPool2d(padding=(1, 1), kernel_size=(2, 2)))
                cnn_layers.append(nn.Dropout(cfg.experiment.dropout))
            cnn_channels = filters

        cnn_layers.append(nn.Flatten())

        tcn_layers = []
        tcn_channels = cfg.experiment.tcn_channels
        tcn_levels = len(tcn_channels)
        kernel_size = cfg.experiment.tcn_kernel_size

        for i in range(tcn_levels):
            dilation_size = 2**i
            in_channels = (cfg.experiment.tcn_input_features
                           if i == 0 else tcn_channels[i - 1])
            out_channels = tcn_channels[i]
            tcn_layers += [
                TemporalBlock(in_channels,
                              out_channels,
                              kernel_size,
                              dilation=dilation_size,
                              padding=(kernel_size - 1) * dilation_size)
            ]

        self.cnn_layers = nn.Sequential(*cnn_layers)
        self.tcn_layers = nn.Sequential(*tcn_layers)
        self.ff = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=tcn_cnn_ff_input_dim, out_features=512),
            nn.ReLU(), nn.Linear(in_features=512, out_features=1))
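
With the dilation doubling per level, the receptive field of the stack grows exponentially. Assuming TemporalBlock is the standard block from Bai et al. (2018) with two causal convolutions per level, it can be computed as:

def tcn_receptive_field(kernel_size: int, levels: int) -> int:
    # Two dilated convs per block, dilations 1, 2, 4, ..., 2**(levels - 1).
    return 1 + 2 * (kernel_size - 1) * (2 ** levels - 1)

print(tcn_receptive_field(kernel_size=3, levels=4))  # 61 time steps
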
Example #9
    def __init__(self, config: Config, train_IDs, labels, normalize=True):
        self.train_parameters = process_config(
            config.experiment.train_parameters_config_file)
        self.target_param = config.experiment.target_parameter
        self.labels = labels
        self.dim = config.experiment.cnn_input_size
        self.prediction_offset = config.experiment.prediction_offset
        self.channels = len(self.train_parameters)
        self.normalization_type = config.experiment.normalization_type
        self.sequence_length = config.experiment.sequence_length
        self.list_IDs = train_IDs

        self.data = self.list_IDs[str(self.prediction_offset)]
        self.mean, self.std = [], []
        self.normalize = normalize
        self.gfs_loader = GFSLoader()
        if normalize:
            self.normalize_data(config.experiment.normalization_type)
Example #10
    def create_cnn_layers(self, config: Config):
        cnn_channels = len(
            process_config(config.experiment.train_parameters_config_file))
        cnn_layers = []

        for index, filters in enumerate(config.experiment.cnn_filters):
            cnn_layers.append(
                nn.Conv2d(in_channels=cnn_channels,
                          out_channels=filters,
                          kernel_size=(3, 3),
                          padding=(1, 1),
                          stride=(2, 2)))
            cnn_layers.append(nn.ReLU())
            cnn_layers.append(nn.BatchNorm2d(num_features=filters))
            if index != len(config.experiment.cnn_filters) - 1:
                cnn_layers.append(nn.Dropout(config.experiment.dropout))
            cnn_channels = filters

        cnn_layers.append(nn.Flatten())
        return nn.Sequential(*cnn_layers)
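
Here the downsampling comes from stride=(2, 2) rather than pooling; with kernel 3 and padding 1, each layer halves the spatial size, rounding up. A quick check of the arithmetic:

import math

def conv_out(size, kernel=3, padding=1, stride=2):
    return (size + 2 * padding - kernel) // stride + 1

assert conv_out(17) == math.ceil(17 / 2)  # 9
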
Example #11
    def __init__(self, config: Config):
        super(TCNS2SCMAXWithGFS, self).__init__()
        self.cnn = TimeDistributed(self.create_cnn_layers(config),
                                   batch_first=True)
        if config.experiment.use_all_gfs_as_input:
            out_features = (config.experiment.tcn_channels[0]
                            - len(config.experiment.synop_train_features)
                            - len(process_config(
                                config.experiment.train_parameters_config_file)))
        else:
            out_features = config.experiment.tcn_channels[0] - len(
                config.experiment.synop_train_features)

        self.cnn_lin_tcn = TimeDistributed(nn.Linear(
            in_features=config.experiment.cnn_lin_tcn_in_features,
            out_features=out_features),
                                           batch_first=True)
        self.tcn = self.create_tcn_layers(config)

        tcn_channels = config.experiment.tcn_channels

        linear = nn.Sequential(
            nn.Linear(in_features=tcn_channels[-1] + 1, out_features=32),
            nn.ReLU(), nn.Linear(in_features=32, out_features=1))

        self.linear = TimeDistributed(linear, batch_first=True)
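
TimeDistributed is project code, but the usual idiom behind the name is folding the time axis into the batch axis, applying the wrapped module, and unfolding again. A minimal sketch assuming batch-first (B, T, ...) input, not necessarily the author's implementation:

import torch
import torch.nn as nn

class TimeDistributedSketch(nn.Module):
    def __init__(self, module: nn.Module):
        super().__init__()
        self.module = module  # applied independently at every time step

    def forward(self, x):  # x: (B, T, *feature_dims)
        b, t = x.shape[0], x.shape[1]
        y = self.module(x.reshape(b * t, *x.shape[2:]))
        return y.reshape(b, t, *y.shape[1:])

layer = TimeDistributedSketch(nn.Linear(16, 1))
out = layer(torch.randn(8, 24, 16))  # -> (8, 24, 1)
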
Example #12
    def __init__(self, config: Config):
        super().__init__(config)
        conv_H = config.experiment.cmax_h
        conv_W = config.experiment.cmax_w
        conv_layers = []
        in_channels = 1
        for index, filters in enumerate(config.experiment.cnn_filters):
            out_channels = filters
            conv_layers.extend([
                nn.Conv2d(in_channels=in_channels,
                          out_channels=out_channels,
                          kernel_size=(3, 3),
                          stride=(2, 2),
                          padding=1),
                nn.ReLU(),
                nn.BatchNorm2d(num_features=out_channels),
            ])
            if index != len(config.experiment.cnn_filters) - 1:
                conv_layers.append(nn.Dropout(config.experiment.dropout))
            conv_W = math.ceil(conv_W / 2)
            conv_H = math.ceil(conv_H / 2)
            in_channels = out_channels

        self.conv = nn.Sequential(
            *conv_layers, nn.Flatten(),
            nn.Linear(in_features=conv_W * conv_H * out_channels,
                      out_features=conv_W * conv_H * out_channels))
        self.conv_time_distributed = TimeDistributed(self.conv)

        self.embed_dim = self.features_len * (
            config.experiment.time2vec_embedding_size +
            1) + conv_W * conv_H * out_channels
        if config.experiment.use_all_gfs_as_input:
            self.time_2_vec_time_distributed = TimeDistributed(
                Time2Vec(
                    self.features_len + len(
                        process_config(
                            config.experiment.train_parameters_config_file)),
                    config.experiment.time2vec_embedding_size),
                batch_first=True)
            self.embed_dim += len(
                process_config(
                    config.experiment.train_parameters_config_file)) * (
                        config.experiment.time2vec_embedding_size + 1)

        self.pos_encoder = PositionalEncoding(self.embed_dim, self.dropout,
                                              self.sequence_length)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=self.embed_dim,
            nhead=config.experiment.transformer_attention_heads,
            dim_feedforward=config.experiment.transformer_ff_dim,
            dropout=config.experiment.dropout,
            batch_first=True)
        encoder_norm = nn.LayerNorm(self.embed_dim)
        self.encoder = nn.TransformerEncoder(
            encoder_layer, config.experiment.transformer_attention_layers,
            encoder_norm)

        dense_layers = []
        features = self.embed_dim + 1
        for neurons in config.experiment.transformer_head_dims:
            dense_layers.append(
                nn.Linear(in_features=features, out_features=neurons))
            features = neurons
        dense_layers.append(nn.Linear(in_features=features, out_features=1))
        self.classification_head = nn.Sequential(*dense_layers)
        self.classification_head_time_distributed = TimeDistributed(
            self.classification_head, batch_first=True)
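
PositionalEncoding is also project code; a common batch-first sinusoidal variant looks like the sketch below (it assumes an even embed_dim, and the author's implementation may differ):

import math
import torch
import torch.nn as nn

class SinusoidalPE(nn.Module):
    def __init__(self, d_model: int, dropout: float, max_len: int):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2)
                             * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(position * div_term)  # even indices
        pe[:, 1::2] = torch.cos(position * div_term)  # odd indices
        self.register_buffer('pe', pe)

    def forward(self, x):  # x: (batch, seq_len, d_model)
        return self.dropout(x + self.pe[:x.size(1)])

pe = SinusoidalPE(d_model=32, dropout=0.1, max_len=24)
out = pe(torch.randn(8, 24, 32))  # shape preserved
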