Example #1
File: seqnn.py  Project: polyaB/basenji
    def evaluate(self, seq_data, head_i=None, loss='poisson'):
        """ Evaluate model on SeqDataset. """
        # choose model
        if self.ensemble is not None:
            model = self.ensemble
        elif head_i is not None:
            model = self.models[head_i]
        else:
            model = self.model

        # compile with dense metrics
        num_targets = model.output_shape[-1]

        if loss == 'bce':
            model.compile(optimizer=tf.keras.optimizers.SGD(),
                          loss=loss,
                          metrics=[
                              metrics.SeqAUC(curve='ROC', summarize=False),
                              metrics.SeqAUC(curve='PR', summarize=False)
                          ])
        else:
            model.compile(optimizer=tf.keras.optimizers.SGD(),
                          loss=loss,
                          metrics=[
                              metrics.PearsonR(num_targets, summarize=False),
                              metrics.R2(num_targets, summarize=False)
                          ])

        # evaluate
        return model.evaluate(seq_data.dataset)
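
A minimal usage sketch for the method above, assuming the basenji-style SeqNN and SeqDataset wrappers seen elsewhere on this page (paths and parameter values are illustrative, not taken from the source):

# Illustrative only: assumes the SeqNN / SeqDataset APIs used in these examples.
data_dir = 'data_dir'                    # hypothetical data directory
test_data = dataset.SeqDataset(data_dir,
                               split_label='test',
                               batch_size=4,
                               mode='eval')
seqnn_model = seqnn.SeqNN(params_model)  # params_model parsed from a params JSON elsewhere
seqnn_model.restore('model_best.h5')     # hypothetical trained checkpoint
test_metrics = seqnn_model.evaluate(test_data, loss='poisson')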
def cluster_evaluation(mask):

    metric_list = []

    name = mask[0:6]

    sample_list = [1000, 3000, 5000, 7000, 9000, 11000, 14000]

    for i in tqdm(sample_list):
        try:
            xtrain, xval, _, ytrain, yval, _ = dp.areal_model(length=i,
                                                              mask=mask)
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)

            metric_list.append(
                [i, training_R2, training_RMSE, val_R2, val_RMSE])

        except Exception:
            # if data preparation or fitting fails for this sample size, retry with 100 extra samples
            print(i + 100)
            xtrain, xval, _, ytrain, yval, _ = dp.areal_model(length=i + 100,
                                                              mask=mask)
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)

            metric_list.append(
                [i, training_R2, training_RMSE, val_R2, val_RMSE])

    df = pd.DataFrame(
        metric_list,
        columns=[
            "samples", "training_R2", "training_RMSE", "val_R2", "val_RMSE"
        ],
    )
    df.to_csv(name + "-eval-2020-07-22.csv")
def multi_gp(xtrain, xval, ytrain, yval, save=False):
    """ Returns simple GP model """

    # model construction
    k1 = gpflow.kernels.Periodic(
        gpflow.kernels.RBF(lengthscales=1, variance=1, active_dims=[0]))
    k1b = gpflow.kernels.RBF(lengthscales=2, variance=1, active_dims=[0])
    k2 = gpflow.kernels.RBF(lengthscales=np.ones(len(xval[0]) - 1),
                            active_dims=np.arange(1, len(xval[0])))
    # quasi-periodic kernel on the time dimension plus an ARD RBF over the remaining features;
    # a White kernel and a linear mean function are left disabled:
    # k3 = gpflow.kernels.White()
    # mean_function = gpflow.mean_functions.Linear(A=np.ones((len(xtrain[0]), 1)), b=[1])
    k = k1 * k1b + k2

    m = gpflow.models.GPR(data=(xtrain, ytrain.reshape(-1, 1)), kernel=k)

    opt = gpflow.optimizers.Scipy()
    opt.minimize(m.training_loss, m.trainable_variables)

    x_plot = np.concatenate((xtrain, xval))
    y_gpr, y_std = m.predict_y(x_plot)

    # train R2 | train RMSE | val R2 | val RMSE | mean prediction | mean predictive std
    print(
        " {0:.3f} | {1:.3f} | {2:.3f} | {3:.3f} | {4:.3f} | {5:.3f} |".format(
            me.R2(m, xtrain, ytrain),
            me.RMSE(m, xtrain, ytrain),
            me.R2(m, xval, yval),
            me.RMSE(m, xval, yval),
            np.mean(y_gpr),
            np.mean(y_std),
        ))

    if save is not False:
        filepath = save_model(m, xval, save)
        print(filepath)

    return m
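
The kernel above combines a quasi-periodic component on the time dimension (Periodic(RBF) multiplied by a slowly varying RBF, both on dimension 0) with an ARD RBF over the remaining features. A self-contained sketch of the same composition on synthetic data, using only the GPflow calls already used in this function (array shapes and values are arbitrary):

import numpy as np
import gpflow

# synthetic data: column 0 plays the role of time, columns 1-3 are extra features
rng = np.random.default_rng(0)
X = rng.normal(size=(50, 4))
Y = np.sin(X[:, :1]) + 0.1 * rng.normal(size=(50, 1))

# quasi-periodic kernel on dimension 0, multiplied by a slowly varying RBF envelope
k1 = gpflow.kernels.Periodic(
    gpflow.kernels.RBF(lengthscales=1, variance=1, active_dims=[0]))
k1b = gpflow.kernels.RBF(lengthscales=2, variance=1, active_dims=[0])

# ARD RBF over the remaining feature columns
k2 = gpflow.kernels.RBF(lengthscales=np.ones(3), active_dims=np.arange(1, 4))

k = k1 * k1b + k2

m = gpflow.models.GPR(data=(X, Y), kernel=k)
gpflow.optimizers.Scipy().minimize(m.training_loss, m.trainable_variables)
y_mean, y_var = m.predict_y(X)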
Example #4
def perform_analysis():
    """



    :return:
    """
    parser = parse_arg()
    args = parser.parse_args()

    df = pd.read_csv(args.input)
    (cal_df, tst_df) = separating_data_set(df)

    (x_train, y_train) = splitting_dataset(cal_df)
    (x_test, y_test) = splitting_dataset(tst_df)

    print(x_train)
    print(y_train)
    print(x_test)
    print(y_test)

    model = build_fnn(x_train)

    model.summary()
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25)
    random.seed(1)
    model.fit(x_train, y_train, batch_size=args.batchSize, epochs=args.epochSize, validation_data=(x_test, y_test), callbacks=[early_stop])

    fnn_losses = pd.DataFrame(model.history.history)
    create_losses_plot(fnn_losses)

    predictions = model.predict(x_test)

    print("MSE:", metrics.MSE(y_test, predictions))
    print("RMSE:", metrics.RMSE(y_test, predictions))
    print("R-square:", metrics.R2(y_test, predictions))
    print("RPD:", metrics.RPD(y_test, predictions))

    create_prediction_plot(y_test, predictions)
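
build_fnn and the other helpers used above are defined elsewhere in the project and are not shown here. Purely as an illustration, a comparable Keras feed-forward regressor could be built along these lines (the name build_fnn_sketch, the layer widths and the compile options are invented, not the project's actual implementation):

import tensorflow as tf

def build_fnn_sketch(x_train):
    # hypothetical stand-in for the project's build_fnn
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu',
                              input_shape=(x_train.shape[1],)),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1)  # single regression target
    ])
    model.compile(optimizer='adam', loss='mse')
    return model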
def uib_evaluation(average=False):

    metric_list = []

    sample_list = [1000, 3000, 5000, 7000, 9000, 11000, 14000]

    for i in tqdm(sample_list):
        try:
            xtrain, xval, _, ytrain, yval, _ = dp.areal_model(
                length=i, EDA_average=average)
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)

            metric_list.append(
                [i, training_R2, training_RMSE, val_R2, val_RMSE])

        except Exception:
            # if data preparation or fitting fails for this sample size, retry with 100 extra samples
            print(i + 100)
            xtrain, xval, _, ytrain, yval, _ = dp.areal_model(
                length=i + 100, EDA_average=average)
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)

            metric_list.append(
                [i, training_R2, training_RMSE, val_R2, val_RMSE])

    df = pd.DataFrame(
        metric_list,
        columns=[
            "samples", "training_R2", "training_RMSE", "val_R2", "val_RMSE"
        ],
    )
    df.to_csv("uib-eval-2020-07-22.csv")
def hybrid_gp(xtrain, xval, ytrain, yval, save=False):
    """ Returns whole basin or cluster GP model with hybrid kernel """

    dimensions = len(xtrain[0])

    k1 = gpflow.kernels.RBF(lengthscales=np.ones(dimensions),
                            active_dims=np.arange(0, dimensions))
    k2 = gpflow.kernels.RBF(lengthscales=np.ones(dimensions),
                            active_dims=np.arange(0, dimensions))

    # hybrid_kernel is a project-specific helper defined elsewhere in this module
    alpha1 = hybrid_kernel(dimensions, 1)
    alpha2 = hybrid_kernel(dimensions, 2)

    k = alpha1 * k1 + alpha2 * k2

    m = gpflow.models.GPR(data=(xtrain, ytrain.reshape(-1, 1)), kernel=k)

    opt = gpflow.optimizers.Scipy()
    opt.minimize(m.training_loss, m.trainable_variables)
    # print_summary(m)

    x_plot = np.concatenate((xtrain, xval))
    y_gpr, y_std = m.predict_y(x_plot)

    # train R2 | train RMSE | val R2 | val RMSE | mean prediction | mean predictive std
    print(
        " {0:.3f} | {1:.3f} | {2:.3f} | {3:.3f} | {4:.3f} | {5:.3f} |".format(
            me.R2(m, xtrain, ytrain),
            me.RMSE(m, xtrain, ytrain),
            me.R2(m, xval, yval),
            me.RMSE(m, xval, yval),
            np.mean(y_gpr),
            np.mean(y_std),
        ))

    if save is True:
        filepath = save_model(m, xval, "")
        print(filepath)

    return m
Example #7
  def compile(self, seqnn_model):
    # for model in seqnn_model.models:
    if self.loss == 'bce':
      model_metrics = [metrics.SeqAUC(curve='ROC'), metrics.SeqAUC(curve='PR')]
    else:

      # num_targets = model.output_shape[-1]
      num_targets = seqnn_model.layers[-1].output_shape[-1]
      model_metrics = [metrics.PearsonR(num_targets), metrics.R2(num_targets)]

    seqnn_model.compile(loss=self.loss_fn,
                        optimizer=self.optimizer,
                        metrics=model_metrics)
    self.compiled = True
Example #8
    def fit_tape(self, seqnn_model):
        if not self.compiled:
            self.compile(seqnn_model)
        model = seqnn_model.model

        # metrics
        num_targets = model.output_shape[-1]
        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_r = metrics.PearsonR(num_targets, name='train_r')
        train_r2 = metrics.R2(num_targets, name='train_r2')
        valid_loss = tf.keras.metrics.Mean(name='valid_loss')
        valid_r = metrics.PearsonR(num_targets, name='valid_r')
        valid_r2 = metrics.R2(num_targets, name='valid_r2')

        if self.strategy is None:

            @tf.function
            def train_step(x, y):
                with tf.GradientTape() as tape:
                    pred = model(x, training=True)
                    loss = self.loss_fn(y, pred) + sum(model.losses)
                train_loss(loss)
                train_r(y, pred)
                train_r2(y, pred)
                gradients = tape.gradient(loss, model.trainable_variables)
                self.optimizer.apply_gradients(
                    zip(gradients, model.trainable_variables))

            @tf.function
            def eval_step(x, y):
                pred = model(x, training=False)
                loss = self.loss_fn(y, pred) + sum(model.losses)
                valid_loss(loss)
                valid_r(y, pred)
                valid_r2(y, pred)

        else:

            def train_step(x, y):
                with tf.GradientTape() as tape:
                    pred = model(x, training=True)
                    loss_batch_len = self.loss_fn(y, pred)
                    loss_batch = tf.reduce_mean(loss_batch_len, axis=-1)
                    loss = tf.reduce_sum(loss_batch) / self.batch_size
                    loss += sum(model.losses) / self.num_gpu
                train_r(y, pred)
                train_r2(y, pred)
                gradients = tape.gradient(loss, model.trainable_variables)
                self.optimizer.apply_gradients(
                    zip(gradients, model.trainable_variables))
                return loss

            @tf.function
            def train_step_distr(xd, yd):
                replica_losses = self.strategy.run(train_step, args=(xd, yd))
                loss = self.strategy.reduce(tf.distribute.ReduceOp.SUM,
                                            replica_losses,
                                            axis=None)
                train_loss(loss)

            def eval_step(x, y):
                pred = model(x, training=False)
                loss = self.loss_fn(y, pred) + sum(model.losses)
                valid_loss(loss)
                valid_r(y, pred)
                valid_r2(y, pred)

            @tf.function
            def eval_step_distr(xd, yd):
                return self.strategy.run(eval_step, args=(xd, yd))

        # improvement variables
        valid_best = -np.inf
        unimproved = 0

        # training loop
        for ei in range(self.train_epochs_max):
            if ei >= self.train_epochs_min and unimproved > self.patience:
                break
            else:
                # train
                t0 = time.time()
                train_iter = iter(self.train_data[0].dataset)
                for si in range(self.train_epoch_batches[0]):
                    x, y = next(train_iter)
                    if self.strategy is not None:
                        train_step_distr(x, y)
                    else:
                        train_step(x, y)

                # evaluate
                # eval_iter = iter(self.eval_data[0].dataset)
                # for si in range(self.eval_epoch_batches[0]):
                #   x, y = next(eval_iter)
                for x, y in self.eval_data[0].dataset:
                    if self.strategy is not None:
                        eval_step_distr(x, y)
                    else:
                        eval_step(x, y)

                # print training accuracy
                train_loss_epoch = train_loss.result().numpy()
                train_r_epoch = train_r.result().numpy()
                train_r2_epoch = train_r2.result().numpy()
                print('Epoch %d - %ds - train_loss: %.4f - train_r: %.4f - train_r2: %.4f' % \
                  (ei, (time.time()-t0), train_loss_epoch, train_r_epoch, train_r2_epoch), end='')

                # print validation accuracy
                # valid_loss, valid_pr, valid_r2 = model.evaluate(self.eval_data[0].dataset, verbose=0)
                valid_loss_epoch = valid_loss.result().numpy()
                valid_r_epoch = valid_r.result().numpy()
                valid_r2_epoch = valid_r2.result().numpy()
                print(' - valid_loss: %.4f - valid_r: %.4f - valid_r2: %.4f' % \
                  (valid_loss_epoch, valid_r_epoch, valid_r2_epoch), end='')

                # checkpoint
                seqnn_model.save('%s/model_check.h5' % self.out_dir)

                # check best
                if valid_r_epoch > valid_best:
                    print(' - best!', end='')
                    unimproved = 0
                    valid_best = valid_r_epoch
                    seqnn_model.save('%s/model_best.h5' % self.out_dir)
                else:
                    unimproved += 1
                print('', flush=True)

                # reset metrics
                train_loss.reset_states()
                train_r.reset_states()
                train_r2.reset_states()
                valid_loss.reset_states()
                valid_r.reset_states()
                valid_r2.reset_states()
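
In the distributed branch above, each replica averages the per-example loss over the target axis, sums over its shard, scales by the global batch size, and divides the regularization losses by the number of GPUs so that summing across replicas counts them once. A minimal self-contained sketch of that tf.distribute pattern (illustrative only; the real trainer builds its strategy, model, optimizer and datasets elsewhere):

import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
num_gpu = strategy.num_replicas_in_sync
global_batch_size = 4  # illustrative

with strategy.scope():
    model = tf.keras.Sequential([tf.keras.layers.Dense(3)])  # stand-in for the SeqNN model
    optimizer = tf.keras.optimizers.SGD()
    loss_fn = tf.keras.losses.Poisson(reduction=tf.keras.losses.Reduction.NONE)

def train_step(x, y):
    # runs once per replica, on that replica's shard of the global batch
    with tf.GradientTape() as tape:
        pred = model(x, training=True)
        loss_batch = tf.reduce_mean(loss_fn(y, pred), axis=-1)  # per-example loss
        loss = tf.reduce_sum(loss_batch) / global_batch_size    # scale by the *global* batch size
        loss += sum(model.losses) / num_gpu                     # count regularization once overall
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

@tf.function
def train_step_distr(xd, yd):
    # xd, yd come from a dataset wrapped with strategy.experimental_distribute_dataset(...)
    per_replica_losses = strategy.run(train_step, args=(xd, yd))
    return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)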
Example #9
    def fit2(self, seqnn_model):
        if not self.compiled:
            self.compile(seqnn_model)

        assert (len(seqnn_model.models) >= self.num_datasets)

        ################################################################
        # prep

        # metrics
        train_loss, train_r, train_r2 = [], [], []
        valid_loss, valid_r, valid_r2 = [], [], []
        for di in range(self.num_datasets):
            num_targets = seqnn_model.models[di].output_shape[-1]
            train_loss.append(tf.keras.metrics.Mean(name='train%d_loss' % di))
            train_r.append(metrics.PearsonR(num_targets,
                                            name='train%d_r' % di))
            train_r2.append(metrics.R2(num_targets, name='train%d_r2' % di))
            valid_loss.append(tf.keras.metrics.Mean(name='valid%d_loss' % di))
            valid_r.append(metrics.PearsonR(num_targets,
                                            name='valid%d_r' % di))
            valid_r2.append(metrics.R2(num_targets, name='valid%d_r2' % di))

        # per-dataset train steps are written out explicitly below;
        # the loop-generated version is kept here (disabled) for reference
        """
    train_steps = []
    for di in range(self.num_datasets):
      model = seqnn_model.models[di]

      @tf.function
      def train_step(x, y):
        with tf.GradientTape() as tape:
          pred = model(x, training=tf.constant(True))
          loss = self.loss_fn(y, pred) + sum(model.losses)
        train_loss[di](loss)
        train_r[di](y, pred)
        train_r2[di](y, pred)
        gradients = tape.gradient(loss, model.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, model.trainable_variables))

      train_steps.append(train_step)
    """
        @tf.function
        def train_step0(x, y):
            with tf.GradientTape() as tape:
                pred = seqnn_model.models[0](x, training=True)
                loss = self.loss_fn(y, pred) + sum(
                    seqnn_model.models[0].losses)
            train_loss[0](loss)
            train_r[0](y, pred)
            train_r2[0](y, pred)
            gradients = tape.gradient(
                loss, seqnn_model.models[0].trainable_variables)
            self.optimizer.apply_gradients(
                zip(gradients, seqnn_model.models[0].trainable_variables))

        @tf.function
        def eval_step0(x, y):
            pred = seqnn_model.models[0](x, training=False)
            loss = self.loss_fn(y, pred) + sum(seqnn_model.models[0].losses)
            valid_loss[0](loss)
            valid_r[0](y, pred)
            valid_r2[0](y, pred)

        if self.num_datasets > 1:

            @tf.function
            def train_step1(x, y):
                with tf.GradientTape() as tape:
                    pred = seqnn_model.models[1](x, training=True)
                    loss = self.loss_fn(y, pred) + sum(
                        seqnn_model.models[1].losses)
                train_loss[1](loss)
                train_r[1](y, pred)
                train_r2[1](y, pred)
                gradients = tape.gradient(
                    loss, seqnn_model.models[1].trainable_variables)
                self.optimizer.apply_gradients(
                    zip(gradients, seqnn_model.models[1].trainable_variables))

            @tf.function
            def eval_step1(x, y):
                pred = seqnn_model.models[1](x, training=False)
                loss = self.loss_fn(y, pred) + sum(
                    seqnn_model.models[1].losses)
                valid_loss[1](loss)
                valid_r[1](y, pred)
                valid_r2[1](y, pred)

        # improvement variables
        valid_best = [-np.inf] * self.num_datasets
        unimproved = [0] * self.num_datasets

        ################################################################
        # training loop

        for ei in range(self.train_epochs_max):
            if ei >= self.train_epochs_min and np.min(
                    unimproved) > self.patience:
                break
            else:
                # shuffle datasets
                np.random.shuffle(self.dataset_indexes)

                # get iterators
                train_data_iters = [iter(td.dataset) for td in self.train_data]

                # train
                t0 = time.time()
                for di in self.dataset_indexes:
                    x, y = next(train_data_iters[di])
                    if di == 0:
                        train_step0(x, y)
                    else:
                        train_step1(x, y)

                print('Epoch %d - %ds' % (ei, (time.time() - t0)))
                for di in range(self.num_datasets):
                    print('  Data %d' % di, end='')
                    model = seqnn_model.models[di]

                    # print training accuracy
                    print(' - train_loss: %.4f' %
                          train_loss[di].result().numpy(),
                          end='')
                    print(' - train_r: %.4f' % train_r[di].result().numpy(),
                          end='')
                    print(' - train_r2: %.4f' % train_r2[di].result().numpy(),
                          end='')

                    # evaluate
                    for x, y in self.eval_data[di].dataset:
                        if di == 0:
                            eval_step0(x, y)
                        else:
                            eval_step1(x, y)

                    # print validation accuracy
                    print(' - valid_loss: %.4f' %
                          valid_loss[di].result().numpy(),
                          end='')
                    print(' - valid_r: %.4f' % valid_r[di].result().numpy(),
                          end='')
                    print(' - valid_r2: %.4f' % valid_r2[di].result().numpy(),
                          end='')
                    early_stop_stat = valid_r[di].result().numpy()

                    # checkpoint
                    model.save('%s/model%d_check.h5' % (self.out_dir, di))

                    # check best
                    if early_stop_stat > valid_best[di]:
                        print(' - best!', end='')
                        unimproved[di] = 0
                        valid_best[di] = early_stop_stat
                        model.save('%s/model%d_best.h5' % (self.out_dir, di))
                    else:
                        unimproved[di] += 1
                    print('', flush=True)

                    # reset metrics
                    train_loss[di].reset_states()
                    train_r[di].reset_states()
                    train_r2[di].reset_states()
                    valid_loss[di].reset_states()
                    valid_r[di].reset_states()
                    valid_r2[di].reset_states()
Example #10
def perform_analysis():
    """



    :return:
    """
    parser = parse_arg()
    args = parser.parse_args()

    df = pd.read_csv(args.input)
    # df = df.apply(lambda x: preProcessing.scaling_y_data(x) if x.name == 'OC' else x)  # scaling OC data

    (cal_df, tst_df) = separating_data_set(df)

    (X_train, y_train) = splitting_dataset(cal_df)
    (X_test, y_test) = splitting_dataset(tst_df)

    # Scale the features
    X_train = preProcessing.scaler_min_max_x_data(X_train)
    X_test = preProcessing.scaler_min_max_x_data(X_test)

    y_train = preProcessing.scaler_min_max_y_data(y_train)
    y_test = preProcessing.scaler_min_max_y_data(y_test)

    print(X_train)
    print(y_train)
    print(X_test)
    print(y_test)

    print(X_train.shape)
    print(y_train.shape)

    if args.hiddenLayers == 5:
        model = build_fnn_5l(X_train)
    elif args.hiddenLayers == 4:
        model = build_fnn_4l(X_train)
    elif args.hiddenLayers == 3:
        model = build_fnn_3l(X_train)
    elif args.hiddenLayers == 2:
        model = build_fnn_2l(X_train)
    else:
        model = build_fnn_1l(X_train)

    model.summary()

    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=25)
    random.seed(1)
    model.fit(X_train, y_train, batch_size=args.batchSize, epochs=args.epochSize, validation_data=(X_test, y_test),
              callbacks=[early_stop])

    fnn_losses = pd.DataFrame(model.history.history)
    create_losses_plot(fnn_losses)

    trained_model_save(model, "trained_model.h5")

    predictions = model.predict(X_test)

    print("MSE:", metrics.MSE(y_test, predictions))
    print("RMSE:", metrics.RMSE(y_test, predictions))
    print("R-square:", metrics.R2(y_test, predictions))
    print("RPD:", metrics.RPD(y_test, predictions))

    create_prediction_plot(y_test, predictions)
Example #11
if __name__ == "__main__":
    data = pd.read_csv(FILENAME_READ)
    data = data.drop('Unnamed: 0', axis=1)
    columns = data.drop('Target', axis=1).columns

    weights, y_trains, X_trains, y_tests, X_tests = lr.cross_validation(data)

    data_write = pd.DataFrame(
        columns=["", "T1", "T2", "T3", "T4", "T5", "E", "STD"])

    for i in range(5):
        y_pred_test = lr.predict(X_tests[i], weights[i])
        y_pred_train = lr.predict(X_trains[i], weights[i])
        y_test = np.array(y_tests[i])
        y_train = np.array(y_trains[i])

        r2_test = m.R2(y_test, y_pred_test)
        r2_train = m.R2(y_train, y_pred_train)
        rmse_test = m.RMSE(y_test, y_pred_test)
        rmse_train = m.RMSE(y_train, y_pred_train)

        data_write["T" + str(i + 1)] = [
            r2_test, r2_train, rmse_test, rmse_train
        ] + list(weights[i].reshape(weights[i].shape[0], 1))

    data_write["E"] = data_write[["T1", "T2", "T3", "T4", "T5"]].mean(axis=1)
    data_write["STD"] = data_write[["T1", "T2", "T3", "T4", "T5"]].std(axis=1)

    data_write.index = ["R^2_test", "R^2_train", "RMSE_test", "RMSE_train"
                        ] + list(columns) + list(["1"])
    data_write.to_csv("result.csv")
def single_loc_evaluation(location, perf_plot=False, hpar_plot=False):

    metric_list = []
    coord_list = sa.random_location_generator(location)
    n = len(coord_list)

    for i in tqdm(range(n)):
        try:
            xtrain, xval, _, ytrain, yval, _ = dp.point_model(
                coords=list(coord_list[i]))
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)
            time_kernel_lengthscale = float(
                m.kernel.kernels[0].base_kernel.lengthscales.value())
            time_kernel_variance = float(
                m.kernel.kernels[0].base_kernel.variance.value())
            time_kernel_periodicity = float(m.kernel.kernels[0].period.value())
            N34_lengthscale = np.array(
                m.kernel.kernels[1].lengthscales.value())[2]
            d2m_lengthscale = np.array(
                m.kernel.kernels[1].lengthscales.value())[0]
            tcwv_lengthscale = np.array(
                m.kernel.kernels[1].lengthscales.value())[1]
            rbf_kernel_variance = float(m.kernel.kernels[1].variance.value())

            metric_list.append([
                coord_list[i, 0], coord_list[i, 1], training_R2, training_RMSE,
                val_R2, val_RMSE, time_kernel_lengthscale,
                time_kernel_variance, time_kernel_periodicity, N34_lengthscale,
                d2m_lengthscale, tcwv_lengthscale, rbf_kernel_variance
            ])

        except Exception:
            # skip locations where data preparation or model fitting fails
            pass

    df = pd.DataFrame(
        metric_list,
        columns=[
            "latitude", "longitude", "training_R2", "training_RMSE", "val_R2",
            "val_RMSE", "time_kernel_lengthscale", "time_kernel_variance",
            "time_kernel_periodicity", "N34_lengthscale", "d2m_lengthscale",
            "tcwv_lengthscale", "rbf_kernel_variance"
        ],
    )

    now = datetime.datetime.now()
    df.to_csv("_Data/single-locations-eval-" + now.strftime("%Y-%m-%d") +
              ".csv")

    print(df.mean(axis=0))

    df_prep = df.set_index(["latitude", "longitude"])
    da = df_prep.to_xarray()

    if perf_plot is True:
        slm_perf_plots(da)

    if hpar_plot is True:
        slm_hpar_plots(da)
Example #13
def main():
    usage = 'usage: %prog [options] <data_dir> <model_name> <output_dir> <params_file>...'
    parser = OptionParser(usage)
    parser.add_option(
        '-b',
        dest='batch_size',
        default=4,
        type='int',
        help='Batch size for the model training [Default: %default]')
    parser.add_option('-p',
                      dest='patience',
                      default=8,
                      type='int',
                      help='Training patience [Default: %default]')
    parser.add_option('-l',
                      dest='learning_rate',
                      default=0.1,
                      type='float',
                      help='Learning rate [Default: %default]')
    parser.add_option('-m',
                      dest='momentum',
                      default=0.99,
                      type='float',
                      help='SGD momentum [Default: %default]')
    parser.add_option('-e',
                      dest='n_epochs',
                      default=8,
                      type='int',
                      help='Number of training epochs [Default: %default]')
    parser.add_option('--clip_norm',
                      dest='clip_norm',
                      default=1000000,
                      type='float',
                      help='Gradient clipping norm [Default: %default]')
    (options, args) = parser.parse_args()
    # TODO: add the rest of the parameters
    if len(args) < 4:
        parser.error('Must provide data_dir, model_name, output_dir and params_file.')
    else:
        data_dir = args[0]
        model_name = args[1]
        output_dir = args[2]
        params_file = args[3]

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    #### LOAD DATA
    # read model parameters
    with open(params_file) as params_open:
        params = json.load(params_open)
    params_model = params['model']
    params_train = params['train']

    # read datasets
    train_data = []
    eval_data = []

    # load train data
    train_data.append(
        dataset.SeqDataset(data_dir,
                           split_label='train',
                           batch_size=params_train['batch_size'],
                           mode='train'))

    # load eval data
    eval_data.append(
        dataset.SeqDataset(data_dir,
                           split_label='valid',
                           batch_size=params_train['batch_size'],
                           mode='eval'))
    ##########################################

    if model_name == 'basenji':
        model = model_zoo.basenji_model((131072, 4), 3)
    else:
        parser.error('Unknown model_name: %s' % model_name)
    loss_fn = tf.keras.losses.Poisson(reduction=tf.keras.losses.Reduction.NONE)
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_pearsonr',  #'val_aupr',#
        patience=options.patience,
        verbose=1,
        mode='max')
    # early_stop = EarlyStoppingMin(monitor='val_pearsonr', mode='max', verbose=1,
    #                patience=options.patience, min_epoch=1)
    save_best = tf.keras.callbacks.ModelCheckpoint(
        '{}/model_best.h5'.format(output_dir),
        save_best_only=True,
        mode='max',
        monitor='val_pearsonr',
        verbose=1)
    callbacks = [
        early_stop,
        tf.keras.callbacks.TensorBoard(output_dir),
        tf.keras.callbacks.ModelCheckpoint('%s/model_check.h5' % output_dir),
        save_best
    ]
    # fit model
    num_targets = model.output_shape[-1]
    print('num_targets ', num_targets)
    model_metrics = [metrics.PearsonR(num_targets), metrics.R2(num_targets)]

    optimizer = tf.keras.optimizers.SGD(learning_rate=options.learning_rate,
                                        momentum=options.momentum,
                                        clipnorm=options.clip_norm)

    model.compile(loss=loss_fn, optimizer=optimizer, metrics=model_metrics)
    model.fit(train_data[0].dataset,
              epochs=options.n_epochs,
              callbacks=callbacks,
              validation_data=eval_data[0].dataset)
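
The params JSON read near the top of main() needs at least the keys shown below; a minimal sketch of creating one (field values are illustrative, real configs carry many more model and training options):

import json

params = {
    'model': {},                   # read into params_model (not otherwise used by this script)
    'train': {'batch_size': 4},    # used when constructing the SeqDataset objects
}
with open('params.json', 'w') as params_open:
    json.dump(params, params_open, indent=2)

# The script would then be run roughly as its usage string suggests, e.g.:
#   python train.py -b 4 -p 8 -l 0.1 -e 8 <data_dir> basenji <output_dir> params.json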