Example #1
    def initialize_metrics(self, metric_names):
        """metric_names can be a list or dict of metric keys."""
        # map each metric key to a constructor for its streaming metric
        metric_factories = {
            'acc': lambda: tf.keras.metrics.BinaryAccuracy(),
            'pearsonr': lambda: metrics.PearsonR(self.num_targets),
            'auroc': lambda: tf.keras.metrics.AUC(curve='ROC'),
            'aupr': lambda: tf.keras.metrics.AUC(curve='PR'),
            'cosine': lambda: tf.keras.metrics.CosineSimilarity(),
            'kld': lambda: tf.keras.metrics.KLDivergence(),
            'mse': lambda: tf.keras.metrics.MeanSquaredError(),
            'mae': lambda: tf.keras.metrics.MeanAbsoluteError(),
            'poisson': lambda: tf.keras.metrics.Poisson(),
        }
        for name, factory in metric_factories.items():
            if name in metric_names:
                self.metric_update[name] = factory()  # streaming Keras metric
                self.metric[name] = []                # per-epoch history
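A minimal usage sketch for the two dictionaries above, assuming `self.metric_update` holds streaming Keras metric objects and `self.metric` accumulates one value per epoch; the loop itself is illustrative, not from the repository:

    # illustrative epoch loop over an evaluation dataset
    for x, y in eval_dataset:
        pred = model(x, training=False)
        for m in self.metric_update.values():
            m.update_state(y, pred)  # streaming update per batch
    for name, m in self.metric_update.items():
        self.metric[name].append(m.result().numpy())  # record the epoch value
        m.reset_states()  # clear accumulators for the next epoch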
Example #2
File: seqnn.py Project: polyaB/basenji
    def evaluate(self, seq_data, head_i=None, loss='poisson'):
        """ Evaluate model on SeqDataset. """
        # choose model
        if self.ensemble is not None:
            model = self.ensemble
        elif head_i is not None:
            model = self.models[head_i]
        else:
            model = self.model

        # compile with dense metrics
        num_targets = model.output_shape[-1]

        if loss == 'bce':
            model.compile(optimizer=tf.keras.optimizers.SGD(),
                          loss=loss,
                          metrics=[
                              metrics.SeqAUC(curve='ROC', summarize=False),
                              metrics.SeqAUC(curve='PR', summarize=False)
                          ])
        else:
            model.compile(optimizer=tf.keras.optimizers.SGD(),
                          loss=loss,
                          metrics=[
                              metrics.PearsonR(num_targets, summarize=False),
                              metrics.R2(num_targets, summarize=False)
                          ])

        # evaluate
        return model.evaluate(seq_data.dataset)
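For context, `metrics.PearsonR(num_targets, summarize=False)` reports one correlation per target rather than a single averaged scalar. Below is a minimal sketch of what such a streaming Pearson metric might look like in TF 2.x; it is illustrative, not basenji's actual implementation:

    class PearsonR(tf.keras.metrics.Metric):
        """Streaming per-target Pearson correlation from running sums."""
        def __init__(self, num_targets, summarize=True, name='pearsonr', **kwargs):
            super().__init__(name=name, **kwargs)
            self._summarize = summarize
            shape = (num_targets,)
            self._count = self.add_weight('count', shape=shape, initializer='zeros')
            self._sum_x = self.add_weight('sum_x', shape=shape, initializer='zeros')
            self._sum_y = self.add_weight('sum_y', shape=shape, initializer='zeros')
            self._sum_xx = self.add_weight('sum_xx', shape=shape, initializer='zeros')
            self._sum_yy = self.add_weight('sum_yy', shape=shape, initializer='zeros')
            self._sum_xy = self.add_weight('sum_xy', shape=shape, initializer='zeros')

        def update_state(self, y_true, y_pred, sample_weight=None):
            # collapse batch/length axes; keep the trailing target axis
            y_true = tf.reshape(tf.cast(y_true, tf.float32), (-1, tf.shape(y_true)[-1]))
            y_pred = tf.reshape(tf.cast(y_pred, tf.float32), (-1, tf.shape(y_pred)[-1]))
            self._count.assign_add(tf.reduce_sum(tf.ones_like(y_true), axis=0))
            self._sum_x.assign_add(tf.reduce_sum(y_true, axis=0))
            self._sum_y.assign_add(tf.reduce_sum(y_pred, axis=0))
            self._sum_xx.assign_add(tf.reduce_sum(tf.square(y_true), axis=0))
            self._sum_yy.assign_add(tf.reduce_sum(tf.square(y_pred), axis=0))
            self._sum_xy.assign_add(tf.reduce_sum(y_true * y_pred, axis=0))

        def result(self):
            cov = self._sum_xy - self._sum_x * self._sum_y / self._count
            var_x = self._sum_xx - tf.square(self._sum_x) / self._count
            var_y = self._sum_yy - tf.square(self._sum_y) / self._count
            r = cov / tf.sqrt(var_x * var_y + 1e-12)  # guard zero variance
            return tf.reduce_mean(r) if self._summarize else r

The base `Metric` class resets all weights to zero, so no explicit reset override is needed.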
Example #3
  def compile(self, seqnn_model):
    if self.loss == 'bce':
      model_metrics = [metrics.SeqAUC(curve='ROC'), metrics.SeqAUC(curve='PR')]
    else:
      num_targets = seqnn_model.layers[-1].output_shape[-1]
      model_metrics = [metrics.PearsonR(num_targets), metrics.R2(num_targets)]

    seqnn_model.compile(loss=self.loss_fn,
                        optimizer=self.optimizer,
                        metrics=model_metrics)
    self.compiled = True
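Hypothetical usage of this compile step; the trainer object and its attribute values are illustrative, though the attribute names follow the method above:

    # illustrative only
    trainer.loss = 'poisson'  # any non-'bce' loss selects PearsonR/R2
    trainer.loss_fn = tf.keras.losses.Poisson()
    trainer.optimizer = tf.keras.optimizers.SGD()
    trainer.compile(seqnn_model)  # compiles the underlying Keras model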
Example #4
    def fit_tape(self, seqnn_model):
        if not self.compiled:
            self.compile(seqnn_model)
        model = seqnn_model.model

        # metrics
        num_targets = model.output_shape[-1]
        train_loss = tf.keras.metrics.Mean(name='train_loss')
        train_r = metrics.PearsonR(num_targets, name='train_r')
        train_r2 = metrics.R2(num_targets, name='train_r2')
        valid_loss = tf.keras.metrics.Mean(name='valid_loss')
        valid_r = metrics.PearsonR(num_targets, name='valid_r')
        valid_r2 = metrics.R2(num_targets, name='valid_r2')

        if self.strategy is None:
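            # single-device path: plain tf.function train/eval steps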

            @tf.function
            def train_step(x, y):
                with tf.GradientTape() as tape:
                    pred = model(x, training=True)
                    loss = self.loss_fn(y, pred) + sum(model.losses)
                train_loss(loss)
                train_r(y, pred)
                train_r2(y, pred)
                gradients = tape.gradient(loss, model.trainable_variables)
                self.optimizer.apply_gradients(
                    zip(gradients, model.trainable_variables))

            @tf.function
            def eval_step(x, y):
                pred = model(x, training=False)
                loss = self.loss_fn(y, pred) + sum(model.losses)
                valid_loss(loss)
                valid_r(y, pred)
                valid_r2(y, pred)

        else:
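            # multi-GPU path: the steps below are wrapped by strategy.run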

            def train_step(x, y):
                with tf.GradientTape() as tape:
                    pred = model(x, training=True)
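                    # the loss is reduced manually: average per-example loss
                    # over sequence positions, sum over the replica's examples,
                    # and divide by the global batch size so the cross-replica
                    # SUM reduction recovers the true mean loss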
                    loss_batch_len = self.loss_fn(y, pred)
                    loss_batch = tf.reduce_mean(loss_batch_len, axis=-1)
                    loss = tf.reduce_sum(loss_batch) / self.batch_size
                    loss += sum(model.losses) / self.num_gpu
                train_r(y, pred)
                train_r2(y, pred)
                gradients = tape.gradient(loss, model.trainable_variables)
                self.optimizer.apply_gradients(
                    zip(gradients, model.trainable_variables))
                return loss

            @tf.function
            def train_step_distr(xd, yd):
                replica_losses = self.strategy.run(train_step, args=(xd, yd))
                loss = self.strategy.reduce(tf.distribute.ReduceOp.SUM,
                                            replica_losses,
                                            axis=None)
                train_loss(loss)

            def eval_step(x, y):
                pred = model(x, training=False)
                loss = self.loss_fn(y, pred) + sum(model.losses)
                valid_loss(loss)
                valid_r(y, pred)
                valid_r2(y, pred)

            @tf.function
            def eval_step_distr(xd, yd):
                return self.strategy.run(eval_step, args=(xd, yd))

        # improvement variables
        valid_best = -np.inf
        unimproved = 0

        # training loop
        for ei in range(self.train_epochs_max):
            if ei >= self.train_epochs_min and unimproved > self.patience:
                break
            else:
                # train
                t0 = time.time()
                train_iter = iter(self.train_data[0].dataset)
                for si in range(self.train_epoch_batches[0]):
                    x, y = next(train_iter)
                    if self.strategy is not None:
                        train_step_distr(x, y)
                    else:
                        train_step(x, y)

                # evaluate
                for x, y in self.eval_data[0].dataset:
                    if self.strategy is not None:
                        eval_step_distr(x, y)
                    else:
                        eval_step(x, y)

                # print training accuracy
                train_loss_epoch = train_loss.result().numpy()
                train_r_epoch = train_r.result().numpy()
                train_r2_epoch = train_r2.result().numpy()
                print('Epoch %d - %ds - train_loss: %.4f - train_r: %.4f - train_r2: %.4f' % \
                  (ei, (time.time()-t0), train_loss_epoch, train_r_epoch, train_r2_epoch), end='')

                # print validation accuracy
                valid_loss_epoch = valid_loss.result().numpy()
                valid_r_epoch = valid_r.result().numpy()
                valid_r2_epoch = valid_r2.result().numpy()
                print(' - valid_loss: %.4f - valid_r: %.4f - valid_r2: %.4f' % \
                  (valid_loss_epoch, valid_r_epoch, valid_r2_epoch), end='')

                # checkpoint
                seqnn_model.save('%s/model_check.h5' % self.out_dir)

                # check best
                if valid_r_epoch > valid_best:
                    print(' - best!', end='')
                    unimproved = 0
                    valid_best = valid_r_epoch
                    seqnn_model.save('%s/model_best.h5' % self.out_dir)
                else:
                    unimproved += 1
                print('', flush=True)

                # reset metrics
                train_loss.reset_states()
                train_r.reset_states()
                train_r2.reset_states()
                valid_loss.reset_states()
                valid_r.reset_states()
                valid_r2.reset_states()
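For reference, a minimal sketch of the distributed setup the `self.strategy` branch assumes; `make_model` and `train_dataset` are hypothetical stand-ins, and the actual trainer wires this up elsewhere:

    strategy = tf.distribute.MirroredStrategy()
    num_gpu = strategy.num_replicas_in_sync  # feeds self.num_gpu above
    with strategy.scope():  # variables are mirrored across replicas
        model = make_model()  # hypothetical model builder
        optimizer = tf.keras.optimizers.SGD()
    # each replica receives batch_size / num_gpu examples per step
    dist_dataset = strategy.experimental_distribute_dataset(train_dataset)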
Example #5
    def fit2(self, seqnn_model):
        if not self.compiled:
            self.compile(seqnn_model)

        assert len(seqnn_model.models) >= self.num_datasets

        ################################################################
        # prep

        # metrics
        train_loss, train_r, train_r2 = [], [], []
        valid_loss, valid_r, valid_r2 = [], [], []
        for di in range(self.num_datasets):
            num_targets = seqnn_model.models[di].output_shape[-1]
            train_loss.append(tf.keras.metrics.Mean(name='train%d_loss' % di))
            train_r.append(metrics.PearsonR(num_targets,
                                            name='train%d_r' % di))
            train_r2.append(metrics.R2(num_targets, name='train%d_r2' % di))
            valid_loss.append(tf.keras.metrics.Mean(name='valid%d_loss' % di))
            valid_r.append(metrics.PearsonR(num_targets,
                                            name='valid%d_r' % di))
            valid_r2.append(metrics.R2(num_targets, name='valid%d_r2' % di))

        # generate decorated train steps
        # (loop version kept for reference; the unrolled steps below are used)
        """
        train_steps = []
        for di in range(self.num_datasets):
          model = seqnn_model.models[di]

          @tf.function
          def train_step(x, y):
            with tf.GradientTape() as tape:
              pred = model(x, training=tf.constant(True))
              loss = self.loss_fn(y, pred) + sum(model.losses)
            train_loss[di](loss)
            train_r[di](y, pred)
            train_r2[di](y, pred)
            gradients = tape.gradient(loss, model.trainable_variables)
            self.optimizer.apply_gradients(zip(gradients, model.trainable_variables))

          train_steps.append(train_step)
        """
        @tf.function
        def train_step0(x, y):
            with tf.GradientTape() as tape:
                pred = seqnn_model.models[0](x, training=True)
                loss = self.loss_fn(y, pred) + sum(
                    seqnn_model.models[0].losses)
            train_loss[0](loss)
            train_r[0](y, pred)
            train_r2[0](y, pred)
            gradients = tape.gradient(
                loss, seqnn_model.models[0].trainable_variables)
            self.optimizer.apply_gradients(
                zip(gradients, seqnn_model.models[0].trainable_variables))

        @tf.function
        def eval_step0(x, y):
            pred = seqnn_model.models[0](x, training=False)
            loss = self.loss_fn(y, pred) + sum(seqnn_model.models[0].losses)
            valid_loss[0](loss)
            valid_r[0](y, pred)
            valid_r2[0](y, pred)

        if self.num_datasets > 1:

            @tf.function
            def train_step1(x, y):
                with tf.GradientTape() as tape:
                    pred = seqnn_model.models[1](x, training=True)
                    loss = self.loss_fn(y, pred) + sum(
                        seqnn_model.models[1].losses)
                train_loss[1](loss)
                train_r[1](y, pred)
                train_r2[1](y, pred)
                gradients = tape.gradient(
                    loss, seqnn_model.models[1].trainable_variables)
                self.optimizer.apply_gradients(
                    zip(gradients, seqnn_model.models[1].trainable_variables))

            @tf.function
            def eval_step1(x, y):
                pred = seqnn_model.models[1](x, training=False)
                loss = self.loss_fn(y, pred) + sum(
                    seqnn_model.models[1].losses)
                valid_loss[1](loss)
                valid_r[1](y, pred)
                valid_r2[1](y, pred)

        # improvement variables
        valid_best = [-np.inf] * self.num_datasets
        unimproved = [0] * self.num_datasets

        ################################################################
        # training loop

        for ei in range(self.train_epochs_max):
            if ei >= self.train_epochs_min and np.min(
                    unimproved) > self.patience:
                break
            else:
                # shuffle datasets
                np.random.shuffle(self.dataset_indexes)

                # get iterators
                train_data_iters = [iter(td.dataset) for td in self.train_data]

                # train
                t0 = time.time()
                for di in self.dataset_indexes:
                    x, y = next(train_data_iters[di])
                    if di == 0:
                        train_step0(x, y)
                    else:
                        train_step1(x, y)

                print('Epoch %d - %ds' % (ei, (time.time() - t0)))
                for di in range(self.num_datasets):
                    print('  Data %d' % di, end='')
                    model = seqnn_model.models[di]

                    # print training accuracy
                    print(' - train_loss: %.4f' %
                          train_loss[di].result().numpy(),
                          end='')
                    print(' - train_r: %.4f' % train_r[di].result().numpy(),
                          end='')
                    print(' - train_r2: %.4f' % train_r2[di].result().numpy(),
                          end='')

                    # evaluate
                    for x, y in self.eval_data[di].dataset:
                        if di == 0:
                            eval_step0(x, y)
                        else:
                            eval_step1(x, y)

                    # print validation accuracy
                    print(' - valid_loss: %.4f' %
                          valid_loss[di].result().numpy(),
                          end='')
                    print(' - valid_r: %.4f' % valid_r[di].result().numpy(),
                          end='')
                    print(' - valid_r2: %.4f' % valid_r2[di].result().numpy(),
                          end='')
                    early_stop_stat = valid_r[di].result().numpy()

                    # checkpoint
                    model.save('%s/model%d_check.h5' % (self.out_dir, di))

                    # check best
                    if early_stop_stat > valid_best[di]:
                        print(' - best!', end='')
                        unimproved[di] = 0
                        valid_best[di] = early_stop_stat
                        model.save('%s/model%d_best.h5' % (self.out_dir, di))
                    else:
                        unimproved[di] += 1
                    print('', flush=True)

                    # reset metrics
                    train_loss[di].reset_states()
                    train_r[di].reset_states()
                    train_r2[di].reset_states()
                    valid_loss[di].reset_states()
                    valid_r[di].reset_states()
                    valid_r2[di].reset_states()
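The unrolled `train_step0`/`train_step1` pairs (and the commented-out loop kept above them) only scale to two datasets; the loop version was presumably abandoned because every closure shared the loop variables. A sketch of a factory that binds each model and index eagerly, so every `tf.function` captures its own dataset; attribute names follow the method above:

        def make_train_step(model, di):
            @tf.function
            def train_step(x, y):
                with tf.GradientTape() as tape:
                    pred = model(x, training=True)
                    loss = self.loss_fn(y, pred) + sum(model.losses)
                train_loss[di](loss)
                train_r[di](y, pred)
                train_r2[di](y, pred)
                gradients = tape.gradient(loss, model.trainable_variables)
                self.optimizer.apply_gradients(
                    zip(gradients, model.trainable_variables))
            return train_step

        train_steps = [make_train_step(seqnn_model.models[di], di)
                       for di in range(self.num_datasets)]
        # in the epoch loop: train_steps[di](x, y)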
Example #6
import json
import os
from optparse import OptionParser

import tensorflow as tf

# imports assumed from the basenji package layout
from basenji import dataset
from basenji import metrics
import model_zoo  # project-local module providing basenji_model


def main():
    usage = 'usage: %prog [options] <data_dir> <model_name> <output_dir> <params_file>...'
    parser = OptionParser(usage)
    parser.add_option(
        '-b',
        dest='batch_size',
        default=4,
        type='int',
        help='Batch size for the model training [Default: %default]')
    parser.add_option('-p',
                      dest='patience',
                      default=8,
                      type='int',
                      help='Training patience [Default: %default]')
    parser.add_option('-l',
                      dest='learning_rate',
                      default=0.1,
                      type='float',
                      help='Learning rate [Default: %default]')
    parser.add_option('-m',
                      dest='momentum',
                      default=0.99,
                      type='float',
                      help='SGD momentum [Default: %default]')
    parser.add_option('-e',
                      dest='n_epochs',
                      default=8,
                      type='int',
                      help='Number of training epochs [Default: %default]')
    parser.add_option('--clip_norm',
                      dest='clip_norm',
                      default=1000000,
                      type='float',
                      help='Gradient clipping norm [Default: %default]')
    (options, args) = parser.parse_args()
    # TODO: add the rest of the parameters
    if len(args) < 4:
        parser.error('Must provide data_dir, model_name, output_dir, and params_file.')
    else:
        data_dir = args[0]
        model_name = args[1]
        output_dir = args[2]
        params_file = args[3]

    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    # load data

    # read model parameters
    with open(params_file) as params_open:
        params = json.load(params_open)
    params_model = params['model']
    params_train = params['train']

    # read datasets
    train_data = []
    eval_data = []

    # load train data
    train_data.append(
        dataset.SeqDataset(data_dir,
                           split_label='train',
                           batch_size=params_train['batch_size'],
                           mode='train'))

    # load eval data
    eval_data.append(
        dataset.SeqDataset(data_dir,
                           split_label='valid',
                           batch_size=params_train['batch_size'],
                           mode='eval'))

    if model_name == 'basenji':
        model = model_zoo.basenji_model((131072, 4), 3)
    else:
        parser.error('Unknown model name: %s' % model_name)
    loss_fn = tf.keras.losses.Poisson(reduction=tf.keras.losses.Reduction.NONE)
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_pearsonr',
                                                  patience=options.patience,
                                                  verbose=1,
                                                  mode='max')
    save_best = tf.keras.callbacks.ModelCheckpoint(
        '{}/model_best.h5'.format(output_dir),
        save_best_only=True,
        mode='max',
        monitor='val_pearsonr',
        verbose=1)
    callbacks = [
        early_stop,
        tf.keras.callbacks.TensorBoard(output_dir),
        tf.keras.callbacks.ModelCheckpoint('%s/model_check.h5' % output_dir),
        save_best
    ]
    # fit model
    num_targets = model.output_shape[-1]
    print('num_targets ', num_targets)
    model_metrics = [metrics.PearsonR(num_targets), metrics.R2(num_targets)]

    optimizer = tf.keras.optimizers.SGD(learning_rate=options.learning_rate,
                                        momentum=options.momentum,
                                        clipnorm=options.clip_norm)

    model.compile(loss=loss_fn, optimizer=optimizer, metrics=model_metrics)
    model.fit(train_data[0].dataset,
              epochs=options.n_epochs,
              callbacks=callbacks,
              validation_data=eval_data[0].dataset)


if __name__ == '__main__':
    main()
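A minimal params_file sketch consistent with the reads above; only train.batch_size is consumed in this snippet, the model block is parsed but unused here, and all field values are illustrative:

    # params.json, written via Python for illustration
    import json
    params = {
        'model': {},  # loaded into params_model above
        'train': {'batch_size': 4},  # loaded into params_train above
    }
    with open('params.json', 'w') as f:
        json.dump(params, f, indent=2)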