Example 1
0
    def build_sad(self,
                  job,
                  data_ops,
                  embed_penultimate=False,
                  target_subset=None):
        """Build SAD predict ops."""
        # Materialize hyper-parameters lazily on first build.
        if not self.hparams_set:
            self.hp = params.make_hparams(job)
            self.hparams_set = True

        # training conditional placeholder
        self.is_training = tf.placeholder(tf.bool, name='is_training')

        # deterministic augmentation produces one data_ops dict per
        # ensemble member (reverse-complement / shift combinations)
        ensemble_ops = augmentation.augment_deterministic_set(
            data_ops, job["ensemble_rc"], job["ensemble_shifts"])
        seqs_eval = tf.stack([member['sequence'] for member in ensemble_ops])
        revs_eval = tf.stack([member['reverse_preds'] for member in ensemble_ops])

        # predict each ensemble member, then average the members
        def _predict_member(elem):
            return self.build_predict(elem[0], elem[1], embed_penultimate,
                                      target_subset)

        self.preds_ensemble = tf.map_fn(_predict_member,
                                        (seqs_eval, revs_eval),
                                        dtype=tf.float32,
                                        back_prop=False)
        self.preds_eval = tf.reduce_mean(self.preds_ensemble, axis=0)

        # restrict target count when a subset was requested
        if target_subset is not None:
            self.hp.num_targets = len(target_subset)

        # helper variables
        self.preds_length = self.preds_eval.shape[1]
Example 2
0
    def build_feed(
        self,
        job,
        augment_rc=False,
        augment_shifts=None,
        ensemble_rc=False,
        ensemble_shifts=None,
        embed_penultimate=False,
        target_subset=None,
    ):
        """Build training ops that depend on placeholders.

        Args:
          job: job parameter dict, passed to params.make_hparams.
          augment_rc: apply stochastic reverse-complement augmentation.
          augment_shifts: sequence shifts for stochastic augmentation;
            defaults to [0] (no shift).
          ensemble_rc: ensemble reverse-complement predictions at eval.
          ensemble_shifts: sequence shifts for the eval ensemble;
            defaults to [0].
          embed_penultimate: expose penultimate-layer outputs as predictions.
          target_subset: optional indices restricting predicted targets.
        """
        # None sentinels avoid the shared mutable-default-argument pitfall
        # (a list default is created once and shared across all calls).
        if augment_shifts is None:
            augment_shifts = [0]
        if ensemble_shifts is None:
            ensemble_shifts = [0]

        self.hp = params.make_hparams(job)
        self.hparams_set = True
        data_ops = self.make_placeholders()

        self.build_from_data_ops(
            job,
            data_ops,
            augment_rc=augment_rc,
            augment_shifts=augment_shifts,
            ensemble_rc=ensemble_rc,
            ensemble_shifts=ensemble_shifts,
            embed_penultimate=embed_penultimate,
            target_subset=target_subset,
        )
Example 3
0
    def build(self, job, target_subset=None):
        """Build training ops that depend on placeholders.

        Args:
          job: job parameter dict, passed to params.make_hparams.
          target_subset: optional indices restricting predicted targets.
        """
        self.hp = params.make_hparams(job)
        self.hparams_set = True
        data_ops = self.make_placeholders()

        # Pass target_subset by keyword so it cannot silently bind to a
        # different optional parameter (e.g. embed_penultimate) if the
        # build_from_data_ops signature gains parameters before it.
        self.build_from_data_ops(job, data_ops, target_subset=target_subset)
Example 4
0
    def build_feed(self, job, embed_penultimate=False, target_subset=None):
        """Build training ops that depend on placeholders."""
        # hyper-parameters come straight from the job description
        self.hp = params.make_hparams(job)
        self.hparams_set = True

        # the graph is fed through placeholders rather than dataset ops
        placeholder_ops = self.make_placeholders()
        self.build_from_data_ops(
            job,
            placeholder_ops,
            embed_penultimate=embed_penultimate,
            target_subset=target_subset,
        )
Example 5
0
    def build_from_data_ops(self, job, data_ops, target_subset=None):
        """Build training ops from input data ops."""
        # materialize hyper-parameters lazily on first build
        if not self.hparams_set:
            self.hp = params.make_hparams(job)
            self.hparams_set = True

        # cache the raw input/target tensors for later use
        self.inputs = data_ops['sequence']
        self.targets = data_ops['label']
        self.targets_na = data_ops['na']

        # representation -> loss -> optimizer
        representation = self.build_representation(data_ops, target_subset)
        self.loss_op, self.loss_adhoc = self.build_loss(
            representation, data_ops, target_subset)
        self.build_optimizer(self.loss_op)
Example 6
0
  def build_feed_sad(self, job,
                     ensemble_rc=False, ensemble_shifts=None,
                     embed_penultimate=False, target_subset=None):
    """Build SAD predict ops that depend on placeholders.

    Args:
      job: job parameter dict, passed to params.make_hparams.
      ensemble_rc: ensemble reverse-complement predictions at eval.
      ensemble_shifts: sequence shifts for the eval ensemble;
        defaults to [0] (no shift).
      embed_penultimate: expose penultimate-layer outputs as predictions.
      target_subset: optional indices restricting predicted targets.
    """
    # None sentinel avoids the shared mutable-default-argument pitfall.
    if ensemble_shifts is None:
      ensemble_shifts = [0]

    self.hp = params.make_hparams(job)
    self.hparams_set = True
    data_ops = self.make_placeholders()

    self.build_sad(job, data_ops,
                   ensemble_rc=ensemble_rc,
                   ensemble_shifts=ensemble_shifts,
                   embed_penultimate=embed_penultimate,
                   target_subset=target_subset)
Example 7
0
    def build_from_data_ops(self,
                            job,
                            data_ops,
                            embed_penultimate=False,
                            target_subset=None):
        """Build training ops from input data ops.

        Args:
          job: job parameter dict; read for 'augment_rc', 'augment_shifts',
            'ensemble_rc', and 'ensemble_shifts', and passed to
            params.make_hparams when hyper-parameters are not yet set.
          data_ops: dict of input tensors; 'sequence' and 'label' are read
            here, and 'reverse_preds' after deterministic augmentation.
          embed_penultimate: when True, skip loss/optimizer construction
            (penultimate-layer outputs are exposed via build_predict).
          target_subset: optional sequence of target indices; when given,
            self.hp.num_targets is updated to its length.

        Side effects:
          Sets self.is_training, self.preds_train, self.target_length,
          self.preds_ensemble, self.preds_eval, self.preds_length, and —
          unless embed_penultimate — the train/eval loss attributes, the
          update ops collection, and the optimizer (via build_optimizer).
        """
        if not self.hparams_set:
            self.hp = params.make_hparams(job)
            self.hparams_set = True

        # training conditional
        self.is_training = tf.placeholder(tf.bool, name='is_training')

        ##################################################
        # training

        # training data_ops w/ stochastic augmentation
        data_ops_train = augmentation.augment_stochastic(
            data_ops, job["augment_rc"], job["augment_shifts"])

        # compute train representation
        self.preds_train = self.build_predict(data_ops_train['sequence'],
                                              None,
                                              embed_penultimate,
                                              target_subset,
                                              save_reprs=True)
        # TF1 TensorShape dimension -> Python int via .value
        self.target_length = self.preds_train.shape[1].value

        # training losses
        if not embed_penultimate:
            loss_returns = self.build_loss(self.preds_train,
                                           data_ops_train['label'],
                                           target_subset)
            self.loss_train, self.loss_train_targets, self.targets_train = loss_returns

            # optimizer
            self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.build_optimizer(self.loss_train)

        ##################################################
        # eval

        # eval data ops w/ deterministic augmentation: one data_ops dict per
        # ensemble member (reverse-complement / shift combination)
        data_ops_eval = augmentation.augment_deterministic_set(
            data_ops, job["ensemble_rc"], job["ensemble_shifts"])
        data_seq_eval = tf.stack([do['sequence'] for do in data_ops_eval])
        data_rev_eval = tf.stack([do['reverse_preds'] for do in data_ops_eval])

        # compute eval representation: map build_predict over ensemble
        # members and average; back_prop=False since eval needs no gradients
        map_elems_eval = (data_seq_eval, data_rev_eval)
        build_rep = lambda do: self.build_predict(do[0], do[
            1], embed_penultimate, target_subset)
        self.preds_ensemble = tf.map_fn(build_rep,
                                        map_elems_eval,
                                        dtype=tf.float32,
                                        back_prop=False)
        self.preds_eval = tf.reduce_mean(self.preds_ensemble, axis=0)

        # eval loss (computed against the un-augmented labels)
        if not embed_penultimate:
            loss_returns = self.build_loss(self.preds_eval, data_ops['label'],
                                           target_subset)
            self.loss_eval, self.loss_eval_targets, self.targets_eval = loss_returns

        # update # targets
        if target_subset is not None:
            self.hp.num_targets = len(target_subset)

        # helper variables
        # NOTE(review): unlike target_length above, no .value here, so this
        # may hold a TF Dimension rather than an int — confirm intended.
        self.preds_length = self.preds_train.shape[1]
Example 8
0
    def build_from_data_ops(
        self,
        job,
        data_ops,
        augment_rc=False,
        augment_shifts=[0],
        ensemble_rc=False,
        ensemble_shifts=[0],
        embed_penultimate=False,
        target_subset=None,
    ):
        """Build training ops from input data ops.

        Args:
          job: job parameter dict, passed to params.make_hparams when
            hyper-parameters are not yet set.
          data_ops: dict of input tensors; 'sequence' and 'label' are read
            here, 'genome' optionally (defaults to None), and
            'reverse_preds' after deterministic augmentation.
          augment_rc: apply stochastic reverse-complement augmentation
            during training.
          augment_shifts: sequence shifts for stochastic augmentation.
            NOTE(review): mutable list default — safe only if never mutated
            by callees; consider a None sentinel.
          ensemble_rc: ensemble reverse-complement predictions at eval.
          ensemble_shifts: sequence shifts for the eval ensemble (same
            mutable-default caveat as augment_shifts).
          embed_penultimate: when True, skip loss/optimizer construction.
          target_subset: optional sequence of target indices; when given,
            self.hp.num_targets is updated to its length.

        Side effects:
          Sets self.is_training, self.preds_train, self.target_length,
          self.preds_ensemble, self.preds_eval, self.preds_length, and —
          unless embed_penultimate — the train/eval loss attributes,
          self.preds_eval_loss, the update ops collection, and the
          optimizer (via build_optimizer).
        """

        if not self.hparams_set:
            self.hp = params.make_hparams(job)
            self.hparams_set = True

        # training conditional
        self.is_training = tf.placeholder(tf.bool, name="is_training")

        ##################################################
        # training

        # training data_ops w/ stochastic augmentation
        data_ops_train = augmentation.augment_stochastic(
            data_ops, augment_rc, augment_shifts)

        # compute train representation
        self.preds_train = self.build_predict(
            data_ops_train["sequence"],
            None,
            embed_penultimate,
            target_subset,
            save_reprs=True,
        )
        # TF1 TensorShape dimension -> Python int via .value
        self.target_length = self.preds_train.shape[1].value

        # training losses
        if not embed_penultimate:
            # build_loss returns at least 3 values here; only the first
            # three (loss, per-target losses, targets) are kept.
            loss_returns = self.build_loss(
                self.preds_train,
                data_ops_train["label"],
                data_ops.get("genome", None),
                target_subset,
            )
            self.loss_train, self.loss_train_targets, self.targets_train = loss_returns[:
                                                                                        3]

            # optimizer
            self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            self.build_optimizer(self.loss_train)

            # allegedly correct, but outperformed by skipping
            # with tf.control_dependencies(self.update_ops):
            #   self.build_optimizer(self.loss_train)

        ##################################################
        # eval

        # eval data ops w/ deterministic augmentation: one data_ops dict per
        # ensemble member (reverse-complement / shift combination)
        data_ops_eval = augmentation.augment_deterministic_set(
            data_ops, ensemble_rc, ensemble_shifts)
        data_seq_eval = tf.stack([do["sequence"] for do in data_ops_eval])
        data_rev_eval = tf.stack([do["reverse_preds"] for do in data_ops_eval])

        # compute eval representation: map build_predict over ensemble
        # members and average; back_prop=False since eval needs no gradients
        map_elems_eval = (data_seq_eval, data_rev_eval)
        build_rep = lambda do: self.build_predict(do[0], do[
            1], embed_penultimate, target_subset)
        self.preds_ensemble = tf.map_fn(build_rep,
                                        map_elems_eval,
                                        dtype=tf.float32,
                                        back_prop=False)
        self.preds_eval = tf.reduce_mean(self.preds_ensemble, axis=0)

        # eval loss and metrics
        if not embed_penultimate:
            # here build_loss must return exactly 4 values; the 4th
            # (loss-transformed predictions) is kept as preds_eval_loss
            loss_returns = self.build_loss(
                self.preds_eval,
                data_ops["label"],
                data_ops.get("genome", None),
                target_subset,
            )
            self.loss_eval, self.loss_eval_targets, self.targets_eval, self.preds_eval_loss = (
                loss_returns)

        # update # targets
        if target_subset is not None:
            self.hp.num_targets = len(target_subset)

        # helper variables
        self.preds_length = self.preds_train.shape[1].value